linux/net/ipv4/tcp_fastopen.c
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <net/inetpeer.h>
#include <net/tcp.h>

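/* Fast Open is enabled on the client side only by default; the server
 * side is switched on via the TFO_SERVER_ENABLE bit of the
 * net.ipv4.tcp_fastopen sysctl.
 */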
int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;

struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;

static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);

void tcp_fastopen_init_key_once(bool publish)
{
        static u8 key[TCP_FASTOPEN_KEY_LENGTH];

        /* tcp_fastopen_reset_cipher publishes the new context
         * atomically, so we allow this race to happen here.
         *
         * All call sites of tcp_fastopen_cookie_gen also check
         * for a valid cookie, so this is an acceptable risk.
         */
        if (net_get_random_once(key, sizeof(key)) && publish)
                tcp_fastopen_reset_cipher(key, sizeof(key));
}

static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
        struct tcp_fastopen_context *ctx =
            container_of(head, struct tcp_fastopen_context, rcu);
        crypto_free_cipher(ctx->tfm);
        kfree(ctx);
}

int tcp_fastopen_reset_cipher(void *key, unsigned int len)
{
        int err;
        struct tcp_fastopen_context *ctx, *octx;

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return -ENOMEM;
        ctx->tfm = crypto_alloc_cipher("aes", 0, 0);

        if (IS_ERR(ctx->tfm)) {
                err = PTR_ERR(ctx->tfm);
error:          kfree(ctx);
                pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
                return err;
        }
        err = crypto_cipher_setkey(ctx->tfm, key, len);
        if (err) {
                pr_err("TCP: TFO cipher key error: %d\n", err);
                crypto_free_cipher(ctx->tfm);
                goto error;
        }
        memcpy(ctx->key, key, len);

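        /* Publish the new context under the lock; the old one is freed
         * only after an RCU grace period, so readers in
         * __tcp_fastopen_cookie_gen() never see freed memory.
         */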
        spin_lock(&tcp_fastopen_ctx_lock);

        octx = rcu_dereference_protected(tcp_fastopen_ctx,
                                lockdep_is_held(&tcp_fastopen_ctx_lock));
        rcu_assign_pointer(tcp_fastopen_ctx, ctx);
        spin_unlock(&tcp_fastopen_ctx_lock);

        if (octx)
                call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
        return err;
}

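/* Encrypt one AES block of address material with the currently
 * installed key to derive the cookie.  Returns false if no key has
 * been installed yet (tcp_fastopen_ctx is still NULL).
 */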
static bool __tcp_fastopen_cookie_gen(const void *path,
                                      struct tcp_fastopen_cookie *foc)
{
        struct tcp_fastopen_context *ctx;
        bool ok = false;

        rcu_read_lock();
        ctx = rcu_dereference(tcp_fastopen_ctx);
        if (ctx) {
                crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
                foc->len = TCP_FASTOPEN_COOKIE_SIZE;
                ok = true;
        }
        rcu_read_unlock();
        return ok;
}

/* Generate the fastopen cookie by doing aes128 encryption on both
 * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
 * addresses. For the longer IPv6 addresses use CBC-MAC.
 *
 * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
 */
static bool tcp_fastopen_cookie_gen(struct request_sock *req,
                                    struct sk_buff *syn,
                                    struct tcp_fastopen_cookie *foc)
{
        if (req->rsk_ops->family == AF_INET) {
                const struct iphdr *iph = ip_hdr(syn);

                __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
                return __tcp_fastopen_cookie_gen(path, foc);
        }

#if IS_ENABLED(CONFIG_IPV6)
        if (req->rsk_ops->family == AF_INET6) {
                const struct ipv6hdr *ip6h = ipv6_hdr(syn);
                struct tcp_fastopen_cookie tmp;

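                /* CBC-MAC over the two 16-byte addresses: encrypt the
                 * source address, XOR the result with the destination
                 * address, then encrypt that block once more.
                 */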
                if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
                        struct in6_addr *buf = (struct in6_addr *) tmp.val;
                        int i;

                        for (i = 0; i < 4; i++)
                                buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
                        return __tcp_fastopen_cookie_gen(buf, foc);
                }
        }
#endif
        return false;
}


/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
 * queue this additional data / FIN.
 */
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
                return;

        skb = skb_clone(skb, GFP_ATOMIC);
        if (!skb)
                return;

        skb_dst_drop(skb);
        /* segs_in has been initialized to 1 in tcp_create_openreq_child().
         * Hence, reset segs_in to 0 before calling tcp_segs_in()
         * to avoid double counting.  Also, tcp_segs_in() expects
         * skb->len to include the tcp_hdrlen.  Hence, it should
         * be called before __skb_pull().
         */
        tp->segs_in = 0;
        tcp_segs_in(tp, skb);
        __skb_pull(skb, tcp_hdrlen(skb));
        sk_forced_mem_schedule(sk, skb->truesize);
        skb_set_owner_r(skb, sk);

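        /* The SYN consumes one sequence number; step past it and clear
         * the SYN flag so the queued skb covers only the payload (and
         * possibly a FIN).
         */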
        TCP_SKB_CB(skb)->seq++;
        TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;

        tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        tp->syn_data_acked = 1;

        /* u64_stats_update_begin(&tp->syncp) not needed here,
         * as we certainly are not changing upper 32bit value (0)
         */
        tp->bytes_received = skb->len;

        if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                tcp_fin(sk);
}

static struct sock *tcp_fastopen_create_child(struct sock *sk,
                                              struct sk_buff *skb,
                                              struct dst_entry *dst,
                                              struct request_sock *req)
{
        struct tcp_sock *tp;
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
        struct sock *child;
        bool own_req;

        req->num_retrans = 0;
        req->num_timeout = 0;
        req->sk = NULL;

        child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
                                                         NULL, &own_req);
        if (!child)
                return NULL;

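        /* Account for this pending Fast Open child; tcp_fastopen_queue_check()
         * compares qlen against max_qlen before honouring further TFO
         * requests on this listener.
         */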
        spin_lock(&queue->fastopenq.lock);
        queue->fastopenq.qlen++;
        spin_unlock(&queue->fastopenq.lock);

        /* Initialize the child socket. Have to fix some values to take
         * into account the child is a Fast Open socket and is created
         * only out of the bits carried in the SYN packet.
         */
        tp = tcp_sk(child);

        tp->fastopen_rsk = req;
        tcp_rsk(req)->tfo_listener = true;

        /* RFC1323: The window in SYN & SYN/ACK segments is never
         * scaled. So correct it appropriately.
         */
        tp->snd_wnd = ntohs(tcp_hdr(skb)->window);

        /* Activate the retrans timer so that SYNACK can be retransmitted.
         * The request socket is not added to the ehash
         * because it's been added to the accept queue directly.
         */
        inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
                                  TCP_TIMEOUT_INIT, TCP_RTO_MAX);

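        /* Two references on the request sock: one held by the child via
         * tp->fastopen_rsk until the handshake fully completes, the other
         * for the entry kept in the listener's accept queue.
         */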
        atomic_set(&req->rsk_refcnt, 2);

        /* Now finish processing the fastopen child socket. */
        inet_csk(child)->icsk_af_ops->rebuild_header(child);
        tcp_init_congestion_control(child);
        tcp_mtup_init(child);
        tcp_init_metrics(child);
        tcp_init_buffer_space(child);

        tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

        tcp_fastopen_add_skb(child, skb);

        tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
        tp->rcv_wup = tp->rcv_nxt;
        /* tcp_conn_request() is sending the SYNACK,
         * and queues the child into the listener's accept queue.
         */
        return child;
}

static bool tcp_fastopen_queue_check(struct sock *sk)
{
        struct fastopen_queue *fastopenq;

        /* Make sure the listener has enabled fastopen, and we don't
         * exceed the max # of pending TFO requests allowed before trying
         * to validate the cookie, in order to avoid burning CPU cycles
         * unnecessarily.
         *
         * XXX (TFO) - The implication of checking the max_qlen before
         * processing a cookie request is that clients can't differentiate
         * between qlen overflow causing Fast Open to be disabled
         * temporarily vs a server not supporting Fast Open at all.
         */
        fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
        if (fastopenq->max_qlen == 0)
                return false;

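        /* When the queue is full, check whether an earlier TFO request
         * that was reset by the peer (kept on rskq_rst_head as a defence
         * against spoofed SYN-with-data floods) has expired; if so,
         * reclaim its slot, otherwise refuse Fast Open for now.
         */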
        if (fastopenq->qlen >= fastopenq->max_qlen) {
                struct request_sock *req1;
                spin_lock(&fastopenq->lock);
                req1 = fastopenq->rskq_rst_head;
                if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
                        __NET_INC_STATS(sock_net(sk),
                                        LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
                        spin_unlock(&fastopenq->lock);
                        return false;
                }
                fastopenq->rskq_rst_head = req1->dl_next;
                fastopenq->qlen--;
                spin_unlock(&fastopenq->lock);
                reqsk_put(req1);
        }
        return true;
}

/* Try to perform Fast Open on the incoming SYN.  Returns the child
 * socket when the connection is accepted via Fast Open, or NULL
 * otherwise.  The cookie (foc) may be updated and returned to the
 * client in the SYN-ACK later, e.g. for a Fast Open cookie request
 * (foc->len == 0).
 */
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              struct dst_entry *dst)
{
        struct tcp_fastopen_cookie valid_foc = { .len = -1 };
        bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
        struct sock *child;

        if (foc->len == 0) /* Client requests a cookie */
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

        if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
              (syn_data || foc->len >= 0) &&
              tcp_fastopen_queue_check(sk))) {
                foc->len = -1;
                return NULL;
        }

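        /* With TFO_SERVER_COOKIE_NOT_REQD the server accepts data on the
         * SYN without requiring the client to present a valid cookie.
         */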
        if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
                goto fastopen;

        if (foc->len >= 0 &&  /* Client presents or requests a cookie */
            tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
            foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
            foc->len == valid_foc.len &&
            !memcmp(foc->val, valid_foc.val, foc->len)) {
                /* Cookie is valid. Create a (full) child socket to accept
                 * the data in SYN before returning a SYN-ACK to ack the
                 * data. If we fail to create the socket, fall back and
                 * ack only the ISN but include the same cookie.
                 *
                 * Note: a data-less SYN with a valid cookie is allowed to
                 * send data in SYN_RECV state.
                 */
fastopen:
                child = tcp_fastopen_create_child(sk, skb, dst, req);
                if (child) {
                        foc->len = -1;
                        NET_INC_STATS(sock_net(sk),
                                      LINUX_MIB_TCPFASTOPENPASSIVE);
                        return child;
                }
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
        } else if (foc->len > 0) /* Client presents an invalid cookie */
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);

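        /* Fast Open was not performed; hand back the cookie we consider
         * valid (if one was generated) so it is echoed in the SYN-ACK,
         * preserving the client's choice of TFO option format (exp).
         */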
        valid_foc.exp = foc->exp;
        *foc = valid_foc;
        return NULL;
}
