linux/net/ipv4/tcp_offload.c
/*
 *      IPV4 GSO/GRO offload support
 *      Linux INET implementation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      TCPv4 GSO/GRO support
 */

#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/protocol.h>

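/* Copy the software TX timestamp request from the original GSO skb to the
 * one segment whose sequence range covers the requested byte (ts_seq).
 */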
static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
                           unsigned int seq, unsigned int mss)
{
        while (skb) {
                if (before(ts_seq, seq + mss)) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
                        skb_shinfo(skb)->tskey = ts_seq;
                        return;
                }

                skb = skb->next;
                seq += mss;
        }
}

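/* IPv4 GSO callback: if the stack has not already prepared a CHECKSUM_PARTIAL
 * pseudo-header checksum, set it up here before handing the skb to the
 * protocol-independent tcp_gso_segment().
 */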
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
                                        netdev_features_t features)
{
        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                return ERR_PTR(-EINVAL);

        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                const struct iphdr *iph = ip_hdr(skb);
                struct tcphdr *th = tcp_hdr(skb);

                /* Set up checksum pseudo header, usually expect stack to
                 * have done this already.
                 */

                th->check = 0;
                skb->ip_summed = CHECKSUM_PARTIAL;
                __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
        }

        return tcp_gso_segment(skb, features);
}

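/* Split a large TCP skb into MSS-sized segments: validate the TCP header,
 * let skb_segment() do the actual split, then fix up sequence numbers,
 * flags and checksums on every resulting segment.
 */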
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
                                netdev_features_t features)
{
        struct sk_buff *segs = ERR_PTR(-EINVAL);
        unsigned int sum_truesize = 0;
        struct tcphdr *th;
        unsigned int thlen;
        unsigned int seq;
        __be32 delta;
        unsigned int oldlen;
        unsigned int mss;
        struct sk_buff *gso_skb = skb;
        __sum16 newcheck;
        bool ooo_okay, copy_destructor;

        th = tcp_hdr(skb);
        thlen = th->doff * 4;
        if (thlen < sizeof(*th))
                goto out;

        if (!pskb_may_pull(skb, thlen))
                goto out;

        oldlen = (u16)~skb->len;
        __skb_pull(skb, thlen);

        mss = skb_shinfo(skb)->gso_size;
        if (unlikely(skb->len <= mss))
                goto out;

        if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
                /* Packet is from an untrusted source, reset gso_segs. */

                skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

                segs = NULL;
                goto out;
        }

        /* GSO partial only requires splitting the frame into an MSS
         * multiple and possibly a remainder.  So update the mss now.
         */
        if (features & NETIF_F_GSO_PARTIAL)
                mss = skb->len - (skb->len % mss);

        copy_destructor = gso_skb->destructor == tcp_wfree;
        ooo_okay = gso_skb->ooo_okay;
        /* All segments but the first should have ooo_okay cleared */
        skb->ooo_okay = 0;

        segs = skb_segment(skb, features);
        if (IS_ERR(segs))
                goto out;

        /* Only first segment might have ooo_okay set */
        segs->ooo_okay = ooo_okay;

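        /* oldlen is the one's complement of the original TCP length, so
         * adding the new per-segment length (thlen + mss) yields the
         * checksum delta needed for every full-sized segment below.
         */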
        delta = htonl(oldlen + (thlen + mss));

        skb = segs;
        th = tcp_hdr(skb);
        seq = ntohl(th->seq);

        if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP))
                tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss);

        newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
                                               (__force u32)delta));

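        /* All segments except the last carry exactly thlen + mss bytes and
         * can share the precomputed checksum.  FIN/PSH belong only on the
         * final segment and CWR only on the first, so clear them accordingly.
         */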
        while (skb->next) {
                th->fin = th->psh = 0;
                th->check = newcheck;

                if (skb->ip_summed == CHECKSUM_PARTIAL)
                        gso_reset_checksum(skb, ~th->check);
                else
                        th->check = gso_make_checksum(skb, ~th->check);

                seq += mss;
                if (copy_destructor) {
                        skb->destructor = gso_skb->destructor;
                        skb->sk = gso_skb->sk;
                        sum_truesize += skb->truesize;
                }
                skb = skb->next;
                th = tcp_hdr(skb);

                th->seq = htonl(seq);
                th->cwr = 0;
        }

        /* The following permits TCP Small Queues to work well with GSO:
         * the callback to the TCP stack will run when the last frag is
         * freed at TX completion, and not right now when gso_skb is
         * freed by the GSO engine.
         */
        if (copy_destructor) {
                swap(gso_skb->sk, skb->sk);
                swap(gso_skb->destructor, skb->destructor);
                sum_truesize += skb->truesize;
                atomic_add(sum_truesize - gso_skb->truesize,
                           &skb->sk->sk_wmem_alloc);
        }

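        /* The last segment is usually shorter than a full MSS, so recompute
         * its checksum delta from the actual remaining length instead of
         * reusing newcheck.
         */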
        delta = htonl(oldlen + (skb_tail_pointer(skb) -
                                skb_transport_header(skb)) +
                      skb->data_len);
        th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
                                (__force u32)delta));
        if (skb->ip_summed == CHECKSUM_PARTIAL)
                gso_reset_checksum(skb, ~th->check);
        else
                th->check = gso_make_checksum(skb, ~th->check);
out:
        return segs;
}

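/* GRO receive handler shared by IPv4 and IPv6: look for a held packet of the
 * same flow (matching source and destination ports), decide whether the flow
 * must be flushed, and otherwise try to coalesce this segment onto it.
 */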
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
        struct sk_buff **pp = NULL;
        struct sk_buff *p;
        struct tcphdr *th;
        struct tcphdr *th2;
        unsigned int len;
        unsigned int thlen;
        __be32 flags;
        unsigned int mss = 1;
        unsigned int hlen;
        unsigned int off;
        int flush = 1;
        int i;

        off = skb_gro_offset(skb);
        hlen = off + sizeof(*th);
        th = skb_gro_header_fast(skb, off);
        if (skb_gro_header_hard(skb, hlen)) {
                th = skb_gro_header_slow(skb, hlen, off);
                if (unlikely(!th))
                        goto out;
        }

        thlen = th->doff * 4;
        if (thlen < sizeof(*th))
                goto out;

        hlen = off + thlen;
        if (skb_gro_header_hard(skb, hlen)) {
                th = skb_gro_header_slow(skb, hlen, off);
                if (unlikely(!th))
                        goto out;
        }

        skb_gro_pull(skb, thlen);

        len = skb_gro_len(skb);
        flags = tcp_flag_word(th);

        for (; (p = *head); head = &p->next) {
                if (!NAPI_GRO_CB(p)->same_flow)
                        continue;

                th2 = tcp_hdr(p);

                if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
                }

                goto found;
        }

        goto out_check_final;

found:
        /* Include the IP ID check below from the innermost IP hdr */
        flush = NAPI_GRO_CB(p)->flush;
        flush |= (__force int)(flags & TCP_FLAG_CWR);
        flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
                  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
        flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
        for (i = sizeof(*th); i < thlen; i += 4)
                flush |= *(u32 *)((u8 *)th + i) ^
                         *(u32 *)((u8 *)th2 + i);

        /* When we receive our second frame we can make a decision on whether
         * to continue this flow as an atomic flow with a fixed ID or to use
         * an incrementing ID.
         */
        if (NAPI_GRO_CB(p)->flush_id != 1 ||
            NAPI_GRO_CB(p)->count != 1 ||
            !NAPI_GRO_CB(p)->is_atomic)
                flush |= NAPI_GRO_CB(p)->flush_id;
        else
                NAPI_GRO_CB(p)->is_atomic = false;

        mss = skb_shinfo(p)->gso_size;

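        /* Only in-order, at-most-MSS-sized segments can be merged: anything
         * larger than gso_size (or of zero length), or not starting exactly
         * where the held packet ends, forces a flush.
         */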
        flush |= (len - 1) >= mss;
        flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);

        if (flush || skb_gro_receive(head, skb)) {
                mss = 1;
                goto out_check_final;
        }

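        /* Coalescing succeeded: carry FIN/PSH from this segment over to the
         * merged packet so the flags are not lost in aggregation.
         */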
        p = *head;
        th2 = tcp_hdr(p);
        tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
        flush = len < mss;
        flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
                                        TCP_FLAG_RST | TCP_FLAG_SYN |
                                        TCP_FLAG_FIN));

        if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
                pp = head;

out:
        NAPI_GRO_CB(skb)->flush |= (flush != 0);

        return pp;
}

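/* Finalize a merged GRO packet: point csum_start/csum_offset at the TCP
 * checksum for CHECKSUM_PARTIAL, record how many segments were coalesced so
 * the packet can be resegmented later, and flag ECN if CWR was seen.
 */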
int tcp_gro_complete(struct sk_buff *skb)
{
        struct tcphdr *th = tcp_hdr(skb);

        skb->csum_start = (unsigned char *)th - skb->head;
        skb->csum_offset = offsetof(struct tcphdr, check);
        skb->ip_summed = CHECKSUM_PARTIAL;

        skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;

        if (th->cwr)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

        return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);

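/* IPv4 wrapper around tcp_gro_receive(): unless the packet is already marked
 * for flushing, validate the TCP checksum against the IPv4 pseudo-header
 * before attempting any aggregation.
 */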
static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
        /* Don't bother verifying checksum if we're going to flush anyway. */
        if (!NAPI_GRO_CB(skb)->flush &&
            skb_gro_checksum_validate(skb, IPPROTO_TCP,
                                      inet_gro_compute_pseudo)) {
                NAPI_GRO_CB(skb)->flush = 1;
                return NULL;
        }

        return tcp_gro_receive(head, skb);
}

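/* IPv4 completion: seed th->check with the pseudo-header checksum for the
 * merged length and mark the skb as TCPv4 GSO (with a fixed IP ID when the
 * flow proved atomic) before the common tcp_gro_complete().
 */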
static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *th = tcp_hdr(skb);

        th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
                                  iph->daddr, 0);
        skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;

        if (NAPI_GRO_CB(skb)->is_atomic)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;

        return tcp_gro_complete(skb);
}

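/* Register the callbacks above with the inet offload layer so GSO/GRO work
 * for IPPROTO_TCP is dispatched to them.
 */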
static const struct net_offload tcpv4_offload = {
        .callbacks = {
                .gso_segment    =       tcp4_gso_segment,
                .gro_receive    =       tcp4_gro_receive,
                .gro_complete   =       tcp4_gro_complete,
        },
};

int __init tcpv4_offload_init(void)
{
        return inet_add_offload(&tcpv4_offload, IPPROTO_TCP);
}