linux/net/ipv4/inet_lro.c
<<
>>
Prefs
   1/*
   2 *  linux/net/ipv4/inet_lro.c
   3 *
   4 *  Large Receive Offload (ipv4 / tcp)
   5 *
   6 *  (C) Copyright IBM Corp. 2007
   7 *
   8 *  Authors:
   9 *       Jan-Bernd Themann <themann@de.ibm.com>
  10 *       Christoph Raisch <raisch@de.ibm.com>
  11 *
  12 *
  13 * This program is free software; you can redistribute it and/or modify
  14 * it under the terms of the GNU General Public License as published by
  15 * the Free Software Foundation; either version 2, or (at your option)
  16 * any later version.
  17 *
  18 * This program is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 * GNU General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU General Public License
  24 * along with this program; if not, write to the Free Software
  25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  26 */
  27
  28
  29#include <linux/module.h>
  30#include <linux/if_vlan.h>
  31#include <linux/inet_lro.h>
  32#include <net/checksum.h>
  33
  34MODULE_LICENSE("GPL");
  35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
  36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
  37
  38#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
  39#define IP_HDR_LEN(iph) (iph->ihl << 2)
  40#define TCP_PAYLOAD_LENGTH(iph, tcph) \
  41        (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
  42
  43#define IPH_LEN_WO_OPTIONS 5
  44#define TCPH_LEN_WO_OPTIONS 5
  45#define TCPH_LEN_W_TIMESTAMP 8
  46
  47#define LRO_MAX_PG_HLEN 64
  48
  49#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
  50
  51/*
  52 * Basic tcp checks whether packet is suitable for LRO
  53 */
  54
  55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
  56                            int len, const struct net_lro_desc *lro_desc)
  57{
  58        /* check ip header: don't aggregate padded frames */
  59        if (ntohs(iph->tot_len) != len)
  60                return -1;
  61
  62        if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
  63                return -1;
  64
  65        if (iph->ihl != IPH_LEN_WO_OPTIONS)
  66                return -1;
  67
  68        if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
  69            tcph->rst || tcph->syn || tcph->fin)
  70                return -1;
  71
  72        if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
  73                return -1;
  74
  75        if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
  76            tcph->doff != TCPH_LEN_W_TIMESTAMP)
  77                return -1;
  78
  79        /* check tcp options (only timestamp allowed) */
  80        if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
  81                __be32 *topt = (__be32 *)(tcph + 1);
  82
  83                if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
  84                                   | (TCPOPT_TIMESTAMP << 8)
  85                                   | TCPOLEN_TIMESTAMP))
  86                        return -1;
  87
  88                /* timestamp should be in right order */
  89                topt++;
  90                if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
  91                                      ntohl(*topt)))
  92                        return -1;
  93
  94                /* timestamp reply should not be zero */
  95                topt++;
  96                if (*topt == 0)
  97                        return -1;
  98        }
  99
 100        return 0;
 101}
 102
 103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
 104{
 105        struct iphdr *iph = lro_desc->iph;
 106        struct tcphdr *tcph = lro_desc->tcph;
 107        __be32 *p;
 108        __wsum tcp_hdr_csum;
 109
 110        tcph->ack_seq = lro_desc->tcp_ack;
 111        tcph->window = lro_desc->tcp_window;
 112
 113        if (lro_desc->tcp_saw_tstamp) {
 114                p = (__be32 *)(tcph + 1);
 115                *(p+2) = lro_desc->tcp_rcv_tsecr;
 116        }
 117
 118        csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
 119        iph->tot_len = htons(lro_desc->ip_tot_len);
 120
 121        tcph->check = 0;
 122        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
 123        lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
 124        tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 125                                        lro_desc->ip_tot_len -
 126                                        IP_HDR_LEN(iph), IPPROTO_TCP,
 127                                        lro_desc->data_csum);
 128}
 129
 130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 131{
 132        __wsum tcp_csum;
 133        __wsum tcp_hdr_csum;
 134        __wsum tcp_ps_hdr_csum;
 135
 136        tcp_csum = ~csum_unfold(tcph->check);
 137        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
 138
 139        tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 140                                             len + TCP_HDR_LEN(tcph),
 141                                             IPPROTO_TCP, 0);
 142
 143        return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
 144                        tcp_ps_hdr_csum);
 145}
 146
 147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 148                          struct iphdr *iph, struct tcphdr *tcph)
 149{
 150        int nr_frags;
 151        __be32 *ptr;
 152        u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 153
 154        nr_frags = skb_shinfo(skb)->nr_frags;
 155        lro_desc->parent = skb;
 156        lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
 157        lro_desc->iph = iph;
 158        lro_desc->tcph = tcph;
 159        lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
 160        lro_desc->tcp_ack = tcph->ack_seq;
 161        lro_desc->tcp_window = tcph->window;
 162
 163        lro_desc->pkt_aggr_cnt = 1;
 164        lro_desc->ip_tot_len = ntohs(iph->tot_len);
 165
 166        if (tcph->doff == 8) {
 167                ptr = (__be32 *)(tcph+1);
 168                lro_desc->tcp_saw_tstamp = 1;
 169                lro_desc->tcp_rcv_tsval = *(ptr+1);
 170                lro_desc->tcp_rcv_tsecr = *(ptr+2);
 171        }
 172
 173        lro_desc->mss = tcp_data_len;
 174        lro_desc->active = 1;
 175
 176        lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
 177                                                tcp_data_len);
 178}
 179
 180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
 181{
 182        memset(lro_desc, 0, sizeof(struct net_lro_desc));
 183}
 184
 185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 186                           struct tcphdr *tcph, int tcp_data_len)
 187{
 188        struct sk_buff *parent = lro_desc->parent;
 189        __be32 *topt;
 190
 191        lro_desc->pkt_aggr_cnt++;
 192        lro_desc->ip_tot_len += tcp_data_len;
 193        lro_desc->tcp_next_seq += tcp_data_len;
 194        lro_desc->tcp_window = tcph->window;
 195        lro_desc->tcp_ack = tcph->ack_seq;
 196
 197        /* don't update tcp_rcv_tsval, would not work with PAWS */
 198        if (lro_desc->tcp_saw_tstamp) {
 199                topt = (__be32 *) (tcph + 1);
 200                lro_desc->tcp_rcv_tsecr = *(topt + 2);
 201        }
 202
 203        lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
 204                                             lro_tcp_data_csum(iph, tcph,
 205                                                               tcp_data_len),
 206                                             parent->len);
 207
 208        parent->len += tcp_data_len;
 209        parent->data_len += tcp_data_len;
 210        if (tcp_data_len > lro_desc->mss)
 211                lro_desc->mss = tcp_data_len;
 212}
 213
 214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 215                           struct iphdr *iph, struct tcphdr *tcph)
 216{
 217        struct sk_buff *parent = lro_desc->parent;
 218        int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 219
 220        lro_add_common(lro_desc, iph, tcph, tcp_data_len);
 221
 222        skb_pull(skb, (skb->len - tcp_data_len));
 223        parent->truesize += skb->truesize;
 224
 225        if (lro_desc->last_skb)
 226                lro_desc->last_skb->next = skb;
 227        else
 228                skb_shinfo(parent)->frag_list = skb;
 229
 230        lro_desc->last_skb = skb;
 231}
 232
 233static void lro_add_frags(struct net_lro_desc *lro_desc,
 234                          int len, int hlen, int truesize,
 235                          struct skb_frag_struct *skb_frags,
 236                          struct iphdr *iph, struct tcphdr *tcph)
 237{
 238        struct sk_buff *skb = lro_desc->parent;
 239        int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 240
 241        lro_add_common(lro_desc, iph, tcph, tcp_data_len);
 242
 243        skb->truesize += truesize;
 244
 245        skb_frags[0].page_offset += hlen;
 246        skb_frag_size_sub(&skb_frags[0], hlen);
 247
 248        while (tcp_data_len > 0) {
 249                *(lro_desc->next_frag) = *skb_frags;
 250                tcp_data_len -= skb_frag_size(skb_frags);
 251                lro_desc->next_frag++;
 252                skb_frags++;
 253                skb_shinfo(skb)->nr_frags++;
 254        }
 255}
 256
 257static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
 258                              struct iphdr *iph,
 259                              struct tcphdr *tcph)
 260{
 261        if ((lro_desc->iph->saddr != iph->saddr) ||
 262            (lro_desc->iph->daddr != iph->daddr) ||
 263            (lro_desc->tcph->source != tcph->source) ||
 264            (lro_desc->tcph->dest != tcph->dest))
 265                return -1;
 266        return 0;
 267}
 268
 269static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
 270                                         struct net_lro_desc *lro_arr,
 271                                         struct iphdr *iph,
 272                                         struct tcphdr *tcph)
 273{
 274        struct net_lro_desc *lro_desc = NULL;
 275        struct net_lro_desc *tmp;
 276        int max_desc = lro_mgr->max_desc;
 277        int i;
 278
 279        for (i = 0; i < max_desc; i++) {
 280                tmp = &lro_arr[i];
 281                if (tmp->active)
 282                        if (!lro_check_tcp_conn(tmp, iph, tcph)) {
 283                                lro_desc = tmp;
 284                                goto out;
 285                        }
 286        }
 287
 288        for (i = 0; i < max_desc; i++) {
 289                if (!lro_arr[i].active) {
 290                        lro_desc = &lro_arr[i];
 291                        goto out;
 292                }
 293        }
 294
 295        LRO_INC_STATS(lro_mgr, no_desc);
 296out:
 297        return lro_desc;
 298}
 299
 300static void lro_flush(struct net_lro_mgr *lro_mgr,
 301                      struct net_lro_desc *lro_desc)
 302{
 303        if (lro_desc->pkt_aggr_cnt > 1)
 304                lro_update_tcp_ip_header(lro_desc);
 305
 306        skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
 307
 308        if (lro_mgr->features & LRO_F_NAPI)
 309                netif_receive_skb(lro_desc->parent);
 310        else
 311                netif_rx(lro_desc->parent);
 312
 313        LRO_INC_STATS(lro_mgr, flushed);
 314        lro_clear_desc(lro_desc);
 315}
 316
 317static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 318                          void *priv)
 319{
 320        struct net_lro_desc *lro_desc;
 321        struct iphdr *iph;
 322        struct tcphdr *tcph;
 323        u64 flags;
 324        int vlan_hdr_len = 0;
 325
 326        if (!lro_mgr->get_skb_header ||
 327            lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
 328                                    &flags, priv))
 329                goto out;
 330
 331        if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
 332                goto out;
 333
 334        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
 335        if (!lro_desc)
 336                goto out;
 337
 338        if ((skb->protocol == htons(ETH_P_8021Q)) &&
 339            !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
 340                vlan_hdr_len = VLAN_HLEN;
 341
 342        if (!lro_desc->active) { /* start new lro session */
 343                if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
 344                        goto out;
 345
 346                skb->ip_summed = lro_mgr->ip_summed_aggr;
 347                lro_init_desc(lro_desc, skb, iph, tcph);
 348                LRO_INC_STATS(lro_mgr, aggregated);
 349                return 0;
 350        }
 351
 352        if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
 353                goto out2;
 354
 355        if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
 356                goto out2;
 357
 358        lro_add_packet(lro_desc, skb, iph, tcph);
 359        LRO_INC_STATS(lro_mgr, aggregated);
 360
 361        if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
 362            lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
 363                lro_flush(lro_mgr, lro_desc);
 364
 365        return 0;
 366
 367out2: /* send aggregated SKBs to stack */
 368        lro_flush(lro_mgr, lro_desc);
 369
 370out:
 371        return 1;
 372}
 373
 374
 375static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
 376                                   struct skb_frag_struct *frags,
 377                                   int len, int true_size,
 378                                   void *mac_hdr,
 379                                   int hlen, __wsum sum,
 380                                   u32 ip_summed)
 381{
 382        struct sk_buff *skb;
 383        struct skb_frag_struct *skb_frags;
 384        int data_len = len;
 385        int hdr_len = min(len, hlen);
 386
 387        skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
 388        if (!skb)
 389                return NULL;
 390
 391        skb_reserve(skb, lro_mgr->frag_align_pad);
 392        skb->len = len;
 393        skb->data_len = len - hdr_len;
 394        skb->truesize += true_size;
 395        skb->tail += hdr_len;
 396
 397        memcpy(skb->data, mac_hdr, hdr_len);
 398
 399        skb_frags = skb_shinfo(skb)->frags;
 400        while (data_len > 0) {
 401                *skb_frags = *frags;
 402                data_len -= skb_frag_size(frags);
 403                skb_frags++;
 404                frags++;
 405                skb_shinfo(skb)->nr_frags++;
 406        }
 407
 408        skb_shinfo(skb)->frags[0].page_offset += hdr_len;
 409        skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
 410
 411        skb->ip_summed = ip_summed;
 412        skb->csum = sum;
 413        skb->protocol = eth_type_trans(skb, lro_mgr->dev);
 414        return skb;
 415}
 416
 417static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
 418                                          struct skb_frag_struct *frags,
 419                                          int len, int true_size,
 420                                          void *priv, __wsum sum)
 421{
 422        struct net_lro_desc *lro_desc;
 423        struct iphdr *iph;
 424        struct tcphdr *tcph;
 425        struct sk_buff *skb;
 426        u64 flags;
 427        void *mac_hdr;
 428        int mac_hdr_len;
 429        int hdr_len = LRO_MAX_PG_HLEN;
 430        int vlan_hdr_len = 0;
 431
 432        if (!lro_mgr->get_frag_header ||
 433            lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
 434                                     (void *)&tcph, &flags, priv)) {
 435                mac_hdr = skb_frag_address(frags);
 436                goto out1;
 437        }
 438
 439        if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
 440                goto out1;
 441
 442        hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
 443        mac_hdr_len = (int)((void *)(iph) - mac_hdr);
 444
 445        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
 446        if (!lro_desc)
 447                goto out1;
 448
 449        if (!lro_desc->active) { /* start new lro session */
 450                if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
 451                        goto out1;
 452
 453                skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
 454                                  hdr_len, 0, lro_mgr->ip_summed_aggr);
 455                if (!skb)
 456                        goto out;
 457
 458                if ((skb->protocol == htons(ETH_P_8021Q)) &&
 459                    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
 460                        vlan_hdr_len = VLAN_HLEN;
 461
 462                iph = (void *)(skb->data + vlan_hdr_len);
 463                tcph = (void *)((u8 *)skb->data + vlan_hdr_len
 464                                + IP_HDR_LEN(iph));
 465
 466                lro_init_desc(lro_desc, skb, iph, tcph);
 467                LRO_INC_STATS(lro_mgr, aggregated);
 468                return NULL;
 469        }
 470
 471        if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
 472                goto out2;
 473
 474        if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
 475                goto out2;
 476
 477        lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
 478        LRO_INC_STATS(lro_mgr, aggregated);
 479
 480        if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
 481            lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
 482                lro_flush(lro_mgr, lro_desc);
 483
 484        return NULL;
 485
 486out2: /* send aggregated packets to the stack */
 487        lro_flush(lro_mgr, lro_desc);
 488
 489out1:  /* Original packet has to be posted to the stack */
 490        skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
 491                          hdr_len, sum, lro_mgr->ip_summed);
 492out:
 493        return skb;
 494}
 495
 496void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 497                     struct sk_buff *skb,
 498                     void *priv)
 499{
 500        if (__lro_proc_skb(lro_mgr, skb, priv)) {
 501                if (lro_mgr->features & LRO_F_NAPI)
 502                        netif_receive_skb(skb);
 503                else
 504                        netif_rx(skb);
 505        }
 506}
 507EXPORT_SYMBOL(lro_receive_skb);
 508
 509void lro_receive_frags(struct net_lro_mgr *lro_mgr,
 510                       struct skb_frag_struct *frags,
 511                       int len, int true_size, void *priv, __wsum sum)
 512{
 513        struct sk_buff *skb;
 514
 515        skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
 516        if (!skb)
 517                return;
 518
 519        if (lro_mgr->features & LRO_F_NAPI)
 520                netif_receive_skb(skb);
 521        else
 522                netif_rx(skb);
 523}
 524EXPORT_SYMBOL(lro_receive_frags);
 525
 526void lro_flush_all(struct net_lro_mgr *lro_mgr)
 527{
 528        int i;
 529        struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
 530
 531        for (i = 0; i < lro_mgr->max_desc; i++) {
 532                if (lro_desc[i].active)
 533                        lro_flush(lro_mgr, &lro_desc[i]);
 534        }
 535}
 536EXPORT_SYMBOL(lro_flush_all);
 537
 538void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
 539                  struct iphdr *iph, struct tcphdr *tcph)
 540{
 541        struct net_lro_desc *lro_desc;
 542
 543        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
 544        if (lro_desc->active)
 545                lro_flush(lro_mgr, lro_desc);
 546}
 547EXPORT_SYMBOL(lro_flush_pkt);
 548