linux/net/ipv4/inet_lro.c
<<
>>
Prefs
   1/*
   2 *  linux/net/ipv4/inet_lro.c
   3 *
   4 *  Large Receive Offload (ipv4 / tcp)
   5 *
   6 *  (C) Copyright IBM Corp. 2007
   7 *
   8 *  Authors:
   9 *       Jan-Bernd Themann <themann@de.ibm.com>
  10 *       Christoph Raisch <raisch@de.ibm.com>
  11 *
  12 *
  13 * This program is free software; you can redistribute it and/or modify
  14 * it under the terms of the GNU General Public License as published by
  15 * the Free Software Foundation; either version 2, or (at your option)
  16 * any later version.
  17 *
  18 * This program is distributed in the hope that it will be useful,
  19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 * GNU General Public License for more details.
  22 *
  23 * You should have received a copy of the GNU General Public License
  24 * along with this program; if not, write to the Free Software
  25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  26 */
  27
  28
  29#include <linux/module.h>
  30#include <linux/if_vlan.h>
  31#include <linux/inet_lro.h>
  32#include <net/checksum.h>
  33
  34MODULE_LICENSE("GPL");
  35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
  36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
  37
  38#define TCP_HDR_LEN(tcph) (tcph->doff << 2)
  39#define IP_HDR_LEN(iph) (iph->ihl << 2)
  40#define TCP_PAYLOAD_LENGTH(iph, tcph) \
  41        (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
  42
  43#define IPH_LEN_WO_OPTIONS 5
  44#define TCPH_LEN_WO_OPTIONS 5
  45#define TCPH_LEN_W_TIMESTAMP 8
  46
  47#define LRO_MAX_PG_HLEN 64
  48
  49#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
  50
  51/*
  52 * Basic tcp checks whether packet is suitable for LRO
  53 */
  54
  55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
  56                            int len, const struct net_lro_desc *lro_desc)
  57{
  58        /* check ip header: don't aggregate padded frames */
  59        if (ntohs(iph->tot_len) != len)
  60                return -1;
  61
  62        if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
  63                return -1;
  64
  65        if (iph->ihl != IPH_LEN_WO_OPTIONS)
  66                return -1;
  67
  68        if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
  69            tcph->rst || tcph->syn || tcph->fin)
  70                return -1;
  71
  72        if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
  73                return -1;
  74
  75        if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
  76            tcph->doff != TCPH_LEN_W_TIMESTAMP)
  77                return -1;
  78
  79        /* check tcp options (only timestamp allowed) */
  80        if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
  81                __be32 *topt = (__be32 *)(tcph + 1);
  82
  83                if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
  84                                   | (TCPOPT_TIMESTAMP << 8)
  85                                   | TCPOLEN_TIMESTAMP))
  86                        return -1;
  87
  88                /* timestamp should be in right order */
  89                topt++;
  90                if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
  91                                      ntohl(*topt)))
  92                        return -1;
  93
  94                /* timestamp reply should not be zero */
  95                topt++;
  96                if (*topt == 0)
  97                        return -1;
  98        }
  99
 100        return 0;
 101}
 102
 103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
 104{
 105        struct iphdr *iph = lro_desc->iph;
 106        struct tcphdr *tcph = lro_desc->tcph;
 107        __be32 *p;
 108        __wsum tcp_hdr_csum;
 109
 110        tcph->ack_seq = lro_desc->tcp_ack;
 111        tcph->window = lro_desc->tcp_window;
 112
 113        if (lro_desc->tcp_saw_tstamp) {
 114                p = (__be32 *)(tcph + 1);
 115                *(p+2) = lro_desc->tcp_rcv_tsecr;
 116        }
 117
 118        csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
 119        iph->tot_len = htons(lro_desc->ip_tot_len);
 120
 121        tcph->check = 0;
 122        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
 123        lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
 124        tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
 125                                        lro_desc->ip_tot_len -
 126                                        IP_HDR_LEN(iph), IPPROTO_TCP,
 127                                        lro_desc->data_csum);
 128}
 129
 130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
 131{
 132        __wsum tcp_csum;
 133        __wsum tcp_hdr_csum;
 134        __wsum tcp_ps_hdr_csum;
 135
 136        tcp_csum = ~csum_unfold(tcph->check);
 137        tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
 138
 139        tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 140                                             len + TCP_HDR_LEN(tcph),
 141                                             IPPROTO_TCP, 0);
 142
 143        return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
 144                        tcp_ps_hdr_csum);
 145}
 146
 147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 148                          struct iphdr *iph, struct tcphdr *tcph)
 149{
 150        int nr_frags;
 151        __be32 *ptr;
 152        u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 153
 154        nr_frags = skb_shinfo(skb)->nr_frags;
 155        lro_desc->parent = skb;
 156        lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
 157        lro_desc->iph = iph;
 158        lro_desc->tcph = tcph;
 159        lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
 160        lro_desc->tcp_ack = tcph->ack_seq;
 161        lro_desc->tcp_window = tcph->window;
 162
 163        lro_desc->pkt_aggr_cnt = 1;
 164        lro_desc->ip_tot_len = ntohs(iph->tot_len);
 165
 166        if (tcph->doff == 8) {
 167                ptr = (__be32 *)(tcph+1);
 168                lro_desc->tcp_saw_tstamp = 1;
 169                lro_desc->tcp_rcv_tsval = *(ptr+1);
 170                lro_desc->tcp_rcv_tsecr = *(ptr+2);
 171        }
 172
 173        lro_desc->mss = tcp_data_len;
 174        lro_desc->active = 1;
 175
 176        lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
 177                                                tcp_data_len);
 178}
 179
 180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
 181{
 182        memset(lro_desc, 0, sizeof(struct net_lro_desc));
 183}
 184
 185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 186                           struct tcphdr *tcph, int tcp_data_len)
 187{
 188        struct sk_buff *parent = lro_desc->parent;
 189        __be32 *topt;
 190
 191        lro_desc->pkt_aggr_cnt++;
 192        lro_desc->ip_tot_len += tcp_data_len;
 193        lro_desc->tcp_next_seq += tcp_data_len;
 194        lro_desc->tcp_window = tcph->window;
 195        lro_desc->tcp_ack = tcph->ack_seq;
 196
 197        /* don't update tcp_rcv_tsval, would not work with PAWS */
 198        if (lro_desc->tcp_saw_tstamp) {
 199                topt = (__be32 *) (tcph + 1);
 200                lro_desc->tcp_rcv_tsecr = *(topt + 2);
 201        }
 202
 203        lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
 204                                             lro_tcp_data_csum(iph, tcph,
 205                                                               tcp_data_len),
 206                                             parent->len);
 207
 208        parent->len += tcp_data_len;
 209        parent->data_len += tcp_data_len;
 210        if (tcp_data_len > lro_desc->mss)
 211                lro_desc->mss = tcp_data_len;
 212}
 213
 214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 215                           struct iphdr *iph, struct tcphdr *tcph)
 216{
 217        struct sk_buff *parent = lro_desc->parent;
 218        int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 219
 220        lro_add_common(lro_desc, iph, tcph, tcp_data_len);
 221
 222        skb_pull(skb, (skb->len - tcp_data_len));
 223        parent->truesize += skb->truesize;
 224
 225        if (lro_desc->last_skb)
 226                lro_desc->last_skb->next = skb;
 227        else
 228                skb_shinfo(parent)->frag_list = skb;
 229
 230        lro_desc->last_skb = skb;
 231}
 232
 233
 234static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
 235                              struct iphdr *iph,
 236                              struct tcphdr *tcph)
 237{
 238        if ((lro_desc->iph->saddr != iph->saddr) ||
 239            (lro_desc->iph->daddr != iph->daddr) ||
 240            (lro_desc->tcph->source != tcph->source) ||
 241            (lro_desc->tcph->dest != tcph->dest))
 242                return -1;
 243        return 0;
 244}
 245
 246static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
 247                                         struct net_lro_desc *lro_arr,
 248                                         struct iphdr *iph,
 249                                         struct tcphdr *tcph)
 250{
 251        struct net_lro_desc *lro_desc = NULL;
 252        struct net_lro_desc *tmp;
 253        int max_desc = lro_mgr->max_desc;
 254        int i;
 255
 256        for (i = 0; i < max_desc; i++) {
 257                tmp = &lro_arr[i];
 258                if (tmp->active)
 259                        if (!lro_check_tcp_conn(tmp, iph, tcph)) {
 260                                lro_desc = tmp;
 261                                goto out;
 262                        }
 263        }
 264
 265        for (i = 0; i < max_desc; i++) {
 266                if (!lro_arr[i].active) {
 267                        lro_desc = &lro_arr[i];
 268                        goto out;
 269                }
 270        }
 271
 272        LRO_INC_STATS(lro_mgr, no_desc);
 273out:
 274        return lro_desc;
 275}
 276
 277static void lro_flush(struct net_lro_mgr *lro_mgr,
 278                      struct net_lro_desc *lro_desc)
 279{
 280        if (lro_desc->pkt_aggr_cnt > 1)
 281                lro_update_tcp_ip_header(lro_desc);
 282
 283        skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
 284
 285        if (lro_mgr->features & LRO_F_NAPI)
 286                netif_receive_skb(lro_desc->parent);
 287        else
 288                netif_rx(lro_desc->parent);
 289
 290        LRO_INC_STATS(lro_mgr, flushed);
 291        lro_clear_desc(lro_desc);
 292}
 293
 294static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
 295                          void *priv)
 296{
 297        struct net_lro_desc *lro_desc;
 298        struct iphdr *iph;
 299        struct tcphdr *tcph;
 300        u64 flags;
 301        int vlan_hdr_len = 0;
 302
 303        if (!lro_mgr->get_skb_header ||
 304            lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
 305                                    &flags, priv))
 306                goto out;
 307
 308        if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
 309                goto out;
 310
 311        lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
 312        if (!lro_desc)
 313                goto out;
 314
 315        if ((skb->protocol == htons(ETH_P_8021Q)) &&
 316            !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
 317                vlan_hdr_len = VLAN_HLEN;
 318
 319        if (!lro_desc->active) { /* start new lro session */
 320                if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
 321                        goto out;
 322
 323                skb->ip_summed = lro_mgr->ip_summed_aggr;
 324                lro_init_desc(lro_desc, skb, iph, tcph);
 325                LRO_INC_STATS(lro_mgr, aggregated);
 326                return 0;
 327        }
 328
 329        if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
 330                goto out2;
 331
 332        if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
 333                goto out2;
 334
 335        lro_add_packet(lro_desc, skb, iph, tcph);
 336        LRO_INC_STATS(lro_mgr, aggregated);
 337
 338        if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
 339            lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
 340                lro_flush(lro_mgr, lro_desc);
 341
 342        return 0;
 343
 344out2: /* send aggregated SKBs to stack */
 345        lro_flush(lro_mgr, lro_desc);
 346
 347out:
 348        return 1;
 349}
 350
 351void lro_receive_skb(struct net_lro_mgr *lro_mgr,
 352                     struct sk_buff *skb,
 353                     void *priv)
 354{
 355        if (__lro_proc_skb(lro_mgr, skb, priv)) {
 356                if (lro_mgr->features & LRO_F_NAPI)
 357                        netif_receive_skb(skb);
 358                else
 359                        netif_rx(skb);
 360        }
 361}
 362EXPORT_SYMBOL(lro_receive_skb);
 363
 364void lro_flush_all(struct net_lro_mgr *lro_mgr)
 365{
 366        int i;
 367        struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
 368
 369        for (i = 0; i < lro_mgr->max_desc; i++) {
 370                if (lro_desc[i].active)
 371                        lro_flush(lro_mgr, &lro_desc[i]);
 372        }
 373}
 374EXPORT_SYMBOL(lro_flush_all);
 375