1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/module.h>
30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h>
32#include <net/checksum.h>
33
34MODULE_LICENSE("GPL");
35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37
/*
 * Header lengths in bytes: the doff/ihl fields are shifted left by two.
 * Arguments are parenthesized so that pointer expressions such as
 * "hdr + 1" expand correctly.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* IP total length minus the IP and TCP header lengths */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Accepted values of the ihl/doff header-length fields */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default number of header bytes copied when building an skb from frags */
#define LRO_MAX_PG_HLEN 64

/* do/while(0) keeps the macro a single statement, safe in if/else bodies */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
50
51
52
53
54
55static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56 int len, const struct net_lro_desc *lro_desc)
57{
58
59 if (ntohs(iph->tot_len) != len)
60 return -1;
61
62 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63 return -1;
64
65 if (iph->ihl != IPH_LEN_WO_OPTIONS)
66 return -1;
67
68 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69 tcph->rst || tcph->syn || tcph->fin)
70 return -1;
71
72 if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73 return -1;
74
75 if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76 tcph->doff != TCPH_LEN_W_TIMESTAMP)
77 return -1;
78
79
80 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81 __be32 *topt = (__be32 *)(tcph + 1);
82
83 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84 | (TCPOPT_TIMESTAMP << 8)
85 | TCPOLEN_TIMESTAMP))
86 return -1;
87
88
89 topt++;
90 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91 ntohl(*topt)))
92 return -1;
93
94
95 topt++;
96 if (*topt == 0)
97 return -1;
98 }
99
100 return 0;
101}
102
103static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104{
105 struct iphdr *iph = lro_desc->iph;
106 struct tcphdr *tcph = lro_desc->tcph;
107 __be32 *p;
108 __wsum tcp_hdr_csum;
109
110 tcph->ack_seq = lro_desc->tcp_ack;
111 tcph->window = lro_desc->tcp_window;
112
113 if (lro_desc->tcp_saw_tstamp) {
114 p = (__be32 *)(tcph + 1);
115 *(p+2) = lro_desc->tcp_rcv_tsecr;
116 }
117
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119 iph->tot_len = htons(lro_desc->ip_tot_len);
120
121 tcph->check = 0;
122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125 lro_desc->ip_tot_len -
126 IP_HDR_LEN(iph), IPPROTO_TCP,
127 lro_desc->data_csum);
128}
129
130static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131{
132 __wsum tcp_csum;
133 __wsum tcp_hdr_csum;
134 __wsum tcp_ps_hdr_csum;
135
136 tcp_csum = ~csum_unfold(tcph->check);
137 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138
139 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140 len + TCP_HDR_LEN(tcph),
141 IPPROTO_TCP, 0);
142
143 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144 tcp_ps_hdr_csum);
145}
146
147static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148 struct iphdr *iph, struct tcphdr *tcph)
149{
150 int nr_frags;
151 __be32 *ptr;
152 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153
154 nr_frags = skb_shinfo(skb)->nr_frags;
155 lro_desc->parent = skb;
156 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157 lro_desc->iph = iph;
158 lro_desc->tcph = tcph;
159 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160 lro_desc->tcp_ack = tcph->ack_seq;
161 lro_desc->tcp_window = tcph->window;
162
163 lro_desc->pkt_aggr_cnt = 1;
164 lro_desc->ip_tot_len = ntohs(iph->tot_len);
165
166 if (tcph->doff == 8) {
167 ptr = (__be32 *)(tcph+1);
168 lro_desc->tcp_saw_tstamp = 1;
169 lro_desc->tcp_rcv_tsval = *(ptr+1);
170 lro_desc->tcp_rcv_tsecr = *(ptr+2);
171 }
172
173 lro_desc->mss = tcp_data_len;
174 lro_desc->active = 1;
175
176 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177 tcp_data_len);
178}
179
180static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181{
182 memset(lro_desc, 0, sizeof(struct net_lro_desc));
183}
184
185static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186 struct tcphdr *tcph, int tcp_data_len)
187{
188 struct sk_buff *parent = lro_desc->parent;
189 __be32 *topt;
190
191 lro_desc->pkt_aggr_cnt++;
192 lro_desc->ip_tot_len += tcp_data_len;
193 lro_desc->tcp_next_seq += tcp_data_len;
194 lro_desc->tcp_window = tcph->window;
195 lro_desc->tcp_ack = tcph->ack_seq;
196
197
198 if (lro_desc->tcp_saw_tstamp) {
199 topt = (__be32 *) (tcph + 1);
200 lro_desc->tcp_rcv_tsecr = *(topt + 2);
201 }
202
203 lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204 lro_tcp_data_csum(iph, tcph,
205 tcp_data_len),
206 parent->len);
207
208 parent->len += tcp_data_len;
209 parent->data_len += tcp_data_len;
210 if (tcp_data_len > lro_desc->mss)
211 lro_desc->mss = tcp_data_len;
212}
213
214static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215 struct iphdr *iph, struct tcphdr *tcph)
216{
217 struct sk_buff *parent = lro_desc->parent;
218 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219
220 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221
222 skb_pull(skb, (skb->len - tcp_data_len));
223 parent->truesize += skb->truesize;
224
225 if (lro_desc->last_skb)
226 lro_desc->last_skb->next = skb;
227 else
228 skb_shinfo(parent)->frag_list = skb;
229
230 lro_desc->last_skb = skb;
231}
232
233static void lro_add_frags(struct net_lro_desc *lro_desc,
234 int len, int hlen, int truesize,
235 struct skb_frag_struct *skb_frags,
236 struct iphdr *iph, struct tcphdr *tcph)
237{
238 struct sk_buff *skb = lro_desc->parent;
239 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
240
241 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
242
243 skb->truesize += truesize;
244
245 skb_frags[0].page_offset += hlen;
246 skb_frag_size_sub(&skb_frags[0], hlen);
247
248 while (tcp_data_len > 0) {
249 *(lro_desc->next_frag) = *skb_frags;
250 tcp_data_len -= skb_frag_size(skb_frags);
251 lro_desc->next_frag++;
252 skb_frags++;
253 skb_shinfo(skb)->nr_frags++;
254 }
255}
256
257static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
258 struct iphdr *iph,
259 struct tcphdr *tcph)
260{
261 if ((lro_desc->iph->saddr != iph->saddr) ||
262 (lro_desc->iph->daddr != iph->daddr) ||
263 (lro_desc->tcph->source != tcph->source) ||
264 (lro_desc->tcph->dest != tcph->dest))
265 return -1;
266 return 0;
267}
268
269static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
270 struct net_lro_desc *lro_arr,
271 struct iphdr *iph,
272 struct tcphdr *tcph)
273{
274 struct net_lro_desc *lro_desc = NULL;
275 struct net_lro_desc *tmp;
276 int max_desc = lro_mgr->max_desc;
277 int i;
278
279 for (i = 0; i < max_desc; i++) {
280 tmp = &lro_arr[i];
281 if (tmp->active)
282 if (!lro_check_tcp_conn(tmp, iph, tcph)) {
283 lro_desc = tmp;
284 goto out;
285 }
286 }
287
288 for (i = 0; i < max_desc; i++) {
289 if (!lro_arr[i].active) {
290 lro_desc = &lro_arr[i];
291 goto out;
292 }
293 }
294
295 LRO_INC_STATS(lro_mgr, no_desc);
296out:
297 return lro_desc;
298}
299
300static void lro_flush(struct net_lro_mgr *lro_mgr,
301 struct net_lro_desc *lro_desc)
302{
303 if (lro_desc->pkt_aggr_cnt > 1)
304 lro_update_tcp_ip_header(lro_desc);
305
306 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
307
308 if (lro_mgr->features & LRO_F_NAPI)
309 netif_receive_skb(lro_desc->parent);
310 else
311 netif_rx(lro_desc->parent);
312
313 LRO_INC_STATS(lro_mgr, flushed);
314 lro_clear_desc(lro_desc);
315}
316
317static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
318 void *priv)
319{
320 struct net_lro_desc *lro_desc;
321 struct iphdr *iph;
322 struct tcphdr *tcph;
323 u64 flags;
324 int vlan_hdr_len = 0;
325
326 if (!lro_mgr->get_skb_header ||
327 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
328 &flags, priv))
329 goto out;
330
331 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
332 goto out;
333
334 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
335 if (!lro_desc)
336 goto out;
337
338 if ((skb->protocol == htons(ETH_P_8021Q)) &&
339 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
340 vlan_hdr_len = VLAN_HLEN;
341
342 if (!lro_desc->active) {
343 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
344 goto out;
345
346 skb->ip_summed = lro_mgr->ip_summed_aggr;
347 lro_init_desc(lro_desc, skb, iph, tcph);
348 LRO_INC_STATS(lro_mgr, aggregated);
349 return 0;
350 }
351
352 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
353 goto out2;
354
355 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
356 goto out2;
357
358 lro_add_packet(lro_desc, skb, iph, tcph);
359 LRO_INC_STATS(lro_mgr, aggregated);
360
361 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
362 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
363 lro_flush(lro_mgr, lro_desc);
364
365 return 0;
366
367out2:
368 lro_flush(lro_mgr, lro_desc);
369
370out:
371 return 1;
372}
373
374
375static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
376 struct skb_frag_struct *frags,
377 int len, int true_size,
378 void *mac_hdr,
379 int hlen, __wsum sum,
380 u32 ip_summed)
381{
382 struct sk_buff *skb;
383 struct skb_frag_struct *skb_frags;
384 int data_len = len;
385 int hdr_len = min(len, hlen);
386
387 skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
388 if (!skb)
389 return NULL;
390
391 skb_reserve(skb, lro_mgr->frag_align_pad);
392 skb->len = len;
393 skb->data_len = len - hdr_len;
394 skb->truesize += true_size;
395 skb->tail += hdr_len;
396
397 memcpy(skb->data, mac_hdr, hdr_len);
398
399 skb_frags = skb_shinfo(skb)->frags;
400 while (data_len > 0) {
401 *skb_frags = *frags;
402 data_len -= skb_frag_size(frags);
403 skb_frags++;
404 frags++;
405 skb_shinfo(skb)->nr_frags++;
406 }
407
408 skb_shinfo(skb)->frags[0].page_offset += hdr_len;
409 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
410
411 skb->ip_summed = ip_summed;
412 skb->csum = sum;
413 skb->protocol = eth_type_trans(skb, lro_mgr->dev);
414 return skb;
415}
416
417static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
418 struct skb_frag_struct *frags,
419 int len, int true_size,
420 void *priv, __wsum sum)
421{
422 struct net_lro_desc *lro_desc;
423 struct iphdr *iph;
424 struct tcphdr *tcph;
425 struct sk_buff *skb;
426 u64 flags;
427 void *mac_hdr;
428 int mac_hdr_len;
429 int hdr_len = LRO_MAX_PG_HLEN;
430 int vlan_hdr_len = 0;
431
432 if (!lro_mgr->get_frag_header ||
433 lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
434 (void *)&tcph, &flags, priv)) {
435 mac_hdr = skb_frag_address(frags);
436 goto out1;
437 }
438
439 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
440 goto out1;
441
442 hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
443 mac_hdr_len = (int)((void *)(iph) - mac_hdr);
444
445 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
446 if (!lro_desc)
447 goto out1;
448
449 if (!lro_desc->active) {
450 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
451 goto out1;
452
453 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
454 hdr_len, 0, lro_mgr->ip_summed_aggr);
455 if (!skb)
456 goto out;
457
458 if ((skb->protocol == htons(ETH_P_8021Q)) &&
459 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
460 vlan_hdr_len = VLAN_HLEN;
461
462 iph = (void *)(skb->data + vlan_hdr_len);
463 tcph = (void *)((u8 *)skb->data + vlan_hdr_len
464 + IP_HDR_LEN(iph));
465
466 lro_init_desc(lro_desc, skb, iph, tcph);
467 LRO_INC_STATS(lro_mgr, aggregated);
468 return NULL;
469 }
470
471 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
472 goto out2;
473
474 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
475 goto out2;
476
477 lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
478 LRO_INC_STATS(lro_mgr, aggregated);
479
480 if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
481 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
482 lro_flush(lro_mgr, lro_desc);
483
484 return NULL;
485
486out2:
487 lro_flush(lro_mgr, lro_desc);
488
489out1:
490 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
491 hdr_len, sum, lro_mgr->ip_summed);
492out:
493 return skb;
494}
495
496void lro_receive_skb(struct net_lro_mgr *lro_mgr,
497 struct sk_buff *skb,
498 void *priv)
499{
500 if (__lro_proc_skb(lro_mgr, skb, priv)) {
501 if (lro_mgr->features & LRO_F_NAPI)
502 netif_receive_skb(skb);
503 else
504 netif_rx(skb);
505 }
506}
507EXPORT_SYMBOL(lro_receive_skb);
508
509void lro_receive_frags(struct net_lro_mgr *lro_mgr,
510 struct skb_frag_struct *frags,
511 int len, int true_size, void *priv, __wsum sum)
512{
513 struct sk_buff *skb;
514
515 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
516 if (!skb)
517 return;
518
519 if (lro_mgr->features & LRO_F_NAPI)
520 netif_receive_skb(skb);
521 else
522 netif_rx(skb);
523}
524EXPORT_SYMBOL(lro_receive_frags);
525
526void lro_flush_all(struct net_lro_mgr *lro_mgr)
527{
528 int i;
529 struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
530
531 for (i = 0; i < lro_mgr->max_desc; i++) {
532 if (lro_desc[i].active)
533 lro_flush(lro_mgr, &lro_desc[i]);
534 }
535}
536EXPORT_SYMBOL(lro_flush_all);
537
538void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
539 struct iphdr *iph, struct tcphdr *tcph)
540{
541 struct net_lro_desc *lro_desc;
542
543 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
544 if (lro_desc->active)
545 lro_flush(lro_mgr, lro_desc);
546}
547EXPORT_SYMBOL(lro_flush_pkt);
548