1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <asm/uaccess.h>
46#include <asm/system.h>
47#include <linux/module.h>
48#include <linux/types.h>
49#include <linux/kernel.h>
50#include <linux/mm.h>
51#include <linux/string.h>
52#include <linux/errno.h>
53#include <linux/highmem.h>
54#include <linux/slab.h>
55
56#include <linux/socket.h>
57#include <linux/sockios.h>
58#include <linux/in.h>
59#include <linux/inet.h>
60#include <linux/netdevice.h>
61#include <linux/etherdevice.h>
62#include <linux/proc_fs.h>
63#include <linux/stat.h>
64#include <linux/init.h>
65
66#include <net/snmp.h>
67#include <net/ip.h>
68#include <net/protocol.h>
69#include <net/route.h>
70#include <net/xfrm.h>
71#include <linux/skbuff.h>
72#include <net/sock.h>
73#include <net/arp.h>
74#include <net/icmp.h>
75#include <net/checksum.h>
76#include <net/inetpeer.h>
77#include <linux/igmp.h>
78#include <linux/netfilter_ipv4.h>
79#include <linux/netfilter_bridge.h>
80#include <linux/mroute.h>
81#include <linux/netlink.h>
82#include <linux/tcp.h>
83
/*
 * Default IPv4 TTL value, initialised to IPDEFTTL and exported to other
 * modules.  NOTE(review): the name suggests it is tunable through sysctl;
 * the registration is not visible in this file - confirm.
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
86
87
88__inline__ void ip_send_check(struct iphdr *iph)
89{
90 iph->check = 0;
91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
92}
93EXPORT_SYMBOL(ip_send_check);
94
95int __ip_local_out(struct sk_buff *skb)
96{
97 struct iphdr *iph = ip_hdr(skb);
98
99 iph->tot_len = htons(skb->len);
100 ip_send_check(iph);
101 return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
102 skb_dst(skb)->dev, dst_output);
103}
104
/*
 * Send a locally generated packet.
 *
 * Runs __ip_local_out(); when that returns exactly 1 the packet is handed
 * to dst_output() and its result is returned.  Any other value from
 * __ip_local_out() is returned unchanged.
 */
int ip_local_out(struct sk_buff *skb)
{
	int rc = __ip_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip_local_out);
116
117
118static int ip_dev_loopback_xmit(struct sk_buff *newskb)
119{
120 skb_reset_mac_header(newskb);
121 __skb_pull(newskb, skb_network_offset(newskb));
122 newskb->pkt_type = PACKET_LOOPBACK;
123 newskb->ip_summed = CHECKSUM_UNNECESSARY;
124 WARN_ON(!skb_dst(newskb));
125 netif_rx_ni(newskb);
126 return 0;
127}
128
129static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
130{
131 int ttl = inet->uc_ttl;
132
133 if (ttl < 0)
134 ttl = ip4_dst_hoplimit(dst);
135 return ttl;
136}
137
138
139
140
141
142int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
143 __be32 saddr, __be32 daddr, struct ip_options *opt)
144{
145 struct inet_sock *inet = inet_sk(sk);
146 struct rtable *rt = skb_rtable(skb);
147 struct iphdr *iph;
148
149
150 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
151 skb_reset_network_header(skb);
152 iph = ip_hdr(skb);
153 iph->version = 4;
154 iph->ihl = 5;
155 iph->tos = inet->tos;
156 if (ip_dont_fragment(sk, &rt->dst))
157 iph->frag_off = htons(IP_DF);
158 else
159 iph->frag_off = 0;
160 iph->ttl = ip_select_ttl(inet, &rt->dst);
161 iph->daddr = rt->rt_dst;
162 iph->saddr = rt->rt_src;
163 iph->protocol = sk->sk_protocol;
164 ip_select_ident(iph, &rt->dst, sk);
165
166 if (opt && opt->optlen) {
167 iph->ihl += opt->optlen>>2;
168 ip_options_build(skb, opt, daddr, rt, 0);
169 }
170
171 skb->priority = sk->sk_priority;
172 skb->mark = sk->sk_mark;
173
174
175 return ip_local_out(skb);
176}
177EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
178
179static inline int ip_finish_output2(struct sk_buff *skb)
180{
181 struct dst_entry *dst = skb_dst(skb);
182 struct rtable *rt = (struct rtable *)dst;
183 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185
186 if (rt->rt_type == RTN_MULTICAST) {
187 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
188 } else if (rt->rt_type == RTN_BROADCAST)
189 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
190
191
192 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
193 struct sk_buff *skb2;
194
195 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
196 if (skb2 == NULL) {
197 kfree_skb(skb);
198 return -ENOMEM;
199 }
200 if (skb->sk)
201 skb_set_owner_w(skb2, skb->sk);
202 kfree_skb(skb);
203 skb = skb2;
204 }
205
206 if (dst->hh)
207 return neigh_hh_output(dst->hh, skb);
208 else if (dst->neighbour)
209 return dst->neighbour->output(skb);
210
211 if (net_ratelimit())
212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
213 kfree_skb(skb);
214 return -EINVAL;
215}
216
217static inline int ip_skb_dst_mtu(struct sk_buff *skb)
218{
219 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
220
221 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
222 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
223}
224
225static int ip_finish_output(struct sk_buff *skb)
226{
227#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
228
229 if (skb_dst(skb)->xfrm != NULL) {
230 IPCB(skb)->flags |= IPSKB_REROUTED;
231 return dst_output(skb);
232 }
233#endif
234 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
235 return ip_fragment(skb, ip_finish_output2);
236 else
237 return ip_finish_output2(skb);
238}
239
240int ip_mc_output(struct sk_buff *skb)
241{
242 struct sock *sk = skb->sk;
243 struct rtable *rt = skb_rtable(skb);
244 struct net_device *dev = rt->dst.dev;
245
246
247
248
249 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
250
251 skb->dev = dev;
252 skb->protocol = htons(ETH_P_IP);
253
254
255
256
257
258 if (rt->rt_flags&RTCF_MULTICAST) {
259 if (sk_mc_loop(sk)
260#ifdef CONFIG_IP_MROUTE
261
262
263
264
265
266
267
268
269 &&
270 ((rt->rt_flags & RTCF_LOCAL) ||
271 !(IPCB(skb)->flags & IPSKB_FORWARDED))
272#endif
273 ) {
274 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
275 if (newskb)
276 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
277 newskb, NULL, newskb->dev,
278 ip_dev_loopback_xmit);
279 }
280
281
282
283 if (ip_hdr(skb)->ttl == 0) {
284 kfree_skb(skb);
285 return 0;
286 }
287 }
288
289 if (rt->rt_flags&RTCF_BROADCAST) {
290 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
291 if (newskb)
292 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
293 NULL, newskb->dev, ip_dev_loopback_xmit);
294 }
295
296 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
297 skb->dev, ip_finish_output,
298 !(IPCB(skb)->flags & IPSKB_REROUTED));
299}
300
301int ip_output(struct sk_buff *skb)
302{
303 struct net_device *dev = skb_dst(skb)->dev;
304
305 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
306
307 skb->dev = dev;
308 skb->protocol = htons(ETH_P_IP);
309
310 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
311 ip_finish_output,
312 !(IPCB(skb)->flags & IPSKB_REROUTED));
313}
314
315int ip_queue_xmit(struct sk_buff *skb)
316{
317 struct sock *sk = skb->sk;
318 struct inet_sock *inet = inet_sk(sk);
319 struct ip_options *opt = inet->opt;
320 struct rtable *rt;
321 struct iphdr *iph;
322 int res;
323
324
325
326
327 rcu_read_lock();
328 rt = skb_rtable(skb);
329 if (rt != NULL)
330 goto packet_routed;
331
332
333 rt = (struct rtable *)__sk_dst_check(sk, 0);
334 if (rt == NULL) {
335 __be32 daddr;
336
337
338 daddr = inet->inet_daddr;
339 if(opt && opt->srr)
340 daddr = opt->faddr;
341
342 {
343 struct flowi fl = { .oif = sk->sk_bound_dev_if,
344 .mark = sk->sk_mark,
345 .fl4_dst = daddr,
346 .fl4_src = inet->inet_saddr,
347 .fl4_tos = RT_CONN_FLAGS(sk),
348 .proto = sk->sk_protocol,
349 .flags = inet_sk_flowi_flags(sk),
350 .fl_ip_sport = inet->inet_sport,
351 .fl_ip_dport = inet->inet_dport };
352
353
354
355
356
357 security_sk_classify_flow(sk, &fl);
358 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
359 goto no_route;
360 }
361 sk_setup_caps(sk, &rt->dst);
362 }
363 skb_dst_set_noref(skb, &rt->dst);
364
365packet_routed:
366 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
367 goto no_route;
368
369
370 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
371 skb_reset_network_header(skb);
372 iph = ip_hdr(skb);
373 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
374 if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
375 iph->frag_off = htons(IP_DF);
376 else
377 iph->frag_off = 0;
378 iph->ttl = ip_select_ttl(inet, &rt->dst);
379 iph->protocol = sk->sk_protocol;
380 iph->saddr = rt->rt_src;
381 iph->daddr = rt->rt_dst;
382
383
384 if (opt && opt->optlen) {
385 iph->ihl += opt->optlen >> 2;
386 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
387 }
388
389 ip_select_ident_more(iph, &rt->dst, sk,
390 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
391
392 skb->priority = sk->sk_priority;
393 skb->mark = sk->sk_mark;
394
395 res = ip_local_out(skb);
396 rcu_read_unlock();
397 return res;
398
399no_route:
400 rcu_read_unlock();
401 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
402 kfree_skb(skb);
403 return -EHOSTUNREACH;
404}
405EXPORT_SYMBOL(ip_queue_xmit);
406
407
408static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
409{
410 to->pkt_type = from->pkt_type;
411 to->priority = from->priority;
412 to->protocol = from->protocol;
413 skb_dst_drop(to);
414 skb_dst_copy(to, from);
415 to->dev = from->dev;
416 to->mark = from->mark;
417
418
419 IPCB(to)->flags = IPCB(from)->flags;
420
421#ifdef CONFIG_NET_SCHED
422 to->tc_index = from->tc_index;
423#endif
424 nf_copy(to, from);
425#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
426 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
427 to->nf_trace = from->nf_trace;
428#endif
429#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
430 to->ipvs_property = from->ipvs_property;
431#endif
432 skb_copy_secmark(to, from);
433}
434
435
436
437
438
439
440
441
/*
 * Split an IPv4 packet that does not fit the path MTU into fragments and
 * pass each fragment to @output.
 *
 * @skb:    packet to fragment.  On every path in this function it is
 *          either freed with kfree_skb() or handed to @output.
 * @output: callback invoked once per fragment; a non-zero return aborts
 *          the operation.
 *
 * Two strategies are used:
 *  - fast path: when @skb already carries a frag_list whose members have
 *    suitable sizes and headroom, each member is turned into a fragment in
 *    place by prepending a copy of the IP header;
 *  - slow path: otherwise new skbs are allocated and the payload is
 *    copied into them piece by piece.
 *
 * Returns 0 on success, -EMSGSIZE when the header has DF set and
 * skb->local_df is clear (an ICMP "fragmentation needed" is sent),
 * -ENOMEM when a fragment cannot be allocated, or the first non-zero
 * value returned by @output.
 */
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct iphdr *iph;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = skb_rtable(skb);
	int err = 0;

	dev = rt->dst.dev;

	/* Work on the IP header of the packet being split. */
	iph = ip_hdr(skb);

	/* DF set and not overridden locally: refuse and report via ICMP. */
	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 * hlen is the header size in bytes; mtu becomes the payload room
	 * available in each fragment.
	 */
	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->dst) - hlen;
#ifdef CONFIG_BRIDGE_NETFILTER
	if (skb->nf_bridge)
		mtu -= nf_bridge_mtu_reduction(skb);
#endif
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

	/*
	 * Fast path: the packet already consists of a head plus a frag_list.
	 * If every piece has the right geometry the list members can be sent
	 * as fragments without copying payload.
	 */
	if (skb_has_frag_list(skb)) {
		struct sk_buff *frag, *frag2;
		int first_len = skb_pagelen(skb);

		/*
		 * The head must fit, carry a multiple of 8 payload bytes, not
		 * itself be a fragment, and not be cloned.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/*
			 * Each member must fit, be a multiple of 8 bytes unless
			 * it is the last one, and have room for the header.
			 */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* A shared member cannot be modified in place. */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Charge the member to the head's socket. */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		/* Everything checks out: detach the list and fix up the head. */
		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/*
			 * Prepare the header of the next fragment before the
			 * current one is sent, since it is copied from iph.
			 */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), iph, hlen);
				iph = ip_hdr(frag);
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				/* Only done while preparing the second fragment. */
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				/* frag_off counts 8-byte units. */
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Header changed: refresh its checksum. */
				ip_send_check(iph);
			}

			err = output(skb);

			if (!err)
				IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
			if (err || !frag)
				break;

			/* Advance to the fragment that was just prepared. */
			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Output failed: free the fragments that were not sent. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/*
		 * Undo the socket/truesize accounting done on the members
		 * visited before the one that failed the checks.
		 */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* payload bytes still to send */
	ptr = hlen;			/* read position inside skb */

	/* Link-layer headroom to reserve in each new fragment. */
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));

	/*
	 * The packet may itself be a fragment: start from its offset and
	 * remember whether it had MF set.
	 */
	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/* Keep copying until the whole payload has been sent. */
	while (left > 0) {
		len = left;
		/* Never more than the per-fragment payload room. */
		if (len > mtu)
			len = mtu;
		/*
		 * If more data follows, round down to a multiple of 8 bytes
		 * because the offset field counts 8-byte units.
		 */
		if (len < left)	{
			len &= ~7;
		}

		/* Allocate the buffer for this fragment. */
		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/* Set up metadata and the header/payload layout. */
		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb_reset_network_header(skb2);
		skb2->transport_header = skb2->network_header + hlen;

		/* Charge the fragment to the socket that owns the original. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* Copy the IP header. */
		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

		/* Copy this fragment's slice of the payload. */
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
			BUG();
		left -= len;

		/* Fill in the fragment offset of the new header. */
		iph = ip_hdr(skb2);
		iph->frag_off = htons((offset >> 3));

		/*
		 * After the header for offset 0 has been copied, rewrite the
		 * options in the ORIGINAL skb, which is the source for the
		 * headers of all later fragments.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 * MF is set when more data follows here, or when the packet
		 * being split was itself not a last fragment.
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/* Finish the header and send the fragment. */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;

		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
	}
	/* All payload copied out: the original is no longer needed. */
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
	return err;
}
EXPORT_SYMBOL(ip_fragment);
705
706int
707ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
708{
709 struct iovec *iov = from;
710
711 if (skb->ip_summed == CHECKSUM_PARTIAL) {
712 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
713 return -EFAULT;
714 } else {
715 __wsum csum = 0;
716 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
717 return -EFAULT;
718 skb->csum = csum_block_add(skb->csum, csum, odd);
719 }
720 return 0;
721}
722EXPORT_SYMBOL(ip_generic_getfrag);
723
724static inline __wsum
725csum_page(struct page *page, int offset, int copy)
726{
727 char *kaddr;
728 __wsum csum;
729 kaddr = kmap(page);
730 csum = csum_partial(kaddr + offset, copy, 0);
731 kunmap(page);
732 return csum;
733}
734
735static inline int ip_ufo_append_data(struct sock *sk,
736 int getfrag(void *from, char *to, int offset, int len,
737 int odd, struct sk_buff *skb),
738 void *from, int length, int hh_len, int fragheaderlen,
739 int transhdrlen, int mtu, unsigned int flags)
740{
741 struct sk_buff *skb;
742 int err;
743
744
745
746
747
748 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
749 skb = sock_alloc_send_skb(sk,
750 hh_len + fragheaderlen + transhdrlen + 20,
751 (flags & MSG_DONTWAIT), &err);
752
753 if (skb == NULL)
754 return err;
755
756
757 skb_reserve(skb, hh_len);
758
759
760 skb_put(skb, fragheaderlen + transhdrlen);
761
762
763 skb_reset_network_header(skb);
764
765
766 skb->transport_header = skb->network_header + fragheaderlen;
767
768 skb->ip_summed = CHECKSUM_PARTIAL;
769 skb->csum = 0;
770 sk->sk_sndmsg_off = 0;
771
772
773 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
774 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
775 __skb_queue_tail(&sk->sk_write_queue, skb);
776 }
777
778 return skb_append_datato_frags(sk, skb, getfrag, from,
779 (length - transhdrlen));
780}
781
782
783
784
785
786
787
788
789
790
791
792
/*
 * ip_append_data() - queue data on sk->sk_write_queue as a chain of skbs
 * sized so that each one (except possibly the last) is a valid IP
 * fragment.  ip_push_pending_frames() later turns the queue into one
 * datagram; ip_flush_pending_frames() discards it.
 *
 * @sk:          socket owning the pending queue and cork state.
 * @getfrag:     callback that copies @len bytes found @offset bytes into
 *               @from to @to; a negative return is treated as -EFAULT.
 * @from:        opaque data source passed through to @getfrag.
 * @length:      number of bytes to append (includes @transhdrlen).
 * @transhdrlen: transport header length; forced to 0 when the queue is
 *               not empty.
 * @ipc:         control information; opt, addr and tx_flags are used.
 * @rtp:         route to use.  When the queue is empty the route is taken
 *               over by the cork and *rtp is set to NULL.
 * @flags:       send flags; MSG_PROBE, MSG_DONTWAIT and MSG_MORE are
 *               examined.
 *
 * Returns 0 on success (or immediately for MSG_PROBE), otherwise a
 * negative errno: -ENOBUFS, -EFAULT, -EMSGSIZE, -ENOMEM or whatever the
 * skb allocation / UFO helper reported.  On the "error" exit the bytes
 * not yet queued are subtracted from cork.length; already queued skbs
 * are left on the queue for the caller to push or flush.
 */
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable **rtp,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;
	struct rtable *rt;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * First call for this datagram: set up the cork (options,
		 * route, fragment size, running length).
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				/* 40 extra bytes of room for the option data. */
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		rt = *rtp;
		if (unlikely(!rt))
			return -EFAULT;
		/*
		 * The cork now owns the caller's route; clear the caller's
		 * pointer so it is not released twice.
		 */
		*rtp = NULL;
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->dst.dev->mtu :
					    dst_mtu(rt->dst.path);
		inet->cork.dst = &rt->dst;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Extra header room requested by the dst goes in the first skb. */
		exthdrlen = rt->dst.header_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Continuing a corked datagram: reuse the saved state. */
		rt = (struct rtable *)inet->cork.dst;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/*
	 * fragheaderlen: IP header plus options.
	 * maxfraglen: largest fragment whose payload is a multiple of 8.
	 */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* The assembled datagram must fit the 16-bit total length field. */
	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
			       mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * Checksum offload is only requested for the first skb of a
	 * datagram that fits in a single fragment, on a device advertising
	 * NETIF_F_V4_CSUM, and with no extension header room.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	skb = skb_peek_tail(&sk->sk_write_queue);

	inet->cork.length += length;
	/*
	 * UDP data that exceeds the MTU (or that continues a GSO skb) on a
	 * UFO-capable device is queued as one large skb instead.
	 */
	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/*
	 * The last queued skb may be longer than maxfraglen (up to mtu); the
	 * excess ("fraggap") is moved into the next skb when one is started.
	 * With an empty queue, jump straight into the allocation code.
	 */
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Room left in the current skb. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			/* Bytes of the previous skb beyond maxfraglen. */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * Payload for the new skb: everything that is left, or
			 * a full fragment's worth if that does not fit.
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			/*
			 * More data is expected and the device cannot do
			 * scatter-gather: allocate a full MTU so later
			 * appends can go into the linear area.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = fraglen;

			/*
			 * This skb holds the tail of the data: leave room for
			 * the dst's trailer, shrinking the payload if needed.
			 */
			if (datalen == length + fraggap) {
				alloclen += rt->dst.trailer_len;
				/* make sure mtu is not reached */
				if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
					datalen -= ALIGN(rt->dst.trailer_len, 8);
			}
			if (transhdrlen) {
				/* First skb of the datagram. */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/*
				 * Later skbs: allocate only while the socket's
				 * write memory is within twice sk_sndbuf.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else
					/* skbs after the first carry no tx_flags */
					ipc->tx_flags = 0;
			}
			if (skb == NULL)
				goto error;

			/* Fill in the control fields of the new skb. */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
			skb_shinfo(skb)->tx_flags = ipc->tx_flags;

			/* Lay out header space and locate the payload area. */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				/*
				 * Move the excess bytes from the previous skb
				 * into this one and adjust both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/* Copy the caller's data behind headers and fraggap. */
			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Only the first skb has transport/extension headers. */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/* Put the new skb on the pending queue. */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: append to the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				/* Undo the skb_put() on failure. */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				/* Space remains in the socket's current page. */
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					/* Page not yet attached to this skb. */
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				/* Start a fresh page. */
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL)  {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				/* No fragment slot left in this skb. */
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			/* Account for the bytes just appended. */
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	/* Bytes that were counted but never queued. */
	inet->cork.length -= length;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1087
/*
 * ip_append_page() - attach @size bytes of @page, starting at @offset, to
 * the datagram already being assembled on sk->sk_write_queue.
 *
 * The page is referenced rather than copied, so the output device must
 * support scatter-gather.  The queue must already contain at least one
 * skb.
 *
 * Returns 0 on success (or immediately for MSG_PROBE), otherwise
 * -EPERM (hdrincl socket), -EINVAL (empty queue), -EOPNOTSUPP (no SG),
 * -EMSGSIZE (datagram too long or no fragment slot left) or -ENOBUFS
 * (skb allocation failed).  On the "error" exit the bytes not yet
 * attached are subtracted from cork.length.
 */
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	/* Pick up the state saved in the cork. */
	rt = (struct rtable *)inet->cork.dst;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
	mtu = inet->cork.fragsize;

	/* Same fragment geometry as in ip_append_data(). */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* The assembled datagram must fit the 16-bit total length field. */
	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	/*
	 * UDP on a UFO-capable device: mark the tail skb as GSO so the data
	 * can be attached to it without splitting.
	 * NOTE(review): gso_size is not rounded down to a multiple of 8 here,
	 * unlike maxfraglen above - confirm whether that is intended.
	 */
	if ((size + skb->len > mtu) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/* Room left in the current skb. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			/* Current skb is full: start a new header-only skb. */
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			/* Bytes of the previous skb beyond maxfraglen. */
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/* Fill in the control fields of the new skb. */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/* Lay out header space plus room for the fraggap. */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/*
				 * Move the excess bytes from the previous skb
				 * into this one and adjust both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/* Put the new skb on the pending queue. */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		if (skb_can_coalesce(skb, i, page, offset)) {
			/* Extends the last fragment: just grow it. */
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			/* New fragment slot; take a reference on the page. */
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		/* Software checksum: fold in the newly attached bytes. */
		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		/* Account for the bytes just attached. */
		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;
		atomic_add(len, &sk->sk_wmem_alloc);
		offset += len;
		size -= len;
	}
	return 0;

error:
	/* Bytes that were counted but never attached. */
	inet->cork.length -= size;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1230
1231static void ip_cork_release(struct inet_sock *inet)
1232{
1233 inet->cork.flags &= ~IPCORK_OPT;
1234 kfree(inet->cork.opt);
1235 inet->cork.opt = NULL;
1236 dst_release(inet->cork.dst);
1237 inet->cork.dst = NULL;
1238}
1239
1240
1241
1242
1243
1244int ip_push_pending_frames(struct sock *sk)
1245{
1246 struct sk_buff *skb, *tmp_skb;
1247 struct sk_buff **tail_skb;
1248 struct inet_sock *inet = inet_sk(sk);
1249 struct net *net = sock_net(sk);
1250 struct ip_options *opt = NULL;
1251 struct rtable *rt = (struct rtable *)inet->cork.dst;
1252 struct iphdr *iph;
1253 __be16 df = 0;
1254 __u8 ttl;
1255 int err = 0;
1256
1257 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1258 goto out;
1259 tail_skb = &(skb_shinfo(skb)->frag_list);
1260
1261
1262 if (skb->data < skb_network_header(skb))
1263 __skb_pull(skb, skb_network_offset(skb));
1264 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1265 __skb_pull(tmp_skb, skb_network_header_len(skb));
1266 *tail_skb = tmp_skb;
1267 tail_skb = &(tmp_skb->next);
1268 skb->len += tmp_skb->len;
1269 skb->data_len += tmp_skb->len;
1270 skb->truesize += tmp_skb->truesize;
1271 tmp_skb->destructor = NULL;
1272 tmp_skb->sk = NULL;
1273 }
1274
1275
1276
1277
1278
1279 if (inet->pmtudisc < IP_PMTUDISC_DO)
1280 skb->local_df = 1;
1281
1282
1283
1284
1285 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1286 (skb->len <= dst_mtu(&rt->dst) &&
1287 ip_dont_fragment(sk, &rt->dst)))
1288 df = htons(IP_DF);
1289
1290 if (inet->cork.flags & IPCORK_OPT)
1291 opt = inet->cork.opt;
1292
1293 if (rt->rt_type == RTN_MULTICAST)
1294 ttl = inet->mc_ttl;
1295 else
1296 ttl = ip_select_ttl(inet, &rt->dst);
1297
1298 iph = (struct iphdr *)skb->data;
1299 iph->version = 4;
1300 iph->ihl = 5;
1301 if (opt) {
1302 iph->ihl += opt->optlen>>2;
1303 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1304 }
1305 iph->tos = inet->tos;
1306 iph->frag_off = df;
1307 ip_select_ident(iph, &rt->dst, sk);
1308 iph->ttl = ttl;
1309 iph->protocol = sk->sk_protocol;
1310 iph->saddr = rt->rt_src;
1311 iph->daddr = rt->rt_dst;
1312
1313 skb->priority = sk->sk_priority;
1314 skb->mark = sk->sk_mark;
1315
1316
1317
1318
1319 inet->cork.dst = NULL;
1320 skb_dst_set(skb, &rt->dst);
1321
1322 if (iph->protocol == IPPROTO_ICMP)
1323 icmp_out_count(net, ((struct icmphdr *)
1324 skb_transport_header(skb))->type);
1325
1326
1327 err = ip_local_out(skb);
1328 if (err) {
1329 if (err > 0)
1330 err = net_xmit_errno(err);
1331 if (err)
1332 goto error;
1333 }
1334
1335out:
1336 ip_cork_release(inet);
1337 return err;
1338
1339error:
1340 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1341 goto out;
1342}
1343
1344
1345
1346
1347void ip_flush_pending_frames(struct sock *sk)
1348{
1349 struct sk_buff *skb;
1350
1351 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1352 kfree_skb(skb);
1353
1354 ip_cork_release(inet_sk(sk));
1355}
1356
1357
1358
1359
1360
1361static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1362 int len, int odd, struct sk_buff *skb)
1363{
1364 __wsum csum;
1365
1366 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1367 skb->csum = csum_block_add(skb->csum, csum, odd);
1368 return 0;
1369}
1370
1371
1372
1373
1374
1375
1376
1377
1378void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1379 unsigned int len)
1380{
1381 struct inet_sock *inet = inet_sk(sk);
1382 struct {
1383 struct ip_options opt;
1384 char data[40];
1385 } replyopts;
1386 struct ipcm_cookie ipc;
1387 __be32 daddr;
1388 struct rtable *rt = skb_rtable(skb);
1389
1390 if (ip_options_echo(&replyopts.opt, skb))
1391 return;
1392
1393 daddr = ipc.addr = rt->rt_src;
1394 ipc.opt = NULL;
1395 ipc.tx_flags = 0;
1396
1397 if (replyopts.opt.optlen) {
1398 ipc.opt = &replyopts.opt;
1399
1400 if (ipc.opt->srr)
1401 daddr = replyopts.opt.faddr;
1402 }
1403
1404 {
1405 struct flowi fl = { .oif = arg->bound_dev_if,
1406 .fl4_dst = daddr,
1407 .fl4_src = rt->rt_spec_dst,
1408 .fl4_tos = RT_TOS(ip_hdr(skb)->tos),
1409 .fl_ip_sport = tcp_hdr(skb)->dest,
1410 .fl_ip_dport = tcp_hdr(skb)->source,
1411 .proto = sk->sk_protocol,
1412 .flags = ip_reply_arg_flowi_flags(arg) };
1413 security_skb_classify_flow(skb, &fl);
1414 if (ip_route_output_key(sock_net(sk), &rt, &fl))
1415 return;
1416 }
1417
1418
1419
1420
1421
1422
1423
1424 bh_lock_sock(sk);
1425 inet->tos = ip_hdr(skb)->tos;
1426 sk->sk_priority = skb->priority;
1427 sk->sk_protocol = ip_hdr(skb)->protocol;
1428 sk->sk_bound_dev_if = arg->bound_dev_if;
1429 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1430 &ipc, &rt, MSG_DONTWAIT);
1431 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1432 if (arg->csumoffset >= 0)
1433 *((__sum16 *)skb_transport_header(skb) +
1434 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1435 arg->csum));
1436 skb->ip_summed = CHECKSUM_NONE;
1437 ip_push_pending_frames(sk);
1438 }
1439
1440 bh_unlock_sock(sk);
1441
1442 ip_rt_put(rt);
1443}
1444
/*
 * Boot-time initialisation for the IPv4 layer: sets up routing
 * (ip_rt_init), the inet peer storage (inet_initpeers) and, when both
 * multicast and procfs are configured, the IGMP proc entries.
 */
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}
1454