1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <asm/uaccess.h>
46#include <asm/system.h>
47#include <linux/module.h>
48#include <linux/types.h>
49#include <linux/kernel.h>
50#include <linux/mm.h>
51#include <linux/string.h>
52#include <linux/errno.h>
53#include <linux/highmem.h>
54
55#include <linux/socket.h>
56#include <linux/sockios.h>
57#include <linux/in.h>
58#include <linux/inet.h>
59#include <linux/netdevice.h>
60#include <linux/etherdevice.h>
61#include <linux/proc_fs.h>
62#include <linux/stat.h>
63#include <linux/init.h>
64
65#include <net/snmp.h>
66#include <net/ip.h>
67#include <net/protocol.h>
68#include <net/route.h>
69#include <net/xfrm.h>
70#include <linux/skbuff.h>
71#include <net/sock.h>
72#include <net/arp.h>
73#include <net/icmp.h>
74#include <net/checksum.h>
75#include <net/inetpeer.h>
76#include <linux/igmp.h>
77#include <linux/netfilter_ipv4.h>
78#include <linux/netfilter_bridge.h>
79#include <linux/mroute.h>
80#include <linux/netlink.h>
81#include <linux/tcp.h>
82
/* Default TTL for locally generated IPv4 packets; tunable via sysctl. */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
84
85
/* Compute and store the IPv4 header checksum for @iph. */
__inline__ void ip_send_check(struct iphdr *iph)
{
	iph->check = 0;		/* field must be zero while summing */
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
91
/*
 * Finalise the IP header of a locally generated packet (total length and
 * checksum) and run it through the LOCAL_OUT netfilter hook with
 * dst_output() as the continuation.  Returns 1 when the caller must
 * invoke the output path itself, otherwise the netfilter verdict/error.
 */
int __ip_local_out(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = htons(skb->len);
	ip_send_check(iph);
	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
		       dst_output);
}
101
/*
 * Send a locally generated, fully routed packet: run LOCAL_OUT and, if
 * netfilter did not consume the skb (return value 1), hand it to
 * dst_output().  Returns 0/negative error, or the netfilter verdict.
 */
int ip_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip_local_out(skb);
	if (likely(err == 1))	/* hook passed the packet through untouched */
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);
113
114
/*
 * Deliver a cloned multicast/broadcast packet back to the local stack.
 * Used as the okfn of the POST_ROUTING hook for looped-back copies.
 */
static int ip_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	/* Locally generated data was already checksummed on the way out. */
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));
	netif_rx(newskb);
	return 0;
}
125
126static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
127{
128 int ttl = inet->uc_ttl;
129
130 if (ttl < 0)
131 ttl = dst_metric(dst, RTAX_HOPLIMIT);
132 return ttl;
133}
134
135
136
137
138
/*
 * Build a complete IPv4 header on an already-routed skb and transmit it
 * via ip_local_out().  Used for packets not associated with a normal
 * send path (e.g. TCP SYN-ACKs built from a request sock).
 *
 * @saddr/@daddr are the caller's addresses; the header actually uses the
 * route's rt_src/rt_dst, while @daddr is passed to ip_options_build()
 * for source-route handling.
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph;

	/* Build the IP header in front of the payload. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version = 4;
	iph->ihl = 5;
	iph->tos = inet->tos;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl = ip_select_ttl(inet, &rt->u.dst);
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(iph, &rt->u.dst, sk);

	if (opt && opt->optlen) {
		/* optlen is a multiple of 4; ihl counts 32-bit words */
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/* Send it out. */
	return ip_local_out(skb);
}

EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
176
/*
 * Final transmission step: account multicast/broadcast output, make sure
 * there is enough headroom for the link-layer header, then hand the skb
 * to the neighbour layer (cached hardware header or neighbour output).
 */
static inline int ip_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = (struct rtable *)dst;
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);

	if (rt->rt_type == RTN_MULTICAST) {
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST)
		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);

	/* Be paranoid, rather than too clever: reallocate when the skb
	 * lacks headroom for the device's link-layer header. */
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
		if (skb2 == NULL) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		/* keep socket memory accounting on the new copy */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		kfree_skb(skb);
		skb = skb2;
	}

	if (dst->hh)
		/* cached hardware header: fast path */
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	/* No way to resolve the link-layer destination — drop. */
	if (net_ratelimit())
		printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
	kfree_skb(skb);
	return -EINVAL;
}
214
215static inline int ip_skb_dst_mtu(struct sk_buff *skb)
216{
217 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
218
219 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
220 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
221}
222
/*
 * POST_ROUTING continuation: re-enter the output path when an xfrm
 * transform is pending, fragment over-MTU packets, otherwise transmit.
 */
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm != NULL) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(skb);
	}
#endif
	/* GSO packets are segmented later; don't fragment them here. */
	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
}
237
/*
 * Output path for multicast/broadcast routes.  Loops a clone of the
 * packet back to the local stack when local delivery is wanted, then
 * pushes the original through POST_ROUTING and ip_finish_output().
 */
int ip_mc_output(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->u.dst.dev;

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for local delivery when the
	 *	sending socket enabled mc_loop (or there is no socket).
	 */
	if (rt->rt_flags&RTCF_MULTICAST) {
		if ((!sk || inet_sk(sk)->mc_loop)
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loopback not local frames,
		 * which returned after forwarding; they will be dropped
		 * by ip_mr_input in any case.
		 * Note, that local frames are looped back to be delivered
		 * to local recipients.
		 *
		 * This check is duplicated in ip_mr_input at the moment.
		 */
		    && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
#endif
		) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
				NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip_dev_loopback_xmit);
		}

		/* Multicasts with ttl 0 must not go beyond the host. */
		if (ip_hdr(skb)->ttl == 0) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Broadcasts are always looped back for local delivery. */
	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
			NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
				newskb->dev, ip_dev_loopback_xmit);
	}

	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
296
/*
 * Standard unicast output path: account the packet, stamp device and
 * protocol, and run POST_ROUTING (skipped for rerouted xfrm packets)
 * into ip_finish_output().
 */
int ip_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;

	IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip_finish_output,
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
310
/*
 * Main transmit entry point for connection-oriented sockets (TCP).
 * Routes the packet if it is not already routed, builds the IP header
 * (including any IP options cached on the socket) and hands the skb to
 * ip_local_out().
 *
 * @ipfragok: non-zero when the caller permits fragmentation even though
 * the socket's PMTU settings would normally set DF.
 * Returns the ip_local_out() result, or -EHOSTUNREACH when no route.
 */
int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ip_options *opt = inet->opt;
	struct rtable *rt;
	struct iphdr *iph;

	/* Skip all of this if the packet is already routed,
	 * f.e. by SCTP.
	 */
	rt = skb_rtable(skb);
	if (rt != NULL)
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
	if (rt == NULL) {
		__be32 daddr;

		/* Use correct destination address if we have options. */
		daddr = inet->daddr;
		if(opt && opt->srr)
			daddr = opt->faddr;

		{
			struct flowi fl = { .oif = sk->sk_bound_dev_if,
					    .mark = sk->sk_mark,
					    .nl_u = { .ip4_u =
						      { .daddr = daddr,
							.saddr = inet->saddr,
							.tos = RT_CONN_FLAGS(sk) } },
					    .proto = sk->sk_protocol,
					    .flags = inet_sk_flowi_flags(sk),
					    .uli_u = { .ports =
						       { .sport = inet->sport,
							 .dport = inet->dport } } };

			/* If this fails, the retransmit mechanism of the
			 * transport layer will keep trying until the route
			 * appears or the connection times itself out.
			 */
			security_sk_classify_flow(sk, &fl);
			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
				goto no_route;
		}
		sk_setup_caps(sk, &rt->u.dst);
	}
	skb_dst_set(skb, dst_clone(&rt->u.dst));

packet_routed:
	/* Strict source routing requires the next hop to equal the final
	 * destination of the route. */
	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	/* version=4, ihl=5, TOS written with a single 16-bit store */
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl = ip_select_ttl(inet, &rt->u.dst);
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;
	/* Transport layer set skb->h.foo itself. */

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen >> 2;
		ip_options_build(skb, opt, inet->daddr, rt, 0);
	}

	/* Advance the IP ID by the number of GSO segments this skb
	 * will be split into (at least one). */
	ip_select_ident_more(iph, &rt->u.dst, sk,
			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	return ip_local_out(skb);

no_route:
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EHOSTUNREACH;
}
398
399
/*
 * Copy per-packet metadata (route, device, marks, netfilter/QoS state)
 * from @from to @to.  Used when building IP fragments so each fragment
 * carries the same forwarding context as the original packet.
 */
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);	/* drop any stale route before cloning */
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}
426
427
428
429
430
431
432
433
/*
 *	This IP datagram is too large to be sent in one piece.  Break it up
 *	into smaller pieces (each of size equal to IP header plus a block of
 *	the data of the original IP data part) that will yet fit in a single
 *	device frame, and queue such a frame for sending via @output.
 *
 *	Fast path: if the skb already carries a well-formed frag_list, the
 *	fragments are emitted in place.  Otherwise the slow path copies the
 *	payload into freshly allocated skbs.
 */
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct iphdr *iph;
	int raw = 0;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
	unsigned int mtu, hlen, left, len, ll_rs, pad;
	int offset;
	__be16 not_last_frag;
	struct rtable *rt = skb_rtable(skb);
	int err = 0;

	dev = rt->u.dst.dev;

	/*
	 *	Point into the IP datagram header.
	 */
	iph = ip_hdr(skb);

	/* DF set and local fragmentation not permitted: report back. */
	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/*
	 *	Setup starting values.
	 */
	hlen = iph->ihl * 4;
	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;

	/* When frag_list is given, use it.  First, check its validity:
	 * some transformers could create wrong frag_list or break existing
	 * one, it is not prohibited.  In this case fall back to copying.
	 *
	 * LATER: this step can be merged to real generation of fragments,
	 * we can switch to copy when we see the first bad fragment.
	 */
	if (skb_has_frags(skb)) {
		struct sk_buff *frag;
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		/* Everything is OK.  Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), iph, hlen);
				iph = ip_hdr(frag);
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
				if (frag->next != NULL)
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

			err = output(skb);

			if (!err)
				IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
			IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Output failed: free the fragments still queued. */
		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = raw + hlen;		/* Where to start from */

	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
	 * we need to make room for the encapsulating header
	 */
	pad = nf_bridge_pad(skb);
	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
	mtu -= pad;

	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		 * then align the next start on an eight byte boundary */
		if (len < left) {
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
		skb_reset_network_header(skb2);
		skb2->transport_header = skb2->network_header + hlen;

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
		iph = ip_hdr(skb2);
		iph->frag_off = htons((offset >> 3));

		/* ANK: dirty, but effective trick.  Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE on the initial
		 * skb, so that all the following fragments will inherit
		 * fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	If we are fragmenting a fragment that's not the
		 *	last fragment then keep MF on each fragment
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

		err = output(skb2);
		if (err)
			goto fail;

		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
	}
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
	return err;

fail:
	kfree_skb(skb);
	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
	return err;
}

EXPORT_SYMBOL(ip_fragment);
690
/*
 * Generic getfrag callback for ip_append_data(): copy @len bytes at
 * @offset from the caller's iovec into @to, folding a checksum into
 * skb->csum unless the hardware will checksum (CHECKSUM_PARTIAL).
 * Returns 0 on success, -EFAULT on a failed user copy.
 */
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct iovec *iov = from;

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* hardware checksums: plain copy is enough */
		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
			return -EFAULT;
	} else {
		__wsum csum = 0;
		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}
707
708static inline __wsum
709csum_page(struct page *page, int offset, int copy)
710{
711 char *kaddr;
712 __wsum csum;
713 kaddr = kmap(page);
714 csum = csum_partial(kaddr + offset, copy, 0);
715 kunmap(page);
716 return csum;
717}
718
/*
 * Append data for a UDP Fragmentation Offload (UFO) capable device.
 * Creates the single header skb on first use (marking it SKB_GSO_UDP so
 * the device segments it), then appends payload as page frags.
 * Returns 0 on success or a negative errno.
 */
static inline int ip_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

		if (skb == NULL)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;

		/* specify the length of each IP datagram fragment */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		__skb_queue_tail(&sk->sk_write_queue, skb);
	}

	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
765
766
767
768
769
770
771
772
773
774
775
776
/*
 *	ip_append_data() and ip_append_page() can make one large IP datagram
 *	from many pieces of data.  Each piece is held on the socket's write
 *	queue until ip_push_pending_frames() is called.  Each piece can be
 *	a page or non-page data.
 *
 *	Not only UDP, other transport protocols - e.g. raw sockets - can
 *	use this interface potentially.
 *
 *	LATER: length must be adjusted by pad at tail, when it is required.
 */
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable **rtp,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;
	struct rtable *rt;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking: stash options, route and MTU on the
		 * socket for the duration of the cork.
		 */
		opt = ipc->opt;
		if (opt) {
			if (inet->cork.opt == NULL) {
				/* 40 = max IP option space */
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		rt = *rtp;
		if (unlikely(!rt))
			return -EFAULT;
		/*
		 * We steal the reference to this route, caller should not
		 * release it
		 */
		*rtp = NULL;
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->u.dst.dev->mtu :
					    dst_mtu(rt->u.dst.path);
		inet->cork.dst = &rt->u.dst;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			/* account for transform (e.g. IPsec) header space */
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		/* cork already set up: reuse its route/options/MTU */
		rt = (struct rtable *)inet->cork.dst;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	inet->cork.length += length;
	if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/* So, what's going on in the loop below?
	 *
	 * We use calculated fragment length to generate chained skb,
	 * each of segments is IP fragment ready for sending to network
	 * after adding appropriate IP header.
	 */

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			if (skb_prev)
				/* bytes past the 8-byte boundary that must
				 * migrate to the new fragment */
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else
					/* only the initial fragment is
					   time stamped */
					ipc->shtx.flags = 0;
			}
			if (skb == NULL)
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
			*skb_tx(skb) = ipc->shtx;

			/*
			 *	Find where to start putting bytes.
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				/* move the overhang from the previous skb,
				 * keeping both checksums consistent */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* no scatter/gather: append into the linear area */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* scatter/gather: append into page fragments,
			 * reusing the socket's current send page if any */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	inet->cork.length -= length;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1066
/*
 * Append a page's data to the pending (corked) datagram without copying.
 * Requires an existing write queue (started by ip_append_data()) and a
 * scatter/gather capable device.  Returns 0 or a negative errno.
 */
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	/* must be called between ip_append_data() and push/flush */
	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = (struct rtable *)inet->cork.dst;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->u.dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
	mtu = inet->cork.fragsize;

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	if ((sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		/* let the UFO device segment the datagram */
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/* Check if the remaining data fits into current
			 * packet. */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			/* current fragment is full: start a new one,
			 * moving any 8-byte-boundary overhang across */
			struct sk_buff *skb_prev;
			int alloclen;

			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 *	Find where to start putting bytes.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		if (skb_can_coalesce(skb, i, page, offset)) {
			/* extend the last frag instead of adding one */
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;
		atomic_add(len, &sk->sk_wmem_alloc);
		offset += len;
		size -= len;
	}
	return 0;

error:
	inet->cork.length -= size;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1208
/* Release per-socket corking state: cached options and route. */
static void ip_cork_release(struct inet_sock *inet)
{
	inet->cork.flags &= ~IPCORK_OPT;
	kfree(inet->cork.opt);
	inet->cork.opt = NULL;
	dst_release(inet->cork.dst);
	inet->cork.dst = NULL;
}
1217
1218
1219
1220
1221
/*
 *	Combine all pending IP fragments on the socket as one IP datagram
 *	and push them out.
 */
int ip_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct ip_options *opt = NULL;
	struct rtable *rt = (struct rtable *)inet->cork.dst;
	struct iphdr *iph;
	__be16 df = 0;
	__u8 ttl;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* chain the remaining queued skbs onto the first one's frag_list */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we
	 * allow fragmenting this frame locally: transforms may grow it
	 * past the MTU later.
	 */
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF is set when PMTU discovery is on, or when the datagram fits
	 * the MTU and the socket would not fragment anyway.
	 */
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->u.dst) &&
	     ip_dont_fragment(sk, &rt->u.dst)))
		df = htons(IP_DF);

	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (rt->rt_type == RTN_MULTICAST)
		ttl = inet->mc_ttl;
	else
		ttl = ip_select_ttl(inet, &rt->u.dst);

	iph = (struct iphdr *)skb->data;
	iph->version = 4;
	iph->ihl = 5;
	if (opt) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
	}
	iph->tos = inet->tos;
	iph->frag_off = df;
	ip_select_ident(iph, &rt->u.dst, sk);
	iph->ttl = ttl;
	iph->protocol = sk->sk_protocol;
	iph->saddr = rt->rt_src;
	iph->daddr = rt->rt_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	/*
	 * Steal the route from the cork; skb_dst_set takes over the
	 * reference, so don't dst_release it in ip_cork_release.
	 */
	inet->cork.dst = NULL;
	skb_dst_set(skb, &rt->u.dst);

	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	/* Netfilter gets the whole, not yet fragmented skb. */
	err = ip_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip_cork_release(inet);
	return err;

error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
1321
1322
1323
1324
/*
 *	Throw away all pending data on the socket and release corking state.
 */
void ip_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
		kfree_skb(skb);

	ip_cork_release(inet_sk(sk));
}
1334
1335
1336
1337
1338
/*
 * getfrag callback for ip_send_reply(): copy @len bytes of the kernel
 * reply buffer at @dptr+@offset into @to, folding the partial checksum
 * into skb->csum.  Never fails (kernel-to-kernel copy).
 */
static int ip_reply_glue_bits(void *dptr, char *to, int offset,
			      int len, int odd, struct sk_buff *skb)
{
	__wsum csum;

	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
	skb->csum = csum_block_add(skb->csum, csum, odd);
	return 0;
}
1348
1349
1350
1351
1352
1353
1354
1355
/*
 *	Generic function to send a packet as reply to another packet.
 *	Used to send TCP resets/ACKs so far.
 *
 *	Should run single threaded per socket because it uses the sock
 *	structure!  The reply is addressed back to the sender of @skb,
 *	echoing its IP options.
 */
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
		   unsigned int len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct {
		struct ip_options opt;
		char data[40];	/* room for echoed options (max 40 bytes) */
	} replyopts;
	struct ipcm_cookie ipc;
	__be32 daddr;
	struct rtable *rt = skb_rtable(skb);

	if (ip_options_echo(&replyopts.opt, skb))
		return;

	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;
	ipc.shtx.flags = 0;

	if (replyopts.opt.optlen) {
		ipc.opt = &replyopts.opt;

		/* with source routing, reply to the first-hop router */
		if (ipc.opt->srr)
			daddr = replyopts.opt.faddr;
	}

	{
		struct flowi fl = { .oif = arg->bound_dev_if,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    /* Not quite clean, but right. */
				    .uli_u = { .ports =
					       { .sport = tcp_hdr(skb)->dest,
						 .dport = tcp_hdr(skb)->source } },
				    .proto = sk->sk_protocol,
				    .flags = ip_reply_arg_flowi_flags(arg) };
		security_skb_classify_flow(skb, &fl);
		if (ip_route_output_key(sock_net(sk), &rt, &fl))
			return;
	}

	/* And let IP do all the hard work.
	 *
	 * This chunk is not reenterable, hence spinlock.
	 * Note that it uses the fact, that this function is called
	 * with locally disabled BH and that sk cannot be already spinlocked.
	 */
	bh_lock_sock(sk);
	inet->tos = ip_hdr(skb)->tos;
	sk->sk_priority = skb->priority;
	sk->sk_protocol = ip_hdr(skb)->protocol;
	sk->sk_bound_dev_if = arg->bound_dev_if;
	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
		       &ipc, &rt, MSG_DONTWAIT);
	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
		if (arg->csumoffset >= 0)
			/* finalise the transport checksum in place */
			*((__sum16 *)skb_transport_header(skb) +
			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
								arg->csum));
		skb->ip_summed = CHECKSUM_NONE;
		ip_push_pending_frames(sk);
	}

	bh_unlock_sock(sk);

	ip_rt_put(rt);
}
1425
/* Boot-time initialisation of the IPv4 layer: routing, inet peer cache
 * and (when configured) the IGMP /proc interface. */
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}
1435
/* Symbols used by protocol modules (UDP, TCP, raw sockets). */
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);
1439