// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

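/* Resolve the L2 neighbour for the packet's next hop and hand the skb to
 * the neighbour output path. Also loops multicast packets back to local
 * listeners and honours lightweight-tunnel redirected transmits.
 */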
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	int delta = hh_len - skb_headroom(skb);
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(delta > 0) && dev->header_ops) {
		/* pskb_expand_head() might crash, if skb is shared */
		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (likely(nskb)) {
				if (skb->sk)
					skb_set_owner_w(nskb, skb->sk);
				consume_skb(skb);
			} else {
				kfree_skb(skb);
			}
			skb = nskb;
		}
		if (skb &&
		    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
			kfree_skb(skb);
			skb = NULL;
		}
		if (!skb) {
			IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

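/* Segment an oversized GSO packet in software and send each resulting
 * segment through the IPv6 fragmentation path.
 */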
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

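/* Post-netfilter output: hand the packet back to the xfrm layer when a
 * policy rerouted it, fragment when it exceeds the path MTU, otherwise
 * transmit it directly.
 */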
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

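/* Run the egress cgroup BPF program before completing output; a verdict
 * of NET_XMIT_CN still transmits but is reported back to the caller.
 */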
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
		return __ip6_finish_output(net, sk, skb);
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb(skb);
		return ret;
	}
}

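/* Output entry point for routed and locally generated packets: runs the
 * NF_INET_POST_ROUTING hook unless the packet was already rerouted.
 */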
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;

	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

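/* Decide what to do with a packet whose destination we proxy (proxy NDP):
 * returns 1 to deliver locally (NDP messages for the proxied address),
 * 0 to keep forwarding, and -1 to drop (link-local destination).
 */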
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

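/* Forward a received packet that is not addressed to this host: validate
 * it, decrement the hop limit, emit redirects and Packet Too Big errors
 * where required, and pass it through the NF_INET_FORWARD hook.
 */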
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    !idev->cnf.disable_policy &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */
		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

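/* Set up fast-path fragmentation over an existing frag_list: save the
 * original header chain, insert the first fragment header and initialise
 * the iterator consumed by ip6_fraglist_prepare()/ip6_fraglist_next().
 */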
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

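/* Initialise the slow-path fragmentation state consumed by ip6_frag_next(). */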
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;
	state->ptr = hlen;

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */
	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

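/* Fragment an IPv6 datagram: take the fast path when a well-formed
 * frag_list is already present, otherwise fall back to allocating and
 * copying each fragment, emitting every piece through @output.
 */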
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than frag_max_size */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */
	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */
	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores the src address to be
 *	used in the tunnel in *@saddr on success, else a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) {
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

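/* Duplicate the caller's extension headers into the cork, record the
 * route and flow, and compute the MTU that the append-data path will use.
 */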
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

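/* Core of ip6_append_data()/ip6_make_skb(): copy user data onto the
 * queue, sized so that each queued skb becomes one IPv6 fragment (or one
 * packet when no fragmentation is needed), using page frags or zerocopy
 * frags as the device features allow.
 */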
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* As per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU as limit: each queued skb is filled up to the fragment
	 * boundary (maxfraglen) so that it can later be sent as exactly
	 * one IPv6 fragment.
	 */
	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

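/* Collapse the queued skbs into one packet (via frag_list), prepend the
 * IPv6 header from the cork state and return the finished skb ready for
 * ip6_send_skb().
 */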
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

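/* Single-shot variant of ip6_append_data()/__ip6_make_skb() that builds
 * the packet on a private queue with a caller-supplied cork, so it never
 * touches the socket's pending write queue.
 */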
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}