// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

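/* Finish transmitting one packet: loop multicast copies back to local
 * listeners when required, honour lightweight-tunnel redirection, then
 * resolve (or create) the neighbour entry for the route's next hop and
 * hand the skb to the neighbour output path.
 */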
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Be paranoid, rather than too clever. */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (unlikely(IS_ERR_OR_NULL(neigh))) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock_bh();
	return ret;
}

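/* The GSO packet is too large for the egress MTU: segment it in
 * software and push every resulting segment through ip6_fragment(),
 * preserving the first error while still attempting the remaining
 * segments.
 */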
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

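/* Choose how to finish transmission: re-run dst_output() when netfilter
 * rerouted the packet onto an xfrm dst, segment oversized GSO packets,
 * fragment anything that exceeds the path MTU, and transmit everything
 * else directly via ip6_finish_output2().
 */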
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb) &&
	    !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
	    !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	if ((skb->len > mtu && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

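/* dst_output() entry point: run the NF_INET_POST_ROUTING hook (skipped
 * for packets netfilter already rerouted) before ip6_finish_output().
 * Packets are discarded when IPv6 is administratively disabled on the
 * egress device.
 */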
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	struct hop_jumbo_hdr *hop_jumbo;
	int hoplen = sizeof(*hop_jumbo);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	if (unlikely(seg_len > IPV6_MAXPLEN)) {
		hop_jumbo = skb_push(skb, hoplen);

		hop_jumbo->nexthdr = proto;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);

		proto = IPPROTO_HOPOPTS;
		seg_len = 0;
		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

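/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value, cloning the skb for all listeners but the last. Returns
 * 1 if the skb was consumed by a listener, 0 otherwise.
 */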
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

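/* Returns 1 if the packet must be delivered locally (unicast neighbour
 * discovery aimed at the proxied address), -1 if it must be dropped
 * (link-local destination) and 0 if it may be forwarded.
 */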
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    (!idev || !idev->cnf.disable_policy) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}

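/* Copy the state a fragment must inherit from the original packet:
 * dst, device, priority, mark, hash, netfilter/extension state and
 * security mark.
 */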
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

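/* Fast-path fragmentation over an existing frag_list: stash a copy of
 * the original header chain, insert a fragment header into the first
 * skb and set up the iterator used to fix up each chained fragment.
 */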
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

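/* Set up the slow-path fragmentation state: how much payload remains,
 * where the next chunk starts, and the head/tail room every newly
 * allocated fragment will need.
 */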
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

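/* Split a too-big datagram into fragments and feed each one to @output.
 * The fast path reuses the skb's frag_list when the queued fragments
 * already have suitable geometry; otherwise the slow path allocates and
 * fills a fresh skb per fragment.
 */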
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	bool mono_delivery_time = skb->mono_delivery_time;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than frag_max_size */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, mono_delivery_time);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_gso_disable(skb->sk);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

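/* Helper for ip6_sk_dst_check(): nonzero when the cached route can no
 * longer be trusted for @fl_addr, i.e. it is neither a host route to
 * that exact address nor vouched for by the socket's cached address.
 */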
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not connected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call before the ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: Packet for which lookup is done
 *	@dev: Tunnel device
 *	@net: Network namespace of tunnel device
 *	@sock: Socket which provides route info
 *	@saddr: Memory to store the src ip address
 *	@info: Tunnel information
 *	@protocol: IP protocol
 *	@use_cache: Flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores the src address to be
 *	used in the tunnel in @saddr on success, else a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

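/* Recompute mtu/maxfraglen when appended data spills into a further
 * fragment: only the first fragment reserves the dst header_len, later
 * ones may use the full original MTU. Routes flagged DST_XFRM_TUNNEL
 * are left untouched.
 */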
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
	 */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

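/* Workhorse behind ip6_append_data() and ip6_make_skb(): append @length
 * bytes from @from to the queue, growing the tail skb or allocating new
 * ones sized to the fragmentation-aware MTU, while tracking checksum
 * offload, zerocopy and timestamping state.
 */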
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = atomic_inc_return(&sk->sk_tskey) - 1;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, total 7-14 octets)
	 * MTU is smaller than maximum possible IP packet size which
	 * is determined by regular value of MTU.
	 *
	 * Note that we may need to "move" the data from the tail of
	 * the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;

			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, size_t length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		dst_hold(&rt->dst);
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt);
		if (err)
			return err;

		inet->cork.fl.u.ip6 = *fl6;
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
	struct dst_entry *dst = cork->base.dst;

	cork->base.dst = NULL;
	cork->base.flags &= ~IPCORK_ALLFRAG;
	skb_dst_set(skb, dst);
}

static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		struct ipv6_txoptions *opt = v6_cork->opt;

		kfree(opt->dst0opt);
		kfree(opt->dst1opt);
		kfree(opt->hopopt);
		kfree(opt->srcrt);
		kfree(opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
}

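/* Collapse the queued skbs into one packet carried on a frag_list, push
 * the extension headers and the IPv6 header, then move the corked dst
 * and metadata onto the resulting skb.
 */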
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;
	skb->tstamp = cork->base.transmit_time;

	ip6_cork_steal_dst(skb, cork);
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

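/* Send a packet produced by __ip6_make_skb() through ip6_local_out(),
 * translating congestion-notification return values and accounting
 * discards.
 */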
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

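/* Build a complete packet in a single call, using caller-provided cork
 * state instead of the socket's write queue: set up the cork, append
 * the data and collapse the private queue into one skb.
 */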
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
			     unsigned int flags, struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}