1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57#include <net/ip_tunnels.h>
58
/* Resolve the L2 neighbour for an IPv6 packet and hand it to the device.
 * Handles multicast loopback/scope filtering and lwtunnel redirection on
 * the way.  Consumes @skb on every path; returns 0 or a negative errno.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *daddr, *nexthop;
	struct ipv6hdr *hdr;
	struct neighbour *neigh;
	int ret;

	/* Make sure the device's link-layer header fits in the headroom
	 * now; skb_expand_head() frees the old skb on failure.
	 */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOMEM;
		}
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		/* Loop a copy back to local listeners when multicast
		 * loopback is enabled on the socket and either an mrouting
		 * socket owns the packet (and it was not already forwarded)
		 * or we are a member of the destination group on the
		 * outgoing device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* The looped-back clone goes through POST_ROUTING
			 * like any other transmitted packet.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 is only valid for the local copy. */
			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		/* Node-local scope multicast must never leave the node. */
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* A light-weight tunnel may take over the actual transmission. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	/* Neighbour entry could not be created: no way to reach nexthop. */
	IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
136
/* Software-segment a GSO packet whose segments exceed the egress MTU and
 * fragment each resulting segment individually.  Returns 0, or the first
 * error encountered while still attempting the remaining segments.
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Segment in software by masking out all GSO features, so each
	 * resulting skb is a plain (non-GSO) packet we can fragment.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* The original GSO skb is fully replaced by the segment list. */
	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		err = ip6_fragment(net, sk, segs, ip6_finish_output2);
		/* Remember the first failure but keep sending the rest. */
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}
169
170static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
171{
172 unsigned int mtu;
173
174#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
175
176 if (skb_dst(skb)->xfrm) {
177 IPCB(skb)->flags |= IPSKB_REROUTED;
178 return dst_output(net, sk, skb);
179 }
180#endif
181
182 mtu = ip6_skb_dst_mtu(skb);
183 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
184 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
185
186 if ((skb->len > mtu && !skb_is_gso(skb)) ||
187 dst_allfrag(skb_dst(skb)) ||
188 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
189 return ip6_fragment(net, sk, skb, ip6_finish_output2);
190 else
191 return ip6_finish_output2(net, sk, skb);
192}
193
194static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
195{
196 int ret;
197
198 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
199 switch (ret) {
200 case NET_XMIT_SUCCESS:
201 return __ip6_finish_output(net, sk, skb);
202 case NET_XMIT_CN:
203 return __ip6_finish_output(net, sk, skb) ? : ret;
204 default:
205 kfree_skb(skb);
206 return ret;
207 }
208}
209
/* Main IPv6 output entry point for locally generated and forwarded
 * packets: runs the NF_INET_POST_ROUTING hook and hands the packet to
 * ip6_finish_output().
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	/* Silently discard everything when IPv6 is administratively
	 * disabled on the egress device.
	 */
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	/* Skip the POST_ROUTING hook when the packet already traversed it
	 * and was re-routed by the xfrm policy check (IP6SKB_REROUTED).
	 */
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);
230
231bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
232{
233 if (!np->autoflowlabel_set)
234 return ip6_default_np_autolabel(net);
235 else
236 return np->autoflowlabel;
237}
238
239
240
241
242
243
244
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks only.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Headroom for the IPv6 header, the link-layer header and any
	 * extension headers pushed below.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		/* skb_expand_head() frees the skb on failure. */
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			return -ENOBUFS;
		}
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* A routing header may rewrite the first hop. */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
340
/* Deliver a Router Alert packet to every raw socket registered for this
 * alert value (IPV6_ROUTER_ALERT).  Returns 1 when the skb was consumed
 * by at least one socket, 0 when the caller retains ownership.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* Optionally confine RA delivery to the netns the
			 * packet arrived in.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Each matching socket except the final one gets a
			 * clone; the original skb goes to the last match.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
375
/* Decide how to treat a packet whose destination we proxy (proxy_ndp):
 * returns 1 to deliver it locally (NDP messages we must answer), -1 to
 * drop it, 0 to keep forwarding it.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Skip extension headers to find the transport protocol. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Only the icmp6_type byte needs to be linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* Neighbour discovery destined to the proxied
			 * address must be answered by us: pass it to the
			 * input path.
			 */
			return 1;
		default:
			break;
		}
	}

	/* A proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
427
/* Final step of forwarding after the NF_INET_FORWARD hook: account the
 * packet and hand it to the output path.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	/* Already forwarded by hardware: drop the software copy without
	 * counting it as a discard.
	 */
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* Do not leak a stale receive timestamp into the transmit path. */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
446
447static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
448{
449 if (skb->len <= mtu)
450 return false;
451
452
453 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
454 return true;
455
456 if (skb->ignore_df)
457 return false;
458
459 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
460 return false;
461
462 return true;
463}
464
/* Forward an IPv6 packet: validate it, handle Router Alert and NDP proxy
 * cases, send redirects where appropriate, enforce the path MTU, then
 * decrement the hop limit and pass the packet through NF_INET_FORWARD.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	struct inet6_dev *idev;
	u32 mtu;

	/* idev of the *input* interface, used for RX-side statistics. */
	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* Locally owned skbs must not be forwarded. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!net->ipv6.devconf_all->disable_policy &&
	    !idev->cnf.disable_policy &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Tell the sender its packet died here. */
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* NDP proxy: destinations we answer NDP for may be taken locally. */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			hdr->hop_limit--;
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have replaced the dst. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have relocated the header. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
625
/* Copy the per-packet metadata a fragment must inherit from the packet
 * it was carved from: route, device, mark, priority, hash, netfilter and
 * security state.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any stale dst before taking a reference on the source's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}
645
/* Prepare frag_list-based (fast path) fragmentation: detach the frag
 * list from @skb, save a copy of the unfragmentable header of length
 * @hlen in @iter, and turn @skb itself into the first fragment by
 * inserting a fragment header.  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER: the previous header now chains to a fragment hdr. */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap for the fragment header between the unfragmentable
	 * part and the payload, then restore the saved headers before it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* offset 0, more fragments follow */
	fh->identification = frag_id;

	/* Shrink the first fragment to its own (page) data only. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
686
/* Turn the next skb on the iterator's frag list into a complete IPv6
 * fragment: push the saved unfragmentable header plus a fragment header
 * carrying the running offset, then inherit the parent's metadata.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Advance by the payload length of the fragment just completed
	 * (@skb), so this fragment's offset is correct.
	 */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
711
712void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
713 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
714 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
715{
716 state->prevhdr = prevhdr;
717 state->nexthdr = nexthdr;
718 state->frag_id = frag_id;
719
720 state->hlen = hlen;
721 state->mtu = mtu;
722
723 state->left = skb->len - hlen;
724 state->ptr = hlen;
725
726 state->hroom = hdr_room;
727 state->troom = needed_tailroom;
728
729 state->offset = 0;
730}
731EXPORT_SYMBOL(ip6_frag_init);
732
/* Allocate and fill the next slow-path fragment described by @state.
 * Returns the new fragment skb, or ERR_PTR(-ENOMEM).
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	 * then align the next start on an eight byte boundary
	 */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header chain to point at the fragment header. */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
808
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * Uses the fast (frag_list) path when the existing fragment geometry
 * allows it, otherwise falls back to allocating fresh fragments.
 * Consumes @skb; returns 0 or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on @mtu is the payload budget per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Fragments each carry a partial checksum; finalize it now. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* skb_checksum_help() may have reallocated the header; recompute
	 * prevhdr from the saved offset.
	 */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only when the pre-built fragments line up. */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Free the fragments that were never transmitted. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done above. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
991
992static inline int ip6_rt_check(const struct rt6key *rt_key,
993 const struct in6_addr *fl_addr,
994 const struct in6_addr *addr_cache)
995{
996 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
997 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
998}
999
/* Validate a socket-cached dst against the current flow.  Returns the
 * dst when it is still usable, or NULL (releasing the reference) when a
 * fresh route lookup is required.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* An IPv4 dst (v4-mapped traffic) can never satisfy an IPv6 flow. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Checking route validity in the not-connected case is not simple:
	 * a host route is valid only if its cached destination still
	 * matches; for a network route we rely on the socket's
	 * daddr_cache (and saddr_cache with subtrees), which only works
	 * for connected sockets.  The outgoing interface must also still
	 * agree with the flow unless the flow skips nexthop-oif matching.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1046
/* Core of the dst lookup: resolve the route for @fl6 (selecting a source
 * address when none is given), with optimistic-DAD fallback to the
 * default router.  On success *dst holds a referenced dst; on error it
 * is NULL and a negative errno is returned.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the saddr-aware lookup below
		 * take over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source with a non-v4-mapped destination can't work. */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	/* Restore the real destination once the route/saddr are resolved. */
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	For a connected socket, the dst is additionally cached in the
 *	socket when the current cache was not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
/**
 *	ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *	@skb: packet for which lookup is done
 *	@dev: tunnel device
 *	@net: network namespace of tunnel device
 *	@sock: socket which provides route info
 *	@saddr: memory to store the src ip address
 *	@info: tunnel information
 *	@protocol: IP protocol
 *	@use_cache: flag to enable cache usage
 *
 *	This function performs a route lookup on a tunnel.
 *
 *	It returns a valid dst pointer and stores the src address to be
 *	used in the tunnel in @saddr on success, else a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		/* Cache hit also fills in *saddr. */
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	/* Routing back out of the tunnel device would loop forever. */
	if (dst->dev == dev) {
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1314
1315static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1316 gfp_t gfp)
1317{
1318 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1319}
1320
1321static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1322 gfp_t gfp)
1323{
1324 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1325}
1326
1327static void ip6_append_data_mtu(unsigned int *mtu,
1328 int *maxfraglen,
1329 unsigned int fragheaderlen,
1330 struct sk_buff *skb,
1331 struct rt6_info *rt,
1332 unsigned int orig_mtu)
1333{
1334 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1335 if (!skb) {
1336
1337 *mtu = orig_mtu - rt->dst.header_len;
1338
1339 } else {
1340
1341
1342
1343
1344 *mtu = orig_mtu;
1345 }
1346 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1347 + fragheaderlen - sizeof(struct frag_hdr);
1348 }
1349}
1350
/* Set up the cork for a corked-send sequence: duplicate the tx options,
 * pin the route, and derive the per-cork MTU and flags from the socket
 * and route.  Returns 0 or a negative errno (partially-duplicated
 * options are freed by the caller's cork-release path).
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension header the caller supplied. */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* PMTUDISC_PROBE uses the device MTU, otherwise the path MTU. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A socket-configured fragment size may shrink the MTU further. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1427
/* __ip6_append_data - append @length payload bytes (pulled via @getfrag)
 * to the pending fragment queue @queue, opening new MTU-sized skbs as
 * needed and keeping fragments aligned to 8-byte boundaries.
 *
 * @transhdrlen is non-zero only on the first call for a datagram (it
 * reserves transport-header space in the first fragment); @cork /
 * @v6_cork carry state set up by ip6_setup_cork().  Returns 0 or a
 * negative errno; on error the queued data remains for the caller to
 * flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First append for this cork: account destination-option
		 * space and any extra dst header room beyond the
		 * non-fragmentable headers.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	/* GSO builds super-packets; otherwise limit to the corked fragsize */
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned fragment payload end, minus the fragment
	 * header that fragmented packets must carry.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* The entire header chain plus the transport header must fit in the
	 * first fragment (cf. RFC 7112).
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		/* Caller asked not to fragment: report the usable MTU back
		 * to the application instead.
		 */
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* Checksum offload is only usable for the first packet of a plain
	 * UDP datagram with no extension headers that fits in one fragment
	 * on a device advertising IPv6/generic checksum support.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't do SG + partial csum: fall back to
			 * copying but keep uarg for completion notification.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Use as much space in the existing tail skb as possible; open a
	 * new fragment when it is full.  Note: with MSG_MORE the caller
	 * may append many more times before the packet is finished.
	 */
	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into the current skb. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* Bytes past the 8-byte fragment boundary in the
			 * previous skb must migrate into the new one.
			 */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;

			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				/* paged path: only headers go in the linear
				 * area, the rest lands in page frags
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				/* first fragment: may block on sndbuf */
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve room for the hardware header, fragment
			 * header and dst (e.g. tunnel) headers
			 */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging bytes (and their
				 * checksum contribution) out of the
				 * previous skb.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* no SG: copy into the linear tailroom */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
				    offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* copy into (possibly coalesced) page frags */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* true zerocopy: reference the user pages directly */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1772
1773int ip6_append_data(struct sock *sk,
1774 int getfrag(void *from, char *to, int offset, int len,
1775 int odd, struct sk_buff *skb),
1776 void *from, int length, int transhdrlen,
1777 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1778 struct rt6_info *rt, unsigned int flags)
1779{
1780 struct inet_sock *inet = inet_sk(sk);
1781 struct ipv6_pinfo *np = inet6_sk(sk);
1782 int exthdrlen;
1783 int err;
1784
1785 if (flags&MSG_PROBE)
1786 return 0;
1787 if (skb_queue_empty(&sk->sk_write_queue)) {
1788
1789
1790
1791 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1792 ipc6, rt, fl6);
1793 if (err)
1794 return err;
1795
1796 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1797 length += exthdrlen;
1798 transhdrlen += exthdrlen;
1799 } else {
1800 fl6 = &inet->cork.fl.u.ip6;
1801 transhdrlen = 0;
1802 }
1803
1804 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1805 &np->cork, sk_page_frag(sk), getfrag,
1806 from, length, transhdrlen, flags, ipc6);
1807}
1808EXPORT_SYMBOL_GPL(ip6_append_data);
1809
1810static void ip6_cork_release(struct inet_cork_full *cork,
1811 struct inet6_cork *v6_cork)
1812{
1813 if (v6_cork->opt) {
1814 kfree(v6_cork->opt->dst0opt);
1815 kfree(v6_cork->opt->dst1opt);
1816 kfree(v6_cork->opt->hopopt);
1817 kfree(v6_cork->opt->srcrt);
1818 kfree(v6_cork->opt);
1819 v6_cork->opt = NULL;
1820 }
1821
1822 if (cork->base.dst) {
1823 dst_release(cork->base.dst);
1824 cork->base.dst = NULL;
1825 cork->base.flags &= ~IPCORK_ALLFRAG;
1826 }
1827 memset(&cork->fl, 0, sizeof(cork->fl));
1828}
1829
/* __ip6_make_skb - convert the queued fragments in @queue into one
 * finished packet: the first skb becomes the head and the rest are
 * chained onto its frag_list, then the IPv6 header (and any corked
 * extension headers) is prepended and stats are updated.  Releases the
 * cork.  Returns the finished skb, or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Absorb the remaining fragments into the head skb's frag_list,
	 * transferring byte counts and truesize and detaching them from
	 * the socket's write-memory accounting.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Options push may redirect final_dst (e.g. via a routing
	 * header), so work on a local copy of the flow's daddr.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1905
1906int ip6_send_skb(struct sk_buff *skb)
1907{
1908 struct net *net = sock_net(skb->sk);
1909 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1910 int err;
1911
1912 err = ip6_local_out(net, skb->sk, skb);
1913 if (err) {
1914 if (err > 0)
1915 err = net_xmit_errno(err);
1916 if (err)
1917 IP6_INC_STATS(net, rt->rt6i_idev,
1918 IPSTATS_MIB_OUTDISCARDS);
1919 }
1920
1921 return err;
1922}
1923
/* Finalise whatever is corked on @sk's write queue and transmit it.
 * A NULL from ip6_finish_skb() means nothing was pending; that is not
 * an error.  Returns 0 or a negative errno from the send path.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1935
1936static void __ip6_flush_pending_frames(struct sock *sk,
1937 struct sk_buff_head *queue,
1938 struct inet_cork_full *cork,
1939 struct inet6_cork *v6_cork)
1940{
1941 struct sk_buff *skb;
1942
1943 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1944 if (skb_dst(skb))
1945 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1946 IPSTATS_MIB_OUTDISCARDS);
1947 kfree_skb(skb);
1948 }
1949
1950 ip6_cork_release(cork, v6_cork);
1951}
1952
/* Public wrapper: discard all data corked on @sk's write queue and
 * release the socket's cork state.
 */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1959
1960struct sk_buff *ip6_make_skb(struct sock *sk,
1961 int getfrag(void *from, char *to, int offset,
1962 int len, int odd, struct sk_buff *skb),
1963 void *from, int length, int transhdrlen,
1964 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1965 struct rt6_info *rt, unsigned int flags,
1966 struct inet_cork_full *cork)
1967{
1968 struct inet6_cork v6_cork;
1969 struct sk_buff_head queue;
1970 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1971 int err;
1972
1973 if (flags & MSG_PROBE)
1974 return NULL;
1975
1976 __skb_queue_head_init(&queue);
1977
1978 cork->base.flags = 0;
1979 cork->base.addr = 0;
1980 cork->base.opt = NULL;
1981 cork->base.dst = NULL;
1982 v6_cork.opt = NULL;
1983 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1984 if (err) {
1985 ip6_cork_release(cork, &v6_cork);
1986 return ERR_PTR(err);
1987 }
1988 if (ipc6->dontfrag < 0)
1989 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1990
1991 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1992 ¤t->task_frag, getfrag, from,
1993 length + exthdrlen, transhdrlen + exthdrlen,
1994 flags, ipc6);
1995 if (err) {
1996 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1997 return ERR_PTR(err);
1998 }
1999
2000 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2001}
2002