1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57#include <net/ip_tunnels.h>
58
/* Final IPv6 transmit step: resolve the neighbour entry for the route's
 * next hop and hand the skb to the neighbour layer.  Multicast packets
 * may first be looped back to local listeners and are accounted here;
 * a lightweight tunnel attached to the route may take over transmission
 * entirely.  Consumes @skb on every path.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the sender asked
		 * for multicast loopback and either an mroute socket owns
		 * the (not-yet-forwarded) packet or a local socket has
		 * joined the destination group on this device.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Route the clone through POST_ROUTING to the
			 * loopback transmit path.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0 means loopback delivery only: never
			 * put the original on the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local scope multicast must never leave the host */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* A lightweight tunnel may transmit (or fail) the packet itself */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	/* Neighbour entry could not be created: count as no-route and drop */
	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
127
128static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
129{
130#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
131
132 if (skb_dst(skb)->xfrm) {
133 IPCB(skb)->flags |= IPSKB_REROUTED;
134 return dst_output(net, sk, skb);
135 }
136#endif
137
138 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
139 dst_allfrag(skb_dst(skb)) ||
140 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
141 return ip6_fragment(net, sk, skb, ip6_finish_output2);
142 else
143 return ip6_finish_output2(net, sk, skb);
144}
145
146static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
147{
148 int ret;
149
150 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
151 switch (ret) {
152 case NET_XMIT_SUCCESS:
153 return __ip6_finish_output(net, sk, skb);
154 case NET_XMIT_CN:
155 return __ip6_finish_output(net, sk, skb) ? : ret;
156 default:
157 kfree_skb(skb);
158 return ret;
159 }
160}
161
/* IPv6 dst_output() entry point.  Stamps protocol and egress device on the
 * skb, discards everything if IPv6 is administratively disabled on the
 * egress device, then runs NF_INET_POST_ROUTING — unless the packet was
 * already rerouted through that hook (IP6SKB_REROUTED).
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* Capture the input device before skb->dev is overwritten below;
	 * the hook wants both in and out devices.
	 */
	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
181
182bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
183{
184 if (!np->autoflowlabel_set)
185 return ip6_default_np_autolabel(net);
186 else
187 return np->autoflowlabel;
188}
189
190
191
192
193
194
195
/* Build the IPv6 header (plus any extension headers from @opt) onto @skb
 * and transmit it through the NF_INET_LOCAL_OUT hook to dst_output().
 * Reallocates headroom if the caller left too little.  A packet larger
 * than the path MTU that may not pass (not ignore_df, not GSO) gets an
 * EMSGSIZE error queued on the socket.  Returns the hook verdict / 0 on
 * success, -ENOBUFS or -EMSGSIZE on failure; consumes @skb either way.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room for the IPv6 header, link-layer header and any options */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Keep the copy charged to the original owner socket */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/* Push the fragmentable options first ... */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* ... then the unfragmentable part; this may rewrite
		 * first_hop (e.g. for a routing header).
		 */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Hop limit: socket setting when present, else from the route */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* An L3 master device (e.g. VRF) may redirect or consume
		 * the skb; NULL means it was consumed.
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* The const is cast away for the hook; hook functions must
		 * not assume the socket lock is held.
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* Too big and not allowed to fragment: report EMSGSIZE to the
	 * socket's error queue (does not require the socket lock).
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
295EXPORT_SYMBOL(ip6_xmit);
296
/* Deliver a Router Alert packet to every raw socket registered on
 * ip6_ra_chain whose selector matches @sel (and whose bound device, if
 * any, matches the receiving device).  All matching listeners but the
 * last receive a clone; the last consumes @skb itself.  Returns 1 when
 * @skb was consumed, 0 when no listener matched and the caller still
 * owns it.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* Skip sockets in other network namespaces when
			 * router-alert isolation is enabled on the socket.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Found a new match: the previous one gets a clone */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
331
/* Decide how to treat a packet whose destination we proxy (proxy NDP).
 * Returns 1 to deliver locally (NDP messages must be handled by the
 * proxying host), -1 to drop (link-local destinations cannot be served
 * off-link), 0 to forward normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Walk any extension headers to find the transport header */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Need at least the ICMPv6 type byte in linear data */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* NDP traffic for a proxied address must be
			 * processed locally, not forwarded.
			 */
			return 1;
		default:
			break;
		}
	}

	/* A proxying router can't forward traffic addressed to a
	 * link-local destination: signal link failure and drop.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
383
/* NF_INET_FORWARD okfn: account the forwarded datagram and hand it to
 * dst_output().  Packets already forwarded by hardware (switchdev L3
 * offload mark) are consumed without software transmission.
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* Clear the receive timestamp so it doesn't leak into the
	 * transmit path.
	 */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
402
403static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
404{
405 if (skb->len <= mtu)
406 return false;
407
408
409 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
410 return true;
411
412 if (skb->ignore_df)
413 return false;
414
415 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
416 return false;
417
418 return true;
419}
420
/* Forward a received IPv6 packet.  Performs the sanity and policy checks
 * (forwarding enabled, unicast L2 destination, hop limit, xfrm policy),
 * may deliver Router Alert packets to raw sockets or NDP packets to the
 * local stack (proxy NDP), may emit ICMPv6 errors or NDP redirects,
 * decrements the hop limit and passes the packet through NF_INET_FORWARD.
 * Consumes @skb on every path.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only forward packets that were addressed to us at link layer */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A socket-owned skb should never reach the forwarding path */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/* Router Alert option: hand the packet to registered raw sockets
	 * instead of forwarding it; return if one consumed it.
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/* Check hop limit before it would drop to zero in transit */
	if (hdr->hop_limit <= 1) {
		/* Force the output device so the ICMP error carries the
		 * proper source address.
		 */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* Proxy NDP: if we proxy this destination, NDP packets are
	 * delivered locally; link-local destinations are dropped.
	 */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have installed a new route — reload it */
	dst = skb_dst(skb);

	/* Packet leaves on the same interface it arrived on: send an NDP
	 * redirect — but not for source-routed frames or frames that were
	 * decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/* Redirect target: the gateway for indirect routes, the
		 * destination itself for on-link routes.
		 */
		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Rate-limit redirects per destination here; per-source
		 * limiting happens inside ndisc_send_redirect().
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* Refuse to forward packets with unspecified, multicast
		 * or loopback source addresses.
		 */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force the output device as error source */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have copied the header — re-read the pointer */
	hdr = ipv6_hdr(skb);

	/* Decrement hop limit only after COW so we never write a shared
	 * header.
	 */
	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
579
/* Copy the transmit-relevant metadata of @from onto a freshly built
 * fragment @to: packet type, priority, protocol, route, device, mark,
 * hash, traffic-control index, netfilter state, skb extensions and
 * security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Drop any dst the new skb may carry before sharing from's route */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}
599
/* Initialise fast-path fragmentation over an skb's frag_list: detach the
 * frag list into @iter, save a copy of the network headers (@hlen bytes)
 * for re-use on each fragment, and turn @skb itself into the first
 * fragment by inserting a fragment header behind the copied headers.
 * Returns 0 on success or -ENOMEM.  On success the caller must
 * eventually kfree(iter->tmp_hdr).
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER: the previous header now chains to a fragment hdr */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Take ownership of the frag list; skb becomes list-less */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap for the fragment header between the network headers
	 * and the payload, then restore the saved headers in front of it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* first fragment: offset 0, MF set */
	fh->identification = frag_id;

	/* Trim skb to its own (paged) data; the list now carries the rest */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
639EXPORT_SYMBOL(ip6_fraglist_init);
640
/* Turn the next frag-list member (@iter->frag) into a standalone
 * fragment: prepend a fragment header plus the saved network headers,
 * compute its fragment offset from the previously sent skb @skb, and
 * copy transmit metadata over.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Advance offset by the payload length of the previous fragment */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* More-fragments flag on all but the last list member */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
664EXPORT_SYMBOL(ip6_fraglist_prepare);
665
666void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
667 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
668 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
669{
670 state->prevhdr = prevhdr;
671 state->nexthdr = nexthdr;
672 state->frag_id = frag_id;
673
674 state->hlen = hlen;
675 state->mtu = mtu;
676
677 state->left = skb->len - hlen;
678 state->ptr = hlen;
679
680 state->hroom = hdr_room;
681 state->troom = needed_tailroom;
682
683 state->offset = 0;
684}
685EXPORT_SYMBOL(ip6_frag_init);
686
/* Allocate and build the next slow-path fragment of @skb according to
 * @state: copy the network headers, insert a fragment header, and copy
 * up to mtu bytes of payload (8-byte aligned except for the final
 * fragment).  Returns the new fragment or ERR_PTR(-ENOMEM).
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	 * then align the next start on an eight byte boundary
	 */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/* Set up data on packet: copy metadata, reserve link-layer
	 * headroom and lay out network header + fragment header + payload.
	 */
	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/* Charge the memory for the fragment to any owner it might
	 * possess.
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/* Copy the packet header into the new buffer */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header so the previous header field chains to
	 * the fragment header.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/* Build fragment header */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/* Copy a block of the IP datagram; copy cannot fail since the
	 * cursor and length were derived from skb->len above.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	/* More-fragments flag on all but the last fragment */
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
761EXPORT_SYMBOL(ip6_frag_next);
762
/* Fragment @skb to fit the path MTU and transmit each fragment via
 * @output.  Takes the fast path (re-using an existing frag_list) when
 * possible, otherwise the slow path that allocates each fragment.
 * Consumes @skb; returns 0 on success or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Per-socket frag_size only applies to locally generated traffic
	 * (not when called recursively from a lower device).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	/* Preserve the timestamp across fragment builds */
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset: skb_checksum_help() below may
	 * reallocate the header and invalidate the pointer.
	 */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* Don't send fragments larger than the original packet's */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Need room for header, fragment header and at least 8 payload
	 * bytes per fragment.
	 */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Re-derive prevhdr after the possible header reallocation above */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path is only usable when every list member already
		 * has fragment-sized, 8-byte-aligned chunks and enough
		 * headroom, and nothing is cloned/shared.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Transfer socket accounting to the fragments */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Output failed mid-list: free the unsent fragments */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the socket-accounting transfer done so far */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/* Fragment the datagram: allocate each fragment and copy the
	 * payload into it.
	 */
	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/* Keep copying data until we run out. */
	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/* Put this fragment into the sending queue. */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
945
946static inline int ip6_rt_check(const struct rt6key *rt_key,
947 const struct in6_addr *fl_addr,
948 const struct in6_addr *addr_cache)
949{
950 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
951 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
952}
953
/* Validate a socket's cached dst against a new flow.  Returns @dst when
 * it is still usable for @fl6, otherwise releases it and returns NULL so
 * the caller performs a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A non-IPv6 dst (e.g. from dst_ops of another family) cannot be
	 * reused for an IPv6 flow.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;

	/* The cached route is reusable only when it still matches the
	 * flow: a host route (plen 128) to the flow's daddr or the daddr
	 * the socket cached (same for saddr with subtrees enabled), and —
	 * unless the flow skips the nexthop-OIF check — a device that
	 * matches the requested outgoing interface.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	      (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1000
/* Core of the IPv6 route lookup: resolve *@dst for @fl6, selecting a
 * source address when the flow has none, optionally redirecting to the
 * router (optimistic DAD), and rejecting mismatched v4-mapped source
 * addresses.  On failure *@dst is released and set to NULL.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* No source address was given: look the route up first, then pick
	 * a source address for it and re-resolve with RT6_LOOKUP_F_IFACE
	 * when an output interface was requested.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled lookup below
		 * retry with the chosen source address.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/* If the chosen source address is optimistic (DAD not complete)
	 * and the next hop is not yet a valid neighbour, route the packet
	 * through the default router instead, so the neighbour discovery
	 * for the final destination is done by the router on our behalf.
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/* Look up the default router (daddr cleared so the
			 * lookup matches the default route) and use it.
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source address is only usable with a v4-mapped (or
	 * unspecified) destination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
/**
 * ip6_dst_lookup - perform a route lookup on flow
 * @net: network namespace to look the route up in
 * @sk: socket which provides route info (may be NULL)
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 * It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
1136EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
/**
 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 * @net: network namespace to look the route up in
 * @sk: socket which provides route info (may be NULL)
 * @fl6: flow to lookup
 * @final_dst: final destination address for ipsec lookup
 *
 * This function performs a route lookup on the given flow, then runs the
 * result through xfrm so IPsec policies apply.  @fl6->daddr is rewritten
 * to @final_dst before the xfrm lookup when one is given.
 *
 * It returns a valid dst pointer on success, or a pointer encoded
 * error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
1164EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
/**
 * ip6_sk_dst_lookup_flow - look up a route for a socket's flow
 * @sk: socket whose cached route may be reused
 * @fl6: flow to look up
 * @final_dst: final destination address for ipsec lookup
 * @connected: whether @sk is connected (cache the result if so)
 *
 * Reuse the socket's cached dst when it is still valid for @fl6;
 * otherwise perform a fresh lookup (including xfrm), and — for a
 * connected socket — store the result back on the socket.
 *
 * Returns a valid dst pointer on success, or a pointer encoded error.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
1200EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
/* Route lookup for tunnel transmission: build a flow from the tunnel
 * key in @info, consult (and update) the tunnel's dst cache when
 * @use_cache is set, reject unreachable and circular (dst->dev == @dev)
 * routes, and report the chosen source address through @saddr.
 * Returns the dst or ERR_PTR(-ENETUNREACH / -ELOOP).
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		/* Cache hit also fills in the source address */
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	*saddr = fl6.saddr;
	return dst;
}
1269EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1270
1271static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1272 gfp_t gfp)
1273{
1274 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1275}
1276
1277static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1278 gfp_t gfp)
1279{
1280 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1281}
1282
/* Recompute *mtu/*maxfraglen for ip6_append_data() when a PMTU update
 * arrives mid-append.  Only applies to non-XFRM-tunnel routes; with an
 * XFRM tunnel the cached cork values keep being used.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
1306
/* Populate the cork structures for a corked send: deep-copy the tx
 * options (they may be shared and can go away under us), take a route
 * reference, and compute the fragment size from the path MTU and
 * per-socket settings.  Returns 0 or a negative errno.  NOTE(review):
 * on partial option-copy failure the already-duplicated sub-options
 * appear to be left for the cork teardown path to free — confirm with
 * the caller's error handling.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/* setup for corking */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Duplicate each extension header so the cork owns them */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* MTU: device MTU when probing PMTU, else the (path) route MTU;
	 * for non-XFRM-tunnel routes measure the outermost (xfrm path)
	 * dst.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1383
/* Core of ip6_append_data()/ip6_make_skb(): append @length bytes obtained
 * via @getfrag onto @queue, building one or more skbs sized to the corked
 * MTU so that ip6_finish_skb() can later emit them.  Returns 0 or a
 * negative errno; on error the queued data is left for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		/* First chunk: reserve room for destination options and any
		 * extra dst header space (e.g. xfrm) beyond the nexthop
		 * headers already counted in rt6i_nfheader_len.
		 */
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	/* With GSO the "mtu" used for segmentation is the maximum payload;
	 * the device will segment later.
	 */
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Per-fragment payload is rounded down to a multiple of 8 octets
	 * (fragment offsets are in 8-byte units) and must leave room for
	 * the fragment header itself.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* The complete header chain (plus the transport header) must fit
	 * into a single fragment.
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		/* IPV6_DONTFRAG: report the path MTU to the app instead of
		 * fragmenting.
		 */
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when we are
	 * not going to fragment.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device cannot do SG + csum offload: fall back to
			 * copying, but keep the uarg so completion fires.
			 */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU as limit.
	 * If the message fits into one fragment, no fragment header is
	 * added; otherwise each skb built below carries at most
	 * maxfraglen bytes and room for a fragment header is reserved.
	 *
	 * Note that the cork tracks the total queued length so that a
	 * later append can detect when fragmentation becomes necessary.
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				/* Paged path: only the header part lives in
				 * the linear area, the rest goes to frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhanging tail of the previous
				 * skb into this one, keeping checksums valid.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1727
/* Append data to the socket's pending write queue, corking it under the
 * socket lock.  The first call on an empty queue sets up the cork from
 * @ipc6/@rt/@fl6; subsequent calls reuse the corked flow.  Returns 0 or a
 * negative errno from cork setup / __ip6_append_data().
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		/* Destination-option space is accounted once, on the first
		 * chunk: it enlarges both the length and the transport
		 * header offset seen by __ip6_append_data().
		 */
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Already corked: keep using the stored flow and append
		 * payload only (no further transport header).
		 */
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
1764
1765static void ip6_cork_release(struct inet_cork_full *cork,
1766 struct inet6_cork *v6_cork)
1767{
1768 if (v6_cork->opt) {
1769 kfree(v6_cork->opt->dst0opt);
1770 kfree(v6_cork->opt->dst1opt);
1771 kfree(v6_cork->opt->hopopt);
1772 kfree(v6_cork->opt->srcrt);
1773 kfree(v6_cork->opt);
1774 v6_cork->opt = NULL;
1775 }
1776
1777 if (cork->base.dst) {
1778 dst_release(cork->base.dst);
1779 cork->base.dst = NULL;
1780 cork->base.flags &= ~IPCORK_ALLFRAG;
1781 }
1782 memset(&cork->fl, 0, sizeof(cork->fl));
1783}
1784
/* Coalesce all pending skbs on @queue into one datagram: the first skb
 * becomes the head, the rest are chained on its frag_list, then the IPv6
 * header and any extension headers are pushed on.  Releases the cork on
 * success.  Returns the finished skb, or NULL if the queue was empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* Ownership stays with the head skb only */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		/* may rewrite final_dst when a routing header is present */
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1860
1861int ip6_send_skb(struct sk_buff *skb)
1862{
1863 struct net *net = sock_net(skb->sk);
1864 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1865 int err;
1866
1867 err = ip6_local_out(net, skb->sk, skb);
1868 if (err) {
1869 if (err > 0)
1870 err = net_xmit_errno(err);
1871 if (err)
1872 IP6_INC_STATS(net, rt->rt6i_idev,
1873 IPSTATS_MIB_OUTDISCARDS);
1874 }
1875
1876 return err;
1877}
1878
/* Finalize the socket's pending write queue into one datagram and send
 * it.  An empty queue is not an error — returns 0.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1890
1891static void __ip6_flush_pending_frames(struct sock *sk,
1892 struct sk_buff_head *queue,
1893 struct inet_cork_full *cork,
1894 struct inet6_cork *v6_cork)
1895{
1896 struct sk_buff *skb;
1897
1898 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1899 if (skb_dst(skb))
1900 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1901 IPSTATS_MIB_OUTDISCARDS);
1902 kfree_skb(skb);
1903 }
1904
1905 ip6_cork_release(cork, v6_cork);
1906}
1907
1908void ip6_flush_pending_frames(struct sock *sk)
1909{
1910 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1911 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1912}
1913EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1914
1915struct sk_buff *ip6_make_skb(struct sock *sk,
1916 int getfrag(void *from, char *to, int offset,
1917 int len, int odd, struct sk_buff *skb),
1918 void *from, int length, int transhdrlen,
1919 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1920 struct rt6_info *rt, unsigned int flags,
1921 struct inet_cork_full *cork)
1922{
1923 struct inet6_cork v6_cork;
1924 struct sk_buff_head queue;
1925 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1926 int err;
1927
1928 if (flags & MSG_PROBE)
1929 return NULL;
1930
1931 __skb_queue_head_init(&queue);
1932
1933 cork->base.flags = 0;
1934 cork->base.addr = 0;
1935 cork->base.opt = NULL;
1936 cork->base.dst = NULL;
1937 v6_cork.opt = NULL;
1938 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1939 if (err) {
1940 ip6_cork_release(cork, &v6_cork);
1941 return ERR_PTR(err);
1942 }
1943 if (ipc6->dontfrag < 0)
1944 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1945
1946 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1947 ¤t->task_frag, getfrag, from,
1948 length + exthdrlen, transhdrlen + exthdrlen,
1949 flags, ipc6);
1950 if (err) {
1951 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1952 return ERR_PTR(err);
1953 }
1954
1955 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1956}
1957