1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/module.h>
24#include <linux/capability.h>
25#include <linux/errno.h>
26#include <linux/types.h>
27#include <linux/sockios.h>
28#include <linux/icmp.h>
29#include <linux/if.h>
30#include <linux/in.h>
31#include <linux/ip.h>
32#include <linux/net.h>
33#include <linux/in6.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/icmpv6.h>
37#include <linux/init.h>
38#include <linux/route.h>
39#include <linux/rtnetlink.h>
40#include <linux/netfilter_ipv6.h>
41#include <linux/slab.h>
42#include <linux/hash.h>
43#include <linux/etherdevice.h>
44
45#include <asm/uaccess.h>
46#include <linux/atomic.h>
47
48#include <net/icmp.h>
49#include <net/ip.h>
50#include <net/ip_tunnels.h>
51#include <net/ipv6.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/ip6_tunnel.h>
55#include <net/xfrm.h>
56#include <net/dsfield.h>
57#include <net/inet_ecn.h>
58#include <net/net_namespace.h>
59#include <net/netns/generic.h>
60
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");

/* Tunnels are hashed by endpoint addresses into 2^5 = 32 buckets. */
#define IP6_TUNNEL_HASH_SIZE_SHIFT 5
#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)

/* Runtime-tunable (0644): warn when a decapsulated packet carries a
 * corrupted ECN field (see the dscp_ecn_decapsulate callbacks below).
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
73
74static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
75{
76 u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
77
78 return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
79}
80
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;

/* Per-network-namespace state, retrieved with net_generic(net, ip6_tnl_net_id). */
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
	/* the fallback "ip6tnl0" device */
	struct net_device *fb_tnl_dev;
	/* tunnels with at least one configured endpoint, hashed by address */
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
	/* single slot for the wildcard (both-endpoints-any) tunnel */
	struct ip6_tnl __rcu *tnls_wc[1];
	/* tnls[0] -> tnls_wc, tnls[1] -> tnls_r_l; indexed by bucket "prio" */
	struct ip6_tnl __rcu **tnls[2];
};
94
/* Sum the per-cpu software stats into dev->stats and return it
 * (ndo_get_stats-style accessor).
 */
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_sw_netstats tmp, sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		unsigned int start;
		const struct pcpu_sw_netstats *tstats =
			per_cpu_ptr(dev->tstats, i);

		/* retry loop guarantees a consistent 64-bit snapshot even
		 * on 32-bit SMP where the counters are updated in halves
		 */
		do {
			start = u64_stats_fetch_begin_irq(&tstats->syncp);
			tmp.rx_packets = tstats->rx_packets;
			tmp.rx_bytes = tstats->rx_bytes;
			tmp.tx_packets = tstats->tx_packets;
			tmp.tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start));

		sum.rx_packets += tmp.rx_packets;
		sum.rx_bytes += tmp.rx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.tx_bytes += tmp.tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes = sum.tx_bytes;
	return &dev->stats;
}
124
125
126
127
128
129
130
131
132
133
134
135
/* Walk an RCU-protected tunnel hash chain; expects a local
 * "struct ip6_tnl *t" in scope and rcu_read_lock() held by the caller.
 */
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
138
/**
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 * @net: network namespace to search
 * @remote: the address of the tunnel exit-point
 * @local: the address of the tunnel entry-point
 *
 * Tries, in order: exact (remote, local) match, local-only match with a
 * wildcard remote, remote-only match with a wildcard local, and finally
 * the fallback tunnel. Only tunnels whose device is IFF_UP qualify.
 *
 * Caller must hold rcu_read_lock().
 *
 * Return: matching tunnel, or %NULL if none is usable.
 */
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
	unsigned int hash = HASH(remote, local);
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct in6_addr any;

	/* 1st pass: fully specified tunnels */
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* 2nd pass: tunnels with a wildcard remote endpoint */
	memset(&any, 0, sizeof(any));
	hash = HASH(&any, local);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_any(&t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* 3rd pass: tunnels with a wildcard local endpoint */
	hash = HASH(remote, &any);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(remote, &t->parms.raddr) &&
		    ipv6_addr_any(&t->parms.laddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* last resort: the fallback device, if it is up */
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	return NULL;
}
177
178
179
180
181
182
183
184
185
186
187
188
/**
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 * @ip6n: per-netns tunnel state
 * @p: parameters containing tunnel end-points
 *
 * A tunnel with a wildcard on both endpoints lives in the dedicated
 * tnls_wc[0] slot (prio 0); everything else is hashed by address into
 * tnls_r_l (prio 1).
 *
 * Return: head of the RCU hash chain the matching tunnel would be on.
 */
static struct ip6_tnl __rcu **
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	unsigned int h = 0;
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
		h = HASH(remote, local);
	}
	return &ip6n->tnls[prio][h];
}
203
204
205
206
207
208
/**
 * ip6_tnl_link - add tunnel to the per-netns hash table
 * @ip6n: per-netns tunnel state
 * @t: tunnel to be added
 *
 * Publishes @t at the head of its bucket chain. Caller holds RTNL
 * (rtnl_dereference); concurrent readers are RCU-protected.
 */
static void
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);

	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
217
218
219
220
221
222
/**
 * ip6_tnl_unlink - remove tunnel from the per-netns hash table
 * @ip6n: per-netns tunnel state
 * @t: tunnel to be removed
 *
 * Walks the bucket chain and splices @t out. Caller holds RTNL; readers
 * still traversing the old chain are safe because t->next is left intact.
 */
static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
{
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
238
/* Device destructor: release per-device resources allocated at init
 * (GRO cells, dst cache, per-cpu stats).
 */
static void ip6_dev_free(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);

	gro_cells_destroy(&t->gro_cells);
	dst_cache_destroy(&t->dst_cache);
	free_percpu(dev->tstats);
}
247
248static int ip6_tnl_create2(struct net_device *dev)
249{
250 struct ip6_tnl *t = netdev_priv(dev);
251 struct net *net = dev_net(dev);
252 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
253 int err;
254
255 t = netdev_priv(dev);
256
257 dev->rtnl_link_ops = &ip6_link_ops;
258 err = register_netdevice(dev);
259 if (err < 0)
260 goto out;
261
262 strcpy(t->parms.name, dev->name);
263
264 dev_hold(dev);
265 ip6_tnl_link(ip6n, t);
266 return 0;
267
268out:
269 return err;
270}
271
272
273
274
275
276
277
278
279
280
281
282
283
/**
 * ip6_tnl_create - create a new tunnel
 * @net: owning network namespace
 * @p: tunnel parameters
 *
 * Allocates and registers a tunnel device matching @p. When @p carries
 * no name, the "ip6tnl%d" template is used and expanded at registration.
 *
 * Return: the new tunnel, or an ERR_PTR() (-E2BIG for an invalid name,
 * -ENOMEM on allocation failure, or the ip6_tnl_create2() error).
 */
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err = -E2BIG;

	if (p->name[0]) {
		/* reject names that could confuse procfs or printf-style
		 * expansion (e.g. '/' or '%')
		 */
		if (!dev_valid_name(p->name))
			goto failed;
		strlcpy(name, p->name, IFNAMSIZ);
	} else {
		sprintf(name, "ip6tnl%%d");
	}
	err = -ENOMEM;
	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
	if (!dev)
		goto failed;

	dev_net_set(dev, net);

	t = netdev_priv(dev);
	t->parms = *p;
	t->net = dev_net(dev);
	err = ip6_tnl_create2(dev);
	if (err < 0)
		goto failed_free;

	return t;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/**
 * ip6_tnl_locate - find or create tunnel matching given parameters
 * @net: network namespace
 * @p: tunnel parameters
 * @create: != 0 if allowed to create a new tunnel when no match is found
 *
 * Caller holds RTNL (rtnl_dereference on the hash chain).
 *
 * Return: the matching tunnel; ERR_PTR(-EEXIST) when a match exists but
 * @create was requested; ERR_PTR(-ENODEV) when there is no match and
 * @create is 0; otherwise the result of ip6_tnl_create().
 */
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
		struct __ip6_tnl_parm *p, int create)
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr)) {
			if (create)
				return ERR_PTR(-EEXIST);

			return t;
		}
	}
	if (!create)
		return ERR_PTR(-ENODEV);
	return ip6_tnl_create(net, p);
}
358
359
360
361
362
363
364
365
366
/**
 * ip6_tnl_dev_uninit - tunnel device uninitializer
 * @dev: the device to be destroyed
 *
 * Removes the tunnel from the per-netns table (the fallback device only
 * occupies the wildcard slot, so it is cleared directly), invalidates
 * the cached route and drops the reference taken in ip6_tnl_create2().
 */
static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	if (dev == ip6n->fb_tnl_dev)
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
	else
		ip6_tnl_unlink(ip6n, t);
	dst_cache_reset(&t->dst_cache);
	dev_put(dev);
}
381
382
383
384
385
386
387
388
389
390
/**
 * ip6_tnl_parse_tlv_enc_lim - search for the tunnel encapsulation limit TLV
 * @skb: received socket buffer
 * @raw: pointer to the outer IPv6 header within skb->data
 *
 * Walks the IPv6 extension header chain after the header at @raw, looking
 * for a tunnel encapsulation limit option (RFC 2473) inside a
 * destination options header. Every access is bounded by pskb_may_pull()
 * since the headers may live in non-linear data.
 *
 * Return: offset of the option relative to @raw, or 0 if not found.
 */
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
	unsigned int nhoff = raw - skb->data;
	unsigned int off = nhoff + sizeof(*ipv6h);
	u8 next, nexthdr = ipv6h->nexthdr;

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		struct ipv6_opt_hdr *hdr;
		u16 optlen;

		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			/* only the first fragment can carry options */
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			/* AH length is in 4-byte units, others in 8-byte */
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}

		/* cache hdr->nexthdr now, since pskb_may_pull() below may
		 * reallocate the head and invalidate hdr
		 */
		next = hdr->nexthdr;
		if (nexthdr == NEXTHDR_DEST) {
			u16 i = 2;

			/* make the whole option block linear before scanning */
			if (!pskb_may_pull(skb, off + optlen))
				break;

			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* no more room for an encapsulation limit TLV */
				if (i + sizeof(*tel) > optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);

				/* return the offset on a well-formed match */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i + off - nhoff;

				/* skip this TLV (Pad1 is a lone zero byte) */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = next;
		off += optlen;
	}
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
452
453
454
455
456
457
458
459
460
/**
 * ip6_tnl_err - common ICMPv6 error processing for tunneled packets
 * @skb: packet the ICMPv6 error was generated for; the outer IPv6 header
 *	of the offending tunnel packet is at skb->data
 * @ipproto: inner protocol the tunnel must carry (0 accepts any)
 * @opt: IPv6 control buffer (unused here, kept for the handler signature)
 * @type: in: received ICMPv6 type; out: type to relay to the inner sender
 * @code: in: received ICMPv6 code; out: code to relay
 * @msg: out: set to 1 when an error should be relayed inward
 * @info: in: received ICMPv6 info field; out: info to relay (e.g. MTU)
 * @offset: offset of the inner packet within @skb
 *
 * Return: 0 when a matching, protocol-compatible tunnel was found
 * (outputs valid), -ENOENT otherwise.
 */
static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
	__u32 rel_info = 0;
	struct ip6_tnl *t;
	int err = -ENOENT;
	int rel_msg = 0;
	u8 tproto;
	__u16 len;

	/* The offending packet was sent BY us, so look the tunnel up with
	 * the addresses swapped: its daddr is our remote endpoint-of-origin.
	 */
	rcu_read_lock();
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
					&ipv6h->saddr)) == NULL)
		goto out;

	tproto = ACCESS_ONCE(t->parms.proto);
	if (tproto != ipproto && tproto != 0)
		goto out;

	err = 0;

	switch (*type) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu, teli;
	case ICMPV6_DEST_UNREACH:
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
		/* check whether the problem points at our encapsulation
		 * limit option (info is the offset of the bad field)
		 */
		teli = 0;
		if ((*code) == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		if (teli && teli == *info - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
				rel_msg = 1;
			}
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		/* update our own path MTU, then decide whether the inner
		 * packet needs a (clamped) too-big error as well
		 */
		ip6_update_pmtu(skb, net, htonl(*info), 0, 0);
		mtu = *info - offset;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
		if (len > mtu) {
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0);
		break;
	}

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
	rcu_read_unlock();
	return err;
}
550
551static int
552ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
553 u8 type, u8 code, int offset, __be32 info)
554{
555 __u32 rel_info = ntohl(info);
556 const struct iphdr *eiph;
557 struct sk_buff *skb2;
558 int err, rel_msg = 0;
559 u8 rel_type = type;
560 u8 rel_code = code;
561 struct rtable *rt;
562 struct flowi4 fl4;
563
564 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
565 &rel_msg, &rel_info, offset);
566 if (err < 0)
567 return err;
568
569 if (rel_msg == 0)
570 return 0;
571
572 switch (rel_type) {
573 case ICMPV6_DEST_UNREACH:
574 if (rel_code != ICMPV6_ADDR_UNREACH)
575 return 0;
576 rel_type = ICMP_DEST_UNREACH;
577 rel_code = ICMP_HOST_UNREACH;
578 break;
579 case ICMPV6_PKT_TOOBIG:
580 if (rel_code != 0)
581 return 0;
582 rel_type = ICMP_DEST_UNREACH;
583 rel_code = ICMP_FRAG_NEEDED;
584 break;
585 default:
586 return 0;
587 }
588
589 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
590 return 0;
591
592 skb2 = skb_clone(skb, GFP_ATOMIC);
593 if (!skb2)
594 return 0;
595
596 skb_dst_drop(skb2);
597
598 skb_pull(skb2, offset);
599 skb_reset_network_header(skb2);
600 eiph = ip_hdr(skb2);
601
602
603 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
604 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
605 if (IS_ERR(rt))
606 goto out;
607
608 skb2->dev = rt->dst.dev;
609 ip_rt_put(rt);
610
611
612 if (rt->rt_flags & RTCF_LOCAL) {
613 rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
614 eiph->daddr, eiph->saddr, 0, 0,
615 IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
616 if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
617 if (!IS_ERR(rt))
618 ip_rt_put(rt);
619 goto out;
620 }
621 skb_dst_set(skb2, &rt->dst);
622 } else {
623 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
624 skb2->dev) ||
625 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
626 goto out;
627 }
628
629
630 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
631 if (rel_info > dst_mtu(skb_dst(skb2)))
632 goto out;
633
634 skb_dst_update_pmtu(skb2, rel_info);
635 }
636
637 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
638
639out:
640 kfree_skb(skb2);
641 return 0;
642}
643
/**
 * ip6ip6_err - ICMPv6 error handler for IPv6-in-IPv6 tunnels
 * @skb: packet carrying the ICMPv6 error
 * @opt: IPv6 control buffer
 * @type: ICMPv6 error type
 * @code: ICMPv6 error code
 * @offset: offset of the inner (IPv6) packet inside @skb
 * @info: ICMPv6 info field
 *
 * Relays the (possibly rewritten) ICMPv6 error toward the inner IPv6
 * sender on a clone of the packet.
 *
 * Return: 0, or a negative errno from ip6_tnl_err().
 */
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	__u32 rel_info = ntohl(info);
	int err, rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		if (!skb2)
			return 0;

		skb_dst_drop(skb2);
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);

		/* Try to guess the incoming interface from the inner source */
		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
				NULL, 0, 0);

		if (rt && rt->dst.dev)
			skb2->dev = rt->dst.dev;

		icmpv6_send(skb2, rel_type, rel_code, rel_info);

		ip6_rt_put(rt);

		kfree_skb(skb2);
	}

	return 0;
}
685
686static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
687 const struct ipv6hdr *ipv6h,
688 struct sk_buff *skb)
689{
690 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
691
692 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
693 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
694
695 return IP6_ECN_decapsulate(ipv6h, skb);
696}
697
698static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
699 const struct ipv6hdr *ipv6h,
700 struct sk_buff *skb)
701{
702 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
703 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
704
705 return IP6_ECN_decapsulate(ipv6h, skb);
706}
707
/**
 * ip6_tnl_get_cap - derive a tunnel's capability flags from its endpoints
 * @t: the tunnel
 * @laddr: local endpoint address
 * @raddr: remote endpoint address
 *
 * A wildcard on either side means capability must be decided per packet.
 * Otherwise the tunnel may transmit/receive only for sane address
 * combinations: unicast/multicast, no loopback, and link-local addresses
 * only when a link (p->link) is pinned.
 *
 * Return: a mask of IP6_TNL_F_CAP_{XMIT,RCV,PER_PACKET} flags.
 */
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
		const struct in6_addr *laddr,
		const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
EXPORT_SYMBOL(ip6_tnl_get_cap);
731
732
/**
 * ip6_tnl_rcv_ctl - check that a received packet may enter this tunnel
 * @t: the tunnel
 * @laddr: packet's destination (our local endpoint)
 * @raddr: packet's source (the remote endpoint)
 *
 * Accept only when the tunnel (or this packet, for per-packet tunnels)
 * has receive capability, @laddr is actually configured on this host
 * (or is multicast), and @raddr is NOT one of our own addresses (which
 * would indicate a loop). Caller runs under rcu_read_lock() — see
 * dev_get_by_index_rcu().
 *
 * Return: 1 if the packet is acceptable, 0 otherwise.
 */
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
		    const struct in6_addr *laddr,
		    const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
		struct net_device *ldev = NULL;

		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
			ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
757
758static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
759 const struct tnl_ptk_info *tpi,
760 struct metadata_dst *tun_dst,
761 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
762 const struct ipv6hdr *ipv6h,
763 struct sk_buff *skb),
764 bool log_ecn_err)
765{
766 struct pcpu_sw_netstats *tstats;
767 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
768 int err;
769
770 if ((!(tpi->flags & TUNNEL_CSUM) &&
771 (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
772 ((tpi->flags & TUNNEL_CSUM) &&
773 !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
774 tunnel->dev->stats.rx_crc_errors++;
775 tunnel->dev->stats.rx_errors++;
776 goto drop;
777 }
778
779 if (tunnel->parms.i_flags & TUNNEL_SEQ) {
780 if (!(tpi->flags & TUNNEL_SEQ) ||
781 (tunnel->i_seqno &&
782 (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
783 tunnel->dev->stats.rx_fifo_errors++;
784 tunnel->dev->stats.rx_errors++;
785 goto drop;
786 }
787 tunnel->i_seqno = ntohl(tpi->seq) + 1;
788 }
789
790 skb->protocol = tpi->proto;
791
792
793 if (tunnel->dev->type == ARPHRD_ETHER) {
794 if (!pskb_may_pull(skb, ETH_HLEN)) {
795 tunnel->dev->stats.rx_length_errors++;
796 tunnel->dev->stats.rx_errors++;
797 goto drop;
798 }
799
800 ipv6h = ipv6_hdr(skb);
801 skb->protocol = eth_type_trans(skb, tunnel->dev);
802 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
803 } else {
804 skb->dev = tunnel->dev;
805 }
806
807 skb_reset_network_header(skb);
808 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
809
810 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
811
812 err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
813 if (unlikely(err)) {
814 if (log_ecn_err)
815 net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
816 &ipv6h->saddr,
817 ipv6_get_dsfield(ipv6h));
818 if (err > 1) {
819 ++tunnel->dev->stats.rx_frame_errors;
820 ++tunnel->dev->stats.rx_errors;
821 goto drop;
822 }
823 }
824
825 tstats = this_cpu_ptr(tunnel->dev->tstats);
826 u64_stats_update_begin(&tstats->syncp);
827 tstats->rx_packets++;
828 tstats->rx_bytes += skb->len;
829 u64_stats_update_end(&tstats->syncp);
830
831 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
832
833 gro_cells_receive(&tunnel->gro_cells, skb);
834 return 0;
835
836drop:
837 kfree_skb(skb);
838 return 0;
839}
840
841int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
842 const struct tnl_ptk_info *tpi,
843 struct metadata_dst *tun_dst,
844 bool log_ecn_err)
845{
846 return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
847 log_ecn_err);
848}
849EXPORT_SYMBOL(ip6_tnl_rcv);
850
/* Packet-type templates for the two inner protocols; only .proto is set —
 * plain ip6 tunnels carry no GRE-style flags/key/seq.
 */
static const struct tnl_ptk_info tpi_v6 = {
	/* no tunnel info required */
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required */
	.proto = htons(ETH_P_IP),
};
860
/* Common receive dispatch: look up the tunnel for the outer addresses,
 * validate protocol/policy/control checks, strip the outer header and
 * hand the packet to __ip6_tnl_rcv().
 *
 * Return: 0 when the packet was consumed (delivered or dropped), -1 when
 * no tunnel matched and the stack should try other handlers.
 */
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int ret = -1;

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);

	if (t) {
		u8 tproto = ACCESS_ONCE(t->parms.proto);

		/* tunnel must carry this protocol (0 accepts any) */
		if (tproto != ipproto && tproto != 0)
			goto drop;
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;
		/* re-read: the policy check may have reallocated the head */
		ipv6h = ipv6_hdr(skb);
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
			goto drop;
		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
			goto drop;
		ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
				    log_ecn_error);
	}

	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}
899
/* Protocol handler for IPv4 payloads (IPPROTO_IPIP inside IPv6). */
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
			  ip4ip6_dscp_ecn_decapsulate);
}
905
/* Protocol handler for IPv6 payloads (IPPROTO_IPV6 inside IPv6). */
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
}
911
/* TX options carrying a single 8-byte destination-options header with a
 * tunnel encapsulation limit TLV (RFC 2473); dst_opt backs ops.dst0opt.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
916
/* Build the destination-options header carrying a tunnel encapsulation
 * limit TLV followed by one byte of PadN. Bytes [0]/[1] of dst_opt stay
 * zero from the memset: [1] = hdrlen 0 means an 8-byte header, and [0]
 * (nexthdr) is presumably filled when the option is pushed — see
 * ipv6_push_nfrag_opts() in the xmit path.
 */
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945static inline bool
946ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
947{
948 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
949}
950
/**
 * ip6_tnl_xmit_ctl - check that a packet may be transmitted on this tunnel
 * @t: the tunnel
 * @laddr: local endpoint (outer source) address
 * @raddr: remote endpoint (outer destination) address
 *
 * Transmission is allowed only when the tunnel (or this packet, for
 * per-packet tunnels) has xmit capability, @laddr is configured on this
 * host, and @raddr is not one of our own addresses (routing loop).
 *
 * Return: 1 if transmission is allowed, 0 otherwise.
 */
int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
		     const struct in6_addr *laddr,
		     const struct in6_addr *raddr)
{
	struct __ip6_tnl_parm *p = &t->parms;
	int ret = 0;
	struct net *net = t->net;

	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
		struct net_device *ldev = NULL;

		rcu_read_lock();
		if (p->link)
			ldev = dev_get_by_index_rcu(net, p->link);

		if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
		else if (!ipv6_addr_is_multicast(raddr) &&
			 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
		else
			ret = 1;
		rcu_read_unlock();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
/**
 * ip6_tnl_xmit - encapsulate a packet and transmit it through the tunnel
 * @skb: the outgoing socket buffer
 * @dev: the outgoing tunnel device
 * @dsfield: DSCP value for the outer header
 * @fl6: flow describing the outer IPv6 packet (saddr/daddr/label)
 * @encap_limit: encapsulation limit to insert; < 0 means none
 * @pmtu: out: path MTU, filled when -EMSGSIZE is returned
 * @proto: next-header value for the outer IPv6 header
 *
 * Resolves/validates the outer route, performs MTU and loop checks,
 * ensures headroom, optionally inserts the encapsulation limit option
 * and pushes the outer IPv6 header.
 *
 * Return: 0 on success; -EMSGSIZE (with *pmtu set) when the packet is
 * too big; -1 or another negative errno on routing/link failures.
 */
int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
		 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
		 __u8 proto)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h;
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
	unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
	bool use_cache = false;
	int err = -1;

	/* NBMA tunnel (no fixed remote): take the outer destination from
	 * the inner packet's neighbour entry
	 */
	if (ipv6_addr_any(&t->parms.raddr)) {
		if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			struct neighbour *neigh;
			int addr_type;

			if (!skb_dst(skb))
				goto tx_err_link_failure;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_err_link_failure;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY)
				addr6 = &ipv6_hdr(skb)->daddr;

			memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
			neigh_release(neigh);
		}
	} else if (!(t->parms.flags &
		     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
		/* the route is the same for every packet only when neither
		 * tclass nor fwmark is inherited, so only then is the dst
		 * cache usable
		 */
		use_cache = true;
	}

	if (use_cache)
		dst = dst_cache_get(&t->dst_cache);

	if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

	if (!dst) {
		dst = ip6_route_output(net, NULL, fl6);

		if (dst->error)
			goto tx_err_link_failure;
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			dst = NULL;
			goto tx_err_link_failure;
		}
		/* remember the freshly resolved route for caching below */
		ndst = dst;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	/* account outer header, inner ethernet framing and any extra
	 * tunnel header bytes when computing the usable MTU
	 */
	mtu = dst_mtu(dst) - eth_hlen - sizeof(*ipv6h) - t->tun_hlen;
	if (encap_limit >= 0) {
		/* the encapsulation limit option costs 8 more bytes */
		max_headroom += 8;
		mtu -= 8;
	}
	mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
		       IPV6_MIN_MTU : IPV4_MIN_MTU);

	skb_dst_update_pmtu(skb, mtu);
	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/* throttle link-failure notifications after recent errors */
	if (t->err_count > 0) {
		if (time_before(jiffies,
				t->err_time + IP6TUNNEL_ERR_TIMEO)) {
			t->err_count--;

			dst_link_failure(skb);
		} else {
			t->err_count = 0;
		}
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/* Okay, now see if we can stuff it in the buffer as-is;
	 * otherwise reallocate with enough headroom.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}

	/* cache only routes we resolved ourselves this call */
	if (use_cache && ndst)
		dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
	skb_dst_set(skb, dst);

	skb->transport_header = skb->network_header;

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	/* remember the worst-case headroom for future packets */
	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
			+ dst->header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	/* finally, push the outer IPv6 header */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(NULL, skb, dev);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
EXPORT_SYMBOL(ip6_tnl_xmit);
1165
/* Encapsulate an IPv4 packet and send it via ip6_tnl_xmit(); on
 * -EMSGSIZE an ICMPv4 frag-needed error is relayed to the sender.
 *
 * Return: 0 on success, -1 on failure (packet dropped upstream).
 */
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	const struct iphdr *iph;
	int encap_limit = -1;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	/* ensure the IPv4 header is linear before reading it */
	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		return -1;

	iph = ip_hdr(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	tproto = ACCESS_ONCE(t->parms.proto);
	if (tproto != IPPROTO_IPIP && tproto != 0)
		return -1;

	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPIP;

	dsfield = ipv4_get_dsfield(iph);

	/* optionally inherit TOS and fwmark from the inner packet */
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
					  & IPV6_TCLASS_MASK;
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPIP);
	if (err != 0) {
		/* tell the inner IPv4 sender about the smaller path MTU */
		if (err == -EMSGSIZE)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
		return -1;
	}

	return 0;
}
1215
/* Encapsulate an IPv6 packet and send it via ip6_tnl_xmit(). Honors an
 * encapsulation limit option already present in the inner packet; on
 * -EMSGSIZE an ICMPv6 packet-too-big error is relayed to the sender.
 *
 * Return: 0 on success, -1 on failure (packet dropped upstream).
 */
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h;
	int encap_limit = -1;
	__u16 offset;
	struct flowi6 fl6;
	__u8 dsfield;
	__u32 mtu;
	u8 tproto;
	int err;

	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
		return -1;

	ipv6h = ipv6_hdr(skb);
	tproto = ACCESS_ONCE(t->parms.proto);
	/* refuse wrong protocols and packets that would loop right back */
	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
	    ip6_tnl_addr_conflict(t, ipv6h))
		return -1;

	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
	/* the parse above may pull/reallocate; re-read the header pointer */
	ipv6h = ipv6_hdr(skb);
	if (offset > 0) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
		if (tel->encap_limit == 0) {
			/* limit exhausted: report back instead of tunneling */
			icmpv6_send(skb, ICMPV6_PARAMPROB,
				    ICMPV6_HDR_FIELD, offset + 2);
			return -1;
		}
		encap_limit = tel->encap_limit - 1;
	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_IPV6;

	dsfield = ipv6_get_dsfield(ipv6h);
	/* optionally inherit tclass, flowlabel and fwmark from the inner
	 * packet
	 */
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
		fl6.flowlabel |= ip6_flowlabel(ipv6h);
	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
		fl6.flowi6_mark = skb->mark;

	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
			   IPPROTO_IPV6);
	if (err != 0) {
		if (err == -EMSGSIZE)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		return -1;
	}

	return 0;
}
1274
/* ndo_start_xmit: dispatch on the inner protocol; anything other than
 * IPv4/IPv6 is counted as an error and dropped. Always returns
 * NETDEV_TX_OK — the skb is consumed on every path.
 */
static netdev_tx_t
ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	int ret;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ret = ip4ip6_tnl_xmit(skb, dev);
		break;
	case htons(ETH_P_IPV6):
		ret = ip6ip6_tnl_xmit(skb, dev);
		break;
	default:
		goto tx_err;
	}

	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
1304
/**
 * ip6_tnl_link_config - apply the tunnel parameters to the net_device
 * @t: tunnel to be configured
 *
 * Refreshes the cached flow, the capability flags, the point-to-point
 * flag and — when the remote route is resolvable — the device MTU and
 * hard_header_len derived from the underlying device.
 */
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
	struct net_device *dev = t->dev;
	struct __ip6_tnl_parm *p = &t->parms;
	struct flowi6 *fl6 = &t->fl.u.ip6;

	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

	/* Set up the flow template used for every transmitted packet */
	fl6->saddr = p->laddr;
	fl6->daddr = p->raddr;
	fl6->flowi6_oif = p->link;
	fl6->flowlabel = 0;

	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

	/* recompute capabilities from the (possibly new) endpoints */
	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
		dev->flags |= IFF_POINTOPOINT;
	else
		dev->flags &= ~IFF_POINTOPOINT;

	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		int strict = (ipv6_addr_type(&p->raddr) &
			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

		struct rt6_info *rt = rt6_lookup(t->net,
						 &p->raddr, &p->laddr,
						 p->link, strict);

		if (rt == NULL)
			return;

		/* derive MTU and header length from the underlying device */
		if (rt->dst.dev) {
			dev->hard_header_len = rt->dst.dev->hard_header_len +
				sizeof (struct ipv6hdr);

			dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
			/* reserve 8 bytes for the encapsulation limit option */
			if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
				dev->mtu-=8;

			if (dev->mtu < IPV6_MIN_MTU)
				dev->mtu = IPV6_MIN_MTU;
		}
		ip6_rt_put(rt);
	}
}
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
/**
 * ip6_tnl_change - update the tunnel parameters
 *   @t: tunnel to be changed
 *   @p: tunnel configuration parameters
 *
 * Description:
 *   Copies the new parameters into @t, invalidates the cached dst
 *   entry and reconfigures the device.  The caller is expected to
 *   have unlinked @t from the tunnel hash first (see ip6_tnl_update()).
 *
 * Return: 0
 **/
static int
ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
	t->parms.laddr = p->laddr;
	t->parms.raddr = p->raddr;
	t->parms.flags = p->flags;
	t->parms.hop_limit = p->hop_limit;
	t->parms.encap_limit = p->encap_limit;
	t->parms.flowinfo = p->flowinfo;
	t->parms.link = p->link;
	t->parms.proto = p->proto;
	/* Endpoints may have changed: drop the cached route. */
	dst_cache_reset(&t->dst_cache);
	ip6_tnl_link_config(t);
	return 0;
}
1383
/**
 * ip6_tnl_update - change the parameters of an existing tunnel device
 *   @t: tunnel to be changed
 *   @p: new parameters
 *
 * Description:
 *   Unlinks the tunnel from its hash bucket, waits for in-flight
 *   readers to finish, applies the new parameters and relinks it —
 *   the new endpoints may hash to a different bucket.
 *
 * Return: 0 (ip6_tnl_change() cannot currently fail)
 **/
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	int err;

	ip6_tnl_unlink(ip6n, t);
	synchronize_net();
	err = ip6_tnl_change(t, p);
	ip6_tnl_link(ip6n, t);
	netdev_state_change(t->dev);
	return err;
}
1397
/**
 * ip6_tnl0_update - update the fallback ("ip6tnl0") tunnel device
 *   @t: fallback tunnel
 *   @p: new parameters
 *
 * Description:
 *   Only the tunnel protocol may be changed on the fallback device;
 *   all other fields of @p are ignored.
 *
 * Return: 0
 **/
static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{

	t->parms.proto = p->proto;
	netdev_state_change(t->dev);
	return 0;
}
1405
1406static void
1407ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1408{
1409 p->laddr = u->laddr;
1410 p->raddr = u->raddr;
1411 p->flags = u->flags;
1412 p->hop_limit = u->hop_limit;
1413 p->encap_limit = u->encap_limit;
1414 p->flowinfo = u->flowinfo;
1415 p->link = u->link;
1416 p->proto = u->proto;
1417 memcpy(p->name, u->name, sizeof(u->name));
1418}
1419
1420static void
1421ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1422{
1423 u->laddr = p->laddr;
1424 u->raddr = p->raddr;
1425 u->flags = p->flags;
1426 u->hop_limit = p->hop_limit;
1427 u->encap_limit = p->encap_limit;
1428 u->flowinfo = p->flowinfo;
1429 u->link = p->link;
1430 u->proto = p->proto;
1431 memcpy(u->name, p->name, sizeof(u->name));
1432}
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
/**
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
 *   @dev: virtual device associated with tunnel
 *   @ifr: parameters passed from userspace
 *   @cmd: command to be performed
 *
 * Description:
 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
 *   from userspace.
 *
 *   The possible commands are the following:
 *     %SIOCGETTUNNEL: get tunnel parameters for device
 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 *     %SIOCDELTUNNEL: delete tunnel
 *
 *   The fallback device "ip6tnl0", created during module
 *   initialization, can be used for creating other tunnel devices.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if unable to copy data to or from userspace,
 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set,
 *   %-EINVAL if passed tunnel parameters are invalid,
 *   %-EEXIST if changing a tunnel's parameters would cause a conflict,
 *   %-ENOENT if attempting to delete a nonexisting device
 **/
static int
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	memset(&p1, 0, sizeof(p1));

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == ip6n->fb_tnl_dev) {
			/* On the fallback device, look up the tunnel the
			 * user asked about; fall back to the fallback
			 * device's own parms if it does not exist.
			 */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				t = netdev_priv(dev);
		} else {
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
			break;
		err = -EINVAL;
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		/* For SIOCADDTUNNEL, locate() creates the device. */
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (cmd == SIOCCHGTUNNEL) {
			if (!IS_ERR(t)) {
				/* Refuse to steal another device's parms. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
			if (dev == ip6n->fb_tnl_dev)
				err = ip6_tnl0_update(t, &p1);
			else
				err = ip6_tnl_update(t, &p1);
		}
		if (!IS_ERR(t)) {
			err = 0;
			/* Report the parameters actually in effect. */
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;

		} else {
			err = PTR_ERR(t);
		}
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		if (dev == ip6n->fb_tnl_dev) {
			/* Via the fallback device: delete the tunnel named
			 * in the user's parms, never the fallback itself.
			 */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				break;
			err = -EPERM;
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1569{
1570 struct ip6_tnl *tnl = netdev_priv(dev);
1571
1572 if (tnl->parms.proto == IPPROTO_IPV6) {
1573 if (new_mtu < IPV6_MIN_MTU)
1574 return -EINVAL;
1575 } else {
1576 if (new_mtu < ETH_MIN_MTU)
1577 return -EINVAL;
1578 }
1579 if (new_mtu > 0xFFF8 - dev->hard_header_len)
1580 return -EINVAL;
1581 dev->mtu = new_mtu;
1582 return 0;
1583}
1584EXPORT_SYMBOL(ip6_tnl_change_mtu);
1585
1586int ip6_tnl_get_iflink(const struct net_device *dev)
1587{
1588 struct ip6_tnl *t = netdev_priv(dev);
1589
1590 return t->parms.link;
1591}
1592EXPORT_SYMBOL(ip6_tnl_get_iflink);
1593
/* Device callbacks for ip6tnl interfaces; installed by
 * ip6_tnl_dev_setup().  .ndo_change_mtu_rh74 is the RHEL7
 * extended-ops slot for the MTU callback.
 */
static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init = ip6_tnl_dev_init,
	.ndo_uninit = ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_start_xmit,
	.ndo_do_ioctl = ip6_tnl_ioctl,
	.ndo_change_mtu_rh74 = ip6_tnl_change_mtu,
	.ndo_get_stats = ip6_get_stats,
	.ndo_get_iflink = ip6_tnl_get_iflink,
};
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
/**
 * ip6_tnl_dev_setup - setup virtual tunnel device
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Initialize function pointers and device parameters
 **/
static void ip6_tnl_dev_setup(struct net_device *dev)
{
	struct ip6_tnl *t;

	dev->netdev_ops = &ip6_tnl_netdev_ops;
	/* RHEL extended netdev fields: the core frees the netdev after
	 * running the private destructor.
	 */
	dev->extended->needs_free_netdev = true;
	dev->extended->priv_destructor = ip6_dev_free;

	dev->type = ARPHRD_TUNNEL6;
	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
	t = netdev_priv(dev);
	/* Reserve 8 bytes for the encapsulation-limit option header. */
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		dev->mtu-=8;
	dev->flags |= IFF_NOARP;
	dev->addr_len = sizeof(struct in6_addr);
	netif_keep_dst(dev);

	/* Assign a random permanent hardware address. */
	dev->addr_assign_type = NET_ADDR_RANDOM;
	eth_random_addr(dev->perm_addr);
}
1634
1635
1636
1637
1638
1639
1640
/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Allocates the per-cpu statistics, the dst cache and the GRO cells
 *   for @dev, releasing everything already acquired if a later step
 *   fails.
 *
 * Return: 0 on success, negative errno on failure.
 **/
static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int ret;

	t->dev = dev;
	t->net = dev_net(dev);
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
	if (ret)
		goto free_stats;

	ret = gro_cells_init(&t->gro_cells, dev);
	if (ret)
		goto destroy_dst;

	/* No extra encapsulation header space reserved yet. */
	t->hlen = 0;
	t->tun_hlen = 0;

	return 0;

destroy_dst:
	dst_cache_destroy(&t->dst_cache);
free_stats:
	free_percpu(dev->tstats);
	dev->tstats = NULL;

	return ret;
}
1674
1675
1676
1677
1678
1679
/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Return: 0 on success, negative errno from the generic initializer.
 **/
static int ip6_tnl_dev_init(struct net_device *dev)
{
	int err;

	err = ip6_tnl_dev_init_gen(dev);
	if (err)
		return err;

	/* Generic init succeeded: derive addresses/flow/MTU from parms. */
	ip6_tnl_link_config(netdev_priv(dev));
	return 0;
}
1690
1691
1692
1693
1694
1695
1696
1697
/**
 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
 *   @dev: fallback device
 *
 * Return: 0
 **/
static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	t->parms.proto = IPPROTO_IPV6;
	/* Hold a reference on the fallback device while it is published
	 * in the wildcard hash slot below.
	 */
	dev_hold(dev);

	rcu_assign_pointer(ip6n->tnls_wc[0], t);
	return 0;
}
1710
1711static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
1712{
1713 u8 proto;
1714
1715 if (!data)
1716 return 0;
1717
1718 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1719 if (proto != IPPROTO_IPV6 &&
1720 proto != IPPROTO_IPIP &&
1721 proto != 0)
1722 return -EINVAL;
1723
1724 return 0;
1725}
1726
/**
 * ip6_tnl_netlink_parms - parse IFLA_IPTUN_* attributes into @parms
 *   @data: netlink attributes, may be NULL
 *   @parms: result; zeroed first, so absent attributes keep their
 *     zero default
 **/
static void ip6_tnl_netlink_parms(struct nlattr *data[],
				  struct __ip6_tnl_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_IPTUN_LINK])
		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);

	if (data[IFLA_IPTUN_LOCAL])
		parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);

	if (data[IFLA_IPTUN_REMOTE])
		parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);

	if (data[IFLA_IPTUN_TTL])
		parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);

	if (data[IFLA_IPTUN_ENCAP_LIMIT])
		parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);

	if (data[IFLA_IPTUN_FLOWINFO])
		parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);

	if (data[IFLA_IPTUN_FLAGS])
		parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);

	if (data[IFLA_IPTUN_PROTO])
		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
}
1759
/**
 * ip6_tnl_newlink - rtnl handler for creating a new ip6tnl device
 *   @src_net: source namespace (unused here)
 *   @dev: pre-allocated device to register
 *   @tb: standard link attributes (IFLA_MTU honored)
 *   @data: IFLA_IPTUN_* tunnel attributes
 *
 * Return: 0 on success, -EEXIST if a tunnel with the same parameters
 *   already exists, otherwise the error from ip6_tnl_create2().
 **/
static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[])
{
	struct net *net = dev_net(dev);
	struct ip6_tnl *nt, *t;
	int err;

	nt = netdev_priv(dev);
	ip6_tnl_netlink_parms(data, &nt->parms);

	/* Refuse to create a duplicate of an existing tunnel. */
	t = ip6_tnl_locate(net, &nt->parms, 0);
	if (!IS_ERR(t))
		return -EEXIST;

	err = ip6_tnl_create2(dev);
	/* Apply an explicitly requested MTU after creation. */
	if (!err && tb[IFLA_MTU])
		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));

	return err;
}
1780
/**
 * ip6_tnl_changelink - rtnl handler for changing an ip6tnl device
 *   @dev: device being changed
 *   @tb: standard link attributes (unused here)
 *   @data: IFLA_IPTUN_* tunnel attributes
 *
 * Return: 0 on success, -EINVAL for the fallback device, -EEXIST if
 *   the new parameters collide with another tunnel.
 **/
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
			      struct nlattr *data[])
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct __ip6_tnl_parm p;
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	/* The fallback device cannot be reconfigured via rtnl. */
	if (dev == ip6n->fb_tnl_dev)
		return -EINVAL;

	ip6_tnl_netlink_parms(data, &p);

	/* The new parameters may only match this very device. */
	t = ip6_tnl_locate(net, &p, 0);
	if (!IS_ERR(t)) {
		if (t->dev != dev)
			return -EEXIST;
	} else
		t = netdev_priv(dev);

	return ip6_tnl_update(t, &p);
}
1803
1804static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
1805{
1806 struct net *net = dev_net(dev);
1807 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1808
1809 if (dev != ip6n->fb_tnl_dev)
1810 unregister_netdevice_queue(dev, head);
1811}
1812
/* rtnl: worst-case size of the attributes ip6_tnl_fill_info() emits. */
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
	return
		/* IFLA_IPTUN_LINK */
		nla_total_size(4) +
		/* IFLA_IPTUN_LOCAL */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_REMOTE */
		nla_total_size(sizeof(struct in6_addr)) +
		/* IFLA_IPTUN_TTL */
		nla_total_size(1) +
		/* IFLA_IPTUN_ENCAP_LIMIT */
		nla_total_size(1) +
		/* IFLA_IPTUN_FLOWINFO */
		nla_total_size(4) +
		/* IFLA_IPTUN_FLAGS */
		nla_total_size(4) +
		/* IFLA_IPTUN_PROTO */
		nla_total_size(1) +
		0;
}
1834
/* rtnl: dump the tunnel parameters as IFLA_IPTUN_* attributes.
 * Must stay in sync with ip6_tnl_get_size() above.
 */
static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip6_tnl *tunnel = netdev_priv(dev);
	struct __ip6_tnl_parm *parm = &tunnel->parms;

	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
	    nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
	    nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1854
1855struct net *ip6_tnl_get_link_net(const struct net_device *dev)
1856{
1857 struct ip6_tnl *tunnel = netdev_priv(dev);
1858
1859 return tunnel->net;
1860}
1861EXPORT_SYMBOL(ip6_tnl_get_link_net);
1862
/* Netlink attribute policy for the IFLA_IPTUN_* options accepted by
 * this driver; enforced by the rtnl core before validate/newlink.
 */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK] = { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
};
1873
/* rtnl_link registration: enables "ip link add ... type ip6tnl". */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind = "ip6tnl",
	.maxtype = IFLA_IPTUN_MAX,
	.policy = ip6_tnl_policy,
	.priv_size = sizeof(struct ip6_tnl),
	.setup = ip6_tnl_dev_setup,
	.validate = ip6_tnl_validate,
	.newlink = ip6_tnl_newlink,
	.changelink = ip6_tnl_changelink,
	.dellink = ip6_tnl_dellink,
	.get_size = ip6_tnl_get_size,
	.fill_info = ip6_tnl_fill_info,
	.get_link_net = ip6_tnl_get_link_net,
};
1888
/* xfrm6 tunnel handler for IPv4-in-IPv6 packets. */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler = ip4ip6_rcv,
	.err_handler = ip4ip6_err,
	.priority = 1,
};
1894
/* xfrm6 tunnel handler for IPv6-in-IPv6 packets. */
static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler = ip6ip6_rcv,
	.err_handler = ip6ip6_err,
	.priority = 1,
};
1900
/**
 * ip6_tnl_destroy_tunnels - destroy all tunnels of a namespace
 *   @net: namespace being torn down
 *
 * Description:
 *   Queues every ip6tnl device registered in @net, plus any hashed
 *   tunnel whose device has been moved to another namespace, and
 *   unregisters them all in one batch.  Caller holds RTNL.
 **/
static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct net_device *dev, *aux;
	int h;
	struct ip6_tnl *t;
	LIST_HEAD(list);

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &ip6_link_ops)
			unregister_netdevice_queue(dev, &list);

	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
		t = rtnl_dereference(ip6n->tnls_r_l[h]);
		while (t != NULL) {
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, &list);
			t = rtnl_dereference(t->next);
		}
	}

	unregister_netdevice_many(&list);
}
1927
/* Per-netns setup: create and register the fallback "ip6tnl0" device
 * and wire up the hash-table pointers.
 */
static int __net_init ip6_tnl_init_net(struct net *net)
{
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct ip6_tnl *t = NULL;
	int err;

	/* tnls[0] is the wildcard slot, tnls[1] the remote/local hash. */
	ip6n->tnls[0] = ip6n->tnls_wc;
	ip6n->tnls[1] = ip6n->tnls_r_l;

	err = -ENOMEM;
	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
					ip6_tnl_dev_setup);

	if (!ip6n->fb_tnl_dev)
		goto err_alloc_dev;
	dev_net_set(ip6n->fb_tnl_dev, net);
	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;

	err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	err = register_netdev(ip6n->fb_tnl_dev);
	if (err < 0)
		goto err_register;

	t = netdev_priv(ip6n->fb_tnl_dev);

	strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
	return 0;

err_register:
	free_netdev(ip6n->fb_tnl_dev);
err_alloc_dev:
	return err;
}
1968
/* Per-netns teardown: destroy all tunnels of @net under RTNL. */
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	rtnl_lock();
	ip6_tnl_destroy_tunnels(net);
	rtnl_unlock();
}
1975
/* Per-network-namespace lifecycle hooks and private state size. */
static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};
1982
1983
1984
1985
1986
1987
1988
/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Return: 0 on success, negative errno on failure.
 **/
static int __init ip6_tunnel_init(void)
{
	int err;

	if (!ipv6_mod_enabled())
		return -EOPNOTSUPP;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out_pernet;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto out_ip4ip6;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}
	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	return 0;

	/* Unwind in reverse registration order; each label undoes only
	 * the steps that had succeeded before the failure.
	 */
rtnl_link_failed:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
	unregister_pernet_device(&ip6_tnl_net_ops);
out_pernet:
	return err;
}
2026
2027
2028
2029
2030
/**
 * ip6_tunnel_cleanup - free resources and unregister protocol
 **/
static void __exit ip6_tunnel_cleanup(void)
{
	rtnl_link_unregister(&ip6_link_ops);
	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
		pr_info("%s: can't deregister ip4ip6\n", __func__);

	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
		pr_info("%s: can't deregister ip6ip6\n", __func__);

	unregister_pernet_device(&ip6_tnl_net_ops);
}
2042
2043module_init(ip6_tunnel_init);
2044module_exit(ip6_tunnel_cleanup);
2045