1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/module.h>
24#include <linux/capability.h>
25#include <linux/errno.h>
26#include <linux/types.h>
27#include <linux/sockios.h>
28#include <linux/icmp.h>
29#include <linux/if.h>
30#include <linux/in.h>
31#include <linux/ip.h>
32#include <linux/net.h>
33#include <linux/in6.h>
34#include <linux/netdevice.h>
35#include <linux/if_arp.h>
36#include <linux/icmpv6.h>
37#include <linux/init.h>
38#include <linux/route.h>
39#include <linux/rtnetlink.h>
40#include <linux/netfilter_ipv6.h>
41#include <linux/slab.h>
42#include <linux/hash.h>
43#include <linux/etherdevice.h>
44
45#include <asm/uaccess.h>
46#include <linux/atomic.h>
47
48#include <net/icmp.h>
49#include <net/ip.h>
50#include <net/ip_tunnels.h>
51#include <net/ipv6.h>
52#include <net/ip6_route.h>
53#include <net/addrconf.h>
54#include <net/ip6_tunnel.h>
55#include <net/xfrm.h>
56#include <net/dsfield.h>
57#include <net/inet_ecn.h>
58#include <net/net_namespace.h>
59#include <net/netns/generic.h>
60
/* Module identification, plus the rtnl-link and netdev aliases. */
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");

/*
 * The (remote, local) tunnel hash table has 1 << HASH_SIZE_SHIFT buckets;
 * HASH() below reduces its result to HASH_SIZE_SHIFT bits.
 */
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)

/*
 * Module parameter (mode 0644). ipxip6_rcv() hands its current value to
 * __ip6_tnl_rcv(), where it gates the rate-limited "non-ECT" log message.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
73
74static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
75{
76 u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
77
78 return hash_32(hash, HASH_SIZE_SHIFT);
79}
80
/* Forward declarations for symbols defined outside this part of the file. */
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;

/* Key passed to net_generic() to fetch the per-namespace struct ip6_tnl_net. */
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
	/* the fallback tunnel device; ip6_tnl_dev_uninit() compares against it */
	struct net_device *fb_tnl_dev;
	/* chains of tunnels, indexed by HASH(remote, local) */
	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
	/* single slot consulted last by ip6_tnl_lookup() (catch-all tunnel) */
	struct ip6_tnl __rcu *tnls_wc[1];
	/*
	 * Indexed by the "prio" computed in ip6_tnl_bucket(): 0 when both
	 * addresses are unspecified, 1 otherwise.
	 * NOTE(review): presumably tnls[0] = tnls_wc and tnls[1] = tnls_r_l;
	 * the initialisation is not visible here -- confirm.
	 */
	struct ip6_tnl __rcu **tnls[2];
};
94
95static struct net_device_stats *ip6_get_stats(struct net_device *dev)
96{
97 struct pcpu_sw_netstats tmp, sum = { 0 };
98 int i;
99
100 for_each_possible_cpu(i) {
101 unsigned int start;
102 const struct pcpu_sw_netstats *tstats =
103 per_cpu_ptr(dev->tstats, i);
104
105 do {
106 start = u64_stats_fetch_begin_irq(&tstats->syncp);
107 tmp.rx_packets = tstats->rx_packets;
108 tmp.rx_bytes = tstats->rx_bytes;
109 tmp.tx_packets = tstats->tx_packets;
110 tmp.tx_bytes = tstats->tx_bytes;
111 } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
112
113 sum.rx_packets += tmp.rx_packets;
114 sum.rx_bytes += tmp.rx_bytes;
115 sum.tx_packets += tmp.tx_packets;
116 sum.tx_bytes += tmp.tx_bytes;
117 }
118 dev->stats.rx_packets = sum.rx_packets;
119 dev->stats.rx_bytes = sum.rx_bytes;
120 dev->stats.tx_packets = sum.tx_packets;
121 dev->stats.tx_bytes = sum.tx_bytes;
122 return &dev->stats;
123}
124
125
126
127
128
129
130
131
132
133
134
135
/*
 * Walk a tunnel chain beginning at @start, following ->next with
 * rcu_dereference(). The iterator is NOT a macro argument: the caller
 * must have a "struct ip6_tnl *t" in scope, which this macro assigns.
 */
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
138
139static struct ip6_tnl *
140ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
141{
142 unsigned int hash = HASH(remote, local);
143 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 struct in6_addr any;
146
147 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
148 if (ipv6_addr_equal(local, &t->parms.laddr) &&
149 ipv6_addr_equal(remote, &t->parms.raddr) &&
150 (t->dev->flags & IFF_UP))
151 return t;
152 }
153
154 memset(&any, 0, sizeof(any));
155 hash = HASH(&any, local);
156 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
157 if (ipv6_addr_equal(local, &t->parms.laddr) &&
158 ipv6_addr_any(&t->parms.raddr) &&
159 (t->dev->flags & IFF_UP))
160 return t;
161 }
162
163 hash = HASH(remote, &any);
164 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
165 if (ipv6_addr_equal(remote, &t->parms.raddr) &&
166 ipv6_addr_any(&t->parms.laddr) &&
167 (t->dev->flags & IFF_UP))
168 return t;
169 }
170
171 t = rcu_dereference(ip6n->tnls_wc[0]);
172 if (t && (t->dev->flags & IFF_UP))
173 return t;
174
175 return NULL;
176}
177
178
179
180
181
182
183
184
185
186
187
188
189static struct ip6_tnl __rcu **
190ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
191{
192 const struct in6_addr *remote = &p->raddr;
193 const struct in6_addr *local = &p->laddr;
194 unsigned int h = 0;
195 int prio = 0;
196
197 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
198 prio = 1;
199 h = HASH(remote, local);
200 }
201 return &ip6n->tnls[prio][h];
202}
203
204
205
206
207
208
209static void
210ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
211{
212 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
213
214 rcu_assign_pointer(t->next , rtnl_dereference(*tp));
215 rcu_assign_pointer(*tp, t);
216}
217
218
219
220
221
222
223static void
224ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
225{
226 struct ip6_tnl __rcu **tp;
227 struct ip6_tnl *iter;
228
229 for (tp = ip6_tnl_bucket(ip6n, &t->parms);
230 (iter = rtnl_dereference(*tp)) != NULL;
231 tp = &iter->next) {
232 if (t == iter) {
233 rcu_assign_pointer(*tp, t->next);
234 break;
235 }
236 }
237}
238
239static void ip6_dev_free(struct net_device *dev)
240{
241 struct ip6_tnl *t = netdev_priv(dev);
242
243 gro_cells_destroy(&t->gro_cells);
244 dst_cache_destroy(&t->dst_cache);
245 free_percpu(dev->tstats);
246 free_netdev(dev);
247}
248
249static int ip6_tnl_create2(struct net_device *dev)
250{
251 struct ip6_tnl *t = netdev_priv(dev);
252 struct net *net = dev_net(dev);
253 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
254 int err;
255
256 t = netdev_priv(dev);
257
258 dev->rtnl_link_ops = &ip6_link_ops;
259 err = register_netdevice(dev);
260 if (err < 0)
261 goto out;
262
263 strcpy(t->parms.name, dev->name);
264
265 dev_hold(dev);
266 ip6_tnl_link(ip6n, t);
267 return 0;
268
269out:
270 return err;
271}
272
273
274
275
276
277
278
279
280
281
282
283
284
285static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
286{
287 struct net_device *dev;
288 struct ip6_tnl *t;
289 char name[IFNAMSIZ];
290 int err = -ENOMEM;
291
292 if (p->name[0])
293 strlcpy(name, p->name, IFNAMSIZ);
294 else
295 sprintf(name, "ip6tnl%%d");
296
297 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
298 if (dev == NULL)
299 goto failed;
300
301 dev_net_set(dev, net);
302
303 t = netdev_priv(dev);
304 t->parms = *p;
305 t->net = dev_net(dev);
306 err = ip6_tnl_create2(dev);
307 if (err < 0)
308 goto failed_free;
309
310 return t;
311
312failed_free:
313 ip6_dev_free(dev);
314failed:
315 return ERR_PTR(err);
316}
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332static struct ip6_tnl *ip6_tnl_locate(struct net *net,
333 struct __ip6_tnl_parm *p, int create)
334{
335 const struct in6_addr *remote = &p->raddr;
336 const struct in6_addr *local = &p->laddr;
337 struct ip6_tnl __rcu **tp;
338 struct ip6_tnl *t;
339 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
340
341 for (tp = ip6_tnl_bucket(ip6n, p);
342 (t = rtnl_dereference(*tp)) != NULL;
343 tp = &t->next) {
344 if (ipv6_addr_equal(local, &t->parms.laddr) &&
345 ipv6_addr_equal(remote, &t->parms.raddr)) {
346 if (create)
347 return ERR_PTR(-EEXIST);
348
349 return t;
350 }
351 }
352 if (!create)
353 return ERR_PTR(-ENODEV);
354 return ip6_tnl_create(net, p);
355}
356
357
358
359
360
361
362
363
364
365static void
366ip6_tnl_dev_uninit(struct net_device *dev)
367{
368 struct ip6_tnl *t = netdev_priv(dev);
369 struct net *net = t->net;
370 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
371
372 if (dev == ip6n->fb_tnl_dev)
373 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
374 else
375 ip6_tnl_unlink(ip6n, t);
376 dst_cache_reset(&t->dst_cache);
377 dev_put(dev);
378}
379
380
381
382
383
384
385
386
387
388
389__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
390{
391 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
392 unsigned int nhoff = raw - skb->data;
393 unsigned int off = nhoff + sizeof(*ipv6h);
394 u8 next, nexthdr = ipv6h->nexthdr;
395
396 while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
397 struct ipv6_opt_hdr *hdr;
398 u16 optlen;
399
400 if (!pskb_may_pull(skb, off + sizeof(*hdr)))
401 break;
402
403 hdr = (struct ipv6_opt_hdr *)(skb->data + off);
404 if (nexthdr == NEXTHDR_FRAGMENT) {
405 struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
406 if (frag_hdr->frag_off)
407 break;
408 optlen = 8;
409 } else if (nexthdr == NEXTHDR_AUTH) {
410 optlen = (hdr->hdrlen + 2) << 2;
411 } else {
412 optlen = ipv6_optlen(hdr);
413 }
414
415
416
417 next = hdr->nexthdr;
418 if (nexthdr == NEXTHDR_DEST) {
419 u16 i = 2;
420
421
422 if (!pskb_may_pull(skb, off + optlen))
423 break;
424
425 while (1) {
426 struct ipv6_tlv_tnl_enc_lim *tel;
427
428
429 if (i + sizeof(*tel) > optlen)
430 break;
431
432 tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
433
434 if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
435 tel->length == 1)
436 return i + off - nhoff;
437
438 if (tel->type)
439 i += tel->length + 2;
440 else
441 i++;
442 }
443 }
444 nexthdr = next;
445 off += optlen;
446 }
447 return 0;
448}
449EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
450
451
452
453
454
455
456
457
458
459static int
460ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
461 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
462{
463 const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
464 struct ip6_tnl *t;
465 int rel_msg = 0;
466 u8 rel_type = ICMPV6_DEST_UNREACH;
467 u8 rel_code = ICMPV6_ADDR_UNREACH;
468 u8 tproto;
469 __u32 rel_info = 0;
470 __u16 len;
471 int err = -ENOENT;
472
473
474
475
476
477 rcu_read_lock();
478 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
479 &ipv6h->saddr)) == NULL)
480 goto out;
481
482 tproto = ACCESS_ONCE(t->parms.proto);
483 if (tproto != ipproto && tproto != 0)
484 goto out;
485
486 err = 0;
487
488 switch (*type) {
489 __u32 teli;
490 struct ipv6_tlv_tnl_enc_lim *tel;
491 __u32 mtu;
492 case ICMPV6_DEST_UNREACH:
493 net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
494 t->parms.name);
495 rel_msg = 1;
496 break;
497 case ICMPV6_TIME_EXCEED:
498 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
499 net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
500 t->parms.name);
501 rel_msg = 1;
502 }
503 break;
504 case ICMPV6_PARAMPROB:
505 teli = 0;
506 if ((*code) == ICMPV6_HDR_FIELD)
507 teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
508
509 if (teli && teli == *info - 2) {
510 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
511 if (tel->encap_limit == 0) {
512 net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
513 t->parms.name);
514 rel_msg = 1;
515 }
516 } else {
517 net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
518 t->parms.name);
519 }
520 break;
521 case ICMPV6_PKT_TOOBIG:
522 mtu = *info - offset;
523 if (mtu < IPV6_MIN_MTU)
524 mtu = IPV6_MIN_MTU;
525 t->dev->mtu = mtu;
526
527 if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
528 rel_type = ICMPV6_PKT_TOOBIG;
529 rel_code = 0;
530 rel_info = mtu;
531 rel_msg = 1;
532 }
533 break;
534 }
535
536 *type = rel_type;
537 *code = rel_code;
538 *info = rel_info;
539 *msg = rel_msg;
540
541out:
542 rcu_read_unlock();
543 return err;
544}
545
/*
 * ip4ip6_err - ICMPv6 error handler for IPv4-in-IPv6 tunnels
 * @skb: received ICMPv6 error packet
 * @opt: passed through to ip6_tnl_err()
 * @type: ICMPv6 type
 * @code: ICMPv6 code
 * @offset: offset of the inner (IPv4) packet within @skb's data
 * @info: ICMPv6 info field, network byte order
 *
 * Lets ip6_tnl_err() decide whether an error must be relayed, translates
 * the ICMPv6 type/code to ICMPv4, then sends the ICMPv4 error from a
 * clone of @skb that has been pulled to the inner IPv4 header.
 *
 * Return: the negative error from ip6_tnl_err(), otherwise 0.
 */
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
{
	int rel_msg = 0;
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;
	struct sk_buff *skb2;
	const struct iphdr *eiph;
	struct rtable *rt;
	struct flowi4 fl4;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	/* nothing to relay */
	if (rel_msg == 0)
		return 0;

	/* translate the ICMPv6 type/code pair to its ICMPv4 counterpart */
	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	case NDISC_REDIRECT:
		/*
		 * NOTE(review): there is no "break" here, so control falls
		 * into "default" and returns 0; the two assignments below
		 * are dead stores and the ICMP_REDIRECT branch further down
		 * cannot be reached through this switch. Confirm whether a
		 * break was intended or the case should be dropped.
		 */
		rel_type = ICMP_REDIRECT;
		rel_code = ICMP_REDIR_HOST;
	default:
		return 0;
	}

	/* the inner IPv4 header must be readable */
	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	/* work on a clone so the original skb is left untouched */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	skb_dst_drop(skb2);

	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	eiph = ip_hdr(skb2);

	/* find a route back towards the inner packet's source */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
				   eiph->saddr, 0,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	if (IS_ERR(rt))
		goto out;

	skb2->dev = rt->dst.dev;

	/* attach to skb2 the route the inner packet itself would have used */
	if (rt->rt_flags & RTCF_LOCAL) {
		/* inner source is local: redo the output route lookup */
		ip_rt_put(rt);
		rt = NULL;
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(eiph->tos), 0);
		/* only continue when that route leaves through a tunnel device */
		if (IS_ERR(rt) ||
		    rt->dst.dev->type != ARPHRD_TUNNEL) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			goto out;
		}
		skb_dst_set(skb2, &rt->dst);
	} else {
		/* inner source is remote: run the input route lookup instead */
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* record the new path MTU on the route unless it exceeds the current one */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb_dst(skb2)))
			goto out;

		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
	}
	if (rel_type == ICMP_REDIRECT)
		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	kfree_skb(skb2);
	return 0;
}
651
652static int
653ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
654 u8 type, u8 code, int offset, __be32 info)
655{
656 int rel_msg = 0;
657 u8 rel_type = type;
658 u8 rel_code = code;
659 __u32 rel_info = ntohl(info);
660 int err;
661
662 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
663 &rel_msg, &rel_info, offset);
664 if (err < 0)
665 return err;
666
667 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
668 struct rt6_info *rt;
669 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
670
671 if (!skb2)
672 return 0;
673
674 skb_dst_drop(skb2);
675 skb_pull(skb2, offset);
676 skb_reset_network_header(skb2);
677
678
679 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
680 NULL, 0, 0);
681
682 if (rt && rt->dst.dev)
683 skb2->dev = rt->dst.dev;
684
685 icmpv6_send(skb2, rel_type, rel_code, rel_info);
686
687 ip6_rt_put(rt);
688
689 kfree_skb(skb2);
690 }
691
692 return 0;
693}
694
695static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
696 const struct ipv6hdr *ipv6h,
697 struct sk_buff *skb)
698{
699 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
700
701 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
702 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
703
704 return IP6_ECN_decapsulate(ipv6h, skb);
705}
706
707static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
708 const struct ipv6hdr *ipv6h,
709 struct sk_buff *skb)
710{
711 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
712 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
713
714 return IP6_ECN_decapsulate(ipv6h, skb);
715}
716
717__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
718 const struct in6_addr *laddr,
719 const struct in6_addr *raddr)
720{
721 struct __ip6_tnl_parm *p = &t->parms;
722 int ltype = ipv6_addr_type(laddr);
723 int rtype = ipv6_addr_type(raddr);
724 __u32 flags = 0;
725
726 if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
727 flags = IP6_TNL_F_CAP_PER_PACKET;
728 } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
729 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
730 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
731 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
732 if (ltype&IPV6_ADDR_UNICAST)
733 flags |= IP6_TNL_F_CAP_XMIT;
734 if (rtype&IPV6_ADDR_UNICAST)
735 flags |= IP6_TNL_F_CAP_RCV;
736 }
737 return flags;
738}
739EXPORT_SYMBOL(ip6_tnl_get_cap);
740
741
742int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
743 const struct in6_addr *laddr,
744 const struct in6_addr *raddr)
745{
746 struct __ip6_tnl_parm *p = &t->parms;
747 int ret = 0;
748 struct net *net = t->net;
749
750 if ((p->flags & IP6_TNL_F_CAP_RCV) ||
751 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
752 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
753 struct net_device *ldev = NULL;
754
755 if (p->link)
756 ldev = dev_get_by_index_rcu(net, p->link);
757
758 if ((ipv6_addr_is_multicast(laddr) ||
759 likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
760 likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
761 ret = 1;
762 }
763 return ret;
764}
765EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
766
/*
 * __ip6_tnl_rcv - common receive path once a tunnel has been selected
 * @tunnel: receiving tunnel
 * @skb: packet, already pulled past the outer headers by the caller
 * @tpi: parsed tunnel header information (flags, sequence number, proto)
 * @tun_dst: not referenced anywhere in this function body
 * @dscp_ecn_decapsulate: helper applying DSCP/ECN rules to the inner header
 * @log_ecn_err: log (rate-limited) when the helper reports an ECN problem
 *
 * Validates the checksum/sequence flags against the tunnel configuration,
 * prepares the skb for the inner protocol, updates the per-CPU rx
 * counters and hands the packet to the tunnel's GRO cells.
 *
 * Return: always 0; packets that fail validation are freed here.
 */
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						     const struct ipv6hdr *ipv6h,
						     struct sk_buff *skb),
			 bool log_ecn_err)
{
	struct pcpu_sw_netstats *tstats;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int err;

	/* checksum presence must match in packet and tunnel configuration */
	if ((!(tpi->flags & TUNNEL_CSUM) &&
	     (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) &&
	     !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* with sequencing on, drop unsequenced packets and ones behind i_seqno */
	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb->protocol = tpi->proto;

	/* Ethernet-type tunnel devices carry a full inner Ethernet frame */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* pskb_may_pull() may have moved the data: reload the pointer */
		ipv6h = ipv6_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	skb_reset_network_header(skb);
	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);

	/* any non-zero result may be logged; only results above 1 drop the packet */
	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	/* account the packet on this CPU's counters */
	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* second argument is true when tunnel and device namespaces differ */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
849
850int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
851 const struct tnl_ptk_info *tpi,
852 struct metadata_dst *tun_dst,
853 bool log_ecn_err)
854{
855 return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
856 log_ecn_err);
857}
858EXPORT_SYMBOL(ip6_tnl_rcv);
859
/*
 * Tunnel packet info for an IPv6 inner packet, used by ip6ip6_rcv().
 * Only .proto is set; all other members are zero-initialised.
 */
static const struct tnl_ptk_info tpi_v6 = {
	.proto = htons(ETH_P_IPV6),
};

/*
 * Tunnel packet info for an IPv4 inner packet, used by ip4ip6_rcv().
 * Only .proto is set; all other members are zero-initialised.
 */
static const struct tnl_ptk_info tpi_v4 = {
	.proto = htons(ETH_P_IP),
};
869
870static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
871 const struct tnl_ptk_info *tpi,
872 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
873 const struct ipv6hdr *ipv6h,
874 struct sk_buff *skb))
875{
876 struct ip6_tnl *t;
877 const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
878 int ret = -1;
879
880 rcu_read_lock();
881 t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
882
883 if (t) {
884 u8 tproto = ACCESS_ONCE(t->parms.proto);
885
886 if (tproto != ipproto && tproto != 0)
887 goto drop;
888 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
889 goto drop;
890 if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
891 goto drop;
892 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
893 goto drop;
894 ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
895 log_ecn_error);
896 }
897
898 rcu_read_unlock();
899
900 return ret;
901
902drop:
903 rcu_read_unlock();
904 kfree_skb(skb);
905 return 0;
906}
907
/*
 * ip4ip6_rcv - receive handler for IPv4-in-IPv6 packets
 * @skb: received packet
 *
 * Thin wrapper: calls ipxip6_rcv() with IPPROTO_IPIP, the IPv4 packet
 * info and the IPv4 DSCP/ECN helper, and returns its result.
 */
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
			  ip4ip6_dscp_ecn_decapsulate);
}
913
/*
 * ip6ip6_rcv - receive handler for IPv6-in-IPv6 packets
 * @skb: received packet
 *
 * Thin wrapper: calls ipxip6_rcv() with IPPROTO_IPV6, the IPv6 packet
 * info and the IPv6 DSCP/ECN helper, and returns its result.
 */
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
			  ip6ip6_dscp_ecn_decapsulate);
}
919
/*
 * Transmit options plus the storage they point into: init_tel_txopt()
 * builds an 8-byte destination options header in dst_opt and makes
 * ops.dst0opt point at it.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
924
925static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
926{
927 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
928
929 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
930 opt->dst_opt[3] = 1;
931 opt->dst_opt[4] = encap_limit;
932 opt->dst_opt[5] = IPV6_TLV_PADN;
933 opt->dst_opt[6] = 1;
934
935 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
936 opt->ops.opt_nflen = 8;
937}
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
/*
 * ip6_tnl_addr_conflict - detect a packet that originated at the tunnel peer
 * @t: outgoing tunnel
 * @hdr: IPv6 header of the packet about to be encapsulated
 *
 * Return: true when the packet's source address equals the tunnel's
 * remote endpoint address.
 */
static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
958
959int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
960 const struct in6_addr *laddr,
961 const struct in6_addr *raddr)
962{
963 struct __ip6_tnl_parm *p = &t->parms;
964 int ret = 0;
965 struct net *net = t->net;
966
967 if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
968 ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
969 (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
970 struct net_device *ldev = NULL;
971
972 rcu_read_lock();
973 if (p->link)
974 ldev = dev_get_by_index_rcu(net, p->link);
975
976 if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
977 pr_warn("%s xmit: Local address not yet configured!\n",
978 p->name);
979 else if (!ipv6_addr_is_multicast(raddr) &&
980 unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
981 pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
982 p->name);
983 else
984 ret = 1;
985 rcu_read_unlock();
986 }
987 return ret;
988}
989EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
/*
 * ip6_tnl_xmit - encapsulate a packet in IPv6 and send it
 * @skb: packet to encapsulate
 * @dev: tunnel device
 * @dsfield: DS field of the inner packet; passed through
 *           INET_ECN_encapsulate() for the outer header
 * @fl6: flow describing the outer packet; daddr may be rewritten here
 * @encap_limit: encapsulation limit to insert, or negative for none
 * @pmtu: out: path MTU, written only when -EMSGSIZE is returned
 * @proto: value for the outer header's nexthdr field
 *
 * Return: 0 once the packet has been handed to ip6tunnel_xmit();
 * -EMSGSIZE when the packet exceeds the path MTU and is not GSO;
 * the xfrm_lookup() error when that lookup fails; -1 for all other
 * failures. On error the skb is NOT freed here -- the callers in this
 * file free it.
 */
int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
		 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
		 __u8 proto)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	/* this initial value is only used via sizeof; reassigned after skb_push() */
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst = NULL, *ndst = NULL;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
	int err = -1;

	/*
	 * No remote endpoint configured: take the outer destination from
	 * the neighbour entry of the inner packet's route.
	 * NOTE(review): ipv6_hdr(skb)->daddr is read here whatever the
	 * inner protocol is, and ip4ip6_tnl_xmit() also calls this
	 * function -- confirm this path is sound for IPv4 payloads.
	 */
	if (ipv6_addr_any(&t->parms.raddr)) {
		struct in6_addr *addr6;
		struct neighbour *neigh;
		int addr_type;

		if (!skb_dst(skb))
			goto tx_err_link_failure;

		neigh = dst_neigh_lookup(skb_dst(skb),
					 &ipv6_hdr(skb)->daddr);
		if (!neigh)
			goto tx_err_link_failure;

		addr6 = (struct in6_addr *)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		/* unspecified neighbour key: use the inner destination */
		if (addr_type == IPV6_ADDR_ANY)
			addr6 = &ipv6_hdr(skb)->daddr;

		memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
		neigh_release(neigh);
	} else if (!fl6->flowi6_mark)
		/* the cached route is only used for flows without a mark */
		dst = dst_cache_get(&t->dst_cache);

	if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
		goto tx_err_link_failure;

	/* no cached route: do a full route + xfrm lookup */
	if (!dst) {
		dst = ip6_route_output(net, NULL, fl6);

		if (dst->error)
			goto tx_err_link_failure;
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			/* keep the error pointer away from dst_release() below */
			dst = NULL;
			goto tx_err_link_failure;
		}
		/* remember the fresh route so it can be cached further down */
		ndst = dst;
	}

	tdev = dst->dev;

	/* the outer route must not lead back into this tunnel */
	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
		goto tx_err_dst_release;
	}
	/* payload MTU: route MTU minus outer header, tunnel header and option */
	mtu = dst_mtu(dst) - sizeof (*ipv6h) - t->tun_hlen;
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
	if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/* signal link failure while errors recorded on the tunnel are recent */
	if (t->err_count > 0) {
		if (time_before(jiffies,
				t->err_time + IP6TUNNEL_ERR_TIMEO)) {
			t->err_count--;

			dst_link_failure(skb);
		} else {
			t->err_count = 0;
		}
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));

	/* make sure there is writable headroom for the outer headers */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		consume_skb(skb);
		skb = new_skb;
	}

	/* cache only routes looked up above, and only for unmarked flows */
	if (!fl6->flowi6_mark && ndst)
		dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
	skb_dst_set(skb, dst);

	skb->transport_header = skb->network_header;

	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}

	if (likely(!skb->encapsulation)) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	/* grow the advertised headroom so later packets need no reallocation */
	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
			+ dst->header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	/* prepend and fill in the outer IPv6 header */
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6h->saddr = fl6->saddr;
	ipv6h->daddr = fl6->daddr;
	ip6tunnel_xmit(NULL, skb, dev);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
EXPORT_SYMBOL(ip6_tnl_xmit);
1161
1162static inline int
1163ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1164{
1165 struct ip6_tnl *t = netdev_priv(dev);
1166 const struct iphdr *iph = ip_hdr(skb);
1167 int encap_limit = -1;
1168 struct flowi6 fl6;
1169 __u8 dsfield;
1170 __u32 mtu;
1171 u8 tproto;
1172 int err;
1173
1174 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1175
1176 tproto = ACCESS_ONCE(t->parms.proto);
1177 if (tproto != IPPROTO_IPIP && tproto != 0)
1178 return -1;
1179
1180 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1181 encap_limit = t->parms.encap_limit;
1182
1183 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1184 fl6.flowi6_proto = IPPROTO_IPIP;
1185
1186 dsfield = ipv4_get_dsfield(iph);
1187
1188 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1189 fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
1190 & IPV6_TCLASS_MASK;
1191 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1192 fl6.flowi6_mark = skb->mark;
1193
1194 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1195 IPPROTO_IPIP);
1196 if (err != 0) {
1197
1198 if (err == -EMSGSIZE)
1199 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1200 htonl(mtu));
1201 return -1;
1202 }
1203
1204 return 0;
1205}
1206
1207static inline int
1208ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1209{
1210 struct ip6_tnl *t = netdev_priv(dev);
1211 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1212 int encap_limit = -1;
1213 __u16 offset;
1214 struct flowi6 fl6;
1215 __u8 dsfield;
1216 __u32 mtu;
1217 u8 tproto;
1218 int err;
1219
1220 tproto = ACCESS_ONCE(t->parms.proto);
1221 if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
1222 ip6_tnl_addr_conflict(t, ipv6h))
1223 return -1;
1224
1225 offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1226
1227 ipv6h = ipv6_hdr(skb);
1228 if (offset > 0) {
1229 struct ipv6_tlv_tnl_enc_lim *tel;
1230 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1231 if (tel->encap_limit == 0) {
1232 icmpv6_send(skb, ICMPV6_PARAMPROB,
1233 ICMPV6_HDR_FIELD, offset + 2);
1234 return -1;
1235 }
1236 encap_limit = tel->encap_limit - 1;
1237 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1238 encap_limit = t->parms.encap_limit;
1239
1240 memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1241 fl6.flowi6_proto = IPPROTO_IPV6;
1242
1243 dsfield = ipv6_get_dsfield(ipv6h);
1244 if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1245 fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1246 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
1247 fl6.flowlabel |= ip6_flowlabel(ipv6h);
1248 if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1249 fl6.flowi6_mark = skb->mark;
1250
1251 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1252 IPPROTO_IPV6);
1253 if (err != 0) {
1254 if (err == -EMSGSIZE)
1255 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1256 return -1;
1257 }
1258
1259 return 0;
1260}
1261
1262static netdev_tx_t
1263ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
1264{
1265 struct ip6_tnl *t = netdev_priv(dev);
1266 struct net_device_stats *stats = &t->dev->stats;
1267 int ret;
1268
1269 switch (skb->protocol) {
1270 case htons(ETH_P_IP):
1271 ret = ip4ip6_tnl_xmit(skb, dev);
1272 break;
1273 case htons(ETH_P_IPV6):
1274 ret = ip6ip6_tnl_xmit(skb, dev);
1275 break;
1276 default:
1277 goto tx_err;
1278 }
1279
1280 if (ret < 0)
1281 goto tx_err;
1282
1283 return NETDEV_TX_OK;
1284
1285tx_err:
1286 stats->tx_errors++;
1287 stats->tx_dropped++;
1288 kfree_skb(skb);
1289 return NETDEV_TX_OK;
1290}
1291
1292static void ip6_tnl_link_config(struct ip6_tnl *t)
1293{
1294 struct net_device *dev = t->dev;
1295 struct __ip6_tnl_parm *p = &t->parms;
1296 struct flowi6 *fl6 = &t->fl.u.ip6;
1297
1298 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1299 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1300
1301
1302 fl6->saddr = p->laddr;
1303 fl6->daddr = p->raddr;
1304 fl6->flowi6_oif = p->link;
1305 fl6->flowlabel = 0;
1306
1307 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1308 fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1309 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1310 fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1311
1312 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1313 p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1314
1315 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1316 dev->flags |= IFF_POINTOPOINT;
1317 else
1318 dev->flags &= ~IFF_POINTOPOINT;
1319
1320 if (p->flags & IP6_TNL_F_CAP_XMIT) {
1321 int strict = (ipv6_addr_type(&p->raddr) &
1322 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1323
1324 struct rt6_info *rt = rt6_lookup(t->net,
1325 &p->raddr, &p->laddr,
1326 p->link, strict);
1327
1328 if (rt == NULL)
1329 return;
1330
1331 if (rt->dst.dev) {
1332 dev->hard_header_len = rt->dst.dev->hard_header_len +
1333 sizeof (struct ipv6hdr);
1334
1335 dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
1336 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1337 dev->mtu-=8;
1338
1339 if (dev->mtu < IPV6_MIN_MTU)
1340 dev->mtu = IPV6_MIN_MTU;
1341 }
1342 ip6_rt_put(rt);
1343 }
1344}
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355static int
1356ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1357{
1358 t->parms.laddr = p->laddr;
1359 t->parms.raddr = p->raddr;
1360 t->parms.flags = p->flags;
1361 t->parms.hop_limit = p->hop_limit;
1362 t->parms.encap_limit = p->encap_limit;
1363 t->parms.flowinfo = p->flowinfo;
1364 t->parms.link = p->link;
1365 t->parms.proto = p->proto;
1366 dst_cache_reset(&t->dst_cache);
1367 ip6_tnl_link_config(t);
1368 return 0;
1369}
1370
1371static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1372{
1373 struct net *net = t->net;
1374 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1375 int err;
1376
1377 ip6_tnl_unlink(ip6n, t);
1378 synchronize_net();
1379 err = ip6_tnl_change(t, p);
1380 ip6_tnl_link(ip6n, t);
1381 netdev_state_change(t->dev);
1382 return err;
1383}
1384
/**
 * ip6_tnl0_update - restricted update used for the fallback tunnel device
 *   @t: tunnel to be changed
 *   @p: new tunnel parameters
 *
 * Description:
 *   Only the protocol is taken from @p; every other field of @p is ignored.
 *   A device state change is signalled afterwards.
 *
 * Return: always 0
 **/
static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
	/* only the proto may be changed through this path */
	t->parms.proto = p->proto;
	netdev_state_change(t->dev);
	return 0;
}
1392
1393static void
1394ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1395{
1396 p->laddr = u->laddr;
1397 p->raddr = u->raddr;
1398 p->flags = u->flags;
1399 p->hop_limit = u->hop_limit;
1400 p->encap_limit = u->encap_limit;
1401 p->flowinfo = u->flowinfo;
1402 p->link = u->link;
1403 p->proto = u->proto;
1404 memcpy(p->name, u->name, sizeof(u->name));
1405}
1406
1407static void
1408ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1409{
1410 u->laddr = p->laddr;
1411 u->raddr = p->raddr;
1412 u->flags = p->flags;
1413 u->hop_limit = p->hop_limit;
1414 u->encap_limit = p->encap_limit;
1415 u->flowinfo = p->flowinfo;
1416 u->link = p->link;
1417 u->proto = p->proto;
1418 memcpy(u->name, p->name, sizeof(u->name));
1419}
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
/**
 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
 *   @dev: virtual device associated with the ioctl
 *   @ifr: request whose ifr_ifru.ifru_data points at a user-space
 *         struct ip6_tnl_parm
 *   @cmd: command to be performed
 *
 * Description:
 *   Handles the following commands:
 *
 *   %SIOCGETTUNNEL: copy a tunnel's parameters to userspace. On the
 *   fallback device the parameters supplied by the caller select the
 *   tunnel (the fallback tunnel itself when the lookup fails); on any
 *   other device that device's own tunnel is reported.
 *
 *   %SIOCADDTUNNEL: look up the tunnel described by the caller with a
 *   non-zero third argument to ip6_tnl_locate() (presumably "create if
 *   missing" - confirm against ip6_tnl_locate()) and report its parameters.
 *
 *   %SIOCCHGTUNNEL: update @dev's tunnel with the supplied parameters
 *   (only the protocol on the fallback device) and report the result.
 *
 *   %SIOCDELTUNNEL: unregister @dev, or the tunnel selected by the
 *   supplied parameters when @dev is the fallback device.
 *
 * Return:
 *   0 on success,
 *   %-EFAULT if a copy from or to userspace fails,
 *   %-EPERM if the caller lacks CAP_NET_ADMIN in the user namespace of the
 *   tunnel's net, or asks to delete the fallback device,
 *   %-EEXIST if the changed parameters match a tunnel on another device,
 *   %-ENOENT if the tunnel to delete is not found,
 *   %-EINVAL for an unsupported protocol or an unknown command,
 *   or the error encoded in ip6_tnl_locate()'s result when adding fails.
 **/
static int
ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip6_tnl_parm p;
	struct __ip6_tnl_parm p1;
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

	/* p1 has fields that ip6_tnl_parm_from_user() does not set */
	memset(&p1, 0, sizeof(p1));

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == ip6n->fb_tnl_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
				err = -EFAULT;
				break;
			}
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			/* no match: report the fallback tunnel instead */
			if (IS_ERR(t))
				t = netdev_priv(dev);
		} else {
			/* nothing is read from userspace on this path */
			memset(&p, 0, sizeof(p));
		}
		ip6_tnl_parm_to_user(&p, &t->parms);
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
			err = -EFAULT;
		}
		break;
	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		/* err is preloaded before each check so a bare break reports it */
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;
		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
			break;
		err = -EINVAL;
		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
		    p.proto != 0)
			break;
		ip6_tnl_parm_from_user(&p1, &p);
		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
		if (cmd == SIOCCHGTUNNEL) {
			if (!IS_ERR(t)) {
				/* the new parameters already belong to another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else
				t = netdev_priv(dev);
			if (dev == ip6n->fb_tnl_dev)
				err = ip6_tnl0_update(t, &p1);
			else
				err = ip6_tnl_update(t, &p1);
		}
		/*
		 * NOTE(review): on SIOCCHGTUNNEL t is never an error pointer
		 * here, so the update's return value is overwritten by the
		 * err = 0 below.
		 */
		if (!IS_ERR(t)) {
			err = 0;
			ip6_tnl_parm_to_user(&p, &t->parms);
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
				err = -EFAULT;

		} else {
			err = PTR_ERR(t);
		}
		break;
	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			break;

		if (dev == ip6n->fb_tnl_dev) {
			/* on the fallback device the victim is named by the caller */
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
				break;
			err = -ENOENT;
			ip6_tnl_parm_from_user(&p1, &p);
			t = ip6_tnl_locate(net, &p1, 0);
			if (IS_ERR(t))
				break;
			/* the fallback device itself is not deleted */
			err = -EPERM;
			if (t->dev == ip6n->fb_tnl_dev)
				break;
			dev = t->dev;
		}
		err = 0;
		unregister_netdevice(dev);
		break;
	default:
		err = -EINVAL;
	}
	return err;
}
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1556{
1557 struct ip6_tnl *tnl = netdev_priv(dev);
1558
1559 if (tnl->parms.proto == IPPROTO_IPIP) {
1560 if (new_mtu < 68)
1561 return -EINVAL;
1562 } else {
1563 if (new_mtu < IPV6_MIN_MTU)
1564 return -EINVAL;
1565 }
1566 if (new_mtu > 0xFFF8 - dev->hard_header_len)
1567 return -EINVAL;
1568 dev->mtu = new_mtu;
1569 return 0;
1570}
1571EXPORT_SYMBOL(ip6_tnl_change_mtu);
1572
1573int ip6_tnl_get_iflink(const struct net_device *dev)
1574{
1575 struct ip6_tnl *t = netdev_priv(dev);
1576
1577 return t->parms.link;
1578}
1579EXPORT_SYMBOL(ip6_tnl_get_iflink);
1580
/* Device operations installed on every tunnel device by ip6_tnl_dev_setup() */
static const struct net_device_ops ip6_tnl_netdev_ops = {
	.ndo_init = ip6_tnl_dev_init,
	.ndo_uninit = ip6_tnl_dev_uninit,
	.ndo_start_xmit = ip6_tnl_start_xmit,
	.ndo_do_ioctl = ip6_tnl_ioctl,
	.ndo_change_mtu = ip6_tnl_change_mtu,
	.ndo_get_stats = ip6_get_stats,
	.ndo_get_iflink = ip6_tnl_get_iflink,
};
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600static void ip6_tnl_dev_setup(struct net_device *dev)
1601{
1602 struct ip6_tnl *t;
1603
1604 dev->netdev_ops = &ip6_tnl_netdev_ops;
1605 dev->destructor = ip6_dev_free;
1606
1607 dev->type = ARPHRD_TUNNEL6;
1608 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1609 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1610 t = netdev_priv(dev);
1611 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1612 dev->mtu-=8;
1613 dev->flags |= IFF_NOARP;
1614 dev->addr_len = sizeof(struct in6_addr);
1615 netif_keep_dst(dev);
1616
1617 dev->addr_assign_type = NET_ADDR_RANDOM;
1618 eth_random_addr(dev->perm_addr);
1619}
1620
1621
1622
1623
1624
1625
1626
/**
 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Binds the private tunnel state to @dev and its net, then allocates the
 *   per-cpu stats, the dst cache and the GRO cells. Resources acquired
 *   before a failure are released again before returning.
 *
 * Return:
 *   0 on success,
 *   %-ENOMEM if the per-cpu stats cannot be allocated,
 *   otherwise the error from dst_cache_init() or gro_cells_init()
 **/
static inline int
ip6_tnl_dev_init_gen(struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	int ret;

	t->dev = dev;
	t->net = dev_net(dev);
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
	if (ret)
		goto free_stats;

	ret = gro_cells_init(&t->gro_cells, dev);
	if (ret)
		goto destroy_dst;

	t->hlen = 0;
	t->tun_hlen = 0;

	return 0;

	/* unwind in reverse order of acquisition */
destroy_dst:
	dst_cache_destroy(&t->dst_cache);
free_stats:
	free_percpu(dev->tstats);
	/* clear the pointer so the freed stats are not referenced again */
	dev->tstats = NULL;

	return ret;
}
1660
1661
1662
1663
1664
1665
/**
 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
 *   @dev: virtual device associated with tunnel
 *
 * Description:
 *   Runs the generic initializer and, when it succeeds, configures the
 *   link from the tunnel parameters.
 *
 * Return: 0 on success, otherwise the error from ip6_tnl_dev_init_gen()
 **/
static int ip6_tnl_dev_init(struct net_device *dev)
{
	int err;

	err = ip6_tnl_dev_init_gen(dev);
	if (!err)
		ip6_tnl_link_config(netdev_priv(dev));

	return err;
}
1676
1677
1678
1679
1680
1681
1682
1683
1684static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1685{
1686 struct ip6_tnl *t = netdev_priv(dev);
1687 struct net *net = dev_net(dev);
1688 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1689
1690 t->parms.proto = IPPROTO_IPV6;
1691 dev_hold(dev);
1692
1693 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1694 return 0;
1695}
1696
1697static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
1698{
1699 u8 proto;
1700
1701 if (!data)
1702 return 0;
1703
1704 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1705 if (proto != IPPROTO_IPV6 &&
1706 proto != IPPROTO_IPIP &&
1707 proto != 0)
1708 return -EINVAL;
1709
1710 return 0;
1711}
1712
1713static void ip6_tnl_netlink_parms(struct nlattr *data[],
1714 struct __ip6_tnl_parm *parms)
1715{
1716 memset(parms, 0, sizeof(*parms));
1717
1718 if (!data)
1719 return;
1720
1721 if (data[IFLA_IPTUN_LINK])
1722 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1723
1724 if (data[IFLA_IPTUN_LOCAL])
1725 parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
1726
1727 if (data[IFLA_IPTUN_REMOTE])
1728 parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
1729
1730 if (data[IFLA_IPTUN_TTL])
1731 parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
1732
1733 if (data[IFLA_IPTUN_ENCAP_LIMIT])
1734 parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
1735
1736 if (data[IFLA_IPTUN_FLOWINFO])
1737 parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1738
1739 if (data[IFLA_IPTUN_FLAGS])
1740 parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
1741
1742 if (data[IFLA_IPTUN_PROTO])
1743 parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1744}
1745
1746static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1747 struct nlattr *tb[], struct nlattr *data[])
1748{
1749 struct net *net = dev_net(dev);
1750 struct ip6_tnl *nt, *t;
1751
1752 nt = netdev_priv(dev);
1753 ip6_tnl_netlink_parms(data, &nt->parms);
1754
1755 t = ip6_tnl_locate(net, &nt->parms, 0);
1756 if (!IS_ERR(t))
1757 return -EEXIST;
1758
1759 return ip6_tnl_create2(dev);
1760}
1761
1762static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
1763 struct nlattr *data[])
1764{
1765 struct ip6_tnl *t = netdev_priv(dev);
1766 struct __ip6_tnl_parm p;
1767 struct net *net = t->net;
1768 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1769
1770 if (dev == ip6n->fb_tnl_dev)
1771 return -EINVAL;
1772
1773 ip6_tnl_netlink_parms(data, &p);
1774
1775 t = ip6_tnl_locate(net, &p, 0);
1776 if (!IS_ERR(t)) {
1777 if (t->dev != dev)
1778 return -EEXIST;
1779 } else
1780 t = netdev_priv(dev);
1781
1782 return ip6_tnl_update(t, &p);
1783}
1784
1785static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
1786{
1787 struct net *net = dev_net(dev);
1788 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1789
1790 if (dev != ip6n->fb_tnl_dev)
1791 unregister_netdevice_queue(dev, head);
1792}
1793
1794static size_t ip6_tnl_get_size(const struct net_device *dev)
1795{
1796 return
1797
1798 nla_total_size(4) +
1799
1800 nla_total_size(sizeof(struct in6_addr)) +
1801
1802 nla_total_size(sizeof(struct in6_addr)) +
1803
1804 nla_total_size(1) +
1805
1806 nla_total_size(1) +
1807
1808 nla_total_size(4) +
1809
1810 nla_total_size(4) +
1811
1812 nla_total_size(1) +
1813 0;
1814}
1815
1816static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1817{
1818 struct ip6_tnl *tunnel = netdev_priv(dev);
1819 struct __ip6_tnl_parm *parm = &tunnel->parms;
1820
1821 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1822 nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
1823 nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
1824 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
1825 nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
1826 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1827 nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
1828 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
1829 goto nla_put_failure;
1830 return 0;
1831
1832nla_put_failure:
1833 return -EMSGSIZE;
1834}
1835
1836struct net *ip6_tnl_get_link_net(const struct net_device *dev)
1837{
1838 struct ip6_tnl *tunnel = netdev_priv(dev);
1839
1840 return tunnel->net;
1841}
1842EXPORT_SYMBOL(ip6_tnl_get_link_net);
1843
/*
 * Netlink attribute policy for the ip6tnl link kind: the two endpoint
 * addresses are given by length, every other attribute by integer type.
 */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK] = { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE] = { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL] = { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT] = { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS] = { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
};
1854
/* rtnl link operations for the "ip6tnl" kind; registered in ip6_tunnel_init() */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind = "ip6tnl",
	.maxtype = IFLA_IPTUN_MAX,
	.policy = ip6_tnl_policy,
	.priv_size = sizeof(struct ip6_tnl),
	.setup = ip6_tnl_dev_setup,
	.validate = ip6_tnl_validate,
	.newlink = ip6_tnl_newlink,
	.changelink = ip6_tnl_changelink,
	.dellink = ip6_tnl_dellink,
	.get_size = ip6_tnl_get_size,
	.fill_info = ip6_tnl_fill_info,
	.get_link_net = ip6_tnl_get_link_net,
};
1869
/* Receive and error handlers registered for AF_INET in ip6_tunnel_init() */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler = ip4ip6_rcv,
	.err_handler = ip4ip6_err,
	.priority = 1,
};
1875
/* Receive and error handlers registered for AF_INET6 in ip6_tunnel_init() */
static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler = ip6ip6_rcv,
	.err_handler = ip6ip6_err,
	.priority = 1,
};
1881
1882static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
1883{
1884 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1885 struct net_device *dev, *aux;
1886 int h;
1887 struct ip6_tnl *t;
1888 LIST_HEAD(list);
1889
1890 for_each_netdev_safe(net, dev, aux)
1891 if (dev->rtnl_link_ops == &ip6_link_ops)
1892 unregister_netdevice_queue(dev, &list);
1893
1894 for (h = 0; h < HASH_SIZE; h++) {
1895 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1896 while (t != NULL) {
1897
1898
1899
1900 if (!net_eq(dev_net(t->dev), net))
1901 unregister_netdevice_queue(t->dev, &list);
1902 t = rtnl_dereference(t->next);
1903 }
1904 }
1905
1906 unregister_netdevice_many(&list);
1907}
1908
1909static int __net_init ip6_tnl_init_net(struct net *net)
1910{
1911 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1912 struct ip6_tnl *t = NULL;
1913 int err;
1914
1915 ip6n->tnls[0] = ip6n->tnls_wc;
1916 ip6n->tnls[1] = ip6n->tnls_r_l;
1917
1918 err = -ENOMEM;
1919 ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1920 ip6_tnl_dev_setup);
1921
1922 if (!ip6n->fb_tnl_dev)
1923 goto err_alloc_dev;
1924 dev_net_set(ip6n->fb_tnl_dev, net);
1925 ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
1926
1927
1928
1929 ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
1930
1931 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1932 if (err < 0)
1933 goto err_register;
1934
1935 err = register_netdev(ip6n->fb_tnl_dev);
1936 if (err < 0)
1937 goto err_register;
1938
1939 t = netdev_priv(ip6n->fb_tnl_dev);
1940
1941 strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
1942 return 0;
1943
1944err_register:
1945 ip6_dev_free(ip6n->fb_tnl_dev);
1946err_alloc_dev:
1947 return err;
1948}
1949
/**
 * ip6_tnl_exit_net - per-net teardown of the ip6tnl module
 *   @net: net being torn down
 *
 * Description:
 *   Destroys all tunnels tied to @net while holding the rtnl lock.
 **/
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	rtnl_lock();
	ip6_tnl_destroy_tunnels(net);
	rtnl_unlock();
}
1956
/*
 * Per-net hooks; .id and .size describe the struct ip6_tnl_net instance
 * that the code in this file retrieves with
 * net_generic(net, ip6_tnl_net_id).
 */
static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
	.id = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};
1963
1964
1965
1966
1967
1968
1969
/**
 * ip6_tunnel_init - register protocol and reserve needed resources
 *
 * Description:
 *   Registers, in order, the per-net operations, the ip4ip6 and ip6ip6
 *   tunnel handlers and the rtnl link operations. A failure undoes the
 *   registrations already made, in reverse order.
 *
 * Return: 0 on success, otherwise the error of the step that failed
 **/
static int __init ip6_tunnel_init(void)
{
	int err;

	err = register_pernet_device(&ip6_tnl_net_ops);
	if (err < 0)
		goto out_pernet;

	err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
	if (err < 0) {
		pr_err("%s: can't register ip4ip6\n", __func__);
		goto out_ip4ip6;
	}

	err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
	if (err < 0) {
		pr_err("%s: can't register ip6ip6\n", __func__);
		goto out_ip6ip6;
	}
	err = rtnl_link_register(&ip6_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	return 0;

	/* each label undoes the step that succeeded before the failing one */
rtnl_link_failed:
	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
out_ip6ip6:
	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
out_ip4ip6:
	unregister_pernet_device(&ip6_tnl_net_ops);
out_pernet:
	return err;
}
2004
2005
2006
2007
2008
2009static void __exit ip6_tunnel_cleanup(void)
2010{
2011 rtnl_link_unregister(&ip6_link_ops);
2012 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
2013 pr_info("%s: can't deregister ip4ip6\n", __func__);
2014
2015 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
2016 pr_info("%s: can't deregister ip6ip6\n", __func__);
2017
2018 unregister_pernet_device(&ip6_tnl_net_ops);
2019}
2020
/* Module entry and exit points */
module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);
2023