1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/module.h>
22#include <linux/capability.h>
23#include <linux/errno.h>
24#include <linux/types.h>
25#include <linux/sockios.h>
26#include <linux/icmp.h>
27#include <linux/if.h>
28#include <linux/in.h>
29#include <linux/ip.h>
30#include <linux/if_tunnel.h>
31#include <linux/net.h>
32#include <linux/in6.h>
33#include <linux/netdevice.h>
34#include <linux/if_arp.h>
35#include <linux/icmpv6.h>
36#include <linux/init.h>
37#include <linux/route.h>
38#include <linux/rtnetlink.h>
39#include <linux/netfilter_ipv6.h>
40
41#include <asm/uaccess.h>
42#include <asm/atomic.h>
43
44#include <net/icmp.h>
45#include <net/ip.h>
46#include <net/ipv6.h>
47#include <net/ip6_route.h>
48#include <net/addrconf.h>
49#include <net/ip6_tunnel.h>
50#include <net/xfrm.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/net_namespace.h>
54#include <net/netns/generic.h>
55
56MODULE_AUTHOR("Ville Nuorvala");
57MODULE_DESCRIPTION("IPv6 tunneling device");
58MODULE_LICENSE("GPL");
59
/* NOTE(review): presumably the size in bytes of the destination options
 * header that carries the tunnel encapsulation limit - confirm. */
#define IPV6_TLV_TEL_DST_SIZE 8

/* Debug tracing; expands to an empty statement unless IP6_TNL_DEBUG is set. */
#ifndef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) do {;} while(0)
#else
#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
#endif

/* Traffic class bits inside the 32-bit flow info word. */
#define IPV6_TCLASS_SHIFT 20
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)

/* Number of buckets in the (remote, local) tunnel hash table. */
#define HASH_SIZE 32

/* Fold the four 32-bit words of an IPv6 address into a bucket index. */
#define HASH(addr) \
	((__force u32)((addr)->s6_addr32[0] ^ \
		       (addr)->s6_addr32[1] ^ \
		       (addr)->s6_addr32[2] ^ \
		       (addr)->s6_addr32[3]) & \
	 (HASH_SIZE - 1))
76
/* Forward declarations of the device setup/init helpers defined below. */
static void ip6_tnl_dev_setup(struct net_device *dev);
static void ip6_tnl_dev_init(struct net_device *dev);
static void ip6_fb_tnl_dev_init(struct net_device *dev);
80
81static int ip6_tnl_net_id;
82struct ip6_tnl_net {
83
84 struct net_device *fb_tnl_dev;
85
86 struct ip6_tnl *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1];
88 struct ip6_tnl **tnls[2];
89};
90
91
92static DEFINE_RWLOCK(ip6_tnl_lock);
93
94static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
95{
96 struct dst_entry *dst = t->dst_cache;
97
98 if (dst && dst->obsolete &&
99 dst->ops->check(dst, t->dst_cookie) == NULL) {
100 t->dst_cache = NULL;
101 dst_release(dst);
102 return NULL;
103 }
104
105 return dst;
106}
107
108static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
109{
110 dst_release(t->dst_cache);
111 t->dst_cache = NULL;
112}
113
114static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
115{
116 struct rt6_info *rt = (struct rt6_info *) dst;
117 t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
118 dst_release(t->dst_cache);
119 t->dst_cache = dst;
120}
121
122
123
124
125
126
127
128
129
130
131
132
133static struct ip6_tnl *
134ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
135{
136 unsigned h0 = HASH(remote);
137 unsigned h1 = HASH(local);
138 struct ip6_tnl *t;
139 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
140
141 for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
142 if (ipv6_addr_equal(local, &t->parms.laddr) &&
143 ipv6_addr_equal(remote, &t->parms.raddr) &&
144 (t->dev->flags & IFF_UP))
145 return t;
146 }
147 if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
148 return t;
149
150 return NULL;
151}
152
153
154
155
156
157
158
159
160
161
162
163
164static struct ip6_tnl **
165ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
166{
167 struct in6_addr *remote = &p->raddr;
168 struct in6_addr *local = &p->laddr;
169 unsigned h = 0;
170 int prio = 0;
171
172 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
173 prio = 1;
174 h = HASH(remote) ^ HASH(local);
175 }
176 return &ip6n->tnls[prio][h];
177}
178
179
180
181
182
183
184static void
185ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
186{
187 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
188
189 t->next = *tp;
190 write_lock_bh(&ip6_tnl_lock);
191 *tp = t;
192 write_unlock_bh(&ip6_tnl_lock);
193}
194
195
196
197
198
199
200static void
201ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
202{
203 struct ip6_tnl **tp;
204
205 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
206 if (t == *tp) {
207 write_lock_bh(&ip6_tnl_lock);
208 *tp = t->next;
209 write_unlock_bh(&ip6_tnl_lock);
210 break;
211 }
212 }
213}
214
215
216
217
218
219
220
221
222
223
224
225
226
227static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
228{
229 struct net_device *dev;
230 struct ip6_tnl *t;
231 char name[IFNAMSIZ];
232 int err;
233 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
234
235 if (p->name[0])
236 strlcpy(name, p->name, IFNAMSIZ);
237 else
238 sprintf(name, "ip6tnl%%d");
239
240 dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
241 if (dev == NULL)
242 goto failed;
243
244 dev_net_set(dev, net);
245
246 if (strchr(name, '%')) {
247 if (dev_alloc_name(dev, name) < 0)
248 goto failed_free;
249 }
250
251 t = netdev_priv(dev);
252 t->parms = *p;
253 ip6_tnl_dev_init(dev);
254
255 if ((err = register_netdevice(dev)) < 0)
256 goto failed_free;
257
258 dev_hold(dev);
259 ip6_tnl_link(ip6n, t);
260 return t;
261
262failed_free:
263 free_netdev(dev);
264failed:
265 return NULL;
266}
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282static struct ip6_tnl *ip6_tnl_locate(struct net *net,
283 struct ip6_tnl_parm *p, int create)
284{
285 struct in6_addr *remote = &p->raddr;
286 struct in6_addr *local = &p->laddr;
287 struct ip6_tnl *t;
288 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
289
290 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) {
291 if (ipv6_addr_equal(local, &t->parms.laddr) &&
292 ipv6_addr_equal(remote, &t->parms.raddr))
293 return t;
294 }
295 if (!create)
296 return NULL;
297 return ip6_tnl_create(net, p);
298}
299
300
301
302
303
304
305
306
307
308static void
309ip6_tnl_dev_uninit(struct net_device *dev)
310{
311 struct ip6_tnl *t = netdev_priv(dev);
312 struct net *net = dev_net(dev);
313 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
314
315 if (dev == ip6n->fb_tnl_dev) {
316 write_lock_bh(&ip6_tnl_lock);
317 ip6n->tnls_wc[0] = NULL;
318 write_unlock_bh(&ip6_tnl_lock);
319 } else {
320 ip6_tnl_unlink(ip6n, t);
321 }
322 ip6_tnl_dst_reset(t);
323 dev_put(dev);
324}
325
326
327
328
329
330
331
332
333
334
335static __u16
336parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
337{
338 struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
339 __u8 nexthdr = ipv6h->nexthdr;
340 __u16 off = sizeof (*ipv6h);
341
342 while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
343 __u16 optlen = 0;
344 struct ipv6_opt_hdr *hdr;
345 if (raw + off + sizeof (*hdr) > skb->data &&
346 !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
347 break;
348
349 hdr = (struct ipv6_opt_hdr *) (raw + off);
350 if (nexthdr == NEXTHDR_FRAGMENT) {
351 struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
352 if (frag_hdr->frag_off)
353 break;
354 optlen = 8;
355 } else if (nexthdr == NEXTHDR_AUTH) {
356 optlen = (hdr->hdrlen + 2) << 2;
357 } else {
358 optlen = ipv6_optlen(hdr);
359 }
360 if (nexthdr == NEXTHDR_DEST) {
361 __u16 i = off + 2;
362 while (1) {
363 struct ipv6_tlv_tnl_enc_lim *tel;
364
365
366 if (i + sizeof (*tel) > off + optlen)
367 break;
368
369 tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
370
371 if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
372 tel->length == 1)
373 return i;
374
375 if (tel->type)
376 i += tel->length + 2;
377 else
378 i++;
379 }
380 }
381 nexthdr = hdr->nexthdr;
382 off += optlen;
383 }
384 return 0;
385}
386
387
388
389
390
391
392
393
394
395static int
396ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
397 u8 *type, u8 *code, int *msg, __u32 *info, int offset)
398{
399 struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
400 struct ip6_tnl *t;
401 int rel_msg = 0;
402 u8 rel_type = ICMPV6_DEST_UNREACH;
403 u8 rel_code = ICMPV6_ADDR_UNREACH;
404 __u32 rel_info = 0;
405 __u16 len;
406 int err = -ENOENT;
407
408
409
410
411
412 read_lock(&ip6_tnl_lock);
413 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
414 &ipv6h->saddr)) == NULL)
415 goto out;
416
417 if (t->parms.proto != ipproto && t->parms.proto != 0)
418 goto out;
419
420 err = 0;
421
422 switch (*type) {
423 __u32 teli;
424 struct ipv6_tlv_tnl_enc_lim *tel;
425 __u32 mtu;
426 case ICMPV6_DEST_UNREACH:
427 if (net_ratelimit())
428 printk(KERN_WARNING
429 "%s: Path to destination invalid "
430 "or inactive!\n", t->parms.name);
431 rel_msg = 1;
432 break;
433 case ICMPV6_TIME_EXCEED:
434 if ((*code) == ICMPV6_EXC_HOPLIMIT) {
435 if (net_ratelimit())
436 printk(KERN_WARNING
437 "%s: Too small hop limit or "
438 "routing loop in tunnel!\n",
439 t->parms.name);
440 rel_msg = 1;
441 }
442 break;
443 case ICMPV6_PARAMPROB:
444 teli = 0;
445 if ((*code) == ICMPV6_HDR_FIELD)
446 teli = parse_tlv_tnl_enc_lim(skb, skb->data);
447
448 if (teli && teli == *info - 2) {
449 tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
450 if (tel->encap_limit == 0) {
451 if (net_ratelimit())
452 printk(KERN_WARNING
453 "%s: Too small encapsulation "
454 "limit or routing loop in "
455 "tunnel!\n", t->parms.name);
456 rel_msg = 1;
457 }
458 } else if (net_ratelimit()) {
459 printk(KERN_WARNING
460 "%s: Recipient unable to parse tunneled "
461 "packet!\n ", t->parms.name);
462 }
463 break;
464 case ICMPV6_PKT_TOOBIG:
465 mtu = *info - offset;
466 if (mtu < IPV6_MIN_MTU)
467 mtu = IPV6_MIN_MTU;
468 t->dev->mtu = mtu;
469
470 if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
471 rel_type = ICMPV6_PKT_TOOBIG;
472 rel_code = 0;
473 rel_info = mtu;
474 rel_msg = 1;
475 }
476 break;
477 }
478
479 *type = rel_type;
480 *code = rel_code;
481 *info = rel_info;
482 *msg = rel_msg;
483
484out:
485 read_unlock(&ip6_tnl_lock);
486 return err;
487}
488
489static int
490ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
491 u8 type, u8 code, int offset, __be32 info)
492{
493 int rel_msg = 0;
494 u8 rel_type = type;
495 u8 rel_code = code;
496 __u32 rel_info = ntohl(info);
497 int err;
498 struct sk_buff *skb2;
499 struct iphdr *eiph;
500 struct flowi fl;
501 struct rtable *rt;
502
503 err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
504 &rel_msg, &rel_info, offset);
505 if (err < 0)
506 return err;
507
508 if (rel_msg == 0)
509 return 0;
510
511 switch (rel_type) {
512 case ICMPV6_DEST_UNREACH:
513 if (rel_code != ICMPV6_ADDR_UNREACH)
514 return 0;
515 rel_type = ICMP_DEST_UNREACH;
516 rel_code = ICMP_HOST_UNREACH;
517 break;
518 case ICMPV6_PKT_TOOBIG:
519 if (rel_code != 0)
520 return 0;
521 rel_type = ICMP_DEST_UNREACH;
522 rel_code = ICMP_FRAG_NEEDED;
523 break;
524 default:
525 return 0;
526 }
527
528 if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
529 return 0;
530
531 skb2 = skb_clone(skb, GFP_ATOMIC);
532 if (!skb2)
533 return 0;
534
535 skb_dst_drop(skb2);
536
537 skb_pull(skb2, offset);
538 skb_reset_network_header(skb2);
539 eiph = ip_hdr(skb2);
540
541
542 memset(&fl, 0, sizeof(fl));
543 fl.fl4_dst = eiph->saddr;
544 fl.fl4_tos = RT_TOS(eiph->tos);
545 fl.proto = IPPROTO_IPIP;
546 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
547 goto out;
548
549 skb2->dev = rt->u.dst.dev;
550
551
552 if (rt->rt_flags & RTCF_LOCAL) {
553 ip_rt_put(rt);
554 rt = NULL;
555 fl.fl4_dst = eiph->daddr;
556 fl.fl4_src = eiph->saddr;
557 fl.fl4_tos = eiph->tos;
558 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
559 rt->u.dst.dev->type != ARPHRD_TUNNEL) {
560 ip_rt_put(rt);
561 goto out;
562 }
563 skb_dst_set(skb2, (struct dst_entry *)rt);
564 } else {
565 ip_rt_put(rt);
566 if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
567 skb2->dev) ||
568 skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
569 goto out;
570 }
571
572
573 if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
574 if (rel_info > dst_mtu(skb_dst(skb2)))
575 goto out;
576
577 skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
578 }
579
580 icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
581
582out:
583 kfree_skb(skb2);
584 return 0;
585}
586
587static int
588ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
589 u8 type, u8 code, int offset, __be32 info)
590{
591 int rel_msg = 0;
592 u8 rel_type = type;
593 u8 rel_code = code;
594 __u32 rel_info = ntohl(info);
595 int err;
596
597 err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
598 &rel_msg, &rel_info, offset);
599 if (err < 0)
600 return err;
601
602 if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
603 struct rt6_info *rt;
604 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
605
606 if (!skb2)
607 return 0;
608
609 skb_dst_drop(skb2);
610 skb_pull(skb2, offset);
611 skb_reset_network_header(skb2);
612
613
614 rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
615 NULL, 0, 0);
616
617 if (rt && rt->rt6i_dev)
618 skb2->dev = rt->rt6i_dev;
619
620 icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
621
622 if (rt)
623 dst_release(&rt->u.dst);
624
625 kfree_skb(skb2);
626 }
627
628 return 0;
629}
630
631static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
632 struct ipv6hdr *ipv6h,
633 struct sk_buff *skb)
634{
635 __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
636
637 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
638 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
639
640 if (INET_ECN_is_ce(dsfield))
641 IP_ECN_set_ce(ip_hdr(skb));
642}
643
644static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
645 struct ipv6hdr *ipv6h,
646 struct sk_buff *skb)
647{
648 if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
649 ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
650
651 if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
652 IP6_ECN_set_ce(ipv6_hdr(skb));
653}
654
655static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
656{
657 struct ip6_tnl_parm *p = &t->parms;
658 int ret = 0;
659 struct net *net = dev_net(t->dev);
660
661 if (p->flags & IP6_TNL_F_CAP_RCV) {
662 struct net_device *ldev = NULL;
663
664 if (p->link)
665 ldev = dev_get_by_index(net, p->link);
666
667 if ((ipv6_addr_is_multicast(&p->laddr) ||
668 likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
669 likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
670 ret = 1;
671
672 if (ldev)
673 dev_put(ldev);
674 }
675 return ret;
676}
677
678
679
680
681
682
683
684
685
686
687static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
688 __u8 ipproto,
689 void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
690 struct ipv6hdr *ipv6h,
691 struct sk_buff *skb))
692{
693 struct ip6_tnl *t;
694 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
695
696 read_lock(&ip6_tnl_lock);
697
698 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
699 &ipv6h->daddr)) != NULL) {
700 if (t->parms.proto != ipproto && t->parms.proto != 0) {
701 read_unlock(&ip6_tnl_lock);
702 goto discard;
703 }
704
705 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
706 read_unlock(&ip6_tnl_lock);
707 goto discard;
708 }
709
710 if (!ip6_tnl_rcv_ctl(t)) {
711 t->dev->stats.rx_dropped++;
712 read_unlock(&ip6_tnl_lock);
713 goto discard;
714 }
715 secpath_reset(skb);
716 skb->mac_header = skb->network_header;
717 skb_reset_network_header(skb);
718 skb->protocol = htons(protocol);
719 skb->pkt_type = PACKET_HOST;
720 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
721 skb->dev = t->dev;
722 skb_dst_drop(skb);
723 nf_reset(skb);
724
725 dscp_ecn_decapsulate(t, ipv6h, skb);
726
727 t->dev->stats.rx_packets++;
728 t->dev->stats.rx_bytes += skb->len;
729 netif_rx(skb);
730 read_unlock(&ip6_tnl_lock);
731 return 0;
732 }
733 read_unlock(&ip6_tnl_lock);
734 return 1;
735
736discard:
737 kfree_skb(skb);
738 return 0;
739}
740
741static int ip4ip6_rcv(struct sk_buff *skb)
742{
743 return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
744 ip4ip6_dscp_ecn_decapsulate);
745}
746
747static int ip6ip6_rcv(struct sk_buff *skb)
748{
749 return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
750 ip6ip6_dscp_ecn_decapsulate);
751}
752
753struct ipv6_tel_txoption {
754 struct ipv6_txoptions ops;
755 __u8 dst_opt[8];
756};
757
758static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
759{
760 memset(opt, 0, sizeof(struct ipv6_tel_txoption));
761
762 opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
763 opt->dst_opt[3] = 1;
764 opt->dst_opt[4] = encap_limit;
765 opt->dst_opt[5] = IPV6_TLV_PADN;
766 opt->dst_opt[6] = 1;
767
768 opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
769 opt->ops.opt_nflen = 8;
770}
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786static inline int
787ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
788{
789 return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
790}
791
792static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
793{
794 struct ip6_tnl_parm *p = &t->parms;
795 int ret = 0;
796 struct net *net = dev_net(t->dev);
797
798 if (p->flags & IP6_TNL_F_CAP_XMIT) {
799 struct net_device *ldev = NULL;
800
801 if (p->link)
802 ldev = dev_get_by_index(net, p->link);
803
804 if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
805 printk(KERN_WARNING
806 "%s xmit: Local address not yet configured!\n",
807 p->name);
808 else if (!ipv6_addr_is_multicast(&p->raddr) &&
809 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
810 printk(KERN_WARNING
811 "%s xmit: Routing loop! "
812 "Remote address found on this node!\n",
813 p->name);
814 else
815 ret = 1;
816 if (ldev)
817 dev_put(ldev);
818 }
819 return ret;
820}
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840static int ip6_tnl_xmit2(struct sk_buff *skb,
841 struct net_device *dev,
842 __u8 dsfield,
843 struct flowi *fl,
844 int encap_limit,
845 __u32 *pmtu)
846{
847 struct net *net = dev_net(dev);
848 struct ip6_tnl *t = netdev_priv(dev);
849 struct net_device_stats *stats = &t->dev->stats;
850 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
851 struct ipv6_tel_txoption opt;
852 struct dst_entry *dst;
853 struct net_device *tdev;
854 int mtu;
855 unsigned int max_headroom = sizeof(struct ipv6hdr);
856 u8 proto;
857 int err = -1;
858 int pkt_len;
859
860 if ((dst = ip6_tnl_dst_check(t)) != NULL)
861 dst_hold(dst);
862 else {
863 dst = ip6_route_output(net, NULL, fl);
864
865 if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
866 goto tx_err_link_failure;
867 }
868
869 tdev = dst->dev;
870
871 if (tdev == dev) {
872 stats->collisions++;
873 if (net_ratelimit())
874 printk(KERN_WARNING
875 "%s: Local routing loop detected!\n",
876 t->parms.name);
877 goto tx_err_dst_release;
878 }
879 mtu = dst_mtu(dst) - sizeof (*ipv6h);
880 if (encap_limit >= 0) {
881 max_headroom += 8;
882 mtu -= 8;
883 }
884 if (mtu < IPV6_MIN_MTU)
885 mtu = IPV6_MIN_MTU;
886 if (skb_dst(skb))
887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
888 if (skb->len > mtu) {
889 *pmtu = mtu;
890 err = -EMSGSIZE;
891 goto tx_err_dst_release;
892 }
893
894
895
896
897 max_headroom += LL_RESERVED_SPACE(tdev);
898
899 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
900 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
901 struct sk_buff *new_skb;
902
903 if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
904 goto tx_err_dst_release;
905
906 if (skb->sk)
907 skb_set_owner_w(new_skb, skb->sk);
908 kfree_skb(skb);
909 skb = new_skb;
910 }
911 skb_dst_drop(skb);
912 skb_dst_set(skb, dst_clone(dst));
913
914 skb->transport_header = skb->network_header;
915
916 proto = fl->proto;
917 if (encap_limit >= 0) {
918 init_tel_txopt(&opt, encap_limit);
919 ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
920 }
921 skb_push(skb, sizeof(struct ipv6hdr));
922 skb_reset_network_header(skb);
923 ipv6h = ipv6_hdr(skb);
924 *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
925 dsfield = INET_ECN_encapsulate(0, dsfield);
926 ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
927 ipv6h->hop_limit = t->parms.hop_limit;
928 ipv6h->nexthdr = proto;
929 ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
930 ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
931 nf_reset(skb);
932 pkt_len = skb->len;
933 err = ip6_local_out(skb);
934
935 if (net_xmit_eval(err) == 0) {
936 stats->tx_bytes += pkt_len;
937 stats->tx_packets++;
938 } else {
939 stats->tx_errors++;
940 stats->tx_aborted_errors++;
941 }
942 ip6_tnl_dst_store(t, dst);
943 return 0;
944tx_err_link_failure:
945 stats->tx_carrier_errors++;
946 dst_link_failure(skb);
947tx_err_dst_release:
948 dst_release(dst);
949 return err;
950}
951
952static inline int
953ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
954{
955 struct ip6_tnl *t = netdev_priv(dev);
956 struct iphdr *iph = ip_hdr(skb);
957 int encap_limit = -1;
958 struct flowi fl;
959 __u8 dsfield;
960 __u32 mtu;
961 int err;
962
963 if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
964 !ip6_tnl_xmit_ctl(t))
965 return -1;
966
967 if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
968 encap_limit = t->parms.encap_limit;
969
970 memcpy(&fl, &t->fl, sizeof (fl));
971 fl.proto = IPPROTO_IPIP;
972
973 dsfield = ipv4_get_dsfield(iph);
974
975 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
976 fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
977 & IPV6_TCLASS_MASK;
978
979 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
980 if (err != 0) {
981
982 if (err == -EMSGSIZE)
983 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
984 htonl(mtu));
985 return -1;
986 }
987
988 return 0;
989}
990
991static inline int
992ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
993{
994 struct ip6_tnl *t = netdev_priv(dev);
995 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
996 int encap_limit = -1;
997 __u16 offset;
998 struct flowi fl;
999 __u8 dsfield;
1000 __u32 mtu;
1001 int err;
1002
1003 if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
1004 !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1005 return -1;
1006
1007 offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
1008 if (offset > 0) {
1009 struct ipv6_tlv_tnl_enc_lim *tel;
1010 tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1011 if (tel->encap_limit == 0) {
1012 icmpv6_send(skb, ICMPV6_PARAMPROB,
1013 ICMPV6_HDR_FIELD, offset + 2, skb->dev);
1014 return -1;
1015 }
1016 encap_limit = tel->encap_limit - 1;
1017 } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1018 encap_limit = t->parms.encap_limit;
1019
1020 memcpy(&fl, &t->fl, sizeof (fl));
1021 fl.proto = IPPROTO_IPV6;
1022
1023 dsfield = ipv6_get_dsfield(ipv6h);
1024 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1025 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1026 if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1027 fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1028
1029 err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
1030 if (err != 0) {
1031 if (err == -EMSGSIZE)
1032 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1033 return -1;
1034 }
1035
1036 return 0;
1037}
1038
1039static netdev_tx_t
1040ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1041{
1042 struct ip6_tnl *t = netdev_priv(dev);
1043 struct net_device_stats *stats = &t->dev->stats;
1044 int ret;
1045
1046 switch (skb->protocol) {
1047 case htons(ETH_P_IP):
1048 ret = ip4ip6_tnl_xmit(skb, dev);
1049 break;
1050 case htons(ETH_P_IPV6):
1051 ret = ip6ip6_tnl_xmit(skb, dev);
1052 break;
1053 default:
1054 goto tx_err;
1055 }
1056
1057 if (ret < 0)
1058 goto tx_err;
1059
1060 return NETDEV_TX_OK;
1061
1062tx_err:
1063 stats->tx_errors++;
1064 stats->tx_dropped++;
1065 kfree_skb(skb);
1066 return NETDEV_TX_OK;
1067}
1068
1069static void ip6_tnl_set_cap(struct ip6_tnl *t)
1070{
1071 struct ip6_tnl_parm *p = &t->parms;
1072 int ltype = ipv6_addr_type(&p->laddr);
1073 int rtype = ipv6_addr_type(&p->raddr);
1074
1075 p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
1076
1077 if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1078 rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1079 !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
1080 (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
1081 if (ltype&IPV6_ADDR_UNICAST)
1082 p->flags |= IP6_TNL_F_CAP_XMIT;
1083 if (rtype&IPV6_ADDR_UNICAST)
1084 p->flags |= IP6_TNL_F_CAP_RCV;
1085 }
1086}
1087
1088static void ip6_tnl_link_config(struct ip6_tnl *t)
1089{
1090 struct net_device *dev = t->dev;
1091 struct ip6_tnl_parm *p = &t->parms;
1092 struct flowi *fl = &t->fl;
1093
1094 memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1095 memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1096
1097
1098 ipv6_addr_copy(&fl->fl6_src, &p->laddr);
1099 ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
1100 fl->oif = p->link;
1101 fl->fl6_flowlabel = 0;
1102
1103 if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1104 fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1105 if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1106 fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1107
1108 ip6_tnl_set_cap(t);
1109
1110 if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1111 dev->flags |= IFF_POINTOPOINT;
1112 else
1113 dev->flags &= ~IFF_POINTOPOINT;
1114
1115 dev->iflink = p->link;
1116
1117 if (p->flags & IP6_TNL_F_CAP_XMIT) {
1118 int strict = (ipv6_addr_type(&p->raddr) &
1119 (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1120
1121 struct rt6_info *rt = rt6_lookup(dev_net(dev),
1122 &p->raddr, &p->laddr,
1123 p->link, strict);
1124
1125 if (rt == NULL)
1126 return;
1127
1128 if (rt->rt6i_dev) {
1129 dev->hard_header_len = rt->rt6i_dev->hard_header_len +
1130 sizeof (struct ipv6hdr);
1131
1132 dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
1133
1134 if (dev->mtu < IPV6_MIN_MTU)
1135 dev->mtu = IPV6_MIN_MTU;
1136 }
1137 dst_release(&rt->u.dst);
1138 }
1139}
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150static int
1151ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1152{
1153 ipv6_addr_copy(&t->parms.laddr, &p->laddr);
1154 ipv6_addr_copy(&t->parms.raddr, &p->raddr);
1155 t->parms.flags = p->flags;
1156 t->parms.hop_limit = p->hop_limit;
1157 t->parms.encap_limit = p->encap_limit;
1158 t->parms.flowinfo = p->flowinfo;
1159 t->parms.link = p->link;
1160 t->parms.proto = p->proto;
1161 ip6_tnl_dst_reset(t);
1162 ip6_tnl_link_config(t);
1163 return 0;
1164}
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194static int
1195ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1196{
1197 int err = 0;
1198 struct ip6_tnl_parm p;
1199 struct ip6_tnl *t = NULL;
1200 struct net *net = dev_net(dev);
1201 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1202
1203 switch (cmd) {
1204 case SIOCGETTUNNEL:
1205 if (dev == ip6n->fb_tnl_dev) {
1206 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
1207 err = -EFAULT;
1208 break;
1209 }
1210 t = ip6_tnl_locate(net, &p, 0);
1211 }
1212 if (t == NULL)
1213 t = netdev_priv(dev);
1214 memcpy(&p, &t->parms, sizeof (p));
1215 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1216 err = -EFAULT;
1217 }
1218 break;
1219 case SIOCADDTUNNEL:
1220 case SIOCCHGTUNNEL:
1221 err = -EPERM;
1222 if (!capable(CAP_NET_ADMIN))
1223 break;
1224 err = -EFAULT;
1225 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1226 break;
1227 err = -EINVAL;
1228 if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1229 p.proto != 0)
1230 break;
1231 t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
1232 if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1233 if (t != NULL) {
1234 if (t->dev != dev) {
1235 err = -EEXIST;
1236 break;
1237 }
1238 } else
1239 t = netdev_priv(dev);
1240
1241 ip6_tnl_unlink(ip6n, t);
1242 err = ip6_tnl_change(t, &p);
1243 ip6_tnl_link(ip6n, t);
1244 netdev_state_change(dev);
1245 }
1246 if (t) {
1247 err = 0;
1248 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
1249 err = -EFAULT;
1250
1251 } else
1252 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1253 break;
1254 case SIOCDELTUNNEL:
1255 err = -EPERM;
1256 if (!capable(CAP_NET_ADMIN))
1257 break;
1258
1259 if (dev == ip6n->fb_tnl_dev) {
1260 err = -EFAULT;
1261 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1262 break;
1263 err = -ENOENT;
1264 if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
1265 break;
1266 err = -EPERM;
1267 if (t->dev == ip6n->fb_tnl_dev)
1268 break;
1269 dev = t->dev;
1270 }
1271 err = 0;
1272 unregister_netdevice(dev);
1273 break;
1274 default:
1275 err = -EINVAL;
1276 }
1277 return err;
1278}
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290static int
1291ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1292{
1293 if (new_mtu < IPV6_MIN_MTU) {
1294 return -EINVAL;
1295 }
1296 dev->mtu = new_mtu;
1297 return 0;
1298}
1299
1300
1301static const struct net_device_ops ip6_tnl_netdev_ops = {
1302 .ndo_uninit = ip6_tnl_dev_uninit,
1303 .ndo_start_xmit = ip6_tnl_xmit,
1304 .ndo_do_ioctl = ip6_tnl_ioctl,
1305 .ndo_change_mtu = ip6_tnl_change_mtu,
1306};
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316static void ip6_tnl_dev_setup(struct net_device *dev)
1317{
1318 dev->netdev_ops = &ip6_tnl_netdev_ops;
1319 dev->destructor = free_netdev;
1320
1321 dev->type = ARPHRD_TUNNEL6;
1322 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1323 dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1324 dev->flags |= IFF_NOARP;
1325 dev->addr_len = sizeof(struct in6_addr);
1326 dev->features |= NETIF_F_NETNS_LOCAL;
1327}
1328
1329
1330
1331
1332
1333
1334
1335static inline void
1336ip6_tnl_dev_init_gen(struct net_device *dev)
1337{
1338 struct ip6_tnl *t = netdev_priv(dev);
1339 t->dev = dev;
1340 strcpy(t->parms.name, dev->name);
1341}
1342
1343
1344
1345
1346
1347
/*
 * Initialise a regular (non-fallback) tunnel device: generic setup
 * followed by configuration from the tunnel parameters.
 */
static void ip6_tnl_dev_init(struct net_device *dev)
{
	ip6_tnl_dev_init_gen(dev);
	ip6_tnl_link_config(netdev_priv(dev));
}
1354
1355
1356
1357
1358
1359
1360
1361
1362static void ip6_fb_tnl_dev_init(struct net_device *dev)
1363{
1364 struct ip6_tnl *t = netdev_priv(dev);
1365 struct net *net = dev_net(dev);
1366 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1367
1368 ip6_tnl_dev_init_gen(dev);
1369 t->parms.proto = IPPROTO_IPV6;
1370 dev_hold(dev);
1371 ip6n->tnls_wc[0] = t;
1372}
1373
1374static struct xfrm6_tunnel ip4ip6_handler = {
1375 .handler = ip4ip6_rcv,
1376 .err_handler = ip4ip6_err,
1377 .priority = 1,
1378};
1379
1380static struct xfrm6_tunnel ip6ip6_handler = {
1381 .handler = ip6ip6_rcv,
1382 .err_handler = ip6ip6_err,
1383 .priority = 1,
1384};
1385
1386static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1387{
1388 int h;
1389 struct ip6_tnl *t;
1390
1391 for (h = 0; h < HASH_SIZE; h++) {
1392 while ((t = ip6n->tnls_r_l[h]) != NULL)
1393 unregister_netdevice(t->dev);
1394 }
1395
1396 t = ip6n->tnls_wc[0];
1397 unregister_netdevice(t->dev);
1398}
1399
1400static int ip6_tnl_init_net(struct net *net)
1401{
1402 int err;
1403 struct ip6_tnl_net *ip6n;
1404
1405 err = -ENOMEM;
1406 ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL);
1407 if (ip6n == NULL)
1408 goto err_alloc;
1409
1410 err = net_assign_generic(net, ip6_tnl_net_id, ip6n);
1411 if (err < 0)
1412 goto err_assign;
1413
1414 ip6n->tnls[0] = ip6n->tnls_wc;
1415 ip6n->tnls[1] = ip6n->tnls_r_l;
1416
1417 err = -ENOMEM;
1418 ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1419 ip6_tnl_dev_setup);
1420
1421 if (!ip6n->fb_tnl_dev)
1422 goto err_alloc_dev;
1423 dev_net_set(ip6n->fb_tnl_dev, net);
1424
1425 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1426
1427 err = register_netdev(ip6n->fb_tnl_dev);
1428 if (err < 0)
1429 goto err_register;
1430 return 0;
1431
1432err_register:
1433 free_netdev(ip6n->fb_tnl_dev);
1434err_alloc_dev:
1435
1436err_assign:
1437 kfree(ip6n);
1438err_alloc:
1439 return err;
1440}
1441
1442static void ip6_tnl_exit_net(struct net *net)
1443{
1444 struct ip6_tnl_net *ip6n;
1445
1446 ip6n = net_generic(net, ip6_tnl_net_id);
1447 rtnl_lock();
1448 ip6_tnl_destroy_tunnels(ip6n);
1449 rtnl_unlock();
1450 kfree(ip6n);
1451}
1452
1453static struct pernet_operations ip6_tnl_net_ops = {
1454 .init = ip6_tnl_init_net,
1455 .exit = ip6_tnl_exit_net,
1456};
1457
1458
1459
1460
1461
1462
1463
1464static int __init ip6_tunnel_init(void)
1465{
1466 int err;
1467
1468 if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
1469 printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
1470 err = -EAGAIN;
1471 goto out;
1472 }
1473
1474 if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
1475 printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
1476 err = -EAGAIN;
1477 goto unreg_ip4ip6;
1478 }
1479
1480 err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops);
1481 if (err < 0)
1482 goto err_pernet;
1483 return 0;
1484err_pernet:
1485 xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
1486unreg_ip4ip6:
1487 xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1488out:
1489 return err;
1490}
1491
1492
1493
1494
1495
1496static void __exit ip6_tunnel_cleanup(void)
1497{
1498 if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1499 printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
1500
1501 if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
1502 printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
1503
1504 unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops);
1505}
1506
/* Module entry and exit points. */
module_exit(ip6_tunnel_cleanup);
module_init(ip6_tunnel_init);
1509