1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94#include <linux/capability.h>
95#include <linux/module.h>
96#include <linux/types.h>
97#include <linux/kernel.h>
98#include <linux/slab.h>
99#include <linux/uaccess.h>
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
106#include <linux/init.h>
107#include <linux/netfilter_ipv4.h>
108#include <linux/if_ether.h>
109
110#include <net/sock.h>
111#include <net/ip.h>
112#include <net/icmp.h>
113#include <net/ip_tunnels.h>
114#include <net/inet_ecn.h>
115#include <net/xfrm.h>
116#include <net/net_namespace.h>
117#include <net/netns/generic.h>
118#include <net/dst_metadata.h>
119
120static bool log_ecn_error = true;
121module_param(log_ecn_error, bool, 0644);
122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
123
124static unsigned int ipip_net_id __read_mostly;
125
126static int ipip_tunnel_init(struct net_device *dev);
127static struct rtnl_link_ops ipip_link_ops __read_mostly;
128
129static int ipip_err(struct sk_buff *skb, u32 info)
130{
131
132
133
134
135 struct net *net = dev_net(skb->dev);
136 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
137 const struct iphdr *iph = (const struct iphdr *)skb->data;
138 const int type = icmp_hdr(skb)->type;
139 const int code = icmp_hdr(skb)->code;
140 struct ip_tunnel *t;
141 int err = 0;
142
143 switch (type) {
144 case ICMP_DEST_UNREACH:
145 switch (code) {
146 case ICMP_SR_FAILED:
147
148 goto out;
149 default:
150
151
152
153
154 break;
155 }
156 break;
157
158 case ICMP_TIME_EXCEEDED:
159 if (code != ICMP_EXC_TTL)
160 goto out;
161 break;
162
163 case ICMP_REDIRECT:
164 break;
165
166 default:
167 goto out;
168 }
169
170 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
171 iph->daddr, iph->saddr, 0);
172 if (!t) {
173 err = -ENOENT;
174 goto out;
175 }
176
177 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
178 ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
179 iph->protocol, 0);
180 goto out;
181 }
182
183 if (type == ICMP_REDIRECT) {
184 ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
185 goto out;
186 }
187
188 if (t->parms.iph.daddr == 0) {
189 err = -ENOENT;
190 goto out;
191 }
192
193 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
194 goto out;
195
196 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
197 t->err_count++;
198 else
199 t->err_count = 1;
200 t->err_time = jiffies;
201
202out:
203 return err;
204}
205
206static const struct tnl_ptk_info ipip_tpi = {
207
208 .proto = htons(ETH_P_IP),
209};
210
211#if IS_ENABLED(CONFIG_MPLS)
212static const struct tnl_ptk_info mplsip_tpi = {
213
214 .proto = htons(ETH_P_MPLS_UC),
215};
216#endif
217
218static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
219{
220 struct net *net = dev_net(skb->dev);
221 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
222 struct metadata_dst *tun_dst = NULL;
223 struct ip_tunnel *tunnel;
224 const struct iphdr *iph;
225
226 iph = ip_hdr(skb);
227 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
228 iph->saddr, iph->daddr, 0);
229 if (tunnel) {
230 const struct tnl_ptk_info *tpi;
231
232 if (tunnel->parms.iph.protocol != ipproto &&
233 tunnel->parms.iph.protocol != 0)
234 goto drop;
235
236 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
237 goto drop;
238#if IS_ENABLED(CONFIG_MPLS)
239 if (ipproto == IPPROTO_MPLS)
240 tpi = &mplsip_tpi;
241 else
242#endif
243 tpi = &ipip_tpi;
244 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
245 goto drop;
246 if (tunnel->collect_md) {
247 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
248 if (!tun_dst)
249 return 0;
250 }
251 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
252 }
253
254 return -1;
255
256drop:
257 kfree_skb(skb);
258 return 0;
259}
260
261static int ipip_rcv(struct sk_buff *skb)
262{
263 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
264}
265
266#if IS_ENABLED(CONFIG_MPLS)
267static int mplsip_rcv(struct sk_buff *skb)
268{
269 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
270}
271#endif
272
273
274
275
276
277static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
278 struct net_device *dev)
279{
280 struct ip_tunnel *tunnel = netdev_priv(dev);
281 const struct iphdr *tiph = &tunnel->parms.iph;
282 u8 ipproto;
283
284 switch (skb->protocol) {
285 case htons(ETH_P_IP):
286 ipproto = IPPROTO_IPIP;
287 break;
288#if IS_ENABLED(CONFIG_MPLS)
289 case htons(ETH_P_MPLS_UC):
290 ipproto = IPPROTO_MPLS;
291 break;
292#endif
293 default:
294 goto tx_error;
295 }
296
297 if (tiph->protocol != ipproto && tiph->protocol != 0)
298 goto tx_error;
299
300 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
301 goto tx_error;
302
303 skb_set_inner_ipproto(skb, ipproto);
304
305 if (tunnel->collect_md)
306 ip_md_tunnel_xmit(skb, dev, ipproto);
307 else
308 ip_tunnel_xmit(skb, dev, tiph, ipproto);
309 return NETDEV_TX_OK;
310
311tx_error:
312 kfree_skb(skb);
313
314 dev->stats.tx_errors++;
315 return NETDEV_TX_OK;
316}
317
318static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
319{
320 switch (ipproto) {
321 case 0:
322 case IPPROTO_IPIP:
323#if IS_ENABLED(CONFIG_MPLS)
324 case IPPROTO_MPLS:
325#endif
326 return true;
327 }
328
329 return false;
330}
331
332static int
333ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
334{
335 int err = 0;
336 struct ip_tunnel_parm p;
337
338 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
339 return -EFAULT;
340
341 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
342 if (p.iph.version != 4 ||
343 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
344 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
345 return -EINVAL;
346 }
347
348 p.i_key = p.o_key = 0;
349 p.i_flags = p.o_flags = 0;
350 err = ip_tunnel_ioctl(dev, &p, cmd);
351 if (err)
352 return err;
353
354 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
355 return -EFAULT;
356
357 return 0;
358}
359
360static const struct net_device_ops ipip_netdev_ops = {
361 .ndo_init = ipip_tunnel_init,
362 .ndo_uninit = ip_tunnel_uninit,
363 .ndo_start_xmit = ipip_tunnel_xmit,
364 .ndo_do_ioctl = ipip_tunnel_ioctl,
365 .ndo_change_mtu = ip_tunnel_change_mtu,
366 .ndo_get_stats64 = ip_tunnel_get_stats64,
367 .ndo_get_iflink = ip_tunnel_get_iflink,
368};
369
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES (NETIF_F_HW_CSUM |	\
		       NETIF_F_GSO_SOFTWARE |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_SG)
375
376static void ipip_tunnel_setup(struct net_device *dev)
377{
378 dev->netdev_ops = &ipip_netdev_ops;
379
380 dev->type = ARPHRD_TUNNEL;
381 dev->flags = IFF_NOARP;
382 dev->addr_len = 4;
383 dev->features |= NETIF_F_LLTX;
384 netif_keep_dst(dev);
385
386 dev->features |= IPIP_FEATURES;
387 dev->hw_features |= IPIP_FEATURES;
388 ip_tunnel_setup(dev, ipip_net_id);
389}
390
391static int ipip_tunnel_init(struct net_device *dev)
392{
393 struct ip_tunnel *tunnel = netdev_priv(dev);
394
395 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
396 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
397
398 tunnel->tun_hlen = 0;
399 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
400 return ip_tunnel_init(dev);
401}
402
403static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
404 struct netlink_ext_ack *extack)
405{
406 u8 proto;
407
408 if (!data || !data[IFLA_IPTUN_PROTO])
409 return 0;
410
411 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
412 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
413 return -EINVAL;
414
415 return 0;
416}
417
418static void ipip_netlink_parms(struct nlattr *data[],
419 struct ip_tunnel_parm *parms, bool *collect_md,
420 __u32 *fwmark)
421{
422 memset(parms, 0, sizeof(*parms));
423
424 parms->iph.version = 4;
425 parms->iph.protocol = IPPROTO_IPIP;
426 parms->iph.ihl = 5;
427 *collect_md = false;
428
429 if (!data)
430 return;
431
432 if (data[IFLA_IPTUN_LINK])
433 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
434
435 if (data[IFLA_IPTUN_LOCAL])
436 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
437
438 if (data[IFLA_IPTUN_REMOTE])
439 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
440
441 if (data[IFLA_IPTUN_TTL]) {
442 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
443 if (parms->iph.ttl)
444 parms->iph.frag_off = htons(IP_DF);
445 }
446
447 if (data[IFLA_IPTUN_TOS])
448 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
449
450 if (data[IFLA_IPTUN_PROTO])
451 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
452
453 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
454 parms->iph.frag_off = htons(IP_DF);
455
456 if (data[IFLA_IPTUN_COLLECT_METADATA])
457 *collect_md = true;
458
459 if (data[IFLA_IPTUN_FWMARK])
460 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
461}
462
463
464static bool ipip_netlink_encap_parms(struct nlattr *data[],
465 struct ip_tunnel_encap *ipencap)
466{
467 bool ret = false;
468
469 memset(ipencap, 0, sizeof(*ipencap));
470
471 if (!data)
472 return ret;
473
474 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
475 ret = true;
476 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
477 }
478
479 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
480 ret = true;
481 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
482 }
483
484 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
485 ret = true;
486 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
487 }
488
489 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
490 ret = true;
491 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
492 }
493
494 return ret;
495}
496
497static int ipip_newlink(struct net *src_net, struct net_device *dev,
498 struct nlattr *tb[], struct nlattr *data[],
499 struct netlink_ext_ack *extack)
500{
501 struct ip_tunnel *t = netdev_priv(dev);
502 struct ip_tunnel_parm p;
503 struct ip_tunnel_encap ipencap;
504 __u32 fwmark = 0;
505
506 if (ipip_netlink_encap_parms(data, &ipencap)) {
507 int err = ip_tunnel_encap_setup(t, &ipencap);
508
509 if (err < 0)
510 return err;
511 }
512
513 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
514 return ip_tunnel_newlink(dev, tb, &p, fwmark);
515}
516
517static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
518 struct nlattr *data[],
519 struct netlink_ext_ack *extack)
520{
521 struct ip_tunnel *t = netdev_priv(dev);
522 struct ip_tunnel_parm p;
523 struct ip_tunnel_encap ipencap;
524 bool collect_md;
525 __u32 fwmark = t->fwmark;
526
527 if (ipip_netlink_encap_parms(data, &ipencap)) {
528 int err = ip_tunnel_encap_setup(t, &ipencap);
529
530 if (err < 0)
531 return err;
532 }
533
534 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
535 if (collect_md)
536 return -EINVAL;
537
538 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
539 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
540 return -EINVAL;
541
542 return ip_tunnel_changelink(dev, tb, &p, fwmark);
543}
544
545static size_t ipip_get_size(const struct net_device *dev)
546{
547 return
548
549 nla_total_size(4) +
550
551 nla_total_size(4) +
552
553 nla_total_size(4) +
554
555 nla_total_size(1) +
556
557 nla_total_size(1) +
558
559 nla_total_size(1) +
560
561 nla_total_size(1) +
562
563 nla_total_size(2) +
564
565 nla_total_size(2) +
566
567 nla_total_size(2) +
568
569 nla_total_size(2) +
570
571 nla_total_size(0) +
572
573 nla_total_size(4) +
574 0;
575}
576
577static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
578{
579 struct ip_tunnel *tunnel = netdev_priv(dev);
580 struct ip_tunnel_parm *parm = &tunnel->parms;
581
582 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
583 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
584 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
585 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
586 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
587 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
588 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
589 !!(parm->iph.frag_off & htons(IP_DF))) ||
590 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
591 goto nla_put_failure;
592
593 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
594 tunnel->encap.type) ||
595 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
596 tunnel->encap.sport) ||
597 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
598 tunnel->encap.dport) ||
599 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
600 tunnel->encap.flags))
601 goto nla_put_failure;
602
603 if (tunnel->collect_md)
604 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
605 goto nla_put_failure;
606 return 0;
607
608nla_put_failure:
609 return -EMSGSIZE;
610}
611
612static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
613 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
614 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
615 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
616 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
617 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
618 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
619 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
620 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
621 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
622 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
623 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
624 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
625 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
626};
627
628static struct rtnl_link_ops ipip_link_ops __read_mostly = {
629 .kind = "ipip",
630 .maxtype = IFLA_IPTUN_MAX,
631 .policy = ipip_policy,
632 .priv_size = sizeof(struct ip_tunnel),
633 .setup = ipip_tunnel_setup,
634 .validate = ipip_tunnel_validate,
635 .newlink = ipip_newlink,
636 .changelink = ipip_changelink,
637 .dellink = ip_tunnel_dellink,
638 .get_size = ipip_get_size,
639 .fill_info = ipip_fill_info,
640 .get_link_net = ip_tunnel_get_link_net,
641};
642
643static struct xfrm_tunnel ipip_handler __read_mostly = {
644 .handler = ipip_rcv,
645 .err_handler = ipip_err,
646 .priority = 1,
647};
648
649#if IS_ENABLED(CONFIG_MPLS)
650static struct xfrm_tunnel mplsip_handler __read_mostly = {
651 .handler = mplsip_rcv,
652 .err_handler = ipip_err,
653 .priority = 1,
654};
655#endif
656
657static int __net_init ipip_init_net(struct net *net)
658{
659 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
660}
661
662static void __net_exit ipip_exit_net(struct net *net)
663{
664 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
665 ip_tunnel_delete_net(itn, &ipip_link_ops);
666}
667
668static struct pernet_operations ipip_net_ops = {
669 .init = ipip_init_net,
670 .exit = ipip_exit_net,
671 .id = &ipip_net_id,
672 .size = sizeof(struct ip_tunnel_net),
673};
674
675static int __init ipip_init(void)
676{
677 int err;
678
679 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
680
681 err = register_pernet_device(&ipip_net_ops);
682 if (err < 0)
683 return err;
684 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
685 if (err < 0) {
686 pr_info("%s: can't register tunnel\n", __func__);
687 goto xfrm_tunnel_ipip_failed;
688 }
689#if IS_ENABLED(CONFIG_MPLS)
690 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
691 if (err < 0) {
692 pr_info("%s: can't register tunnel\n", __func__);
693 goto xfrm_tunnel_mplsip_failed;
694 }
695#endif
696 err = rtnl_link_register(&ipip_link_ops);
697 if (err < 0)
698 goto rtnl_link_failed;
699
700out:
701 return err;
702
703rtnl_link_failed:
704#if IS_ENABLED(CONFIG_MPLS)
705 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
706xfrm_tunnel_mplsip_failed:
707
708#endif
709 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
710xfrm_tunnel_ipip_failed:
711 unregister_pernet_device(&ipip_net_ops);
712 goto out;
713}
714
715static void __exit ipip_fini(void)
716{
717 rtnl_link_unregister(&ipip_link_ops);
718 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
719 pr_info("%s: can't deregister tunnel\n", __func__);
720#if IS_ENABLED(CONFIG_MPLS)
721 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
722 pr_info("%s: can't deregister tunnel\n", __func__);
723#endif
724 unregister_pernet_device(&ipip_net_ops);
725}
726
727module_init(ipip_init);
728module_exit(ipip_fini);
729MODULE_LICENSE("GPL");
730MODULE_ALIAS_RTNL_LINK("ipip");
731MODULE_ALIAS_NETDEV("tunl0");
732