1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94#include <linux/capability.h>
95#include <linux/module.h>
96#include <linux/types.h>
97#include <linux/kernel.h>
98#include <linux/slab.h>
99#include <linux/uaccess.h>
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
106#include <linux/init.h>
107#include <linux/netfilter_ipv4.h>
108#include <linux/if_ether.h>
109
110#include <net/sock.h>
111#include <net/ip.h>
112#include <net/icmp.h>
113#include <net/ip_tunnels.h>
114#include <net/inet_ecn.h>
115#include <net/xfrm.h>
116#include <net/net_namespace.h>
117#include <net/netns/generic.h>
118#include <net/dst_metadata.h>
119
120static bool log_ecn_error = true;
121module_param(log_ecn_error, bool, 0644);
122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
123
124static unsigned int ipip_net_id __read_mostly;
125
126static int ipip_tunnel_init(struct net_device *dev);
127static struct rtnl_link_ops ipip_link_ops __read_mostly;
128
129static int ipip_err(struct sk_buff *skb, u32 info)
130{
131
132
133
134
135
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
138 const struct iphdr *iph = (const struct iphdr *)skb->data;
139 struct ip_tunnel *t;
140 int err;
141 const int type = icmp_hdr(skb)->type;
142 const int code = icmp_hdr(skb)->code;
143
144 err = -ENOENT;
145 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146 iph->daddr, iph->saddr, 0);
147 if (!t)
148 goto out;
149
150 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
151 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
152 t->parms.link, 0, iph->protocol, 0);
153 err = 0;
154 goto out;
155 }
156
157 if (type == ICMP_REDIRECT) {
158 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
159 iph->protocol, 0);
160 err = 0;
161 goto out;
162 }
163
164 if (t->parms.iph.daddr == 0)
165 goto out;
166
167 err = 0;
168 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
169 goto out;
170
171 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
172 t->err_count++;
173 else
174 t->err_count = 1;
175 t->err_time = jiffies;
176
177out:
178 return err;
179}
180
181static const struct tnl_ptk_info ipip_tpi = {
182
183 .proto = htons(ETH_P_IP),
184};
185
186#if IS_ENABLED(CONFIG_MPLS)
187static const struct tnl_ptk_info mplsip_tpi = {
188
189 .proto = htons(ETH_P_MPLS_UC),
190};
191#endif
192
193static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
194{
195 struct net *net = dev_net(skb->dev);
196 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
197 struct metadata_dst *tun_dst = NULL;
198 struct ip_tunnel *tunnel;
199 const struct iphdr *iph;
200
201 iph = ip_hdr(skb);
202 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
203 iph->saddr, iph->daddr, 0);
204 if (tunnel) {
205 const struct tnl_ptk_info *tpi;
206
207 if (tunnel->parms.iph.protocol != ipproto &&
208 tunnel->parms.iph.protocol != 0)
209 goto drop;
210
211 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
212 goto drop;
213#if IS_ENABLED(CONFIG_MPLS)
214 if (ipproto == IPPROTO_MPLS)
215 tpi = &mplsip_tpi;
216 else
217#endif
218 tpi = &ipip_tpi;
219 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
220 goto drop;
221 if (tunnel->collect_md) {
222 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
223 if (!tun_dst)
224 return 0;
225 }
226 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
227 }
228
229 return -1;
230
231drop:
232 kfree_skb(skb);
233 return 0;
234}
235
236static int ipip_rcv(struct sk_buff *skb)
237{
238 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
239}
240
241#if IS_ENABLED(CONFIG_MPLS)
242static int mplsip_rcv(struct sk_buff *skb)
243{
244 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
245}
246#endif
247
248
249
250
251
252static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
253 struct net_device *dev)
254{
255 struct ip_tunnel *tunnel = netdev_priv(dev);
256 const struct iphdr *tiph = &tunnel->parms.iph;
257 u8 ipproto;
258
259 switch (skb->protocol) {
260 case htons(ETH_P_IP):
261 ipproto = IPPROTO_IPIP;
262 break;
263#if IS_ENABLED(CONFIG_MPLS)
264 case htons(ETH_P_MPLS_UC):
265 ipproto = IPPROTO_MPLS;
266 break;
267#endif
268 default:
269 goto tx_error;
270 }
271
272 if (tiph->protocol != ipproto && tiph->protocol != 0)
273 goto tx_error;
274
275 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
276 goto tx_error;
277
278 skb_set_inner_ipproto(skb, ipproto);
279
280 if (tunnel->collect_md)
281 ip_md_tunnel_xmit(skb, dev, ipproto);
282 else
283 ip_tunnel_xmit(skb, dev, tiph, ipproto);
284 return NETDEV_TX_OK;
285
286tx_error:
287 kfree_skb(skb);
288
289 dev->stats.tx_errors++;
290 return NETDEV_TX_OK;
291}
292
293static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
294{
295 switch (ipproto) {
296 case 0:
297 case IPPROTO_IPIP:
298#if IS_ENABLED(CONFIG_MPLS)
299 case IPPROTO_MPLS:
300#endif
301 return true;
302 }
303
304 return false;
305}
306
307static int
308ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
309{
310 int err = 0;
311 struct ip_tunnel_parm p;
312
313 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
314 return -EFAULT;
315
316 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
317 if (p.iph.version != 4 ||
318 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
319 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
320 return -EINVAL;
321 }
322
323 p.i_key = p.o_key = 0;
324 p.i_flags = p.o_flags = 0;
325 err = ip_tunnel_ioctl(dev, &p, cmd);
326 if (err)
327 return err;
328
329 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
330 return -EFAULT;
331
332 return 0;
333}
334
335static const struct net_device_ops ipip_netdev_ops = {
336 .ndo_init = ipip_tunnel_init,
337 .ndo_uninit = ip_tunnel_uninit,
338 .ndo_start_xmit = ipip_tunnel_xmit,
339 .ndo_do_ioctl = ipip_tunnel_ioctl,
340 .ndo_change_mtu = ip_tunnel_change_mtu,
341 .ndo_get_stats64 = ip_tunnel_get_stats64,
342 .ndo_get_iflink = ip_tunnel_get_iflink,
343};
344
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES							\
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |		\
	 NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
350
351static void ipip_tunnel_setup(struct net_device *dev)
352{
353 dev->netdev_ops = &ipip_netdev_ops;
354
355 dev->type = ARPHRD_TUNNEL;
356 dev->flags = IFF_NOARP;
357 dev->addr_len = 4;
358 dev->features |= NETIF_F_LLTX;
359 netif_keep_dst(dev);
360
361 dev->features |= IPIP_FEATURES;
362 dev->hw_features |= IPIP_FEATURES;
363 ip_tunnel_setup(dev, ipip_net_id);
364}
365
366static int ipip_tunnel_init(struct net_device *dev)
367{
368 struct ip_tunnel *tunnel = netdev_priv(dev);
369
370 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
371 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
372
373 tunnel->tun_hlen = 0;
374 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
375 return ip_tunnel_init(dev);
376}
377
378static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
379{
380 u8 proto;
381
382 if (!data || !data[IFLA_IPTUN_PROTO])
383 return 0;
384
385 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
386 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
387 return -EINVAL;
388
389 return 0;
390}
391
392static void ipip_netlink_parms(struct nlattr *data[],
393 struct ip_tunnel_parm *parms, bool *collect_md,
394 __u32 *fwmark)
395{
396 memset(parms, 0, sizeof(*parms));
397
398 parms->iph.version = 4;
399 parms->iph.protocol = IPPROTO_IPIP;
400 parms->iph.ihl = 5;
401 *collect_md = false;
402
403 if (!data)
404 return;
405
406 if (data[IFLA_IPTUN_LINK])
407 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
408
409 if (data[IFLA_IPTUN_LOCAL])
410 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
411
412 if (data[IFLA_IPTUN_REMOTE])
413 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
414
415 if (data[IFLA_IPTUN_TTL]) {
416 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
417 if (parms->iph.ttl)
418 parms->iph.frag_off = htons(IP_DF);
419 }
420
421 if (data[IFLA_IPTUN_TOS])
422 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
423
424 if (data[IFLA_IPTUN_PROTO])
425 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
426
427 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
428 parms->iph.frag_off = htons(IP_DF);
429
430 if (data[IFLA_IPTUN_COLLECT_METADATA])
431 *collect_md = true;
432
433 if (data[IFLA_IPTUN_FWMARK])
434 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
435}
436
437
438static bool ipip_netlink_encap_parms(struct nlattr *data[],
439 struct ip_tunnel_encap *ipencap)
440{
441 bool ret = false;
442
443 memset(ipencap, 0, sizeof(*ipencap));
444
445 if (!data)
446 return ret;
447
448 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
449 ret = true;
450 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
451 }
452
453 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
454 ret = true;
455 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
456 }
457
458 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
459 ret = true;
460 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
461 }
462
463 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
464 ret = true;
465 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
466 }
467
468 return ret;
469}
470
471static int ipip_newlink(struct net *src_net, struct net_device *dev,
472 struct nlattr *tb[], struct nlattr *data[])
473{
474 struct ip_tunnel *t = netdev_priv(dev);
475 struct ip_tunnel_parm p;
476 struct ip_tunnel_encap ipencap;
477 __u32 fwmark = 0;
478
479 if (ipip_netlink_encap_parms(data, &ipencap)) {
480 int err = ip_tunnel_encap_setup(t, &ipencap);
481
482 if (err < 0)
483 return err;
484 }
485
486 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
487 return ip_tunnel_newlink(dev, tb, &p, fwmark);
488}
489
490static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
491 struct nlattr *data[])
492{
493 struct ip_tunnel *t = netdev_priv(dev);
494 struct ip_tunnel_parm p;
495 struct ip_tunnel_encap ipencap;
496 bool collect_md;
497 __u32 fwmark = t->fwmark;
498
499 if (ipip_netlink_encap_parms(data, &ipencap)) {
500 int err = ip_tunnel_encap_setup(t, &ipencap);
501
502 if (err < 0)
503 return err;
504 }
505
506 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
507 if (collect_md)
508 return -EINVAL;
509
510 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
511 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
512 return -EINVAL;
513
514 return ip_tunnel_changelink(dev, tb, &p, fwmark);
515}
516
517static size_t ipip_get_size(const struct net_device *dev)
518{
519 return
520
521 nla_total_size(4) +
522
523 nla_total_size(4) +
524
525 nla_total_size(4) +
526
527 nla_total_size(1) +
528
529 nla_total_size(1) +
530
531 nla_total_size(1) +
532
533 nla_total_size(1) +
534
535 nla_total_size(2) +
536
537 nla_total_size(2) +
538
539 nla_total_size(2) +
540
541 nla_total_size(2) +
542
543 nla_total_size(0) +
544
545 nla_total_size(4) +
546 0;
547}
548
549static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
550{
551 struct ip_tunnel *tunnel = netdev_priv(dev);
552 struct ip_tunnel_parm *parm = &tunnel->parms;
553
554 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
555 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
556 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
557 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
558 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
559 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
560 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
561 !!(parm->iph.frag_off & htons(IP_DF))) ||
562 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
563 goto nla_put_failure;
564
565 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
566 tunnel->encap.type) ||
567 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
568 tunnel->encap.sport) ||
569 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
570 tunnel->encap.dport) ||
571 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
572 tunnel->encap.flags))
573 goto nla_put_failure;
574
575 if (tunnel->collect_md)
576 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
577 goto nla_put_failure;
578 return 0;
579
580nla_put_failure:
581 return -EMSGSIZE;
582}
583
584static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
585 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
586 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
587 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
588 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
589 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
590 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
591 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
592 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
593 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
594 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
595 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
596 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
597 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
598};
599
600static struct rtnl_link_ops ipip_link_ops __read_mostly = {
601 .kind = "ipip",
602 .maxtype = IFLA_IPTUN_MAX,
603 .policy = ipip_policy,
604 .priv_size = sizeof(struct ip_tunnel),
605 .setup = ipip_tunnel_setup,
606 .validate = ipip_tunnel_validate,
607 .newlink = ipip_newlink,
608 .changelink = ipip_changelink,
609 .dellink = ip_tunnel_dellink,
610 .get_size = ipip_get_size,
611 .fill_info = ipip_fill_info,
612 .get_link_net = ip_tunnel_get_link_net,
613};
614
615static struct xfrm_tunnel ipip_handler __read_mostly = {
616 .handler = ipip_rcv,
617 .err_handler = ipip_err,
618 .priority = 1,
619};
620
621#if IS_ENABLED(CONFIG_MPLS)
622static struct xfrm_tunnel mplsip_handler __read_mostly = {
623 .handler = mplsip_rcv,
624 .err_handler = ipip_err,
625 .priority = 1,
626};
627#endif
628
629static int __net_init ipip_init_net(struct net *net)
630{
631 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
632}
633
634static void __net_exit ipip_exit_net(struct net *net)
635{
636 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
637 ip_tunnel_delete_net(itn, &ipip_link_ops);
638}
639
640static struct pernet_operations ipip_net_ops = {
641 .init = ipip_init_net,
642 .exit = ipip_exit_net,
643 .id = &ipip_net_id,
644 .size = sizeof(struct ip_tunnel_net),
645};
646
647static int __init ipip_init(void)
648{
649 int err;
650
651 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
652
653 err = register_pernet_device(&ipip_net_ops);
654 if (err < 0)
655 return err;
656 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
657 if (err < 0) {
658 pr_info("%s: can't register tunnel\n", __func__);
659 goto xfrm_tunnel_ipip_failed;
660 }
661#if IS_ENABLED(CONFIG_MPLS)
662 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
663 if (err < 0) {
664 pr_info("%s: can't register tunnel\n", __func__);
665 goto xfrm_tunnel_mplsip_failed;
666 }
667#endif
668 err = rtnl_link_register(&ipip_link_ops);
669 if (err < 0)
670 goto rtnl_link_failed;
671
672out:
673 return err;
674
675rtnl_link_failed:
676#if IS_ENABLED(CONFIG_MPLS)
677 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
678xfrm_tunnel_mplsip_failed:
679
680#endif
681 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
682xfrm_tunnel_ipip_failed:
683 unregister_pernet_device(&ipip_net_ops);
684 goto out;
685}
686
687static void __exit ipip_fini(void)
688{
689 rtnl_link_unregister(&ipip_link_ops);
690 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
691 pr_info("%s: can't deregister tunnel\n", __func__);
692#if IS_ENABLED(CONFIG_MPLS)
693 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
694 pr_info("%s: can't deregister tunnel\n", __func__);
695#endif
696 unregister_pernet_device(&ipip_net_ops);
697}
698
699module_init(ipip_init);
700module_exit(ipip_fini);
701MODULE_LICENSE("GPL");
702MODULE_ALIAS_RTNL_LINK("ipip");
703MODULE_ALIAS_NETDEV("tunl0");
704