1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/capability.h>
90#include <linux/module.h>
91#include <linux/types.h>
92#include <linux/kernel.h>
93#include <linux/slab.h>
94#include <linux/uaccess.h>
95#include <linux/skbuff.h>
96#include <linux/netdevice.h>
97#include <linux/in.h>
98#include <linux/tcp.h>
99#include <linux/udp.h>
100#include <linux/if_arp.h>
101#include <linux/init.h>
102#include <linux/netfilter_ipv4.h>
103#include <linux/if_ether.h>
104
105#include <net/sock.h>
106#include <net/ip.h>
107#include <net/icmp.h>
108#include <net/ip_tunnels.h>
109#include <net/inet_ecn.h>
110#include <net/xfrm.h>
111#include <net/net_namespace.h>
112#include <net/netns/generic.h>
113#include <net/dst_metadata.h>
114
115static bool log_ecn_error = true;
116module_param(log_ecn_error, bool, 0644);
117MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
118
119static unsigned int ipip_net_id __read_mostly;
120
121static int ipip_tunnel_init(struct net_device *dev);
122static struct rtnl_link_ops ipip_link_ops __read_mostly;
123
124static int ipip_err(struct sk_buff *skb, u32 info)
125{
126
127
128
129
130 struct net *net = dev_net(skb->dev);
131 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
132 const struct iphdr *iph = (const struct iphdr *)skb->data;
133 const int type = icmp_hdr(skb)->type;
134 const int code = icmp_hdr(skb)->code;
135 struct ip_tunnel *t;
136 int err = 0;
137
138 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
139 iph->daddr, iph->saddr, 0);
140 if (!t) {
141 err = -ENOENT;
142 goto out;
143 }
144
145 switch (type) {
146 case ICMP_DEST_UNREACH:
147 switch (code) {
148 case ICMP_SR_FAILED:
149
150 goto out;
151 default:
152
153
154
155
156 break;
157 }
158 break;
159
160 case ICMP_TIME_EXCEEDED:
161 if (code != ICMP_EXC_TTL)
162 goto out;
163 break;
164
165 case ICMP_REDIRECT:
166 break;
167
168 default:
169 goto out;
170 }
171
172 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
173 ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol);
174 goto out;
175 }
176
177 if (type == ICMP_REDIRECT) {
178 ipv4_redirect(skb, net, t->parms.link, iph->protocol);
179 goto out;
180 }
181
182 if (t->parms.iph.daddr == 0) {
183 err = -ENOENT;
184 goto out;
185 }
186
187 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
188 goto out;
189
190 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
191 t->err_count++;
192 else
193 t->err_count = 1;
194 t->err_time = jiffies;
195
196out:
197 return err;
198}
199
200static const struct tnl_ptk_info ipip_tpi = {
201
202 .proto = htons(ETH_P_IP),
203};
204
205#if IS_ENABLED(CONFIG_MPLS)
206static const struct tnl_ptk_info mplsip_tpi = {
207
208 .proto = htons(ETH_P_MPLS_UC),
209};
210#endif
211
212static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
213{
214 struct net *net = dev_net(skb->dev);
215 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
216 struct metadata_dst *tun_dst = NULL;
217 struct ip_tunnel *tunnel;
218 const struct iphdr *iph;
219
220 iph = ip_hdr(skb);
221 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
222 iph->saddr, iph->daddr, 0);
223 if (tunnel) {
224 const struct tnl_ptk_info *tpi;
225
226 if (tunnel->parms.iph.protocol != ipproto &&
227 tunnel->parms.iph.protocol != 0)
228 goto drop;
229
230 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
231 goto drop;
232#if IS_ENABLED(CONFIG_MPLS)
233 if (ipproto == IPPROTO_MPLS)
234 tpi = &mplsip_tpi;
235 else
236#endif
237 tpi = &ipip_tpi;
238 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
239 goto drop;
240 if (tunnel->collect_md) {
241 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
242 if (!tun_dst)
243 return 0;
244 }
245 skb_reset_mac_header(skb);
246
247 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
248 }
249
250 return -1;
251
252drop:
253 kfree_skb(skb);
254 return 0;
255}
256
257static int ipip_rcv(struct sk_buff *skb)
258{
259 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
260}
261
262#if IS_ENABLED(CONFIG_MPLS)
263static int mplsip_rcv(struct sk_buff *skb)
264{
265 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
266}
267#endif
268
269
270
271
272
273static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
274 struct net_device *dev)
275{
276 struct ip_tunnel *tunnel = netdev_priv(dev);
277 const struct iphdr *tiph = &tunnel->parms.iph;
278 u8 ipproto;
279
280 if (!pskb_inet_may_pull(skb))
281 goto tx_error;
282
283 switch (skb->protocol) {
284 case htons(ETH_P_IP):
285 ipproto = IPPROTO_IPIP;
286 break;
287#if IS_ENABLED(CONFIG_MPLS)
288 case htons(ETH_P_MPLS_UC):
289 ipproto = IPPROTO_MPLS;
290 break;
291#endif
292 default:
293 goto tx_error;
294 }
295
296 if (tiph->protocol != ipproto && tiph->protocol != 0)
297 goto tx_error;
298
299 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
300 goto tx_error;
301
302 skb_set_inner_ipproto(skb, ipproto);
303
304 if (tunnel->collect_md)
305 ip_md_tunnel_xmit(skb, dev, ipproto, 0);
306 else
307 ip_tunnel_xmit(skb, dev, tiph, ipproto);
308 return NETDEV_TX_OK;
309
310tx_error:
311 kfree_skb(skb);
312
313 dev->stats.tx_errors++;
314 return NETDEV_TX_OK;
315}
316
317static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
318{
319 switch (ipproto) {
320 case 0:
321 case IPPROTO_IPIP:
322#if IS_ENABLED(CONFIG_MPLS)
323 case IPPROTO_MPLS:
324#endif
325 return true;
326 }
327
328 return false;
329}
330
331static int
332ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
333{
334 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
335 if (p->iph.version != 4 ||
336 !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) ||
337 p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
338 return -EINVAL;
339 }
340
341 p->i_key = p->o_key = 0;
342 p->i_flags = p->o_flags = 0;
343 return ip_tunnel_ctl(dev, p, cmd);
344}
345
346static const struct net_device_ops ipip_netdev_ops = {
347 .ndo_init = ipip_tunnel_init,
348 .ndo_uninit = ip_tunnel_uninit,
349 .ndo_start_xmit = ipip_tunnel_xmit,
350 .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
351 .ndo_change_mtu = ip_tunnel_change_mtu,
352 .ndo_get_stats64 = dev_get_tstats64,
353 .ndo_get_iflink = ip_tunnel_get_iflink,
354 .ndo_tunnel_ctl = ipip_tunnel_ctl,
355};
356
/*
 * Feature bits that ipip_tunnel_setup() ORs into both dev->features and
 * dev->hw_features.
 */
#define IPIP_FEATURES (NETIF_F_HW_CSUM |		\
		       NETIF_F_GSO_SOFTWARE |		\
		       NETIF_F_HIGHDMA |		\
		       NETIF_F_FRAGLIST |		\
		       NETIF_F_SG)
362
363static void ipip_tunnel_setup(struct net_device *dev)
364{
365 dev->netdev_ops = &ipip_netdev_ops;
366 dev->header_ops = &ip_tunnel_header_ops;
367
368 dev->type = ARPHRD_TUNNEL;
369 dev->flags = IFF_NOARP;
370 dev->addr_len = 4;
371 dev->features |= NETIF_F_LLTX;
372 netif_keep_dst(dev);
373
374 dev->features |= IPIP_FEATURES;
375 dev->hw_features |= IPIP_FEATURES;
376 ip_tunnel_setup(dev, ipip_net_id);
377}
378
379static int ipip_tunnel_init(struct net_device *dev)
380{
381 struct ip_tunnel *tunnel = netdev_priv(dev);
382
383 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
384 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
385
386 tunnel->tun_hlen = 0;
387 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
388 return ip_tunnel_init(dev);
389}
390
391static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
392 struct netlink_ext_ack *extack)
393{
394 u8 proto;
395
396 if (!data || !data[IFLA_IPTUN_PROTO])
397 return 0;
398
399 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
400 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
401 return -EINVAL;
402
403 return 0;
404}
405
406static void ipip_netlink_parms(struct nlattr *data[],
407 struct ip_tunnel_parm *parms, bool *collect_md,
408 __u32 *fwmark)
409{
410 memset(parms, 0, sizeof(*parms));
411
412 parms->iph.version = 4;
413 parms->iph.protocol = IPPROTO_IPIP;
414 parms->iph.ihl = 5;
415 *collect_md = false;
416
417 if (!data)
418 return;
419
420 if (data[IFLA_IPTUN_LINK])
421 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
422
423 if (data[IFLA_IPTUN_LOCAL])
424 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
425
426 if (data[IFLA_IPTUN_REMOTE])
427 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
428
429 if (data[IFLA_IPTUN_TTL]) {
430 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
431 if (parms->iph.ttl)
432 parms->iph.frag_off = htons(IP_DF);
433 }
434
435 if (data[IFLA_IPTUN_TOS])
436 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
437
438 if (data[IFLA_IPTUN_PROTO])
439 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
440
441 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
442 parms->iph.frag_off = htons(IP_DF);
443
444 if (data[IFLA_IPTUN_COLLECT_METADATA])
445 *collect_md = true;
446
447 if (data[IFLA_IPTUN_FWMARK])
448 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
449}
450
451
452static bool ipip_netlink_encap_parms(struct nlattr *data[],
453 struct ip_tunnel_encap *ipencap)
454{
455 bool ret = false;
456
457 memset(ipencap, 0, sizeof(*ipencap));
458
459 if (!data)
460 return ret;
461
462 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
463 ret = true;
464 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
465 }
466
467 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
468 ret = true;
469 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
470 }
471
472 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
473 ret = true;
474 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
475 }
476
477 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
478 ret = true;
479 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
480 }
481
482 return ret;
483}
484
485static int ipip_newlink(struct net *src_net, struct net_device *dev,
486 struct nlattr *tb[], struct nlattr *data[],
487 struct netlink_ext_ack *extack)
488{
489 struct ip_tunnel *t = netdev_priv(dev);
490 struct ip_tunnel_parm p;
491 struct ip_tunnel_encap ipencap;
492 __u32 fwmark = 0;
493
494 if (ipip_netlink_encap_parms(data, &ipencap)) {
495 int err = ip_tunnel_encap_setup(t, &ipencap);
496
497 if (err < 0)
498 return err;
499 }
500
501 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
502 return ip_tunnel_newlink(dev, tb, &p, fwmark);
503}
504
505static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
506 struct nlattr *data[],
507 struct netlink_ext_ack *extack)
508{
509 struct ip_tunnel *t = netdev_priv(dev);
510 struct ip_tunnel_parm p;
511 struct ip_tunnel_encap ipencap;
512 bool collect_md;
513 __u32 fwmark = t->fwmark;
514
515 if (ipip_netlink_encap_parms(data, &ipencap)) {
516 int err = ip_tunnel_encap_setup(t, &ipencap);
517
518 if (err < 0)
519 return err;
520 }
521
522 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
523 if (collect_md)
524 return -EINVAL;
525
526 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
527 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
528 return -EINVAL;
529
530 return ip_tunnel_changelink(dev, tb, &p, fwmark);
531}
532
533static size_t ipip_get_size(const struct net_device *dev)
534{
535 return
536
537 nla_total_size(4) +
538
539 nla_total_size(4) +
540
541 nla_total_size(4) +
542
543 nla_total_size(1) +
544
545 nla_total_size(1) +
546
547 nla_total_size(1) +
548
549 nla_total_size(1) +
550
551 nla_total_size(2) +
552
553 nla_total_size(2) +
554
555 nla_total_size(2) +
556
557 nla_total_size(2) +
558
559 nla_total_size(0) +
560
561 nla_total_size(4) +
562 0;
563}
564
565static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
566{
567 struct ip_tunnel *tunnel = netdev_priv(dev);
568 struct ip_tunnel_parm *parm = &tunnel->parms;
569
570 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
571 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
572 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
573 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
574 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
575 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
576 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
577 !!(parm->iph.frag_off & htons(IP_DF))) ||
578 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
579 goto nla_put_failure;
580
581 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
582 tunnel->encap.type) ||
583 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
584 tunnel->encap.sport) ||
585 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
586 tunnel->encap.dport) ||
587 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
588 tunnel->encap.flags))
589 goto nla_put_failure;
590
591 if (tunnel->collect_md)
592 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
593 goto nla_put_failure;
594 return 0;
595
596nla_put_failure:
597 return -EMSGSIZE;
598}
599
600static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
601 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
602 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
603 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
604 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
605 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
606 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
607 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
608 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
609 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
610 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
611 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
612 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
613 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
614};
615
616static struct rtnl_link_ops ipip_link_ops __read_mostly = {
617 .kind = "ipip",
618 .maxtype = IFLA_IPTUN_MAX,
619 .policy = ipip_policy,
620 .priv_size = sizeof(struct ip_tunnel),
621 .setup = ipip_tunnel_setup,
622 .validate = ipip_tunnel_validate,
623 .newlink = ipip_newlink,
624 .changelink = ipip_changelink,
625 .dellink = ip_tunnel_dellink,
626 .get_size = ipip_get_size,
627 .fill_info = ipip_fill_info,
628 .get_link_net = ip_tunnel_get_link_net,
629};
630
631static struct xfrm_tunnel ipip_handler __read_mostly = {
632 .handler = ipip_rcv,
633 .err_handler = ipip_err,
634 .priority = 1,
635};
636
637#if IS_ENABLED(CONFIG_MPLS)
638static struct xfrm_tunnel mplsip_handler __read_mostly = {
639 .handler = mplsip_rcv,
640 .err_handler = ipip_err,
641 .priority = 1,
642};
643#endif
644
645static int __net_init ipip_init_net(struct net *net)
646{
647 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
648}
649
650static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
651{
652 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
653}
654
655static struct pernet_operations ipip_net_ops = {
656 .init = ipip_init_net,
657 .exit_batch = ipip_exit_batch_net,
658 .id = &ipip_net_id,
659 .size = sizeof(struct ip_tunnel_net),
660};
661
662static int __init ipip_init(void)
663{
664 int err;
665
666 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
667
668 err = register_pernet_device(&ipip_net_ops);
669 if (err < 0)
670 return err;
671 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
672 if (err < 0) {
673 pr_info("%s: can't register tunnel\n", __func__);
674 goto xfrm_tunnel_ipip_failed;
675 }
676#if IS_ENABLED(CONFIG_MPLS)
677 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
678 if (err < 0) {
679 pr_info("%s: can't register tunnel\n", __func__);
680 goto xfrm_tunnel_mplsip_failed;
681 }
682#endif
683 err = rtnl_link_register(&ipip_link_ops);
684 if (err < 0)
685 goto rtnl_link_failed;
686
687out:
688 return err;
689
690rtnl_link_failed:
691#if IS_ENABLED(CONFIG_MPLS)
692 xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
693xfrm_tunnel_mplsip_failed:
694
695#endif
696 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
697xfrm_tunnel_ipip_failed:
698 unregister_pernet_device(&ipip_net_ops);
699 goto out;
700}
701
702static void __exit ipip_fini(void)
703{
704 rtnl_link_unregister(&ipip_link_ops);
705 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
706 pr_info("%s: can't deregister tunnel\n", __func__);
707#if IS_ENABLED(CONFIG_MPLS)
708 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
709 pr_info("%s: can't deregister tunnel\n", __func__);
710#endif
711 unregister_pernet_device(&ipip_net_ops);
712}
713
714module_init(ipip_init);
715module_exit(ipip_fini);
716MODULE_LICENSE("GPL");
717MODULE_ALIAS_RTNL_LINK("ipip");
718MODULE_ALIAS_NETDEV("tunl0");
719