// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

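/* Tunnels are hashed on the tunnel key and the remote endpoint address,
 * so the common-case lookup touches a single bucket.
 */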
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

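/* A packet matches a tunnel only if their key configurations agree:
 * a keyed packet matches just a tunnel carrying the same key, and a
 * keyless packet matches just a keyless tunnel.
 */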
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

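/* Fallback tunnel: no source, no destination, no key, no options.
 *
 * Given src, dst and key, find the appropriate tunnel for an incoming
 * packet. Keyless packets that match no configured keyless tunnel fall
 * back to the fallback tunnel.
 */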
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	struct net_device *ndev;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* First pass: exact match on both endpoints. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Second pass: tunnels with a wildcard local address. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* The remaining passes hash without the remote address. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Third pass: wildcard remote address, or multicast local. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Fourth pass: fully wildcard tunnels, matched on key alone. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	ndev = READ_ONCE(itn->fb_tunnel_dev);
	if (ndev && ndev->flags & IFF_UP)
		return netdev_priv(ndev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

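/* Pick the bucket a tunnel's parameters hash to. Multicast destinations
 * and (for VTI) unkeyed tunnels hash as wildcards.
 */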
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

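/* Unlike ip_tunnel_lookup(), this requires an exact match on the tunnel
 * parameters; it is used by the ioctl and netlink control paths.
 */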
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - t_hlen;
	if (dev->type == ARPHRD_ETHER)
		dev->max_mtu -= dev->hard_header_len;

	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

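/* Common receive path: validate the checksum and sequence-number flags
 * against the tunnel configuration, decapsulate ECN, update stats and
 * hand the packet to GRO cells.
 */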
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	dev_sw_netstats_rx_add(tunnel->dev, skb->len);
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

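/* Encapsulation handlers are registered lock-free: a slot in the
 * iptun_encaps[] array is claimed (or released) with cmpxchg().
 */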
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

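/* Check whether the encapsulated packet still fits the path MTU and,
 * if not, send the appropriate "fragmentation needed" / "packet too
 * big" error back to the sender.
 */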
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen;
	pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;

	if (df) {
		mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
		mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
	} else {
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
	}

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

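/* Transmit path for collect_md tunnels: destination, key, tos and ttl
 * all come from the skb's tunnel metadata rather than from the device
 * configuration.
 */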
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		/* tos == 1 means "inherit from the inner packet" */
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		/* Routing back onto the tunnel itself would loop. */
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

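/* Transmit path for classically configured tunnels. For an NBMA tunnel
 * (daddr == 0) the destination is recovered from the inner packet's
 * routing information before the outer header is built.
 */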
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;
	struct rtable *rt = NULL;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit tos from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

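/* Apply new parameters to an existing tunnel and re-hash it; the caller
 * holds RTNL.
 */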
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags ^ nflags) &
				    (IFF_POINTOPOINT | IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);

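/* ndo_siocdevprivate helper: copy the parameter block in from userspace,
 * run the device's tunnel ctl and copy the result back out.
 */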
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
			     void __user *data, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, data, sizeof(p)))
		return -EFAULT;
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);

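/* The largest usable MTU leaves room for the tunnel header (and, on
 * Ethernet-like tunnels, the link-layer header). With strict == false
 * an oversized request is clamped instead of rejected.
 */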
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - t_hlen;

	if (dev->type == ARPHRD_ETHER)
		max_mtu -= dev->hard_header_len;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

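/* Per-netns initialization: set up the hash table and, unless this netns
 * opted out of fallback tunnels, create the fallback device.
 */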
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));

		if (dev->type == ARPHRD_ETHER)
			max -= dev->hard_header_len;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

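/* Common ndo_init for IPv4 tunnel devices: allocate the per-cpu stats,
 * destination cache and GRO cells before the tunnel goes live.
 */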
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	ip_tunnel_del(itn, netdev_priv(dev));
	if (itn->fb_tunnel_dev == dev)
		WRITE_ONCE(itn->fb_tunnel_dev, NULL);

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");