// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}
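
/*
 * Illustrative note (not in the original file): the matrix below is a
 * sketch of what ip_tunnel_key_match() computes for a tunnel config "p"
 * against the flags/key parsed from an incoming packet:
 *
 *	p->i_flags	pkt flags	result
 *	TUNNEL_KEY	TUNNEL_KEY	key == p->i_key
 *	TUNNEL_KEY	(no key)	false
 *	(no key)	TUNNEL_KEY	false
 *	(no key)	(no key)	true
 */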

/* Fallback tunnel: no source, no destination, no key, no options.
 *
 * Tunnel hash table:
 * We require an exact key match, i.e. if a key is present in the packet
 * it will match only a tunnel with the same key; if no key is present,
 * it will match only a keyless tunnel.
 *
 * All keyless packets, if not matched to a configured keyless tunnel,
 * will match the fallback tunnel.
 * Given src, dst and key, find the appropriate tunnel for input.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;
	unsigned int hash;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
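
/*
 * Illustrative sketch (not part of this file): a decapsulating receive
 * handler, e.g. for a GRE-like protocol, would typically resolve the
 * tunnel under rcu_read_lock() and hand the packet to ip_tunnel_rcv().
 * "itn", "tpi" and "log_ecn_error" are assumed to come from the caller's
 * context:
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	struct ip_tunnel *tunnel;
 *
 *	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *				  iph->saddr, iph->daddr, tpi->key);
 *	if (tunnel)
 *		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 */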

static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
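
/*
 * Illustrative note (not in the original file): when no explicit name is
 * given, the "%d" template above is expanded during register_netdevice(),
 * so an ops->kind of "gre" yields devices named "gre0", "gre1", and so
 * on. Hence the IFNAMSIZ - 3 check: room must remain for "%d" plus the
 * terminating NUL.
 */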

static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable MTU and needed_headroom. */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link,
				    tunnel->fwmark, 0);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}
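
/*
 * Worked example (illustrative, not in the original file): for a plain
 * GRE tunnel over a 1500-byte Ethernet underlay, t_hlen is the tunnel
 * header plus the outer IPv4 header. With no GRE options that is
 * 4 + 20 = 24 bytes, so the MTU computed above comes out at
 * 1500 - 24 = 1476 (assuming dev->hard_header_len is 0 for the
 * ARPHRD_IPGRE device).
 */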

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;
	int mtu;
	int err;

	dev = __ip_tunnel_create(net, itn->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	mtu = ip_tunnel_bind_dev(dev);
	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;

err_dev_set_mtu:
	unregister_netdevice(dev);
	return ERR_PTR(err);
}

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
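
/*
 * Illustrative sketch (not part of this file): a collect_md-aware caller
 * builds the metadata dst before handing the skb over, e.g. as the GRE
 * receive path does:
 *
 *	struct metadata_dst *tun_dst;
 *
 *	tun_dst = ip_tun_rx_dst(skb, tpi->flags,
 *				key_to_tunnel_id(tpi->key), 0);
 *	if (!tun_dst)
 *		return PACKET_REJECT;
 *	ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 */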

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
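
/*
 * Illustrative sketch (not part of this file): an encapsulation module
 * such as FOU registers its ops into one of the iptun_encaps[] slots,
 * roughly like this (callback names are placeholders):
 *
 *	static const struct ip_tunnel_encap_ops fou_iptun_ops = {
 *		.encap_hlen	= fou_encap_hlen,
 *		.build_header	= fou_build_header,
 *	};
 *
 *	err = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
 *
 * The cmpxchg() above makes registration atomic: a slot can only go from
 * NULL to a valid ops pointer, so two modules cannot claim the same slot.
 */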

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
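
/*
 * Illustrative note (not in the original file): t->hlen is the total
 * overhead added on top of the outer IPv4 header. For, say, GRE over
 * FOU, that is tun_hlen (the GRE header) plus encap_hlen (the UDP
 * encapsulation accounted for by the registered encap ops), and it feeds
 * the headroom and MTU arithmetic in ip_tunnel_bind_dev() above.
 */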

static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph,
			   int tunnel_hlen, __be32 dst, bool md)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size;
	int mtu;

	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel_hlen;
	else
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;
		__be32 daddr;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;
		daddr = md ? dst : tunnel->parms.iph.daddr;

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((daddr && !ipv4_is_multicast(daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
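
/*
 * Worked example (illustrative, not in the original file): with a
 * 1500-byte route MTU and a 4-byte GRE header, the DF branch above
 * computes mtu = 1500 - 20 - 4 = 1476 (assuming hard_header_len is 0).
 * A DF-set inner packet larger than that triggers ICMP_FRAG_NEEDED,
 * advertising mtu = 1476 so the sender's path MTU discovery converges.
 */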

void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		       u8 proto, int tunnel_hlen)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;
	bool use_cache;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
			    0, skb->mark, skb_get_hash(skb));
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;

	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
			    key->u.ipv4.dst, true)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	if (!df && skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);

	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
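
/*
 * Illustrative note (not in the original file): a key->tos of 1 acts as
 * the "inherit" sentinel for metadata-mode tunnels, copying the DSCP/ECN
 * field from the inner IPv4 or IPv6 header instead of using a fixed
 * outer TOS. Collect-metadata devices are created with, e.g.,
 * "ip link add gre1 type gre external"; the per-packet key (tun_id,
 * addresses, TOS, TTL) then arrives via skb_tunnel_info().
 */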

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info = NULL;
	const struct iphdr *inner_iph;
	unsigned int max_headroom;	/* The extra header space needed */
	struct rtable *rt = NULL;
	bool use_cache = false;
	struct flowi4 fl4;
	bool md = false;
	bool connected;
	u8 tos, ttl;
	__be32 dst;
	__be16 df;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst) {
			dst = tun_info->key.u.ipv4.dst;
			md = true;
			connected = true;
		} else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		if (!md)
			connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			    tunnel->fwmark, skb_get_hash(skb));

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	if (connected && md) {
		use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
		if (use_cache)
			rt = dst_cache_get_ip4(&tun_info->dst_cache,
					       &fl4.saddr);
	} else {
		rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
						   &fl4.saddr) : NULL;
	}

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl4.saddr);
		else if (!md && connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
			    0, 0, false)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off & htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
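
/*
 * Illustrative sketch (not part of this file): a point-to-point tunnel
 * driver's ndo_start_xmit typically reduces to a thin wrapper around
 * ip_tunnel_xmit(); hypothetical names, modeled on the IPIP driver:
 *
 *	static netdev_tx_t foo_tunnel_xmit(struct sk_buff *skb,
 *					   struct net_device *dev)
 *	{
 *		struct ip_tunnel *tunnel = netdev_priv(dev);
 *		const struct iphdr *tnl_params = &tunnel->parms.iph;
 *
 *		ip_tunnel_xmit(skb, dev, tnl_params, IPPROTO_IPIP);
 *		return NETDEV_TX_OK;
 *	}
 */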

static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags ^ nflags) &
				    (IFF_POINTOPOINT | IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
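
/*
 * Illustrative sketch (not part of this file): from userspace these
 * commands are reached through an ioctl() on an AF_INET socket, with a
 * struct ip_tunnel_parm hung off the ifreq, as iproute2 does:
 *
 *	struct ip_tunnel_parm p = { 0 };
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ);
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCGETTUNNEL, &ifr);
 *	(p now holds the tunnel's saddr/daddr/keys/flags)
 */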

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	itn->rtnl_link_ops = ops;
	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops || !net_has_fallback_tunnels(net)) {
		struct ip_tunnel_net *it_init_net;

		it_init_net = net_generic(&init_net, ip_tnl_net_id);
		itn->type = it_init_net->type;
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
		itn->type = itn->fb_tunnel_dev->type;
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
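
/*
 * Illustrative sketch (not part of this file): a tunnel module wires
 * this into its pernet_operations, paired with ip_tunnel_delete_nets()
 * on teardown; hypothetical names, modeled on the GRE driver:
 *
 *	static int __net_init foo_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, foo_net_id,
 *					  &foo_link_ops, "foo0");
 *	}
 *
 *	static void __net_exit foo_exit_batch_net(struct list_head *list_net)
 *	{
 *		ip_tunnel_delete_nets(list_net, foo_net_id, &foo_link_ops);
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init		= foo_init_net,
 *		.exit_batch	= foo_exit_batch_net,
 *		.id		= &foo_net_id,
 *		.size		= sizeof(struct ip_tunnel_net),
 *	};
 */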

static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
			      struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(net, itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
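
/*
 * Illustrative sketch (not part of this file): an rtnl_link_ops
 * implementation typically parses its netlink attributes into a
 * struct ip_tunnel_parm and then calls ip_tunnel_newlink(); hypothetical
 * names, modeled on the IPIP driver:
 *
 *	static int foo_newlink(struct net *src_net, struct net_device *dev,
 *			       struct nlattr *tb[], struct nlattr *data[],
 *			       struct netlink_ext_ack *extack)
 *	{
 *		struct ip_tunnel_parm p;
 *		__u32 fwmark = 0;
 *
 *		foo_netlink_parms(data, &p, &fwmark);
 *		return ip_tunnel_newlink(dev, tb, &p, fwmark);
 *	}
 */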

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* The fallback device is unregistered in the per-net exit path. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest is done in ip_tunnel_init(). */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");