1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43#include <linux/err.h>
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58#include <net/udp.h>
59
60#if IS_ENABLED(CONFIG_IPV6)
61#include <net/ipv6.h>
62#include <net/ip6_fib.h>
63#include <net/ip6_route.h>
64#endif
65
66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67{
68 return hash_32((__force u32)key ^ (__force u32)remote,
69 IP_TNL_HASH_BITS);
70}
71
/* Install @dst (with its matching source address @saddr) into one per-cpu
 * cache slot, dropping whatever entry was there before.
 *
 * The new dst's refcount is taken first; xchg() swaps the pointer
 * atomically with respect to concurrent readers, after which the old
 * entry's reference is released.
 * NOTE(review): idst->saddr is stored after the pointer swap without a
 * barrier — readers appear expected to tolerate a briefly stale saddr;
 * confirm against tunnel_rtable_get()'s usage.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
82
83static noinline void tunnel_dst_set(struct ip_tunnel *t,
84 struct dst_entry *dst, __be32 saddr)
85{
86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
87}
88
89static void tunnel_dst_reset(struct ip_tunnel *t)
90{
91 tunnel_dst_set(t, NULL, 0);
92}
93
94void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
95{
96 int i;
97
98 for_each_possible_cpu(i)
99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
100}
101EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
102
/* Fetch this CPU's cached route for tunnel @t, or NULL.
 *
 * A reference is taken with atomic_inc_not_zero() so a dst that is
 * concurrently being freed (refcount already zero) is never handed out.
 * An obsolete dst is revalidated through its ->check() hook with
 * @cookie; if it no longer checks out, this CPU's cache slot is cleared
 * and NULL is returned.  On success *@saddr is set to the source
 * address that was cached along with the route.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Route went stale: drop it from the cache and
			 * give back the reference we just took.
			 */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
126
127static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
129{
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
133 else
134
135 return false;
136 } else
137 return !(flags & TUNNEL_KEY);
138}
139
140
141
142
143
144
145
146
147
148
149
150
/* ip_tunnel_lookup - find the tunnel device a received packet belongs to
 * @itn:	per-netns tunnel table
 * @link:	ifindex of the interface the packet arrived on
 * @flags:	TUNNEL_* flags parsed from the packet header
 * @remote:	outer source address (the peer)
 * @local:	outer destination address (ours)
 * @key:	tunnel key carried by the packet, if any
 *
 * Scans four match classes, from most to least specific:
 *   1. exact (local, remote) pair
 *   2. remote-only (tunnel with a wildcard source)
 *   3. local-only, or local multicast group
 *   4. key-only fully wildcarded tunnels (skipped for keyless packets)
 * Within each class an exact @link match returns immediately; otherwise
 * the first hit is remembered in @cand as a fallback.  Failing all of
 * that, the collect_md (metadata) tunnel or the netns fallback device is
 * used.  Returns NULL if nothing matches.
 *
 * Runs under rcu_read_lock() (RCU hash list traversal).
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: fully specified (local, remote) tunnels. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel's local side is wildcard. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 live in the bucket for a wildcard remote. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: our address matches the tunnel's local side, or the
	 * packet was sent to a multicast group the tunnel targets.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: both addresses wildcarded, match on the key alone. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Metadata-mode tunnel catches anything not claimed above. */
	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
243
244static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
245 struct ip_tunnel_parm *parms)
246{
247 unsigned int h;
248 __be32 remote;
249 __be32 i_key = parms->i_key;
250
251 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
252 remote = parms->iph.daddr;
253 else
254 remote = 0;
255
256 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
257 i_key = 0;
258
259 h = ip_tunnel_hash(i_key, remote);
260 return &itn->tunnels[h];
261}
262
263static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
264{
265 struct hlist_head *head = ip_bucket(itn, &t->parms);
266
267 if (t->collect_md)
268 rcu_assign_pointer(itn->collect_md_tun, t);
269 hlist_add_head_rcu(&t->hash_node, head);
270}
271
272static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
273{
274 if (t->collect_md)
275 rcu_assign_pointer(itn->collect_md_tun, NULL);
276 hlist_del_init_rcu(&t->hash_node);
277}
278
279static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
280 struct ip_tunnel_parm *parms,
281 int type)
282{
283 __be32 remote = parms->iph.daddr;
284 __be32 local = parms->iph.saddr;
285 __be32 key = parms->i_key;
286 __be16 flags = parms->i_flags;
287 int link = parms->link;
288 struct ip_tunnel *t = NULL;
289 struct hlist_head *head = ip_bucket(itn, parms);
290
291 hlist_for_each_entry_rcu(t, head, hash_node) {
292 if (local == t->parms.iph.saddr &&
293 remote == t->parms.iph.daddr &&
294 link == t->parms.link &&
295 type == t->dev->type &&
296 ip_tunnel_key_match(&t->parms, flags, key))
297 break;
298 }
299 return t;
300}
301
/* Allocate and register a new tunnel net_device in @net.
 *
 * The device name comes from @parms->name when set; otherwise it is
 * "<kind>%d" and register_netdevice() picks the first free index.  The
 * -E2BIG check keeps room for the "%d" suffix within IFNAMSIZ.
 *
 * Copies @parms into the new tunnel's private area.  Returns the
 * registered device or an ERR_PTR().  Caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
347
348static inline void init_tunnel_flow(struct flowi4 *fl4,
349 int proto,
350 __be32 daddr, __be32 saddr,
351 __be32 key, __u8 tos, int oif)
352{
353 memset(fl4, 0, sizeof(*fl4));
354 fl4->flowi4_oif = oif;
355 fl4->daddr = daddr;
356 fl4->saddr = saddr;
357 fl4->flowi4_tos = tos;
358 fl4->flowi4_proto = proto;
359 fl4->fl4_gre_key = key;
360}
361
/* Resolve the lower device this tunnel transmits through and derive an
 * MTU for it.
 *
 * For a point-to-point tunnel (daddr set) the route to the peer is
 * looked up and cached per-cpu; the route's output device becomes the
 * lower device.  Failing that, parms.link is tried as an explicit
 * binding.  Headroom requirements are propagated from the lower device,
 * and the returned MTU is the lower MTU minus our own header overhead,
 * floored at 68 (the IPv4 minimum MTU).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom. */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route yet: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
408
409static struct ip_tunnel *ip_tunnel_create(struct net *net,
410 struct ip_tunnel_net *itn,
411 struct ip_tunnel_parm *parms)
412{
413 struct ip_tunnel *nt;
414 struct net_device *dev;
415
416 BUG_ON(!itn->fb_tunnel_dev);
417 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
418 if (IS_ERR(dev))
419 return ERR_CAST(dev);
420
421 dev->mtu = ip_tunnel_bind_dev(dev);
422
423 nt = netdev_priv(dev);
424 ip_tunnel_add(itn, nt);
425 return nt;
426}
427
/* ip_tunnel_rcv - common receive path for decapsulated tunnel packets
 * @tunnel:	tunnel the packet was matched to
 * @skb:	packet with the outer header already pulled
 * @tpi:	parsed tunnel header info (flags, key, seq)
 * @tun_dst:	metadata dst for collect_md mode, or NULL
 * @log_ecn_error: rate-limited logging of invalid ECN combinations
 *
 * Validates checksum/sequence expectations against the tunnel config,
 * decapsulates ECN, updates per-cpu stats, scrubs the skb on netns
 * crossings, and hands the packet to GRO.  Always returns 0; invalid
 * packets are dropped with the appropriate error counter bumped.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Packet and tunnel must agree on whether a checksum is present. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce in-order delivery when sequencing is configured. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the ECN combination is invalid and must drop. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Full scrub only when the packet crosses a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
500
501static int ip_encap_hlen(struct ip_tunnel_encap *e)
502{
503 const struct ip_tunnel_encap_ops *ops;
504 int hlen = -EINVAL;
505
506 if (e->type == TUNNEL_ENCAP_NONE)
507 return 0;
508
509 if (e->type >= MAX_IPTUN_ENCAP_OPS)
510 return -EINVAL;
511
512 rcu_read_lock();
513 ops = rcu_dereference(iptun_encaps[e->type]);
514 if (likely(ops && ops->encap_hlen))
515 hlen = ops->encap_hlen(e);
516 rcu_read_unlock();
517
518 return hlen;
519}
520
/* Registry of optional outer-encapsulation handlers, indexed by
 * TUNNEL_ENCAP_* type; entries are published/retired with cmpxchg()
 * and dereferenced under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
523
524int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
525 unsigned int num)
526{
527 if (num >= MAX_IPTUN_ENCAP_OPS)
528 return -ERANGE;
529
530 return !cmpxchg((const struct ip_tunnel_encap_ops **)
531 &iptun_encaps[num],
532 NULL, ops) ? 0 : -1;
533}
534EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
535
536int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
537 unsigned int num)
538{
539 int ret;
540
541 if (num >= MAX_IPTUN_ENCAP_OPS)
542 return -ERANGE;
543
544 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
545 &iptun_encaps[num],
546 ops, NULL) == ops) ? 0 : -1;
547
548 synchronize_net();
549
550 return ret;
551}
552EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
553
554int ip_tunnel_encap_setup(struct ip_tunnel *t,
555 struct ip_tunnel_encap *ipencap)
556{
557 int hlen;
558
559 memset(&t->encap, 0, sizeof(t->encap));
560
561 hlen = ip_encap_hlen(ipencap);
562 if (hlen < 0)
563 return hlen;
564
565 t->encap.type = ipencap->type;
566 t->encap.sport = ipencap->sport;
567 t->encap.dport = ipencap->dport;
568 t->encap.flags = ipencap->flags;
569
570 t->encap_hlen = hlen;
571 t->hlen = t->encap_hlen + t->tun_hlen;
572
573 return 0;
574}
575EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
576
577int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
578 u8 *protocol, struct flowi4 *fl4)
579{
580 const struct ip_tunnel_encap_ops *ops;
581 int ret = -EINVAL;
582
583 if (t->encap.type == TUNNEL_ENCAP_NONE)
584 return 0;
585
586 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
587 return -EINVAL;
588
589 rcu_read_lock();
590 ops = rcu_dereference(iptun_encaps[t->encap.type]);
591 if (likely(ops && ops->build_header))
592 ret = ops->build_header(skb, &t->encap, protocol, fl4);
593 rcu_read_unlock();
594
595 return ret;
596}
597EXPORT_SYMBOL(ip_tunnel_encap);
598
/* Check an outgoing skb against the tunnel path MTU and propagate PMTU.
 *
 * @df selects the reference MTU: with DF set we must fit the route to
 * the tunnel endpoint (minus our overhead); otherwise the skb's own dst
 * or the device MTU is used.  The inner flow's dst is told about the
 * MTU, and an ICMP/ICMPv6 "packet too big" error is returned to the
 * sender when a non-GSO packet exceeds it.
 *
 * Returns 0 if the packet may proceed, -E2BIG if it was rejected (the
 * ICMP error has already been sent).
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the lowered MTU on host routes (or p-t-p tunnels)
		 * so future IPv6 traffic is sized correctly.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
648
/* ip_tunnel_xmit - encapsulate and transmit one skb through tunnel @dev
 * @skb:	packet to send (inner network header already set)
 * @dev:	tunnel device
 * @tnl_params:	outer IPv4 header template (usually &tunnel->parms.iph)
 * @protocol:	outer IP protocol number
 *
 * Handles NBMA-style tunnels (template daddr of 0: the destination is
 * derived from the inner packet's routing), tos/ttl inheritance, the
 * optional extra encapsulation layer, per-cpu route caching for
 * "connected" tunnels, PMTU enforcement and headroom expansion.  On any
 * failure the skb is consumed and dev->stats.tx_errors is bumped.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;
	unsigned int max_headroom;
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "Connected" tunnels have a fixed peer and may use the per-cpu
	 * route cache; anything below can demote the flow to unconnected.
	 */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: no configured peer, recover the outer
		 * destination from the inner packet's own routing.
		 */
		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible v6 addresses embed an IPv4
			 * destination we can tunnel to.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back out of ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Report recent ICMP errors back to local senders for a while. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 in the template means "inherit from inner". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
813
/* Re-key an existing tunnel @t with the new parameters @p.
 *
 * The tunnel is unhashed, its addresses/keys rewritten, and rehashed,
 * because the hash bucket depends on daddr and i_key.  A link change
 * rebinds the lower device and, when @set_mtu, refreshes the MTU.
 * Cached routes are always flushed and a netdev change notification is
 * emitted.  Caller holds RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as their
		 * hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
846
/* ip_tunnel_ioctl - legacy SIOC{GET,ADD,CHG,DEL}TUNNEL handler
 * @dev:	device the ioctl was issued on (may be the fallback device)
 * @p:		tunnel parameters copied from/to userspace by the caller
 * @cmd:	one of SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/SIOCDELTUNNEL
 *
 * Issued on the fallback device, GET/DEL address an arbitrary tunnel
 * looked up by @p; on a specific device they act on that device.  ADD
 * creates a new tunnel (failing with -EEXIST on a clash), CHG re-keys
 * an existing one.  ADD/CHG/DEL require CAP_NET_ADMIN in the netns'
 * user namespace.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A nonzero TTL implies DF so PMTU discovery works. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				/* New parameters collide with another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing addresses must not change the
				 * device's broadcast/p-t-p character.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
949
950int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
951{
952 struct ip_tunnel *tunnel = netdev_priv(dev);
953 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
954
955 if (new_mtu < 68 ||
956 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
957 return -EINVAL;
958 dev->mtu = new_mtu;
959 return 0;
960}
961EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
962
/* Device destructor: release per-device resources in the reverse order
 * of ip_tunnel_init(), then free the netdev itself (must come last —
 * @tunnel lives inside @dev's private area).
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
972
973void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
974{
975 struct ip_tunnel *tunnel = netdev_priv(dev);
976 struct ip_tunnel_net *itn;
977
978 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
979
980 if (itn->fb_tunnel_dev != dev) {
981 ip_tunnel_del(itn, netdev_priv(dev));
982 unregister_netdevice_queue(dev, head);
983 }
984}
985EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
986
987struct net *ip_tunnel_get_link_net(const struct net_device *dev)
988{
989 struct ip_tunnel *tunnel = netdev_priv(dev);
990
991 return tunnel->net;
992}
993EXPORT_SYMBOL(ip_tunnel_get_link_net);
994
995int ip_tunnel_get_iflink(const struct net_device *dev)
996{
997 struct ip_tunnel *tunnel = netdev_priv(dev);
998
999 return tunnel->parms.link;
1000}
1001EXPORT_SYMBOL(ip_tunnel_get_iflink);
1002
/* Per-netns init for a tunnel type: set up the hash table and, when
 * @ops is given, create the fallback device named @devname.
 *
 * The fallback device is marked NETNS_LOCAL so it can never be moved
 * out of its netns.  A NULL @ops means the tunnel type has no fallback
 * device (netlink-only).  Returns 0 or the fallback creation error.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1037
/* Queue every tunnel device belonging to @itn for unregistration.
 *
 * Two passes: first every device in this netns created by @ops, then
 * any tunnel still hashed in @itn whose device lives in a *different*
 * netns (possible when a device was created with a link netns) — the
 * for_each_netdev walk above cannot find those.  Devices are collected
 * on @head for a batched unregister_netdevice_many().  Caller holds
 * RTNL.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1062
/* Per-netns exit helper: tear down all tunnels of type @ops in the
 * netns owning @itn in one batched unregistration under RTNL.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1073
/* rtnl newlink handler: register the already-allocated tunnel device
 * @dev with parameters @p and hash it into the per-netns table.
 *
 * Only one collect_md (metadata) tunnel is allowed per netns; regular
 * tunnels must not duplicate an existing configuration.  Unless the
 * user supplied IFLA_MTU, the MTU derived from the lower device is
 * applied.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1112
/* rtnl changelink handler: apply new parameters @p to tunnel device
 * @dev.
 *
 * The fallback device cannot be reconfigured.  If @p matches another
 * existing tunnel, fail with -EEXIST; otherwise the new addresses must
 * not flip the device's broadcast/point-to-point character.  The MTU is
 * only refreshed when the user did not pin it via IFLA_MTU.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1150
1151int ip_tunnel_init(struct net_device *dev)
1152{
1153 struct ip_tunnel *tunnel = netdev_priv(dev);
1154 struct iphdr *iph = &tunnel->parms.iph;
1155 int err;
1156
1157 dev->destructor = ip_tunnel_dev_free;
1158 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1159 if (!dev->tstats)
1160 return -ENOMEM;
1161
1162 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1163 if (!tunnel->dst_cache) {
1164 free_percpu(dev->tstats);
1165 return -ENOMEM;
1166 }
1167
1168 err = gro_cells_init(&tunnel->gro_cells, dev);
1169 if (err) {
1170 free_percpu(tunnel->dst_cache);
1171 free_percpu(dev->tstats);
1172 return err;
1173 }
1174
1175 tunnel->dev = dev;
1176 tunnel->net = dev_net(dev);
1177 strcpy(tunnel->parms.name, dev->name);
1178 iph->version = 4;
1179 iph->ihl = 5;
1180
1181 if (tunnel->collect_md) {
1182 dev->features |= NETIF_F_NETNS_LOCAL;
1183 netif_keep_dst(dev);
1184 }
1185 return 0;
1186}
1187EXPORT_SYMBOL_GPL(ip_tunnel_init);
1188
1189void ip_tunnel_uninit(struct net_device *dev)
1190{
1191 struct ip_tunnel *tunnel = netdev_priv(dev);
1192 struct net *net = tunnel->net;
1193 struct ip_tunnel_net *itn;
1194
1195 itn = net_generic(net, tunnel->ip_tnl_net_id);
1196
1197 if (itn->fb_tunnel_dev != dev)
1198 ip_tunnel_del(itn, netdev_priv(dev));
1199
1200 ip_tunnel_dst_reset_all(tunnel);
1201}
1202EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1203
1204
1205void ip_tunnel_setup(struct net_device *dev, int net_id)
1206{
1207 struct ip_tunnel *tunnel = netdev_priv(dev);
1208 tunnel->ip_tnl_net_id = net_id;
1209}
1210EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1211
1212MODULE_LICENSE("GPL");
1213