1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43#include <linux/err.h>
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58#include <net/udp.h>
59
60#if IS_ENABLED(CONFIG_IPV6)
61#include <net/ipv6.h>
62#include <net/ip6_fib.h>
63#include <net/ip6_route.h>
64#endif
65
66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67{
68 return hash_32((__force u32)key ^ (__force u32)remote,
69 IP_TNL_HASH_BITS);
70}
71
/* Publish a new cached route (and the source address it was resolved
 * with) into one per-cpu cache slot, releasing whatever was there.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);		/* take a reference owned by the cache */
	/* Atomically swap in the new dst so a concurrent reader never
	 * observes a half-updated slot.
	 */
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
82
/* Cache @dst/@saddr in the current CPU's slot of @t's dst cache. */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
				    struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
88
/* Drop the current CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
93
/* Drop the cached route on every possible CPU, e.g. after tunnel
 * parameters change and the cached entries may point the wrong way.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
102
/* Fetch this CPU's cached route, taking a reference on it.
 *
 * Returns NULL when nothing is cached, when the dst's refcount already
 * dropped to zero (it must not be resurrected), or when a stale dst
 * fails its ops->check() revalidation — in which case the stale slot is
 * also cleared.  On success *saddr is set to the source address the
 * cached route was built with.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Only take the dst if we can still get a non-zero reference. */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: purge the cache slot and drop
			 * the reference we just took.
			 */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
126
127static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
129{
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
133 else
134
135 return false;
136 } else
137 return !(flags & TUNNEL_KEY);
138}
139
140
141
142
143
144
145
146
147
148
149
150
/* Find the tunnel device an incoming packet belongs to.
 *
 * Candidates are tried from most to least specific:
 *   1. exact (remote, local) address match;
 *   2. remote match with wildcard (zero) local address;
 *   3. wildcard remote with matching local, or local multicast;
 *   4. keyed tunnels with both addresses wildcarded (skipped when the
 *      packet explicitly carries TUNNEL_NO_KEY).
 * Within each pass an exact parms.link match wins outright; otherwise
 * the first otherwise-matching tunnel is remembered as a fallback
 * candidate.  If nothing matches, the per-netns fallback device is
 * returned when it is up, else NULL.
 *
 * Runs under RCU; only IFF_UP devices are considered.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Wildcard-remote tunnels hash with remote == 0. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240
241static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
243{
244 unsigned int h;
245 __be32 remote;
246 __be32 i_key = parms->i_key;
247
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
250 else
251 remote = 0;
252
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254 i_key = 0;
255
256 h = ip_tunnel_hash(i_key, remote);
257 return &itn->tunnels[h];
258}
259
/* Insert tunnel @t into its netns hash table (RCU-safe for readers). */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
266
/* Unhash tunnel @t (RCU-safe; node is re-initialized for later re-add). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
271
/* Find an existing tunnel whose configuration exactly matches @parms
 * (addresses, underlay link, device type and key policy).  Returns the
 * tunnel or NULL.  Unlike ip_tunnel_lookup() this is a configuration
 * lookup, used when creating/changing tunnels.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}
294
/* Allocate and register a new tunnel net_device in @net.
 *
 * The device name comes from @parms->name when set, otherwise it is
 * derived from the link ops kind plus a "%d" template that the netdev
 * core expands to a unique index.  Returns the registered device or an
 * ERR_PTR.  Caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind plus "%d" and the NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
340
341static inline void init_tunnel_flow(struct flowi4 *fl4,
342 int proto,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
345{
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
348 fl4->daddr = daddr;
349 fl4->saddr = saddr;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
353}
354
/* Bind the tunnel to its underlay device and compute a suitable MTU.
 *
 * For a fixed-remote tunnel, route to the destination to discover the
 * underlay device (caching the route on success).  Otherwise fall back
 * to the configured parms.link index.  Returns the tunnel MTU derived
 * from the underlay (never below 68, the IPv4 minimum).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom. */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
401
/* Create a tunnel from ioctl-supplied parameters, using the fallback
 * device's link ops, then bind it and add it to the hash table.
 * Returns the new tunnel or an ERR_PTR.
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
420
/* Common receive path for decapsulated tunnel packets.
 *
 * Enforces the tunnel's checksum and sequence-number policy (a packet
 * must carry exactly the TUNNEL_CSUM/TUNNEL_SEQ state the tunnel is
 * configured with, and sequence numbers must advance), applies ECN
 * decapsulation, accounts rx stats, and hands the skb to the GRO cell.
 * Always consumes the skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must match the tunnel's configuration. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Drop packets without a sequence number or with one
		 * that goes backwards (signed compare handles wrap).
		 */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		/* err > 1 means the packet must be dropped, not just logged. */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
/* Return the extra header length required by the configured outer
 * encapsulation type, 0 for no encapsulation, or -EINVAL when the type
 * is out of range or no handler is registered.
 */
static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}
509
/* Per-type table of registered outer-encapsulation handlers; entries
 * are published/retired with cmpxchg and read under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
512
/* Register encapsulation ops for slot @num.  Returns 0 on success,
 * -ERANGE for a bad slot, or -1 if the slot is already taken (the
 * cmpxchg only succeeds against a NULL entry).
 */
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
524
/* Unregister encapsulation ops from slot @num.  Only succeeds when the
 * slot still holds @ops; waits for RCU readers before returning so the
 * caller may free the ops structure.  Returns 0, -ERANGE, or -1 on a
 * slot mismatch.
 */
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
542
/* Apply a new outer-encapsulation configuration to tunnel @t and
 * recompute the cached header lengths.  Returns 0 or a negative errno
 * from ip_encap_hlen() (in which case t->encap stays zeroed).
 */
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
565
/* Build the outer encapsulation header (if any) for @skb; the handler
 * may update *protocol and the flow key.  Returns 0 when no
 * encapsulation is configured, a handler result, or -EINVAL when the
 * type is bad or no handler is registered.
 */
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);
587
/* Path-MTU handling on the tunnel transmit path.
 *
 * Computes the usable inner MTU from the tunnel route (when DF is set)
 * or the inner dst, propagates it to the inner route, and generates
 * ICMP FRAG_NEEDED / ICMPv6 PKT_TOOBIG back to the sender when a
 * non-GSO packet would not fit.  Returns 0 to proceed with
 * transmission or -E2BIG after signalling the sender.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Pin the reduced MTU on host routes (or fixed-remote
		 * tunnels) so future lookups see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
635
/* Common transmit path: encapsulate @skb in an IPv4 header built from
 * @tnl_params and send it through the tunnel's underlay route.
 *
 * Handles NBMA-style tunnels (zero daddr resolved from the inner
 * packet/neighbour), TOS/TTL inheritance, outer-encap headers, per-cpu
 * route caching, PMTU enforcement, and error-burst suppression.
 * Consumes the skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;
	unsigned int max_headroom;
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* Only fixed-remote tunnels may use the per-cpu route cache. */
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing state.
		 */
		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 destination.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet",
		 * which also makes the route per-packet (uncacheable).
		 */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* A route back through ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending, report link failure to
	 * the sender instead of blindly retransmitting.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
800
/* Apply new parameters to an existing tunnel.
 *
 * The tunnel is unhashed first and re-added after the key/address
 * fields change, because those determine its hash bucket.  Rebinds the
 * underlay (and optionally the MTU) when the link changed, flushes the
 * per-cpu route cache, and notifies userspace of the device change.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device's hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
833
/* Legacy ioctl interface for tunnel management.
 *
 * SIOCGETTUNNEL: read parameters (on the fallback device, look up the
 * tunnel described by @p first).  SIOCADDTUNNEL/SIOCCHGTUNNEL: create
 * or update a tunnel (CAP_NET_ADMIN required; DF is forced when a TTL
 * is set, and keys are cleared for non-VTI tunnels that don't use
 * TUNNEL_KEY).  SIOCDELTUNNEL: unregister a tunnel, refusing to delete
 * the per-netns fallback device itself.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				/* @p describes a different device:
				 * refuse to steal its identity.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The new addressing mode must match the
				 * device's broadcast/p2p nature.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			/* Never delete the fallback device itself. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
936
937int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
938{
939 struct ip_tunnel *tunnel = netdev_priv(dev);
940 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
941
942 if (new_mtu < 68 ||
943 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
944 return -EINVAL;
945 dev->mtu = new_mtu;
946 return 0;
947}
948EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
949
/* Device destructor: tear down tunnel-private state allocated in
 * ip_tunnel_init(), then free the netdev itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
959
/* rtnl dellink handler: unhash and queue the device for unregistration,
 * except for the per-netns fallback device which is only removed when
 * the netns goes away.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
973
/* Report the netns the tunnel transmits in (may differ from the netns
 * the device lives in).
 */
struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
981
/* Report the ifindex of the underlay device the tunnel is bound to. */
int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
989
/* Per-netns setup for a tunnel type: initialize the hash table and,
 * when @ops is given, create the fallback device named @devname.  The
 * fallback device is pinned to its netns (NETIF_F_NETNS_LOCAL).
 * Returns 0 or the error from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Tunnel types without a fallback device (e.g. no
		 * netlink ops) only need the hash table.
		 */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1024
/* Queue every device of this tunnel type for unregistration when the
 * netns is torn down: all devices in @net with matching link ops, plus
 * any hashed tunnel whose device lives in a different netns than the
 * one being destroyed (it was moved cross-netns and would otherwise be
 * missed by the for_each_netdev walk).
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1049
/* Per-netns teardown: batch-unregister every tunnel device of this
 * type under a single RTNL critical section.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1060
/* rtnl newlink handler: register @dev as a tunnel with parameters @p.
 *
 * Fails with -EEXIST when an identical tunnel already exists.  After
 * registration, Ethernet-type tunnels without an explicit address get
 * a random MAC, the underlay is bound (setting the MTU unless the user
 * supplied IFLA_MTU), and the tunnel is hashed.  Returns 0 or the
 * register_netdevice() error.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1095
/* rtnl changelink handler: update @dev with parameters @p.
 *
 * Refuses to modify the fallback device and to take over parameters
 * that already describe a different tunnel (-EEXIST).  For non-Ethernet
 * tunnels the new addressing mode (broadcast/p2p) must match the
 * device's flags.  The MTU is recomputed unless IFLA_MTU was supplied.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1133
1134int ip_tunnel_init(struct net_device *dev)
1135{
1136 struct ip_tunnel *tunnel = netdev_priv(dev);
1137 struct iphdr *iph = &tunnel->parms.iph;
1138 int err;
1139
1140 dev->destructor = ip_tunnel_dev_free;
1141 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1142 if (!dev->tstats)
1143 return -ENOMEM;
1144
1145 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1146 if (!tunnel->dst_cache) {
1147 free_percpu(dev->tstats);
1148 return -ENOMEM;
1149 }
1150
1151 err = gro_cells_init(&tunnel->gro_cells, dev);
1152 if (err) {
1153 free_percpu(tunnel->dst_cache);
1154 free_percpu(dev->tstats);
1155 return err;
1156 }
1157
1158 tunnel->dev = dev;
1159 tunnel->net = dev_net(dev);
1160 strcpy(tunnel->parms.name, dev->name);
1161 iph->version = 4;
1162 iph->ihl = 5;
1163
1164 return 0;
1165}
1166EXPORT_SYMBOL_GPL(ip_tunnel_init);
1167
/* ndo_uninit handler: unhash the tunnel (the fallback device is never
 * hashed out here) and flush its per-cpu route cache.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregisted in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1182
1183
/* Record which pernet subsystem id this tunnel device belongs to; used
 * by the common handlers above to find the right ip_tunnel_net.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1190
1191MODULE_LICENSE("GPL");
1192