/*
 * Generic IPv4 tunnel infrastructure shared by ipip, GRE, VTI and
 * similar drivers: tunnel hashing and lookup, pluggable encapsulation
 * handling, the common receive and transmit paths, and the ioctl and
 * netlink management helpers.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

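/* Hash tunnels into the per-netns lookup table on (key, remote address). */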
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

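/* Key matching is exact: a keyed packet matches only a tunnel configured
 * with the same key, and a keyless packet matches only a keyless tunnel.
 */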
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* no key to compare with on input */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

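/* Given src, dst and key, find the tunnel an incoming packet belongs to.
 * Candidates are tried from most to least specific (saddr+daddr, daddr
 * only, local or multicast address, key only).  An exact link match wins
 * outright; otherwise the first candidate found is remembered and
 * returned at the end.  Packets that match no configured tunnel fall
 * back to the collect_md device or, failing that, to the per-netns
 * fallback tunnel: no source, no destination, no key, no options.
 */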
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

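/* Pick the hash bucket a tunnel with the given parameters lives in.
 * Multicast and wildcard destinations hash as remote == 0, and VTI
 * tunnels ignore the key unless TUNNEL_KEY is set.
 */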
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

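/* Writers of the hash table are serialized by RTNL; lookups run under
 * RCU, hence the _rcu list primitives here.
 */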
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

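/* Unlike ip_tunnel_lookup(), this matches a tunnel by its configured
 * parameters (exact link and device type), as used by the ioctl and
 * netlink paths.
 */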
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

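/* Allocate and register a tunnel net_device.  If no name was supplied,
 * one is generated from the rtnl_link_ops kind plus a "%d" suffix.
 * Must be called under RTNL.
 */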
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}

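/* Bind the tunnel to an underlying device by routing toward the remote
 * endpoint, and derive the headroom the device needs.  Returns the MTU
 * to use for the tunnel.
 */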
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable MTU and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}

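/* Create a tunnel from ioctl parameters, using the fallback device's
 * rtnl_link_ops.  Called under RTNL.
 */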
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}

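/* Common receive path for decapsulated packets: enforce the tunnel's
 * checksum and sequence-number policy, decapsulate ECN, update stats
 * and hand the packet to the device's GRO cell.
 */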
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) && !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

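/* Optional outer encapsulations (such as FOU) plug in through the
 * iptun_encaps ops table, indexed by encapsulation type and protected
 * by RCU.
 */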
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;

static int ip_encap_hlen(struct ip_tunnel_encap *e)
{
	const struct ip_tunnel_encap_ops *ops;
	int hlen = -EINVAL;

	if (e->type == TUNNEL_ENCAP_NONE)
		return 0;

	if (e->type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[e->type]);
	if (likely(ops && ops->encap_hlen))
		hlen = ops->encap_hlen(e);
	rcu_read_unlock();

	return hlen;
}

int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);

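/* Check the packet against the tunnel path MTU, propagating any change
 * to the inner dst and sending ICMP "fragmentation needed" (or ICMPv6
 * "packet too big") back toward the sender when the packet does not fit.
 */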
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

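/* Common transmit path: resolve the outer route (using the per-tunnel
 * dst cache when the tunnel is connected), derive the destination for
 * NBMA tunnels from the inner headers, inherit TOS/TTL as configured,
 * enforce PMTU and headroom, then emit via iptunnel_xmit().
 */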
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;
	unsigned int max_headroom;
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off & htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

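/* Apply new endpoint/key parameters to an existing tunnel: re-hash it,
 * refresh the link-layer addresses, rebind the underlying device and
 * flush the cached route.  Called under RTNL.
 */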
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

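/* Back end for the SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/
 * SIOCDELTUNNEL ioctls used by tunnel drivers.
 */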
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags ^ nflags) &
				    (IFF_POINTOPOINT | IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

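/* 0xFFF8 is the largest IPv4 datagram size (65535) rounded down to an
 * 8-byte fragment boundary; the link and tunnel headers are subtracted
 * from it to bound the tunnel MTU.
 */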
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;

	if (new_mtu < 68)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}

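/* Remove a tunnel on netlink dellink.  The fallback device is never
 * removed this way; it is torn down with its network namespace.
 */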
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

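/* Per-netns init: set up the hash table and, when rtnl_link_ops are
 * given, create the namespace's fallback tunnel device.
 */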
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);

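/* rtnl newlink helper: reject duplicate tunnels, register the device,
 * bind it to pick an MTU (unless one was supplied) and hash it.
 */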
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

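/* ndo_init callback shared by tunnel drivers: allocate the per-cpu
 * stats, the cached route and the GRO cells, and seed the outer
 * IPv4 header.
 */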
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	/* The fallback device is unregistered on netns exit instead. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization; the rest happens in ip_tunnel_init. */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");