1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57#include <net/udp.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
72 __be16 flags, __be32 key)
73{
74 if (p->i_flags & TUNNEL_KEY) {
75 if (flags & TUNNEL_KEY)
76 return key == p->i_key;
77 else
78
79 return false;
80 } else
81 return !(flags & TUNNEL_KEY);
82}
83
84
85
86
87
88
89
90
91
92
93
94
/**
 * ip_tunnel_lookup - find the tunnel device for an incoming packet
 * @itn:    per-netns tunnel state
 * @link:   ifindex the packet arrived on
 * @flags:  TUNNEL_* flags parsed from the packet header
 * @remote: outer source address of the packet (our remote endpoint)
 * @local:  outer destination address of the packet (our local endpoint)
 * @key:    tunnel key from the packet, meaningful when TUNNEL_KEY is set
 *
 * Tries progressively looser matches:
 *   1. exact (local, remote) pair,
 *   2. remote-only tunnels (wildcard saddr),
 *   3. local-only tunnels, or tunnels whose daddr equals a multicast @local,
 *   4. key-only wildcard tunnels (both addresses zero).
 * A tunnel that matches everything except @link is remembered in @cand and
 * used only when no same-link match exists.  Failing all passes, the
 * collect_md device and finally the netns fallback device are returned.
 *
 * Runs under RCU: the hash-chain walks use hlist_for_each_entry_rcu() and
 * collect_md_tun is read with rcu_dereference().
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: fully specified tunnels (both endpoints set). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote-only tunnels (wildcard local address). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Wildcard-remote tunnels hash with remote == 0. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local-only tunnels, or multicast @local matching daddr. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only wildcard tunnels (both addresses zero). */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Metadata-collection device catches anything not matched above. */
	t = rcu_dereference(itn->collect_md_tun);
	if (t)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
186EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187
188static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
189 struct ip_tunnel_parm *parms)
190{
191 unsigned int h;
192 __be32 remote;
193 __be32 i_key = parms->i_key;
194
195 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
196 remote = parms->iph.daddr;
197 else
198 remote = 0;
199
200 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
201 i_key = 0;
202
203 h = ip_tunnel_hash(i_key, remote);
204 return &itn->tunnels[h];
205}
206
/* Insert @t into the per-netns tunnel hash and, for metadata-mode tunnels,
 * publish it as the netns collect_md device.  Callers are expected to
 * serialize add/del against each other (presumably via RTNL — the RCU
 * primitives here only protect concurrent readers).
 */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}
215
/* Remove @t from the per-netns tunnel hash; clear the collect_md pointer
 * first so new RCU lookups cannot return a tunnel being unhashed.
 */
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
222
223static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
224 struct ip_tunnel_parm *parms,
225 int type)
226{
227 __be32 remote = parms->iph.daddr;
228 __be32 local = parms->iph.saddr;
229 __be32 key = parms->i_key;
230 int link = parms->link;
231 struct ip_tunnel *t = NULL;
232 struct hlist_head *head = ip_bucket(itn, parms);
233
234 hlist_for_each_entry_rcu(t, head, hash_node) {
235 if (local == t->parms.iph.saddr &&
236 remote == t->parms.iph.daddr &&
237 key == t->parms.i_key &&
238 link == t->parms.link &&
239 type == t->dev->type)
240 break;
241 }
242 return t;
243}
244
/* Allocate, initialize and register a tunnel net_device.
 *
 * If @parms->name is empty, the name template is built from ops->kind plus
 * a "%d" unit suffix so register_netdevice() can pick a free unit number.
 * Must be called under RTNL (ASSERT_RTNL below).
 *
 * Returns the new device, or an ERR_PTR() on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for kind + "%d" + terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* The ip_tunnel lives in the device's private area. */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
290
291static inline void init_tunnel_flow(struct flowi4 *fl4,
292 int proto,
293 __be32 daddr, __be32 saddr,
294 __be32 key, __u8 tos, int oif)
295{
296 memset(fl4, 0, sizeof(*fl4));
297 fl4->flowi4_oif = oif;
298 fl4->daddr = daddr;
299 fl4->saddr = saddr;
300 fl4->flowi4_tos = tos;
301 fl4->flowi4_proto = proto;
302 fl4->fl4_gre_key = key;
303}
304
/* Bind the tunnel to its underlay device and compute a suitable MTU.
 *
 * For a fixed remote endpoint, a route lookup discovers the outgoing
 * device; otherwise parms.link (if set) names it.  The returned MTU is
 * the underlay MTU minus this tunnel's header overhead, floored at 68
 * (the IPv4 minimum).  headroom is also sized from the underlay device.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess the output device by routing toward the remote endpoint. */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		/* Any cached route predates this (re)bind. */
		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
352
353static struct ip_tunnel *ip_tunnel_create(struct net *net,
354 struct ip_tunnel_net *itn,
355 struct ip_tunnel_parm *parms)
356{
357 struct ip_tunnel *nt, *fbt;
358 struct net_device *dev;
359
360 BUG_ON(!itn->fb_tunnel_dev);
361 fbt = netdev_priv(itn->fb_tunnel_dev);
362 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
363 if (IS_ERR(dev))
364 return ERR_CAST(dev);
365
366 dev->mtu = ip_tunnel_bind_dev(dev);
367
368 nt = netdev_priv(dev);
369 ip_tunnel_add(itn, nt);
370 return nt;
371}
372
/**
 * ip_tunnel_rcv - common decapsulated-packet receive path
 * @tunnel:        tunnel the packet was matched to
 * @skb:           packet with the outer header already stripped
 * @tpi:           parsed tunnel protocol info (flags, key, seq)
 * @tun_dst:       metadata dst to attach for collect_md mode (may be NULL)
 * @log_ecn_error: ratelimit-log invalid ECN combinations
 *
 * Validates the checksum-presence and sequence-number expectations of the
 * tunnel, applies ECN decapsulation, updates per-CPU stats, and hands the
 * packet to GRO.  Always consumes @skb.  Returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Packet and tunnel must agree on whether TUNNEL_CSUM is in use. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce monotonically increasing sequence numbers if configured. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the ECN combination is invalid and the packet
	 * must be dropped; err == 1 is log-only.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
444EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
445
446static int ip_encap_hlen(struct ip_tunnel_encap *e)
447{
448 const struct ip_tunnel_encap_ops *ops;
449 int hlen = -EINVAL;
450
451 if (e->type == TUNNEL_ENCAP_NONE)
452 return 0;
453
454 if (e->type >= MAX_IPTUN_ENCAP_OPS)
455 return -EINVAL;
456
457 rcu_read_lock();
458 ops = rcu_dereference(iptun_encaps[e->type]);
459 if (likely(ops && ops->encap_hlen))
460 hlen = ops->encap_hlen(e);
461 rcu_read_unlock();
462
463 return hlen;
464}
465
/* Registry of encapsulation handlers, indexed by encap type.  Entries are
 * installed/removed atomically with cmpxchg (see the add/del ops helpers)
 * and read under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
468
469int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
470 unsigned int num)
471{
472 if (num >= MAX_IPTUN_ENCAP_OPS)
473 return -ERANGE;
474
475 return !cmpxchg((const struct ip_tunnel_encap_ops **)
476 &iptun_encaps[num],
477 NULL, ops) ? 0 : -1;
478}
479EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
480
481int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
482 unsigned int num)
483{
484 int ret;
485
486 if (num >= MAX_IPTUN_ENCAP_OPS)
487 return -ERANGE;
488
489 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
490 &iptun_encaps[num],
491 ops, NULL) == ops) ? 0 : -1;
492
493 synchronize_net();
494
495 return ret;
496}
497EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
498
499int ip_tunnel_encap_setup(struct ip_tunnel *t,
500 struct ip_tunnel_encap *ipencap)
501{
502 int hlen;
503
504 memset(&t->encap, 0, sizeof(t->encap));
505
506 hlen = ip_encap_hlen(ipencap);
507 if (hlen < 0)
508 return hlen;
509
510 t->encap.type = ipencap->type;
511 t->encap.sport = ipencap->sport;
512 t->encap.dport = ipencap->dport;
513 t->encap.flags = ipencap->flags;
514
515 t->encap_hlen = hlen;
516 t->hlen = t->encap_hlen + t->tun_hlen;
517
518 return 0;
519}
520EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
521
522int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
523 u8 *protocol, struct flowi4 *fl4)
524{
525 const struct ip_tunnel_encap_ops *ops;
526 int ret = -EINVAL;
527
528 if (t->encap.type == TUNNEL_ENCAP_NONE)
529 return 0;
530
531 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
532 return -EINVAL;
533
534 rcu_read_lock();
535 ops = rcu_dereference(iptun_encaps[t->encap.type]);
536 if (likely(ops && ops->build_header))
537 ret = ops->build_header(skb, &t->encap, protocol, fl4);
538 rcu_read_unlock();
539
540 return ret;
541}
542EXPORT_SYMBOL(ip_tunnel_encap);
543
/* Path-MTU handling for a packet about to be tunneled.
 *
 * Computes the effective inner MTU (outer route MTU minus tunnel
 * overhead when DF is set), propagates it to the inner dst, and — when
 * the packet is too big and not GSO — emits the appropriate "too big"
 * ICMP/ICMPv6 error back to the sender and returns -E2BIG.  Returns 0
 * when the packet may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			/* Clear IP control block state before icmp_send(). */
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Pin the reduced MTU on the IPv6 route for point-to-point
		 * tunnels or host routes.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
593
/**
 * ip_tunnel_xmit - common transmit path for IPv4 tunnels
 * @skb:        inner packet to encapsulate
 * @dev:        tunnel device
 * @tnl_params: outer IP header template (addresses, tos, ttl, frag_off)
 * @protocol:   outer IP protocol number
 *
 * Resolves the outer destination (including the NBMA case where the
 * template daddr is 0), applies optional encapsulation, routes the outer
 * packet (with per-tunnel dst caching for connected tunnels), performs
 * PMTU handling, then emits via iptunnel_xmit().  Consumes @skb on all
 * paths; errors are accounted in dev->stats.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	/* "connected" (fixed remote) tunnels may use the cached route. */
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the inner
		 * packet's own routing state.
		 */
		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a usable
			 * IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	/* Routing back out of ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Report recent tunnel errors back to local senders for a while. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 in the template means "inherit from inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
757EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
758
/* Apply new parameters to an existing tunnel.
 *
 * The tunnel is unhashed first, its endpoints/keys updated, then
 * re-inserted — the new parameters may select a different hash bucket.
 * If the underlay link changed, the device is re-bound and the MTU
 * recomputed (applied only when @set_mtu).
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the IP endpoints as the
		 * device hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Cached routes are stale for the new configuration. */
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
791
/**
 * ip_tunnel_ioctl - legacy SIOC{GET,ADD,CHG,DEL}TUNNEL handler
 * @dev: device the ioctl was issued on
 * @p:   tunnel parameters from/to userspace (copied by the caller)
 * @cmd: SIOCGETTUNNEL / SIOCADDTUNNEL / SIOCCHGTUNNEL / SIOCDELTUNNEL
 *
 * Add/change/delete require CAP_NET_ADMIN in the device's user namespace.
 * Operations on the per-netns fallback device address the tunnel named by
 * @p rather than the fallback itself.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			/* Querying via the fallback device: look up the
			 * tunnel described by @p, else report the fallback.
			 */
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL requires DF so PMTU discovery works. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Keys are only meaningful with TUNNEL_KEY set. */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_RET(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p must not collide with another tunnel. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Changing broadcast/p2p mode of a live
				 * device is not supported.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
893EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
894
895int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
896{
897 struct ip_tunnel *tunnel = netdev_priv(dev);
898 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
899 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
900
901 if (new_mtu < 68)
902 return -EINVAL;
903
904 if (new_mtu > max_mtu) {
905 if (strict)
906 return -EINVAL;
907
908 new_mtu = max_mtu;
909 }
910
911 dev->mtu = new_mtu;
912 return 0;
913}
914EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
915
/* ndo_change_mtu-style entry point: strict validation, no clamping. */
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
920EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
921
/* Device destructor: release per-tunnel resources (GRO cells, dst cache,
 * per-CPU stats) before freeing the netdev itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
931
932void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
933{
934 struct ip_tunnel *tunnel = netdev_priv(dev);
935 struct ip_tunnel_net *itn;
936
937 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
938
939 if (itn->fb_tunnel_dev != dev) {
940 ip_tunnel_del(itn, netdev_priv(dev));
941 unregister_netdevice_queue(dev, head);
942 }
943}
944EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
945
946struct net *ip_tunnel_get_link_net(const struct net_device *dev)
947{
948 struct ip_tunnel *tunnel = netdev_priv(dev);
949
950 return tunnel->net;
951}
952EXPORT_SYMBOL(ip_tunnel_get_link_net);
953
954int ip_tunnel_get_iflink(const struct net_device *dev)
955{
956 struct ip_tunnel *tunnel = netdev_priv(dev);
957
958 return tunnel->parms.link;
959}
960EXPORT_SYMBOL(ip_tunnel_get_iflink);
961
/**
 * ip_tunnel_init_net - per-netns initialization for a tunnel type
 * @net:          the network namespace being set up
 * @ip_tnl_net_id: pernet id of this tunnel type
 * @ops:          rtnl_link_ops for the fallback device, or NULL for none
 * @devname:      name for the fallback device (e.g. "gre0"), may be NULL
 *
 * Initializes the tunnel hash and, when @ops is given, creates and hashes
 * the netns fallback device under RTNL.  Returns 0 or a negative errno
 * from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* This tunnel type has no fallback device. */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The fallback device is pinned to this netns and hashed like any
	 * other tunnel so lookups can fall back to it.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	if (IS_ERR(itn->fb_tunnel_dev))
		return PTR_ERR(itn->fb_tunnel_dev);

	return 0;
}
998EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
999
/* Queue every device of this tunnel type in @itn's netns — plus tunnels
 * hashed here but whose device lives in another netns — for batched
 * unregistration on @head.  Called with RTNL held (via
 * ip_tunnel_delete_net).
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* Same-netns devices were already queued by the
			 * for_each_netdev_safe() pass above; only queue
			 * cross-netns tunnels here.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1024
/* Per-netns teardown: batch-unregister all devices of this tunnel type
 * under a single RTNL critical section.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
1034EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1035
/**
 * ip_tunnel_newlink - common rtnl newlink handler for IPv4 tunnels
 * @dev: pre-allocated tunnel device
 * @tb:  netlink attributes (IFLA_*)
 * @p:   parsed tunnel parameters
 *
 * Rejects duplicates (only one collect_md tunnel per netns; at most one
 * tunnel per exact parameter set), registers the device, then binds it
 * and inserts it into the hash.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		/* Only one metadata-collection tunnel per netns. */
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	/* An explicit IFLA_MTU overrides the computed tunnel MTU. */
	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);
out:
	return err;
}
1073EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1074
/**
 * ip_tunnel_changelink - common rtnl changelink handler for IPv4 tunnels
 * @dev: tunnel device being reconfigured
 * @tb:  netlink attributes (IFLA_*)
 * @p:   new tunnel parameters
 *
 * The fallback device cannot be reconfigured.  The new parameters must
 * not collide with a different existing tunnel, and for non-Ethernet
 * tunnels must not flip the device's broadcast/point-to-point mode.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* Mode (broadcast vs p2p) is fixed after creation. */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	/* Recompute MTU unless userspace pinned it with IFLA_MTU. */
	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
1111EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1112
/* ndo_init: allocate per-device tunnel state (per-CPU stats, dst cache,
 * GRO cells) and seed the outer IP header template.  On failure, any
 * already-allocated resources are released in reverse order.
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor	= ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	if (tunnel->collect_md) {
		/* Metadata tunnels stay in their netns and need the dst
		 * kept on transmitted skbs.
		 */
		dev->features		|= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
1149EXPORT_SYMBOL_GPL(ip_tunnel_init);
1150
/* ndo_uninit: unhash the tunnel (except the fallback device, which stays
 * hashed until netns teardown) and drop any cached route.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregisted in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
1164EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1165
1166
1167void ip_tunnel_setup(struct net_device *dev, int net_id)
1168{
1169 struct ip_tunnel *tunnel = netdev_priv(dev);
1170 tunnel->ip_tnl_net_id = net_id;
1171}
1172EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1173
1174MODULE_LICENSE("GPL");
1175