1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/init.h>
34#include <linux/in6.h>
35#include <linux/inetdevice.h>
36#include <linux/igmp.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/etherdevice.h>
39#include <linux/if_ether.h>
40#include <linux/if_vlan.h>
41#include <linux/rculist.h>
42
43#include <net/sock.h>
44#include <net/ip.h>
45#include <net/icmp.h>
46#include <net/protocol.h>
47#include <net/ip_tunnels.h>
48#include <net/arp.h>
49#include <net/checksum.h>
50#include <net/dsfield.h>
51#include <net/inet_ecn.h>
52#include <net/xfrm.h>
53#include <net/net_namespace.h>
54#include <net/netns/generic.h>
55#include <net/rtnetlink.h>
56#include <net/udp.h>
57#include <net/dst_metadata.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
71static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
72 __be16 flags, __be32 key)
73{
74 if (p->i_flags & TUNNEL_KEY) {
75 if (flags & TUNNEL_KEY)
76 return key == p->i_key;
77 else
78
79 return false;
80 } else
81 return !(flags & TUNNEL_KEY);
82}
83
84
85
86
87
88
89
90
91
92
93
94
/* ip_tunnel_lookup - find the best tunnel for a received packet
 *
 * Tries progressively less specific matches against up (IFF_UP) tunnels:
 *   1. exact (saddr, daddr) match
 *   2. daddr matches, tunnel's local address is wildcard (0)
 *   3. local address matches (as saddr, or as a multicast daddr),
 *      tunnel's remote address is wildcard
 *   4. key-only match with both addresses wildcarded (skipped for
 *      TUNNEL_NO_KEY packets)
 * Within each pass an exact parms.link match wins immediately; the first
 * tunnel matching on everything but the link is remembered as a
 * candidate and returned once all passes are done.  Finally falls back
 * to the collect_md tunnel, then the per-netns fallback device.
 *
 * Must be called under rcu_read_lock().  Returns NULL if nothing matches.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact local/remote address match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel has a wildcard local address. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 live in the bucket hashed with a wildcard remote. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: our local address matches the tunnel's saddr (with
	 * wildcard daddr), or the packet was sent to a multicast group
	 * the tunnel uses as its daddr.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match, both addresses wildcarded. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Metadata-based (collect_md) tunnel accepts anything. */
	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	/* Last resort: the per-netns fallback device. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187
188static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
189 struct ip_tunnel_parm *parms)
190{
191 unsigned int h;
192 __be32 remote;
193 __be32 i_key = parms->i_key;
194
195 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
196 remote = parms->iph.daddr;
197 else
198 remote = 0;
199
200 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
201 i_key = 0;
202
203 h = ip_tunnel_hash(i_key, remote);
204 return &itn->tunnels[h];
205}
206
/* Publish tunnel @t in @itn's hash table (and as the collect_md tunnel
 * if applicable) so RCU readers in ip_tunnel_lookup() can find it.
 * Caller must hold RTNL.
 */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}
215
/* Unpublish tunnel @t; the inverse of ip_tunnel_add().  RCU readers may
 * still hold a reference until a grace period elapses.  Caller must
 * hold RTNL.
 */
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}
222
/* Find a tunnel whose configuration exactly equals @parms (addresses,
 * key, link) and whose device is of @type.  Unlike ip_tunnel_lookup()
 * this does no wildcard matching; it is used on the configuration path
 * (ioctl/netlink) under RTNL.  Returns NULL when no exact match exists
 * (the iterator leaves @t NULL when the list is exhausted).
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    key == t->parms.i_key &&
		    link == t->parms.link &&
		    type == t->dev->type)
			break;
	}
	return t;
}
244
/* Allocate and register a tunnel net_device in @net with the given link
 * ops and parameters.  The device name comes from @parms->name when set
 * (must be a valid netdev name), otherwise "<kind>%d" is used and the
 * core picks a free index.  Returns the device or an ERR_PTR; on error
 * nothing is left registered.  Caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		/* Need room for "%d" plus the terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
291
/* Bind tunnel device @dev to the underlying transmit device implied by
 * its parameters and return a suitable MTU for it.
 *
 * For a fixed remote address, route towards it to discover the lower
 * device; otherwise fall back to the configured parms.link ifindex.
 * Updates dev->needed_headroom from the lower device's requirements and
 * derives the MTU by subtracting the tunnel overhead, clamped to the
 * IPv4 minimum of 68.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom. */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
				    iph->saddr, tunnel->parms.o_key,
				    RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		/* Cached routes may point at the old lower device. */
		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = min(tdev->mtu, IP_MAX_MTU);
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 is the minimum IPv4 MTU (RFC 791). */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
339
340static struct ip_tunnel *ip_tunnel_create(struct net *net,
341 struct ip_tunnel_net *itn,
342 struct ip_tunnel_parm *parms)
343{
344 struct ip_tunnel *nt, *fbt;
345 struct net_device *dev;
346 int mtu;
347 int err;
348
349 BUG_ON(!itn->fb_tunnel_dev);
350 fbt = netdev_priv(itn->fb_tunnel_dev);
351 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
352 if (IS_ERR(dev))
353 return ERR_CAST(dev);
354
355 mtu = ip_tunnel_bind_dev(dev);
356 err = dev_set_mtu(dev, mtu);
357 if (err)
358 goto err_dev_set_mtu;
359
360 nt = netdev_priv(dev);
361 ip_tunnel_add(itn, nt);
362 return nt;
363
364err_dev_set_mtu:
365 unregister_netdevice(dev);
366 return ERR_PTR(err);
367}
368
/* ip_tunnel_rcv - accept a decapsulated packet on tunnel device @tunnel
 *
 * Validates the parsed tunnel header @tpi against the tunnel's
 * configuration (checksum and sequence-number expectations), performs
 * ECN decapsulation, updates per-CPU RX stats, and hands the packet to
 * the device's GRO cells.  @tun_dst, when non-NULL, carries collected
 * metadata and is attached to the skb (or released on drop).
 *
 * Always returns 0; erroneous packets are counted and freed here.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Packet and tunnel must agree on whether a checksum is present. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce strictly increasing sequence numbers when configured. */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* err > 1 means the outer/inner ECN combination is invalid. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
443
444static int ip_encap_hlen(struct ip_tunnel_encap *e)
445{
446 const struct ip_tunnel_encap_ops *ops;
447 int hlen = -EINVAL;
448
449 if (e->type == TUNNEL_ENCAP_NONE)
450 return 0;
451
452 if (e->type >= MAX_IPTUN_ENCAP_OPS)
453 return -EINVAL;
454
455 rcu_read_lock();
456 ops = rcu_dereference(iptun_encaps[e->type]);
457 if (likely(ops && ops->encap_hlen))
458 hlen = ops->encap_hlen(e);
459 rcu_read_unlock();
460
461 return hlen;
462}
463
/* Registry of secondary encapsulation handlers, indexed by
 * TUNNEL_ENCAP_* type.  Entries are installed/removed with cmpxchg and
 * read under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
466
/* Register encapsulation handler @ops for type @num.  Returns 0 on
 * success, -ERANGE for an out-of-range type, or -1 if a handler is
 * already installed (the cmpxchg only succeeds against NULL).
 */
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
478
/* Unregister encapsulation handler @ops for type @num.  Fails with -1
 * if a different handler is installed.  synchronize_net() guarantees no
 * RCU reader is still using @ops when this returns.
 */
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
496
/* Configure tunnel @t's secondary encapsulation from @ipencap and
 * recompute the cached header lengths.  Returns 0, or a negative errno
 * if the encap type is invalid/unregistered (in which case t->encap is
 * left zeroed, i.e. no encapsulation).
 */
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
519
/* Apply the tunnel's secondary encapsulation to @skb on transmit.
 * Delegates to the registered handler's build_header(), which may
 * rewrite *@protocol and adjust @fl4 (e.g. UDP ports for FOU).
 * Returns 0 when no encapsulation is configured, a negative errno on
 * failure.
 */
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);
541
/* Path-MTU handling for the tunnel TX path.  Computes the usable inner
 * MTU from the outer route (when DF is set) or the skb's own dst,
 * propagates it to the skb's dst, and—when the packet would not fit—
 * sends the appropriate "too big" error back to the sender (ICMP
 * frag-needed for IPv4 with DF, ICMPv6 packet-too-big for IPv6) and
 * returns -E2BIG.  Returns 0 when the packet may be transmitted.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_valid_dst(skb))
		skb_dst_update_pmtu(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6;

		rt6 = skb_valid_dst(skb) ? (struct rt6_info *)skb_dst(skb) :
					   NULL;

		/* Record the learnt MTU on host routes (or fixed-remote
		 * tunnels) so future IPv6 traffic sees it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
594
/* ip_tunnel_xmit - encapsulate and transmit @skb on tunnel device @dev
 *
 * @tnl_params: outer IP header template (tunnel->parms.iph for normal
 *              tunnels); a zero daddr means an NBMA tunnel whose
 *              destination is derived per-packet.
 * @protocol:   outer IP protocol number (may be rewritten by encap).
 *
 * Resolves the outer destination and route (with a per-tunnel dst
 * cache for "connected" tunnels), applies secondary encapsulation,
 * enforces PMTU, derives outer TOS/TTL/DF from the template or the
 * inner packet, ensures headroom, and hands off to iptunnel_xmit().
 * Consumes the skb in all cases; errors are accounted on dev->stats.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	unsigned int inner_nhdr_len = 0;
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	/* Make sure the inner network header we dereference below is in
	 * the linear area.
	 */
	if (skb->protocol == htons(ETH_P_IP))
		inner_nhdr_len = sizeof(struct iphdr);
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner_nhdr_len = sizeof(struct ipv6hdr);
	if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
		goto tx_error;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing information.
		 */
		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible IPv6 addresses embed a
			 * usable IPv4 destination in the low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Per-packet destination: the dst cache cannot be used. */
		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
			    tunnel->parms.o_key, RT_TOS(tos),
			    tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	/* Routing back to ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While err_count is decaying, report link failures upstream. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
769
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * removed from and re-added to the hash table because addresses and key
 * determine its bucket.  Rebinds the lower device when the link
 * changed, optionally propagating the derived MTU.  Caller must hold
 * RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the
		 * device's hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}
802
/* ip_tunnel_ioctl - legacy SIOC{GET,ADD,CHG,DEL}TUNNEL handler
 *
 * Shared by GRE/IPIP/SIT-style drivers.  On the fallback device the
 * parameters select which tunnel to operate on; on a specific tunnel
 * device they apply to that device.  ADD/CHG/DEL require
 * CAP_NET_ADMIN in the tunnel's user namespace.  @p is both input and
 * (for GET) output.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A nonzero TTL implies DF so PMTU discovery works. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Ignore stale key values when keying is off. */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_RET(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* New parms must not collide with a
				 * different existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing addresses must not change the
				 * device's broadcast/p2p nature.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
905
906int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
907{
908 struct ip_tunnel *tunnel = netdev_priv(dev);
909 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
910 int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
911
912 if (new_mtu < 68)
913 return -EINVAL;
914
915 if (new_mtu > max_mtu) {
916 if (strict)
917 return -EINVAL;
918
919 new_mtu = max_mtu;
920 }
921
922 dev->mtu = new_mtu;
923 return 0;
924}
925EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
926
/* ndo_change_mtu helper: strict variant that rejects oversized MTUs. */
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
932
/* Device destructor: release the resources acquired in
 * ip_tunnel_init() (GRO cells, dst cache, per-CPU stats).
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}
941
/* rtnl dellink handler: unhash the tunnel and queue its device for
 * unregistration.  The per-netns fallback device is never deleted this
 * way.  Caller must hold RTNL.
 */
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
954EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
955
/* rtnl get_link_net handler: the netns the tunnel transmits in (which
 * may differ from the netns the device lives in).
 */
struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
963
/* ndo_get_iflink handler: ifindex of the configured lower device
 * (0 when unset).
 */
int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
971
/* Per-netns init for a tunnel type: initialize the hash table and, when
 * @ops is given, create the always-present fallback device named
 * @devname.  The fallback device is netns-local and registered in the
 * hash table like any other tunnel.  Returns 0 or a negative errno.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Tunnel type without a fallback device (e.g. metadata
		 * only); nothing more to set up.
		 */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: it must stay in the netns it was
	 * created in, and it doubles as the default tunnel for
	 * otherwise-unmatched packets.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	if (IS_ERR(itn->fb_tunnel_dev))
		return PTR_ERR(itn->fb_tunnel_dev);

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1009
/* Queue every device of this tunnel type for unregistration when its
 * netns is torn down: all devices of @ops living in the netns, plus any
 * hashed tunnels whose device was moved into another netns (they would
 * otherwise be missed by the per-netdev walk).  Caller must hold RTNL.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		/* Devices in the same netns were already queued by the
		 * loop above; only pick up cross-netns ones here.
		 */
		hlist_for_each_entry_safe(t, n, thead, hash_node)
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1034
/* Per-netns exit for a tunnel type: collect all its devices and
 * unregister them in one batch under RTNL.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1045
/* rtnl newlink handler: register the pre-allocated tunnel device @dev
 * with parameters @p.  Rejects duplicates (only one collect_md tunnel
 * per netns; no two tunnels with identical parms).  Derives the MTU
 * from the lower device unless IFLA_MTU was given, then publishes the
 * tunnel.  Caller must hold RTNL.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto err_register_netdevice;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		/* The user supplied an MTU (already stored in dev->mtu
		 * by the rtnl core); clamp it to what the tunnel
		 * overhead allows.
		 */
		unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;

		mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
			    (unsigned int)(max - sizeof(struct iphdr)));
	}

	err = dev_set_mtu(dev, mtu);
	if (err)
		goto err_dev_set_mtu;

	ip_tunnel_add(itn, nt);
	return 0;

err_dev_set_mtu:
	unregister_netdevice(dev);
err_register_netdevice:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1096
/* rtnl changelink handler: apply new parameters @p to tunnel device
 * @dev.  The fallback device cannot be reconfigured.  Fails with
 * -EEXIST if @p matches a different existing tunnel, and with -EINVAL
 * if the new addresses would change a non-Ethernet device's
 * broadcast/point-to-point nature.  The MTU is only re-derived when the
 * request carries no IFLA_MTU.  Caller must hold RTNL.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1134
/* ndo_init handler shared by ip tunnel drivers: allocate per-CPU stats,
 * the dst cache and GRO cells, wire up the destructor, and seed the
 * outer IPv4 header template.  collect_md tunnels are kept netns-local
 * and keep their dst for metadata collection.  Returns 0 or a negative
 * errno (all partial allocations are released on failure).
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->extended->needs_free_netdev = true;
	dev->extended->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
		netif_keep_dst(dev);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
1173
/* ndo_uninit handler: unhash the tunnel (the fallback device is only
 * unhashed when the whole netns goes away) and drop cached routes.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);

	/* fb_tunnel_dev will be unregistered in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1188
1189
/* Record the pernet-generic id of the owning tunnel type in the
 * device's private area; called from the driver's setup routine.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1196
1197MODULE_LICENSE("GPL");
1198