1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/init.h>
34#include <linux/in6.h>
35#include <linux/inetdevice.h>
36#include <linux/igmp.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/etherdevice.h>
39#include <linux/if_ether.h>
40#include <linux/if_vlan.h>
41#include <linux/rculist.h>
42#include <linux/err.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57#include <net/udp.h>
58#include <net/dst_metadata.h>
59
60#if IS_ENABLED(CONFIG_IPV6)
61#include <net/ipv6.h>
62#include <net/ip6_fib.h>
63#include <net/ip6_route.h>
64#endif
65
66static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67{
68 return hash_32((__force u32)key ^ (__force u32)remote,
69 IP_TNL_HASH_BITS);
70}
71
72static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
73 __be16 flags, __be32 key)
74{
75 if (p->i_flags & TUNNEL_KEY) {
76 if (flags & TUNNEL_KEY)
77 return key == p->i_key;
78 else
79
80 return false;
81 } else
82 return !(flags & TUNNEL_KEY);
83}
84
85
86
87
88
89
90
91
92
93
94
95
96struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
97 int link, __be16 flags,
98 __be32 remote, __be32 local,
99 __be32 key)
100{
101 unsigned int hash;
102 struct ip_tunnel *t, *cand = NULL;
103 struct hlist_head *head;
104
105 hash = ip_tunnel_hash(key, remote);
106 head = &itn->tunnels[hash];
107
108 hlist_for_each_entry_rcu(t, head, hash_node) {
109 if (local != t->parms.iph.saddr ||
110 remote != t->parms.iph.daddr ||
111 !(t->dev->flags & IFF_UP))
112 continue;
113
114 if (!ip_tunnel_key_match(&t->parms, flags, key))
115 continue;
116
117 if (t->parms.link == link)
118 return t;
119 else
120 cand = t;
121 }
122
123 hlist_for_each_entry_rcu(t, head, hash_node) {
124 if (remote != t->parms.iph.daddr ||
125 t->parms.iph.saddr != 0 ||
126 !(t->dev->flags & IFF_UP))
127 continue;
128
129 if (!ip_tunnel_key_match(&t->parms, flags, key))
130 continue;
131
132 if (t->parms.link == link)
133 return t;
134 else if (!cand)
135 cand = t;
136 }
137
138 hash = ip_tunnel_hash(key, 0);
139 head = &itn->tunnels[hash];
140
141 hlist_for_each_entry_rcu(t, head, hash_node) {
142 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
143 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
144 continue;
145
146 if (!(t->dev->flags & IFF_UP))
147 continue;
148
149 if (!ip_tunnel_key_match(&t->parms, flags, key))
150 continue;
151
152 if (t->parms.link == link)
153 return t;
154 else if (!cand)
155 cand = t;
156 }
157
158 if (flags & TUNNEL_NO_KEY)
159 goto skip_key_lookup;
160
161 hlist_for_each_entry_rcu(t, head, hash_node) {
162 if (t->parms.i_key != key ||
163 t->parms.iph.saddr != 0 ||
164 t->parms.iph.daddr != 0 ||
165 !(t->dev->flags & IFF_UP))
166 continue;
167
168 if (t->parms.link == link)
169 return t;
170 else if (!cand)
171 cand = t;
172 }
173
174skip_key_lookup:
175 if (cand)
176 return cand;
177
178 t = rcu_dereference(itn->collect_md_tun);
179 if (t && t->dev->flags & IFF_UP)
180 return t;
181
182 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
183 return netdev_priv(itn->fb_tunnel_dev);
184
185 return NULL;
186}
187EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
188
189static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
190 struct ip_tunnel_parm *parms)
191{
192 unsigned int h;
193 __be32 remote;
194 __be32 i_key = parms->i_key;
195
196 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
197 remote = parms->iph.daddr;
198 else
199 remote = 0;
200
201 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202 i_key = 0;
203
204 h = ip_tunnel_hash(i_key, remote);
205 return &itn->tunnels[h];
206}
207
208static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
209{
210 struct hlist_head *head = ip_bucket(itn, &t->parms);
211
212 if (t->collect_md)
213 rcu_assign_pointer(itn->collect_md_tun, t);
214 hlist_add_head_rcu(&t->hash_node, head);
215}
216
217static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218{
219 if (t->collect_md)
220 rcu_assign_pointer(itn->collect_md_tun, NULL);
221 hlist_del_init_rcu(&t->hash_node);
222}
223
224static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
225 struct ip_tunnel_parm *parms,
226 int type)
227{
228 __be32 remote = parms->iph.daddr;
229 __be32 local = parms->iph.saddr;
230 __be32 key = parms->i_key;
231 __be16 flags = parms->i_flags;
232 int link = parms->link;
233 struct ip_tunnel *t = NULL;
234 struct hlist_head *head = ip_bucket(itn, parms);
235
236 hlist_for_each_entry_rcu(t, head, hash_node) {
237 if (local == t->parms.iph.saddr &&
238 remote == t->parms.iph.daddr &&
239 link == t->parms.link &&
240 type == t->dev->type &&
241 ip_tunnel_key_match(&t->parms, flags, key))
242 break;
243 }
244 return t;
245}
246
247static struct net_device *__ip_tunnel_create(struct net *net,
248 const struct rtnl_link_ops *ops,
249 struct ip_tunnel_parm *parms)
250{
251 int err;
252 struct ip_tunnel *tunnel;
253 struct net_device *dev;
254 char name[IFNAMSIZ];
255
256 if (parms->name[0])
257 strlcpy(name, parms->name, IFNAMSIZ);
258 else {
259 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
260 err = -E2BIG;
261 goto failed;
262 }
263 strlcpy(name, ops->kind, IFNAMSIZ);
264 strncat(name, "%d", 2);
265 }
266
267 ASSERT_RTNL();
268 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
269 if (!dev) {
270 err = -ENOMEM;
271 goto failed;
272 }
273 dev_net_set(dev, net);
274
275 dev->rtnl_link_ops = ops;
276
277 tunnel = netdev_priv(dev);
278 tunnel->parms = *parms;
279 tunnel->net = net;
280
281 err = register_netdevice(dev);
282 if (err)
283 goto failed_free;
284
285 return dev;
286
287failed_free:
288 free_netdev(dev);
289failed:
290 return ERR_PTR(err);
291}
292
293static inline void init_tunnel_flow(struct flowi4 *fl4,
294 int proto,
295 __be32 daddr, __be32 saddr,
296 __be32 key, __u8 tos, int oif,
297 __u32 mark)
298{
299 memset(fl4, 0, sizeof(*fl4));
300 fl4->flowi4_oif = oif;
301 fl4->daddr = daddr;
302 fl4->saddr = saddr;
303 fl4->flowi4_tos = tos;
304 fl4->flowi4_proto = proto;
305 fl4->fl4_gre_key = key;
306 fl4->flowi4_mark = mark;
307}
308
309static int ip_tunnel_bind_dev(struct net_device *dev)
310{
311 struct net_device *tdev = NULL;
312 struct ip_tunnel *tunnel = netdev_priv(dev);
313 const struct iphdr *iph;
314 int hlen = LL_MAX_HEADER;
315 int mtu = ETH_DATA_LEN;
316 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
317
318 iph = &tunnel->parms.iph;
319
320
321 if (iph->daddr) {
322 struct flowi4 fl4;
323 struct rtable *rt;
324
325 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
326 iph->saddr, tunnel->parms.o_key,
327 RT_TOS(iph->tos), tunnel->parms.link,
328 tunnel->fwmark);
329 rt = ip_route_output_key(tunnel->net, &fl4);
330
331 if (!IS_ERR(rt)) {
332 tdev = rt->dst.dev;
333 ip_rt_put(rt);
334 }
335 if (dev->type != ARPHRD_ETHER)
336 dev->flags |= IFF_POINTOPOINT;
337
338 dst_cache_reset(&tunnel->dst_cache);
339 }
340
341 if (!tdev && tunnel->parms.link)
342 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
343
344 if (tdev) {
345 hlen = tdev->hard_header_len + tdev->needed_headroom;
346 mtu = tdev->mtu;
347 }
348
349 dev->needed_headroom = t_hlen + hlen;
350 mtu -= (dev->hard_header_len + t_hlen);
351
352 if (mtu < IPV4_MIN_MTU)
353 mtu = IPV4_MIN_MTU;
354
355 return mtu;
356}
357
358static struct ip_tunnel *ip_tunnel_create(struct net *net,
359 struct ip_tunnel_net *itn,
360 struct ip_tunnel_parm *parms)
361{
362 struct ip_tunnel *nt;
363 struct net_device *dev;
364 int t_hlen;
365
366 BUG_ON(!itn->fb_tunnel_dev);
367 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
368 if (IS_ERR(dev))
369 return ERR_CAST(dev);
370
371 dev->mtu = ip_tunnel_bind_dev(dev);
372
373 nt = netdev_priv(dev);
374 t_hlen = nt->hlen + sizeof(struct iphdr);
375 dev->min_mtu = ETH_MIN_MTU;
376 dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
377 ip_tunnel_add(itn, nt);
378 return nt;
379}
380
381int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
382 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
383 bool log_ecn_error)
384{
385 struct pcpu_sw_netstats *tstats;
386 const struct iphdr *iph = ip_hdr(skb);
387 int err;
388
389#ifdef CONFIG_NET_IPGRE_BROADCAST
390 if (ipv4_is_multicast(iph->daddr)) {
391 tunnel->dev->stats.multicast++;
392 skb->pkt_type = PACKET_BROADCAST;
393 }
394#endif
395
396 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
397 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
398 tunnel->dev->stats.rx_crc_errors++;
399 tunnel->dev->stats.rx_errors++;
400 goto drop;
401 }
402
403 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
404 if (!(tpi->flags&TUNNEL_SEQ) ||
405 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
406 tunnel->dev->stats.rx_fifo_errors++;
407 tunnel->dev->stats.rx_errors++;
408 goto drop;
409 }
410 tunnel->i_seqno = ntohl(tpi->seq) + 1;
411 }
412
413 skb_reset_network_header(skb);
414
415 err = IP_ECN_decapsulate(iph, skb);
416 if (unlikely(err)) {
417 if (log_ecn_error)
418 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419 &iph->saddr, iph->tos);
420 if (err > 1) {
421 ++tunnel->dev->stats.rx_frame_errors;
422 ++tunnel->dev->stats.rx_errors;
423 goto drop;
424 }
425 }
426
427 tstats = this_cpu_ptr(tunnel->dev->tstats);
428 u64_stats_update_begin(&tstats->syncp);
429 tstats->rx_packets++;
430 tstats->rx_bytes += skb->len;
431 u64_stats_update_end(&tstats->syncp);
432
433 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
434
435 if (tunnel->dev->type == ARPHRD_ETHER) {
436 skb->protocol = eth_type_trans(skb, tunnel->dev);
437 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
438 } else {
439 skb->dev = tunnel->dev;
440 }
441
442 if (tun_dst)
443 skb_dst_set(skb, (struct dst_entry *)tun_dst);
444
445 gro_cells_receive(&tunnel->gro_cells, skb);
446 return 0;
447
448drop:
449 if (tun_dst)
450 dst_release((struct dst_entry *)tun_dst);
451 kfree_skb(skb);
452 return 0;
453}
454EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
455
456int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
457 unsigned int num)
458{
459 if (num >= MAX_IPTUN_ENCAP_OPS)
460 return -ERANGE;
461
462 return !cmpxchg((const struct ip_tunnel_encap_ops **)
463 &iptun_encaps[num],
464 NULL, ops) ? 0 : -1;
465}
466EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
467
468int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
469 unsigned int num)
470{
471 int ret;
472
473 if (num >= MAX_IPTUN_ENCAP_OPS)
474 return -ERANGE;
475
476 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
477 &iptun_encaps[num],
478 ops, NULL) == ops) ? 0 : -1;
479
480 synchronize_net();
481
482 return ret;
483}
484EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
485
486int ip_tunnel_encap_setup(struct ip_tunnel *t,
487 struct ip_tunnel_encap *ipencap)
488{
489 int hlen;
490
491 memset(&t->encap, 0, sizeof(t->encap));
492
493 hlen = ip_encap_hlen(ipencap);
494 if (hlen < 0)
495 return hlen;
496
497 t->encap.type = ipencap->type;
498 t->encap.sport = ipencap->sport;
499 t->encap.dport = ipencap->dport;
500 t->encap.flags = ipencap->flags;
501
502 t->encap_hlen = hlen;
503 t->hlen = t->encap_hlen + t->tun_hlen;
504
505 return 0;
506}
507EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
508
509static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
510 struct rtable *rt, __be16 df,
511 const struct iphdr *inner_iph)
512{
513 struct ip_tunnel *tunnel = netdev_priv(dev);
514 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
515 int mtu;
516
517 if (df)
518 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
519 - sizeof(struct iphdr) - tunnel->hlen;
520 else
521 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
522
523 skb_dst_update_pmtu(skb, mtu);
524
525 if (skb->protocol == htons(ETH_P_IP)) {
526 if (!skb_is_gso(skb) &&
527 (inner_iph->frag_off & htons(IP_DF)) &&
528 mtu < pkt_size) {
529 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
530 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
531 return -E2BIG;
532 }
533 }
534#if IS_ENABLED(CONFIG_IPV6)
535 else if (skb->protocol == htons(ETH_P_IPV6)) {
536 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
537
538 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
539 mtu >= IPV6_MIN_MTU) {
540 if ((tunnel->parms.iph.daddr &&
541 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
542 rt6->rt6i_dst.plen == 128) {
543 rt6->rt6i_flags |= RTF_MODIFIED;
544 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
545 }
546 }
547
548 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
549 mtu < pkt_size) {
550 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
551 return -E2BIG;
552 }
553 }
554#endif
555 return 0;
556}
557
558void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
559{
560 struct ip_tunnel *tunnel = netdev_priv(dev);
561 u32 headroom = sizeof(struct iphdr);
562 struct ip_tunnel_info *tun_info;
563 const struct ip_tunnel_key *key;
564 const struct iphdr *inner_iph;
565 struct rtable *rt;
566 struct flowi4 fl4;
567 __be16 df = 0;
568 u8 tos, ttl;
569
570 tun_info = skb_tunnel_info(skb);
571 if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
572 ip_tunnel_info_af(tun_info) != AF_INET))
573 goto tx_error;
574 key = &tun_info->key;
575 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
576 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
577 tos = key->tos;
578 if (tos == 1) {
579 if (skb->protocol == htons(ETH_P_IP))
580 tos = inner_iph->tos;
581 else if (skb->protocol == htons(ETH_P_IPV6))
582 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
583 }
584 init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
585 RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
586 if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
587 goto tx_error;
588 rt = ip_route_output_key(tunnel->net, &fl4);
589 if (IS_ERR(rt)) {
590 dev->stats.tx_carrier_errors++;
591 goto tx_error;
592 }
593 if (rt->dst.dev == dev) {
594 ip_rt_put(rt);
595 dev->stats.collisions++;
596 goto tx_error;
597 }
598 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
599 ttl = key->ttl;
600 if (ttl == 0) {
601 if (skb->protocol == htons(ETH_P_IP))
602 ttl = inner_iph->ttl;
603 else if (skb->protocol == htons(ETH_P_IPV6))
604 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
605 else
606 ttl = ip4_dst_hoplimit(&rt->dst);
607 }
608 if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
609 df = htons(IP_DF);
610 else if (skb->protocol == htons(ETH_P_IP))
611 df = inner_iph->frag_off & htons(IP_DF);
612 headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
613 if (headroom > dev->needed_headroom)
614 dev->needed_headroom = headroom;
615
616 if (skb_cow_head(skb, dev->needed_headroom)) {
617 ip_rt_put(rt);
618 goto tx_dropped;
619 }
620 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
621 df, !net_eq(tunnel->net, dev_net(dev)));
622 return;
623tx_error:
624 dev->stats.tx_errors++;
625 goto kfree;
626tx_dropped:
627 dev->stats.tx_dropped++;
628kfree:
629 kfree_skb(skb);
630}
631EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
632
633void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
634 const struct iphdr *tnl_params, u8 protocol)
635{
636 struct ip_tunnel *tunnel = netdev_priv(dev);
637 const struct iphdr *inner_iph;
638 struct flowi4 fl4;
639 u8 tos, ttl;
640 __be16 df;
641 struct rtable *rt;
642 unsigned int max_headroom;
643 __be32 dst;
644 bool connected;
645
646 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
647 connected = (tunnel->parms.iph.daddr != 0);
648
649 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
650
651 dst = tnl_params->daddr;
652 if (dst == 0) {
653
654
655 if (!skb_dst(skb)) {
656 dev->stats.tx_fifo_errors++;
657 goto tx_error;
658 }
659
660 if (skb->protocol == htons(ETH_P_IP)) {
661 rt = skb_rtable(skb);
662 dst = rt_nexthop(rt, inner_iph->daddr);
663 }
664#if IS_ENABLED(CONFIG_IPV6)
665 else if (skb->protocol == htons(ETH_P_IPV6)) {
666 const struct in6_addr *addr6;
667 struct neighbour *neigh;
668 bool do_tx_error_icmp;
669 int addr_type;
670
671 neigh = dst_neigh_lookup(skb_dst(skb),
672 &ipv6_hdr(skb)->daddr);
673 if (!neigh)
674 goto tx_error;
675
676 addr6 = (const struct in6_addr *)&neigh->primary_key;
677 addr_type = ipv6_addr_type(addr6);
678
679 if (addr_type == IPV6_ADDR_ANY) {
680 addr6 = &ipv6_hdr(skb)->daddr;
681 addr_type = ipv6_addr_type(addr6);
682 }
683
684 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
685 do_tx_error_icmp = true;
686 else {
687 do_tx_error_icmp = false;
688 dst = addr6->s6_addr32[3];
689 }
690 neigh_release(neigh);
691 if (do_tx_error_icmp)
692 goto tx_error_icmp;
693 }
694#endif
695 else
696 goto tx_error;
697
698 connected = false;
699 }
700
701 tos = tnl_params->tos;
702 if (tos & 0x1) {
703 tos &= ~0x1;
704 if (skb->protocol == htons(ETH_P_IP)) {
705 tos = inner_iph->tos;
706 connected = false;
707 } else if (skb->protocol == htons(ETH_P_IPV6)) {
708 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
709 connected = false;
710 }
711 }
712
713 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
714 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
715 tunnel->fwmark);
716
717 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
718 goto tx_error;
719
720 rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
721 NULL;
722
723 if (!rt) {
724 rt = ip_route_output_key(tunnel->net, &fl4);
725
726 if (IS_ERR(rt)) {
727 dev->stats.tx_carrier_errors++;
728 goto tx_error;
729 }
730 if (connected)
731 dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
732 fl4.saddr);
733 }
734
735 if (rt->dst.dev == dev) {
736 ip_rt_put(rt);
737 dev->stats.collisions++;
738 goto tx_error;
739 }
740
741 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
742 ip_rt_put(rt);
743 goto tx_error;
744 }
745
746 if (tunnel->err_count > 0) {
747 if (time_before(jiffies,
748 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
749 tunnel->err_count--;
750
751 dst_link_failure(skb);
752 } else
753 tunnel->err_count = 0;
754 }
755
756 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
757 ttl = tnl_params->ttl;
758 if (ttl == 0) {
759 if (skb->protocol == htons(ETH_P_IP))
760 ttl = inner_iph->ttl;
761#if IS_ENABLED(CONFIG_IPV6)
762 else if (skb->protocol == htons(ETH_P_IPV6))
763 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
764#endif
765 else
766 ttl = ip4_dst_hoplimit(&rt->dst);
767 }
768
769 df = tnl_params->frag_off;
770 if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
771 df |= (inner_iph->frag_off&htons(IP_DF));
772
773 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
774 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
775 if (max_headroom > dev->needed_headroom)
776 dev->needed_headroom = max_headroom;
777
778 if (skb_cow_head(skb, dev->needed_headroom)) {
779 ip_rt_put(rt);
780 dev->stats.tx_dropped++;
781 kfree_skb(skb);
782 return;
783 }
784
785 iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
786 df, !net_eq(tunnel->net, dev_net(dev)));
787 return;
788
789#if IS_ENABLED(CONFIG_IPV6)
790tx_error_icmp:
791 dst_link_failure(skb);
792#endif
793tx_error:
794 dev->stats.tx_errors++;
795 kfree_skb(skb);
796}
797EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
798
799static void ip_tunnel_update(struct ip_tunnel_net *itn,
800 struct ip_tunnel *t,
801 struct net_device *dev,
802 struct ip_tunnel_parm *p,
803 bool set_mtu,
804 __u32 fwmark)
805{
806 ip_tunnel_del(itn, t);
807 t->parms.iph.saddr = p->iph.saddr;
808 t->parms.iph.daddr = p->iph.daddr;
809 t->parms.i_key = p->i_key;
810 t->parms.o_key = p->o_key;
811 if (dev->type != ARPHRD_ETHER) {
812 memcpy(dev->dev_addr, &p->iph.saddr, 4);
813 memcpy(dev->broadcast, &p->iph.daddr, 4);
814 }
815 ip_tunnel_add(itn, t);
816
817 t->parms.iph.ttl = p->iph.ttl;
818 t->parms.iph.tos = p->iph.tos;
819 t->parms.iph.frag_off = p->iph.frag_off;
820
821 if (t->parms.link != p->link || t->fwmark != fwmark) {
822 int mtu;
823
824 t->parms.link = p->link;
825 t->fwmark = fwmark;
826 mtu = ip_tunnel_bind_dev(dev);
827 if (set_mtu)
828 dev->mtu = mtu;
829 }
830 dst_cache_reset(&t->dst_cache);
831 netdev_state_change(dev);
832}
833
834int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
835{
836 int err = 0;
837 struct ip_tunnel *t = netdev_priv(dev);
838 struct net *net = t->net;
839 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
840
841 BUG_ON(!itn->fb_tunnel_dev);
842 switch (cmd) {
843 case SIOCGETTUNNEL:
844 if (dev == itn->fb_tunnel_dev) {
845 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
846 if (!t)
847 t = netdev_priv(dev);
848 }
849 memcpy(p, &t->parms, sizeof(*p));
850 break;
851
852 case SIOCADDTUNNEL:
853 case SIOCCHGTUNNEL:
854 err = -EPERM;
855 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
856 goto done;
857 if (p->iph.ttl)
858 p->iph.frag_off |= htons(IP_DF);
859 if (!(p->i_flags & VTI_ISVTI)) {
860 if (!(p->i_flags & TUNNEL_KEY))
861 p->i_key = 0;
862 if (!(p->o_flags & TUNNEL_KEY))
863 p->o_key = 0;
864 }
865
866 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
867
868 if (cmd == SIOCADDTUNNEL) {
869 if (!t) {
870 t = ip_tunnel_create(net, itn, p);
871 err = PTR_ERR_OR_ZERO(t);
872 break;
873 }
874
875 err = -EEXIST;
876 break;
877 }
878 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
879 if (t) {
880 if (t->dev != dev) {
881 err = -EEXIST;
882 break;
883 }
884 } else {
885 unsigned int nflags = 0;
886
887 if (ipv4_is_multicast(p->iph.daddr))
888 nflags = IFF_BROADCAST;
889 else if (p->iph.daddr)
890 nflags = IFF_POINTOPOINT;
891
892 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
893 err = -EINVAL;
894 break;
895 }
896
897 t = netdev_priv(dev);
898 }
899 }
900
901 if (t) {
902 err = 0;
903 ip_tunnel_update(itn, t, dev, p, true, 0);
904 } else {
905 err = -ENOENT;
906 }
907 break;
908
909 case SIOCDELTUNNEL:
910 err = -EPERM;
911 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
912 goto done;
913
914 if (dev == itn->fb_tunnel_dev) {
915 err = -ENOENT;
916 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
917 if (!t)
918 goto done;
919 err = -EPERM;
920 if (t == netdev_priv(itn->fb_tunnel_dev))
921 goto done;
922 dev = t->dev;
923 }
924 unregister_netdevice(dev);
925 err = 0;
926 break;
927
928 default:
929 err = -EINVAL;
930 }
931
932done:
933 return err;
934}
935EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
936
937int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
938{
939 struct ip_tunnel *tunnel = netdev_priv(dev);
940 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
941 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
942
943 if (new_mtu < ETH_MIN_MTU)
944 return -EINVAL;
945
946 if (new_mtu > max_mtu) {
947 if (strict)
948 return -EINVAL;
949
950 new_mtu = max_mtu;
951 }
952
953 dev->mtu = new_mtu;
954 return 0;
955}
956EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
957
958int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
959{
960 return __ip_tunnel_change_mtu(dev, new_mtu, true);
961}
962EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
963
964static void ip_tunnel_dev_free(struct net_device *dev)
965{
966 struct ip_tunnel *tunnel = netdev_priv(dev);
967
968 gro_cells_destroy(&tunnel->gro_cells);
969 dst_cache_destroy(&tunnel->dst_cache);
970 free_percpu(dev->tstats);
971}
972
973void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
974{
975 struct ip_tunnel *tunnel = netdev_priv(dev);
976 struct ip_tunnel_net *itn;
977
978 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
979
980 if (itn->fb_tunnel_dev != dev) {
981 ip_tunnel_del(itn, netdev_priv(dev));
982 unregister_netdevice_queue(dev, head);
983 }
984}
985EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
986
987struct net *ip_tunnel_get_link_net(const struct net_device *dev)
988{
989 struct ip_tunnel *tunnel = netdev_priv(dev);
990
991 return tunnel->net;
992}
993EXPORT_SYMBOL(ip_tunnel_get_link_net);
994
995int ip_tunnel_get_iflink(const struct net_device *dev)
996{
997 struct ip_tunnel *tunnel = netdev_priv(dev);
998
999 return tunnel->parms.link;
1000}
1001EXPORT_SYMBOL(ip_tunnel_get_iflink);
1002
1003int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1004 struct rtnl_link_ops *ops, char *devname)
1005{
1006 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1007 struct ip_tunnel_parm parms;
1008 unsigned int i;
1009
1010 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1011 INIT_HLIST_HEAD(&itn->tunnels[i]);
1012
1013 if (!ops) {
1014 itn->fb_tunnel_dev = NULL;
1015 return 0;
1016 }
1017
1018 memset(&parms, 0, sizeof(parms));
1019 if (devname)
1020 strlcpy(parms.name, devname, IFNAMSIZ);
1021
1022 rtnl_lock();
1023 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1024
1025
1026
1027 if (!IS_ERR(itn->fb_tunnel_dev)) {
1028 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1029 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1030 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1031 }
1032 rtnl_unlock();
1033
1034 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1035}
1036EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1037
1038static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1039 struct rtnl_link_ops *ops)
1040{
1041 struct net *net = dev_net(itn->fb_tunnel_dev);
1042 struct net_device *dev, *aux;
1043 int h;
1044
1045 for_each_netdev_safe(net, dev, aux)
1046 if (dev->rtnl_link_ops == ops)
1047 unregister_netdevice_queue(dev, head);
1048
1049 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1050 struct ip_tunnel *t;
1051 struct hlist_node *n;
1052 struct hlist_head *thead = &itn->tunnels[h];
1053
1054 hlist_for_each_entry_safe(t, n, thead, hash_node)
1055
1056
1057
1058 if (!net_eq(dev_net(t->dev), net))
1059 unregister_netdevice_queue(t->dev, head);
1060 }
1061}
1062
1063void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1064 struct rtnl_link_ops *ops)
1065{
1066 struct ip_tunnel_net *itn;
1067 struct net *net;
1068 LIST_HEAD(list);
1069
1070 rtnl_lock();
1071 list_for_each_entry(net, net_list, exit_list) {
1072 itn = net_generic(net, id);
1073 ip_tunnel_destroy(itn, &list, ops);
1074 }
1075 unregister_netdevice_many(&list);
1076 rtnl_unlock();
1077}
1078EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1079
1080int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1081 struct ip_tunnel_parm *p, __u32 fwmark)
1082{
1083 struct ip_tunnel *nt;
1084 struct net *net = dev_net(dev);
1085 struct ip_tunnel_net *itn;
1086 int mtu;
1087 int err;
1088
1089 nt = netdev_priv(dev);
1090 itn = net_generic(net, nt->ip_tnl_net_id);
1091
1092 if (nt->collect_md) {
1093 if (rtnl_dereference(itn->collect_md_tun))
1094 return -EEXIST;
1095 } else {
1096 if (ip_tunnel_find(itn, p, dev->type))
1097 return -EEXIST;
1098 }
1099
1100 nt->net = net;
1101 nt->parms = *p;
1102 nt->fwmark = fwmark;
1103 err = register_netdevice(dev);
1104 if (err)
1105 goto out;
1106
1107 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1108 eth_hw_addr_random(dev);
1109
1110 mtu = ip_tunnel_bind_dev(dev);
1111 if (!tb[IFLA_MTU])
1112 dev->mtu = mtu;
1113
1114 ip_tunnel_add(itn, nt);
1115out:
1116 return err;
1117}
1118EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1119
1120int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1121 struct ip_tunnel_parm *p, __u32 fwmark)
1122{
1123 struct ip_tunnel *t;
1124 struct ip_tunnel *tunnel = netdev_priv(dev);
1125 struct net *net = tunnel->net;
1126 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1127
1128 if (dev == itn->fb_tunnel_dev)
1129 return -EINVAL;
1130
1131 t = ip_tunnel_find(itn, p, dev->type);
1132
1133 if (t) {
1134 if (t->dev != dev)
1135 return -EEXIST;
1136 } else {
1137 t = tunnel;
1138
1139 if (dev->type != ARPHRD_ETHER) {
1140 unsigned int nflags = 0;
1141
1142 if (ipv4_is_multicast(p->iph.daddr))
1143 nflags = IFF_BROADCAST;
1144 else if (p->iph.daddr)
1145 nflags = IFF_POINTOPOINT;
1146
1147 if ((dev->flags ^ nflags) &
1148 (IFF_POINTOPOINT | IFF_BROADCAST))
1149 return -EINVAL;
1150 }
1151 }
1152
1153 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1154 return 0;
1155}
1156EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1157
1158int ip_tunnel_init(struct net_device *dev)
1159{
1160 struct ip_tunnel *tunnel = netdev_priv(dev);
1161 struct iphdr *iph = &tunnel->parms.iph;
1162 int err;
1163
1164 dev->needs_free_netdev = true;
1165 dev->priv_destructor = ip_tunnel_dev_free;
1166 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1167 if (!dev->tstats)
1168 return -ENOMEM;
1169
1170 err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1171 if (err) {
1172 free_percpu(dev->tstats);
1173 return err;
1174 }
1175
1176 err = gro_cells_init(&tunnel->gro_cells, dev);
1177 if (err) {
1178 dst_cache_destroy(&tunnel->dst_cache);
1179 free_percpu(dev->tstats);
1180 return err;
1181 }
1182
1183 tunnel->dev = dev;
1184 tunnel->net = dev_net(dev);
1185 strcpy(tunnel->parms.name, dev->name);
1186 iph->version = 4;
1187 iph->ihl = 5;
1188
1189 if (tunnel->collect_md) {
1190 dev->features |= NETIF_F_NETNS_LOCAL;
1191 netif_keep_dst(dev);
1192 }
1193 return 0;
1194}
1195EXPORT_SYMBOL_GPL(ip_tunnel_init);
1196
1197void ip_tunnel_uninit(struct net_device *dev)
1198{
1199 struct ip_tunnel *tunnel = netdev_priv(dev);
1200 struct net *net = tunnel->net;
1201 struct ip_tunnel_net *itn;
1202
1203 itn = net_generic(net, tunnel->ip_tnl_net_id);
1204
1205 if (itn->fb_tunnel_dev != dev)
1206 ip_tunnel_del(itn, netdev_priv(dev));
1207
1208 dst_cache_reset(&tunnel->dst_cache);
1209}
1210EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1211
1212
1213void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1214{
1215 struct ip_tunnel *tunnel = netdev_priv(dev);
1216 tunnel->ip_tnl_net_id = net_id;
1217}
1218EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1219
1220MODULE_LICENSE("GPL");
1221