linux/net/ipv6/ip6_tunnel.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      IPv6 tunneling device
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Ville Nuorvala          <vnuorval@tcs.hut.fi>
   8 *      Yasuyuki Kozakai        <kozakai@linux-ipv6.org>
   9 *
  10 *      Based on:
  11 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  12 *
  13 *      RFC 2473
  14 */
  15
  16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  17
  18#include <linux/module.h>
  19#include <linux/capability.h>
  20#include <linux/errno.h>
  21#include <linux/types.h>
  22#include <linux/sockios.h>
  23#include <linux/icmp.h>
  24#include <linux/if.h>
  25#include <linux/in.h>
  26#include <linux/ip.h>
  27#include <linux/net.h>
  28#include <linux/in6.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_arp.h>
  31#include <linux/icmpv6.h>
  32#include <linux/init.h>
  33#include <linux/route.h>
  34#include <linux/rtnetlink.h>
  35#include <linux/netfilter_ipv6.h>
  36#include <linux/slab.h>
  37#include <linux/hash.h>
  38#include <linux/etherdevice.h>
  39
  40#include <linux/uaccess.h>
  41#include <linux/atomic.h>
  42
  43#include <net/icmp.h>
  44#include <net/ip.h>
  45#include <net/ip_tunnels.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49#include <net/ip6_tunnel.h>
  50#include <net/xfrm.h>
  51#include <net/dsfield.h>
  52#include <net/inet_ecn.h>
  53#include <net/net_namespace.h>
  54#include <net/netns/generic.h>
  55#include <net/dst_metadata.h>
  56
  57MODULE_AUTHOR("Ville Nuorvala");
  58MODULE_DESCRIPTION("IPv6 tunneling device");
  59MODULE_LICENSE("GPL");
  60MODULE_ALIAS_RTNL_LINK("ip6tnl");
  61MODULE_ALIAS_NETDEV("ip6tnl0");
  62
  63#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
  64#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
  65
  66static bool log_ecn_error = true;
  67module_param(log_ecn_error, bool, 0644);
  68MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  69
  70static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
  71{
  72        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  73
  74        return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
  75}
  76
  77static int ip6_tnl_dev_init(struct net_device *dev);
  78static void ip6_tnl_dev_setup(struct net_device *dev);
  79static struct rtnl_link_ops ip6_link_ops __read_mostly;
  80
  81static unsigned int ip6_tnl_net_id __read_mostly;
  82struct ip6_tnl_net {
  83        /* the IPv6 tunnel fallback device */
  84        struct net_device *fb_tnl_dev;
  85        /* lists for storing tunnels in use */
  86        struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
  87        struct ip6_tnl __rcu *tnls_wc[1];
  88        struct ip6_tnl __rcu **tnls[2];
  89        struct ip6_tnl __rcu *collect_md_tun;
  90};
  91
  92static struct net_device_stats *ip6_get_stats(struct net_device *dev)
  93{
  94        struct pcpu_sw_netstats tmp, sum = { 0 };
  95        int i;
  96
  97        for_each_possible_cpu(i) {
  98                unsigned int start;
  99                const struct pcpu_sw_netstats *tstats =
 100                                                   per_cpu_ptr(dev->tstats, i);
 101
 102                do {
 103                        start = u64_stats_fetch_begin_irq(&tstats->syncp);
 104                        tmp.rx_packets = tstats->rx_packets;
 105                        tmp.rx_bytes = tstats->rx_bytes;
 106                        tmp.tx_packets = tstats->tx_packets;
 107                        tmp.tx_bytes =  tstats->tx_bytes;
 108                } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
 109
 110                sum.rx_packets += tmp.rx_packets;
 111                sum.rx_bytes   += tmp.rx_bytes;
 112                sum.tx_packets += tmp.tx_packets;
 113                sum.tx_bytes   += tmp.tx_bytes;
 114        }
 115        dev->stats.rx_packets = sum.rx_packets;
 116        dev->stats.rx_bytes   = sum.rx_bytes;
 117        dev->stats.tx_packets = sum.tx_packets;
 118        dev->stats.tx_bytes   = sum.tx_bytes;
 119        return &dev->stats;
 120}
 121
 122/**
 123 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 124 *   @remote: the address of the tunnel exit-point
 125 *   @local: the address of the tunnel entry-point
 126 *
 127 * Return:
 128 *   tunnel matching given end-points if found,
 129 *   else fallback tunnel if its device is up,
 130 *   else %NULL
 131 **/
 132
 133#define for_each_ip6_tunnel_rcu(start) \
 134        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 135
 136static struct ip6_tnl *
 137ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
 138{
 139        unsigned int hash = HASH(remote, local);
 140        struct ip6_tnl *t;
 141        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 142        struct in6_addr any;
 143
 144        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 145                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 146                    ipv6_addr_equal(remote, &t->parms.raddr) &&
 147                    (t->dev->flags & IFF_UP))
 148                        return t;
 149        }
 150
 151        memset(&any, 0, sizeof(any));
 152        hash = HASH(&any, local);
 153        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 154                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 155                    ipv6_addr_any(&t->parms.raddr) &&
 156                    (t->dev->flags & IFF_UP))
 157                        return t;
 158        }
 159
 160        hash = HASH(remote, &any);
 161        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 162                if (ipv6_addr_equal(remote, &t->parms.raddr) &&
 163                    ipv6_addr_any(&t->parms.laddr) &&
 164                    (t->dev->flags & IFF_UP))
 165                        return t;
 166        }
 167
 168        t = rcu_dereference(ip6n->collect_md_tun);
 169        if (t && t->dev->flags & IFF_UP)
 170                return t;
 171
 172        t = rcu_dereference(ip6n->tnls_wc[0]);
 173        if (t && (t->dev->flags & IFF_UP))
 174                return t;
 175
 176        return NULL;
 177}
 178
 179/**
 180 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 181 *   @p: parameters containing tunnel end-points
 182 *
 183 * Description:
 184 *   ip6_tnl_bucket() returns the head of the list matching the
 185 *   &struct in6_addr entries laddr and raddr in @p.
 186 *
 187 * Return: head of IPv6 tunnel list
 188 **/
 189
 190static struct ip6_tnl __rcu **
 191ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
 192{
 193        const struct in6_addr *remote = &p->raddr;
 194        const struct in6_addr *local = &p->laddr;
 195        unsigned int h = 0;
 196        int prio = 0;
 197
 198        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 199                prio = 1;
 200                h = HASH(remote, local);
 201        }
 202        return &ip6n->tnls[prio][h];
 203}
 204
 205/**
 206 * ip6_tnl_link - add tunnel to hash table
 207 *   @t: tunnel to be added
 208 **/
 209
 210static void
 211ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 212{
 213        struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 214
 215        if (t->parms.collect_md)
 216                rcu_assign_pointer(ip6n->collect_md_tun, t);
 217        rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 218        rcu_assign_pointer(*tp, t);
 219}
 220
 221/**
 222 * ip6_tnl_unlink - remove tunnel from hash table
 223 *   @t: tunnel to be removed
 224 **/
 225
 226static void
 227ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 228{
 229        struct ip6_tnl __rcu **tp;
 230        struct ip6_tnl *iter;
 231
 232        if (t->parms.collect_md)
 233                rcu_assign_pointer(ip6n->collect_md_tun, NULL);
 234
 235        for (tp = ip6_tnl_bucket(ip6n, &t->parms);
 236             (iter = rtnl_dereference(*tp)) != NULL;
 237             tp = &iter->next) {
 238                if (t == iter) {
 239                        rcu_assign_pointer(*tp, t->next);
 240                        break;
 241                }
 242        }
 243}
 244
 245static void ip6_dev_free(struct net_device *dev)
 246{
 247        struct ip6_tnl *t = netdev_priv(dev);
 248
 249        gro_cells_destroy(&t->gro_cells);
 250        dst_cache_destroy(&t->dst_cache);
 251        free_percpu(dev->tstats);
 252}
 253
 254static int ip6_tnl_create2(struct net_device *dev)
 255{
 256        struct ip6_tnl *t = netdev_priv(dev);
 257        struct net *net = dev_net(dev);
 258        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 259        int err;
 260
 261        t = netdev_priv(dev);
 262
 263        dev->rtnl_link_ops = &ip6_link_ops;
 264        err = register_netdevice(dev);
 265        if (err < 0)
 266                goto out;
 267
 268        strcpy(t->parms.name, dev->name);
 269
 270        dev_hold(dev);
 271        ip6_tnl_link(ip6n, t);
 272        return 0;
 273
 274out:
 275        return err;
 276}
 277
 278/**
 279 * ip6_tnl_create - create a new tunnel
 280 *   @p: tunnel parameters
 281 *   @pt: pointer to new tunnel
 282 *
 283 * Description:
 284 *   Create tunnel matching given parameters.
 285 *
 286 * Return:
 287 *   created tunnel or error pointer
 288 **/
 289
 290static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 291{
 292        struct net_device *dev;
 293        struct ip6_tnl *t;
 294        char name[IFNAMSIZ];
 295        int err = -E2BIG;
 296
 297        if (p->name[0]) {
 298                if (!dev_valid_name(p->name))
 299                        goto failed;
 300                strlcpy(name, p->name, IFNAMSIZ);
 301        } else {
 302                sprintf(name, "ip6tnl%%d");
 303        }
 304        err = -ENOMEM;
 305        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 306                           ip6_tnl_dev_setup);
 307        if (!dev)
 308                goto failed;
 309
 310        dev_net_set(dev, net);
 311
 312        t = netdev_priv(dev);
 313        t->parms = *p;
 314        t->net = dev_net(dev);
 315        err = ip6_tnl_create2(dev);
 316        if (err < 0)
 317                goto failed_free;
 318
 319        return t;
 320
 321failed_free:
 322        free_netdev(dev);
 323failed:
 324        return ERR_PTR(err);
 325}
 326
 327/**
 328 * ip6_tnl_locate - find or create tunnel matching given parameters
 329 *   @p: tunnel parameters
 330 *   @create: != 0 if allowed to create new tunnel if no match found
 331 *
 332 * Description:
 333 *   ip6_tnl_locate() first tries to locate an existing tunnel
 334 *   based on @parms. If this is unsuccessful, but @create is set a new
 335 *   tunnel device is created and registered for use.
 336 *
 337 * Return:
 338 *   matching tunnel or error pointer
 339 **/
 340
 341static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 342                struct __ip6_tnl_parm *p, int create)
 343{
 344        const struct in6_addr *remote = &p->raddr;
 345        const struct in6_addr *local = &p->laddr;
 346        struct ip6_tnl __rcu **tp;
 347        struct ip6_tnl *t;
 348        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 349
 350        for (tp = ip6_tnl_bucket(ip6n, p);
 351             (t = rtnl_dereference(*tp)) != NULL;
 352             tp = &t->next) {
 353                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 354                    ipv6_addr_equal(remote, &t->parms.raddr)) {
 355                        if (create)
 356                                return ERR_PTR(-EEXIST);
 357
 358                        return t;
 359                }
 360        }
 361        if (!create)
 362                return ERR_PTR(-ENODEV);
 363        return ip6_tnl_create(net, p);
 364}
 365
 366/**
 367 * ip6_tnl_dev_uninit - tunnel device uninitializer
 368 *   @dev: the device to be destroyed
 369 *
 370 * Description:
 371 *   ip6_tnl_dev_uninit() removes tunnel from its list
 372 **/
 373
 374static void
 375ip6_tnl_dev_uninit(struct net_device *dev)
 376{
 377        struct ip6_tnl *t = netdev_priv(dev);
 378        struct net *net = t->net;
 379        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 380
 381        if (dev == ip6n->fb_tnl_dev)
 382                RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 383        else
 384                ip6_tnl_unlink(ip6n, t);
 385        dst_cache_reset(&t->dst_cache);
 386        dev_put(dev);
 387}
 388
 389/**
 390 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 391 *   @skb: received socket buffer
 392 *
 393 * Return:
 394 *   0 if none was found,
 395 *   else index to encapsulation limit
 396 **/
 397
 398__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 399{
 400        const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
 401        unsigned int nhoff = raw - skb->data;
 402        unsigned int off = nhoff + sizeof(*ipv6h);
 403        u8 next, nexthdr = ipv6h->nexthdr;
 404
 405        while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
 406                struct ipv6_opt_hdr *hdr;
 407                u16 optlen;
 408
 409                if (!pskb_may_pull(skb, off + sizeof(*hdr)))
 410                        break;
 411
 412                hdr = (struct ipv6_opt_hdr *)(skb->data + off);
 413                if (nexthdr == NEXTHDR_FRAGMENT) {
 414                        struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
 415                        if (frag_hdr->frag_off)
 416                                break;
 417                        optlen = 8;
 418                } else if (nexthdr == NEXTHDR_AUTH) {
 419                        optlen = (hdr->hdrlen + 2) << 2;
 420                } else {
 421                        optlen = ipv6_optlen(hdr);
 422                }
 423                /* cache hdr->nexthdr, since pskb_may_pull() might
 424                 * invalidate hdr
 425                 */
 426                next = hdr->nexthdr;
 427                if (nexthdr == NEXTHDR_DEST) {
 428                        u16 i = 2;
 429
 430                        /* Remember : hdr is no longer valid at this point. */
 431                        if (!pskb_may_pull(skb, off + optlen))
 432                                break;
 433
 434                        while (1) {
 435                                struct ipv6_tlv_tnl_enc_lim *tel;
 436
 437                                /* No more room for encapsulation limit */
 438                                if (i + sizeof(*tel) > optlen)
 439                                        break;
 440
 441                                tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
 442                                /* return index of option if found and valid */
 443                                if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 444                                    tel->length == 1)
 445                                        return i + off - nhoff;
 446                                /* else jump to next option */
 447                                if (tel->type)
 448                                        i += tel->length + 2;
 449                                else
 450                                        i++;
 451                        }
 452                }
 453                nexthdr = next;
 454                off += optlen;
 455        }
 456        return 0;
 457}
 458EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
 459
 460/**
 461 * ip6_tnl_err - tunnel error handler
 462 *
 463 * Description:
 464 *   ip6_tnl_err() should handle errors in the tunnel according
 465 *   to the specifications in RFC 2473.
 466 **/
 467
 468static int
 469ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 470            u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 471{
 472        const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
 473        struct net *net = dev_net(skb->dev);
 474        u8 rel_type = ICMPV6_DEST_UNREACH;
 475        u8 rel_code = ICMPV6_ADDR_UNREACH;
 476        __u32 rel_info = 0;
 477        struct ip6_tnl *t;
 478        int err = -ENOENT;
 479        int rel_msg = 0;
 480        u8 tproto;
 481        __u16 len;
 482
 483        /* If the packet doesn't contain the original IPv6 header we are
 484           in trouble since we might need the source address for further
 485           processing of the error. */
 486
 487        rcu_read_lock();
 488        t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
 489        if (!t)
 490                goto out;
 491
 492        tproto = READ_ONCE(t->parms.proto);
 493        if (tproto != ipproto && tproto != 0)
 494                goto out;
 495
 496        err = 0;
 497
 498        switch (*type) {
 499                struct ipv6_tlv_tnl_enc_lim *tel;
 500                __u32 mtu, teli;
 501        case ICMPV6_DEST_UNREACH:
 502                net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 503                                    t->parms.name);
 504                rel_msg = 1;
 505                break;
 506        case ICMPV6_TIME_EXCEED:
 507                if ((*code) == ICMPV6_EXC_HOPLIMIT) {
 508                        net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 509                                            t->parms.name);
 510                        rel_msg = 1;
 511                }
 512                break;
 513        case ICMPV6_PARAMPROB:
 514                teli = 0;
 515                if ((*code) == ICMPV6_HDR_FIELD)
 516                        teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
 517
 518                if (teli && teli == *info - 2) {
 519                        tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 520                        if (tel->encap_limit == 0) {
 521                                net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
 522                                                    t->parms.name);
 523                                rel_msg = 1;
 524                        }
 525                } else {
 526                        net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 527                                            t->parms.name);
 528                }
 529                break;
 530        case ICMPV6_PKT_TOOBIG:
 531                ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
 532                                sock_net_uid(net, NULL));
 533                mtu = *info - offset;
 534                if (mtu < IPV6_MIN_MTU)
 535                        mtu = IPV6_MIN_MTU;
 536                len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
 537                if (len > mtu) {
 538                        rel_type = ICMPV6_PKT_TOOBIG;
 539                        rel_code = 0;
 540                        rel_info = mtu;
 541                        rel_msg = 1;
 542                }
 543                break;
 544        case NDISC_REDIRECT:
 545                ip6_redirect(skb, net, skb->dev->ifindex, 0,
 546                             sock_net_uid(net, NULL));
 547                break;
 548        }
 549
 550        *type = rel_type;
 551        *code = rel_code;
 552        *info = rel_info;
 553        *msg = rel_msg;
 554
 555out:
 556        rcu_read_unlock();
 557        return err;
 558}
 559
 560static int
 561ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 562           u8 type, u8 code, int offset, __be32 info)
 563{
 564        __u32 rel_info = ntohl(info);
 565        const struct iphdr *eiph;
 566        struct sk_buff *skb2;
 567        int err, rel_msg = 0;
 568        u8 rel_type = type;
 569        u8 rel_code = code;
 570        struct rtable *rt;
 571        struct flowi4 fl4;
 572
 573        err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
 574                          &rel_msg, &rel_info, offset);
 575        if (err < 0)
 576                return err;
 577
 578        if (rel_msg == 0)
 579                return 0;
 580
 581        switch (rel_type) {
 582        case ICMPV6_DEST_UNREACH:
 583                if (rel_code != ICMPV6_ADDR_UNREACH)
 584                        return 0;
 585                rel_type = ICMP_DEST_UNREACH;
 586                rel_code = ICMP_HOST_UNREACH;
 587                break;
 588        case ICMPV6_PKT_TOOBIG:
 589                if (rel_code != 0)
 590                        return 0;
 591                rel_type = ICMP_DEST_UNREACH;
 592                rel_code = ICMP_FRAG_NEEDED;
 593                break;
 594        default:
 595                return 0;
 596        }
 597
 598        if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
 599                return 0;
 600
 601        skb2 = skb_clone(skb, GFP_ATOMIC);
 602        if (!skb2)
 603                return 0;
 604
 605        skb_dst_drop(skb2);
 606
 607        skb_pull(skb2, offset);
 608        skb_reset_network_header(skb2);
 609        eiph = ip_hdr(skb2);
 610
 611        /* Try to guess incoming interface */
 612        rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
 613                                   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 614        if (IS_ERR(rt))
 615                goto out;
 616
 617        skb2->dev = rt->dst.dev;
 618        ip_rt_put(rt);
 619
 620        /* route "incoming" packet */
 621        if (rt->rt_flags & RTCF_LOCAL) {
 622                rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
 623                                           eiph->daddr, eiph->saddr, 0, 0,
 624                                           IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 625                if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
 626                        if (!IS_ERR(rt))
 627                                ip_rt_put(rt);
 628                        goto out;
 629                }
 630                skb_dst_set(skb2, &rt->dst);
 631        } else {
 632                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 633                                   skb2->dev) ||
 634                    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
 635                        goto out;
 636        }
 637
 638        /* change mtu on this route */
 639        if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
 640                if (rel_info > dst_mtu(skb_dst(skb2)))
 641                        goto out;
 642
 643                skb_dst_update_pmtu(skb2, rel_info);
 644        }
 645
 646        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 647
 648out:
 649        kfree_skb(skb2);
 650        return 0;
 651}
 652
 653static int
 654ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 655           u8 type, u8 code, int offset, __be32 info)
 656{
 657        __u32 rel_info = ntohl(info);
 658        int err, rel_msg = 0;
 659        u8 rel_type = type;
 660        u8 rel_code = code;
 661
 662        err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
 663                          &rel_msg, &rel_info, offset);
 664        if (err < 0)
 665                return err;
 666
 667        if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 668                struct rt6_info *rt;
 669                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 670
 671                if (!skb2)
 672                        return 0;
 673
 674                skb_dst_drop(skb2);
 675                skb_pull(skb2, offset);
 676                skb_reset_network_header(skb2);
 677
 678                /* Try to guess incoming interface */
 679                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
 680                                NULL, 0, skb2, 0);
 681
 682                if (rt && rt->dst.dev)
 683                        skb2->dev = rt->dst.dev;
 684
 685                icmpv6_send(skb2, rel_type, rel_code, rel_info);
 686
 687                ip6_rt_put(rt);
 688
 689                kfree_skb(skb2);
 690        }
 691
 692        return 0;
 693}
 694
 695static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 696                                       const struct ipv6hdr *ipv6h,
 697                                       struct sk_buff *skb)
 698{
 699        __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 700
 701        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 702                ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
 703
 704        return IP6_ECN_decapsulate(ipv6h, skb);
 705}
 706
 707static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 708                                       const struct ipv6hdr *ipv6h,
 709                                       struct sk_buff *skb)
 710{
 711        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 712                ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
 713
 714        return IP6_ECN_decapsulate(ipv6h, skb);
 715}
 716
 717__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 718                             const struct in6_addr *laddr,
 719                             const struct in6_addr *raddr)
 720{
 721        struct __ip6_tnl_parm *p = &t->parms;
 722        int ltype = ipv6_addr_type(laddr);
 723        int rtype = ipv6_addr_type(raddr);
 724        __u32 flags = 0;
 725
 726        if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
 727                flags = IP6_TNL_F_CAP_PER_PACKET;
 728        } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 729                   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 730                   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
 731                   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
 732                if (ltype&IPV6_ADDR_UNICAST)
 733                        flags |= IP6_TNL_F_CAP_XMIT;
 734                if (rtype&IPV6_ADDR_UNICAST)
 735                        flags |= IP6_TNL_F_CAP_RCV;
 736        }
 737        return flags;
 738}
 739EXPORT_SYMBOL(ip6_tnl_get_cap);
 740
 741/* called with rcu_read_lock() */
 742int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 743                                  const struct in6_addr *laddr,
 744                                  const struct in6_addr *raddr)
 745{
 746        struct __ip6_tnl_parm *p = &t->parms;
 747        int ret = 0;
 748        struct net *net = t->net;
 749
 750        if ((p->flags & IP6_TNL_F_CAP_RCV) ||
 751            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 752             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
 753                struct net_device *ldev = NULL;
 754
 755                if (p->link)
 756                        ldev = dev_get_by_index_rcu(net, p->link);
 757
 758                if ((ipv6_addr_is_multicast(laddr) ||
 759                     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
 760                                                    0, IFA_F_TENTATIVE))) &&
 761                    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
 762                     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
 763                                                     0, IFA_F_TENTATIVE))))
 764                        ret = 1;
 765        }
 766        return ret;
 767}
 768EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 769
 770static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 771                         const struct tnl_ptk_info *tpi,
 772                         struct metadata_dst *tun_dst,
 773                         int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
 774                                                const struct ipv6hdr *ipv6h,
 775                                                struct sk_buff *skb),
 776                         bool log_ecn_err)
 777{
 778        struct pcpu_sw_netstats *tstats;
 779        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 780        int err;
 781
 782        if ((!(tpi->flags & TUNNEL_CSUM) &&
 783             (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
 784            ((tpi->flags & TUNNEL_CSUM) &&
 785             !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
 786                tunnel->dev->stats.rx_crc_errors++;
 787                tunnel->dev->stats.rx_errors++;
 788                goto drop;
 789        }
 790
 791        if (tunnel->parms.i_flags & TUNNEL_SEQ) {
 792                if (!(tpi->flags & TUNNEL_SEQ) ||
 793                    (tunnel->i_seqno &&
 794                     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 795                        tunnel->dev->stats.rx_fifo_errors++;
 796                        tunnel->dev->stats.rx_errors++;
 797                        goto drop;
 798                }
 799                tunnel->i_seqno = ntohl(tpi->seq) + 1;
 800        }
 801
 802        skb->protocol = tpi->proto;
 803
 804        /* Warning: All skb pointers will be invalidated! */
 805        if (tunnel->dev->type == ARPHRD_ETHER) {
 806                if (!pskb_may_pull(skb, ETH_HLEN)) {
 807                        tunnel->dev->stats.rx_length_errors++;
 808                        tunnel->dev->stats.rx_errors++;
 809                        goto drop;
 810                }
 811
 812                ipv6h = ipv6_hdr(skb);
 813                skb->protocol = eth_type_trans(skb, tunnel->dev);
 814                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 815        } else {
 816                skb->dev = tunnel->dev;
 817        }
 818
 819        skb_reset_network_header(skb);
 820        memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 821
 822        __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 823
 824        err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
 825        if (unlikely(err)) {
 826                if (log_ecn_err)
 827                        net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
 828                                             &ipv6h->saddr,
 829                                             ipv6_get_dsfield(ipv6h));
 830                if (err > 1) {
 831                        ++tunnel->dev->stats.rx_frame_errors;
 832                        ++tunnel->dev->stats.rx_errors;
 833                        goto drop;
 834                }
 835        }
 836
 837        tstats = this_cpu_ptr(tunnel->dev->tstats);
 838        u64_stats_update_begin(&tstats->syncp);
 839        tstats->rx_packets++;
 840        tstats->rx_bytes += skb->len;
 841        u64_stats_update_end(&tstats->syncp);
 842
 843        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 844
 845        if (tun_dst)
 846                skb_dst_set(skb, (struct dst_entry *)tun_dst);
 847
 848        gro_cells_receive(&tunnel->gro_cells, skb);
 849        return 0;
 850
 851drop:
 852        if (tun_dst)
 853                dst_release((struct dst_entry *)tun_dst);
 854        kfree_skb(skb);
 855        return 0;
 856}
 857
 858int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
 859                const struct tnl_ptk_info *tpi,
 860                struct metadata_dst *tun_dst,
 861                bool log_ecn_err)
 862{
 863        return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
 864                             log_ecn_err);
 865}
 866EXPORT_SYMBOL(ip6_tnl_rcv);
 867
 868static const struct tnl_ptk_info tpi_v6 = {
 869        /* no tunnel info required for ipxip6. */
 870        .proto = htons(ETH_P_IPV6),
 871};
 872
 873static const struct tnl_ptk_info tpi_v4 = {
 874        /* no tunnel info required for ipxip6. */
 875        .proto = htons(ETH_P_IP),
 876};
 877
 878static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 879                      const struct tnl_ptk_info *tpi,
 880                      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
 881                                                  const struct ipv6hdr *ipv6h,
 882                                                  struct sk_buff *skb))
 883{
 884        struct ip6_tnl *t;
 885        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 886        struct metadata_dst *tun_dst = NULL;
 887        int ret = -1;
 888
 889        rcu_read_lock();
 890        t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
 891
 892        if (t) {
 893                u8 tproto = READ_ONCE(t->parms.proto);
 894
 895                if (tproto != ipproto && tproto != 0)
 896                        goto drop;
 897                if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 898                        goto drop;
 899                ipv6h = ipv6_hdr(skb);
 900                if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
 901                        goto drop;
 902                if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 903                        goto drop;
 904                if (t->parms.collect_md) {
 905                        tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
 906                        if (!tun_dst)
 907                                goto drop;
 908                }
 909                ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 910                                    log_ecn_error);
 911        }
 912
 913        rcu_read_unlock();
 914
 915        return ret;
 916
 917drop:
 918        rcu_read_unlock();
 919        kfree_skb(skb);
 920        return 0;
 921}
 922
 923static int ip4ip6_rcv(struct sk_buff *skb)
 924{
 925        return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
 926                          ip4ip6_dscp_ecn_decapsulate);
 927}
 928
 929static int ip6ip6_rcv(struct sk_buff *skb)
 930{
 931        return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
 932                          ip6ip6_dscp_ecn_decapsulate);
 933}
 934
 935struct ipv6_tel_txoption {
 936        struct ipv6_txoptions ops;
 937        __u8 dst_opt[8];
 938};
 939
 940static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 941{
 942        memset(opt, 0, sizeof(struct ipv6_tel_txoption));
 943
 944        opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
 945        opt->dst_opt[3] = 1;
 946        opt->dst_opt[4] = encap_limit;
 947        opt->dst_opt[5] = IPV6_TLV_PADN;
 948        opt->dst_opt[6] = 1;
 949
 950        opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
 951        opt->ops.opt_nflen = 8;
 952}
 953
 954/**
 955 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 956 *   @t: the outgoing tunnel device
 957 *   @hdr: IPv6 header from the incoming packet
 958 *
 959 * Description:
 960 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 961 *   doesn't match source of incoming packet.
 962 *
 963 * Return:
 964 *   1 if conflict,
 965 *   0 else
 966 **/
 967
 968static inline bool
 969ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 970{
 971        return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 972}
 973
 974int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 975                     const struct in6_addr *laddr,
 976                     const struct in6_addr *raddr)
 977{
 978        struct __ip6_tnl_parm *p = &t->parms;
 979        int ret = 0;
 980        struct net *net = t->net;
 981
 982        if (t->parms.collect_md)
 983                return 1;
 984
 985        if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
 986            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 987             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
 988                struct net_device *ldev = NULL;
 989
 990                rcu_read_lock();
 991                if (p->link)
 992                        ldev = dev_get_by_index_rcu(net, p->link);
 993
 994                if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
 995                                                      0, IFA_F_TENTATIVE)))
 996                        pr_warn("%s xmit: Local address not yet configured!\n",
 997                                p->name);
 998                else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
 999                         !ipv6_addr_is_multicast(raddr) &&
1000                         unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
1001                                                          true, 0, IFA_F_TENTATIVE)))
1002                        pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
1003                                p->name);
1004                else
1005                        ret = 1;
1006                rcu_read_unlock();
1007        }
1008        return ret;
1009}
1010EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
1011
1012/**
1013 * ip6_tnl_xmit - encapsulate packet and send
1014 *   @skb: the outgoing socket buffer
1015 *   @dev: the outgoing tunnel device
1016 *   @dsfield: dscp code for outer header
1017 *   @fl6: flow of tunneled packet
1018 *   @encap_limit: encapsulation limit
1019 *   @pmtu: Path MTU is stored if packet is too big
1020 *   @proto: next header value
1021 *
1022 * Description:
1023 *   Build new header and do some sanity checks on the packet before sending
1024 *   it.
1025 *
1026 * Return:
1027 *   0 on success
1028 *   -1 fail
1029 *   %-EMSGSIZE message too big. return mtu in this case.
1030 **/
1031
1032int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
1033                 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
1034                 __u8 proto)
1035{
1036        struct ip6_tnl *t = netdev_priv(dev);
1037        struct net *net = t->net;
1038        struct net_device_stats *stats = &t->dev->stats;
1039        struct ipv6hdr *ipv6h;
1040        struct ipv6_tel_txoption opt;
1041        struct dst_entry *dst = NULL, *ndst = NULL;
1042        struct net_device *tdev;
1043        int mtu;
1044        unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
1045        unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
1046        unsigned int max_headroom = psh_hlen;
1047        bool use_cache = false;
1048        u8 hop_limit;
1049        int err = -1;
1050
1051        if (t->parms.collect_md) {
1052                hop_limit = skb_tunnel_info(skb)->key.ttl;
1053                goto route_lookup;
1054        } else {
1055                hop_limit = t->parms.hop_limit;
1056        }
1057
1058        /* NBMA tunnel */
1059        if (ipv6_addr_any(&t->parms.raddr)) {
1060                if (skb->protocol == htons(ETH_P_IPV6)) {
1061                        struct in6_addr *addr6;
1062                        struct neighbour *neigh;
1063                        int addr_type;
1064
1065                        if (!skb_dst(skb))
1066                                goto tx_err_link_failure;
1067
1068                        neigh = dst_neigh_lookup(skb_dst(skb),
1069                                                 &ipv6_hdr(skb)->daddr);
1070                        if (!neigh)
1071                                goto tx_err_link_failure;
1072
1073                        addr6 = (struct in6_addr *)&neigh->primary_key;
1074                        addr_type = ipv6_addr_type(addr6);
1075
1076                        if (addr_type == IPV6_ADDR_ANY)
1077                                addr6 = &ipv6_hdr(skb)->daddr;
1078
1079                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1080                        neigh_release(neigh);
1081                }
1082        } else if (t->parms.proto != 0 && !(t->parms.flags &
1083                                            (IP6_TNL_F_USE_ORIG_TCLASS |
1084                                             IP6_TNL_F_USE_ORIG_FWMARK))) {
1085                /* enable the cache only if neither the outer protocol nor the
1086                 * routing decision depends on the current inner header value
1087                 */
1088                use_cache = true;
1089        }
1090
1091        if (use_cache)
1092                dst = dst_cache_get(&t->dst_cache);
1093
1094        if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1095                goto tx_err_link_failure;
1096
1097        if (!dst) {
1098route_lookup:
1099                /* add dsfield to flowlabel for route lookup */
1100                fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);
1101
1102                dst = ip6_route_output(net, NULL, fl6);
1103
1104                if (dst->error)
1105                        goto tx_err_link_failure;
1106                dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
1107                if (IS_ERR(dst)) {
1108                        err = PTR_ERR(dst);
1109                        dst = NULL;
1110                        goto tx_err_link_failure;
1111                }
1112                if (t->parms.collect_md && ipv6_addr_any(&fl6->saddr) &&
1113                    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
1114                                       &fl6->daddr, 0, &fl6->saddr))
1115                        goto tx_err_link_failure;
1116                ndst = dst;
1117        }
1118
1119        tdev = dst->dev;
1120
1121        if (tdev == dev) {
1122                stats->collisions++;
1123                net_warn_ratelimited("%s: Local routing loop detected!\n",
1124                                     t->parms.name);
1125                goto tx_err_dst_release;
1126        }
1127        mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
1128        if (encap_limit >= 0) {
1129                max_headroom += 8;
1130                mtu -= 8;
1131        }
1132        mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
1133                       IPV6_MIN_MTU : IPV4_MIN_MTU);
1134
1135        skb_dst_update_pmtu(skb, mtu);
1136        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
1137                *pmtu = mtu;
1138                err = -EMSGSIZE;
1139                goto tx_err_dst_release;
1140        }
1141
1142        if (t->err_count > 0) {
1143                if (time_before(jiffies,
1144                                t->err_time + IP6TUNNEL_ERR_TIMEO)) {
1145                        t->err_count--;
1146
1147                        dst_link_failure(skb);
1148                } else {
1149                        t->err_count = 0;
1150                }
1151        }
1152
1153        skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1154
1155        /*
1156         * Okay, now see if we can stuff it in the buffer as-is.
1157         */
1158        max_headroom += LL_RESERVED_SPACE(tdev);
1159
1160        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
1161            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1162                struct sk_buff *new_skb;
1163
1164                new_skb = skb_realloc_headroom(skb, max_headroom);
1165                if (!new_skb)
1166                        goto tx_err_dst_release;
1167
1168                if (skb->sk)
1169                        skb_set_owner_w(new_skb, skb->sk);
1170                consume_skb(skb);
1171                skb = new_skb;
1172        }
1173
1174        if (t->parms.collect_md) {
1175                if (t->encap.type != TUNNEL_ENCAP_NONE)
1176                        goto tx_err_dst_release;
1177        } else {
1178                if (use_cache && ndst)
1179                        dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1180        }
1181        skb_dst_set(skb, dst);
1182
1183        if (hop_limit == 0) {
1184                if (skb->protocol == htons(ETH_P_IP))
1185                        hop_limit = ip_hdr(skb)->ttl;
1186                else if (skb->protocol == htons(ETH_P_IPV6))
1187                        hop_limit = ipv6_hdr(skb)->hop_limit;
1188                else
1189                        hop_limit = ip6_dst_hoplimit(dst);
1190        }
1191
1192        /* Calculate max headroom for all the headers and adjust
1193         * needed_headroom if necessary.
1194         */
1195        max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
1196                        + dst->header_len + t->hlen;
1197        if (max_headroom > dev->needed_headroom)
1198                dev->needed_headroom = max_headroom;
1199
1200        err = ip6_tnl_encap(skb, t, &proto, fl6);
1201        if (err)
1202                return err;
1203
1204        if (encap_limit >= 0) {
1205                init_tel_txopt(&opt, encap_limit);
1206                ipv6_push_frag_opts(skb, &opt.ops, &proto);
1207        }
1208
1209        skb_push(skb, sizeof(struct ipv6hdr));
1210        skb_reset_network_header(skb);
1211        ipv6h = ipv6_hdr(skb);
1212        ip6_flow_hdr(ipv6h, dsfield,
1213                     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
1214        ipv6h->hop_limit = hop_limit;
1215        ipv6h->nexthdr = proto;
1216        ipv6h->saddr = fl6->saddr;
1217        ipv6h->daddr = fl6->daddr;
1218        ip6tunnel_xmit(NULL, skb, dev);
1219        return 0;
1220tx_err_link_failure:
1221        stats->tx_carrier_errors++;
1222        dst_link_failure(skb);
1223tx_err_dst_release:
1224        dst_release(dst);
1225        return err;
1226}
1227EXPORT_SYMBOL(ip6_tnl_xmit);
1228
1229static inline int
1230ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1231{
1232        struct ip6_tnl *t = netdev_priv(dev);
1233        const struct iphdr  *iph;
1234        int encap_limit = -1;
1235        struct flowi6 fl6;
1236        __u8 dsfield;
1237        __u32 mtu;
1238        u8 tproto;
1239        int err;
1240
1241        iph = ip_hdr(skb);
1242        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1243
1244        tproto = READ_ONCE(t->parms.proto);
1245        if (tproto != IPPROTO_IPIP && tproto != 0)
1246                return -1;
1247
1248        if (t->parms.collect_md) {
1249                struct ip_tunnel_info *tun_info;
1250                const struct ip_tunnel_key *key;
1251
1252                tun_info = skb_tunnel_info(skb);
1253                if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
1254                             ip_tunnel_info_af(tun_info) != AF_INET6))
1255                        return -1;
1256                key = &tun_info->key;
1257                memset(&fl6, 0, sizeof(fl6));
1258                fl6.flowi6_proto = IPPROTO_IPIP;
1259                fl6.saddr = key->u.ipv6.src;
1260                fl6.daddr = key->u.ipv6.dst;
1261                fl6.flowlabel = key->label;
1262                dsfield =  key->tos;
1263        } else {
1264                if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1265                        encap_limit = t->parms.encap_limit;
1266
1267                memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1268                fl6.flowi6_proto = IPPROTO_IPIP;
1269
1270                if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1271                        dsfield = ipv4_get_dsfield(iph);
1272                else
1273                        dsfield = ip6_tclass(t->parms.flowinfo);
1274                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1275                        fl6.flowi6_mark = skb->mark;
1276                else
1277                        fl6.flowi6_mark = t->parms.fwmark;
1278        }
1279
1280        fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1281
1282        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1283                return -1;
1284
1285        dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
1286
1287        skb_set_inner_ipproto(skb, IPPROTO_IPIP);
1288
1289        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1290                           IPPROTO_IPIP);
1291        if (err != 0) {
1292                /* XXX: send ICMP error even if DF is not set. */
1293                if (err == -EMSGSIZE)
1294                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1295                                  htonl(mtu));
1296                return -1;
1297        }
1298
1299        return 0;
1300}
1301
1302static inline int
1303ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1304{
1305        struct ip6_tnl *t = netdev_priv(dev);
1306        struct ipv6hdr *ipv6h;
1307        int encap_limit = -1;
1308        __u16 offset;
1309        struct flowi6 fl6;
1310        __u8 dsfield;
1311        __u32 mtu;
1312        u8 tproto;
1313        int err;
1314
1315        ipv6h = ipv6_hdr(skb);
1316        tproto = READ_ONCE(t->parms.proto);
1317        if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
1318            ip6_tnl_addr_conflict(t, ipv6h))
1319                return -1;
1320
1321        if (t->parms.collect_md) {
1322                struct ip_tunnel_info *tun_info;
1323                const struct ip_tunnel_key *key;
1324
1325                tun_info = skb_tunnel_info(skb);
1326                if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
1327                             ip_tunnel_info_af(tun_info) != AF_INET6))
1328                        return -1;
1329                key = &tun_info->key;
1330                memset(&fl6, 0, sizeof(fl6));
1331                fl6.flowi6_proto = IPPROTO_IPV6;
1332                fl6.saddr = key->u.ipv6.src;
1333                fl6.daddr = key->u.ipv6.dst;
1334                fl6.flowlabel = key->label;
1335                dsfield = key->tos;
1336        } else {
1337                offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1338                /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
1339                ipv6h = ipv6_hdr(skb);
1340                if (offset > 0) {
1341                        struct ipv6_tlv_tnl_enc_lim *tel;
1342
1343                        tel = (void *)&skb_network_header(skb)[offset];
1344                        if (tel->encap_limit == 0) {
1345                                icmpv6_send(skb, ICMPV6_PARAMPROB,
1346                                            ICMPV6_HDR_FIELD, offset + 2);
1347                                return -1;
1348                        }
1349                        encap_limit = tel->encap_limit - 1;
1350                } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
1351                        encap_limit = t->parms.encap_limit;
1352                }
1353
1354                memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1355                fl6.flowi6_proto = IPPROTO_IPV6;
1356
1357                if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1358                        dsfield = ipv6_get_dsfield(ipv6h);
1359                else
1360                        dsfield = ip6_tclass(t->parms.flowinfo);
1361                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
1362                        fl6.flowlabel |= ip6_flowlabel(ipv6h);
1363                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1364                        fl6.flowi6_mark = skb->mark;
1365                else
1366                        fl6.flowi6_mark = t->parms.fwmark;
1367        }
1368
1369        fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1370
1371        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1372                return -1;
1373
1374        dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
1375
1376        skb_set_inner_ipproto(skb, IPPROTO_IPV6);
1377
1378        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1379                           IPPROTO_IPV6);
1380        if (err != 0) {
1381                if (err == -EMSGSIZE)
1382                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1383                return -1;
1384        }
1385
1386        return 0;
1387}
1388
1389static netdev_tx_t
1390ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
1391{
1392        struct ip6_tnl *t = netdev_priv(dev);
1393        struct net_device_stats *stats = &t->dev->stats;
1394        int ret;
1395
1396        if (!pskb_inet_may_pull(skb))
1397                goto tx_err;
1398
1399        switch (skb->protocol) {
1400        case htons(ETH_P_IP):
1401                ret = ip4ip6_tnl_xmit(skb, dev);
1402                break;
1403        case htons(ETH_P_IPV6):
1404                ret = ip6ip6_tnl_xmit(skb, dev);
1405                break;
1406        default:
1407                goto tx_err;
1408        }
1409
1410        if (ret < 0)
1411                goto tx_err;
1412
1413        return NETDEV_TX_OK;
1414
1415tx_err:
1416        stats->tx_errors++;
1417        stats->tx_dropped++;
1418        kfree_skb(skb);
1419        return NETDEV_TX_OK;
1420}
1421
1422static void ip6_tnl_link_config(struct ip6_tnl *t)
1423{
1424        struct net_device *dev = t->dev;
1425        struct __ip6_tnl_parm *p = &t->parms;
1426        struct flowi6 *fl6 = &t->fl.u.ip6;
1427        int t_hlen;
1428
1429        memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1430        memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1431
1432        /* Set up flowi template */
1433        fl6->saddr = p->laddr;
1434        fl6->daddr = p->raddr;
1435        fl6->flowi6_oif = p->link;
1436        fl6->flowlabel = 0;
1437
1438        if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1439                fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1440        if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1441                fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1442
1443        p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1444        p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1445
1446        if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1447                dev->flags |= IFF_POINTOPOINT;
1448        else
1449                dev->flags &= ~IFF_POINTOPOINT;
1450
1451        t->tun_hlen = 0;
1452        t->hlen = t->encap_hlen + t->tun_hlen;
1453        t_hlen = t->hlen + sizeof(struct ipv6hdr);
1454
1455        if (p->flags & IP6_TNL_F_CAP_XMIT) {
1456                int strict = (ipv6_addr_type(&p->raddr) &
1457                              (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1458
1459                struct rt6_info *rt = rt6_lookup(t->net,
1460                                                 &p->raddr, &p->laddr,
1461                                                 p->link, NULL, strict);
1462
1463                if (!rt)
1464                        return;
1465
1466                if (rt->dst.dev) {
1467                        dev->hard_header_len = rt->dst.dev->hard_header_len +
1468                                t_hlen;
1469
1470                        dev->mtu = rt->dst.dev->mtu - t_hlen;
1471                        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1472                                dev->mtu -= 8;
1473
1474                        if (dev->mtu < IPV6_MIN_MTU)
1475                                dev->mtu = IPV6_MIN_MTU;
1476                }
1477                ip6_rt_put(rt);
1478        }
1479}
1480
1481/**
1482 * ip6_tnl_change - update the tunnel parameters
1483 *   @t: tunnel to be changed
1484 *   @p: tunnel configuration parameters
1485 *
1486 * Description:
1487 *   ip6_tnl_change() updates the tunnel parameters
1488 **/
1489
1490static int
1491ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1492{
1493        t->parms.laddr = p->laddr;
1494        t->parms.raddr = p->raddr;
1495        t->parms.flags = p->flags;
1496        t->parms.hop_limit = p->hop_limit;
1497        t->parms.encap_limit = p->encap_limit;
1498        t->parms.flowinfo = p->flowinfo;
1499        t->parms.link = p->link;
1500        t->parms.proto = p->proto;
1501        t->parms.fwmark = p->fwmark;
1502        dst_cache_reset(&t->dst_cache);
1503        ip6_tnl_link_config(t);
1504        return 0;
1505}
1506
1507static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1508{
1509        struct net *net = t->net;
1510        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1511        int err;
1512
1513        ip6_tnl_unlink(ip6n, t);
1514        synchronize_net();
1515        err = ip6_tnl_change(t, p);
1516        ip6_tnl_link(ip6n, t);
1517        netdev_state_change(t->dev);
1518        return err;
1519}
1520
1521static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1522{
1523        /* for default tnl0 device allow to change only the proto */
1524        t->parms.proto = p->proto;
1525        netdev_state_change(t->dev);
1526        return 0;
1527}
1528
1529static void
1530ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1531{
1532        p->laddr = u->laddr;
1533        p->raddr = u->raddr;
1534        p->flags = u->flags;
1535        p->hop_limit = u->hop_limit;
1536        p->encap_limit = u->encap_limit;
1537        p->flowinfo = u->flowinfo;
1538        p->link = u->link;
1539        p->proto = u->proto;
1540        memcpy(p->name, u->name, sizeof(u->name));
1541}
1542
1543static void
1544ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1545{
1546        u->laddr = p->laddr;
1547        u->raddr = p->raddr;
1548        u->flags = p->flags;
1549        u->hop_limit = p->hop_limit;
1550        u->encap_limit = p->encap_limit;
1551        u->flowinfo = p->flowinfo;
1552        u->link = p->link;
1553        u->proto = p->proto;
1554        memcpy(u->name, p->name, sizeof(u->name));
1555}
1556
1557/**
1558 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1559 *   @dev: virtual device associated with tunnel
1560 *   @ifr: parameters passed from userspace
1561 *   @cmd: command to be performed
1562 *
1563 * Description:
1564 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1565 *   from userspace.
1566 *
1567 *   The possible commands are the following:
1568 *     %SIOCGETTUNNEL: get tunnel parameters for device
1569 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
1570 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
1571 *     %SIOCDELTUNNEL: delete tunnel
1572 *
1573 *   The fallback device "ip6tnl0", created during module
1574 *   initialization, can be used for creating other tunnel devices.
1575 *
1576 * Return:
1577 *   0 on success,
1578 *   %-EFAULT if unable to copy data to or from userspace,
1579 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
1580 *   %-EINVAL if passed tunnel parameters are invalid,
1581 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
1582 *   %-ENODEV if attempting to change or delete a nonexisting device
1583 **/
1584
1585static int
1586ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1587{
1588        int err = 0;
1589        struct ip6_tnl_parm p;
1590        struct __ip6_tnl_parm p1;
1591        struct ip6_tnl *t = netdev_priv(dev);
1592        struct net *net = t->net;
1593        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1594
1595        memset(&p1, 0, sizeof(p1));
1596
1597        switch (cmd) {
1598        case SIOCGETTUNNEL:
1599                if (dev == ip6n->fb_tnl_dev) {
1600                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1601                                err = -EFAULT;
1602                                break;
1603                        }
1604                        ip6_tnl_parm_from_user(&p1, &p);
1605                        t = ip6_tnl_locate(net, &p1, 0);
1606                        if (IS_ERR(t))
1607                                t = netdev_priv(dev);
1608                } else {
1609                        memset(&p, 0, sizeof(p));
1610                }
1611                ip6_tnl_parm_to_user(&p, &t->parms);
1612                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
1613                        err = -EFAULT;
1614                }
1615                break;
1616        case SIOCADDTUNNEL:
1617        case SIOCCHGTUNNEL:
1618                err = -EPERM;
1619                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1620                        break;
1621                err = -EFAULT;
1622                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1623                        break;
1624                err = -EINVAL;
1625                if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1626                    p.proto != 0)
1627                        break;
1628                ip6_tnl_parm_from_user(&p1, &p);
1629                t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1630                if (cmd == SIOCCHGTUNNEL) {
1631                        if (!IS_ERR(t)) {
1632                                if (t->dev != dev) {
1633                                        err = -EEXIST;
1634                                        break;
1635                                }
1636                        } else
1637                                t = netdev_priv(dev);
1638                        if (dev == ip6n->fb_tnl_dev)
1639                                err = ip6_tnl0_update(t, &p1);
1640                        else
1641                                err = ip6_tnl_update(t, &p1);
1642                }
1643                if (!IS_ERR(t)) {
1644                        err = 0;
1645                        ip6_tnl_parm_to_user(&p, &t->parms);
1646                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1647                                err = -EFAULT;
1648
1649                } else {
1650                        err = PTR_ERR(t);
1651                }
1652                break;
1653        case SIOCDELTUNNEL:
1654                err = -EPERM;
1655                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1656                        break;
1657
1658                if (dev == ip6n->fb_tnl_dev) {
1659                        err = -EFAULT;
1660                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1661                                break;
1662                        err = -ENOENT;
1663                        ip6_tnl_parm_from_user(&p1, &p);
1664                        t = ip6_tnl_locate(net, &p1, 0);
1665                        if (IS_ERR(t))
1666                                break;
1667                        err = -EPERM;
1668                        if (t->dev == ip6n->fb_tnl_dev)
1669                                break;
1670                        dev = t->dev;
1671                }
1672                err = 0;
1673                unregister_netdevice(dev);
1674                break;
1675        default:
1676                err = -EINVAL;
1677        }
1678        return err;
1679}
1680
1681/**
1682 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1683 *   @dev: virtual device associated with tunnel
1684 *   @new_mtu: the new mtu
1685 *
1686 * Return:
1687 *   0 on success,
1688 *   %-EINVAL if mtu too small
1689 **/
1690
1691int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1692{
1693        struct ip6_tnl *tnl = netdev_priv(dev);
1694
1695        if (tnl->parms.proto == IPPROTO_IPV6) {
1696                if (new_mtu < IPV6_MIN_MTU)
1697                        return -EINVAL;
1698        } else {
1699                if (new_mtu < ETH_MIN_MTU)
1700                        return -EINVAL;
1701        }
1702        if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
1703                if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
1704                        return -EINVAL;
1705        } else {
1706                if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
1707                        return -EINVAL;
1708        }
1709        dev->mtu = new_mtu;
1710        return 0;
1711}
1712EXPORT_SYMBOL(ip6_tnl_change_mtu);
1713
1714int ip6_tnl_get_iflink(const struct net_device *dev)
1715{
1716        struct ip6_tnl *t = netdev_priv(dev);
1717
1718        return t->parms.link;
1719}
1720EXPORT_SYMBOL(ip6_tnl_get_iflink);
1721
1722int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
1723                          unsigned int num)
1724{
1725        if (num >= MAX_IPTUN_ENCAP_OPS)
1726                return -ERANGE;
1727
1728        return !cmpxchg((const struct ip6_tnl_encap_ops **)
1729                        &ip6tun_encaps[num],
1730                        NULL, ops) ? 0 : -1;
1731}
1732EXPORT_SYMBOL(ip6_tnl_encap_add_ops);
1733
1734int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
1735                          unsigned int num)
1736{
1737        int ret;
1738
1739        if (num >= MAX_IPTUN_ENCAP_OPS)
1740                return -ERANGE;
1741
1742        ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
1743                       &ip6tun_encaps[num],
1744                       ops, NULL) == ops) ? 0 : -1;
1745
1746        synchronize_net();
1747
1748        return ret;
1749}
1750EXPORT_SYMBOL(ip6_tnl_encap_del_ops);
1751
1752int ip6_tnl_encap_setup(struct ip6_tnl *t,
1753                        struct ip_tunnel_encap *ipencap)
1754{
1755        int hlen;
1756
1757        memset(&t->encap, 0, sizeof(t->encap));
1758
1759        hlen = ip6_encap_hlen(ipencap);
1760        if (hlen < 0)
1761                return hlen;
1762
1763        t->encap.type = ipencap->type;
1764        t->encap.sport = ipencap->sport;
1765        t->encap.dport = ipencap->dport;
1766        t->encap.flags = ipencap->flags;
1767
1768        t->encap_hlen = hlen;
1769        t->hlen = t->encap_hlen + t->tun_hlen;
1770
1771        return 0;
1772}
1773EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
1774
1775static const struct net_device_ops ip6_tnl_netdev_ops = {
1776        .ndo_init       = ip6_tnl_dev_init,
1777        .ndo_uninit     = ip6_tnl_dev_uninit,
1778        .ndo_start_xmit = ip6_tnl_start_xmit,
1779        .ndo_do_ioctl   = ip6_tnl_ioctl,
1780        .ndo_change_mtu = ip6_tnl_change_mtu,
1781        .ndo_get_stats  = ip6_get_stats,
1782        .ndo_get_iflink = ip6_tnl_get_iflink,
1783};
1784
/* Feature bits that ip6_tnl_dev_setup() sets in features and hw_features. */
#define IPXIPX_FEATURES                                         \
        (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |      \
         NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
1790
1791/**
1792 * ip6_tnl_dev_setup - setup virtual tunnel device
1793 *   @dev: virtual device associated with tunnel
1794 *
1795 * Description:
1796 *   Initialize function pointers and device parameters
1797 **/
1798
1799static void ip6_tnl_dev_setup(struct net_device *dev)
1800{
1801        dev->netdev_ops = &ip6_tnl_netdev_ops;
1802        dev->needs_free_netdev = true;
1803        dev->priv_destructor = ip6_dev_free;
1804
1805        dev->type = ARPHRD_TUNNEL6;
1806        dev->flags |= IFF_NOARP;
1807        dev->addr_len = sizeof(struct in6_addr);
1808        dev->features |= NETIF_F_LLTX;
1809        netif_keep_dst(dev);
1810
1811        dev->features           |= IPXIPX_FEATURES;
1812        dev->hw_features        |= IPXIPX_FEATURES;
1813
1814        /* This perm addr will be used as interface identifier by IPv6 */
1815        dev->addr_assign_type = NET_ADDR_RANDOM;
1816        eth_random_addr(dev->perm_addr);
1817}
1818
1819
1820/**
1821 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1822 *   @dev: virtual device associated with tunnel
1823 **/
1824
1825static inline int
1826ip6_tnl_dev_init_gen(struct net_device *dev)
1827{
1828        struct ip6_tnl *t = netdev_priv(dev);
1829        int ret;
1830        int t_hlen;
1831
1832        t->dev = dev;
1833        t->net = dev_net(dev);
1834        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1835        if (!dev->tstats)
1836                return -ENOMEM;
1837
1838        ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1839        if (ret)
1840                goto free_stats;
1841
1842        ret = gro_cells_init(&t->gro_cells, dev);
1843        if (ret)
1844                goto destroy_dst;
1845
1846        t->tun_hlen = 0;
1847        t->hlen = t->encap_hlen + t->tun_hlen;
1848        t_hlen = t->hlen + sizeof(struct ipv6hdr);
1849
1850        dev->type = ARPHRD_TUNNEL6;
1851        dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1852        dev->mtu = ETH_DATA_LEN - t_hlen;
1853        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1854                dev->mtu -= 8;
1855        dev->min_mtu = ETH_MIN_MTU;
1856        dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
1857
1858        return 0;
1859
1860destroy_dst:
1861        dst_cache_destroy(&t->dst_cache);
1862free_stats:
1863        free_percpu(dev->tstats);
1864        dev->tstats = NULL;
1865
1866        return ret;
1867}
1868
1869/**
1870 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1871 *   @dev: virtual device associated with tunnel
1872 **/
1873
1874static int ip6_tnl_dev_init(struct net_device *dev)
1875{
1876        struct ip6_tnl *t = netdev_priv(dev);
1877        int err = ip6_tnl_dev_init_gen(dev);
1878
1879        if (err)
1880                return err;
1881        ip6_tnl_link_config(t);
1882        if (t->parms.collect_md) {
1883                dev->features |= NETIF_F_NETNS_LOCAL;
1884                netif_keep_dst(dev);
1885        }
1886        return 0;
1887}
1888
1889/**
1890 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1891 *   @dev: fallback device
1892 *
1893 * Return: 0
1894 **/
1895
1896static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1897{
1898        struct ip6_tnl *t = netdev_priv(dev);
1899        struct net *net = dev_net(dev);
1900        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1901
1902        t->parms.proto = IPPROTO_IPV6;
1903        dev_hold(dev);
1904
1905        rcu_assign_pointer(ip6n->tnls_wc[0], t);
1906        return 0;
1907}
1908
1909static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
1910                            struct netlink_ext_ack *extack)
1911{
1912        u8 proto;
1913
1914        if (!data || !data[IFLA_IPTUN_PROTO])
1915                return 0;
1916
1917        proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1918        if (proto != IPPROTO_IPV6 &&
1919            proto != IPPROTO_IPIP &&
1920            proto != 0)
1921                return -EINVAL;
1922
1923        return 0;
1924}
1925
1926static void ip6_tnl_netlink_parms(struct nlattr *data[],
1927                                  struct __ip6_tnl_parm *parms)
1928{
1929        memset(parms, 0, sizeof(*parms));
1930
1931        if (!data)
1932                return;
1933
1934        if (data[IFLA_IPTUN_LINK])
1935                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1936
1937        if (data[IFLA_IPTUN_LOCAL])
1938                parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
1939
1940        if (data[IFLA_IPTUN_REMOTE])
1941                parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
1942
1943        if (data[IFLA_IPTUN_TTL])
1944                parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
1945
1946        if (data[IFLA_IPTUN_ENCAP_LIMIT])
1947                parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
1948
1949        if (data[IFLA_IPTUN_FLOWINFO])
1950                parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1951
1952        if (data[IFLA_IPTUN_FLAGS])
1953                parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
1954
1955        if (data[IFLA_IPTUN_PROTO])
1956                parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1957
1958        if (data[IFLA_IPTUN_COLLECT_METADATA])
1959                parms->collect_md = true;
1960
1961        if (data[IFLA_IPTUN_FWMARK])
1962                parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
1963}
1964
1965static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
1966                                        struct ip_tunnel_encap *ipencap)
1967{
1968        bool ret = false;
1969
1970        memset(ipencap, 0, sizeof(*ipencap));
1971
1972        if (!data)
1973                return ret;
1974
1975        if (data[IFLA_IPTUN_ENCAP_TYPE]) {
1976                ret = true;
1977                ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
1978        }
1979
1980        if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
1981                ret = true;
1982                ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
1983        }
1984
1985        if (data[IFLA_IPTUN_ENCAP_SPORT]) {
1986                ret = true;
1987                ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
1988        }
1989
1990        if (data[IFLA_IPTUN_ENCAP_DPORT]) {
1991                ret = true;
1992                ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
1993        }
1994
1995        return ret;
1996}
1997
1998static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1999                           struct nlattr *tb[], struct nlattr *data[],
2000                           struct netlink_ext_ack *extack)
2001{
2002        struct net *net = dev_net(dev);
2003        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2004        struct ip_tunnel_encap ipencap;
2005        struct ip6_tnl *nt, *t;
2006        int err;
2007
2008        nt = netdev_priv(dev);
2009
2010        if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
2011                err = ip6_tnl_encap_setup(nt, &ipencap);
2012                if (err < 0)
2013                        return err;
2014        }
2015
2016        ip6_tnl_netlink_parms(data, &nt->parms);
2017
2018        if (nt->parms.collect_md) {
2019                if (rtnl_dereference(ip6n->collect_md_tun))
2020                        return -EEXIST;
2021        } else {
2022                t = ip6_tnl_locate(net, &nt->parms, 0);
2023                if (!IS_ERR(t))
2024                        return -EEXIST;
2025        }
2026
2027        err = ip6_tnl_create2(dev);
2028        if (!err && tb[IFLA_MTU])
2029                ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
2030
2031        return err;
2032}
2033
2034static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
2035                              struct nlattr *data[],
2036                              struct netlink_ext_ack *extack)
2037{
2038        struct ip6_tnl *t = netdev_priv(dev);
2039        struct __ip6_tnl_parm p;
2040        struct net *net = t->net;
2041        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2042        struct ip_tunnel_encap ipencap;
2043
2044        if (dev == ip6n->fb_tnl_dev)
2045                return -EINVAL;
2046
2047        if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
2048                int err = ip6_tnl_encap_setup(t, &ipencap);
2049
2050                if (err < 0)
2051                        return err;
2052        }
2053        ip6_tnl_netlink_parms(data, &p);
2054        if (p.collect_md)
2055                return -EINVAL;
2056
2057        t = ip6_tnl_locate(net, &p, 0);
2058        if (!IS_ERR(t)) {
2059                if (t->dev != dev)
2060                        return -EEXIST;
2061        } else
2062                t = netdev_priv(dev);
2063
2064        return ip6_tnl_update(t, &p);
2065}
2066
2067static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
2068{
2069        struct net *net = dev_net(dev);
2070        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2071
2072        if (dev != ip6n->fb_tnl_dev)
2073                unregister_netdevice_queue(dev, head);
2074}
2075
2076static size_t ip6_tnl_get_size(const struct net_device *dev)
2077{
2078        return
2079                /* IFLA_IPTUN_LINK */
2080                nla_total_size(4) +
2081                /* IFLA_IPTUN_LOCAL */
2082                nla_total_size(sizeof(struct in6_addr)) +
2083                /* IFLA_IPTUN_REMOTE */
2084                nla_total_size(sizeof(struct in6_addr)) +
2085                /* IFLA_IPTUN_TTL */
2086                nla_total_size(1) +
2087                /* IFLA_IPTUN_ENCAP_LIMIT */
2088                nla_total_size(1) +
2089                /* IFLA_IPTUN_FLOWINFO */
2090                nla_total_size(4) +
2091                /* IFLA_IPTUN_FLAGS */
2092                nla_total_size(4) +
2093                /* IFLA_IPTUN_PROTO */
2094                nla_total_size(1) +
2095                /* IFLA_IPTUN_ENCAP_TYPE */
2096                nla_total_size(2) +
2097                /* IFLA_IPTUN_ENCAP_FLAGS */
2098                nla_total_size(2) +
2099                /* IFLA_IPTUN_ENCAP_SPORT */
2100                nla_total_size(2) +
2101                /* IFLA_IPTUN_ENCAP_DPORT */
2102                nla_total_size(2) +
2103                /* IFLA_IPTUN_COLLECT_METADATA */
2104                nla_total_size(0) +
2105                /* IFLA_IPTUN_FWMARK */
2106                nla_total_size(4) +
2107                0;
2108}
2109
2110static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
2111{
2112        struct ip6_tnl *tunnel = netdev_priv(dev);
2113        struct __ip6_tnl_parm *parm = &tunnel->parms;
2114
2115        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
2116            nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
2117            nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
2118            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
2119            nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
2120            nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
2121            nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
2122            nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
2123            nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
2124                goto nla_put_failure;
2125
2126        if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
2127            nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
2128            nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
2129            nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
2130                goto nla_put_failure;
2131
2132        if (parm->collect_md)
2133                if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
2134                        goto nla_put_failure;
2135
2136        return 0;
2137
2138nla_put_failure:
2139        return -EMSGSIZE;
2140}
2141
2142struct net *ip6_tnl_get_link_net(const struct net_device *dev)
2143{
2144        struct ip6_tnl *tunnel = netdev_priv(dev);
2145
2146        return tunnel->net;
2147}
2148EXPORT_SYMBOL(ip6_tnl_get_link_net);
2149
2150static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
2151        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
2152        [IFLA_IPTUN_LOCAL]              = { .len = sizeof(struct in6_addr) },
2153        [IFLA_IPTUN_REMOTE]             = { .len = sizeof(struct in6_addr) },
2154        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
2155        [IFLA_IPTUN_ENCAP_LIMIT]        = { .type = NLA_U8 },
2156        [IFLA_IPTUN_FLOWINFO]           = { .type = NLA_U32 },
2157        [IFLA_IPTUN_FLAGS]              = { .type = NLA_U32 },
2158        [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
2159        [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
2160        [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
2161        [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
2162        [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
2163        [IFLA_IPTUN_COLLECT_METADATA]   = { .type = NLA_FLAG },
2164        [IFLA_IPTUN_FWMARK]             = { .type = NLA_U32 },
2165};
2166
2167static struct rtnl_link_ops ip6_link_ops __read_mostly = {
2168        .kind           = "ip6tnl",
2169        .maxtype        = IFLA_IPTUN_MAX,
2170        .policy         = ip6_tnl_policy,
2171        .priv_size      = sizeof(struct ip6_tnl),
2172        .setup          = ip6_tnl_dev_setup,
2173        .validate       = ip6_tnl_validate,
2174        .newlink        = ip6_tnl_newlink,
2175        .changelink     = ip6_tnl_changelink,
2176        .dellink        = ip6_tnl_dellink,
2177        .get_size       = ip6_tnl_get_size,
2178        .fill_info      = ip6_tnl_fill_info,
2179        .get_link_net   = ip6_tnl_get_link_net,
2180};
2181
2182static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
2183        .handler        = ip4ip6_rcv,
2184        .err_handler    = ip4ip6_err,
2185        .priority       =       1,
2186};
2187
2188static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
2189        .handler        = ip6ip6_rcv,
2190        .err_handler    = ip6ip6_err,
2191        .priority       =       1,
2192};
2193
2194static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
2195{
2196        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2197        struct net_device *dev, *aux;
2198        int h;
2199        struct ip6_tnl *t;
2200
2201        for_each_netdev_safe(net, dev, aux)
2202                if (dev->rtnl_link_ops == &ip6_link_ops)
2203                        unregister_netdevice_queue(dev, list);
2204
2205        for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
2206                t = rtnl_dereference(ip6n->tnls_r_l[h]);
2207                while (t) {
2208                        /* If dev is in the same netns, it has already
2209                         * been added to the list by the previous loop.
2210                         */
2211                        if (!net_eq(dev_net(t->dev), net))
2212                                unregister_netdevice_queue(t->dev, list);
2213                        t = rtnl_dereference(t->next);
2214                }
2215        }
2216}
2217
2218static int __net_init ip6_tnl_init_net(struct net *net)
2219{
2220        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2221        struct ip6_tnl *t = NULL;
2222        int err;
2223
2224        ip6n->tnls[0] = ip6n->tnls_wc;
2225        ip6n->tnls[1] = ip6n->tnls_r_l;
2226
2227        if (!net_has_fallback_tunnels(net))
2228                return 0;
2229        err = -ENOMEM;
2230        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
2231                                        NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
2232
2233        if (!ip6n->fb_tnl_dev)
2234                goto err_alloc_dev;
2235        dev_net_set(ip6n->fb_tnl_dev, net);
2236        ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
2237        /* FB netdevice is special: we have one, and only one per netns.
2238         * Allowing to move it to another netns is clearly unsafe.
2239         */
2240        ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
2241
2242        err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
2243        if (err < 0)
2244                goto err_register;
2245
2246        err = register_netdev(ip6n->fb_tnl_dev);
2247        if (err < 0)
2248                goto err_register;
2249
2250        t = netdev_priv(ip6n->fb_tnl_dev);
2251
2252        strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
2253        return 0;
2254
2255err_register:
2256        free_netdev(ip6n->fb_tnl_dev);
2257err_alloc_dev:
2258        return err;
2259}
2260
2261static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
2262{
2263        struct net *net;
2264        LIST_HEAD(list);
2265
2266        rtnl_lock();
2267        list_for_each_entry(net, net_list, exit_list)
2268                ip6_tnl_destroy_tunnels(net, &list);
2269        unregister_netdevice_many(&list);
2270        rtnl_unlock();
2271}
2272
2273static struct pernet_operations ip6_tnl_net_ops = {
2274        .init = ip6_tnl_init_net,
2275        .exit_batch = ip6_tnl_exit_batch_net,
2276        .id   = &ip6_tnl_net_id,
2277        .size = sizeof(struct ip6_tnl_net),
2278};
2279
2280/**
2281 * ip6_tunnel_init - register protocol and reserve needed resources
2282 *
2283 * Return: 0 on success
2284 **/
2285
2286static int __init ip6_tunnel_init(void)
2287{
2288        int  err;
2289
2290        if (!ipv6_mod_enabled())
2291                return -EOPNOTSUPP;
2292
2293        err = register_pernet_device(&ip6_tnl_net_ops);
2294        if (err < 0)
2295                goto out_pernet;
2296
2297        err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
2298        if (err < 0) {
2299                pr_err("%s: can't register ip4ip6\n", __func__);
2300                goto out_ip4ip6;
2301        }
2302
2303        err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
2304        if (err < 0) {
2305                pr_err("%s: can't register ip6ip6\n", __func__);
2306                goto out_ip6ip6;
2307        }
2308        err = rtnl_link_register(&ip6_link_ops);
2309        if (err < 0)
2310                goto rtnl_link_failed;
2311
2312        return 0;
2313
2314rtnl_link_failed:
2315        xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
2316out_ip6ip6:
2317        xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
2318out_ip4ip6:
2319        unregister_pernet_device(&ip6_tnl_net_ops);
2320out_pernet:
2321        return err;
2322}
2323
2324/**
2325 * ip6_tunnel_cleanup - free resources and unregister protocol
2326 **/
2327
2328static void __exit ip6_tunnel_cleanup(void)
2329{
2330        rtnl_link_unregister(&ip6_link_ops);
2331        if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
2332                pr_info("%s: can't deregister ip4ip6\n", __func__);
2333
2334        if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
2335                pr_info("%s: can't deregister ip6ip6\n", __func__);
2336
2337        unregister_pernet_device(&ip6_tnl_net_ops);
2338}
2339
/* Module entry and exit points. */
module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);
2342