/* linux/net/ipv6/ip6_tunnel.c */
   1/*
   2 *      IPv6 tunneling device
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Ville Nuorvala          <vnuorval@tcs.hut.fi>
   7 *      Yasuyuki Kozakai        <kozakai@linux-ipv6.org>
   8 *
   9 *      Based on:
  10 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  11 *
  12 *      RFC 2473
  13 *
  14 *      This program is free software; you can redistribute it and/or
  15 *      modify it under the terms of the GNU General Public License
  16 *      as published by the Free Software Foundation; either version
  17 *      2 of the License, or (at your option) any later version.
  18 *
  19 */
  20
  21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23#include <linux/module.h>
  24#include <linux/capability.h>
  25#include <linux/errno.h>
  26#include <linux/types.h>
  27#include <linux/sockios.h>
  28#include <linux/icmp.h>
  29#include <linux/if.h>
  30#include <linux/in.h>
  31#include <linux/ip.h>
  32#include <linux/net.h>
  33#include <linux/in6.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/icmpv6.h>
  37#include <linux/init.h>
  38#include <linux/route.h>
  39#include <linux/rtnetlink.h>
  40#include <linux/netfilter_ipv6.h>
  41#include <linux/slab.h>
  42#include <linux/hash.h>
  43#include <linux/etherdevice.h>
  44
  45#include <asm/uaccess.h>
  46#include <linux/atomic.h>
  47
  48#include <net/icmp.h>
  49#include <net/ip.h>
  50#include <net/ip_tunnels.h>
  51#include <net/ipv6.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/ip6_tunnel.h>
  55#include <net/xfrm.h>
  56#include <net/dsfield.h>
  57#include <net/inet_ecn.h>
  58#include <net/net_namespace.h>
  59#include <net/netns/generic.h>
  60
  61MODULE_AUTHOR("Ville Nuorvala");
  62MODULE_DESCRIPTION("IPv6 tunneling device");
  63MODULE_LICENSE("GPL");
  64MODULE_ALIAS_RTNL_LINK("ip6tnl");
  65MODULE_ALIAS_NETDEV("ip6tnl0");
  66
  67#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
  68#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
  69
  70static bool log_ecn_error = true;
  71module_param(log_ecn_error, bool, 0644);
  72MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  73
  74static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
  75{
  76        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  77
  78        return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
  79}
  80
  81static int ip6_tnl_dev_init(struct net_device *dev);
  82static void ip6_tnl_dev_setup(struct net_device *dev);
  83static struct rtnl_link_ops ip6_link_ops __read_mostly;
  84
/* pernet subsystem id, used with net_generic() to reach our state */
static int ip6_tnl_net_id __read_mostly;
/* per-network-namespace state of the ip6tnl driver */
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
	/* single slot for the fully wildcarded (fallback) tunnel */
	struct ip6_tnl __rcu *tnls_wc[1];
	/* tnls[0] -> tnls_wc, tnls[1] -> tnls_r_l; indexed by prio */
	struct ip6_tnl __rcu **tnls[2];
};
  94
  95static struct net_device_stats *ip6_get_stats(struct net_device *dev)
  96{
  97        struct pcpu_sw_netstats tmp, sum = { 0 };
  98        int i;
  99
 100        for_each_possible_cpu(i) {
 101                unsigned int start;
 102                const struct pcpu_sw_netstats *tstats =
 103                                                   per_cpu_ptr(dev->tstats, i);
 104
 105                do {
 106                        start = u64_stats_fetch_begin_irq(&tstats->syncp);
 107                        tmp.rx_packets = tstats->rx_packets;
 108                        tmp.rx_bytes = tstats->rx_bytes;
 109                        tmp.tx_packets = tstats->tx_packets;
 110                        tmp.tx_bytes =  tstats->tx_bytes;
 111                } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
 112
 113                sum.rx_packets += tmp.rx_packets;
 114                sum.rx_bytes   += tmp.rx_bytes;
 115                sum.tx_packets += tmp.tx_packets;
 116                sum.tx_bytes   += tmp.tx_bytes;
 117        }
 118        dev->stats.rx_packets = sum.rx_packets;
 119        dev->stats.rx_bytes   = sum.rx_bytes;
 120        dev->stats.tx_packets = sum.tx_packets;
 121        dev->stats.tx_bytes   = sum.tx_bytes;
 122        return &dev->stats;
 123}
 124
 125/**
 126 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 127 *   @remote: the address of the tunnel exit-point
 128 *   @local: the address of the tunnel entry-point
 129 *
 130 * Return:
 131 *   tunnel matching given end-points if found,
 132 *   else fallback tunnel if its device is up,
 133 *   else %NULL
 134 **/
 135
/* Walk an RCU-protected tunnel hash chain starting at @start.
 * The cursor variable 't' must be declared by the caller.
 */
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/* Look up the tunnel for a received packet; caller must hold
 * rcu_read_lock() (see ip6_tnl_err() and ipxip6_rcv()).
 *
 * Matching is tried from most to least specific:
 *   1) exact remote+local, 2) local with wildcard remote,
 *   3) remote with wildcard local, 4) the fallback device.
 * Only devices that are administratively up are returned.
 */
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
{
	unsigned int hash = HASH(remote, local);
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
	struct in6_addr any;

	/* 1) exact match on both endpoints */
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* 2) local address match, wildcard (any) remote */
	memset(&any, 0, sizeof(any));
	hash = HASH(&any, local);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_any(&t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* 3) remote address match, wildcard (any) local */
	hash = HASH(remote, &any);
	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
		if (ipv6_addr_equal(remote, &t->parms.raddr) &&
		    ipv6_addr_any(&t->parms.laddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}

	/* 4) fall back to the catch-all device, if it is up */
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
		return t;

	return NULL;
}
 177
 178/**
 179 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 180 *   @p: parameters containing tunnel end-points
 181 *
 182 * Description:
 183 *   ip6_tnl_bucket() returns the head of the list matching the
 184 *   &struct in6_addr entries laddr and raddr in @p.
 185 *
 186 * Return: head of IPv6 tunnel list
 187 **/
 188
 189static struct ip6_tnl __rcu **
 190ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
 191{
 192        const struct in6_addr *remote = &p->raddr;
 193        const struct in6_addr *local = &p->laddr;
 194        unsigned int h = 0;
 195        int prio = 0;
 196
 197        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 198                prio = 1;
 199                h = HASH(remote, local);
 200        }
 201        return &ip6n->tnls[prio][h];
 202}
 203
 204/**
 205 * ip6_tnl_link - add tunnel to hash table
 206 *   @t: tunnel to be added
 207 **/
 208
 209static void
 210ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 211{
 212        struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 213
 214        rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 215        rcu_assign_pointer(*tp, t);
 216}
 217
 218/**
 219 * ip6_tnl_unlink - remove tunnel from hash table
 220 *   @t: tunnel to be removed
 221 **/
 222
 223static void
 224ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 225{
 226        struct ip6_tnl __rcu **tp;
 227        struct ip6_tnl *iter;
 228
 229        for (tp = ip6_tnl_bucket(ip6n, &t->parms);
 230             (iter = rtnl_dereference(*tp)) != NULL;
 231             tp = &iter->next) {
 232                if (t == iter) {
 233                        rcu_assign_pointer(*tp, t->next);
 234                        break;
 235                }
 236        }
 237}
 238
 239static void ip6_dev_free(struct net_device *dev)
 240{
 241        struct ip6_tnl *t = netdev_priv(dev);
 242
 243        gro_cells_destroy(&t->gro_cells);
 244        dst_cache_destroy(&t->dst_cache);
 245        free_percpu(dev->tstats);
 246}
 247
 248static int ip6_tnl_create2(struct net_device *dev)
 249{
 250        struct ip6_tnl *t = netdev_priv(dev);
 251        struct net *net = dev_net(dev);
 252        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 253        int err;
 254
 255        t = netdev_priv(dev);
 256
 257        dev->rtnl_link_ops = &ip6_link_ops;
 258        err = register_netdevice(dev);
 259        if (err < 0)
 260                goto out;
 261
 262        strcpy(t->parms.name, dev->name);
 263
 264        dev_hold(dev);
 265        ip6_tnl_link(ip6n, t);
 266        return 0;
 267
 268out:
 269        return err;
 270}
 271
 272/**
 273 * ip6_tnl_create - create a new tunnel
 274 *   @p: tunnel parameters
 275 *   @pt: pointer to new tunnel
 276 *
 277 * Description:
 278 *   Create tunnel matching given parameters.
 279 *
 280 * Return:
 281 *   created tunnel or error pointer
 282 **/
 283
 284static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 285{
 286        struct net_device *dev;
 287        struct ip6_tnl *t;
 288        char name[IFNAMSIZ];
 289        int err = -E2BIG;
 290
 291        if (p->name[0]) {
 292                if (!dev_valid_name(p->name))
 293                        goto failed;
 294                strlcpy(name, p->name, IFNAMSIZ);
 295        } else {
 296                sprintf(name, "ip6tnl%%d");
 297        }
 298        err = -ENOMEM;
 299        dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
 300        if (!dev)
 301                goto failed;
 302
 303        dev_net_set(dev, net);
 304
 305        t = netdev_priv(dev);
 306        t->parms = *p;
 307        t->net = dev_net(dev);
 308        err = ip6_tnl_create2(dev);
 309        if (err < 0)
 310                goto failed_free;
 311
 312        return t;
 313
 314failed_free:
 315        free_netdev(dev);
 316failed:
 317        return ERR_PTR(err);
 318}
 319
 320/**
 321 * ip6_tnl_locate - find or create tunnel matching given parameters
 322 *   @p: tunnel parameters
 323 *   @create: != 0 if allowed to create new tunnel if no match found
 324 *
 325 * Description:
 326 *   ip6_tnl_locate() first tries to locate an existing tunnel
 327 *   based on @parms. If this is unsuccessful, but @create is set a new
 328 *   tunnel device is created and registered for use.
 329 *
 330 * Return:
 331 *   matching tunnel or error pointer
 332 **/
 333
 334static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 335                struct __ip6_tnl_parm *p, int create)
 336{
 337        const struct in6_addr *remote = &p->raddr;
 338        const struct in6_addr *local = &p->laddr;
 339        struct ip6_tnl __rcu **tp;
 340        struct ip6_tnl *t;
 341        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 342
 343        for (tp = ip6_tnl_bucket(ip6n, p);
 344             (t = rtnl_dereference(*tp)) != NULL;
 345             tp = &t->next) {
 346                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 347                    ipv6_addr_equal(remote, &t->parms.raddr)) {
 348                        if (create)
 349                                return ERR_PTR(-EEXIST);
 350
 351                        return t;
 352                }
 353        }
 354        if (!create)
 355                return ERR_PTR(-ENODEV);
 356        return ip6_tnl_create(net, p);
 357}
 358
 359/**
 360 * ip6_tnl_dev_uninit - tunnel device uninitializer
 361 *   @dev: the device to be destroyed
 362 *
 363 * Description:
 364 *   ip6_tnl_dev_uninit() removes tunnel from its list
 365 **/
 366
 367static void
 368ip6_tnl_dev_uninit(struct net_device *dev)
 369{
 370        struct ip6_tnl *t = netdev_priv(dev);
 371        struct net *net = t->net;
 372        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 373
 374        if (dev == ip6n->fb_tnl_dev)
 375                RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 376        else
 377                ip6_tnl_unlink(ip6n, t);
 378        dst_cache_reset(&t->dst_cache);
 379        dev_put(dev);
 380}
 381
/**
 * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *   @raw: pointer to the outer IPv6 header within @skb's data
 *
 * Description:
 *   Walks the extension-header chain following the IPv6 header at
 *   @raw, looking for a tunnel encapsulation limit TLV inside a
 *   destination options header.
 *
 * Return:
 *   0 if none was found,
 *   else index to encapsulation limit
 **/

__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
	unsigned int nhoff = raw - skb->data;
	unsigned int off = nhoff + sizeof(*ipv6h);
	u8 next, nexthdr = ipv6h->nexthdr;

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		struct ipv6_opt_hdr *hdr;
		u16 optlen;

		/* make sure the fixed part of this header is readable */
		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			/* only an unfragmented/first fragment can carry opts */
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			/* AH length field counts 4-byte units, minus two */
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		/* cache hdr->nexthdr, since pskb_may_pull() might
		 * invalidate hdr
		 */
		next = hdr->nexthdr;
		if (nexthdr == NEXTHDR_DEST) {
			/* skip the 2-byte opt-header, then scan the TLVs */
			u16 i = 2;

			/* Remember : hdr is no longer valid at this point. */
			if (!pskb_may_pull(skb, off + optlen))
				break;

			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof(*tel) > optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i + off - nhoff;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = next;
		off += optlen;
	}
	return 0;
}
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
 452
/**
 * ip6_tnl_err - tunnel error handler
 *   @skb: packet carrying the received ICMPv6 error
 *   @ipproto: inner protocol the tunnel must carry (0 matches any)
 *   @opt: IPv6 control buffer of @skb
 *   @type: in: outer ICMPv6 type; out: type to relay to the inner flow
 *   @code: in: outer ICMPv6 code; out: code to relay
 *   @msg: out: set to 1 when an error should be relayed inward
 *   @info: in: outer ICMPv6 info word; out: value to relay (e.g. MTU)
 *   @offset: offset of the embedded (inner) packet within @skb's data
 *
 * Description:
 *   ip6_tnl_err() should handle errors in the tunnel according
 *   to the specifications in RFC 2473.
 *
 * Return: 0 when a matching tunnel was found, -ENOENT otherwise
 **/

static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
	__u32 rel_info = 0;
	struct ip6_tnl *t;
	int err = -ENOENT;
	int rel_msg = 0;
	u8 tproto;
	__u16 len;

	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
	   processing of the error. */

	rcu_read_lock();
	/* the quoted header is one we sent, so daddr is the tunnel's
	 * remote endpoint and saddr its local endpoint
	 */
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
					&ipv6h->saddr)) == NULL)
		goto out;

	tproto = ACCESS_ONCE(t->parms.proto);
	if (tproto != ipproto && tproto != 0)
		goto out;

	err = 0;

	switch (*type) {
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu, teli;
	case ICMPV6_DEST_UNREACH:
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
		teli = 0;
		if ((*code) == ICMPV6_HDR_FIELD)
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);

		/* *info is the offset of the offending field; teli + 2 is
		 * where the encap-limit value byte sits in the TLV
		 */
		if (teli && teli == *info - 2) {
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
				rel_msg = 1;
			}
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		ip6_update_pmtu(skb, net, htonl(*info), 0, 0);
		/* relay a reduced MTU inward, clamped to the IPv6 minimum,
		 * if the quoted packet would exceed it
		 */
		mtu = *info - offset;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
		if (len > mtu) {
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	case NDISC_REDIRECT:
		ip6_redirect(skb, net, skb->dev->ifindex, 0);
		break;
	}

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
	rcu_read_unlock();
	return err;
}
 550
 551static int
 552ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 553           u8 type, u8 code, int offset, __be32 info)
 554{
 555        __u32 rel_info = ntohl(info);
 556        const struct iphdr *eiph;
 557        struct sk_buff *skb2;
 558        int err, rel_msg = 0;
 559        u8 rel_type = type;
 560        u8 rel_code = code;
 561        struct rtable *rt;
 562        struct flowi4 fl4;
 563
 564        err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
 565                          &rel_msg, &rel_info, offset);
 566        if (err < 0)
 567                return err;
 568
 569        if (rel_msg == 0)
 570                return 0;
 571
 572        switch (rel_type) {
 573        case ICMPV6_DEST_UNREACH:
 574                if (rel_code != ICMPV6_ADDR_UNREACH)
 575                        return 0;
 576                rel_type = ICMP_DEST_UNREACH;
 577                rel_code = ICMP_HOST_UNREACH;
 578                break;
 579        case ICMPV6_PKT_TOOBIG:
 580                if (rel_code != 0)
 581                        return 0;
 582                rel_type = ICMP_DEST_UNREACH;
 583                rel_code = ICMP_FRAG_NEEDED;
 584                break;
 585        default:
 586                return 0;
 587        }
 588
 589        if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
 590                return 0;
 591
 592        skb2 = skb_clone(skb, GFP_ATOMIC);
 593        if (!skb2)
 594                return 0;
 595
 596        skb_dst_drop(skb2);
 597
 598        skb_pull(skb2, offset);
 599        skb_reset_network_header(skb2);
 600        eiph = ip_hdr(skb2);
 601
 602        /* Try to guess incoming interface */
 603        rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
 604                                   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 605        if (IS_ERR(rt))
 606                goto out;
 607
 608        skb2->dev = rt->dst.dev;
 609        ip_rt_put(rt);
 610
 611        /* route "incoming" packet */
 612        if (rt->rt_flags & RTCF_LOCAL) {
 613                rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
 614                                           eiph->daddr, eiph->saddr, 0, 0,
 615                                           IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 616                if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
 617                        if (!IS_ERR(rt))
 618                                ip_rt_put(rt);
 619                        goto out;
 620                }
 621                skb_dst_set(skb2, &rt->dst);
 622        } else {
 623                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 624                                   skb2->dev) ||
 625                    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
 626                        goto out;
 627        }
 628
 629        /* change mtu on this route */
 630        if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
 631                if (rel_info > dst_mtu(skb_dst(skb2)))
 632                        goto out;
 633
 634                skb_dst_update_pmtu(skb2, rel_info);
 635        }
 636
 637        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 638
 639out:
 640        kfree_skb(skb2);
 641        return 0;
 642}
 643
 644static int
 645ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 646           u8 type, u8 code, int offset, __be32 info)
 647{
 648        __u32 rel_info = ntohl(info);
 649        int err, rel_msg = 0;
 650        u8 rel_type = type;
 651        u8 rel_code = code;
 652
 653        err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
 654                          &rel_msg, &rel_info, offset);
 655        if (err < 0)
 656                return err;
 657
 658        if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 659                struct rt6_info *rt;
 660                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 661
 662                if (!skb2)
 663                        return 0;
 664
 665                skb_dst_drop(skb2);
 666                skb_pull(skb2, offset);
 667                skb_reset_network_header(skb2);
 668
 669                /* Try to guess incoming interface */
 670                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
 671                                NULL, 0, 0);
 672
 673                if (rt && rt->dst.dev)
 674                        skb2->dev = rt->dst.dev;
 675
 676                icmpv6_send(skb2, rel_type, rel_code, rel_info);
 677
 678                ip6_rt_put(rt);
 679
 680                kfree_skb(skb2);
 681        }
 682
 683        return 0;
 684}
 685
 686static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 687                                       const struct ipv6hdr *ipv6h,
 688                                       struct sk_buff *skb)
 689{
 690        __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 691
 692        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 693                ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
 694
 695        return IP6_ECN_decapsulate(ipv6h, skb);
 696}
 697
 698static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 699                                       const struct ipv6hdr *ipv6h,
 700                                       struct sk_buff *skb)
 701{
 702        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 703                ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
 704
 705        return IP6_ECN_decapsulate(ipv6h, skb);
 706}
 707
 708__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 709                             const struct in6_addr *laddr,
 710                             const struct in6_addr *raddr)
 711{
 712        struct __ip6_tnl_parm *p = &t->parms;
 713        int ltype = ipv6_addr_type(laddr);
 714        int rtype = ipv6_addr_type(raddr);
 715        __u32 flags = 0;
 716
 717        if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
 718                flags = IP6_TNL_F_CAP_PER_PACKET;
 719        } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 720                   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 721                   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
 722                   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
 723                if (ltype&IPV6_ADDR_UNICAST)
 724                        flags |= IP6_TNL_F_CAP_XMIT;
 725                if (rtype&IPV6_ADDR_UNICAST)
 726                        flags |= IP6_TNL_F_CAP_RCV;
 727        }
 728        return flags;
 729}
 730EXPORT_SYMBOL(ip6_tnl_get_cap);
 731
 732/* called with rcu_read_lock() */
 733int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 734                                  const struct in6_addr *laddr,
 735                                  const struct in6_addr *raddr)
 736{
 737        struct __ip6_tnl_parm *p = &t->parms;
 738        int ret = 0;
 739        struct net *net = t->net;
 740
 741        if ((p->flags & IP6_TNL_F_CAP_RCV) ||
 742            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 743             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
 744                struct net_device *ldev = NULL;
 745
 746                if (p->link)
 747                        ldev = dev_get_by_index_rcu(net, p->link);
 748
 749                if ((ipv6_addr_is_multicast(laddr) ||
 750                     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
 751                    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
 752                        ret = 1;
 753        }
 754        return ret;
 755}
 756EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 757
/* Core tunnel receive path: validate checksum/sequence expectations,
 * undo outer-header ECN effects, account stats and hand the inner
 * packet to GRO.
 *
 * NOTE(review): @tun_dst is accepted but never used in this version;
 * metadata-dst handling appears unimplemented here — confirm against
 * callers before relying on it.
 *
 * Always returns 0; the skb is consumed (delivered or freed).
 */
static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
			 const struct tnl_ptk_info *tpi,
			 struct metadata_dst *tun_dst,
			 int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						const struct ipv6hdr *ipv6h,
						struct sk_buff *skb),
			 bool log_ecn_err)
{
	struct pcpu_sw_netstats *tstats;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int err;

	/* the packet's checksum flag must match the tunnel's expectation */
	if ((!(tpi->flags & TUNNEL_CSUM) &&
	     (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
	    ((tpi->flags & TUNNEL_CSUM) &&
	     !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* with sequencing enabled, drop unsequenced or stale packets */
	if (tunnel->parms.i_flags & TUNNEL_SEQ) {
		if (!(tpi->flags & TUNNEL_SEQ) ||
		    (tunnel->i_seqno &&
		     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb->protocol = tpi->proto;

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* re-read the header pointer after the pull above */
		ipv6h = ipv6_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	skb_reset_network_header(skb);
	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));

	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);

	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
	if (unlikely(err)) {
		if (log_ecn_err)
			net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
					     &ipv6h->saddr,
					     ipv6_get_dsfield(ipv6h));
		/* err > 1 means the packet must be dropped, not just logged */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* strip cross-namespace state if the tunnel crosses netns */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
 840
 841int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
 842                const struct tnl_ptk_info *tpi,
 843                struct metadata_dst *tun_dst,
 844                bool log_ecn_err)
 845{
 846        return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
 847                             log_ecn_err);
 848}
 849EXPORT_SYMBOL(ip6_tnl_rcv);
 850
/* Static tunnel parameter blocks for the ipxip6 receive paths; only the
 * ethertype of the inner packet is needed.
 */
static const struct tnl_ptk_info tpi_v6 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IPV6),
};

static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IP),
};
 860
/* Common receive path for IPvX-in-IPv6: look up the tunnel by the outer
 * addresses, validate protocol/policy/control checks, strip the outer
 * header and hand the packet to __ip6_tnl_rcv().
 *
 * Return: 0 when the skb was consumed (delivered or dropped), -1 when
 * no tunnel matched and the caller should try other handlers.
 */
static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
		      const struct tnl_ptk_info *tpi,
		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						  const struct ipv6hdr *ipv6h,
						  struct sk_buff *skb))
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int ret = -1;

	rcu_read_lock();
	t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);

	if (t) {
		u8 tproto = ACCESS_ONCE(t->parms.proto);

		/* tunnel configured for a different inner protocol? */
		if (tproto != ipproto && tproto != 0)
			goto drop;
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;
		/* re-read: skb data may have been adjusted above */
		ipv6h = ipv6_hdr(skb);
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
			goto drop;
		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
			goto drop;
		ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
				    log_ecn_error);
	}

	rcu_read_unlock();

	return ret;

drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return 0;
}
 899
/* Protocol handler for IPv4 packets encapsulated in IPv6 (4in6). */
static int ip4ip6_rcv(struct sk_buff *skb)
{
        return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
                          ip4ip6_dscp_ecn_decapsulate);
}
 905
/* Protocol handler for IPv6 packets encapsulated in IPv6 (6in6). */
static int ip6ip6_rcv(struct sk_buff *skb)
{
        return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
                          ip6ip6_dscp_ecn_decapsulate);
}
 911
/* Destination-options extension header carrying a single RFC 2473
 * tunnel encapsulation limit TLV, padded out to the mandatory 8 bytes.
 */
struct ipv6_tel_txoption {
        struct ipv6_txoptions ops;
        __u8 dst_opt[8];
};
 916
 917static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 918{
 919        memset(opt, 0, sizeof(struct ipv6_tel_txoption));
 920
 921        opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
 922        opt->dst_opt[3] = 1;
 923        opt->dst_opt[4] = encap_limit;
 924        opt->dst_opt[5] = IPV6_TLV_PADN;
 925        opt->dst_opt[6] = 1;
 926
 927        opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
 928        opt->ops.opt_nflen = 8;
 929}
 930
 931/**
 932 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 933 *   @t: the outgoing tunnel device
 934 *   @hdr: IPv6 header from the incoming packet
 935 *
 936 * Description:
 937 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 938 *   doesn't match source of incoming packet.
 939 *
 940 * Return:
 941 *   1 if conflict,
 942 *   0 else
 943 **/
 944
 945static inline bool
 946ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 947{
 948        return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 949}
 950
 951int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 952                     const struct in6_addr *laddr,
 953                     const struct in6_addr *raddr)
 954{
 955        struct __ip6_tnl_parm *p = &t->parms;
 956        int ret = 0;
 957        struct net *net = t->net;
 958
 959        if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
 960            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 961             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
 962                struct net_device *ldev = NULL;
 963
 964                rcu_read_lock();
 965                if (p->link)
 966                        ldev = dev_get_by_index_rcu(net, p->link);
 967
 968                if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
 969                        pr_warn("%s xmit: Local address not yet configured!\n",
 970                                p->name);
 971                else if (!ipv6_addr_is_multicast(raddr) &&
 972                         unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
 973                        pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
 974                                p->name);
 975                else
 976                        ret = 1;
 977                rcu_read_unlock();
 978        }
 979        return ret;
 980}
 981EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
 982
 983/**
 984 * ip6_tnl_xmit - encapsulate packet and send
 985 *   @skb: the outgoing socket buffer
 986 *   @dev: the outgoing tunnel device
 987 *   @dsfield: dscp code for outer header
 988 *   @fl6: flow of tunneled packet
 989 *   @encap_limit: encapsulation limit
 990 *   @pmtu: Path MTU is stored if packet is too big
 991 *   @proto: next header value
 992 *
 993 * Description:
 994 *   Build new header and do some sanity checks on the packet before sending
 995 *   it.
 996 *
 997 * Return:
 998 *   0 on success
 999 *   -1 fail
1000 *   %-EMSGSIZE message too big. return mtu in this case.
1001 **/
1002
1003int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
1004                 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
1005                 __u8 proto)
1006{
1007        struct ip6_tnl *t = netdev_priv(dev);
1008        struct net *net = t->net;
1009        struct net_device_stats *stats = &t->dev->stats;
1010        struct ipv6hdr *ipv6h;
1011        struct ipv6_tel_txoption opt;
1012        struct dst_entry *dst = NULL, *ndst = NULL;
1013        struct net_device *tdev;
1014        int mtu;
1015        unsigned int max_headroom = sizeof(struct ipv6hdr);
1016        unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
1017        bool use_cache = false;
1018        int err = -1;
1019
1020        /* NBMA tunnel */
1021        if (ipv6_addr_any(&t->parms.raddr)) {
1022                if (skb->protocol == htons(ETH_P_IPV6)) {
1023                        struct in6_addr *addr6;
1024                        struct neighbour *neigh;
1025                        int addr_type;
1026
1027                        if (!skb_dst(skb))
1028                                goto tx_err_link_failure;
1029
1030                        neigh = dst_neigh_lookup(skb_dst(skb),
1031                                                 &ipv6_hdr(skb)->daddr);
1032                        if (!neigh)
1033                                goto tx_err_link_failure;
1034
1035                        addr6 = (struct in6_addr *)&neigh->primary_key;
1036                        addr_type = ipv6_addr_type(addr6);
1037
1038                        if (addr_type == IPV6_ADDR_ANY)
1039                                addr6 = &ipv6_hdr(skb)->daddr;
1040
1041                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1042                        neigh_release(neigh);
1043                }
1044        } else if (!(t->parms.flags &
1045                     (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
1046                /* enable the cache only only if the routing decision does
1047                 * not depend on the current inner header value
1048                 */
1049                use_cache = true;
1050        }
1051
1052        if (use_cache)
1053                dst = dst_cache_get(&t->dst_cache);
1054
1055        if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1056                goto tx_err_link_failure;
1057
1058        if (!dst) {
1059                dst = ip6_route_output(net, NULL, fl6);
1060
1061                if (dst->error)
1062                        goto tx_err_link_failure;
1063                dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
1064                if (IS_ERR(dst)) {
1065                        err = PTR_ERR(dst);
1066                        dst = NULL;
1067                        goto tx_err_link_failure;
1068                }
1069                ndst = dst;
1070        }
1071
1072        tdev = dst->dev;
1073
1074        if (tdev == dev) {
1075                stats->collisions++;
1076                net_warn_ratelimited("%s: Local routing loop detected!\n",
1077                                     t->parms.name);
1078                goto tx_err_dst_release;
1079        }
1080        mtu = dst_mtu(dst) - eth_hlen - sizeof(*ipv6h) - t->tun_hlen;
1081        if (encap_limit >= 0) {
1082                max_headroom += 8;
1083                mtu -= 8;
1084        }
1085        mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
1086                       IPV6_MIN_MTU : IPV4_MIN_MTU);
1087
1088        skb_dst_update_pmtu(skb, mtu);
1089        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
1090                *pmtu = mtu;
1091                err = -EMSGSIZE;
1092                goto tx_err_dst_release;
1093        }
1094
1095        if (t->err_count > 0) {
1096                if (time_before(jiffies,
1097                                t->err_time + IP6TUNNEL_ERR_TIMEO)) {
1098                        t->err_count--;
1099
1100                        dst_link_failure(skb);
1101                } else {
1102                        t->err_count = 0;
1103                }
1104        }
1105
1106        skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1107
1108        /*
1109         * Okay, now see if we can stuff it in the buffer as-is.
1110         */
1111        max_headroom += LL_RESERVED_SPACE(tdev);
1112
1113        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
1114            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1115                struct sk_buff *new_skb;
1116
1117                if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
1118                        goto tx_err_dst_release;
1119
1120                if (skb->sk)
1121                        skb_set_owner_w(new_skb, skb->sk);
1122                consume_skb(skb);
1123                skb = new_skb;
1124        }
1125
1126        if (use_cache && ndst)
1127                dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1128        skb_dst_set(skb, dst);
1129
1130        skb->transport_header = skb->network_header;
1131
1132        if (encap_limit >= 0) {
1133                init_tel_txopt(&opt, encap_limit);
1134                ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
1135        }
1136
1137        if (likely(!skb->encapsulation)) {
1138                skb_reset_inner_headers(skb);
1139                skb->encapsulation = 1;
1140        }
1141
1142        max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
1143                        + dst->header_len;
1144        if (max_headroom > dev->needed_headroom)
1145                dev->needed_headroom = max_headroom;
1146
1147        skb_push(skb, sizeof(struct ipv6hdr));
1148        skb_reset_network_header(skb);
1149        ipv6h = ipv6_hdr(skb);
1150        ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
1151        ipv6h->hop_limit = t->parms.hop_limit;
1152        ipv6h->nexthdr = proto;
1153        ipv6h->saddr = fl6->saddr;
1154        ipv6h->daddr = fl6->daddr;
1155        ip6tunnel_xmit(NULL, skb, dev);
1156        return 0;
1157tx_err_link_failure:
1158        stats->tx_carrier_errors++;
1159        dst_link_failure(skb);
1160tx_err_dst_release:
1161        dst_release(dst);
1162        return err;
1163}
1164EXPORT_SYMBOL(ip6_tnl_xmit);
1165
/**
 * ip4ip6_tnl_xmit - transmit an IPv4 packet through an IPv6 tunnel
 *   @skb: outgoing socket buffer holding an IPv4 packet
 *   @dev: the tunnel device
 *
 * Builds the flow from the tunnel template, optionally copying TOS and
 * fwmark from the inner packet, then hands off to ip6_tnl_xmit().
 * On -EMSGSIZE an ICMP "fragmentation needed" error carrying the path
 * MTU is sent back.
 *
 * Return: 0 on success, -1 on failure (packet not transmitted).
 */
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip6_tnl *t = netdev_priv(dev);
        const struct iphdr  *iph;
        int encap_limit = -1;
        struct flowi6 fl6;
        __u8 dsfield;
        __u32 mtu;
        u8 tproto;
        int err;

        /* ensure we can access the full inner ip header */
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                return -1;

        iph = ip_hdr(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

        /* tunnel must be configured for 4in6 (or wildcard proto 0) */
        tproto = ACCESS_ONCE(t->parms.proto);
        if (tproto != IPPROTO_IPIP && tproto != 0)
                return -1;

        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                encap_limit = t->parms.encap_limit;

        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_IPIP;

        dsfield = ipv4_get_dsfield(iph);

        if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
                fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
                                          & IPV6_TCLASS_MASK;
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
                fl6.flowi6_mark = skb->mark;

        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
                           IPPROTO_IPIP);
        if (err != 0) {
                /* XXX: send ICMP error even if DF is not set. */
                if (err == -EMSGSIZE)
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                return -1;
        }

        return 0;
}
1215
/**
 * ip6ip6_tnl_xmit - transmit an IPv6 packet through an IPv6 tunnel
 *   @skb: outgoing socket buffer holding an IPv6 packet
 *   @dev: the tunnel device
 *
 * Rejects packets that would form a trivial loop, honours an existing
 * tunnel encapsulation limit option in the inner packet (RFC 2473),
 * optionally copies traffic class / flow label / fwmark from the inner
 * header, then hands off to ip6_tnl_xmit().  On -EMSGSIZE an ICMPv6
 * "packet too big" carrying the path MTU is sent back.
 *
 * Return: 0 on success, -1 on failure (packet not transmitted).
 */
static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip6_tnl *t = netdev_priv(dev);
        struct ipv6hdr *ipv6h;
        int encap_limit = -1;
        __u16 offset;
        struct flowi6 fl6;
        __u8 dsfield;
        __u32 mtu;
        u8 tproto;
        int err;

        if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
                return -1;

        ipv6h = ipv6_hdr(skb);
        /* tunnel must be configured for 6in6 (or wildcard proto 0) */
        tproto = ACCESS_ONCE(t->parms.proto);
        if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
            ip6_tnl_addr_conflict(t, ipv6h))
                return -1;

        offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
        /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
        ipv6h = ipv6_hdr(skb);
        if (offset > 0) {
                struct ipv6_tlv_tnl_enc_lim *tel;
                tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
                if (tel->encap_limit == 0) {
                        /* limit exhausted: report the offending option byte */
                        icmpv6_send(skb, ICMPV6_PARAMPROB,
                                    ICMPV6_HDR_FIELD, offset + 2);
                        return -1;
                }
                encap_limit = tel->encap_limit - 1;
        } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                encap_limit = t->parms.encap_limit;

        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
        fl6.flowi6_proto = IPPROTO_IPV6;

        dsfield = ipv6_get_dsfield(ipv6h);
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
                fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
                fl6.flowlabel |= ip6_flowlabel(ipv6h);
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
                fl6.flowi6_mark = skb->mark;

        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
                           IPPROTO_IPV6);
        if (err != 0) {
                if (err == -EMSGSIZE)
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                return -1;
        }

        return 0;
}
1274
1275static netdev_tx_t
1276ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
1277{
1278        struct ip6_tnl *t = netdev_priv(dev);
1279        struct net_device_stats *stats = &t->dev->stats;
1280        int ret;
1281
1282        switch (skb->protocol) {
1283        case htons(ETH_P_IP):
1284                ret = ip4ip6_tnl_xmit(skb, dev);
1285                break;
1286        case htons(ETH_P_IPV6):
1287                ret = ip6ip6_tnl_xmit(skb, dev);
1288                break;
1289        default:
1290                goto tx_err;
1291        }
1292
1293        if (ret < 0)
1294                goto tx_err;
1295
1296        return NETDEV_TX_OK;
1297
1298tx_err:
1299        stats->tx_errors++;
1300        stats->tx_dropped++;
1301        kfree_skb(skb);
1302        return NETDEV_TX_OK;
1303}
1304
/**
 * ip6_tnl_link_config - apply tunnel parameters to the net_device
 *   @t: the tunnel
 *
 * Refreshes the device addresses, the cached flowi6 template, the
 * capability flags and, when a route to the remote endpoint exists,
 * the device's hard_header_len and MTU.
 */
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
        struct net_device *dev = t->dev;
        struct __ip6_tnl_parm *p = &t->parms;
        struct flowi6 *fl6 = &t->fl.u.ip6;

        memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
        memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));

        /* Set up flowi template */
        fl6->saddr = p->laddr;
        fl6->daddr = p->raddr;
        fl6->flowi6_oif = p->link;
        fl6->flowlabel = 0;

        /* fixed tclass/flowlabel come from flowinfo unless the
         * corresponding "use original" flag overrides them per packet
         */
        if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
                fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
        if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
                fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;

        /* recompute capability bits from the configured addresses */
        p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
        p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);

        if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
                dev->flags |= IFF_POINTOPOINT;
        else
                dev->flags &= ~IFF_POINTOPOINT;

        if (p->flags & IP6_TNL_F_CAP_XMIT) {
                int strict = (ipv6_addr_type(&p->raddr) &
                              (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));

                struct rt6_info *rt = rt6_lookup(t->net,
                                                 &p->raddr, &p->laddr,
                                                 p->link, strict);

                if (rt == NULL)
                        return;

                /* derive device MTU from the underlying route, leaving
                 * room for the outer header (and the 8-byte encap-limit
                 * option when it will be inserted)
                 */
                if (rt->dst.dev) {
                        dev->hard_header_len = rt->dst.dev->hard_header_len +
                                sizeof (struct ipv6hdr);

                        dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr);
                        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                                dev->mtu-=8;

                        if (dev->mtu < IPV6_MIN_MTU)
                                dev->mtu = IPV6_MIN_MTU;
                }
                ip6_rt_put(rt);
        }
}
1358
1359/**
1360 * ip6_tnl_change - update the tunnel parameters
1361 *   @t: tunnel to be changed
1362 *   @p: tunnel configuration parameters
1363 *
1364 * Description:
1365 *   ip6_tnl_change() updates the tunnel parameters
1366 **/
1367
1368static int
1369ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1370{
1371        t->parms.laddr = p->laddr;
1372        t->parms.raddr = p->raddr;
1373        t->parms.flags = p->flags;
1374        t->parms.hop_limit = p->hop_limit;
1375        t->parms.encap_limit = p->encap_limit;
1376        t->parms.flowinfo = p->flowinfo;
1377        t->parms.link = p->link;
1378        t->parms.proto = p->proto;
1379        dst_cache_reset(&t->dst_cache);
1380        ip6_tnl_link_config(t);
1381        return 0;
1382}
1383
/* Re-key a tunnel: unlink it from its hash chain, wait for concurrent
 * RCU readers to finish (the lookup key is about to change), apply the
 * new parameters, then re-link under the new key.
 */
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
        struct net *net = t->net;
        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
        int err;

        ip6_tnl_unlink(ip6n, t);
        synchronize_net();
        err = ip6_tnl_change(t, p);
        ip6_tnl_link(ip6n, t);
        netdev_state_change(t->dev);
        return err;
}
1397
static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
        /* for default tnl0 device allow to change only the proto */
        t->parms.proto = p->proto;
        netdev_state_change(t->dev);
        return 0;
}
1405
1406static void
1407ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1408{
1409        p->laddr = u->laddr;
1410        p->raddr = u->raddr;
1411        p->flags = u->flags;
1412        p->hop_limit = u->hop_limit;
1413        p->encap_limit = u->encap_limit;
1414        p->flowinfo = u->flowinfo;
1415        p->link = u->link;
1416        p->proto = u->proto;
1417        memcpy(p->name, u->name, sizeof(u->name));
1418}
1419
1420static void
1421ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1422{
1423        u->laddr = p->laddr;
1424        u->raddr = p->raddr;
1425        u->flags = p->flags;
1426        u->hop_limit = p->hop_limit;
1427        u->encap_limit = p->encap_limit;
1428        u->flowinfo = p->flowinfo;
1429        u->link = p->link;
1430        u->proto = p->proto;
1431        memcpy(u->name, p->name, sizeof(u->name));
1432}
1433
1434/**
1435 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1436 *   @dev: virtual device associated with tunnel
1437 *   @ifr: parameters passed from userspace
1438 *   @cmd: command to be performed
1439 *
1440 * Description:
1441 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1442 *   from userspace.
1443 *
1444 *   The possible commands are the following:
1445 *     %SIOCGETTUNNEL: get tunnel parameters for device
1446 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
1447 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
1448 *     %SIOCDELTUNNEL: delete tunnel
1449 *
1450 *   The fallback device "ip6tnl0", created during module
1451 *   initialization, can be used for creating other tunnel devices.
1452 *
1453 * Return:
1454 *   0 on success,
1455 *   %-EFAULT if unable to copy data to or from userspace,
1456 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
1457 *   %-EINVAL if passed tunnel parameters are invalid,
1458 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
1459 *   %-ENODEV if attempting to change or delete a nonexisting device
1460 **/
1461
1462static int
1463ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1464{
1465        int err = 0;
1466        struct ip6_tnl_parm p;
1467        struct __ip6_tnl_parm p1;
1468        struct ip6_tnl *t = netdev_priv(dev);
1469        struct net *net = t->net;
1470        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1471
1472        memset(&p1, 0, sizeof(p1));
1473
1474        switch (cmd) {
1475        case SIOCGETTUNNEL:
1476                if (dev == ip6n->fb_tnl_dev) {
1477                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
1478                                err = -EFAULT;
1479                                break;
1480                        }
1481                        ip6_tnl_parm_from_user(&p1, &p);
1482                        t = ip6_tnl_locate(net, &p1, 0);
1483                        if (IS_ERR(t))
1484                                t = netdev_priv(dev);
1485                } else {
1486                        memset(&p, 0, sizeof(p));
1487                }
1488                ip6_tnl_parm_to_user(&p, &t->parms);
1489                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1490                        err = -EFAULT;
1491                }
1492                break;
1493        case SIOCADDTUNNEL:
1494        case SIOCCHGTUNNEL:
1495                err = -EPERM;
1496                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1497                        break;
1498                err = -EFAULT;
1499                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1500                        break;
1501                err = -EINVAL;
1502                if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1503                    p.proto != 0)
1504                        break;
1505                ip6_tnl_parm_from_user(&p1, &p);
1506                t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1507                if (cmd == SIOCCHGTUNNEL) {
1508                        if (!IS_ERR(t)) {
1509                                if (t->dev != dev) {
1510                                        err = -EEXIST;
1511                                        break;
1512                                }
1513                        } else
1514                                t = netdev_priv(dev);
1515                        if (dev == ip6n->fb_tnl_dev)
1516                                err = ip6_tnl0_update(t, &p1);
1517                        else
1518                                err = ip6_tnl_update(t, &p1);
1519                }
1520                if (!IS_ERR(t)) {
1521                        err = 0;
1522                        ip6_tnl_parm_to_user(&p, &t->parms);
1523                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1524                                err = -EFAULT;
1525
1526                } else {
1527                        err = PTR_ERR(t);
1528                }
1529                break;
1530        case SIOCDELTUNNEL:
1531                err = -EPERM;
1532                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1533                        break;
1534
1535                if (dev == ip6n->fb_tnl_dev) {
1536                        err = -EFAULT;
1537                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1538                                break;
1539                        err = -ENOENT;
1540                        ip6_tnl_parm_from_user(&p1, &p);
1541                        t = ip6_tnl_locate(net, &p1, 0);
1542                        if (IS_ERR(t))
1543                                break;
1544                        err = -EPERM;
1545                        if (t->dev == ip6n->fb_tnl_dev)
1546                                break;
1547                        dev = t->dev;
1548                }
1549                err = 0;
1550                unregister_netdevice(dev);
1551                break;
1552        default:
1553                err = -EINVAL;
1554        }
1555        return err;
1556}
1557
1558/**
1559 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1560 *   @dev: virtual device associated with tunnel
1561 *   @new_mtu: the new mtu
1562 *
1563 * Return:
1564 *   0 on success,
1565 *   %-EINVAL if mtu too small
1566 **/
1567
1568int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1569{
1570        struct ip6_tnl *tnl = netdev_priv(dev);
1571
1572        if (tnl->parms.proto == IPPROTO_IPV6) {
1573                if (new_mtu < IPV6_MIN_MTU)
1574                        return -EINVAL;
1575        } else {
1576                if (new_mtu < ETH_MIN_MTU)
1577                        return -EINVAL;
1578        }
1579        if (new_mtu > 0xFFF8 - dev->hard_header_len)
1580                return -EINVAL;
1581        dev->mtu = new_mtu;
1582        return 0;
1583}
1584EXPORT_SYMBOL(ip6_tnl_change_mtu);
1585
/* ndo_get_iflink: report the underlying device the tunnel is bound to
 * (0 if unbound).
 */
int ip6_tnl_get_iflink(const struct net_device *dev)
{
        struct ip6_tnl *t = netdev_priv(dev);

        return t->parms.link;
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
1593
/* net_device operations for ip6tnl devices.
 * NOTE(review): .ndo_change_mtu_rh74 looks like a vendor-kernel (RHEL)
 * variant of .ndo_change_mtu -- confirm against the local netdevice.h.
 */
static const struct net_device_ops ip6_tnl_netdev_ops = {
        .ndo_init       = ip6_tnl_dev_init,
        .ndo_uninit     = ip6_tnl_dev_uninit,
        .ndo_start_xmit = ip6_tnl_start_xmit,
        .ndo_do_ioctl   = ip6_tnl_ioctl,
        .ndo_change_mtu_rh74 = ip6_tnl_change_mtu,
        .ndo_get_stats  = ip6_get_stats,
        .ndo_get_iflink = ip6_tnl_get_iflink,
};
1603
1604
1605/**
1606 * ip6_tnl_dev_setup - setup virtual tunnel device
1607 *   @dev: virtual device associated with tunnel
1608 *
1609 * Description:
1610 *   Initialize function pointers and device parameters
1611 **/
1612
1613static void ip6_tnl_dev_setup(struct net_device *dev)
1614{
1615        struct ip6_tnl *t;
1616
1617        dev->netdev_ops = &ip6_tnl_netdev_ops;
1618        dev->extended->needs_free_netdev = true;
1619        dev->extended->priv_destructor = ip6_dev_free;
1620
1621        dev->type = ARPHRD_TUNNEL6;
1622        dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1623        dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1624        t = netdev_priv(dev);
1625        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1626                dev->mtu-=8;
1627        dev->flags |= IFF_NOARP;
1628        dev->addr_len = sizeof(struct in6_addr);
1629        netif_keep_dst(dev);
1630        /* This perm addr will be used as interface identifier by IPv6 */
1631        dev->addr_assign_type = NET_ADDR_RANDOM;
1632        eth_random_addr(dev->perm_addr);
1633}
1634
1635
1636/**
1637 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1638 *   @dev: virtual device associated with tunnel
1639 **/
1640
1641static inline int
1642ip6_tnl_dev_init_gen(struct net_device *dev)
1643{
1644        struct ip6_tnl *t = netdev_priv(dev);
1645        int ret;
1646
1647        t->dev = dev;
1648        t->net = dev_net(dev);
1649        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1650        if (!dev->tstats)
1651                return -ENOMEM;
1652
1653        ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1654        if (ret)
1655                goto free_stats;
1656
1657        ret = gro_cells_init(&t->gro_cells, dev);
1658        if (ret)
1659                goto destroy_dst;
1660
1661        t->hlen = 0;
1662        t->tun_hlen = 0;
1663
1664        return 0;
1665
1666destroy_dst:
1667        dst_cache_destroy(&t->dst_cache);
1668free_stats:
1669        free_percpu(dev->tstats);
1670        dev->tstats = NULL;
1671
1672        return ret;
1673}
1674
1675/**
1676 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1677 *   @dev: virtual device associated with tunnel
1678 **/
1679
1680static int ip6_tnl_dev_init(struct net_device *dev)
1681{
1682        struct ip6_tnl *t = netdev_priv(dev);
1683        int err = ip6_tnl_dev_init_gen(dev);
1684
1685        if (err)
1686                return err;
1687        ip6_tnl_link_config(t);
1688        return 0;
1689}
1690
1691/**
1692 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1693 *   @dev: fallback device
1694 *
1695 * Return: 0
1696 **/
1697
1698static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1699{
1700        struct ip6_tnl *t = netdev_priv(dev);
1701        struct net *net = dev_net(dev);
1702        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1703
1704        t->parms.proto = IPPROTO_IPV6;
1705        dev_hold(dev);
1706
1707        rcu_assign_pointer(ip6n->tnls_wc[0], t);
1708        return 0;
1709}
1710
1711static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])
1712{
1713        u8 proto;
1714
1715        if (!data)
1716                return 0;
1717
1718        proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1719        if (proto != IPPROTO_IPV6 &&
1720            proto != IPPROTO_IPIP &&
1721            proto != 0)
1722                return -EINVAL;
1723
1724        return 0;
1725}
1726
1727static void ip6_tnl_netlink_parms(struct nlattr *data[],
1728                                  struct __ip6_tnl_parm *parms)
1729{
1730        memset(parms, 0, sizeof(*parms));
1731
1732        if (!data)
1733                return;
1734
1735        if (data[IFLA_IPTUN_LINK])
1736                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1737
1738        if (data[IFLA_IPTUN_LOCAL])
1739                parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
1740
1741        if (data[IFLA_IPTUN_REMOTE])
1742                parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
1743
1744        if (data[IFLA_IPTUN_TTL])
1745                parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
1746
1747        if (data[IFLA_IPTUN_ENCAP_LIMIT])
1748                parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
1749
1750        if (data[IFLA_IPTUN_FLOWINFO])
1751                parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1752
1753        if (data[IFLA_IPTUN_FLAGS])
1754                parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
1755
1756        if (data[IFLA_IPTUN_PROTO])
1757                parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1758}
1759
/* rtnl_link_ops .newlink handler: refuse to create a tunnel whose
 * endpoints already match an existing one, then register the device and
 * apply an explicitly requested MTU.
 */
static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
                           struct nlattr *tb[], struct nlattr *data[])
{
        struct net *net = dev_net(dev);
        struct ip6_tnl *nt, *t;
        int err;

        nt = netdev_priv(dev);
        ip6_tnl_netlink_parms(data, &nt->parms);

        /* lookup only (create flag 0): success here means a duplicate */
        t = ip6_tnl_locate(net, &nt->parms, 0);
        if (!IS_ERR(t))
                return -EEXIST;

        err = ip6_tnl_create2(dev);
        if (!err && tb[IFLA_MTU])
                ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));

        return err;
}
1780
1781static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
1782                              struct nlattr *data[])
1783{
1784        struct ip6_tnl *t = netdev_priv(dev);
1785        struct __ip6_tnl_parm p;
1786        struct net *net = t->net;
1787        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1788
1789        if (dev == ip6n->fb_tnl_dev)
1790                return -EINVAL;
1791
1792        ip6_tnl_netlink_parms(data, &p);
1793
1794        t = ip6_tnl_locate(net, &p, 0);
1795        if (!IS_ERR(t)) {
1796                if (t->dev != dev)
1797                        return -EEXIST;
1798        } else
1799                t = netdev_priv(dev);
1800
1801        return ip6_tnl_update(t, &p);
1802}
1803
1804static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
1805{
1806        struct net *net = dev_net(dev);
1807        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1808
1809        if (dev != ip6n->fb_tnl_dev)
1810                unregister_netdevice_queue(dev, head);
1811}
1812
1813static size_t ip6_tnl_get_size(const struct net_device *dev)
1814{
1815        return
1816                /* IFLA_IPTUN_LINK */
1817                nla_total_size(4) +
1818                /* IFLA_IPTUN_LOCAL */
1819                nla_total_size(sizeof(struct in6_addr)) +
1820                /* IFLA_IPTUN_REMOTE */
1821                nla_total_size(sizeof(struct in6_addr)) +
1822                /* IFLA_IPTUN_TTL */
1823                nla_total_size(1) +
1824                /* IFLA_IPTUN_ENCAP_LIMIT */
1825                nla_total_size(1) +
1826                /* IFLA_IPTUN_FLOWINFO */
1827                nla_total_size(4) +
1828                /* IFLA_IPTUN_FLAGS */
1829                nla_total_size(4) +
1830                /* IFLA_IPTUN_PROTO */
1831                nla_total_size(1) +
1832                0;
1833}
1834
1835static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
1836{
1837        struct ip6_tnl *tunnel = netdev_priv(dev);
1838        struct __ip6_tnl_parm *parm = &tunnel->parms;
1839
1840        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1841            nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
1842            nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
1843            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
1844            nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
1845            nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
1846            nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
1847            nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
1848                goto nla_put_failure;
1849        return 0;
1850
1851nla_put_failure:
1852        return -EMSGSIZE;
1853}
1854
1855struct net *ip6_tnl_get_link_net(const struct net_device *dev)
1856{
1857        struct ip6_tnl *tunnel = netdev_priv(dev);
1858
1859        return tunnel->net;
1860}
1861EXPORT_SYMBOL(ip6_tnl_get_link_net);
1862
/* Netlink attribute policy for ip6tnl devices: validates the type/size
 * of each IFLA_IPTUN_* attribute before ip6_tnl_netlink_parms() reads it.
 */
static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_REMOTE]		= { .len = sizeof(struct in6_addr) },
	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 },
	[IFLA_IPTUN_ENCAP_LIMIT]	= { .type = NLA_U8 },
	[IFLA_IPTUN_FLOWINFO]		= { .type = NLA_U32 },
	[IFLA_IPTUN_FLAGS]		= { .type = NLA_U32 },
	[IFLA_IPTUN_PROTO]		= { .type = NLA_U8 },
};
1873
/* rtnetlink ops for "ip6tnl" links: ties the netlink handlers above to
 * the "ip6tnl" link kind used by 'ip link add ... type ip6tnl'.
 */
static struct rtnl_link_ops ip6_link_ops __read_mostly = {
	.kind		= "ip6tnl",
	.maxtype	= IFLA_IPTUN_MAX,
	.policy		= ip6_tnl_policy,
	.priv_size	= sizeof(struct ip6_tnl),
	.setup		= ip6_tnl_dev_setup,
	.validate	= ip6_tnl_validate,
	.newlink	= ip6_tnl_newlink,
	.changelink	= ip6_tnl_changelink,
	.dellink	= ip6_tnl_dellink,
	.get_size	= ip6_tnl_get_size,
	.fill_info	= ip6_tnl_fill_info,
	.get_link_net	= ip6_tnl_get_link_net,
};
1888
/* xfrm6 tunnel handler for IPv4-in-IPv6 (IPPROTO_IPIP) packets. */
static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
	.handler	= ip4ip6_rcv,
	.err_handler	= ip4ip6_err,
	.priority	=	1,
};
1894
/* xfrm6 tunnel handler for IPv6-in-IPv6 (IPPROTO_IPV6) packets. */
static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
	.handler	= ip6ip6_rcv,
	.err_handler	= ip6ip6_err,
	.priority	=	1,
};
1900
1901static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
1902{
1903        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1904        struct net_device *dev, *aux;
1905        int h;
1906        struct ip6_tnl *t;
1907        LIST_HEAD(list);
1908
1909        for_each_netdev_safe(net, dev, aux)
1910                if (dev->rtnl_link_ops == &ip6_link_ops)
1911                        unregister_netdevice_queue(dev, &list);
1912
1913        for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
1914                t = rtnl_dereference(ip6n->tnls_r_l[h]);
1915                while (t != NULL) {
1916                        /* If dev is in the same netns, it has already
1917                         * been added to the list by the previous loop.
1918                         */
1919                        if (!net_eq(dev_net(t->dev), net))
1920                                unregister_netdevice_queue(t->dev, &list);
1921                        t = rtnl_dereference(t->next);
1922                }
1923        }
1924
1925        unregister_netdevice_many(&list);
1926}
1927
1928static int __net_init ip6_tnl_init_net(struct net *net)
1929{
1930        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1931        struct ip6_tnl *t = NULL;
1932        int err;
1933
1934        ip6n->tnls[0] = ip6n->tnls_wc;
1935        ip6n->tnls[1] = ip6n->tnls_r_l;
1936
1937        err = -ENOMEM;
1938        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1939                                      ip6_tnl_dev_setup);
1940
1941        if (!ip6n->fb_tnl_dev)
1942                goto err_alloc_dev;
1943        dev_net_set(ip6n->fb_tnl_dev, net);
1944        ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
1945        /* FB netdevice is special: we have one, and only one per netns.
1946         * Allowing to move it to another netns is clearly unsafe.
1947         */
1948        ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
1949
1950        err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1951        if (err < 0)
1952                goto err_register;
1953
1954        err = register_netdev(ip6n->fb_tnl_dev);
1955        if (err < 0)
1956                goto err_register;
1957
1958        t = netdev_priv(ip6n->fb_tnl_dev);
1959
1960        strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
1961        return 0;
1962
1963err_register:
1964        free_netdev(ip6n->fb_tnl_dev);
1965err_alloc_dev:
1966        return err;
1967}
1968
/* Per-netns exit hook: destroy all tunnels belonging to @net.  RTNL is
 * taken here because ip6_tnl_destroy_tunnels() walks rtnl-protected
 * state and batches unregistration.
 */
static void __net_exit ip6_tnl_exit_net(struct net *net)
{
	rtnl_lock();
	ip6_tnl_destroy_tunnels(net);
	rtnl_unlock();
}
1975
/* Per-netns lifecycle for ip6tnl: .size reserves a struct ip6_tnl_net
 * per namespace, reachable via net_generic(net, ip6_tnl_net_id).
 */
static struct pernet_operations ip6_tnl_net_ops = {
	.init = ip6_tnl_init_net,
	.exit = ip6_tnl_exit_net,
	.id   = &ip6_tnl_net_id,
	.size = sizeof(struct ip6_tnl_net),
};
1982
1983/**
1984 * ip6_tunnel_init - register protocol and reserve needed resources
1985 *
1986 * Return: 0 on success
1987 **/
1988
1989static int __init ip6_tunnel_init(void)
1990{
1991        int  err;
1992
1993        if (!ipv6_mod_enabled())
1994                return -EOPNOTSUPP;
1995
1996        err = register_pernet_device(&ip6_tnl_net_ops);
1997        if (err < 0)
1998                goto out_pernet;
1999
2000        err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
2001        if (err < 0) {
2002                pr_err("%s: can't register ip4ip6\n", __func__);
2003                goto out_ip4ip6;
2004        }
2005
2006        err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
2007        if (err < 0) {
2008                pr_err("%s: can't register ip6ip6\n", __func__);
2009                goto out_ip6ip6;
2010        }
2011        err = rtnl_link_register(&ip6_link_ops);
2012        if (err < 0)
2013                goto rtnl_link_failed;
2014
2015        return 0;
2016
2017rtnl_link_failed:
2018        xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
2019out_ip6ip6:
2020        xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
2021out_ip4ip6:
2022        unregister_pernet_device(&ip6_tnl_net_ops);
2023out_pernet:
2024        return err;
2025}
2026
2027/**
2028 * ip6_tunnel_cleanup - free resources and unregister protocol
2029 **/
2030
2031static void __exit ip6_tunnel_cleanup(void)
2032{
2033        rtnl_link_unregister(&ip6_link_ops);
2034        if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
2035                pr_info("%s: can't deregister ip4ip6\n", __func__);
2036
2037        if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
2038                pr_info("%s: can't deregister ip6ip6\n", __func__);
2039
2040        unregister_pernet_device(&ip6_tnl_net_ops);
2041}
2042
/* Module entry/exit points. */
module_init(ip6_tunnel_init);
module_exit(ip6_tunnel_cleanup);
2045