linux/net/ipv6/ip6_tunnel.c
<<
>>
Prefs
   1/*
   2 *      IPv6 tunneling device
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Ville Nuorvala          <vnuorval@tcs.hut.fi>
   7 *      Yasuyuki Kozakai        <kozakai@linux-ipv6.org>
   8 *
   9 *      Based on:
  10 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  11 *
  12 *      RFC 2473
  13 *
  14 *      This program is free software; you can redistribute it and/or
  15 *      modify it under the terms of the GNU General Public License
  16 *      as published by the Free Software Foundation; either version
  17 *      2 of the License, or (at your option) any later version.
  18 *
  19 */
  20
  21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  22
  23#include <linux/module.h>
  24#include <linux/capability.h>
  25#include <linux/errno.h>
  26#include <linux/types.h>
  27#include <linux/sockios.h>
  28#include <linux/icmp.h>
  29#include <linux/if.h>
  30#include <linux/in.h>
  31#include <linux/ip.h>
  32#include <linux/net.h>
  33#include <linux/in6.h>
  34#include <linux/netdevice.h>
  35#include <linux/if_arp.h>
  36#include <linux/icmpv6.h>
  37#include <linux/init.h>
  38#include <linux/route.h>
  39#include <linux/rtnetlink.h>
  40#include <linux/netfilter_ipv6.h>
  41#include <linux/slab.h>
  42#include <linux/hash.h>
  43#include <linux/etherdevice.h>
  44
  45#include <linux/uaccess.h>
  46#include <linux/atomic.h>
  47
  48#include <net/icmp.h>
  49#include <net/ip.h>
  50#include <net/ip_tunnels.h>
  51#include <net/ipv6.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/ip6_tunnel.h>
  55#include <net/xfrm.h>
  56#include <net/dsfield.h>
  57#include <net/inet_ecn.h>
  58#include <net/net_namespace.h>
  59#include <net/netns/generic.h>
  60#include <net/dst_metadata.h>
  61
  62MODULE_AUTHOR("Ville Nuorvala");
  63MODULE_DESCRIPTION("IPv6 tunneling device");
  64MODULE_LICENSE("GPL");
  65MODULE_ALIAS_RTNL_LINK("ip6tnl");
  66MODULE_ALIAS_NETDEV("ip6tnl0");
  67
  68#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
  69#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
  70
  71static bool log_ecn_error = true;
  72module_param(log_ecn_error, bool, 0644);
  73MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  74
  75static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
  76{
  77        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  78
  79        return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
  80}
  81
  82static int ip6_tnl_dev_init(struct net_device *dev);
  83static void ip6_tnl_dev_setup(struct net_device *dev);
  84static struct rtnl_link_ops ip6_link_ops __read_mostly;
  85
  86static unsigned int ip6_tnl_net_id __read_mostly;
/* Tunnel bookkeeping looked up per network namespace via ip6_tnl_net_id. */
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	/* hash buckets indexed by HASH() of the tunnel end-point addresses */
	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
	/* single slot consulted last by ip6_tnl_lookup() */
	struct ip6_tnl __rcu *tnls_wc[1];
	/*
	 * Tables indexed by priority in ip6_tnl_bucket(): 0 for fully
	 * wildcarded end-points, 1 otherwise.
	 * NOTE(review): presumably set to tnls_wc / tnls_r_l at init time;
	 * the initialisation is not visible here - confirm.
	 */
	struct ip6_tnl __rcu **tnls[2];
	/* tunnel with parms.collect_md set, published by ip6_tnl_link() */
	struct ip6_tnl __rcu *collect_md_tun;
};
  96
  97static struct net_device_stats *ip6_get_stats(struct net_device *dev)
  98{
  99        struct pcpu_sw_netstats tmp, sum = { 0 };
 100        int i;
 101
 102        for_each_possible_cpu(i) {
 103                unsigned int start;
 104                const struct pcpu_sw_netstats *tstats =
 105                                                   per_cpu_ptr(dev->tstats, i);
 106
 107                do {
 108                        start = u64_stats_fetch_begin_irq(&tstats->syncp);
 109                        tmp.rx_packets = tstats->rx_packets;
 110                        tmp.rx_bytes = tstats->rx_bytes;
 111                        tmp.tx_packets = tstats->tx_packets;
 112                        tmp.tx_bytes =  tstats->tx_bytes;
 113                } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
 114
 115                sum.rx_packets += tmp.rx_packets;
 116                sum.rx_bytes   += tmp.rx_bytes;
 117                sum.tx_packets += tmp.tx_packets;
 118                sum.tx_bytes   += tmp.tx_bytes;
 119        }
 120        dev->stats.rx_packets = sum.rx_packets;
 121        dev->stats.rx_bytes   = sum.rx_bytes;
 122        dev->stats.tx_packets = sum.tx_packets;
 123        dev->stats.tx_bytes   = sum.tx_bytes;
 124        return &dev->stats;
 125}
 126
 127/**
 128 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 129 *   @remote: the address of the tunnel exit-point
 130 *   @local: the address of the tunnel entry-point
 131 *
 132 * Return:
 133 *   tunnel matching given end-points if found,
 134 *   else fallback tunnel if its device is up,
 135 *   else %NULL
 136 **/
 137
 138#define for_each_ip6_tunnel_rcu(start) \
 139        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 140
 141static struct ip6_tnl *
 142ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
 143{
 144        unsigned int hash = HASH(remote, local);
 145        struct ip6_tnl *t;
 146        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 147        struct in6_addr any;
 148
 149        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 150                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 151                    ipv6_addr_equal(remote, &t->parms.raddr) &&
 152                    (t->dev->flags & IFF_UP))
 153                        return t;
 154        }
 155
 156        memset(&any, 0, sizeof(any));
 157        hash = HASH(&any, local);
 158        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 159                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 160                    ipv6_addr_any(&t->parms.raddr) &&
 161                    (t->dev->flags & IFF_UP))
 162                        return t;
 163        }
 164
 165        hash = HASH(remote, &any);
 166        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 167                if (ipv6_addr_equal(remote, &t->parms.raddr) &&
 168                    ipv6_addr_any(&t->parms.laddr) &&
 169                    (t->dev->flags & IFF_UP))
 170                        return t;
 171        }
 172
 173        t = rcu_dereference(ip6n->collect_md_tun);
 174        if (t && t->dev->flags & IFF_UP)
 175                return t;
 176
 177        t = rcu_dereference(ip6n->tnls_wc[0]);
 178        if (t && (t->dev->flags & IFF_UP))
 179                return t;
 180
 181        return NULL;
 182}
 183
 184/**
 185 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 186 *   @p: parameters containing tunnel end-points
 187 *
 188 * Description:
 189 *   ip6_tnl_bucket() returns the head of the list matching the
 190 *   &struct in6_addr entries laddr and raddr in @p.
 191 *
 192 * Return: head of IPv6 tunnel list
 193 **/
 194
 195static struct ip6_tnl __rcu **
 196ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
 197{
 198        const struct in6_addr *remote = &p->raddr;
 199        const struct in6_addr *local = &p->laddr;
 200        unsigned int h = 0;
 201        int prio = 0;
 202
 203        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 204                prio = 1;
 205                h = HASH(remote, local);
 206        }
 207        return &ip6n->tnls[prio][h];
 208}
 209
 210/**
 211 * ip6_tnl_link - add tunnel to hash table
 212 *   @t: tunnel to be added
 213 **/
 214
 215static void
 216ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 217{
 218        struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 219
 220        if (t->parms.collect_md)
 221                rcu_assign_pointer(ip6n->collect_md_tun, t);
 222        rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 223        rcu_assign_pointer(*tp, t);
 224}
 225
 226/**
 227 * ip6_tnl_unlink - remove tunnel from hash table
 228 *   @t: tunnel to be removed
 229 **/
 230
 231static void
 232ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 233{
 234        struct ip6_tnl __rcu **tp;
 235        struct ip6_tnl *iter;
 236
 237        if (t->parms.collect_md)
 238                rcu_assign_pointer(ip6n->collect_md_tun, NULL);
 239
 240        for (tp = ip6_tnl_bucket(ip6n, &t->parms);
 241             (iter = rtnl_dereference(*tp)) != NULL;
 242             tp = &iter->next) {
 243                if (t == iter) {
 244                        rcu_assign_pointer(*tp, t->next);
 245                        break;
 246                }
 247        }
 248}
 249
 250static void ip6_dev_free(struct net_device *dev)
 251{
 252        struct ip6_tnl *t = netdev_priv(dev);
 253
 254        gro_cells_destroy(&t->gro_cells);
 255        dst_cache_destroy(&t->dst_cache);
 256        free_percpu(dev->tstats);
 257}
 258
 259static int ip6_tnl_create2(struct net_device *dev)
 260{
 261        struct ip6_tnl *t = netdev_priv(dev);
 262        struct net *net = dev_net(dev);
 263        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 264        int err;
 265
 266        t = netdev_priv(dev);
 267
 268        dev->rtnl_link_ops = &ip6_link_ops;
 269        err = register_netdevice(dev);
 270        if (err < 0)
 271                goto out;
 272
 273        strcpy(t->parms.name, dev->name);
 274
 275        dev_hold(dev);
 276        ip6_tnl_link(ip6n, t);
 277        return 0;
 278
 279out:
 280        return err;
 281}
 282
 283/**
 284 * ip6_tnl_create - create a new tunnel
 285 *   @p: tunnel parameters
 286 *   @pt: pointer to new tunnel
 287 *
 288 * Description:
 289 *   Create tunnel matching given parameters.
 290 *
 291 * Return:
 292 *   created tunnel or error pointer
 293 **/
 294
 295static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 296{
 297        struct net_device *dev;
 298        struct ip6_tnl *t;
 299        char name[IFNAMSIZ];
 300        int err = -E2BIG;
 301
 302        if (p->name[0]) {
 303                if (!dev_valid_name(p->name))
 304                        goto failed;
 305                strlcpy(name, p->name, IFNAMSIZ);
 306        } else {
 307                sprintf(name, "ip6tnl%%d");
 308        }
 309        err = -ENOMEM;
 310        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 311                           ip6_tnl_dev_setup);
 312        if (!dev)
 313                goto failed;
 314
 315        dev_net_set(dev, net);
 316
 317        t = netdev_priv(dev);
 318        t->parms = *p;
 319        t->net = dev_net(dev);
 320        err = ip6_tnl_create2(dev);
 321        if (err < 0)
 322                goto failed_free;
 323
 324        return t;
 325
 326failed_free:
 327        free_netdev(dev);
 328failed:
 329        return ERR_PTR(err);
 330}
 331
 332/**
 333 * ip6_tnl_locate - find or create tunnel matching given parameters
 334 *   @p: tunnel parameters
 335 *   @create: != 0 if allowed to create new tunnel if no match found
 336 *
 337 * Description:
 338 *   ip6_tnl_locate() first tries to locate an existing tunnel
 339 *   based on @parms. If this is unsuccessful, but @create is set a new
 340 *   tunnel device is created and registered for use.
 341 *
 342 * Return:
 343 *   matching tunnel or error pointer
 344 **/
 345
 346static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 347                struct __ip6_tnl_parm *p, int create)
 348{
 349        const struct in6_addr *remote = &p->raddr;
 350        const struct in6_addr *local = &p->laddr;
 351        struct ip6_tnl __rcu **tp;
 352        struct ip6_tnl *t;
 353        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 354
 355        for (tp = ip6_tnl_bucket(ip6n, p);
 356             (t = rtnl_dereference(*tp)) != NULL;
 357             tp = &t->next) {
 358                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 359                    ipv6_addr_equal(remote, &t->parms.raddr)) {
 360                        if (create)
 361                                return ERR_PTR(-EEXIST);
 362
 363                        return t;
 364                }
 365        }
 366        if (!create)
 367                return ERR_PTR(-ENODEV);
 368        return ip6_tnl_create(net, p);
 369}
 370
 371/**
 372 * ip6_tnl_dev_uninit - tunnel device uninitializer
 373 *   @dev: the device to be destroyed
 374 *
 375 * Description:
 376 *   ip6_tnl_dev_uninit() removes tunnel from its list
 377 **/
 378
 379static void
 380ip6_tnl_dev_uninit(struct net_device *dev)
 381{
 382        struct ip6_tnl *t = netdev_priv(dev);
 383        struct net *net = t->net;
 384        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 385
 386        if (dev == ip6n->fb_tnl_dev)
 387                RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 388        else
 389                ip6_tnl_unlink(ip6n, t);
 390        dst_cache_reset(&t->dst_cache);
 391        dev_put(dev);
 392}
 393
 394/**
 395 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 396 *   @skb: received socket buffer
 397 *
 398 * Return:
 399 *   0 if none was found,
 400 *   else index to encapsulation limit
 401 **/
 402
 403__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 404{
 405        const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
 406        unsigned int nhoff = raw - skb->data;
 407        unsigned int off = nhoff + sizeof(*ipv6h);
 408        u8 next, nexthdr = ipv6h->nexthdr;
 409
 410        while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
 411                struct ipv6_opt_hdr *hdr;
 412                u16 optlen;
 413
 414                if (!pskb_may_pull(skb, off + sizeof(*hdr)))
 415                        break;
 416
 417                hdr = (struct ipv6_opt_hdr *)(skb->data + off);
 418                if (nexthdr == NEXTHDR_FRAGMENT) {
 419                        struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
 420                        if (frag_hdr->frag_off)
 421                                break;
 422                        optlen = 8;
 423                } else if (nexthdr == NEXTHDR_AUTH) {
 424                        optlen = (hdr->hdrlen + 2) << 2;
 425                } else {
 426                        optlen = ipv6_optlen(hdr);
 427                }
 428                /* cache hdr->nexthdr, since pskb_may_pull() might
 429                 * invalidate hdr
 430                 */
 431                next = hdr->nexthdr;
 432                if (nexthdr == NEXTHDR_DEST) {
 433                        u16 i = 2;
 434
 435                        /* Remember : hdr is no longer valid at this point. */
 436                        if (!pskb_may_pull(skb, off + optlen))
 437                                break;
 438
 439                        while (1) {
 440                                struct ipv6_tlv_tnl_enc_lim *tel;
 441
 442                                /* No more room for encapsulation limit */
 443                                if (i + sizeof(*tel) > optlen)
 444                                        break;
 445
 446                                tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
 447                                /* return index of option if found and valid */
 448                                if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 449                                    tel->length == 1)
 450                                        return i + off - nhoff;
 451                                /* else jump to next option */
 452                                if (tel->type)
 453                                        i += tel->length + 2;
 454                                else
 455                                        i++;
 456                        }
 457                }
 458                nexthdr = next;
 459                off += optlen;
 460        }
 461        return 0;
 462}
 463EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
 464
 465/**
 466 * ip6_tnl_err - tunnel error handler
 467 *
 468 * Description:
 469 *   ip6_tnl_err() should handle errors in the tunnel according
 470 *   to the specifications in RFC 2473.
 471 **/
 472
 473static int
 474ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 475            u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 476{
 477        const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
 478        struct net *net = dev_net(skb->dev);
 479        u8 rel_type = ICMPV6_DEST_UNREACH;
 480        u8 rel_code = ICMPV6_ADDR_UNREACH;
 481        __u32 rel_info = 0;
 482        struct ip6_tnl *t;
 483        int err = -ENOENT;
 484        int rel_msg = 0;
 485        u8 tproto;
 486        __u16 len;
 487
 488        /* If the packet doesn't contain the original IPv6 header we are
 489           in trouble since we might need the source address for further
 490           processing of the error. */
 491
 492        rcu_read_lock();
 493        t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr);
 494        if (!t)
 495                goto out;
 496
 497        tproto = READ_ONCE(t->parms.proto);
 498        if (tproto != ipproto && tproto != 0)
 499                goto out;
 500
 501        err = 0;
 502
 503        switch (*type) {
 504                struct ipv6_tlv_tnl_enc_lim *tel;
 505                __u32 mtu, teli;
 506        case ICMPV6_DEST_UNREACH:
 507                net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
 508                                    t->parms.name);
 509                rel_msg = 1;
 510                break;
 511        case ICMPV6_TIME_EXCEED:
 512                if ((*code) == ICMPV6_EXC_HOPLIMIT) {
 513                        net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
 514                                            t->parms.name);
 515                        rel_msg = 1;
 516                }
 517                break;
 518        case ICMPV6_PARAMPROB:
 519                teli = 0;
 520                if ((*code) == ICMPV6_HDR_FIELD)
 521                        teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
 522
 523                if (teli && teli == *info - 2) {
 524                        tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 525                        if (tel->encap_limit == 0) {
 526                                net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
 527                                                    t->parms.name);
 528                                rel_msg = 1;
 529                        }
 530                } else {
 531                        net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
 532                                            t->parms.name);
 533                }
 534                break;
 535        case ICMPV6_PKT_TOOBIG:
 536                ip6_update_pmtu(skb, net, htonl(*info), 0, 0,
 537                                sock_net_uid(net, NULL));
 538                mtu = *info - offset;
 539                if (mtu < IPV6_MIN_MTU)
 540                        mtu = IPV6_MIN_MTU;
 541                len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len);
 542                if (len > mtu) {
 543                        rel_type = ICMPV6_PKT_TOOBIG;
 544                        rel_code = 0;
 545                        rel_info = mtu;
 546                        rel_msg = 1;
 547                }
 548                break;
 549        case NDISC_REDIRECT:
 550                ip6_redirect(skb, net, skb->dev->ifindex, 0,
 551                             sock_net_uid(net, NULL));
 552                break;
 553        }
 554
 555        *type = rel_type;
 556        *code = rel_code;
 557        *info = rel_info;
 558        *msg = rel_msg;
 559
 560out:
 561        rcu_read_unlock();
 562        return err;
 563}
 564
 565static int
 566ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 567           u8 type, u8 code, int offset, __be32 info)
 568{
 569        __u32 rel_info = ntohl(info);
 570        const struct iphdr *eiph;
 571        struct sk_buff *skb2;
 572        int err, rel_msg = 0;
 573        u8 rel_type = type;
 574        u8 rel_code = code;
 575        struct rtable *rt;
 576        struct flowi4 fl4;
 577
 578        err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
 579                          &rel_msg, &rel_info, offset);
 580        if (err < 0)
 581                return err;
 582
 583        if (rel_msg == 0)
 584                return 0;
 585
 586        switch (rel_type) {
 587        case ICMPV6_DEST_UNREACH:
 588                if (rel_code != ICMPV6_ADDR_UNREACH)
 589                        return 0;
 590                rel_type = ICMP_DEST_UNREACH;
 591                rel_code = ICMP_HOST_UNREACH;
 592                break;
 593        case ICMPV6_PKT_TOOBIG:
 594                if (rel_code != 0)
 595                        return 0;
 596                rel_type = ICMP_DEST_UNREACH;
 597                rel_code = ICMP_FRAG_NEEDED;
 598                break;
 599        default:
 600                return 0;
 601        }
 602
 603        if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
 604                return 0;
 605
 606        skb2 = skb_clone(skb, GFP_ATOMIC);
 607        if (!skb2)
 608                return 0;
 609
 610        skb_dst_drop(skb2);
 611
 612        skb_pull(skb2, offset);
 613        skb_reset_network_header(skb2);
 614        eiph = ip_hdr(skb2);
 615
 616        /* Try to guess incoming interface */
 617        rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
 618                                   0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 619        if (IS_ERR(rt))
 620                goto out;
 621
 622        skb2->dev = rt->dst.dev;
 623        ip_rt_put(rt);
 624
 625        /* route "incoming" packet */
 626        if (rt->rt_flags & RTCF_LOCAL) {
 627                rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
 628                                           eiph->daddr, eiph->saddr, 0, 0,
 629                                           IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
 630                if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) {
 631                        if (!IS_ERR(rt))
 632                                ip_rt_put(rt);
 633                        goto out;
 634                }
 635                skb_dst_set(skb2, &rt->dst);
 636        } else {
 637                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 638                                   skb2->dev) ||
 639                    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
 640                        goto out;
 641        }
 642
 643        /* change mtu on this route */
 644        if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
 645                if (rel_info > dst_mtu(skb_dst(skb2)))
 646                        goto out;
 647
 648                skb_dst_update_pmtu(skb2, rel_info);
 649        }
 650
 651        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 652
 653out:
 654        kfree_skb(skb2);
 655        return 0;
 656}
 657
 658static int
 659ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 660           u8 type, u8 code, int offset, __be32 info)
 661{
 662        __u32 rel_info = ntohl(info);
 663        int err, rel_msg = 0;
 664        u8 rel_type = type;
 665        u8 rel_code = code;
 666
 667        err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
 668                          &rel_msg, &rel_info, offset);
 669        if (err < 0)
 670                return err;
 671
 672        if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 673                struct rt6_info *rt;
 674                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 675
 676                if (!skb2)
 677                        return 0;
 678
 679                skb_dst_drop(skb2);
 680                skb_pull(skb2, offset);
 681                skb_reset_network_header(skb2);
 682
 683                /* Try to guess incoming interface */
 684                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
 685                                NULL, 0, skb2, 0);
 686
 687                if (rt && rt->dst.dev)
 688                        skb2->dev = rt->dst.dev;
 689
 690                icmpv6_send(skb2, rel_type, rel_code, rel_info);
 691
 692                ip6_rt_put(rt);
 693
 694                kfree_skb(skb2);
 695        }
 696
 697        return 0;
 698}
 699
 700static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 701                                       const struct ipv6hdr *ipv6h,
 702                                       struct sk_buff *skb)
 703{
 704        __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 705
 706        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 707                ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
 708
 709        return IP6_ECN_decapsulate(ipv6h, skb);
 710}
 711
 712static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 713                                       const struct ipv6hdr *ipv6h,
 714                                       struct sk_buff *skb)
 715{
 716        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 717                ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
 718
 719        return IP6_ECN_decapsulate(ipv6h, skb);
 720}
 721
 722__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 723                             const struct in6_addr *laddr,
 724                             const struct in6_addr *raddr)
 725{
 726        struct __ip6_tnl_parm *p = &t->parms;
 727        int ltype = ipv6_addr_type(laddr);
 728        int rtype = ipv6_addr_type(raddr);
 729        __u32 flags = 0;
 730
 731        if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
 732                flags = IP6_TNL_F_CAP_PER_PACKET;
 733        } else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 734                   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
 735                   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
 736                   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
 737                if (ltype&IPV6_ADDR_UNICAST)
 738                        flags |= IP6_TNL_F_CAP_XMIT;
 739                if (rtype&IPV6_ADDR_UNICAST)
 740                        flags |= IP6_TNL_F_CAP_RCV;
 741        }
 742        return flags;
 743}
 744EXPORT_SYMBOL(ip6_tnl_get_cap);
 745
 746/* called with rcu_read_lock() */
 747int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 748                                  const struct in6_addr *laddr,
 749                                  const struct in6_addr *raddr)
 750{
 751        struct __ip6_tnl_parm *p = &t->parms;
 752        int ret = 0;
 753        struct net *net = t->net;
 754
 755        if ((p->flags & IP6_TNL_F_CAP_RCV) ||
 756            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 757             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
 758                struct net_device *ldev = NULL;
 759
 760                if (p->link)
 761                        ldev = dev_get_by_index_rcu(net, p->link);
 762
 763                if ((ipv6_addr_is_multicast(laddr) ||
 764                     likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
 765                                                    0, IFA_F_TENTATIVE))) &&
 766                    ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
 767                     likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
 768                                                     0, IFA_F_TENTATIVE))))
 769                        ret = 1;
 770        }
 771        return ret;
 772}
 773EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 774
 775static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 776                         const struct tnl_ptk_info *tpi,
 777                         struct metadata_dst *tun_dst,
 778                         int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
 779                                                const struct ipv6hdr *ipv6h,
 780                                                struct sk_buff *skb),
 781                         bool log_ecn_err)
 782{
 783        struct pcpu_sw_netstats *tstats;
 784        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 785        int err;
 786
 787        if ((!(tpi->flags & TUNNEL_CSUM) &&
 788             (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
 789            ((tpi->flags & TUNNEL_CSUM) &&
 790             !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
 791                tunnel->dev->stats.rx_crc_errors++;
 792                tunnel->dev->stats.rx_errors++;
 793                goto drop;
 794        }
 795
 796        if (tunnel->parms.i_flags & TUNNEL_SEQ) {
 797                if (!(tpi->flags & TUNNEL_SEQ) ||
 798                    (tunnel->i_seqno &&
 799                     (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
 800                        tunnel->dev->stats.rx_fifo_errors++;
 801                        tunnel->dev->stats.rx_errors++;
 802                        goto drop;
 803                }
 804                tunnel->i_seqno = ntohl(tpi->seq) + 1;
 805        }
 806
 807        skb->protocol = tpi->proto;
 808
 809        /* Warning: All skb pointers will be invalidated! */
 810        if (tunnel->dev->type == ARPHRD_ETHER) {
 811                if (!pskb_may_pull(skb, ETH_HLEN)) {
 812                        tunnel->dev->stats.rx_length_errors++;
 813                        tunnel->dev->stats.rx_errors++;
 814                        goto drop;
 815                }
 816
 817                ipv6h = ipv6_hdr(skb);
 818                skb->protocol = eth_type_trans(skb, tunnel->dev);
 819                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 820        } else {
 821                skb->dev = tunnel->dev;
 822        }
 823
 824        skb_reset_network_header(skb);
 825        memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 826
 827        __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 828
 829        err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
 830        if (unlikely(err)) {
 831                if (log_ecn_err)
 832                        net_info_ratelimited("non-ECT from %pI6 with DS=%#x\n",
 833                                             &ipv6h->saddr,
 834                                             ipv6_get_dsfield(ipv6h));
 835                if (err > 1) {
 836                        ++tunnel->dev->stats.rx_frame_errors;
 837                        ++tunnel->dev->stats.rx_errors;
 838                        goto drop;
 839                }
 840        }
 841
 842        tstats = this_cpu_ptr(tunnel->dev->tstats);
 843        u64_stats_update_begin(&tstats->syncp);
 844        tstats->rx_packets++;
 845        tstats->rx_bytes += skb->len;
 846        u64_stats_update_end(&tstats->syncp);
 847
 848        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 849
 850        if (tun_dst)
 851                skb_dst_set(skb, (struct dst_entry *)tun_dst);
 852
 853        gro_cells_receive(&tunnel->gro_cells, skb);
 854        return 0;
 855
 856drop:
 857        if (tun_dst)
 858                dst_release((struct dst_entry *)tun_dst);
 859        kfree_skb(skb);
 860        return 0;
 861}
 862
 863int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
 864                const struct tnl_ptk_info *tpi,
 865                struct metadata_dst *tun_dst,
 866                bool log_ecn_err)
 867{
 868        return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
 869                             log_ecn_err);
 870}
 871EXPORT_SYMBOL(ip6_tnl_rcv);
 872
/*
 * Fixed tunnel packet info for an inner IPv6 payload. Only the protocol is
 * set; flags and seq stay zero.
 */
static const struct tnl_ptk_info tpi_v6 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IPV6),
};

/*
 * Fixed tunnel packet info for an inner IPv4 payload; passed to
 * ipxip6_rcv() by ip4ip6_rcv().
 */
static const struct tnl_ptk_info tpi_v4 = {
	/* no tunnel info required for ipxip6. */
	.proto = htons(ETH_P_IP),
};
 882
 883static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 884                      const struct tnl_ptk_info *tpi,
 885                      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
 886                                                  const struct ipv6hdr *ipv6h,
 887                                                  struct sk_buff *skb))
 888{
 889        struct ip6_tnl *t;
 890        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 891        struct metadata_dst *tun_dst = NULL;
 892        int ret = -1;
 893
 894        rcu_read_lock();
 895        t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
 896
 897        if (t) {
 898                u8 tproto = READ_ONCE(t->parms.proto);
 899
 900                if (tproto != ipproto && tproto != 0)
 901                        goto drop;
 902                if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 903                        goto drop;
 904                if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr))
 905                        goto drop;
 906                if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 907                        goto drop;
 908                if (t->parms.collect_md) {
 909                        tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
 910                        if (!tun_dst)
 911                                goto drop;
 912                }
 913                ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 914                                    log_ecn_error);
 915        }
 916
 917        rcu_read_unlock();
 918
 919        return ret;
 920
 921drop:
 922        rcu_read_unlock();
 923        kfree_skb(skb);
 924        return 0;
 925}
 926
 927static int ip4ip6_rcv(struct sk_buff *skb)
 928{
 929        return ipxip6_rcv(skb, IPPROTO_IPIP, &tpi_v4,
 930                          ip4ip6_dscp_ecn_decapsulate);
 931}
 932
 933static int ip6ip6_rcv(struct sk_buff *skb)
 934{
 935        return ipxip6_rcv(skb, IPPROTO_IPV6, &tpi_v6,
 936                          ip6ip6_dscp_ecn_decapsulate);
 937}
 938
 939struct ipv6_tel_txoption {
 940        struct ipv6_txoptions ops;
 941        __u8 dst_opt[8];
 942};
 943
 944static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 945{
 946        memset(opt, 0, sizeof(struct ipv6_tel_txoption));
 947
 948        opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
 949        opt->dst_opt[3] = 1;
 950        opt->dst_opt[4] = encap_limit;
 951        opt->dst_opt[5] = IPV6_TLV_PADN;
 952        opt->dst_opt[6] = 1;
 953
 954        opt->ops.dst1opt = (struct ipv6_opt_hdr *) opt->dst_opt;
 955        opt->ops.opt_nflen = 8;
 956}
 957
 958/**
 959 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 960 *   @t: the outgoing tunnel device
 961 *   @hdr: IPv6 header from the incoming packet
 962 *
 963 * Description:
 964 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 965 *   doesn't match source of incoming packet.
 966 *
 967 * Return:
 968 *   1 if conflict,
 969 *   0 else
 970 **/
 971
 972static inline bool
 973ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 974{
 975        return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 976}
 977
 978int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
 979                     const struct in6_addr *laddr,
 980                     const struct in6_addr *raddr)
 981{
 982        struct __ip6_tnl_parm *p = &t->parms;
 983        int ret = 0;
 984        struct net *net = t->net;
 985
 986        if (t->parms.collect_md)
 987                return 1;
 988
 989        if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
 990            ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
 991             (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
 992                struct net_device *ldev = NULL;
 993
 994                rcu_read_lock();
 995                if (p->link)
 996                        ldev = dev_get_by_index_rcu(net, p->link);
 997
 998                if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
 999                                                      0, IFA_F_TENTATIVE)))
1000                        pr_warn("%s xmit: Local address not yet configured!\n",
1001                                p->name);
1002                else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
1003                         !ipv6_addr_is_multicast(raddr) &&
1004                         unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
1005                                                          true, 0, IFA_F_TENTATIVE)))
1006                        pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
1007                                p->name);
1008                else
1009                        ret = 1;
1010                rcu_read_unlock();
1011        }
1012        return ret;
1013}
1014EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
1015
1016/**
1017 * ip6_tnl_xmit - encapsulate packet and send
1018 *   @skb: the outgoing socket buffer
1019 *   @dev: the outgoing tunnel device
1020 *   @dsfield: dscp code for outer header
1021 *   @fl6: flow of tunneled packet
1022 *   @encap_limit: encapsulation limit
1023 *   @pmtu: Path MTU is stored if packet is too big
1024 *   @proto: next header value
1025 *
1026 * Description:
1027 *   Build new header and do some sanity checks on the packet before sending
1028 *   it.
1029 *
1030 * Return:
1031 *   0 on success
1032 *   -1 fail
1033 *   %-EMSGSIZE message too big. return mtu in this case.
1034 **/
1035
1036int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
1037                 struct flowi6 *fl6, int encap_limit, __u32 *pmtu,
1038                 __u8 proto)
1039{
1040        struct ip6_tnl *t = netdev_priv(dev);
1041        struct net *net = t->net;
1042        struct net_device_stats *stats = &t->dev->stats;
1043        struct ipv6hdr *ipv6h;
1044        struct ipv6_tel_txoption opt;
1045        struct dst_entry *dst = NULL, *ndst = NULL;
1046        struct net_device *tdev;
1047        int mtu;
1048        unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
1049        unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
1050        unsigned int max_headroom = psh_hlen;
1051        bool use_cache = false;
1052        u8 hop_limit;
1053        int err = -1;
1054
1055        if (t->parms.collect_md) {
1056                hop_limit = skb_tunnel_info(skb)->key.ttl;
1057                goto route_lookup;
1058        } else {
1059                hop_limit = t->parms.hop_limit;
1060        }
1061
1062        /* NBMA tunnel */
1063        if (ipv6_addr_any(&t->parms.raddr)) {
1064                if (skb->protocol == htons(ETH_P_IPV6)) {
1065                        struct in6_addr *addr6;
1066                        struct neighbour *neigh;
1067                        int addr_type;
1068
1069                        if (!skb_dst(skb))
1070                                goto tx_err_link_failure;
1071
1072                        neigh = dst_neigh_lookup(skb_dst(skb),
1073                                                 &ipv6_hdr(skb)->daddr);
1074                        if (!neigh)
1075                                goto tx_err_link_failure;
1076
1077                        addr6 = (struct in6_addr *)&neigh->primary_key;
1078                        addr_type = ipv6_addr_type(addr6);
1079
1080                        if (addr_type == IPV6_ADDR_ANY)
1081                                addr6 = &ipv6_hdr(skb)->daddr;
1082
1083                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
1084                        neigh_release(neigh);
1085                }
1086        } else if (t->parms.proto != 0 && !(t->parms.flags &
1087                                            (IP6_TNL_F_USE_ORIG_TCLASS |
1088                                             IP6_TNL_F_USE_ORIG_FWMARK))) {
1089                /* enable the cache only if neither the outer protocol nor the
1090                 * routing decision depends on the current inner header value
1091                 */
1092                use_cache = true;
1093        }
1094
1095        if (use_cache)
1096                dst = dst_cache_get(&t->dst_cache);
1097
1098        if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
1099                goto tx_err_link_failure;
1100
1101        if (!dst) {
1102route_lookup:
1103                /* add dsfield to flowlabel for route lookup */
1104                fl6->flowlabel = ip6_make_flowinfo(dsfield, fl6->flowlabel);
1105
1106                dst = ip6_route_output(net, NULL, fl6);
1107
1108                if (dst->error)
1109                        goto tx_err_link_failure;
1110                dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0);
1111                if (IS_ERR(dst)) {
1112                        err = PTR_ERR(dst);
1113                        dst = NULL;
1114                        goto tx_err_link_failure;
1115                }
1116                if (t->parms.collect_md &&
1117                    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
1118                                       &fl6->daddr, 0, &fl6->saddr))
1119                        goto tx_err_link_failure;
1120                ndst = dst;
1121        }
1122
1123        tdev = dst->dev;
1124
1125        if (tdev == dev) {
1126                stats->collisions++;
1127                net_warn_ratelimited("%s: Local routing loop detected!\n",
1128                                     t->parms.name);
1129                goto tx_err_dst_release;
1130        }
1131        mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
1132        if (encap_limit >= 0) {
1133                max_headroom += 8;
1134                mtu -= 8;
1135        }
1136        if (skb->protocol == htons(ETH_P_IPV6)) {
1137                if (mtu < IPV6_MIN_MTU)
1138                        mtu = IPV6_MIN_MTU;
1139        } else if (mtu < 576) {
1140                mtu = 576;
1141        }
1142
1143        skb_dst_update_pmtu(skb, mtu);
1144        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
1145                *pmtu = mtu;
1146                err = -EMSGSIZE;
1147                goto tx_err_dst_release;
1148        }
1149
1150        if (t->err_count > 0) {
1151                if (time_before(jiffies,
1152                                t->err_time + IP6TUNNEL_ERR_TIMEO)) {
1153                        t->err_count--;
1154
1155                        dst_link_failure(skb);
1156                } else {
1157                        t->err_count = 0;
1158                }
1159        }
1160
1161        skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
1162
1163        /*
1164         * Okay, now see if we can stuff it in the buffer as-is.
1165         */
1166        max_headroom += LL_RESERVED_SPACE(tdev);
1167
1168        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
1169            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
1170                struct sk_buff *new_skb;
1171
1172                new_skb = skb_realloc_headroom(skb, max_headroom);
1173                if (!new_skb)
1174                        goto tx_err_dst_release;
1175
1176                if (skb->sk)
1177                        skb_set_owner_w(new_skb, skb->sk);
1178                consume_skb(skb);
1179                skb = new_skb;
1180        }
1181
1182        if (t->parms.collect_md) {
1183                if (t->encap.type != TUNNEL_ENCAP_NONE)
1184                        goto tx_err_dst_release;
1185        } else {
1186                if (use_cache && ndst)
1187                        dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
1188        }
1189        skb_dst_set(skb, dst);
1190
1191        if (encap_limit >= 0) {
1192                init_tel_txopt(&opt, encap_limit);
1193                ipv6_push_frag_opts(skb, &opt.ops, &proto);
1194        }
1195        hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
1196
1197        /* Calculate max headroom for all the headers and adjust
1198         * needed_headroom if necessary.
1199         */
1200        max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
1201                        + dst->header_len + t->hlen;
1202        if (max_headroom > dev->needed_headroom)
1203                dev->needed_headroom = max_headroom;
1204
1205        err = ip6_tnl_encap(skb, t, &proto, fl6);
1206        if (err)
1207                return err;
1208
1209        skb_push(skb, sizeof(struct ipv6hdr));
1210        skb_reset_network_header(skb);
1211        ipv6h = ipv6_hdr(skb);
1212        ip6_flow_hdr(ipv6h, dsfield,
1213                     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
1214        ipv6h->hop_limit = hop_limit;
1215        ipv6h->nexthdr = proto;
1216        ipv6h->saddr = fl6->saddr;
1217        ipv6h->daddr = fl6->daddr;
1218        ip6tunnel_xmit(NULL, skb, dev);
1219        return 0;
1220tx_err_link_failure:
1221        stats->tx_carrier_errors++;
1222        dst_link_failure(skb);
1223tx_err_dst_release:
1224        dst_release(dst);
1225        return err;
1226}
1227EXPORT_SYMBOL(ip6_tnl_xmit);
1228
1229static inline int
1230ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1231{
1232        struct ip6_tnl *t = netdev_priv(dev);
1233        const struct iphdr  *iph = ip_hdr(skb);
1234        int encap_limit = -1;
1235        struct flowi6 fl6;
1236        __u8 dsfield;
1237        __u32 mtu;
1238        u8 tproto;
1239        int err;
1240
1241        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1242
1243        tproto = READ_ONCE(t->parms.proto);
1244        if (tproto != IPPROTO_IPIP && tproto != 0)
1245                return -1;
1246
1247        if (t->parms.collect_md) {
1248                struct ip_tunnel_info *tun_info;
1249                const struct ip_tunnel_key *key;
1250
1251                tun_info = skb_tunnel_info(skb);
1252                if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
1253                             ip_tunnel_info_af(tun_info) != AF_INET6))
1254                        return -1;
1255                key = &tun_info->key;
1256                memset(&fl6, 0, sizeof(fl6));
1257                fl6.flowi6_proto = IPPROTO_IPIP;
1258                fl6.daddr = key->u.ipv6.dst;
1259                fl6.flowlabel = key->label;
1260                dsfield =  key->tos;
1261        } else {
1262                if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1263                        encap_limit = t->parms.encap_limit;
1264
1265                memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1266                fl6.flowi6_proto = IPPROTO_IPIP;
1267
1268                if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1269                        dsfield = ipv4_get_dsfield(iph);
1270                else
1271                        dsfield = ip6_tclass(t->parms.flowinfo);
1272                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1273                        fl6.flowi6_mark = skb->mark;
1274                else
1275                        fl6.flowi6_mark = t->parms.fwmark;
1276        }
1277
1278        fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1279
1280        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1281                return -1;
1282
1283        dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
1284
1285        skb_set_inner_ipproto(skb, IPPROTO_IPIP);
1286
1287        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1288                           IPPROTO_IPIP);
1289        if (err != 0) {
1290                /* XXX: send ICMP error even if DF is not set. */
1291                if (err == -EMSGSIZE)
1292                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
1293                                  htonl(mtu));
1294                return -1;
1295        }
1296
1297        return 0;
1298}
1299
1300static inline int
1301ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1302{
1303        struct ip6_tnl *t = netdev_priv(dev);
1304        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1305        int encap_limit = -1;
1306        __u16 offset;
1307        struct flowi6 fl6;
1308        __u8 dsfield;
1309        __u32 mtu;
1310        u8 tproto;
1311        int err;
1312
1313        tproto = READ_ONCE(t->parms.proto);
1314        if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
1315            ip6_tnl_addr_conflict(t, ipv6h))
1316                return -1;
1317
1318        if (t->parms.collect_md) {
1319                struct ip_tunnel_info *tun_info;
1320                const struct ip_tunnel_key *key;
1321
1322                tun_info = skb_tunnel_info(skb);
1323                if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
1324                             ip_tunnel_info_af(tun_info) != AF_INET6))
1325                        return -1;
1326                key = &tun_info->key;
1327                memset(&fl6, 0, sizeof(fl6));
1328                fl6.flowi6_proto = IPPROTO_IPV6;
1329                fl6.daddr = key->u.ipv6.dst;
1330                fl6.flowlabel = key->label;
1331                dsfield = key->tos;
1332        } else {
1333                offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
1334                /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
1335                ipv6h = ipv6_hdr(skb);
1336                if (offset > 0) {
1337                        struct ipv6_tlv_tnl_enc_lim *tel;
1338
1339                        tel = (void *)&skb_network_header(skb)[offset];
1340                        if (tel->encap_limit == 0) {
1341                                icmpv6_send(skb, ICMPV6_PARAMPROB,
1342                                            ICMPV6_HDR_FIELD, offset + 2);
1343                                return -1;
1344                        }
1345                        encap_limit = tel->encap_limit - 1;
1346                } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
1347                        encap_limit = t->parms.encap_limit;
1348                }
1349
1350                memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
1351                fl6.flowi6_proto = IPPROTO_IPV6;
1352
1353                if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
1354                        dsfield = ipv6_get_dsfield(ipv6h);
1355                else
1356                        dsfield = ip6_tclass(t->parms.flowinfo);
1357                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
1358                        fl6.flowlabel |= ip6_flowlabel(ipv6h);
1359                if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
1360                        fl6.flowi6_mark = skb->mark;
1361                else
1362                        fl6.flowi6_mark = t->parms.fwmark;
1363        }
1364
1365        fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
1366
1367        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
1368                return -1;
1369
1370        dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
1371
1372        skb_set_inner_ipproto(skb, IPPROTO_IPV6);
1373
1374        err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
1375                           IPPROTO_IPV6);
1376        if (err != 0) {
1377                if (err == -EMSGSIZE)
1378                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1379                return -1;
1380        }
1381
1382        return 0;
1383}
1384
1385static netdev_tx_t
1386ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
1387{
1388        struct ip6_tnl *t = netdev_priv(dev);
1389        struct net_device_stats *stats = &t->dev->stats;
1390        int ret;
1391
1392        switch (skb->protocol) {
1393        case htons(ETH_P_IP):
1394                ret = ip4ip6_tnl_xmit(skb, dev);
1395                break;
1396        case htons(ETH_P_IPV6):
1397                ret = ip6ip6_tnl_xmit(skb, dev);
1398                break;
1399        default:
1400                goto tx_err;
1401        }
1402
1403        if (ret < 0)
1404                goto tx_err;
1405
1406        return NETDEV_TX_OK;
1407
1408tx_err:
1409        stats->tx_errors++;
1410        stats->tx_dropped++;
1411        kfree_skb(skb);
1412        return NETDEV_TX_OK;
1413}
1414
1415static void ip6_tnl_link_config(struct ip6_tnl *t)
1416{
1417        struct net_device *dev = t->dev;
1418        struct __ip6_tnl_parm *p = &t->parms;
1419        struct flowi6 *fl6 = &t->fl.u.ip6;
1420        int t_hlen;
1421
1422        memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1423        memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1424
1425        /* Set up flowi template */
1426        fl6->saddr = p->laddr;
1427        fl6->daddr = p->raddr;
1428        fl6->flowi6_oif = p->link;
1429        fl6->flowlabel = 0;
1430
1431        if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1432                fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1433        if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1434                fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1435
1436        p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
1437        p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
1438
1439        if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1440                dev->flags |= IFF_POINTOPOINT;
1441        else
1442                dev->flags &= ~IFF_POINTOPOINT;
1443
1444        t->tun_hlen = 0;
1445        t->hlen = t->encap_hlen + t->tun_hlen;
1446        t_hlen = t->hlen + sizeof(struct ipv6hdr);
1447
1448        if (p->flags & IP6_TNL_F_CAP_XMIT) {
1449                int strict = (ipv6_addr_type(&p->raddr) &
1450                              (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1451
1452                struct rt6_info *rt = rt6_lookup(t->net,
1453                                                 &p->raddr, &p->laddr,
1454                                                 p->link, NULL, strict);
1455
1456                if (!rt)
1457                        return;
1458
1459                if (rt->dst.dev) {
1460                        dev->hard_header_len = rt->dst.dev->hard_header_len +
1461                                t_hlen;
1462
1463                        dev->mtu = rt->dst.dev->mtu - t_hlen;
1464                        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1465                                dev->mtu -= 8;
1466
1467                        if (dev->mtu < IPV6_MIN_MTU)
1468                                dev->mtu = IPV6_MIN_MTU;
1469                }
1470                ip6_rt_put(rt);
1471        }
1472}
1473
1474/**
1475 * ip6_tnl_change - update the tunnel parameters
1476 *   @t: tunnel to be changed
1477 *   @p: tunnel configuration parameters
1478 *
1479 * Description:
1480 *   ip6_tnl_change() updates the tunnel parameters
1481 **/
1482
1483static int
1484ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
1485{
1486        t->parms.laddr = p->laddr;
1487        t->parms.raddr = p->raddr;
1488        t->parms.flags = p->flags;
1489        t->parms.hop_limit = p->hop_limit;
1490        t->parms.encap_limit = p->encap_limit;
1491        t->parms.flowinfo = p->flowinfo;
1492        t->parms.link = p->link;
1493        t->parms.proto = p->proto;
1494        t->parms.fwmark = p->fwmark;
1495        dst_cache_reset(&t->dst_cache);
1496        ip6_tnl_link_config(t);
1497        return 0;
1498}
1499
1500static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1501{
1502        struct net *net = t->net;
1503        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1504        int err;
1505
1506        ip6_tnl_unlink(ip6n, t);
1507        synchronize_net();
1508        err = ip6_tnl_change(t, p);
1509        ip6_tnl_link(ip6n, t);
1510        netdev_state_change(t->dev);
1511        return err;
1512}
1513
1514static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
1515{
1516        /* for default tnl0 device allow to change only the proto */
1517        t->parms.proto = p->proto;
1518        netdev_state_change(t->dev);
1519        return 0;
1520}
1521
1522static void
1523ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
1524{
1525        p->laddr = u->laddr;
1526        p->raddr = u->raddr;
1527        p->flags = u->flags;
1528        p->hop_limit = u->hop_limit;
1529        p->encap_limit = u->encap_limit;
1530        p->flowinfo = u->flowinfo;
1531        p->link = u->link;
1532        p->proto = u->proto;
1533        memcpy(p->name, u->name, sizeof(u->name));
1534}
1535
1536static void
1537ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
1538{
1539        u->laddr = p->laddr;
1540        u->raddr = p->raddr;
1541        u->flags = p->flags;
1542        u->hop_limit = p->hop_limit;
1543        u->encap_limit = p->encap_limit;
1544        u->flowinfo = p->flowinfo;
1545        u->link = p->link;
1546        u->proto = p->proto;
1547        memcpy(u->name, p->name, sizeof(u->name));
1548}
1549
1550/**
1551 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1552 *   @dev: virtual device associated with tunnel
1553 *   @ifr: parameters passed from userspace
1554 *   @cmd: command to be performed
1555 *
1556 * Description:
1557 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1558 *   from userspace.
1559 *
1560 *   The possible commands are the following:
1561 *     %SIOCGETTUNNEL: get tunnel parameters for device
1562 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
1563 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
1564 *     %SIOCDELTUNNEL: delete tunnel
1565 *
1566 *   The fallback device "ip6tnl0", created during module
1567 *   initialization, can be used for creating other tunnel devices.
1568 *
1569 * Return:
1570 *   0 on success,
1571 *   %-EFAULT if unable to copy data to or from userspace,
1572 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set
1573 *   %-EINVAL if passed tunnel parameters are invalid,
1574 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
1575 *   %-ENODEV if attempting to change or delete a nonexisting device
1576 **/
1577
1578static int
1579ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1580{
1581        int err = 0;
1582        struct ip6_tnl_parm p;
1583        struct __ip6_tnl_parm p1;
1584        struct ip6_tnl *t = netdev_priv(dev);
1585        struct net *net = t->net;
1586        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1587
1588        memset(&p1, 0, sizeof(p1));
1589
1590        switch (cmd) {
1591        case SIOCGETTUNNEL:
1592                if (dev == ip6n->fb_tnl_dev) {
1593                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1594                                err = -EFAULT;
1595                                break;
1596                        }
1597                        ip6_tnl_parm_from_user(&p1, &p);
1598                        t = ip6_tnl_locate(net, &p1, 0);
1599                        if (IS_ERR(t))
1600                                t = netdev_priv(dev);
1601                } else {
1602                        memset(&p, 0, sizeof(p));
1603                }
1604                ip6_tnl_parm_to_user(&p, &t->parms);
1605                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
1606                        err = -EFAULT;
1607                }
1608                break;
1609        case SIOCADDTUNNEL:
1610        case SIOCCHGTUNNEL:
1611                err = -EPERM;
1612                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1613                        break;
1614                err = -EFAULT;
1615                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1616                        break;
1617                err = -EINVAL;
1618                if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1619                    p.proto != 0)
1620                        break;
1621                ip6_tnl_parm_from_user(&p1, &p);
1622                t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
1623                if (cmd == SIOCCHGTUNNEL) {
1624                        if (!IS_ERR(t)) {
1625                                if (t->dev != dev) {
1626                                        err = -EEXIST;
1627                                        break;
1628                                }
1629                        } else
1630                                t = netdev_priv(dev);
1631                        if (dev == ip6n->fb_tnl_dev)
1632                                err = ip6_tnl0_update(t, &p1);
1633                        else
1634                                err = ip6_tnl_update(t, &p1);
1635                }
1636                if (!IS_ERR(t)) {
1637                        err = 0;
1638                        ip6_tnl_parm_to_user(&p, &t->parms);
1639                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1640                                err = -EFAULT;
1641
1642                } else {
1643                        err = PTR_ERR(t);
1644                }
1645                break;
1646        case SIOCDELTUNNEL:
1647                err = -EPERM;
1648                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1649                        break;
1650
1651                if (dev == ip6n->fb_tnl_dev) {
1652                        err = -EFAULT;
1653                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1654                                break;
1655                        err = -ENOENT;
1656                        ip6_tnl_parm_from_user(&p1, &p);
1657                        t = ip6_tnl_locate(net, &p1, 0);
1658                        if (IS_ERR(t))
1659                                break;
1660                        err = -EPERM;
1661                        if (t->dev == ip6n->fb_tnl_dev)
1662                                break;
1663                        dev = t->dev;
1664                }
1665                err = 0;
1666                unregister_netdevice(dev);
1667                break;
1668        default:
1669                err = -EINVAL;
1670        }
1671        return err;
1672}
1673
1674/**
1675 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1676 *   @dev: virtual device associated with tunnel
1677 *   @new_mtu: the new mtu
1678 *
1679 * Return:
1680 *   0 on success,
1681 *   %-EINVAL if mtu too small
1682 **/
1683
1684int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1685{
1686        struct ip6_tnl *tnl = netdev_priv(dev);
1687
1688        if (tnl->parms.proto == IPPROTO_IPV6) {
1689                if (new_mtu < IPV6_MIN_MTU)
1690                        return -EINVAL;
1691        } else {
1692                if (new_mtu < ETH_MIN_MTU)
1693                        return -EINVAL;
1694        }
1695        if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
1696                if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
1697                        return -EINVAL;
1698        } else {
1699                if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
1700                        return -EINVAL;
1701        }
1702        dev->mtu = new_mtu;
1703        return 0;
1704}
1705EXPORT_SYMBOL(ip6_tnl_change_mtu);
1706
1707int ip6_tnl_get_iflink(const struct net_device *dev)
1708{
1709        struct ip6_tnl *t = netdev_priv(dev);
1710
1711        return t->parms.link;
1712}
1713EXPORT_SYMBOL(ip6_tnl_get_iflink);
1714
1715int ip6_tnl_encap_add_ops(const struct ip6_tnl_encap_ops *ops,
1716                          unsigned int num)
1717{
1718        if (num >= MAX_IPTUN_ENCAP_OPS)
1719                return -ERANGE;
1720
1721        return !cmpxchg((const struct ip6_tnl_encap_ops **)
1722                        &ip6tun_encaps[num],
1723                        NULL, ops) ? 0 : -1;
1724}
1725EXPORT_SYMBOL(ip6_tnl_encap_add_ops);
1726
1727int ip6_tnl_encap_del_ops(const struct ip6_tnl_encap_ops *ops,
1728                          unsigned int num)
1729{
1730        int ret;
1731
1732        if (num >= MAX_IPTUN_ENCAP_OPS)
1733                return -ERANGE;
1734
1735        ret = (cmpxchg((const struct ip6_tnl_encap_ops **)
1736                       &ip6tun_encaps[num],
1737                       ops, NULL) == ops) ? 0 : -1;
1738
1739        synchronize_net();
1740
1741        return ret;
1742}
1743EXPORT_SYMBOL(ip6_tnl_encap_del_ops);
1744
1745int ip6_tnl_encap_setup(struct ip6_tnl *t,
1746                        struct ip_tunnel_encap *ipencap)
1747{
1748        int hlen;
1749
1750        memset(&t->encap, 0, sizeof(t->encap));
1751
1752        hlen = ip6_encap_hlen(ipencap);
1753        if (hlen < 0)
1754                return hlen;
1755
1756        t->encap.type = ipencap->type;
1757        t->encap.sport = ipencap->sport;
1758        t->encap.dport = ipencap->dport;
1759        t->encap.flags = ipencap->flags;
1760
1761        t->encap_hlen = hlen;
1762        t->hlen = t->encap_hlen + t->tun_hlen;
1763
1764        return 0;
1765}
1766EXPORT_SYMBOL_GPL(ip6_tnl_encap_setup);
1767
1768static const struct net_device_ops ip6_tnl_netdev_ops = {
1769        .ndo_init       = ip6_tnl_dev_init,
1770        .ndo_uninit     = ip6_tnl_dev_uninit,
1771        .ndo_start_xmit = ip6_tnl_start_xmit,
1772        .ndo_do_ioctl   = ip6_tnl_ioctl,
1773        .ndo_change_mtu = ip6_tnl_change_mtu,
1774        .ndo_get_stats  = ip6_get_stats,
1775        .ndo_get_iflink = ip6_tnl_get_iflink,
1776};
1777
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPXIPX_FEATURES (NETIF_F_HW_CSUM |	\
			 NETIF_F_GSO_SOFTWARE |	\
			 NETIF_F_HIGHDMA |	\
			 NETIF_F_FRAGLIST |	\
			 NETIF_F_SG)
1783
1784/**
1785 * ip6_tnl_dev_setup - setup virtual tunnel device
1786 *   @dev: virtual device associated with tunnel
1787 *
1788 * Description:
1789 *   Initialize function pointers and device parameters
1790 **/
1791
1792static void ip6_tnl_dev_setup(struct net_device *dev)
1793{
1794        dev->netdev_ops = &ip6_tnl_netdev_ops;
1795        dev->needs_free_netdev = true;
1796        dev->priv_destructor = ip6_dev_free;
1797
1798        dev->type = ARPHRD_TUNNEL6;
1799        dev->flags |= IFF_NOARP;
1800        dev->addr_len = sizeof(struct in6_addr);
1801        dev->features |= NETIF_F_LLTX;
1802        netif_keep_dst(dev);
1803
1804        dev->features           |= IPXIPX_FEATURES;
1805        dev->hw_features        |= IPXIPX_FEATURES;
1806
1807        /* This perm addr will be used as interface identifier by IPv6 */
1808        dev->addr_assign_type = NET_ADDR_RANDOM;
1809        eth_random_addr(dev->perm_addr);
1810}
1811
1812
1813/**
1814 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1815 *   @dev: virtual device associated with tunnel
1816 **/
1817
1818static inline int
1819ip6_tnl_dev_init_gen(struct net_device *dev)
1820{
1821        struct ip6_tnl *t = netdev_priv(dev);
1822        int ret;
1823        int t_hlen;
1824
1825        t->dev = dev;
1826        t->net = dev_net(dev);
1827        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1828        if (!dev->tstats)
1829                return -ENOMEM;
1830
1831        ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
1832        if (ret)
1833                goto free_stats;
1834
1835        ret = gro_cells_init(&t->gro_cells, dev);
1836        if (ret)
1837                goto destroy_dst;
1838
1839        t->tun_hlen = 0;
1840        t->hlen = t->encap_hlen + t->tun_hlen;
1841        t_hlen = t->hlen + sizeof(struct ipv6hdr);
1842
1843        dev->type = ARPHRD_TUNNEL6;
1844        dev->hard_header_len = LL_MAX_HEADER + t_hlen;
1845        dev->mtu = ETH_DATA_LEN - t_hlen;
1846        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1847                dev->mtu -= 8;
1848        dev->min_mtu = ETH_MIN_MTU;
1849        dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
1850
1851        return 0;
1852
1853destroy_dst:
1854        dst_cache_destroy(&t->dst_cache);
1855free_stats:
1856        free_percpu(dev->tstats);
1857        dev->tstats = NULL;
1858
1859        return ret;
1860}
1861
1862/**
1863 * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
1864 *   @dev: virtual device associated with tunnel
1865 **/
1866
1867static int ip6_tnl_dev_init(struct net_device *dev)
1868{
1869        struct ip6_tnl *t = netdev_priv(dev);
1870        int err = ip6_tnl_dev_init_gen(dev);
1871
1872        if (err)
1873                return err;
1874        ip6_tnl_link_config(t);
1875        if (t->parms.collect_md) {
1876                dev->features |= NETIF_F_NETNS_LOCAL;
1877                netif_keep_dst(dev);
1878        }
1879        return 0;
1880}
1881
1882/**
1883 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1884 *   @dev: fallback device
1885 *
1886 * Return: 0
1887 **/
1888
1889static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1890{
1891        struct ip6_tnl *t = netdev_priv(dev);
1892        struct net *net = dev_net(dev);
1893        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1894
1895        t->parms.proto = IPPROTO_IPV6;
1896        dev_hold(dev);
1897
1898        rcu_assign_pointer(ip6n->tnls_wc[0], t);
1899        return 0;
1900}
1901
1902static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
1903                            struct netlink_ext_ack *extack)
1904{
1905        u8 proto;
1906
1907        if (!data || !data[IFLA_IPTUN_PROTO])
1908                return 0;
1909
1910        proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1911        if (proto != IPPROTO_IPV6 &&
1912            proto != IPPROTO_IPIP &&
1913            proto != 0)
1914                return -EINVAL;
1915
1916        return 0;
1917}
1918
1919static void ip6_tnl_netlink_parms(struct nlattr *data[],
1920                                  struct __ip6_tnl_parm *parms)
1921{
1922        memset(parms, 0, sizeof(*parms));
1923
1924        if (!data)
1925                return;
1926
1927        if (data[IFLA_IPTUN_LINK])
1928                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1929
1930        if (data[IFLA_IPTUN_LOCAL])
1931                parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]);
1932
1933        if (data[IFLA_IPTUN_REMOTE])
1934                parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]);
1935
1936        if (data[IFLA_IPTUN_TTL])
1937                parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]);
1938
1939        if (data[IFLA_IPTUN_ENCAP_LIMIT])
1940                parms->encap_limit = nla_get_u8(data[IFLA_IPTUN_ENCAP_LIMIT]);
1941
1942        if (data[IFLA_IPTUN_FLOWINFO])
1943                parms->flowinfo = nla_get_be32(data[IFLA_IPTUN_FLOWINFO]);
1944
1945        if (data[IFLA_IPTUN_FLAGS])
1946                parms->flags = nla_get_u32(data[IFLA_IPTUN_FLAGS]);
1947
1948        if (data[IFLA_IPTUN_PROTO])
1949                parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1950
1951        if (data[IFLA_IPTUN_COLLECT_METADATA])
1952                parms->collect_md = true;
1953
1954        if (data[IFLA_IPTUN_FWMARK])
1955                parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
1956}
1957
1958static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
1959                                        struct ip_tunnel_encap *ipencap)
1960{
1961        bool ret = false;
1962
1963        memset(ipencap, 0, sizeof(*ipencap));
1964
1965        if (!data)
1966                return ret;
1967
1968        if (data[IFLA_IPTUN_ENCAP_TYPE]) {
1969                ret = true;
1970                ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
1971        }
1972
1973        if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
1974                ret = true;
1975                ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
1976        }
1977
1978        if (data[IFLA_IPTUN_ENCAP_SPORT]) {
1979                ret = true;
1980                ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
1981        }
1982
1983        if (data[IFLA_IPTUN_ENCAP_DPORT]) {
1984                ret = true;
1985                ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
1986        }
1987
1988        return ret;
1989}
1990
1991static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
1992                           struct nlattr *tb[], struct nlattr *data[],
1993                           struct netlink_ext_ack *extack)
1994{
1995        struct net *net = dev_net(dev);
1996        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1997        struct ip_tunnel_encap ipencap;
1998        struct ip6_tnl *nt, *t;
1999        int err;
2000
2001        nt = netdev_priv(dev);
2002
2003        if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
2004                err = ip6_tnl_encap_setup(nt, &ipencap);
2005                if (err < 0)
2006                        return err;
2007        }
2008
2009        ip6_tnl_netlink_parms(data, &nt->parms);
2010
2011        if (nt->parms.collect_md) {
2012                if (rtnl_dereference(ip6n->collect_md_tun))
2013                        return -EEXIST;
2014        } else {
2015                t = ip6_tnl_locate(net, &nt->parms, 0);
2016                if (!IS_ERR(t))
2017                        return -EEXIST;
2018        }
2019
2020        err = ip6_tnl_create2(dev);
2021        if (!err && tb[IFLA_MTU])
2022                ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
2023
2024        return err;
2025}
2026
2027static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
2028                              struct nlattr *data[],
2029                              struct netlink_ext_ack *extack)
2030{
2031        struct ip6_tnl *t = netdev_priv(dev);
2032        struct __ip6_tnl_parm p;
2033        struct net *net = t->net;
2034        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2035        struct ip_tunnel_encap ipencap;
2036
2037        if (dev == ip6n->fb_tnl_dev)
2038                return -EINVAL;
2039
2040        if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
2041                int err = ip6_tnl_encap_setup(t, &ipencap);
2042
2043                if (err < 0)
2044                        return err;
2045        }
2046        ip6_tnl_netlink_parms(data, &p);
2047        if (p.collect_md)
2048                return -EINVAL;
2049
2050        t = ip6_tnl_locate(net, &p, 0);
2051        if (!IS_ERR(t)) {
2052                if (t->dev != dev)
2053                        return -EEXIST;
2054        } else
2055                t = netdev_priv(dev);
2056
2057        return ip6_tnl_update(t, &p);
2058}
2059
2060static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
2061{
2062        struct net *net = dev_net(dev);
2063        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2064
2065        if (dev != ip6n->fb_tnl_dev)
2066                unregister_netdevice_queue(dev, head);
2067}
2068
2069static size_t ip6_tnl_get_size(const struct net_device *dev)
2070{
2071        return
2072                /* IFLA_IPTUN_LINK */
2073                nla_total_size(4) +
2074                /* IFLA_IPTUN_LOCAL */
2075                nla_total_size(sizeof(struct in6_addr)) +
2076                /* IFLA_IPTUN_REMOTE */
2077                nla_total_size(sizeof(struct in6_addr)) +
2078                /* IFLA_IPTUN_TTL */
2079                nla_total_size(1) +
2080                /* IFLA_IPTUN_ENCAP_LIMIT */
2081                nla_total_size(1) +
2082                /* IFLA_IPTUN_FLOWINFO */
2083                nla_total_size(4) +
2084                /* IFLA_IPTUN_FLAGS */
2085                nla_total_size(4) +
2086                /* IFLA_IPTUN_PROTO */
2087                nla_total_size(1) +
2088                /* IFLA_IPTUN_ENCAP_TYPE */
2089                nla_total_size(2) +
2090                /* IFLA_IPTUN_ENCAP_FLAGS */
2091                nla_total_size(2) +
2092                /* IFLA_IPTUN_ENCAP_SPORT */
2093                nla_total_size(2) +
2094                /* IFLA_IPTUN_ENCAP_DPORT */
2095                nla_total_size(2) +
2096                /* IFLA_IPTUN_COLLECT_METADATA */
2097                nla_total_size(0) +
2098                /* IFLA_IPTUN_FWMARK */
2099                nla_total_size(4) +
2100                0;
2101}
2102
2103static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
2104{
2105        struct ip6_tnl *tunnel = netdev_priv(dev);
2106        struct __ip6_tnl_parm *parm = &tunnel->parms;
2107
2108        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
2109            nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
2110            nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) ||
2111            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
2112            nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
2113            nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
2114            nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
2115            nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
2116            nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
2117                goto nla_put_failure;
2118
2119        if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
2120            nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
2121            nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
2122            nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
2123                goto nla_put_failure;
2124
2125        if (parm->collect_md)
2126                if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
2127                        goto nla_put_failure;
2128
2129        return 0;
2130
2131nla_put_failure:
2132        return -EMSGSIZE;
2133}
2134
2135struct net *ip6_tnl_get_link_net(const struct net_device *dev)
2136{
2137        struct ip6_tnl *tunnel = netdev_priv(dev);
2138
2139        return tunnel->net;
2140}
2141EXPORT_SYMBOL(ip6_tnl_get_link_net);
2142
2143static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
2144        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
2145        [IFLA_IPTUN_LOCAL]              = { .len = sizeof(struct in6_addr) },
2146        [IFLA_IPTUN_REMOTE]             = { .len = sizeof(struct in6_addr) },
2147        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
2148        [IFLA_IPTUN_ENCAP_LIMIT]        = { .type = NLA_U8 },
2149        [IFLA_IPTUN_FLOWINFO]           = { .type = NLA_U32 },
2150        [IFLA_IPTUN_FLAGS]              = { .type = NLA_U32 },
2151        [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
2152        [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
2153        [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
2154        [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
2155        [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
2156        [IFLA_IPTUN_COLLECT_METADATA]   = { .type = NLA_FLAG },
2157        [IFLA_IPTUN_FWMARK]             = { .type = NLA_U32 },
2158};
2159
2160static struct rtnl_link_ops ip6_link_ops __read_mostly = {
2161        .kind           = "ip6tnl",
2162        .maxtype        = IFLA_IPTUN_MAX,
2163        .policy         = ip6_tnl_policy,
2164        .priv_size      = sizeof(struct ip6_tnl),
2165        .setup          = ip6_tnl_dev_setup,
2166        .validate       = ip6_tnl_validate,
2167        .newlink        = ip6_tnl_newlink,
2168        .changelink     = ip6_tnl_changelink,
2169        .dellink        = ip6_tnl_dellink,
2170        .get_size       = ip6_tnl_get_size,
2171        .fill_info      = ip6_tnl_fill_info,
2172        .get_link_net   = ip6_tnl_get_link_net,
2173};
2174
2175static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
2176        .handler        = ip4ip6_rcv,
2177        .err_handler    = ip4ip6_err,
2178        .priority       =       1,
2179};
2180
2181static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
2182        .handler        = ip6ip6_rcv,
2183        .err_handler    = ip6ip6_err,
2184        .priority       =       1,
2185};
2186
2187static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
2188{
2189        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2190        struct net_device *dev, *aux;
2191        int h;
2192        struct ip6_tnl *t;
2193
2194        for_each_netdev_safe(net, dev, aux)
2195                if (dev->rtnl_link_ops == &ip6_link_ops)
2196                        unregister_netdevice_queue(dev, list);
2197
2198        for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
2199                t = rtnl_dereference(ip6n->tnls_r_l[h]);
2200                while (t) {
2201                        /* If dev is in the same netns, it has already
2202                         * been added to the list by the previous loop.
2203                         */
2204                        if (!net_eq(dev_net(t->dev), net))
2205                                unregister_netdevice_queue(t->dev, list);
2206                        t = rtnl_dereference(t->next);
2207                }
2208        }
2209}
2210
2211static int __net_init ip6_tnl_init_net(struct net *net)
2212{
2213        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
2214        struct ip6_tnl *t = NULL;
2215        int err;
2216
2217        ip6n->tnls[0] = ip6n->tnls_wc;
2218        ip6n->tnls[1] = ip6n->tnls_r_l;
2219
2220        if (!net_has_fallback_tunnels(net))
2221                return 0;
2222        err = -ENOMEM;
2223        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
2224                                        NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
2225
2226        if (!ip6n->fb_tnl_dev)
2227                goto err_alloc_dev;
2228        dev_net_set(ip6n->fb_tnl_dev, net);
2229        ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
2230        /* FB netdevice is special: we have one, and only one per netns.
2231         * Allowing to move it to another netns is clearly unsafe.
2232         */
2233        ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
2234
2235        err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
2236        if (err < 0)
2237                goto err_register;
2238
2239        err = register_netdev(ip6n->fb_tnl_dev);
2240        if (err < 0)
2241                goto err_register;
2242
2243        t = netdev_priv(ip6n->fb_tnl_dev);
2244
2245        strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
2246        return 0;
2247
2248err_register:
2249        free_netdev(ip6n->fb_tnl_dev);
2250err_alloc_dev:
2251        return err;
2252}
2253
2254static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
2255{
2256        struct net *net;
2257        LIST_HEAD(list);
2258
2259        rtnl_lock();
2260        list_for_each_entry(net, net_list, exit_list)
2261                ip6_tnl_destroy_tunnels(net, &list);
2262        unregister_netdevice_many(&list);
2263        rtnl_unlock();
2264}
2265
2266static struct pernet_operations ip6_tnl_net_ops = {
2267        .init = ip6_tnl_init_net,
2268        .exit_batch = ip6_tnl_exit_batch_net,
2269        .id   = &ip6_tnl_net_id,
2270        .size = sizeof(struct ip6_tnl_net),
2271};
2272
2273/**
2274 * ip6_tunnel_init - register protocol and reserve needed resources
2275 *
2276 * Return: 0 on success
2277 **/
2278
2279static int __init ip6_tunnel_init(void)
2280{
2281        int  err;
2282
2283        if (!ipv6_mod_enabled())
2284                return -EOPNOTSUPP;
2285
2286        err = register_pernet_device(&ip6_tnl_net_ops);
2287        if (err < 0)
2288                goto out_pernet;
2289
2290        err = xfrm6_tunnel_register(&ip4ip6_handler, AF_INET);
2291        if (err < 0) {
2292                pr_err("%s: can't register ip4ip6\n", __func__);
2293                goto out_ip4ip6;
2294        }
2295
2296        err = xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6);
2297        if (err < 0) {
2298                pr_err("%s: can't register ip6ip6\n", __func__);
2299                goto out_ip6ip6;
2300        }
2301        err = rtnl_link_register(&ip6_link_ops);
2302        if (err < 0)
2303                goto rtnl_link_failed;
2304
2305        return 0;
2306
2307rtnl_link_failed:
2308        xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
2309out_ip6ip6:
2310        xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
2311out_ip4ip6:
2312        unregister_pernet_device(&ip6_tnl_net_ops);
2313out_pernet:
2314        return err;
2315}
2316
2317/**
2318 * ip6_tunnel_cleanup - free resources and unregister protocol
2319 **/
2320
2321static void __exit ip6_tunnel_cleanup(void)
2322{
2323        rtnl_link_unregister(&ip6_link_ops);
2324        if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
2325                pr_info("%s: can't deregister ip4ip6\n", __func__);
2326
2327        if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
2328                pr_info("%s: can't deregister ip6ip6\n", __func__);
2329
2330        unregister_pernet_device(&ip6_tnl_net_ops);
2331}
2332
2333module_init(ip6_tunnel_init);
2334module_exit(ip6_tunnel_cleanup);
2335