linux/net/ipv6/sit.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   9 *
  10 *      Changes:
  11 * Roger Venning <r.venning@telstra.com>:       6to4 support
  12 * Nate Thompson <nate@thebog.net>:             6to4 support
  13 * Fred Templin <fred.l.templin@boeing.com>:    isatap support
  14 */
  15
  16#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  17
  18#include <linux/module.h>
  19#include <linux/capability.h>
  20#include <linux/errno.h>
  21#include <linux/types.h>
  22#include <linux/socket.h>
  23#include <linux/sockios.h>
  24#include <linux/net.h>
  25#include <linux/in6.h>
  26#include <linux/netdevice.h>
  27#include <linux/if_arp.h>
  28#include <linux/icmp.h>
  29#include <linux/slab.h>
  30#include <linux/uaccess.h>
  31#include <linux/init.h>
  32#include <linux/netfilter_ipv4.h>
  33#include <linux/if_ether.h>
  34
  35#include <net/sock.h>
  36#include <net/snmp.h>
  37
  38#include <net/ipv6.h>
  39#include <net/protocol.h>
  40#include <net/transp_v6.h>
  41#include <net/ip6_fib.h>
  42#include <net/ip6_route.h>
  43#include <net/ndisc.h>
  44#include <net/addrconf.h>
  45#include <net/ip.h>
  46#include <net/udp.h>
  47#include <net/icmp.h>
  48#include <net/ip_tunnels.h>
  49#include <net/inet_ecn.h>
  50#include <net/xfrm.h>
  51#include <net/dsfield.h>
  52#include <net/net_namespace.h>
  53#include <net/netns/generic.h>
  54
/*
   This version of net/ipv6/sit.c was cloned from net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
  60
  61#define IP6_SIT_HASH_SIZE  16
  62#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
  63
  64static bool log_ecn_error = true;
  65module_param(log_ecn_error, bool, 0644);
  66MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  67
  68static int ipip6_tunnel_init(struct net_device *dev);
  69static void ipip6_tunnel_setup(struct net_device *dev);
  70static void ipip6_dev_free(struct net_device *dev);
  71static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
  72                      __be32 *v4dst);
  73static struct rtnl_link_ops sit_link_ops __read_mostly;
  74
  75static unsigned int sit_net_id __read_mostly;
  76struct sit_net {
  77        struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
  78        struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
  79        struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
  80        struct ip_tunnel __rcu *tunnels_wc[1];
  81        struct ip_tunnel __rcu **tunnels[4];
  82
  83        struct net_device *fb_tunnel_dev;
  84};
  85
  86static inline struct sit_net *dev_to_sit_net(struct net_device *dev)
  87{
  88        struct ip_tunnel *t = netdev_priv(dev);
  89
  90        return net_generic(t->net, sit_net_id);
  91}
  92
  93/*
  94 * Must be invoked with rcu_read_lock
  95 */
  96static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
  97                                             struct net_device *dev,
  98                                             __be32 remote, __be32 local,
  99                                             int sifindex)
 100{
 101        unsigned int h0 = HASH(remote);
 102        unsigned int h1 = HASH(local);
 103        struct ip_tunnel *t;
 104        struct sit_net *sitn = net_generic(net, sit_net_id);
 105        int ifindex = dev ? dev->ifindex : 0;
 106
 107        for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
 108                if (local == t->parms.iph.saddr &&
 109                    remote == t->parms.iph.daddr &&
 110                    (!dev || !t->parms.link || ifindex == t->parms.link ||
 111                     sifindex == t->parms.link) &&
 112                    (t->dev->flags & IFF_UP))
 113                        return t;
 114        }
 115        for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
 116                if (remote == t->parms.iph.daddr &&
 117                    (!dev || !t->parms.link || ifindex == t->parms.link ||
 118                     sifindex == t->parms.link) &&
 119                    (t->dev->flags & IFF_UP))
 120                        return t;
 121        }
 122        for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
 123                if (local == t->parms.iph.saddr &&
 124                    (!dev || !t->parms.link || ifindex == t->parms.link ||
 125                     sifindex == t->parms.link) &&
 126                    (t->dev->flags & IFF_UP))
 127                        return t;
 128        }
 129        t = rcu_dereference(sitn->tunnels_wc[0]);
 130        if (t && (t->dev->flags & IFF_UP))
 131                return t;
 132        return NULL;
 133}
 134
 135static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
 136                struct ip_tunnel_parm *parms)
 137{
 138        __be32 remote = parms->iph.daddr;
 139        __be32 local = parms->iph.saddr;
 140        unsigned int h = 0;
 141        int prio = 0;
 142
 143        if (remote) {
 144                prio |= 2;
 145                h ^= HASH(remote);
 146        }
 147        if (local) {
 148                prio |= 1;
 149                h ^= HASH(local);
 150        }
 151        return &sitn->tunnels[prio][h];
 152}
 153
 154static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
 155                struct ip_tunnel *t)
 156{
 157        return __ipip6_bucket(sitn, &t->parms);
 158}
 159
 160static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
 161{
 162        struct ip_tunnel __rcu **tp;
 163        struct ip_tunnel *iter;
 164
 165        for (tp = ipip6_bucket(sitn, t);
 166             (iter = rtnl_dereference(*tp)) != NULL;
 167             tp = &iter->next) {
 168                if (t == iter) {
 169                        rcu_assign_pointer(*tp, t->next);
 170                        break;
 171                }
 172        }
 173}
 174
 175static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
 176{
 177        struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
 178
 179        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 180        rcu_assign_pointer(*tp, t);
 181}
 182
 183static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
 184{
 185#ifdef CONFIG_IPV6_SIT_6RD
 186        struct ip_tunnel *t = netdev_priv(dev);
 187
 188        if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
 189                ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
 190                t->ip6rd.relay_prefix = 0;
 191                t->ip6rd.prefixlen = 16;
 192                t->ip6rd.relay_prefixlen = 0;
 193        } else {
 194                struct ip_tunnel *t0 = netdev_priv(sitn->fb_tunnel_dev);
 195                memcpy(&t->ip6rd, &t0->ip6rd, sizeof(t->ip6rd));
 196        }
 197#endif
 198}
 199
 200static int ipip6_tunnel_create(struct net_device *dev)
 201{
 202        struct ip_tunnel *t = netdev_priv(dev);
 203        struct net *net = dev_net(dev);
 204        struct sit_net *sitn = net_generic(net, sit_net_id);
 205        int err;
 206
 207        __dev_addr_set(dev, &t->parms.iph.saddr, 4);
 208        memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
 209
 210        if ((__force u16)t->parms.i_flags & SIT_ISATAP)
 211                dev->priv_flags |= IFF_ISATAP;
 212
 213        dev->rtnl_link_ops = &sit_link_ops;
 214
 215        err = register_netdevice(dev);
 216        if (err < 0)
 217                goto out;
 218
 219        ipip6_tunnel_clone_6rd(dev, sitn);
 220
 221        ipip6_tunnel_link(sitn, t);
 222        return 0;
 223
 224out:
 225        return err;
 226}
 227
 228static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 229                struct ip_tunnel_parm *parms, int create)
 230{
 231        __be32 remote = parms->iph.daddr;
 232        __be32 local = parms->iph.saddr;
 233        struct ip_tunnel *t, *nt;
 234        struct ip_tunnel __rcu **tp;
 235        struct net_device *dev;
 236        char name[IFNAMSIZ];
 237        struct sit_net *sitn = net_generic(net, sit_net_id);
 238
 239        for (tp = __ipip6_bucket(sitn, parms);
 240            (t = rtnl_dereference(*tp)) != NULL;
 241             tp = &t->next) {
 242                if (local == t->parms.iph.saddr &&
 243                    remote == t->parms.iph.daddr &&
 244                    parms->link == t->parms.link) {
 245                        if (create)
 246                                return NULL;
 247                        else
 248                                return t;
 249                }
 250        }
 251        if (!create)
 252                goto failed;
 253
 254        if (parms->name[0]) {
 255                if (!dev_valid_name(parms->name))
 256                        goto failed;
 257                strlcpy(name, parms->name, IFNAMSIZ);
 258        } else {
 259                strcpy(name, "sit%d");
 260        }
 261        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 262                           ipip6_tunnel_setup);
 263        if (!dev)
 264                return NULL;
 265
 266        dev_net_set(dev, net);
 267
 268        nt = netdev_priv(dev);
 269
 270        nt->parms = *parms;
 271        if (ipip6_tunnel_create(dev) < 0)
 272                goto failed_free;
 273
 274        if (!parms->name[0])
 275                strcpy(parms->name, dev->name);
 276
 277        return nt;
 278
 279failed_free:
 280        free_netdev(dev);
 281failed:
 282        return NULL;
 283}
 284
 285#define for_each_prl_rcu(start)                 \
 286        for (prl = rcu_dereference(start);      \
 287             prl;                               \
 288             prl = rcu_dereference(prl->next))
 289
 290static struct ip_tunnel_prl_entry *
 291__ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 292{
 293        struct ip_tunnel_prl_entry *prl;
 294
 295        for_each_prl_rcu(t->prl)
 296                if (prl->addr == addr)
 297                        break;
 298        return prl;
 299
 300}
 301
 302static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
 303{
 304        struct ip_tunnel *t = netdev_priv(dev);
 305        struct ip_tunnel_prl kprl, *kp;
 306        struct ip_tunnel_prl_entry *prl;
 307        unsigned int cmax, c = 0, ca, len;
 308        int ret = 0;
 309
 310        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
 311                return -EINVAL;
 312
 313        if (copy_from_user(&kprl, a, sizeof(kprl)))
 314                return -EFAULT;
 315        cmax = kprl.datalen / sizeof(kprl);
 316        if (cmax > 1 && kprl.addr != htonl(INADDR_ANY))
 317                cmax = 1;
 318
 319        /* For simple GET or for root users,
 320         * we try harder to allocate.
 321         */
 322        kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
 323                kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
 324                NULL;
 325
 326        rcu_read_lock();
 327
 328        ca = min(t->prl_count, cmax);
 329
 330        if (!kp) {
 331                /* We don't try hard to allocate much memory for
 332                 * non-root users.
 333                 * For root users, retry allocating enough memory for
 334                 * the answer.
 335                 */
 336                kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
 337                                              __GFP_NOWARN);
 338                if (!kp) {
 339                        ret = -ENOMEM;
 340                        goto out;
 341                }
 342        }
 343
 344        c = 0;
 345        for_each_prl_rcu(t->prl) {
 346                if (c >= cmax)
 347                        break;
 348                if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr)
 349                        continue;
 350                kp[c].addr = prl->addr;
 351                kp[c].flags = prl->flags;
 352                c++;
 353                if (kprl.addr != htonl(INADDR_ANY))
 354                        break;
 355        }
 356out:
 357        rcu_read_unlock();
 358
 359        len = sizeof(*kp) * c;
 360        ret = 0;
 361        if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen))
 362                ret = -EFAULT;
 363
 364        kfree(kp);
 365
 366        return ret;
 367}
 368
 369static int
 370ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 371{
 372        struct ip_tunnel_prl_entry *p;
 373        int err = 0;
 374
 375        if (a->addr == htonl(INADDR_ANY))
 376                return -EINVAL;
 377
 378        ASSERT_RTNL();
 379
 380        for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
 381                if (p->addr == a->addr) {
 382                        if (chg) {
 383                                p->flags = a->flags;
 384                                goto out;
 385                        }
 386                        err = -EEXIST;
 387                        goto out;
 388                }
 389        }
 390
 391        if (chg) {
 392                err = -ENXIO;
 393                goto out;
 394        }
 395
 396        p = kzalloc(sizeof(struct ip_tunnel_prl_entry), GFP_KERNEL);
 397        if (!p) {
 398                err = -ENOBUFS;
 399                goto out;
 400        }
 401
 402        p->next = t->prl;
 403        p->addr = a->addr;
 404        p->flags = a->flags;
 405        t->prl_count++;
 406        rcu_assign_pointer(t->prl, p);
 407out:
 408        return err;
 409}
 410
 411static void prl_list_destroy_rcu(struct rcu_head *head)
 412{
 413        struct ip_tunnel_prl_entry *p, *n;
 414
 415        p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
 416        do {
 417                n = rcu_dereference_protected(p->next, 1);
 418                kfree(p);
 419                p = n;
 420        } while (p);
 421}
 422
 423static int
 424ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 425{
 426        struct ip_tunnel_prl_entry *x;
 427        struct ip_tunnel_prl_entry __rcu **p;
 428        int err = 0;
 429
 430        ASSERT_RTNL();
 431
 432        if (a && a->addr != htonl(INADDR_ANY)) {
 433                for (p = &t->prl;
 434                     (x = rtnl_dereference(*p)) != NULL;
 435                     p = &x->next) {
 436                        if (x->addr == a->addr) {
 437                                *p = x->next;
 438                                kfree_rcu(x, rcu_head);
 439                                t->prl_count--;
 440                                goto out;
 441                        }
 442                }
 443                err = -ENXIO;
 444        } else {
 445                x = rtnl_dereference(t->prl);
 446                if (x) {
 447                        t->prl_count = 0;
 448                        call_rcu(&x->rcu_head, prl_list_destroy_rcu);
 449                        t->prl = NULL;
 450                }
 451        }
 452out:
 453        return err;
 454}
 455
 456static int ipip6_tunnel_prl_ctl(struct net_device *dev,
 457                                struct ip_tunnel_prl __user *data, int cmd)
 458{
 459        struct ip_tunnel *t = netdev_priv(dev);
 460        struct ip_tunnel_prl prl;
 461        int err;
 462
 463        if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
 464                return -EPERM;
 465        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
 466                return -EINVAL;
 467
 468        if (copy_from_user(&prl, data, sizeof(prl)))
 469                return -EFAULT;
 470
 471        switch (cmd) {
 472        case SIOCDELPRL:
 473                err = ipip6_tunnel_del_prl(t, &prl);
 474                break;
 475        case SIOCADDPRL:
 476        case SIOCCHGPRL:
 477                err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
 478                break;
 479        }
 480        dst_cache_reset(&t->dst_cache);
 481        netdev_state_change(dev);
 482        return err;
 483}
 484
 485static int
 486isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
 487{
 488        struct ip_tunnel_prl_entry *p;
 489        int ok = 1;
 490
 491        rcu_read_lock();
 492        p = __ipip6_tunnel_locate_prl(t, iph->saddr);
 493        if (p) {
 494                if (p->flags & PRL_DEFAULT)
 495                        skb->ndisc_nodetype = NDISC_NODETYPE_DEFAULT;
 496                else
 497                        skb->ndisc_nodetype = NDISC_NODETYPE_NODEFAULT;
 498        } else {
 499                const struct in6_addr *addr6 = &ipv6_hdr(skb)->saddr;
 500
 501                if (ipv6_addr_is_isatap(addr6) &&
 502                    (addr6->s6_addr32[3] == iph->saddr) &&
 503                    ipv6_chk_prefix(addr6, t->dev))
 504                        skb->ndisc_nodetype = NDISC_NODETYPE_HOST;
 505                else
 506                        ok = 0;
 507        }
 508        rcu_read_unlock();
 509        return ok;
 510}
 511
 512static void ipip6_tunnel_uninit(struct net_device *dev)
 513{
 514        struct ip_tunnel *tunnel = netdev_priv(dev);
 515        struct sit_net *sitn = net_generic(tunnel->net, sit_net_id);
 516
 517        if (dev == sitn->fb_tunnel_dev) {
 518                RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL);
 519        } else {
 520                ipip6_tunnel_unlink(sitn, tunnel);
 521                ipip6_tunnel_del_prl(tunnel, NULL);
 522        }
 523        dst_cache_reset(&tunnel->dst_cache);
 524        dev_put_track(dev, &tunnel->dev_tracker);
 525}
 526
 527static int ipip6_err(struct sk_buff *skb, u32 info)
 528{
 529        const struct iphdr *iph = (const struct iphdr *)skb->data;
 530        const int type = icmp_hdr(skb)->type;
 531        const int code = icmp_hdr(skb)->code;
 532        unsigned int data_len = 0;
 533        struct ip_tunnel *t;
 534        int sifindex;
 535        int err;
 536
 537        switch (type) {
 538        default:
 539        case ICMP_PARAMETERPROB:
 540                return 0;
 541
 542        case ICMP_DEST_UNREACH:
 543                switch (code) {
 544                case ICMP_SR_FAILED:
 545                        /* Impossible event. */
 546                        return 0;
 547                default:
 548                        /* All others are translated to HOST_UNREACH.
 549                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 550                           I believe they are just ether pollution. --ANK
 551                         */
 552                        break;
 553                }
 554                break;
 555        case ICMP_TIME_EXCEEDED:
 556                if (code != ICMP_EXC_TTL)
 557                        return 0;
 558                data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
 559                break;
 560        case ICMP_REDIRECT:
 561                break;
 562        }
 563
 564        err = -ENOENT;
 565
 566        sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
 567        t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 568                                iph->daddr, iph->saddr, sifindex);
 569        if (!t)
 570                goto out;
 571
 572        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 573                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 574                                 t->parms.link, iph->protocol);
 575                err = 0;
 576                goto out;
 577        }
 578        if (type == ICMP_REDIRECT) {
 579                ipv4_redirect(skb, dev_net(skb->dev), t->parms.link,
 580                              iph->protocol);
 581                err = 0;
 582                goto out;
 583        }
 584
 585        err = 0;
 586        if (__in6_dev_get(skb->dev) &&
 587            !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
 588                goto out;
 589
 590        if (t->parms.iph.daddr == 0)
 591                goto out;
 592
 593        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 594                goto out;
 595
 596        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 597                t->err_count++;
 598        else
 599                t->err_count = 1;
 600        t->err_time = jiffies;
 601out:
 602        return err;
 603}
 604
 605static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
 606                                  const struct in6_addr *v6addr)
 607{
 608        __be32 v4embed = 0;
 609        if (check_6rd(tunnel, v6addr, &v4embed) && v4addr != v4embed)
 610                return true;
 611        return false;
 612}
 613
 614/* Checks if an address matches an address on the tunnel interface.
 615 * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
 616 * Long story:
 617 * This function is called after we considered the packet as spoofed
 618 * in is_spoofed_6rd.
 619 * We may have a router that is doing NAT for proto 41 packets
 620 * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
 621 * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
 622 * function will return true, dropping the packet.
 623 * But, we can still check if is spoofed against the IP
 624 * addresses associated with the interface.
 625 */
 626static bool only_dnatted(const struct ip_tunnel *tunnel,
 627        const struct in6_addr *v6dst)
 628{
 629        int prefix_len;
 630
 631#ifdef CONFIG_IPV6_SIT_6RD
 632        prefix_len = tunnel->ip6rd.prefixlen + 32
 633                - tunnel->ip6rd.relay_prefixlen;
 634#else
 635        prefix_len = 48;
 636#endif
 637        return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
 638}
 639
 640/* Returns true if a packet is spoofed */
 641static bool packet_is_spoofed(struct sk_buff *skb,
 642                              const struct iphdr *iph,
 643                              struct ip_tunnel *tunnel)
 644{
 645        const struct ipv6hdr *ipv6h;
 646
 647        if (tunnel->dev->priv_flags & IFF_ISATAP) {
 648                if (!isatap_chksrc(skb, iph, tunnel))
 649                        return true;
 650
 651                return false;
 652        }
 653
 654        if (tunnel->dev->flags & IFF_POINTOPOINT)
 655                return false;
 656
 657        ipv6h = ipv6_hdr(skb);
 658
 659        if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
 660                net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
 661                                     &iph->saddr, &ipv6h->saddr,
 662                                     &iph->daddr, &ipv6h->daddr);
 663                return true;
 664        }
 665
 666        if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
 667                return false;
 668
 669        if (only_dnatted(tunnel, &ipv6h->daddr))
 670                return false;
 671
 672        net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
 673                             &iph->saddr, &ipv6h->saddr,
 674                             &iph->daddr, &ipv6h->daddr);
 675        return true;
 676}
 677
 678static int ipip6_rcv(struct sk_buff *skb)
 679{
 680        const struct iphdr *iph = ip_hdr(skb);
 681        struct ip_tunnel *tunnel;
 682        int sifindex;
 683        int err;
 684
 685        sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
 686        tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 687                                     iph->saddr, iph->daddr, sifindex);
 688        if (tunnel) {
 689                struct pcpu_sw_netstats *tstats;
 690
 691                if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&
 692                    tunnel->parms.iph.protocol != 0)
 693                        goto out;
 694
 695                skb->mac_header = skb->network_header;
 696                skb_reset_network_header(skb);
 697                IPCB(skb)->flags = 0;
 698                skb->dev = tunnel->dev;
 699
 700                if (packet_is_spoofed(skb, iph, tunnel)) {
 701                        tunnel->dev->stats.rx_errors++;
 702                        goto out;
 703                }
 704
 705                if (iptunnel_pull_header(skb, 0, htons(ETH_P_IPV6),
 706                    !net_eq(tunnel->net, dev_net(tunnel->dev))))
 707                        goto out;
 708
 709                /* skb can be uncloned in iptunnel_pull_header, so
 710                 * old iph is no longer valid
 711                 */
 712                iph = (const struct iphdr *)skb_mac_header(skb);
 713                skb_reset_mac_header(skb);
 714
 715                err = IP_ECN_decapsulate(iph, skb);
 716                if (unlikely(err)) {
 717                        if (log_ecn_error)
 718                                net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
 719                                                     &iph->saddr, iph->tos);
 720                        if (err > 1) {
 721                                ++tunnel->dev->stats.rx_frame_errors;
 722                                ++tunnel->dev->stats.rx_errors;
 723                                goto out;
 724                        }
 725                }
 726
 727                tstats = this_cpu_ptr(tunnel->dev->tstats);
 728                u64_stats_update_begin(&tstats->syncp);
 729                tstats->rx_packets++;
 730                tstats->rx_bytes += skb->len;
 731                u64_stats_update_end(&tstats->syncp);
 732
 733                netif_rx(skb);
 734
 735                return 0;
 736        }
 737
 738        /* no tunnel matched,  let upstream know, ipsec may handle it */
 739        return 1;
 740out:
 741        kfree_skb(skb);
 742        return 0;
 743}
 744
 745static const struct tnl_ptk_info ipip_tpi = {
 746        /* no tunnel info required for ipip. */
 747        .proto = htons(ETH_P_IP),
 748};
 749
 750#if IS_ENABLED(CONFIG_MPLS)
 751static const struct tnl_ptk_info mplsip_tpi = {
 752        /* no tunnel info required for mplsip. */
 753        .proto = htons(ETH_P_MPLS_UC),
 754};
 755#endif
 756
 757static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 758{
 759        const struct iphdr *iph;
 760        struct ip_tunnel *tunnel;
 761        int sifindex;
 762
 763        sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
 764
 765        iph = ip_hdr(skb);
 766        tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
 767                                     iph->saddr, iph->daddr, sifindex);
 768        if (tunnel) {
 769                const struct tnl_ptk_info *tpi;
 770
 771                if (tunnel->parms.iph.protocol != ipproto &&
 772                    tunnel->parms.iph.protocol != 0)
 773                        goto drop;
 774
 775                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 776                        goto drop;
 777#if IS_ENABLED(CONFIG_MPLS)
 778                if (ipproto == IPPROTO_MPLS)
 779                        tpi = &mplsip_tpi;
 780                else
 781#endif
 782                        tpi = &ipip_tpi;
 783                if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 784                        goto drop;
 785                skb_reset_mac_header(skb);
 786
 787                return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
 788        }
 789
 790        return 1;
 791
 792drop:
 793        kfree_skb(skb);
 794        return 0;
 795}
 796
 797static int ipip_rcv(struct sk_buff *skb)
 798{
 799        return sit_tunnel_rcv(skb, IPPROTO_IPIP);
 800}
 801
 802#if IS_ENABLED(CONFIG_MPLS)
 803static int mplsip_rcv(struct sk_buff *skb)
 804{
 805        return sit_tunnel_rcv(skb, IPPROTO_MPLS);
 806}
 807#endif
 808
 809/*
 810 * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function
 811 * stores the embedded IPv4 address in v4dst and returns true.
 812 */
 813static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
 814                      __be32 *v4dst)
 815{
 816#ifdef CONFIG_IPV6_SIT_6RD
 817        if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
 818                              tunnel->ip6rd.prefixlen)) {
 819                unsigned int pbw0, pbi0;
 820                int pbi1;
 821                u32 d;
 822
 823                pbw0 = tunnel->ip6rd.prefixlen >> 5;
 824                pbi0 = tunnel->ip6rd.prefixlen & 0x1f;
 825
 826                d = tunnel->ip6rd.relay_prefixlen < 32 ?
 827                        (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >>
 828                    tunnel->ip6rd.relay_prefixlen : 0;
 829
 830                pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen;
 831                if (pbi1 > 0)
 832                        d |= ntohl(v6dst->s6_addr32[pbw0 + 1]) >>
 833                             (32 - pbi1);
 834
 835                *v4dst = tunnel->ip6rd.relay_prefix | htonl(d);
 836                return true;
 837        }
 838#else
 839        if (v6dst->s6_addr16[0] == htons(0x2002)) {
 840                /* 6to4 v6 addr has 16 bits prefix, 32 v4addr, 16 SLA, ... */
 841                memcpy(v4dst, &v6dst->s6_addr16[1], 4);
 842                return true;
 843        }
 844#endif
 845        return false;
 846}
 847
 848static inline __be32 try_6rd(struct ip_tunnel *tunnel,
 849                             const struct in6_addr *v6dst)
 850{
 851        __be32 dst = 0;
 852        check_6rd(tunnel, v6dst, &dst);
 853        return dst;
 854}
 855
 856/*
 857 *      This function assumes it is being called from dev_queue_xmit()
 858 *      and that skb is filled properly by that function.
 859 */
 860
/*
 * Transmit one IPv6 packet through a SIT tunnel.
 *
 * Steps: pick the outer IPv4 destination, obtain a route to it, enforce the
 * path MTU when DF is requested, make sure there is headroom for the outer
 * header, then pass the packet to iptunnel_xmit().
 *
 * Always returns NETDEV_TX_OK.  On every failure path the skb is freed here
 * and one of the dev->stats error/drop counters is incremented.
 */
static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                                     struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr  *tiph = &tunnel->parms.iph;
        const struct ipv6hdr *iph6 = ipv6_hdr(skb);
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;              /* Route to the other host */
        struct net_device *tdev;        /* Device to other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;
        const struct in6_addr *addr6;
        int addr_type;
        u8 ttl;
        u8 protocol = IPPROTO_IPV6;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        /* A configured tos of 1 selects the DS field of the inner header. */
        if (tos == 1)
                tos = ipv6_get_dsfield(iph6);

        /* ISATAP (RFC4214) - must come before 6to4 */
        if (dev->priv_flags & IFF_ISATAP) {
                struct neighbour *neigh = NULL;
                bool do_tx_error = false;

                if (skb_dst(skb))
                        neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);

                if (!neigh) {
                        net_dbg_ratelimited("nexthop == NULL\n");
                        goto tx_error;
                }

                addr6 = (const struct in6_addr *)&neigh->primary_key;
                addr_type = ipv6_addr_type(addr6);

                /* Outer destination is the last 32 bits of the neighbour key. */
                if ((addr_type & IPV6_ADDR_UNICAST) &&
                     ipv6_addr_is_isatap(addr6))
                        dst = addr6->s6_addr32[3];
                else
                        do_tx_error = true;

                /*
                 * The error is only flagged above so that neigh is released
                 * before jumping to tx_error.
                 */
                neigh_release(neigh);
                if (do_tx_error)
                        goto tx_error;
        }

        /* Still no outer destination: let try_6rd() supply one. */
        if (!dst)
                dst = try_6rd(tunnel, &iph6->daddr);

        /*
         * Last resort: use the neighbour key (or, if that is the unspecified
         * address, the packet's own destination) when it is flagged
         * IPV6_ADDR_COMPATv4.
         */
        if (!dst) {
                struct neighbour *neigh = NULL;
                bool do_tx_error = false;

                if (skb_dst(skb))
                        neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);

                if (!neigh) {
                        net_dbg_ratelimited("nexthop == NULL\n");
                        goto tx_error;
                }

                addr6 = (const struct in6_addr *)&neigh->primary_key;
                addr_type = ipv6_addr_type(addr6);

                if (addr_type == IPV6_ADDR_ANY) {
                        addr6 = &ipv6_hdr(skb)->daddr;
                        addr_type = ipv6_addr_type(addr6);
                }

                if ((addr_type & IPV6_ADDR_COMPATv4) != 0)
                        dst = addr6->s6_addr32[3];
                else
                        do_tx_error = true;

                /* Same pattern as above: release neigh before bailing out. */
                neigh_release(neigh);
                if (do_tx_error)
                        goto tx_error;
        }

        flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
                           RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
                           0, dst, tiph->saddr, 0, 0,
                           sock_net_uid(tunnel->net, NULL));

        /* Try the tunnel's dst cache first; on a miss look up and store. */
        rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
        if (!rt) {
                rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error_icmp;
                }
                dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
        }

        /* Only unicast and local routes are acceptable. */
        if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
                ip_rt_put(rt);
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        /* The route leads back to this very tunnel device. */
        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        if (df) {
                /* MTU available to the inner packet on this route. */
                mtu = dst_mtu(&rt->dst) - t_hlen;

                if (mtu < IPV4_MIN_MTU) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                /*
                 * Below the IPv6 minimum: advertise IPV6_MIN_MTU and clear
                 * DF for the outer header.
                 */
                if (mtu < IPV6_MIN_MTU) {
                        mtu = IPV6_MIN_MTU;
                        df = 0;
                }

                /* PMTU is only propagated when a remote is configured. */
                if (tunnel->parms.iph.daddr)
                        skb_dst_update_pmtu_no_confirm(skb, mtu);

                if (skb->len > mtu && !skb_is_gso(skb)) {
                        icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        /*
         * While err_count is positive and err_time is less than
         * IPTUNNEL_ERR_TIMEO old, consume one count and report a link
         * failure; once the window has passed, reset the counter.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = LL_RESERVED_SPACE(tdev) + t_hlen;

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* skb was replaced, so re-read the inner header pointer. */
                iph6 = ipv6_hdr(skb);
        }
        /* A configured ttl of 0 means: use the inner hop limit. */
        ttl = tiph->ttl;
        if (ttl == 0)
                ttl = iph6->hop_limit;
        tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) {
                ip_rt_put(rt);
                goto tx_error;
        }

        skb_set_inner_ipproto(skb, IPPROTO_IPV6);

        iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
                      df, !net_eq(tunnel->net, dev_net(dev)));
        return NETDEV_TX_OK;

        /* Routing failures report a link failure before dropping. */
tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        kfree_skb(skb);
        dev->stats.tx_errors++;
        return NETDEV_TX_OK;
}
1053
1054static netdev_tx_t sit_tunnel_xmit__(struct sk_buff *skb,
1055                                     struct net_device *dev, u8 ipproto)
1056{
1057        struct ip_tunnel *tunnel = netdev_priv(dev);
1058        const struct iphdr  *tiph = &tunnel->parms.iph;
1059
1060        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
1061                goto tx_error;
1062
1063        skb_set_inner_ipproto(skb, ipproto);
1064
1065        ip_tunnel_xmit(skb, dev, tiph, ipproto);
1066        return NETDEV_TX_OK;
1067tx_error:
1068        kfree_skb(skb);
1069        dev->stats.tx_errors++;
1070        return NETDEV_TX_OK;
1071}
1072
1073static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,
1074                                   struct net_device *dev)
1075{
1076        if (!pskb_inet_may_pull(skb))
1077                goto tx_err;
1078
1079        switch (skb->protocol) {
1080        case htons(ETH_P_IP):
1081                sit_tunnel_xmit__(skb, dev, IPPROTO_IPIP);
1082                break;
1083        case htons(ETH_P_IPV6):
1084                ipip6_tunnel_xmit(skb, dev);
1085                break;
1086#if IS_ENABLED(CONFIG_MPLS)
1087        case htons(ETH_P_MPLS_UC):
1088                sit_tunnel_xmit__(skb, dev, IPPROTO_MPLS);
1089                break;
1090#endif
1091        default:
1092                goto tx_err;
1093        }
1094
1095        return NETDEV_TX_OK;
1096
1097tx_err:
1098        dev->stats.tx_errors++;
1099        kfree_skb(skb);
1100        return NETDEV_TX_OK;
1101
1102}
1103
1104static void ipip6_tunnel_bind_dev(struct net_device *dev)
1105{
1106        struct net_device *tdev = NULL;
1107        struct ip_tunnel *tunnel;
1108        const struct iphdr *iph;
1109        struct flowi4 fl4;
1110
1111        tunnel = netdev_priv(dev);
1112        iph = &tunnel->parms.iph;
1113
1114        if (iph->daddr) {
1115                struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4,
1116                                                          NULL,
1117                                                          iph->daddr, iph->saddr,
1118                                                          0, 0,
1119                                                          IPPROTO_IPV6,
1120                                                          RT_TOS(iph->tos),
1121                                                          tunnel->parms.link);
1122
1123                if (!IS_ERR(rt)) {
1124                        tdev = rt->dst.dev;
1125                        ip_rt_put(rt);
1126                }
1127                dev->flags |= IFF_POINTOPOINT;
1128        }
1129
1130        if (!tdev && tunnel->parms.link)
1131                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
1132
1133        if (tdev && !netif_is_l3_master(tdev)) {
1134                int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1135
1136                dev->mtu = tdev->mtu - t_hlen;
1137                if (dev->mtu < IPV6_MIN_MTU)
1138                        dev->mtu = IPV6_MIN_MTU;
1139        }
1140}
1141
1142static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
1143                                __u32 fwmark)
1144{
1145        struct net *net = t->net;
1146        struct sit_net *sitn = net_generic(net, sit_net_id);
1147
1148        ipip6_tunnel_unlink(sitn, t);
1149        synchronize_net();
1150        t->parms.iph.saddr = p->iph.saddr;
1151        t->parms.iph.daddr = p->iph.daddr;
1152        __dev_addr_set(t->dev, &p->iph.saddr, 4);
1153        memcpy(t->dev->broadcast, &p->iph.daddr, 4);
1154        ipip6_tunnel_link(sitn, t);
1155        t->parms.iph.ttl = p->iph.ttl;
1156        t->parms.iph.tos = p->iph.tos;
1157        t->parms.iph.frag_off = p->iph.frag_off;
1158        if (t->parms.link != p->link || t->fwmark != fwmark) {
1159                t->parms.link = p->link;
1160                t->fwmark = fwmark;
1161                ipip6_tunnel_bind_dev(t->dev);
1162        }
1163        dst_cache_reset(&t->dst_cache);
1164        netdev_state_change(t->dev);
1165}
1166
1167#ifdef CONFIG_IPV6_SIT_6RD
1168static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
1169                                   struct ip_tunnel_6rd *ip6rd)
1170{
1171        struct in6_addr prefix;
1172        __be32 relay_prefix;
1173
1174        if (ip6rd->relay_prefixlen > 32 ||
1175            ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
1176                return -EINVAL;
1177
1178        ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
1179        if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
1180                return -EINVAL;
1181        if (ip6rd->relay_prefixlen)
1182                relay_prefix = ip6rd->relay_prefix &
1183                               htonl(0xffffffffUL <<
1184                                     (32 - ip6rd->relay_prefixlen));
1185        else
1186                relay_prefix = 0;
1187        if (relay_prefix != ip6rd->relay_prefix)
1188                return -EINVAL;
1189
1190        t->ip6rd.prefix = prefix;
1191        t->ip6rd.relay_prefix = relay_prefix;
1192        t->ip6rd.prefixlen = ip6rd->prefixlen;
1193        t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
1194        dst_cache_reset(&t->dst_cache);
1195        netdev_state_change(t->dev);
1196        return 0;
1197}
1198
1199static int
1200ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
1201{
1202        struct ip_tunnel *t = netdev_priv(dev);
1203        struct ip_tunnel_6rd ip6rd;
1204        struct ip_tunnel_parm p;
1205
1206        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1207                if (copy_from_user(&p, data, sizeof(p)))
1208                        return -EFAULT;
1209                t = ipip6_tunnel_locate(t->net, &p, 0);
1210        }
1211        if (!t)
1212                t = netdev_priv(dev);
1213
1214        ip6rd.prefix = t->ip6rd.prefix;
1215        ip6rd.relay_prefix = t->ip6rd.relay_prefix;
1216        ip6rd.prefixlen = t->ip6rd.prefixlen;
1217        ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
1218        if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
1219                return -EFAULT;
1220        return 0;
1221}
1222
1223static int
1224ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
1225                    int cmd)
1226{
1227        struct ip_tunnel *t = netdev_priv(dev);
1228        struct ip_tunnel_6rd ip6rd;
1229        int err;
1230
1231        if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
1232                return -EPERM;
1233        if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
1234                return -EFAULT;
1235
1236        if (cmd != SIOCDEL6RD) {
1237                err = ipip6_tunnel_update_6rd(t, &ip6rd);
1238                if (err < 0)
1239                        return err;
1240        } else
1241                ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev));
1242        return 0;
1243}
1244
1245#endif /* CONFIG_IPV6_SIT_6RD */
1246
1247static bool ipip6_valid_ip_proto(u8 ipproto)
1248{
1249        return ipproto == IPPROTO_IPV6 ||
1250                ipproto == IPPROTO_IPIP ||
1251#if IS_ENABLED(CONFIG_MPLS)
1252                ipproto == IPPROTO_MPLS ||
1253#endif
1254                ipproto == 0;
1255}
1256
1257static int
1258__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
1259{
1260        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1261                return -EPERM;
1262
1263        if (!ipip6_valid_ip_proto(p->iph.protocol))
1264                return -EINVAL;
1265        if (p->iph.version != 4 ||
1266            p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
1267                return -EINVAL;
1268
1269        if (p->iph.ttl)
1270                p->iph.frag_off |= htons(IP_DF);
1271        return 0;
1272}
1273
1274static int
1275ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
1276{
1277        struct ip_tunnel *t = netdev_priv(dev);
1278
1279        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
1280                t = ipip6_tunnel_locate(t->net, p, 0);
1281        if (!t)
1282                t = netdev_priv(dev);
1283        memcpy(p, &t->parms, sizeof(*p));
1284        return 0;
1285}
1286
1287static int
1288ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
1289{
1290        struct ip_tunnel *t = netdev_priv(dev);
1291        int err;
1292
1293        err = __ipip6_tunnel_ioctl_validate(t->net, p);
1294        if (err)
1295                return err;
1296
1297        t = ipip6_tunnel_locate(t->net, p, 1);
1298        if (!t)
1299                return -ENOBUFS;
1300        return 0;
1301}
1302
1303static int
1304ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
1305{
1306        struct ip_tunnel *t = netdev_priv(dev);
1307        int err;
1308
1309        err = __ipip6_tunnel_ioctl_validate(t->net, p);
1310        if (err)
1311                return err;
1312
1313        t = ipip6_tunnel_locate(t->net, p, 0);
1314        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1315                if (!t)
1316                        return -ENOENT;
1317        } else {
1318                if (t) {
1319                        if (t->dev != dev)
1320                                return -EEXIST;
1321                } else {
1322                        if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) ||
1323                            (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr))
1324                                return -EINVAL;
1325                        t = netdev_priv(dev);
1326                }
1327
1328                ipip6_tunnel_update(t, p, t->fwmark);
1329        }
1330
1331        return 0;
1332}
1333
1334static int
1335ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
1336{
1337        struct ip_tunnel *t = netdev_priv(dev);
1338
1339        if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
1340                return -EPERM;
1341
1342        if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
1343                t = ipip6_tunnel_locate(t->net, p, 0);
1344                if (!t)
1345                        return -ENOENT;
1346                if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev))
1347                        return -EPERM;
1348                dev = t->dev;
1349        }
1350        unregister_netdevice(dev);
1351        return 0;
1352}
1353
1354static int
1355ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
1356{
1357        switch (cmd) {
1358        case SIOCGETTUNNEL:
1359                return ipip6_tunnel_get(dev, p);
1360        case SIOCADDTUNNEL:
1361                return ipip6_tunnel_add(dev, p);
1362        case SIOCCHGTUNNEL:
1363                return ipip6_tunnel_change(dev, p);
1364        case SIOCDELTUNNEL:
1365                return ipip6_tunnel_del(dev, p);
1366        default:
1367                return -EINVAL;
1368        }
1369}
1370
1371static int
1372ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
1373                            void __user *data, int cmd)
1374{
1375        switch (cmd) {
1376        case SIOCGETTUNNEL:
1377        case SIOCADDTUNNEL:
1378        case SIOCCHGTUNNEL:
1379        case SIOCDELTUNNEL:
1380                return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
1381        case SIOCGETPRL:
1382                return ipip6_tunnel_get_prl(dev, data);
1383        case SIOCADDPRL:
1384        case SIOCDELPRL:
1385        case SIOCCHGPRL:
1386                return ipip6_tunnel_prl_ctl(dev, data, cmd);
1387#ifdef CONFIG_IPV6_SIT_6RD
1388        case SIOCGET6RD:
1389                return ipip6_tunnel_get6rd(dev, data);
1390        case SIOCADD6RD:
1391        case SIOCCHG6RD:
1392        case SIOCDEL6RD:
1393                return ipip6_tunnel_6rdctl(dev, data, cmd);
1394#endif
1395        default:
1396                return -EINVAL;
1397        }
1398}
1399
/*
 * net_device callbacks for SIT devices; installed as dev->netdev_ops by
 * ipip6_tunnel_setup().
 */
static const struct net_device_ops ipip6_netdev_ops = {
        .ndo_init       = ipip6_tunnel_init,
        .ndo_uninit     = ipip6_tunnel_uninit,
        /* dispatches on skb->protocol (IPv4, IPv6, optionally MPLS) */
        .ndo_start_xmit = sit_tunnel_xmit,
        /* tunnel, PRL and 6rd private ioctls */
        .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
        .ndo_get_stats64 = dev_get_tstats64,
        .ndo_get_iflink = ip_tunnel_get_iflink,
        /* get/add/change/delete tunnel commands */
        .ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
1409
1410static void ipip6_dev_free(struct net_device *dev)
1411{
1412        struct ip_tunnel *tunnel = netdev_priv(dev);
1413
1414        dst_cache_destroy(&tunnel->dst_cache);
1415        free_percpu(dev->tstats);
1416}
1417
/*
 * Feature flags that ipip6_tunnel_setup() ORs into both dev->features and
 * dev->hw_features of a SIT device.
 */
#define SIT_FEATURES (NETIF_F_SG           | \
                      NETIF_F_FRAGLIST     | \
                      NETIF_F_HIGHDMA      | \
                      NETIF_F_GSO_SOFTWARE | \
                      NETIF_F_HW_CSUM)
1423
1424static void ipip6_tunnel_setup(struct net_device *dev)
1425{
1426        struct ip_tunnel *tunnel = netdev_priv(dev);
1427        int t_hlen = tunnel->hlen + sizeof(struct iphdr);
1428
1429        dev->netdev_ops         = &ipip6_netdev_ops;
1430        dev->header_ops         = &ip_tunnel_header_ops;
1431        dev->needs_free_netdev  = true;
1432        dev->priv_destructor    = ipip6_dev_free;
1433
1434        dev->type               = ARPHRD_SIT;
1435        dev->mtu                = ETH_DATA_LEN - t_hlen;
1436        dev->min_mtu            = IPV6_MIN_MTU;
1437        dev->max_mtu            = IP6_MAX_MTU - t_hlen;
1438        dev->flags              = IFF_NOARP;
1439        netif_keep_dst(dev);
1440        dev->addr_len           = 4;
1441        dev->features           |= NETIF_F_LLTX;
1442        dev->features           |= SIT_FEATURES;
1443        dev->hw_features        |= SIT_FEATURES;
1444}
1445
1446static int ipip6_tunnel_init(struct net_device *dev)
1447{
1448        struct ip_tunnel *tunnel = netdev_priv(dev);
1449        int err;
1450
1451        tunnel->dev = dev;
1452        tunnel->net = dev_net(dev);
1453        strcpy(tunnel->parms.name, dev->name);
1454
1455        ipip6_tunnel_bind_dev(dev);
1456        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1457        if (!dev->tstats)
1458                return -ENOMEM;
1459
1460        err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1461        if (err) {
1462                free_percpu(dev->tstats);
1463                dev->tstats = NULL;
1464                return err;
1465        }
1466        dev_hold_track(dev, &tunnel->dev_tracker, GFP_KERNEL);
1467        return 0;
1468}
1469
1470static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1471{
1472        struct ip_tunnel *tunnel = netdev_priv(dev);
1473        struct iphdr *iph = &tunnel->parms.iph;
1474        struct net *net = dev_net(dev);
1475        struct sit_net *sitn = net_generic(net, sit_net_id);
1476
1477        iph->version            = 4;
1478        iph->protocol           = IPPROTO_IPV6;
1479        iph->ihl                = 5;
1480        iph->ttl                = 64;
1481
1482        rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
1483}
1484
1485static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
1486                          struct netlink_ext_ack *extack)
1487{
1488        u8 proto;
1489
1490        if (!data || !data[IFLA_IPTUN_PROTO])
1491                return 0;
1492
1493        proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1494        if (!ipip6_valid_ip_proto(proto))
1495                return -EINVAL;
1496
1497        return 0;
1498}
1499
1500static void ipip6_netlink_parms(struct nlattr *data[],
1501                                struct ip_tunnel_parm *parms,
1502                                __u32 *fwmark)
1503{
1504        memset(parms, 0, sizeof(*parms));
1505
1506        parms->iph.version = 4;
1507        parms->iph.protocol = IPPROTO_IPV6;
1508        parms->iph.ihl = 5;
1509        parms->iph.ttl = 64;
1510
1511        if (!data)
1512                return;
1513
1514        if (data[IFLA_IPTUN_LINK])
1515                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
1516
1517        if (data[IFLA_IPTUN_LOCAL])
1518                parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
1519
1520        if (data[IFLA_IPTUN_REMOTE])
1521                parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
1522
1523        if (data[IFLA_IPTUN_TTL]) {
1524                parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
1525                if (parms->iph.ttl)
1526                        parms->iph.frag_off = htons(IP_DF);
1527        }
1528
1529        if (data[IFLA_IPTUN_TOS])
1530                parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
1531
1532        if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
1533                parms->iph.frag_off = htons(IP_DF);
1534
1535        if (data[IFLA_IPTUN_FLAGS])
1536                parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
1537
1538        if (data[IFLA_IPTUN_PROTO])
1539                parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
1540
1541        if (data[IFLA_IPTUN_FWMARK])
1542                *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
1543}
1544
1545/* This function returns true when ENCAP attributes are present in the nl msg */
1546static bool ipip6_netlink_encap_parms(struct nlattr *data[],
1547                                      struct ip_tunnel_encap *ipencap)
1548{
1549        bool ret = false;
1550
1551        memset(ipencap, 0, sizeof(*ipencap));
1552
1553        if (!data)
1554                return ret;
1555
1556        if (data[IFLA_IPTUN_ENCAP_TYPE]) {
1557                ret = true;
1558                ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
1559        }
1560
1561        if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
1562                ret = true;
1563                ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
1564        }
1565
1566        if (data[IFLA_IPTUN_ENCAP_SPORT]) {
1567                ret = true;
1568                ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
1569        }
1570
1571        if (data[IFLA_IPTUN_ENCAP_DPORT]) {
1572                ret = true;
1573                ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
1574        }
1575
1576        return ret;
1577}
1578
1579#ifdef CONFIG_IPV6_SIT_6RD
1580/* This function returns true when 6RD attributes are present in the nl msg */
1581static bool ipip6_netlink_6rd_parms(struct nlattr *data[],
1582                                    struct ip_tunnel_6rd *ip6rd)
1583{
1584        bool ret = false;
1585        memset(ip6rd, 0, sizeof(*ip6rd));
1586
1587        if (!data)
1588                return ret;
1589
1590        if (data[IFLA_IPTUN_6RD_PREFIX]) {
1591                ret = true;
1592                ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]);
1593        }
1594
1595        if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) {
1596                ret = true;
1597                ip6rd->relay_prefix =
1598                        nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]);
1599        }
1600
1601        if (data[IFLA_IPTUN_6RD_PREFIXLEN]) {
1602                ret = true;
1603                ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]);
1604        }
1605
1606        if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) {
1607                ret = true;
1608                ip6rd->relay_prefixlen =
1609                        nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]);
1610        }
1611
1612        return ret;
1613}
1614#endif
1615
1616static int ipip6_newlink(struct net *src_net, struct net_device *dev,
1617                         struct nlattr *tb[], struct nlattr *data[],
1618                         struct netlink_ext_ack *extack)
1619{
1620        struct net *net = dev_net(dev);
1621        struct ip_tunnel *nt;
1622        struct ip_tunnel_encap ipencap;
1623#ifdef CONFIG_IPV6_SIT_6RD
1624        struct ip_tunnel_6rd ip6rd;
1625#endif
1626        int err;
1627
1628        nt = netdev_priv(dev);
1629
1630        if (ipip6_netlink_encap_parms(data, &ipencap)) {
1631                err = ip_tunnel_encap_setup(nt, &ipencap);
1632                if (err < 0)
1633                        return err;
1634        }
1635
1636        ipip6_netlink_parms(data, &nt->parms, &nt->fwmark);
1637
1638        if (ipip6_tunnel_locate(net, &nt->parms, 0))
1639                return -EEXIST;
1640
1641        err = ipip6_tunnel_create(dev);
1642        if (err < 0)
1643                return err;
1644
1645        if (tb[IFLA_MTU]) {
1646                u32 mtu = nla_get_u32(tb[IFLA_MTU]);
1647
1648                if (mtu >= IPV6_MIN_MTU &&
1649                    mtu <= IP6_MAX_MTU - dev->hard_header_len)
1650                        dev->mtu = mtu;
1651        }
1652
1653#ifdef CONFIG_IPV6_SIT_6RD
1654        if (ipip6_netlink_6rd_parms(data, &ip6rd)) {
1655                err = ipip6_tunnel_update_6rd(nt, &ip6rd);
1656                if (err < 0)
1657                        unregister_netdevice_queue(dev, NULL);
1658        }
1659#endif
1660
1661        return err;
1662}
1663
1664static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
1665                            struct nlattr *data[],
1666                            struct netlink_ext_ack *extack)
1667{
1668        struct ip_tunnel *t = netdev_priv(dev);
1669        struct ip_tunnel_parm p;
1670        struct ip_tunnel_encap ipencap;
1671        struct net *net = t->net;
1672        struct sit_net *sitn = net_generic(net, sit_net_id);
1673#ifdef CONFIG_IPV6_SIT_6RD
1674        struct ip_tunnel_6rd ip6rd;
1675#endif
1676        __u32 fwmark = t->fwmark;
1677        int err;
1678
1679        if (dev == sitn->fb_tunnel_dev)
1680                return -EINVAL;
1681
1682        if (ipip6_netlink_encap_parms(data, &ipencap)) {
1683                err = ip_tunnel_encap_setup(t, &ipencap);
1684                if (err < 0)
1685                        return err;
1686        }
1687
1688        ipip6_netlink_parms(data, &p, &fwmark);
1689
1690        if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
1691            (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
1692                return -EINVAL;
1693
1694        t = ipip6_tunnel_locate(net, &p, 0);
1695
1696        if (t) {
1697                if (t->dev != dev)
1698                        return -EEXIST;
1699        } else
1700                t = netdev_priv(dev);
1701
1702        ipip6_tunnel_update(t, &p, fwmark);
1703
1704#ifdef CONFIG_IPV6_SIT_6RD
1705        if (ipip6_netlink_6rd_parms(data, &ip6rd))
1706                return ipip6_tunnel_update_6rd(t, &ip6rd);
1707#endif
1708
1709        return 0;
1710}
1711
1712static size_t ipip6_get_size(const struct net_device *dev)
1713{
1714        return
1715                /* IFLA_IPTUN_LINK */
1716                nla_total_size(4) +
1717                /* IFLA_IPTUN_LOCAL */
1718                nla_total_size(4) +
1719                /* IFLA_IPTUN_REMOTE */
1720                nla_total_size(4) +
1721                /* IFLA_IPTUN_TTL */
1722                nla_total_size(1) +
1723                /* IFLA_IPTUN_TOS */
1724                nla_total_size(1) +
1725                /* IFLA_IPTUN_PMTUDISC */
1726                nla_total_size(1) +
1727                /* IFLA_IPTUN_FLAGS */
1728                nla_total_size(2) +
1729                /* IFLA_IPTUN_PROTO */
1730                nla_total_size(1) +
1731#ifdef CONFIG_IPV6_SIT_6RD
1732                /* IFLA_IPTUN_6RD_PREFIX */
1733                nla_total_size(sizeof(struct in6_addr)) +
1734                /* IFLA_IPTUN_6RD_RELAY_PREFIX */
1735                nla_total_size(4) +
1736                /* IFLA_IPTUN_6RD_PREFIXLEN */
1737                nla_total_size(2) +
1738                /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */
1739                nla_total_size(2) +
1740#endif
1741                /* IFLA_IPTUN_ENCAP_TYPE */
1742                nla_total_size(2) +
1743                /* IFLA_IPTUN_ENCAP_FLAGS */
1744                nla_total_size(2) +
1745                /* IFLA_IPTUN_ENCAP_SPORT */
1746                nla_total_size(2) +
1747                /* IFLA_IPTUN_ENCAP_DPORT */
1748                nla_total_size(2) +
1749                /* IFLA_IPTUN_FWMARK */
1750                nla_total_size(4) +
1751                0;
1752}
1753
1754static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
1755{
1756        struct ip_tunnel *tunnel = netdev_priv(dev);
1757        struct ip_tunnel_parm *parm = &tunnel->parms;
1758
1759        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
1760            nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
1761            nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
1762            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
1763            nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1764            nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
1765                       !!(parm->iph.frag_off & htons(IP_DF))) ||
1766            nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
1767            nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
1768            nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
1769                goto nla_put_failure;
1770
1771#ifdef CONFIG_IPV6_SIT_6RD
1772        if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX,
1773                             &tunnel->ip6rd.prefix) ||
1774            nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX,
1775                            tunnel->ip6rd.relay_prefix) ||
1776            nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN,
1777                        tunnel->ip6rd.prefixlen) ||
1778            nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN,
1779                        tunnel->ip6rd.relay_prefixlen))
1780                goto nla_put_failure;
1781#endif
1782
1783        if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
1784                        tunnel->encap.type) ||
1785            nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
1786                        tunnel->encap.sport) ||
1787            nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
1788                        tunnel->encap.dport) ||
1789            nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
1790                        tunnel->encap.flags))
1791                goto nla_put_failure;
1792
1793        return 0;
1794
1795nla_put_failure:
1796        return -EMSGSIZE;
1797}
1798
/*
 * Netlink attribute policy for the IFLA_IPTUN_* attributes of the "sit"
 * link type; referenced as .policy in sit_link_ops.
 */
static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
        /* IPv4 endpoints: 32-bit values, read with nla_get_be32() */
        [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
        [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
        [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
        [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
        [IFLA_IPTUN_FLAGS]              = { .type = NLA_U16 },
        [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
#ifdef CONFIG_IPV6_SIT_6RD
        /* only a length (one in6_addr) is specified for the 6rd prefix */
        [IFLA_IPTUN_6RD_PREFIX]         = { .len = sizeof(struct in6_addr) },
        [IFLA_IPTUN_6RD_RELAY_PREFIX]   = { .type = NLA_U32 },
        [IFLA_IPTUN_6RD_PREFIXLEN]      = { .type = NLA_U16 },
        [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 },
#endif
        [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
        [IFLA_IPTUN_FWMARK]             = { .type = NLA_U32 },
};
1820
1821static void ipip6_dellink(struct net_device *dev, struct list_head *head)
1822{
1823        struct net *net = dev_net(dev);
1824        struct sit_net *sitn = net_generic(net, sit_net_id);
1825
1826        if (dev != sitn->fb_tunnel_dev)
1827                unregister_netdevice_queue(dev, head);
1828}
1829
1830static struct rtnl_link_ops sit_link_ops __read_mostly = {
1831        .kind           = "sit",
1832        .maxtype        = IFLA_IPTUN_MAX,
1833        .policy         = ipip6_policy,
1834        .priv_size      = sizeof(struct ip_tunnel),
1835        .setup          = ipip6_tunnel_setup,
1836        .validate       = ipip6_validate,
1837        .newlink        = ipip6_newlink,
1838        .changelink     = ipip6_changelink,
1839        .get_size       = ipip6_get_size,
1840        .fill_info      = ipip6_fill_info,
1841        .dellink        = ipip6_dellink,
1842        .get_link_net   = ip_tunnel_get_link_net,
1843};
1844
1845static struct xfrm_tunnel sit_handler __read_mostly = {
1846        .handler        =       ipip6_rcv,
1847        .err_handler    =       ipip6_err,
1848        .priority       =       1,
1849};
1850
1851static struct xfrm_tunnel ipip_handler __read_mostly = {
1852        .handler        =       ipip_rcv,
1853        .err_handler    =       ipip6_err,
1854        .priority       =       2,
1855};
1856
1857#if IS_ENABLED(CONFIG_MPLS)
1858static struct xfrm_tunnel mplsip_handler __read_mostly = {
1859        .handler        =       mplsip_rcv,
1860        .err_handler    =       ipip6_err,
1861        .priority       =       2,
1862};
1863#endif
1864
1865static void __net_exit sit_destroy_tunnels(struct net *net,
1866                                           struct list_head *head)
1867{
1868        struct sit_net *sitn = net_generic(net, sit_net_id);
1869        struct net_device *dev, *aux;
1870        int prio;
1871
1872        for_each_netdev_safe(net, dev, aux)
1873                if (dev->rtnl_link_ops == &sit_link_ops)
1874                        unregister_netdevice_queue(dev, head);
1875
1876        for (prio = 0; prio < 4; prio++) {
1877                int h;
1878                for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
1879                        struct ip_tunnel *t;
1880
1881                        t = rtnl_dereference(sitn->tunnels[prio][h]);
1882                        while (t) {
1883                                /* If dev is in the same netns, it has already
1884                                 * been added to the list by the previous loop.
1885                                 */
1886                                if (!net_eq(dev_net(t->dev), net))
1887                                        unregister_netdevice_queue(t->dev,
1888                                                                   head);
1889                                t = rtnl_dereference(t->next);
1890                        }
1891                }
1892        }
1893}
1894
1895static int __net_init sit_init_net(struct net *net)
1896{
1897        struct sit_net *sitn = net_generic(net, sit_net_id);
1898        struct ip_tunnel *t;
1899        int err;
1900
1901        sitn->tunnels[0] = sitn->tunnels_wc;
1902        sitn->tunnels[1] = sitn->tunnels_l;
1903        sitn->tunnels[2] = sitn->tunnels_r;
1904        sitn->tunnels[3] = sitn->tunnels_r_l;
1905
1906        if (!net_has_fallback_tunnels(net))
1907                return 0;
1908
1909        sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
1910                                           NET_NAME_UNKNOWN,
1911                                           ipip6_tunnel_setup);
1912        if (!sitn->fb_tunnel_dev) {
1913                err = -ENOMEM;
1914                goto err_alloc_dev;
1915        }
1916        dev_net_set(sitn->fb_tunnel_dev, net);
1917        sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
1918        /* FB netdevice is special: we have one, and only one per netns.
1919         * Allowing to move it to another netns is clearly unsafe.
1920         */
1921        sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1922
1923        err = register_netdev(sitn->fb_tunnel_dev);
1924        if (err)
1925                goto err_reg_dev;
1926
1927        ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1928        ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1929
1930        t = netdev_priv(sitn->fb_tunnel_dev);
1931
1932        strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
1933        return 0;
1934
1935err_reg_dev:
1936        free_netdev(sitn->fb_tunnel_dev);
1937err_alloc_dev:
1938        return err;
1939}
1940
1941static void __net_exit sit_exit_batch_net(struct list_head *net_list)
1942{
1943        LIST_HEAD(list);
1944        struct net *net;
1945
1946        rtnl_lock();
1947        list_for_each_entry(net, net_list, exit_list)
1948                sit_destroy_tunnels(net, &list);
1949
1950        unregister_netdevice_many(&list);
1951        rtnl_unlock();
1952}
1953
1954static struct pernet_operations sit_net_ops = {
1955        .init = sit_init_net,
1956        .exit_batch = sit_exit_batch_net,
1957        .id   = &sit_net_id,
1958        .size = sizeof(struct sit_net),
1959};
1960
1961static void __exit sit_cleanup(void)
1962{
1963        rtnl_link_unregister(&sit_link_ops);
1964        xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
1965        xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1966#if IS_ENABLED(CONFIG_MPLS)
1967        xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
1968#endif
1969
1970        unregister_pernet_device(&sit_net_ops);
1971        rcu_barrier(); /* Wait for completion of call_rcu()'s */
1972}
1973
1974static int __init sit_init(void)
1975{
1976        int err;
1977
1978        pr_info("IPv6, IPv4 and MPLS over IPv4 tunneling driver\n");
1979
1980        err = register_pernet_device(&sit_net_ops);
1981        if (err < 0)
1982                return err;
1983        err = xfrm4_tunnel_register(&sit_handler, AF_INET6);
1984        if (err < 0) {
1985                pr_info("%s: can't register ip6ip4\n", __func__);
1986                goto xfrm_tunnel_failed;
1987        }
1988        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1989        if (err < 0) {
1990                pr_info("%s: can't register ip4ip4\n", __func__);
1991                goto xfrm_tunnel4_failed;
1992        }
1993#if IS_ENABLED(CONFIG_MPLS)
1994        err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
1995        if (err < 0) {
1996                pr_info("%s: can't register mplsip\n", __func__);
1997                goto xfrm_tunnel_mpls_failed;
1998        }
1999#endif
2000        err = rtnl_link_register(&sit_link_ops);
2001        if (err < 0)
2002                goto rtnl_link_failed;
2003
2004out:
2005        return err;
2006
2007rtnl_link_failed:
2008#if IS_ENABLED(CONFIG_MPLS)
2009        xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS);
2010xfrm_tunnel_mpls_failed:
2011#endif
2012        xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
2013xfrm_tunnel4_failed:
2014        xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
2015xfrm_tunnel_failed:
2016        unregister_pernet_device(&sit_net_ops);
2017        goto out;
2018}
2019
2020module_init(sit_init);
2021module_exit(sit_cleanup);
2022MODULE_LICENSE("GPL");
2023MODULE_ALIAS_RTNL_LINK("sit");
2024MODULE_ALIAS_NETDEV("sit0");
2025