linux/net/ipv6/ip6_vti.c
<<
>>
Prefs
   1/*
   2 *      IPv6 virtual tunneling interface
   3 *
   4 *      Copyright (C) 2013 secunet Security Networks AG
   5 *
   6 *      Author:
   7 *      Steffen Klassert <steffen.klassert@secunet.com>
   8 *
   9 *      Based on:
  10 *      net/ipv6/ip6_tunnel.c
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 */
  17
  18#include <linux/module.h>
  19#include <linux/capability.h>
  20#include <linux/errno.h>
  21#include <linux/types.h>
  22#include <linux/sockios.h>
  23#include <linux/icmp.h>
  24#include <linux/if.h>
  25#include <linux/in.h>
  26#include <linux/ip.h>
  27#include <linux/net.h>
  28#include <linux/in6.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_arp.h>
  31#include <linux/icmpv6.h>
  32#include <linux/init.h>
  33#include <linux/route.h>
  34#include <linux/rtnetlink.h>
  35#include <linux/netfilter_ipv6.h>
  36#include <linux/slab.h>
  37#include <linux/hash.h>
  38
  39#include <linux/uaccess.h>
  40#include <linux/atomic.h>
  41
  42#include <net/icmp.h>
  43#include <net/ip.h>
  44#include <net/ip_tunnels.h>
  45#include <net/ipv6.h>
  46#include <net/ip6_route.h>
  47#include <net/addrconf.h>
  48#include <net/ip6_tunnel.h>
  49#include <net/xfrm.h>
  50#include <net/net_namespace.h>
  51#include <net/netns/generic.h>
  52
    /* Width, in bits, of a tunnel hash-bucket index (see HASH()). */
  53#define HASH_SIZE_SHIFT  5
    /* Number of buckets addressable with HASH_SIZE_SHIFT bits. */
  54#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
  55
  56static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
  57{
  58        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  59
  60        return hash_32(hash, HASH_SIZE_SHIFT);
  61}
  62
    /* Forward declarations for symbols that are referenced in this file
     * before their definitions appear.
     */
  63static int vti6_dev_init(struct net_device *dev);
  64static void vti6_dev_setup(struct net_device *dev);
  65static struct rtnl_link_ops vti6_link_ops __read_mostly;
  66
    /* Id handed to net_generic() to obtain a namespace's struct vti6_net. */
  67static int vti6_net_id __read_mostly;
    /* vti6 state kept per network namespace. */
  68struct vti6_net {
  69        /* the vti6 tunnel fallback device */
  70        struct net_device *fb_tnl_dev;
  71        /* lists for storing tunnels in use */
    /* chains of tunnels, indexed by HASH(remote, local) */
  72        struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
    /* single slot for the wildcard tunnel (the fallback device's tunnel) */
  73        struct ip6_tnl __rcu *tnls_wc[1];
    /* table selected by vti6_tnl_bucket(): index 0 when both end-points
     * are unspecified, index 1 otherwise.
     * NOTE(review): presumably [0] points at tnls_wc and [1] at tnls_r_l;
     * the initialisation is not visible here - confirm.
     */
  74        struct ip6_tnl __rcu **tnls[2];
  75};
  76
    /* Walk a tunnel chain starting at @start, following ->next with
     * rcu_dereference(). The cursor is a variable named 't' that must
     * already be declared in the enclosing scope.
     */
  77#define for_each_vti6_tunnel_rcu(start) \
  78        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
  79
  80/**
  81 * vti6_tnl_lookup - fetch tunnel matching the end-point addresses
  82 *   @net: network namespace
  83 *   @remote: the address of the tunnel exit-point
  84 *   @local: the address of the tunnel entry-point
  85 *
  86 * Return:
  87 *   tunnel matching given end-points if found,
  88 *   else fallback tunnel if its device is up,
  89 *   else %NULL
  90 **/
  91static struct ip6_tnl *
  92vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
  93                const struct in6_addr *local)
  94{
  95        unsigned int hash = HASH(remote, local);
  96        struct ip6_tnl *t;
  97        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
  98        struct in6_addr any;
  99
 100        for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 101                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 102                    ipv6_addr_equal(remote, &t->parms.raddr) &&
 103                    (t->dev->flags & IFF_UP))
 104                        return t;
 105        }
 106
 107        memset(&any, 0, sizeof(any));
 108        hash = HASH(&any, local);
 109        for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 110                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 111                    (t->dev->flags & IFF_UP))
 112                        return t;
 113        }
 114
 115        hash = HASH(remote, &any);
 116        for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 117                if (ipv6_addr_equal(remote, &t->parms.raddr) &&
 118                    (t->dev->flags & IFF_UP))
 119                        return t;
 120        }
 121
 122        t = rcu_dereference(ip6n->tnls_wc[0]);
 123        if (t && (t->dev->flags & IFF_UP))
 124                return t;
 125
 126        return NULL;
 127}
 128
 129/**
 130 * vti6_tnl_bucket - get head of list matching given tunnel parameters
 131 *   @p: parameters containing tunnel end-points
 132 *
 133 * Description:
 134 *   vti6_tnl_bucket() returns the head of the list matching the
 135 *   &struct in6_addr entries laddr and raddr in @p.
 136 *
 137 * Return: head of IPv6 tunnel list
 138 **/
 139static struct ip6_tnl __rcu **
 140vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p)
 141{
 142        const struct in6_addr *remote = &p->raddr;
 143        const struct in6_addr *local = &p->laddr;
 144        unsigned int h = 0;
 145        int prio = 0;
 146
 147        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 148                prio = 1;
 149                h = HASH(remote, local);
 150        }
 151        return &ip6n->tnls[prio][h];
 152}
 153
 154static void
 155vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t)
 156{
 157        struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms);
 158
 159        rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 160        rcu_assign_pointer(*tp, t);
 161}
 162
 163static void
 164vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
 165{
 166        struct ip6_tnl __rcu **tp;
 167        struct ip6_tnl *iter;
 168
 169        for (tp = vti6_tnl_bucket(ip6n, &t->parms);
 170             (iter = rtnl_dereference(*tp)) != NULL;
 171             tp = &iter->next) {
 172                if (t == iter) {
 173                        rcu_assign_pointer(*tp, t->next);
 174                        break;
 175                }
 176        }
 177}
 178
    /* Release a tunnel device: free its per-cpu statistics (dev->tstats)
     * and then the net_device itself. Installed as dev->destructor by
     * vti6_dev_setup() and also called directly when tunnel creation fails.
     */
 179static void vti6_dev_free(struct net_device *dev)
 180{
 181        free_percpu(dev->tstats);
 182        free_netdev(dev);
 183}
 184
 185static int vti6_tnl_create2(struct net_device *dev)
 186{
 187        struct ip6_tnl *t = netdev_priv(dev);
 188        struct net *net = dev_net(dev);
 189        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 190        int err;
 191
 192        err = register_netdevice(dev);
 193        if (err < 0)
 194                goto out;
 195
 196        strcpy(t->parms.name, dev->name);
 197        dev->rtnl_link_ops = &vti6_link_ops;
 198
 199        dev_hold(dev);
 200        vti6_tnl_link(ip6n, t);
 201
 202        return 0;
 203
 204out:
 205        return err;
 206}
 207
 208static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 209{
 210        struct net_device *dev;
 211        struct ip6_tnl *t;
 212        char name[IFNAMSIZ];
 213        int err;
 214
 215        if (p->name[0])
 216                strlcpy(name, p->name, IFNAMSIZ);
 217        else
 218                sprintf(name, "ip6_vti%%d");
 219
 220        dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
 221        if (dev == NULL)
 222                goto failed;
 223
 224        dev_net_set(dev, net);
 225
 226        t = netdev_priv(dev);
 227        t->parms = *p;
 228        t->net = dev_net(dev);
 229
 230        err = vti6_tnl_create2(dev);
 231        if (err < 0)
 232                goto failed_free;
 233
 234        return t;
 235
 236failed_free:
 237        vti6_dev_free(dev);
 238failed:
 239        return NULL;
 240}
 241
 242/**
 243 * vti6_locate - find or create tunnel matching given parameters
 244 *   @net: network namespace
 245 *   @p: tunnel parameters
 246 *   @create: != 0 if allowed to create new tunnel if no match found
 247 *
 248 * Description:
 249 *   vti6_locate() first tries to locate an existing tunnel
 250 *   based on @parms. If this is unsuccessful, but @create is set a new
 251 *   tunnel device is created and registered for use.
 252 *
 253 * Return:
 254 *   matching tunnel or NULL
 255 **/
 256static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
 257                                   int create)
 258{
 259        const struct in6_addr *remote = &p->raddr;
 260        const struct in6_addr *local = &p->laddr;
 261        struct ip6_tnl __rcu **tp;
 262        struct ip6_tnl *t;
 263        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 264
 265        for (tp = vti6_tnl_bucket(ip6n, p);
 266             (t = rtnl_dereference(*tp)) != NULL;
 267             tp = &t->next) {
 268                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 269                    ipv6_addr_equal(remote, &t->parms.raddr)) {
 270                        if (create)
 271                                return NULL;
 272
 273                        return t;
 274                }
 275        }
 276        if (!create)
 277                return NULL;
 278        return vti6_tnl_create(net, p);
 279}
 280
 281/**
 282 * vti6_dev_uninit - tunnel device uninitializer
 283 *   @dev: the device to be destroyed
 284 *
 285 * Description:
 286 *   vti6_dev_uninit() removes tunnel from its list
 287 **/
 288static void vti6_dev_uninit(struct net_device *dev)
 289{
 290        struct ip6_tnl *t = netdev_priv(dev);
 291        struct net *net = dev_net(dev);
 292        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 293
 294        if (dev == ip6n->fb_tnl_dev)
 295                RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 296        else
 297                vti6_tnl_unlink(ip6n, t);
 298        dev_put(dev);
 299}
 300
 301static int vti6_rcv(struct sk_buff *skb)
 302{
 303        struct ip6_tnl *t;
 304        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 305
 306        rcu_read_lock();
 307        t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
 308        if (t != NULL) {
 309                if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
 310                        rcu_read_unlock();
 311                        goto discard;
 312                }
 313
 314                if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 315                        rcu_read_unlock();
 316                        return 0;
 317                }
 318
 319                if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
 320                        t->dev->stats.rx_dropped++;
 321                        rcu_read_unlock();
 322                        goto discard;
 323                }
 324
 325                XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
 326                skb->mark = be32_to_cpu(t->parms.i_key);
 327
 328                rcu_read_unlock();
 329
 330                return xfrm6_rcv(skb);
 331        }
 332        rcu_read_unlock();
 333        return -EINVAL;
 334discard:
 335        kfree_skb(skb);
 336        return 0;
 337}
 338
 339static int vti6_rcv_cb(struct sk_buff *skb, int err)
 340{
 341        unsigned short family;
 342        struct net_device *dev;
 343        struct pcpu_sw_netstats *tstats;
 344        struct xfrm_state *x;
 345        struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
 346
 347        if (!t)
 348                return 1;
 349
 350        dev = t->dev;
 351
 352        if (err) {
 353                dev->stats.rx_errors++;
 354                dev->stats.rx_dropped++;
 355
 356                return 0;
 357        }
 358
 359        x = xfrm_input_state(skb);
 360        family = x->inner_mode->afinfo->family;
 361
 362        if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
 363                return -EPERM;
 364
 365        skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
 366        skb->dev = dev;
 367
 368        tstats = this_cpu_ptr(dev->tstats);
 369        u64_stats_update_begin(&tstats->syncp);
 370        tstats->rx_packets++;
 371        tstats->rx_bytes += skb->len;
 372        u64_stats_update_end(&tstats->syncp);
 373
 374        return 0;
 375}
 376
 377/**
 378 * vti6_addr_conflict - compare packet addresses to tunnel's own
 379 *   @t: the outgoing tunnel device
 380 *   @hdr: IPv6 header from the incoming packet
 381 *
 382 * Description:
 383 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 384 *   doesn't match source of incoming packet.
 385 *
 386 * Return:
 387 *   1 if conflict,
 388 *   0 else
 389 **/
 390static inline bool
 391vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 392{
 393        return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 394}
 395
 396static bool vti6_state_check(const struct xfrm_state *x,
 397                             const struct in6_addr *dst,
 398                             const struct in6_addr *src)
 399{
 400        xfrm_address_t *daddr = (xfrm_address_t *)dst;
 401        xfrm_address_t *saddr = (xfrm_address_t *)src;
 402
 403        /* if there is no transform then this tunnel is not functional.
 404         * Or if the xfrm is not mode tunnel.
 405         */
 406        if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
 407            x->props.family != AF_INET6)
 408                return false;
 409
 410        if (ipv6_addr_any(dst))
 411                return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6);
 412
 413        if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6))
 414                return false;
 415
 416        return true;
 417}
 418
 419/**
 420 * vti6_xmit - send a packet
 421 *   @skb: the outgoing socket buffer
 422 *   @dev: the outgoing tunnel device
 423 *   @fl: the flow informations for the xfrm_lookup
 424 **/
 425static int
 426vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 427{
 428        struct ip6_tnl *t = netdev_priv(dev);
 429        struct net_device_stats *stats = &t->dev->stats;
 430        struct dst_entry *dst = skb_dst(skb);
 431        struct net_device *tdev;
 432        struct xfrm_state *x;
 433        int err = -1;
 434
 435        if (!dst)
 436                goto tx_err_link_failure;
 437
 438        dst_hold(dst);
 439        dst = xfrm_lookup(t->net, dst, fl, NULL, 0);
 440        if (IS_ERR(dst)) {
 441                err = PTR_ERR(dst);
 442                dst = NULL;
 443                goto tx_err_link_failure;
 444        }
 445
 446        x = dst->xfrm;
 447        if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr))
 448                goto tx_err_link_failure;
 449
 450        if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr,
 451                              (const struct in6_addr *)&x->id.daddr))
 452                goto tx_err_link_failure;
 453
 454        tdev = dst->dev;
 455
 456        if (tdev == dev) {
 457                stats->collisions++;
 458                net_warn_ratelimited("%s: Local routing loop detected!\n",
 459                                     t->parms.name);
 460                goto tx_err_dst_release;
 461        }
 462
 463        skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
 464        skb_dst_set(skb, dst);
 465        skb->dev = skb_dst(skb)->dev;
 466
 467        err = dst_output(skb);
 468        if (net_xmit_eval(err) == 0) {
 469                struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
 470
 471                u64_stats_update_begin(&tstats->syncp);
 472                tstats->tx_bytes += skb->len;
 473                tstats->tx_packets++;
 474                u64_stats_update_end(&tstats->syncp);
 475        } else {
 476                stats->tx_errors++;
 477                stats->tx_aborted_errors++;
 478        }
 479
 480        return 0;
 481tx_err_link_failure:
 482        stats->tx_carrier_errors++;
 483        dst_link_failure(skb);
 484tx_err_dst_release:
 485        dst_release(dst);
 486        return err;
 487}
 488
 489static netdev_tx_t
 490vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 491{
 492        struct ip6_tnl *t = netdev_priv(dev);
 493        struct net_device_stats *stats = &t->dev->stats;
 494        struct ipv6hdr *ipv6h;
 495        struct flowi fl;
 496        int ret;
 497
 498        memset(&fl, 0, sizeof(fl));
 499        skb->mark = be32_to_cpu(t->parms.o_key);
 500
 501        switch (skb->protocol) {
 502        case htons(ETH_P_IPV6):
 503                ipv6h = ipv6_hdr(skb);
 504
 505                if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
 506                    vti6_addr_conflict(t, ipv6h))
 507                        goto tx_err;
 508
 509                xfrm_decode_session(skb, &fl, AF_INET6);
 510                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 511                break;
 512        case htons(ETH_P_IP):
 513                xfrm_decode_session(skb, &fl, AF_INET);
 514                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 515                break;
 516        default:
 517                goto tx_err;
 518        }
 519
 520        ret = vti6_xmit(skb, dev, &fl);
 521        if (ret < 0)
 522                goto tx_err;
 523
 524        return NETDEV_TX_OK;
 525
 526tx_err:
 527        stats->tx_errors++;
 528        stats->tx_dropped++;
 529        kfree_skb(skb);
 530        return NETDEV_TX_OK;
 531}
 532
 533static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 534                    u8 type, u8 code, int offset, __be32 info)
 535{
 536        __be32 spi;
 537        __u32 mark;
 538        struct xfrm_state *x;
 539        struct ip6_tnl *t;
 540        struct ip_esp_hdr *esph;
 541        struct ip_auth_hdr *ah;
 542        struct ip_comp_hdr *ipch;
 543        struct net *net = dev_net(skb->dev);
 544        const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
 545        int protocol = iph->nexthdr;
 546
 547        t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr);
 548        if (!t)
 549                return -1;
 550
 551        mark = be32_to_cpu(t->parms.o_key);
 552
 553        switch (protocol) {
 554        case IPPROTO_ESP:
 555                esph = (struct ip_esp_hdr *)(skb->data + offset);
 556                spi = esph->spi;
 557                break;
 558        case IPPROTO_AH:
 559                ah = (struct ip_auth_hdr *)(skb->data + offset);
 560                spi = ah->spi;
 561                break;
 562        case IPPROTO_COMP:
 563                ipch = (struct ip_comp_hdr *)(skb->data + offset);
 564                spi = htonl(ntohs(ipch->cpi));
 565                break;
 566        default:
 567                return 0;
 568        }
 569
 570        if (type != ICMPV6_PKT_TOOBIG &&
 571            type != NDISC_REDIRECT)
 572                return 0;
 573
 574        x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
 575                              spi, protocol, AF_INET6);
 576        if (!x)
 577                return 0;
 578
 579        if (type == NDISC_REDIRECT)
 580                ip6_redirect(skb, net, skb->dev->ifindex, 0);
 581        else
 582                ip6_update_pmtu(skb, net, info, 0, 0);
 583        xfrm_state_put(x);
 584
 585        return 0;
 586}
 587
 588static void vti6_link_config(struct ip6_tnl *t)
 589{
 590        struct net_device *dev = t->dev;
 591        struct __ip6_tnl_parm *p = &t->parms;
 592
 593        memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
 594        memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 595
 596        p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV |
 597                      IP6_TNL_F_CAP_PER_PACKET);
 598        p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
 599
 600        if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV)
 601                dev->flags |= IFF_POINTOPOINT;
 602        else
 603                dev->flags &= ~IFF_POINTOPOINT;
 604
 605        dev->iflink = p->link;
 606}
 607
 608/**
 609 * vti6_tnl_change - update the tunnel parameters
 610 *   @t: tunnel to be changed
 611 *   @p: tunnel configuration parameters
 612 *
 613 * Description:
 614 *   vti6_tnl_change() updates the tunnel parameters
 615 **/
 616static int
 617vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
 618{
 619        t->parms.laddr = p->laddr;
 620        t->parms.raddr = p->raddr;
 621        t->parms.link = p->link;
 622        t->parms.i_key = p->i_key;
 623        t->parms.o_key = p->o_key;
 624        t->parms.proto = p->proto;
 625        ip6_tnl_dst_reset(t);
 626        vti6_link_config(t);
 627        return 0;
 628}
 629
 630static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
 631{
 632        struct net *net = dev_net(t->dev);
 633        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 634        int err;
 635
 636        vti6_tnl_unlink(ip6n, t);
 637        synchronize_net();
 638        err = vti6_tnl_change(t, p);
 639        vti6_tnl_link(ip6n, t);
 640        netdev_state_change(t->dev);
 641        return err;
 642}
 643
 644static void
 645vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u)
 646{
 647        p->laddr = u->laddr;
 648        p->raddr = u->raddr;
 649        p->link = u->link;
 650        p->i_key = u->i_key;
 651        p->o_key = u->o_key;
 652        p->proto = u->proto;
 653
 654        memcpy(p->name, u->name, sizeof(u->name));
 655}
 656
 657static void
 658vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
 659{
 660        u->laddr = p->laddr;
 661        u->raddr = p->raddr;
 662        u->link = p->link;
 663        u->i_key = p->i_key;
 664        u->o_key = p->o_key;
 665        u->proto = p->proto;
 666
 667        memcpy(u->name, p->name, sizeof(u->name));
 668}
 669
 670/**
 671 * vti6_ioctl - configure vti6 tunnels from userspace
 672 *   @dev: virtual device associated with tunnel
 673 *   @ifr: parameters passed from userspace
 674 *   @cmd: command to be performed
 675 *
 676 * Description:
 677 *   vti6_ioctl() is used for managing vti6 tunnels
 678 *   from userspace.
 679 *
 680 *   The possible commands are the following:
 681 *     %SIOCGETTUNNEL: get tunnel parameters for device
 682 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
 683 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
 684 *     %SIOCDELTUNNEL: delete tunnel
 685 *
 686 *   The fallback device "ip6_vti0", created during module
 687 *   initialization, can be used for creating other tunnel devices.
 688 *
 689 * Return:
 690 *   0 on success,
 691 *   %-EFAULT if unable to copy data to or from userspace,
 692 *   %-EPERM if current process hasn't %CAP_NET_ADMIN set, or if asked
      *   to delete the fallback tunnel,
 693 *   %-EINVAL if passed tunnel parameters are invalid or @cmd is unknown,
 694 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 695 *   %-ENOBUFS if %SIOCADDTUNNEL did not yield a new tunnel,
      *   %-ENOENT if no tunnel matches for %SIOCCHGTUNNEL or %SIOCDELTUNNEL
 696 **/
 697static int
 698vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 699{
 700        int err = 0;
 701        struct ip6_tnl_parm2 p;
 702        struct __ip6_tnl_parm p1;
 703        struct ip6_tnl *t = NULL;
 704        struct net *net = dev_net(dev);
 705        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 706
 707        switch (cmd) {
 708        case SIOCGETTUNNEL:
                    /* On the fallback device the caller names the tunnel by
                     * its end-points; on any other device the device's own
                     * tunnel is reported.
                     */
 709                if (dev == ip6n->fb_tnl_dev) {
 710                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 711                                err = -EFAULT;
 712                                break;
 713                        }
 714                        vti6_parm_from_user(&p1, &p);
 715                        t = vti6_locate(net, &p1, 0);
 716                } else {
 717                        memset(&p, 0, sizeof(p));
 718                }
                    /* no match (or not the fallback device): use dev itself */
 719                if (t == NULL)
 720                        t = netdev_priv(dev);
 721                vti6_parm_to_user(&p, &t->parms);
 722                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 723                        err = -EFAULT;
 724                break;
 725        case SIOCADDTUNNEL:
 726        case SIOCCHGTUNNEL:
                    /* err is preset before each check so that a plain
                     * 'break' reports the matching failure.
                     */
 727                err = -EPERM;
 728                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 729                        break;
 730                err = -EFAULT;
 731                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 732                        break;
 733                err = -EINVAL;
 734                if (p.proto != IPPROTO_IPV6  && p.proto != 0)
 735                        break;
 736                vti6_parm_from_user(&p1, &p);
                    /* with SIOCADDTUNNEL this creates the tunnel, and yields
                     * NULL if one with the same end-points already exists
                     */
 737                t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL);
 738                if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
 739                        if (t != NULL) {
                                    /* end-points already used by another device */
 740                                if (t->dev != dev) {
 741                                        err = -EEXIST;
 742                                        break;
 743                                }
 744                        } else
 745                                t = netdev_priv(dev);
 746
 747                        err = vti6_update(t, &p1);
 748                }
 749                if (t) {
                            /* note: overrides the result of vti6_update() above */
 750                        err = 0;
 751                        vti6_parm_to_user(&p, &t->parms);
 752                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 753                                err = -EFAULT;
 754
 755                } else
 756                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 757                break;
 758        case SIOCDELTUNNEL:
 759                err = -EPERM;
 760                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 761                        break;
 762
                    /* On the fallback device the victim is named by its
                     * end-points; the fallback tunnel itself is refused.
                     */
 763                if (dev == ip6n->fb_tnl_dev) {
 764                        err = -EFAULT;
 765                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 766                                break;
 767                        err = -ENOENT;
 768                        vti6_parm_from_user(&p1, &p);
 769                        t = vti6_locate(net, &p1, 0);
 770                        if (t == NULL)
 771                                break;
 772                        err = -EPERM;
 773                        if (t->dev == ip6n->fb_tnl_dev)
 774                                break;
 775                        dev = t->dev;
 776                }
 777                err = 0;
 778                unregister_netdevice(dev);
 779                break;
 780        default:
 781                err = -EINVAL;
 782        }
 783        return err;
 784}
 785
 786/**
 787 * vti6_tnl_change_mtu - change mtu manually for tunnel device
 788 *   @dev: virtual device associated with tunnel
 789 *   @new_mtu: the new mtu
 790 *
 791 * Return:
 792 *   0 on success,
 793 *   %-EINVAL if mtu too small
 794 **/
 795static int vti6_change_mtu(struct net_device *dev, int new_mtu)
 796{
 797        if (new_mtu < IPV6_MIN_MTU)
 798                return -EINVAL;
 799
 800        dev->mtu = new_mtu;
 801        return 0;
 802}
 803
    /* net_device callbacks of a vti6 tunnel device; installed by
     * vti6_dev_setup().
     */
 804static const struct net_device_ops vti6_netdev_ops = {
 805        .ndo_init       = vti6_dev_init,
 806        .ndo_uninit     = vti6_dev_uninit,
 807        .ndo_start_xmit = vti6_tnl_xmit,
 808        .ndo_do_ioctl   = vti6_ioctl,
 809        .ndo_change_mtu = vti6_change_mtu,
 810        .ndo_get_stats64 = ip_tunnel_get_stats64,
 811};
 812
 813/**
 814 * vti6_dev_setup - setup virtual tunnel device
 815 *   @dev: virtual device associated with tunnel
 816 *
 817 * Description:
 818 *   Initialize function pointers and device parameters
 819 **/
 820static void vti6_dev_setup(struct net_device *dev)
 821{
 822        dev->netdev_ops = &vti6_netdev_ops;
 823        dev->destructor = vti6_dev_free;
 824
 825        dev->type = ARPHRD_TUNNEL6;
 826        dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr);
 827        dev->mtu = ETH_DATA_LEN;
 828        dev->flags |= IFF_NOARP;
 829        dev->addr_len = sizeof(struct in6_addr);
 830        netif_keep_dst(dev);
 831}
 832
 833/**
 834 * vti6_dev_init_gen - general initializer for all tunnel devices
 835 *   @dev: virtual device associated with tunnel
 836 **/
 837static inline int vti6_dev_init_gen(struct net_device *dev)
 838{
 839        struct ip6_tnl *t = netdev_priv(dev);
 840
 841        t->dev = dev;
 842        t->net = dev_net(dev);
 843        dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 844        if (!dev->tstats)
 845                return -ENOMEM;
 846        return 0;
 847}
 848
 849/**
 850 * vti6_dev_init - initializer for all non fallback tunnel devices
 851 *   @dev: virtual device associated with tunnel
 852 **/
 853static int vti6_dev_init(struct net_device *dev)
 854{
 855        struct ip6_tnl *t = netdev_priv(dev);
 856        int err = vti6_dev_init_gen(dev);
 857
 858        if (err)
 859                return err;
 860        vti6_link_config(t);
 861        return 0;
 862}
 863
 864/**
 865 * vti6_fb_tnl_dev_init - initializer for fallback tunnel device
 866 *   @dev: fallback device
 867 *
 868 * Return: 0
 869 **/
 870static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
 871{
 872        struct ip6_tnl *t = netdev_priv(dev);
 873        struct net *net = dev_net(dev);
 874        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 875
 876        t->parms.proto = IPPROTO_IPV6;
 877        dev_hold(dev);
 878
 879        rcu_assign_pointer(ip6n->tnls_wc[0], t);
 880        return 0;
 881}
 882
    /* Netlink attribute validation hook: performs no checks and accepts
     * every request.
     */
 883static int vti6_validate(struct nlattr *tb[], struct nlattr *data[])
 884{
 885        return 0;
 886}
 887
 888static void vti6_netlink_parms(struct nlattr *data[],
 889                               struct __ip6_tnl_parm *parms)
 890{
 891        memset(parms, 0, sizeof(*parms));
 892
 893        if (!data)
 894                return;
 895
 896        if (data[IFLA_VTI_LINK])
 897                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 898
 899        if (data[IFLA_VTI_LOCAL])
 900                nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL],
 901                           sizeof(struct in6_addr));
 902
 903        if (data[IFLA_VTI_REMOTE])
 904                nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE],
 905                           sizeof(struct in6_addr));
 906
 907        if (data[IFLA_VTI_IKEY])
 908                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
 909
 910        if (data[IFLA_VTI_OKEY])
 911                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 912}
 913
 914static int vti6_newlink(struct net *src_net, struct net_device *dev,
 915                        struct nlattr *tb[], struct nlattr *data[])
 916{
 917        struct net *net = dev_net(dev);
 918        struct ip6_tnl *nt;
 919
 920        nt = netdev_priv(dev);
 921        vti6_netlink_parms(data, &nt->parms);
 922
 923        nt->parms.proto = IPPROTO_IPV6;
 924
 925        if (vti6_locate(net, &nt->parms, 0))
 926                return -EEXIST;
 927
 928        return vti6_tnl_create2(dev);
 929}
 930
 931static void vti6_dellink(struct net_device *dev, struct list_head *head)
 932{
 933        struct net *net = dev_net(dev);
 934        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 935
 936        if (dev != ip6n->fb_tnl_dev)
 937                unregister_netdevice_queue(dev, head);
 938}
 939
 940static int vti6_changelink(struct net_device *dev, struct nlattr *tb[],
 941                           struct nlattr *data[])
 942{
 943        struct ip6_tnl *t;
 944        struct __ip6_tnl_parm p;
 945        struct net *net = dev_net(dev);
 946        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 947
 948        if (dev == ip6n->fb_tnl_dev)
 949                return -EINVAL;
 950
 951        vti6_netlink_parms(data, &p);
 952
 953        t = vti6_locate(net, &p, 0);
 954
 955        if (t) {
 956                if (t->dev != dev)
 957                        return -EEXIST;
 958        } else
 959                t = netdev_priv(dev);
 960
 961        return vti6_update(t, &p);
 962}
 963
 964static size_t vti6_get_size(const struct net_device *dev)
 965{
 966        return
 967                /* IFLA_VTI_LINK */
 968                nla_total_size(4) +
 969                /* IFLA_VTI_LOCAL */
 970                nla_total_size(sizeof(struct in6_addr)) +
 971                /* IFLA_VTI_REMOTE */
 972                nla_total_size(sizeof(struct in6_addr)) +
 973                /* IFLA_VTI_IKEY */
 974                nla_total_size(4) +
 975                /* IFLA_VTI_OKEY */
 976                nla_total_size(4) +
 977                0;
 978}
 979
 980static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev)
 981{
 982        struct ip6_tnl *tunnel = netdev_priv(dev);
 983        struct __ip6_tnl_parm *parm = &tunnel->parms;
 984
 985        if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) ||
 986            nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr),
 987                    &parm->laddr) ||
 988            nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr),
 989                    &parm->raddr) ||
 990            nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) ||
 991            nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key))
 992                goto nla_put_failure;
 993        return 0;
 994
 995nla_put_failure:
 996        return -EMSGSIZE;
 997}
 998
 999static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = {
1000        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
1001        [IFLA_VTI_LOCAL]        = { .len = sizeof(struct in6_addr) },
1002        [IFLA_VTI_REMOTE]       = { .len = sizeof(struct in6_addr) },
1003        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
1004        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
1005};
1006
1007static struct rtnl_link_ops vti6_link_ops __read_mostly = {
1008        .kind           = "vti6",
1009        .maxtype        = IFLA_VTI_MAX,
1010        .policy         = vti6_policy,
1011        .priv_size      = sizeof(struct ip6_tnl),
1012        .setup          = vti6_dev_setup,
1013        .validate       = vti6_validate,
1014        .newlink        = vti6_newlink,
1015        .dellink        = vti6_dellink,
1016        .changelink     = vti6_changelink,
1017        .get_size       = vti6_get_size,
1018        .fill_info      = vti6_fill_info,
1019        .get_link_net   = ip6_tnl_get_link_net,
1020};
1021
1022static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
1023{
1024        int h;
1025        struct ip6_tnl *t;
1026        LIST_HEAD(list);
1027
1028        for (h = 0; h < HASH_SIZE; h++) {
1029                t = rtnl_dereference(ip6n->tnls_r_l[h]);
1030                while (t != NULL) {
1031                        unregister_netdevice_queue(t->dev, &list);
1032                        t = rtnl_dereference(t->next);
1033                }
1034        }
1035
1036        t = rtnl_dereference(ip6n->tnls_wc[0]);
1037        unregister_netdevice_queue(t->dev, &list);
1038        unregister_netdevice_many(&list);
1039}
1040
1041static int __net_init vti6_init_net(struct net *net)
1042{
1043        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
1044        struct ip6_tnl *t = NULL;
1045        int err;
1046
1047        ip6n->tnls[0] = ip6n->tnls_wc;
1048        ip6n->tnls[1] = ip6n->tnls_r_l;
1049
1050        err = -ENOMEM;
1051        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0",
1052                                        NET_NAME_UNKNOWN, vti6_dev_setup);
1053
1054        if (!ip6n->fb_tnl_dev)
1055                goto err_alloc_dev;
1056        dev_net_set(ip6n->fb_tnl_dev, net);
1057        ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops;
1058
1059        err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1060        if (err < 0)
1061                goto err_register;
1062
1063        err = register_netdev(ip6n->fb_tnl_dev);
1064        if (err < 0)
1065                goto err_register;
1066
1067        t = netdev_priv(ip6n->fb_tnl_dev);
1068
1069        strcpy(t->parms.name, ip6n->fb_tnl_dev->name);
1070        return 0;
1071
1072err_register:
1073        vti6_dev_free(ip6n->fb_tnl_dev);
1074err_alloc_dev:
1075        return err;
1076}
1077
1078static void __net_exit vti6_exit_net(struct net *net)
1079{
1080        struct vti6_net *ip6n = net_generic(net, vti6_net_id);
1081
1082        rtnl_lock();
1083        vti6_destroy_tunnels(ip6n);
1084        rtnl_unlock();
1085}
1086
1087static struct pernet_operations vti6_net_ops = {
1088        .init = vti6_init_net,
1089        .exit = vti6_exit_net,
1090        .id   = &vti6_net_id,
1091        .size = sizeof(struct vti6_net),
1092};
1093
1094static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
1095        .handler        =       vti6_rcv,
1096        .cb_handler     =       vti6_rcv_cb,
1097        .err_handler    =       vti6_err,
1098        .priority       =       100,
1099};
1100
1101static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
1102        .handler        =       vti6_rcv,
1103        .cb_handler     =       vti6_rcv_cb,
1104        .err_handler    =       vti6_err,
1105        .priority       =       100,
1106};
1107
1108static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
1109        .handler        =       vti6_rcv,
1110        .cb_handler     =       vti6_rcv_cb,
1111        .err_handler    =       vti6_err,
1112        .priority       =       100,
1113};
1114
1115/**
1116 * vti6_tunnel_init - register protocol and reserve needed resources
1117 *
1118 * Return: 0 on success
1119 **/
1120static int __init vti6_tunnel_init(void)
1121{
1122        const char *msg;
1123        int err;
1124
1125        msg = "tunnel device";
1126        err = register_pernet_device(&vti6_net_ops);
1127        if (err < 0)
1128                goto pernet_dev_failed;
1129
1130        msg = "tunnel protocols";
1131        err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP);
1132        if (err < 0)
1133                goto xfrm_proto_esp_failed;
1134        err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH);
1135        if (err < 0)
1136                goto xfrm_proto_ah_failed;
1137        err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP);
1138        if (err < 0)
1139                goto xfrm_proto_comp_failed;
1140
1141        msg = "netlink interface";
1142        err = rtnl_link_register(&vti6_link_ops);
1143        if (err < 0)
1144                goto rtnl_link_failed;
1145
1146        return 0;
1147
1148rtnl_link_failed:
1149        xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
1150xfrm_proto_comp_failed:
1151        xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
1152xfrm_proto_ah_failed:
1153        xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
1154xfrm_proto_esp_failed:
1155        unregister_pernet_device(&vti6_net_ops);
1156pernet_dev_failed:
1157        pr_err("vti6 init: failed to register %s\n", msg);
1158        return err;
1159}
1160
1161/**
1162 * vti6_tunnel_cleanup - free resources and unregister protocol
1163 **/
1164static void __exit vti6_tunnel_cleanup(void)
1165{
1166        rtnl_link_unregister(&vti6_link_ops);
1167        xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP);
1168        xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH);
1169        xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP);
1170        unregister_pernet_device(&vti6_net_ops);
1171}
1172
1173module_init(vti6_tunnel_init);
1174module_exit(vti6_tunnel_cleanup);
1175MODULE_LICENSE("GPL");
1176MODULE_ALIAS_RTNL_LINK("vti6");
1177MODULE_ALIAS_NETDEV("ip6_vti0");
1178MODULE_AUTHOR("Steffen Klassert");
1179MODULE_DESCRIPTION("IPv6 virtual tunnel interface");
1180