linux/net/ipv4/ip_vti.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3: IP/IP protocol decoder modified to support
   3 *                  virtual tunnel interface
   4 *
   5 *      Authors:
   6 *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 */
  14
  15/*
  16   This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c
  17
  18   For comments look at net/ipv4/ip_gre.c --ANK
  19 */
  20
  21
  22#include <linux/capability.h>
  23#include <linux/module.h>
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/mroute.h>
  34#include <linux/init.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/if_ether.h>
  37
  38#include <net/sock.h>
  39#include <net/ip.h>
  40#include <net/icmp.h>
  41#include <net/ip_tunnels.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46
  47#define HASH_SIZE  16
  48#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
  49
  50static struct rtnl_link_ops vti_link_ops __read_mostly;
  51
  52static int vti_net_id __read_mostly;
  53struct vti_net {
  54        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
  55        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
  56        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
  57        struct ip_tunnel __rcu *tunnels_wc[1];
  58        struct ip_tunnel __rcu **tunnels[4];
  59
  60        struct net_device *fb_tunnel_dev;
  61};
  62
  63static int vti_fb_tunnel_init(struct net_device *dev);
  64static int vti_tunnel_init(struct net_device *dev);
  65static void vti_tunnel_setup(struct net_device *dev);
  66static void vti_dev_free(struct net_device *dev);
  67static int vti_tunnel_bind_dev(struct net_device *dev);
  68
  69#define VTI_XMIT(stats1, stats2) do {                           \
  70        int err;                                                \
  71        int pkt_len = skb->len;                                 \
  72        err = dst_output(skb);                                  \
  73        if (net_xmit_eval(err) == 0) {                          \
  74                u64_stats_update_begin(&(stats1)->syncp);       \
  75                (stats1)->tx_bytes += pkt_len;                  \
  76                (stats1)->tx_packets++;                         \
  77                u64_stats_update_end(&(stats1)->syncp);         \
  78        } else {                                                \
  79                (stats2)->tx_errors++;                          \
  80                (stats2)->tx_aborted_errors++;                  \
  81        }                                                       \
  82} while (0)
  83
  84
  85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
  86                                           __be32 remote, __be32 local)
  87{
  88        unsigned h0 = HASH(remote);
  89        unsigned h1 = HASH(local);
  90        struct ip_tunnel *t;
  91        struct vti_net *ipn = net_generic(net, vti_net_id);
  92
  93        for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
  94                if (local == t->parms.iph.saddr &&
  95                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  96                        return t;
  97        for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
  98                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  99                        return t;
 100
 101        for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
 102                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 103                        return t;
 104
 105        for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
 106                if (t && (t->dev->flags&IFF_UP))
 107                        return t;
 108        return NULL;
 109}
 110
 111static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
 112                                             struct ip_tunnel_parm *parms)
 113{
 114        __be32 remote = parms->iph.daddr;
 115        __be32 local = parms->iph.saddr;
 116        unsigned h = 0;
 117        int prio = 0;
 118
 119        if (remote) {
 120                prio |= 2;
 121                h ^= HASH(remote);
 122        }
 123        if (local) {
 124                prio |= 1;
 125                h ^= HASH(local);
 126        }
 127        return &ipn->tunnels[prio][h];
 128}
 129
 130static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
 131                                                  struct ip_tunnel *t)
 132{
 133        return __vti_bucket(ipn, &t->parms);
 134}
 135
 136static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
 137{
 138        struct ip_tunnel __rcu **tp;
 139        struct ip_tunnel *iter;
 140
 141        for (tp = vti_bucket(ipn, t);
 142             (iter = rtnl_dereference(*tp)) != NULL;
 143             tp = &iter->next) {
 144                if (t == iter) {
 145                        rcu_assign_pointer(*tp, t->next);
 146                        break;
 147                }
 148        }
 149}
 150
 151static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
 152{
 153        struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
 154
 155        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 156        rcu_assign_pointer(*tp, t);
 157}
 158
 159static struct ip_tunnel *vti_tunnel_locate(struct net *net,
 160                                           struct ip_tunnel_parm *parms,
 161                                           int create)
 162{
 163        __be32 remote = parms->iph.daddr;
 164        __be32 local = parms->iph.saddr;
 165        struct ip_tunnel *t, *nt;
 166        struct ip_tunnel __rcu **tp;
 167        struct net_device *dev;
 168        char name[IFNAMSIZ];
 169        struct vti_net *ipn = net_generic(net, vti_net_id);
 170
 171        for (tp = __vti_bucket(ipn, parms);
 172             (t = rtnl_dereference(*tp)) != NULL;
 173             tp = &t->next) {
 174                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 175                        return t;
 176        }
 177        if (!create)
 178                return NULL;
 179
 180        if (parms->name[0])
 181                strlcpy(name, parms->name, IFNAMSIZ);
 182        else
 183                strcpy(name, "vti%d");
 184
 185        dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
 186        if (dev == NULL)
 187                return NULL;
 188
 189        dev_net_set(dev, net);
 190
 191        nt = netdev_priv(dev);
 192        nt->parms = *parms;
 193        dev->rtnl_link_ops = &vti_link_ops;
 194
 195        vti_tunnel_bind_dev(dev);
 196
 197        if (register_netdevice(dev) < 0)
 198                goto failed_free;
 199
 200        dev_hold(dev);
 201        vti_tunnel_link(ipn, nt);
 202        return nt;
 203
 204failed_free:
 205        free_netdev(dev);
 206        return NULL;
 207}
 208
 209static void vti_tunnel_uninit(struct net_device *dev)
 210{
 211        struct net *net = dev_net(dev);
 212        struct vti_net *ipn = net_generic(net, vti_net_id);
 213
 214        vti_tunnel_unlink(ipn, netdev_priv(dev));
 215        dev_put(dev);
 216}
 217
 218static int vti_err(struct sk_buff *skb, u32 info)
 219{
 220
 221        /* All the routers (except for Linux) return only
 222         * 8 bytes of packet payload. It means, that precise relaying of
 223         * ICMP in the real Internet is absolutely infeasible.
 224         */
 225        struct iphdr *iph = (struct iphdr *)skb->data;
 226        const int type = icmp_hdr(skb)->type;
 227        const int code = icmp_hdr(skb)->code;
 228        struct ip_tunnel *t;
 229        int err;
 230
 231        switch (type) {
 232        default:
 233        case ICMP_PARAMETERPROB:
 234                return 0;
 235
 236        case ICMP_DEST_UNREACH:
 237                switch (code) {
 238                case ICMP_SR_FAILED:
 239                case ICMP_PORT_UNREACH:
 240                        /* Impossible event. */
 241                        return 0;
 242                default:
 243                        /* All others are translated to HOST_UNREACH. */
 244                        break;
 245                }
 246                break;
 247        case ICMP_TIME_EXCEEDED:
 248                if (code != ICMP_EXC_TTL)
 249                        return 0;
 250                break;
 251        }
 252
 253        err = -ENOENT;
 254
 255        t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 256        if (t == NULL)
 257                goto out;
 258
 259        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 260                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 261                                 t->parms.link, 0, IPPROTO_IPIP, 0);
 262                err = 0;
 263                goto out;
 264        }
 265
 266        err = 0;
 267        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 268                goto out;
 269
 270        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 271                t->err_count++;
 272        else
 273                t->err_count = 1;
 274        t->err_time = jiffies;
 275out:
 276        return err;
 277}
 278
 279/* We dont digest the packet therefore let the packet pass */
 280static int vti_rcv(struct sk_buff *skb)
 281{
 282        struct ip_tunnel *tunnel;
 283        const struct iphdr *iph = ip_hdr(skb);
 284
 285        tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 286        if (tunnel != NULL) {
 287                struct pcpu_tstats *tstats;
 288
 289                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 290                        return -1;
 291
 292                tstats = this_cpu_ptr(tunnel->dev->tstats);
 293                u64_stats_update_begin(&tstats->syncp);
 294                tstats->rx_packets++;
 295                tstats->rx_bytes += skb->len;
 296                u64_stats_update_end(&tstats->syncp);
 297
 298                skb->mark = 0;
 299                secpath_reset(skb);
 300                skb->dev = tunnel->dev;
 301                return 1;
 302        }
 303
 304        return -1;
 305}
 306
 307/* This function assumes it is being called from dev_queue_xmit()
 308 * and that skb is filled properly by that function.
 309 */
 310
 311static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 312{
 313        struct ip_tunnel *tunnel = netdev_priv(dev);
 314        struct pcpu_tstats *tstats;
 315        struct iphdr  *tiph = &tunnel->parms.iph;
 316        u8     tos;
 317        struct rtable *rt;              /* Route to the other host */
 318        struct net_device *tdev;        /* Device to other host */
 319        struct iphdr  *old_iph = ip_hdr(skb);
 320        __be32 dst = tiph->daddr;
 321        struct flowi4 fl4;
 322
 323        if (skb->protocol != htons(ETH_P_IP))
 324                goto tx_error;
 325
 326        tos = old_iph->tos;
 327
 328        memset(&fl4, 0, sizeof(fl4));
 329        flowi4_init_output(&fl4, tunnel->parms.link,
 330                           be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
 331                           RT_SCOPE_UNIVERSE,
 332                           IPPROTO_IPIP, 0,
 333                           dst, tiph->saddr, 0, 0);
 334        rt = ip_route_output_key(dev_net(dev), &fl4);
 335        if (IS_ERR(rt)) {
 336                dev->stats.tx_carrier_errors++;
 337                goto tx_error_icmp;
 338        }
 339        /* if there is no transform then this tunnel is not functional.
 340         * Or if the xfrm is not mode tunnel.
 341         */
 342        if (!rt->dst.xfrm ||
 343            rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
 344                dev->stats.tx_carrier_errors++;
 345                goto tx_error_icmp;
 346        }
 347        tdev = rt->dst.dev;
 348
 349        if (tdev == dev) {
 350                ip_rt_put(rt);
 351                dev->stats.collisions++;
 352                goto tx_error;
 353        }
 354
 355        if (tunnel->err_count > 0) {
 356                if (time_before(jiffies,
 357                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 358                        tunnel->err_count--;
 359                        dst_link_failure(skb);
 360                } else
 361                        tunnel->err_count = 0;
 362        }
 363
 364        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 365        skb_dst_drop(skb);
 366        skb_dst_set(skb, &rt->dst);
 367        nf_reset(skb);
 368        skb->dev = skb_dst(skb)->dev;
 369
 370        tstats = this_cpu_ptr(dev->tstats);
 371        VTI_XMIT(tstats, &dev->stats);
 372        return NETDEV_TX_OK;
 373
 374tx_error_icmp:
 375        dst_link_failure(skb);
 376tx_error:
 377        dev->stats.tx_errors++;
 378        dev_kfree_skb(skb);
 379        return NETDEV_TX_OK;
 380}
 381
 382static int vti_tunnel_bind_dev(struct net_device *dev)
 383{
 384        struct net_device *tdev = NULL;
 385        struct ip_tunnel *tunnel;
 386        struct iphdr *iph;
 387
 388        tunnel = netdev_priv(dev);
 389        iph = &tunnel->parms.iph;
 390
 391        if (iph->daddr) {
 392                struct rtable *rt;
 393                struct flowi4 fl4;
 394                memset(&fl4, 0, sizeof(fl4));
 395                flowi4_init_output(&fl4, tunnel->parms.link,
 396                                   be32_to_cpu(tunnel->parms.i_key),
 397                                   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
 398                                   IPPROTO_IPIP, 0,
 399                                   iph->daddr, iph->saddr, 0, 0);
 400                rt = ip_route_output_key(dev_net(dev), &fl4);
 401                if (!IS_ERR(rt)) {
 402                        tdev = rt->dst.dev;
 403                        ip_rt_put(rt);
 404                }
 405                dev->flags |= IFF_POINTOPOINT;
 406        }
 407
 408        if (!tdev && tunnel->parms.link)
 409                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 410
 411        if (tdev) {
 412                dev->hard_header_len = tdev->hard_header_len +
 413                                       sizeof(struct iphdr);
 414                dev->mtu = tdev->mtu;
 415        }
 416        dev->iflink = tunnel->parms.link;
 417        return dev->mtu;
 418}
 419
 420static int
 421vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 422{
 423        int err = 0;
 424        struct ip_tunnel_parm p;
 425        struct ip_tunnel *t;
 426        struct net *net = dev_net(dev);
 427        struct vti_net *ipn = net_generic(net, vti_net_id);
 428
 429        switch (cmd) {
 430        case SIOCGETTUNNEL:
 431                t = NULL;
 432                if (dev == ipn->fb_tunnel_dev) {
 433                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 434                                           sizeof(p))) {
 435                                err = -EFAULT;
 436                                break;
 437                        }
 438                        t = vti_tunnel_locate(net, &p, 0);
 439                }
 440                if (t == NULL)
 441                        t = netdev_priv(dev);
 442                memcpy(&p, &t->parms, sizeof(p));
 443                p.i_flags |= GRE_KEY | VTI_ISVTI;
 444                p.o_flags |= GRE_KEY;
 445                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 446                        err = -EFAULT;
 447                break;
 448
 449        case SIOCADDTUNNEL:
 450        case SIOCCHGTUNNEL:
 451                err = -EPERM;
 452                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 453                        goto done;
 454
 455                err = -EFAULT;
 456                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 457                        goto done;
 458
 459                err = -EINVAL;
 460                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 461                    p.iph.ihl != 5)
 462                        goto done;
 463
 464                t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 465
 466                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 467                        if (t != NULL) {
 468                                if (t->dev != dev) {
 469                                        err = -EEXIST;
 470                                        break;
 471                                }
 472                        } else {
 473                                if (((dev->flags&IFF_POINTOPOINT) &&
 474                                    !p.iph.daddr) ||
 475                                    (!(dev->flags&IFF_POINTOPOINT) &&
 476                                    p.iph.daddr)) {
 477                                        err = -EINVAL;
 478                                        break;
 479                                }
 480                                t = netdev_priv(dev);
 481                                vti_tunnel_unlink(ipn, t);
 482                                synchronize_net();
 483                                t->parms.iph.saddr = p.iph.saddr;
 484                                t->parms.iph.daddr = p.iph.daddr;
 485                                t->parms.i_key = p.i_key;
 486                                t->parms.o_key = p.o_key;
 487                                t->parms.iph.protocol = IPPROTO_IPIP;
 488                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 489                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 490                                vti_tunnel_link(ipn, t);
 491                                netdev_state_change(dev);
 492                        }
 493                }
 494
 495                if (t) {
 496                        err = 0;
 497                        if (cmd == SIOCCHGTUNNEL) {
 498                                t->parms.i_key = p.i_key;
 499                                t->parms.o_key = p.o_key;
 500                                if (t->parms.link != p.link) {
 501                                        t->parms.link = p.link;
 502                                        vti_tunnel_bind_dev(dev);
 503                                        netdev_state_change(dev);
 504                                }
 505                        }
 506                        p.i_flags |= GRE_KEY | VTI_ISVTI;
 507                        p.o_flags |= GRE_KEY;
 508                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
 509                                         sizeof(p)))
 510                                err = -EFAULT;
 511                } else
 512                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 513                break;
 514
 515        case SIOCDELTUNNEL:
 516                err = -EPERM;
 517                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 518                        goto done;
 519
 520                if (dev == ipn->fb_tunnel_dev) {
 521                        err = -EFAULT;
 522                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 523                                           sizeof(p)))
 524                                goto done;
 525                        err = -ENOENT;
 526
 527                        t = vti_tunnel_locate(net, &p, 0);
 528                        if (t == NULL)
 529                                goto done;
 530                        err = -EPERM;
 531                        if (t->dev == ipn->fb_tunnel_dev)
 532                                goto done;
 533                        dev = t->dev;
 534                }
 535                unregister_netdevice(dev);
 536                err = 0;
 537                break;
 538
 539        default:
 540                err = -EINVAL;
 541        }
 542
 543done:
 544        return err;
 545}
 546
 547static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 548{
 549        if (new_mtu < 68 || new_mtu > 0xFFF8)
 550                return -EINVAL;
 551        dev->mtu = new_mtu;
 552        return 0;
 553}
 554
 555static const struct net_device_ops vti_netdev_ops = {
 556        .ndo_init       = vti_tunnel_init,
 557        .ndo_uninit     = vti_tunnel_uninit,
 558        .ndo_start_xmit = vti_tunnel_xmit,
 559        .ndo_do_ioctl   = vti_tunnel_ioctl,
 560        .ndo_change_mtu = vti_tunnel_change_mtu,
 561        .ndo_get_stats64 = ip_tunnel_get_stats64,
 562};
 563
 564static void vti_dev_free(struct net_device *dev)
 565{
 566        free_percpu(dev->tstats);
 567        free_netdev(dev);
 568}
 569
 570static void vti_tunnel_setup(struct net_device *dev)
 571{
 572        dev->netdev_ops         = &vti_netdev_ops;
 573        dev->destructor         = vti_dev_free;
 574
 575        dev->type               = ARPHRD_TUNNEL;
 576        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 577        dev->mtu                = ETH_DATA_LEN;
 578        dev->flags              = IFF_NOARP;
 579        dev->iflink             = 0;
 580        dev->addr_len           = 4;
 581        dev->features           |= NETIF_F_NETNS_LOCAL;
 582        dev->features           |= NETIF_F_LLTX;
 583        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 584}
 585
 586static int vti_tunnel_init(struct net_device *dev)
 587{
 588        struct ip_tunnel *tunnel = netdev_priv(dev);
 589
 590        tunnel->dev = dev;
 591        strcpy(tunnel->parms.name, dev->name);
 592
 593        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 594        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 595
 596        dev->tstats = alloc_percpu(struct pcpu_tstats);
 597        if (!dev->tstats)
 598                return -ENOMEM;
 599
 600        return 0;
 601}
 602
 603static int __net_init vti_fb_tunnel_init(struct net_device *dev)
 604{
 605        struct ip_tunnel *tunnel = netdev_priv(dev);
 606        struct iphdr *iph = &tunnel->parms.iph;
 607        struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
 608
 609        iph->version            = 4;
 610        iph->protocol           = IPPROTO_IPIP;
 611        iph->ihl                = 5;
 612
 613        dev_hold(dev);
 614        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 615        return 0;
 616}
 617
 618static struct xfrm_tunnel vti_handler __read_mostly = {
 619        .handler        =       vti_rcv,
 620        .err_handler    =       vti_err,
 621        .priority       =       1,
 622};
 623
 624static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
 625{
 626        int prio;
 627
 628        for (prio = 1; prio < 4; prio++) {
 629                int h;
 630                for (h = 0; h < HASH_SIZE; h++) {
 631                        struct ip_tunnel *t;
 632
 633                        t = rtnl_dereference(ipn->tunnels[prio][h]);
 634                        while (t != NULL) {
 635                                unregister_netdevice_queue(t->dev, head);
 636                                t = rtnl_dereference(t->next);
 637                        }
 638                }
 639        }
 640}
 641
 642static int __net_init vti_init_net(struct net *net)
 643{
 644        int err;
 645        struct vti_net *ipn = net_generic(net, vti_net_id);
 646
 647        ipn->tunnels[0] = ipn->tunnels_wc;
 648        ipn->tunnels[1] = ipn->tunnels_l;
 649        ipn->tunnels[2] = ipn->tunnels_r;
 650        ipn->tunnels[3] = ipn->tunnels_r_l;
 651
 652        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 653                                          "ip_vti0",
 654                                          vti_tunnel_setup);
 655        if (!ipn->fb_tunnel_dev) {
 656                err = -ENOMEM;
 657                goto err_alloc_dev;
 658        }
 659        dev_net_set(ipn->fb_tunnel_dev, net);
 660
 661        err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
 662        if (err)
 663                goto err_reg_dev;
 664        ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
 665
 666        err = register_netdev(ipn->fb_tunnel_dev);
 667        if (err)
 668                goto err_reg_dev;
 669        return 0;
 670
 671err_reg_dev:
 672        vti_dev_free(ipn->fb_tunnel_dev);
 673err_alloc_dev:
 674        /* nothing */
 675        return err;
 676}
 677
 678static void __net_exit vti_exit_net(struct net *net)
 679{
 680        struct vti_net *ipn = net_generic(net, vti_net_id);
 681        LIST_HEAD(list);
 682
 683        rtnl_lock();
 684        vti_destroy_tunnels(ipn, &list);
 685        unregister_netdevice_many(&list);
 686        rtnl_unlock();
 687}
 688
 689static struct pernet_operations vti_net_ops = {
 690        .init = vti_init_net,
 691        .exit = vti_exit_net,
 692        .id   = &vti_net_id,
 693        .size = sizeof(struct vti_net),
 694};
 695
 696static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 697{
 698        return 0;
 699}
 700
 701static void vti_netlink_parms(struct nlattr *data[],
 702                              struct ip_tunnel_parm *parms)
 703{
 704        memset(parms, 0, sizeof(*parms));
 705
 706        parms->iph.protocol = IPPROTO_IPIP;
 707
 708        if (!data)
 709                return;
 710
 711        if (data[IFLA_VTI_LINK])
 712                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 713
 714        if (data[IFLA_VTI_IKEY])
 715                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
 716
 717        if (data[IFLA_VTI_OKEY])
 718                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 719
 720        if (data[IFLA_VTI_LOCAL])
 721                parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
 722
 723        if (data[IFLA_VTI_REMOTE])
 724                parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
 725
 726}
 727
 728static int vti_newlink(struct net *src_net, struct net_device *dev,
 729                       struct nlattr *tb[], struct nlattr *data[])
 730{
 731        struct ip_tunnel *nt;
 732        struct net *net = dev_net(dev);
 733        struct vti_net *ipn = net_generic(net, vti_net_id);
 734        int mtu;
 735        int err;
 736
 737        nt = netdev_priv(dev);
 738        vti_netlink_parms(data, &nt->parms);
 739
 740        if (vti_tunnel_locate(net, &nt->parms, 0))
 741                return -EEXIST;
 742
 743        mtu = vti_tunnel_bind_dev(dev);
 744        if (!tb[IFLA_MTU])
 745                dev->mtu = mtu;
 746
 747        err = register_netdevice(dev);
 748        if (err)
 749                goto out;
 750
 751        dev_hold(dev);
 752        vti_tunnel_link(ipn, nt);
 753
 754out:
 755        return err;
 756}
 757
 758static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
 759                          struct nlattr *data[])
 760{
 761        struct ip_tunnel *t, *nt;
 762        struct net *net = dev_net(dev);
 763        struct vti_net *ipn = net_generic(net, vti_net_id);
 764        struct ip_tunnel_parm p;
 765        int mtu;
 766
 767        if (dev == ipn->fb_tunnel_dev)
 768                return -EINVAL;
 769
 770        nt = netdev_priv(dev);
 771        vti_netlink_parms(data, &p);
 772
 773        t = vti_tunnel_locate(net, &p, 0);
 774
 775        if (t) {
 776                if (t->dev != dev)
 777                        return -EEXIST;
 778        } else {
 779                t = nt;
 780
 781                vti_tunnel_unlink(ipn, t);
 782                t->parms.iph.saddr = p.iph.saddr;
 783                t->parms.iph.daddr = p.iph.daddr;
 784                t->parms.i_key = p.i_key;
 785                t->parms.o_key = p.o_key;
 786                if (dev->type != ARPHRD_ETHER) {
 787                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
 788                        memcpy(dev->broadcast, &p.iph.daddr, 4);
 789                }
 790                vti_tunnel_link(ipn, t);
 791                netdev_state_change(dev);
 792        }
 793
 794        if (t->parms.link != p.link) {
 795                t->parms.link = p.link;
 796                mtu = vti_tunnel_bind_dev(dev);
 797                if (!tb[IFLA_MTU])
 798                        dev->mtu = mtu;
 799                netdev_state_change(dev);
 800        }
 801
 802        return 0;
 803}
 804
 805static size_t vti_get_size(const struct net_device *dev)
 806{
 807        return
 808                /* IFLA_VTI_LINK */
 809                nla_total_size(4) +
 810                /* IFLA_VTI_IKEY */
 811                nla_total_size(4) +
 812                /* IFLA_VTI_OKEY */
 813                nla_total_size(4) +
 814                /* IFLA_VTI_LOCAL */
 815                nla_total_size(4) +
 816                /* IFLA_VTI_REMOTE */
 817                nla_total_size(4) +
 818                0;
 819}
 820
 821static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 822{
 823        struct ip_tunnel *t = netdev_priv(dev);
 824        struct ip_tunnel_parm *p = &t->parms;
 825
 826        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 827        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 828        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
 829        nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
 830        nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 831
 832        return 0;
 833}
 834
 835static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
 836        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
 837        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
 838        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
 839        [IFLA_VTI_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 840        [IFLA_VTI_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 841};
 842
 843static struct rtnl_link_ops vti_link_ops __read_mostly = {
 844        .kind           = "vti",
 845        .maxtype        = IFLA_VTI_MAX,
 846        .policy         = vti_policy,
 847        .priv_size      = sizeof(struct ip_tunnel),
 848        .setup          = vti_tunnel_setup,
 849        .validate       = vti_tunnel_validate,
 850        .newlink        = vti_newlink,
 851        .changelink     = vti_changelink,
 852        .get_size       = vti_get_size,
 853        .fill_info      = vti_fill_info,
 854};
 855
 856static int __init vti_init(void)
 857{
 858        int err;
 859
 860        pr_info("IPv4 over IPSec tunneling driver\n");
 861
 862        err = register_pernet_device(&vti_net_ops);
 863        if (err < 0)
 864                return err;
 865        err = xfrm4_mode_tunnel_input_register(&vti_handler);
 866        if (err < 0) {
 867                unregister_pernet_device(&vti_net_ops);
 868                pr_info(KERN_INFO "vti init: can't register tunnel\n");
 869        }
 870
 871        err = rtnl_link_register(&vti_link_ops);
 872        if (err < 0)
 873                goto rtnl_link_failed;
 874
 875        return err;
 876
 877rtnl_link_failed:
 878        xfrm4_mode_tunnel_input_deregister(&vti_handler);
 879        unregister_pernet_device(&vti_net_ops);
 880        return err;
 881}
 882
 883static void __exit vti_fini(void)
 884{
 885        rtnl_link_unregister(&vti_link_ops);
 886        if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
 887                pr_info("vti close: can't deregister tunnel\n");
 888
 889        unregister_pernet_device(&vti_net_ops);
 890}
 891
 892module_init(vti_init);
 893module_exit(vti_fini);
 894MODULE_LICENSE("GPL");
 895MODULE_ALIAS_RTNL_LINK("vti");
 896MODULE_ALIAS_NETDEV("ip_vti0");
 897