linux/net/ipv4/ip_vti.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3: IP/IP protocol decoder modified to support
   3 *                  virtual tunnel interface
   4 *
   5 *      Authors:
   6 *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 */
  14
  15/*
  16   This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c
  17
  18   For comments look at net/ipv4/ip_gre.c --ANK
  19 */
  20
  21
  22#include <linux/capability.h>
  23#include <linux/module.h>
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/mroute.h>
  34#include <linux/init.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/if_ether.h>
  37
  38#include <net/sock.h>
  39#include <net/ip.h>
  40#include <net/icmp.h>
  41#include <net/ip_tunnels.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46
  47#define HASH_SIZE  16
  48#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
  49
  50static struct rtnl_link_ops vti_link_ops __read_mostly;
  51
  52static int vti_net_id __read_mostly;
  53struct vti_net {
  54        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
  55        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
  56        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
  57        struct ip_tunnel __rcu *tunnels_wc[1];
  58        struct ip_tunnel __rcu **tunnels[4];
  59
  60        struct net_device *fb_tunnel_dev;
  61};
  62
  63static int vti_fb_tunnel_init(struct net_device *dev);
  64static int vti_tunnel_init(struct net_device *dev);
  65static void vti_tunnel_setup(struct net_device *dev);
  66static void vti_dev_free(struct net_device *dev);
  67static int vti_tunnel_bind_dev(struct net_device *dev);
  68
  69#define VTI_XMIT(stats1, stats2) do {                           \
  70        int err;                                                \
  71        int pkt_len = skb->len;                                 \
  72        err = dst_output(skb);                                  \
  73        if (net_xmit_eval(err) == 0) {                          \
  74                u64_stats_update_begin(&(stats1)->syncp);       \
  75                (stats1)->tx_bytes += pkt_len;                  \
  76                (stats1)->tx_packets++;                         \
  77                u64_stats_update_end(&(stats1)->syncp);         \
  78        } else {                                                \
  79                (stats2)->tx_errors++;                          \
  80                (stats2)->tx_aborted_errors++;                  \
  81        }                                                       \
  82} while (0)
  83
  84
  85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
  86                                           __be32 remote, __be32 local)
  87{
  88        unsigned h0 = HASH(remote);
  89        unsigned h1 = HASH(local);
  90        struct ip_tunnel *t;
  91        struct vti_net *ipn = net_generic(net, vti_net_id);
  92
  93        for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
  94                if (local == t->parms.iph.saddr &&
  95                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  96                        return t;
  97        for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
  98                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  99                        return t;
 100
 101        for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
 102                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 103                        return t;
 104
 105        for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
 106                if (t && (t->dev->flags&IFF_UP))
 107                        return t;
 108        return NULL;
 109}
 110
 111static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
 112                                             struct ip_tunnel_parm *parms)
 113{
 114        __be32 remote = parms->iph.daddr;
 115        __be32 local = parms->iph.saddr;
 116        unsigned h = 0;
 117        int prio = 0;
 118
 119        if (remote) {
 120                prio |= 2;
 121                h ^= HASH(remote);
 122        }
 123        if (local) {
 124                prio |= 1;
 125                h ^= HASH(local);
 126        }
 127        return &ipn->tunnels[prio][h];
 128}
 129
 130static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
 131                                                  struct ip_tunnel *t)
 132{
 133        return __vti_bucket(ipn, &t->parms);
 134}
 135
 136static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
 137{
 138        struct ip_tunnel __rcu **tp;
 139        struct ip_tunnel *iter;
 140
 141        for (tp = vti_bucket(ipn, t);
 142             (iter = rtnl_dereference(*tp)) != NULL;
 143             tp = &iter->next) {
 144                if (t == iter) {
 145                        rcu_assign_pointer(*tp, t->next);
 146                        break;
 147                }
 148        }
 149}
 150
 151static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
 152{
 153        struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
 154
 155        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 156        rcu_assign_pointer(*tp, t);
 157}
 158
 159static struct ip_tunnel *vti_tunnel_locate(struct net *net,
 160                                           struct ip_tunnel_parm *parms,
 161                                           int create)
 162{
 163        __be32 remote = parms->iph.daddr;
 164        __be32 local = parms->iph.saddr;
 165        struct ip_tunnel *t, *nt;
 166        struct ip_tunnel __rcu **tp;
 167        struct net_device *dev;
 168        char name[IFNAMSIZ];
 169        struct vti_net *ipn = net_generic(net, vti_net_id);
 170
 171        for (tp = __vti_bucket(ipn, parms);
 172             (t = rtnl_dereference(*tp)) != NULL;
 173             tp = &t->next) {
 174                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 175                        return t;
 176        }
 177        if (!create)
 178                return NULL;
 179
 180        if (parms->name[0])
 181                strlcpy(name, parms->name, IFNAMSIZ);
 182        else
 183                strcpy(name, "vti%d");
 184
 185        dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
 186        if (dev == NULL)
 187                return NULL;
 188
 189        dev_net_set(dev, net);
 190
 191        nt = netdev_priv(dev);
 192        nt->parms = *parms;
 193        dev->rtnl_link_ops = &vti_link_ops;
 194
 195        vti_tunnel_bind_dev(dev);
 196
 197        if (register_netdevice(dev) < 0)
 198                goto failed_free;
 199
 200        dev_hold(dev);
 201        vti_tunnel_link(ipn, nt);
 202        return nt;
 203
 204failed_free:
 205        free_netdev(dev);
 206        return NULL;
 207}
 208
 209static void vti_tunnel_uninit(struct net_device *dev)
 210{
 211        struct net *net = dev_net(dev);
 212        struct vti_net *ipn = net_generic(net, vti_net_id);
 213
 214        vti_tunnel_unlink(ipn, netdev_priv(dev));
 215        dev_put(dev);
 216}
 217
 218static int vti_err(struct sk_buff *skb, u32 info)
 219{
 220
 221        /* All the routers (except for Linux) return only
 222         * 8 bytes of packet payload. It means, that precise relaying of
 223         * ICMP in the real Internet is absolutely infeasible.
 224         */
 225        struct iphdr *iph = (struct iphdr *)skb->data;
 226        const int type = icmp_hdr(skb)->type;
 227        const int code = icmp_hdr(skb)->code;
 228        struct ip_tunnel *t;
 229        int err;
 230
 231        switch (type) {
 232        default:
 233        case ICMP_PARAMETERPROB:
 234                return 0;
 235
 236        case ICMP_DEST_UNREACH:
 237                switch (code) {
 238                case ICMP_SR_FAILED:
 239                case ICMP_PORT_UNREACH:
 240                        /* Impossible event. */
 241                        return 0;
 242                default:
 243                        /* All others are translated to HOST_UNREACH. */
 244                        break;
 245                }
 246                break;
 247        case ICMP_TIME_EXCEEDED:
 248                if (code != ICMP_EXC_TTL)
 249                        return 0;
 250                break;
 251        }
 252
 253        err = -ENOENT;
 254
 255        t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 256        if (t == NULL)
 257                goto out;
 258
 259        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 260                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 261                                 t->parms.link, 0, IPPROTO_IPIP, 0);
 262                err = 0;
 263                goto out;
 264        }
 265
 266        err = 0;
 267        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 268                goto out;
 269
 270        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 271                t->err_count++;
 272        else
 273                t->err_count = 1;
 274        t->err_time = jiffies;
 275out:
 276        return err;
 277}
 278
 279/* We dont digest the packet therefore let the packet pass */
 280static int vti_rcv(struct sk_buff *skb)
 281{
 282        struct ip_tunnel *tunnel;
 283        const struct iphdr *iph = ip_hdr(skb);
 284
 285        tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 286        if (tunnel != NULL) {
 287                struct pcpu_tstats *tstats;
 288
 289                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 290                        return -1;
 291
 292                tstats = this_cpu_ptr(tunnel->dev->tstats);
 293                u64_stats_update_begin(&tstats->syncp);
 294                tstats->rx_packets++;
 295                tstats->rx_bytes += skb->len;
 296                u64_stats_update_end(&tstats->syncp);
 297
 298                skb->mark = 0;
 299                secpath_reset(skb);
 300                skb->dev = tunnel->dev;
 301                return 1;
 302        }
 303
 304        return -1;
 305}
 306
 307/* This function assumes it is being called from dev_queue_xmit()
 308 * and that skb is filled properly by that function.
 309 */
 310
 311static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 312{
 313        struct ip_tunnel *tunnel = netdev_priv(dev);
 314        struct pcpu_tstats *tstats;
 315        struct iphdr  *tiph = &tunnel->parms.iph;
 316        u8     tos;
 317        struct rtable *rt;              /* Route to the other host */
 318        struct net_device *tdev;        /* Device to other host */
 319        struct iphdr  *old_iph = ip_hdr(skb);
 320        __be32 dst = tiph->daddr;
 321        struct flowi4 fl4;
 322
 323        if (skb->protocol != htons(ETH_P_IP))
 324                goto tx_error;
 325
 326        tos = old_iph->tos;
 327
 328        memset(&fl4, 0, sizeof(fl4));
 329        flowi4_init_output(&fl4, tunnel->parms.link,
 330                           be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
 331                           RT_SCOPE_UNIVERSE,
 332                           IPPROTO_IPIP, 0,
 333                           dst, tiph->saddr, 0, 0);
 334        rt = ip_route_output_key(dev_net(dev), &fl4);
 335        if (IS_ERR(rt)) {
 336                dev->stats.tx_carrier_errors++;
 337                goto tx_error_icmp;
 338        }
 339        /* if there is no transform then this tunnel is not functional.
 340         * Or if the xfrm is not mode tunnel.
 341         */
 342        if (!rt->dst.xfrm ||
 343            rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
 344                dev->stats.tx_carrier_errors++;
 345                goto tx_error_icmp;
 346        }
 347        tdev = rt->dst.dev;
 348
 349        if (tdev == dev) {
 350                ip_rt_put(rt);
 351                dev->stats.collisions++;
 352                goto tx_error;
 353        }
 354
 355        if (tunnel->err_count > 0) {
 356                if (time_before(jiffies,
 357                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 358                        tunnel->err_count--;
 359                        dst_link_failure(skb);
 360                } else
 361                        tunnel->err_count = 0;
 362        }
 363
 364        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 365        skb_dst_drop(skb);
 366        skb_dst_set(skb, &rt->dst);
 367        nf_reset(skb);
 368        skb->dev = skb_dst(skb)->dev;
 369
 370        tstats = this_cpu_ptr(dev->tstats);
 371        VTI_XMIT(tstats, &dev->stats);
 372        return NETDEV_TX_OK;
 373
 374tx_error_icmp:
 375        dst_link_failure(skb);
 376tx_error:
 377        dev->stats.tx_errors++;
 378        dev_kfree_skb(skb);
 379        return NETDEV_TX_OK;
 380}
 381
 382static int vti_tunnel_bind_dev(struct net_device *dev)
 383{
 384        struct net_device *tdev = NULL;
 385        struct ip_tunnel *tunnel;
 386        struct iphdr *iph;
 387
 388        tunnel = netdev_priv(dev);
 389        iph = &tunnel->parms.iph;
 390
 391        if (iph->daddr) {
 392                struct rtable *rt;
 393                struct flowi4 fl4;
 394                memset(&fl4, 0, sizeof(fl4));
 395                flowi4_init_output(&fl4, tunnel->parms.link,
 396                                   be32_to_cpu(tunnel->parms.i_key),
 397                                   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
 398                                   IPPROTO_IPIP, 0,
 399                                   iph->daddr, iph->saddr, 0, 0);
 400                rt = ip_route_output_key(dev_net(dev), &fl4);
 401                if (!IS_ERR(rt)) {
 402                        tdev = rt->dst.dev;
 403                        ip_rt_put(rt);
 404                }
 405                dev->flags |= IFF_POINTOPOINT;
 406        }
 407
 408        if (!tdev && tunnel->parms.link)
 409                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 410
 411        if (tdev) {
 412                dev->hard_header_len = tdev->hard_header_len +
 413                                       sizeof(struct iphdr);
 414                dev->mtu = tdev->mtu;
 415        }
 416        dev->iflink = tunnel->parms.link;
 417        return dev->mtu;
 418}
 419
 420static int
 421vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 422{
 423        int err = 0;
 424        struct ip_tunnel_parm p;
 425        struct ip_tunnel *t;
 426        struct net *net = dev_net(dev);
 427        struct vti_net *ipn = net_generic(net, vti_net_id);
 428
 429        switch (cmd) {
 430        case SIOCGETTUNNEL:
 431                t = NULL;
 432                if (dev == ipn->fb_tunnel_dev) {
 433                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 434                                           sizeof(p))) {
 435                                err = -EFAULT;
 436                                break;
 437                        }
 438                        t = vti_tunnel_locate(net, &p, 0);
 439                }
 440                if (t == NULL)
 441                        t = netdev_priv(dev);
 442                memcpy(&p, &t->parms, sizeof(p));
 443                p.i_flags |= GRE_KEY | VTI_ISVTI;
 444                p.o_flags |= GRE_KEY;
 445                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 446                        err = -EFAULT;
 447                break;
 448
 449        case SIOCADDTUNNEL:
 450        case SIOCCHGTUNNEL:
 451                err = -EPERM;
 452                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 453                        goto done;
 454
 455                err = -EFAULT;
 456                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 457                        goto done;
 458
 459                err = -EINVAL;
 460                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 461                    p.iph.ihl != 5)
 462                        goto done;
 463
 464                t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 465
 466                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 467                        if (t != NULL) {
 468                                if (t->dev != dev) {
 469                                        err = -EEXIST;
 470                                        break;
 471                                }
 472                        } else {
 473                                if (((dev->flags&IFF_POINTOPOINT) &&
 474                                    !p.iph.daddr) ||
 475                                    (!(dev->flags&IFF_POINTOPOINT) &&
 476                                    p.iph.daddr)) {
 477                                        err = -EINVAL;
 478                                        break;
 479                                }
 480                                t = netdev_priv(dev);
 481                                vti_tunnel_unlink(ipn, t);
 482                                synchronize_net();
 483                                t->parms.iph.saddr = p.iph.saddr;
 484                                t->parms.iph.daddr = p.iph.daddr;
 485                                t->parms.i_key = p.i_key;
 486                                t->parms.o_key = p.o_key;
 487                                t->parms.iph.protocol = IPPROTO_IPIP;
 488                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 489                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 490                                vti_tunnel_link(ipn, t);
 491                                netdev_state_change(dev);
 492                        }
 493                }
 494
 495                if (t) {
 496                        err = 0;
 497                        if (cmd == SIOCCHGTUNNEL) {
 498                                t->parms.i_key = p.i_key;
 499                                t->parms.o_key = p.o_key;
 500                                if (t->parms.link != p.link) {
 501                                        t->parms.link = p.link;
 502                                        vti_tunnel_bind_dev(dev);
 503                                        netdev_state_change(dev);
 504                                }
 505                        }
 506                        p.i_flags |= GRE_KEY | VTI_ISVTI;
 507                        p.o_flags |= GRE_KEY;
 508                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
 509                                         sizeof(p)))
 510                                err = -EFAULT;
 511                } else
 512                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 513                break;
 514
 515        case SIOCDELTUNNEL:
 516                err = -EPERM;
 517                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 518                        goto done;
 519
 520                if (dev == ipn->fb_tunnel_dev) {
 521                        err = -EFAULT;
 522                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
 523                                           sizeof(p)))
 524                                goto done;
 525                        err = -ENOENT;
 526
 527                        t = vti_tunnel_locate(net, &p, 0);
 528                        if (t == NULL)
 529                                goto done;
 530                        err = -EPERM;
 531                        if (t->dev == ipn->fb_tunnel_dev)
 532                                goto done;
 533                        dev = t->dev;
 534                }
 535                unregister_netdevice(dev);
 536                err = 0;
 537                break;
 538
 539        default:
 540                err = -EINVAL;
 541        }
 542
 543done:
 544        return err;
 545}
 546
 547static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 548{
 549        if (new_mtu < 68 || new_mtu > 0xFFF8)
 550                return -EINVAL;
 551        dev->mtu = new_mtu;
 552        return 0;
 553}
 554
 555static const struct net_device_ops vti_netdev_ops = {
 556        .ndo_init       = vti_tunnel_init,
 557        .ndo_uninit     = vti_tunnel_uninit,
 558        .ndo_start_xmit = vti_tunnel_xmit,
 559        .ndo_do_ioctl   = vti_tunnel_ioctl,
 560        .ndo_change_mtu = vti_tunnel_change_mtu,
 561        .ndo_get_stats64 = ip_tunnel_get_stats64,
 562};
 563
 564static void vti_dev_free(struct net_device *dev)
 565{
 566        free_percpu(dev->tstats);
 567        free_netdev(dev);
 568}
 569
 570static void vti_tunnel_setup(struct net_device *dev)
 571{
 572        dev->netdev_ops         = &vti_netdev_ops;
 573        dev->destructor         = vti_dev_free;
 574
 575        dev->type               = ARPHRD_TUNNEL;
 576        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 577        dev->mtu                = ETH_DATA_LEN;
 578        dev->flags              = IFF_NOARP;
 579        dev->iflink             = 0;
 580        dev->addr_len           = 4;
 581        dev->features           |= NETIF_F_NETNS_LOCAL;
 582        dev->features           |= NETIF_F_LLTX;
 583        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 584}
 585
 586static int vti_tunnel_init(struct net_device *dev)
 587{
 588        struct ip_tunnel *tunnel = netdev_priv(dev);
 589
 590        tunnel->dev = dev;
 591        strcpy(tunnel->parms.name, dev->name);
 592
 593        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 594        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 595
 596        dev->tstats = alloc_percpu(struct pcpu_tstats);
 597        if (!dev->tstats)
 598                return -ENOMEM;
 599
 600        return 0;
 601}
 602
 603static int __net_init vti_fb_tunnel_init(struct net_device *dev)
 604{
 605        struct ip_tunnel *tunnel = netdev_priv(dev);
 606        struct iphdr *iph = &tunnel->parms.iph;
 607        struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
 608
 609        tunnel->dev = dev;
 610        strcpy(tunnel->parms.name, dev->name);
 611
 612        iph->version            = 4;
 613        iph->protocol           = IPPROTO_IPIP;
 614        iph->ihl                = 5;
 615
 616        dev->tstats = alloc_percpu(struct pcpu_tstats);
 617        if (!dev->tstats)
 618                return -ENOMEM;
 619
 620        dev_hold(dev);
 621        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 622        return 0;
 623}
 624
 625static struct xfrm_tunnel vti_handler __read_mostly = {
 626        .handler        =       vti_rcv,
 627        .err_handler    =       vti_err,
 628        .priority       =       1,
 629};
 630
 631static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
 632{
 633        int prio;
 634
 635        for (prio = 1; prio < 4; prio++) {
 636                int h;
 637                for (h = 0; h < HASH_SIZE; h++) {
 638                        struct ip_tunnel *t;
 639
 640                        t = rtnl_dereference(ipn->tunnels[prio][h]);
 641                        while (t != NULL) {
 642                                unregister_netdevice_queue(t->dev, head);
 643                                t = rtnl_dereference(t->next);
 644                        }
 645                }
 646        }
 647}
 648
 649static int __net_init vti_init_net(struct net *net)
 650{
 651        int err;
 652        struct vti_net *ipn = net_generic(net, vti_net_id);
 653
 654        ipn->tunnels[0] = ipn->tunnels_wc;
 655        ipn->tunnels[1] = ipn->tunnels_l;
 656        ipn->tunnels[2] = ipn->tunnels_r;
 657        ipn->tunnels[3] = ipn->tunnels_r_l;
 658
 659        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 660                                          "ip_vti0",
 661                                          vti_tunnel_setup);
 662        if (!ipn->fb_tunnel_dev) {
 663                err = -ENOMEM;
 664                goto err_alloc_dev;
 665        }
 666        dev_net_set(ipn->fb_tunnel_dev, net);
 667
 668        err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
 669        if (err)
 670                goto err_reg_dev;
 671        ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
 672
 673        err = register_netdev(ipn->fb_tunnel_dev);
 674        if (err)
 675                goto err_reg_dev;
 676        return 0;
 677
 678err_reg_dev:
 679        vti_dev_free(ipn->fb_tunnel_dev);
 680err_alloc_dev:
 681        /* nothing */
 682        return err;
 683}
 684
 685static void __net_exit vti_exit_net(struct net *net)
 686{
 687        struct vti_net *ipn = net_generic(net, vti_net_id);
 688        LIST_HEAD(list);
 689
 690        rtnl_lock();
 691        vti_destroy_tunnels(ipn, &list);
 692        unregister_netdevice_many(&list);
 693        rtnl_unlock();
 694}
 695
 696static struct pernet_operations vti_net_ops = {
 697        .init = vti_init_net,
 698        .exit = vti_exit_net,
 699        .id   = &vti_net_id,
 700        .size = sizeof(struct vti_net),
 701};
 702
 703static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 704{
 705        return 0;
 706}
 707
 708static void vti_netlink_parms(struct nlattr *data[],
 709                              struct ip_tunnel_parm *parms)
 710{
 711        memset(parms, 0, sizeof(*parms));
 712
 713        parms->iph.protocol = IPPROTO_IPIP;
 714
 715        if (!data)
 716                return;
 717
 718        if (data[IFLA_VTI_LINK])
 719                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 720
 721        if (data[IFLA_VTI_IKEY])
 722                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
 723
 724        if (data[IFLA_VTI_OKEY])
 725                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 726
 727        if (data[IFLA_VTI_LOCAL])
 728                parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
 729
 730        if (data[IFLA_VTI_REMOTE])
 731                parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
 732
 733}
 734
 735static int vti_newlink(struct net *src_net, struct net_device *dev,
 736                       struct nlattr *tb[], struct nlattr *data[])
 737{
 738        struct ip_tunnel *nt;
 739        struct net *net = dev_net(dev);
 740        struct vti_net *ipn = net_generic(net, vti_net_id);
 741        int mtu;
 742        int err;
 743
 744        nt = netdev_priv(dev);
 745        vti_netlink_parms(data, &nt->parms);
 746
 747        if (vti_tunnel_locate(net, &nt->parms, 0))
 748                return -EEXIST;
 749
 750        mtu = vti_tunnel_bind_dev(dev);
 751        if (!tb[IFLA_MTU])
 752                dev->mtu = mtu;
 753
 754        err = register_netdevice(dev);
 755        if (err)
 756                goto out;
 757
 758        dev_hold(dev);
 759        vti_tunnel_link(ipn, nt);
 760
 761out:
 762        return err;
 763}
 764
 765static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
 766                          struct nlattr *data[])
 767{
 768        struct ip_tunnel *t, *nt;
 769        struct net *net = dev_net(dev);
 770        struct vti_net *ipn = net_generic(net, vti_net_id);
 771        struct ip_tunnel_parm p;
 772        int mtu;
 773
 774        if (dev == ipn->fb_tunnel_dev)
 775                return -EINVAL;
 776
 777        nt = netdev_priv(dev);
 778        vti_netlink_parms(data, &p);
 779
 780        t = vti_tunnel_locate(net, &p, 0);
 781
 782        if (t) {
 783                if (t->dev != dev)
 784                        return -EEXIST;
 785        } else {
 786                t = nt;
 787
 788                vti_tunnel_unlink(ipn, t);
 789                t->parms.iph.saddr = p.iph.saddr;
 790                t->parms.iph.daddr = p.iph.daddr;
 791                t->parms.i_key = p.i_key;
 792                t->parms.o_key = p.o_key;
 793                if (dev->type != ARPHRD_ETHER) {
 794                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
 795                        memcpy(dev->broadcast, &p.iph.daddr, 4);
 796                }
 797                vti_tunnel_link(ipn, t);
 798                netdev_state_change(dev);
 799        }
 800
 801        if (t->parms.link != p.link) {
 802                t->parms.link = p.link;
 803                mtu = vti_tunnel_bind_dev(dev);
 804                if (!tb[IFLA_MTU])
 805                        dev->mtu = mtu;
 806                netdev_state_change(dev);
 807        }
 808
 809        return 0;
 810}
 811
 812static size_t vti_get_size(const struct net_device *dev)
 813{
 814        return
 815                /* IFLA_VTI_LINK */
 816                nla_total_size(4) +
 817                /* IFLA_VTI_IKEY */
 818                nla_total_size(4) +
 819                /* IFLA_VTI_OKEY */
 820                nla_total_size(4) +
 821                /* IFLA_VTI_LOCAL */
 822                nla_total_size(4) +
 823                /* IFLA_VTI_REMOTE */
 824                nla_total_size(4) +
 825                0;
 826}
 827
 828static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 829{
 830        struct ip_tunnel *t = netdev_priv(dev);
 831        struct ip_tunnel_parm *p = &t->parms;
 832
 833        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 834        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 835        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
 836        nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
 837        nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 838
 839        return 0;
 840}
 841
 842static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
 843        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
 844        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
 845        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
 846        [IFLA_VTI_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 847        [IFLA_VTI_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 848};
 849
 850static struct rtnl_link_ops vti_link_ops __read_mostly = {
 851        .kind           = "vti",
 852        .maxtype        = IFLA_VTI_MAX,
 853        .policy         = vti_policy,
 854        .priv_size      = sizeof(struct ip_tunnel),
 855        .setup          = vti_tunnel_setup,
 856        .validate       = vti_tunnel_validate,
 857        .newlink        = vti_newlink,
 858        .changelink     = vti_changelink,
 859        .get_size       = vti_get_size,
 860        .fill_info      = vti_fill_info,
 861};
 862
 863static int __init vti_init(void)
 864{
 865        int err;
 866
 867        pr_info("IPv4 over IPSec tunneling driver\n");
 868
 869        err = register_pernet_device(&vti_net_ops);
 870        if (err < 0)
 871                return err;
 872        err = xfrm4_mode_tunnel_input_register(&vti_handler);
 873        if (err < 0) {
 874                unregister_pernet_device(&vti_net_ops);
 875                pr_info(KERN_INFO "vti init: can't register tunnel\n");
 876        }
 877
 878        err = rtnl_link_register(&vti_link_ops);
 879        if (err < 0)
 880                goto rtnl_link_failed;
 881
 882        return err;
 883
 884rtnl_link_failed:
 885        xfrm4_mode_tunnel_input_deregister(&vti_handler);
 886        unregister_pernet_device(&vti_net_ops);
 887        return err;
 888}
 889
 890static void __exit vti_fini(void)
 891{
 892        rtnl_link_unregister(&vti_link_ops);
 893        if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
 894                pr_info("vti close: can't deregister tunnel\n");
 895
 896        unregister_pernet_device(&vti_net_ops);
 897}
 898
 899module_init(vti_init);
 900module_exit(vti_fini);
 901MODULE_LICENSE("GPL");
 902MODULE_ALIAS_RTNL_LINK("vti");
 903MODULE_ALIAS_NETDEV("ip_vti0");
 904