linux/net/ipv4/ipip.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     IP/IP protocol decoder.
   3 *
   4 *      Authors:
   5 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6 *
   7 *      Fixes:
   8 *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
   9 *                                      a module taking up 2 pages).
  10 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11 *                                      to keep ip_forward happy.
  12 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14 *              David Woodhouse :       Perform some basic ICMP handling.
  15 *                                      IPIP Routing without decapsulation.
  16 *              Carlos Picoto   :       GRE over IP support
  17 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18 *                                      I do not want to merge them together.
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 *
  25 */
  26
  27/* tunnel.c: an IP tunnel driver
  28
  29        The purpose of this driver is to provide an IP tunnel through
  30        which you can tunnel network traffic transparently across subnets.
  31
  32        This was written by looking at Nick Holloway's dummy driver
  33        Thanks for the great code!
  34
  35                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37        Minor tweaks:
  38                Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                dev->hard_header/hard_header_len changed to use no headers.
  40                Comments/bracketing tweaked.
  41                Made the tunnels use dev->name not tunnel: when error reporting.
  42                Added tx_dropped stat
  43
  44                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46        Reworked:
  47                Changed to tunnel to destination gateway in addition to the
  48                        tunnel's pointopoint address
  49                Almost completely rewritten
  50                Note:  There is currently no firewall or ICMP handling done.
  51
  52                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54*/
  55
  56/* Things I wish I had known when writing the tunnel driver:
  57
  58        When the tunnel_xmit() function is called, the skb contains the
  59        packet to be sent (plus a great deal of extra info), and dev
  60        contains the tunnel device that _we_ are.
  61
  62        When we are passed a packet, we are expected to fill in the
  63        source address with our source IP address.
  64
  65        What is the proper way to allocate, copy and free a buffer?
  66        After you allocate it, it is a "0 length" chunk of memory
  67        starting at zero.  If you want to add headers to the buffer
  68        later, you'll have to call "skb_reserve(skb, amount)" with
  69        the amount of memory you want reserved.  Then, you call
  70        "skb_put(skb, amount)" with the amount of space you want in
  71        the buffer.  skb_put() returns a pointer to the top (#0) of
  72        that buffer.  skb->len is set to the amount of space you have
  73        "allocated" with skb_put().  You can then write up to skb->len
  74        bytes to that buffer.  If you need more, you can call skb_put()
  75        again with the additional amount of space you need.  You can
  76        find out how much more space you can allocate by calling
  77        "skb_tailroom(skb)".
  78        Now, to add header space, call "skb_push(skb, header_len)".
  79        This creates space at the beginning of the buffer and returns
  80        a pointer to this new space.  If later you need to strip a
  81        header from a buffer, call "skb_pull(skb, header_len)".
  82        skb_headroom() will return how much space is left at the top
  83        of the buffer (before the main data).  Remember, this headroom
  84        space must be reserved before the skb_put() function is called.
  85        */
  86
  87/*
   88   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
  89
  90   For comments look at net/ipv4/ip_gre.c --ANK
  91 */
  92
  93
  94#include <linux/capability.h>
  95#include <linux/module.h>
  96#include <linux/types.h>
  97#include <linux/kernel.h>
  98#include <linux/slab.h>
  99#include <asm/uaccess.h>
 100#include <linux/skbuff.h>
 101#include <linux/netdevice.h>
 102#include <linux/in.h>
 103#include <linux/tcp.h>
 104#include <linux/udp.h>
 105#include <linux/if_arp.h>
 106#include <linux/mroute.h>
 107#include <linux/init.h>
 108#include <linux/netfilter_ipv4.h>
 109#include <linux/if_ether.h>
 110
 111#include <net/sock.h>
 112#include <net/ip.h>
 113#include <net/icmp.h>
 114#include <net/ipip.h>
 115#include <net/inet_ecn.h>
 116#include <net/xfrm.h>
 117#include <net/net_namespace.h>
 118#include <net/netns/generic.h>
 119
 120#define HASH_SIZE  16
 121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 122
 123static int ipip_net_id __read_mostly;
 124struct ipip_net {
 125        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
 126        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
 127        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
 128        struct ip_tunnel __rcu *tunnels_wc[1];
 129        struct ip_tunnel __rcu **tunnels[4];
 130
 131        struct net_device *fb_tunnel_dev;
 132};
 133
 134static int ipip_tunnel_init(struct net_device *dev);
 135static void ipip_tunnel_setup(struct net_device *dev);
 136static void ipip_dev_free(struct net_device *dev);
 137
 138/*
 139 * Locking : hash tables are protected by RCU and RTNL
 140 */
 141
 142#define for_each_ip_tunnel_rcu(start) \
 143        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 144
 145/* often modified stats are per cpu, other are shared (netdev->stats) */
 146struct pcpu_tstats {
 147        u64     rx_packets;
 148        u64     rx_bytes;
 149        u64     tx_packets;
 150        u64     tx_bytes;
 151        struct u64_stats_sync   syncp;
 152};
 153
 154static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
 155                                                  struct rtnl_link_stats64 *tot)
 156{
 157        int i;
 158
 159        for_each_possible_cpu(i) {
 160                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
 161                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
 162                unsigned int start;
 163
 164                do {
 165                        start = u64_stats_fetch_begin_bh(&tstats->syncp);
 166                        rx_packets = tstats->rx_packets;
 167                        tx_packets = tstats->tx_packets;
 168                        rx_bytes = tstats->rx_bytes;
 169                        tx_bytes = tstats->tx_bytes;
 170                } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
 171
 172                tot->rx_packets += rx_packets;
 173                tot->tx_packets += tx_packets;
 174                tot->rx_bytes   += rx_bytes;
 175                tot->tx_bytes   += tx_bytes;
 176        }
 177
 178        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 179        tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 180        tot->tx_dropped = dev->stats.tx_dropped;
 181        tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
 182        tot->tx_errors = dev->stats.tx_errors;
 183        tot->collisions = dev->stats.collisions;
 184
 185        return tot;
 186}
 187
 188static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
 189                __be32 remote, __be32 local)
 190{
 191        unsigned int h0 = HASH(remote);
 192        unsigned int h1 = HASH(local);
 193        struct ip_tunnel *t;
 194        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 195
 196        for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
 197                if (local == t->parms.iph.saddr &&
 198                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 199                        return t;
 200
 201        for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
 202                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
 203                        return t;
 204
 205        for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
 206                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
 207                        return t;
 208
 209        t = rcu_dereference(ipn->tunnels_wc[0]);
 210        if (t && (t->dev->flags&IFF_UP))
 211                return t;
 212        return NULL;
 213}
 214
 215static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
 216                struct ip_tunnel_parm *parms)
 217{
 218        __be32 remote = parms->iph.daddr;
 219        __be32 local = parms->iph.saddr;
 220        unsigned int h = 0;
 221        int prio = 0;
 222
 223        if (remote) {
 224                prio |= 2;
 225                h ^= HASH(remote);
 226        }
 227        if (local) {
 228                prio |= 1;
 229                h ^= HASH(local);
 230        }
 231        return &ipn->tunnels[prio][h];
 232}
 233
 234static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
 235                struct ip_tunnel *t)
 236{
 237        return __ipip_bucket(ipn, &t->parms);
 238}
 239
 240static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 241{
 242        struct ip_tunnel __rcu **tp;
 243        struct ip_tunnel *iter;
 244
 245        for (tp = ipip_bucket(ipn, t);
 246             (iter = rtnl_dereference(*tp)) != NULL;
 247             tp = &iter->next) {
 248                if (t == iter) {
 249                        rcu_assign_pointer(*tp, t->next);
 250                        break;
 251                }
 252        }
 253}
 254
 255static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 256{
 257        struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 258
 259        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
 260        rcu_assign_pointer(*tp, t);
 261}
 262
 263static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
 264                struct ip_tunnel_parm *parms, int create)
 265{
 266        __be32 remote = parms->iph.daddr;
 267        __be32 local = parms->iph.saddr;
 268        struct ip_tunnel *t, *nt;
 269        struct ip_tunnel __rcu **tp;
 270        struct net_device *dev;
 271        char name[IFNAMSIZ];
 272        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 273
 274        for (tp = __ipip_bucket(ipn, parms);
 275                 (t = rtnl_dereference(*tp)) != NULL;
 276                 tp = &t->next) {
 277                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 278                        return t;
 279        }
 280        if (!create)
 281                return NULL;
 282
 283        if (parms->name[0])
 284                strlcpy(name, parms->name, IFNAMSIZ);
 285        else
 286                strcpy(name, "tunl%d");
 287
 288        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
 289        if (dev == NULL)
 290                return NULL;
 291
 292        dev_net_set(dev, net);
 293
 294        nt = netdev_priv(dev);
 295        nt->parms = *parms;
 296
 297        if (ipip_tunnel_init(dev) < 0)
 298                goto failed_free;
 299
 300        if (register_netdevice(dev) < 0)
 301                goto failed_free;
 302
 303        strcpy(nt->parms.name, dev->name);
 304
 305        dev_hold(dev);
 306        ipip_tunnel_link(ipn, nt);
 307        return nt;
 308
 309failed_free:
 310        ipip_dev_free(dev);
 311        return NULL;
 312}
 313
 314/* called with RTNL */
 315static void ipip_tunnel_uninit(struct net_device *dev)
 316{
 317        struct net *net = dev_net(dev);
 318        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 319
 320        if (dev == ipn->fb_tunnel_dev)
 321                RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
 322        else
 323                ipip_tunnel_unlink(ipn, netdev_priv(dev));
 324        dev_put(dev);
 325}
 326
 327static int ipip_err(struct sk_buff *skb, u32 info)
 328{
 329
 330/* All the routers (except for Linux) return only
 331   8 bytes of packet payload. It means, that precise relaying of
 332   ICMP in the real Internet is absolutely infeasible.
 333 */
 334        const struct iphdr *iph = (const struct iphdr *)skb->data;
 335        const int type = icmp_hdr(skb)->type;
 336        const int code = icmp_hdr(skb)->code;
 337        struct ip_tunnel *t;
 338        int err;
 339
 340        switch (type) {
 341        default:
 342        case ICMP_PARAMETERPROB:
 343                return 0;
 344
 345        case ICMP_DEST_UNREACH:
 346                switch (code) {
 347                case ICMP_SR_FAILED:
 348                case ICMP_PORT_UNREACH:
 349                        /* Impossible event. */
 350                        return 0;
 351                case ICMP_FRAG_NEEDED:
 352                        /* Soft state for pmtu is maintained by IP core. */
 353                        return 0;
 354                default:
 355                        /* All others are translated to HOST_UNREACH.
 356                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 357                           I believe they are just ether pollution. --ANK
 358                         */
 359                        break;
 360                }
 361                break;
 362        case ICMP_TIME_EXCEEDED:
 363                if (code != ICMP_EXC_TTL)
 364                        return 0;
 365                break;
 366        }
 367
 368        err = -ENOENT;
 369
 370        rcu_read_lock();
 371        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
 372        if (t == NULL || t->parms.iph.daddr == 0)
 373                goto out;
 374
 375        err = 0;
 376        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 377                goto out;
 378
 379        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 380                t->err_count++;
 381        else
 382                t->err_count = 1;
 383        t->err_time = jiffies;
 384out:
 385        rcu_read_unlock();
 386        return err;
 387}
 388
 389static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
 390                                        struct sk_buff *skb)
 391{
 392        struct iphdr *inner_iph = ip_hdr(skb);
 393
 394        if (INET_ECN_is_ce(outer_iph->tos))
 395                IP_ECN_set_ce(inner_iph);
 396}
 397
 398static int ipip_rcv(struct sk_buff *skb)
 399{
 400        struct ip_tunnel *tunnel;
 401        const struct iphdr *iph = ip_hdr(skb);
 402
 403        rcu_read_lock();
 404        tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 405        if (tunnel != NULL) {
 406                struct pcpu_tstats *tstats;
 407
 408                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 409                        rcu_read_unlock();
 410                        kfree_skb(skb);
 411                        return 0;
 412                }
 413
 414                secpath_reset(skb);
 415
 416                skb->mac_header = skb->network_header;
 417                skb_reset_network_header(skb);
 418                skb->protocol = htons(ETH_P_IP);
 419                skb->pkt_type = PACKET_HOST;
 420
 421                tstats = this_cpu_ptr(tunnel->dev->tstats);
 422                u64_stats_update_begin(&tstats->syncp);
 423                tstats->rx_packets++;
 424                tstats->rx_bytes += skb->len;
 425                u64_stats_update_end(&tstats->syncp);
 426
 427                __skb_tunnel_rx(skb, tunnel->dev);
 428
 429                ipip_ecn_decapsulate(iph, skb);
 430
 431                netif_rx(skb);
 432
 433                rcu_read_unlock();
 434                return 0;
 435        }
 436        rcu_read_unlock();
 437
 438        return -1;
 439}
 440
 441/*
 442 *      This function assumes it is being called from dev_queue_xmit()
 443 *      and that skb is filled properly by that function.
 444 */
 445
 446static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 447{
 448        struct ip_tunnel *tunnel = netdev_priv(dev);
 449        struct pcpu_tstats *tstats;
 450        const struct iphdr  *tiph = &tunnel->parms.iph;
 451        u8     tos = tunnel->parms.iph.tos;
 452        __be16 df = tiph->frag_off;
 453        struct rtable *rt;                      /* Route to the other host */
 454        struct net_device *tdev;                /* Device to other host */
 455        const struct iphdr  *old_iph = ip_hdr(skb);
 456        struct iphdr  *iph;                     /* Our new IP header */
 457        unsigned int max_headroom;              /* The extra header space needed */
 458        __be32 dst = tiph->daddr;
 459        struct flowi4 fl4;
 460        int    mtu;
 461
 462        if (skb->protocol != htons(ETH_P_IP))
 463                goto tx_error;
 464
 465        if (tos & 1)
 466                tos = old_iph->tos;
 467
 468        if (!dst) {
 469                /* NBMA tunnel */
 470                if ((rt = skb_rtable(skb)) == NULL) {
 471                        dev->stats.tx_fifo_errors++;
 472                        goto tx_error;
 473                }
 474                dst = rt->rt_gateway;
 475        }
 476
 477        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 478                                   dst, tiph->saddr,
 479                                   0, 0,
 480                                   IPPROTO_IPIP, RT_TOS(tos),
 481                                   tunnel->parms.link);
 482        if (IS_ERR(rt)) {
 483                dev->stats.tx_carrier_errors++;
 484                goto tx_error_icmp;
 485        }
 486        tdev = rt->dst.dev;
 487
 488        if (tdev == dev) {
 489                ip_rt_put(rt);
 490                dev->stats.collisions++;
 491                goto tx_error;
 492        }
 493
 494        df |= old_iph->frag_off & htons(IP_DF);
 495
 496        if (df) {
 497                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 498
 499                if (mtu < 68) {
 500                        dev->stats.collisions++;
 501                        ip_rt_put(rt);
 502                        goto tx_error;
 503                }
 504
 505                if (skb_dst(skb))
 506                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 507
 508                if ((old_iph->frag_off & htons(IP_DF)) &&
 509                    mtu < ntohs(old_iph->tot_len)) {
 510                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 511                                  htonl(mtu));
 512                        ip_rt_put(rt);
 513                        goto tx_error;
 514                }
 515        }
 516
 517        if (tunnel->err_count > 0) {
 518                if (time_before(jiffies,
 519                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 520                        tunnel->err_count--;
 521                        dst_link_failure(skb);
 522                } else
 523                        tunnel->err_count = 0;
 524        }
 525
 526        /*
 527         * Okay, now see if we can stuff it in the buffer as-is.
 528         */
 529        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
 530
 531        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 532            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 533                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
 534                if (!new_skb) {
 535                        ip_rt_put(rt);
 536                        dev->stats.tx_dropped++;
 537                        dev_kfree_skb(skb);
 538                        return NETDEV_TX_OK;
 539                }
 540                if (skb->sk)
 541                        skb_set_owner_w(new_skb, skb->sk);
 542                dev_kfree_skb(skb);
 543                skb = new_skb;
 544                old_iph = ip_hdr(skb);
 545        }
 546
 547        skb->transport_header = skb->network_header;
 548        skb_push(skb, sizeof(struct iphdr));
 549        skb_reset_network_header(skb);
 550        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 551        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 552                              IPSKB_REROUTED);
 553        skb_dst_drop(skb);
 554        skb_dst_set(skb, &rt->dst);
 555
 556        /*
 557         *      Push down and install the IPIP header.
 558         */
 559
 560        iph                     =       ip_hdr(skb);
 561        iph->version            =       4;
 562        iph->ihl                =       sizeof(struct iphdr)>>2;
 563        iph->frag_off           =       df;
 564        iph->protocol           =       IPPROTO_IPIP;
 565        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
 566        iph->daddr              =       fl4.daddr;
 567        iph->saddr              =       fl4.saddr;
 568
 569        if ((iph->ttl = tiph->ttl) == 0)
 570                iph->ttl        =       old_iph->ttl;
 571
 572        nf_reset(skb);
 573        tstats = this_cpu_ptr(dev->tstats);
 574        __IPTUNNEL_XMIT(tstats, &dev->stats);
 575        return NETDEV_TX_OK;
 576
 577tx_error_icmp:
 578        dst_link_failure(skb);
 579tx_error:
 580        dev->stats.tx_errors++;
 581        dev_kfree_skb(skb);
 582        return NETDEV_TX_OK;
 583}
 584
 585static void ipip_tunnel_bind_dev(struct net_device *dev)
 586{
 587        struct net_device *tdev = NULL;
 588        struct ip_tunnel *tunnel;
 589        const struct iphdr *iph;
 590
 591        tunnel = netdev_priv(dev);
 592        iph = &tunnel->parms.iph;
 593
 594        if (iph->daddr) {
 595                struct rtable *rt;
 596                struct flowi4 fl4;
 597
 598                rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
 599                                           iph->daddr, iph->saddr,
 600                                           0, 0,
 601                                           IPPROTO_IPIP,
 602                                           RT_TOS(iph->tos),
 603                                           tunnel->parms.link);
 604                if (!IS_ERR(rt)) {
 605                        tdev = rt->dst.dev;
 606                        ip_rt_put(rt);
 607                }
 608                dev->flags |= IFF_POINTOPOINT;
 609        }
 610
 611        if (!tdev && tunnel->parms.link)
 612                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
 613
 614        if (tdev) {
 615                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
 616                dev->mtu = tdev->mtu - sizeof(struct iphdr);
 617        }
 618        dev->iflink = tunnel->parms.link;
 619}
 620
 621static int
 622ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 623{
 624        int err = 0;
 625        struct ip_tunnel_parm p;
 626        struct ip_tunnel *t;
 627        struct net *net = dev_net(dev);
 628        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 629
 630        switch (cmd) {
 631        case SIOCGETTUNNEL:
 632                t = NULL;
 633                if (dev == ipn->fb_tunnel_dev) {
 634                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
 635                                err = -EFAULT;
 636                                break;
 637                        }
 638                        t = ipip_tunnel_locate(net, &p, 0);
 639                }
 640                if (t == NULL)
 641                        t = netdev_priv(dev);
 642                memcpy(&p, &t->parms, sizeof(p));
 643                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 644                        err = -EFAULT;
 645                break;
 646
 647        case SIOCADDTUNNEL:
 648        case SIOCCHGTUNNEL:
 649                err = -EPERM;
 650                if (!capable(CAP_NET_ADMIN))
 651                        goto done;
 652
 653                err = -EFAULT;
 654                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 655                        goto done;
 656
 657                err = -EINVAL;
 658                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 659                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 660                        goto done;
 661                if (p.iph.ttl)
 662                        p.iph.frag_off |= htons(IP_DF);
 663
 664                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
 665
 666                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
 667                        if (t != NULL) {
 668                                if (t->dev != dev) {
 669                                        err = -EEXIST;
 670                                        break;
 671                                }
 672                        } else {
 673                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
 674                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
 675                                        err = -EINVAL;
 676                                        break;
 677                                }
 678                                t = netdev_priv(dev);
 679                                ipip_tunnel_unlink(ipn, t);
 680                                synchronize_net();
 681                                t->parms.iph.saddr = p.iph.saddr;
 682                                t->parms.iph.daddr = p.iph.daddr;
 683                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
 684                                memcpy(dev->broadcast, &p.iph.daddr, 4);
 685                                ipip_tunnel_link(ipn, t);
 686                                netdev_state_change(dev);
 687                        }
 688                }
 689
 690                if (t) {
 691                        err = 0;
 692                        if (cmd == SIOCCHGTUNNEL) {
 693                                t->parms.iph.ttl = p.iph.ttl;
 694                                t->parms.iph.tos = p.iph.tos;
 695                                t->parms.iph.frag_off = p.iph.frag_off;
 696                                if (t->parms.link != p.link) {
 697                                        t->parms.link = p.link;
 698                                        ipip_tunnel_bind_dev(dev);
 699                                        netdev_state_change(dev);
 700                                }
 701                        }
 702                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
 703                                err = -EFAULT;
 704                } else
 705                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
 706                break;
 707
 708        case SIOCDELTUNNEL:
 709                err = -EPERM;
 710                if (!capable(CAP_NET_ADMIN))
 711                        goto done;
 712
 713                if (dev == ipn->fb_tunnel_dev) {
 714                        err = -EFAULT;
 715                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 716                                goto done;
 717                        err = -ENOENT;
 718                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
 719                                goto done;
 720                        err = -EPERM;
 721                        if (t->dev == ipn->fb_tunnel_dev)
 722                                goto done;
 723                        dev = t->dev;
 724                }
 725                unregister_netdevice(dev);
 726                err = 0;
 727                break;
 728
 729        default:
 730                err = -EINVAL;
 731        }
 732
 733done:
 734        return err;
 735}
 736
 737static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
 738{
 739        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
 740                return -EINVAL;
 741        dev->mtu = new_mtu;
 742        return 0;
 743}
 744
 745static const struct net_device_ops ipip_netdev_ops = {
 746        .ndo_uninit     = ipip_tunnel_uninit,
 747        .ndo_start_xmit = ipip_tunnel_xmit,
 748        .ndo_do_ioctl   = ipip_tunnel_ioctl,
 749        .ndo_change_mtu = ipip_tunnel_change_mtu,
 750        .ndo_get_stats64 = ipip_get_stats64,
 751};
 752
 753static void ipip_dev_free(struct net_device *dev)
 754{
 755        free_percpu(dev->tstats);
 756        free_netdev(dev);
 757}
 758
 759static void ipip_tunnel_setup(struct net_device *dev)
 760{
 761        dev->netdev_ops         = &ipip_netdev_ops;
 762        dev->destructor         = ipip_dev_free;
 763
 764        dev->type               = ARPHRD_TUNNEL;
 765        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 766        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
 767        dev->flags              = IFF_NOARP;
 768        dev->iflink             = 0;
 769        dev->addr_len           = 4;
 770        dev->features           |= NETIF_F_NETNS_LOCAL;
 771        dev->features           |= NETIF_F_LLTX;
 772        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 773}
 774
 775static int ipip_tunnel_init(struct net_device *dev)
 776{
 777        struct ip_tunnel *tunnel = netdev_priv(dev);
 778
 779        tunnel->dev = dev;
 780
 781        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 782        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 783
 784        ipip_tunnel_bind_dev(dev);
 785
 786        dev->tstats = alloc_percpu(struct pcpu_tstats);
 787        if (!dev->tstats)
 788                return -ENOMEM;
 789
 790        return 0;
 791}
 792
 793static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
 794{
 795        struct ip_tunnel *tunnel = netdev_priv(dev);
 796        struct iphdr *iph = &tunnel->parms.iph;
 797        struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
 798
 799        tunnel->dev = dev;
 800        strcpy(tunnel->parms.name, dev->name);
 801
 802        iph->version            = 4;
 803        iph->protocol           = IPPROTO_IPIP;
 804        iph->ihl                = 5;
 805
 806        dev->tstats = alloc_percpu(struct pcpu_tstats);
 807        if (!dev->tstats)
 808                return -ENOMEM;
 809
 810        dev_hold(dev);
 811        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 812        return 0;
 813}
 814
 815static struct xfrm_tunnel ipip_handler __read_mostly = {
 816        .handler        =       ipip_rcv,
 817        .err_handler    =       ipip_err,
 818        .priority       =       1,
 819};
 820
 821static const char banner[] __initconst =
 822        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 823
 824static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 825{
 826        int prio;
 827
 828        for (prio = 1; prio < 4; prio++) {
 829                int h;
 830                for (h = 0; h < HASH_SIZE; h++) {
 831                        struct ip_tunnel *t;
 832
 833                        t = rtnl_dereference(ipn->tunnels[prio][h]);
 834                        while (t != NULL) {
 835                                unregister_netdevice_queue(t->dev, head);
 836                                t = rtnl_dereference(t->next);
 837                        }
 838                }
 839        }
 840}
 841
 842static int __net_init ipip_init_net(struct net *net)
 843{
 844        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 845        struct ip_tunnel *t;
 846        int err;
 847
 848        ipn->tunnels[0] = ipn->tunnels_wc;
 849        ipn->tunnels[1] = ipn->tunnels_l;
 850        ipn->tunnels[2] = ipn->tunnels_r;
 851        ipn->tunnels[3] = ipn->tunnels_r_l;
 852
 853        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
 854                                           "tunl0",
 855                                           ipip_tunnel_setup);
 856        if (!ipn->fb_tunnel_dev) {
 857                err = -ENOMEM;
 858                goto err_alloc_dev;
 859        }
 860        dev_net_set(ipn->fb_tunnel_dev, net);
 861
 862        err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
 863        if (err)
 864                goto err_reg_dev;
 865
 866        if ((err = register_netdev(ipn->fb_tunnel_dev)))
 867                goto err_reg_dev;
 868
 869        t = netdev_priv(ipn->fb_tunnel_dev);
 870
 871        strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
 872        return 0;
 873
 874err_reg_dev:
 875        ipip_dev_free(ipn->fb_tunnel_dev);
 876err_alloc_dev:
 877        /* nothing */
 878        return err;
 879}
 880
 881static void __net_exit ipip_exit_net(struct net *net)
 882{
 883        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 884        LIST_HEAD(list);
 885
 886        rtnl_lock();
 887        ipip_destroy_tunnels(ipn, &list);
 888        unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
 889        unregister_netdevice_many(&list);
 890        rtnl_unlock();
 891}
 892
 893static struct pernet_operations ipip_net_ops = {
 894        .init = ipip_init_net,
 895        .exit = ipip_exit_net,
 896        .id   = &ipip_net_id,
 897        .size = sizeof(struct ipip_net),
 898};
 899
 900static int __init ipip_init(void)
 901{
 902        int err;
 903
 904        printk(banner);
 905
 906        err = register_pernet_device(&ipip_net_ops);
 907        if (err < 0)
 908                return err;
 909        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 910        if (err < 0) {
 911                unregister_pernet_device(&ipip_net_ops);
 912                pr_info("%s: can't register tunnel\n", __func__);
 913        }
 914        return err;
 915}
 916
 917static void __exit ipip_fini(void)
 918{
 919        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 920                pr_info("%s: can't deregister tunnel\n", __func__);
 921
 922        unregister_pernet_device(&ipip_net_ops);
 923}
 924
 925module_init(ipip_init);
 926module_exit(ipip_fini);
 927MODULE_LICENSE("GPL");
 928MODULE_ALIAS_NETDEV("tunl0");
 929