linux/net/ipv4/ipip.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     IP/IP protocol decoder.
   3 *
   4 *      Authors:
   5 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6 *
   7 *      Fixes:
   8 *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
   9 *                                      a module taking up 2 pages).
  10 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11 *                                      to keep ip_forward happy.
  12 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14 *              David Woodhouse :       Perform some basic ICMP handling.
  15 *                                      IPIP Routing without decapsulation.
  16 *              Carlos Picoto   :       GRE over IP support
  17 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18 *                                      I do not want to merge them together.
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 *
  25 */
  26
  27/* tunnel.c: an IP tunnel driver
  28
  29        The purpose of this driver is to provide an IP tunnel through
  30        which you can tunnel network traffic transparently across subnets.
  31
  32        This was written by looking at Nick Holloway's dummy driver
  33        Thanks for the great code!
  34
  35                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37        Minor tweaks:
  38                Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                dev->hard_header/hard_header_len changed to use no headers.
  40                Comments/bracketing tweaked.
  41                Made the tunnels use dev->name not tunnel: when error reporting.
  42                Added tx_dropped stat
  43
  44                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46        Reworked:
  47                Changed to tunnel to destination gateway in addition to the
  48                        tunnel's pointopoint address
  49                Almost completely rewritten
  50                Note:  There is currently no firewall or ICMP handling done.
  51
  52                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54*/
  55
  56/* Things I wish I had known when writing the tunnel driver:
  57
  58        When the tunnel_xmit() function is called, the skb contains the
  59        packet to be sent (plus a great deal of extra info), and dev
  60        contains the tunnel device that _we_ are.
  61
  62        When we are passed a packet, we are expected to fill in the
  63        source address with our source IP address.
  64
  65        What is the proper way to allocate, copy and free a buffer?
  66        After you allocate it, it is a "0 length" chunk of memory
  67        starting at zero.  If you want to add headers to the buffer
  68        later, you'll have to call "skb_reserve(skb, amount)" with
  69        the amount of memory you want reserved.  Then, you call
  70        "skb_put(skb, amount)" with the amount of space you want in
  71        the buffer.  skb_put() returns a pointer to the top (#0) of
  72        that buffer.  skb->len is set to the amount of space you have
  73        "allocated" with skb_put().  You can then write up to skb->len
  74        bytes to that buffer.  If you need more, you can call skb_put()
  75        again with the additional amount of space you need.  You can
  76        find out how much more space you can allocate by calling
  77        "skb_tailroom(skb)".
  78        Now, to add header space, call "skb_push(skb, header_len)".
  79        This creates space at the beginning of the buffer and returns
  80        a pointer to this new space.  If later you need to strip a
  81        header from a buffer, call "skb_pull(skb, header_len)".
  82        skb_headroom() will return how much space is left at the top
  83        of the buffer (before the main data).  Remember, this headroom
  84        space must be reserved before the skb_put() function is called.
  85        */
  86
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
  92
  93
  94#include <linux/capability.h>
  95#include <linux/module.h>
  96#include <linux/types.h>
  97#include <linux/kernel.h>
  98#include <linux/slab.h>
  99#include <asm/uaccess.h>
 100#include <linux/skbuff.h>
 101#include <linux/netdevice.h>
 102#include <linux/in.h>
 103#include <linux/tcp.h>
 104#include <linux/udp.h>
 105#include <linux/if_arp.h>
 106#include <linux/mroute.h>
 107#include <linux/init.h>
 108#include <linux/netfilter_ipv4.h>
 109#include <linux/if_ether.h>
 110
 111#include <net/sock.h>
 112#include <net/ip.h>
 113#include <net/icmp.h>
 114#include <net/ip_tunnels.h>
 115#include <net/inet_ecn.h>
 116#include <net/xfrm.h>
 117#include <net/net_namespace.h>
 118#include <net/netns/generic.h>
 119
 120static bool log_ecn_error = true;
 121module_param(log_ecn_error, bool, 0644);
 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 123
 124static int ipip_net_id __read_mostly;
 125
 126static int ipip_tunnel_init(struct net_device *dev);
 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
 128
 129static int ipip_err(struct sk_buff *skb, u32 info)
 130{
 131
 132/* All the routers (except for Linux) return only
 133   8 bytes of packet payload. It means, that precise relaying of
 134   ICMP in the real Internet is absolutely infeasible.
 135 */
 136        struct net *net = dev_net(skb->dev);
 137        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 138        const struct iphdr *iph = (const struct iphdr *)skb->data;
 139        struct ip_tunnel *t;
 140        int err;
 141        const int type = icmp_hdr(skb)->type;
 142        const int code = icmp_hdr(skb)->code;
 143
 144        err = -ENOENT;
 145        t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 146                             iph->daddr, iph->saddr, 0);
 147        if (t == NULL)
 148                goto out;
 149
 150        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 151                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 152                                 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
 153                err = 0;
 154                goto out;
 155        }
 156
 157        if (type == ICMP_REDIRECT) {
 158                ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
 159                              IPPROTO_IPIP, 0);
 160                err = 0;
 161                goto out;
 162        }
 163
 164        if (t->parms.iph.daddr == 0)
 165                goto out;
 166
 167        err = 0;
 168        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 169                goto out;
 170
 171        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 172                t->err_count++;
 173        else
 174                t->err_count = 1;
 175        t->err_time = jiffies;
 176
 177out:
 178        return err;
 179}
 180
 181static const struct tnl_ptk_info tpi = {
 182        /* no tunnel info required for ipip. */
 183        .proto = htons(ETH_P_IP),
 184};
 185
 186static int ipip_rcv(struct sk_buff *skb)
 187{
 188        struct net *net = dev_net(skb->dev);
 189        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 190        struct ip_tunnel *tunnel;
 191        const struct iphdr *iph;
 192
 193        iph = ip_hdr(skb);
 194        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 195                        iph->saddr, iph->daddr, 0);
 196        if (tunnel) {
 197                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 198                        goto drop;
 199                if (iptunnel_pull_header(skb, 0, tpi.proto))
 200                        goto drop;
 201                return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
 202        }
 203
 204        return -1;
 205
 206drop:
 207        kfree_skb(skb);
 208        return 0;
 209}
 210
 211/*
 212 *      This function assumes it is being called from dev_queue_xmit()
 213 *      and that skb is filled properly by that function.
 214 */
 215static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 216{
 217        struct ip_tunnel *tunnel = netdev_priv(dev);
 218        const struct iphdr  *tiph = &tunnel->parms.iph;
 219
 220        if (unlikely(skb->protocol != htons(ETH_P_IP)))
 221                goto tx_error;
 222
 223        skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
 224        if (IS_ERR(skb))
 225                goto out;
 226
 227        ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
 228        return NETDEV_TX_OK;
 229
 230tx_error:
 231        kfree_skb(skb);
 232out:
 233        dev->stats.tx_errors++;
 234        return NETDEV_TX_OK;
 235}
 236
 237static int
 238ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 239{
 240        int err = 0;
 241        struct ip_tunnel_parm p;
 242
 243        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 244                return -EFAULT;
 245
 246        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 247                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 248                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 249                        return -EINVAL;
 250        }
 251
 252        p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
 253        if (p.iph.ttl)
 254                p.iph.frag_off |= htons(IP_DF);
 255
 256        err = ip_tunnel_ioctl(dev, &p, cmd);
 257        if (err)
 258                return err;
 259
 260        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 261                return -EFAULT;
 262
 263        return 0;
 264}
 265
 266static const struct net_device_ops ipip_netdev_ops = {
 267        .ndo_init       = ipip_tunnel_init,
 268        .ndo_uninit     = ip_tunnel_uninit,
 269        .ndo_start_xmit = ipip_tunnel_xmit,
 270        .ndo_do_ioctl   = ipip_tunnel_ioctl,
 271        .ndo_change_mtu = ip_tunnel_change_mtu,
 272        .ndo_get_stats64 = ip_tunnel_get_stats64,
 273};
 274
 275#define IPIP_FEATURES (NETIF_F_SG |             \
 276                       NETIF_F_FRAGLIST |       \
 277                       NETIF_F_HIGHDMA |        \
 278                       NETIF_F_GSO_SOFTWARE |   \
 279                       NETIF_F_HW_CSUM)
 280
 281static void ipip_tunnel_setup(struct net_device *dev)
 282{
 283        dev->netdev_ops         = &ipip_netdev_ops;
 284
 285        dev->type               = ARPHRD_TUNNEL;
 286        dev->flags              = IFF_NOARP;
 287        dev->iflink             = 0;
 288        dev->addr_len           = 4;
 289        dev->features           |= NETIF_F_LLTX;
 290        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 291
 292        dev->features           |= IPIP_FEATURES;
 293        dev->hw_features        |= IPIP_FEATURES;
 294        ip_tunnel_setup(dev, ipip_net_id);
 295}
 296
 297static int ipip_tunnel_init(struct net_device *dev)
 298{
 299        struct ip_tunnel *tunnel = netdev_priv(dev);
 300
 301        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 302        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 303
 304        tunnel->hlen = 0;
 305        tunnel->parms.iph.protocol = IPPROTO_IPIP;
 306        return ip_tunnel_init(dev);
 307}
 308
 309static void ipip_netlink_parms(struct nlattr *data[],
 310                               struct ip_tunnel_parm *parms)
 311{
 312        memset(parms, 0, sizeof(*parms));
 313
 314        parms->iph.version = 4;
 315        parms->iph.protocol = IPPROTO_IPIP;
 316        parms->iph.ihl = 5;
 317
 318        if (!data)
 319                return;
 320
 321        if (data[IFLA_IPTUN_LINK])
 322                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 323
 324        if (data[IFLA_IPTUN_LOCAL])
 325                parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
 326
 327        if (data[IFLA_IPTUN_REMOTE])
 328                parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
 329
 330        if (data[IFLA_IPTUN_TTL]) {
 331                parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
 332                if (parms->iph.ttl)
 333                        parms->iph.frag_off = htons(IP_DF);
 334        }
 335
 336        if (data[IFLA_IPTUN_TOS])
 337                parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
 338
 339        if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 340                parms->iph.frag_off = htons(IP_DF);
 341}
 342
 343static int ipip_newlink(struct net *src_net, struct net_device *dev,
 344                        struct nlattr *tb[], struct nlattr *data[])
 345{
 346        struct ip_tunnel_parm p;
 347
 348        ipip_netlink_parms(data, &p);
 349        return ip_tunnel_newlink(dev, tb, &p);
 350}
 351
 352static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 353                           struct nlattr *data[])
 354{
 355        struct ip_tunnel_parm p;
 356
 357        ipip_netlink_parms(data, &p);
 358
 359        if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 360            (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
 361                return -EINVAL;
 362
 363        return ip_tunnel_changelink(dev, tb, &p);
 364}
 365
 366static size_t ipip_get_size(const struct net_device *dev)
 367{
 368        return
 369                /* IFLA_IPTUN_LINK */
 370                nla_total_size(4) +
 371                /* IFLA_IPTUN_LOCAL */
 372                nla_total_size(4) +
 373                /* IFLA_IPTUN_REMOTE */
 374                nla_total_size(4) +
 375                /* IFLA_IPTUN_TTL */
 376                nla_total_size(1) +
 377                /* IFLA_IPTUN_TOS */
 378                nla_total_size(1) +
 379                /* IFLA_IPTUN_PMTUDISC */
 380                nla_total_size(1) +
 381                0;
 382}
 383
 384static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 385{
 386        struct ip_tunnel *tunnel = netdev_priv(dev);
 387        struct ip_tunnel_parm *parm = &tunnel->parms;
 388
 389        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 390            nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
 391            nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 392            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 393            nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 394            nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
 395                       !!(parm->iph.frag_off & htons(IP_DF))))
 396                goto nla_put_failure;
 397        return 0;
 398
 399nla_put_failure:
 400        return -EMSGSIZE;
 401}
 402
 403static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 404        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
 405        [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
 406        [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
 407        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
 408        [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
 409        [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
 410};
 411
 412static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 413        .kind           = "ipip",
 414        .maxtype        = IFLA_IPTUN_MAX,
 415        .policy         = ipip_policy,
 416        .priv_size      = sizeof(struct ip_tunnel),
 417        .setup          = ipip_tunnel_setup,
 418        .newlink        = ipip_newlink,
 419        .changelink     = ipip_changelink,
 420        .dellink        = ip_tunnel_dellink,
 421        .get_size       = ipip_get_size,
 422        .fill_info      = ipip_fill_info,
 423};
 424
 425static struct xfrm_tunnel ipip_handler __read_mostly = {
 426        .handler        =       ipip_rcv,
 427        .err_handler    =       ipip_err,
 428        .priority       =       1,
 429};
 430
 431static int __net_init ipip_init_net(struct net *net)
 432{
 433        return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 434}
 435
 436static void __net_exit ipip_exit_net(struct net *net)
 437{
 438        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 439        ip_tunnel_delete_net(itn, &ipip_link_ops);
 440}
 441
 442static struct pernet_operations ipip_net_ops = {
 443        .init = ipip_init_net,
 444        .exit = ipip_exit_net,
 445        .id   = &ipip_net_id,
 446        .size = sizeof(struct ip_tunnel_net),
 447};
 448
 449static int __init ipip_init(void)
 450{
 451        int err;
 452
 453        pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
 454
 455        err = register_pernet_device(&ipip_net_ops);
 456        if (err < 0)
 457                return err;
 458        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 459        if (err < 0) {
 460                pr_info("%s: can't register tunnel\n", __func__);
 461                goto xfrm_tunnel_failed;
 462        }
 463        err = rtnl_link_register(&ipip_link_ops);
 464        if (err < 0)
 465                goto rtnl_link_failed;
 466
 467out:
 468        return err;
 469
 470rtnl_link_failed:
 471        xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 472xfrm_tunnel_failed:
 473        unregister_pernet_device(&ipip_net_ops);
 474        goto out;
 475}
 476
 477static void __exit ipip_fini(void)
 478{
 479        rtnl_link_unregister(&ipip_link_ops);
 480        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 481                pr_info("%s: can't deregister tunnel\n", __func__);
 482
 483        unregister_pernet_device(&ipip_net_ops);
 484}
 485
 486module_init(ipip_init);
 487module_exit(ipip_fini);
 488MODULE_LICENSE("GPL");
 489MODULE_ALIAS_NETDEV("tunl0");
 490