linux/net/ipv4/ipip.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     IP/IP protocol decoder.
   3 *
   4 *      Authors:
   5 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6 *
   7 *      Fixes:
   8 *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
   9 *                                      a module taking up 2 pages).
  10 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11 *                                      to keep ip_forward happy.
  12 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14 *              David Woodhouse :       Perform some basic ICMP handling.
  15 *                                      IPIP Routing without decapsulation.
  16 *              Carlos Picoto   :       GRE over IP support
  17 *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18 *                                      I do not want to merge them together.
  19 *
  20 *      This program is free software; you can redistribute it and/or
  21 *      modify it under the terms of the GNU General Public License
  22 *      as published by the Free Software Foundation; either version
  23 *      2 of the License, or (at your option) any later version.
  24 *
  25 */
  26
  27/* tunnel.c: an IP tunnel driver
  28
  29        The purpose of this driver is to provide an IP tunnel through
  30        which you can tunnel network traffic transparently across subnets.
  31
  32        This was written by looking at Nick Holloway's dummy driver
  33        Thanks for the great code!
  34
  35                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37        Minor tweaks:
  38                Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                dev->hard_header/hard_header_len changed to use no headers.
  40                Comments/bracketing tweaked.
  41                Made the tunnels use dev->name not tunnel: when error reporting.
  42                Added tx_dropped stat
  43
  44                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46        Reworked:
  47                Changed to tunnel to destination gateway in addition to the
  48                        tunnel's pointopoint address
  49                Almost completely rewritten
  50                Note:  There is currently no firewall or ICMP handling done.
  51
  52                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54*/
  55
  56/* Things I wish I had known when writing the tunnel driver:
  57
  58        When the tunnel_xmit() function is called, the skb contains the
  59        packet to be sent (plus a great deal of extra info), and dev
  60        contains the tunnel device that _we_ are.
  61
  62        When we are passed a packet, we are expected to fill in the
  63        source address with our source IP address.
  64
  65        What is the proper way to allocate, copy and free a buffer?
  66        After you allocate it, it is a "0 length" chunk of memory
  67        starting at zero.  If you want to add headers to the buffer
  68        later, you'll have to call "skb_reserve(skb, amount)" with
  69        the amount of memory you want reserved.  Then, you call
  70        "skb_put(skb, amount)" with the amount of space you want in
  71        the buffer.  skb_put() returns a pointer to the top (#0) of
  72        that buffer.  skb->len is set to the amount of space you have
  73        "allocated" with skb_put().  You can then write up to skb->len
  74        bytes to that buffer.  If you need more, you can call skb_put()
  75        again with the additional amount of space you need.  You can
  76        find out how much more space you can allocate by calling
  77        "skb_tailroom(skb)".
  78        Now, to add header space, call "skb_push(skb, header_len)".
  79        This creates space at the beginning of the buffer and returns
  80        a pointer to this new space.  If later you need to strip a
  81        header from a buffer, call "skb_pull(skb, header_len)".
  82        skb_headroom() will return how much space is left at the top
  83        of the buffer (before the main data).  Remember, this headroom
  84        space must be reserved before the skb_put() function is called.
  85        */
  86
  87/*
  88   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
  89
  90   For comments look at net/ipv4/ip_gre.c --ANK
  91 */
  92
  93
  94#include <linux/capability.h>
  95#include <linux/module.h>
  96#include <linux/types.h>
  97#include <linux/kernel.h>
  98#include <linux/slab.h>
  99#include <asm/uaccess.h>
 100#include <linux/skbuff.h>
 101#include <linux/netdevice.h>
 102#include <linux/in.h>
 103#include <linux/tcp.h>
 104#include <linux/udp.h>
 105#include <linux/if_arp.h>
 106#include <linux/mroute.h>
 107#include <linux/init.h>
 108#include <linux/netfilter_ipv4.h>
 109#include <linux/if_ether.h>
 110
 111#include <net/sock.h>
 112#include <net/ip.h>
 113#include <net/icmp.h>
 114#include <net/ip_tunnels.h>
 115#include <net/inet_ecn.h>
 116#include <net/xfrm.h>
 117#include <net/net_namespace.h>
 118#include <net/netns/generic.h>
 119
 120static bool log_ecn_error = true;
 121module_param(log_ecn_error, bool, 0644);
 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 123
 124static int ipip_net_id __read_mostly;
 125
 126static int ipip_tunnel_init(struct net_device *dev);
 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
 128
 129static int ipip_err(struct sk_buff *skb, u32 info)
 130{
 131
 132/* All the routers (except for Linux) return only
 133   8 bytes of packet payload. It means, that precise relaying of
 134   ICMP in the real Internet is absolutely infeasible.
 135 */
 136        struct net *net = dev_net(skb->dev);
 137        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 138        const struct iphdr *iph = (const struct iphdr *)skb->data;
 139        struct ip_tunnel *t;
 140        int err;
 141        const int type = icmp_hdr(skb)->type;
 142        const int code = icmp_hdr(skb)->code;
 143
 144        err = -ENOENT;
 145        t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 146                             iph->daddr, iph->saddr, 0);
 147        if (t == NULL)
 148                goto out;
 149
 150        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 151                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 152                                 t->parms.link, 0, IPPROTO_IPIP, 0);
 153                err = 0;
 154                goto out;
 155        }
 156
 157        if (type == ICMP_REDIRECT) {
 158                ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 159                              IPPROTO_IPIP, 0);
 160                err = 0;
 161                goto out;
 162        }
 163
 164        if (t->parms.iph.daddr == 0)
 165                goto out;
 166
 167        err = 0;
 168        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 169                goto out;
 170
 171        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 172                t->err_count++;
 173        else
 174                t->err_count = 1;
 175        t->err_time = jiffies;
 176
 177out:
 178        return err;
 179}
 180
 181static const struct tnl_ptk_info tpi = {
 182        /* no tunnel info required for ipip. */
 183        .proto = htons(ETH_P_IP),
 184};
 185
 186static int ipip_rcv(struct sk_buff *skb)
 187{
 188        struct net *net = dev_net(skb->dev);
 189        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 190        struct ip_tunnel *tunnel;
 191        const struct iphdr *iph;
 192
 193        iph = ip_hdr(skb);
 194        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 195                        iph->saddr, iph->daddr, 0);
 196        if (tunnel) {
 197                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 198                        goto drop;
 199                if (iptunnel_pull_header(skb, 0, tpi.proto))
 200                        goto drop;
 201                return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
 202        }
 203
 204        return -1;
 205
 206drop:
 207        kfree_skb(skb);
 208        return 0;
 209}
 210
 211/*
 212 *      This function assumes it is being called from dev_queue_xmit()
 213 *      and that skb is filled properly by that function.
 214 */
 215static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 216{
 217        struct ip_tunnel *tunnel = netdev_priv(dev);
 218        const struct iphdr  *tiph = &tunnel->parms.iph;
 219
 220        if (unlikely(skb->protocol != htons(ETH_P_IP)))
 221                goto tx_error;
 222
 223        skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
 224        if (IS_ERR(skb))
 225                goto out;
 226
 227        skb_set_inner_ipproto(skb, IPPROTO_IPIP);
 228
 229        ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
 230        return NETDEV_TX_OK;
 231
 232tx_error:
 233        kfree_skb(skb);
 234out:
 235        dev->stats.tx_errors++;
 236        return NETDEV_TX_OK;
 237}
 238
 239static int
 240ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 241{
 242        int err = 0;
 243        struct ip_tunnel_parm p;
 244
 245        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 246                return -EFAULT;
 247
 248        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 249                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 250                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 251                        return -EINVAL;
 252        }
 253
 254        p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
 255        if (p.iph.ttl)
 256                p.iph.frag_off |= htons(IP_DF);
 257
 258        err = ip_tunnel_ioctl(dev, &p, cmd);
 259        if (err)
 260                return err;
 261
 262        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 263                return -EFAULT;
 264
 265        return 0;
 266}
 267
 268static const struct net_device_ops ipip_netdev_ops = {
 269        .ndo_init       = ipip_tunnel_init,
 270        .ndo_uninit     = ip_tunnel_uninit,
 271        .ndo_start_xmit = ipip_tunnel_xmit,
 272        .ndo_do_ioctl   = ipip_tunnel_ioctl,
 273        .ndo_change_mtu = ip_tunnel_change_mtu,
 274        .ndo_get_stats64 = ip_tunnel_get_stats64,
 275};
 276
 277#define IPIP_FEATURES (NETIF_F_SG |             \
 278                       NETIF_F_FRAGLIST |       \
 279                       NETIF_F_HIGHDMA |        \
 280                       NETIF_F_GSO_SOFTWARE |   \
 281                       NETIF_F_HW_CSUM)
 282
 283static void ipip_tunnel_setup(struct net_device *dev)
 284{
 285        dev->netdev_ops         = &ipip_netdev_ops;
 286
 287        dev->type               = ARPHRD_TUNNEL;
 288        dev->flags              = IFF_NOARP;
 289        dev->iflink             = 0;
 290        dev->addr_len           = 4;
 291        dev->features           |= NETIF_F_LLTX;
 292        netif_keep_dst(dev);
 293
 294        dev->features           |= IPIP_FEATURES;
 295        dev->hw_features        |= IPIP_FEATURES;
 296        ip_tunnel_setup(dev, ipip_net_id);
 297}
 298
 299static int ipip_tunnel_init(struct net_device *dev)
 300{
 301        struct ip_tunnel *tunnel = netdev_priv(dev);
 302
 303        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 304        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 305
 306        tunnel->tun_hlen = 0;
 307        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 308        tunnel->parms.iph.protocol = IPPROTO_IPIP;
 309        return ip_tunnel_init(dev);
 310}
 311
 312static void ipip_netlink_parms(struct nlattr *data[],
 313                               struct ip_tunnel_parm *parms)
 314{
 315        memset(parms, 0, sizeof(*parms));
 316
 317        parms->iph.version = 4;
 318        parms->iph.protocol = IPPROTO_IPIP;
 319        parms->iph.ihl = 5;
 320
 321        if (!data)
 322                return;
 323
 324        if (data[IFLA_IPTUN_LINK])
 325                parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 326
 327        if (data[IFLA_IPTUN_LOCAL])
 328                parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
 329
 330        if (data[IFLA_IPTUN_REMOTE])
 331                parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
 332
 333        if (data[IFLA_IPTUN_TTL]) {
 334                parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
 335                if (parms->iph.ttl)
 336                        parms->iph.frag_off = htons(IP_DF);
 337        }
 338
 339        if (data[IFLA_IPTUN_TOS])
 340                parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
 341
 342        if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 343                parms->iph.frag_off = htons(IP_DF);
 344}
 345
 346/* This function returns true when ENCAP attributes are present in the nl msg */
 347static bool ipip_netlink_encap_parms(struct nlattr *data[],
 348                                     struct ip_tunnel_encap *ipencap)
 349{
 350        bool ret = false;
 351
 352        memset(ipencap, 0, sizeof(*ipencap));
 353
 354        if (!data)
 355                return ret;
 356
 357        if (data[IFLA_IPTUN_ENCAP_TYPE]) {
 358                ret = true;
 359                ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
 360        }
 361
 362        if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
 363                ret = true;
 364                ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
 365        }
 366
 367        if (data[IFLA_IPTUN_ENCAP_SPORT]) {
 368                ret = true;
 369                ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]);
 370        }
 371
 372        if (data[IFLA_IPTUN_ENCAP_DPORT]) {
 373                ret = true;
 374                ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]);
 375        }
 376
 377        return ret;
 378}
 379
 380static int ipip_newlink(struct net *src_net, struct net_device *dev,
 381                        struct nlattr *tb[], struct nlattr *data[])
 382{
 383        struct ip_tunnel_parm p;
 384        struct ip_tunnel_encap ipencap;
 385
 386        if (ipip_netlink_encap_parms(data, &ipencap)) {
 387                struct ip_tunnel *t = netdev_priv(dev);
 388                int err = ip_tunnel_encap_setup(t, &ipencap);
 389
 390                if (err < 0)
 391                        return err;
 392        }
 393
 394        ipip_netlink_parms(data, &p);
 395        return ip_tunnel_newlink(dev, tb, &p);
 396}
 397
 398static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 399                           struct nlattr *data[])
 400{
 401        struct ip_tunnel_parm p;
 402        struct ip_tunnel_encap ipencap;
 403
 404        if (ipip_netlink_encap_parms(data, &ipencap)) {
 405                struct ip_tunnel *t = netdev_priv(dev);
 406                int err = ip_tunnel_encap_setup(t, &ipencap);
 407
 408                if (err < 0)
 409                        return err;
 410        }
 411
 412        ipip_netlink_parms(data, &p);
 413
 414        if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 415            (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
 416                return -EINVAL;
 417
 418        return ip_tunnel_changelink(dev, tb, &p);
 419}
 420
 421static size_t ipip_get_size(const struct net_device *dev)
 422{
 423        return
 424                /* IFLA_IPTUN_LINK */
 425                nla_total_size(4) +
 426                /* IFLA_IPTUN_LOCAL */
 427                nla_total_size(4) +
 428                /* IFLA_IPTUN_REMOTE */
 429                nla_total_size(4) +
 430                /* IFLA_IPTUN_TTL */
 431                nla_total_size(1) +
 432                /* IFLA_IPTUN_TOS */
 433                nla_total_size(1) +
 434                /* IFLA_IPTUN_PMTUDISC */
 435                nla_total_size(1) +
 436                /* IFLA_IPTUN_ENCAP_TYPE */
 437                nla_total_size(2) +
 438                /* IFLA_IPTUN_ENCAP_FLAGS */
 439                nla_total_size(2) +
 440                /* IFLA_IPTUN_ENCAP_SPORT */
 441                nla_total_size(2) +
 442                /* IFLA_IPTUN_ENCAP_DPORT */
 443                nla_total_size(2) +
 444                0;
 445}
 446
 447static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 448{
 449        struct ip_tunnel *tunnel = netdev_priv(dev);
 450        struct ip_tunnel_parm *parm = &tunnel->parms;
 451
 452        if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 453            nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
 454            nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 455            nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 456            nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 457            nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
 458                       !!(parm->iph.frag_off & htons(IP_DF))))
 459                goto nla_put_failure;
 460
 461        if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
 462                        tunnel->encap.type) ||
 463            nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT,
 464                        tunnel->encap.sport) ||
 465            nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
 466                        tunnel->encap.dport) ||
 467            nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
 468                        tunnel->encap.dport))
 469                goto nla_put_failure;
 470
 471        return 0;
 472
 473nla_put_failure:
 474        return -EMSGSIZE;
 475}
 476
 477static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 478        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
 479        [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
 480        [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
 481        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
 482        [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
 483        [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
 484        [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
 485        [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
 486        [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
 487        [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
 488};
 489
 490static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 491        .kind           = "ipip",
 492        .maxtype        = IFLA_IPTUN_MAX,
 493        .policy         = ipip_policy,
 494        .priv_size      = sizeof(struct ip_tunnel),
 495        .setup          = ipip_tunnel_setup,
 496        .newlink        = ipip_newlink,
 497        .changelink     = ipip_changelink,
 498        .dellink        = ip_tunnel_dellink,
 499        .get_size       = ipip_get_size,
 500        .fill_info      = ipip_fill_info,
 501};
 502
 503static struct xfrm_tunnel ipip_handler __read_mostly = {
 504        .handler        =       ipip_rcv,
 505        .err_handler    =       ipip_err,
 506        .priority       =       1,
 507};
 508
 509static int __net_init ipip_init_net(struct net *net)
 510{
 511        return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 512}
 513
 514static void __net_exit ipip_exit_net(struct net *net)
 515{
 516        struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 517        ip_tunnel_delete_net(itn, &ipip_link_ops);
 518}
 519
 520static struct pernet_operations ipip_net_ops = {
 521        .init = ipip_init_net,
 522        .exit = ipip_exit_net,
 523        .id   = &ipip_net_id,
 524        .size = sizeof(struct ip_tunnel_net),
 525};
 526
 527static int __init ipip_init(void)
 528{
 529        int err;
 530
 531        pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
 532
 533        err = register_pernet_device(&ipip_net_ops);
 534        if (err < 0)
 535                return err;
 536        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 537        if (err < 0) {
 538                pr_info("%s: can't register tunnel\n", __func__);
 539                goto xfrm_tunnel_failed;
 540        }
 541        err = rtnl_link_register(&ipip_link_ops);
 542        if (err < 0)
 543                goto rtnl_link_failed;
 544
 545out:
 546        return err;
 547
 548rtnl_link_failed:
 549        xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 550xfrm_tunnel_failed:
 551        unregister_pernet_device(&ipip_net_ops);
 552        goto out;
 553}
 554
 555static void __exit ipip_fini(void)
 556{
 557        rtnl_link_unregister(&ipip_link_ops);
 558        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 559                pr_info("%s: can't deregister tunnel\n", __func__);
 560
 561        unregister_pernet_device(&ipip_net_ops);
 562}
 563
 564module_init(ipip_init);
 565module_exit(ipip_fini);
 566MODULE_LICENSE("GPL");
 567MODULE_ALIAS_RTNL_LINK("ipip");
 568MODULE_ALIAS_NETDEV("tunl0");
 569