linux/net/ipv4/ip_vti.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3: IP/IP protocol decoder modified to support
   3 *                  virtual tunnel interface
   4 *
   5 *      Authors:
   6 *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 */
  14
/*
   This version of net/ipv4/ip_vti.c is cloned from net/ipv4/ipip.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */
  20
  21
  22#include <linux/capability.h>
  23#include <linux/module.h>
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/mroute.h>
  34#include <linux/init.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/if_ether.h>
  37
  38#include <net/sock.h>
  39#include <net/ip.h>
  40#include <net/icmp.h>
  41#include <net/ip_tunnels.h>
  42#include <net/inet_ecn.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/netns/generic.h>
  46
  47static struct rtnl_link_ops vti_link_ops __read_mostly;
  48
  49static int vti_net_id __read_mostly;
  50static int vti_tunnel_init(struct net_device *dev);
  51
  52static int vti_err(struct sk_buff *skb, u32 info)
  53{
  54
  55        /* All the routers (except for Linux) return only
  56         * 8 bytes of packet payload. It means, that precise relaying of
  57         * ICMP in the real Internet is absolutely infeasible.
  58         */
  59        struct net *net = dev_net(skb->dev);
  60        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
  61        struct iphdr *iph = (struct iphdr *)skb->data;
  62        const int type = icmp_hdr(skb)->type;
  63        const int code = icmp_hdr(skb)->code;
  64        struct ip_tunnel *t;
  65        int err;
  66
  67        switch (type) {
  68        default:
  69        case ICMP_PARAMETERPROB:
  70                return 0;
  71
  72        case ICMP_DEST_UNREACH:
  73                switch (code) {
  74                case ICMP_SR_FAILED:
  75                case ICMP_PORT_UNREACH:
  76                        /* Impossible event. */
  77                        return 0;
  78                default:
  79                        /* All others are translated to HOST_UNREACH. */
  80                        break;
  81                }
  82                break;
  83        case ICMP_TIME_EXCEEDED:
  84                if (code != ICMP_EXC_TTL)
  85                        return 0;
  86                break;
  87        }
  88
  89        err = -ENOENT;
  90
  91        t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
  92                             iph->daddr, iph->saddr, 0);
  93        if (t == NULL)
  94                goto out;
  95
  96        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
  97                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
  98                                 t->parms.link, 0, IPPROTO_IPIP, 0);
  99                err = 0;
 100                goto out;
 101        }
 102
 103        err = 0;
 104        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 105                goto out;
 106
 107        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 108                t->err_count++;
 109        else
 110                t->err_count = 1;
 111        t->err_time = jiffies;
 112out:
 113        return err;
 114}
 115
 116/* We dont digest the packet therefore let the packet pass */
 117static int vti_rcv(struct sk_buff *skb)
 118{
 119        struct ip_tunnel *tunnel;
 120        const struct iphdr *iph = ip_hdr(skb);
 121        struct net *net = dev_net(skb->dev);
 122        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
 123
 124        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 125                                  iph->saddr, iph->daddr, 0);
 126        if (tunnel != NULL) {
 127                struct pcpu_tstats *tstats;
 128                u32 oldmark = skb->mark;
 129                int ret;
 130
 131
 132                /* temporarily mark the skb with the tunnel o_key, to
 133                 * only match policies with this mark.
 134                 */
 135                skb->mark = be32_to_cpu(tunnel->parms.o_key);
 136                ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
 137                skb->mark = oldmark;
 138                if (!ret)
 139                        return -1;
 140
 141                tstats = this_cpu_ptr(tunnel->dev->tstats);
 142                u64_stats_update_begin(&tstats->syncp);
 143                tstats->rx_packets++;
 144                tstats->rx_bytes += skb->len;
 145                u64_stats_update_end(&tstats->syncp);
 146
 147                secpath_reset(skb);
 148                skb->dev = tunnel->dev;
 149                return 1;
 150        }
 151
 152        return -1;
 153}
 154
 155/* This function assumes it is being called from dev_queue_xmit()
 156 * and that skb is filled properly by that function.
 157 */
 158
 159static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 160{
 161        struct ip_tunnel *tunnel = netdev_priv(dev);
 162        struct iphdr  *tiph = &tunnel->parms.iph;
 163        u8     tos;
 164        struct rtable *rt;              /* Route to the other host */
 165        struct net_device *tdev;        /* Device to other host */
 166        struct iphdr  *old_iph = ip_hdr(skb);
 167        __be32 dst = tiph->daddr;
 168        struct flowi4 fl4;
 169        int err;
 170
 171        if (skb->protocol != htons(ETH_P_IP))
 172                goto tx_error;
 173
 174        tos = old_iph->tos;
 175
 176        memset(&fl4, 0, sizeof(fl4));
 177        flowi4_init_output(&fl4, tunnel->parms.link,
 178                           be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
 179                           RT_SCOPE_UNIVERSE,
 180                           IPPROTO_IPIP, 0,
 181                           dst, tiph->saddr, 0, 0);
 182        rt = ip_route_output_key(dev_net(dev), &fl4);
 183        if (IS_ERR(rt)) {
 184                dev->stats.tx_carrier_errors++;
 185                goto tx_error_icmp;
 186        }
 187        /* if there is no transform then this tunnel is not functional.
 188         * Or if the xfrm is not mode tunnel.
 189         */
 190        if (!rt->dst.xfrm ||
 191            rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
 192                dev->stats.tx_carrier_errors++;
 193                goto tx_error_icmp;
 194        }
 195        tdev = rt->dst.dev;
 196
 197        if (tdev == dev) {
 198                ip_rt_put(rt);
 199                dev->stats.collisions++;
 200                goto tx_error;
 201        }
 202
 203        if (tunnel->err_count > 0) {
 204                if (time_before(jiffies,
 205                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 206                        tunnel->err_count--;
 207                        dst_link_failure(skb);
 208                } else
 209                        tunnel->err_count = 0;
 210        }
 211
 212        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 213        skb_dst_drop(skb);
 214        skb_dst_set(skb, &rt->dst);
 215        nf_reset(skb);
 216        skb->dev = skb_dst(skb)->dev;
 217
 218        err = dst_output(skb);
 219        if (net_xmit_eval(err) == 0)
 220                err = skb->len;
 221        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 222        return NETDEV_TX_OK;
 223
 224tx_error_icmp:
 225        dst_link_failure(skb);
 226tx_error:
 227        dev->stats.tx_errors++;
 228        dev_kfree_skb(skb);
 229        return NETDEV_TX_OK;
 230}
 231
 232static int
 233vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 234{
 235        int err = 0;
 236        struct ip_tunnel_parm p;
 237
 238        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 239                return -EFAULT;
 240
 241        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 242                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 243                    p.iph.ihl != 5)
 244                        return -EINVAL;
 245        }
 246
 247        err = ip_tunnel_ioctl(dev, &p, cmd);
 248        if (err)
 249                return err;
 250
 251        if (cmd != SIOCDELTUNNEL) {
 252                p.i_flags |= GRE_KEY | VTI_ISVTI;
 253                p.o_flags |= GRE_KEY;
 254        }
 255
 256        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 257                return -EFAULT;
 258        return 0;
 259}
 260
 261static const struct net_device_ops vti_netdev_ops = {
 262        .ndo_init       = vti_tunnel_init,
 263        .ndo_uninit     = ip_tunnel_uninit,
 264        .ndo_start_xmit = vti_tunnel_xmit,
 265        .ndo_do_ioctl   = vti_tunnel_ioctl,
 266        .ndo_change_mtu = ip_tunnel_change_mtu,
 267        .ndo_get_stats64 = ip_tunnel_get_stats64,
 268};
 269
 270static void vti_tunnel_setup(struct net_device *dev)
 271{
 272        dev->netdev_ops         = &vti_netdev_ops;
 273        ip_tunnel_setup(dev, vti_net_id);
 274}
 275
 276static int vti_tunnel_init(struct net_device *dev)
 277{
 278        struct ip_tunnel *tunnel = netdev_priv(dev);
 279        struct iphdr *iph = &tunnel->parms.iph;
 280
 281        memcpy(dev->dev_addr, &iph->saddr, 4);
 282        memcpy(dev->broadcast, &iph->daddr, 4);
 283
 284        dev->type               = ARPHRD_TUNNEL;
 285        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 286        dev->mtu                = ETH_DATA_LEN;
 287        dev->flags              = IFF_NOARP;
 288        dev->iflink             = 0;
 289        dev->addr_len           = 4;
 290        dev->features           |= NETIF_F_NETNS_LOCAL;
 291        dev->features           |= NETIF_F_LLTX;
 292        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 293
 294        return ip_tunnel_init(dev);
 295}
 296
 297static void __net_init vti_fb_tunnel_init(struct net_device *dev)
 298{
 299        struct ip_tunnel *tunnel = netdev_priv(dev);
 300        struct iphdr *iph = &tunnel->parms.iph;
 301
 302        iph->version            = 4;
 303        iph->protocol           = IPPROTO_IPIP;
 304        iph->ihl                = 5;
 305}
 306
 307static struct xfrm_tunnel vti_handler __read_mostly = {
 308        .handler        =       vti_rcv,
 309        .err_handler    =       vti_err,
 310        .priority       =       1,
 311};
 312
 313static int __net_init vti_init_net(struct net *net)
 314{
 315        int err;
 316        struct ip_tunnel_net *itn;
 317
 318        err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
 319        if (err)
 320                return err;
 321        itn = net_generic(net, vti_net_id);
 322        vti_fb_tunnel_init(itn->fb_tunnel_dev);
 323        return 0;
 324}
 325
 326static void __net_exit vti_exit_net(struct net *net)
 327{
 328        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
 329        ip_tunnel_delete_net(itn, &vti_link_ops);
 330}
 331
 332static struct pernet_operations vti_net_ops = {
 333        .init = vti_init_net,
 334        .exit = vti_exit_net,
 335        .id   = &vti_net_id,
 336        .size = sizeof(struct ip_tunnel_net),
 337};
 338
 339static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 340{
 341        return 0;
 342}
 343
 344static void vti_netlink_parms(struct nlattr *data[],
 345                              struct ip_tunnel_parm *parms)
 346{
 347        memset(parms, 0, sizeof(*parms));
 348
 349        parms->iph.protocol = IPPROTO_IPIP;
 350
 351        if (!data)
 352                return;
 353
 354        if (data[IFLA_VTI_LINK])
 355                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 356
 357        if (data[IFLA_VTI_IKEY])
 358                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
 359
 360        if (data[IFLA_VTI_OKEY])
 361                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 362
 363        if (data[IFLA_VTI_LOCAL])
 364                parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
 365
 366        if (data[IFLA_VTI_REMOTE])
 367                parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
 368
 369}
 370
 371static int vti_newlink(struct net *src_net, struct net_device *dev,
 372                       struct nlattr *tb[], struct nlattr *data[])
 373{
 374        struct ip_tunnel_parm parms;
 375
 376        vti_netlink_parms(data, &parms);
 377        return ip_tunnel_newlink(dev, tb, &parms);
 378}
 379
 380static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
 381                          struct nlattr *data[])
 382{
 383        struct ip_tunnel_parm p;
 384
 385        vti_netlink_parms(data, &p);
 386        return ip_tunnel_changelink(dev, tb, &p);
 387}
 388
 389static size_t vti_get_size(const struct net_device *dev)
 390{
 391        return
 392                /* IFLA_VTI_LINK */
 393                nla_total_size(4) +
 394                /* IFLA_VTI_IKEY */
 395                nla_total_size(4) +
 396                /* IFLA_VTI_OKEY */
 397                nla_total_size(4) +
 398                /* IFLA_VTI_LOCAL */
 399                nla_total_size(4) +
 400                /* IFLA_VTI_REMOTE */
 401                nla_total_size(4) +
 402                0;
 403}
 404
 405static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 406{
 407        struct ip_tunnel *t = netdev_priv(dev);
 408        struct ip_tunnel_parm *p = &t->parms;
 409
 410        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 411        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 412        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
 413        nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
 414        nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 415
 416        return 0;
 417}
 418
 419static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
 420        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
 421        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
 422        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
 423        [IFLA_VTI_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 424        [IFLA_VTI_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 425};
 426
 427static struct rtnl_link_ops vti_link_ops __read_mostly = {
 428        .kind           = "vti",
 429        .maxtype        = IFLA_VTI_MAX,
 430        .policy         = vti_policy,
 431        .priv_size      = sizeof(struct ip_tunnel),
 432        .setup          = vti_tunnel_setup,
 433        .validate       = vti_tunnel_validate,
 434        .newlink        = vti_newlink,
 435        .changelink     = vti_changelink,
 436        .get_size       = vti_get_size,
 437        .fill_info      = vti_fill_info,
 438};
 439
 440static int __init vti_init(void)
 441{
 442        int err;
 443
 444        pr_info("IPv4 over IPSec tunneling driver\n");
 445
 446        err = register_pernet_device(&vti_net_ops);
 447        if (err < 0)
 448                return err;
 449        err = xfrm4_mode_tunnel_input_register(&vti_handler);
 450        if (err < 0) {
 451                unregister_pernet_device(&vti_net_ops);
 452                pr_info("vti init: can't register tunnel\n");
 453        }
 454
 455        err = rtnl_link_register(&vti_link_ops);
 456        if (err < 0)
 457                goto rtnl_link_failed;
 458
 459        return err;
 460
 461rtnl_link_failed:
 462        xfrm4_mode_tunnel_input_deregister(&vti_handler);
 463        unregister_pernet_device(&vti_net_ops);
 464        return err;
 465}
 466
 467static void __exit vti_fini(void)
 468{
 469        rtnl_link_unregister(&vti_link_ops);
 470        if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
 471                pr_info("vti close: can't deregister tunnel\n");
 472
 473        unregister_pernet_device(&vti_net_ops);
 474}
 475
 476module_init(vti_init);
 477module_exit(vti_fini);
 478MODULE_LICENSE("GPL");
 479MODULE_ALIAS_RTNL_LINK("vti");
 480MODULE_ALIAS_NETDEV("ip_vti0");
 481