linux/net/ipv4/ip_vti.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3: IP/IP protocol decoder modified to support
   3 *                  virtual tunnel interface
   4 *
   5 *      Authors:
   6 *              Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 */
  14
  15/*
   16   This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c
  17
  18   For comments look at net/ipv4/ip_gre.c --ANK
  19 */
  20
  21
  22#include <linux/capability.h>
  23#include <linux/module.h>
  24#include <linux/types.h>
  25#include <linux/kernel.h>
  26#include <linux/uaccess.h>
  27#include <linux/skbuff.h>
  28#include <linux/netdevice.h>
  29#include <linux/in.h>
  30#include <linux/tcp.h>
  31#include <linux/udp.h>
  32#include <linux/if_arp.h>
  33#include <linux/mroute.h>
  34#include <linux/init.h>
  35#include <linux/netfilter_ipv4.h>
  36#include <linux/if_ether.h>
  37#include <linux/icmpv6.h>
  38
  39#include <net/sock.h>
  40#include <net/ip.h>
  41#include <net/icmp.h>
  42#include <net/ip_tunnels.h>
  43#include <net/inet_ecn.h>
  44#include <net/xfrm.h>
  45#include <net/net_namespace.h>
  46#include <net/netns/generic.h>
  47
  48static struct rtnl_link_ops vti_link_ops __read_mostly;
  49
  50static int vti_net_id __read_mostly;
  51static int vti_tunnel_init(struct net_device *dev);
  52
  53static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
  54                     int encap_type)
  55{
  56        struct ip_tunnel *tunnel;
  57        const struct iphdr *iph = ip_hdr(skb);
  58        struct net *net = dev_net(skb->dev);
  59        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
  60
  61        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
  62                                  iph->saddr, iph->daddr, 0);
  63        if (tunnel) {
  64                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
  65                        goto drop;
  66
  67                XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
  68
  69                return xfrm_input(skb, nexthdr, spi, encap_type);
  70        }
  71
  72        return -EINVAL;
  73drop:
  74        kfree_skb(skb);
  75        return 0;
  76}
  77
  78static int vti_rcv(struct sk_buff *skb)
  79{
  80        XFRM_SPI_SKB_CB(skb)->family = AF_INET;
  81        XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
  82
  83        return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
  84}
  85
  86static int vti_rcv_cb(struct sk_buff *skb, int err)
  87{
  88        unsigned short family;
  89        struct net_device *dev;
  90        struct pcpu_sw_netstats *tstats;
  91        struct xfrm_state *x;
  92        struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
  93        u32 orig_mark = skb->mark;
  94        int ret;
  95
  96        if (!tunnel)
  97                return 1;
  98
  99        dev = tunnel->dev;
 100
 101        if (err) {
 102                dev->stats.rx_errors++;
 103                dev->stats.rx_dropped++;
 104
 105                return 0;
 106        }
 107
 108        x = xfrm_input_state(skb);
 109        family = x->inner_mode->afinfo->family;
 110
 111        skb->mark = be32_to_cpu(tunnel->parms.i_key);
 112        ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
 113        skb->mark = orig_mark;
 114
 115        if (!ret)
 116                return -EPERM;
 117
 118        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
 119        skb->dev = dev;
 120
 121        tstats = this_cpu_ptr(dev->tstats);
 122
 123        u64_stats_update_begin(&tstats->syncp);
 124        tstats->rx_packets++;
 125        tstats->rx_bytes += skb->len;
 126        u64_stats_update_end(&tstats->syncp);
 127
 128        return 0;
 129}
 130
 131static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
 132{
 133        xfrm_address_t *daddr = (xfrm_address_t *)&dst;
 134        xfrm_address_t *saddr = (xfrm_address_t *)&src;
 135
 136        /* if there is no transform then this tunnel is not functional.
 137         * Or if the xfrm is not mode tunnel.
 138         */
 139        if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
 140            x->props.family != AF_INET)
 141                return false;
 142
 143        if (!dst)
 144                return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
 145
 146        if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
 147                return false;
 148
 149        return true;
 150}
 151
 152static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 153                            struct flowi *fl)
 154{
 155        struct ip_tunnel *tunnel = netdev_priv(dev);
 156        struct ip_tunnel_parm *parms = &tunnel->parms;
 157        struct dst_entry *dst = skb_dst(skb);
 158        struct net_device *tdev;        /* Device to other host */
 159        int err;
 160
 161        if (!dst) {
 162                dev->stats.tx_carrier_errors++;
 163                goto tx_error_icmp;
 164        }
 165
 166        dst_hold(dst);
 167        dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
 168        if (IS_ERR(dst)) {
 169                dev->stats.tx_carrier_errors++;
 170                goto tx_error_icmp;
 171        }
 172
 173        if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
 174                dev->stats.tx_carrier_errors++;
 175                dst_release(dst);
 176                goto tx_error_icmp;
 177        }
 178
 179        tdev = dst->dev;
 180
 181        if (tdev == dev) {
 182                dst_release(dst);
 183                dev->stats.collisions++;
 184                goto tx_error;
 185        }
 186
 187        if (tunnel->err_count > 0) {
 188                if (time_before(jiffies,
 189                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
 190                        tunnel->err_count--;
 191                        dst_link_failure(skb);
 192                } else
 193                        tunnel->err_count = 0;
 194        }
 195
 196        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 197        skb_dst_set(skb, dst);
 198        skb->dev = skb_dst(skb)->dev;
 199
 200        err = dst_output(tunnel->net, skb->sk, skb);
 201        if (net_xmit_eval(err) == 0)
 202                err = skb->len;
 203        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 204        return NETDEV_TX_OK;
 205
 206tx_error_icmp:
 207        dst_link_failure(skb);
 208tx_error:
 209        dev->stats.tx_errors++;
 210        kfree_skb(skb);
 211        return NETDEV_TX_OK;
 212}
 213
 214/* This function assumes it is being called from dev_queue_xmit()
 215 * and that skb is filled properly by that function.
 216 */
 217static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 218{
 219        struct ip_tunnel *tunnel = netdev_priv(dev);
 220        struct flowi fl;
 221
 222        memset(&fl, 0, sizeof(fl));
 223
 224        switch (skb->protocol) {
 225        case htons(ETH_P_IP):
 226                xfrm_decode_session(skb, &fl, AF_INET);
 227                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 228                break;
 229        case htons(ETH_P_IPV6):
 230                xfrm_decode_session(skb, &fl, AF_INET6);
 231                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 232                break;
 233        default:
 234                dev->stats.tx_errors++;
 235                dev_kfree_skb(skb);
 236                return NETDEV_TX_OK;
 237        }
 238
 239        /* override mark with tunnel output key */
 240        fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
 241
 242        return vti_xmit(skb, dev, &fl);
 243}
 244
 245static int vti4_err(struct sk_buff *skb, u32 info)
 246{
 247        __be32 spi;
 248        __u32 mark;
 249        struct xfrm_state *x;
 250        struct ip_tunnel *tunnel;
 251        struct ip_esp_hdr *esph;
 252        struct ip_auth_hdr *ah ;
 253        struct ip_comp_hdr *ipch;
 254        struct net *net = dev_net(skb->dev);
 255        const struct iphdr *iph = (const struct iphdr *)skb->data;
 256        int protocol = iph->protocol;
 257        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
 258
 259        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 260                                  iph->daddr, iph->saddr, 0);
 261        if (!tunnel)
 262                return -1;
 263
 264        mark = be32_to_cpu(tunnel->parms.o_key);
 265
 266        switch (protocol) {
 267        case IPPROTO_ESP:
 268                esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
 269                spi = esph->spi;
 270                break;
 271        case IPPROTO_AH:
 272                ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
 273                spi = ah->spi;
 274                break;
 275        case IPPROTO_COMP:
 276                ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 277                spi = htonl(ntohs(ipch->cpi));
 278                break;
 279        default:
 280                return 0;
 281        }
 282
 283        switch (icmp_hdr(skb)->type) {
 284        case ICMP_DEST_UNREACH:
 285                if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 286                        return 0;
 287        case ICMP_REDIRECT:
 288                break;
 289        default:
 290                return 0;
 291        }
 292
 293        x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
 294                              spi, protocol, AF_INET);
 295        if (!x)
 296                return 0;
 297
 298        if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 299                ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
 300        else
 301                ipv4_redirect(skb, net, 0, 0, protocol, 0);
 302        xfrm_state_put(x);
 303
 304        return 0;
 305}
 306
 307static int
 308vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 309{
 310        int err = 0;
 311        struct ip_tunnel_parm p;
 312
 313        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 314                return -EFAULT;
 315
 316        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 317                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
 318                    p.iph.ihl != 5)
 319                        return -EINVAL;
 320        }
 321
 322        if (!(p.i_flags & GRE_KEY))
 323                p.i_key = 0;
 324        if (!(p.o_flags & GRE_KEY))
 325                p.o_key = 0;
 326
 327        p.i_flags = VTI_ISVTI;
 328
 329        err = ip_tunnel_ioctl(dev, &p, cmd);
 330        if (err)
 331                return err;
 332
 333        if (cmd != SIOCDELTUNNEL) {
 334                p.i_flags |= GRE_KEY;
 335                p.o_flags |= GRE_KEY;
 336        }
 337
 338        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 339                return -EFAULT;
 340        return 0;
 341}
 342
 343static const struct net_device_ops vti_netdev_ops = {
 344        .ndo_init       = vti_tunnel_init,
 345        .ndo_uninit     = ip_tunnel_uninit,
 346        .ndo_start_xmit = vti_tunnel_xmit,
 347        .ndo_do_ioctl   = vti_tunnel_ioctl,
 348        .ndo_change_mtu = ip_tunnel_change_mtu,
 349        .ndo_get_stats64 = ip_tunnel_get_stats64,
 350        .ndo_get_iflink = ip_tunnel_get_iflink,
 351};
 352
 353static void vti_tunnel_setup(struct net_device *dev)
 354{
 355        dev->netdev_ops         = &vti_netdev_ops;
 356        dev->type               = ARPHRD_TUNNEL;
 357        ip_tunnel_setup(dev, vti_net_id);
 358}
 359
 360static int vti_tunnel_init(struct net_device *dev)
 361{
 362        struct ip_tunnel *tunnel = netdev_priv(dev);
 363        struct iphdr *iph = &tunnel->parms.iph;
 364
 365        memcpy(dev->dev_addr, &iph->saddr, 4);
 366        memcpy(dev->broadcast, &iph->daddr, 4);
 367
 368        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
 369        dev->mtu                = ETH_DATA_LEN;
 370        dev->flags              = IFF_NOARP;
 371        dev->addr_len           = 4;
 372        dev->features           |= NETIF_F_LLTX;
 373        netif_keep_dst(dev);
 374
 375        return ip_tunnel_init(dev);
 376}
 377
 378static void __net_init vti_fb_tunnel_init(struct net_device *dev)
 379{
 380        struct ip_tunnel *tunnel = netdev_priv(dev);
 381        struct iphdr *iph = &tunnel->parms.iph;
 382
 383        iph->version            = 4;
 384        iph->protocol           = IPPROTO_IPIP;
 385        iph->ihl                = 5;
 386}
 387
 388static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
 389        .handler        =       vti_rcv,
 390        .input_handler  =       vti_input,
 391        .cb_handler     =       vti_rcv_cb,
 392        .err_handler    =       vti4_err,
 393        .priority       =       100,
 394};
 395
 396static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
 397        .handler        =       vti_rcv,
 398        .input_handler  =       vti_input,
 399        .cb_handler     =       vti_rcv_cb,
 400        .err_handler    =       vti4_err,
 401        .priority       =       100,
 402};
 403
 404static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
 405        .handler        =       vti_rcv,
 406        .input_handler  =       vti_input,
 407        .cb_handler     =       vti_rcv_cb,
 408        .err_handler    =       vti4_err,
 409        .priority       =       100,
 410};
 411
 412static int __net_init vti_init_net(struct net *net)
 413{
 414        int err;
 415        struct ip_tunnel_net *itn;
 416
 417        err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
 418        if (err)
 419                return err;
 420        itn = net_generic(net, vti_net_id);
 421        vti_fb_tunnel_init(itn->fb_tunnel_dev);
 422        return 0;
 423}
 424
 425static void __net_exit vti_exit_net(struct net *net)
 426{
 427        struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
 428        ip_tunnel_delete_net(itn, &vti_link_ops);
 429}
 430
 431static struct pernet_operations vti_net_ops = {
 432        .init = vti_init_net,
 433        .exit = vti_exit_net,
 434        .id   = &vti_net_id,
 435        .size = sizeof(struct ip_tunnel_net),
 436};
 437
 438static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 439{
 440        return 0;
 441}
 442
 443static void vti_netlink_parms(struct nlattr *data[],
 444                              struct ip_tunnel_parm *parms)
 445{
 446        memset(parms, 0, sizeof(*parms));
 447
 448        parms->iph.protocol = IPPROTO_IPIP;
 449
 450        if (!data)
 451                return;
 452
 453        parms->i_flags = VTI_ISVTI;
 454
 455        if (data[IFLA_VTI_LINK])
 456                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 457
 458        if (data[IFLA_VTI_IKEY])
 459                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
 460
 461        if (data[IFLA_VTI_OKEY])
 462                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
 463
 464        if (data[IFLA_VTI_LOCAL])
 465                parms->iph.saddr = nla_get_in_addr(data[IFLA_VTI_LOCAL]);
 466
 467        if (data[IFLA_VTI_REMOTE])
 468                parms->iph.daddr = nla_get_in_addr(data[IFLA_VTI_REMOTE]);
 469
 470}
 471
 472static int vti_newlink(struct net *src_net, struct net_device *dev,
 473                       struct nlattr *tb[], struct nlattr *data[])
 474{
 475        struct ip_tunnel_parm parms;
 476
 477        vti_netlink_parms(data, &parms);
 478        return ip_tunnel_newlink(dev, tb, &parms);
 479}
 480
 481static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
 482                          struct nlattr *data[])
 483{
 484        struct ip_tunnel_parm p;
 485
 486        vti_netlink_parms(data, &p);
 487        return ip_tunnel_changelink(dev, tb, &p);
 488}
 489
 490static size_t vti_get_size(const struct net_device *dev)
 491{
 492        return
 493                /* IFLA_VTI_LINK */
 494                nla_total_size(4) +
 495                /* IFLA_VTI_IKEY */
 496                nla_total_size(4) +
 497                /* IFLA_VTI_OKEY */
 498                nla_total_size(4) +
 499                /* IFLA_VTI_LOCAL */
 500                nla_total_size(4) +
 501                /* IFLA_VTI_REMOTE */
 502                nla_total_size(4) +
 503                0;
 504}
 505
 506static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
 507{
 508        struct ip_tunnel *t = netdev_priv(dev);
 509        struct ip_tunnel_parm *p = &t->parms;
 510
 511        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
 512        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
 513        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
 514        nla_put_in_addr(skb, IFLA_VTI_LOCAL, p->iph.saddr);
 515        nla_put_in_addr(skb, IFLA_VTI_REMOTE, p->iph.daddr);
 516
 517        return 0;
 518}
 519
 520static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
 521        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
 522        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
 523        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
 524        [IFLA_VTI_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 525        [IFLA_VTI_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 526};
 527
 528static struct rtnl_link_ops vti_link_ops __read_mostly = {
 529        .kind           = "vti",
 530        .maxtype        = IFLA_VTI_MAX,
 531        .policy         = vti_policy,
 532        .priv_size      = sizeof(struct ip_tunnel),
 533        .setup          = vti_tunnel_setup,
 534        .validate       = vti_tunnel_validate,
 535        .newlink        = vti_newlink,
 536        .changelink     = vti_changelink,
 537        .dellink        = ip_tunnel_dellink,
 538        .get_size       = vti_get_size,
 539        .fill_info      = vti_fill_info,
 540        .get_link_net   = ip_tunnel_get_link_net,
 541};
 542
 543static int __init vti_init(void)
 544{
 545        const char *msg;
 546        int err;
 547
 548        pr_info("IPv4 over IPsec tunneling driver\n");
 549
 550        msg = "tunnel device";
 551        err = register_pernet_device(&vti_net_ops);
 552        if (err < 0)
 553                goto pernet_dev_failed;
 554
 555        msg = "tunnel protocols";
 556        err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
 557        if (err < 0)
 558                goto xfrm_proto_esp_failed;
 559        err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
 560        if (err < 0)
 561                goto xfrm_proto_ah_failed;
 562        err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
 563        if (err < 0)
 564                goto xfrm_proto_comp_failed;
 565
 566        msg = "netlink interface";
 567        err = rtnl_link_register(&vti_link_ops);
 568        if (err < 0)
 569                goto rtnl_link_failed;
 570
 571        return err;
 572
 573rtnl_link_failed:
 574        xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
 575xfrm_proto_comp_failed:
 576        xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 577xfrm_proto_ah_failed:
 578        xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 579xfrm_proto_esp_failed:
 580        unregister_pernet_device(&vti_net_ops);
 581pernet_dev_failed:
 582        pr_err("vti init: failed to register %s\n", msg);
 583        return err;
 584}
 585
 586static void __exit vti_fini(void)
 587{
 588        rtnl_link_unregister(&vti_link_ops);
 589        xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
 590        xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 591        xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 592        unregister_pernet_device(&vti_net_ops);
 593}
 594
 595module_init(vti_init);
 596module_exit(vti_fini);
 597MODULE_LICENSE("GPL");
 598MODULE_ALIAS_RTNL_LINK("vti");
 599MODULE_ALIAS_NETDEV("ip_vti0");
 600