linux/net/ipv6/ip6_tunnel.c
   1/*
   2 *      IPv6 tunneling device
   3 *      Linux INET6 implementation
   4 *
   5 *      Authors:
   6 *      Ville Nuorvala          <vnuorval@tcs.hut.fi>
   7 *      Yasuyuki Kozakai        <kozakai@linux-ipv6.org>
   8 *
   9 *      Based on:
  10 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  11 *
  12 *      RFC 2473
  13 *
  14 *      This program is free software; you can redistribute it and/or
  15 *      modify it under the terms of the GNU General Public License
  16 *      as published by the Free Software Foundation; either version
  17 *      2 of the License, or (at your option) any later version.
  18 *
  19 */
  20
  21#include <linux/module.h>
  22#include <linux/capability.h>
  23#include <linux/errno.h>
  24#include <linux/types.h>
  25#include <linux/sockios.h>
  26#include <linux/icmp.h>
  27#include <linux/if.h>
  28#include <linux/in.h>
  29#include <linux/ip.h>
  30#include <linux/if_tunnel.h>
  31#include <linux/net.h>
  32#include <linux/in6.h>
  33#include <linux/netdevice.h>
  34#include <linux/if_arp.h>
  35#include <linux/icmpv6.h>
  36#include <linux/init.h>
  37#include <linux/route.h>
  38#include <linux/rtnetlink.h>
  39#include <linux/netfilter_ipv6.h>
  40
  41#include <asm/uaccess.h>
  42#include <asm/atomic.h>
  43
  44#include <net/icmp.h>
  45#include <net/ip.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49#include <net/ip6_tunnel.h>
  50#include <net/xfrm.h>
  51#include <net/dsfield.h>
  52#include <net/inet_ecn.h>
  53#include <net/net_namespace.h>
  54#include <net/netns/generic.h>
  55
  56MODULE_AUTHOR("Ville Nuorvala");
  57MODULE_DESCRIPTION("IPv6 tunneling device");
  58MODULE_LICENSE("GPL");
  59
  60#define IPV6_TLV_TEL_DST_SIZE 8
  61
  62#ifdef IP6_TNL_DEBUG
  63#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
  64#else
  65#define IP6_TNL_TRACE(x...) do {;} while(0)
  66#endif
  67
  68#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
  69#define IPV6_TCLASS_SHIFT 20
  70
  71#define HASH_SIZE  32
  72
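     /*
      * Hash one tunnel end-point address by XOR-folding its four 32-bit
      * words and masking the result down to HASH_SIZE buckets.
      * ip6_tnl_lookup() indexes the bucket array with
      * HASH(remote) ^ HASH(local).
      */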
  73#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
  74                     (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
  75                    (HASH_SIZE - 1))
  76
  77static void ip6_fb_tnl_dev_init(struct net_device *dev);
  78static void ip6_tnl_dev_init(struct net_device *dev);
  79static void ip6_tnl_dev_setup(struct net_device *dev);
  80
  81static int ip6_tnl_net_id;
  82struct ip6_tnl_net {
  83        /* the IPv6 tunnel fallback device */
  84        struct net_device *fb_tnl_dev;
  85        /* lists for storing tunnels in use */
  86        struct ip6_tnl *tnls_r_l[HASH_SIZE];
  87        struct ip6_tnl *tnls_wc[1];
  88        struct ip6_tnl **tnls[2];
  89};
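     /*
      * tnls[0] aliases tnls_wc (the single wildcard/fallback tunnel) and
      * tnls[1] aliases tnls_r_l (tunnels hashed by remote/local address),
      * letting ip6_tnl_bucket() select a list by "prio": 0 when both
      * end-points are unspecified, 1 otherwise.
      */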
  90
  91/* lock for the tunnel lists */
  92static DEFINE_RWLOCK(ip6_tnl_lock);
  93
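     /*
      * Per-tunnel dst cache: ip6_tnl_dst_store() remembers the fib6 node
      * serial number (fn_sernum) as a cookie, and ip6_tnl_dst_check()
      * invalidates the cached entry once the route is marked obsolete and
      * dst->ops->check() no longer accepts that cookie.
      */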
  94static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
  95{
  96        struct dst_entry *dst = t->dst_cache;
  97
  98        if (dst && dst->obsolete &&
  99            dst->ops->check(dst, t->dst_cookie) == NULL) {
 100                t->dst_cache = NULL;
 101                dst_release(dst);
 102                return NULL;
 103        }
 104
 105        return dst;
 106}
 107
 108static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
 109{
 110        dst_release(t->dst_cache);
 111        t->dst_cache = NULL;
 112}
 113
 114static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 115{
 116        struct rt6_info *rt = (struct rt6_info *) dst;
 117        t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 118        dst_release(t->dst_cache);
 119        t->dst_cache = dst;
 120}
 121
 122/**
 123 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 124 *   @remote: the address of the tunnel exit-point
 125 *   @local: the address of the tunnel entry-point
 126 *
 127 * Return:
 128 *   tunnel matching given end-points if found,
 129 *   else fallback tunnel if its device is up,
 130 *   else %NULL
 131 **/
 132
 133static struct ip6_tnl *
 134ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
 135{
 136        unsigned h0 = HASH(remote);
 137        unsigned h1 = HASH(local);
 138        struct ip6_tnl *t;
 139        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 140
 141        for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
 142                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 143                    ipv6_addr_equal(remote, &t->parms.raddr) &&
 144                    (t->dev->flags & IFF_UP))
 145                        return t;
 146        }
 147        if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
 148                return t;
 149
 150        return NULL;
 151}
 152
 153/**
 154 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 155 *   @p: parameters containing tunnel end-points
 156 *
 157 * Description:
 158 *   ip6_tnl_bucket() returns the head of the list matching the
 159 *   &struct in6_addr entries laddr and raddr in @p.
 160 *
 161 * Return: head of IPv6 tunnel list
 162 **/
 163
 164static struct ip6_tnl **
 165ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
 166{
 167        struct in6_addr *remote = &p->raddr;
 168        struct in6_addr *local = &p->laddr;
 169        unsigned h = 0;
 170        int prio = 0;
 171
 172        if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 173                prio = 1;
 174                h = HASH(remote) ^ HASH(local);
 175        }
 176        return &ip6n->tnls[prio][h];
 177}
 178
 179/**
 180 * ip6_tnl_link - add tunnel to hash table
 181 *   @t: tunnel to be added
 182 **/
 183
 184static void
 185ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 186{
 187        struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
 188
 189        t->next = *tp;
 190        write_lock_bh(&ip6_tnl_lock);
 191        *tp = t;
 192        write_unlock_bh(&ip6_tnl_lock);
 193}
 194
 195/**
 196 * ip6_tnl_unlink - remove tunnel from hash table
 197 *   @t: tunnel to be removed
 198 **/
 199
 200static void
 201ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
 202{
 203        struct ip6_tnl **tp;
 204
 205        for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
 206                if (t == *tp) {
 207                        write_lock_bh(&ip6_tnl_lock);
 208                        *tp = t->next;
 209                        write_unlock_bh(&ip6_tnl_lock);
 210                        break;
 211                }
 212        }
 213}
 214
 215/**
 216 * ip6_tnl_create() - create a new tunnel
  217 *   @net: network namespace of the new tunnel
  218 *   @p: tunnel parameters
 219 *
 220 * Description:
 221 *   Create tunnel matching given parameters.
 222 *
 223 * Return:
 224 *   created tunnel or NULL
 225 **/
 226
 227static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
 228{
 229        struct net_device *dev;
 230        struct ip6_tnl *t;
 231        char name[IFNAMSIZ];
 232        int err;
 233        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 234
 235        if (p->name[0])
 236                strlcpy(name, p->name, IFNAMSIZ);
 237        else
 238                sprintf(name, "ip6tnl%%d");
 239
 240        dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
 241        if (dev == NULL)
 242                goto failed;
 243
 244        dev_net_set(dev, net);
 245
 246        if (strchr(name, '%')) {
 247                if (dev_alloc_name(dev, name) < 0)
 248                        goto failed_free;
 249        }
 250
 251        t = netdev_priv(dev);
 252        t->parms = *p;
 253        ip6_tnl_dev_init(dev);
 254
 255        if ((err = register_netdevice(dev)) < 0)
 256                goto failed_free;
 257
 258        dev_hold(dev);
 259        ip6_tnl_link(ip6n, t);
 260        return t;
 261
 262failed_free:
 263        free_netdev(dev);
 264failed:
 265        return NULL;
 266}
 267
 268/**
 269 * ip6_tnl_locate - find or create tunnel matching given parameters
 270 *   @p: tunnel parameters
  271 *   @create: != 0 if a new tunnel may be created when no match is found
 272 *
 273 * Description:
 274 *   ip6_tnl_locate() first tries to locate an existing tunnel
  275 *   based on @p. If this is unsuccessful, but @create is set, a new
 276 *   tunnel device is created and registered for use.
 277 *
 278 * Return:
 279 *   matching tunnel or NULL
 280 **/
 281
 282static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 283                struct ip6_tnl_parm *p, int create)
 284{
 285        struct in6_addr *remote = &p->raddr;
 286        struct in6_addr *local = &p->laddr;
 287        struct ip6_tnl *t;
 288        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 289
 290        for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) {
 291                if (ipv6_addr_equal(local, &t->parms.laddr) &&
 292                    ipv6_addr_equal(remote, &t->parms.raddr))
 293                        return t;
 294        }
 295        if (!create)
 296                return NULL;
 297        return ip6_tnl_create(net, p);
 298}
 299
 300/**
 301 * ip6_tnl_dev_uninit - tunnel device uninitializer
 302 *   @dev: the device to be destroyed
 303 *
 304 * Description:
 305 *   ip6_tnl_dev_uninit() removes tunnel from its list
 306 **/
 307
 308static void
 309ip6_tnl_dev_uninit(struct net_device *dev)
 310{
 311        struct ip6_tnl *t = netdev_priv(dev);
 312        struct net *net = dev_net(dev);
 313        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 314
 315        if (dev == ip6n->fb_tnl_dev) {
 316                write_lock_bh(&ip6_tnl_lock);
 317                ip6n->tnls_wc[0] = NULL;
 318                write_unlock_bh(&ip6_tnl_lock);
 319        } else {
 320                ip6_tnl_unlink(ip6n, t);
 321        }
 322        ip6_tnl_dst_reset(t);
 323        dev_put(dev);
 324}
 325
 326/**
  327 * parse_tlv_tnl_enc_lim - handle encapsulation limit option
 328 *   @skb: received socket buffer
 329 *
 330 * Return:
  331 *   0 if no encapsulation limit option was found,
  332 *   else offset of the encapsulation limit option within @raw
 333 **/
 334
 335static __u16
 336parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 337{
 338        struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
 339        __u8 nexthdr = ipv6h->nexthdr;
 340        __u16 off = sizeof (*ipv6h);
 341
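             /*
              * Walk the extension header chain of the inner packet; inside
              * any destination options header, scan the TLVs for a
              * well-formed tunnel encapsulation limit option and return its
              * offset from @raw.  A non-zero fragment offset or an
              * unparsable header ends the walk.
              */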
 342        while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
 343                __u16 optlen = 0;
 344                struct ipv6_opt_hdr *hdr;
 345                if (raw + off + sizeof (*hdr) > skb->data &&
 346                    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
 347                        break;
 348
 349                hdr = (struct ipv6_opt_hdr *) (raw + off);
 350                if (nexthdr == NEXTHDR_FRAGMENT) {
 351                        struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
 352                        if (frag_hdr->frag_off)
 353                                break;
 354                        optlen = 8;
 355                } else if (nexthdr == NEXTHDR_AUTH) {
 356                        optlen = (hdr->hdrlen + 2) << 2;
 357                } else {
 358                        optlen = ipv6_optlen(hdr);
 359                }
 360                if (nexthdr == NEXTHDR_DEST) {
 361                        __u16 i = off + 2;
 362                        while (1) {
 363                                struct ipv6_tlv_tnl_enc_lim *tel;
 364
 365                                /* No more room for encapsulation limit */
 366                                if (i + sizeof (*tel) > off + optlen)
 367                                        break;
 368
 369                                tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
 370                                /* return index of option if found and valid */
 371                                if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 372                                    tel->length == 1)
 373                                        return i;
 374                                /* else jump to next option */
 375                                if (tel->type)
 376                                        i += tel->length + 2;
 377                                else
 378                                        i++;
 379                        }
 380                }
 381                nexthdr = hdr->nexthdr;
 382                off += optlen;
 383        }
 384        return 0;
 385}
 386
 387/**
 388 * ip6_tnl_err - tunnel error handler
 389 *
 390 * Description:
 391 *   ip6_tnl_err() should handle errors in the tunnel according
 392 *   to the specifications in RFC 2473.
 393 **/
 394
 395static int
 396ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 397            u8 *type, u8 *code, int *msg, __u32 *info, int offset)
 398{
 399        struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
 400        struct ip6_tnl *t;
 401        int rel_msg = 0;
 402        u8 rel_type = ICMPV6_DEST_UNREACH;
 403        u8 rel_code = ICMPV6_ADDR_UNREACH;
 404        __u32 rel_info = 0;
 405        __u16 len;
 406        int err = -ENOENT;
 407
 408        /* If the packet doesn't contain the original IPv6 header we are
 409           in trouble since we might need the source address for further
 410           processing of the error. */
 411
 412        read_lock(&ip6_tnl_lock);
 413        if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
 414                                        &ipv6h->saddr)) == NULL)
 415                goto out;
 416
 417        if (t->parms.proto != ipproto && t->parms.proto != 0)
 418                goto out;
 419
 420        err = 0;
 421
 422        switch (*type) {
 423                __u32 teli;
 424                struct ipv6_tlv_tnl_enc_lim *tel;
 425                __u32 mtu;
 426        case ICMPV6_DEST_UNREACH:
 427                if (net_ratelimit())
 428                        printk(KERN_WARNING
 429                               "%s: Path to destination invalid "
 430                               "or inactive!\n", t->parms.name);
 431                rel_msg = 1;
 432                break;
 433        case ICMPV6_TIME_EXCEED:
 434                if ((*code) == ICMPV6_EXC_HOPLIMIT) {
 435                        if (net_ratelimit())
 436                                printk(KERN_WARNING
 437                                       "%s: Too small hop limit or "
 438                                       "routing loop in tunnel!\n",
 439                                       t->parms.name);
 440                        rel_msg = 1;
 441                }
 442                break;
 443        case ICMPV6_PARAMPROB:
 444                teli = 0;
 445                if ((*code) == ICMPV6_HDR_FIELD)
 446                        teli = parse_tlv_tnl_enc_lim(skb, skb->data);
 447
 448                if (teli && teli == *info - 2) {
 449                        tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 450                        if (tel->encap_limit == 0) {
 451                                if (net_ratelimit())
 452                                        printk(KERN_WARNING
 453                                               "%s: Too small encapsulation "
 454                                               "limit or routing loop in "
 455                                               "tunnel!\n", t->parms.name);
 456                                rel_msg = 1;
 457                        }
 458                } else if (net_ratelimit()) {
 459                        printk(KERN_WARNING
 460                               "%s: Recipient unable to parse tunneled "
  461                               "packet!\n", t->parms.name);
 462                }
 463                break;
 464        case ICMPV6_PKT_TOOBIG:
 465                mtu = *info - offset;
 466                if (mtu < IPV6_MIN_MTU)
 467                        mtu = IPV6_MIN_MTU;
 468                t->dev->mtu = mtu;
 469
 470                if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
 471                        rel_type = ICMPV6_PKT_TOOBIG;
 472                        rel_code = 0;
 473                        rel_info = mtu;
 474                        rel_msg = 1;
 475                }
 476                break;
 477        }
 478
 479        *type = rel_type;
 480        *code = rel_code;
 481        *info = rel_info;
 482        *msg = rel_msg;
 483
 484out:
 485        read_unlock(&ip6_tnl_lock);
 486        return err;
 487}
 488
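     /*
      * ip4ip6_err() relays a tunnel error back to the original IPv4 sender:
      * the ICMPv6 error classified by ip6_tnl_err() is mapped to an ICMPv4
      * equivalent (host unreachable or fragmentation needed), the skb is
      * rewound to the inner IPv4 header, routed "backwards" to guess the
      * interface the inner packet arrived on, and handed to icmp_send().
      */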
 489static int
 490ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 491           u8 type, u8 code, int offset, __be32 info)
 492{
 493        int rel_msg = 0;
 494        u8 rel_type = type;
 495        u8 rel_code = code;
 496        __u32 rel_info = ntohl(info);
 497        int err;
 498        struct sk_buff *skb2;
 499        struct iphdr *eiph;
 500        struct flowi fl;
 501        struct rtable *rt;
 502
 503        err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
 504                          &rel_msg, &rel_info, offset);
 505        if (err < 0)
 506                return err;
 507
 508        if (rel_msg == 0)
 509                return 0;
 510
 511        switch (rel_type) {
 512        case ICMPV6_DEST_UNREACH:
 513                if (rel_code != ICMPV6_ADDR_UNREACH)
 514                        return 0;
 515                rel_type = ICMP_DEST_UNREACH;
 516                rel_code = ICMP_HOST_UNREACH;
 517                break;
 518        case ICMPV6_PKT_TOOBIG:
 519                if (rel_code != 0)
 520                        return 0;
 521                rel_type = ICMP_DEST_UNREACH;
 522                rel_code = ICMP_FRAG_NEEDED;
 523                break;
 524        default:
 525                return 0;
 526        }
 527
 528        if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
 529                return 0;
 530
 531        skb2 = skb_clone(skb, GFP_ATOMIC);
 532        if (!skb2)
 533                return 0;
 534
 535        skb_dst_drop(skb2);
 536
 537        skb_pull(skb2, offset);
 538        skb_reset_network_header(skb2);
 539        eiph = ip_hdr(skb2);
 540
 541        /* Try to guess incoming interface */
 542        memset(&fl, 0, sizeof(fl));
 543        fl.fl4_dst = eiph->saddr;
 544        fl.fl4_tos = RT_TOS(eiph->tos);
 545        fl.proto = IPPROTO_IPIP;
 546        if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
 547                goto out;
 548
 549        skb2->dev = rt->u.dst.dev;
 550
 551        /* route "incoming" packet */
 552        if (rt->rt_flags & RTCF_LOCAL) {
 553                ip_rt_put(rt);
 554                rt = NULL;
 555                fl.fl4_dst = eiph->daddr;
 556                fl.fl4_src = eiph->saddr;
 557                fl.fl4_tos = eiph->tos;
 558                if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
 559                    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
 560                        ip_rt_put(rt);
 561                        goto out;
 562                }
 563                skb_dst_set(skb2, (struct dst_entry *)rt);
 564        } else {
 565                ip_rt_put(rt);
 566                if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
 567                                   skb2->dev) ||
 568                    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
 569                        goto out;
 570        }
 571
 572        /* change mtu on this route */
 573        if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
 574                if (rel_info > dst_mtu(skb_dst(skb2)))
 575                        goto out;
 576
 577                skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), rel_info);
 578        }
 579
 580        icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
 581
 582out:
 583        kfree_skb(skb2);
 584        return 0;
 585}
 586
 587static int
 588ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 589           u8 type, u8 code, int offset, __be32 info)
 590{
 591        int rel_msg = 0;
 592        u8 rel_type = type;
 593        u8 rel_code = code;
 594        __u32 rel_info = ntohl(info);
 595        int err;
 596
 597        err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
 598                          &rel_msg, &rel_info, offset);
 599        if (err < 0)
 600                return err;
 601
 602        if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 603                struct rt6_info *rt;
 604                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 605
 606                if (!skb2)
 607                        return 0;
 608
 609                skb_dst_drop(skb2);
 610                skb_pull(skb2, offset);
 611                skb_reset_network_header(skb2);
 612
 613                /* Try to guess incoming interface */
 614                rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
 615                                NULL, 0, 0);
 616
 617                if (rt && rt->rt6i_dev)
 618                        skb2->dev = rt->rt6i_dev;
 619
 620                icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
 621
 622                if (rt)
 623                        dst_release(&rt->u.dst);
 624
 625                kfree_skb(skb2);
 626        }
 627
 628        return 0;
 629}
 630
 631static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 632                                        struct ipv6hdr *ipv6h,
 633                                        struct sk_buff *skb)
 634{
 635        __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 636
 637        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 638                ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
 639
 640        if (INET_ECN_is_ce(dsfield))
 641                IP_ECN_set_ce(ip_hdr(skb));
 642}
 643
 644static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 645                                        struct ipv6hdr *ipv6h,
 646                                        struct sk_buff *skb)
 647{
 648        if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
 649                ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
 650
 651        if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
 652                IP6_ECN_set_ce(ipv6_hdr(skb));
 653}
 654
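     /*
      * Receive policy check: decapsulation is only allowed when the tunnel
      * has IP6_TNL_F_CAP_RCV, the configured local address is multicast or
      * is actually assigned to this host (on the bound link, if any), and
      * the remote address is not one of our own addresses.
      */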
 655static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 656{
 657        struct ip6_tnl_parm *p = &t->parms;
 658        int ret = 0;
 659        struct net *net = dev_net(t->dev);
 660
 661        if (p->flags & IP6_TNL_F_CAP_RCV) {
 662                struct net_device *ldev = NULL;
 663
 664                if (p->link)
 665                        ldev = dev_get_by_index(net, p->link);
 666
 667                if ((ipv6_addr_is_multicast(&p->laddr) ||
 668                     likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
 669                    likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
 670                        ret = 1;
 671
 672                if (ldev)
 673                        dev_put(ldev);
 674        }
 675        return ret;
 676}
 677
 678/**
 679 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
 680 *   @skb: received socket buffer
 681 *   @protocol: ethernet protocol ID
 682 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
 683 *
 684 * Return: 0
 685 **/
 686
 687static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 688                       __u8 ipproto,
 689                       void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
 690                                                    struct ipv6hdr *ipv6h,
 691                                                    struct sk_buff *skb))
 692{
 693        struct ip6_tnl *t;
 694        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 695
 696        read_lock(&ip6_tnl_lock);
 697
 698        if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
 699                                        &ipv6h->daddr)) != NULL) {
 700                if (t->parms.proto != ipproto && t->parms.proto != 0) {
 701                        read_unlock(&ip6_tnl_lock);
 702                        goto discard;
 703                }
 704
 705                if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 706                        read_unlock(&ip6_tnl_lock);
 707                        goto discard;
 708                }
 709
 710                if (!ip6_tnl_rcv_ctl(t)) {
 711                        t->dev->stats.rx_dropped++;
 712                        read_unlock(&ip6_tnl_lock);
 713                        goto discard;
 714                }
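                     /*
                      * Decapsulate: the outer IPv6 header becomes the MAC
                      * header, the inner packet becomes the network header,
                      * and routing, netfilter and IPsec state inherited from
                      * the outer packet is cleared before the skb is
                      * re-injected on the tunnel device.
                      */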
 715                secpath_reset(skb);
 716                skb->mac_header = skb->network_header;
 717                skb_reset_network_header(skb);
 718                skb->protocol = htons(protocol);
 719                skb->pkt_type = PACKET_HOST;
 720                memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 721                skb->dev = t->dev;
 722                skb_dst_drop(skb);
 723                nf_reset(skb);
 724
 725                dscp_ecn_decapsulate(t, ipv6h, skb);
 726
 727                t->dev->stats.rx_packets++;
 728                t->dev->stats.rx_bytes += skb->len;
 729                netif_rx(skb);
 730                read_unlock(&ip6_tnl_lock);
 731                return 0;
 732        }
 733        read_unlock(&ip6_tnl_lock);
 734        return 1;
 735
 736discard:
 737        kfree_skb(skb);
 738        return 0;
 739}
 740
 741static int ip4ip6_rcv(struct sk_buff *skb)
 742{
 743        return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
 744                           ip4ip6_dscp_ecn_decapsulate);
 745}
 746
 747static int ip6ip6_rcv(struct sk_buff *skb)
 748{
 749        return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
 750                           ip6ip6_dscp_ecn_decapsulate);
 751}
 752
 753struct ipv6_tel_txoption {
 754        struct ipv6_txoptions ops;
 755        __u8 dst_opt[8];
 756};
 757
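     /*
      * Build the 8-byte destination options header that carries the tunnel
      * encapsulation limit option (RFC 2473): bytes 0-1 are the option
      * header (next header filled in when the option is pushed, hdrlen 0
      * == 8 bytes total), bytes 2-4 are the encapsulation limit TLV
      * (type, length 1, value) and bytes 5-7 are PadN padding.
      */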
 758static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 759{
 760        memset(opt, 0, sizeof(struct ipv6_tel_txoption));
 761
 762        opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
 763        opt->dst_opt[3] = 1;
 764        opt->dst_opt[4] = encap_limit;
 765        opt->dst_opt[5] = IPV6_TLV_PADN;
 766        opt->dst_opt[6] = 1;
 767
 768        opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
 769        opt->ops.opt_nflen = 8;
 770}
 771
 772/**
 773 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
 774 *   @t: the outgoing tunnel device
 775 *   @hdr: IPv6 header from the incoming packet
 776 *
 777 * Description:
 778 *   Avoid trivial tunneling loop by checking that tunnel exit-point
 779 *   doesn't match source of incoming packet.
 780 *
 781 * Return:
 782 *   1 if conflict,
 783 *   0 else
 784 **/
 785
 786static inline int
 787ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
 788{
 789        return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 790}
 791
 792static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 793{
 794        struct ip6_tnl_parm *p = &t->parms;
 795        int ret = 0;
 796        struct net *net = dev_net(t->dev);
 797
 798        if (p->flags & IP6_TNL_F_CAP_XMIT) {
 799                struct net_device *ldev = NULL;
 800
 801                if (p->link)
 802                        ldev = dev_get_by_index(net, p->link);
 803
 804                if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
 805                        printk(KERN_WARNING
 806                               "%s xmit: Local address not yet configured!\n",
 807                               p->name);
 808                else if (!ipv6_addr_is_multicast(&p->raddr) &&
 809                         unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
 810                        printk(KERN_WARNING
 811                               "%s xmit: Routing loop! "
 812                               "Remote address found on this node!\n",
 813                               p->name);
 814                else
 815                        ret = 1;
 816                if (ldev)
 817                        dev_put(ldev);
 818        }
 819        return ret;
 820}
 821/**
 822 * ip6_tnl_xmit2 - encapsulate packet and send
 823 *   @skb: the outgoing socket buffer
 824 *   @dev: the outgoing tunnel device
 825 *   @dsfield: dscp code for outer header
 826 *   @fl: flow of tunneled packet
 827 *   @encap_limit: encapsulation limit
 828 *   @pmtu: Path MTU is stored if packet is too big
 829 *
 830 * Description:
 831 *   Build new header and do some sanity checks on the packet before sending
 832 *   it.
 833 *
 834 * Return:
 835 *   0 on success
  836 *   -1 on failure
  837 *   %-EMSGSIZE if the packet is too big; the path MTU is returned in @pmtu
 838 **/
 839
 840static int ip6_tnl_xmit2(struct sk_buff *skb,
 841                         struct net_device *dev,
 842                         __u8 dsfield,
 843                         struct flowi *fl,
 844                         int encap_limit,
 845                         __u32 *pmtu)
 846{
 847        struct net *net = dev_net(dev);
 848        struct ip6_tnl *t = netdev_priv(dev);
 849        struct net_device_stats *stats = &t->dev->stats;
 850        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 851        struct ipv6_tel_txoption opt;
 852        struct dst_entry *dst;
 853        struct net_device *tdev;
 854        int mtu;
 855        unsigned int max_headroom = sizeof(struct ipv6hdr);
 856        u8 proto;
 857        int err = -1;
 858        int pkt_len;
 859
 860        if ((dst = ip6_tnl_dst_check(t)) != NULL)
 861                dst_hold(dst);
 862        else {
 863                dst = ip6_route_output(net, NULL, fl);
 864
 865                if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
 866                        goto tx_err_link_failure;
 867        }
 868
 869        tdev = dst->dev;
 870
 871        if (tdev == dev) {
 872                stats->collisions++;
 873                if (net_ratelimit())
 874                        printk(KERN_WARNING
 875                               "%s: Local routing loop detected!\n",
 876                               t->parms.name);
 877                goto tx_err_dst_release;
 878        }
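             /*
              * The outer IPv6 header and, when an encapsulation limit is in
              * use, the 8-byte destination options header from
              * init_tel_txopt() both shrink the usable path MTU and grow the
              * headroom needed in front of the inner packet.
              */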
 879        mtu = dst_mtu(dst) - sizeof (*ipv6h);
 880        if (encap_limit >= 0) {
 881                max_headroom += 8;
 882                mtu -= 8;
 883        }
 884        if (mtu < IPV6_MIN_MTU)
 885                mtu = IPV6_MIN_MTU;
 886        if (skb_dst(skb))
 887                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 888        if (skb->len > mtu) {
 889                *pmtu = mtu;
 890                err = -EMSGSIZE;
 891                goto tx_err_dst_release;
 892        }
 893
 894        /*
 895         * Okay, now see if we can stuff it in the buffer as-is.
 896         */
 897        max_headroom += LL_RESERVED_SPACE(tdev);
 898
 899        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 900            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
 901                struct sk_buff *new_skb;
 902
 903                if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
 904                        goto tx_err_dst_release;
 905
 906                if (skb->sk)
 907                        skb_set_owner_w(new_skb, skb->sk);
 908                kfree_skb(skb);
 909                skb = new_skb;
 910        }
 911        skb_dst_drop(skb);
 912        skb_dst_set(skb, dst_clone(dst));
 913
 914        skb->transport_header = skb->network_header;
 915
 916        proto = fl->proto;
 917        if (encap_limit >= 0) {
 918                init_tel_txopt(&opt, encap_limit);
 919                ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 920        }
 921        skb_push(skb, sizeof(struct ipv6hdr));
 922        skb_reset_network_header(skb);
 923        ipv6h = ipv6_hdr(skb);
 924        *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 925        dsfield = INET_ECN_encapsulate(0, dsfield);
 926        ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 927        ipv6h->hop_limit = t->parms.hop_limit;
 928        ipv6h->nexthdr = proto;
 929        ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
 930        ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
 931        nf_reset(skb);
 932        pkt_len = skb->len;
 933        err = ip6_local_out(skb);
 934
 935        if (net_xmit_eval(err) == 0) {
 936                stats->tx_bytes += pkt_len;
 937                stats->tx_packets++;
 938        } else {
 939                stats->tx_errors++;
 940                stats->tx_aborted_errors++;
 941        }
 942        ip6_tnl_dst_store(t, dst);
 943        return 0;
 944tx_err_link_failure:
 945        stats->tx_carrier_errors++;
 946        dst_link_failure(skb);
 947tx_err_dst_release:
 948        dst_release(dst);
 949        return err;
 950}
 951
 952static inline int
 953ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 954{
 955        struct ip6_tnl *t = netdev_priv(dev);
 956        struct iphdr  *iph = ip_hdr(skb);
 957        int encap_limit = -1;
 958        struct flowi fl;
 959        __u8 dsfield;
 960        __u32 mtu;
 961        int err;
 962
 963        if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
 964            !ip6_tnl_xmit_ctl(t))
 965                return -1;
 966
 967        if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 968                encap_limit = t->parms.encap_limit;
 969
 970        memcpy(&fl, &t->fl, sizeof (fl));
 971        fl.proto = IPPROTO_IPIP;
 972
 973        dsfield = ipv4_get_dsfield(iph);
 974
 975        if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
 976                fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
 977                                          & IPV6_TCLASS_MASK;
 978
 979        err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
 980        if (err != 0) {
 981                /* XXX: send ICMP error even if DF is not set. */
 982                if (err == -EMSGSIZE)
 983                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 984                                  htonl(mtu));
 985                return -1;
 986        }
 987
 988        return 0;
 989}
 990
 991static inline int
 992ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 993{
 994        struct ip6_tnl *t = netdev_priv(dev);
 995        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 996        int encap_limit = -1;
 997        __u16 offset;
 998        struct flowi fl;
 999        __u8 dsfield;
1000        __u32 mtu;
1001        int err;
1002
1003        if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
1004            !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
1005                return -1;
1006
1007        offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
1008        if (offset > 0) {
1009                struct ipv6_tlv_tnl_enc_lim *tel;
1010                tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
1011                if (tel->encap_limit == 0) {
1012                        icmpv6_send(skb, ICMPV6_PARAMPROB,
1013                                    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
1014                        return -1;
1015                }
1016                encap_limit = tel->encap_limit - 1;
1017        } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
1018                encap_limit = t->parms.encap_limit;
1019
1020        memcpy(&fl, &t->fl, sizeof (fl));
1021        fl.proto = IPPROTO_IPV6;
1022
1023        dsfield = ipv6_get_dsfield(ipv6h);
1024        if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
1025                fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
1026        if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
1027                fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
1028
1029        err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
1030        if (err != 0) {
1031                if (err == -EMSGSIZE)
1032                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1033                return -1;
1034        }
1035
1036        return 0;
1037}
1038
1039static netdev_tx_t
1040ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
1041{
1042        struct ip6_tnl *t = netdev_priv(dev);
1043        struct net_device_stats *stats = &t->dev->stats;
1044        int ret;
1045
1046        switch (skb->protocol) {
1047        case htons(ETH_P_IP):
1048                ret = ip4ip6_tnl_xmit(skb, dev);
1049                break;
1050        case htons(ETH_P_IPV6):
1051                ret = ip6ip6_tnl_xmit(skb, dev);
1052                break;
1053        default:
1054                goto tx_err;
1055        }
1056
1057        if (ret < 0)
1058                goto tx_err;
1059
1060        return NETDEV_TX_OK;
1061
1062tx_err:
1063        stats->tx_errors++;
1064        stats->tx_dropped++;
1065        kfree_skb(skb);
1066        return NETDEV_TX_OK;
1067}
1068
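     /*
      * Derive the IP6_TNL_F_CAP_XMIT/IP6_TNL_F_CAP_RCV capability flags
      * from the configured addresses: both ends must be unicast or
      * multicast, neither may be loopback, and link-local addresses are
      * only accepted when the tunnel is bound to a link.  A unicast local
      * address enables transmit, a unicast remote address enables receive.
      */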
1069static void ip6_tnl_set_cap(struct ip6_tnl *t)
1070{
1071        struct ip6_tnl_parm *p = &t->parms;
1072        int ltype = ipv6_addr_type(&p->laddr);
1073        int rtype = ipv6_addr_type(&p->raddr);
1074
1075        p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
1076
1077        if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1078            rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
1079            !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
1080            (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
1081                if (ltype&IPV6_ADDR_UNICAST)
1082                        p->flags |= IP6_TNL_F_CAP_XMIT;
1083                if (rtype&IPV6_ADDR_UNICAST)
1084                        p->flags |= IP6_TNL_F_CAP_RCV;
1085        }
1086}
1087
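     /*
      * Recompute everything derived from the tunnel parameters: the flowi
      * template used when routing transmitted packets, the capability and
      * point-to-point flags, and the device MTU and hard_header_len based
      * on a route lookup towards the remote end-point.
      */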
1088static void ip6_tnl_link_config(struct ip6_tnl *t)
1089{
1090        struct net_device *dev = t->dev;
1091        struct ip6_tnl_parm *p = &t->parms;
1092        struct flowi *fl = &t->fl;
1093
1094        memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
1095        memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
1096
1097        /* Set up flowi template */
1098        ipv6_addr_copy(&fl->fl6_src, &p->laddr);
1099        ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
1100        fl->oif = p->link;
1101        fl->fl6_flowlabel = 0;
1102
1103        if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
1104                fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
1105        if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
1106                fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
1107
1108        ip6_tnl_set_cap(t);
1109
1110        if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
1111                dev->flags |= IFF_POINTOPOINT;
1112        else
1113                dev->flags &= ~IFF_POINTOPOINT;
1114
1115        dev->iflink = p->link;
1116
1117        if (p->flags & IP6_TNL_F_CAP_XMIT) {
1118                int strict = (ipv6_addr_type(&p->raddr) &
1119                              (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
1120
1121                struct rt6_info *rt = rt6_lookup(dev_net(dev),
1122                                                 &p->raddr, &p->laddr,
1123                                                 p->link, strict);
1124
1125                if (rt == NULL)
1126                        return;
1127
1128                if (rt->rt6i_dev) {
1129                        dev->hard_header_len = rt->rt6i_dev->hard_header_len +
1130                                sizeof (struct ipv6hdr);
1131
1132                        dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
1133
1134                        if (dev->mtu < IPV6_MIN_MTU)
1135                                dev->mtu = IPV6_MIN_MTU;
1136                }
1137                dst_release(&rt->u.dst);
1138        }
1139}
1140
1141/**
1142 * ip6_tnl_change - update the tunnel parameters
1143 *   @t: tunnel to be changed
1144 *   @p: tunnel configuration parameters
1145 *
1146 * Description:
1147 *   ip6_tnl_change() updates the tunnel parameters
1148 **/
1149
1150static int
1151ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
1152{
1153        ipv6_addr_copy(&t->parms.laddr, &p->laddr);
1154        ipv6_addr_copy(&t->parms.raddr, &p->raddr);
1155        t->parms.flags = p->flags;
1156        t->parms.hop_limit = p->hop_limit;
1157        t->parms.encap_limit = p->encap_limit;
1158        t->parms.flowinfo = p->flowinfo;
1159        t->parms.link = p->link;
1160        t->parms.proto = p->proto;
1161        ip6_tnl_dst_reset(t);
1162        ip6_tnl_link_config(t);
1163        return 0;
1164}
1165
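     /*
      * Illustrative sketch (not part of this file) of how userspace might
      * drive the ioctl interface below; the device and address names are
      * made up, but the calls mirror what "ip -6 tunnel add" does:
      *
      *     struct ip6_tnl_parm p = { .proto = IPPROTO_IPV6,
      *                               .hop_limit = 64, .encap_limit = 4 };
      *     struct ifreq ifr;
      *     int fd = socket(AF_INET6, SOCK_DGRAM, 0);
      *
      *     strcpy(p.name, "ip6tnl1");
      *     inet_pton(AF_INET6, "2001:db8::1", &p.laddr);
      *     inet_pton(AF_INET6, "2001:db8::2", &p.raddr);
      *
      *     strcpy(ifr.ifr_name, "ip6tnl0");
      *     ifr.ifr_ifru.ifru_data = (void *)&p;
      *     ioctl(fd, SIOCADDTUNNEL, &ifr);
      */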
1166/**
1167 * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
1168 *   @dev: virtual device associated with tunnel
1169 *   @ifr: parameters passed from userspace
1170 *   @cmd: command to be performed
1171 *
1172 * Description:
1173 *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
1174 *   from userspace.
1175 *
1176 *   The possible commands are the following:
1177 *     %SIOCGETTUNNEL: get tunnel parameters for device
1178 *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
1179 *     %SIOCCHGTUNNEL: change tunnel parameters to those given
1180 *     %SIOCDELTUNNEL: delete tunnel
1181 *
1182 *   The fallback device "ip6tnl0", created during module
1183 *   initialization, can be used for creating other tunnel devices.
1184 *
1185 * Return:
1186 *   0 on success,
1187 *   %-EFAULT if unable to copy data to or from userspace,
 1188 *   %-EPERM if the current process does not have %CAP_NET_ADMIN set,
1189 *   %-EINVAL if passed tunnel parameters are invalid,
1190 *   %-EEXIST if changing a tunnel's parameters would cause a conflict
 1191 *   %-ENODEV if attempting to change or delete a nonexistent device
1192 **/
1193
1194static int
1195ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1196{
1197        int err = 0;
1198        struct ip6_tnl_parm p;
1199        struct ip6_tnl *t = NULL;
1200        struct net *net = dev_net(dev);
1201        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1202
1203        switch (cmd) {
1204        case SIOCGETTUNNEL:
1205                if (dev == ip6n->fb_tnl_dev) {
1206                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
1207                                err = -EFAULT;
1208                                break;
1209                        }
1210                        t = ip6_tnl_locate(net, &p, 0);
1211                }
1212                if (t == NULL)
1213                        t = netdev_priv(dev);
1214                memcpy(&p, &t->parms, sizeof (p));
1215                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
1216                        err = -EFAULT;
1217                }
1218                break;
1219        case SIOCADDTUNNEL:
1220        case SIOCCHGTUNNEL:
1221                err = -EPERM;
1222                if (!capable(CAP_NET_ADMIN))
1223                        break;
1224                err = -EFAULT;
1225                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1226                        break;
1227                err = -EINVAL;
1228                if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
1229                    p.proto != 0)
1230                        break;
1231                t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
1232                if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
1233                        if (t != NULL) {
1234                                if (t->dev != dev) {
1235                                        err = -EEXIST;
1236                                        break;
1237                                }
1238                        } else
1239                                t = netdev_priv(dev);
1240
1241                        ip6_tnl_unlink(ip6n, t);
1242                        err = ip6_tnl_change(t, &p);
1243                        ip6_tnl_link(ip6n, t);
1244                        netdev_state_change(dev);
1245                }
1246                if (t) {
1247                        err = 0;
1248                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
1249                                err = -EFAULT;
1250
1251                } else
1252                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1253                break;
1254        case SIOCDELTUNNEL:
1255                err = -EPERM;
1256                if (!capable(CAP_NET_ADMIN))
1257                        break;
1258
1259                if (dev == ip6n->fb_tnl_dev) {
1260                        err = -EFAULT;
1261                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
1262                                break;
1263                        err = -ENOENT;
1264                        if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
1265                                break;
1266                        err = -EPERM;
1267                        if (t->dev == ip6n->fb_tnl_dev)
1268                                break;
1269                        dev = t->dev;
1270                }
1271                err = 0;
1272                unregister_netdevice(dev);
1273                break;
1274        default:
1275                err = -EINVAL;
1276        }
1277        return err;
1278}
1279
1280/**
1281 * ip6_tnl_change_mtu - change mtu manually for tunnel device
1282 *   @dev: virtual device associated with tunnel
1283 *   @new_mtu: the new mtu
1284 *
1285 * Return:
1286 *   0 on success,
 1287 *   %-EINVAL if @new_mtu is below %IPV6_MIN_MTU
1288 **/
1289
1290static int
1291ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1292{
1293        if (new_mtu < IPV6_MIN_MTU) {
1294                return -EINVAL;
1295        }
1296        dev->mtu = new_mtu;
1297        return 0;
1298}
1299
1300
1301static const struct net_device_ops ip6_tnl_netdev_ops = {
1302        .ndo_uninit = ip6_tnl_dev_uninit,
1303        .ndo_start_xmit = ip6_tnl_xmit,
1304        .ndo_do_ioctl = ip6_tnl_ioctl,
1305        .ndo_change_mtu = ip6_tnl_change_mtu,
1306};
1307
1308/**
1309 * ip6_tnl_dev_setup - setup virtual tunnel device
1310 *   @dev: virtual device associated with tunnel
1311 *
1312 * Description:
1313 *   Initialize function pointers and device parameters
1314 **/
1315
1316static void ip6_tnl_dev_setup(struct net_device *dev)
1317{
1318        dev->netdev_ops = &ip6_tnl_netdev_ops;
1319        dev->destructor = free_netdev;
1320
1321        dev->type = ARPHRD_TUNNEL6;
1322        dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
1323        dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
1324        dev->flags |= IFF_NOARP;
1325        dev->addr_len = sizeof(struct in6_addr);
1326        dev->features |= NETIF_F_NETNS_LOCAL;
1327}
1328
1329
1330/**
1331 * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
1332 *   @dev: virtual device associated with tunnel
1333 **/
1334
1335static inline void
1336ip6_tnl_dev_init_gen(struct net_device *dev)
1337{
1338        struct ip6_tnl *t = netdev_priv(dev);
1339        t->dev = dev;
1340        strcpy(t->parms.name, dev->name);
1341}
1342
1343/**
 1344 * ip6_tnl_dev_init - initializer for all non-fallback tunnel devices
1345 *   @dev: virtual device associated with tunnel
1346 **/
1347
1348static void ip6_tnl_dev_init(struct net_device *dev)
1349{
1350        struct ip6_tnl *t = netdev_priv(dev);
1351        ip6_tnl_dev_init_gen(dev);
1352        ip6_tnl_link_config(t);
1353}
1354
1355/**
1356 * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
1357 *   @dev: fallback device
1360 **/
1361
1362static void ip6_fb_tnl_dev_init(struct net_device *dev)
1363{
1364        struct ip6_tnl *t = netdev_priv(dev);
1365        struct net *net = dev_net(dev);
1366        struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1367
1368        ip6_tnl_dev_init_gen(dev);
1369        t->parms.proto = IPPROTO_IPV6;
1370        dev_hold(dev);
1371        ip6n->tnls_wc[0] = t;
1372}
1373
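     /*
      * Protocol hooks: IPv4-in-IPv6 (IPPROTO_IPIP) and IPv6-in-IPv6
      * (IPPROTO_IPV6) packets reach ip4ip6_rcv()/ip6ip6_rcv(), and the
      * corresponding ICMPv6 errors reach ip4ip6_err()/ip6ip6_err(),
      * through the xfrm6 tunnel handlers registered in ip6_tunnel_init().
      */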
1374static struct xfrm6_tunnel ip4ip6_handler = {
1375        .handler        = ip4ip6_rcv,
1376        .err_handler    = ip4ip6_err,
1377        .priority       =       1,
1378};
1379
1380static struct xfrm6_tunnel ip6ip6_handler = {
1381        .handler        = ip6ip6_rcv,
1382        .err_handler    = ip6ip6_err,
1383        .priority       =       1,
1384};
1385
1386static void ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1387{
1388        int h;
1389        struct ip6_tnl *t;
1390
1391        for (h = 0; h < HASH_SIZE; h++) {
1392                while ((t = ip6n->tnls_r_l[h]) != NULL)
1393                        unregister_netdevice(t->dev);
1394        }
1395
1396        t = ip6n->tnls_wc[0];
1397        unregister_netdevice(t->dev);
1398}
1399
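     /*
      * Per-namespace setup: allocate the ip6_tnl_net state, point the
      * tnls[] bucket array at the wildcard and hashed lists, and register
      * the "ip6tnl0" fallback device, which doubles as the wildcard tunnel
      * and as the ioctl entry point for creating further tunnels.
      */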
1400static int ip6_tnl_init_net(struct net *net)
1401{
1402        int err;
1403        struct ip6_tnl_net *ip6n;
1404
1405        err = -ENOMEM;
1406        ip6n = kzalloc(sizeof(struct ip6_tnl_net), GFP_KERNEL);
1407        if (ip6n == NULL)
1408                goto err_alloc;
1409
1410        err = net_assign_generic(net, ip6_tnl_net_id, ip6n);
1411        if (err < 0)
1412                goto err_assign;
1413
1414        ip6n->tnls[0] = ip6n->tnls_wc;
1415        ip6n->tnls[1] = ip6n->tnls_r_l;
1416
1417        err = -ENOMEM;
1418        ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
1419                                      ip6_tnl_dev_setup);
1420
1421        if (!ip6n->fb_tnl_dev)
1422                goto err_alloc_dev;
1423        dev_net_set(ip6n->fb_tnl_dev, net);
1424
1425        ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1426
1427        err = register_netdev(ip6n->fb_tnl_dev);
1428        if (err < 0)
1429                goto err_register;
1430        return 0;
1431
1432err_register:
1433        free_netdev(ip6n->fb_tnl_dev);
1434err_alloc_dev:
1435        /* nothing */
1436err_assign:
1437        kfree(ip6n);
1438err_alloc:
1439        return err;
1440}
1441
1442static void ip6_tnl_exit_net(struct net *net)
1443{
1444        struct ip6_tnl_net *ip6n;
1445
1446        ip6n = net_generic(net, ip6_tnl_net_id);
1447        rtnl_lock();
1448        ip6_tnl_destroy_tunnels(ip6n);
1449        rtnl_unlock();
1450        kfree(ip6n);
1451}
1452
1453static struct pernet_operations ip6_tnl_net_ops = {
1454        .init = ip6_tnl_init_net,
1455        .exit = ip6_tnl_exit_net,
1456};
1457
1458/**
1459 * ip6_tunnel_init - register protocol and reserve needed resources
1460 *
1461 * Return: 0 on success
1462 **/
1463
1464static int __init ip6_tunnel_init(void)
1465{
1466        int  err;
1467
1468        if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
1469                printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
1470                err = -EAGAIN;
1471                goto out;
1472        }
1473
1474        if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
1475                printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
1476                err = -EAGAIN;
1477                goto unreg_ip4ip6;
1478        }
1479
1480        err = register_pernet_gen_device(&ip6_tnl_net_id, &ip6_tnl_net_ops);
1481        if (err < 0)
1482                goto err_pernet;
1483        return 0;
1484err_pernet:
1485        xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
1486unreg_ip4ip6:
1487        xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
1488out:
1489        return err;
1490}
1491
1492/**
1493 * ip6_tunnel_cleanup - free resources and unregister protocol
1494 **/
1495
1496static void __exit ip6_tunnel_cleanup(void)
1497{
1498        if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
1499                printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
1500
1501        if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
1502                printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
1503
1504        unregister_pernet_gen_device(ip6_tnl_net_id, &ip6_tnl_net_ops);
1505}
1506
1507module_init(ip6_tunnel_init);
1508module_exit(ip6_tunnel_cleanup);
1509