linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/system.h>
  20#include <asm/uaccess.h>
  21#include <linux/types.h>
  22#include <linux/sched.h>
  23#include <linux/errno.h>
  24#include <linux/timer.h>
  25#include <linux/mm.h>
  26#include <linux/kernel.h>
  27#include <linux/fcntl.h>
  28#include <linux/stat.h>
  29#include <linux/socket.h>
  30#include <linux/inet.h>
  31#include <linux/netdevice.h>
  32#include <linux/inetdevice.h>
  33#include <linux/proc_fs.h>
  34#include <linux/seq_file.h>
  35#include <linux/init.h>
  36#include <net/protocol.h>
  37#include <linux/skbuff.h>
  38#include <net/sock.h>
  39#include <net/raw.h>
  40#include <linux/notifier.h>
  41#include <linux/if_arp.h>
  42#include <net/checksum.h>
  43#include <net/netlink.h>
  44
  45#include <net/ipv6.h>
  46#include <net/ip6_route.h>
  47#include <linux/mroute6.h>
  48#include <linux/pim.h>
  49#include <net/addrconf.h>
  50#include <linux/netfilter_ipv6.h>
  51#include <net/ip6_checksum.h>
  52
  53/* Big lock, protecting vif table, mrt cache and mroute socket state.
  54   Note that the changes are semaphored via rtnl_lock.
  55 */
  56
  57static DEFINE_RWLOCK(mrt_lock);
  58
  59/*
  60 *      Multicast router control variables
  61 */
  62
  63#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
  64
  65static struct mfc6_cache *mfc_unres_queue;              /* Queue of unresolved entries */
  66
  67/* Special spinlock for queue of unresolved entries */
  68static DEFINE_SPINLOCK(mfc_unres_lock);
  69
  70/* We return to original Alan's scheme. Hash table of resolved
  71   entries is changed only in process context and protected
  72   with weak lock mrt_lock. Queue of unresolved entries is protected
  73   with strong spinlock mfc_unres_lock.
  74
  75   In this case data path is free of exclusive locks at all.
  76 */
  77
  78static struct kmem_cache *mrt_cachep __read_mostly;
  79
  80static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
  81static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
  82                              mifi_t mifi, int assert);
  83static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
  84static void mroute_clean_tables(struct net *net);
  85
  86static struct timer_list ipmr_expire_timer;
  87
  88
  89#ifdef CONFIG_PROC_FS
  90
  91struct ipmr_mfc_iter {
  92        struct seq_net_private p;
  93        struct mfc6_cache **cache;
  94        int ct;
  95};
  96
  97
  98static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
  99                                           struct ipmr_mfc_iter *it, loff_t pos)
 100{
 101        struct mfc6_cache *mfc;
 102
 103        it->cache = net->ipv6.mfc6_cache_array;
 104        read_lock(&mrt_lock);
 105        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
 106                for (mfc = net->ipv6.mfc6_cache_array[it->ct];
 107                     mfc; mfc = mfc->next)
 108                        if (pos-- == 0)
 109                                return mfc;
 110        read_unlock(&mrt_lock);
 111
 112        it->cache = &mfc_unres_queue;
 113        spin_lock_bh(&mfc_unres_lock);
 114        for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
 115                if (net_eq(mfc6_net(mfc), net) &&
 116                    pos-- == 0)
 117                        return mfc;
 118        spin_unlock_bh(&mfc_unres_lock);
 119
 120        it->cache = NULL;
 121        return NULL;
 122}
 123
 124
 125
 126
 127/*
 128 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 129 */
 130
 131struct ipmr_vif_iter {
 132        struct seq_net_private p;
 133        int ct;
 134};
 135
 136static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 137                                            struct ipmr_vif_iter *iter,
 138                                            loff_t pos)
 139{
 140        for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
 141                if (!MIF_EXISTS(net, iter->ct))
 142                        continue;
 143                if (pos-- == 0)
 144                        return &net->ipv6.vif6_table[iter->ct];
 145        }
 146        return NULL;
 147}
 148
 149static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 150        __acquires(mrt_lock)
 151{
 152        struct net *net = seq_file_net(seq);
 153
 154        read_lock(&mrt_lock);
 155        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 156                : SEQ_START_TOKEN;
 157}
 158
 159static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 160{
 161        struct ipmr_vif_iter *iter = seq->private;
 162        struct net *net = seq_file_net(seq);
 163
 164        ++*pos;
 165        if (v == SEQ_START_TOKEN)
 166                return ip6mr_vif_seq_idx(net, iter, 0);
 167
 168        while (++iter->ct < net->ipv6.maxvif) {
 169                if (!MIF_EXISTS(net, iter->ct))
 170                        continue;
 171                return &net->ipv6.vif6_table[iter->ct];
 172        }
 173        return NULL;
 174}
 175
 176static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 177        __releases(mrt_lock)
 178{
 179        read_unlock(&mrt_lock);
 180}
 181
 182static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 183{
 184        struct net *net = seq_file_net(seq);
 185
 186        if (v == SEQ_START_TOKEN) {
 187                seq_puts(seq,
 188                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 189        } else {
 190                const struct mif_device *vif = v;
 191                const char *name = vif->dev ? vif->dev->name : "none";
 192
 193                seq_printf(seq,
 194                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 195                           vif - net->ipv6.vif6_table,
 196                           name, vif->bytes_in, vif->pkt_in,
 197                           vif->bytes_out, vif->pkt_out,
 198                           vif->flags);
 199        }
 200        return 0;
 201}
 202
 203static const struct seq_operations ip6mr_vif_seq_ops = {
 204        .start = ip6mr_vif_seq_start,
 205        .next  = ip6mr_vif_seq_next,
 206        .stop  = ip6mr_vif_seq_stop,
 207        .show  = ip6mr_vif_seq_show,
 208};
 209
 210static int ip6mr_vif_open(struct inode *inode, struct file *file)
 211{
 212        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 213                            sizeof(struct ipmr_vif_iter));
 214}
 215
 216static const struct file_operations ip6mr_vif_fops = {
 217        .owner   = THIS_MODULE,
 218        .open    = ip6mr_vif_open,
 219        .read    = seq_read,
 220        .llseek  = seq_lseek,
 221        .release = seq_release_net,
 222};
 223
 224static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 225{
 226        struct net *net = seq_file_net(seq);
 227
 228        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 229                : SEQ_START_TOKEN;
 230}
 231
 232static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 233{
 234        struct mfc6_cache *mfc = v;
 235        struct ipmr_mfc_iter *it = seq->private;
 236        struct net *net = seq_file_net(seq);
 237
 238        ++*pos;
 239
 240        if (v == SEQ_START_TOKEN)
 241                return ipmr_mfc_seq_idx(net, seq->private, 0);
 242
 243        if (mfc->next)
 244                return mfc->next;
 245
 246        if (it->cache == &mfc_unres_queue)
 247                goto end_of_list;
 248
 249        BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
 250
 251        while (++it->ct < MFC6_LINES) {
 252                mfc = net->ipv6.mfc6_cache_array[it->ct];
 253                if (mfc)
 254                        return mfc;
 255        }
 256
 257        /* exhausted cache_array, show unresolved */
 258        read_unlock(&mrt_lock);
 259        it->cache = &mfc_unres_queue;
 260        it->ct = 0;
 261
 262        spin_lock_bh(&mfc_unres_lock);
 263        mfc = mfc_unres_queue;
 264        if (mfc)
 265                return mfc;
 266
 267 end_of_list:
 268        spin_unlock_bh(&mfc_unres_lock);
 269        it->cache = NULL;
 270
 271        return NULL;
 272}
 273
 274static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 275{
 276        struct ipmr_mfc_iter *it = seq->private;
 277        struct net *net = seq_file_net(seq);
 278
 279        if (it->cache == &mfc_unres_queue)
 280                spin_unlock_bh(&mfc_unres_lock);
 281        else if (it->cache == net->ipv6.mfc6_cache_array)
 282                read_unlock(&mrt_lock);
 283}
 284
 285static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 286{
 287        int n;
 288        struct net *net = seq_file_net(seq);
 289
 290        if (v == SEQ_START_TOKEN) {
 291                seq_puts(seq,
 292                         "Group                            "
 293                         "Origin                           "
 294                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 295        } else {
 296                const struct mfc6_cache *mfc = v;
 297                const struct ipmr_mfc_iter *it = seq->private;
 298
 299                seq_printf(seq, "%pI6 %pI6 %-3hd",
 300                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 301                           mfc->mf6c_parent);
 302
 303                if (it->cache != &mfc_unres_queue) {
 304                        seq_printf(seq, " %8lu %8lu %8lu",
 305                                   mfc->mfc_un.res.pkt,
 306                                   mfc->mfc_un.res.bytes,
 307                                   mfc->mfc_un.res.wrong_if);
 308                        for (n = mfc->mfc_un.res.minvif;
 309                             n < mfc->mfc_un.res.maxvif; n++) {
 310                                if (MIF_EXISTS(net, n) &&
 311                                    mfc->mfc_un.res.ttls[n] < 255)
 312                                        seq_printf(seq,
 313                                                   " %2d:%-3d",
 314                                                   n, mfc->mfc_un.res.ttls[n]);
 315                        }
 316                } else {
 317                        /* unresolved mfc_caches don't contain
 318                         * pkt, bytes and wrong_if values
 319                         */
 320                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 321                }
 322                seq_putc(seq, '\n');
 323        }
 324        return 0;
 325}
 326
 327static const struct seq_operations ipmr_mfc_seq_ops = {
 328        .start = ipmr_mfc_seq_start,
 329        .next  = ipmr_mfc_seq_next,
 330        .stop  = ipmr_mfc_seq_stop,
 331        .show  = ipmr_mfc_seq_show,
 332};
 333
 334static int ipmr_mfc_open(struct inode *inode, struct file *file)
 335{
 336        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 337                            sizeof(struct ipmr_mfc_iter));
 338}
 339
 340static const struct file_operations ip6mr_mfc_fops = {
 341        .owner   = THIS_MODULE,
 342        .open    = ipmr_mfc_open,
 343        .read    = seq_read,
 344        .llseek  = seq_lseek,
 345        .release = seq_release_net,
 346};
 347#endif
 348
 349#ifdef CONFIG_IPV6_PIMSM_V2
 350
 351static int pim6_rcv(struct sk_buff *skb)
 352{
 353        struct pimreghdr *pim;
 354        struct ipv6hdr   *encap;
 355        struct net_device  *reg_dev = NULL;
 356        struct net *net = dev_net(skb->dev);
 357        int reg_vif_num = net->ipv6.mroute_reg_vif_num;
 358
 359        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 360                goto drop;
 361
 362        pim = (struct pimreghdr *)skb_transport_header(skb);
 363        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 364            (pim->flags & PIM_NULL_REGISTER) ||
 365            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 366                             sizeof(*pim), IPPROTO_PIM,
 367                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 368             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 369                goto drop;
 370
 371        /* check if the inner packet is destined to mcast group */
 372        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 373                                   sizeof(*pim));
 374
 375        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 376            encap->payload_len == 0 ||
 377            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 378                goto drop;
 379
 380        read_lock(&mrt_lock);
 381        if (reg_vif_num >= 0)
 382                reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
 383        if (reg_dev)
 384                dev_hold(reg_dev);
 385        read_unlock(&mrt_lock);
 386
 387        if (reg_dev == NULL)
 388                goto drop;
 389
 390        skb->mac_header = skb->network_header;
 391        skb_pull(skb, (u8 *)encap - skb->data);
 392        skb_reset_network_header(skb);
 393        skb->dev = reg_dev;
 394        skb->protocol = htons(ETH_P_IPV6);
 395        skb->ip_summed = 0;
 396        skb->pkt_type = PACKET_HOST;
 397        skb_dst_drop(skb);
 398        reg_dev->stats.rx_bytes += skb->len;
 399        reg_dev->stats.rx_packets++;
 400        nf_reset(skb);
 401        netif_rx(skb);
 402        dev_put(reg_dev);
 403        return 0;
 404 drop:
 405        kfree_skb(skb);
 406        return 0;
 407}
 408
 409static const struct inet6_protocol pim6_protocol = {
 410        .handler        =       pim6_rcv,
 411};
 412
 413/* Service routines creating virtual interfaces: PIMREG */
 414
 415static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 416                                      struct net_device *dev)
 417{
 418        struct net *net = dev_net(dev);
 419
 420        read_lock(&mrt_lock);
 421        dev->stats.tx_bytes += skb->len;
 422        dev->stats.tx_packets++;
 423        ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
 424                           MRT6MSG_WHOLEPKT);
 425        read_unlock(&mrt_lock);
 426        kfree_skb(skb);
 427        return NETDEV_TX_OK;
 428}
 429
 430static const struct net_device_ops reg_vif_netdev_ops = {
 431        .ndo_start_xmit = reg_vif_xmit,
 432};
 433
 434static void reg_vif_setup(struct net_device *dev)
 435{
 436        dev->type               = ARPHRD_PIMREG;
 437        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 438        dev->flags              = IFF_NOARP;
 439        dev->netdev_ops         = &reg_vif_netdev_ops;
 440        dev->destructor         = free_netdev;
 441        dev->features           |= NETIF_F_NETNS_LOCAL;
 442}
 443
 444static struct net_device *ip6mr_reg_vif(struct net *net)
 445{
 446        struct net_device *dev;
 447
 448        dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
 449        if (dev == NULL)
 450                return NULL;
 451
 452        dev_net_set(dev, net);
 453
 454        if (register_netdevice(dev)) {
 455                free_netdev(dev);
 456                return NULL;
 457        }
 458        dev->iflink = 0;
 459
 460        if (dev_open(dev))
 461                goto failure;
 462
 463        dev_hold(dev);
 464        return dev;
 465
 466failure:
 467        /* allow the register to be completed before unregistering. */
 468        rtnl_unlock();
 469        rtnl_lock();
 470
 471        unregister_netdevice(dev);
 472        return NULL;
 473}
 474#endif
 475
 476/*
 477 *      Delete a VIF entry
 478 */
 479
 480static int mif6_delete(struct net *net, int vifi)
 481{
 482        struct mif_device *v;
 483        struct net_device *dev;
 484        struct inet6_dev *in6_dev;
 485        if (vifi < 0 || vifi >= net->ipv6.maxvif)
 486                return -EADDRNOTAVAIL;
 487
 488        v = &net->ipv6.vif6_table[vifi];
 489
 490        write_lock_bh(&mrt_lock);
 491        dev = v->dev;
 492        v->dev = NULL;
 493
 494        if (!dev) {
 495                write_unlock_bh(&mrt_lock);
 496                return -EADDRNOTAVAIL;
 497        }
 498
 499#ifdef CONFIG_IPV6_PIMSM_V2
 500        if (vifi == net->ipv6.mroute_reg_vif_num)
 501                net->ipv6.mroute_reg_vif_num = -1;
 502#endif
 503
 504        if (vifi + 1 == net->ipv6.maxvif) {
 505                int tmp;
 506                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 507                        if (MIF_EXISTS(net, tmp))
 508                                break;
 509                }
 510                net->ipv6.maxvif = tmp + 1;
 511        }
 512
 513        write_unlock_bh(&mrt_lock);
 514
 515        dev_set_allmulti(dev, -1);
 516
 517        in6_dev = __in6_dev_get(dev);
 518        if (in6_dev)
 519                in6_dev->cnf.mc_forwarding--;
 520
 521        if (v->flags & MIFF_REGISTER)
 522                unregister_netdevice(dev);
 523
 524        dev_put(dev);
 525        return 0;
 526}
 527
 528static inline void ip6mr_cache_free(struct mfc6_cache *c)
 529{
 530        release_net(mfc6_net(c));
 531        kmem_cache_free(mrt_cachep, c);
 532}
 533
 534/* Destroy an unresolved cache entry, killing queued skbs
 535   and reporting error to netlink readers.
 536 */
 537
 538static void ip6mr_destroy_unres(struct mfc6_cache *c)
 539{
 540        struct sk_buff *skb;
 541        struct net *net = mfc6_net(c);
 542
 543        atomic_dec(&net->ipv6.cache_resolve_queue_len);
 544
 545        while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 546                if (ipv6_hdr(skb)->version == 0) {
 547                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 548                        nlh->nlmsg_type = NLMSG_ERROR;
 549                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 550                        skb_trim(skb, nlh->nlmsg_len);
 551                        ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 552                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 553                } else
 554                        kfree_skb(skb);
 555        }
 556
 557        ip6mr_cache_free(c);
 558}
 559
 560
 561/* Single timer process for all the unresolved queue. */
 562
 563static void ipmr_do_expire_process(unsigned long dummy)
 564{
 565        unsigned long now = jiffies;
 566        unsigned long expires = 10 * HZ;
 567        struct mfc6_cache *c, **cp;
 568
 569        cp = &mfc_unres_queue;
 570
 571        while ((c = *cp) != NULL) {
 572                if (time_after(c->mfc_un.unres.expires, now)) {
 573                        /* not yet... */
 574                        unsigned long interval = c->mfc_un.unres.expires - now;
 575                        if (interval < expires)
 576                                expires = interval;
 577                        cp = &c->next;
 578                        continue;
 579                }
 580
 581                *cp = c->next;
 582                ip6mr_destroy_unres(c);
 583        }
 584
 585        if (mfc_unres_queue != NULL)
 586                mod_timer(&ipmr_expire_timer, jiffies + expires);
 587}
 588
 589static void ipmr_expire_process(unsigned long dummy)
 590{
 591        if (!spin_trylock(&mfc_unres_lock)) {
 592                mod_timer(&ipmr_expire_timer, jiffies + 1);
 593                return;
 594        }
 595
 596        if (mfc_unres_queue != NULL)
 597                ipmr_do_expire_process(dummy);
 598
 599        spin_unlock(&mfc_unres_lock);
 600}
 601
 602/* Fill oifs list. It is called under write locked mrt_lock. */
 603
 604static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
 605{
 606        int vifi;
 607        struct net *net = mfc6_net(cache);
 608
 609        cache->mfc_un.res.minvif = MAXMIFS;
 610        cache->mfc_un.res.maxvif = 0;
 611        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 612
 613        for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
 614                if (MIF_EXISTS(net, vifi) &&
 615                    ttls[vifi] && ttls[vifi] < 255) {
 616                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 617                        if (cache->mfc_un.res.minvif > vifi)
 618                                cache->mfc_un.res.minvif = vifi;
 619                        if (cache->mfc_un.res.maxvif <= vifi)
 620                                cache->mfc_un.res.maxvif = vifi + 1;
 621                }
 622        }
 623}
 624
 625static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 626{
 627        int vifi = vifc->mif6c_mifi;
 628        struct mif_device *v = &net->ipv6.vif6_table[vifi];
 629        struct net_device *dev;
 630        struct inet6_dev *in6_dev;
 631        int err;
 632
 633        /* Is vif busy ? */
 634        if (MIF_EXISTS(net, vifi))
 635                return -EADDRINUSE;
 636
 637        switch (vifc->mif6c_flags) {
 638#ifdef CONFIG_IPV6_PIMSM_V2
 639        case MIFF_REGISTER:
 640                /*
 641                 * Special Purpose VIF in PIM
 642                 * All the packets will be sent to the daemon
 643                 */
 644                if (net->ipv6.mroute_reg_vif_num >= 0)
 645                        return -EADDRINUSE;
 646                dev = ip6mr_reg_vif(net);
 647                if (!dev)
 648                        return -ENOBUFS;
 649                err = dev_set_allmulti(dev, 1);
 650                if (err) {
 651                        unregister_netdevice(dev);
 652                        dev_put(dev);
 653                        return err;
 654                }
 655                break;
 656#endif
 657        case 0:
 658                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 659                if (!dev)
 660                        return -EADDRNOTAVAIL;
 661                err = dev_set_allmulti(dev, 1);
 662                if (err) {
 663                        dev_put(dev);
 664                        return err;
 665                }
 666                break;
 667        default:
 668                return -EINVAL;
 669        }
 670
 671        in6_dev = __in6_dev_get(dev);
 672        if (in6_dev)
 673                in6_dev->cnf.mc_forwarding++;
 674
 675        /*
 676         *      Fill in the VIF structures
 677         */
 678        v->rate_limit = vifc->vifc_rate_limit;
 679        v->flags = vifc->mif6c_flags;
 680        if (!mrtsock)
 681                v->flags |= VIFF_STATIC;
 682        v->threshold = vifc->vifc_threshold;
 683        v->bytes_in = 0;
 684        v->bytes_out = 0;
 685        v->pkt_in = 0;
 686        v->pkt_out = 0;
 687        v->link = dev->ifindex;
 688        if (v->flags & MIFF_REGISTER)
 689                v->link = dev->iflink;
 690
 691        /* And finish update writing critical data */
 692        write_lock_bh(&mrt_lock);
 693        v->dev = dev;
 694#ifdef CONFIG_IPV6_PIMSM_V2
 695        if (v->flags & MIFF_REGISTER)
 696                net->ipv6.mroute_reg_vif_num = vifi;
 697#endif
 698        if (vifi + 1 > net->ipv6.maxvif)
 699                net->ipv6.maxvif = vifi + 1;
 700        write_unlock_bh(&mrt_lock);
 701        return 0;
 702}
 703
 704static struct mfc6_cache *ip6mr_cache_find(struct net *net,
 705                                           struct in6_addr *origin,
 706                                           struct in6_addr *mcastgrp)
 707{
 708        int line = MFC6_HASH(mcastgrp, origin);
 709        struct mfc6_cache *c;
 710
 711        for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
 712                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 713                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
 714                        break;
 715        }
 716        return c;
 717}
 718
 719/*
 720 *      Allocate a multicast cache entry
 721 */
 722static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
 723{
 724        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 725        if (c == NULL)
 726                return NULL;
 727        c->mfc_un.res.minvif = MAXMIFS;
 728        mfc6_net_set(c, net);
 729        return c;
 730}
 731
 732static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
 733{
 734        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 735        if (c == NULL)
 736                return NULL;
 737        skb_queue_head_init(&c->mfc_un.unres.unresolved);
 738        c->mfc_un.unres.expires = jiffies + 10 * HZ;
 739        mfc6_net_set(c, net);
 740        return c;
 741}
 742
 743/*
 744 *      A cache entry has gone into a resolved state from queued
 745 */
 746
 747static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 748{
 749        struct sk_buff *skb;
 750
 751        /*
 752         *      Play the pending entries through our router
 753         */
 754
 755        while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 756                if (ipv6_hdr(skb)->version == 0) {
 757                        int err;
 758                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 759
 760                        if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 761                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 762                        } else {
 763                                nlh->nlmsg_type = NLMSG_ERROR;
 764                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 765                                skb_trim(skb, nlh->nlmsg_len);
 766                                ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 767                        }
 768                        err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
 769                } else
 770                        ip6_mr_forward(skb, c);
 771        }
 772}
 773
 774/*
 775 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 776 *      expects the following bizarre scheme.
 777 *
 778 *      Called under mrt_lock.
 779 */
 780
 781static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 782                              int assert)
 783{
 784        struct sk_buff *skb;
 785        struct mrt6msg *msg;
 786        int ret;
 787
 788#ifdef CONFIG_IPV6_PIMSM_V2
 789        if (assert == MRT6MSG_WHOLEPKT)
 790                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
 791                                                +sizeof(*msg));
 792        else
 793#endif
 794                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
 795
 796        if (!skb)
 797                return -ENOBUFS;
 798
 799        /* I suppose that internal messages
 800         * do not require checksums */
 801
 802        skb->ip_summed = CHECKSUM_UNNECESSARY;
 803
 804#ifdef CONFIG_IPV6_PIMSM_V2
 805        if (assert == MRT6MSG_WHOLEPKT) {
 806                /* Ugly, but we have no choice with this interface.
 807                   Duplicate old header, fix length etc.
 808                   And all this only to mangle msg->im6_msgtype and
 809                   to set msg->im6_mbz to "mbz" :-)
 810                 */
 811                skb_push(skb, -skb_network_offset(pkt));
 812
 813                skb_push(skb, sizeof(*msg));
 814                skb_reset_transport_header(skb);
 815                msg = (struct mrt6msg *)skb_transport_header(skb);
 816                msg->im6_mbz = 0;
 817                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
 818                msg->im6_mif = net->ipv6.mroute_reg_vif_num;
 819                msg->im6_pad = 0;
 820                ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 821                ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
 822
 823                skb->ip_summed = CHECKSUM_UNNECESSARY;
 824        } else
 825#endif
 826        {
 827        /*
 828         *      Copy the IP header
 829         */
 830
 831        skb_put(skb, sizeof(struct ipv6hdr));
 832        skb_reset_network_header(skb);
 833        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
 834
 835        /*
 836         *      Add our header
 837         */
 838        skb_put(skb, sizeof(*msg));
 839        skb_reset_transport_header(skb);
 840        msg = (struct mrt6msg *)skb_transport_header(skb);
 841
 842        msg->im6_mbz = 0;
 843        msg->im6_msgtype = assert;
 844        msg->im6_mif = mifi;
 845        msg->im6_pad = 0;
 846        ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 847        ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
 848
 849        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 850        skb->ip_summed = CHECKSUM_UNNECESSARY;
 851        }
 852
 853        if (net->ipv6.mroute6_sk == NULL) {
 854                kfree_skb(skb);
 855                return -EINVAL;
 856        }
 857
 858        /*
 859         *      Deliver to user space multicast routing algorithms
 860         */
 861        ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
 862        if (ret < 0) {
 863                if (net_ratelimit())
 864                        printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
 865                kfree_skb(skb);
 866        }
 867
 868        return ret;
 869}
 870
 871/*
 872 *      Queue a packet for resolution. It gets locked cache entry!
 873 */
 874
 875static int
 876ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 877{
 878        int err;
 879        struct mfc6_cache *c;
 880
 881        spin_lock_bh(&mfc_unres_lock);
 882        for (c = mfc_unres_queue; c; c = c->next) {
 883                if (net_eq(mfc6_net(c), net) &&
 884                    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 885                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
 886                        break;
 887        }
 888
 889        if (c == NULL) {
 890                /*
 891                 *      Create a new entry if allowable
 892                 */
 893
 894                if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
 895                    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
 896                        spin_unlock_bh(&mfc_unres_lock);
 897
 898                        kfree_skb(skb);
 899                        return -ENOBUFS;
 900                }
 901
 902                /*
 903                 *      Fill in the new cache entry
 904                 */
 905                c->mf6c_parent = -1;
 906                c->mf6c_origin = ipv6_hdr(skb)->saddr;
 907                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
 908
 909                /*
 910                 *      Reflect first query at pim6sd
 911                 */
 912                err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
 913                if (err < 0) {
 914                        /* If the report failed throw the cache entry
 915                           out - Brad Parker
 916                         */
 917                        spin_unlock_bh(&mfc_unres_lock);
 918
 919                        ip6mr_cache_free(c);
 920                        kfree_skb(skb);
 921                        return err;
 922                }
 923
 924                atomic_inc(&net->ipv6.cache_resolve_queue_len);
 925                c->next = mfc_unres_queue;
 926                mfc_unres_queue = c;
 927
 928                ipmr_do_expire_process(1);
 929        }
 930
 931        /*
 932         *      See if we can append the packet
 933         */
 934        if (c->mfc_un.unres.unresolved.qlen > 3) {
 935                kfree_skb(skb);
 936                err = -ENOBUFS;
 937        } else {
 938                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 939                err = 0;
 940        }
 941
 942        spin_unlock_bh(&mfc_unres_lock);
 943        return err;
 944}
 945
 946/*
 947 *      MFC6 cache manipulation by user space
 948 */
 949
 950static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
 951{
 952        int line;
 953        struct mfc6_cache *c, **cp;
 954
 955        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 956
 957        for (cp = &net->ipv6.mfc6_cache_array[line];
 958             (c = *cp) != NULL; cp = &c->next) {
 959                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 960                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 961                        write_lock_bh(&mrt_lock);
 962                        *cp = c->next;
 963                        write_unlock_bh(&mrt_lock);
 964
 965                        ip6mr_cache_free(c);
 966                        return 0;
 967                }
 968        }
 969        return -ENOENT;
 970}
 971
 972static int ip6mr_device_event(struct notifier_block *this,
 973                              unsigned long event, void *ptr)
 974{
 975        struct net_device *dev = ptr;
 976        struct net *net = dev_net(dev);
 977        struct mif_device *v;
 978        int ct;
 979
 980        if (event != NETDEV_UNREGISTER)
 981                return NOTIFY_DONE;
 982
 983        v = &net->ipv6.vif6_table[0];
 984        for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
 985                if (v->dev == dev)
 986                        mif6_delete(net, ct);
 987        }
 988        return NOTIFY_DONE;
 989}
 990
 991static struct notifier_block ip6_mr_notifier = {
 992        .notifier_call = ip6mr_device_event
 993};
 994
 995/*
 996 *      Setup for IP multicast routing
 997 */
 998
 999static int __net_init ip6mr_net_init(struct net *net)
1000{
1001        int err = 0;
1002        net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1003                                       GFP_KERNEL);
1004        if (!net->ipv6.vif6_table) {
1005                err = -ENOMEM;
1006                goto fail;
1007        }
1008
1009        /* Forwarding cache */
1010        net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1011                                             sizeof(struct mfc6_cache *),
1012                                             GFP_KERNEL);
1013        if (!net->ipv6.mfc6_cache_array) {
1014                err = -ENOMEM;
1015                goto fail_mfc6_cache;
1016        }
1017
1018#ifdef CONFIG_IPV6_PIMSM_V2
1019        net->ipv6.mroute_reg_vif_num = -1;
1020#endif
1021
1022#ifdef CONFIG_PROC_FS
1023        err = -ENOMEM;
1024        if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1025                goto proc_vif_fail;
1026        if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1027                goto proc_cache_fail;
1028#endif
1029        return 0;
1030
1031#ifdef CONFIG_PROC_FS
1032proc_cache_fail:
1033        proc_net_remove(net, "ip6_mr_vif");
1034proc_vif_fail:
1035        kfree(net->ipv6.mfc6_cache_array);
1036#endif
1037fail_mfc6_cache:
1038        kfree(net->ipv6.vif6_table);
1039fail:
1040        return err;
1041}
1042
1043static void __net_exit ip6mr_net_exit(struct net *net)
1044{
1045#ifdef CONFIG_PROC_FS
1046        proc_net_remove(net, "ip6_mr_cache");
1047        proc_net_remove(net, "ip6_mr_vif");
1048#endif
1049        mroute_clean_tables(net);
1050        kfree(net->ipv6.mfc6_cache_array);
1051        kfree(net->ipv6.vif6_table);
1052}
1053
1054static struct pernet_operations ip6mr_net_ops = {
1055        .init = ip6mr_net_init,
1056        .exit = ip6mr_net_exit,
1057};
1058
1059int __init ip6_mr_init(void)
1060{
1061        int err;
1062
1063        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1064                                       sizeof(struct mfc6_cache),
1065                                       0, SLAB_HWCACHE_ALIGN,
1066                                       NULL);
1067        if (!mrt_cachep)
1068                return -ENOMEM;
1069
1070        err = register_pernet_subsys(&ip6mr_net_ops);
1071        if (err)
1072                goto reg_pernet_fail;
1073
1074        setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1075        err = register_netdevice_notifier(&ip6_mr_notifier);
1076        if (err)
1077                goto reg_notif_fail;
1078#ifdef CONFIG_IPV6_PIMSM_V2
1079        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1080                printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1081                err = -EAGAIN;
1082                goto add_proto_fail;
1083        }
1084#endif
1085        return 0;
1086#ifdef CONFIG_IPV6_PIMSM_V2
1087add_proto_fail:
1088        unregister_netdevice_notifier(&ip6_mr_notifier);
1089#endif
1090reg_notif_fail:
1091        del_timer(&ipmr_expire_timer);
1092        unregister_pernet_subsys(&ip6mr_net_ops);
1093reg_pernet_fail:
1094        kmem_cache_destroy(mrt_cachep);
1095        return err;
1096}
1097
1098void ip6_mr_cleanup(void)
1099{
1100        unregister_netdevice_notifier(&ip6_mr_notifier);
1101        del_timer(&ipmr_expire_timer);
1102        unregister_pernet_subsys(&ip6mr_net_ops);
1103        kmem_cache_destroy(mrt_cachep);
1104}
1105
1106static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1107{
1108        int line;
1109        struct mfc6_cache *uc, *c, **cp;
1110        unsigned char ttls[MAXMIFS];
1111        int i;
1112
1113        memset(ttls, 255, MAXMIFS);
1114        for (i = 0; i < MAXMIFS; i++) {
1115                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1116                        ttls[i] = 1;
1117
1118        }
1119
1120        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1121
1122        for (cp = &net->ipv6.mfc6_cache_array[line];
1123             (c = *cp) != NULL; cp = &c->next) {
1124                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1125                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1126                        break;
1127        }
1128
1129        if (c != NULL) {
1130                write_lock_bh(&mrt_lock);
1131                c->mf6c_parent = mfc->mf6cc_parent;
1132                ip6mr_update_thresholds(c, ttls);
1133                if (!mrtsock)
1134                        c->mfc_flags |= MFC_STATIC;
1135                write_unlock_bh(&mrt_lock);
1136                return 0;
1137        }
1138
1139        if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1140                return -EINVAL;
1141
1142        c = ip6mr_cache_alloc(net);
1143        if (c == NULL)
1144                return -ENOMEM;
1145
1146        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1147        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1148        c->mf6c_parent = mfc->mf6cc_parent;
1149        ip6mr_update_thresholds(c, ttls);
1150        if (!mrtsock)
1151                c->mfc_flags |= MFC_STATIC;
1152
1153        write_lock_bh(&mrt_lock);
1154        c->next = net->ipv6.mfc6_cache_array[line];
1155        net->ipv6.mfc6_cache_array[line] = c;
1156        write_unlock_bh(&mrt_lock);
1157
1158        /*
1159         *      Check to see if we resolved a queued list. If so we
1160         *      need to send on the frames and tidy up.
1161         */
1162        spin_lock_bh(&mfc_unres_lock);
1163        for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1164             cp = &uc->next) {
1165                if (net_eq(mfc6_net(uc), net) &&
1166                    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1167                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1168                        *cp = uc->next;
1169                        atomic_dec(&net->ipv6.cache_resolve_queue_len);
1170                        break;
1171                }
1172        }
1173        if (mfc_unres_queue == NULL)
1174                del_timer(&ipmr_expire_timer);
1175        spin_unlock_bh(&mfc_unres_lock);
1176
1177        if (uc) {
1178                ip6mr_cache_resolve(uc, c);
1179                ip6mr_cache_free(uc);
1180        }
1181        return 0;
1182}
1183
1184/*
1185 *      Close the multicast socket, and clear the vif tables etc
1186 */
1187
1188static void mroute_clean_tables(struct net *net)
1189{
1190        int i;
1191
1192        /*
1193         *      Shut down all active vif entries
1194         */
1195        for (i = 0; i < net->ipv6.maxvif; i++) {
1196                if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
1197                        mif6_delete(net, i);
1198        }
1199
1200        /*
1201         *      Wipe the cache
1202         */
1203        for (i = 0; i < MFC6_LINES; i++) {
1204                struct mfc6_cache *c, **cp;
1205
1206                cp = &net->ipv6.mfc6_cache_array[i];
1207                while ((c = *cp) != NULL) {
1208                        if (c->mfc_flags & MFC_STATIC) {
1209                                cp = &c->next;
1210                                continue;
1211                        }
1212                        write_lock_bh(&mrt_lock);
1213                        *cp = c->next;
1214                        write_unlock_bh(&mrt_lock);
1215
1216                        ip6mr_cache_free(c);
1217                }
1218        }
1219
1220        if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
1221                struct mfc6_cache *c, **cp;
1222
1223                spin_lock_bh(&mfc_unres_lock);
1224                cp = &mfc_unres_queue;
1225                while ((c = *cp) != NULL) {
1226                        if (!net_eq(mfc6_net(c), net)) {
1227                                cp = &c->next;
1228                                continue;
1229                        }
1230                        *cp = c->next;
1231                        ip6mr_destroy_unres(c);
1232                }
1233                spin_unlock_bh(&mfc_unres_lock);
1234        }
1235}
1236
1237static int ip6mr_sk_init(struct sock *sk)
1238{
1239        int err = 0;
1240        struct net *net = sock_net(sk);
1241
1242        rtnl_lock();
1243        write_lock_bh(&mrt_lock);
1244        if (likely(net->ipv6.mroute6_sk == NULL)) {
1245                net->ipv6.mroute6_sk = sk;
1246                net->ipv6.devconf_all->mc_forwarding++;
1247        }
1248        else
1249                err = -EADDRINUSE;
1250        write_unlock_bh(&mrt_lock);
1251
1252        rtnl_unlock();
1253
1254        return err;
1255}
1256
1257int ip6mr_sk_done(struct sock *sk)
1258{
1259        int err = 0;
1260        struct net *net = sock_net(sk);
1261
1262        rtnl_lock();
1263        if (sk == net->ipv6.mroute6_sk) {
1264                write_lock_bh(&mrt_lock);
1265                net->ipv6.mroute6_sk = NULL;
1266                net->ipv6.devconf_all->mc_forwarding--;
1267                write_unlock_bh(&mrt_lock);
1268
1269                mroute_clean_tables(net);
1270        } else
1271                err = -EACCES;
1272        rtnl_unlock();
1273
1274        return err;
1275}
1276
1277/*
1278 *      Socket options and virtual interface manipulation. The whole
1279 *      virtual interface system is a complete heap, but unfortunately
1280 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1281 *      MOSPF/PIM router set up we can clean this up.
1282 */
1283
1284int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1285{
1286        int ret;
1287        struct mif6ctl vif;
1288        struct mf6cctl mfc;
1289        mifi_t mifi;
1290        struct net *net = sock_net(sk);
1291
1292        if (optname != MRT6_INIT) {
1293                if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
1294                        return -EACCES;
1295        }
1296
1297        switch (optname) {
1298        case MRT6_INIT:
1299                if (sk->sk_type != SOCK_RAW ||
1300                    inet_sk(sk)->num != IPPROTO_ICMPV6)
1301                        return -EOPNOTSUPP;
1302                if (optlen < sizeof(int))
1303                        return -EINVAL;
1304
1305                return ip6mr_sk_init(sk);
1306
1307        case MRT6_DONE:
1308                return ip6mr_sk_done(sk);
1309
1310        case MRT6_ADD_MIF:
1311                if (optlen < sizeof(vif))
1312                        return -EINVAL;
1313                if (copy_from_user(&vif, optval, sizeof(vif)))
1314                        return -EFAULT;
1315                if (vif.mif6c_mifi >= MAXMIFS)
1316                        return -ENFILE;
1317                rtnl_lock();
1318                ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
1319                rtnl_unlock();
1320                return ret;
1321
1322        case MRT6_DEL_MIF:
1323                if (optlen < sizeof(mifi_t))
1324                        return -EINVAL;
1325                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1326                        return -EFAULT;
1327                rtnl_lock();
1328                ret = mif6_delete(net, mifi);
1329                rtnl_unlock();
1330                return ret;
1331
1332        /*
1333         *      Manipulate the forwarding caches. These live
1334         *      in a sort of kernel/user symbiosis.
1335         */
1336        case MRT6_ADD_MFC:
1337        case MRT6_DEL_MFC:
1338                if (optlen < sizeof(mfc))
1339                        return -EINVAL;
1340                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1341                        return -EFAULT;
1342                rtnl_lock();
1343                if (optname == MRT6_DEL_MFC)
1344                        ret = ip6mr_mfc_delete(net, &mfc);
1345                else
1346                        ret = ip6mr_mfc_add(net, &mfc,
1347                                            sk == net->ipv6.mroute6_sk);
1348                rtnl_unlock();
1349                return ret;
1350
1351        /*
1352         *      Control PIM assert (to activate pim will activate assert)
1353         */
1354        case MRT6_ASSERT:
1355        {
1356                int v;
1357                if (get_user(v, (int __user *)optval))
1358                        return -EFAULT;
1359                net->ipv6.mroute_do_assert = !!v;
1360                return 0;
1361        }
1362
1363#ifdef CONFIG_IPV6_PIMSM_V2
1364        case MRT6_PIM:
1365        {
1366                int v;
1367                if (get_user(v, (int __user *)optval))
1368                        return -EFAULT;
1369                v = !!v;
1370                rtnl_lock();
1371                ret = 0;
1372                if (v != net->ipv6.mroute_do_pim) {
1373                        net->ipv6.mroute_do_pim = v;
1374                        net->ipv6.mroute_do_assert = v;
1375                }
1376                rtnl_unlock();
1377                return ret;
1378        }
1379
1380#endif
1381        /*
1382         *      Spurious command, or MRT6_VERSION which you cannot
1383         *      set.
1384         */
1385        default:
1386                return -ENOPROTOOPT;
1387        }
1388}
1389
1390/*
1391 *      Getsock opt support for the multicast routing system.
1392 */
1393
1394int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1395                          int __user *optlen)
1396{
1397        int olr;
1398        int val;
1399        struct net *net = sock_net(sk);
1400
1401        switch (optname) {
1402        case MRT6_VERSION:
1403                val = 0x0305;
1404                break;
1405#ifdef CONFIG_IPV6_PIMSM_V2
1406        case MRT6_PIM:
1407                val = net->ipv6.mroute_do_pim;
1408                break;
1409#endif
1410        case MRT6_ASSERT:
1411                val = net->ipv6.mroute_do_assert;
1412                break;
1413        default:
1414                return -ENOPROTOOPT;
1415        }
1416
1417        if (get_user(olr, optlen))
1418                return -EFAULT;
1419
1420        olr = min_t(int, olr, sizeof(int));
1421        if (olr < 0)
1422                return -EINVAL;
1423
1424        if (put_user(olr, optlen))
1425                return -EFAULT;
1426        if (copy_to_user(optval, &val, olr))
1427                return -EFAULT;
1428        return 0;
1429}
1430
1431/*
1432 *      The IP multicast ioctl support routines.
1433 */
1434
1435int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1436{
1437        struct sioc_sg_req6 sr;
1438        struct sioc_mif_req6 vr;
1439        struct mif_device *vif;
1440        struct mfc6_cache *c;
1441        struct net *net = sock_net(sk);
1442
1443        switch (cmd) {
1444        case SIOCGETMIFCNT_IN6:
1445                if (copy_from_user(&vr, arg, sizeof(vr)))
1446                        return -EFAULT;
1447                if (vr.mifi >= net->ipv6.maxvif)
1448                        return -EINVAL;
1449                read_lock(&mrt_lock);
1450                vif = &net->ipv6.vif6_table[vr.mifi];
1451                if (MIF_EXISTS(net, vr.mifi)) {
1452                        vr.icount = vif->pkt_in;
1453                        vr.ocount = vif->pkt_out;
1454                        vr.ibytes = vif->bytes_in;
1455                        vr.obytes = vif->bytes_out;
1456                        read_unlock(&mrt_lock);
1457
1458                        if (copy_to_user(arg, &vr, sizeof(vr)))
1459                                return -EFAULT;
1460                        return 0;
1461                }
1462                read_unlock(&mrt_lock);
1463                return -EADDRNOTAVAIL;
1464        case SIOCGETSGCNT_IN6:
1465                if (copy_from_user(&sr, arg, sizeof(sr)))
1466                        return -EFAULT;
1467
1468                read_lock(&mrt_lock);
1469                c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1470                if (c) {
1471                        sr.pktcnt = c->mfc_un.res.pkt;
1472                        sr.bytecnt = c->mfc_un.res.bytes;
1473                        sr.wrong_if = c->mfc_un.res.wrong_if;
1474                        read_unlock(&mrt_lock);
1475
1476                        if (copy_to_user(arg, &sr, sizeof(sr)))
1477                                return -EFAULT;
1478                        return 0;
1479                }
1480                read_unlock(&mrt_lock);
1481                return -EADDRNOTAVAIL;
1482        default:
1483                return -ENOIOCTLCMD;
1484        }
1485}
1486
1487
1488static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1489{
1490        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1491                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1492        return dst_output(skb);
1493}
1494
1495/*
1496 *      Processing handlers for ip6mr_forward
1497 */
1498
1499static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1500{
1501        struct ipv6hdr *ipv6h;
1502        struct net *net = mfc6_net(c);
1503        struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1504        struct net_device *dev;
1505        struct dst_entry *dst;
1506        struct flowi fl;
1507
1508        if (vif->dev == NULL)
1509                goto out_free;
1510
1511#ifdef CONFIG_IPV6_PIMSM_V2
1512        if (vif->flags & MIFF_REGISTER) {
1513                vif->pkt_out++;
1514                vif->bytes_out += skb->len;
1515                vif->dev->stats.tx_bytes += skb->len;
1516                vif->dev->stats.tx_packets++;
1517                ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
1518                goto out_free;
1519        }
1520#endif
1521
1522        ipv6h = ipv6_hdr(skb);
1523
1524        fl = (struct flowi) {
1525                .oif = vif->link,
1526                .nl_u = { .ip6_u =
1527                                { .daddr = ipv6h->daddr, }
1528                }
1529        };
1530
1531        dst = ip6_route_output(net, NULL, &fl);
1532        if (!dst)
1533                goto out_free;
1534
1535        skb_dst_drop(skb);
1536        skb_dst_set(skb, dst);
1537
1538        /*
1539         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1540         * not only before forwarding, but after forwarding on all output
1541         * interfaces. It is clear, if mrouter runs a multicasting
1542         * program, it should receive packets not depending to what interface
1543         * program is joined.
1544         * If we will not make it, the program will have to join on all
1545         * interfaces. On the other hand, multihoming host (or router, but
1546         * not mrouter) cannot join to more than one interface - it will
1547         * result in receiving multiple packets.
1548         */
1549        dev = vif->dev;
1550        skb->dev = dev;
1551        vif->pkt_out++;
1552        vif->bytes_out += skb->len;
1553
1554        /* We are about to write */
1555        /* XXX: extension headers? */
1556        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1557                goto out_free;
1558
1559        ipv6h = ipv6_hdr(skb);
1560        ipv6h->hop_limit--;
1561
1562        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1563
1564        return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1565                       ip6mr_forward2_finish);
1566
1567out_free:
1568        kfree_skb(skb);
1569        return 0;
1570}
1571
1572static int ip6mr_find_vif(struct net_device *dev)
1573{
1574        struct net *net = dev_net(dev);
1575        int ct;
1576        for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
1577                if (net->ipv6.vif6_table[ct].dev == dev)
1578                        break;
1579        }
1580        return ct;
1581}
1582
1583static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1584{
1585        int psend = -1;
1586        int vif, ct;
1587        struct net *net = mfc6_net(cache);
1588
1589        vif = cache->mf6c_parent;
1590        cache->mfc_un.res.pkt++;
1591        cache->mfc_un.res.bytes += skb->len;
1592
1593        /*
1594         * Wrong interface: drop packet and (maybe) send PIM assert.
1595         */
1596        if (net->ipv6.vif6_table[vif].dev != skb->dev) {
1597                int true_vifi;
1598
1599                cache->mfc_un.res.wrong_if++;
1600                true_vifi = ip6mr_find_vif(skb->dev);
1601
1602                if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
1603                    /* pimsm uses asserts, when switching from RPT to SPT,
1604                       so that we cannot check that packet arrived on an oif.
1605                       It is bad, but otherwise we would need to move pretty
1606                       large chunk of pimd to kernel. Ough... --ANK
1607                     */
1608                    (net->ipv6.mroute_do_pim ||
1609                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
1610                    time_after(jiffies,
1611                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1612                        cache->mfc_un.res.last_assert = jiffies;
1613                        ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
1614                }
1615                goto dont_forward;
1616        }
1617
1618        net->ipv6.vif6_table[vif].pkt_in++;
1619        net->ipv6.vif6_table[vif].bytes_in += skb->len;
1620
1621        /*
1622         *      Forward the frame
1623         */
1624        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1625                if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1626                        if (psend != -1) {
1627                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1628                                if (skb2)
1629                                        ip6mr_forward2(skb2, cache, psend);
1630                        }
1631                        psend = ct;
1632                }
1633        }
1634        if (psend != -1) {
1635                ip6mr_forward2(skb, cache, psend);
1636                return 0;
1637        }
1638
1639dont_forward:
1640        kfree_skb(skb);
1641        return 0;
1642}
1643
1644
1645/*
1646 *      Multicast packets for forwarding arrive here
1647 */
1648
1649int ip6_mr_input(struct sk_buff *skb)
1650{
1651        struct mfc6_cache *cache;
1652        struct net *net = dev_net(skb->dev);
1653
1654        read_lock(&mrt_lock);
1655        cache = ip6mr_cache_find(net,
1656                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1657
1658        /*
1659         *      No usable cache entry
1660         */
1661        if (cache == NULL) {
1662                int vif;
1663
1664                vif = ip6mr_find_vif(skb->dev);
1665                if (vif >= 0) {
1666                        int err = ip6mr_cache_unresolved(net, vif, skb);
1667                        read_unlock(&mrt_lock);
1668
1669                        return err;
1670                }
1671                read_unlock(&mrt_lock);
1672                kfree_skb(skb);
1673                return -ENODEV;
1674        }
1675
1676        ip6_mr_forward(skb, cache);
1677
1678        read_unlock(&mrt_lock);
1679
1680        return 0;
1681}
1682
1683
1684static int
1685ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1686{
1687        int ct;
1688        struct rtnexthop *nhp;
1689        struct net *net = mfc6_net(c);
1690        struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev;
1691        u8 *b = skb_tail_pointer(skb);
1692        struct rtattr *mp_head;
1693
1694        if (dev)
1695                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1696
1697        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1698
1699        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1700                if (c->mfc_un.res.ttls[ct] < 255) {
1701                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1702                                goto rtattr_failure;
1703                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1704                        nhp->rtnh_flags = 0;
1705                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1706                        nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
1707                        nhp->rtnh_len = sizeof(*nhp);
1708                }
1709        }
1710        mp_head->rta_type = RTA_MULTIPATH;
1711        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1712        rtm->rtm_type = RTN_MULTICAST;
1713        return 1;
1714
1715rtattr_failure:
1716        nlmsg_trim(skb, b);
1717        return -EMSGSIZE;
1718}
1719
1720int ip6mr_get_route(struct net *net,
1721                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1722{
1723        int err;
1724        struct mfc6_cache *cache;
1725        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1726
1727        read_lock(&mrt_lock);
1728        cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1729
1730        if (!cache) {
1731                struct sk_buff *skb2;
1732                struct ipv6hdr *iph;
1733                struct net_device *dev;
1734                int vif;
1735
1736                if (nowait) {
1737                        read_unlock(&mrt_lock);
1738                        return -EAGAIN;
1739                }
1740
1741                dev = skb->dev;
1742                if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1743                        read_unlock(&mrt_lock);
1744                        return -ENODEV;
1745                }
1746
1747                /* really correct? */
1748                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1749                if (!skb2) {
1750                        read_unlock(&mrt_lock);
1751                        return -ENOMEM;
1752                }
1753
1754                skb_reset_transport_header(skb2);
1755
1756                skb_put(skb2, sizeof(struct ipv6hdr));
1757                skb_reset_network_header(skb2);
1758
1759                iph = ipv6_hdr(skb2);
1760                iph->version = 0;
1761                iph->priority = 0;
1762                iph->flow_lbl[0] = 0;
1763                iph->flow_lbl[1] = 0;
1764                iph->flow_lbl[2] = 0;
1765                iph->payload_len = 0;
1766                iph->nexthdr = IPPROTO_NONE;
1767                iph->hop_limit = 0;
1768                ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1769                ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1770
1771                err = ip6mr_cache_unresolved(net, vif, skb2);
1772                read_unlock(&mrt_lock);
1773
1774                return err;
1775        }
1776
1777        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1778                cache->mfc_flags |= MFC_NOTIFY;
1779
1780        err = ip6mr_fill_mroute(skb, cache, rtm);
1781        read_unlock(&mrt_lock);
1782        return err;
1783}
1784
1785