linux/net/ipv4/ipmr.c
/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that all changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected by
   the weak rwlock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This way the data path is entirely free of exclusive locks.
 */
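
/*
 * A sketch of the resulting locking discipline (editorial illustration,
 * not code from this file): the data path only ever takes the read side
 * of mrt_lock, configuration paths take the write side with bottom
 * halves disabled, and the unresolved queue needs its spinlock even
 * from timer context:
 *
 *      read_lock(&mrt_lock);            -- forwarding fast path
 *      ... lookup vif_table / mfc_cache_array ...
 *      read_unlock(&mrt_lock);
 *
 *      write_lock_bh(&mrt_lock);        -- process context, under rtnl_lock
 *      ... modify vif_table / hash chains ...
 *      write_unlock_bh(&mrt_lock);
 *
 *      spin_lock_bh(&mfc_unres_lock);   -- unresolved queue, any context
 *      ... walk mfc_unres_queue ...
 *      spin_unlock_bh(&mfc_unres_lock);
 */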

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev_close(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}

static
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name(net, "tunl0");

        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                } else
                        err = -EOPNOTSUPP;

                dev = NULL;

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL)
                                goto failure;

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

                        if (dev_open(dev))
                                goto failure;
                        dev_hold(dev);
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
                          IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}

static struct net_device *ipmr_reg_vif(struct net *net)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(0, "pimreg", reg_vif_setup);

        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        rcu_read_lock();
        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
                rcu_read_unlock();
                goto failure;
        }

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
        rcu_read_unlock();

        if (dev_open(dev))
                goto failure;

        dev_hold(dev);

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 *      @notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct net *net, int vifi, int notify)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= net->ipv4.maxvif)
                return -EADDRNOTAVAIL;

        v = &net->ipv4.vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == net->ipv4.mroute_reg_vif_num)
                net->ipv4.mroute_reg_vif_num = -1;
#endif

        if (vifi+1 == net->ipv4.maxvif) {
                int tmp;
                for (tmp = vifi-1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(net, tmp))
                                break;
                }
                net->ipv4.maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
                unregister_netdevice(dev);

        dev_put(dev);
        return 0;
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        release_net(mfc_net(c));
        kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;
        struct net *net = mfc_net(c);

        atomic_dec(&net->ipv4.cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}


/* A single timer process handles the whole unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (mfc_unres_queue == NULL)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c = *cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (mfc_unres_queue != NULL)
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;
        struct net *net = mfc_net(cache);

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
                if (VIF_EXISTS(net, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}
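
/*
 * Editorial note on the ttls[] convention used above: mrouted supplies
 * one TTL threshold per vif in mfcctl.mfcc_ttls. A value of 0 or 255
 * means "do not forward on this vif"; any other value is the TTL a
 * packet must exceed to be sent out that vif (see the comparison in
 * ip_mr_forward()). For example, a hypothetical table
 *
 *      unsigned char ttls[MAXVIFS] = { [1] = 1, [3] = 32 };
 *
 * forwards on vif 1 for packets with TTL > 1 and on vif 3 only for
 * TTL > 32; minvif/maxvif then bound the forwarding scan to [1, 4).
 */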

static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy ? */
        if (VIF_EXISTS(net, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (net->ipv4.mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;
        case 0:
                dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                net->ipv4.mroute_reg_vif_num = vifi;
#endif
        if (vifi+1 > net->ipv4.maxvif)
                net->ipv4.maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

static struct mfc_cache *ipmr_cache_find(struct net *net,
                                         __be32 origin,
                                         __be32 mcastgrp)
{
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;

        for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        break;
        }
        return c;
}
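
/*
 * Editorial aside: MFC_HASH() comes from <linux/mroute.h> and folds the
 * group and origin addresses into one of MFC_LINES (64) buckets, so a
 * lookup is a short chain walk under the read-held mrt_lock. A sketch
 * of a caller, with hypothetical addresses:
 *
 *      struct mfc_cache *c;
 *
 *      read_lock(&mrt_lock);
 *      c = ipmr_cache_find(net, htonl(0x0a000001),     // origin 10.0.0.1
 *                          htonl(0xe0000101));         // group 224.0.1.1
 *      if (c)
 *              ... use c->mfc_un.res counters ...
 *      read_unlock(&mrt_lock);
 */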

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
        if (c == NULL)
                return NULL;
        c->mfc_un.res.minvif = MAXVIFS;
        mfc_net_set(c, net);
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
        if (c == NULL)
                return NULL;
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        mfc_net_set(c, net);
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct net *net,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = net->ipv4.mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
        ip_hdr(skb)->protocol = 0;                      /* Flag to the daemon that this is an upcall */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));

        /*
         *      Add our header
         */

        igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      = 0;
        ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
        skb->transport_header = skb->network_header;
        }

        if (net->ipv4.mroute_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
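
/*
 * What mrouted sees (an illustrative sketch, not part of this file):
 * the upcall arrives on the raw IGMP socket that issued MRT_INIT, as a
 * pseudo IP packet whose header doubles as struct igmpmsg from
 * <linux/mroute.h>. The zeroed protocol field / im_mbz lets userspace
 * tell upcalls apart from real IGMP:
 *
 *      char buf[1500];
 *      ssize_t n = read(mrt_fd, buf, sizeof(buf));
 *      struct igmpmsg *im = (struct igmpmsg *)buf;
 *
 *      if (n >= (ssize_t)sizeof(*im) && im->im_mbz == 0) {
 *              switch (im->im_msgtype) {
 *              case IGMPMSG_NOCACHE:   // resolve (im->im_src, im->im_dst),
 *                      break;          // then MRT_ADD_MFC
 *              case IGMPMSG_WRONGVIF:  // PIM assert processing
 *                      break;
 *              case IGMPMSG_WHOLEPKT:  // PIM register forwarding
 *                      break;
 *              }
 *      }
 */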

/*
 *      Queue a packet for resolution, creating an unresolved
 *      cache entry if one does not already exist.
 */

static int
ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        for (c = mfc_unres_queue; c; c = c->next) {
                if (net_eq(mfc_net(c), net) &&
                    c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres(net)) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&net->ipv4.cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}

static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp = &net->ipv4.mfc_cache_array[line];
             (c = *cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc(net);
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = net->ipv4.mfc_cache_array[line];
        net->ipv4.mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
             cp = &uc->next) {
                if (net_eq(mfc_net(uc), net) &&
                    uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        atomic_dec(&net->ipv4.cache_resolve_queue_len);
                        break;
                }
        }
        if (mfc_unres_queue == NULL)
                del_timer(&ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct net *net)
{
        int i;

        /*
         *      Shut down all active vif entries
         */
        for (i = 0; i < net->ipv4.maxvif; i++) {
                if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
                        vif_delete(net, i, 0);
        }

        /*
         *      Wipe the cache
         */
        for (i = 0; i < MFC_LINES; i++) {
                struct mfc_cache *c, **cp;

                cp = &net->ipv4.mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags&MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
                struct mfc_cache *c, **cp;

                spin_lock_bh(&mfc_unres_lock);
                cp = &mfc_unres_queue;
                while ((c = *cp) != NULL) {
                        if (!net_eq(mfc_net(c), net)) {
                                cp = &c->next;
                                continue;
                        }
                        *cp = c->next;

                        ipmr_destroy_unres(c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);

        rtnl_lock();
        if (sk == net->ipv4.mroute_sk) {
                IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

                write_lock_bh(&mrt_lock);
                net->ipv4.mroute_sk = NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(net);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);

        if (optname != MRT_INIT) {
                if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (net->ipv4.mroute_sk) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        write_lock_bh(&mrt_lock);
                        net->ipv4.mroute_sk = sk;
                        write_unlock_bh(&mrt_lock);

                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != net->ipv4.mroute_sk)
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
                } else {
                        ret = vif_delete(net, vif.vifc_vifi, 0);
                }
                rtnl_unlock();
                return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(net, &mfc);
                else
                        ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
                rtnl_unlock();
                return ret;
                /*
                 *      Control PIM assert.
                 */
        case MRT_ASSERT:
        {
                int v;
                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                net->ipv4.mroute_do_assert = (v) ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;

                rtnl_lock();
                ret = 0;
                if (v != net->ipv4.mroute_do_pim) {
                        net->ipv4.mroute_do_pim = v;
                        net->ipv4.mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}
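
/*
 * A minimal userspace sketch of the option sequence handled above
 * (editorial illustration; error handling omitted, addresses are
 * examples):
 *
 *      #include <sys/socket.h>
 *      #include <netinet/in.h>
 *      #include <arpa/inet.h>
 *      #include <linux/mroute.h>
 *
 *      int one = 1;
 *      int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *
 *      setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *      struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *      vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *      setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *      struct mfcctl mc = { .mfcc_parent = 0 };
 *      mc.mfcc_origin.s_addr   = inet_addr("192.0.2.2");
 *      mc.mfcc_mcastgrp.s_addr = inet_addr("224.0.1.1");
 *      mc.mfcc_ttls[1] = 1;                    // forward on vif 1
 *      setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 *
 *      setsockopt(fd, IPPROTO_IP, MRT_DONE, NULL, 0);  // on shutdown
 */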

/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        if (olr < 0)
                return -EINVAL;
        olr = min_t(unsigned int, olr, sizeof(int));

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = net->ipv4.mroute_do_pim;
#endif
        else
                val = net->ipv4.mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}
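
/*
 * Matching query sketch (illustrative): MRT_VERSION reads back 0x0305,
 * presumably the mrouted 3.5 protocol revision this code speaks.
 *
 *      int ver;
 *      socklen_t len = sizeof(ver);
 *      getsockopt(fd, IPPROTO_IP, MRT_VERSION, &ver, &len);
 *      // ver == 0x0305
 */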

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= net->ipv4.maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &net->ipv4.vif_table[vr.vifi];
                if (VIF_EXISTS(net, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                read_lock(&mrt_lock);
                c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}
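
/*
 * Counter query sketch (illustrative; issued on the mroute socket,
 * error handling omitted): this is how monitoring tools read per-vif
 * and per-(S,G) statistics.
 *
 *      struct sioc_vif_req vreq = { .vifi = 0 };
 *      ioctl(fd, SIOCGETVIFCNT, &vreq);
 *      // vreq.icount/ocount/ibytes/obytes hold vif 0 counters
 *
 *      struct sioc_sg_req sgreq;
 *      sgreq.src.s_addr = inet_addr("192.0.2.2");
 *      sgreq.grp.s_addr = inet_addr("224.0.1.1");
 *      ioctl(fd, SIOCGETSGCNT, &sgreq);
 *      // sgreq.pktcnt/bytecnt/wrong_if for that (S,G) entry
 */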


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct vif_device *v;
        int ct;

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v = &net->ipv4.vif_table[0];
        for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
                if (v->dev == dev)
                        vif_delete(net, ct, 1);
        }
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}
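
/*
 * Resulting layout after ip_encap() (editorial illustration): a plain
 * IPIP delivery header is prepended in place, so the tunnel peer can
 * strip it and route the inner multicast packet normally:
 *
 *      +----------------------+-----------------------------------+
 *      | outer IPv4 header    | original multicast packet         |
 *      | proto = IPPROTO_IPIP | (inner IPv4 header + payload)     |
 *      | saddr = vif->local   |                                   |
 *      | daddr = vif->remote  |                                   |
 *      +----------------------+-----------------------------------+
 *
 * TOS and TTL are copied from the inner header; tot_len covers the
 * whole encapsulated packet.
 */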

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct net *net = mfc_net(c);
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow us to send ICMP here, so oversized packets
                   simply vanish into a black hole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but also after forwarding on all output
         * interfaces. Clearly, if the mrouter runs a multicasting program,
         * that program should receive packets regardless of which interface
         * it joined on. If we did not do this, the program would have to
         * join on all interfaces. On the other hand, a multihomed host (or
         * router, but not mrouter) cannot join on more than one interface -
         * it would result in receiving multiple copies of each packet.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

static int ipmr_find_vif(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        int ct;
        for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
                if (net->ipv4.vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;
        struct net *net = mfc_net(cache);

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface. This means that
                           multicast applications WILL NOT work for
                           (S,G) entries whose default multicast route points
                           to the wrong oif. In any case, it is not a good
                           idea to run multicast applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that the packet arrived on an oif.
                       This is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded a second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                if (IPCB(skb)->opt.router_alert) {
                        if (ip_call_ra_chain(skb))
                                return 0;
                } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
                        /* IGMPv1 (and broken IGMPv2 implementations such as
                           Cisco IOS <= 11.2(8)) do not put the router alert
                           option into IGMP packets destined for routable
                           groups. This is very bad, because it means
                           that we can forward NO IGMP messages.
                         */
                        read_lock(&mrt_lock);
                        if (net->ipv4.mroute_sk) {
                                nf_reset(skb);
                                raw_rcv(net->ipv4.mroute_sk, skb);
                                read_unlock(&mrt_lock);
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;
        struct net *net = dev_net(skb->dev);

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + pimlen > skb->len)
                return 1;

        read_lock(&mrt_lock);
        if (net->ipv4.mroute_reg_vif_num >= 0)
                reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                return 1;

        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = CHECKSUM_NONE;
        skb->pkt_type = PACKET_HOST;
        skb_dst_drop(skb);
        reg_dev->stats.rx_bytes += skb->len;
        reg_dev->stats.rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);

        return 0;
}
#endif
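
/*
 * Shape of the PIM REGISTER input handled by __pim_rcv() (editorial
 * sketch): pimlen is sizeof(struct igmphdr) for PIMv1 and
 * sizeof(struct pimreghdr) for PIMv2, and encap points at the inner
 * header that the checks above validate:
 *
 *      +-------------+-------------------+---------------------------+
 *      | outer IPv4  | PIM header        | encapsulated IPv4 packet  |
 *      | (unicast to | (pimlen bytes     | (must be multicast, non-  |
 *      |  this RP)   |  past transport)  |  empty, not truncated)    |
 *      +-------------+-------------------+---------------------------+
 *
 * On success the inner packet is re-injected through the pimreg device
 * via netif_rx(), as if it had arrived on that interface.
 */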
1547
1548#ifdef CONFIG_IP_PIMSM_V1
1549/*
1550 * Handle IGMP messages of PIMv1
1551 */
1552
1553int pim_rcv_v1(struct sk_buff * skb)
1554{
1555        struct igmphdr *pim;
1556        struct net *net = dev_net(skb->dev);
1557
1558        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1559                goto drop;
1560
1561        pim = igmp_hdr(skb);
1562
1563        if (!net->ipv4.mroute_do_pim ||
1564            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1565                goto drop;
1566
1567        if (__pim_rcv(skb, sizeof(*pim))) {
1568drop:
1569                kfree_skb(skb);
1570        }
1571        return 0;
1572}
1573#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (__pim_rcv(skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
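/*
 * The checksum test above accepts either form seen in the field: the
 * register checksum is supposed to cover only the 8-byte register
 * header, but some older peers checksum the entire packet, so both
 * are tolerated.  A minimal userspace sketch of the same policy is
 * shown below (illustration only, not built; "pkt"/"len" are
 * hypothetical and csum16() is the standard Internet checksum):
 */
#if 0
#include <stdint.h>
#include <stddef.h>

static uint16_t csum16(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Valid if the header-only or the whole-packet checksum verifies. */
static int pim_register_csum_ok(const uint8_t *pkt, size_t len)
{
	return csum16(pkt, 8) == 0 || csum16(pkt, len) == 0;
}
#endif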
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net *net = mfc_net(c);
	struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* skip vifs that were deleted while the entry still
		 * references them, so we never dereference a NULL dev */
		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
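/*
 * The oifs are encoded as one struct rtnexthop per vif inside a single
 * RTA_MULTIPATH attribute, with the TTL threshold carried in rtnh_hops.
 * A userspace reader might walk the attribute as sketched below
 * (illustration only, not built; "nh"/"len" are assumed to come from
 * RTA_DATA()/RTA_PAYLOAD() of the attribute):
 */
#if 0
#include <linux/rtnetlink.h>
#include <stdio.h>

static void dump_mroute_oifs(struct rtnexthop *nh, int len)
{
	while (RTNH_OK(nh, len)) {
		printf("oif %d ttl threshold %d\n",
		       nh->rtnh_ifindex, nh->rtnh_hops);
		len -= RTNH_ALIGN(nh->rtnh_len);
		nh = RTNH_NEXT(nh);
	}
}
#endif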

int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
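		/* version 0 marks this as a synthetic resolution
		 * request: once the entry resolves it is answered
		 * with a netlink reply instead of being forwarded */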
		iph->version = 0;
		err = ipmr_cache_unresolved(net, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
struct ipmr_vif_iter {
	struct seq_net_private p;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		if (pos-- == 0)
			return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct net *net = seq_file_net(seq);

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < net->ipv4.maxvif) {
		if (!VIF_EXISTS(net, iter->ct))
			continue;
		return &net->ipv4.vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}

static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - net->ipv4.vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
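/*
 * Example of what the vif table above looks like from userspace (the
 * sample row is hypothetical, matching the seq_printf format):
 *
 *	$ cat /proc/net/ip_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0           1500      10      1500      10 00000 0A000001 00000000
 */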

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mfc_cache **cache;
	int ct;
};

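/*
 * The mfc dump walks two structures under two different locks: the
 * per-net array of resolved entries under mrt_lock, then the global
 * unresolved queue under mfc_unres_lock.  it->cache records which one
 * we are in, so that ->next can hand over from one lock to the other
 * and ->stop can release whichever is still held.
 */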
static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mfc_cache *mfc;

	it->cache = net->ipv4.mfc_cache_array;
	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
		for (mfc = net->ipv4.mfc_cache_array[it->ct];
		     mfc; mfc = mfc->next)
			if (pos-- == 0)
				return mfc;
	read_unlock(&mrt_lock);

	it->cache = &mfc_unres_queue;
	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
		if (net_eq(mfc_net(mfc), net) &&
		    pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* the unresolved queue is global, so entries belonging to
	 * other namespaces must be skipped when following ->next */
	while (mfc->next) {
		mfc = mfc->next;
		if (net_eq(mfc_net(mfc), net))
			return mfc;
	}

	if (it->cache == &mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != net->ipv4.mfc_cache_array);

	while (++it->ct < MFC_LINES) {
		mfc = net->ipv4.mfc_cache_array[it->ct];
		if (mfc)
			return mfc;
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	mfc = mfc_unres_queue;
	while (mfc && !net_eq(mfc_net(mfc), net))
		mfc = mfc->next;
	if (mfc)
		return mfc;

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);

	if (it->cache == &mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == net->ipv4.mfc_cache_array)
		read_unlock(&mrt_lock);
}

static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;
	struct net *net = seq_file_net(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(net, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
					   " %2d:%-3d",
					   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}

static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
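/*
 * Example of what the cache dump looks like from userspace (the sample
 * row is hypothetical; oifs are printed as vif:ttl-threshold pairs):
 *
 *	$ cat /proc/net/ip_mr_cache
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	E2000001 0A000001 0         10     1500        0  1:1
 */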
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	=	pim_rcv,
	.netns_ok	=	1,
};
#endif

/*
 *	Per-namespace setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err = 0;

	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
				      GFP_KERNEL);
	if (!net->ipv4.vif_table) {
		err = -ENOMEM;
		goto fail;
	}

	/* Forwarding cache */
	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
					    sizeof(struct mfc_cache *),
					    GFP_KERNEL);
	if (!net->ipv4.mfc_cache_array) {
		err = -ENOMEM;
		goto fail_mfc_cache;
	}

#ifdef CONFIG_IP_PIMSM
	net->ipv4.mroute_reg_vif_num = -1;
#endif

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	kfree(net->ipv4.mfc_cache_array);
#endif
fail_mfc_cache:
	kfree(net->ipv4.vif_table);
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	kfree(net->ipv4.mfc_cache_array);
	kfree(net->ipv4.vif_table);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};

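/*
 * Module init: allocate the mfc_cache slab, then register in order the
 * pernet subsystem, the netdevice notifier and (optionally) the PIMv2
 * protocol handler; the error path below unwinds the same steps in
 * reverse.
 */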
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	del_timer(&ipmr_expire_timer);
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}