linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <linux/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/mm.h>
  24#include <linux/kernel.h>
  25#include <linux/fcntl.h>
  26#include <linux/stat.h>
  27#include <linux/socket.h>
  28#include <linux/inet.h>
  29#include <linux/netdevice.h>
  30#include <linux/inetdevice.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/init.h>
  34#include <linux/compat.h>
  35#include <net/protocol.h>
  36#include <linux/skbuff.h>
  37#include <net/raw.h>
  38#include <linux/notifier.h>
  39#include <linux/if_arp.h>
  40#include <net/checksum.h>
  41#include <net/netlink.h>
  42#include <net/fib_rules.h>
  43
  44#include <net/ipv6.h>
  45#include <net/ip6_route.h>
  46#include <linux/mroute6.h>
  47#include <linux/pim.h>
  48#include <net/addrconf.h>
  49#include <linux/netfilter_ipv6.h>
  50#include <linux/export.h>
  51#include <net/ip6_checksum.h>
  52#include <linux/netconf.h>
  53
  54struct ip6mr_rule {
  55        struct fib_rule         common;
  56};
  57
  58struct ip6mr_result {
  59        struct mr_table *mrt;
  60};
  61
  62/* Big lock, protecting vif table, mrt cache and mroute socket state.
  63   Note that the changes are semaphored via rtnl_lock.
  64 */
  65
  66static DEFINE_RWLOCK(mrt_lock);
  67
  68/* Multicast router control variables */
  69
  70/* Special spinlock for queue of unresolved entries */
  71static DEFINE_SPINLOCK(mfc_unres_lock);
  72
/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock. The queue of unresolved entries is protected by
 * the strong spinlock mfc_unres_lock.
 *
 * With this scheme the data path takes no exclusive locks at all.
 */
  80
  81static struct kmem_cache *mrt_cachep __read_mostly;
  82
  83static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  84static void ip6mr_free_table(struct mr_table *mrt);
  85
  86static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  87                           struct sk_buff *skb, struct mfc6_cache *cache);
  88static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  89                              mifi_t mifi, int assert);
  90static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  91                              int cmd);
  92static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  93static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  94                               struct netlink_callback *cb);
  95static void mroute_clean_tables(struct mr_table *mrt, bool all);
  96static void ipmr_expire_process(struct timer_list *t);
  97
  98#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
  99#define ip6mr_for_each_table(mrt, net) \
 100        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 101
 102static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 103                                            struct mr_table *mrt)
 104{
 105        struct mr_table *ret;
 106
 107        if (!mrt)
 108                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 109                                     struct mr_table, list);
 110        else
 111                ret = list_entry_rcu(mrt->list.next,
 112                                     struct mr_table, list);
 113
 114        if (&ret->list == &net->ipv6.mr6_tables)
 115                return NULL;
 116        return ret;
 117}
 118
 119static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 120{
 121        struct mr_table *mrt;
 122
 123        ip6mr_for_each_table(mrt, net) {
 124                if (mrt->id == id)
 125                        return mrt;
 126        }
 127        return NULL;
 128}
 129
 130static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 131                            struct mr_table **mrt)
 132{
 133        int err;
 134        struct ip6mr_result res;
 135        struct fib_lookup_arg arg = {
 136                .result = &res,
 137                .flags = FIB_LOOKUP_NOREF,
 138        };
 139
 140        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 141                               flowi6_to_flowi(flp6), 0, &arg);
 142        if (err < 0)
 143                return err;
 144        *mrt = res.mrt;
 145        return 0;
 146}
 147
 148static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 149                             int flags, struct fib_lookup_arg *arg)
 150{
 151        struct ip6mr_result *res = arg->result;
 152        struct mr_table *mrt;
 153
 154        switch (rule->action) {
 155        case FR_ACT_TO_TBL:
 156                break;
 157        case FR_ACT_UNREACHABLE:
 158                return -ENETUNREACH;
 159        case FR_ACT_PROHIBIT:
 160                return -EACCES;
 161        case FR_ACT_BLACKHOLE:
 162        default:
 163                return -EINVAL;
 164        }
 165
 166        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 167        if (!mrt)
 168                return -EAGAIN;
 169        res->mrt = mrt;
 170        return 0;
 171}
 172
 173static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 174{
 175        return 1;
 176}
 177
 178static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 179        FRA_GENERIC_POLICY,
 180};
 181
 182static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 183                                struct fib_rule_hdr *frh, struct nlattr **tb)
 184{
 185        return 0;
 186}
 187
 188static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 189                              struct nlattr **tb)
 190{
 191        return 1;
 192}
 193
 194static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 195                           struct fib_rule_hdr *frh)
 196{
 197        frh->dst_len = 0;
 198        frh->src_len = 0;
 199        frh->tos     = 0;
 200        return 0;
 201}
 202
 203static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 204        .family         = RTNL_FAMILY_IP6MR,
 205        .rule_size      = sizeof(struct ip6mr_rule),
 206        .addr_size      = sizeof(struct in6_addr),
 207        .action         = ip6mr_rule_action,
 208        .match          = ip6mr_rule_match,
 209        .configure      = ip6mr_rule_configure,
 210        .compare        = ip6mr_rule_compare,
 211        .fill           = ip6mr_rule_fill,
 212        .nlgroup        = RTNLGRP_IPV6_RULE,
 213        .policy         = ip6mr_rule_policy,
 214        .owner          = THIS_MODULE,
 215};
 216
 217static int __net_init ip6mr_rules_init(struct net *net)
 218{
 219        struct fib_rules_ops *ops;
 220        struct mr_table *mrt;
 221        int err;
 222
 223        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 224        if (IS_ERR(ops))
 225                return PTR_ERR(ops);
 226
 227        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 228
 229        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 230        if (!mrt) {
 231                err = -ENOMEM;
 232                goto err1;
 233        }
 234
 235        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 236        if (err < 0)
 237                goto err2;
 238
 239        net->ipv6.mr6_rules_ops = ops;
 240        return 0;
 241
 242err2:
 243        ip6mr_free_table(mrt);
 244err1:
 245        fib_rules_unregister(ops);
 246        return err;
 247}
 248
 249static void __net_exit ip6mr_rules_exit(struct net *net)
 250{
 251        struct mr_table *mrt, *next;
 252
 253        rtnl_lock();
 254        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 255                list_del(&mrt->list);
 256                ip6mr_free_table(mrt);
 257        }
 258        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 259        rtnl_unlock();
 260}
 261
 262static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 263{
 264        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
 265}
 266
 267static unsigned int ip6mr_rules_seq_read(struct net *net)
 268{
 269        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 270}
 271
 272bool ip6mr_rule_default(const struct fib_rule *rule)
 273{
 274        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 275               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 276}
 277EXPORT_SYMBOL(ip6mr_rule_default);
 278#else
 279#define ip6mr_for_each_table(mrt, net) \
 280        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 281
 282static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 283                                            struct mr_table *mrt)
 284{
 285        if (!mrt)
 286                return net->ipv6.mrt6;
 287        return NULL;
 288}
 289
 290static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 291{
 292        return net->ipv6.mrt6;
 293}
 294
 295static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 296                            struct mr_table **mrt)
 297{
 298        *mrt = net->ipv6.mrt6;
 299        return 0;
 300}
 301
 302static int __net_init ip6mr_rules_init(struct net *net)
 303{
 304        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 305        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 306}
 307
 308static void __net_exit ip6mr_rules_exit(struct net *net)
 309{
 310        rtnl_lock();
 311        ip6mr_free_table(net->ipv6.mrt6);
 312        net->ipv6.mrt6 = NULL;
 313        rtnl_unlock();
 314}
 315
 316static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 317{
 318        return 0;
 319}
 320
 321static unsigned int ip6mr_rules_seq_read(struct net *net)
 322{
 323        return 0;
 324}
 325#endif
 326
 327static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 328                          const void *ptr)
 329{
 330        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 331        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 332
 333        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 334               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 335}
 336
 337static const struct rhashtable_params ip6mr_rht_params = {
 338        .head_offset = offsetof(struct mr_mfc, mnode),
 339        .key_offset = offsetof(struct mfc6_cache, cmparg),
 340        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 341        .nelem_hint = 3,
 342        .locks_mul = 1,
 343        .obj_cmpfn = ip6mr_hash_cmp,
 344        .automatic_shrinking = true,
 345};
 346
 347static void ip6mr_new_table_set(struct mr_table *mrt,
 348                                struct net *net)
 349{
 350#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 351        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 352#endif
 353}
 354
 355static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 356        .mf6c_origin = IN6ADDR_ANY_INIT,
 357        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 358};
 359
 360static struct mr_table_ops ip6mr_mr_table_ops = {
 361        .rht_params = &ip6mr_rht_params,
 362        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 363};
 364
 365static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 366{
 367        struct mr_table *mrt;
 368
 369        mrt = ip6mr_get_table(net, id);
 370        if (mrt)
 371                return mrt;
 372
 373        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 374                              ipmr_expire_process, ip6mr_new_table_set);
 375}
 376
 377static void ip6mr_free_table(struct mr_table *mrt)
 378{
 379        del_timer_sync(&mrt->ipmr_expire_timer);
 380        mroute_clean_tables(mrt, true);
 381        rhltable_destroy(&mrt->mfc_hash);
 382        kfree(mrt);
 383}
 384
 385#ifdef CONFIG_PROC_FS
 386/* The /proc interfaces to multicast routing
 387 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 388 */
 389
 390static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 391        __acquires(mrt_lock)
 392{
 393        struct mr_vif_iter *iter = seq->private;
 394        struct net *net = seq_file_net(seq);
 395        struct mr_table *mrt;
 396
 397        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 398        if (!mrt)
 399                return ERR_PTR(-ENOENT);
 400
 401        iter->mrt = mrt;
 402
 403        read_lock(&mrt_lock);
 404        return mr_vif_seq_start(seq, pos);
 405}
 406
 407static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 408        __releases(mrt_lock)
 409{
 410        read_unlock(&mrt_lock);
 411}
 412
 413static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 414{
 415        struct mr_vif_iter *iter = seq->private;
 416        struct mr_table *mrt = iter->mrt;
 417
 418        if (v == SEQ_START_TOKEN) {
 419                seq_puts(seq,
 420                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 421        } else {
 422                const struct vif_device *vif = v;
 423                const char *name = vif->dev ? vif->dev->name : "none";
 424
 425                seq_printf(seq,
 426                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 427                           vif - mrt->vif_table,
 428                           name, vif->bytes_in, vif->pkt_in,
 429                           vif->bytes_out, vif->pkt_out,
 430                           vif->flags);
 431        }
 432        return 0;
 433}
 434
 435static const struct seq_operations ip6mr_vif_seq_ops = {
 436        .start = ip6mr_vif_seq_start,
 437        .next  = mr_vif_seq_next,
 438        .stop  = ip6mr_vif_seq_stop,
 439        .show  = ip6mr_vif_seq_show,
 440};
 441
 442static int ip6mr_vif_open(struct inode *inode, struct file *file)
 443{
 444        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 445                            sizeof(struct mr_vif_iter));
 446}
 447
 448static const struct file_operations ip6mr_vif_fops = {
 449        .open    = ip6mr_vif_open,
 450        .read    = seq_read,
 451        .llseek  = seq_lseek,
 452        .release = seq_release_net,
 453};
 454
 455static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 456{
 457        struct net *net = seq_file_net(seq);
 458        struct mr_table *mrt;
 459
 460        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 461        if (!mrt)
 462                return ERR_PTR(-ENOENT);
 463
 464        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 465}
 466
 467static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 468{
 469        int n;
 470
 471        if (v == SEQ_START_TOKEN) {
 472                seq_puts(seq,
 473                         "Group                            "
 474                         "Origin                           "
 475                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 476        } else {
 477                const struct mfc6_cache *mfc = v;
 478                const struct mr_mfc_iter *it = seq->private;
 479                struct mr_table *mrt = it->mrt;
 480
 481                seq_printf(seq, "%pI6 %pI6 %-3hd",
 482                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 483                           mfc->_c.mfc_parent);
 484
 485                if (it->cache != &mrt->mfc_unres_queue) {
 486                        seq_printf(seq, " %8lu %8lu %8lu",
 487                                   mfc->_c.mfc_un.res.pkt,
 488                                   mfc->_c.mfc_un.res.bytes,
 489                                   mfc->_c.mfc_un.res.wrong_if);
 490                        for (n = mfc->_c.mfc_un.res.minvif;
 491                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 492                                if (VIF_EXISTS(mrt, n) &&
 493                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 494                                        seq_printf(seq,
 495                                                   " %2d:%-3d", n,
 496                                                   mfc->_c.mfc_un.res.ttls[n]);
 497                        }
 498                } else {
 499                        /* unresolved mfc_caches don't contain
 500                         * pkt, bytes and wrong_if values
 501                         */
 502                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 503                }
 504                seq_putc(seq, '\n');
 505        }
 506        return 0;
 507}
 508
 509static const struct seq_operations ipmr_mfc_seq_ops = {
 510        .start = ipmr_mfc_seq_start,
 511        .next  = mr_mfc_seq_next,
 512        .stop  = mr_mfc_seq_stop,
 513        .show  = ipmr_mfc_seq_show,
 514};
 515
 516static int ipmr_mfc_open(struct inode *inode, struct file *file)
 517{
 518        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 519                            sizeof(struct mr_mfc_iter));
 520}
 521
 522static const struct file_operations ip6mr_mfc_fops = {
 523        .open    = ipmr_mfc_open,
 524        .read    = seq_read,
 525        .llseek  = seq_lseek,
 526        .release = seq_release_net,
 527};
 528#endif
 529
 530#ifdef CONFIG_IPV6_PIMSM_V2
 531
 532static int pim6_rcv(struct sk_buff *skb)
 533{
 534        struct pimreghdr *pim;
 535        struct ipv6hdr   *encap;
 536        struct net_device  *reg_dev = NULL;
 537        struct net *net = dev_net(skb->dev);
 538        struct mr_table *mrt;
 539        struct flowi6 fl6 = {
 540                .flowi6_iif     = skb->dev->ifindex,
 541                .flowi6_mark    = skb->mark,
 542        };
 543        int reg_vif_num;
 544
 545        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 546                goto drop;
 547
 548        pim = (struct pimreghdr *)skb_transport_header(skb);
 549        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 550            (pim->flags & PIM_NULL_REGISTER) ||
 551            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 552                             sizeof(*pim), IPPROTO_PIM,
 553                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 554             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 555                goto drop;
 556
 557        /* check if the inner packet is destined to mcast group */
 558        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 559                                   sizeof(*pim));
 560
 561        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 562            encap->payload_len == 0 ||
 563            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 564                goto drop;
 565
 566        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 567                goto drop;
 568        reg_vif_num = mrt->mroute_reg_vif_num;
 569
 570        read_lock(&mrt_lock);
 571        if (reg_vif_num >= 0)
 572                reg_dev = mrt->vif_table[reg_vif_num].dev;
 573        if (reg_dev)
 574                dev_hold(reg_dev);
 575        read_unlock(&mrt_lock);
 576
 577        if (!reg_dev)
 578                goto drop;
 579
 580        skb->mac_header = skb->network_header;
 581        skb_pull(skb, (u8 *)encap - skb->data);
 582        skb_reset_network_header(skb);
 583        skb->protocol = htons(ETH_P_IPV6);
 584        skb->ip_summed = CHECKSUM_NONE;
 585
 586        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 587
 588        netif_rx(skb);
 589
 590        dev_put(reg_dev);
 591        return 0;
 592 drop:
 593        kfree_skb(skb);
 594        return 0;
 595}
 596
 597static const struct inet6_protocol pim6_protocol = {
 598        .handler        =       pim6_rcv,
 599};
 600
 601/* Service routines creating virtual interfaces: PIMREG */
 602
 603static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 604                                      struct net_device *dev)
 605{
 606        struct net *net = dev_net(dev);
 607        struct mr_table *mrt;
 608        struct flowi6 fl6 = {
 609                .flowi6_oif     = dev->ifindex,
 610                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 611                .flowi6_mark    = skb->mark,
 612        };
 613        int err;
 614
 615        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 616        if (err < 0) {
 617                kfree_skb(skb);
 618                return err;
 619        }
 620
 621        read_lock(&mrt_lock);
 622        dev->stats.tx_bytes += skb->len;
 623        dev->stats.tx_packets++;
 624        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 625        read_unlock(&mrt_lock);
 626        kfree_skb(skb);
 627        return NETDEV_TX_OK;
 628}
 629
 630static int reg_vif_get_iflink(const struct net_device *dev)
 631{
 632        return 0;
 633}
 634
 635static const struct net_device_ops reg_vif_netdev_ops = {
 636        .ndo_start_xmit = reg_vif_xmit,
 637        .ndo_get_iflink = reg_vif_get_iflink,
 638};
 639
 640static void reg_vif_setup(struct net_device *dev)
 641{
 642        dev->type               = ARPHRD_PIMREG;
 643        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 644        dev->flags              = IFF_NOARP;
 645        dev->netdev_ops         = &reg_vif_netdev_ops;
 646        dev->needs_free_netdev  = true;
 647        dev->features           |= NETIF_F_NETNS_LOCAL;
 648}
 649
 650static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 651{
 652        struct net_device *dev;
 653        char name[IFNAMSIZ];
 654
 655        if (mrt->id == RT6_TABLE_DFLT)
 656                sprintf(name, "pim6reg");
 657        else
 658                sprintf(name, "pim6reg%u", mrt->id);
 659
 660        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 661        if (!dev)
 662                return NULL;
 663
 664        dev_net_set(dev, net);
 665
 666        if (register_netdevice(dev)) {
 667                free_netdev(dev);
 668                return NULL;
 669        }
 670
 671        if (dev_open(dev))
 672                goto failure;
 673
 674        dev_hold(dev);
 675        return dev;
 676
 677failure:
 678        unregister_netdevice(dev);
 679        return NULL;
 680}
 681#endif
 682
 683static int call_ip6mr_vif_entry_notifiers(struct net *net,
 684                                          enum fib_event_type event_type,
 685                                          struct vif_device *vif,
 686                                          mifi_t vif_index, u32 tb_id)
 687{
 688        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 689                                     vif, vif_index, tb_id,
 690                                     &net->ipv6.ipmr_seq);
 691}
 692
 693static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 694                                          enum fib_event_type event_type,
 695                                          struct mfc6_cache *mfc, u32 tb_id)
 696{
 697        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 698                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 699}
 700
 701/* Delete a VIF entry */
 702static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 703                       struct list_head *head)
 704{
 705        struct vif_device *v;
 706        struct net_device *dev;
 707        struct inet6_dev *in6_dev;
 708
 709        if (vifi < 0 || vifi >= mrt->maxvif)
 710                return -EADDRNOTAVAIL;
 711
 712        v = &mrt->vif_table[vifi];
 713
 714        if (VIF_EXISTS(mrt, vifi))
 715                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 716                                               FIB_EVENT_VIF_DEL, v, vifi,
 717                                               mrt->id);
 718
 719        write_lock_bh(&mrt_lock);
 720        dev = v->dev;
 721        v->dev = NULL;
 722
 723        if (!dev) {
 724                write_unlock_bh(&mrt_lock);
 725                return -EADDRNOTAVAIL;
 726        }
 727
 728#ifdef CONFIG_IPV6_PIMSM_V2
 729        if (vifi == mrt->mroute_reg_vif_num)
 730                mrt->mroute_reg_vif_num = -1;
 731#endif
 732
 733        if (vifi + 1 == mrt->maxvif) {
 734                int tmp;
 735                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 736                        if (VIF_EXISTS(mrt, tmp))
 737                                break;
 738                }
 739                mrt->maxvif = tmp + 1;
 740        }
 741
 742        write_unlock_bh(&mrt_lock);
 743
 744        dev_set_allmulti(dev, -1);
 745
 746        in6_dev = __in6_dev_get(dev);
 747        if (in6_dev) {
 748                in6_dev->cnf.mc_forwarding--;
 749                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 750                                             NETCONFA_MC_FORWARDING,
 751                                             dev->ifindex, &in6_dev->cnf);
 752        }
 753
 754        if ((v->flags & MIFF_REGISTER) && !notify)
 755                unregister_netdevice_queue(dev, head);
 756
 757        dev_put(dev);
 758        return 0;
 759}
 760
 761static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 762{
 763        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 764
 765        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 766}
 767
 768static inline void ip6mr_cache_free(struct mfc6_cache *c)
 769{
 770        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 771}
 772
 773/* Destroy an unresolved cache entry, killing queued skbs
 774   and reporting error to netlink readers.
 775 */
 776
 777static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 778{
 779        struct net *net = read_pnet(&mrt->net);
 780        struct sk_buff *skb;
 781
 782        atomic_dec(&mrt->cache_resolve_queue_len);
 783
 784        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 785                if (ipv6_hdr(skb)->version == 0) {
 786                        struct nlmsghdr *nlh = skb_pull(skb,
 787                                                        sizeof(struct ipv6hdr));
 788                        nlh->nlmsg_type = NLMSG_ERROR;
 789                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 790                        skb_trim(skb, nlh->nlmsg_len);
 791                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 792                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 793                } else
 794                        kfree_skb(skb);
 795        }
 796
 797        ip6mr_cache_free(c);
 798}
 799
 800
 801/* Timer process for all the unresolved queue. */
 802
 803static void ipmr_do_expire_process(struct mr_table *mrt)
 804{
 805        unsigned long now = jiffies;
 806        unsigned long expires = 10 * HZ;
 807        struct mr_mfc *c, *next;
 808
 809        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 810                if (time_after(c->mfc_un.unres.expires, now)) {
 811                        /* not yet... */
 812                        unsigned long interval = c->mfc_un.unres.expires - now;
 813                        if (interval < expires)
 814                                expires = interval;
 815                        continue;
 816                }
 817
 818                list_del(&c->list);
 819                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 820                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 821        }
 822
 823        if (!list_empty(&mrt->mfc_unres_queue))
 824                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 825}
 826
 827static void ipmr_expire_process(struct timer_list *t)
 828{
 829        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 830
 831        if (!spin_trylock(&mfc_unres_lock)) {
 832                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 833                return;
 834        }
 835
 836        if (!list_empty(&mrt->mfc_unres_queue))
 837                ipmr_do_expire_process(mrt);
 838
 839        spin_unlock(&mfc_unres_lock);
 840}
 841
 842/* Fill oifs list. It is called under write locked mrt_lock. */
 843
 844static void ip6mr_update_thresholds(struct mr_table *mrt,
 845                                    struct mr_mfc *cache,
 846                                    unsigned char *ttls)
 847{
 848        int vifi;
 849
 850        cache->mfc_un.res.minvif = MAXMIFS;
 851        cache->mfc_un.res.maxvif = 0;
 852        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 853
 854        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 855                if (VIF_EXISTS(mrt, vifi) &&
 856                    ttls[vifi] && ttls[vifi] < 255) {
 857                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 858                        if (cache->mfc_un.res.minvif > vifi)
 859                                cache->mfc_un.res.minvif = vifi;
 860                        if (cache->mfc_un.res.maxvif <= vifi)
 861                                cache->mfc_un.res.maxvif = vifi + 1;
 862                }
 863        }
 864        cache->mfc_un.res.lastuse = jiffies;
 865}
 866
 867static int mif6_add(struct net *net, struct mr_table *mrt,
 868                    struct mif6ctl *vifc, int mrtsock)
 869{
 870        int vifi = vifc->mif6c_mifi;
 871        struct vif_device *v = &mrt->vif_table[vifi];
 872        struct net_device *dev;
 873        struct inet6_dev *in6_dev;
 874        int err;
 875
 876        /* Is vif busy ? */
 877        if (VIF_EXISTS(mrt, vifi))
 878                return -EADDRINUSE;
 879
 880        switch (vifc->mif6c_flags) {
 881#ifdef CONFIG_IPV6_PIMSM_V2
 882        case MIFF_REGISTER:
 883                /*
 884                 * Special Purpose VIF in PIM
 885                 * All the packets will be sent to the daemon
 886                 */
 887                if (mrt->mroute_reg_vif_num >= 0)
 888                        return -EADDRINUSE;
 889                dev = ip6mr_reg_vif(net, mrt);
 890                if (!dev)
 891                        return -ENOBUFS;
 892                err = dev_set_allmulti(dev, 1);
 893                if (err) {
 894                        unregister_netdevice(dev);
 895                        dev_put(dev);
 896                        return err;
 897                }
 898                break;
 899#endif
 900        case 0:
 901                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 902                if (!dev)
 903                        return -EADDRNOTAVAIL;
 904                err = dev_set_allmulti(dev, 1);
 905                if (err) {
 906                        dev_put(dev);
 907                        return err;
 908                }
 909                break;
 910        default:
 911                return -EINVAL;
 912        }
 913
 914        in6_dev = __in6_dev_get(dev);
 915        if (in6_dev) {
 916                in6_dev->cnf.mc_forwarding++;
 917                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 918                                             NETCONFA_MC_FORWARDING,
 919                                             dev->ifindex, &in6_dev->cnf);
 920        }
 921
 922        /* Fill in the VIF structures */
 923        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 924                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 925                        MIFF_REGISTER);
 926
 927        /* And finish update writing critical data */
 928        write_lock_bh(&mrt_lock);
 929        v->dev = dev;
 930#ifdef CONFIG_IPV6_PIMSM_V2
 931        if (v->flags & MIFF_REGISTER)
 932                mrt->mroute_reg_vif_num = vifi;
 933#endif
 934        if (vifi + 1 > mrt->maxvif)
 935                mrt->maxvif = vifi + 1;
 936        write_unlock_bh(&mrt_lock);
 937        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 938                                       v, vifi, mrt->id);
 939        return 0;
 940}
 941
 942static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 943                                           const struct in6_addr *origin,
 944                                           const struct in6_addr *mcastgrp)
 945{
 946        struct mfc6_cache_cmp_arg arg = {
 947                .mf6c_origin = *origin,
 948                .mf6c_mcastgrp = *mcastgrp,
 949        };
 950
 951        return mr_mfc_find(mrt, &arg);
 952}
 953
 954/* Look for a (*,G) entry */
 955static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 956                                               struct in6_addr *mcastgrp,
 957                                               mifi_t mifi)
 958{
 959        struct mfc6_cache_cmp_arg arg = {
 960                .mf6c_origin = in6addr_any,
 961                .mf6c_mcastgrp = *mcastgrp,
 962        };
 963
 964        if (ipv6_addr_any(mcastgrp))
 965                return mr_mfc_find_any_parent(mrt, mifi);
 966        return mr_mfc_find_any(mrt, mifi, &arg);
 967}
 968
 969/* Look for a (S,G,iif) entry if parent != -1 */
 970static struct mfc6_cache *
 971ip6mr_cache_find_parent(struct mr_table *mrt,
 972                        const struct in6_addr *origin,
 973                        const struct in6_addr *mcastgrp,
 974                        int parent)
 975{
 976        struct mfc6_cache_cmp_arg arg = {
 977                .mf6c_origin = *origin,
 978                .mf6c_mcastgrp = *mcastgrp,
 979        };
 980
 981        return mr_mfc_find_parent(mrt, &arg, parent);
 982}
 983
 984/* Allocate a multicast cache entry */
 985static struct mfc6_cache *ip6mr_cache_alloc(void)
 986{
 987        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 988        if (!c)
 989                return NULL;
 990        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 991        c->_c.mfc_un.res.minvif = MAXMIFS;
 992        c->_c.free = ip6mr_cache_free_rcu;
 993        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 994        return c;
 995}
 996
 997static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 998{
 999        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1000        if (!c)
1001                return NULL;
1002        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
1003        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
1004        return c;
1005}
1006
1007/*
1008 *      A cache entry has gone into a resolved state from queued
1009 */
1010
1011static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1012                                struct mfc6_cache *uc, struct mfc6_cache *c)
1013{
1014        struct sk_buff *skb;
1015
1016        /*
1017         *      Play the pending entries through our router
1018         */
1019
1020        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1021                if (ipv6_hdr(skb)->version == 0) {
1022                        struct nlmsghdr *nlh = skb_pull(skb,
1023                                                        sizeof(struct ipv6hdr));
1024
1025                        if (mr_fill_mroute(mrt, skb, &c->_c,
1026                                           nlmsg_data(nlh)) > 0) {
1027                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1028                        } else {
1029                                nlh->nlmsg_type = NLMSG_ERROR;
1030                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1031                                skb_trim(skb, nlh->nlmsg_len);
1032                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1033                        }
1034                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1035                } else
1036                        ip6_mr_forward(net, mrt, skb, c);
1037        }
1038}
1039
1040/*
1041 *      Bounce a cache query up to pim6sd and netlink.
1042 *
1043 *      Called under mrt_lock.
1044 */
1045
1046static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1047                              mifi_t mifi, int assert)
1048{
1049        struct sock *mroute6_sk;
1050        struct sk_buff *skb;
1051        struct mrt6msg *msg;
1052        int ret;
1053
1054#ifdef CONFIG_IPV6_PIMSM_V2
1055        if (assert == MRT6MSG_WHOLEPKT)
1056                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1057                                                +sizeof(*msg));
1058        else
1059#endif
1060                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1061
1062        if (!skb)
1063                return -ENOBUFS;
1064
1065        /* I suppose that internal messages
1066         * do not require checksums */
1067
1068        skb->ip_summed = CHECKSUM_UNNECESSARY;
1069
1070#ifdef CONFIG_IPV6_PIMSM_V2
1071        if (assert == MRT6MSG_WHOLEPKT) {
1072                /* Ugly, but we have no choice with this interface.
1073                   Duplicate old header, fix length etc.
1074                   And all this only to mangle msg->im6_msgtype and
1075                   to set msg->im6_mbz to "mbz" :-)
1076                 */
1077                skb_push(skb, -skb_network_offset(pkt));
1078
1079                skb_push(skb, sizeof(*msg));
1080                skb_reset_transport_header(skb);
1081                msg = (struct mrt6msg *)skb_transport_header(skb);
1082                msg->im6_mbz = 0;
1083                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1084                msg->im6_mif = mrt->mroute_reg_vif_num;
1085                msg->im6_pad = 0;
1086                msg->im6_src = ipv6_hdr(pkt)->saddr;
1087                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1088
1089                skb->ip_summed = CHECKSUM_UNNECESSARY;
1090        } else
1091#endif
1092        {
1093        /*
1094         *      Copy the IP header
1095         */
1096
1097        skb_put(skb, sizeof(struct ipv6hdr));
1098        skb_reset_network_header(skb);
1099        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1100
1101        /*
1102         *      Add our header
1103         */
1104        skb_put(skb, sizeof(*msg));
1105        skb_reset_transport_header(skb);
1106        msg = (struct mrt6msg *)skb_transport_header(skb);
1107
1108        msg->im6_mbz = 0;
1109        msg->im6_msgtype = assert;
1110        msg->im6_mif = mifi;
1111        msg->im6_pad = 0;
1112        msg->im6_src = ipv6_hdr(pkt)->saddr;
1113        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1114
1115        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1116        skb->ip_summed = CHECKSUM_UNNECESSARY;
1117        }
1118
1119        rcu_read_lock();
1120        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1121        if (!mroute6_sk) {
1122                rcu_read_unlock();
1123                kfree_skb(skb);
1124                return -EINVAL;
1125        }
1126
1127        mrt6msg_netlink_event(mrt, skb);
1128
1129        /* Deliver to user space multicast routing algorithms */
1130        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1131        rcu_read_unlock();
1132        if (ret < 0) {
1133                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1134                kfree_skb(skb);
1135        }
1136
1137        return ret;
1138}
1139
1140/* Queue a packet for resolution. It gets locked cache entry! */
1141static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1142                                  struct sk_buff *skb)
1143{
1144        struct mfc6_cache *c;
1145        bool found = false;
1146        int err;
1147
1148        spin_lock_bh(&mfc_unres_lock);
1149        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1150                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1151                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1152                        found = true;
1153                        break;
1154                }
1155        }
1156
1157        if (!found) {
1158                /*
1159                 *      Create a new entry if allowable
1160                 */
1161
1162                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1163                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1164                        spin_unlock_bh(&mfc_unres_lock);
1165
1166                        kfree_skb(skb);
1167                        return -ENOBUFS;
1168                }
1169
1170                /* Fill in the new cache entry */
1171                c->_c.mfc_parent = -1;
1172                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1173                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1174
1175                /*
1176                 *      Reflect first query at pim6sd
1177                 */
1178                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1179                if (err < 0) {
1180                        /* If the report failed throw the cache entry
1181                           out - Brad Parker
1182                         */
1183                        spin_unlock_bh(&mfc_unres_lock);
1184
1185                        ip6mr_cache_free(c);
1186                        kfree_skb(skb);
1187                        return err;
1188                }
1189
1190                atomic_inc(&mrt->cache_resolve_queue_len);
1191                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1192                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1193
1194                ipmr_do_expire_process(mrt);
1195        }
1196
1197        /* See if we can append the packet */
1198        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1199                kfree_skb(skb);
1200                err = -ENOBUFS;
1201        } else {
1202                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1203                err = 0;
1204        }
1205
1206        spin_unlock_bh(&mfc_unres_lock);
1207        return err;
1208}
1209
1210/*
1211 *      MFC6 cache manipulation by user space
1212 */
1213
1214static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1215                            int parent)
1216{
1217        struct mfc6_cache *c;
1218
1219        /* The entries are added/deleted only under RTNL */
1220        rcu_read_lock();
1221        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1222                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1223        rcu_read_unlock();
1224        if (!c)
1225                return -ENOENT;
1226        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1227        list_del_rcu(&c->_c.list);
1228
1229        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1230                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1231        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1232        mr_cache_put(&c->_c);
1233        return 0;
1234}
1235
1236static int ip6mr_device_event(struct notifier_block *this,
1237                              unsigned long event, void *ptr)
1238{
1239        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1240        struct net *net = dev_net(dev);
1241        struct mr_table *mrt;
1242        struct vif_device *v;
1243        int ct;
1244
1245        if (event != NETDEV_UNREGISTER)
1246                return NOTIFY_DONE;
1247
1248        ip6mr_for_each_table(mrt, net) {
1249                v = &mrt->vif_table[0];
1250                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1251                        if (v->dev == dev)
1252                                mif6_delete(mrt, ct, 1, NULL);
1253                }
1254        }
1255
1256        return NOTIFY_DONE;
1257}
1258
1259static unsigned int ip6mr_seq_read(struct net *net)
1260{
1261        ASSERT_RTNL();
1262
1263        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1264}
1265
1266static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1267{
1268        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1269                       ip6mr_mr_table_iter, &mrt_lock);
1270}
1271
1272static struct notifier_block ip6_mr_notifier = {
1273        .notifier_call = ip6mr_device_event
1274};
1275
1276static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1277        .family         = RTNL_FAMILY_IP6MR,
1278        .fib_seq_read   = ip6mr_seq_read,
1279        .fib_dump       = ip6mr_dump,
1280        .owner          = THIS_MODULE,
1281};
1282
1283static int __net_init ip6mr_notifier_init(struct net *net)
1284{
1285        struct fib_notifier_ops *ops;
1286
1287        net->ipv6.ipmr_seq = 0;
1288
1289        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1290        if (IS_ERR(ops))
1291                return PTR_ERR(ops);
1292
1293        net->ipv6.ip6mr_notifier_ops = ops;
1294
1295        return 0;
1296}
1297
1298static void __net_exit ip6mr_notifier_exit(struct net *net)
1299{
1300        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1301        net->ipv6.ip6mr_notifier_ops = NULL;
1302}
1303
1304/* Setup for IP multicast routing */
1305static int __net_init ip6mr_net_init(struct net *net)
1306{
1307        int err;
1308
1309        err = ip6mr_notifier_init(net);
1310        if (err)
1311                return err;
1312
1313        err = ip6mr_rules_init(net);
1314        if (err < 0)
1315                goto ip6mr_rules_fail;
1316
1317#ifdef CONFIG_PROC_FS
1318        err = -ENOMEM;
1319        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1320                goto proc_vif_fail;
1321        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1322                goto proc_cache_fail;
1323#endif
1324
1325        return 0;
1326
1327#ifdef CONFIG_PROC_FS
1328proc_cache_fail:
1329        remove_proc_entry("ip6_mr_vif", net->proc_net);
1330proc_vif_fail:
1331        ip6mr_rules_exit(net);
1332#endif
1333ip6mr_rules_fail:
1334        ip6mr_notifier_exit(net);
1335        return err;
1336}
1337
1338static void __net_exit ip6mr_net_exit(struct net *net)
1339{
1340#ifdef CONFIG_PROC_FS
1341        remove_proc_entry("ip6_mr_cache", net->proc_net);
1342        remove_proc_entry("ip6_mr_vif", net->proc_net);
1343#endif
1344        ip6mr_rules_exit(net);
1345        ip6mr_notifier_exit(net);
1346}
1347
1348static struct pernet_operations ip6mr_net_ops = {
1349        .init = ip6mr_net_init,
1350        .exit = ip6mr_net_exit,
1351};
1352
1353int __init ip6_mr_init(void)
1354{
1355        int err;
1356
1357        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1358                                       sizeof(struct mfc6_cache),
1359                                       0, SLAB_HWCACHE_ALIGN,
1360                                       NULL);
1361        if (!mrt_cachep)
1362                return -ENOMEM;
1363
1364        err = register_pernet_subsys(&ip6mr_net_ops);
1365        if (err)
1366                goto reg_pernet_fail;
1367
1368        err = register_netdevice_notifier(&ip6_mr_notifier);
1369        if (err)
1370                goto reg_notif_fail;
1371#ifdef CONFIG_IPV6_PIMSM_V2
1372        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1373                pr_err("%s: can't add PIM protocol\n", __func__);
1374                err = -EAGAIN;
1375                goto add_proto_fail;
1376        }
1377#endif
1378        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1379                                   NULL, ip6mr_rtm_dumproute, 0);
1380        if (err == 0)
1381                return 0;
1382
1383#ifdef CONFIG_IPV6_PIMSM_V2
1384        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1385add_proto_fail:
1386        unregister_netdevice_notifier(&ip6_mr_notifier);
1387#endif
1388reg_notif_fail:
1389        unregister_pernet_subsys(&ip6mr_net_ops);
1390reg_pernet_fail:
1391        kmem_cache_destroy(mrt_cachep);
1392        return err;
1393}
1394
1395void ip6_mr_cleanup(void)
1396{
1397        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1398#ifdef CONFIG_IPV6_PIMSM_V2
1399        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1400#endif
1401        unregister_netdevice_notifier(&ip6_mr_notifier);
1402        unregister_pernet_subsys(&ip6mr_net_ops);
1403        kmem_cache_destroy(mrt_cachep);
1404}
1405
1406static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1407                         struct mf6cctl *mfc, int mrtsock, int parent)
1408{
1409        unsigned char ttls[MAXMIFS];
1410        struct mfc6_cache *uc, *c;
1411        struct mr_mfc *_uc;
1412        bool found;
1413        int i, err;
1414
1415        if (mfc->mf6cc_parent >= MAXMIFS)
1416                return -ENFILE;
1417
1418        memset(ttls, 255, MAXMIFS);
1419        for (i = 0; i < MAXMIFS; i++) {
1420                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1421                        ttls[i] = 1;
1422        }
1423
1424        /* The entries are added/deleted only under RTNL */
1425        rcu_read_lock();
1426        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1427                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1428        rcu_read_unlock();
1429        if (c) {
1430                write_lock_bh(&mrt_lock);
1431                c->_c.mfc_parent = mfc->mf6cc_parent;
1432                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1433                if (!mrtsock)
1434                        c->_c.mfc_flags |= MFC_STATIC;
1435                write_unlock_bh(&mrt_lock);
1436                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1437                                               c, mrt->id);
1438                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1439                return 0;
1440        }
1441
1442        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1443            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1444                return -EINVAL;
1445
1446        c = ip6mr_cache_alloc();
1447        if (!c)
1448                return -ENOMEM;
1449
1450        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1451        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1452        c->_c.mfc_parent = mfc->mf6cc_parent;
1453        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1454        if (!mrtsock)
1455                c->_c.mfc_flags |= MFC_STATIC;
1456
1457        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1458                                  ip6mr_rht_params);
1459        if (err) {
1460                pr_err("ip6mr: rhtable insert error %d\n", err);
1461                ip6mr_cache_free(c);
1462                return err;
1463        }
1464        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1465
1466        /* Check to see if we resolved a queued list. If so we
1467         * need to send on the frames and tidy up.
1468         */
1469        found = false;
1470        spin_lock_bh(&mfc_unres_lock);
1471        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1472                uc = (struct mfc6_cache *)_uc;
1473                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1474                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1475                        list_del(&_uc->list);
1476                        atomic_dec(&mrt->cache_resolve_queue_len);
1477                        found = true;
1478                        break;
1479                }
1480        }
1481        if (list_empty(&mrt->mfc_unres_queue))
1482                del_timer(&mrt->ipmr_expire_timer);
1483        spin_unlock_bh(&mfc_unres_lock);
1484
1485        if (found) {
1486                ip6mr_cache_resolve(net, mrt, uc, c);
1487                ip6mr_cache_free(uc);
1488        }
1489        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1490                                       c, mrt->id);
1491        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1492        return 0;
1493}
1494
1495/*
1496 *      Close the multicast socket, and clear the vif tables etc
1497 */
1498
1499static void mroute_clean_tables(struct mr_table *mrt, bool all)
1500{
1501        struct mr_mfc *c, *tmp;
1502        LIST_HEAD(list);
1503        int i;
1504
1505        /* Shut down all active vif entries */
1506        for (i = 0; i < mrt->maxvif; i++) {
1507                if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1508                        continue;
1509                mif6_delete(mrt, i, 0, &list);
1510        }
1511        unregister_netdevice_many(&list);
1512
1513        /* Wipe the cache */
1514        list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1515                if (!all && (c->mfc_flags & MFC_STATIC))
1516                        continue;
1517                rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1518                list_del_rcu(&c->list);
1519                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1520                mr_cache_put(c);
1521        }
1522
1523        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1524                spin_lock_bh(&mfc_unres_lock);
1525                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1526                        list_del(&c->list);
1527                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1528                                                       FIB_EVENT_ENTRY_DEL,
1529                                                       (struct mfc6_cache *)c,
1530                                                       mrt->id);
1531                        mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1532                                          RTM_DELROUTE);
1533                        ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1534                }
1535                spin_unlock_bh(&mfc_unres_lock);
1536        }
1537}
1538
1539static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1540{
1541        int err = 0;
1542        struct net *net = sock_net(sk);
1543
1544        rtnl_lock();
1545        write_lock_bh(&mrt_lock);
1546        if (rtnl_dereference(mrt->mroute_sk)) {
1547                err = -EADDRINUSE;
1548        } else {
1549                rcu_assign_pointer(mrt->mroute_sk, sk);
1550                sock_set_flag(sk, SOCK_RCU_FREE);
1551                net->ipv6.devconf_all->mc_forwarding++;
1552        }
1553        write_unlock_bh(&mrt_lock);
1554
1555        if (!err)
1556                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1557                                             NETCONFA_MC_FORWARDING,
1558                                             NETCONFA_IFINDEX_ALL,
1559                                             net->ipv6.devconf_all);
1560        rtnl_unlock();
1561
1562        return err;
1563}
1564
1565int ip6mr_sk_done(struct sock *sk)
1566{
1567        int err = -EACCES;
1568        struct net *net = sock_net(sk);
1569        struct mr_table *mrt;
1570
1571        if (sk->sk_type != SOCK_RAW ||
1572            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1573                return err;
1574
1575        rtnl_lock();
1576        ip6mr_for_each_table(mrt, net) {
1577                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1578                        write_lock_bh(&mrt_lock);
1579                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1580                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1581                         * so the RCU grace period before sk freeing
1582                         * is guaranteed by sk_destruct()
1583                         */
1584                        net->ipv6.devconf_all->mc_forwarding--;
1585                        write_unlock_bh(&mrt_lock);
1586                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1587                                                     NETCONFA_MC_FORWARDING,
1588                                                     NETCONFA_IFINDEX_ALL,
1589                                                     net->ipv6.devconf_all);
1590
1591                        mroute_clean_tables(mrt, false);
1592                        err = 0;
1593                        break;
1594                }
1595        }
1596        rtnl_unlock();
1597
1598        return err;
1599}
1600
1601bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1602{
1603        struct mr_table *mrt;
1604        struct flowi6 fl6 = {
1605                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1606                .flowi6_oif     = skb->dev->ifindex,
1607                .flowi6_mark    = skb->mark,
1608        };
1609
1610        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1611                return NULL;
1612
1613        return rcu_access_pointer(mrt->mroute_sk);
1614}
1615EXPORT_SYMBOL(mroute6_is_socket);
1616
1617/*
1618 *      Socket options and virtual interface manipulation. The whole
1619 *      virtual interface system is a complete heap, but unfortunately
1620 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1621 *      MOSPF/PIM router set up we can clean this up.
1622 */
1623
1624int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1625{
1626        int ret, parent = 0;
1627        struct mif6ctl vif;
1628        struct mf6cctl mfc;
1629        mifi_t mifi;
1630        struct net *net = sock_net(sk);
1631        struct mr_table *mrt;
1632
1633        if (sk->sk_type != SOCK_RAW ||
1634            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1635                return -EOPNOTSUPP;
1636
1637        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1638        if (!mrt)
1639                return -ENOENT;
1640
1641        if (optname != MRT6_INIT) {
1642                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1643                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1644                        return -EACCES;
1645        }
1646
1647        switch (optname) {
1648        case MRT6_INIT:
1649                if (optlen < sizeof(int))
1650                        return -EINVAL;
1651
1652                return ip6mr_sk_init(mrt, sk);
1653
1654        case MRT6_DONE:
1655                return ip6mr_sk_done(sk);
1656
1657        case MRT6_ADD_MIF:
1658                if (optlen < sizeof(vif))
1659                        return -EINVAL;
1660                if (copy_from_user(&vif, optval, sizeof(vif)))
1661                        return -EFAULT;
1662                if (vif.mif6c_mifi >= MAXMIFS)
1663                        return -ENFILE;
1664                rtnl_lock();
1665                ret = mif6_add(net, mrt, &vif,
1666                               sk == rtnl_dereference(mrt->mroute_sk));
1667                rtnl_unlock();
1668                return ret;
1669
1670        case MRT6_DEL_MIF:
1671                if (optlen < sizeof(mifi_t))
1672                        return -EINVAL;
1673                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1674                        return -EFAULT;
1675                rtnl_lock();
1676                ret = mif6_delete(mrt, mifi, 0, NULL);
1677                rtnl_unlock();
1678                return ret;
1679
1680        /*
1681         *      Manipulate the forwarding caches. These live
1682         *      in a sort of kernel/user symbiosis.
1683         */
1684        case MRT6_ADD_MFC:
1685        case MRT6_DEL_MFC:
1686                parent = -1;
1687                /* fall through */
1688        case MRT6_ADD_MFC_PROXY:
1689        case MRT6_DEL_MFC_PROXY:
1690                if (optlen < sizeof(mfc))
1691                        return -EINVAL;
1692                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1693                        return -EFAULT;
1694                if (parent == 0)
1695                        parent = mfc.mf6cc_parent;
1696                rtnl_lock();
1697                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1698                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1699                else
1700                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1701                                            sk ==
1702                                            rtnl_dereference(mrt->mroute_sk),
1703                                            parent);
1704                rtnl_unlock();
1705                return ret;
1706
1707        /*
1708         *      Control PIM assert (to activate pim will activate assert)
1709         */
1710        case MRT6_ASSERT:
1711        {
1712                int v;
1713
1714                if (optlen != sizeof(v))
1715                        return -EINVAL;
1716                if (get_user(v, (int __user *)optval))
1717                        return -EFAULT;
1718                mrt->mroute_do_assert = v;
1719                return 0;
1720        }
1721
1722#ifdef CONFIG_IPV6_PIMSM_V2
1723        case MRT6_PIM:
1724        {
1725                int v;
1726
1727                if (optlen != sizeof(v))
1728                        return -EINVAL;
1729                if (get_user(v, (int __user *)optval))
1730                        return -EFAULT;
1731                v = !!v;
1732                rtnl_lock();
1733                ret = 0;
1734                if (v != mrt->mroute_do_pim) {
1735                        mrt->mroute_do_pim = v;
1736                        mrt->mroute_do_assert = v;
1737                }
1738                rtnl_unlock();
1739                return ret;
1740        }
1741
1742#endif
1743#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1744        case MRT6_TABLE:
1745        {
1746                u32 v;
1747
1748                if (optlen != sizeof(u32))
1749                        return -EINVAL;
1750                if (get_user(v, (u32 __user *)optval))
1751                        return -EFAULT;
1752                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1753                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1754                        return -EINVAL;
1755                if (sk == rcu_access_pointer(mrt->mroute_sk))
1756                        return -EBUSY;
1757
1758                rtnl_lock();
1759                ret = 0;
1760                if (!ip6mr_new_table(net, v))
1761                        ret = -ENOMEM;
1762                raw6_sk(sk)->ip6mr_table = v;
1763                rtnl_unlock();
1764                return ret;
1765        }
1766#endif
1767        /*
1768         *      Spurious command, or MRT6_VERSION which you cannot
1769         *      set.
1770         */
1771        default:
1772                return -ENOPROTOOPT;
1773        }
1774}
1775
1776/*
1777 *      Getsock opt support for the multicast routing system.
1778 */
1779
1780int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1781                          int __user *optlen)
1782{
1783        int olr;
1784        int val;
1785        struct net *net = sock_net(sk);
1786        struct mr_table *mrt;
1787
1788        if (sk->sk_type != SOCK_RAW ||
1789            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1790                return -EOPNOTSUPP;
1791
1792        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1793        if (!mrt)
1794                return -ENOENT;
1795
1796        switch (optname) {
1797        case MRT6_VERSION:
1798                val = 0x0305;
1799                break;
1800#ifdef CONFIG_IPV6_PIMSM_V2
1801        case MRT6_PIM:
1802                val = mrt->mroute_do_pim;
1803                break;
1804#endif
1805        case MRT6_ASSERT:
1806                val = mrt->mroute_do_assert;
1807                break;
1808        default:
1809                return -ENOPROTOOPT;
1810        }
1811
1812        if (get_user(olr, optlen))
1813                return -EFAULT;
1814
1815        olr = min_t(int, olr, sizeof(int));
1816        if (olr < 0)
1817                return -EINVAL;
1818
1819        if (put_user(olr, optlen))
1820                return -EFAULT;
1821        if (copy_to_user(optval, &val, olr))
1822                return -EFAULT;
1823        return 0;
1824}
1825
1826/*
1827 *      The IP multicast ioctl support routines.
1828 */
1829
1830int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1831{
1832        struct sioc_sg_req6 sr;
1833        struct sioc_mif_req6 vr;
1834        struct vif_device *vif;
1835        struct mfc6_cache *c;
1836        struct net *net = sock_net(sk);
1837        struct mr_table *mrt;
1838
1839        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1840        if (!mrt)
1841                return -ENOENT;
1842
1843        switch (cmd) {
1844        case SIOCGETMIFCNT_IN6:
1845                if (copy_from_user(&vr, arg, sizeof(vr)))
1846                        return -EFAULT;
1847                if (vr.mifi >= mrt->maxvif)
1848                        return -EINVAL;
1849                read_lock(&mrt_lock);
1850                vif = &mrt->vif_table[vr.mifi];
1851                if (VIF_EXISTS(mrt, vr.mifi)) {
1852                        vr.icount = vif->pkt_in;
1853                        vr.ocount = vif->pkt_out;
1854                        vr.ibytes = vif->bytes_in;
1855                        vr.obytes = vif->bytes_out;
1856                        read_unlock(&mrt_lock);
1857
1858                        if (copy_to_user(arg, &vr, sizeof(vr)))
1859                                return -EFAULT;
1860                        return 0;
1861                }
1862                read_unlock(&mrt_lock);
1863                return -EADDRNOTAVAIL;
1864        case SIOCGETSGCNT_IN6:
1865                if (copy_from_user(&sr, arg, sizeof(sr)))
1866                        return -EFAULT;
1867
1868                rcu_read_lock();
1869                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1870                if (c) {
1871                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1872                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1873                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1874                        rcu_read_unlock();
1875
1876                        if (copy_to_user(arg, &sr, sizeof(sr)))
1877                                return -EFAULT;
1878                        return 0;
1879                }
1880                rcu_read_unlock();
1881                return -EADDRNOTAVAIL;
1882        default:
1883                return -ENOIOCTLCMD;
1884        }
1885}
1886
1887#ifdef CONFIG_COMPAT
1888struct compat_sioc_sg_req6 {
1889        struct sockaddr_in6 src;
1890        struct sockaddr_in6 grp;
1891        compat_ulong_t pktcnt;
1892        compat_ulong_t bytecnt;
1893        compat_ulong_t wrong_if;
1894};
1895
1896struct compat_sioc_mif_req6 {
1897        mifi_t  mifi;
1898        compat_ulong_t icount;
1899        compat_ulong_t ocount;
1900        compat_ulong_t ibytes;
1901        compat_ulong_t obytes;
1902};
1903
1904int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1905{
1906        struct compat_sioc_sg_req6 sr;
1907        struct compat_sioc_mif_req6 vr;
1908        struct vif_device *vif;
1909        struct mfc6_cache *c;
1910        struct net *net = sock_net(sk);
1911        struct mr_table *mrt;
1912
1913        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1914        if (!mrt)
1915                return -ENOENT;
1916
1917        switch (cmd) {
1918        case SIOCGETMIFCNT_IN6:
1919                if (copy_from_user(&vr, arg, sizeof(vr)))
1920                        return -EFAULT;
1921                if (vr.mifi >= mrt->maxvif)
1922                        return -EINVAL;
1923                read_lock(&mrt_lock);
1924                vif = &mrt->vif_table[vr.mifi];
1925                if (VIF_EXISTS(mrt, vr.mifi)) {
1926                        vr.icount = vif->pkt_in;
1927                        vr.ocount = vif->pkt_out;
1928                        vr.ibytes = vif->bytes_in;
1929                        vr.obytes = vif->bytes_out;
1930                        read_unlock(&mrt_lock);
1931
1932                        if (copy_to_user(arg, &vr, sizeof(vr)))
1933                                return -EFAULT;
1934                        return 0;
1935                }
1936                read_unlock(&mrt_lock);
1937                return -EADDRNOTAVAIL;
1938        case SIOCGETSGCNT_IN6:
1939                if (copy_from_user(&sr, arg, sizeof(sr)))
1940                        return -EFAULT;
1941
1942                rcu_read_lock();
1943                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1944                if (c) {
1945                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1946                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1947                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1948                        rcu_read_unlock();
1949
1950                        if (copy_to_user(arg, &sr, sizeof(sr)))
1951                                return -EFAULT;
1952                        return 0;
1953                }
1954                rcu_read_unlock();
1955                return -EADDRNOTAVAIL;
1956        default:
1957                return -ENOIOCTLCMD;
1958        }
1959}
1960#endif
1961
1962static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1963{
1964        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1965                        IPSTATS_MIB_OUTFORWDATAGRAMS);
1966        __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1967                        IPSTATS_MIB_OUTOCTETS, skb->len);
1968        return dst_output(net, sk, skb);
1969}
1970
1971/*
1972 *      Processing handlers for ip6mr_forward
1973 */
1974
1975static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1976                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1977{
1978        struct ipv6hdr *ipv6h;
1979        struct vif_device *vif = &mrt->vif_table[vifi];
1980        struct net_device *dev;
1981        struct dst_entry *dst;
1982        struct flowi6 fl6;
1983
1984        if (!vif->dev)
1985                goto out_free;
1986
1987#ifdef CONFIG_IPV6_PIMSM_V2
1988        if (vif->flags & MIFF_REGISTER) {
1989                vif->pkt_out++;
1990                vif->bytes_out += skb->len;
1991                vif->dev->stats.tx_bytes += skb->len;
1992                vif->dev->stats.tx_packets++;
1993                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1994                goto out_free;
1995        }
1996#endif
1997
1998        ipv6h = ipv6_hdr(skb);
1999
2000        fl6 = (struct flowi6) {
2001                .flowi6_oif = vif->link,
2002                .daddr = ipv6h->daddr,
2003        };
2004
2005        dst = ip6_route_output(net, NULL, &fl6);
2006        if (dst->error) {
2007                dst_release(dst);
2008                goto out_free;
2009        }
2010
2011        skb_dst_drop(skb);
2012        skb_dst_set(skb, dst);
2013
2014        /*
2015         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2016         * not only before forwarding, but after forwarding on all output
2017         * interfaces. It is clear, if mrouter runs a multicasting
2018         * program, it should receive packets not depending to what interface
2019         * program is joined.
2020         * If we will not make it, the program will have to join on all
2021         * interfaces. On the other hand, multihoming host (or router, but
2022         * not mrouter) cannot join to more than one interface - it will
2023         * result in receiving multiple packets.
2024         */
2025        dev = vif->dev;
2026        skb->dev = dev;
2027        vif->pkt_out++;
2028        vif->bytes_out += skb->len;
2029
2030        /* We are about to write */
2031        /* XXX: extension headers? */
2032        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2033                goto out_free;
2034
2035        ipv6h = ipv6_hdr(skb);
2036        ipv6h->hop_limit--;
2037
2038        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2039
2040        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2041                       net, NULL, skb, skb->dev, dev,
2042                       ip6mr_forward2_finish);
2043
2044out_free:
2045        kfree_skb(skb);
2046        return 0;
2047}
2048
2049static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2050{
2051        int ct;
2052
2053        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2054                if (mrt->vif_table[ct].dev == dev)
2055                        break;
2056        }
2057        return ct;
2058}
2059
2060static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2061                           struct sk_buff *skb, struct mfc6_cache *c)
2062{
2063        int psend = -1;
2064        int vif, ct;
2065        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2066
2067        vif = c->_c.mfc_parent;
2068        c->_c.mfc_un.res.pkt++;
2069        c->_c.mfc_un.res.bytes += skb->len;
2070        c->_c.mfc_un.res.lastuse = jiffies;
2071
2072        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2073                struct mfc6_cache *cache_proxy;
2074
2075                /* For an (*,G) entry, we only check that the incoming
2076                 * interface is part of the static tree.
2077                 */
2078                rcu_read_lock();
2079                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2080                if (cache_proxy &&
2081                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2082                        rcu_read_unlock();
2083                        goto forward;
2084                }
2085                rcu_read_unlock();
2086        }
2087
2088        /*
2089         * Wrong interface: drop packet and (maybe) send PIM assert.
2090         */
2091        if (mrt->vif_table[vif].dev != skb->dev) {
2092                c->_c.mfc_un.res.wrong_if++;
2093
2094                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2095                    /* pimsm uses asserts, when switching from RPT to SPT,
2096                       so that we cannot check that packet arrived on an oif.
2097                       It is bad, but otherwise we would need to move pretty
2098                       large chunk of pimd to kernel. Ough... --ANK
2099                     */
2100                    (mrt->mroute_do_pim ||
2101                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2102                    time_after(jiffies,
2103                               c->_c.mfc_un.res.last_assert +
2104                               MFC_ASSERT_THRESH)) {
2105                        c->_c.mfc_un.res.last_assert = jiffies;
2106                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2107                }
2108                goto dont_forward;
2109        }
2110
2111forward:
2112        mrt->vif_table[vif].pkt_in++;
2113        mrt->vif_table[vif].bytes_in += skb->len;
2114
2115        /*
2116         *      Forward the frame
2117         */
2118        if (ipv6_addr_any(&c->mf6c_origin) &&
2119            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2120                if (true_vifi >= 0 &&
2121                    true_vifi != c->_c.mfc_parent &&
2122                    ipv6_hdr(skb)->hop_limit >
2123                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2124                        /* It's an (*,*) entry and the packet is not coming from
2125                         * the upstream: forward the packet to the upstream
2126                         * only.
2127                         */
2128                        psend = c->_c.mfc_parent;
2129                        goto last_forward;
2130                }
2131                goto dont_forward;
2132        }
2133        for (ct = c->_c.mfc_un.res.maxvif - 1;
2134             ct >= c->_c.mfc_un.res.minvif; ct--) {
2135                /* For (*,G) entry, don't forward to the incoming interface */
2136                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2137                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2138                        if (psend != -1) {
2139                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2140                                if (skb2)
2141                                        ip6mr_forward2(net, mrt, skb2,
2142                                                       c, psend);
2143                        }
2144                        psend = ct;
2145                }
2146        }
2147last_forward:
2148        if (psend != -1) {
2149                ip6mr_forward2(net, mrt, skb, c, psend);
2150                return;
2151        }
2152
2153dont_forward:
2154        kfree_skb(skb);
2155}
2156
2157
2158/*
2159 *      Multicast packets for forwarding arrive here
2160 */
2161
2162int ip6_mr_input(struct sk_buff *skb)
2163{
2164        struct mfc6_cache *cache;
2165        struct net *net = dev_net(skb->dev);
2166        struct mr_table *mrt;
2167        struct flowi6 fl6 = {
2168                .flowi6_iif     = skb->dev->ifindex,
2169                .flowi6_mark    = skb->mark,
2170        };
2171        int err;
2172
2173        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2174        if (err < 0) {
2175                kfree_skb(skb);
2176                return err;
2177        }
2178
2179        read_lock(&mrt_lock);
2180        cache = ip6mr_cache_find(mrt,
2181                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2182        if (!cache) {
2183                int vif = ip6mr_find_vif(mrt, skb->dev);
2184
2185                if (vif >= 0)
2186                        cache = ip6mr_cache_find_any(mrt,
2187                                                     &ipv6_hdr(skb)->daddr,
2188                                                     vif);
2189        }
2190
2191        /*
2192         *      No usable cache entry
2193         */
2194        if (!cache) {
2195                int vif;
2196
2197                vif = ip6mr_find_vif(mrt, skb->dev);
2198                if (vif >= 0) {
2199                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2200                        read_unlock(&mrt_lock);
2201
2202                        return err;
2203                }
2204                read_unlock(&mrt_lock);
2205                kfree_skb(skb);
2206                return -ENODEV;
2207        }
2208
2209        ip6_mr_forward(net, mrt, skb, cache);
2210
2211        read_unlock(&mrt_lock);
2212
2213        return 0;
2214}
2215
2216int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2217                    u32 portid)
2218{
2219        int err;
2220        struct mr_table *mrt;
2221        struct mfc6_cache *cache;
2222        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2223
2224        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2225        if (!mrt)
2226                return -ENOENT;
2227
2228        read_lock(&mrt_lock);
2229        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2230        if (!cache && skb->dev) {
2231                int vif = ip6mr_find_vif(mrt, skb->dev);
2232
2233                if (vif >= 0)
2234                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2235                                                     vif);
2236        }
2237
2238        if (!cache) {
2239                struct sk_buff *skb2;
2240                struct ipv6hdr *iph;
2241                struct net_device *dev;
2242                int vif;
2243
2244                dev = skb->dev;
2245                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2246                        read_unlock(&mrt_lock);
2247                        return -ENODEV;
2248                }
2249
2250                /* really correct? */
2251                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2252                if (!skb2) {
2253                        read_unlock(&mrt_lock);
2254                        return -ENOMEM;
2255                }
2256
2257                NETLINK_CB(skb2).portid = portid;
2258                skb_reset_transport_header(skb2);
2259
2260                skb_put(skb2, sizeof(struct ipv6hdr));
2261                skb_reset_network_header(skb2);
2262
2263                iph = ipv6_hdr(skb2);
2264                iph->version = 0;
2265                iph->priority = 0;
2266                iph->flow_lbl[0] = 0;
2267                iph->flow_lbl[1] = 0;
2268                iph->flow_lbl[2] = 0;
2269                iph->payload_len = 0;
2270                iph->nexthdr = IPPROTO_NONE;
2271                iph->hop_limit = 0;
2272                iph->saddr = rt->rt6i_src.addr;
2273                iph->daddr = rt->rt6i_dst.addr;
2274
2275                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2276                read_unlock(&mrt_lock);
2277
2278                return err;
2279        }
2280
2281        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2282        read_unlock(&mrt_lock);
2283        return err;
2284}
2285
2286static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2287                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2288                             int flags)
2289{
2290        struct nlmsghdr *nlh;
2291        struct rtmsg *rtm;
2292        int err;
2293
2294        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2295        if (!nlh)
2296                return -EMSGSIZE;
2297
2298        rtm = nlmsg_data(nlh);
2299        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2300        rtm->rtm_dst_len  = 128;
2301        rtm->rtm_src_len  = 128;
2302        rtm->rtm_tos      = 0;
2303        rtm->rtm_table    = mrt->id;
2304        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2305                goto nla_put_failure;
2306        rtm->rtm_type = RTN_MULTICAST;
2307        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2308        if (c->_c.mfc_flags & MFC_STATIC)
2309                rtm->rtm_protocol = RTPROT_STATIC;
2310        else
2311                rtm->rtm_protocol = RTPROT_MROUTED;
2312        rtm->rtm_flags    = 0;
2313
2314        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2315            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2316                goto nla_put_failure;
2317        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2318        /* do not break the dump if cache is unresolved */
2319        if (err < 0 && err != -ENOENT)
2320                goto nla_put_failure;
2321
2322        nlmsg_end(skb, nlh);
2323        return 0;
2324
2325nla_put_failure:
2326        nlmsg_cancel(skb, nlh);
2327        return -EMSGSIZE;
2328}
2329
2330static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2331                              u32 portid, u32 seq, struct mr_mfc *c,
2332                              int cmd, int flags)
2333{
2334        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2335                                 cmd, flags);
2336}
2337
2338static int mr6_msgsize(bool unresolved, int maxvif)
2339{
2340        size_t len =
2341                NLMSG_ALIGN(sizeof(struct rtmsg))
2342                + nla_total_size(4)     /* RTA_TABLE */
2343                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2344                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2345                ;
2346
2347        if (!unresolved)
2348                len = len
2349                      + nla_total_size(4)       /* RTA_IIF */
2350                      + nla_total_size(0)       /* RTA_MULTIPATH */
2351                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2352                                                /* RTA_MFC_STATS */
2353                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2354                ;
2355
2356        return len;
2357}
2358
2359static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2360                              int cmd)
2361{
2362        struct net *net = read_pnet(&mrt->net);
2363        struct sk_buff *skb;
2364        int err = -ENOBUFS;
2365
2366        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2367                        GFP_ATOMIC);
2368        if (!skb)
2369                goto errout;
2370
2371        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2372        if (err < 0)
2373                goto errout;
2374
2375        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2376        return;
2377
2378errout:
2379        kfree_skb(skb);
2380        if (err < 0)
2381                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2382}
2383
2384static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2385{
2386        size_t len =
2387                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2388                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2389                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2390                                        /* IP6MRA_CREPORT_SRC_ADDR */
2391                + nla_total_size(sizeof(struct in6_addr))
2392                                        /* IP6MRA_CREPORT_DST_ADDR */
2393                + nla_total_size(sizeof(struct in6_addr))
2394                                        /* IP6MRA_CREPORT_PKT */
2395                + nla_total_size(payloadlen)
2396                ;
2397
2398        return len;
2399}
2400
2401static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2402{
2403        struct net *net = read_pnet(&mrt->net);
2404        struct nlmsghdr *nlh;
2405        struct rtgenmsg *rtgenm;
2406        struct mrt6msg *msg;
2407        struct sk_buff *skb;
2408        struct nlattr *nla;
2409        int payloadlen;
2410
2411        payloadlen = pkt->len - sizeof(struct mrt6msg);
2412        msg = (struct mrt6msg *)skb_transport_header(pkt);
2413
2414        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2415        if (!skb)
2416                goto errout;
2417
2418        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2419                        sizeof(struct rtgenmsg), 0);
2420        if (!nlh)
2421                goto errout;
2422        rtgenm = nlmsg_data(nlh);
2423        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2424        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2425            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2426            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2427                             &msg->im6_src) ||
2428            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2429                             &msg->im6_dst))
2430                goto nla_put_failure;
2431
2432        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2433        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2434                                  nla_data(nla), payloadlen))
2435                goto nla_put_failure;
2436
2437        nlmsg_end(skb, nlh);
2438
2439        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2440        return;
2441
2442nla_put_failure:
2443        nlmsg_cancel(skb, nlh);
2444errout:
2445        kfree_skb(skb);
2446        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2447}
2448
2449static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2450{
2451        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2452                                _ip6mr_fill_mroute, &mfc_unres_lock);
2453}
2454