linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <linux/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/mm.h>
  24#include <linux/kernel.h>
  25#include <linux/fcntl.h>
  26#include <linux/stat.h>
  27#include <linux/socket.h>
  28#include <linux/inet.h>
  29#include <linux/netdevice.h>
  30#include <linux/inetdevice.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/init.h>
  34#include <linux/compat.h>
  35#include <net/protocol.h>
  36#include <linux/skbuff.h>
  37#include <net/raw.h>
  38#include <linux/notifier.h>
  39#include <linux/if_arp.h>
  40#include <net/checksum.h>
  41#include <net/netlink.h>
  42#include <net/fib_rules.h>
  43
  44#include <net/ipv6.h>
  45#include <net/ip6_route.h>
  46#include <linux/mroute6.h>
  47#include <linux/pim.h>
  48#include <net/addrconf.h>
  49#include <linux/netfilter_ipv6.h>
  50#include <linux/export.h>
  51#include <net/ip6_checksum.h>
  52#include <linux/netconf.h>
  53
  54struct ip6mr_rule {
  55        struct fib_rule         common;
  56};
  57
  58struct ip6mr_result {
  59        struct mr_table *mrt;
  60};
  61
  62/* Big lock, protecting vif table, mrt cache and mroute socket state.
  63   Note that the changes are semaphored via rtnl_lock.
  64 */
  65
  66static DEFINE_RWLOCK(mrt_lock);
  67
  68/* Multicast router control variables */
  69
  70/* Special spinlock for queue of unresolved entries */
  71static DEFINE_SPINLOCK(mfc_unres_lock);
  72
  73/* We return to original Alan's scheme. Hash table of resolved
  74   entries is changed only in process context and protected
  75   with weak lock mrt_lock. Queue of unresolved entries is protected
  76   with strong spinlock mfc_unres_lock.
  77
  78   In this case data path is free of exclusive locks at all.
  79 */
  80
  81static struct kmem_cache *mrt_cachep __read_mostly;
  82
  83static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  84static void ip6mr_free_table(struct mr_table *mrt);
  85
  86static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  87                           struct sk_buff *skb, struct mfc6_cache *cache);
  88static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  89                              mifi_t mifi, int assert);
  90static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  91                              int cmd);
  92static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  93static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  94                               struct netlink_callback *cb);
  95static void mroute_clean_tables(struct mr_table *mrt, bool all);
  96static void ipmr_expire_process(struct timer_list *t);
  97
  98#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
  99#define ip6mr_for_each_table(mrt, net) \
 100        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 101
 102static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 103                                            struct mr_table *mrt)
 104{
 105        struct mr_table *ret;
 106
 107        if (!mrt)
 108                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 109                                     struct mr_table, list);
 110        else
 111                ret = list_entry_rcu(mrt->list.next,
 112                                     struct mr_table, list);
 113
 114        if (&ret->list == &net->ipv6.mr6_tables)
 115                return NULL;
 116        return ret;
 117}
 118
 119static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 120{
 121        struct mr_table *mrt;
 122
 123        ip6mr_for_each_table(mrt, net) {
 124                if (mrt->id == id)
 125                        return mrt;
 126        }
 127        return NULL;
 128}
 129
 130static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 131                            struct mr_table **mrt)
 132{
 133        int err;
 134        struct ip6mr_result res;
 135        struct fib_lookup_arg arg = {
 136                .result = &res,
 137                .flags = FIB_LOOKUP_NOREF,
 138        };
 139
 140        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 141                               flowi6_to_flowi(flp6), 0, &arg);
 142        if (err < 0)
 143                return err;
 144        *mrt = res.mrt;
 145        return 0;
 146}
 147
 148static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 149                             int flags, struct fib_lookup_arg *arg)
 150{
 151        struct ip6mr_result *res = arg->result;
 152        struct mr_table *mrt;
 153
 154        switch (rule->action) {
 155        case FR_ACT_TO_TBL:
 156                break;
 157        case FR_ACT_UNREACHABLE:
 158                return -ENETUNREACH;
 159        case FR_ACT_PROHIBIT:
 160                return -EACCES;
 161        case FR_ACT_BLACKHOLE:
 162        default:
 163                return -EINVAL;
 164        }
 165
 166        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 167        if (!mrt)
 168                return -EAGAIN;
 169        res->mrt = mrt;
 170        return 0;
 171}
 172
 173static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 174{
 175        return 1;
 176}
 177
 178static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 179        FRA_GENERIC_POLICY,
 180};
 181
 182static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 183                                struct fib_rule_hdr *frh, struct nlattr **tb,
 184                                struct netlink_ext_ack *extack)
 185{
 186        return 0;
 187}
 188
 189static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 190                              struct nlattr **tb)
 191{
 192        return 1;
 193}
 194
 195static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 196                           struct fib_rule_hdr *frh)
 197{
 198        frh->dst_len = 0;
 199        frh->src_len = 0;
 200        frh->tos     = 0;
 201        return 0;
 202}
 203
 204static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 205        .family         = RTNL_FAMILY_IP6MR,
 206        .rule_size      = sizeof(struct ip6mr_rule),
 207        .addr_size      = sizeof(struct in6_addr),
 208        .action         = ip6mr_rule_action,
 209        .match          = ip6mr_rule_match,
 210        .configure      = ip6mr_rule_configure,
 211        .compare        = ip6mr_rule_compare,
 212        .fill           = ip6mr_rule_fill,
 213        .nlgroup        = RTNLGRP_IPV6_RULE,
 214        .policy         = ip6mr_rule_policy,
 215        .owner          = THIS_MODULE,
 216};
 217
 218static int __net_init ip6mr_rules_init(struct net *net)
 219{
 220        struct fib_rules_ops *ops;
 221        struct mr_table *mrt;
 222        int err;
 223
 224        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 225        if (IS_ERR(ops))
 226                return PTR_ERR(ops);
 227
 228        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 229
 230        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 231        if (IS_ERR(mrt)) {
 232                err = PTR_ERR(mrt);
 233                goto err1;
 234        }
 235
 236        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 237        if (err < 0)
 238                goto err2;
 239
 240        net->ipv6.mr6_rules_ops = ops;
 241        return 0;
 242
 243err2:
 244        ip6mr_free_table(mrt);
 245err1:
 246        fib_rules_unregister(ops);
 247        return err;
 248}
 249
 250static void __net_exit ip6mr_rules_exit(struct net *net)
 251{
 252        struct mr_table *mrt, *next;
 253
 254        rtnl_lock();
 255        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 256                list_del(&mrt->list);
 257                ip6mr_free_table(mrt);
 258        }
 259        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 260        rtnl_unlock();
 261}
 262
 263static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 264{
 265        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
 266}
 267
 268static unsigned int ip6mr_rules_seq_read(struct net *net)
 269{
 270        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 271}
 272
 273bool ip6mr_rule_default(const struct fib_rule *rule)
 274{
 275        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 276               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 277}
 278EXPORT_SYMBOL(ip6mr_rule_default);
 279#else
 280#define ip6mr_for_each_table(mrt, net) \
 281        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 282
 283static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 284                                            struct mr_table *mrt)
 285{
 286        if (!mrt)
 287                return net->ipv6.mrt6;
 288        return NULL;
 289}
 290
 291static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 292{
 293        return net->ipv6.mrt6;
 294}
 295
 296static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 297                            struct mr_table **mrt)
 298{
 299        *mrt = net->ipv6.mrt6;
 300        return 0;
 301}
 302
 303static int __net_init ip6mr_rules_init(struct net *net)
 304{
 305        struct mr_table *mrt;
 306
 307        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 308        if (IS_ERR(mrt))
 309                return PTR_ERR(mrt);
 310        net->ipv6.mrt6 = mrt;
 311        return 0;
 312}
 313
 314static void __net_exit ip6mr_rules_exit(struct net *net)
 315{
 316        rtnl_lock();
 317        ip6mr_free_table(net->ipv6.mrt6);
 318        net->ipv6.mrt6 = NULL;
 319        rtnl_unlock();
 320}
 321
 322static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 323{
 324        return 0;
 325}
 326
 327static unsigned int ip6mr_rules_seq_read(struct net *net)
 328{
 329        return 0;
 330}
 331#endif
 332
 333static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 334                          const void *ptr)
 335{
 336        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 337        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 338
 339        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 340               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 341}
 342
 343static const struct rhashtable_params ip6mr_rht_params = {
 344        .head_offset = offsetof(struct mr_mfc, mnode),
 345        .key_offset = offsetof(struct mfc6_cache, cmparg),
 346        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 347        .nelem_hint = 3,
 348        .locks_mul = 1,
 349        .obj_cmpfn = ip6mr_hash_cmp,
 350        .automatic_shrinking = true,
 351};
 352
 353static void ip6mr_new_table_set(struct mr_table *mrt,
 354                                struct net *net)
 355{
 356#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 357        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 358#endif
 359}
 360
 361static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 362        .mf6c_origin = IN6ADDR_ANY_INIT,
 363        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 364};
 365
 366static struct mr_table_ops ip6mr_mr_table_ops = {
 367        .rht_params = &ip6mr_rht_params,
 368        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 369};
 370
 371static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 372{
 373        struct mr_table *mrt;
 374
 375        mrt = ip6mr_get_table(net, id);
 376        if (mrt)
 377                return mrt;
 378
 379        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 380                              ipmr_expire_process, ip6mr_new_table_set);
 381}
 382
 383static void ip6mr_free_table(struct mr_table *mrt)
 384{
 385        del_timer_sync(&mrt->ipmr_expire_timer);
 386        mroute_clean_tables(mrt, true);
 387        rhltable_destroy(&mrt->mfc_hash);
 388        kfree(mrt);
 389}
 390
 391#ifdef CONFIG_PROC_FS
 392/* The /proc interfaces to multicast routing
 393 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 394 */
 395
 396static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 397        __acquires(mrt_lock)
 398{
 399        struct mr_vif_iter *iter = seq->private;
 400        struct net *net = seq_file_net(seq);
 401        struct mr_table *mrt;
 402
 403        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 404        if (!mrt)
 405                return ERR_PTR(-ENOENT);
 406
 407        iter->mrt = mrt;
 408
 409        read_lock(&mrt_lock);
 410        return mr_vif_seq_start(seq, pos);
 411}
 412
 413static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 414        __releases(mrt_lock)
 415{
 416        read_unlock(&mrt_lock);
 417}
 418
 419static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 420{
 421        struct mr_vif_iter *iter = seq->private;
 422        struct mr_table *mrt = iter->mrt;
 423
 424        if (v == SEQ_START_TOKEN) {
 425                seq_puts(seq,
 426                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 427        } else {
 428                const struct vif_device *vif = v;
 429                const char *name = vif->dev ? vif->dev->name : "none";
 430
 431                seq_printf(seq,
 432                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 433                           vif - mrt->vif_table,
 434                           name, vif->bytes_in, vif->pkt_in,
 435                           vif->bytes_out, vif->pkt_out,
 436                           vif->flags);
 437        }
 438        return 0;
 439}
 440
 441static const struct seq_operations ip6mr_vif_seq_ops = {
 442        .start = ip6mr_vif_seq_start,
 443        .next  = mr_vif_seq_next,
 444        .stop  = ip6mr_vif_seq_stop,
 445        .show  = ip6mr_vif_seq_show,
 446};
 447
 448static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 449{
 450        struct net *net = seq_file_net(seq);
 451        struct mr_table *mrt;
 452
 453        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 454        if (!mrt)
 455                return ERR_PTR(-ENOENT);
 456
 457        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 458}
 459
 460static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 461{
 462        int n;
 463
 464        if (v == SEQ_START_TOKEN) {
 465                seq_puts(seq,
 466                         "Group                            "
 467                         "Origin                           "
 468                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 469        } else {
 470                const struct mfc6_cache *mfc = v;
 471                const struct mr_mfc_iter *it = seq->private;
 472                struct mr_table *mrt = it->mrt;
 473
 474                seq_printf(seq, "%pI6 %pI6 %-3hd",
 475                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 476                           mfc->_c.mfc_parent);
 477
 478                if (it->cache != &mrt->mfc_unres_queue) {
 479                        seq_printf(seq, " %8lu %8lu %8lu",
 480                                   mfc->_c.mfc_un.res.pkt,
 481                                   mfc->_c.mfc_un.res.bytes,
 482                                   mfc->_c.mfc_un.res.wrong_if);
 483                        for (n = mfc->_c.mfc_un.res.minvif;
 484                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 485                                if (VIF_EXISTS(mrt, n) &&
 486                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 487                                        seq_printf(seq,
 488                                                   " %2d:%-3d", n,
 489                                                   mfc->_c.mfc_un.res.ttls[n]);
 490                        }
 491                } else {
 492                        /* unresolved mfc_caches don't contain
 493                         * pkt, bytes and wrong_if values
 494                         */
 495                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 496                }
 497                seq_putc(seq, '\n');
 498        }
 499        return 0;
 500}
 501
 502static const struct seq_operations ipmr_mfc_seq_ops = {
 503        .start = ipmr_mfc_seq_start,
 504        .next  = mr_mfc_seq_next,
 505        .stop  = mr_mfc_seq_stop,
 506        .show  = ipmr_mfc_seq_show,
 507};
 508#endif
 509
 510#ifdef CONFIG_IPV6_PIMSM_V2
 511
 512static int pim6_rcv(struct sk_buff *skb)
 513{
 514        struct pimreghdr *pim;
 515        struct ipv6hdr   *encap;
 516        struct net_device  *reg_dev = NULL;
 517        struct net *net = dev_net(skb->dev);
 518        struct mr_table *mrt;
 519        struct flowi6 fl6 = {
 520                .flowi6_iif     = skb->dev->ifindex,
 521                .flowi6_mark    = skb->mark,
 522        };
 523        int reg_vif_num;
 524
 525        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 526                goto drop;
 527
 528        pim = (struct pimreghdr *)skb_transport_header(skb);
 529        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 530            (pim->flags & PIM_NULL_REGISTER) ||
 531            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 532                             sizeof(*pim), IPPROTO_PIM,
 533                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 534             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 535                goto drop;
 536
 537        /* check if the inner packet is destined to mcast group */
 538        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 539                                   sizeof(*pim));
 540
 541        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 542            encap->payload_len == 0 ||
 543            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 544                goto drop;
 545
 546        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 547                goto drop;
 548        reg_vif_num = mrt->mroute_reg_vif_num;
 549
 550        read_lock(&mrt_lock);
 551        if (reg_vif_num >= 0)
 552                reg_dev = mrt->vif_table[reg_vif_num].dev;
 553        if (reg_dev)
 554                dev_hold(reg_dev);
 555        read_unlock(&mrt_lock);
 556
 557        if (!reg_dev)
 558                goto drop;
 559
 560        skb->mac_header = skb->network_header;
 561        skb_pull(skb, (u8 *)encap - skb->data);
 562        skb_reset_network_header(skb);
 563        skb->protocol = htons(ETH_P_IPV6);
 564        skb->ip_summed = CHECKSUM_NONE;
 565
 566        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 567
 568        netif_rx(skb);
 569
 570        dev_put(reg_dev);
 571        return 0;
 572 drop:
 573        kfree_skb(skb);
 574        return 0;
 575}
 576
 577static const struct inet6_protocol pim6_protocol = {
 578        .handler        =       pim6_rcv,
 579};
 580
 581/* Service routines creating virtual interfaces: PIMREG */
 582
 583static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 584                                      struct net_device *dev)
 585{
 586        struct net *net = dev_net(dev);
 587        struct mr_table *mrt;
 588        struct flowi6 fl6 = {
 589                .flowi6_oif     = dev->ifindex,
 590                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 591                .flowi6_mark    = skb->mark,
 592        };
 593        int err;
 594
 595        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 596        if (err < 0) {
 597                kfree_skb(skb);
 598                return err;
 599        }
 600
 601        read_lock(&mrt_lock);
 602        dev->stats.tx_bytes += skb->len;
 603        dev->stats.tx_packets++;
 604        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 605        read_unlock(&mrt_lock);
 606        kfree_skb(skb);
 607        return NETDEV_TX_OK;
 608}
 609
 610static int reg_vif_get_iflink(const struct net_device *dev)
 611{
 612        return 0;
 613}
 614
 615static const struct net_device_ops reg_vif_netdev_ops = {
 616        .ndo_start_xmit = reg_vif_xmit,
 617        .ndo_get_iflink = reg_vif_get_iflink,
 618};
 619
 620static void reg_vif_setup(struct net_device *dev)
 621{
 622        dev->type               = ARPHRD_PIMREG;
 623        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 624        dev->flags              = IFF_NOARP;
 625        dev->netdev_ops         = &reg_vif_netdev_ops;
 626        dev->needs_free_netdev  = true;
 627        dev->features           |= NETIF_F_NETNS_LOCAL;
 628}
 629
 630static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 631{
 632        struct net_device *dev;
 633        char name[IFNAMSIZ];
 634
 635        if (mrt->id == RT6_TABLE_DFLT)
 636                sprintf(name, "pim6reg");
 637        else
 638                sprintf(name, "pim6reg%u", mrt->id);
 639
 640        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 641        if (!dev)
 642                return NULL;
 643
 644        dev_net_set(dev, net);
 645
 646        if (register_netdevice(dev)) {
 647                free_netdev(dev);
 648                return NULL;
 649        }
 650
 651        if (dev_open(dev))
 652                goto failure;
 653
 654        dev_hold(dev);
 655        return dev;
 656
 657failure:
 658        unregister_netdevice(dev);
 659        return NULL;
 660}
 661#endif
 662
 663static int call_ip6mr_vif_entry_notifiers(struct net *net,
 664                                          enum fib_event_type event_type,
 665                                          struct vif_device *vif,
 666                                          mifi_t vif_index, u32 tb_id)
 667{
 668        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 669                                     vif, vif_index, tb_id,
 670                                     &net->ipv6.ipmr_seq);
 671}
 672
 673static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 674                                          enum fib_event_type event_type,
 675                                          struct mfc6_cache *mfc, u32 tb_id)
 676{
 677        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 678                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 679}
 680
 681/* Delete a VIF entry */
 682static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 683                       struct list_head *head)
 684{
 685        struct vif_device *v;
 686        struct net_device *dev;
 687        struct inet6_dev *in6_dev;
 688
 689        if (vifi < 0 || vifi >= mrt->maxvif)
 690                return -EADDRNOTAVAIL;
 691
 692        v = &mrt->vif_table[vifi];
 693
 694        if (VIF_EXISTS(mrt, vifi))
 695                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 696                                               FIB_EVENT_VIF_DEL, v, vifi,
 697                                               mrt->id);
 698
 699        write_lock_bh(&mrt_lock);
 700        dev = v->dev;
 701        v->dev = NULL;
 702
 703        if (!dev) {
 704                write_unlock_bh(&mrt_lock);
 705                return -EADDRNOTAVAIL;
 706        }
 707
 708#ifdef CONFIG_IPV6_PIMSM_V2
 709        if (vifi == mrt->mroute_reg_vif_num)
 710                mrt->mroute_reg_vif_num = -1;
 711#endif
 712
 713        if (vifi + 1 == mrt->maxvif) {
 714                int tmp;
 715                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 716                        if (VIF_EXISTS(mrt, tmp))
 717                                break;
 718                }
 719                mrt->maxvif = tmp + 1;
 720        }
 721
 722        write_unlock_bh(&mrt_lock);
 723
 724        dev_set_allmulti(dev, -1);
 725
 726        in6_dev = __in6_dev_get(dev);
 727        if (in6_dev) {
 728                in6_dev->cnf.mc_forwarding--;
 729                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 730                                             NETCONFA_MC_FORWARDING,
 731                                             dev->ifindex, &in6_dev->cnf);
 732        }
 733
 734        if ((v->flags & MIFF_REGISTER) && !notify)
 735                unregister_netdevice_queue(dev, head);
 736
 737        dev_put(dev);
 738        return 0;
 739}
 740
 741static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 742{
 743        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 744
 745        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 746}
 747
 748static inline void ip6mr_cache_free(struct mfc6_cache *c)
 749{
 750        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 751}
 752
 753/* Destroy an unresolved cache entry, killing queued skbs
 754   and reporting error to netlink readers.
 755 */
 756
 757static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 758{
 759        struct net *net = read_pnet(&mrt->net);
 760        struct sk_buff *skb;
 761
 762        atomic_dec(&mrt->cache_resolve_queue_len);
 763
 764        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 765                if (ipv6_hdr(skb)->version == 0) {
 766                        struct nlmsghdr *nlh = skb_pull(skb,
 767                                                        sizeof(struct ipv6hdr));
 768                        nlh->nlmsg_type = NLMSG_ERROR;
 769                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 770                        skb_trim(skb, nlh->nlmsg_len);
 771                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 772                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 773                } else
 774                        kfree_skb(skb);
 775        }
 776
 777        ip6mr_cache_free(c);
 778}
 779
 780
 781/* Timer process for all the unresolved queue. */
 782
 783static void ipmr_do_expire_process(struct mr_table *mrt)
 784{
 785        unsigned long now = jiffies;
 786        unsigned long expires = 10 * HZ;
 787        struct mr_mfc *c, *next;
 788
 789        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 790                if (time_after(c->mfc_un.unres.expires, now)) {
 791                        /* not yet... */
 792                        unsigned long interval = c->mfc_un.unres.expires - now;
 793                        if (interval < expires)
 794                                expires = interval;
 795                        continue;
 796                }
 797
 798                list_del(&c->list);
 799                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 800                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 801        }
 802
 803        if (!list_empty(&mrt->mfc_unres_queue))
 804                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 805}
 806
 807static void ipmr_expire_process(struct timer_list *t)
 808{
 809        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 810
 811        if (!spin_trylock(&mfc_unres_lock)) {
 812                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 813                return;
 814        }
 815
 816        if (!list_empty(&mrt->mfc_unres_queue))
 817                ipmr_do_expire_process(mrt);
 818
 819        spin_unlock(&mfc_unres_lock);
 820}
 821
 822/* Fill oifs list. It is called under write locked mrt_lock. */
 823
 824static void ip6mr_update_thresholds(struct mr_table *mrt,
 825                                    struct mr_mfc *cache,
 826                                    unsigned char *ttls)
 827{
 828        int vifi;
 829
 830        cache->mfc_un.res.minvif = MAXMIFS;
 831        cache->mfc_un.res.maxvif = 0;
 832        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 833
 834        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 835                if (VIF_EXISTS(mrt, vifi) &&
 836                    ttls[vifi] && ttls[vifi] < 255) {
 837                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 838                        if (cache->mfc_un.res.minvif > vifi)
 839                                cache->mfc_un.res.minvif = vifi;
 840                        if (cache->mfc_un.res.maxvif <= vifi)
 841                                cache->mfc_un.res.maxvif = vifi + 1;
 842                }
 843        }
 844        cache->mfc_un.res.lastuse = jiffies;
 845}
 846
 847static int mif6_add(struct net *net, struct mr_table *mrt,
 848                    struct mif6ctl *vifc, int mrtsock)
 849{
 850        int vifi = vifc->mif6c_mifi;
 851        struct vif_device *v = &mrt->vif_table[vifi];
 852        struct net_device *dev;
 853        struct inet6_dev *in6_dev;
 854        int err;
 855
 856        /* Is vif busy ? */
 857        if (VIF_EXISTS(mrt, vifi))
 858                return -EADDRINUSE;
 859
 860        switch (vifc->mif6c_flags) {
 861#ifdef CONFIG_IPV6_PIMSM_V2
 862        case MIFF_REGISTER:
 863                /*
 864                 * Special Purpose VIF in PIM
 865                 * All the packets will be sent to the daemon
 866                 */
 867                if (mrt->mroute_reg_vif_num >= 0)
 868                        return -EADDRINUSE;
 869                dev = ip6mr_reg_vif(net, mrt);
 870                if (!dev)
 871                        return -ENOBUFS;
 872                err = dev_set_allmulti(dev, 1);
 873                if (err) {
 874                        unregister_netdevice(dev);
 875                        dev_put(dev);
 876                        return err;
 877                }
 878                break;
 879#endif
 880        case 0:
 881                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 882                if (!dev)
 883                        return -EADDRNOTAVAIL;
 884                err = dev_set_allmulti(dev, 1);
 885                if (err) {
 886                        dev_put(dev);
 887                        return err;
 888                }
 889                break;
 890        default:
 891                return -EINVAL;
 892        }
 893
 894        in6_dev = __in6_dev_get(dev);
 895        if (in6_dev) {
 896                in6_dev->cnf.mc_forwarding++;
 897                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 898                                             NETCONFA_MC_FORWARDING,
 899                                             dev->ifindex, &in6_dev->cnf);
 900        }
 901
 902        /* Fill in the VIF structures */
 903        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 904                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 905                        MIFF_REGISTER);
 906
 907        /* And finish update writing critical data */
 908        write_lock_bh(&mrt_lock);
 909        v->dev = dev;
 910#ifdef CONFIG_IPV6_PIMSM_V2
 911        if (v->flags & MIFF_REGISTER)
 912                mrt->mroute_reg_vif_num = vifi;
 913#endif
 914        if (vifi + 1 > mrt->maxvif)
 915                mrt->maxvif = vifi + 1;
 916        write_unlock_bh(&mrt_lock);
 917        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 918                                       v, vifi, mrt->id);
 919        return 0;
 920}
 921
 922static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 923                                           const struct in6_addr *origin,
 924                                           const struct in6_addr *mcastgrp)
 925{
 926        struct mfc6_cache_cmp_arg arg = {
 927                .mf6c_origin = *origin,
 928                .mf6c_mcastgrp = *mcastgrp,
 929        };
 930
 931        return mr_mfc_find(mrt, &arg);
 932}
 933
 934/* Look for a (*,G) entry */
 935static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 936                                               struct in6_addr *mcastgrp,
 937                                               mifi_t mifi)
 938{
 939        struct mfc6_cache_cmp_arg arg = {
 940                .mf6c_origin = in6addr_any,
 941                .mf6c_mcastgrp = *mcastgrp,
 942        };
 943
 944        if (ipv6_addr_any(mcastgrp))
 945                return mr_mfc_find_any_parent(mrt, mifi);
 946        return mr_mfc_find_any(mrt, mifi, &arg);
 947}
 948
 949/* Look for a (S,G,iif) entry if parent != -1 */
 950static struct mfc6_cache *
 951ip6mr_cache_find_parent(struct mr_table *mrt,
 952                        const struct in6_addr *origin,
 953                        const struct in6_addr *mcastgrp,
 954                        int parent)
 955{
 956        struct mfc6_cache_cmp_arg arg = {
 957                .mf6c_origin = *origin,
 958                .mf6c_mcastgrp = *mcastgrp,
 959        };
 960
 961        return mr_mfc_find_parent(mrt, &arg, parent);
 962}
 963
 964/* Allocate a multicast cache entry */
 965static struct mfc6_cache *ip6mr_cache_alloc(void)
 966{
 967        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 968        if (!c)
 969                return NULL;
 970        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 971        c->_c.mfc_un.res.minvif = MAXMIFS;
 972        c->_c.free = ip6mr_cache_free_rcu;
 973        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 974        return c;
 975}
 976
 977static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 978{
 979        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 980        if (!c)
 981                return NULL;
 982        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 983        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 984        return c;
 985}
 986
 987/*
 988 *      A cache entry has gone into a resolved state from queued
 989 */
 990
 991static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
 992                                struct mfc6_cache *uc, struct mfc6_cache *c)
 993{
 994        struct sk_buff *skb;
 995
 996        /*
 997         *      Play the pending entries through our router
 998         */
 999
1000        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1001                if (ipv6_hdr(skb)->version == 0) {
1002                        struct nlmsghdr *nlh = skb_pull(skb,
1003                                                        sizeof(struct ipv6hdr));
1004
1005                        if (mr_fill_mroute(mrt, skb, &c->_c,
1006                                           nlmsg_data(nlh)) > 0) {
1007                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1008                        } else {
1009                                nlh->nlmsg_type = NLMSG_ERROR;
1010                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1011                                skb_trim(skb, nlh->nlmsg_len);
1012                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1013                        }
1014                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1015                } else
1016                        ip6_mr_forward(net, mrt, skb, c);
1017        }
1018}
1019
1020/*
1021 *      Bounce a cache query up to pim6sd and netlink.
1022 *
1023 *      Called under mrt_lock.
1024 */
1025
1026static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1027                              mifi_t mifi, int assert)
1028{
1029        struct sock *mroute6_sk;
1030        struct sk_buff *skb;
1031        struct mrt6msg *msg;
1032        int ret;
1033
1034#ifdef CONFIG_IPV6_PIMSM_V2
1035        if (assert == MRT6MSG_WHOLEPKT)
1036                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1037                                                +sizeof(*msg));
1038        else
1039#endif
1040                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1041
1042        if (!skb)
1043                return -ENOBUFS;
1044
1045        /* I suppose that internal messages
1046         * do not require checksums */
1047
1048        skb->ip_summed = CHECKSUM_UNNECESSARY;
1049
1050#ifdef CONFIG_IPV6_PIMSM_V2
1051        if (assert == MRT6MSG_WHOLEPKT) {
1052                /* Ugly, but we have no choice with this interface.
1053                   Duplicate old header, fix length etc.
1054                   And all this only to mangle msg->im6_msgtype and
1055                   to set msg->im6_mbz to "mbz" :-)
1056                 */
1057                skb_push(skb, -skb_network_offset(pkt));
1058
1059                skb_push(skb, sizeof(*msg));
1060                skb_reset_transport_header(skb);
1061                msg = (struct mrt6msg *)skb_transport_header(skb);
1062                msg->im6_mbz = 0;
1063                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1064                msg->im6_mif = mrt->mroute_reg_vif_num;
1065                msg->im6_pad = 0;
1066                msg->im6_src = ipv6_hdr(pkt)->saddr;
1067                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1068
1069                skb->ip_summed = CHECKSUM_UNNECESSARY;
1070        } else
1071#endif
1072        {
1073        /*
1074         *      Copy the IP header
1075         */
1076
1077        skb_put(skb, sizeof(struct ipv6hdr));
1078        skb_reset_network_header(skb);
1079        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1080
1081        /*
1082         *      Add our header
1083         */
1084        skb_put(skb, sizeof(*msg));
1085        skb_reset_transport_header(skb);
1086        msg = (struct mrt6msg *)skb_transport_header(skb);
1087
1088        msg->im6_mbz = 0;
1089        msg->im6_msgtype = assert;
1090        msg->im6_mif = mifi;
1091        msg->im6_pad = 0;
1092        msg->im6_src = ipv6_hdr(pkt)->saddr;
1093        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1094
1095        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1096        skb->ip_summed = CHECKSUM_UNNECESSARY;
1097        }
1098
1099        rcu_read_lock();
1100        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1101        if (!mroute6_sk) {
1102                rcu_read_unlock();
1103                kfree_skb(skb);
1104                return -EINVAL;
1105        }
1106
1107        mrt6msg_netlink_event(mrt, skb);
1108
1109        /* Deliver to user space multicast routing algorithms */
1110        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1111        rcu_read_unlock();
1112        if (ret < 0) {
1113                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1114                kfree_skb(skb);
1115        }
1116
1117        return ret;
1118}
1119
1120/* Queue a packet for resolution. It gets locked cache entry! */
1121static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1122                                  struct sk_buff *skb)
1123{
1124        struct mfc6_cache *c;
1125        bool found = false;
1126        int err;
1127
1128        spin_lock_bh(&mfc_unres_lock);
1129        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1130                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1131                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1132                        found = true;
1133                        break;
1134                }
1135        }
1136
1137        if (!found) {
1138                /*
1139                 *      Create a new entry if allowable
1140                 */
1141
1142                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1143                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1144                        spin_unlock_bh(&mfc_unres_lock);
1145
1146                        kfree_skb(skb);
1147                        return -ENOBUFS;
1148                }
1149
1150                /* Fill in the new cache entry */
1151                c->_c.mfc_parent = -1;
1152                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1153                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1154
1155                /*
1156                 *      Reflect first query at pim6sd
1157                 */
1158                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1159                if (err < 0) {
1160                        /* If the report failed throw the cache entry
1161                           out - Brad Parker
1162                         */
1163                        spin_unlock_bh(&mfc_unres_lock);
1164
1165                        ip6mr_cache_free(c);
1166                        kfree_skb(skb);
1167                        return err;
1168                }
1169
1170                atomic_inc(&mrt->cache_resolve_queue_len);
1171                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1172                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1173
1174                ipmr_do_expire_process(mrt);
1175        }
1176
1177        /* See if we can append the packet */
1178        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1179                kfree_skb(skb);
1180                err = -ENOBUFS;
1181        } else {
1182                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1183                err = 0;
1184        }
1185
1186        spin_unlock_bh(&mfc_unres_lock);
1187        return err;
1188}
1189
1190/*
1191 *      MFC6 cache manipulation by user space
1192 */
1193
1194static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1195                            int parent)
1196{
1197        struct mfc6_cache *c;
1198
1199        /* The entries are added/deleted only under RTNL */
1200        rcu_read_lock();
1201        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1202                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1203        rcu_read_unlock();
1204        if (!c)
1205                return -ENOENT;
1206        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1207        list_del_rcu(&c->_c.list);
1208
1209        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1210                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1211        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1212        mr_cache_put(&c->_c);
1213        return 0;
1214}
1215
1216static int ip6mr_device_event(struct notifier_block *this,
1217                              unsigned long event, void *ptr)
1218{
1219        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1220        struct net *net = dev_net(dev);
1221        struct mr_table *mrt;
1222        struct vif_device *v;
1223        int ct;
1224
1225        if (event != NETDEV_UNREGISTER)
1226                return NOTIFY_DONE;
1227
1228        ip6mr_for_each_table(mrt, net) {
1229                v = &mrt->vif_table[0];
1230                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1231                        if (v->dev == dev)
1232                                mif6_delete(mrt, ct, 1, NULL);
1233                }
1234        }
1235
1236        return NOTIFY_DONE;
1237}
1238
1239static unsigned int ip6mr_seq_read(struct net *net)
1240{
1241        ASSERT_RTNL();
1242
1243        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1244}
1245
1246static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1247{
1248        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1249                       ip6mr_mr_table_iter, &mrt_lock);
1250}
1251
1252static struct notifier_block ip6_mr_notifier = {
1253        .notifier_call = ip6mr_device_event
1254};
1255
1256static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1257        .family         = RTNL_FAMILY_IP6MR,
1258        .fib_seq_read   = ip6mr_seq_read,
1259        .fib_dump       = ip6mr_dump,
1260        .owner          = THIS_MODULE,
1261};
1262
1263static int __net_init ip6mr_notifier_init(struct net *net)
1264{
1265        struct fib_notifier_ops *ops;
1266
1267        net->ipv6.ipmr_seq = 0;
1268
1269        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1270        if (IS_ERR(ops))
1271                return PTR_ERR(ops);
1272
1273        net->ipv6.ip6mr_notifier_ops = ops;
1274
1275        return 0;
1276}
1277
1278static void __net_exit ip6mr_notifier_exit(struct net *net)
1279{
1280        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1281        net->ipv6.ip6mr_notifier_ops = NULL;
1282}
1283
1284/* Setup for IP multicast routing */
1285static int __net_init ip6mr_net_init(struct net *net)
1286{
1287        int err;
1288
1289        err = ip6mr_notifier_init(net);
1290        if (err)
1291                return err;
1292
1293        err = ip6mr_rules_init(net);
1294        if (err < 0)
1295                goto ip6mr_rules_fail;
1296
1297#ifdef CONFIG_PROC_FS
1298        err = -ENOMEM;
1299        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1300                        sizeof(struct mr_vif_iter)))
1301                goto proc_vif_fail;
1302        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1303                        sizeof(struct mr_mfc_iter)))
1304                goto proc_cache_fail;
1305#endif
1306
1307        return 0;
1308
1309#ifdef CONFIG_PROC_FS
1310proc_cache_fail:
1311        remove_proc_entry("ip6_mr_vif", net->proc_net);
1312proc_vif_fail:
1313        ip6mr_rules_exit(net);
1314#endif
1315ip6mr_rules_fail:
1316        ip6mr_notifier_exit(net);
1317        return err;
1318}
1319
1320static void __net_exit ip6mr_net_exit(struct net *net)
1321{
1322#ifdef CONFIG_PROC_FS
1323        remove_proc_entry("ip6_mr_cache", net->proc_net);
1324        remove_proc_entry("ip6_mr_vif", net->proc_net);
1325#endif
1326        ip6mr_rules_exit(net);
1327        ip6mr_notifier_exit(net);
1328}
1329
1330static struct pernet_operations ip6mr_net_ops = {
1331        .init = ip6mr_net_init,
1332        .exit = ip6mr_net_exit,
1333};
1334
1335int __init ip6_mr_init(void)
1336{
1337        int err;
1338
1339        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1340                                       sizeof(struct mfc6_cache),
1341                                       0, SLAB_HWCACHE_ALIGN,
1342                                       NULL);
1343        if (!mrt_cachep)
1344                return -ENOMEM;
1345
1346        err = register_pernet_subsys(&ip6mr_net_ops);
1347        if (err)
1348                goto reg_pernet_fail;
1349
1350        err = register_netdevice_notifier(&ip6_mr_notifier);
1351        if (err)
1352                goto reg_notif_fail;
1353#ifdef CONFIG_IPV6_PIMSM_V2
1354        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1355                pr_err("%s: can't add PIM protocol\n", __func__);
1356                err = -EAGAIN;
1357                goto add_proto_fail;
1358        }
1359#endif
1360        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1361                                   NULL, ip6mr_rtm_dumproute, 0);
1362        if (err == 0)
1363                return 0;
1364
1365#ifdef CONFIG_IPV6_PIMSM_V2
1366        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1367add_proto_fail:
1368        unregister_netdevice_notifier(&ip6_mr_notifier);
1369#endif
1370reg_notif_fail:
1371        unregister_pernet_subsys(&ip6mr_net_ops);
1372reg_pernet_fail:
1373        kmem_cache_destroy(mrt_cachep);
1374        return err;
1375}
1376
1377void ip6_mr_cleanup(void)
1378{
1379        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1380#ifdef CONFIG_IPV6_PIMSM_V2
1381        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1382#endif
1383        unregister_netdevice_notifier(&ip6_mr_notifier);
1384        unregister_pernet_subsys(&ip6mr_net_ops);
1385        kmem_cache_destroy(mrt_cachep);
1386}
1387
1388static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1389                         struct mf6cctl *mfc, int mrtsock, int parent)
1390{
1391        unsigned char ttls[MAXMIFS];
1392        struct mfc6_cache *uc, *c;
1393        struct mr_mfc *_uc;
1394        bool found;
1395        int i, err;
1396
1397        if (mfc->mf6cc_parent >= MAXMIFS)
1398                return -ENFILE;
1399
1400        memset(ttls, 255, MAXMIFS);
1401        for (i = 0; i < MAXMIFS; i++) {
1402                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1403                        ttls[i] = 1;
1404        }
1405
1406        /* The entries are added/deleted only under RTNL */
1407        rcu_read_lock();
1408        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1409                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1410        rcu_read_unlock();
1411        if (c) {
1412                write_lock_bh(&mrt_lock);
1413                c->_c.mfc_parent = mfc->mf6cc_parent;
1414                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1415                if (!mrtsock)
1416                        c->_c.mfc_flags |= MFC_STATIC;
1417                write_unlock_bh(&mrt_lock);
1418                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1419                                               c, mrt->id);
1420                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1421                return 0;
1422        }
1423
1424        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1425            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1426                return -EINVAL;
1427
1428        c = ip6mr_cache_alloc();
1429        if (!c)
1430                return -ENOMEM;
1431
1432        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1433        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1434        c->_c.mfc_parent = mfc->mf6cc_parent;
1435        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1436        if (!mrtsock)
1437                c->_c.mfc_flags |= MFC_STATIC;
1438
1439        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1440                                  ip6mr_rht_params);
1441        if (err) {
1442                pr_err("ip6mr: rhtable insert error %d\n", err);
1443                ip6mr_cache_free(c);
1444                return err;
1445        }
1446        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1447
1448        /* Check to see if we resolved a queued list. If so we
1449         * need to send on the frames and tidy up.
1450         */
1451        found = false;
1452        spin_lock_bh(&mfc_unres_lock);
1453        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1454                uc = (struct mfc6_cache *)_uc;
1455                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1456                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1457                        list_del(&_uc->list);
1458                        atomic_dec(&mrt->cache_resolve_queue_len);
1459                        found = true;
1460                        break;
1461                }
1462        }
1463        if (list_empty(&mrt->mfc_unres_queue))
1464                del_timer(&mrt->ipmr_expire_timer);
1465        spin_unlock_bh(&mfc_unres_lock);
1466
1467        if (found) {
1468                ip6mr_cache_resolve(net, mrt, uc, c);
1469                ip6mr_cache_free(uc);
1470        }
1471        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1472                                       c, mrt->id);
1473        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1474        return 0;
1475}
1476
1477/*
1478 *      Close the multicast socket, and clear the vif tables etc
1479 */
1480
1481static void mroute_clean_tables(struct mr_table *mrt, bool all)
1482{
1483        struct mr_mfc *c, *tmp;
1484        LIST_HEAD(list);
1485        int i;
1486
1487        /* Shut down all active vif entries */
1488        for (i = 0; i < mrt->maxvif; i++) {
1489                if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1490                        continue;
1491                mif6_delete(mrt, i, 0, &list);
1492        }
1493        unregister_netdevice_many(&list);
1494
1495        /* Wipe the cache */
1496        list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1497                if (!all && (c->mfc_flags & MFC_STATIC))
1498                        continue;
1499                rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1500                list_del_rcu(&c->list);
1501                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1502                mr_cache_put(c);
1503        }
1504
1505        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1506                spin_lock_bh(&mfc_unres_lock);
1507                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1508                        list_del(&c->list);
1509                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1510                                                       FIB_EVENT_ENTRY_DEL,
1511                                                       (struct mfc6_cache *)c,
1512                                                       mrt->id);
1513                        mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1514                                          RTM_DELROUTE);
1515                        ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1516                }
1517                spin_unlock_bh(&mfc_unres_lock);
1518        }
1519}
1520
1521static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1522{
1523        int err = 0;
1524        struct net *net = sock_net(sk);
1525
1526        rtnl_lock();
1527        write_lock_bh(&mrt_lock);
1528        if (rtnl_dereference(mrt->mroute_sk)) {
1529                err = -EADDRINUSE;
1530        } else {
1531                rcu_assign_pointer(mrt->mroute_sk, sk);
1532                sock_set_flag(sk, SOCK_RCU_FREE);
1533                net->ipv6.devconf_all->mc_forwarding++;
1534        }
1535        write_unlock_bh(&mrt_lock);
1536
1537        if (!err)
1538                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1539                                             NETCONFA_MC_FORWARDING,
1540                                             NETCONFA_IFINDEX_ALL,
1541                                             net->ipv6.devconf_all);
1542        rtnl_unlock();
1543
1544        return err;
1545}
1546
1547int ip6mr_sk_done(struct sock *sk)
1548{
1549        int err = -EACCES;
1550        struct net *net = sock_net(sk);
1551        struct mr_table *mrt;
1552
1553        if (sk->sk_type != SOCK_RAW ||
1554            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1555                return err;
1556
1557        rtnl_lock();
1558        ip6mr_for_each_table(mrt, net) {
1559                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1560                        write_lock_bh(&mrt_lock);
1561                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1562                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1563                         * so the RCU grace period before sk freeing
1564                         * is guaranteed by sk_destruct()
1565                         */
1566                        net->ipv6.devconf_all->mc_forwarding--;
1567                        write_unlock_bh(&mrt_lock);
1568                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1569                                                     NETCONFA_MC_FORWARDING,
1570                                                     NETCONFA_IFINDEX_ALL,
1571                                                     net->ipv6.devconf_all);
1572
1573                        mroute_clean_tables(mrt, false);
1574                        err = 0;
1575                        break;
1576                }
1577        }
1578        rtnl_unlock();
1579
1580        return err;
1581}
1582
1583bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1584{
1585        struct mr_table *mrt;
1586        struct flowi6 fl6 = {
1587                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1588                .flowi6_oif     = skb->dev->ifindex,
1589                .flowi6_mark    = skb->mark,
1590        };
1591
1592        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1593                return NULL;
1594
1595        return rcu_access_pointer(mrt->mroute_sk);
1596}
1597EXPORT_SYMBOL(mroute6_is_socket);
1598
1599/*
1600 *      Socket options and virtual interface manipulation. The whole
1601 *      virtual interface system is a complete heap, but unfortunately
1602 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1603 *      MOSPF/PIM router set up we can clean this up.
1604 */
1605
1606int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1607{
1608        int ret, parent = 0;
1609        struct mif6ctl vif;
1610        struct mf6cctl mfc;
1611        mifi_t mifi;
1612        struct net *net = sock_net(sk);
1613        struct mr_table *mrt;
1614
1615        if (sk->sk_type != SOCK_RAW ||
1616            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1617                return -EOPNOTSUPP;
1618
1619        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1620        if (!mrt)
1621                return -ENOENT;
1622
1623        if (optname != MRT6_INIT) {
1624                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1625                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1626                        return -EACCES;
1627        }
1628
1629        switch (optname) {
1630        case MRT6_INIT:
1631                if (optlen < sizeof(int))
1632                        return -EINVAL;
1633
1634                return ip6mr_sk_init(mrt, sk);
1635
1636        case MRT6_DONE:
1637                return ip6mr_sk_done(sk);
1638
1639        case MRT6_ADD_MIF:
1640                if (optlen < sizeof(vif))
1641                        return -EINVAL;
1642                if (copy_from_user(&vif, optval, sizeof(vif)))
1643                        return -EFAULT;
1644                if (vif.mif6c_mifi >= MAXMIFS)
1645                        return -ENFILE;
1646                rtnl_lock();
1647                ret = mif6_add(net, mrt, &vif,
1648                               sk == rtnl_dereference(mrt->mroute_sk));
1649                rtnl_unlock();
1650                return ret;
1651
1652        case MRT6_DEL_MIF:
1653                if (optlen < sizeof(mifi_t))
1654                        return -EINVAL;
1655                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1656                        return -EFAULT;
1657                rtnl_lock();
1658                ret = mif6_delete(mrt, mifi, 0, NULL);
1659                rtnl_unlock();
1660                return ret;
1661
1662        /*
1663         *      Manipulate the forwarding caches. These live
1664         *      in a sort of kernel/user symbiosis.
1665         */
1666        case MRT6_ADD_MFC:
1667        case MRT6_DEL_MFC:
1668                parent = -1;
1669                /* fall through */
1670        case MRT6_ADD_MFC_PROXY:
1671        case MRT6_DEL_MFC_PROXY:
1672                if (optlen < sizeof(mfc))
1673                        return -EINVAL;
1674                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1675                        return -EFAULT;
1676                if (parent == 0)
1677                        parent = mfc.mf6cc_parent;
1678                rtnl_lock();
1679                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1680                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1681                else
1682                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1683                                            sk ==
1684                                            rtnl_dereference(mrt->mroute_sk),
1685                                            parent);
1686                rtnl_unlock();
1687                return ret;
1688
1689        /*
1690         *      Control PIM assert (to activate pim will activate assert)
1691         */
1692        case MRT6_ASSERT:
1693        {
1694                int v;
1695
1696                if (optlen != sizeof(v))
1697                        return -EINVAL;
1698                if (get_user(v, (int __user *)optval))
1699                        return -EFAULT;
1700                mrt->mroute_do_assert = v;
1701                return 0;
1702        }
1703
1704#ifdef CONFIG_IPV6_PIMSM_V2
1705        case MRT6_PIM:
1706        {
1707                int v;
1708
1709                if (optlen != sizeof(v))
1710                        return -EINVAL;
1711                if (get_user(v, (int __user *)optval))
1712                        return -EFAULT;
1713                v = !!v;
1714                rtnl_lock();
1715                ret = 0;
1716                if (v != mrt->mroute_do_pim) {
1717                        mrt->mroute_do_pim = v;
1718                        mrt->mroute_do_assert = v;
1719                }
1720                rtnl_unlock();
1721                return ret;
1722        }
1723
1724#endif
1725#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1726        case MRT6_TABLE:
1727        {
1728                u32 v;
1729
1730                if (optlen != sizeof(u32))
1731                        return -EINVAL;
1732                if (get_user(v, (u32 __user *)optval))
1733                        return -EFAULT;
1734                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1735                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1736                        return -EINVAL;
1737                if (sk == rcu_access_pointer(mrt->mroute_sk))
1738                        return -EBUSY;
1739
1740                rtnl_lock();
1741                ret = 0;
1742                mrt = ip6mr_new_table(net, v);
1743                if (IS_ERR(mrt))
1744                        ret = PTR_ERR(mrt);
1745                else
1746                        raw6_sk(sk)->ip6mr_table = v;
1747                rtnl_unlock();
1748                return ret;
1749        }
1750#endif
1751        /*
1752         *      Spurious command, or MRT6_VERSION which you cannot
1753         *      set.
1754         */
1755        default:
1756                return -ENOPROTOOPT;
1757        }
1758}
1759
1760/*
1761 *      Getsock opt support for the multicast routing system.
1762 */
1763
1764int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1765                          int __user *optlen)
1766{
1767        int olr;
1768        int val;
1769        struct net *net = sock_net(sk);
1770        struct mr_table *mrt;
1771
1772        if (sk->sk_type != SOCK_RAW ||
1773            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1774                return -EOPNOTSUPP;
1775
1776        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1777        if (!mrt)
1778                return -ENOENT;
1779
1780        switch (optname) {
1781        case MRT6_VERSION:
1782                val = 0x0305;
1783                break;
1784#ifdef CONFIG_IPV6_PIMSM_V2
1785        case MRT6_PIM:
1786                val = mrt->mroute_do_pim;
1787                break;
1788#endif
1789        case MRT6_ASSERT:
1790                val = mrt->mroute_do_assert;
1791                break;
1792        default:
1793                return -ENOPROTOOPT;
1794        }
1795
1796        if (get_user(olr, optlen))
1797                return -EFAULT;
1798
1799        olr = min_t(int, olr, sizeof(int));
1800        if (olr < 0)
1801                return -EINVAL;
1802
1803        if (put_user(olr, optlen))
1804                return -EFAULT;
1805        if (copy_to_user(optval, &val, olr))
1806                return -EFAULT;
1807        return 0;
1808}
1809
1810/*
1811 *      The IP multicast ioctl support routines.
1812 */
1813
1814int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1815{
1816        struct sioc_sg_req6 sr;
1817        struct sioc_mif_req6 vr;
1818        struct vif_device *vif;
1819        struct mfc6_cache *c;
1820        struct net *net = sock_net(sk);
1821        struct mr_table *mrt;
1822
1823        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1824        if (!mrt)
1825                return -ENOENT;
1826
1827        switch (cmd) {
1828        case SIOCGETMIFCNT_IN6:
1829                if (copy_from_user(&vr, arg, sizeof(vr)))
1830                        return -EFAULT;
1831                if (vr.mifi >= mrt->maxvif)
1832                        return -EINVAL;
1833                read_lock(&mrt_lock);
1834                vif = &mrt->vif_table[vr.mifi];
1835                if (VIF_EXISTS(mrt, vr.mifi)) {
1836                        vr.icount = vif->pkt_in;
1837                        vr.ocount = vif->pkt_out;
1838                        vr.ibytes = vif->bytes_in;
1839                        vr.obytes = vif->bytes_out;
1840                        read_unlock(&mrt_lock);
1841
1842                        if (copy_to_user(arg, &vr, sizeof(vr)))
1843                                return -EFAULT;
1844                        return 0;
1845                }
1846                read_unlock(&mrt_lock);
1847                return -EADDRNOTAVAIL;
1848        case SIOCGETSGCNT_IN6:
1849                if (copy_from_user(&sr, arg, sizeof(sr)))
1850                        return -EFAULT;
1851
1852                rcu_read_lock();
1853                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1854                if (c) {
1855                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1856                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1857                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1858                        rcu_read_unlock();
1859
1860                        if (copy_to_user(arg, &sr, sizeof(sr)))
1861                                return -EFAULT;
1862                        return 0;
1863                }
1864                rcu_read_unlock();
1865                return -EADDRNOTAVAIL;
1866        default:
1867                return -ENOIOCTLCMD;
1868        }
1869}
1870
1871#ifdef CONFIG_COMPAT
1872struct compat_sioc_sg_req6 {
1873        struct sockaddr_in6 src;
1874        struct sockaddr_in6 grp;
1875        compat_ulong_t pktcnt;
1876        compat_ulong_t bytecnt;
1877        compat_ulong_t wrong_if;
1878};
1879
1880struct compat_sioc_mif_req6 {
1881        mifi_t  mifi;
1882        compat_ulong_t icount;
1883        compat_ulong_t ocount;
1884        compat_ulong_t ibytes;
1885        compat_ulong_t obytes;
1886};
1887
1888int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1889{
1890        struct compat_sioc_sg_req6 sr;
1891        struct compat_sioc_mif_req6 vr;
1892        struct vif_device *vif;
1893        struct mfc6_cache *c;
1894        struct net *net = sock_net(sk);
1895        struct mr_table *mrt;
1896
1897        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1898        if (!mrt)
1899                return -ENOENT;
1900
1901        switch (cmd) {
1902        case SIOCGETMIFCNT_IN6:
1903                if (copy_from_user(&vr, arg, sizeof(vr)))
1904                        return -EFAULT;
1905                if (vr.mifi >= mrt->maxvif)
1906                        return -EINVAL;
1907                read_lock(&mrt_lock);
1908                vif = &mrt->vif_table[vr.mifi];
1909                if (VIF_EXISTS(mrt, vr.mifi)) {
1910                        vr.icount = vif->pkt_in;
1911                        vr.ocount = vif->pkt_out;
1912                        vr.ibytes = vif->bytes_in;
1913                        vr.obytes = vif->bytes_out;
1914                        read_unlock(&mrt_lock);
1915
1916                        if (copy_to_user(arg, &vr, sizeof(vr)))
1917                                return -EFAULT;
1918                        return 0;
1919                }
1920                read_unlock(&mrt_lock);
1921                return -EADDRNOTAVAIL;
1922        case SIOCGETSGCNT_IN6:
1923                if (copy_from_user(&sr, arg, sizeof(sr)))
1924                        return -EFAULT;
1925
1926                rcu_read_lock();
1927                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1928                if (c) {
1929                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1930                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1931                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1932                        rcu_read_unlock();
1933
1934                        if (copy_to_user(arg, &sr, sizeof(sr)))
1935                                return -EFAULT;
1936                        return 0;
1937                }
1938                rcu_read_unlock();
1939                return -EADDRNOTAVAIL;
1940        default:
1941                return -ENOIOCTLCMD;
1942        }
1943}
1944#endif
1945
1946static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1947{
1948        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1949                        IPSTATS_MIB_OUTFORWDATAGRAMS);
1950        __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1951                        IPSTATS_MIB_OUTOCTETS, skb->len);
1952        return dst_output(net, sk, skb);
1953}
1954
1955/*
1956 *      Processing handlers for ip6mr_forward
1957 */
1958
1959static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1960                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1961{
1962        struct ipv6hdr *ipv6h;
1963        struct vif_device *vif = &mrt->vif_table[vifi];
1964        struct net_device *dev;
1965        struct dst_entry *dst;
1966        struct flowi6 fl6;
1967
1968        if (!vif->dev)
1969                goto out_free;
1970
1971#ifdef CONFIG_IPV6_PIMSM_V2
1972        if (vif->flags & MIFF_REGISTER) {
1973                vif->pkt_out++;
1974                vif->bytes_out += skb->len;
1975                vif->dev->stats.tx_bytes += skb->len;
1976                vif->dev->stats.tx_packets++;
1977                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1978                goto out_free;
1979        }
1980#endif
1981
1982        ipv6h = ipv6_hdr(skb);
1983
1984        fl6 = (struct flowi6) {
1985                .flowi6_oif = vif->link,
1986                .daddr = ipv6h->daddr,
1987        };
1988
1989        dst = ip6_route_output(net, NULL, &fl6);
1990        if (dst->error) {
1991                dst_release(dst);
1992                goto out_free;
1993        }
1994
1995        skb_dst_drop(skb);
1996        skb_dst_set(skb, dst);
1997
1998        /*
1999         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2000         * not only before forwarding, but after forwarding on all output
2001         * interfaces. It is clear, if mrouter runs a multicasting
2002         * program, it should receive packets not depending to what interface
2003         * program is joined.
2004         * If we will not make it, the program will have to join on all
2005         * interfaces. On the other hand, multihoming host (or router, but
2006         * not mrouter) cannot join to more than one interface - it will
2007         * result in receiving multiple packets.
2008         */
2009        dev = vif->dev;
2010        skb->dev = dev;
2011        vif->pkt_out++;
2012        vif->bytes_out += skb->len;
2013
2014        /* We are about to write */
2015        /* XXX: extension headers? */
2016        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2017                goto out_free;
2018
2019        ipv6h = ipv6_hdr(skb);
2020        ipv6h->hop_limit--;
2021
2022        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2023
2024        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2025                       net, NULL, skb, skb->dev, dev,
2026                       ip6mr_forward2_finish);
2027
2028out_free:
2029        kfree_skb(skb);
2030        return 0;
2031}
2032
2033static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2034{
2035        int ct;
2036
2037        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2038                if (mrt->vif_table[ct].dev == dev)
2039                        break;
2040        }
2041        return ct;
2042}
2043
2044static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2045                           struct sk_buff *skb, struct mfc6_cache *c)
2046{
2047        int psend = -1;
2048        int vif, ct;
2049        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2050
2051        vif = c->_c.mfc_parent;
2052        c->_c.mfc_un.res.pkt++;
2053        c->_c.mfc_un.res.bytes += skb->len;
2054        c->_c.mfc_un.res.lastuse = jiffies;
2055
2056        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2057                struct mfc6_cache *cache_proxy;
2058
2059                /* For an (*,G) entry, we only check that the incoming
2060                 * interface is part of the static tree.
2061                 */
2062                rcu_read_lock();
2063                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2064                if (cache_proxy &&
2065                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2066                        rcu_read_unlock();
2067                        goto forward;
2068                }
2069                rcu_read_unlock();
2070        }
2071
2072        /*
2073         * Wrong interface: drop packet and (maybe) send PIM assert.
2074         */
2075        if (mrt->vif_table[vif].dev != skb->dev) {
2076                c->_c.mfc_un.res.wrong_if++;
2077
2078                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2079                    /* pimsm uses asserts, when switching from RPT to SPT,
2080                       so that we cannot check that packet arrived on an oif.
2081                       It is bad, but otherwise we would need to move pretty
2082                       large chunk of pimd to kernel. Ough... --ANK
2083                     */
2084                    (mrt->mroute_do_pim ||
2085                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2086                    time_after(jiffies,
2087                               c->_c.mfc_un.res.last_assert +
2088                               MFC_ASSERT_THRESH)) {
2089                        c->_c.mfc_un.res.last_assert = jiffies;
2090                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2091                }
2092                goto dont_forward;
2093        }
2094
2095forward:
2096        mrt->vif_table[vif].pkt_in++;
2097        mrt->vif_table[vif].bytes_in += skb->len;
2098
2099        /*
2100         *      Forward the frame
2101         */
2102        if (ipv6_addr_any(&c->mf6c_origin) &&
2103            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2104                if (true_vifi >= 0 &&
2105                    true_vifi != c->_c.mfc_parent &&
2106                    ipv6_hdr(skb)->hop_limit >
2107                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2108                        /* It's an (*,*) entry and the packet is not coming from
2109                         * the upstream: forward the packet to the upstream
2110                         * only.
2111                         */
2112                        psend = c->_c.mfc_parent;
2113                        goto last_forward;
2114                }
2115                goto dont_forward;
2116        }
2117        for (ct = c->_c.mfc_un.res.maxvif - 1;
2118             ct >= c->_c.mfc_un.res.minvif; ct--) {
2119                /* For (*,G) entry, don't forward to the incoming interface */
2120                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2121                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2122                        if (psend != -1) {
2123                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2124                                if (skb2)
2125                                        ip6mr_forward2(net, mrt, skb2,
2126                                                       c, psend);
2127                        }
2128                        psend = ct;
2129                }
2130        }
2131last_forward:
2132        if (psend != -1) {
2133                ip6mr_forward2(net, mrt, skb, c, psend);
2134                return;
2135        }
2136
2137dont_forward:
2138        kfree_skb(skb);
2139}
2140
2141
2142/*
2143 *      Multicast packets for forwarding arrive here
2144 */
2145
2146int ip6_mr_input(struct sk_buff *skb)
2147{
2148        struct mfc6_cache *cache;
2149        struct net *net = dev_net(skb->dev);
2150        struct mr_table *mrt;
2151        struct flowi6 fl6 = {
2152                .flowi6_iif     = skb->dev->ifindex,
2153                .flowi6_mark    = skb->mark,
2154        };
2155        int err;
2156
2157        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2158        if (err < 0) {
2159                kfree_skb(skb);
2160                return err;
2161        }
2162
2163        read_lock(&mrt_lock);
2164        cache = ip6mr_cache_find(mrt,
2165                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2166        if (!cache) {
2167                int vif = ip6mr_find_vif(mrt, skb->dev);
2168
2169                if (vif >= 0)
2170                        cache = ip6mr_cache_find_any(mrt,
2171                                                     &ipv6_hdr(skb)->daddr,
2172                                                     vif);
2173        }
2174
2175        /*
2176         *      No usable cache entry
2177         */
2178        if (!cache) {
2179                int vif;
2180
2181                vif = ip6mr_find_vif(mrt, skb->dev);
2182                if (vif >= 0) {
2183                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2184                        read_unlock(&mrt_lock);
2185
2186                        return err;
2187                }
2188                read_unlock(&mrt_lock);
2189                kfree_skb(skb);
2190                return -ENODEV;
2191        }
2192
2193        ip6_mr_forward(net, mrt, skb, cache);
2194
2195        read_unlock(&mrt_lock);
2196
2197        return 0;
2198}
2199
2200int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2201                    u32 portid)
2202{
2203        int err;
2204        struct mr_table *mrt;
2205        struct mfc6_cache *cache;
2206        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2207
2208        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2209        if (!mrt)
2210                return -ENOENT;
2211
2212        read_lock(&mrt_lock);
2213        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2214        if (!cache && skb->dev) {
2215                int vif = ip6mr_find_vif(mrt, skb->dev);
2216
2217                if (vif >= 0)
2218                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2219                                                     vif);
2220        }
2221
2222        if (!cache) {
2223                struct sk_buff *skb2;
2224                struct ipv6hdr *iph;
2225                struct net_device *dev;
2226                int vif;
2227
2228                dev = skb->dev;
2229                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2230                        read_unlock(&mrt_lock);
2231                        return -ENODEV;
2232                }
2233
2234                /* really correct? */
2235                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2236                if (!skb2) {
2237                        read_unlock(&mrt_lock);
2238                        return -ENOMEM;
2239                }
2240
2241                NETLINK_CB(skb2).portid = portid;
2242                skb_reset_transport_header(skb2);
2243
2244                skb_put(skb2, sizeof(struct ipv6hdr));
2245                skb_reset_network_header(skb2);
2246
2247                iph = ipv6_hdr(skb2);
2248                iph->version = 0;
2249                iph->priority = 0;
2250                iph->flow_lbl[0] = 0;
2251                iph->flow_lbl[1] = 0;
2252                iph->flow_lbl[2] = 0;
2253                iph->payload_len = 0;
2254                iph->nexthdr = IPPROTO_NONE;
2255                iph->hop_limit = 0;
2256                iph->saddr = rt->rt6i_src.addr;
2257                iph->daddr = rt->rt6i_dst.addr;
2258
2259                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2260                read_unlock(&mrt_lock);
2261
2262                return err;
2263        }
2264
2265        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2266        read_unlock(&mrt_lock);
2267        return err;
2268}
2269
2270static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2271                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2272                             int flags)
2273{
2274        struct nlmsghdr *nlh;
2275        struct rtmsg *rtm;
2276        int err;
2277
2278        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2279        if (!nlh)
2280                return -EMSGSIZE;
2281
2282        rtm = nlmsg_data(nlh);
2283        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2284        rtm->rtm_dst_len  = 128;
2285        rtm->rtm_src_len  = 128;
2286        rtm->rtm_tos      = 0;
2287        rtm->rtm_table    = mrt->id;
2288        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2289                goto nla_put_failure;
2290        rtm->rtm_type = RTN_MULTICAST;
2291        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2292        if (c->_c.mfc_flags & MFC_STATIC)
2293                rtm->rtm_protocol = RTPROT_STATIC;
2294        else
2295                rtm->rtm_protocol = RTPROT_MROUTED;
2296        rtm->rtm_flags    = 0;
2297
2298        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2299            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2300                goto nla_put_failure;
2301        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2302        /* do not break the dump if cache is unresolved */
2303        if (err < 0 && err != -ENOENT)
2304                goto nla_put_failure;
2305
2306        nlmsg_end(skb, nlh);
2307        return 0;
2308
2309nla_put_failure:
2310        nlmsg_cancel(skb, nlh);
2311        return -EMSGSIZE;
2312}
2313
2314static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2315                              u32 portid, u32 seq, struct mr_mfc *c,
2316                              int cmd, int flags)
2317{
2318        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2319                                 cmd, flags);
2320}
2321
2322static int mr6_msgsize(bool unresolved, int maxvif)
2323{
2324        size_t len =
2325                NLMSG_ALIGN(sizeof(struct rtmsg))
2326                + nla_total_size(4)     /* RTA_TABLE */
2327                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2328                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2329                ;
2330
2331        if (!unresolved)
2332                len = len
2333                      + nla_total_size(4)       /* RTA_IIF */
2334                      + nla_total_size(0)       /* RTA_MULTIPATH */
2335                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2336                                                /* RTA_MFC_STATS */
2337                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2338                ;
2339
2340        return len;
2341}
2342
2343static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2344                              int cmd)
2345{
2346        struct net *net = read_pnet(&mrt->net);
2347        struct sk_buff *skb;
2348        int err = -ENOBUFS;
2349
2350        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2351                        GFP_ATOMIC);
2352        if (!skb)
2353                goto errout;
2354
2355        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2356        if (err < 0)
2357                goto errout;
2358
2359        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2360        return;
2361
2362errout:
2363        kfree_skb(skb);
2364        if (err < 0)
2365                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2366}
2367
2368static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2369{
2370        size_t len =
2371                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2372                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2373                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2374                                        /* IP6MRA_CREPORT_SRC_ADDR */
2375                + nla_total_size(sizeof(struct in6_addr))
2376                                        /* IP6MRA_CREPORT_DST_ADDR */
2377                + nla_total_size(sizeof(struct in6_addr))
2378                                        /* IP6MRA_CREPORT_PKT */
2379                + nla_total_size(payloadlen)
2380                ;
2381
2382        return len;
2383}
2384
2385static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2386{
2387        struct net *net = read_pnet(&mrt->net);
2388        struct nlmsghdr *nlh;
2389        struct rtgenmsg *rtgenm;
2390        struct mrt6msg *msg;
2391        struct sk_buff *skb;
2392        struct nlattr *nla;
2393        int payloadlen;
2394
2395        payloadlen = pkt->len - sizeof(struct mrt6msg);
2396        msg = (struct mrt6msg *)skb_transport_header(pkt);
2397
2398        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2399        if (!skb)
2400                goto errout;
2401
2402        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2403                        sizeof(struct rtgenmsg), 0);
2404        if (!nlh)
2405                goto errout;
2406        rtgenm = nlmsg_data(nlh);
2407        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2408        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2409            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2410            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2411                             &msg->im6_src) ||
2412            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2413                             &msg->im6_dst))
2414                goto nla_put_failure;
2415
2416        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2417        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2418                                  nla_data(nla), payloadlen))
2419                goto nla_put_failure;
2420
2421        nlmsg_end(skb, nlh);
2422
2423        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2424        return;
2425
2426nla_put_failure:
2427        nlmsg_cancel(skb, nlh);
2428errout:
2429        kfree_skb(skb);
2430        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2431}
2432
2433static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2434{
2435        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2436                                _ip6mr_fill_mroute, &mfc_unres_lock);
2437}
2438