linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <linux/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/mm.h>
  24#include <linux/kernel.h>
  25#include <linux/fcntl.h>
  26#include <linux/stat.h>
  27#include <linux/socket.h>
  28#include <linux/inet.h>
  29#include <linux/netdevice.h>
  30#include <linux/inetdevice.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/init.h>
  34#include <linux/compat.h>
  35#include <linux/rhashtable.h>
  36#include <net/protocol.h>
  37#include <linux/skbuff.h>
  38#include <net/raw.h>
  39#include <linux/notifier.h>
  40#include <linux/if_arp.h>
  41#include <net/checksum.h>
  42#include <net/netlink.h>
  43#include <net/fib_rules.h>
  44
  45#include <net/ipv6.h>
  46#include <net/ip6_route.h>
  47#include <linux/mroute6.h>
  48#include <linux/pim.h>
  49#include <net/addrconf.h>
  50#include <linux/netfilter_ipv6.h>
  51#include <linux/export.h>
  52#include <net/ip6_checksum.h>
  53#include <linux/netconf.h>
  54
  55struct ip6mr_rule {
  56        struct fib_rule         common;
  57};
  58
  59struct ip6mr_result {
  60        struct mr_table *mrt;
  61};
  62
  63/* Big lock, protecting vif table, mrt cache and mroute socket state.
  64   Note that the changes are semaphored via rtnl_lock.
  65 */
  66
  67static DEFINE_RWLOCK(mrt_lock);
  68
  69/* Multicast router control variables */
  70
  71/* Special spinlock for queue of unresolved entries */
  72static DEFINE_SPINLOCK(mfc_unres_lock);
  73
  74/* We return to original Alan's scheme. Hash table of resolved
  75   entries is changed only in process context and protected
  76   with weak lock mrt_lock. Queue of unresolved entries is protected
  77   with strong spinlock mfc_unres_lock.
  78
  79   In this case data path is free of exclusive locks at all.
  80 */
  81
  82static struct kmem_cache *mrt_cachep __read_mostly;
  83
  84static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  85static void ip6mr_free_table(struct mr_table *mrt);
  86
  87static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  88                           struct sk_buff *skb, struct mfc6_cache *cache);
  89static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  90                              mifi_t mifi, int assert);
  91static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  92                              int cmd);
  93static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  94static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  95                               struct netlink_callback *cb);
  96static void mroute_clean_tables(struct mr_table *mrt, bool all);
  97static void ipmr_expire_process(struct timer_list *t);
  98
  99#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 100#define ip6mr_for_each_table(mrt, net) \
 101        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 102
 103static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 104                                            struct mr_table *mrt)
 105{
 106        struct mr_table *ret;
 107
 108        if (!mrt)
 109                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 110                                     struct mr_table, list);
 111        else
 112                ret = list_entry_rcu(mrt->list.next,
 113                                     struct mr_table, list);
 114
 115        if (&ret->list == &net->ipv6.mr6_tables)
 116                return NULL;
 117        return ret;
 118}
 119
 120static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 121{
 122        struct mr_table *mrt;
 123
 124        ip6mr_for_each_table(mrt, net) {
 125                if (mrt->id == id)
 126                        return mrt;
 127        }
 128        return NULL;
 129}
 130
 131static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 132                            struct mr_table **mrt)
 133{
 134        int err;
 135        struct ip6mr_result res;
 136        struct fib_lookup_arg arg = {
 137                .result = &res,
 138                .flags = FIB_LOOKUP_NOREF,
 139        };
 140
 141        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 142                               flowi6_to_flowi(flp6), 0, &arg);
 143        if (err < 0)
 144                return err;
 145        *mrt = res.mrt;
 146        return 0;
 147}
 148
 149static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 150                             int flags, struct fib_lookup_arg *arg)
 151{
 152        struct ip6mr_result *res = arg->result;
 153        struct mr_table *mrt;
 154
 155        switch (rule->action) {
 156        case FR_ACT_TO_TBL:
 157                break;
 158        case FR_ACT_UNREACHABLE:
 159                return -ENETUNREACH;
 160        case FR_ACT_PROHIBIT:
 161                return -EACCES;
 162        case FR_ACT_BLACKHOLE:
 163        default:
 164                return -EINVAL;
 165        }
 166
 167        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 168        if (!mrt)
 169                return -EAGAIN;
 170        res->mrt = mrt;
 171        return 0;
 172}
 173
 174static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 175{
 176        return 1;
 177}
 178
 179static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 180        FRA_GENERIC_POLICY,
 181};
 182
 183static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 184                                struct fib_rule_hdr *frh, struct nlattr **tb,
 185                                struct netlink_ext_ack *extack)
 186{
 187        return 0;
 188}
 189
 190static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 191                              struct nlattr **tb)
 192{
 193        return 1;
 194}
 195
 196static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 197                           struct fib_rule_hdr *frh)
 198{
 199        frh->dst_len = 0;
 200        frh->src_len = 0;
 201        frh->tos     = 0;
 202        return 0;
 203}
 204
 205static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 206        .family         = RTNL_FAMILY_IP6MR,
 207        .rule_size      = sizeof(struct ip6mr_rule),
 208        .addr_size      = sizeof(struct in6_addr),
 209        .action         = ip6mr_rule_action,
 210        .match          = ip6mr_rule_match,
 211        .configure      = ip6mr_rule_configure,
 212        .compare        = ip6mr_rule_compare,
 213        .fill           = ip6mr_rule_fill,
 214        .nlgroup        = RTNLGRP_IPV6_RULE,
 215        .policy         = ip6mr_rule_policy,
 216        .owner          = THIS_MODULE,
 217};
 218
 219static int __net_init ip6mr_rules_init(struct net *net)
 220{
 221        struct fib_rules_ops *ops;
 222        struct mr_table *mrt;
 223        int err;
 224
 225        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 226        if (IS_ERR(ops))
 227                return PTR_ERR(ops);
 228
 229        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 230
 231        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 232        if (IS_ERR(mrt)) {
 233                err = PTR_ERR(mrt);
 234                goto err1;
 235        }
 236
 237        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 238        if (err < 0)
 239                goto err2;
 240
 241        net->ipv6.mr6_rules_ops = ops;
 242        return 0;
 243
 244err2:
 245        ip6mr_free_table(mrt);
 246err1:
 247        fib_rules_unregister(ops);
 248        return err;
 249}
 250
 251static void __net_exit ip6mr_rules_exit(struct net *net)
 252{
 253        struct mr_table *mrt, *next;
 254
 255        rtnl_lock();
 256        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 257                list_del(&mrt->list);
 258                ip6mr_free_table(mrt);
 259        }
 260        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 261        rtnl_unlock();
 262}
 263
 264static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 265{
 266        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
 267}
 268
 269static unsigned int ip6mr_rules_seq_read(struct net *net)
 270{
 271        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 272}
 273
 274bool ip6mr_rule_default(const struct fib_rule *rule)
 275{
 276        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 277               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 278}
 279EXPORT_SYMBOL(ip6mr_rule_default);
 280#else
 281#define ip6mr_for_each_table(mrt, net) \
 282        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 283
 284static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 285                                            struct mr_table *mrt)
 286{
 287        if (!mrt)
 288                return net->ipv6.mrt6;
 289        return NULL;
 290}
 291
 292static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 293{
 294        return net->ipv6.mrt6;
 295}
 296
 297static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 298                            struct mr_table **mrt)
 299{
 300        *mrt = net->ipv6.mrt6;
 301        return 0;
 302}
 303
 304static int __net_init ip6mr_rules_init(struct net *net)
 305{
 306        struct mr_table *mrt;
 307
 308        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 309        if (IS_ERR(mrt))
 310                return PTR_ERR(mrt);
 311        net->ipv6.mrt6 = mrt;
 312        return 0;
 313}
 314
 315static void __net_exit ip6mr_rules_exit(struct net *net)
 316{
 317        rtnl_lock();
 318        ip6mr_free_table(net->ipv6.mrt6);
 319        net->ipv6.mrt6 = NULL;
 320        rtnl_unlock();
 321}
 322
 323static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 324{
 325        return 0;
 326}
 327
 328static unsigned int ip6mr_rules_seq_read(struct net *net)
 329{
 330        return 0;
 331}
 332#endif
 333
 334static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 335                          const void *ptr)
 336{
 337        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 338        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 339
 340        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 341               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 342}
 343
 344static const struct rhashtable_params ip6mr_rht_params = {
 345        .head_offset = offsetof(struct mr_mfc, mnode),
 346        .key_offset = offsetof(struct mfc6_cache, cmparg),
 347        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 348        .nelem_hint = 3,
 349        .locks_mul = 1,
 350        .obj_cmpfn = ip6mr_hash_cmp,
 351        .automatic_shrinking = true,
 352};
 353
 354static void ip6mr_new_table_set(struct mr_table *mrt,
 355                                struct net *net)
 356{
 357#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 358        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 359#endif
 360}
 361
 362static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 363        .mf6c_origin = IN6ADDR_ANY_INIT,
 364        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 365};
 366
 367static struct mr_table_ops ip6mr_mr_table_ops = {
 368        .rht_params = &ip6mr_rht_params,
 369        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 370};
 371
 372static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 373{
 374        struct mr_table *mrt;
 375
 376        mrt = ip6mr_get_table(net, id);
 377        if (mrt)
 378                return mrt;
 379
 380        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 381                              ipmr_expire_process, ip6mr_new_table_set);
 382}
 383
 384static void ip6mr_free_table(struct mr_table *mrt)
 385{
 386        del_timer_sync(&mrt->ipmr_expire_timer);
 387        mroute_clean_tables(mrt, true);
 388        rhltable_destroy(&mrt->mfc_hash);
 389        kfree(mrt);
 390}
 391
 392#ifdef CONFIG_PROC_FS
 393/* The /proc interfaces to multicast routing
 394 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 395 */
 396
 397static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 398        __acquires(mrt_lock)
 399{
 400        struct mr_vif_iter *iter = seq->private;
 401        struct net *net = seq_file_net(seq);
 402        struct mr_table *mrt;
 403
 404        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 405        if (!mrt)
 406                return ERR_PTR(-ENOENT);
 407
 408        iter->mrt = mrt;
 409
 410        read_lock(&mrt_lock);
 411        return mr_vif_seq_start(seq, pos);
 412}
 413
 414static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 415        __releases(mrt_lock)
 416{
 417        read_unlock(&mrt_lock);
 418}
 419
 420static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 421{
 422        struct mr_vif_iter *iter = seq->private;
 423        struct mr_table *mrt = iter->mrt;
 424
 425        if (v == SEQ_START_TOKEN) {
 426                seq_puts(seq,
 427                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 428        } else {
 429                const struct vif_device *vif = v;
 430                const char *name = vif->dev ? vif->dev->name : "none";
 431
 432                seq_printf(seq,
 433                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 434                           vif - mrt->vif_table,
 435                           name, vif->bytes_in, vif->pkt_in,
 436                           vif->bytes_out, vif->pkt_out,
 437                           vif->flags);
 438        }
 439        return 0;
 440}
 441
 442static const struct seq_operations ip6mr_vif_seq_ops = {
 443        .start = ip6mr_vif_seq_start,
 444        .next  = mr_vif_seq_next,
 445        .stop  = ip6mr_vif_seq_stop,
 446        .show  = ip6mr_vif_seq_show,
 447};
 448
 449static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 450{
 451        struct net *net = seq_file_net(seq);
 452        struct mr_table *mrt;
 453
 454        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 455        if (!mrt)
 456                return ERR_PTR(-ENOENT);
 457
 458        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 459}
 460
 461static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 462{
 463        int n;
 464
 465        if (v == SEQ_START_TOKEN) {
 466                seq_puts(seq,
 467                         "Group                            "
 468                         "Origin                           "
 469                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 470        } else {
 471                const struct mfc6_cache *mfc = v;
 472                const struct mr_mfc_iter *it = seq->private;
 473                struct mr_table *mrt = it->mrt;
 474
 475                seq_printf(seq, "%pI6 %pI6 %-3hd",
 476                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 477                           mfc->_c.mfc_parent);
 478
 479                if (it->cache != &mrt->mfc_unres_queue) {
 480                        seq_printf(seq, " %8lu %8lu %8lu",
 481                                   mfc->_c.mfc_un.res.pkt,
 482                                   mfc->_c.mfc_un.res.bytes,
 483                                   mfc->_c.mfc_un.res.wrong_if);
 484                        for (n = mfc->_c.mfc_un.res.minvif;
 485                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 486                                if (VIF_EXISTS(mrt, n) &&
 487                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 488                                        seq_printf(seq,
 489                                                   " %2d:%-3d", n,
 490                                                   mfc->_c.mfc_un.res.ttls[n]);
 491                        }
 492                } else {
 493                        /* unresolved mfc_caches don't contain
 494                         * pkt, bytes and wrong_if values
 495                         */
 496                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 497                }
 498                seq_putc(seq, '\n');
 499        }
 500        return 0;
 501}
 502
 503static const struct seq_operations ipmr_mfc_seq_ops = {
 504        .start = ipmr_mfc_seq_start,
 505        .next  = mr_mfc_seq_next,
 506        .stop  = mr_mfc_seq_stop,
 507        .show  = ipmr_mfc_seq_show,
 508};
 509#endif
 510
 511#ifdef CONFIG_IPV6_PIMSM_V2
 512
 513static int pim6_rcv(struct sk_buff *skb)
 514{
 515        struct pimreghdr *pim;
 516        struct ipv6hdr   *encap;
 517        struct net_device  *reg_dev = NULL;
 518        struct net *net = dev_net(skb->dev);
 519        struct mr_table *mrt;
 520        struct flowi6 fl6 = {
 521                .flowi6_iif     = skb->dev->ifindex,
 522                .flowi6_mark    = skb->mark,
 523        };
 524        int reg_vif_num;
 525
 526        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 527                goto drop;
 528
 529        pim = (struct pimreghdr *)skb_transport_header(skb);
 530        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 531            (pim->flags & PIM_NULL_REGISTER) ||
 532            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 533                             sizeof(*pim), IPPROTO_PIM,
 534                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 535             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 536                goto drop;
 537
 538        /* check if the inner packet is destined to mcast group */
 539        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 540                                   sizeof(*pim));
 541
 542        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 543            encap->payload_len == 0 ||
 544            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 545                goto drop;
 546
 547        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 548                goto drop;
 549        reg_vif_num = mrt->mroute_reg_vif_num;
 550
 551        read_lock(&mrt_lock);
 552        if (reg_vif_num >= 0)
 553                reg_dev = mrt->vif_table[reg_vif_num].dev;
 554        if (reg_dev)
 555                dev_hold(reg_dev);
 556        read_unlock(&mrt_lock);
 557
 558        if (!reg_dev)
 559                goto drop;
 560
 561        skb->mac_header = skb->network_header;
 562        skb_pull(skb, (u8 *)encap - skb->data);
 563        skb_reset_network_header(skb);
 564        skb->protocol = htons(ETH_P_IPV6);
 565        skb->ip_summed = CHECKSUM_NONE;
 566
 567        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 568
 569        netif_rx(skb);
 570
 571        dev_put(reg_dev);
 572        return 0;
 573 drop:
 574        kfree_skb(skb);
 575        return 0;
 576}
 577
 578static const struct inet6_protocol pim6_protocol = {
 579        .handler        =       pim6_rcv,
 580};
 581
 582/* Service routines creating virtual interfaces: PIMREG */
 583
 584static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 585                                      struct net_device *dev)
 586{
 587        struct net *net = dev_net(dev);
 588        struct mr_table *mrt;
 589        struct flowi6 fl6 = {
 590                .flowi6_oif     = dev->ifindex,
 591                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 592                .flowi6_mark    = skb->mark,
 593        };
 594        int err;
 595
 596        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 597        if (err < 0) {
 598                kfree_skb(skb);
 599                return err;
 600        }
 601
 602        read_lock(&mrt_lock);
 603        dev->stats.tx_bytes += skb->len;
 604        dev->stats.tx_packets++;
 605        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 606        read_unlock(&mrt_lock);
 607        kfree_skb(skb);
 608        return NETDEV_TX_OK;
 609}
 610
 611static int reg_vif_get_iflink(const struct net_device *dev)
 612{
 613        return 0;
 614}
 615
 616static const struct net_device_ops reg_vif_netdev_ops = {
 617        .ndo_start_xmit = reg_vif_xmit,
 618        .ndo_get_iflink = reg_vif_get_iflink,
 619};
 620
 621static void reg_vif_setup(struct net_device *dev)
 622{
 623        dev->type               = ARPHRD_PIMREG;
 624        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 625        dev->flags              = IFF_NOARP;
 626        dev->netdev_ops         = &reg_vif_netdev_ops;
 627        dev->needs_free_netdev  = true;
 628        dev->features           |= NETIF_F_NETNS_LOCAL;
 629}
 630
 631static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 632{
 633        struct net_device *dev;
 634        char name[IFNAMSIZ];
 635
 636        if (mrt->id == RT6_TABLE_DFLT)
 637                sprintf(name, "pim6reg");
 638        else
 639                sprintf(name, "pim6reg%u", mrt->id);
 640
 641        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 642        if (!dev)
 643                return NULL;
 644
 645        dev_net_set(dev, net);
 646
 647        if (register_netdevice(dev)) {
 648                free_netdev(dev);
 649                return NULL;
 650        }
 651
 652        if (dev_open(dev))
 653                goto failure;
 654
 655        dev_hold(dev);
 656        return dev;
 657
 658failure:
 659        unregister_netdevice(dev);
 660        return NULL;
 661}
 662#endif
 663
 664static int call_ip6mr_vif_entry_notifiers(struct net *net,
 665                                          enum fib_event_type event_type,
 666                                          struct vif_device *vif,
 667                                          mifi_t vif_index, u32 tb_id)
 668{
 669        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 670                                     vif, vif_index, tb_id,
 671                                     &net->ipv6.ipmr_seq);
 672}
 673
 674static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 675                                          enum fib_event_type event_type,
 676                                          struct mfc6_cache *mfc, u32 tb_id)
 677{
 678        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 679                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 680}
 681
 682/* Delete a VIF entry */
 683static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 684                       struct list_head *head)
 685{
 686        struct vif_device *v;
 687        struct net_device *dev;
 688        struct inet6_dev *in6_dev;
 689
 690        if (vifi < 0 || vifi >= mrt->maxvif)
 691                return -EADDRNOTAVAIL;
 692
 693        v = &mrt->vif_table[vifi];
 694
 695        if (VIF_EXISTS(mrt, vifi))
 696                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 697                                               FIB_EVENT_VIF_DEL, v, vifi,
 698                                               mrt->id);
 699
 700        write_lock_bh(&mrt_lock);
 701        dev = v->dev;
 702        v->dev = NULL;
 703
 704        if (!dev) {
 705                write_unlock_bh(&mrt_lock);
 706                return -EADDRNOTAVAIL;
 707        }
 708
 709#ifdef CONFIG_IPV6_PIMSM_V2
 710        if (vifi == mrt->mroute_reg_vif_num)
 711                mrt->mroute_reg_vif_num = -1;
 712#endif
 713
 714        if (vifi + 1 == mrt->maxvif) {
 715                int tmp;
 716                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 717                        if (VIF_EXISTS(mrt, tmp))
 718                                break;
 719                }
 720                mrt->maxvif = tmp + 1;
 721        }
 722
 723        write_unlock_bh(&mrt_lock);
 724
 725        dev_set_allmulti(dev, -1);
 726
 727        in6_dev = __in6_dev_get(dev);
 728        if (in6_dev) {
 729                in6_dev->cnf.mc_forwarding--;
 730                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 731                                             NETCONFA_MC_FORWARDING,
 732                                             dev->ifindex, &in6_dev->cnf);
 733        }
 734
 735        if ((v->flags & MIFF_REGISTER) && !notify)
 736                unregister_netdevice_queue(dev, head);
 737
 738        dev_put(dev);
 739        return 0;
 740}
 741
 742static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 743{
 744        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 745
 746        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 747}
 748
 749static inline void ip6mr_cache_free(struct mfc6_cache *c)
 750{
 751        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 752}
 753
 754/* Destroy an unresolved cache entry, killing queued skbs
 755   and reporting error to netlink readers.
 756 */
 757
 758static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 759{
 760        struct net *net = read_pnet(&mrt->net);
 761        struct sk_buff *skb;
 762
 763        atomic_dec(&mrt->cache_resolve_queue_len);
 764
 765        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 766                if (ipv6_hdr(skb)->version == 0) {
 767                        struct nlmsghdr *nlh = skb_pull(skb,
 768                                                        sizeof(struct ipv6hdr));
 769                        nlh->nlmsg_type = NLMSG_ERROR;
 770                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 771                        skb_trim(skb, nlh->nlmsg_len);
 772                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 773                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 774                } else
 775                        kfree_skb(skb);
 776        }
 777
 778        ip6mr_cache_free(c);
 779}
 780
 781
 782/* Timer process for all the unresolved queue. */
 783
 784static void ipmr_do_expire_process(struct mr_table *mrt)
 785{
 786        unsigned long now = jiffies;
 787        unsigned long expires = 10 * HZ;
 788        struct mr_mfc *c, *next;
 789
 790        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 791                if (time_after(c->mfc_un.unres.expires, now)) {
 792                        /* not yet... */
 793                        unsigned long interval = c->mfc_un.unres.expires - now;
 794                        if (interval < expires)
 795                                expires = interval;
 796                        continue;
 797                }
 798
 799                list_del(&c->list);
 800                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 801                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 802        }
 803
 804        if (!list_empty(&mrt->mfc_unres_queue))
 805                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 806}
 807
 808static void ipmr_expire_process(struct timer_list *t)
 809{
 810        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 811
 812        if (!spin_trylock(&mfc_unres_lock)) {
 813                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 814                return;
 815        }
 816
 817        if (!list_empty(&mrt->mfc_unres_queue))
 818                ipmr_do_expire_process(mrt);
 819
 820        spin_unlock(&mfc_unres_lock);
 821}
 822
 823/* Fill oifs list. It is called under write locked mrt_lock. */
 824
 825static void ip6mr_update_thresholds(struct mr_table *mrt,
 826                                    struct mr_mfc *cache,
 827                                    unsigned char *ttls)
 828{
 829        int vifi;
 830
 831        cache->mfc_un.res.minvif = MAXMIFS;
 832        cache->mfc_un.res.maxvif = 0;
 833        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 834
 835        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 836                if (VIF_EXISTS(mrt, vifi) &&
 837                    ttls[vifi] && ttls[vifi] < 255) {
 838                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 839                        if (cache->mfc_un.res.minvif > vifi)
 840                                cache->mfc_un.res.minvif = vifi;
 841                        if (cache->mfc_un.res.maxvif <= vifi)
 842                                cache->mfc_un.res.maxvif = vifi + 1;
 843                }
 844        }
 845        cache->mfc_un.res.lastuse = jiffies;
 846}
 847
 848static int mif6_add(struct net *net, struct mr_table *mrt,
 849                    struct mif6ctl *vifc, int mrtsock)
 850{
 851        int vifi = vifc->mif6c_mifi;
 852        struct vif_device *v = &mrt->vif_table[vifi];
 853        struct net_device *dev;
 854        struct inet6_dev *in6_dev;
 855        int err;
 856
 857        /* Is vif busy ? */
 858        if (VIF_EXISTS(mrt, vifi))
 859                return -EADDRINUSE;
 860
 861        switch (vifc->mif6c_flags) {
 862#ifdef CONFIG_IPV6_PIMSM_V2
 863        case MIFF_REGISTER:
 864                /*
 865                 * Special Purpose VIF in PIM
 866                 * All the packets will be sent to the daemon
 867                 */
 868                if (mrt->mroute_reg_vif_num >= 0)
 869                        return -EADDRINUSE;
 870                dev = ip6mr_reg_vif(net, mrt);
 871                if (!dev)
 872                        return -ENOBUFS;
 873                err = dev_set_allmulti(dev, 1);
 874                if (err) {
 875                        unregister_netdevice(dev);
 876                        dev_put(dev);
 877                        return err;
 878                }
 879                break;
 880#endif
 881        case 0:
 882                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 883                if (!dev)
 884                        return -EADDRNOTAVAIL;
 885                err = dev_set_allmulti(dev, 1);
 886                if (err) {
 887                        dev_put(dev);
 888                        return err;
 889                }
 890                break;
 891        default:
 892                return -EINVAL;
 893        }
 894
 895        in6_dev = __in6_dev_get(dev);
 896        if (in6_dev) {
 897                in6_dev->cnf.mc_forwarding++;
 898                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 899                                             NETCONFA_MC_FORWARDING,
 900                                             dev->ifindex, &in6_dev->cnf);
 901        }
 902
 903        /* Fill in the VIF structures */
 904        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 905                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 906                        MIFF_REGISTER);
 907
 908        /* And finish update writing critical data */
 909        write_lock_bh(&mrt_lock);
 910        v->dev = dev;
 911#ifdef CONFIG_IPV6_PIMSM_V2
 912        if (v->flags & MIFF_REGISTER)
 913                mrt->mroute_reg_vif_num = vifi;
 914#endif
 915        if (vifi + 1 > mrt->maxvif)
 916                mrt->maxvif = vifi + 1;
 917        write_unlock_bh(&mrt_lock);
 918        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 919                                       v, vifi, mrt->id);
 920        return 0;
 921}
 922
 923static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 924                                           const struct in6_addr *origin,
 925                                           const struct in6_addr *mcastgrp)
 926{
 927        struct mfc6_cache_cmp_arg arg = {
 928                .mf6c_origin = *origin,
 929                .mf6c_mcastgrp = *mcastgrp,
 930        };
 931
 932        return mr_mfc_find(mrt, &arg);
 933}
 934
 935/* Look for a (*,G) entry */
 936static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 937                                               struct in6_addr *mcastgrp,
 938                                               mifi_t mifi)
 939{
 940        struct mfc6_cache_cmp_arg arg = {
 941                .mf6c_origin = in6addr_any,
 942                .mf6c_mcastgrp = *mcastgrp,
 943        };
 944
 945        if (ipv6_addr_any(mcastgrp))
 946                return mr_mfc_find_any_parent(mrt, mifi);
 947        return mr_mfc_find_any(mrt, mifi, &arg);
 948}
 949
 950/* Look for a (S,G,iif) entry if parent != -1 */
 951static struct mfc6_cache *
 952ip6mr_cache_find_parent(struct mr_table *mrt,
 953                        const struct in6_addr *origin,
 954                        const struct in6_addr *mcastgrp,
 955                        int parent)
 956{
 957        struct mfc6_cache_cmp_arg arg = {
 958                .mf6c_origin = *origin,
 959                .mf6c_mcastgrp = *mcastgrp,
 960        };
 961
 962        return mr_mfc_find_parent(mrt, &arg, parent);
 963}
 964
 965/* Allocate a multicast cache entry */
 966static struct mfc6_cache *ip6mr_cache_alloc(void)
 967{
 968        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 969        if (!c)
 970                return NULL;
 971        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 972        c->_c.mfc_un.res.minvif = MAXMIFS;
 973        c->_c.free = ip6mr_cache_free_rcu;
 974        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 975        return c;
 976}
 977
 978static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 979{
 980        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 981        if (!c)
 982                return NULL;
 983        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 984        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 985        return c;
 986}
 987
 988/*
 989 *      A cache entry has gone into a resolved state from queued
 990 */
 991
 992static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
 993                                struct mfc6_cache *uc, struct mfc6_cache *c)
 994{
 995        struct sk_buff *skb;
 996
 997        /*
 998         *      Play the pending entries through our router
 999         */
1000
1001        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1002                if (ipv6_hdr(skb)->version == 0) {
1003                        struct nlmsghdr *nlh = skb_pull(skb,
1004                                                        sizeof(struct ipv6hdr));
1005
1006                        if (mr_fill_mroute(mrt, skb, &c->_c,
1007                                           nlmsg_data(nlh)) > 0) {
1008                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1009                        } else {
1010                                nlh->nlmsg_type = NLMSG_ERROR;
1011                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1012                                skb_trim(skb, nlh->nlmsg_len);
1013                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1014                        }
1015                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1016                } else
1017                        ip6_mr_forward(net, mrt, skb, c);
1018        }
1019}
1020
1021/*
1022 *      Bounce a cache query up to pim6sd and netlink.
1023 *
1024 *      Called under mrt_lock.
1025 */
1026
1027static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1028                              mifi_t mifi, int assert)
1029{
1030        struct sock *mroute6_sk;
1031        struct sk_buff *skb;
1032        struct mrt6msg *msg;
1033        int ret;
1034
1035#ifdef CONFIG_IPV6_PIMSM_V2
1036        if (assert == MRT6MSG_WHOLEPKT)
1037                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1038                                                +sizeof(*msg));
1039        else
1040#endif
1041                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1042
1043        if (!skb)
1044                return -ENOBUFS;
1045
1046        /* I suppose that internal messages
1047         * do not require checksums */
1048
1049        skb->ip_summed = CHECKSUM_UNNECESSARY;
1050
1051#ifdef CONFIG_IPV6_PIMSM_V2
1052        if (assert == MRT6MSG_WHOLEPKT) {
1053                /* Ugly, but we have no choice with this interface.
1054                   Duplicate old header, fix length etc.
1055                   And all this only to mangle msg->im6_msgtype and
1056                   to set msg->im6_mbz to "mbz" :-)
1057                 */
1058                skb_push(skb, -skb_network_offset(pkt));
1059
1060                skb_push(skb, sizeof(*msg));
1061                skb_reset_transport_header(skb);
1062                msg = (struct mrt6msg *)skb_transport_header(skb);
1063                msg->im6_mbz = 0;
1064                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1065                msg->im6_mif = mrt->mroute_reg_vif_num;
1066                msg->im6_pad = 0;
1067                msg->im6_src = ipv6_hdr(pkt)->saddr;
1068                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1069
1070                skb->ip_summed = CHECKSUM_UNNECESSARY;
1071        } else
1072#endif
1073        {
1074        /*
1075         *      Copy the IP header
1076         */
1077
1078        skb_put(skb, sizeof(struct ipv6hdr));
1079        skb_reset_network_header(skb);
1080        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1081
1082        /*
1083         *      Add our header
1084         */
1085        skb_put(skb, sizeof(*msg));
1086        skb_reset_transport_header(skb);
1087        msg = (struct mrt6msg *)skb_transport_header(skb);
1088
1089        msg->im6_mbz = 0;
1090        msg->im6_msgtype = assert;
1091        msg->im6_mif = mifi;
1092        msg->im6_pad = 0;
1093        msg->im6_src = ipv6_hdr(pkt)->saddr;
1094        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1095
1096        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1097        skb->ip_summed = CHECKSUM_UNNECESSARY;
1098        }
1099
1100        rcu_read_lock();
1101        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1102        if (!mroute6_sk) {
1103                rcu_read_unlock();
1104                kfree_skb(skb);
1105                return -EINVAL;
1106        }
1107
1108        mrt6msg_netlink_event(mrt, skb);
1109
1110        /* Deliver to user space multicast routing algorithms */
1111        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1112        rcu_read_unlock();
1113        if (ret < 0) {
1114                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1115                kfree_skb(skb);
1116        }
1117
1118        return ret;
1119}
1120
1121/* Queue a packet for resolution. It gets locked cache entry! */
1122static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1123                                  struct sk_buff *skb)
1124{
1125        struct mfc6_cache *c;
1126        bool found = false;
1127        int err;
1128
1129        spin_lock_bh(&mfc_unres_lock);
1130        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1131                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1132                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1133                        found = true;
1134                        break;
1135                }
1136        }
1137
1138        if (!found) {
1139                /*
1140                 *      Create a new entry if allowable
1141                 */
1142
1143                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1144                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1145                        spin_unlock_bh(&mfc_unres_lock);
1146
1147                        kfree_skb(skb);
1148                        return -ENOBUFS;
1149                }
1150
1151                /* Fill in the new cache entry */
1152                c->_c.mfc_parent = -1;
1153                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1154                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1155
1156                /*
1157                 *      Reflect first query at pim6sd
1158                 */
1159                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1160                if (err < 0) {
1161                        /* If the report failed throw the cache entry
1162                           out - Brad Parker
1163                         */
1164                        spin_unlock_bh(&mfc_unres_lock);
1165
1166                        ip6mr_cache_free(c);
1167                        kfree_skb(skb);
1168                        return err;
1169                }
1170
1171                atomic_inc(&mrt->cache_resolve_queue_len);
1172                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1173                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1174
1175                ipmr_do_expire_process(mrt);
1176        }
1177
1178        /* See if we can append the packet */
1179        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1180                kfree_skb(skb);
1181                err = -ENOBUFS;
1182        } else {
1183                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1184                err = 0;
1185        }
1186
1187        spin_unlock_bh(&mfc_unres_lock);
1188        return err;
1189}
1190
1191/*
1192 *      MFC6 cache manipulation by user space
1193 */
1194
1195static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1196                            int parent)
1197{
1198        struct mfc6_cache *c;
1199
1200        /* The entries are added/deleted only under RTNL */
1201        rcu_read_lock();
1202        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1203                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1204        rcu_read_unlock();
1205        if (!c)
1206                return -ENOENT;
1207        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1208        list_del_rcu(&c->_c.list);
1209
1210        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1211                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1212        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1213        mr_cache_put(&c->_c);
1214        return 0;
1215}
1216
1217static int ip6mr_device_event(struct notifier_block *this,
1218                              unsigned long event, void *ptr)
1219{
1220        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1221        struct net *net = dev_net(dev);
1222        struct mr_table *mrt;
1223        struct vif_device *v;
1224        int ct;
1225
1226        if (event != NETDEV_UNREGISTER)
1227                return NOTIFY_DONE;
1228
1229        ip6mr_for_each_table(mrt, net) {
1230                v = &mrt->vif_table[0];
1231                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1232                        if (v->dev == dev)
1233                                mif6_delete(mrt, ct, 1, NULL);
1234                }
1235        }
1236
1237        return NOTIFY_DONE;
1238}
1239
1240static unsigned int ip6mr_seq_read(struct net *net)
1241{
1242        ASSERT_RTNL();
1243
1244        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1245}
1246
1247static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1248{
1249        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1250                       ip6mr_mr_table_iter, &mrt_lock);
1251}
1252
1253static struct notifier_block ip6_mr_notifier = {
1254        .notifier_call = ip6mr_device_event
1255};
1256
1257static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1258        .family         = RTNL_FAMILY_IP6MR,
1259        .fib_seq_read   = ip6mr_seq_read,
1260        .fib_dump       = ip6mr_dump,
1261        .owner          = THIS_MODULE,
1262};
1263
1264static int __net_init ip6mr_notifier_init(struct net *net)
1265{
1266        struct fib_notifier_ops *ops;
1267
1268        net->ipv6.ipmr_seq = 0;
1269
1270        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1271        if (IS_ERR(ops))
1272                return PTR_ERR(ops);
1273
1274        net->ipv6.ip6mr_notifier_ops = ops;
1275
1276        return 0;
1277}
1278
1279static void __net_exit ip6mr_notifier_exit(struct net *net)
1280{
1281        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1282        net->ipv6.ip6mr_notifier_ops = NULL;
1283}
1284
1285/* Setup for IP multicast routing */
1286static int __net_init ip6mr_net_init(struct net *net)
1287{
1288        int err;
1289
1290        err = ip6mr_notifier_init(net);
1291        if (err)
1292                return err;
1293
1294        err = ip6mr_rules_init(net);
1295        if (err < 0)
1296                goto ip6mr_rules_fail;
1297
1298#ifdef CONFIG_PROC_FS
1299        err = -ENOMEM;
1300        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1301                        sizeof(struct mr_vif_iter)))
1302                goto proc_vif_fail;
1303        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1304                        sizeof(struct mr_mfc_iter)))
1305                goto proc_cache_fail;
1306#endif
1307
1308        return 0;
1309
1310#ifdef CONFIG_PROC_FS
1311proc_cache_fail:
1312        remove_proc_entry("ip6_mr_vif", net->proc_net);
1313proc_vif_fail:
1314        ip6mr_rules_exit(net);
1315#endif
1316ip6mr_rules_fail:
1317        ip6mr_notifier_exit(net);
1318        return err;
1319}
1320
1321static void __net_exit ip6mr_net_exit(struct net *net)
1322{
1323#ifdef CONFIG_PROC_FS
1324        remove_proc_entry("ip6_mr_cache", net->proc_net);
1325        remove_proc_entry("ip6_mr_vif", net->proc_net);
1326#endif
1327        ip6mr_rules_exit(net);
1328        ip6mr_notifier_exit(net);
1329}
1330
1331static struct pernet_operations ip6mr_net_ops = {
1332        .init = ip6mr_net_init,
1333        .exit = ip6mr_net_exit,
1334};
1335
1336int __init ip6_mr_init(void)
1337{
1338        int err;
1339
1340        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1341                                       sizeof(struct mfc6_cache),
1342                                       0, SLAB_HWCACHE_ALIGN,
1343                                       NULL);
1344        if (!mrt_cachep)
1345                return -ENOMEM;
1346
1347        err = register_pernet_subsys(&ip6mr_net_ops);
1348        if (err)
1349                goto reg_pernet_fail;
1350
1351        err = register_netdevice_notifier(&ip6_mr_notifier);
1352        if (err)
1353                goto reg_notif_fail;
1354#ifdef CONFIG_IPV6_PIMSM_V2
1355        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1356                pr_err("%s: can't add PIM protocol\n", __func__);
1357                err = -EAGAIN;
1358                goto add_proto_fail;
1359        }
1360#endif
1361        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1362                                   NULL, ip6mr_rtm_dumproute, 0);
1363        if (err == 0)
1364                return 0;
1365
1366#ifdef CONFIG_IPV6_PIMSM_V2
1367        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1368add_proto_fail:
1369        unregister_netdevice_notifier(&ip6_mr_notifier);
1370#endif
1371reg_notif_fail:
1372        unregister_pernet_subsys(&ip6mr_net_ops);
1373reg_pernet_fail:
1374        kmem_cache_destroy(mrt_cachep);
1375        return err;
1376}
1377
1378void ip6_mr_cleanup(void)
1379{
1380        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1381#ifdef CONFIG_IPV6_PIMSM_V2
1382        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1383#endif
1384        unregister_netdevice_notifier(&ip6_mr_notifier);
1385        unregister_pernet_subsys(&ip6mr_net_ops);
1386        kmem_cache_destroy(mrt_cachep);
1387}
1388
1389static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1390                         struct mf6cctl *mfc, int mrtsock, int parent)
1391{
1392        unsigned char ttls[MAXMIFS];
1393        struct mfc6_cache *uc, *c;
1394        struct mr_mfc *_uc;
1395        bool found;
1396        int i, err;
1397
1398        if (mfc->mf6cc_parent >= MAXMIFS)
1399                return -ENFILE;
1400
1401        memset(ttls, 255, MAXMIFS);
1402        for (i = 0; i < MAXMIFS; i++) {
1403                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1404                        ttls[i] = 1;
1405        }
1406
1407        /* The entries are added/deleted only under RTNL */
1408        rcu_read_lock();
1409        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1410                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1411        rcu_read_unlock();
1412        if (c) {
1413                write_lock_bh(&mrt_lock);
1414                c->_c.mfc_parent = mfc->mf6cc_parent;
1415                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1416                if (!mrtsock)
1417                        c->_c.mfc_flags |= MFC_STATIC;
1418                write_unlock_bh(&mrt_lock);
1419                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1420                                               c, mrt->id);
1421                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1422                return 0;
1423        }
1424
1425        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1426            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1427                return -EINVAL;
1428
1429        c = ip6mr_cache_alloc();
1430        if (!c)
1431                return -ENOMEM;
1432
1433        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1434        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1435        c->_c.mfc_parent = mfc->mf6cc_parent;
1436        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1437        if (!mrtsock)
1438                c->_c.mfc_flags |= MFC_STATIC;
1439
1440        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1441                                  ip6mr_rht_params);
1442        if (err) {
1443                pr_err("ip6mr: rhtable insert error %d\n", err);
1444                ip6mr_cache_free(c);
1445                return err;
1446        }
1447        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1448
1449        /* Check to see if we resolved a queued list. If so we
1450         * need to send on the frames and tidy up.
1451         */
1452        found = false;
1453        spin_lock_bh(&mfc_unres_lock);
1454        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1455                uc = (struct mfc6_cache *)_uc;
1456                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1457                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1458                        list_del(&_uc->list);
1459                        atomic_dec(&mrt->cache_resolve_queue_len);
1460                        found = true;
1461                        break;
1462                }
1463        }
1464        if (list_empty(&mrt->mfc_unres_queue))
1465                del_timer(&mrt->ipmr_expire_timer);
1466        spin_unlock_bh(&mfc_unres_lock);
1467
1468        if (found) {
1469                ip6mr_cache_resolve(net, mrt, uc, c);
1470                ip6mr_cache_free(uc);
1471        }
1472        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1473                                       c, mrt->id);
1474        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1475        return 0;
1476}
1477
1478/*
1479 *      Close the multicast socket, and clear the vif tables etc
1480 */
1481
1482static void mroute_clean_tables(struct mr_table *mrt, bool all)
1483{
1484        struct mr_mfc *c, *tmp;
1485        LIST_HEAD(list);
1486        int i;
1487
1488        /* Shut down all active vif entries */
1489        for (i = 0; i < mrt->maxvif; i++) {
1490                if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1491                        continue;
1492                mif6_delete(mrt, i, 0, &list);
1493        }
1494        unregister_netdevice_many(&list);
1495
1496        /* Wipe the cache */
1497        list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1498                if (!all && (c->mfc_flags & MFC_STATIC))
1499                        continue;
1500                rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1501                list_del_rcu(&c->list);
1502                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1503                mr_cache_put(c);
1504        }
1505
1506        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1507                spin_lock_bh(&mfc_unres_lock);
1508                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1509                        list_del(&c->list);
1510                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1511                                                       FIB_EVENT_ENTRY_DEL,
1512                                                       (struct mfc6_cache *)c,
1513                                                       mrt->id);
1514                        mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1515                                          RTM_DELROUTE);
1516                        ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1517                }
1518                spin_unlock_bh(&mfc_unres_lock);
1519        }
1520}
1521
1522static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1523{
1524        int err = 0;
1525        struct net *net = sock_net(sk);
1526
1527        rtnl_lock();
1528        write_lock_bh(&mrt_lock);
1529        if (rtnl_dereference(mrt->mroute_sk)) {
1530                err = -EADDRINUSE;
1531        } else {
1532                rcu_assign_pointer(mrt->mroute_sk, sk);
1533                sock_set_flag(sk, SOCK_RCU_FREE);
1534                net->ipv6.devconf_all->mc_forwarding++;
1535        }
1536        write_unlock_bh(&mrt_lock);
1537
1538        if (!err)
1539                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1540                                             NETCONFA_MC_FORWARDING,
1541                                             NETCONFA_IFINDEX_ALL,
1542                                             net->ipv6.devconf_all);
1543        rtnl_unlock();
1544
1545        return err;
1546}
1547
1548int ip6mr_sk_done(struct sock *sk)
1549{
1550        int err = -EACCES;
1551        struct net *net = sock_net(sk);
1552        struct mr_table *mrt;
1553
1554        if (sk->sk_type != SOCK_RAW ||
1555            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1556                return err;
1557
1558        rtnl_lock();
1559        ip6mr_for_each_table(mrt, net) {
1560                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1561                        write_lock_bh(&mrt_lock);
1562                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1563                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1564                         * so the RCU grace period before sk freeing
1565                         * is guaranteed by sk_destruct()
1566                         */
1567                        net->ipv6.devconf_all->mc_forwarding--;
1568                        write_unlock_bh(&mrt_lock);
1569                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1570                                                     NETCONFA_MC_FORWARDING,
1571                                                     NETCONFA_IFINDEX_ALL,
1572                                                     net->ipv6.devconf_all);
1573
1574                        mroute_clean_tables(mrt, false);
1575                        err = 0;
1576                        break;
1577                }
1578        }
1579        rtnl_unlock();
1580
1581        return err;
1582}
1583
1584bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1585{
1586        struct mr_table *mrt;
1587        struct flowi6 fl6 = {
1588                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1589                .flowi6_oif     = skb->dev->ifindex,
1590                .flowi6_mark    = skb->mark,
1591        };
1592
1593        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1594                return NULL;
1595
1596        return rcu_access_pointer(mrt->mroute_sk);
1597}
1598EXPORT_SYMBOL(mroute6_is_socket);
1599
1600/*
1601 *      Socket options and virtual interface manipulation. The whole
1602 *      virtual interface system is a complete heap, but unfortunately
1603 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1604 *      MOSPF/PIM router set up we can clean this up.
1605 */
1606
1607int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1608{
1609        int ret, parent = 0;
1610        struct mif6ctl vif;
1611        struct mf6cctl mfc;
1612        mifi_t mifi;
1613        struct net *net = sock_net(sk);
1614        struct mr_table *mrt;
1615
1616        if (sk->sk_type != SOCK_RAW ||
1617            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1618                return -EOPNOTSUPP;
1619
1620        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1621        if (!mrt)
1622                return -ENOENT;
1623
1624        if (optname != MRT6_INIT) {
1625                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1626                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1627                        return -EACCES;
1628        }
1629
1630        switch (optname) {
1631        case MRT6_INIT:
1632                if (optlen < sizeof(int))
1633                        return -EINVAL;
1634
1635                return ip6mr_sk_init(mrt, sk);
1636
1637        case MRT6_DONE:
1638                return ip6mr_sk_done(sk);
1639
1640        case MRT6_ADD_MIF:
1641                if (optlen < sizeof(vif))
1642                        return -EINVAL;
1643                if (copy_from_user(&vif, optval, sizeof(vif)))
1644                        return -EFAULT;
1645                if (vif.mif6c_mifi >= MAXMIFS)
1646                        return -ENFILE;
1647                rtnl_lock();
1648                ret = mif6_add(net, mrt, &vif,
1649                               sk == rtnl_dereference(mrt->mroute_sk));
1650                rtnl_unlock();
1651                return ret;
1652
1653        case MRT6_DEL_MIF:
1654                if (optlen < sizeof(mifi_t))
1655                        return -EINVAL;
1656                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1657                        return -EFAULT;
1658                rtnl_lock();
1659                ret = mif6_delete(mrt, mifi, 0, NULL);
1660                rtnl_unlock();
1661                return ret;
1662
1663        /*
1664         *      Manipulate the forwarding caches. These live
1665         *      in a sort of kernel/user symbiosis.
1666         */
1667        case MRT6_ADD_MFC:
1668        case MRT6_DEL_MFC:
1669                parent = -1;
1670                /* fall through */
1671        case MRT6_ADD_MFC_PROXY:
1672        case MRT6_DEL_MFC_PROXY:
1673                if (optlen < sizeof(mfc))
1674                        return -EINVAL;
1675                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1676                        return -EFAULT;
1677                if (parent == 0)
1678                        parent = mfc.mf6cc_parent;
1679                rtnl_lock();
1680                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1681                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1682                else
1683                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1684                                            sk ==
1685                                            rtnl_dereference(mrt->mroute_sk),
1686                                            parent);
1687                rtnl_unlock();
1688                return ret;
1689
1690        /*
1691         *      Control PIM assert (to activate pim will activate assert)
1692         */
1693        case MRT6_ASSERT:
1694        {
1695                int v;
1696
1697                if (optlen != sizeof(v))
1698                        return -EINVAL;
1699                if (get_user(v, (int __user *)optval))
1700                        return -EFAULT;
1701                mrt->mroute_do_assert = v;
1702                return 0;
1703        }
1704
1705#ifdef CONFIG_IPV6_PIMSM_V2
1706        case MRT6_PIM:
1707        {
1708                int v;
1709
1710                if (optlen != sizeof(v))
1711                        return -EINVAL;
1712                if (get_user(v, (int __user *)optval))
1713                        return -EFAULT;
1714                v = !!v;
1715                rtnl_lock();
1716                ret = 0;
1717                if (v != mrt->mroute_do_pim) {
1718                        mrt->mroute_do_pim = v;
1719                        mrt->mroute_do_assert = v;
1720                }
1721                rtnl_unlock();
1722                return ret;
1723        }
1724
1725#endif
1726#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1727        case MRT6_TABLE:
1728        {
1729                u32 v;
1730
1731                if (optlen != sizeof(u32))
1732                        return -EINVAL;
1733                if (get_user(v, (u32 __user *)optval))
1734                        return -EFAULT;
1735                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1736                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1737                        return -EINVAL;
1738                if (sk == rcu_access_pointer(mrt->mroute_sk))
1739                        return -EBUSY;
1740
1741                rtnl_lock();
1742                ret = 0;
1743                mrt = ip6mr_new_table(net, v);
1744                if (IS_ERR(mrt))
1745                        ret = PTR_ERR(mrt);
1746                else
1747                        raw6_sk(sk)->ip6mr_table = v;
1748                rtnl_unlock();
1749                return ret;
1750        }
1751#endif
1752        /*
1753         *      Spurious command, or MRT6_VERSION which you cannot
1754         *      set.
1755         */
1756        default:
1757                return -ENOPROTOOPT;
1758        }
1759}
1760
1761/*
1762 *      Getsock opt support for the multicast routing system.
1763 */
1764
1765int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1766                          int __user *optlen)
1767{
1768        int olr;
1769        int val;
1770        struct net *net = sock_net(sk);
1771        struct mr_table *mrt;
1772
1773        if (sk->sk_type != SOCK_RAW ||
1774            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1775                return -EOPNOTSUPP;
1776
1777        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1778        if (!mrt)
1779                return -ENOENT;
1780
1781        switch (optname) {
1782        case MRT6_VERSION:
1783                val = 0x0305;
1784                break;
1785#ifdef CONFIG_IPV6_PIMSM_V2
1786        case MRT6_PIM:
1787                val = mrt->mroute_do_pim;
1788                break;
1789#endif
1790        case MRT6_ASSERT:
1791                val = mrt->mroute_do_assert;
1792                break;
1793        default:
1794                return -ENOPROTOOPT;
1795        }
1796
1797        if (get_user(olr, optlen))
1798                return -EFAULT;
1799
1800        olr = min_t(int, olr, sizeof(int));
1801        if (olr < 0)
1802                return -EINVAL;
1803
1804        if (put_user(olr, optlen))
1805                return -EFAULT;
1806        if (copy_to_user(optval, &val, olr))
1807                return -EFAULT;
1808        return 0;
1809}
1810
/*
 *	The IPv6 multicast ioctl support routines.
 */
1814
1815int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1816{
1817        struct sioc_sg_req6 sr;
1818        struct sioc_mif_req6 vr;
1819        struct vif_device *vif;
1820        struct mfc6_cache *c;
1821        struct net *net = sock_net(sk);
1822        struct mr_table *mrt;
1823
1824        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1825        if (!mrt)
1826                return -ENOENT;
1827
1828        switch (cmd) {
1829        case SIOCGETMIFCNT_IN6:
1830                if (copy_from_user(&vr, arg, sizeof(vr)))
1831                        return -EFAULT;
1832                if (vr.mifi >= mrt->maxvif)
1833                        return -EINVAL;
1834                read_lock(&mrt_lock);
1835                vif = &mrt->vif_table[vr.mifi];
1836                if (VIF_EXISTS(mrt, vr.mifi)) {
1837                        vr.icount = vif->pkt_in;
1838                        vr.ocount = vif->pkt_out;
1839                        vr.ibytes = vif->bytes_in;
1840                        vr.obytes = vif->bytes_out;
1841                        read_unlock(&mrt_lock);
1842
1843                        if (copy_to_user(arg, &vr, sizeof(vr)))
1844                                return -EFAULT;
1845                        return 0;
1846                }
1847                read_unlock(&mrt_lock);
1848                return -EADDRNOTAVAIL;
1849        case SIOCGETSGCNT_IN6:
1850                if (copy_from_user(&sr, arg, sizeof(sr)))
1851                        return -EFAULT;
1852
1853                rcu_read_lock();
1854                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1855                if (c) {
1856                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1857                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1858                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1859                        rcu_read_unlock();
1860
1861                        if (copy_to_user(arg, &sr, sizeof(sr)))
1862                                return -EFAULT;
1863                        return 0;
1864                }
1865                rcu_read_unlock();
1866                return -EADDRNOTAVAIL;
1867        default:
1868                return -ENOIOCTLCMD;
1869        }
1870}
1871
1872#ifdef CONFIG_COMPAT
1873struct compat_sioc_sg_req6 {
1874        struct sockaddr_in6 src;
1875        struct sockaddr_in6 grp;
1876        compat_ulong_t pktcnt;
1877        compat_ulong_t bytecnt;
1878        compat_ulong_t wrong_if;
1879};
1880
1881struct compat_sioc_mif_req6 {
1882        mifi_t  mifi;
1883        compat_ulong_t icount;
1884        compat_ulong_t ocount;
1885        compat_ulong_t ibytes;
1886        compat_ulong_t obytes;
1887};
1888
1889int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1890{
1891        struct compat_sioc_sg_req6 sr;
1892        struct compat_sioc_mif_req6 vr;
1893        struct vif_device *vif;
1894        struct mfc6_cache *c;
1895        struct net *net = sock_net(sk);
1896        struct mr_table *mrt;
1897
1898        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1899        if (!mrt)
1900                return -ENOENT;
1901
1902        switch (cmd) {
1903        case SIOCGETMIFCNT_IN6:
1904                if (copy_from_user(&vr, arg, sizeof(vr)))
1905                        return -EFAULT;
1906                if (vr.mifi >= mrt->maxvif)
1907                        return -EINVAL;
1908                read_lock(&mrt_lock);
1909                vif = &mrt->vif_table[vr.mifi];
1910                if (VIF_EXISTS(mrt, vr.mifi)) {
1911                        vr.icount = vif->pkt_in;
1912                        vr.ocount = vif->pkt_out;
1913                        vr.ibytes = vif->bytes_in;
1914                        vr.obytes = vif->bytes_out;
1915                        read_unlock(&mrt_lock);
1916
1917                        if (copy_to_user(arg, &vr, sizeof(vr)))
1918                                return -EFAULT;
1919                        return 0;
1920                }
1921                read_unlock(&mrt_lock);
1922                return -EADDRNOTAVAIL;
1923        case SIOCGETSGCNT_IN6:
1924                if (copy_from_user(&sr, arg, sizeof(sr)))
1925                        return -EFAULT;
1926
1927                rcu_read_lock();
1928                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1929                if (c) {
1930                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1931                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1932                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1933                        rcu_read_unlock();
1934
1935                        if (copy_to_user(arg, &sr, sizeof(sr)))
1936                                return -EFAULT;
1937                        return 0;
1938                }
1939                rcu_read_unlock();
1940                return -EADDRNOTAVAIL;
1941        default:
1942                return -ENOIOCTLCMD;
1943        }
1944}
1945#endif
1946
1947static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1948{
1949        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1950                        IPSTATS_MIB_OUTFORWDATAGRAMS);
1951        __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1952                        IPSTATS_MIB_OUTOCTETS, skb->len);
1953        return dst_output(net, sk, skb);
1954}
1955
/*
 *	Processing handlers for ip6_mr_forward
 */
1959
1960static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1961                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1962{
1963        struct ipv6hdr *ipv6h;
1964        struct vif_device *vif = &mrt->vif_table[vifi];
1965        struct net_device *dev;
1966        struct dst_entry *dst;
1967        struct flowi6 fl6;
1968
1969        if (!vif->dev)
1970                goto out_free;
1971
1972#ifdef CONFIG_IPV6_PIMSM_V2
1973        if (vif->flags & MIFF_REGISTER) {
1974                vif->pkt_out++;
1975                vif->bytes_out += skb->len;
1976                vif->dev->stats.tx_bytes += skb->len;
1977                vif->dev->stats.tx_packets++;
1978                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1979                goto out_free;
1980        }
1981#endif
1982
1983        ipv6h = ipv6_hdr(skb);
1984
1985        fl6 = (struct flowi6) {
1986                .flowi6_oif = vif->link,
1987                .daddr = ipv6h->daddr,
1988        };
1989
1990        dst = ip6_route_output(net, NULL, &fl6);
1991        if (dst->error) {
1992                dst_release(dst);
1993                goto out_free;
1994        }
1995
1996        skb_dst_drop(skb);
1997        skb_dst_set(skb, dst);
1998
1999        /*
2000         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2001         * not only before forwarding, but after forwarding on all output
2002         * interfaces. It is clear, if mrouter runs a multicasting
2003         * program, it should receive packets not depending to what interface
2004         * program is joined.
2005         * If we will not make it, the program will have to join on all
2006         * interfaces. On the other hand, multihoming host (or router, but
2007         * not mrouter) cannot join to more than one interface - it will
2008         * result in receiving multiple packets.
2009         */
2010        dev = vif->dev;
2011        skb->dev = dev;
2012        vif->pkt_out++;
2013        vif->bytes_out += skb->len;
2014
2015        /* We are about to write */
2016        /* XXX: extension headers? */
2017        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2018                goto out_free;
2019
2020        ipv6h = ipv6_hdr(skb);
2021        ipv6h->hop_limit--;
2022
2023        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2024
2025        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2026                       net, NULL, skb, skb->dev, dev,
2027                       ip6mr_forward2_finish);
2028
2029out_free:
2030        kfree_skb(skb);
2031        return 0;
2032}
2033
2034static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2035{
2036        int ct;
2037
2038        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2039                if (mrt->vif_table[ct].dev == dev)
2040                        break;
2041        }
2042        return ct;
2043}
2044
2045static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2046                           struct sk_buff *skb, struct mfc6_cache *c)
2047{
2048        int psend = -1;
2049        int vif, ct;
2050        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2051
2052        vif = c->_c.mfc_parent;
2053        c->_c.mfc_un.res.pkt++;
2054        c->_c.mfc_un.res.bytes += skb->len;
2055        c->_c.mfc_un.res.lastuse = jiffies;
2056
2057        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2058                struct mfc6_cache *cache_proxy;
2059
2060                /* For an (*,G) entry, we only check that the incoming
2061                 * interface is part of the static tree.
2062                 */
2063                rcu_read_lock();
2064                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2065                if (cache_proxy &&
2066                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2067                        rcu_read_unlock();
2068                        goto forward;
2069                }
2070                rcu_read_unlock();
2071        }
2072
2073        /*
2074         * Wrong interface: drop packet and (maybe) send PIM assert.
2075         */
2076        if (mrt->vif_table[vif].dev != skb->dev) {
2077                c->_c.mfc_un.res.wrong_if++;
2078
2079                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2080                    /* pimsm uses asserts, when switching from RPT to SPT,
2081                       so that we cannot check that packet arrived on an oif.
2082                       It is bad, but otherwise we would need to move pretty
2083                       large chunk of pimd to kernel. Ough... --ANK
2084                     */
2085                    (mrt->mroute_do_pim ||
2086                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2087                    time_after(jiffies,
2088                               c->_c.mfc_un.res.last_assert +
2089                               MFC_ASSERT_THRESH)) {
2090                        c->_c.mfc_un.res.last_assert = jiffies;
2091                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2092                }
2093                goto dont_forward;
2094        }
2095
2096forward:
2097        mrt->vif_table[vif].pkt_in++;
2098        mrt->vif_table[vif].bytes_in += skb->len;
2099
2100        /*
2101         *      Forward the frame
2102         */
2103        if (ipv6_addr_any(&c->mf6c_origin) &&
2104            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2105                if (true_vifi >= 0 &&
2106                    true_vifi != c->_c.mfc_parent &&
2107                    ipv6_hdr(skb)->hop_limit >
2108                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2109                        /* It's an (*,*) entry and the packet is not coming from
2110                         * the upstream: forward the packet to the upstream
2111                         * only.
2112                         */
2113                        psend = c->_c.mfc_parent;
2114                        goto last_forward;
2115                }
2116                goto dont_forward;
2117        }
2118        for (ct = c->_c.mfc_un.res.maxvif - 1;
2119             ct >= c->_c.mfc_un.res.minvif; ct--) {
2120                /* For (*,G) entry, don't forward to the incoming interface */
2121                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2122                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2123                        if (psend != -1) {
2124                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2125                                if (skb2)
2126                                        ip6mr_forward2(net, mrt, skb2,
2127                                                       c, psend);
2128                        }
2129                        psend = ct;
2130                }
2131        }
2132last_forward:
2133        if (psend != -1) {
2134                ip6mr_forward2(net, mrt, skb, c, psend);
2135                return;
2136        }
2137
2138dont_forward:
2139        kfree_skb(skb);
2140}
2141
2142
/*
 *	Multicast packets for forwarding arrive here
 */
2146
2147int ip6_mr_input(struct sk_buff *skb)
2148{
2149        struct mfc6_cache *cache;
2150        struct net *net = dev_net(skb->dev);
2151        struct mr_table *mrt;
2152        struct flowi6 fl6 = {
2153                .flowi6_iif     = skb->dev->ifindex,
2154                .flowi6_mark    = skb->mark,
2155        };
2156        int err;
2157
2158        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2159        if (err < 0) {
2160                kfree_skb(skb);
2161                return err;
2162        }
2163
2164        read_lock(&mrt_lock);
2165        cache = ip6mr_cache_find(mrt,
2166                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2167        if (!cache) {
2168                int vif = ip6mr_find_vif(mrt, skb->dev);
2169
2170                if (vif >= 0)
2171                        cache = ip6mr_cache_find_any(mrt,
2172                                                     &ipv6_hdr(skb)->daddr,
2173                                                     vif);
2174        }
2175
2176        /*
2177         *      No usable cache entry
2178         */
2179        if (!cache) {
2180                int vif;
2181
2182                vif = ip6mr_find_vif(mrt, skb->dev);
2183                if (vif >= 0) {
2184                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2185                        read_unlock(&mrt_lock);
2186
2187                        return err;
2188                }
2189                read_unlock(&mrt_lock);
2190                kfree_skb(skb);
2191                return -ENODEV;
2192        }
2193
2194        ip6_mr_forward(net, mrt, skb, cache);
2195
2196        read_unlock(&mrt_lock);
2197
2198        return 0;
2199}
2200
2201int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2202                    u32 portid)
2203{
2204        int err;
2205        struct mr_table *mrt;
2206        struct mfc6_cache *cache;
2207        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2208
2209        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2210        if (!mrt)
2211                return -ENOENT;
2212
2213        read_lock(&mrt_lock);
2214        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2215        if (!cache && skb->dev) {
2216                int vif = ip6mr_find_vif(mrt, skb->dev);
2217
2218                if (vif >= 0)
2219                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2220                                                     vif);
2221        }
2222
2223        if (!cache) {
2224                struct sk_buff *skb2;
2225                struct ipv6hdr *iph;
2226                struct net_device *dev;
2227                int vif;
2228
2229                dev = skb->dev;
2230                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2231                        read_unlock(&mrt_lock);
2232                        return -ENODEV;
2233                }
2234
2235                /* really correct? */
2236                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2237                if (!skb2) {
2238                        read_unlock(&mrt_lock);
2239                        return -ENOMEM;
2240                }
2241
2242                NETLINK_CB(skb2).portid = portid;
2243                skb_reset_transport_header(skb2);
2244
2245                skb_put(skb2, sizeof(struct ipv6hdr));
2246                skb_reset_network_header(skb2);
2247
2248                iph = ipv6_hdr(skb2);
2249                iph->version = 0;
2250                iph->priority = 0;
2251                iph->flow_lbl[0] = 0;
2252                iph->flow_lbl[1] = 0;
2253                iph->flow_lbl[2] = 0;
2254                iph->payload_len = 0;
2255                iph->nexthdr = IPPROTO_NONE;
2256                iph->hop_limit = 0;
2257                iph->saddr = rt->rt6i_src.addr;
2258                iph->daddr = rt->rt6i_dst.addr;
2259
2260                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2261                read_unlock(&mrt_lock);
2262
2263                return err;
2264        }
2265
2266        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2267        read_unlock(&mrt_lock);
2268        return err;
2269}
2270
2271static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2272                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2273                             int flags)
2274{
2275        struct nlmsghdr *nlh;
2276        struct rtmsg *rtm;
2277        int err;
2278
2279        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2280        if (!nlh)
2281                return -EMSGSIZE;
2282
2283        rtm = nlmsg_data(nlh);
2284        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2285        rtm->rtm_dst_len  = 128;
2286        rtm->rtm_src_len  = 128;
2287        rtm->rtm_tos      = 0;
2288        rtm->rtm_table    = mrt->id;
2289        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2290                goto nla_put_failure;
2291        rtm->rtm_type = RTN_MULTICAST;
2292        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2293        if (c->_c.mfc_flags & MFC_STATIC)
2294                rtm->rtm_protocol = RTPROT_STATIC;
2295        else
2296                rtm->rtm_protocol = RTPROT_MROUTED;
2297        rtm->rtm_flags    = 0;
2298
2299        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2300            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2301                goto nla_put_failure;
2302        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2303        /* do not break the dump if cache is unresolved */
2304        if (err < 0 && err != -ENOENT)
2305                goto nla_put_failure;
2306
2307        nlmsg_end(skb, nlh);
2308        return 0;
2309
2310nla_put_failure:
2311        nlmsg_cancel(skb, nlh);
2312        return -EMSGSIZE;
2313}
2314
2315static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2316                              u32 portid, u32 seq, struct mr_mfc *c,
2317                              int cmd, int flags)
2318{
2319        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2320                                 cmd, flags);
2321}
2322
2323static int mr6_msgsize(bool unresolved, int maxvif)
2324{
2325        size_t len =
2326                NLMSG_ALIGN(sizeof(struct rtmsg))
2327                + nla_total_size(4)     /* RTA_TABLE */
2328                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2329                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2330                ;
2331
2332        if (!unresolved)
2333                len = len
2334                      + nla_total_size(4)       /* RTA_IIF */
2335                      + nla_total_size(0)       /* RTA_MULTIPATH */
2336                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2337                                                /* RTA_MFC_STATS */
2338                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2339                ;
2340
2341        return len;
2342}
2343
2344static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2345                              int cmd)
2346{
2347        struct net *net = read_pnet(&mrt->net);
2348        struct sk_buff *skb;
2349        int err = -ENOBUFS;
2350
2351        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2352                        GFP_ATOMIC);
2353        if (!skb)
2354                goto errout;
2355
2356        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2357        if (err < 0)
2358                goto errout;
2359
2360        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2361        return;
2362
2363errout:
2364        kfree_skb(skb);
2365        if (err < 0)
2366                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2367}
2368
2369static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2370{
2371        size_t len =
2372                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2373                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2374                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2375                                        /* IP6MRA_CREPORT_SRC_ADDR */
2376                + nla_total_size(sizeof(struct in6_addr))
2377                                        /* IP6MRA_CREPORT_DST_ADDR */
2378                + nla_total_size(sizeof(struct in6_addr))
2379                                        /* IP6MRA_CREPORT_PKT */
2380                + nla_total_size(payloadlen)
2381                ;
2382
2383        return len;
2384}
2385
2386static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2387{
2388        struct net *net = read_pnet(&mrt->net);
2389        struct nlmsghdr *nlh;
2390        struct rtgenmsg *rtgenm;
2391        struct mrt6msg *msg;
2392        struct sk_buff *skb;
2393        struct nlattr *nla;
2394        int payloadlen;
2395
2396        payloadlen = pkt->len - sizeof(struct mrt6msg);
2397        msg = (struct mrt6msg *)skb_transport_header(pkt);
2398
2399        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2400        if (!skb)
2401                goto errout;
2402
2403        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2404                        sizeof(struct rtgenmsg), 0);
2405        if (!nlh)
2406                goto errout;
2407        rtgenm = nlmsg_data(nlh);
2408        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2409        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2410            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2411            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2412                             &msg->im6_src) ||
2413            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2414                             &msg->im6_dst))
2415                goto nla_put_failure;
2416
2417        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2418        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2419                                  nla_data(nla), payloadlen))
2420                goto nla_put_failure;
2421
2422        nlmsg_end(skb, nlh);
2423
2424        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2425        return;
2426
2427nla_put_failure:
2428        nlmsg_cancel(skb, nlh);
2429errout:
2430        kfree_skb(skb);
2431        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2432}
2433
2434static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2435{
2436        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2437                                _ip6mr_fill_mroute, &mfc_unres_lock);
2438}
2439