linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <linux/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/mm.h>
  24#include <linux/kernel.h>
  25#include <linux/fcntl.h>
  26#include <linux/stat.h>
  27#include <linux/socket.h>
  28#include <linux/inet.h>
  29#include <linux/netdevice.h>
  30#include <linux/inetdevice.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/init.h>
  34#include <linux/compat.h>
  35#include <linux/rhashtable.h>
  36#include <net/protocol.h>
  37#include <linux/skbuff.h>
  38#include <net/raw.h>
  39#include <linux/notifier.h>
  40#include <linux/if_arp.h>
  41#include <net/checksum.h>
  42#include <net/netlink.h>
  43#include <net/fib_rules.h>
  44
  45#include <net/ipv6.h>
  46#include <net/ip6_route.h>
  47#include <linux/mroute6.h>
  48#include <linux/pim.h>
  49#include <net/addrconf.h>
  50#include <linux/netfilter_ipv6.h>
  51#include <linux/export.h>
  52#include <net/ip6_checksum.h>
  53#include <linux/netconf.h>
  54#include <net/ip_tunnels.h>
  55
  56#include <linux/nospec.h>
  57
    /* IPv6 multicast-routing policy rule; it holds nothing beyond the
     * generic fib_rule state.
     */
  58struct ip6mr_rule {
  59        struct fib_rule         common;
  60};
  61
    /* Lookup result passed through fib_lookup_arg.result:
     * ip6mr_rule_action() stores the selected table in @mrt.
     */
  62struct ip6mr_result {
  63        struct mr_table *mrt;
  64};
  65
  66/* Big lock, protecting the vif table, the mrt cache and mroute socket state.
  67   Note that changes are serialized via rtnl_lock.
  68 */
  69
  70static DEFINE_RWLOCK(mrt_lock);
  71
  72/* Multicast router control variables */
  73
  74/* Special spinlock for the queue of unresolved entries */
  75static DEFINE_SPINLOCK(mfc_unres_lock);
  76
  77/* We return to Alan's original scheme. The hash table of resolved
  78   entries is changed only in process context and protected
  79   with the weak lock mrt_lock. The queue of unresolved entries is
  80   protected with the strong spinlock mfc_unres_lock.
  81
  82   In this case the data path is entirely free of exclusive locks.
  83 */
  84
    /* Slab cache that multicast forwarding cache entries are freed to. */
  85static struct kmem_cache *mrt_cachep __read_mostly;
  86
    /* Forward declarations for helpers used before their definition. */
  87static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  88static void ip6mr_free_table(struct mr_table *mrt);
  89
  90static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  91                           struct sk_buff *skb, struct mfc6_cache *cache);
  92static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  93                              mifi_t mifi, int assert);
  94static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  95                              int cmd);
  96static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  97static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  98                               struct netlink_callback *cb);
  99static void mroute_clean_tables(struct mr_table *mrt, bool all);
 100static void ipmr_expire_process(struct timer_list *t);
 101
 102#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
    /* Walk every table linked on net->ipv6.mr6_tables using the RCU list
     * iterator.
     */
 103#define ip6mr_for_each_table(mrt, net) \
 104        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 105
 106static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 107                                            struct mr_table *mrt)
 108{
 109        struct mr_table *ret;
 110
 111        if (!mrt)
 112                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 113                                     struct mr_table, list);
 114        else
 115                ret = list_entry_rcu(mrt->list.next,
 116                                     struct mr_table, list);
 117
 118        if (&ret->list == &net->ipv6.mr6_tables)
 119                return NULL;
 120        return ret;
 121}
 122
 123static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 124{
 125        struct mr_table *mrt;
 126
 127        ip6mr_for_each_table(mrt, net) {
 128                if (mrt->id == id)
 129                        return mrt;
 130        }
 131        return NULL;
 132}
 133
 134static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 135                            struct mr_table **mrt)
 136{
 137        int err;
 138        struct ip6mr_result res;
 139        struct fib_lookup_arg arg = {
 140                .result = &res,
 141                .flags = FIB_LOOKUP_NOREF,
 142        };
 143
 144        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 145                               flowi6_to_flowi(flp6), 0, &arg);
 146        if (err < 0)
 147                return err;
 148        *mrt = res.mrt;
 149        return 0;
 150}
 151
 152static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 153                             int flags, struct fib_lookup_arg *arg)
 154{
 155        struct ip6mr_result *res = arg->result;
 156        struct mr_table *mrt;
 157
 158        switch (rule->action) {
 159        case FR_ACT_TO_TBL:
 160                break;
 161        case FR_ACT_UNREACHABLE:
 162                return -ENETUNREACH;
 163        case FR_ACT_PROHIBIT:
 164                return -EACCES;
 165        case FR_ACT_BLACKHOLE:
 166        default:
 167                return -EINVAL;
 168        }
 169
 170        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 171        if (!mrt)
 172                return -EAGAIN;
 173        res->mrt = mrt;
 174        return 0;
 175}
 176
 177static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 178{
 179        return 1;
 180}
 181
 182static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 183        FRA_GENERIC_POLICY,
 184};
 185
 186static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 187                                struct fib_rule_hdr *frh, struct nlattr **tb,
 188                                struct netlink_ext_ack *extack)
 189{
 190        return 0;
 191}
 192
 193static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 194                              struct nlattr **tb)
 195{
 196        return 1;
 197}
 198
 199static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 200                           struct fib_rule_hdr *frh)
 201{
 202        frh->dst_len = 0;
 203        frh->src_len = 0;
 204        frh->tos     = 0;
 205        return 0;
 206}
 207
 208static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 209        .family         = RTNL_FAMILY_IP6MR,
 210        .rule_size      = sizeof(struct ip6mr_rule),
 211        .addr_size      = sizeof(struct in6_addr),
 212        .action         = ip6mr_rule_action,
 213        .match          = ip6mr_rule_match,
 214        .configure      = ip6mr_rule_configure,
 215        .compare        = ip6mr_rule_compare,
 216        .fill           = ip6mr_rule_fill,
 217        .nlgroup        = RTNLGRP_IPV6_RULE,
 218        .policy         = ip6mr_rule_policy,
 219        .owner          = THIS_MODULE,
 220};
 221
 222static int __net_init ip6mr_rules_init(struct net *net)
 223{
 224        struct fib_rules_ops *ops;
 225        struct mr_table *mrt;
 226        int err;
 227
 228        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 229        if (IS_ERR(ops))
 230                return PTR_ERR(ops);
 231
 232        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 233
 234        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 235        if (IS_ERR(mrt)) {
 236                err = PTR_ERR(mrt);
 237                goto err1;
 238        }
 239
 240        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 241        if (err < 0)
 242                goto err2;
 243
 244        net->ipv6.mr6_rules_ops = ops;
 245        return 0;
 246
 247err2:
 248        ip6mr_free_table(mrt);
 249err1:
 250        fib_rules_unregister(ops);
 251        return err;
 252}
 253
 254static void __net_exit ip6mr_rules_exit(struct net *net)
 255{
 256        struct mr_table *mrt, *next;
 257
 258        rtnl_lock();
 259        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 260                list_del(&mrt->list);
 261                ip6mr_free_table(mrt);
 262        }
 263        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 264        rtnl_unlock();
 265}
 266
 267static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 268{
 269        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
 270}
 271
 272static unsigned int ip6mr_rules_seq_read(struct net *net)
 273{
 274        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 275}
 276
 277bool ip6mr_rule_default(const struct fib_rule *rule)
 278{
 279        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 280               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 281}
 282EXPORT_SYMBOL(ip6mr_rule_default);
 283#else
    /* Single-table build: the loop body runs at most once, for
     * net->ipv6.mrt6 when it is non-NULL.
     */
 284#define ip6mr_for_each_table(mrt, net) \
 285        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 286
 287static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 288                                            struct mr_table *mrt)
 289{
 290        if (!mrt)
 291                return net->ipv6.mrt6;
 292        return NULL;
 293}
 294
 295static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 296{
 297        return net->ipv6.mrt6;
 298}
 299
 300static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 301                            struct mr_table **mrt)
 302{
 303        *mrt = net->ipv6.mrt6;
 304        return 0;
 305}
 306
 307static int __net_init ip6mr_rules_init(struct net *net)
 308{
 309        struct mr_table *mrt;
 310
 311        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 312        if (IS_ERR(mrt))
 313                return PTR_ERR(mrt);
 314        net->ipv6.mrt6 = mrt;
 315        return 0;
 316}
 317
 318static void __net_exit ip6mr_rules_exit(struct net *net)
 319{
 320        rtnl_lock();
 321        ip6mr_free_table(net->ipv6.mrt6);
 322        net->ipv6.mrt6 = NULL;
 323        rtnl_unlock();
 324}
 325
 326static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
 327{
 328        return 0;
 329}
 330
 331static unsigned int ip6mr_rules_seq_read(struct net *net)
 332{
 333        return 0;
 334}
 335#endif
 336
 337static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 338                          const void *ptr)
 339{
 340        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 341        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 342
 343        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 344               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 345}
 346
 347static const struct rhashtable_params ip6mr_rht_params = {
 348        .head_offset = offsetof(struct mr_mfc, mnode),
 349        .key_offset = offsetof(struct mfc6_cache, cmparg),
 350        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 351        .nelem_hint = 3,
 352        .locks_mul = 1,
 353        .obj_cmpfn = ip6mr_hash_cmp,
 354        .automatic_shrinking = true,
 355};
 356
 357static void ip6mr_new_table_set(struct mr_table *mrt,
 358                                struct net *net)
 359{
 360#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 361        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 362#endif
 363}
 364
 365static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 366        .mf6c_origin = IN6ADDR_ANY_INIT,
 367        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 368};
 369
 370static struct mr_table_ops ip6mr_mr_table_ops = {
 371        .rht_params = &ip6mr_rht_params,
 372        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 373};
 374
 375static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 376{
 377        struct mr_table *mrt;
 378
 379        mrt = ip6mr_get_table(net, id);
 380        if (mrt)
 381                return mrt;
 382
 383        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 384                              ipmr_expire_process, ip6mr_new_table_set);
 385}
 386
 387static void ip6mr_free_table(struct mr_table *mrt)
 388{
 389        del_timer_sync(&mrt->ipmr_expire_timer);
 390        mroute_clean_tables(mrt, true);
 391        rhltable_destroy(&mrt->mfc_hash);
 392        kfree(mrt);
 393}
 394
 395#ifdef CONFIG_PROC_FS
 396/* The /proc interfaces to multicast routing
 397 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 398 */
 399
 400static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 401        __acquires(mrt_lock)
 402{
 403        struct mr_vif_iter *iter = seq->private;
 404        struct net *net = seq_file_net(seq);
 405        struct mr_table *mrt;
 406
 407        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 408        if (!mrt)
 409                return ERR_PTR(-ENOENT);
 410
 411        iter->mrt = mrt;
 412
 413        read_lock(&mrt_lock);
 414        return mr_vif_seq_start(seq, pos);
 415}
 416
 417static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 418        __releases(mrt_lock)
 419{
 420        read_unlock(&mrt_lock);
 421}
 422
 423static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 424{
 425        struct mr_vif_iter *iter = seq->private;
 426        struct mr_table *mrt = iter->mrt;
 427
 428        if (v == SEQ_START_TOKEN) {
 429                seq_puts(seq,
 430                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 431        } else {
 432                const struct vif_device *vif = v;
 433                const char *name = vif->dev ? vif->dev->name : "none";
 434
 435                seq_printf(seq,
 436                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 437                           vif - mrt->vif_table,
 438                           name, vif->bytes_in, vif->pkt_in,
 439                           vif->bytes_out, vif->pkt_out,
 440                           vif->flags);
 441        }
 442        return 0;
 443}
 444
 445static const struct seq_operations ip6mr_vif_seq_ops = {
 446        .start = ip6mr_vif_seq_start,
 447        .next  = mr_vif_seq_next,
 448        .stop  = ip6mr_vif_seq_stop,
 449        .show  = ip6mr_vif_seq_show,
 450};
 451
 452static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 453{
 454        struct net *net = seq_file_net(seq);
 455        struct mr_table *mrt;
 456
 457        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 458        if (!mrt)
 459                return ERR_PTR(-ENOENT);
 460
 461        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 462}
 463
 464static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 465{
 466        int n;
 467
 468        if (v == SEQ_START_TOKEN) {
 469                seq_puts(seq,
 470                         "Group                            "
 471                         "Origin                           "
 472                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 473        } else {
 474                const struct mfc6_cache *mfc = v;
 475                const struct mr_mfc_iter *it = seq->private;
 476                struct mr_table *mrt = it->mrt;
 477
 478                seq_printf(seq, "%pI6 %pI6 %-3hd",
 479                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 480                           mfc->_c.mfc_parent);
 481
 482                if (it->cache != &mrt->mfc_unres_queue) {
 483                        seq_printf(seq, " %8lu %8lu %8lu",
 484                                   mfc->_c.mfc_un.res.pkt,
 485                                   mfc->_c.mfc_un.res.bytes,
 486                                   mfc->_c.mfc_un.res.wrong_if);
 487                        for (n = mfc->_c.mfc_un.res.minvif;
 488                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 489                                if (VIF_EXISTS(mrt, n) &&
 490                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 491                                        seq_printf(seq,
 492                                                   " %2d:%-3d", n,
 493                                                   mfc->_c.mfc_un.res.ttls[n]);
 494                        }
 495                } else {
 496                        /* unresolved mfc_caches don't contain
 497                         * pkt, bytes and wrong_if values
 498                         */
 499                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 500                }
 501                seq_putc(seq, '\n');
 502        }
 503        return 0;
 504}
 505
 506static const struct seq_operations ipmr_mfc_seq_ops = {
 507        .start = ipmr_mfc_seq_start,
 508        .next  = mr_mfc_seq_next,
 509        .stop  = mr_mfc_seq_stop,
 510        .show  = ipmr_mfc_seq_show,
 511};
 512#endif
 513
 514#ifdef CONFIG_IPV6_PIMSM_V2
 515
    /* Protocol handler for incoming PIM packets (installed through
     * pim6_protocol). A PIM register message that passes validation has
     * its outer headers stripped and the encapsulated IPv6 packet is
     * re-injected through the table's register vif device.
     *
     * Always returns 0; packets that fail any check are freed at "drop".
     */
 516static int pim6_rcv(struct sk_buff *skb)
 517{
 518        struct pimreghdr *pim;
 519        struct ipv6hdr   *encap;
 520        struct net_device  *reg_dev = NULL;
 521        struct net *net = dev_net(skb->dev);
 522        struct mr_table *mrt;
 523        struct flowi6 fl6 = {
 524                .flowi6_iif     = skb->dev->ifindex,
 525                .flowi6_mark    = skb->mark,
 526        };
 527        int reg_vif_num;
 528
            /* need the PIM register header plus the inner IPv6 header */
 529        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 530                goto drop;
 531
            /* Drop unless this is a PIM register, not a null-register, and
             * at least one of the two checksums (PIM header only, or the
             * whole skb) comes out as zero.
             */
 532        pim = (struct pimreghdr *)skb_transport_header(skb);
 533        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 534            (pim->flags & PIM_NULL_REGISTER) ||
 535            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 536                             sizeof(*pim), IPPROTO_PIM,
 537                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 538             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 539                goto drop;
 540
 541        /* check if the inner packet is destined to mcast group */
 542        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 543                                   sizeof(*pim));
 544
            /* also reject an empty inner payload or one that claims more
             * bytes than the skb holds
             */
 545        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 546            encap->payload_len == 0 ||
 547            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 548                goto drop;
 549
 550        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 551                goto drop;
            /* NOTE(review): sampled before mrt_lock is taken; a vif deleted
             * in between shows up as a NULL dev below and is dropped.
             */
 552        reg_vif_num = mrt->mroute_reg_vif_num;
 553
            /* pin the register device while holding the read lock */
 554        read_lock(&mrt_lock);
 555        if (reg_vif_num >= 0)
 556                reg_dev = mrt->vif_table[reg_vif_num].dev;
 557        if (reg_dev)
 558                dev_hold(reg_dev);
 559        read_unlock(&mrt_lock);
 560
 561        if (!reg_dev)
 562                goto drop;
 563
            /* make the encapsulated IPv6 header the new network header */
 564        skb->mac_header = skb->network_header;
 565        skb_pull(skb, (u8 *)encap - skb->data);
 566        skb_reset_network_header(skb);
 567        skb->protocol = htons(ETH_P_IPV6);
 568        skb->ip_summed = CHECKSUM_NONE;
 569
 570        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 571
 572        netif_rx(skb);
 573
            /* balances the dev_hold() above */
 574        dev_put(reg_dev);
 575        return 0;
 576 drop:
 577        kfree_skb(skb);
 578        return 0;
 579}
 580
 581static const struct inet6_protocol pim6_protocol = {
 582        .handler        =       pim6_rcv,
 583};
 584
 585/* Service routines creating virtual interfaces: PIMREG */
 586
 587static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 588                                      struct net_device *dev)
 589{
 590        struct net *net = dev_net(dev);
 591        struct mr_table *mrt;
 592        struct flowi6 fl6 = {
 593                .flowi6_oif     = dev->ifindex,
 594                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 595                .flowi6_mark    = skb->mark,
 596        };
 597
 598        if (!pskb_inet_may_pull(skb))
 599                goto tx_err;
 600
 601        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 602                goto tx_err;
 603
 604        read_lock(&mrt_lock);
 605        dev->stats.tx_bytes += skb->len;
 606        dev->stats.tx_packets++;
 607        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 608        read_unlock(&mrt_lock);
 609        kfree_skb(skb);
 610        return NETDEV_TX_OK;
 611
 612tx_err:
 613        dev->stats.tx_errors++;
 614        kfree_skb(skb);
 615        return NETDEV_TX_OK;
 616}
 617
 618static int reg_vif_get_iflink(const struct net_device *dev)
 619{
 620        return 0;
 621}
 622
 623static const struct net_device_ops reg_vif_netdev_ops = {
 624        .ndo_start_xmit = reg_vif_xmit,
 625        .ndo_get_iflink = reg_vif_get_iflink,
 626};
 627
 628static void reg_vif_setup(struct net_device *dev)
 629{
 630        dev->type               = ARPHRD_PIMREG;
 631        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 632        dev->flags              = IFF_NOARP;
 633        dev->netdev_ops         = &reg_vif_netdev_ops;
 634        dev->needs_free_netdev  = true;
 635        dev->features           |= NETIF_F_NETNS_LOCAL;
 636}
 637
 638static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 639{
 640        struct net_device *dev;
 641        char name[IFNAMSIZ];
 642
 643        if (mrt->id == RT6_TABLE_DFLT)
 644                sprintf(name, "pim6reg");
 645        else
 646                sprintf(name, "pim6reg%u", mrt->id);
 647
 648        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 649        if (!dev)
 650                return NULL;
 651
 652        dev_net_set(dev, net);
 653
 654        if (register_netdevice(dev)) {
 655                free_netdev(dev);
 656                return NULL;
 657        }
 658
 659        if (dev_open(dev, NULL))
 660                goto failure;
 661
 662        dev_hold(dev);
 663        return dev;
 664
 665failure:
 666        unregister_netdevice(dev);
 667        return NULL;
 668}
 669#endif
 670
 671static int call_ip6mr_vif_entry_notifiers(struct net *net,
 672                                          enum fib_event_type event_type,
 673                                          struct vif_device *vif,
 674                                          mifi_t vif_index, u32 tb_id)
 675{
 676        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 677                                     vif, vif_index, tb_id,
 678                                     &net->ipv6.ipmr_seq);
 679}
 680
 681static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 682                                          enum fib_event_type event_type,
 683                                          struct mfc6_cache *mfc, u32 tb_id)
 684{
 685        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 686                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 687}
 688
 689/* Delete a VIF entry.
     *
     * @mrt:    table owning the vif
     * @vifi:   index of the vif to remove
     * @notify: when non-zero, a register vif device is NOT queued for
     *          unregistration here
     * @head:   list passed to unregister_netdevice_queue()
     *
     * Returns 0 on success, or -EADDRNOTAVAIL when @vifi is outside
     * [0, mrt->maxvif) or the slot has no device attached.
     */
 690static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 691                       struct list_head *head)
 692{
 693        struct vif_device *v;
 694        struct net_device *dev;
 695        struct inet6_dev *in6_dev;
 696
 697        if (vifi < 0 || vifi >= mrt->maxvif)
 698                return -EADDRNOTAVAIL;
 699
 700        v = &mrt->vif_table[vifi];
 701
            /* announce the removal while the slot is still populated */
 702        if (VIF_EXISTS(mrt, vifi))
 703                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 704                                               FIB_EVENT_VIF_DEL, v, vifi,
 705                                               mrt->id);
 706
            /* detach the device from the slot under the write lock */
 707        write_lock_bh(&mrt_lock);
 708        dev = v->dev;
 709        v->dev = NULL;
 710
 711        if (!dev) {
 712                write_unlock_bh(&mrt_lock);
 713                return -EADDRNOTAVAIL;
 714        }
 715
 716#ifdef CONFIG_IPV6_PIMSM_V2
 717        if (vifi == mrt->mroute_reg_vif_num)
 718                mrt->mroute_reg_vif_num = -1;
 719#endif
 720
            /* the top slot was removed: shrink maxvif to one past the
             * highest slot that still has a device (0 when none is left)
             */
 721        if (vifi + 1 == mrt->maxvif) {
 722                int tmp;
 723                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 724                        if (VIF_EXISTS(mrt, tmp))
 725                                break;
 726                }
 727                mrt->maxvif = tmp + 1;
 728        }
 729
 730        write_unlock_bh(&mrt_lock);
 731
            /* counterpart of the dev_set_allmulti(dev, 1) in mif6_add() */
 732        dev_set_allmulti(dev, -1);
 733
 734        in6_dev = __in6_dev_get(dev);
 735        if (in6_dev) {
 736                in6_dev->cnf.mc_forwarding--;
 737                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 738                                             NETCONFA_MC_FORWARDING,
 739                                             dev->ifindex, &in6_dev->cnf);
 740        }
 741
 742        if ((v->flags & MIFF_REGISTER) && !notify)
 743                unregister_netdevice_queue(dev, head);
 744
            /* presumably drops the reference taken when the vif was added
             * -- TODO confirm
             */
 745        dev_put(dev);
 746        return 0;
 747}
 748
 749static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 750{
 751        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 752
 753        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 754}
 755
 756static inline void ip6mr_cache_free(struct mfc6_cache *c)
 757{
 758        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 759}
 760
 761/* Destroy an unresolved cache entry, killing queued skbs
 762   and reporting error to netlink readers.
 763 */
 764
 765static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 766{
 767        struct net *net = read_pnet(&mrt->net);
 768        struct sk_buff *skb;
 769
 770        atomic_dec(&mrt->cache_resolve_queue_len);
 771
 772        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 773                if (ipv6_hdr(skb)->version == 0) {
 774                        struct nlmsghdr *nlh = skb_pull(skb,
 775                                                        sizeof(struct ipv6hdr));
 776                        nlh->nlmsg_type = NLMSG_ERROR;
 777                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 778                        skb_trim(skb, nlh->nlmsg_len);
 779                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 780                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 781                } else
 782                        kfree_skb(skb);
 783        }
 784
 785        ip6mr_cache_free(c);
 786}
 787
 788
 789/* Timer process for all the unresolved queue. */
 790
 791static void ipmr_do_expire_process(struct mr_table *mrt)
 792{
 793        unsigned long now = jiffies;
 794        unsigned long expires = 10 * HZ;
 795        struct mr_mfc *c, *next;
 796
 797        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 798                if (time_after(c->mfc_un.unres.expires, now)) {
 799                        /* not yet... */
 800                        unsigned long interval = c->mfc_un.unres.expires - now;
 801                        if (interval < expires)
 802                                expires = interval;
 803                        continue;
 804                }
 805
 806                list_del(&c->list);
 807                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 808                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 809        }
 810
 811        if (!list_empty(&mrt->mfc_unres_queue))
 812                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 813}
 814
 815static void ipmr_expire_process(struct timer_list *t)
 816{
 817        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 818
 819        if (!spin_trylock(&mfc_unres_lock)) {
 820                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 821                return;
 822        }
 823
 824        if (!list_empty(&mrt->mfc_unres_queue))
 825                ipmr_do_expire_process(mrt);
 826
 827        spin_unlock(&mfc_unres_lock);
 828}
 829
 830/* Fill oifs list. It is called under write locked mrt_lock. */
 831
 832static void ip6mr_update_thresholds(struct mr_table *mrt,
 833                                    struct mr_mfc *cache,
 834                                    unsigned char *ttls)
 835{
 836        int vifi;
 837
 838        cache->mfc_un.res.minvif = MAXMIFS;
 839        cache->mfc_un.res.maxvif = 0;
 840        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 841
 842        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 843                if (VIF_EXISTS(mrt, vifi) &&
 844                    ttls[vifi] && ttls[vifi] < 255) {
 845                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 846                        if (cache->mfc_un.res.minvif > vifi)
 847                                cache->mfc_un.res.minvif = vifi;
 848                        if (cache->mfc_un.res.maxvif <= vifi)
 849                                cache->mfc_un.res.maxvif = vifi + 1;
 850                }
 851        }
 852        cache->mfc_un.res.lastuse = jiffies;
 853}
 854
    /* Add a vif to @mrt as described by @vifc.
     *
     * @mrtsock: when zero, the vif is additionally flagged VIFF_STATIC.
     *
     * Returns 0 on success; -EADDRINUSE when the slot, or (for a register
     * vif) the table's register vif, is already taken; -ENOBUFS when the
     * register device cannot be created; -EADDRNOTAVAIL when the physical
     * interface index is unknown; -EINVAL for unsupported flags; or the
     * error returned by dev_set_allmulti().
     *
     * NOTE(review): vifc->mif6c_mifi indexes vif_table without a bounds
     * check in this function -- presumably validated by the caller; TODO
     * confirm.
     */
 855static int mif6_add(struct net *net, struct mr_table *mrt,
 856                    struct mif6ctl *vifc, int mrtsock)
 857{
 858        int vifi = vifc->mif6c_mifi;
 859        struct vif_device *v = &mrt->vif_table[vifi];
 860        struct net_device *dev;
 861        struct inet6_dev *in6_dev;
 862        int err;
 863
 864        /* Is the vif slot already in use? */
 865        if (VIF_EXISTS(mrt, vifi))
 866                return -EADDRINUSE;
 867
 868        switch (vifc->mif6c_flags) {
 869#ifdef CONFIG_IPV6_PIMSM_V2
 870        case MIFF_REGISTER:
 871                /*
 872                 * Special Purpose VIF in PIM
 873                 * All the packets will be sent to the daemon
 874                 */
 875                if (mrt->mroute_reg_vif_num >= 0)
 876                        return -EADDRINUSE;
 877                dev = ip6mr_reg_vif(net, mrt);
 878                if (!dev)
 879                        return -ENOBUFS;
 880                err = dev_set_allmulti(dev, 1);
 881                if (err) {
                            /* undo the creation done by ip6mr_reg_vif() */
 882                        unregister_netdevice(dev);
 883                        dev_put(dev);
 884                        return err;
 885                }
 886                break;
 887#endif
 888        case 0:
                    /* ordinary vif bound to an existing interface */
 889                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 890                if (!dev)
 891                        return -EADDRNOTAVAIL;
 892                err = dev_set_allmulti(dev, 1);
 893                if (err) {
 894                        dev_put(dev);
 895                        return err;
 896                }
 897                break;
 898        default:
 899                return -EINVAL;
 900        }
 901
            /* count one more multicast forwarder on the device and
             * announce the change
             */
 902        in6_dev = __in6_dev_get(dev);
 903        if (in6_dev) {
 904                in6_dev->cnf.mc_forwarding++;
 905                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 906                                             NETCONFA_MC_FORWARDING,
 907                                             dev->ifindex, &in6_dev->cnf);
 908        }
 909
 910        /* Fill in the VIF structures */
 911        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 912                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 913                        MIFF_REGISTER);
 914
 915        /* Finish the update by writing the critical data under the lock */
 916        write_lock_bh(&mrt_lock);
 917        v->dev = dev;
 918#ifdef CONFIG_IPV6_PIMSM_V2
 919        if (v->flags & MIFF_REGISTER)
 920                mrt->mroute_reg_vif_num = vifi;
 921#endif
 922        if (vifi + 1 > mrt->maxvif)
 923                mrt->maxvif = vifi + 1;
 924        write_unlock_bh(&mrt_lock);
 925        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 926                                       v, vifi, mrt->id);
 927        return 0;
 928}
 929
 930static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 931                                           const struct in6_addr *origin,
 932                                           const struct in6_addr *mcastgrp)
 933{
 934        struct mfc6_cache_cmp_arg arg = {
 935                .mf6c_origin = *origin,
 936                .mf6c_mcastgrp = *mcastgrp,
 937        };
 938
 939        return mr_mfc_find(mrt, &arg);
 940}
 941
 942/* Look for a (*,G) entry */
 943static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 944                                               struct in6_addr *mcastgrp,
 945                                               mifi_t mifi)
 946{
 947        struct mfc6_cache_cmp_arg arg = {
 948                .mf6c_origin = in6addr_any,
 949                .mf6c_mcastgrp = *mcastgrp,
 950        };
 951
 952        if (ipv6_addr_any(mcastgrp))
 953                return mr_mfc_find_any_parent(mrt, mifi);
 954        return mr_mfc_find_any(mrt, mifi, &arg);
 955}
 956
 957/* Look for a (S,G,iif) entry if parent != -1 */
 958static struct mfc6_cache *
 959ip6mr_cache_find_parent(struct mr_table *mrt,
 960                        const struct in6_addr *origin,
 961                        const struct in6_addr *mcastgrp,
 962                        int parent)
 963{
 964        struct mfc6_cache_cmp_arg arg = {
 965                .mf6c_origin = *origin,
 966                .mf6c_mcastgrp = *mcastgrp,
 967        };
 968
 969        return mr_mfc_find_parent(mrt, &arg, parent);
 970}
 971
 972/* Allocate a multicast cache entry */
 973static struct mfc6_cache *ip6mr_cache_alloc(void)
 974{
 975        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 976        if (!c)
 977                return NULL;
 978        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 979        c->_c.mfc_un.res.minvif = MAXMIFS;
 980        c->_c.free = ip6mr_cache_free_rcu;
 981        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 982        return c;
 983}
 984
 985static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 986{
 987        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 988        if (!c)
 989                return NULL;
 990        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 991        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 992        return c;
 993}
 994
 995/*
 996 *      A cache entry has gone into a resolved state from queued
 997 */
 998
 999static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1000                                struct mfc6_cache *uc, struct mfc6_cache *c)
1001{
1002        struct sk_buff *skb;
1003
1004        /*
1005         *      Play the pending entries through our router
1006         */
1007
1008        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1009                if (ipv6_hdr(skb)->version == 0) {
1010                        struct nlmsghdr *nlh = skb_pull(skb,
1011                                                        sizeof(struct ipv6hdr));
1012
1013                        if (mr_fill_mroute(mrt, skb, &c->_c,
1014                                           nlmsg_data(nlh)) > 0) {
1015                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1016                        } else {
1017                                nlh->nlmsg_type = NLMSG_ERROR;
1018                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1019                                skb_trim(skb, nlh->nlmsg_len);
1020                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1021                        }
1022                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1023                } else
1024                        ip6_mr_forward(net, mrt, skb, c);
1025        }
1026}
1027
1028/*
1029 *      Bounce a cache query up to pim6sd and netlink.
1030 *
1031 *      Called under mrt_lock.
1032 */
1033
1034static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1035                              mifi_t mifi, int assert)
1036{
1037        struct sock *mroute6_sk;
1038        struct sk_buff *skb;
1039        struct mrt6msg *msg;
1040        int ret;
1041
1042#ifdef CONFIG_IPV6_PIMSM_V2
1043        if (assert == MRT6MSG_WHOLEPKT)
1044                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1045                                                +sizeof(*msg));
1046        else
1047#endif
1048                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1049
1050        if (!skb)
1051                return -ENOBUFS;
1052
1053        /* I suppose that internal messages
1054         * do not require checksums */
1055
1056        skb->ip_summed = CHECKSUM_UNNECESSARY;
1057
1058#ifdef CONFIG_IPV6_PIMSM_V2
1059        if (assert == MRT6MSG_WHOLEPKT) {
1060                /* Ugly, but we have no choice with this interface.
1061                   Duplicate old header, fix length etc.
1062                   And all this only to mangle msg->im6_msgtype and
1063                   to set msg->im6_mbz to "mbz" :-)
1064                 */
1065                skb_push(skb, -skb_network_offset(pkt));
1066
1067                skb_push(skb, sizeof(*msg));
1068                skb_reset_transport_header(skb);
1069                msg = (struct mrt6msg *)skb_transport_header(skb);
1070                msg->im6_mbz = 0;
1071                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1072                msg->im6_mif = mrt->mroute_reg_vif_num;
1073                msg->im6_pad = 0;
1074                msg->im6_src = ipv6_hdr(pkt)->saddr;
1075                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1076
1077                skb->ip_summed = CHECKSUM_UNNECESSARY;
1078        } else
1079#endif
1080        {
1081        /*
1082         *      Copy the IP header
1083         */
1084
1085        skb_put(skb, sizeof(struct ipv6hdr));
1086        skb_reset_network_header(skb);
1087        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1088
1089        /*
1090         *      Add our header
1091         */
1092        skb_put(skb, sizeof(*msg));
1093        skb_reset_transport_header(skb);
1094        msg = (struct mrt6msg *)skb_transport_header(skb);
1095
1096        msg->im6_mbz = 0;
1097        msg->im6_msgtype = assert;
1098        msg->im6_mif = mifi;
1099        msg->im6_pad = 0;
1100        msg->im6_src = ipv6_hdr(pkt)->saddr;
1101        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1102
1103        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1104        skb->ip_summed = CHECKSUM_UNNECESSARY;
1105        }
1106
1107        rcu_read_lock();
1108        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1109        if (!mroute6_sk) {
1110                rcu_read_unlock();
1111                kfree_skb(skb);
1112                return -EINVAL;
1113        }
1114
1115        mrt6msg_netlink_event(mrt, skb);
1116
1117        /* Deliver to user space multicast routing algorithms */
1118        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1119        rcu_read_unlock();
1120        if (ret < 0) {
1121                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1122                kfree_skb(skb);
1123        }
1124
1125        return ret;
1126}
1127
1128/* Queue a packet for resolution. It gets locked cache entry! */
1129static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1130                                  struct sk_buff *skb)
1131{
1132        struct mfc6_cache *c;
1133        bool found = false;
1134        int err;
1135
1136        spin_lock_bh(&mfc_unres_lock);
1137        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1138                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1139                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1140                        found = true;
1141                        break;
1142                }
1143        }
1144
1145        if (!found) {
1146                /*
1147                 *      Create a new entry if allowable
1148                 */
1149
1150                c = ip6mr_cache_alloc_unres();
1151                if (!c) {
1152                        spin_unlock_bh(&mfc_unres_lock);
1153
1154                        kfree_skb(skb);
1155                        return -ENOBUFS;
1156                }
1157
1158                /* Fill in the new cache entry */
1159                c->_c.mfc_parent = -1;
1160                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1161                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1162
1163                /*
1164                 *      Reflect first query at pim6sd
1165                 */
1166                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1167                if (err < 0) {
1168                        /* If the report failed throw the cache entry
1169                           out - Brad Parker
1170                         */
1171                        spin_unlock_bh(&mfc_unres_lock);
1172
1173                        ip6mr_cache_free(c);
1174                        kfree_skb(skb);
1175                        return err;
1176                }
1177
1178                atomic_inc(&mrt->cache_resolve_queue_len);
1179                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1180                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1181
1182                ipmr_do_expire_process(mrt);
1183        }
1184
1185        /* See if we can append the packet */
1186        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1187                kfree_skb(skb);
1188                err = -ENOBUFS;
1189        } else {
1190                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1191                err = 0;
1192        }
1193
1194        spin_unlock_bh(&mfc_unres_lock);
1195        return err;
1196}
1197
1198/*
1199 *      MFC6 cache manipulation by user space
1200 */
1201
1202static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1203                            int parent)
1204{
1205        struct mfc6_cache *c;
1206
1207        /* The entries are added/deleted only under RTNL */
1208        rcu_read_lock();
1209        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1210                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1211        rcu_read_unlock();
1212        if (!c)
1213                return -ENOENT;
1214        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1215        list_del_rcu(&c->_c.list);
1216
1217        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1218                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1219        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1220        mr_cache_put(&c->_c);
1221        return 0;
1222}
1223
1224static int ip6mr_device_event(struct notifier_block *this,
1225                              unsigned long event, void *ptr)
1226{
1227        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1228        struct net *net = dev_net(dev);
1229        struct mr_table *mrt;
1230        struct vif_device *v;
1231        int ct;
1232
1233        if (event != NETDEV_UNREGISTER)
1234                return NOTIFY_DONE;
1235
1236        ip6mr_for_each_table(mrt, net) {
1237                v = &mrt->vif_table[0];
1238                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1239                        if (v->dev == dev)
1240                                mif6_delete(mrt, ct, 1, NULL);
1241                }
1242        }
1243
1244        return NOTIFY_DONE;
1245}
1246
1247static unsigned int ip6mr_seq_read(struct net *net)
1248{
1249        ASSERT_RTNL();
1250
1251        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1252}
1253
1254static int ip6mr_dump(struct net *net, struct notifier_block *nb)
1255{
1256        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1257                       ip6mr_mr_table_iter, &mrt_lock);
1258}
1259
1260static struct notifier_block ip6_mr_notifier = {
1261        .notifier_call = ip6mr_device_event
1262};
1263
1264static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1265        .family         = RTNL_FAMILY_IP6MR,
1266        .fib_seq_read   = ip6mr_seq_read,
1267        .fib_dump       = ip6mr_dump,
1268        .owner          = THIS_MODULE,
1269};
1270
1271static int __net_init ip6mr_notifier_init(struct net *net)
1272{
1273        struct fib_notifier_ops *ops;
1274
1275        net->ipv6.ipmr_seq = 0;
1276
1277        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1278        if (IS_ERR(ops))
1279                return PTR_ERR(ops);
1280
1281        net->ipv6.ip6mr_notifier_ops = ops;
1282
1283        return 0;
1284}
1285
1286static void __net_exit ip6mr_notifier_exit(struct net *net)
1287{
1288        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1289        net->ipv6.ip6mr_notifier_ops = NULL;
1290}
1291
1292/* Setup for IP multicast routing */
1293static int __net_init ip6mr_net_init(struct net *net)
1294{
1295        int err;
1296
1297        err = ip6mr_notifier_init(net);
1298        if (err)
1299                return err;
1300
1301        err = ip6mr_rules_init(net);
1302        if (err < 0)
1303                goto ip6mr_rules_fail;
1304
1305#ifdef CONFIG_PROC_FS
1306        err = -ENOMEM;
1307        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1308                        sizeof(struct mr_vif_iter)))
1309                goto proc_vif_fail;
1310        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1311                        sizeof(struct mr_mfc_iter)))
1312                goto proc_cache_fail;
1313#endif
1314
1315        return 0;
1316
1317#ifdef CONFIG_PROC_FS
1318proc_cache_fail:
1319        remove_proc_entry("ip6_mr_vif", net->proc_net);
1320proc_vif_fail:
1321        ip6mr_rules_exit(net);
1322#endif
1323ip6mr_rules_fail:
1324        ip6mr_notifier_exit(net);
1325        return err;
1326}
1327
1328static void __net_exit ip6mr_net_exit(struct net *net)
1329{
1330#ifdef CONFIG_PROC_FS
1331        remove_proc_entry("ip6_mr_cache", net->proc_net);
1332        remove_proc_entry("ip6_mr_vif", net->proc_net);
1333#endif
1334        ip6mr_rules_exit(net);
1335        ip6mr_notifier_exit(net);
1336}
1337
1338static struct pernet_operations ip6mr_net_ops = {
1339        .init = ip6mr_net_init,
1340        .exit = ip6mr_net_exit,
1341};
1342
1343int __init ip6_mr_init(void)
1344{
1345        int err;
1346
1347        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1348                                       sizeof(struct mfc6_cache),
1349                                       0, SLAB_HWCACHE_ALIGN,
1350                                       NULL);
1351        if (!mrt_cachep)
1352                return -ENOMEM;
1353
1354        err = register_pernet_subsys(&ip6mr_net_ops);
1355        if (err)
1356                goto reg_pernet_fail;
1357
1358        err = register_netdevice_notifier(&ip6_mr_notifier);
1359        if (err)
1360                goto reg_notif_fail;
1361#ifdef CONFIG_IPV6_PIMSM_V2
1362        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1363                pr_err("%s: can't add PIM protocol\n", __func__);
1364                err = -EAGAIN;
1365                goto add_proto_fail;
1366        }
1367#endif
1368        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1369                                   NULL, ip6mr_rtm_dumproute, 0);
1370        if (err == 0)
1371                return 0;
1372
1373#ifdef CONFIG_IPV6_PIMSM_V2
1374        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1375add_proto_fail:
1376        unregister_netdevice_notifier(&ip6_mr_notifier);
1377#endif
1378reg_notif_fail:
1379        unregister_pernet_subsys(&ip6mr_net_ops);
1380reg_pernet_fail:
1381        kmem_cache_destroy(mrt_cachep);
1382        return err;
1383}
1384
1385void ip6_mr_cleanup(void)
1386{
1387        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1388#ifdef CONFIG_IPV6_PIMSM_V2
1389        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1390#endif
1391        unregister_netdevice_notifier(&ip6_mr_notifier);
1392        unregister_pernet_subsys(&ip6mr_net_ops);
1393        kmem_cache_destroy(mrt_cachep);
1394}
1395
1396static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1397                         struct mf6cctl *mfc, int mrtsock, int parent)
1398{
1399        unsigned char ttls[MAXMIFS];
1400        struct mfc6_cache *uc, *c;
1401        struct mr_mfc *_uc;
1402        bool found;
1403        int i, err;
1404
1405        if (mfc->mf6cc_parent >= MAXMIFS)
1406                return -ENFILE;
1407
1408        memset(ttls, 255, MAXMIFS);
1409        for (i = 0; i < MAXMIFS; i++) {
1410                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1411                        ttls[i] = 1;
1412        }
1413
1414        /* The entries are added/deleted only under RTNL */
1415        rcu_read_lock();
1416        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1417                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1418        rcu_read_unlock();
1419        if (c) {
1420                write_lock_bh(&mrt_lock);
1421                c->_c.mfc_parent = mfc->mf6cc_parent;
1422                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1423                if (!mrtsock)
1424                        c->_c.mfc_flags |= MFC_STATIC;
1425                write_unlock_bh(&mrt_lock);
1426                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1427                                               c, mrt->id);
1428                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1429                return 0;
1430        }
1431
1432        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1433            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1434                return -EINVAL;
1435
1436        c = ip6mr_cache_alloc();
1437        if (!c)
1438                return -ENOMEM;
1439
1440        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1441        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1442        c->_c.mfc_parent = mfc->mf6cc_parent;
1443        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1444        if (!mrtsock)
1445                c->_c.mfc_flags |= MFC_STATIC;
1446
1447        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1448                                  ip6mr_rht_params);
1449        if (err) {
1450                pr_err("ip6mr: rhtable insert error %d\n", err);
1451                ip6mr_cache_free(c);
1452                return err;
1453        }
1454        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1455
1456        /* Check to see if we resolved a queued list. If so we
1457         * need to send on the frames and tidy up.
1458         */
1459        found = false;
1460        spin_lock_bh(&mfc_unres_lock);
1461        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1462                uc = (struct mfc6_cache *)_uc;
1463                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1464                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1465                        list_del(&_uc->list);
1466                        atomic_dec(&mrt->cache_resolve_queue_len);
1467                        found = true;
1468                        break;
1469                }
1470        }
1471        if (list_empty(&mrt->mfc_unres_queue))
1472                del_timer(&mrt->ipmr_expire_timer);
1473        spin_unlock_bh(&mfc_unres_lock);
1474
1475        if (found) {
1476                ip6mr_cache_resolve(net, mrt, uc, c);
1477                ip6mr_cache_free(uc);
1478        }
1479        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1480                                       c, mrt->id);
1481        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1482        return 0;
1483}
1484
1485/*
1486 *      Close the multicast socket, and clear the vif tables etc
1487 */
1488
1489static void mroute_clean_tables(struct mr_table *mrt, bool all)
1490{
1491        struct mr_mfc *c, *tmp;
1492        LIST_HEAD(list);
1493        int i;
1494
1495        /* Shut down all active vif entries */
1496        for (i = 0; i < mrt->maxvif; i++) {
1497                if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
1498                        continue;
1499                mif6_delete(mrt, i, 0, &list);
1500        }
1501        unregister_netdevice_many(&list);
1502
1503        /* Wipe the cache */
1504        list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1505                if (!all && (c->mfc_flags & MFC_STATIC))
1506                        continue;
1507                rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1508                list_del_rcu(&c->list);
1509                call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1510                                               FIB_EVENT_ENTRY_DEL,
1511                                               (struct mfc6_cache *)c, mrt->id);
1512                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1513                mr_cache_put(c);
1514        }
1515
1516        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1517                spin_lock_bh(&mfc_unres_lock);
1518                list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1519                        list_del(&c->list);
1520                        mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1521                                          RTM_DELROUTE);
1522                        ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1523                }
1524                spin_unlock_bh(&mfc_unres_lock);
1525        }
1526}
1527
1528static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1529{
1530        int err = 0;
1531        struct net *net = sock_net(sk);
1532
1533        rtnl_lock();
1534        write_lock_bh(&mrt_lock);
1535        if (rtnl_dereference(mrt->mroute_sk)) {
1536                err = -EADDRINUSE;
1537        } else {
1538                rcu_assign_pointer(mrt->mroute_sk, sk);
1539                sock_set_flag(sk, SOCK_RCU_FREE);
1540                net->ipv6.devconf_all->mc_forwarding++;
1541        }
1542        write_unlock_bh(&mrt_lock);
1543
1544        if (!err)
1545                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1546                                             NETCONFA_MC_FORWARDING,
1547                                             NETCONFA_IFINDEX_ALL,
1548                                             net->ipv6.devconf_all);
1549        rtnl_unlock();
1550
1551        return err;
1552}
1553
1554int ip6mr_sk_done(struct sock *sk)
1555{
1556        int err = -EACCES;
1557        struct net *net = sock_net(sk);
1558        struct mr_table *mrt;
1559
1560        if (sk->sk_type != SOCK_RAW ||
1561            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1562                return err;
1563
1564        rtnl_lock();
1565        ip6mr_for_each_table(mrt, net) {
1566                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1567                        write_lock_bh(&mrt_lock);
1568                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1569                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1570                         * so the RCU grace period before sk freeing
1571                         * is guaranteed by sk_destruct()
1572                         */
1573                        net->ipv6.devconf_all->mc_forwarding--;
1574                        write_unlock_bh(&mrt_lock);
1575                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1576                                                     NETCONFA_MC_FORWARDING,
1577                                                     NETCONFA_IFINDEX_ALL,
1578                                                     net->ipv6.devconf_all);
1579
1580                        mroute_clean_tables(mrt, false);
1581                        err = 0;
1582                        break;
1583                }
1584        }
1585        rtnl_unlock();
1586
1587        return err;
1588}
1589
1590bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1591{
1592        struct mr_table *mrt;
1593        struct flowi6 fl6 = {
1594                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1595                .flowi6_oif     = skb->dev->ifindex,
1596                .flowi6_mark    = skb->mark,
1597        };
1598
1599        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1600                return NULL;
1601
1602        return rcu_access_pointer(mrt->mroute_sk);
1603}
1604EXPORT_SYMBOL(mroute6_is_socket);
1605
1606/*
1607 *      Socket options and virtual interface manipulation. The whole
1608 *      virtual interface system is a complete heap, but unfortunately
1609 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1610 *      MOSPF/PIM router set up we can clean this up.
1611 */
1612
1613int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1614{
1615        int ret, parent = 0;
1616        struct mif6ctl vif;
1617        struct mf6cctl mfc;
1618        mifi_t mifi;
1619        struct net *net = sock_net(sk);
1620        struct mr_table *mrt;
1621
1622        if (sk->sk_type != SOCK_RAW ||
1623            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1624                return -EOPNOTSUPP;
1625
1626        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1627        if (!mrt)
1628                return -ENOENT;
1629
1630        if (optname != MRT6_INIT) {
1631                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1632                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1633                        return -EACCES;
1634        }
1635
1636        switch (optname) {
1637        case MRT6_INIT:
1638                if (optlen < sizeof(int))
1639                        return -EINVAL;
1640
1641                return ip6mr_sk_init(mrt, sk);
1642
1643        case MRT6_DONE:
1644                return ip6mr_sk_done(sk);
1645
1646        case MRT6_ADD_MIF:
1647                if (optlen < sizeof(vif))
1648                        return -EINVAL;
1649                if (copy_from_user(&vif, optval, sizeof(vif)))
1650                        return -EFAULT;
1651                if (vif.mif6c_mifi >= MAXMIFS)
1652                        return -ENFILE;
1653                rtnl_lock();
1654                ret = mif6_add(net, mrt, &vif,
1655                               sk == rtnl_dereference(mrt->mroute_sk));
1656                rtnl_unlock();
1657                return ret;
1658
1659        case MRT6_DEL_MIF:
1660                if (optlen < sizeof(mifi_t))
1661                        return -EINVAL;
1662                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1663                        return -EFAULT;
1664                rtnl_lock();
1665                ret = mif6_delete(mrt, mifi, 0, NULL);
1666                rtnl_unlock();
1667                return ret;
1668
1669        /*
1670         *      Manipulate the forwarding caches. These live
1671         *      in a sort of kernel/user symbiosis.
1672         */
1673        case MRT6_ADD_MFC:
1674        case MRT6_DEL_MFC:
1675                parent = -1;
1676                /* fall through */
1677        case MRT6_ADD_MFC_PROXY:
1678        case MRT6_DEL_MFC_PROXY:
1679                if (optlen < sizeof(mfc))
1680                        return -EINVAL;
1681                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1682                        return -EFAULT;
1683                if (parent == 0)
1684                        parent = mfc.mf6cc_parent;
1685                rtnl_lock();
1686                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1687                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1688                else
1689                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1690                                            sk ==
1691                                            rtnl_dereference(mrt->mroute_sk),
1692                                            parent);
1693                rtnl_unlock();
1694                return ret;
1695
1696        /*
1697         *      Control PIM assert (to activate pim will activate assert)
1698         */
1699        case MRT6_ASSERT:
1700        {
1701                int v;
1702
1703                if (optlen != sizeof(v))
1704                        return -EINVAL;
1705                if (get_user(v, (int __user *)optval))
1706                        return -EFAULT;
1707                mrt->mroute_do_assert = v;
1708                return 0;
1709        }
1710
1711#ifdef CONFIG_IPV6_PIMSM_V2
1712        case MRT6_PIM:
1713        {
1714                int v;
1715
1716                if (optlen != sizeof(v))
1717                        return -EINVAL;
1718                if (get_user(v, (int __user *)optval))
1719                        return -EFAULT;
1720                v = !!v;
1721                rtnl_lock();
1722                ret = 0;
1723                if (v != mrt->mroute_do_pim) {
1724                        mrt->mroute_do_pim = v;
1725                        mrt->mroute_do_assert = v;
1726                }
1727                rtnl_unlock();
1728                return ret;
1729        }
1730
1731#endif
1732#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1733        case MRT6_TABLE:
1734        {
1735                u32 v;
1736
1737                if (optlen != sizeof(u32))
1738                        return -EINVAL;
1739                if (get_user(v, (u32 __user *)optval))
1740                        return -EFAULT;
1741                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1742                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1743                        return -EINVAL;
1744                if (sk == rcu_access_pointer(mrt->mroute_sk))
1745                        return -EBUSY;
1746
1747                rtnl_lock();
1748                ret = 0;
1749                mrt = ip6mr_new_table(net, v);
1750                if (IS_ERR(mrt))
1751                        ret = PTR_ERR(mrt);
1752                else
1753                        raw6_sk(sk)->ip6mr_table = v;
1754                rtnl_unlock();
1755                return ret;
1756        }
1757#endif
1758        /*
1759         *      Spurious command, or MRT6_VERSION which you cannot
1760         *      set.
1761         */
1762        default:
1763                return -ENOPROTOOPT;
1764        }
1765}
1766
1767/*
1768 *      Getsock opt support for the multicast routing system.
1769 */
1770
1771int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1772                          int __user *optlen)
1773{
1774        int olr;
1775        int val;
1776        struct net *net = sock_net(sk);
1777        struct mr_table *mrt;
1778
1779        if (sk->sk_type != SOCK_RAW ||
1780            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1781                return -EOPNOTSUPP;
1782
1783        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1784        if (!mrt)
1785                return -ENOENT;
1786
1787        switch (optname) {
1788        case MRT6_VERSION:
1789                val = 0x0305;
1790                break;
1791#ifdef CONFIG_IPV6_PIMSM_V2
1792        case MRT6_PIM:
1793                val = mrt->mroute_do_pim;
1794                break;
1795#endif
1796        case MRT6_ASSERT:
1797                val = mrt->mroute_do_assert;
1798                break;
1799        default:
1800                return -ENOPROTOOPT;
1801        }
1802
1803        if (get_user(olr, optlen))
1804                return -EFAULT;
1805
1806        olr = min_t(int, olr, sizeof(int));
1807        if (olr < 0)
1808                return -EINVAL;
1809
1810        if (put_user(olr, optlen))
1811                return -EFAULT;
1812        if (copy_to_user(optval, &val, olr))
1813                return -EFAULT;
1814        return 0;
1815}
1816
1817/*
1818 *      The IP multicast ioctl support routines.
1819 */
1820
1821int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1822{
1823        struct sioc_sg_req6 sr;
1824        struct sioc_mif_req6 vr;
1825        struct vif_device *vif;
1826        struct mfc6_cache *c;
1827        struct net *net = sock_net(sk);
1828        struct mr_table *mrt;
1829
1830        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1831        if (!mrt)
1832                return -ENOENT;
1833
1834        switch (cmd) {
1835        case SIOCGETMIFCNT_IN6:
1836                if (copy_from_user(&vr, arg, sizeof(vr)))
1837                        return -EFAULT;
1838                if (vr.mifi >= mrt->maxvif)
1839                        return -EINVAL;
1840                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1841                read_lock(&mrt_lock);
1842                vif = &mrt->vif_table[vr.mifi];
1843                if (VIF_EXISTS(mrt, vr.mifi)) {
1844                        vr.icount = vif->pkt_in;
1845                        vr.ocount = vif->pkt_out;
1846                        vr.ibytes = vif->bytes_in;
1847                        vr.obytes = vif->bytes_out;
1848                        read_unlock(&mrt_lock);
1849
1850                        if (copy_to_user(arg, &vr, sizeof(vr)))
1851                                return -EFAULT;
1852                        return 0;
1853                }
1854                read_unlock(&mrt_lock);
1855                return -EADDRNOTAVAIL;
1856        case SIOCGETSGCNT_IN6:
1857                if (copy_from_user(&sr, arg, sizeof(sr)))
1858                        return -EFAULT;
1859
1860                rcu_read_lock();
1861                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1862                if (c) {
1863                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1864                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1865                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1866                        rcu_read_unlock();
1867
1868                        if (copy_to_user(arg, &sr, sizeof(sr)))
1869                                return -EFAULT;
1870                        return 0;
1871                }
1872                rcu_read_unlock();
1873                return -EADDRNOTAVAIL;
1874        default:
1875                return -ENOIOCTLCMD;
1876        }
1877}
1878
#ifdef CONFIG_COMPAT
/* Compat layout of the SIOCGETSGCNT_IN6 request: counters are compat_ulong_t. */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat layout of the SIOCGETMIFCNT_IN6 request: counters are compat_ulong_t. */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 *	Compat variant of the counter ioctls, operating on the compat request
 *	structures declared above.
 *
 *	Returns 0 on success, -ENOENT if the socket's table is not found,
 *	-EFAULT on a failed user copy, -EINVAL for a mifi index at or beyond
 *	the table's maxvif, -EADDRNOTAVAIL if the interface or cache entry
 *	does not exist, and -ENOIOCTLCMD for any other command.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 mif_req;
	struct compat_sioc_sg_req6 sg_req;
	struct vif_device *vif;
	struct mfc6_cache *mfc;
	struct mr_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (cmd == SIOCGETMIFCNT_IN6) {
		if (copy_from_user(&mif_req, arg, sizeof(mif_req)))
			return -EFAULT;
		if (mif_req.mifi >= mrt->maxvif)
			return -EINVAL;
		/* Bound the index for speculative execution too. */
		mif_req.mifi = array_index_nospec(mif_req.mifi, mrt->maxvif);

		read_lock(&mrt_lock);
		vif = &mrt->vif_table[mif_req.mifi];
		if (!VIF_EXISTS(mrt, mif_req.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		/* Snapshot the counters while the lock is held. */
		mif_req.icount = vif->pkt_in;
		mif_req.ocount = vif->pkt_out;
		mif_req.ibytes = vif->bytes_in;
		mif_req.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &mif_req, sizeof(mif_req)))
			return -EFAULT;
		return 0;
	}

	if (cmd == SIOCGETSGCNT_IN6) {
		if (copy_from_user(&sg_req, arg, sizeof(sg_req)))
			return -EFAULT;

		rcu_read_lock();
		mfc = ip6mr_cache_find(mrt, &sg_req.src.sin6_addr,
				       &sg_req.grp.sin6_addr);
		if (!mfc) {
			rcu_read_unlock();
			return -EADDRNOTAVAIL;
		}
		/* Copy the statistics out before leaving the RCU section. */
		sg_req.pktcnt = mfc->_c.mfc_un.res.pkt;
		sg_req.bytecnt = mfc->_c.mfc_un.res.bytes;
		sg_req.wrong_if = mfc->_c.mfc_un.res.wrong_if;
		rcu_read_unlock();

		if (copy_to_user(arg, &sg_req, sizeof(sg_req)))
			return -EFAULT;
		return 0;
	}

	return -ENOIOCTLCMD;
}
#endif
1954
1955static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1956{
1957        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1958                      IPSTATS_MIB_OUTFORWDATAGRAMS);
1959        IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1960                      IPSTATS_MIB_OUTOCTETS, skb->len);
1961        return dst_output(net, sk, skb);
1962}
1963
1964/*
1965 *      Processing handlers for ip6mr_forward
1966 */
1967
1968static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
1969                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1970{
1971        struct ipv6hdr *ipv6h;
1972        struct vif_device *vif = &mrt->vif_table[vifi];
1973        struct net_device *dev;
1974        struct dst_entry *dst;
1975        struct flowi6 fl6;
1976
1977        if (!vif->dev)
1978                goto out_free;
1979
1980#ifdef CONFIG_IPV6_PIMSM_V2
1981        if (vif->flags & MIFF_REGISTER) {
1982                vif->pkt_out++;
1983                vif->bytes_out += skb->len;
1984                vif->dev->stats.tx_bytes += skb->len;
1985                vif->dev->stats.tx_packets++;
1986                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1987                goto out_free;
1988        }
1989#endif
1990
1991        ipv6h = ipv6_hdr(skb);
1992
1993        fl6 = (struct flowi6) {
1994                .flowi6_oif = vif->link,
1995                .daddr = ipv6h->daddr,
1996        };
1997
1998        dst = ip6_route_output(net, NULL, &fl6);
1999        if (dst->error) {
2000                dst_release(dst);
2001                goto out_free;
2002        }
2003
2004        skb_dst_drop(skb);
2005        skb_dst_set(skb, dst);
2006
2007        /*
2008         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2009         * not only before forwarding, but after forwarding on all output
2010         * interfaces. It is clear, if mrouter runs a multicasting
2011         * program, it should receive packets not depending to what interface
2012         * program is joined.
2013         * If we will not make it, the program will have to join on all
2014         * interfaces. On the other hand, multihoming host (or router, but
2015         * not mrouter) cannot join to more than one interface - it will
2016         * result in receiving multiple packets.
2017         */
2018        dev = vif->dev;
2019        skb->dev = dev;
2020        vif->pkt_out++;
2021        vif->bytes_out += skb->len;
2022
2023        /* We are about to write */
2024        /* XXX: extension headers? */
2025        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2026                goto out_free;
2027
2028        ipv6h = ipv6_hdr(skb);
2029        ipv6h->hop_limit--;
2030
2031        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2032
2033        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2034                       net, NULL, skb, skb->dev, dev,
2035                       ip6mr_forward2_finish);
2036
2037out_free:
2038        kfree_skb(skb);
2039        return 0;
2040}
2041
2042static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2043{
2044        int ct;
2045
2046        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2047                if (mrt->vif_table[ct].dev == dev)
2048                        break;
2049        }
2050        return ct;
2051}
2052
2053static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2054                           struct sk_buff *skb, struct mfc6_cache *c)
2055{
2056        int psend = -1;
2057        int vif, ct;
2058        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2059
2060        vif = c->_c.mfc_parent;
2061        c->_c.mfc_un.res.pkt++;
2062        c->_c.mfc_un.res.bytes += skb->len;
2063        c->_c.mfc_un.res.lastuse = jiffies;
2064
2065        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2066                struct mfc6_cache *cache_proxy;
2067
2068                /* For an (*,G) entry, we only check that the incoming
2069                 * interface is part of the static tree.
2070                 */
2071                rcu_read_lock();
2072                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2073                if (cache_proxy &&
2074                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2075                        rcu_read_unlock();
2076                        goto forward;
2077                }
2078                rcu_read_unlock();
2079        }
2080
2081        /*
2082         * Wrong interface: drop packet and (maybe) send PIM assert.
2083         */
2084        if (mrt->vif_table[vif].dev != skb->dev) {
2085                c->_c.mfc_un.res.wrong_if++;
2086
2087                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2088                    /* pimsm uses asserts, when switching from RPT to SPT,
2089                       so that we cannot check that packet arrived on an oif.
2090                       It is bad, but otherwise we would need to move pretty
2091                       large chunk of pimd to kernel. Ough... --ANK
2092                     */
2093                    (mrt->mroute_do_pim ||
2094                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2095                    time_after(jiffies,
2096                               c->_c.mfc_un.res.last_assert +
2097                               MFC_ASSERT_THRESH)) {
2098                        c->_c.mfc_un.res.last_assert = jiffies;
2099                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2100                }
2101                goto dont_forward;
2102        }
2103
2104forward:
2105        mrt->vif_table[vif].pkt_in++;
2106        mrt->vif_table[vif].bytes_in += skb->len;
2107
2108        /*
2109         *      Forward the frame
2110         */
2111        if (ipv6_addr_any(&c->mf6c_origin) &&
2112            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2113                if (true_vifi >= 0 &&
2114                    true_vifi != c->_c.mfc_parent &&
2115                    ipv6_hdr(skb)->hop_limit >
2116                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2117                        /* It's an (*,*) entry and the packet is not coming from
2118                         * the upstream: forward the packet to the upstream
2119                         * only.
2120                         */
2121                        psend = c->_c.mfc_parent;
2122                        goto last_forward;
2123                }
2124                goto dont_forward;
2125        }
2126        for (ct = c->_c.mfc_un.res.maxvif - 1;
2127             ct >= c->_c.mfc_un.res.minvif; ct--) {
2128                /* For (*,G) entry, don't forward to the incoming interface */
2129                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2130                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2131                        if (psend != -1) {
2132                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2133                                if (skb2)
2134                                        ip6mr_forward2(net, mrt, skb2,
2135                                                       c, psend);
2136                        }
2137                        psend = ct;
2138                }
2139        }
2140last_forward:
2141        if (psend != -1) {
2142                ip6mr_forward2(net, mrt, skb, c, psend);
2143                return;
2144        }
2145
2146dont_forward:
2147        kfree_skb(skb);
2148}
2149
2150
2151/*
2152 *      Multicast packets for forwarding arrive here
2153 */
2154
2155int ip6_mr_input(struct sk_buff *skb)
2156{
2157        struct mfc6_cache *cache;
2158        struct net *net = dev_net(skb->dev);
2159        struct mr_table *mrt;
2160        struct flowi6 fl6 = {
2161                .flowi6_iif     = skb->dev->ifindex,
2162                .flowi6_mark    = skb->mark,
2163        };
2164        int err;
2165
2166        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2167        if (err < 0) {
2168                kfree_skb(skb);
2169                return err;
2170        }
2171
2172        read_lock(&mrt_lock);
2173        cache = ip6mr_cache_find(mrt,
2174                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2175        if (!cache) {
2176                int vif = ip6mr_find_vif(mrt, skb->dev);
2177
2178                if (vif >= 0)
2179                        cache = ip6mr_cache_find_any(mrt,
2180                                                     &ipv6_hdr(skb)->daddr,
2181                                                     vif);
2182        }
2183
2184        /*
2185         *      No usable cache entry
2186         */
2187        if (!cache) {
2188                int vif;
2189
2190                vif = ip6mr_find_vif(mrt, skb->dev);
2191                if (vif >= 0) {
2192                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2193                        read_unlock(&mrt_lock);
2194
2195                        return err;
2196                }
2197                read_unlock(&mrt_lock);
2198                kfree_skb(skb);
2199                return -ENODEV;
2200        }
2201
2202        ip6_mr_forward(net, mrt, skb, cache);
2203
2204        read_unlock(&mrt_lock);
2205
2206        return 0;
2207}
2208
2209int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2210                    u32 portid)
2211{
2212        int err;
2213        struct mr_table *mrt;
2214        struct mfc6_cache *cache;
2215        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2216
2217        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2218        if (!mrt)
2219                return -ENOENT;
2220
2221        read_lock(&mrt_lock);
2222        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2223        if (!cache && skb->dev) {
2224                int vif = ip6mr_find_vif(mrt, skb->dev);
2225
2226                if (vif >= 0)
2227                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2228                                                     vif);
2229        }
2230
2231        if (!cache) {
2232                struct sk_buff *skb2;
2233                struct ipv6hdr *iph;
2234                struct net_device *dev;
2235                int vif;
2236
2237                dev = skb->dev;
2238                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2239                        read_unlock(&mrt_lock);
2240                        return -ENODEV;
2241                }
2242
2243                /* really correct? */
2244                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2245                if (!skb2) {
2246                        read_unlock(&mrt_lock);
2247                        return -ENOMEM;
2248                }
2249
2250                NETLINK_CB(skb2).portid = portid;
2251                skb_reset_transport_header(skb2);
2252
2253                skb_put(skb2, sizeof(struct ipv6hdr));
2254                skb_reset_network_header(skb2);
2255
2256                iph = ipv6_hdr(skb2);
2257                iph->version = 0;
2258                iph->priority = 0;
2259                iph->flow_lbl[0] = 0;
2260                iph->flow_lbl[1] = 0;
2261                iph->flow_lbl[2] = 0;
2262                iph->payload_len = 0;
2263                iph->nexthdr = IPPROTO_NONE;
2264                iph->hop_limit = 0;
2265                iph->saddr = rt->rt6i_src.addr;
2266                iph->daddr = rt->rt6i_dst.addr;
2267
2268                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2269                read_unlock(&mrt_lock);
2270
2271                return err;
2272        }
2273
2274        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2275        read_unlock(&mrt_lock);
2276        return err;
2277}
2278
2279static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2280                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2281                             int flags)
2282{
2283        struct nlmsghdr *nlh;
2284        struct rtmsg *rtm;
2285        int err;
2286
2287        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2288        if (!nlh)
2289                return -EMSGSIZE;
2290
2291        rtm = nlmsg_data(nlh);
2292        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2293        rtm->rtm_dst_len  = 128;
2294        rtm->rtm_src_len  = 128;
2295        rtm->rtm_tos      = 0;
2296        rtm->rtm_table    = mrt->id;
2297        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2298                goto nla_put_failure;
2299        rtm->rtm_type = RTN_MULTICAST;
2300        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2301        if (c->_c.mfc_flags & MFC_STATIC)
2302                rtm->rtm_protocol = RTPROT_STATIC;
2303        else
2304                rtm->rtm_protocol = RTPROT_MROUTED;
2305        rtm->rtm_flags    = 0;
2306
2307        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2308            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2309                goto nla_put_failure;
2310        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2311        /* do not break the dump if cache is unresolved */
2312        if (err < 0 && err != -ENOENT)
2313                goto nla_put_failure;
2314
2315        nlmsg_end(skb, nlh);
2316        return 0;
2317
2318nla_put_failure:
2319        nlmsg_cancel(skb, nlh);
2320        return -EMSGSIZE;
2321}
2322
2323static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2324                              u32 portid, u32 seq, struct mr_mfc *c,
2325                              int cmd, int flags)
2326{
2327        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2328                                 cmd, flags);
2329}
2330
2331static int mr6_msgsize(bool unresolved, int maxvif)
2332{
2333        size_t len =
2334                NLMSG_ALIGN(sizeof(struct rtmsg))
2335                + nla_total_size(4)     /* RTA_TABLE */
2336                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2337                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2338                ;
2339
2340        if (!unresolved)
2341                len = len
2342                      + nla_total_size(4)       /* RTA_IIF */
2343                      + nla_total_size(0)       /* RTA_MULTIPATH */
2344                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2345                                                /* RTA_MFC_STATS */
2346                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2347                ;
2348
2349        return len;
2350}
2351
2352static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2353                              int cmd)
2354{
2355        struct net *net = read_pnet(&mrt->net);
2356        struct sk_buff *skb;
2357        int err = -ENOBUFS;
2358
2359        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2360                        GFP_ATOMIC);
2361        if (!skb)
2362                goto errout;
2363
2364        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2365        if (err < 0)
2366                goto errout;
2367
2368        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2369        return;
2370
2371errout:
2372        kfree_skb(skb);
2373        if (err < 0)
2374                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2375}
2376
2377static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2378{
2379        size_t len =
2380                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2381                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2382                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2383                                        /* IP6MRA_CREPORT_SRC_ADDR */
2384                + nla_total_size(sizeof(struct in6_addr))
2385                                        /* IP6MRA_CREPORT_DST_ADDR */
2386                + nla_total_size(sizeof(struct in6_addr))
2387                                        /* IP6MRA_CREPORT_PKT */
2388                + nla_total_size(payloadlen)
2389                ;
2390
2391        return len;
2392}
2393
2394static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2395{
2396        struct net *net = read_pnet(&mrt->net);
2397        struct nlmsghdr *nlh;
2398        struct rtgenmsg *rtgenm;
2399        struct mrt6msg *msg;
2400        struct sk_buff *skb;
2401        struct nlattr *nla;
2402        int payloadlen;
2403
2404        payloadlen = pkt->len - sizeof(struct mrt6msg);
2405        msg = (struct mrt6msg *)skb_transport_header(pkt);
2406
2407        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2408        if (!skb)
2409                goto errout;
2410
2411        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2412                        sizeof(struct rtgenmsg), 0);
2413        if (!nlh)
2414                goto errout;
2415        rtgenm = nlmsg_data(nlh);
2416        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2417        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2418            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2419            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2420                             &msg->im6_src) ||
2421            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2422                             &msg->im6_dst))
2423                goto nla_put_failure;
2424
2425        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2426        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2427                                  nla_data(nla), payloadlen))
2428                goto nla_put_failure;
2429
2430        nlmsg_end(skb, nlh);
2431
2432        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2433        return;
2434
2435nla_put_failure:
2436        nlmsg_cancel(skb, nlh);
2437errout:
2438        kfree_skb(skb);
2439        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2440}
2441
2442static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2443{
2444        const struct nlmsghdr *nlh = cb->nlh;
2445        struct fib_dump_filter filter = {};
2446        int err;
2447
2448        if (cb->strict_check) {
2449                err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2450                                            &filter, cb);
2451                if (err < 0)
2452                        return err;
2453        }
2454
2455        if (filter.table_id) {
2456                struct mr_table *mrt;
2457
2458                mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2459                if (!mrt) {
2460                        if (filter.dump_all_families)
2461                                return skb->len;
2462
2463                        NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2464                        return -ENOENT;
2465                }
2466                err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2467                                    &mfc_unres_lock, &filter);
2468                return skb->len ? : err;
2469        }
2470
2471        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2472                                _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2473}
2474