linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Linux IPv6 multicast routing support for BSD pim6sd
   4 *      Based on net/ipv4/ipmr.c.
   5 *
   6 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   7 *              LSIIT Laboratory, Strasbourg, France
   8 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   9 *              6WIND, Paris, France
  10 *      Copyright (C)2007,2008 USAGI/WIDE Project
  11 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  12 */
  13
  14#include <linux/uaccess.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/errno.h>
  18#include <linux/mm.h>
  19#include <linux/kernel.h>
  20#include <linux/fcntl.h>
  21#include <linux/stat.h>
  22#include <linux/socket.h>
  23#include <linux/inet.h>
  24#include <linux/netdevice.h>
  25#include <linux/inetdevice.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/init.h>
  29#include <linux/compat.h>
  30#include <linux/rhashtable.h>
  31#include <net/protocol.h>
  32#include <linux/skbuff.h>
  33#include <net/raw.h>
  34#include <linux/notifier.h>
  35#include <linux/if_arp.h>
  36#include <net/checksum.h>
  37#include <net/netlink.h>
  38#include <net/fib_rules.h>
  39
  40#include <net/ipv6.h>
  41#include <net/ip6_route.h>
  42#include <linux/mroute6.h>
  43#include <linux/pim.h>
  44#include <net/addrconf.h>
  45#include <linux/netfilter_ipv6.h>
  46#include <linux/export.h>
  47#include <net/ip6_checksum.h>
  48#include <linux/netconf.h>
  49#include <net/ip_tunnels.h>
  50
  51#include <linux/nospec.h>
  52
  53struct ip6mr_rule {
  54        struct fib_rule         common;
  55};
  56
  57struct ip6mr_result {
  58        struct mr_table *mrt;
  59};
  60
/* Big lock protecting the vif table, the mrt cache and the mroute socket
 * state.  Note that changes are serialized via rtnl_lock.
 */
  64
  65static DEFINE_RWLOCK(mrt_lock);
  66
  67/* Multicast router control variables */
  68
  69/* Special spinlock for queue of unresolved entries */
  70static DEFINE_SPINLOCK(mfc_unres_lock);
  71
/* We return to Alan's original scheme.  The hash table of resolved
 * entries is changed only in process context and is protected by the
 * weak lock mrt_lock.  The queue of unresolved entries is protected by
 * the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is free of exclusive locks altogether.
 */
  79
  80static struct kmem_cache *mrt_cachep __read_mostly;
  81
  82static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  83static void ip6mr_free_table(struct mr_table *mrt);
  84
  85static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  86                           struct net_device *dev, struct sk_buff *skb,
  87                           struct mfc6_cache *cache);
  88static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  89                              mifi_t mifi, int assert);
  90static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  91                              int cmd);
  92static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  93static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  94                               struct netlink_callback *cb);
  95static void mroute_clean_tables(struct mr_table *mrt, int flags);
  96static void ipmr_expire_process(struct timer_list *t);
  97
  98#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
  99#define ip6mr_for_each_table(mrt, net) \
 100        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
 101                                lockdep_rtnl_is_held() || \
 102                                list_empty(&net->ipv6.mr6_tables))
 103
 104static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 105                                            struct mr_table *mrt)
 106{
 107        struct mr_table *ret;
 108
 109        if (!mrt)
 110                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 111                                     struct mr_table, list);
 112        else
 113                ret = list_entry_rcu(mrt->list.next,
 114                                     struct mr_table, list);
 115
 116        if (&ret->list == &net->ipv6.mr6_tables)
 117                return NULL;
 118        return ret;
 119}
 120
 121static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 122{
 123        struct mr_table *mrt;
 124
 125        ip6mr_for_each_table(mrt, net) {
 126                if (mrt->id == id)
 127                        return mrt;
 128        }
 129        return NULL;
 130}
 131
 132static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 133                            struct mr_table **mrt)
 134{
 135        int err;
 136        struct ip6mr_result res;
 137        struct fib_lookup_arg arg = {
 138                .result = &res,
 139                .flags = FIB_LOOKUP_NOREF,
 140        };
 141
 142        /* update flow if oif or iif point to device enslaved to l3mdev */
 143        l3mdev_update_flow(net, flowi6_to_flowi(flp6));
 144
 145        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 146                               flowi6_to_flowi(flp6), 0, &arg);
 147        if (err < 0)
 148                return err;
 149        *mrt = res.mrt;
 150        return 0;
 151}
 152
 153static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 154                             int flags, struct fib_lookup_arg *arg)
 155{
 156        struct ip6mr_result *res = arg->result;
 157        struct mr_table *mrt;
 158
 159        switch (rule->action) {
 160        case FR_ACT_TO_TBL:
 161                break;
 162        case FR_ACT_UNREACHABLE:
 163                return -ENETUNREACH;
 164        case FR_ACT_PROHIBIT:
 165                return -EACCES;
 166        case FR_ACT_BLACKHOLE:
 167        default:
 168                return -EINVAL;
 169        }
 170
 171        arg->table = fib_rule_get_table(rule, arg);
 172
 173        mrt = ip6mr_get_table(rule->fr_net, arg->table);
 174        if (!mrt)
 175                return -EAGAIN;
 176        res->mrt = mrt;
 177        return 0;
 178}
 179
 180static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 181{
 182        return 1;
 183}
 184
 185static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 186        FRA_GENERIC_POLICY,
 187};
 188
 189static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 190                                struct fib_rule_hdr *frh, struct nlattr **tb,
 191                                struct netlink_ext_ack *extack)
 192{
 193        return 0;
 194}
 195
 196static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 197                              struct nlattr **tb)
 198{
 199        return 1;
 200}
 201
 202static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 203                           struct fib_rule_hdr *frh)
 204{
 205        frh->dst_len = 0;
 206        frh->src_len = 0;
 207        frh->tos     = 0;
 208        return 0;
 209}
 210
 211static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 212        .family         = RTNL_FAMILY_IP6MR,
 213        .rule_size      = sizeof(struct ip6mr_rule),
 214        .addr_size      = sizeof(struct in6_addr),
 215        .action         = ip6mr_rule_action,
 216        .match          = ip6mr_rule_match,
 217        .configure      = ip6mr_rule_configure,
 218        .compare        = ip6mr_rule_compare,
 219        .fill           = ip6mr_rule_fill,
 220        .nlgroup        = RTNLGRP_IPV6_RULE,
 221        .policy         = ip6mr_rule_policy,
 222        .owner          = THIS_MODULE,
 223};
 224
 225static int __net_init ip6mr_rules_init(struct net *net)
 226{
 227        struct fib_rules_ops *ops;
 228        struct mr_table *mrt;
 229        int err;
 230
 231        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 232        if (IS_ERR(ops))
 233                return PTR_ERR(ops);
 234
 235        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 236
 237        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 238        if (IS_ERR(mrt)) {
 239                err = PTR_ERR(mrt);
 240                goto err1;
 241        }
 242
 243        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 244        if (err < 0)
 245                goto err2;
 246
 247        net->ipv6.mr6_rules_ops = ops;
 248        return 0;
 249
 250err2:
 251        ip6mr_free_table(mrt);
 252err1:
 253        fib_rules_unregister(ops);
 254        return err;
 255}
 256
 257static void __net_exit ip6mr_rules_exit(struct net *net)
 258{
 259        struct mr_table *mrt, *next;
 260
 261        rtnl_lock();
 262        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 263                list_del(&mrt->list);
 264                ip6mr_free_table(mrt);
 265        }
 266        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 267        rtnl_unlock();
 268}
 269
 270static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 271                            struct netlink_ext_ack *extack)
 272{
 273        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 274}
 275
 276static unsigned int ip6mr_rules_seq_read(struct net *net)
 277{
 278        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 279}
 280
 281bool ip6mr_rule_default(const struct fib_rule *rule)
 282{
 283        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 284               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 285}
 286EXPORT_SYMBOL(ip6mr_rule_default);
 287#else
 288#define ip6mr_for_each_table(mrt, net) \
 289        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 290
 291static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 292                                            struct mr_table *mrt)
 293{
 294        if (!mrt)
 295                return net->ipv6.mrt6;
 296        return NULL;
 297}
 298
 299static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 300{
 301        return net->ipv6.mrt6;
 302}
 303
 304static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 305                            struct mr_table **mrt)
 306{
 307        *mrt = net->ipv6.mrt6;
 308        return 0;
 309}
 310
 311static int __net_init ip6mr_rules_init(struct net *net)
 312{
 313        struct mr_table *mrt;
 314
 315        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 316        if (IS_ERR(mrt))
 317                return PTR_ERR(mrt);
 318        net->ipv6.mrt6 = mrt;
 319        return 0;
 320}
 321
 322static void __net_exit ip6mr_rules_exit(struct net *net)
 323{
 324        rtnl_lock();
 325        ip6mr_free_table(net->ipv6.mrt6);
 326        net->ipv6.mrt6 = NULL;
 327        rtnl_unlock();
 328}
 329
 330static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 331                            struct netlink_ext_ack *extack)
 332{
 333        return 0;
 334}
 335
 336static unsigned int ip6mr_rules_seq_read(struct net *net)
 337{
 338        return 0;
 339}
 340#endif
 341
 342static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 343                          const void *ptr)
 344{
 345        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 346        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 347
 348        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 349               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 350}
 351
 352static const struct rhashtable_params ip6mr_rht_params = {
 353        .head_offset = offsetof(struct mr_mfc, mnode),
 354        .key_offset = offsetof(struct mfc6_cache, cmparg),
 355        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 356        .nelem_hint = 3,
 357        .obj_cmpfn = ip6mr_hash_cmp,
 358        .automatic_shrinking = true,
 359};
 360
 361static void ip6mr_new_table_set(struct mr_table *mrt,
 362                                struct net *net)
 363{
 364#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 365        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 366#endif
 367}
 368
 369static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 370        .mf6c_origin = IN6ADDR_ANY_INIT,
 371        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 372};
 373
 374static struct mr_table_ops ip6mr_mr_table_ops = {
 375        .rht_params = &ip6mr_rht_params,
 376        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 377};
 378
 379static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 380{
 381        struct mr_table *mrt;
 382
 383        mrt = ip6mr_get_table(net, id);
 384        if (mrt)
 385                return mrt;
 386
 387        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 388                              ipmr_expire_process, ip6mr_new_table_set);
 389}
 390
 391static void ip6mr_free_table(struct mr_table *mrt)
 392{
 393        del_timer_sync(&mrt->ipmr_expire_timer);
 394        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 395                                 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 396        rhltable_destroy(&mrt->mfc_hash);
 397        kfree(mrt);
 398}
 399
 400#ifdef CONFIG_PROC_FS
 401/* The /proc interfaces to multicast routing
 402 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 403 */
 404
 405static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 406        __acquires(mrt_lock)
 407{
 408        struct mr_vif_iter *iter = seq->private;
 409        struct net *net = seq_file_net(seq);
 410        struct mr_table *mrt;
 411
 412        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 413        if (!mrt)
 414                return ERR_PTR(-ENOENT);
 415
 416        iter->mrt = mrt;
 417
 418        read_lock(&mrt_lock);
 419        return mr_vif_seq_start(seq, pos);
 420}
 421
 422static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 423        __releases(mrt_lock)
 424{
 425        read_unlock(&mrt_lock);
 426}
 427
 428static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 429{
 430        struct mr_vif_iter *iter = seq->private;
 431        struct mr_table *mrt = iter->mrt;
 432
 433        if (v == SEQ_START_TOKEN) {
 434                seq_puts(seq,
 435                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 436        } else {
 437                const struct vif_device *vif = v;
 438                const char *name = vif->dev ? vif->dev->name : "none";
 439
 440                seq_printf(seq,
 441                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 442                           vif - mrt->vif_table,
 443                           name, vif->bytes_in, vif->pkt_in,
 444                           vif->bytes_out, vif->pkt_out,
 445                           vif->flags);
 446        }
 447        return 0;
 448}
 449
 450static const struct seq_operations ip6mr_vif_seq_ops = {
 451        .start = ip6mr_vif_seq_start,
 452        .next  = mr_vif_seq_next,
 453        .stop  = ip6mr_vif_seq_stop,
 454        .show  = ip6mr_vif_seq_show,
 455};
 456
 457static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 458{
 459        struct net *net = seq_file_net(seq);
 460        struct mr_table *mrt;
 461
 462        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 463        if (!mrt)
 464                return ERR_PTR(-ENOENT);
 465
 466        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 467}
 468
 469static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 470{
 471        int n;
 472
 473        if (v == SEQ_START_TOKEN) {
 474                seq_puts(seq,
 475                         "Group                            "
 476                         "Origin                           "
 477                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 478        } else {
 479                const struct mfc6_cache *mfc = v;
 480                const struct mr_mfc_iter *it = seq->private;
 481                struct mr_table *mrt = it->mrt;
 482
 483                seq_printf(seq, "%pI6 %pI6 %-3hd",
 484                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 485                           mfc->_c.mfc_parent);
 486
 487                if (it->cache != &mrt->mfc_unres_queue) {
 488                        seq_printf(seq, " %8lu %8lu %8lu",
 489                                   mfc->_c.mfc_un.res.pkt,
 490                                   mfc->_c.mfc_un.res.bytes,
 491                                   mfc->_c.mfc_un.res.wrong_if);
 492                        for (n = mfc->_c.mfc_un.res.minvif;
 493                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 494                                if (VIF_EXISTS(mrt, n) &&
 495                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 496                                        seq_printf(seq,
 497                                                   " %2d:%-3d", n,
 498                                                   mfc->_c.mfc_un.res.ttls[n]);
 499                        }
 500                } else {
 501                        /* unresolved mfc_caches don't contain
 502                         * pkt, bytes and wrong_if values
 503                         */
 504                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 505                }
 506                seq_putc(seq, '\n');
 507        }
 508        return 0;
 509}
 510
 511static const struct seq_operations ipmr_mfc_seq_ops = {
 512        .start = ipmr_mfc_seq_start,
 513        .next  = mr_mfc_seq_next,
 514        .stop  = mr_mfc_seq_stop,
 515        .show  = ipmr_mfc_seq_show,
 516};
 517#endif
 518
 519#ifdef CONFIG_IPV6_PIMSM_V2
 520
 521static int pim6_rcv(struct sk_buff *skb)
 522{
 523        struct pimreghdr *pim;
 524        struct ipv6hdr   *encap;
 525        struct net_device  *reg_dev = NULL;
 526        struct net *net = dev_net(skb->dev);
 527        struct mr_table *mrt;
 528        struct flowi6 fl6 = {
 529                .flowi6_iif     = skb->dev->ifindex,
 530                .flowi6_mark    = skb->mark,
 531        };
 532        int reg_vif_num;
 533
 534        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 535                goto drop;
 536
 537        pim = (struct pimreghdr *)skb_transport_header(skb);
 538        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 539            (pim->flags & PIM_NULL_REGISTER) ||
 540            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 541                             sizeof(*pim), IPPROTO_PIM,
 542                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 543             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 544                goto drop;
 545
 546        /* check if the inner packet is destined to mcast group */
 547        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 548                                   sizeof(*pim));
 549
 550        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 551            encap->payload_len == 0 ||
 552            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 553                goto drop;
 554
 555        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 556                goto drop;
 557        reg_vif_num = mrt->mroute_reg_vif_num;
 558
 559        read_lock(&mrt_lock);
 560        if (reg_vif_num >= 0)
 561                reg_dev = mrt->vif_table[reg_vif_num].dev;
 562        dev_hold(reg_dev);
 563        read_unlock(&mrt_lock);
 564
 565        if (!reg_dev)
 566                goto drop;
 567
 568        skb->mac_header = skb->network_header;
 569        skb_pull(skb, (u8 *)encap - skb->data);
 570        skb_reset_network_header(skb);
 571        skb->protocol = htons(ETH_P_IPV6);
 572        skb->ip_summed = CHECKSUM_NONE;
 573
 574        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 575
 576        netif_rx(skb);
 577
 578        dev_put(reg_dev);
 579        return 0;
 580 drop:
 581        kfree_skb(skb);
 582        return 0;
 583}
 584
 585static const struct inet6_protocol pim6_protocol = {
 586        .handler        =       pim6_rcv,
 587};
 588
 589/* Service routines creating virtual interfaces: PIMREG */
 590
 591static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 592                                      struct net_device *dev)
 593{
 594        struct net *net = dev_net(dev);
 595        struct mr_table *mrt;
 596        struct flowi6 fl6 = {
 597                .flowi6_oif     = dev->ifindex,
 598                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 599                .flowi6_mark    = skb->mark,
 600        };
 601
 602        if (!pskb_inet_may_pull(skb))
 603                goto tx_err;
 604
 605        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 606                goto tx_err;
 607
 608        read_lock(&mrt_lock);
 609        dev->stats.tx_bytes += skb->len;
 610        dev->stats.tx_packets++;
 611        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 612        read_unlock(&mrt_lock);
 613        kfree_skb(skb);
 614        return NETDEV_TX_OK;
 615
 616tx_err:
 617        dev->stats.tx_errors++;
 618        kfree_skb(skb);
 619        return NETDEV_TX_OK;
 620}
 621
 622static int reg_vif_get_iflink(const struct net_device *dev)
 623{
 624        return 0;
 625}
 626
 627static const struct net_device_ops reg_vif_netdev_ops = {
 628        .ndo_start_xmit = reg_vif_xmit,
 629        .ndo_get_iflink = reg_vif_get_iflink,
 630};
 631
 632static void reg_vif_setup(struct net_device *dev)
 633{
 634        dev->type               = ARPHRD_PIMREG;
 635        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 636        dev->flags              = IFF_NOARP;
 637        dev->netdev_ops         = &reg_vif_netdev_ops;
 638        dev->needs_free_netdev  = true;
 639        dev->features           |= NETIF_F_NETNS_LOCAL;
 640}
 641
 642static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 643{
 644        struct net_device *dev;
 645        char name[IFNAMSIZ];
 646
 647        if (mrt->id == RT6_TABLE_DFLT)
 648                sprintf(name, "pim6reg");
 649        else
 650                sprintf(name, "pim6reg%u", mrt->id);
 651
 652        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 653        if (!dev)
 654                return NULL;
 655
 656        dev_net_set(dev, net);
 657
 658        if (register_netdevice(dev)) {
 659                free_netdev(dev);
 660                return NULL;
 661        }
 662
 663        if (dev_open(dev, NULL))
 664                goto failure;
 665
 666        dev_hold(dev);
 667        return dev;
 668
 669failure:
 670        unregister_netdevice(dev);
 671        return NULL;
 672}
 673#endif
 674
 675static int call_ip6mr_vif_entry_notifiers(struct net *net,
 676                                          enum fib_event_type event_type,
 677                                          struct vif_device *vif,
 678                                          mifi_t vif_index, u32 tb_id)
 679{
 680        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 681                                     vif, vif_index, tb_id,
 682                                     &net->ipv6.ipmr_seq);
 683}
 684
 685static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 686                                          enum fib_event_type event_type,
 687                                          struct mfc6_cache *mfc, u32 tb_id)
 688{
 689        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 690                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 691}
 692
 693/* Delete a VIF entry */
 694static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 695                       struct list_head *head)
 696{
 697        struct vif_device *v;
 698        struct net_device *dev;
 699        struct inet6_dev *in6_dev;
 700
 701        if (vifi < 0 || vifi >= mrt->maxvif)
 702                return -EADDRNOTAVAIL;
 703
 704        v = &mrt->vif_table[vifi];
 705
 706        if (VIF_EXISTS(mrt, vifi))
 707                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 708                                               FIB_EVENT_VIF_DEL, v, vifi,
 709                                               mrt->id);
 710
 711        write_lock_bh(&mrt_lock);
 712        dev = v->dev;
 713        v->dev = NULL;
 714
 715        if (!dev) {
 716                write_unlock_bh(&mrt_lock);
 717                return -EADDRNOTAVAIL;
 718        }
 719
 720#ifdef CONFIG_IPV6_PIMSM_V2
 721        if (vifi == mrt->mroute_reg_vif_num)
 722                mrt->mroute_reg_vif_num = -1;
 723#endif
 724
 725        if (vifi + 1 == mrt->maxvif) {
 726                int tmp;
 727                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 728                        if (VIF_EXISTS(mrt, tmp))
 729                                break;
 730                }
 731                mrt->maxvif = tmp + 1;
 732        }
 733
 734        write_unlock_bh(&mrt_lock);
 735
 736        dev_set_allmulti(dev, -1);
 737
 738        in6_dev = __in6_dev_get(dev);
 739        if (in6_dev) {
 740                in6_dev->cnf.mc_forwarding--;
 741                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 742                                             NETCONFA_MC_FORWARDING,
 743                                             dev->ifindex, &in6_dev->cnf);
 744        }
 745
 746        if ((v->flags & MIFF_REGISTER) && !notify)
 747                unregister_netdevice_queue(dev, head);
 748
 749        dev_put(dev);
 750        return 0;
 751}
 752
 753static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 754{
 755        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 756
 757        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 758}
 759
 760static inline void ip6mr_cache_free(struct mfc6_cache *c)
 761{
 762        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 763}
 764
 765/* Destroy an unresolved cache entry, killing queued skbs
 766   and reporting error to netlink readers.
 767 */
 768
 769static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 770{
 771        struct net *net = read_pnet(&mrt->net);
 772        struct sk_buff *skb;
 773
 774        atomic_dec(&mrt->cache_resolve_queue_len);
 775
 776        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 777                if (ipv6_hdr(skb)->version == 0) {
 778                        struct nlmsghdr *nlh = skb_pull(skb,
 779                                                        sizeof(struct ipv6hdr));
 780                        nlh->nlmsg_type = NLMSG_ERROR;
 781                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 782                        skb_trim(skb, nlh->nlmsg_len);
 783                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 784                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 785                } else
 786                        kfree_skb(skb);
 787        }
 788
 789        ip6mr_cache_free(c);
 790}
 791
 792
 793/* Timer process for all the unresolved queue. */
 794
 795static void ipmr_do_expire_process(struct mr_table *mrt)
 796{
 797        unsigned long now = jiffies;
 798        unsigned long expires = 10 * HZ;
 799        struct mr_mfc *c, *next;
 800
 801        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 802                if (time_after(c->mfc_un.unres.expires, now)) {
 803                        /* not yet... */
 804                        unsigned long interval = c->mfc_un.unres.expires - now;
 805                        if (interval < expires)
 806                                expires = interval;
 807                        continue;
 808                }
 809
 810                list_del(&c->list);
 811                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 812                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 813        }
 814
 815        if (!list_empty(&mrt->mfc_unres_queue))
 816                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 817}
 818
 819static void ipmr_expire_process(struct timer_list *t)
 820{
 821        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 822
 823        if (!spin_trylock(&mfc_unres_lock)) {
 824                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 825                return;
 826        }
 827
 828        if (!list_empty(&mrt->mfc_unres_queue))
 829                ipmr_do_expire_process(mrt);
 830
 831        spin_unlock(&mfc_unres_lock);
 832}
 833
 834/* Fill oifs list. It is called under write locked mrt_lock. */
 835
 836static void ip6mr_update_thresholds(struct mr_table *mrt,
 837                                    struct mr_mfc *cache,
 838                                    unsigned char *ttls)
 839{
 840        int vifi;
 841
 842        cache->mfc_un.res.minvif = MAXMIFS;
 843        cache->mfc_un.res.maxvif = 0;
 844        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 845
 846        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 847                if (VIF_EXISTS(mrt, vifi) &&
 848                    ttls[vifi] && ttls[vifi] < 255) {
 849                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 850                        if (cache->mfc_un.res.minvif > vifi)
 851                                cache->mfc_un.res.minvif = vifi;
 852                        if (cache->mfc_un.res.maxvif <= vifi)
 853                                cache->mfc_un.res.maxvif = vifi + 1;
 854                }
 855        }
 856        cache->mfc_un.res.lastuse = jiffies;
 857}
 858
 859static int mif6_add(struct net *net, struct mr_table *mrt,
 860                    struct mif6ctl *vifc, int mrtsock)
 861{
 862        int vifi = vifc->mif6c_mifi;
 863        struct vif_device *v = &mrt->vif_table[vifi];
 864        struct net_device *dev;
 865        struct inet6_dev *in6_dev;
 866        int err;
 867
 868        /* Is vif busy ? */
 869        if (VIF_EXISTS(mrt, vifi))
 870                return -EADDRINUSE;
 871
 872        switch (vifc->mif6c_flags) {
 873#ifdef CONFIG_IPV6_PIMSM_V2
 874        case MIFF_REGISTER:
 875                /*
 876                 * Special Purpose VIF in PIM
 877                 * All the packets will be sent to the daemon
 878                 */
 879                if (mrt->mroute_reg_vif_num >= 0)
 880                        return -EADDRINUSE;
 881                dev = ip6mr_reg_vif(net, mrt);
 882                if (!dev)
 883                        return -ENOBUFS;
 884                err = dev_set_allmulti(dev, 1);
 885                if (err) {
 886                        unregister_netdevice(dev);
 887                        dev_put(dev);
 888                        return err;
 889                }
 890                break;
 891#endif
 892        case 0:
 893                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 894                if (!dev)
 895                        return -EADDRNOTAVAIL;
 896                err = dev_set_allmulti(dev, 1);
 897                if (err) {
 898                        dev_put(dev);
 899                        return err;
 900                }
 901                break;
 902        default:
 903                return -EINVAL;
 904        }
 905
 906        in6_dev = __in6_dev_get(dev);
 907        if (in6_dev) {
 908                in6_dev->cnf.mc_forwarding++;
 909                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 910                                             NETCONFA_MC_FORWARDING,
 911                                             dev->ifindex, &in6_dev->cnf);
 912        }
 913
 914        /* Fill in the VIF structures */
 915        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 916                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 917                        MIFF_REGISTER);
 918
 919        /* And finish update writing critical data */
 920        write_lock_bh(&mrt_lock);
 921        v->dev = dev;
 922#ifdef CONFIG_IPV6_PIMSM_V2
 923        if (v->flags & MIFF_REGISTER)
 924                mrt->mroute_reg_vif_num = vifi;
 925#endif
 926        if (vifi + 1 > mrt->maxvif)
 927                mrt->maxvif = vifi + 1;
 928        write_unlock_bh(&mrt_lock);
 929        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 930                                       v, vifi, mrt->id);
 931        return 0;
 932}
 933
 934static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 935                                           const struct in6_addr *origin,
 936                                           const struct in6_addr *mcastgrp)
 937{
 938        struct mfc6_cache_cmp_arg arg = {
 939                .mf6c_origin = *origin,
 940                .mf6c_mcastgrp = *mcastgrp,
 941        };
 942
 943        return mr_mfc_find(mrt, &arg);
 944}
 945
 946/* Look for a (*,G) entry */
 947static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 948                                               struct in6_addr *mcastgrp,
 949                                               mifi_t mifi)
 950{
 951        struct mfc6_cache_cmp_arg arg = {
 952                .mf6c_origin = in6addr_any,
 953                .mf6c_mcastgrp = *mcastgrp,
 954        };
 955
 956        if (ipv6_addr_any(mcastgrp))
 957                return mr_mfc_find_any_parent(mrt, mifi);
 958        return mr_mfc_find_any(mrt, mifi, &arg);
 959}
 960
 961/* Look for a (S,G,iif) entry if parent != -1 */
 962static struct mfc6_cache *
 963ip6mr_cache_find_parent(struct mr_table *mrt,
 964                        const struct in6_addr *origin,
 965                        const struct in6_addr *mcastgrp,
 966                        int parent)
 967{
 968        struct mfc6_cache_cmp_arg arg = {
 969                .mf6c_origin = *origin,
 970                .mf6c_mcastgrp = *mcastgrp,
 971        };
 972
 973        return mr_mfc_find_parent(mrt, &arg, parent);
 974}
 975
 976/* Allocate a multicast cache entry */
 977static struct mfc6_cache *ip6mr_cache_alloc(void)
 978{
 979        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 980        if (!c)
 981                return NULL;
 982        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 983        c->_c.mfc_un.res.minvif = MAXMIFS;
 984        c->_c.free = ip6mr_cache_free_rcu;
 985        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 986        return c;
 987}
 988
 989static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 990{
 991        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 992        if (!c)
 993                return NULL;
 994        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 995        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 996        return c;
 997}
 998
 999/*
1000 *      A cache entry has gone into a resolved state from queued
1001 */
1002
1003static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1004                                struct mfc6_cache *uc, struct mfc6_cache *c)
1005{
1006        struct sk_buff *skb;
1007
1008        /*
1009         *      Play the pending entries through our router
1010         */
1011
1012        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1013                if (ipv6_hdr(skb)->version == 0) {
1014                        struct nlmsghdr *nlh = skb_pull(skb,
1015                                                        sizeof(struct ipv6hdr));
1016
1017                        if (mr_fill_mroute(mrt, skb, &c->_c,
1018                                           nlmsg_data(nlh)) > 0) {
1019                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1020                        } else {
1021                                nlh->nlmsg_type = NLMSG_ERROR;
1022                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1023                                skb_trim(skb, nlh->nlmsg_len);
1024                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1025                        }
1026                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1027                } else
1028                        ip6_mr_forward(net, mrt, skb->dev, skb, c);
1029        }
1030}
1031
1032/*
1033 *      Bounce a cache query up to pim6sd and netlink.
1034 *
1035 *      Called under mrt_lock.
1036 */
1037
1038static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1039                              mifi_t mifi, int assert)
1040{
1041        struct sock *mroute6_sk;
1042        struct sk_buff *skb;
1043        struct mrt6msg *msg;
1044        int ret;
1045
1046#ifdef CONFIG_IPV6_PIMSM_V2
1047        if (assert == MRT6MSG_WHOLEPKT)
1048                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1049                                                +sizeof(*msg));
1050        else
1051#endif
1052                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1053
1054        if (!skb)
1055                return -ENOBUFS;
1056
1057        /* I suppose that internal messages
1058         * do not require checksums */
1059
1060        skb->ip_summed = CHECKSUM_UNNECESSARY;
1061
1062#ifdef CONFIG_IPV6_PIMSM_V2
1063        if (assert == MRT6MSG_WHOLEPKT) {
1064                /* Ugly, but we have no choice with this interface.
1065                   Duplicate old header, fix length etc.
1066                   And all this only to mangle msg->im6_msgtype and
1067                   to set msg->im6_mbz to "mbz" :-)
1068                 */
1069                skb_push(skb, -skb_network_offset(pkt));
1070
1071                skb_push(skb, sizeof(*msg));
1072                skb_reset_transport_header(skb);
1073                msg = (struct mrt6msg *)skb_transport_header(skb);
1074                msg->im6_mbz = 0;
1075                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1076                msg->im6_mif = mrt->mroute_reg_vif_num;
1077                msg->im6_pad = 0;
1078                msg->im6_src = ipv6_hdr(pkt)->saddr;
1079                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1080
1081                skb->ip_summed = CHECKSUM_UNNECESSARY;
1082        } else
1083#endif
1084        {
1085        /*
1086         *      Copy the IP header
1087         */
1088
1089        skb_put(skb, sizeof(struct ipv6hdr));
1090        skb_reset_network_header(skb);
1091        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1092
1093        /*
1094         *      Add our header
1095         */
1096        skb_put(skb, sizeof(*msg));
1097        skb_reset_transport_header(skb);
1098        msg = (struct mrt6msg *)skb_transport_header(skb);
1099
1100        msg->im6_mbz = 0;
1101        msg->im6_msgtype = assert;
1102        msg->im6_mif = mifi;
1103        msg->im6_pad = 0;
1104        msg->im6_src = ipv6_hdr(pkt)->saddr;
1105        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1106
1107        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1108        skb->ip_summed = CHECKSUM_UNNECESSARY;
1109        }
1110
1111        rcu_read_lock();
1112        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1113        if (!mroute6_sk) {
1114                rcu_read_unlock();
1115                kfree_skb(skb);
1116                return -EINVAL;
1117        }
1118
1119        mrt6msg_netlink_event(mrt, skb);
1120
1121        /* Deliver to user space multicast routing algorithms */
1122        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1123        rcu_read_unlock();
1124        if (ret < 0) {
1125                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1126                kfree_skb(skb);
1127        }
1128
1129        return ret;
1130}
1131
1132/* Queue a packet for resolution. It gets locked cache entry! */
1133static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1134                                  struct sk_buff *skb, struct net_device *dev)
1135{
1136        struct mfc6_cache *c;
1137        bool found = false;
1138        int err;
1139
1140        spin_lock_bh(&mfc_unres_lock);
1141        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1142                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1143                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1144                        found = true;
1145                        break;
1146                }
1147        }
1148
1149        if (!found) {
1150                /*
1151                 *      Create a new entry if allowable
1152                 */
1153
1154                c = ip6mr_cache_alloc_unres();
1155                if (!c) {
1156                        spin_unlock_bh(&mfc_unres_lock);
1157
1158                        kfree_skb(skb);
1159                        return -ENOBUFS;
1160                }
1161
1162                /* Fill in the new cache entry */
1163                c->_c.mfc_parent = -1;
1164                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1165                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1166
1167                /*
1168                 *      Reflect first query at pim6sd
1169                 */
1170                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1171                if (err < 0) {
1172                        /* If the report failed throw the cache entry
1173                           out - Brad Parker
1174                         */
1175                        spin_unlock_bh(&mfc_unres_lock);
1176
1177                        ip6mr_cache_free(c);
1178                        kfree_skb(skb);
1179                        return err;
1180                }
1181
1182                atomic_inc(&mrt->cache_resolve_queue_len);
1183                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1184                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1185
1186                ipmr_do_expire_process(mrt);
1187        }
1188
1189        /* See if we can append the packet */
1190        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1191                kfree_skb(skb);
1192                err = -ENOBUFS;
1193        } else {
1194                if (dev) {
1195                        skb->dev = dev;
1196                        skb->skb_iif = dev->ifindex;
1197                }
1198                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1199                err = 0;
1200        }
1201
1202        spin_unlock_bh(&mfc_unres_lock);
1203        return err;
1204}
1205
1206/*
1207 *      MFC6 cache manipulation by user space
1208 */
1209
1210static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1211                            int parent)
1212{
1213        struct mfc6_cache *c;
1214
1215        /* The entries are added/deleted only under RTNL */
1216        rcu_read_lock();
1217        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1218                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1219        rcu_read_unlock();
1220        if (!c)
1221                return -ENOENT;
1222        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1223        list_del_rcu(&c->_c.list);
1224
1225        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1226                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1227        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1228        mr_cache_put(&c->_c);
1229        return 0;
1230}
1231
1232static int ip6mr_device_event(struct notifier_block *this,
1233                              unsigned long event, void *ptr)
1234{
1235        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1236        struct net *net = dev_net(dev);
1237        struct mr_table *mrt;
1238        struct vif_device *v;
1239        int ct;
1240
1241        if (event != NETDEV_UNREGISTER)
1242                return NOTIFY_DONE;
1243
1244        ip6mr_for_each_table(mrt, net) {
1245                v = &mrt->vif_table[0];
1246                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1247                        if (v->dev == dev)
1248                                mif6_delete(mrt, ct, 1, NULL);
1249                }
1250        }
1251
1252        return NOTIFY_DONE;
1253}
1254
1255static unsigned int ip6mr_seq_read(struct net *net)
1256{
1257        ASSERT_RTNL();
1258
1259        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1260}
1261
1262static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1263                      struct netlink_ext_ack *extack)
1264{
1265        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1266                       ip6mr_mr_table_iter, &mrt_lock, extack);
1267}
1268
1269static struct notifier_block ip6_mr_notifier = {
1270        .notifier_call = ip6mr_device_event
1271};
1272
1273static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1274        .family         = RTNL_FAMILY_IP6MR,
1275        .fib_seq_read   = ip6mr_seq_read,
1276        .fib_dump       = ip6mr_dump,
1277        .owner          = THIS_MODULE,
1278};
1279
1280static int __net_init ip6mr_notifier_init(struct net *net)
1281{
1282        struct fib_notifier_ops *ops;
1283
1284        net->ipv6.ipmr_seq = 0;
1285
1286        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1287        if (IS_ERR(ops))
1288                return PTR_ERR(ops);
1289
1290        net->ipv6.ip6mr_notifier_ops = ops;
1291
1292        return 0;
1293}
1294
1295static void __net_exit ip6mr_notifier_exit(struct net *net)
1296{
1297        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1298        net->ipv6.ip6mr_notifier_ops = NULL;
1299}
1300
1301/* Setup for IP multicast routing */
1302static int __net_init ip6mr_net_init(struct net *net)
1303{
1304        int err;
1305
1306        err = ip6mr_notifier_init(net);
1307        if (err)
1308                return err;
1309
1310        err = ip6mr_rules_init(net);
1311        if (err < 0)
1312                goto ip6mr_rules_fail;
1313
1314#ifdef CONFIG_PROC_FS
1315        err = -ENOMEM;
1316        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1317                        sizeof(struct mr_vif_iter)))
1318                goto proc_vif_fail;
1319        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1320                        sizeof(struct mr_mfc_iter)))
1321                goto proc_cache_fail;
1322#endif
1323
1324        return 0;
1325
1326#ifdef CONFIG_PROC_FS
1327proc_cache_fail:
1328        remove_proc_entry("ip6_mr_vif", net->proc_net);
1329proc_vif_fail:
1330        ip6mr_rules_exit(net);
1331#endif
1332ip6mr_rules_fail:
1333        ip6mr_notifier_exit(net);
1334        return err;
1335}
1336
1337static void __net_exit ip6mr_net_exit(struct net *net)
1338{
1339#ifdef CONFIG_PROC_FS
1340        remove_proc_entry("ip6_mr_cache", net->proc_net);
1341        remove_proc_entry("ip6_mr_vif", net->proc_net);
1342#endif
1343        ip6mr_rules_exit(net);
1344        ip6mr_notifier_exit(net);
1345}
1346
1347static struct pernet_operations ip6mr_net_ops = {
1348        .init = ip6mr_net_init,
1349        .exit = ip6mr_net_exit,
1350};
1351
1352int __init ip6_mr_init(void)
1353{
1354        int err;
1355
1356        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1357                                       sizeof(struct mfc6_cache),
1358                                       0, SLAB_HWCACHE_ALIGN,
1359                                       NULL);
1360        if (!mrt_cachep)
1361                return -ENOMEM;
1362
1363        err = register_pernet_subsys(&ip6mr_net_ops);
1364        if (err)
1365                goto reg_pernet_fail;
1366
1367        err = register_netdevice_notifier(&ip6_mr_notifier);
1368        if (err)
1369                goto reg_notif_fail;
1370#ifdef CONFIG_IPV6_PIMSM_V2
1371        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1372                pr_err("%s: can't add PIM protocol\n", __func__);
1373                err = -EAGAIN;
1374                goto add_proto_fail;
1375        }
1376#endif
1377        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1378                                   NULL, ip6mr_rtm_dumproute, 0);
1379        if (err == 0)
1380                return 0;
1381
1382#ifdef CONFIG_IPV6_PIMSM_V2
1383        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1384add_proto_fail:
1385        unregister_netdevice_notifier(&ip6_mr_notifier);
1386#endif
1387reg_notif_fail:
1388        unregister_pernet_subsys(&ip6mr_net_ops);
1389reg_pernet_fail:
1390        kmem_cache_destroy(mrt_cachep);
1391        return err;
1392}
1393
1394void ip6_mr_cleanup(void)
1395{
1396        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1397#ifdef CONFIG_IPV6_PIMSM_V2
1398        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1399#endif
1400        unregister_netdevice_notifier(&ip6_mr_notifier);
1401        unregister_pernet_subsys(&ip6mr_net_ops);
1402        kmem_cache_destroy(mrt_cachep);
1403}
1404
1405static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1406                         struct mf6cctl *mfc, int mrtsock, int parent)
1407{
1408        unsigned char ttls[MAXMIFS];
1409        struct mfc6_cache *uc, *c;
1410        struct mr_mfc *_uc;
1411        bool found;
1412        int i, err;
1413
1414        if (mfc->mf6cc_parent >= MAXMIFS)
1415                return -ENFILE;
1416
1417        memset(ttls, 255, MAXMIFS);
1418        for (i = 0; i < MAXMIFS; i++) {
1419                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1420                        ttls[i] = 1;
1421        }
1422
1423        /* The entries are added/deleted only under RTNL */
1424        rcu_read_lock();
1425        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1426                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1427        rcu_read_unlock();
1428        if (c) {
1429                write_lock_bh(&mrt_lock);
1430                c->_c.mfc_parent = mfc->mf6cc_parent;
1431                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1432                if (!mrtsock)
1433                        c->_c.mfc_flags |= MFC_STATIC;
1434                write_unlock_bh(&mrt_lock);
1435                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1436                                               c, mrt->id);
1437                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1438                return 0;
1439        }
1440
1441        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1442            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1443                return -EINVAL;
1444
1445        c = ip6mr_cache_alloc();
1446        if (!c)
1447                return -ENOMEM;
1448
1449        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1450        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1451        c->_c.mfc_parent = mfc->mf6cc_parent;
1452        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1453        if (!mrtsock)
1454                c->_c.mfc_flags |= MFC_STATIC;
1455
1456        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1457                                  ip6mr_rht_params);
1458        if (err) {
1459                pr_err("ip6mr: rhtable insert error %d\n", err);
1460                ip6mr_cache_free(c);
1461                return err;
1462        }
1463        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1464
1465        /* Check to see if we resolved a queued list. If so we
1466         * need to send on the frames and tidy up.
1467         */
1468        found = false;
1469        spin_lock_bh(&mfc_unres_lock);
1470        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1471                uc = (struct mfc6_cache *)_uc;
1472                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1473                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1474                        list_del(&_uc->list);
1475                        atomic_dec(&mrt->cache_resolve_queue_len);
1476                        found = true;
1477                        break;
1478                }
1479        }
1480        if (list_empty(&mrt->mfc_unres_queue))
1481                del_timer(&mrt->ipmr_expire_timer);
1482        spin_unlock_bh(&mfc_unres_lock);
1483
1484        if (found) {
1485                ip6mr_cache_resolve(net, mrt, uc, c);
1486                ip6mr_cache_free(uc);
1487        }
1488        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1489                                       c, mrt->id);
1490        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1491        return 0;
1492}
1493
1494/*
1495 *      Close the multicast socket, and clear the vif tables etc
1496 */
1497
1498static void mroute_clean_tables(struct mr_table *mrt, int flags)
1499{
1500        struct mr_mfc *c, *tmp;
1501        LIST_HEAD(list);
1502        int i;
1503
1504        /* Shut down all active vif entries */
1505        if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1506                for (i = 0; i < mrt->maxvif; i++) {
1507                        if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1508                             !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1509                            (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1510                                continue;
1511                        mif6_delete(mrt, i, 0, &list);
1512                }
1513                unregister_netdevice_many(&list);
1514        }
1515
1516        /* Wipe the cache */
1517        if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1518                list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1519                        if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1520                            (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1521                                continue;
1522                        rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1523                        list_del_rcu(&c->list);
1524                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1525                                                       FIB_EVENT_ENTRY_DEL,
1526                                                       (struct mfc6_cache *)c, mrt->id);
1527                        mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1528                        mr_cache_put(c);
1529                }
1530        }
1531
1532        if (flags & MRT6_FLUSH_MFC) {
1533                if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1534                        spin_lock_bh(&mfc_unres_lock);
1535                        list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1536                                list_del(&c->list);
1537                                mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1538                                                  RTM_DELROUTE);
1539                                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1540                        }
1541                        spin_unlock_bh(&mfc_unres_lock);
1542                }
1543        }
1544}
1545
1546static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1547{
1548        int err = 0;
1549        struct net *net = sock_net(sk);
1550
1551        rtnl_lock();
1552        write_lock_bh(&mrt_lock);
1553        if (rtnl_dereference(mrt->mroute_sk)) {
1554                err = -EADDRINUSE;
1555        } else {
1556                rcu_assign_pointer(mrt->mroute_sk, sk);
1557                sock_set_flag(sk, SOCK_RCU_FREE);
1558                net->ipv6.devconf_all->mc_forwarding++;
1559        }
1560        write_unlock_bh(&mrt_lock);
1561
1562        if (!err)
1563                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1564                                             NETCONFA_MC_FORWARDING,
1565                                             NETCONFA_IFINDEX_ALL,
1566                                             net->ipv6.devconf_all);
1567        rtnl_unlock();
1568
1569        return err;
1570}
1571
1572int ip6mr_sk_done(struct sock *sk)
1573{
1574        int err = -EACCES;
1575        struct net *net = sock_net(sk);
1576        struct mr_table *mrt;
1577
1578        if (sk->sk_type != SOCK_RAW ||
1579            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1580                return err;
1581
1582        rtnl_lock();
1583        ip6mr_for_each_table(mrt, net) {
1584                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1585                        write_lock_bh(&mrt_lock);
1586                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1587                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1588                         * so the RCU grace period before sk freeing
1589                         * is guaranteed by sk_destruct()
1590                         */
1591                        net->ipv6.devconf_all->mc_forwarding--;
1592                        write_unlock_bh(&mrt_lock);
1593                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1594                                                     NETCONFA_MC_FORWARDING,
1595                                                     NETCONFA_IFINDEX_ALL,
1596                                                     net->ipv6.devconf_all);
1597
1598                        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1599                        err = 0;
1600                        break;
1601                }
1602        }
1603        rtnl_unlock();
1604
1605        return err;
1606}
1607
1608bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1609{
1610        struct mr_table *mrt;
1611        struct flowi6 fl6 = {
1612                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1613                .flowi6_oif     = skb->dev->ifindex,
1614                .flowi6_mark    = skb->mark,
1615        };
1616
1617        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1618                return NULL;
1619
1620        return rcu_access_pointer(mrt->mroute_sk);
1621}
1622EXPORT_SYMBOL(mroute6_is_socket);
1623
1624/*
1625 *      Socket options and virtual interface manipulation. The whole
1626 *      virtual interface system is a complete heap, but unfortunately
1627 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1628 *      MOSPF/PIM router set up we can clean this up.
1629 */
1630
1631int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1632                          unsigned int optlen)
1633{
1634        int ret, parent = 0;
1635        struct mif6ctl vif;
1636        struct mf6cctl mfc;
1637        mifi_t mifi;
1638        struct net *net = sock_net(sk);
1639        struct mr_table *mrt;
1640
1641        if (sk->sk_type != SOCK_RAW ||
1642            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1643                return -EOPNOTSUPP;
1644
1645        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1646        if (!mrt)
1647                return -ENOENT;
1648
1649        if (optname != MRT6_INIT) {
1650                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1651                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1652                        return -EACCES;
1653        }
1654
1655        switch (optname) {
1656        case MRT6_INIT:
1657                if (optlen < sizeof(int))
1658                        return -EINVAL;
1659
1660                return ip6mr_sk_init(mrt, sk);
1661
1662        case MRT6_DONE:
1663                return ip6mr_sk_done(sk);
1664
1665        case MRT6_ADD_MIF:
1666                if (optlen < sizeof(vif))
1667                        return -EINVAL;
1668                if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1669                        return -EFAULT;
1670                if (vif.mif6c_mifi >= MAXMIFS)
1671                        return -ENFILE;
1672                rtnl_lock();
1673                ret = mif6_add(net, mrt, &vif,
1674                               sk == rtnl_dereference(mrt->mroute_sk));
1675                rtnl_unlock();
1676                return ret;
1677
1678        case MRT6_DEL_MIF:
1679                if (optlen < sizeof(mifi_t))
1680                        return -EINVAL;
1681                if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1682                        return -EFAULT;
1683                rtnl_lock();
1684                ret = mif6_delete(mrt, mifi, 0, NULL);
1685                rtnl_unlock();
1686                return ret;
1687
1688        /*
1689         *      Manipulate the forwarding caches. These live
1690         *      in a sort of kernel/user symbiosis.
1691         */
1692        case MRT6_ADD_MFC:
1693        case MRT6_DEL_MFC:
1694                parent = -1;
1695                fallthrough;
1696        case MRT6_ADD_MFC_PROXY:
1697        case MRT6_DEL_MFC_PROXY:
1698                if (optlen < sizeof(mfc))
1699                        return -EINVAL;
1700                if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1701                        return -EFAULT;
1702                if (parent == 0)
1703                        parent = mfc.mf6cc_parent;
1704                rtnl_lock();
1705                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1706                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1707                else
1708                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1709                                            sk ==
1710                                            rtnl_dereference(mrt->mroute_sk),
1711                                            parent);
1712                rtnl_unlock();
1713                return ret;
1714
1715        case MRT6_FLUSH:
1716        {
1717                int flags;
1718
1719                if (optlen != sizeof(flags))
1720                        return -EINVAL;
1721                if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1722                        return -EFAULT;
1723                rtnl_lock();
1724                mroute_clean_tables(mrt, flags);
1725                rtnl_unlock();
1726                return 0;
1727        }
1728
1729        /*
1730         *      Control PIM assert (to activate pim will activate assert)
1731         */
1732        case MRT6_ASSERT:
1733        {
1734                int v;
1735
1736                if (optlen != sizeof(v))
1737                        return -EINVAL;
1738                if (copy_from_sockptr(&v, optval, sizeof(v)))
1739                        return -EFAULT;
1740                mrt->mroute_do_assert = v;
1741                return 0;
1742        }
1743
1744#ifdef CONFIG_IPV6_PIMSM_V2
1745        case MRT6_PIM:
1746        {
1747                int v;
1748
1749                if (optlen != sizeof(v))
1750                        return -EINVAL;
1751                if (copy_from_sockptr(&v, optval, sizeof(v)))
1752                        return -EFAULT;
1753                v = !!v;
1754                rtnl_lock();
1755                ret = 0;
1756                if (v != mrt->mroute_do_pim) {
1757                        mrt->mroute_do_pim = v;
1758                        mrt->mroute_do_assert = v;
1759                }
1760                rtnl_unlock();
1761                return ret;
1762        }
1763
1764#endif
1765#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1766        case MRT6_TABLE:
1767        {
1768                u32 v;
1769
1770                if (optlen != sizeof(u32))
1771                        return -EINVAL;
1772                if (copy_from_sockptr(&v, optval, sizeof(v)))
1773                        return -EFAULT;
1774                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1775                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1776                        return -EINVAL;
1777                if (sk == rcu_access_pointer(mrt->mroute_sk))
1778                        return -EBUSY;
1779
1780                rtnl_lock();
1781                ret = 0;
1782                mrt = ip6mr_new_table(net, v);
1783                if (IS_ERR(mrt))
1784                        ret = PTR_ERR(mrt);
1785                else
1786                        raw6_sk(sk)->ip6mr_table = v;
1787                rtnl_unlock();
1788                return ret;
1789        }
1790#endif
1791        /*
1792         *      Spurious command, or MRT6_VERSION which you cannot
1793         *      set.
1794         */
1795        default:
1796                return -ENOPROTOOPT;
1797        }
1798}
1799
1800/*
1801 *      Getsock opt support for the multicast routing system.
1802 */
1803
1804int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1805                          int __user *optlen)
1806{
1807        int olr;
1808        int val;
1809        struct net *net = sock_net(sk);
1810        struct mr_table *mrt;
1811
1812        if (sk->sk_type != SOCK_RAW ||
1813            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1814                return -EOPNOTSUPP;
1815
1816        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1817        if (!mrt)
1818                return -ENOENT;
1819
1820        switch (optname) {
1821        case MRT6_VERSION:
1822                val = 0x0305;
1823                break;
1824#ifdef CONFIG_IPV6_PIMSM_V2
1825        case MRT6_PIM:
1826                val = mrt->mroute_do_pim;
1827                break;
1828#endif
1829        case MRT6_ASSERT:
1830                val = mrt->mroute_do_assert;
1831                break;
1832        default:
1833                return -ENOPROTOOPT;
1834        }
1835
1836        if (get_user(olr, optlen))
1837                return -EFAULT;
1838
1839        olr = min_t(int, olr, sizeof(int));
1840        if (olr < 0)
1841                return -EINVAL;
1842
1843        if (put_user(olr, optlen))
1844                return -EFAULT;
1845        if (copy_to_user(optval, &val, olr))
1846                return -EFAULT;
1847        return 0;
1848}
1849
1850/*
1851 *      The IP multicast ioctl support routines.
1852 */
1853
1854int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1855{
1856        struct sioc_sg_req6 sr;
1857        struct sioc_mif_req6 vr;
1858        struct vif_device *vif;
1859        struct mfc6_cache *c;
1860        struct net *net = sock_net(sk);
1861        struct mr_table *mrt;
1862
1863        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864        if (!mrt)
1865                return -ENOENT;
1866
1867        switch (cmd) {
1868        case SIOCGETMIFCNT_IN6:
1869                if (copy_from_user(&vr, arg, sizeof(vr)))
1870                        return -EFAULT;
1871                if (vr.mifi >= mrt->maxvif)
1872                        return -EINVAL;
1873                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1874                read_lock(&mrt_lock);
1875                vif = &mrt->vif_table[vr.mifi];
1876                if (VIF_EXISTS(mrt, vr.mifi)) {
1877                        vr.icount = vif->pkt_in;
1878                        vr.ocount = vif->pkt_out;
1879                        vr.ibytes = vif->bytes_in;
1880                        vr.obytes = vif->bytes_out;
1881                        read_unlock(&mrt_lock);
1882
1883                        if (copy_to_user(arg, &vr, sizeof(vr)))
1884                                return -EFAULT;
1885                        return 0;
1886                }
1887                read_unlock(&mrt_lock);
1888                return -EADDRNOTAVAIL;
1889        case SIOCGETSGCNT_IN6:
1890                if (copy_from_user(&sr, arg, sizeof(sr)))
1891                        return -EFAULT;
1892
1893                rcu_read_lock();
1894                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1895                if (c) {
1896                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1897                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1898                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1899                        rcu_read_unlock();
1900
1901                        if (copy_to_user(arg, &sr, sizeof(sr)))
1902                                return -EFAULT;
1903                        return 0;
1904                }
1905                rcu_read_unlock();
1906                return -EADDRNOTAVAIL;
1907        default:
1908                return -ENOIOCTLCMD;
1909        }
1910}
1911
1912#ifdef CONFIG_COMPAT
/*
 * Compat form of the SIOCGETSGCNT_IN6 request handled by
 * ip6mr_compat_ioctl(): the counters are compat_ulong_t.
 */
struct compat_sioc_sg_req6 {
        struct sockaddr_in6 src;        /* in: source address of the cache lookup */
        struct sockaddr_in6 grp;        /* in: group address of the cache lookup */
        compat_ulong_t pktcnt;          /* out: packet count of the cache entry */
        compat_ulong_t bytecnt;         /* out: byte count of the cache entry */
        compat_ulong_t wrong_if;        /* out: wrong-interface count of the entry */
};
1920
/*
 * Compat form of the SIOCGETMIFCNT_IN6 request handled by
 * ip6mr_compat_ioctl(): the counters are compat_ulong_t.
 */
struct compat_sioc_mif_req6 {
        mifi_t  mifi;                   /* in: index into the table's vif_table */
        compat_ulong_t icount;          /* out: vif pkt_in */
        compat_ulong_t ocount;          /* out: vif pkt_out */
        compat_ulong_t ibytes;          /* out: vif bytes_in */
        compat_ulong_t obytes;          /* out: vif bytes_out */
};
1928
1929int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1930{
1931        struct compat_sioc_sg_req6 sr;
1932        struct compat_sioc_mif_req6 vr;
1933        struct vif_device *vif;
1934        struct mfc6_cache *c;
1935        struct net *net = sock_net(sk);
1936        struct mr_table *mrt;
1937
1938        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1939        if (!mrt)
1940                return -ENOENT;
1941
1942        switch (cmd) {
1943        case SIOCGETMIFCNT_IN6:
1944                if (copy_from_user(&vr, arg, sizeof(vr)))
1945                        return -EFAULT;
1946                if (vr.mifi >= mrt->maxvif)
1947                        return -EINVAL;
1948                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1949                read_lock(&mrt_lock);
1950                vif = &mrt->vif_table[vr.mifi];
1951                if (VIF_EXISTS(mrt, vr.mifi)) {
1952                        vr.icount = vif->pkt_in;
1953                        vr.ocount = vif->pkt_out;
1954                        vr.ibytes = vif->bytes_in;
1955                        vr.obytes = vif->bytes_out;
1956                        read_unlock(&mrt_lock);
1957
1958                        if (copy_to_user(arg, &vr, sizeof(vr)))
1959                                return -EFAULT;
1960                        return 0;
1961                }
1962                read_unlock(&mrt_lock);
1963                return -EADDRNOTAVAIL;
1964        case SIOCGETSGCNT_IN6:
1965                if (copy_from_user(&sr, arg, sizeof(sr)))
1966                        return -EFAULT;
1967
1968                rcu_read_lock();
1969                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1970                if (c) {
1971                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1972                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1973                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1974                        rcu_read_unlock();
1975
1976                        if (copy_to_user(arg, &sr, sizeof(sr)))
1977                                return -EFAULT;
1978                        return 0;
1979                }
1980                rcu_read_unlock();
1981                return -EADDRNOTAVAIL;
1982        default:
1983                return -ENOIOCTLCMD;
1984        }
1985}
1986#endif
1987
1988static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1989{
1990        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1991                      IPSTATS_MIB_OUTFORWDATAGRAMS);
1992        IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1993                      IPSTATS_MIB_OUTOCTETS, skb->len);
1994        return dst_output(net, sk, skb);
1995}
1996
1997/*
1998 *      Processing handlers for ip6mr_forward
1999 */
2000
/*
 * Send @skb out of multicast interface @vifi of table @mrt.
 *
 * On every path the skb is either freed here or passed to the
 * NF_INET_FORWARD hook with ip6mr_forward2_finish() as continuation.
 * Returns 0 when the packet was freed here, otherwise the NF_HOOK() result.
 */
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
                          struct sk_buff *skb, int vifi)
{
        struct ipv6hdr *ipv6h;
        struct vif_device *vif = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct dst_entry *dst;
        struct flowi6 fl6;

        /* A vif without a device cannot transmit anything. */
        if (!vif->dev)
                goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
        /*
         * Register vif: account the packet on the vif and its device, report
         * it through ip6mr_cache_report() as MRT6MSG_WHOLEPKT and free it
         * instead of transmitting.
         */
        if (vif->flags & MIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
                goto out_free;
        }
#endif

        ipv6h = ipv6_hdr(skb);

        /* Route towards the packet's destination through the vif's link. */
        fl6 = (struct flowi6) {
                .flowi6_oif = vif->link,
                .daddr = ipv6h->daddr,
        };

        dst = ip6_route_output(net, NULL, &fl6);
        if (dst->error) {
                /* Lookup failed: release the returned dst and drop. */
                dst_release(dst);
                goto out_free;
        }

        /* Replace whatever dst the skb carried with the new output route. */
        skb_dst_drop(skb);
        skb_dst_set(skb, dst);

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        dev = vif->dev;
        skb->dev = dev;
        /* Output counters are bumped before skb_cow(), so a failed cow still counts. */
        vif->pkt_out++;
        vif->bytes_out += skb->len;

        /* We are about to write */
        /* XXX: extension headers? */
        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
                goto out_free;

        /* Header pointer is fetched again after skb_cow() — presumably the data may have moved. */
        ipv6h = ipv6_hdr(skb);
        ipv6h->hop_limit--;

        IP6CB(skb)->flags |= IP6SKB_FORWARDED;

        /* NOTE(review): skb->dev was set to dev above, so both device arguments are the same here. */
        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dev,
                       ip6mr_forward2_finish);

out_free:
        kfree_skb(skb);
        return 0;
}
2074
2075static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2076{
2077        int ct;
2078
2079        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2080                if (mrt->vif_table[ct].dev == dev)
2081                        break;
2082        }
2083        return ct;
2084}
2085
/*
 * Forward @skb, received on @dev, according to cache entry @c of @mrt.
 *
 * Updates the entry's packet/byte/lastuse counters, verifies that the packet
 * came in on the expected interface (with a relaxed check for entries whose
 * origin is the unspecified address), and then sends it to every eligible
 * output vif via ip6mr_forward2(): clones go to all but the last vif, which
 * gets the original skb.  When nothing is sent the skb is freed here.
 */
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
                           struct net_device *dev, struct sk_buff *skb,
                           struct mfc6_cache *c)
{
        int psend = -1;         /* pending output vif, -1 while none selected */
        int vif, ct;
        int true_vifi = ip6mr_find_vif(mrt, dev);       /* vif of the arrival device, < 0 if none */

        vif = c->_c.mfc_parent;
        c->_c.mfc_un.res.pkt++;
        c->_c.mfc_un.res.bytes += skb->len;
        c->_c.mfc_un.res.lastuse = jiffies;

        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
                struct mfc6_cache *cache_proxy;

                /* For an (*,G) entry, we only check that the incoming
                 * interface is part of the static tree.
                 */
                rcu_read_lock();
                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
                if (cache_proxy &&
                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
                        rcu_read_unlock();
                        goto forward;
                }
                rcu_read_unlock();
        }

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (mrt->vif_table[vif].dev != dev) {
                c->_c.mfc_un.res.wrong_if++;

                /* Report at most once per MFC_ASSERT_THRESH interval. */
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               c->_c.mfc_un.res.last_assert +
                               MFC_ASSERT_THRESH)) {
                        c->_c.mfc_un.res.last_assert = jiffies;
                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
                }
                goto dont_forward;
        }

forward:
        /* Input accounting is charged to the entry's parent vif. */
        mrt->vif_table[vif].pkt_in++;
        mrt->vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         */
        if (ipv6_addr_any(&c->mf6c_origin) &&
            ipv6_addr_any(&c->mf6c_mcastgrp)) {
                if (true_vifi >= 0 &&
                    true_vifi != c->_c.mfc_parent &&
                    ipv6_hdr(skb)->hop_limit >
                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
                        /* It's an (*,*) entry and the packet is not coming from
                         * the upstream: forward the packet to the upstream
                         * only.
                         */
                        psend = c->_c.mfc_parent;
                        goto last_forward;
                }
                goto dont_forward;
        }
        /*
         * Walk the entry's vif range from high to low.  Each time a further
         * eligible vif is found, a clone is sent to the previously selected
         * one, so the original skb is kept for the last selection.
         */
        for (ct = c->_c.mfc_un.res.maxvif - 1;
             ct >= c->_c.mfc_un.res.minvif; ct--) {
                /* For (*,G) entry, don't forward to the incoming interface */
                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                /* A failed clone just skips that vif. */
                                if (skb2)
                                        ip6mr_forward2(net, mrt, skb2, psend);
                        }
                        psend = ct;
                }
        }
last_forward:
        if (psend != -1) {
                /* The original skb goes to the last selected vif. */
                ip6mr_forward2(net, mrt, skb, psend);
                return;
        }

dont_forward:
        kfree_skb(skb);
}
2182
2183
2184/*
2185 *      Multicast packets for forwarding arrive here
2186 */
2187
2188int ip6_mr_input(struct sk_buff *skb)
2189{
2190        struct mfc6_cache *cache;
2191        struct net *net = dev_net(skb->dev);
2192        struct mr_table *mrt;
2193        struct flowi6 fl6 = {
2194                .flowi6_iif     = skb->dev->ifindex,
2195                .flowi6_mark    = skb->mark,
2196        };
2197        int err;
2198        struct net_device *dev;
2199
2200        /* skb->dev passed in is the master dev for vrfs.
2201         * Get the proper interface that does have a vif associated with it.
2202         */
2203        dev = skb->dev;
2204        if (netif_is_l3_master(skb->dev)) {
2205                dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2206                if (!dev) {
2207                        kfree_skb(skb);
2208                        return -ENODEV;
2209                }
2210        }
2211
2212        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2213        if (err < 0) {
2214                kfree_skb(skb);
2215                return err;
2216        }
2217
2218        read_lock(&mrt_lock);
2219        cache = ip6mr_cache_find(mrt,
2220                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2221        if (!cache) {
2222                int vif = ip6mr_find_vif(mrt, dev);
2223
2224                if (vif >= 0)
2225                        cache = ip6mr_cache_find_any(mrt,
2226                                                     &ipv6_hdr(skb)->daddr,
2227                                                     vif);
2228        }
2229
2230        /*
2231         *      No usable cache entry
2232         */
2233        if (!cache) {
2234                int vif;
2235
2236                vif = ip6mr_find_vif(mrt, dev);
2237                if (vif >= 0) {
2238                        int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2239                        read_unlock(&mrt_lock);
2240
2241                        return err;
2242                }
2243                read_unlock(&mrt_lock);
2244                kfree_skb(skb);
2245                return -ENODEV;
2246        }
2247
2248        ip6_mr_forward(net, mrt, dev, skb, cache);
2249
2250        read_unlock(&mrt_lock);
2251
2252        return 0;
2253}
2254
2255int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2256                    u32 portid)
2257{
2258        int err;
2259        struct mr_table *mrt;
2260        struct mfc6_cache *cache;
2261        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2262
2263        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2264        if (!mrt)
2265                return -ENOENT;
2266
2267        read_lock(&mrt_lock);
2268        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2269        if (!cache && skb->dev) {
2270                int vif = ip6mr_find_vif(mrt, skb->dev);
2271
2272                if (vif >= 0)
2273                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2274                                                     vif);
2275        }
2276
2277        if (!cache) {
2278                struct sk_buff *skb2;
2279                struct ipv6hdr *iph;
2280                struct net_device *dev;
2281                int vif;
2282
2283                dev = skb->dev;
2284                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2285                        read_unlock(&mrt_lock);
2286                        return -ENODEV;
2287                }
2288
2289                /* really correct? */
2290                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2291                if (!skb2) {
2292                        read_unlock(&mrt_lock);
2293                        return -ENOMEM;
2294                }
2295
2296                NETLINK_CB(skb2).portid = portid;
2297                skb_reset_transport_header(skb2);
2298
2299                skb_put(skb2, sizeof(struct ipv6hdr));
2300                skb_reset_network_header(skb2);
2301
2302                iph = ipv6_hdr(skb2);
2303                iph->version = 0;
2304                iph->priority = 0;
2305                iph->flow_lbl[0] = 0;
2306                iph->flow_lbl[1] = 0;
2307                iph->flow_lbl[2] = 0;
2308                iph->payload_len = 0;
2309                iph->nexthdr = IPPROTO_NONE;
2310                iph->hop_limit = 0;
2311                iph->saddr = rt->rt6i_src.addr;
2312                iph->daddr = rt->rt6i_dst.addr;
2313
2314                err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2315                read_unlock(&mrt_lock);
2316
2317                return err;
2318        }
2319
2320        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2321        read_unlock(&mrt_lock);
2322        return err;
2323}
2324
2325static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2326                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2327                             int flags)
2328{
2329        struct nlmsghdr *nlh;
2330        struct rtmsg *rtm;
2331        int err;
2332
2333        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2334        if (!nlh)
2335                return -EMSGSIZE;
2336
2337        rtm = nlmsg_data(nlh);
2338        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2339        rtm->rtm_dst_len  = 128;
2340        rtm->rtm_src_len  = 128;
2341        rtm->rtm_tos      = 0;
2342        rtm->rtm_table    = mrt->id;
2343        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2344                goto nla_put_failure;
2345        rtm->rtm_type = RTN_MULTICAST;
2346        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2347        if (c->_c.mfc_flags & MFC_STATIC)
2348                rtm->rtm_protocol = RTPROT_STATIC;
2349        else
2350                rtm->rtm_protocol = RTPROT_MROUTED;
2351        rtm->rtm_flags    = 0;
2352
2353        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2354            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2355                goto nla_put_failure;
2356        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2357        /* do not break the dump if cache is unresolved */
2358        if (err < 0 && err != -ENOENT)
2359                goto nla_put_failure;
2360
2361        nlmsg_end(skb, nlh);
2362        return 0;
2363
2364nla_put_failure:
2365        nlmsg_cancel(skb, nlh);
2366        return -EMSGSIZE;
2367}
2368
2369static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2370                              u32 portid, u32 seq, struct mr_mfc *c,
2371                              int cmd, int flags)
2372{
2373        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2374                                 cmd, flags);
2375}
2376
2377static int mr6_msgsize(bool unresolved, int maxvif)
2378{
2379        size_t len =
2380                NLMSG_ALIGN(sizeof(struct rtmsg))
2381                + nla_total_size(4)     /* RTA_TABLE */
2382                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2383                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2384                ;
2385
2386        if (!unresolved)
2387                len = len
2388                      + nla_total_size(4)       /* RTA_IIF */
2389                      + nla_total_size(0)       /* RTA_MULTIPATH */
2390                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2391                                                /* RTA_MFC_STATS */
2392                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2393                ;
2394
2395        return len;
2396}
2397
2398static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2399                              int cmd)
2400{
2401        struct net *net = read_pnet(&mrt->net);
2402        struct sk_buff *skb;
2403        int err = -ENOBUFS;
2404
2405        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2406                        GFP_ATOMIC);
2407        if (!skb)
2408                goto errout;
2409
2410        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2411        if (err < 0)
2412                goto errout;
2413
2414        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2415        return;
2416
2417errout:
2418        kfree_skb(skb);
2419        if (err < 0)
2420                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2421}
2422
2423static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2424{
2425        size_t len =
2426                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2427                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2428                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2429                                        /* IP6MRA_CREPORT_SRC_ADDR */
2430                + nla_total_size(sizeof(struct in6_addr))
2431                                        /* IP6MRA_CREPORT_DST_ADDR */
2432                + nla_total_size(sizeof(struct in6_addr))
2433                                        /* IP6MRA_CREPORT_PKT */
2434                + nla_total_size(payloadlen)
2435                ;
2436
2437        return len;
2438}
2439
2440static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2441{
2442        struct net *net = read_pnet(&mrt->net);
2443        struct nlmsghdr *nlh;
2444        struct rtgenmsg *rtgenm;
2445        struct mrt6msg *msg;
2446        struct sk_buff *skb;
2447        struct nlattr *nla;
2448        int payloadlen;
2449
2450        payloadlen = pkt->len - sizeof(struct mrt6msg);
2451        msg = (struct mrt6msg *)skb_transport_header(pkt);
2452
2453        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2454        if (!skb)
2455                goto errout;
2456
2457        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2458                        sizeof(struct rtgenmsg), 0);
2459        if (!nlh)
2460                goto errout;
2461        rtgenm = nlmsg_data(nlh);
2462        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2463        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2464            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2465            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2466                             &msg->im6_src) ||
2467            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2468                             &msg->im6_dst))
2469                goto nla_put_failure;
2470
2471        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2472        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2473                                  nla_data(nla), payloadlen))
2474                goto nla_put_failure;
2475
2476        nlmsg_end(skb, nlh);
2477
2478        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2479        return;
2480
2481nla_put_failure:
2482        nlmsg_cancel(skb, nlh);
2483errout:
2484        kfree_skb(skb);
2485        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2486}
2487
2488static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2489{
2490        const struct nlmsghdr *nlh = cb->nlh;
2491        struct fib_dump_filter filter = {};
2492        int err;
2493
2494        if (cb->strict_check) {
2495                err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2496                                            &filter, cb);
2497                if (err < 0)
2498                        return err;
2499        }
2500
2501        if (filter.table_id) {
2502                struct mr_table *mrt;
2503
2504                mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2505                if (!mrt) {
2506                        if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2507                                return skb->len;
2508
2509                        NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2510                        return -ENOENT;
2511                }
2512                err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2513                                    &mfc_unres_lock, &filter);
2514                return skb->len ? : err;
2515        }
2516
2517        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2518                                _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2519}
2520