linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Linux IPv6 multicast routing support for BSD pim6sd
   4 *      Based on net/ipv4/ipmr.c.
   5 *
   6 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   7 *              LSIIT Laboratory, Strasbourg, France
   8 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   9 *              6WIND, Paris, France
  10 *      Copyright (C)2007,2008 USAGI/WIDE Project
  11 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  12 */
  13
  14#include <linux/uaccess.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/errno.h>
  18#include <linux/mm.h>
  19#include <linux/kernel.h>
  20#include <linux/fcntl.h>
  21#include <linux/stat.h>
  22#include <linux/socket.h>
  23#include <linux/inet.h>
  24#include <linux/netdevice.h>
  25#include <linux/inetdevice.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/init.h>
  29#include <linux/compat.h>
  30#include <linux/rhashtable.h>
  31#include <net/protocol.h>
  32#include <linux/skbuff.h>
  33#include <net/raw.h>
  34#include <linux/notifier.h>
  35#include <linux/if_arp.h>
  36#include <net/checksum.h>
  37#include <net/netlink.h>
  38#include <net/fib_rules.h>
  39
  40#include <net/ipv6.h>
  41#include <net/ip6_route.h>
  42#include <linux/mroute6.h>
  43#include <linux/pim.h>
  44#include <net/addrconf.h>
  45#include <linux/netfilter_ipv6.h>
  46#include <linux/export.h>
  47#include <net/ip6_checksum.h>
  48#include <linux/netconf.h>
  49#include <net/ip_tunnels.h>
  50
  51#include <linux/nospec.h>
  52
  53struct ip6mr_rule {
  54        struct fib_rule         common;
  55};
  56
  57struct ip6mr_result {
  58        struct mr_table *mrt;
  59};
  60
  61/* Big lock, protecting vif table, mrt cache and mroute socket state.
  62   Note that the changes are semaphored via rtnl_lock.
  63 */
  64
  65static DEFINE_RWLOCK(mrt_lock);
  66
  67/* Multicast router control variables */
  68
  69/* Special spinlock for queue of unresolved entries */
  70static DEFINE_SPINLOCK(mfc_unres_lock);
  71
  72/* We return to original Alan's scheme. Hash table of resolved
  73   entries is changed only in process context and protected
  74   with weak lock mrt_lock. Queue of unresolved entries is protected
  75   with strong spinlock mfc_unres_lock.
  76
  77   In this case data path is free of exclusive locks at all.
  78 */
  79
  80static struct kmem_cache *mrt_cachep __read_mostly;
  81
  82static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
  83static void ip6mr_free_table(struct mr_table *mrt);
  84
  85static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
  86                           struct net_device *dev, struct sk_buff *skb,
  87                           struct mfc6_cache *cache);
  88static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
  89                              mifi_t mifi, int assert);
  90static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
  91                              int cmd);
  92static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
  93static int ip6mr_rtm_dumproute(struct sk_buff *skb,
  94                               struct netlink_callback *cb);
  95static void mroute_clean_tables(struct mr_table *mrt, int flags);
  96static void ipmr_expire_process(struct timer_list *t);
  97
  98#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
  99#define ip6mr_for_each_table(mrt, net) \
 100        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
 101                                lockdep_rtnl_is_held() || \
 102                                list_empty(&net->ipv6.mr6_tables))
 103
 104static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 105                                            struct mr_table *mrt)
 106{
 107        struct mr_table *ret;
 108
 109        if (!mrt)
 110                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 111                                     struct mr_table, list);
 112        else
 113                ret = list_entry_rcu(mrt->list.next,
 114                                     struct mr_table, list);
 115
 116        if (&ret->list == &net->ipv6.mr6_tables)
 117                return NULL;
 118        return ret;
 119}
 120
 121static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 122{
 123        struct mr_table *mrt;
 124
 125        ip6mr_for_each_table(mrt, net) {
 126                if (mrt->id == id)
 127                        return mrt;
 128        }
 129        return NULL;
 130}
 131
 132static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 133                            struct mr_table **mrt)
 134{
 135        int err;
 136        struct ip6mr_result res;
 137        struct fib_lookup_arg arg = {
 138                .result = &res,
 139                .flags = FIB_LOOKUP_NOREF,
 140        };
 141
 142        /* update flow if oif or iif point to device enslaved to l3mdev */
 143        l3mdev_update_flow(net, flowi6_to_flowi(flp6));
 144
 145        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 146                               flowi6_to_flowi(flp6), 0, &arg);
 147        if (err < 0)
 148                return err;
 149        *mrt = res.mrt;
 150        return 0;
 151}
 152
 153static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 154                             int flags, struct fib_lookup_arg *arg)
 155{
 156        struct ip6mr_result *res = arg->result;
 157        struct mr_table *mrt;
 158
 159        switch (rule->action) {
 160        case FR_ACT_TO_TBL:
 161                break;
 162        case FR_ACT_UNREACHABLE:
 163                return -ENETUNREACH;
 164        case FR_ACT_PROHIBIT:
 165                return -EACCES;
 166        case FR_ACT_BLACKHOLE:
 167        default:
 168                return -EINVAL;
 169        }
 170
 171        arg->table = fib_rule_get_table(rule, arg);
 172
 173        mrt = ip6mr_get_table(rule->fr_net, arg->table);
 174        if (!mrt)
 175                return -EAGAIN;
 176        res->mrt = mrt;
 177        return 0;
 178}
 179
 180static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 181{
 182        return 1;
 183}
 184
 185static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 186        FRA_GENERIC_POLICY,
 187};
 188
 189static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 190                                struct fib_rule_hdr *frh, struct nlattr **tb,
 191                                struct netlink_ext_ack *extack)
 192{
 193        return 0;
 194}
 195
 196static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 197                              struct nlattr **tb)
 198{
 199        return 1;
 200}
 201
 202static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 203                           struct fib_rule_hdr *frh)
 204{
 205        frh->dst_len = 0;
 206        frh->src_len = 0;
 207        frh->tos     = 0;
 208        return 0;
 209}
 210
 211static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 212        .family         = RTNL_FAMILY_IP6MR,
 213        .rule_size      = sizeof(struct ip6mr_rule),
 214        .addr_size      = sizeof(struct in6_addr),
 215        .action         = ip6mr_rule_action,
 216        .match          = ip6mr_rule_match,
 217        .configure      = ip6mr_rule_configure,
 218        .compare        = ip6mr_rule_compare,
 219        .fill           = ip6mr_rule_fill,
 220        .nlgroup        = RTNLGRP_IPV6_RULE,
 221        .policy         = ip6mr_rule_policy,
 222        .owner          = THIS_MODULE,
 223};
 224
 225static int __net_init ip6mr_rules_init(struct net *net)
 226{
 227        struct fib_rules_ops *ops;
 228        struct mr_table *mrt;
 229        int err;
 230
 231        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 232        if (IS_ERR(ops))
 233                return PTR_ERR(ops);
 234
 235        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 236
 237        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 238        if (IS_ERR(mrt)) {
 239                err = PTR_ERR(mrt);
 240                goto err1;
 241        }
 242
 243        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 244        if (err < 0)
 245                goto err2;
 246
 247        net->ipv6.mr6_rules_ops = ops;
 248        return 0;
 249
 250err2:
 251        ip6mr_free_table(mrt);
 252err1:
 253        fib_rules_unregister(ops);
 254        return err;
 255}
 256
 257static void __net_exit ip6mr_rules_exit(struct net *net)
 258{
 259        struct mr_table *mrt, *next;
 260
 261        rtnl_lock();
 262        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 263                list_del(&mrt->list);
 264                ip6mr_free_table(mrt);
 265        }
 266        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 267        rtnl_unlock();
 268}
 269
 270static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 271                            struct netlink_ext_ack *extack)
 272{
 273        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 274}
 275
 276static unsigned int ip6mr_rules_seq_read(struct net *net)
 277{
 278        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 279}
 280
 281bool ip6mr_rule_default(const struct fib_rule *rule)
 282{
 283        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 284               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 285}
 286EXPORT_SYMBOL(ip6mr_rule_default);
 287#else
 288#define ip6mr_for_each_table(mrt, net) \
 289        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 290
 291static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 292                                            struct mr_table *mrt)
 293{
 294        if (!mrt)
 295                return net->ipv6.mrt6;
 296        return NULL;
 297}
 298
 299static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 300{
 301        return net->ipv6.mrt6;
 302}
 303
 304static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 305                            struct mr_table **mrt)
 306{
 307        *mrt = net->ipv6.mrt6;
 308        return 0;
 309}
 310
 311static int __net_init ip6mr_rules_init(struct net *net)
 312{
 313        struct mr_table *mrt;
 314
 315        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 316        if (IS_ERR(mrt))
 317                return PTR_ERR(mrt);
 318        net->ipv6.mrt6 = mrt;
 319        return 0;
 320}
 321
 322static void __net_exit ip6mr_rules_exit(struct net *net)
 323{
 324        rtnl_lock();
 325        ip6mr_free_table(net->ipv6.mrt6);
 326        net->ipv6.mrt6 = NULL;
 327        rtnl_unlock();
 328}
 329
 330static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 331                            struct netlink_ext_ack *extack)
 332{
 333        return 0;
 334}
 335
 336static unsigned int ip6mr_rules_seq_read(struct net *net)
 337{
 338        return 0;
 339}
 340#endif
 341
 342static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 343                          const void *ptr)
 344{
 345        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 346        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 347
 348        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 349               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 350}
 351
 352static const struct rhashtable_params ip6mr_rht_params = {
 353        .head_offset = offsetof(struct mr_mfc, mnode),
 354        .key_offset = offsetof(struct mfc6_cache, cmparg),
 355        .key_len = sizeof(struct mfc6_cache_cmp_arg),
 356        .nelem_hint = 3,
 357        .obj_cmpfn = ip6mr_hash_cmp,
 358        .automatic_shrinking = true,
 359};
 360
 361static void ip6mr_new_table_set(struct mr_table *mrt,
 362                                struct net *net)
 363{
 364#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 365        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 366#endif
 367}
 368
 369static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
 370        .mf6c_origin = IN6ADDR_ANY_INIT,
 371        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
 372};
 373
 374static struct mr_table_ops ip6mr_mr_table_ops = {
 375        .rht_params = &ip6mr_rht_params,
 376        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
 377};
 378
 379static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 380{
 381        struct mr_table *mrt;
 382
 383        mrt = ip6mr_get_table(net, id);
 384        if (mrt)
 385                return mrt;
 386
 387        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 388                              ipmr_expire_process, ip6mr_new_table_set);
 389}
 390
 391static void ip6mr_free_table(struct mr_table *mrt)
 392{
 393        del_timer_sync(&mrt->ipmr_expire_timer);
 394        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 395                                 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 396        rhltable_destroy(&mrt->mfc_hash);
 397        kfree(mrt);
 398}
 399
 400#ifdef CONFIG_PROC_FS
 401/* The /proc interfaces to multicast routing
 402 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 403 */
 404
 405static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 406        __acquires(mrt_lock)
 407{
 408        struct mr_vif_iter *iter = seq->private;
 409        struct net *net = seq_file_net(seq);
 410        struct mr_table *mrt;
 411
 412        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 413        if (!mrt)
 414                return ERR_PTR(-ENOENT);
 415
 416        iter->mrt = mrt;
 417
 418        read_lock(&mrt_lock);
 419        return mr_vif_seq_start(seq, pos);
 420}
 421
 422static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 423        __releases(mrt_lock)
 424{
 425        read_unlock(&mrt_lock);
 426}
 427
 428static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 429{
 430        struct mr_vif_iter *iter = seq->private;
 431        struct mr_table *mrt = iter->mrt;
 432
 433        if (v == SEQ_START_TOKEN) {
 434                seq_puts(seq,
 435                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 436        } else {
 437                const struct vif_device *vif = v;
 438                const char *name = vif->dev ? vif->dev->name : "none";
 439
 440                seq_printf(seq,
 441                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 442                           vif - mrt->vif_table,
 443                           name, vif->bytes_in, vif->pkt_in,
 444                           vif->bytes_out, vif->pkt_out,
 445                           vif->flags);
 446        }
 447        return 0;
 448}
 449
 450static const struct seq_operations ip6mr_vif_seq_ops = {
 451        .start = ip6mr_vif_seq_start,
 452        .next  = mr_vif_seq_next,
 453        .stop  = ip6mr_vif_seq_stop,
 454        .show  = ip6mr_vif_seq_show,
 455};
 456
 457static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 458{
 459        struct net *net = seq_file_net(seq);
 460        struct mr_table *mrt;
 461
 462        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 463        if (!mrt)
 464                return ERR_PTR(-ENOENT);
 465
 466        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 467}
 468
 469static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 470{
 471        int n;
 472
 473        if (v == SEQ_START_TOKEN) {
 474                seq_puts(seq,
 475                         "Group                            "
 476                         "Origin                           "
 477                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 478        } else {
 479                const struct mfc6_cache *mfc = v;
 480                const struct mr_mfc_iter *it = seq->private;
 481                struct mr_table *mrt = it->mrt;
 482
 483                seq_printf(seq, "%pI6 %pI6 %-3hd",
 484                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 485                           mfc->_c.mfc_parent);
 486
 487                if (it->cache != &mrt->mfc_unres_queue) {
 488                        seq_printf(seq, " %8lu %8lu %8lu",
 489                                   mfc->_c.mfc_un.res.pkt,
 490                                   mfc->_c.mfc_un.res.bytes,
 491                                   mfc->_c.mfc_un.res.wrong_if);
 492                        for (n = mfc->_c.mfc_un.res.minvif;
 493                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 494                                if (VIF_EXISTS(mrt, n) &&
 495                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 496                                        seq_printf(seq,
 497                                                   " %2d:%-3d", n,
 498                                                   mfc->_c.mfc_un.res.ttls[n]);
 499                        }
 500                } else {
 501                        /* unresolved mfc_caches don't contain
 502                         * pkt, bytes and wrong_if values
 503                         */
 504                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 505                }
 506                seq_putc(seq, '\n');
 507        }
 508        return 0;
 509}
 510
 511static const struct seq_operations ipmr_mfc_seq_ops = {
 512        .start = ipmr_mfc_seq_start,
 513        .next  = mr_mfc_seq_next,
 514        .stop  = mr_mfc_seq_stop,
 515        .show  = ipmr_mfc_seq_show,
 516};
 517#endif
 518
 519#ifdef CONFIG_IPV6_PIMSM_V2
 520
 521static int pim6_rcv(struct sk_buff *skb)
 522{
 523        struct pimreghdr *pim;
 524        struct ipv6hdr   *encap;
 525        struct net_device  *reg_dev = NULL;
 526        struct net *net = dev_net(skb->dev);
 527        struct mr_table *mrt;
 528        struct flowi6 fl6 = {
 529                .flowi6_iif     = skb->dev->ifindex,
 530                .flowi6_mark    = skb->mark,
 531        };
 532        int reg_vif_num;
 533
 534        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 535                goto drop;
 536
 537        pim = (struct pimreghdr *)skb_transport_header(skb);
 538        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 539            (pim->flags & PIM_NULL_REGISTER) ||
 540            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 541                             sizeof(*pim), IPPROTO_PIM,
 542                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 543             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 544                goto drop;
 545
 546        /* check if the inner packet is destined to mcast group */
 547        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 548                                   sizeof(*pim));
 549
 550        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 551            encap->payload_len == 0 ||
 552            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 553                goto drop;
 554
 555        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 556                goto drop;
 557        reg_vif_num = mrt->mroute_reg_vif_num;
 558
 559        read_lock(&mrt_lock);
 560        if (reg_vif_num >= 0)
 561                reg_dev = mrt->vif_table[reg_vif_num].dev;
 562        if (reg_dev)
 563                dev_hold(reg_dev);
 564        read_unlock(&mrt_lock);
 565
 566        if (!reg_dev)
 567                goto drop;
 568
 569        skb->mac_header = skb->network_header;
 570        skb_pull(skb, (u8 *)encap - skb->data);
 571        skb_reset_network_header(skb);
 572        skb->protocol = htons(ETH_P_IPV6);
 573        skb->ip_summed = CHECKSUM_NONE;
 574
 575        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 576
 577        netif_rx(skb);
 578
 579        dev_put(reg_dev);
 580        return 0;
 581 drop:
 582        kfree_skb(skb);
 583        return 0;
 584}
 585
 586static const struct inet6_protocol pim6_protocol = {
 587        .handler        =       pim6_rcv,
 588};
 589
 590/* Service routines creating virtual interfaces: PIMREG */
 591
 592static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 593                                      struct net_device *dev)
 594{
 595        struct net *net = dev_net(dev);
 596        struct mr_table *mrt;
 597        struct flowi6 fl6 = {
 598                .flowi6_oif     = dev->ifindex,
 599                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 600                .flowi6_mark    = skb->mark,
 601        };
 602
 603        if (!pskb_inet_may_pull(skb))
 604                goto tx_err;
 605
 606        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 607                goto tx_err;
 608
 609        read_lock(&mrt_lock);
 610        dev->stats.tx_bytes += skb->len;
 611        dev->stats.tx_packets++;
 612        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 613        read_unlock(&mrt_lock);
 614        kfree_skb(skb);
 615        return NETDEV_TX_OK;
 616
 617tx_err:
 618        dev->stats.tx_errors++;
 619        kfree_skb(skb);
 620        return NETDEV_TX_OK;
 621}
 622
 623static int reg_vif_get_iflink(const struct net_device *dev)
 624{
 625        return 0;
 626}
 627
 628static const struct net_device_ops reg_vif_netdev_ops = {
 629        .ndo_start_xmit = reg_vif_xmit,
 630        .ndo_get_iflink = reg_vif_get_iflink,
 631};
 632
 633static void reg_vif_setup(struct net_device *dev)
 634{
 635        dev->type               = ARPHRD_PIMREG;
 636        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 637        dev->flags              = IFF_NOARP;
 638        dev->netdev_ops         = &reg_vif_netdev_ops;
 639        dev->needs_free_netdev  = true;
 640        dev->features           |= NETIF_F_NETNS_LOCAL;
 641}
 642
 643static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 644{
 645        struct net_device *dev;
 646        char name[IFNAMSIZ];
 647
 648        if (mrt->id == RT6_TABLE_DFLT)
 649                sprintf(name, "pim6reg");
 650        else
 651                sprintf(name, "pim6reg%u", mrt->id);
 652
 653        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 654        if (!dev)
 655                return NULL;
 656
 657        dev_net_set(dev, net);
 658
 659        if (register_netdevice(dev)) {
 660                free_netdev(dev);
 661                return NULL;
 662        }
 663
 664        if (dev_open(dev, NULL))
 665                goto failure;
 666
 667        dev_hold(dev);
 668        return dev;
 669
 670failure:
 671        unregister_netdevice(dev);
 672        return NULL;
 673}
 674#endif
 675
 676static int call_ip6mr_vif_entry_notifiers(struct net *net,
 677                                          enum fib_event_type event_type,
 678                                          struct vif_device *vif,
 679                                          mifi_t vif_index, u32 tb_id)
 680{
 681        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 682                                     vif, vif_index, tb_id,
 683                                     &net->ipv6.ipmr_seq);
 684}
 685
 686static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 687                                          enum fib_event_type event_type,
 688                                          struct mfc6_cache *mfc, u32 tb_id)
 689{
 690        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 691                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 692}
 693
 694/* Delete a VIF entry */
 695static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 696                       struct list_head *head)
 697{
 698        struct vif_device *v;
 699        struct net_device *dev;
 700        struct inet6_dev *in6_dev;
 701
 702        if (vifi < 0 || vifi >= mrt->maxvif)
 703                return -EADDRNOTAVAIL;
 704
 705        v = &mrt->vif_table[vifi];
 706
 707        if (VIF_EXISTS(mrt, vifi))
 708                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 709                                               FIB_EVENT_VIF_DEL, v, vifi,
 710                                               mrt->id);
 711
 712        write_lock_bh(&mrt_lock);
 713        dev = v->dev;
 714        v->dev = NULL;
 715
 716        if (!dev) {
 717                write_unlock_bh(&mrt_lock);
 718                return -EADDRNOTAVAIL;
 719        }
 720
 721#ifdef CONFIG_IPV6_PIMSM_V2
 722        if (vifi == mrt->mroute_reg_vif_num)
 723                mrt->mroute_reg_vif_num = -1;
 724#endif
 725
 726        if (vifi + 1 == mrt->maxvif) {
 727                int tmp;
 728                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 729                        if (VIF_EXISTS(mrt, tmp))
 730                                break;
 731                }
 732                mrt->maxvif = tmp + 1;
 733        }
 734
 735        write_unlock_bh(&mrt_lock);
 736
 737        dev_set_allmulti(dev, -1);
 738
 739        in6_dev = __in6_dev_get(dev);
 740        if (in6_dev) {
 741                in6_dev->cnf.mc_forwarding--;
 742                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 743                                             NETCONFA_MC_FORWARDING,
 744                                             dev->ifindex, &in6_dev->cnf);
 745        }
 746
 747        if ((v->flags & MIFF_REGISTER) && !notify)
 748                unregister_netdevice_queue(dev, head);
 749
 750        dev_put(dev);
 751        return 0;
 752}
 753
 754static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 755{
 756        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 757
 758        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 759}
 760
 761static inline void ip6mr_cache_free(struct mfc6_cache *c)
 762{
 763        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 764}
 765
 766/* Destroy an unresolved cache entry, killing queued skbs
 767   and reporting error to netlink readers.
 768 */
 769
 770static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 771{
 772        struct net *net = read_pnet(&mrt->net);
 773        struct sk_buff *skb;
 774
 775        atomic_dec(&mrt->cache_resolve_queue_len);
 776
 777        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 778                if (ipv6_hdr(skb)->version == 0) {
 779                        struct nlmsghdr *nlh = skb_pull(skb,
 780                                                        sizeof(struct ipv6hdr));
 781                        nlh->nlmsg_type = NLMSG_ERROR;
 782                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 783                        skb_trim(skb, nlh->nlmsg_len);
 784                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 785                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 786                } else
 787                        kfree_skb(skb);
 788        }
 789
 790        ip6mr_cache_free(c);
 791}
 792
 793
/* Timer handling for the queue of unresolved entries. */
 795
 796static void ipmr_do_expire_process(struct mr_table *mrt)
 797{
 798        unsigned long now = jiffies;
 799        unsigned long expires = 10 * HZ;
 800        struct mr_mfc *c, *next;
 801
 802        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 803                if (time_after(c->mfc_un.unres.expires, now)) {
 804                        /* not yet... */
 805                        unsigned long interval = c->mfc_un.unres.expires - now;
 806                        if (interval < expires)
 807                                expires = interval;
 808                        continue;
 809                }
 810
 811                list_del(&c->list);
 812                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 813                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 814        }
 815
 816        if (!list_empty(&mrt->mfc_unres_queue))
 817                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 818}
 819
 820static void ipmr_expire_process(struct timer_list *t)
 821{
 822        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 823
 824        if (!spin_trylock(&mfc_unres_lock)) {
 825                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 826                return;
 827        }
 828
 829        if (!list_empty(&mrt->mfc_unres_queue))
 830                ipmr_do_expire_process(mrt);
 831
 832        spin_unlock(&mfc_unres_lock);
 833}
 834
 835/* Fill oifs list. It is called under write locked mrt_lock. */
 836
 837static void ip6mr_update_thresholds(struct mr_table *mrt,
 838                                    struct mr_mfc *cache,
 839                                    unsigned char *ttls)
 840{
 841        int vifi;
 842
 843        cache->mfc_un.res.minvif = MAXMIFS;
 844        cache->mfc_un.res.maxvif = 0;
 845        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 846
 847        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 848                if (VIF_EXISTS(mrt, vifi) &&
 849                    ttls[vifi] && ttls[vifi] < 255) {
 850                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 851                        if (cache->mfc_un.res.minvif > vifi)
 852                                cache->mfc_un.res.minvif = vifi;
 853                        if (cache->mfc_un.res.maxvif <= vifi)
 854                                cache->mfc_un.res.maxvif = vifi + 1;
 855                }
 856        }
 857        cache->mfc_un.res.lastuse = jiffies;
 858}
 859
 860static int mif6_add(struct net *net, struct mr_table *mrt,
 861                    struct mif6ctl *vifc, int mrtsock)
 862{
 863        int vifi = vifc->mif6c_mifi;
 864        struct vif_device *v = &mrt->vif_table[vifi];
 865        struct net_device *dev;
 866        struct inet6_dev *in6_dev;
 867        int err;
 868
 869        /* Is vif busy ? */
 870        if (VIF_EXISTS(mrt, vifi))
 871                return -EADDRINUSE;
 872
 873        switch (vifc->mif6c_flags) {
 874#ifdef CONFIG_IPV6_PIMSM_V2
 875        case MIFF_REGISTER:
 876                /*
 877                 * Special Purpose VIF in PIM
 878                 * All the packets will be sent to the daemon
 879                 */
 880                if (mrt->mroute_reg_vif_num >= 0)
 881                        return -EADDRINUSE;
 882                dev = ip6mr_reg_vif(net, mrt);
 883                if (!dev)
 884                        return -ENOBUFS;
 885                err = dev_set_allmulti(dev, 1);
 886                if (err) {
 887                        unregister_netdevice(dev);
 888                        dev_put(dev);
 889                        return err;
 890                }
 891                break;
 892#endif
 893        case 0:
 894                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 895                if (!dev)
 896                        return -EADDRNOTAVAIL;
 897                err = dev_set_allmulti(dev, 1);
 898                if (err) {
 899                        dev_put(dev);
 900                        return err;
 901                }
 902                break;
 903        default:
 904                return -EINVAL;
 905        }
 906
 907        in6_dev = __in6_dev_get(dev);
 908        if (in6_dev) {
 909                in6_dev->cnf.mc_forwarding++;
 910                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 911                                             NETCONFA_MC_FORWARDING,
 912                                             dev->ifindex, &in6_dev->cnf);
 913        }
 914
 915        /* Fill in the VIF structures */
 916        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 917                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 918                        MIFF_REGISTER);
 919
 920        /* And finish update writing critical data */
 921        write_lock_bh(&mrt_lock);
 922        v->dev = dev;
 923#ifdef CONFIG_IPV6_PIMSM_V2
 924        if (v->flags & MIFF_REGISTER)
 925                mrt->mroute_reg_vif_num = vifi;
 926#endif
 927        if (vifi + 1 > mrt->maxvif)
 928                mrt->maxvif = vifi + 1;
 929        write_unlock_bh(&mrt_lock);
 930        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 931                                       v, vifi, mrt->id);
 932        return 0;
 933}
 934
 935static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 936                                           const struct in6_addr *origin,
 937                                           const struct in6_addr *mcastgrp)
 938{
 939        struct mfc6_cache_cmp_arg arg = {
 940                .mf6c_origin = *origin,
 941                .mf6c_mcastgrp = *mcastgrp,
 942        };
 943
 944        return mr_mfc_find(mrt, &arg);
 945}
 946
 947/* Look for a (*,G) entry */
 948static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 949                                               struct in6_addr *mcastgrp,
 950                                               mifi_t mifi)
 951{
 952        struct mfc6_cache_cmp_arg arg = {
 953                .mf6c_origin = in6addr_any,
 954                .mf6c_mcastgrp = *mcastgrp,
 955        };
 956
 957        if (ipv6_addr_any(mcastgrp))
 958                return mr_mfc_find_any_parent(mrt, mifi);
 959        return mr_mfc_find_any(mrt, mifi, &arg);
 960}
 961
 962/* Look for a (S,G,iif) entry if parent != -1 */
 963static struct mfc6_cache *
 964ip6mr_cache_find_parent(struct mr_table *mrt,
 965                        const struct in6_addr *origin,
 966                        const struct in6_addr *mcastgrp,
 967                        int parent)
 968{
 969        struct mfc6_cache_cmp_arg arg = {
 970                .mf6c_origin = *origin,
 971                .mf6c_mcastgrp = *mcastgrp,
 972        };
 973
 974        return mr_mfc_find_parent(mrt, &arg, parent);
 975}
 976
 977/* Allocate a multicast cache entry */
 978static struct mfc6_cache *ip6mr_cache_alloc(void)
 979{
 980        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 981        if (!c)
 982                return NULL;
 983        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 984        c->_c.mfc_un.res.minvif = MAXMIFS;
 985        c->_c.free = ip6mr_cache_free_rcu;
 986        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 987        return c;
 988}
 989
 990static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 991{
 992        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 993        if (!c)
 994                return NULL;
 995        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 996        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 997        return c;
 998}
 999
1000/*
1001 *      A cache entry has gone into a resolved state from queued
1002 */
1003
1004static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1005                                struct mfc6_cache *uc, struct mfc6_cache *c)
1006{
1007        struct sk_buff *skb;
1008
1009        /*
1010         *      Play the pending entries through our router
1011         */
1012
1013        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1014                if (ipv6_hdr(skb)->version == 0) {
1015                        struct nlmsghdr *nlh = skb_pull(skb,
1016                                                        sizeof(struct ipv6hdr));
1017
1018                        if (mr_fill_mroute(mrt, skb, &c->_c,
1019                                           nlmsg_data(nlh)) > 0) {
1020                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1021                        } else {
1022                                nlh->nlmsg_type = NLMSG_ERROR;
1023                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1024                                skb_trim(skb, nlh->nlmsg_len);
1025                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1026                        }
1027                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1028                } else
1029                        ip6_mr_forward(net, mrt, skb->dev, skb, c);
1030        }
1031}
1032
/*
 *      Bounce a cache query up to pim6sd and netlink.
 *
 *      Called under mrt_lock.
 *
 *      Builds a report skb carrying a struct mrt6msg describing @pkt and
 *      queues it on the table's mroute socket, after also handing it to
 *      mrt6msg_netlink_event().
 *
 *      @mrt:    multicast routing table the report belongs to
 *      @pkt:    packet that triggered the report (not consumed here)
 *      @mifi:   interface index stored in the message (non-WHOLEPKT case)
 *      @assert: message type (e.g. MRT6MSG_WHOLEPKT); stored in
 *               im6_msgtype
 *
 *      Returns -ENOBUFS if no skb could be allocated, -EINVAL if the table
 *      has no mroute socket, otherwise the result of sock_queue_rcv_skb().
 */

static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert)
{
        struct sock *mroute6_sk;
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
        /* WHOLEPKT: copy the packet with room for the mrt6msg in front */
        if (assert == MRT6MSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
                                                +sizeof(*msg));
        else
#endif
                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

        /* I suppose that internal messages
         * do not require checksums */

        skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
        if (assert == MRT6MSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix length etc.
                   And all this only to mangle msg->im6_msgtype and
                   to set msg->im6_mbz to "mbz" :-)
                 */
                skb_push(skb, -skb_network_offset(pkt));

                skb_push(skb, sizeof(*msg));
                skb_reset_transport_header(skb);
                msg = (struct mrt6msg *)skb_transport_header(skb);
                msg->im6_mbz = 0;
                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
                /* WHOLEPKT reports always name the register vif */
                msg->im6_mif = mrt->mroute_reg_vif_num;
                msg->im6_pad = 0;
                msg->im6_src = ipv6_hdr(pkt)->saddr;
                msg->im6_dst = ipv6_hdr(pkt)->daddr;

                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else
#endif
        {
        /*
         *      Copy the IP header
         */

        skb_put(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

        /*
         *      Add our header
         */
        skb_put(skb, sizeof(*msg));
        skb_reset_transport_header(skb);
        msg = (struct mrt6msg *)skb_transport_header(skb);

        msg->im6_mbz = 0;
        msg->im6_msgtype = assert;
        msg->im6_mif = mifi;
        msg->im6_pad = 0;
        msg->im6_src = ipv6_hdr(pkt)->saddr;
        msg->im6_dst = ipv6_hdr(pkt)->daddr;

        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }

        /* The mroute socket pointer is only stable inside the RCU section */
        rcu_read_lock();
        mroute6_sk = rcu_dereference(mrt->mroute_sk);
        if (!mroute6_sk) {
                rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }

        mrt6msg_netlink_event(mrt, skb);

        /* Deliver to user space multicast routing algorithms */
        ret = sock_queue_rcv_skb(mroute6_sk, skb);
        rcu_read_unlock();
        if (ret < 0) {
                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
                kfree_skb(skb);
        }

        return ret;
}
1132
/*
 * Queue a packet for resolution. It gets locked cache entry!
 *
 * Looks for an unresolved entry matching the packet's (saddr, daddr) on
 * mrt->mfc_unres_queue, creating one (and sending an MRT6MSG_NOCACHE
 * report) if none exists.  The skb is then appended to that entry's
 * pending queue unless the queue already holds more than 3 packets.
 *
 * The skb is consumed on every path: it is either queued or freed.
 *
 * Returns 0 when the skb was queued, -ENOBUFS when the entry could not be
 * allocated or its queue is full, or the error from ip6mr_cache_report().
 */
static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
                                  struct sk_buff *skb, struct net_device *dev)
{
        struct mfc6_cache *c;
        bool found = false;
        int err;

        /* mfc_unres_lock is held for the whole lookup/insert/append */
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                /*
                 *      Create a new entry if allowable
                 */

                c = ip6mr_cache_alloc_unres();
                if (!c) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /* Fill in the new cache entry */
                c->_c.mfc_parent = -1;
                c->mf6c_origin = ipv6_hdr(skb)->saddr;
                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

                /*
                 *      Reflect first query at pim6sd
                 */
                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        ip6mr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                /* Publish the new entry on the unresolved queue */
                atomic_inc(&mrt->cache_resolve_queue_len);
                list_add(&c->_c.list, &mrt->mfc_unres_queue);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);

                ipmr_do_expire_process(mrt);
        }

        /* See if we can append the packet */
        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                /* Record the input device, if given, on the queued skb */
                if (dev) {
                        skb->dev = dev;
                        skb->skb_iif = dev->ifindex;
                }
                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}
1206
1207/*
1208 *      MFC6 cache manipulation by user space
1209 */
1210
1211static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1212                            int parent)
1213{
1214        struct mfc6_cache *c;
1215
1216        /* The entries are added/deleted only under RTNL */
1217        rcu_read_lock();
1218        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1219                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1220        rcu_read_unlock();
1221        if (!c)
1222                return -ENOENT;
1223        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1224        list_del_rcu(&c->_c.list);
1225
1226        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1227                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1228        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1229        mr_cache_put(&c->_c);
1230        return 0;
1231}
1232
1233static int ip6mr_device_event(struct notifier_block *this,
1234                              unsigned long event, void *ptr)
1235{
1236        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1237        struct net *net = dev_net(dev);
1238        struct mr_table *mrt;
1239        struct vif_device *v;
1240        int ct;
1241
1242        if (event != NETDEV_UNREGISTER)
1243                return NOTIFY_DONE;
1244
1245        ip6mr_for_each_table(mrt, net) {
1246                v = &mrt->vif_table[0];
1247                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1248                        if (v->dev == dev)
1249                                mif6_delete(mrt, ct, 1, NULL);
1250                }
1251        }
1252
1253        return NOTIFY_DONE;
1254}
1255
1256static unsigned int ip6mr_seq_read(struct net *net)
1257{
1258        ASSERT_RTNL();
1259
1260        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1261}
1262
1263static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1264                      struct netlink_ext_ack *extack)
1265{
1266        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1267                       ip6mr_mr_table_iter, &mrt_lock, extack);
1268}
1269
1270static struct notifier_block ip6_mr_notifier = {
1271        .notifier_call = ip6mr_device_event
1272};
1273
/*
 * Template passed to fib_notifier_ops_register() for each netns; wires the
 * IPv6 multicast routing family to its sequence-read and dump callbacks.
 */
static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
        .family         = RTNL_FAMILY_IP6MR,
        .fib_seq_read   = ip6mr_seq_read,
        .fib_dump       = ip6mr_dump,
        .owner          = THIS_MODULE,
};
1280
1281static int __net_init ip6mr_notifier_init(struct net *net)
1282{
1283        struct fib_notifier_ops *ops;
1284
1285        net->ipv6.ipmr_seq = 0;
1286
1287        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1288        if (IS_ERR(ops))
1289                return PTR_ERR(ops);
1290
1291        net->ipv6.ip6mr_notifier_ops = ops;
1292
1293        return 0;
1294}
1295
1296static void __net_exit ip6mr_notifier_exit(struct net *net)
1297{
1298        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1299        net->ipv6.ip6mr_notifier_ops = NULL;
1300}
1301
/*
 * Setup for IP multicast routing
 *
 * Per-netns initialisation: registers the FIB notifier ops, initialises
 * the ip6mr rules and, with CONFIG_PROC_FS, creates the "ip6_mr_vif" and
 * "ip6_mr_cache" proc entries.  On failure everything set up so far is
 * undone in reverse order and the error is returned.
 */
static int __net_init ip6mr_net_init(struct net *net)
{
        int err;

        err = ip6mr_notifier_init(net);
        if (err)
                return err;

        err = ip6mr_rules_init(net);
        if (err < 0)
                goto ip6mr_rules_fail;

#ifdef CONFIG_PROC_FS
        /* proc_create_net() only reports failure as NULL; use -ENOMEM */
        err = -ENOMEM;
        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
                        sizeof(struct mr_vif_iter)))
                goto proc_vif_fail;
        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
                        sizeof(struct mr_mfc_iter)))
                goto proc_cache_fail;
#endif

        return 0;

        /* Error unwinding: each label undoes the step before the failure */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
        remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
        ip6mr_rules_exit(net);
#endif
ip6mr_rules_fail:
        ip6mr_notifier_exit(net);
        return err;
}
1337
/*
 * Per-netns teardown: removes the proc entries (with CONFIG_PROC_FS), then
 * the ip6mr rules, then the FIB notifier ops -- the reverse of the order
 * used by ip6mr_net_init().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ip6_mr_cache", net->proc_net);
        remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
        ip6mr_rules_exit(net);
        ip6mr_notifier_exit(net);
}
1347
/* Per-netns init/exit hooks, registered from ip6_mr_init(). */
static struct pernet_operations ip6mr_net_ops = {
        .init = ip6mr_net_init,
        .exit = ip6mr_net_exit,
};
1352
1353int __init ip6_mr_init(void)
1354{
1355        int err;
1356
1357        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1358                                       sizeof(struct mfc6_cache),
1359                                       0, SLAB_HWCACHE_ALIGN,
1360                                       NULL);
1361        if (!mrt_cachep)
1362                return -ENOMEM;
1363
1364        err = register_pernet_subsys(&ip6mr_net_ops);
1365        if (err)
1366                goto reg_pernet_fail;
1367
1368        err = register_netdevice_notifier(&ip6_mr_notifier);
1369        if (err)
1370                goto reg_notif_fail;
1371#ifdef CONFIG_IPV6_PIMSM_V2
1372        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1373                pr_err("%s: can't add PIM protocol\n", __func__);
1374                err = -EAGAIN;
1375                goto add_proto_fail;
1376        }
1377#endif
1378        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1379                                   NULL, ip6mr_rtm_dumproute, 0);
1380        if (err == 0)
1381                return 0;
1382
1383#ifdef CONFIG_IPV6_PIMSM_V2
1384        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1385add_proto_fail:
1386        unregister_netdevice_notifier(&ip6_mr_notifier);
1387#endif
1388reg_notif_fail:
1389        unregister_pernet_subsys(&ip6mr_net_ops);
1390reg_pernet_fail:
1391        kmem_cache_destroy(mrt_cachep);
1392        return err;
1393}
1394
/*
 * Undo ip6_mr_init(): unregister the RTM_GETROUTE handler, the PIM
 * protocol (with CONFIG_IPV6_PIMSM_V2), the netdevice notifier and the
 * per-netns operations, then destroy the MFC slab cache.
 */
void ip6_mr_cleanup(void)
{
        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
        unregister_netdevice_notifier(&ip6_mr_notifier);
        unregister_pernet_subsys(&ip6mr_net_ops);
        kmem_cache_destroy(mrt_cachep);
}
1405
/*
 * Add a multicast forwarding cache entry, or update it if one matching
 * (origin, mcastgrp, parent) already exists.
 *
 * @net:     network namespace used for the notifier calls
 * @mrt:     table to modify
 * @mfc:     user-supplied description (origin, group, parent, ifset)
 * @mrtsock: non-zero when the request comes from the table's mroute
 *           socket; otherwise the entry is flagged MFC_STATIC
 * @parent:  parent passed to the lookup
 *
 * If an unresolved entry for the same (origin, group) is queued, it is
 * removed and its pending packets are replayed through the new entry.
 *
 * Returns 0 on success, -ENFILE if mf6cc_parent is out of range, -EINVAL
 * if the group is neither unspecified nor multicast, -ENOMEM if the entry
 * cannot be allocated, or the rhltable insertion error.
 */
static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
                         struct mf6cctl *mfc, int mrtsock, int parent)
{
        unsigned char ttls[MAXMIFS];
        struct mfc6_cache *uc, *c;
        struct mr_mfc *_uc;
        bool found;
        int i, err;

        if (mfc->mf6cc_parent >= MAXMIFS)
                return -ENFILE;

        /* ttl 255 everywhere, 1 for interfaces present in the ifset */
        memset(ttls, 255, MAXMIFS);
        for (i = 0; i < MAXMIFS; i++) {
                if (IF_ISSET(i, &mfc->mf6cc_ifset))
                        ttls[i] = 1;
        }

        /* The entries are added/deleted only under RTNL */
        rcu_read_lock();
        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
        rcu_read_unlock();
        if (c) {
                /* Existing entry: update it in place under mrt_lock */
                write_lock_bh(&mrt_lock);
                c->_c.mfc_parent = mfc->mf6cc_parent;
                ip6mr_update_thresholds(mrt, &c->_c, ttls);
                if (!mrtsock)
                        c->_c.mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
                                               c, mrt->id);
                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
                return 0;
        }

        /* New entries must name the unspecified group or a multicast one */
        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
                return -EINVAL;

        c = ip6mr_cache_alloc();
        if (!c)
                return -ENOMEM;

        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
        c->_c.mfc_parent = mfc->mf6cc_parent;
        ip6mr_update_thresholds(mrt, &c->_c, ttls);
        if (!mrtsock)
                c->_c.mfc_flags |= MFC_STATIC;

        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
                                  ip6mr_rht_params);
        if (err) {
                pr_err("ip6mr: rhtable insert error %d\n", err);
                ip6mr_cache_free(c);
                return err;
        }
        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);

        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
                uc = (struct mfc6_cache *)_uc;
                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
                        list_del(&_uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
        /* No unresolved entries left: stop the expiry timer */
        if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (found) {
                /* uc was unlinked above; replay its packets, then free it */
                ip6mr_cache_resolve(net, mrt, uc, c);
                ip6mr_cache_free(uc);
        }
        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
                                       c, mrt->id);
        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
        return 0;
}
1494
1495/*
1496 *      Close the multicast socket, and clear the vif tables etc
1497 */
1498
1499static void mroute_clean_tables(struct mr_table *mrt, int flags)
1500{
1501        struct mr_mfc *c, *tmp;
1502        LIST_HEAD(list);
1503        int i;
1504
1505        /* Shut down all active vif entries */
1506        if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1507                for (i = 0; i < mrt->maxvif; i++) {
1508                        if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1509                             !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1510                            (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1511                                continue;
1512                        mif6_delete(mrt, i, 0, &list);
1513                }
1514                unregister_netdevice_many(&list);
1515        }
1516
1517        /* Wipe the cache */
1518        if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1519                list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1520                        if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1521                            (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1522                                continue;
1523                        rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1524                        list_del_rcu(&c->list);
1525                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1526                                                       FIB_EVENT_ENTRY_DEL,
1527                                                       (struct mfc6_cache *)c, mrt->id);
1528                        mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1529                        mr_cache_put(c);
1530                }
1531        }
1532
1533        if (flags & MRT6_FLUSH_MFC) {
1534                if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1535                        spin_lock_bh(&mfc_unres_lock);
1536                        list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1537                                list_del(&c->list);
1538                                mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1539                                                  RTM_DELROUTE);
1540                                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1541                        }
1542                        spin_unlock_bh(&mfc_unres_lock);
1543                }
1544        }
1545}
1546
1547static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1548{
1549        int err = 0;
1550        struct net *net = sock_net(sk);
1551
1552        rtnl_lock();
1553        write_lock_bh(&mrt_lock);
1554        if (rtnl_dereference(mrt->mroute_sk)) {
1555                err = -EADDRINUSE;
1556        } else {
1557                rcu_assign_pointer(mrt->mroute_sk, sk);
1558                sock_set_flag(sk, SOCK_RCU_FREE);
1559                net->ipv6.devconf_all->mc_forwarding++;
1560        }
1561        write_unlock_bh(&mrt_lock);
1562
1563        if (!err)
1564                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1565                                             NETCONFA_MC_FORWARDING,
1566                                             NETCONFA_IFINDEX_ALL,
1567                                             net->ipv6.devconf_all);
1568        rtnl_unlock();
1569
1570        return err;
1571}
1572
1573int ip6mr_sk_done(struct sock *sk)
1574{
1575        int err = -EACCES;
1576        struct net *net = sock_net(sk);
1577        struct mr_table *mrt;
1578
1579        if (sk->sk_type != SOCK_RAW ||
1580            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1581                return err;
1582
1583        rtnl_lock();
1584        ip6mr_for_each_table(mrt, net) {
1585                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1586                        write_lock_bh(&mrt_lock);
1587                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1588                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1589                         * so the RCU grace period before sk freeing
1590                         * is guaranteed by sk_destruct()
1591                         */
1592                        net->ipv6.devconf_all->mc_forwarding--;
1593                        write_unlock_bh(&mrt_lock);
1594                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1595                                                     NETCONFA_MC_FORWARDING,
1596                                                     NETCONFA_IFINDEX_ALL,
1597                                                     net->ipv6.devconf_all);
1598
1599                        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1600                        err = 0;
1601                        break;
1602                }
1603        }
1604        rtnl_unlock();
1605
1606        return err;
1607}
1608
1609bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1610{
1611        struct mr_table *mrt;
1612        struct flowi6 fl6 = {
1613                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1614                .flowi6_oif     = skb->dev->ifindex,
1615                .flowi6_mark    = skb->mark,
1616        };
1617
1618        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1619                return NULL;
1620
1621        return rcu_access_pointer(mrt->mroute_sk);
1622}
1623EXPORT_SYMBOL(mroute6_is_socket);
1624
/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 *
 *      Handles the MRT6_* set options on a raw ICMPv6 socket.  Any other
 *      socket type gets -EOPNOTSUPP; a missing table gets -ENOENT.  Every
 *      option except MRT6_INIT requires the caller to be the table's
 *      mroute socket or to hold CAP_NET_ADMIN in the netns' user namespace
 *      (-EACCES otherwise).  Unknown options return -ENOPROTOOPT.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
                          unsigned int optlen)
{
        int ret, parent = 0;
        struct mif6ctl vif;
        struct mf6cctl mfc;
        mifi_t mifi;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        if (sk->sk_type != SOCK_RAW ||
            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
                return -EOPNOTSUPP;

        /* Use the table selected on this socket, or the default one */
        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
        if (!mrt)
                return -ENOENT;

        if (optname != MRT6_INIT) {
                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT6_INIT:
                if (optlen < sizeof(int))
                        return -EINVAL;

                return ip6mr_sk_init(mrt, sk);

        case MRT6_DONE:
                return ip6mr_sk_done(sk);

        case MRT6_ADD_MIF:
                if (optlen < sizeof(vif))
                        return -EINVAL;
                if (copy_from_sockptr(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.mif6c_mifi >= MAXMIFS)
                        return -ENFILE;
                rtnl_lock();
                /* Last argument: is the caller the table's mroute socket? */
                ret = mif6_add(net, mrt, &vif,
                               sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;

        case MRT6_DEL_MIF:
                if (optlen < sizeof(mifi_t))
                        return -EINVAL;
                if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
                        return -EFAULT;
                rtnl_lock();
                ret = mif6_delete(mrt, mifi, 0, NULL);
                rtnl_unlock();
                return ret;

        /*
         *      Manipulate the forwarding caches. These live
         *      in a sort of kernel/user symbiosis.
         */
        case MRT6_ADD_MFC:
        case MRT6_DEL_MFC:
                /* Non-proxy variants use parent -1 */
                parent = -1;
                fallthrough;
        case MRT6_ADD_MFC_PROXY:
        case MRT6_DEL_MFC_PROXY:
                if (optlen < sizeof(mfc))
                        return -EINVAL;
                if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                /* Proxy variants (parent still 0) take it from the request */
                if (parent == 0)
                        parent = mfc.mf6cc_parent;
                rtnl_lock();
                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
                else
                        ret = ip6mr_mfc_add(net, mrt, &mfc,
                                            sk ==
                                            rtnl_dereference(mrt->mroute_sk),
                                            parent);
                rtnl_unlock();
                return ret;

        case MRT6_FLUSH:
        {
                int flags;

                if (optlen != sizeof(flags))
                        return -EINVAL;
                if (copy_from_sockptr(&flags, optval, sizeof(flags)))
                        return -EFAULT;
                rtnl_lock();
                mroute_clean_tables(mrt, flags);
                rtnl_unlock();
                return 0;
        }

        /*
         *      Control PIM assert (to activate pim will activate assert)
         */
        case MRT6_ASSERT:
        {
                int v;

                if (optlen != sizeof(v))
                        return -EINVAL;
                if (copy_from_sockptr(&v, optval, sizeof(v)))
                        return -EFAULT;
                mrt->mroute_do_assert = v;
                return 0;
        }

#ifdef CONFIG_IPV6_PIMSM_V2
        case MRT6_PIM:
        {
                int v;

                if (optlen != sizeof(v))
                        return -EINVAL;
                if (copy_from_sockptr(&v, optval, sizeof(v)))
                        return -EFAULT;
                /* Normalise to 0/1 before comparing with the current state */
                v = !!v;
                rtnl_lock();
                ret = 0;
                if (v != mrt->mroute_do_pim) {
                        mrt->mroute_do_pim = v;
                        mrt->mroute_do_assert = v;
                }
                rtnl_unlock();
                return ret;
        }

#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
        case MRT6_TABLE:
        {
                u32 v;

                if (optlen != sizeof(u32))
                        return -EINVAL;
                if (copy_from_sockptr(&v, optval, sizeof(v)))
                        return -EFAULT;
                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
                if (v != RT_TABLE_DEFAULT && v >= 100000000)
                        return -EINVAL;
                /* The table cannot be changed while sk is its mroute socket */
                if (sk == rcu_access_pointer(mrt->mroute_sk))
                        return -EBUSY;

                rtnl_lock();
                ret = 0;
                mrt = ip6mr_new_table(net, v);
                if (IS_ERR(mrt))
                        ret = PTR_ERR(mrt);
                else
                        raw6_sk(sk)->ip6mr_table = v;
                rtnl_unlock();
                return ret;
        }
#endif
        /*
         *      Spurious command, or MRT6_VERSION which you cannot
         *      set.
         */
        default:
                return -ENOPROTOOPT;
        }
}
1800
1801/*
1802 *      Getsock opt support for the multicast routing system.
1803 */
1804
1805int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1806                          int __user *optlen)
1807{
1808        int olr;
1809        int val;
1810        struct net *net = sock_net(sk);
1811        struct mr_table *mrt;
1812
1813        if (sk->sk_type != SOCK_RAW ||
1814            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1815                return -EOPNOTSUPP;
1816
1817        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1818        if (!mrt)
1819                return -ENOENT;
1820
1821        switch (optname) {
1822        case MRT6_VERSION:
1823                val = 0x0305;
1824                break;
1825#ifdef CONFIG_IPV6_PIMSM_V2
1826        case MRT6_PIM:
1827                val = mrt->mroute_do_pim;
1828                break;
1829#endif
1830        case MRT6_ASSERT:
1831                val = mrt->mroute_do_assert;
1832                break;
1833        default:
1834                return -ENOPROTOOPT;
1835        }
1836
1837        if (get_user(olr, optlen))
1838                return -EFAULT;
1839
1840        olr = min_t(int, olr, sizeof(int));
1841        if (olr < 0)
1842                return -EINVAL;
1843
1844        if (put_user(olr, optlen))
1845                return -EFAULT;
1846        if (copy_to_user(optval, &val, olr))
1847                return -EFAULT;
1848        return 0;
1849}
1850
1851/*
1852 *      The IP multicast ioctl support routines.
1853 */
1854
1855int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1856{
1857        struct sioc_sg_req6 sr;
1858        struct sioc_mif_req6 vr;
1859        struct vif_device *vif;
1860        struct mfc6_cache *c;
1861        struct net *net = sock_net(sk);
1862        struct mr_table *mrt;
1863
1864        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1865        if (!mrt)
1866                return -ENOENT;
1867
1868        switch (cmd) {
1869        case SIOCGETMIFCNT_IN6:
1870                if (copy_from_user(&vr, arg, sizeof(vr)))
1871                        return -EFAULT;
1872                if (vr.mifi >= mrt->maxvif)
1873                        return -EINVAL;
1874                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1875                read_lock(&mrt_lock);
1876                vif = &mrt->vif_table[vr.mifi];
1877                if (VIF_EXISTS(mrt, vr.mifi)) {
1878                        vr.icount = vif->pkt_in;
1879                        vr.ocount = vif->pkt_out;
1880                        vr.ibytes = vif->bytes_in;
1881                        vr.obytes = vif->bytes_out;
1882                        read_unlock(&mrt_lock);
1883
1884                        if (copy_to_user(arg, &vr, sizeof(vr)))
1885                                return -EFAULT;
1886                        return 0;
1887                }
1888                read_unlock(&mrt_lock);
1889                return -EADDRNOTAVAIL;
1890        case SIOCGETSGCNT_IN6:
1891                if (copy_from_user(&sr, arg, sizeof(sr)))
1892                        return -EFAULT;
1893
1894                rcu_read_lock();
1895                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1896                if (c) {
1897                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1898                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1899                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1900                        rcu_read_unlock();
1901
1902                        if (copy_to_user(arg, &sr, sizeof(sr)))
1903                                return -EFAULT;
1904                        return 0;
1905                }
1906                rcu_read_unlock();
1907                return -EADDRNOTAVAIL;
1908        default:
1909                return -ENOIOCTLCMD;
1910        }
1911}
1912
1913#ifdef CONFIG_COMPAT
1914struct compat_sioc_sg_req6 {
1915        struct sockaddr_in6 src;
1916        struct sockaddr_in6 grp;
1917        compat_ulong_t pktcnt;
1918        compat_ulong_t bytecnt;
1919        compat_ulong_t wrong_if;
1920};
1921
1922struct compat_sioc_mif_req6 {
1923        mifi_t  mifi;
1924        compat_ulong_t icount;
1925        compat_ulong_t ocount;
1926        compat_ulong_t ibytes;
1927        compat_ulong_t obytes;
1928};
1929
1930int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1931{
1932        struct compat_sioc_sg_req6 sr;
1933        struct compat_sioc_mif_req6 vr;
1934        struct vif_device *vif;
1935        struct mfc6_cache *c;
1936        struct net *net = sock_net(sk);
1937        struct mr_table *mrt;
1938
1939        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1940        if (!mrt)
1941                return -ENOENT;
1942
1943        switch (cmd) {
1944        case SIOCGETMIFCNT_IN6:
1945                if (copy_from_user(&vr, arg, sizeof(vr)))
1946                        return -EFAULT;
1947                if (vr.mifi >= mrt->maxvif)
1948                        return -EINVAL;
1949                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1950                read_lock(&mrt_lock);
1951                vif = &mrt->vif_table[vr.mifi];
1952                if (VIF_EXISTS(mrt, vr.mifi)) {
1953                        vr.icount = vif->pkt_in;
1954                        vr.ocount = vif->pkt_out;
1955                        vr.ibytes = vif->bytes_in;
1956                        vr.obytes = vif->bytes_out;
1957                        read_unlock(&mrt_lock);
1958
1959                        if (copy_to_user(arg, &vr, sizeof(vr)))
1960                                return -EFAULT;
1961                        return 0;
1962                }
1963                read_unlock(&mrt_lock);
1964                return -EADDRNOTAVAIL;
1965        case SIOCGETSGCNT_IN6:
1966                if (copy_from_user(&sr, arg, sizeof(sr)))
1967                        return -EFAULT;
1968
1969                rcu_read_lock();
1970                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1971                if (c) {
1972                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1973                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1974                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1975                        rcu_read_unlock();
1976
1977                        if (copy_to_user(arg, &sr, sizeof(sr)))
1978                                return -EFAULT;
1979                        return 0;
1980                }
1981                rcu_read_unlock();
1982                return -EADDRNOTAVAIL;
1983        default:
1984                return -ENOIOCTLCMD;
1985        }
1986}
1987#endif
1988
1989static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1990{
1991        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1992                      IPSTATS_MIB_OUTFORWDATAGRAMS);
1993        IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1994                      IPSTATS_MIB_OUTOCTETS, skb->len);
1995        return dst_output(net, sk, skb);
1996}
1997
1998/*
1999 *      Processing handlers for ip6mr_forward
2000 */
2001
2002static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2003                          struct sk_buff *skb, int vifi)
2004{
2005        struct ipv6hdr *ipv6h;
2006        struct vif_device *vif = &mrt->vif_table[vifi];
2007        struct net_device *dev;
2008        struct dst_entry *dst;
2009        struct flowi6 fl6;
2010
2011        if (!vif->dev)
2012                goto out_free;
2013
2014#ifdef CONFIG_IPV6_PIMSM_V2
2015        if (vif->flags & MIFF_REGISTER) {
2016                vif->pkt_out++;
2017                vif->bytes_out += skb->len;
2018                vif->dev->stats.tx_bytes += skb->len;
2019                vif->dev->stats.tx_packets++;
2020                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2021                goto out_free;
2022        }
2023#endif
2024
2025        ipv6h = ipv6_hdr(skb);
2026
2027        fl6 = (struct flowi6) {
2028                .flowi6_oif = vif->link,
2029                .daddr = ipv6h->daddr,
2030        };
2031
2032        dst = ip6_route_output(net, NULL, &fl6);
2033        if (dst->error) {
2034                dst_release(dst);
2035                goto out_free;
2036        }
2037
2038        skb_dst_drop(skb);
2039        skb_dst_set(skb, dst);
2040
2041        /*
2042         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2043         * not only before forwarding, but after forwarding on all output
2044         * interfaces. It is clear, if mrouter runs a multicasting
2045         * program, it should receive packets not depending to what interface
2046         * program is joined.
2047         * If we will not make it, the program will have to join on all
2048         * interfaces. On the other hand, multihoming host (or router, but
2049         * not mrouter) cannot join to more than one interface - it will
2050         * result in receiving multiple packets.
2051         */
2052        dev = vif->dev;
2053        skb->dev = dev;
2054        vif->pkt_out++;
2055        vif->bytes_out += skb->len;
2056
2057        /* We are about to write */
2058        /* XXX: extension headers? */
2059        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2060                goto out_free;
2061
2062        ipv6h = ipv6_hdr(skb);
2063        ipv6h->hop_limit--;
2064
2065        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2066
2067        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2068                       net, NULL, skb, skb->dev, dev,
2069                       ip6mr_forward2_finish);
2070
2071out_free:
2072        kfree_skb(skb);
2073        return 0;
2074}
2075
2076static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2077{
2078        int ct;
2079
2080        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2081                if (mrt->vif_table[ct].dev == dev)
2082                        break;
2083        }
2084        return ct;
2085}
2086
2087static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2088                           struct net_device *dev, struct sk_buff *skb,
2089                           struct mfc6_cache *c)
2090{
2091        int psend = -1;
2092        int vif, ct;
2093        int true_vifi = ip6mr_find_vif(mrt, dev);
2094
2095        vif = c->_c.mfc_parent;
2096        c->_c.mfc_un.res.pkt++;
2097        c->_c.mfc_un.res.bytes += skb->len;
2098        c->_c.mfc_un.res.lastuse = jiffies;
2099
2100        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2101                struct mfc6_cache *cache_proxy;
2102
2103                /* For an (*,G) entry, we only check that the incoming
2104                 * interface is part of the static tree.
2105                 */
2106                rcu_read_lock();
2107                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2108                if (cache_proxy &&
2109                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2110                        rcu_read_unlock();
2111                        goto forward;
2112                }
2113                rcu_read_unlock();
2114        }
2115
2116        /*
2117         * Wrong interface: drop packet and (maybe) send PIM assert.
2118         */
2119        if (mrt->vif_table[vif].dev != dev) {
2120                c->_c.mfc_un.res.wrong_if++;
2121
2122                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2123                    /* pimsm uses asserts, when switching from RPT to SPT,
2124                       so that we cannot check that packet arrived on an oif.
2125                       It is bad, but otherwise we would need to move pretty
2126                       large chunk of pimd to kernel. Ough... --ANK
2127                     */
2128                    (mrt->mroute_do_pim ||
2129                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2130                    time_after(jiffies,
2131                               c->_c.mfc_un.res.last_assert +
2132                               MFC_ASSERT_THRESH)) {
2133                        c->_c.mfc_un.res.last_assert = jiffies;
2134                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2135                }
2136                goto dont_forward;
2137        }
2138
2139forward:
2140        mrt->vif_table[vif].pkt_in++;
2141        mrt->vif_table[vif].bytes_in += skb->len;
2142
2143        /*
2144         *      Forward the frame
2145         */
2146        if (ipv6_addr_any(&c->mf6c_origin) &&
2147            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2148                if (true_vifi >= 0 &&
2149                    true_vifi != c->_c.mfc_parent &&
2150                    ipv6_hdr(skb)->hop_limit >
2151                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2152                        /* It's an (*,*) entry and the packet is not coming from
2153                         * the upstream: forward the packet to the upstream
2154                         * only.
2155                         */
2156                        psend = c->_c.mfc_parent;
2157                        goto last_forward;
2158                }
2159                goto dont_forward;
2160        }
2161        for (ct = c->_c.mfc_un.res.maxvif - 1;
2162             ct >= c->_c.mfc_un.res.minvif; ct--) {
2163                /* For (*,G) entry, don't forward to the incoming interface */
2164                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2165                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2166                        if (psend != -1) {
2167                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2168                                if (skb2)
2169                                        ip6mr_forward2(net, mrt, skb2, psend);
2170                        }
2171                        psend = ct;
2172                }
2173        }
2174last_forward:
2175        if (psend != -1) {
2176                ip6mr_forward2(net, mrt, skb, psend);
2177                return;
2178        }
2179
2180dont_forward:
2181        kfree_skb(skb);
2182}
2183
2184
2185/*
2186 *      Multicast packets for forwarding arrive here
2187 */
2188
2189int ip6_mr_input(struct sk_buff *skb)
2190{
2191        struct mfc6_cache *cache;
2192        struct net *net = dev_net(skb->dev);
2193        struct mr_table *mrt;
2194        struct flowi6 fl6 = {
2195                .flowi6_iif     = skb->dev->ifindex,
2196                .flowi6_mark    = skb->mark,
2197        };
2198        int err;
2199        struct net_device *dev;
2200
2201        /* skb->dev passed in is the master dev for vrfs.
2202         * Get the proper interface that does have a vif associated with it.
2203         */
2204        dev = skb->dev;
2205        if (netif_is_l3_master(skb->dev)) {
2206                dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2207                if (!dev) {
2208                        kfree_skb(skb);
2209                        return -ENODEV;
2210                }
2211        }
2212
2213        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2214        if (err < 0) {
2215                kfree_skb(skb);
2216                return err;
2217        }
2218
2219        read_lock(&mrt_lock);
2220        cache = ip6mr_cache_find(mrt,
2221                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2222        if (!cache) {
2223                int vif = ip6mr_find_vif(mrt, dev);
2224
2225                if (vif >= 0)
2226                        cache = ip6mr_cache_find_any(mrt,
2227                                                     &ipv6_hdr(skb)->daddr,
2228                                                     vif);
2229        }
2230
2231        /*
2232         *      No usable cache entry
2233         */
2234        if (!cache) {
2235                int vif;
2236
2237                vif = ip6mr_find_vif(mrt, dev);
2238                if (vif >= 0) {
2239                        int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2240                        read_unlock(&mrt_lock);
2241
2242                        return err;
2243                }
2244                read_unlock(&mrt_lock);
2245                kfree_skb(skb);
2246                return -ENODEV;
2247        }
2248
2249        ip6_mr_forward(net, mrt, dev, skb, cache);
2250
2251        read_unlock(&mrt_lock);
2252
2253        return 0;
2254}
2255
2256int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2257                    u32 portid)
2258{
2259        int err;
2260        struct mr_table *mrt;
2261        struct mfc6_cache *cache;
2262        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2263
2264        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2265        if (!mrt)
2266                return -ENOENT;
2267
2268        read_lock(&mrt_lock);
2269        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2270        if (!cache && skb->dev) {
2271                int vif = ip6mr_find_vif(mrt, skb->dev);
2272
2273                if (vif >= 0)
2274                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2275                                                     vif);
2276        }
2277
2278        if (!cache) {
2279                struct sk_buff *skb2;
2280                struct ipv6hdr *iph;
2281                struct net_device *dev;
2282                int vif;
2283
2284                dev = skb->dev;
2285                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2286                        read_unlock(&mrt_lock);
2287                        return -ENODEV;
2288                }
2289
2290                /* really correct? */
2291                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2292                if (!skb2) {
2293                        read_unlock(&mrt_lock);
2294                        return -ENOMEM;
2295                }
2296
2297                NETLINK_CB(skb2).portid = portid;
2298                skb_reset_transport_header(skb2);
2299
2300                skb_put(skb2, sizeof(struct ipv6hdr));
2301                skb_reset_network_header(skb2);
2302
2303                iph = ipv6_hdr(skb2);
2304                iph->version = 0;
2305                iph->priority = 0;
2306                iph->flow_lbl[0] = 0;
2307                iph->flow_lbl[1] = 0;
2308                iph->flow_lbl[2] = 0;
2309                iph->payload_len = 0;
2310                iph->nexthdr = IPPROTO_NONE;
2311                iph->hop_limit = 0;
2312                iph->saddr = rt->rt6i_src.addr;
2313                iph->daddr = rt->rt6i_dst.addr;
2314
2315                err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2316                read_unlock(&mrt_lock);
2317
2318                return err;
2319        }
2320
2321        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2322        read_unlock(&mrt_lock);
2323        return err;
2324}
2325
2326static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2327                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2328                             int flags)
2329{
2330        struct nlmsghdr *nlh;
2331        struct rtmsg *rtm;
2332        int err;
2333
2334        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2335        if (!nlh)
2336                return -EMSGSIZE;
2337
2338        rtm = nlmsg_data(nlh);
2339        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2340        rtm->rtm_dst_len  = 128;
2341        rtm->rtm_src_len  = 128;
2342        rtm->rtm_tos      = 0;
2343        rtm->rtm_table    = mrt->id;
2344        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2345                goto nla_put_failure;
2346        rtm->rtm_type = RTN_MULTICAST;
2347        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2348        if (c->_c.mfc_flags & MFC_STATIC)
2349                rtm->rtm_protocol = RTPROT_STATIC;
2350        else
2351                rtm->rtm_protocol = RTPROT_MROUTED;
2352        rtm->rtm_flags    = 0;
2353
2354        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2355            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2356                goto nla_put_failure;
2357        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2358        /* do not break the dump if cache is unresolved */
2359        if (err < 0 && err != -ENOENT)
2360                goto nla_put_failure;
2361
2362        nlmsg_end(skb, nlh);
2363        return 0;
2364
2365nla_put_failure:
2366        nlmsg_cancel(skb, nlh);
2367        return -EMSGSIZE;
2368}
2369
2370static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2371                              u32 portid, u32 seq, struct mr_mfc *c,
2372                              int cmd, int flags)
2373{
2374        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2375                                 cmd, flags);
2376}
2377
2378static int mr6_msgsize(bool unresolved, int maxvif)
2379{
2380        size_t len =
2381                NLMSG_ALIGN(sizeof(struct rtmsg))
2382                + nla_total_size(4)     /* RTA_TABLE */
2383                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2384                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2385                ;
2386
2387        if (!unresolved)
2388                len = len
2389                      + nla_total_size(4)       /* RTA_IIF */
2390                      + nla_total_size(0)       /* RTA_MULTIPATH */
2391                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2392                                                /* RTA_MFC_STATS */
2393                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2394                ;
2395
2396        return len;
2397}
2398
2399static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2400                              int cmd)
2401{
2402        struct net *net = read_pnet(&mrt->net);
2403        struct sk_buff *skb;
2404        int err = -ENOBUFS;
2405
2406        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2407                        GFP_ATOMIC);
2408        if (!skb)
2409                goto errout;
2410
2411        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2412        if (err < 0)
2413                goto errout;
2414
2415        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2416        return;
2417
2418errout:
2419        kfree_skb(skb);
2420        if (err < 0)
2421                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2422}
2423
2424static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2425{
2426        size_t len =
2427                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2428                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2429                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2430                                        /* IP6MRA_CREPORT_SRC_ADDR */
2431                + nla_total_size(sizeof(struct in6_addr))
2432                                        /* IP6MRA_CREPORT_DST_ADDR */
2433                + nla_total_size(sizeof(struct in6_addr))
2434                                        /* IP6MRA_CREPORT_PKT */
2435                + nla_total_size(payloadlen)
2436                ;
2437
2438        return len;
2439}
2440
2441static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2442{
2443        struct net *net = read_pnet(&mrt->net);
2444        struct nlmsghdr *nlh;
2445        struct rtgenmsg *rtgenm;
2446        struct mrt6msg *msg;
2447        struct sk_buff *skb;
2448        struct nlattr *nla;
2449        int payloadlen;
2450
2451        payloadlen = pkt->len - sizeof(struct mrt6msg);
2452        msg = (struct mrt6msg *)skb_transport_header(pkt);
2453
2454        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2455        if (!skb)
2456                goto errout;
2457
2458        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2459                        sizeof(struct rtgenmsg), 0);
2460        if (!nlh)
2461                goto errout;
2462        rtgenm = nlmsg_data(nlh);
2463        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2464        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2465            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2466            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2467                             &msg->im6_src) ||
2468            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2469                             &msg->im6_dst))
2470                goto nla_put_failure;
2471
2472        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2473        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2474                                  nla_data(nla), payloadlen))
2475                goto nla_put_failure;
2476
2477        nlmsg_end(skb, nlh);
2478
2479        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2480        return;
2481
2482nla_put_failure:
2483        nlmsg_cancel(skb, nlh);
2484errout:
2485        kfree_skb(skb);
2486        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2487}
2488
2489static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2490{
2491        const struct nlmsghdr *nlh = cb->nlh;
2492        struct fib_dump_filter filter = {};
2493        int err;
2494
2495        if (cb->strict_check) {
2496                err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2497                                            &filter, cb);
2498                if (err < 0)
2499                        return err;
2500        }
2501
2502        if (filter.table_id) {
2503                struct mr_table *mrt;
2504
2505                mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2506                if (!mrt) {
2507                        if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2508                                return skb->len;
2509
2510                        NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2511                        return -ENOENT;
2512                }
2513                err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2514                                    &mfc_unres_lock, &filter);
2515                return skb->len ? : err;
2516        }
2517
2518        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2519                                _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2520}
2521