linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Linux IPv6 multicast routing support for BSD pim6sd
   4 *      Based on net/ipv4/ipmr.c.
   5 *
   6 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   7 *              LSIIT Laboratory, Strasbourg, France
   8 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   9 *              6WIND, Paris, France
  10 *      Copyright (C)2007,2008 USAGI/WIDE Project
  11 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  12 */
  13
  14#include <linux/uaccess.h>
  15#include <linux/types.h>
  16#include <linux/sched.h>
  17#include <linux/errno.h>
  18#include <linux/mm.h>
  19#include <linux/kernel.h>
  20#include <linux/fcntl.h>
  21#include <linux/stat.h>
  22#include <linux/socket.h>
  23#include <linux/inet.h>
  24#include <linux/netdevice.h>
  25#include <linux/inetdevice.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/init.h>
  29#include <linux/compat.h>
  30#include <linux/rhashtable.h>
  31#include <net/protocol.h>
  32#include <linux/skbuff.h>
  33#include <net/raw.h>
  34#include <linux/notifier.h>
  35#include <linux/if_arp.h>
  36#include <net/checksum.h>
  37#include <net/netlink.h>
  38#include <net/fib_rules.h>
  39
  40#include <net/ipv6.h>
  41#include <net/ip6_route.h>
  42#include <linux/mroute6.h>
  43#include <linux/pim.h>
  44#include <net/addrconf.h>
  45#include <linux/netfilter_ipv6.h>
  46#include <linux/export.h>
  47#include <net/ip6_checksum.h>
  48#include <linux/netconf.h>
  49#include <net/ip_tunnels.h>
  50
  51#include <linux/nospec.h>
  52
   /* IPv6 multicast-routing policy rule: it holds only the generic fib_rule
    * state and adds no fields of its own.
    */
   53struct ip6mr_rule {
   54        struct fib_rule         common;
   55};
  56
   /* Result of a rules lookup: ip6mr_rule_action() stores the selected table
    * here and ip6mr_fib_lookup() hands it back to its caller.
    */
   57struct ip6mr_result {
   58        struct mr_table *mrt;
   59};
  60
   61/* Big lock, protecting the vif table, the mrt cache and mroute socket state.
   62   Note that changes are additionally serialized by rtnl_lock.
   63 */
   64
   65static DEFINE_RWLOCK(mrt_lock);
   66
   67/* Multicast router control variables */
   68
   69/* Special spinlock for the queue of unresolved entries */
   70static DEFINE_SPINLOCK(mfc_unres_lock);
   71
   72/* We return to Alan's original scheme. The hash table of resolved
   73   entries is changed only in process context and is protected
   74   by the weak lock mrt_lock. The queue of unresolved entries is protected
   75   by the strong spinlock mfc_unres_lock.
   76
   77   In this case the data path is entirely free of exclusive locks.
   78 */
   79
     /* Slab cache that mfc6_cache entries are returned to
      * (see ip6mr_cache_free_rcu()).
      */
   80static struct kmem_cache *mrt_cachep __read_mostly;
   81
     /* Forward declarations of helpers that are used before they are defined. */
   82static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
   83static void ip6mr_free_table(struct mr_table *mrt);
   84
   85static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
   86                           struct net_device *dev, struct sk_buff *skb,
   87                           struct mfc6_cache *cache);
   88static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
   89                              mifi_t mifi, int assert);
   90static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
   91                              int cmd);
   92static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
   93static int ip6mr_rtm_dumproute(struct sk_buff *skb,
   94                               struct netlink_callback *cb);
   95static void mroute_clean_tables(struct mr_table *mrt, int flags);
   96static void ipmr_expire_process(struct timer_list *t);
  97
  98#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
   /* Iterate @mrt over every table on @net's mr6_tables list.  The trailing
    * expression is the extra condition handed to list_for_each_entry_rcu():
    * RTNL held, or the list being empty.
    */
   99#define ip6mr_for_each_table(mrt, net) \
  100        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \
  101                                lockdep_rtnl_is_held() || \
  102                                list_empty(&net->ipv6.mr6_tables))
 103
 104static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 105                                            struct mr_table *mrt)
 106{
 107        struct mr_table *ret;
 108
 109        if (!mrt)
 110                ret = list_entry_rcu(net->ipv6.mr6_tables.next,
 111                                     struct mr_table, list);
 112        else
 113                ret = list_entry_rcu(mrt->list.next,
 114                                     struct mr_table, list);
 115
 116        if (&ret->list == &net->ipv6.mr6_tables)
 117                return NULL;
 118        return ret;
 119}
 120
 121static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 122{
 123        struct mr_table *mrt;
 124
 125        ip6mr_for_each_table(mrt, net) {
 126                if (mrt->id == id)
 127                        return mrt;
 128        }
 129        return NULL;
 130}
 131
 132static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 133                            struct mr_table **mrt)
 134{
 135        int err;
 136        struct ip6mr_result res;
 137        struct fib_lookup_arg arg = {
 138                .result = &res,
 139                .flags = FIB_LOOKUP_NOREF,
 140        };
 141
 142        /* update flow if oif or iif point to device enslaved to l3mdev */
 143        l3mdev_update_flow(net, flowi6_to_flowi(flp6));
 144
 145        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 146                               flowi6_to_flowi(flp6), 0, &arg);
 147        if (err < 0)
 148                return err;
 149        *mrt = res.mrt;
 150        return 0;
 151}
 152
 153static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 154                             int flags, struct fib_lookup_arg *arg)
 155{
 156        struct ip6mr_result *res = arg->result;
 157        struct mr_table *mrt;
 158
 159        switch (rule->action) {
 160        case FR_ACT_TO_TBL:
 161                break;
 162        case FR_ACT_UNREACHABLE:
 163                return -ENETUNREACH;
 164        case FR_ACT_PROHIBIT:
 165                return -EACCES;
 166        case FR_ACT_BLACKHOLE:
 167        default:
 168                return -EINVAL;
 169        }
 170
 171        arg->table = fib_rule_get_table(rule, arg);
 172
 173        mrt = ip6mr_get_table(rule->fr_net, arg->table);
 174        if (!mrt)
 175                return -EAGAIN;
 176        res->mrt = mrt;
 177        return 0;
 178}
 179
 180static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 181{
 182        return 1;
 183}
 184
 185static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 186                                struct fib_rule_hdr *frh, struct nlattr **tb,
 187                                struct netlink_ext_ack *extack)
 188{
 189        return 0;
 190}
 191
 192static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 193                              struct nlattr **tb)
 194{
 195        return 1;
 196}
 197
 198static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 199                           struct fib_rule_hdr *frh)
 200{
 201        frh->dst_len = 0;
 202        frh->src_len = 0;
 203        frh->tos     = 0;
 204        return 0;
 205}
 206
  /* Template handed to fib_rules_register() by ip6mr_rules_init(); it plugs
   * the ip6mr_rule_* callbacks above into the fib rules framework for the
   * RTNL_FAMILY_IP6MR family.
   */
  207static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
  208        .family         = RTNL_FAMILY_IP6MR,
  209        .rule_size      = sizeof(struct ip6mr_rule),
  210        .addr_size      = sizeof(struct in6_addr),
  211        .action         = ip6mr_rule_action,
  212        .match          = ip6mr_rule_match,
  213        .configure      = ip6mr_rule_configure,
  214        .compare        = ip6mr_rule_compare,
  215        .fill           = ip6mr_rule_fill,
  216        .nlgroup        = RTNLGRP_IPV6_RULE,
  217        .owner          = THIS_MODULE,
  218};
 219
 220static int __net_init ip6mr_rules_init(struct net *net)
 221{
 222        struct fib_rules_ops *ops;
 223        struct mr_table *mrt;
 224        int err;
 225
 226        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 227        if (IS_ERR(ops))
 228                return PTR_ERR(ops);
 229
 230        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 231
 232        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 233        if (IS_ERR(mrt)) {
 234                err = PTR_ERR(mrt);
 235                goto err1;
 236        }
 237
 238        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 239        if (err < 0)
 240                goto err2;
 241
 242        net->ipv6.mr6_rules_ops = ops;
 243        return 0;
 244
 245err2:
 246        rtnl_lock();
 247        ip6mr_free_table(mrt);
 248        rtnl_unlock();
 249err1:
 250        fib_rules_unregister(ops);
 251        return err;
 252}
 253
 254static void __net_exit ip6mr_rules_exit(struct net *net)
 255{
 256        struct mr_table *mrt, *next;
 257
 258        rtnl_lock();
 259        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 260                list_del(&mrt->list);
 261                ip6mr_free_table(mrt);
 262        }
 263        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 264        rtnl_unlock();
 265}
 266
 267static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 268                            struct netlink_ext_ack *extack)
 269{
 270        return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
 271}
 272
 273static unsigned int ip6mr_rules_seq_read(struct net *net)
 274{
 275        return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
 276}
 277
 278bool ip6mr_rule_default(const struct fib_rule *rule)
 279{
 280        return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
 281               rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
 282}
 283EXPORT_SYMBOL(ip6mr_rule_default);
 284#else
  /* Single-table build: the loop body runs at most once, for net->ipv6.mrt6
   * when it is non-NULL.
   */
  285#define ip6mr_for_each_table(mrt, net) \
  286        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 287
 288static struct mr_table *ip6mr_mr_table_iter(struct net *net,
 289                                            struct mr_table *mrt)
 290{
 291        if (!mrt)
 292                return net->ipv6.mrt6;
 293        return NULL;
 294}
 295
 296static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
 297{
 298        return net->ipv6.mrt6;
 299}
 300
 301static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 302                            struct mr_table **mrt)
 303{
 304        *mrt = net->ipv6.mrt6;
 305        return 0;
 306}
 307
 308static int __net_init ip6mr_rules_init(struct net *net)
 309{
 310        struct mr_table *mrt;
 311
 312        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 313        if (IS_ERR(mrt))
 314                return PTR_ERR(mrt);
 315        net->ipv6.mrt6 = mrt;
 316        return 0;
 317}
 318
 319static void __net_exit ip6mr_rules_exit(struct net *net)
 320{
 321        rtnl_lock();
 322        ip6mr_free_table(net->ipv6.mrt6);
 323        net->ipv6.mrt6 = NULL;
 324        rtnl_unlock();
 325}
 326
 327static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
 328                            struct netlink_ext_ack *extack)
 329{
 330        return 0;
 331}
 332
 333static unsigned int ip6mr_rules_seq_read(struct net *net)
 334{
 335        return 0;
 336}
 337#endif
 338
 339static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
 340                          const void *ptr)
 341{
 342        const struct mfc6_cache_cmp_arg *cmparg = arg->key;
 343        struct mfc6_cache *c = (struct mfc6_cache *)ptr;
 344
 345        return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
 346               !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
 347}
 348
  /* rhashtable parameters for the MFC hash: entries are linked through
   * mr_mfc.mnode, keyed by the mfc6_cache.cmparg member and compared with
   * ip6mr_hash_cmp().
   */
  349static const struct rhashtable_params ip6mr_rht_params = {
  350        .head_offset = offsetof(struct mr_mfc, mnode),
  351        .key_offset = offsetof(struct mfc6_cache, cmparg),
  352        .key_len = sizeof(struct mfc6_cache_cmp_arg),
  353        .nelem_hint = 3,
  354        .obj_cmpfn = ip6mr_hash_cmp,
  355        .automatic_shrinking = true,
  356};
 357
 358static void ip6mr_new_table_set(struct mr_table *mrt,
 359                                struct net *net)
 360{
 361#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 362        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 363#endif
 364}
 365
  /* Comparison key with both origin and group set to the IPv6 unspecified
   * address; exported to the generic mroute code through
   * ip6mr_mr_table_ops.cmparg_any.
   */
  366static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
  367        .mf6c_origin = IN6ADDR_ANY_INIT,
  368        .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
  369};
 370
  /* Table ops passed to mr_table_alloc(): the hash parameters and the
   * "any" comparison key defined above.
   */
  371static struct mr_table_ops ip6mr_mr_table_ops = {
  372        .rht_params = &ip6mr_rht_params,
  373        .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
  374};
 375
 376static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 377{
 378        struct mr_table *mrt;
 379
 380        mrt = ip6mr_get_table(net, id);
 381        if (mrt)
 382                return mrt;
 383
 384        return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
 385                              ipmr_expire_process, ip6mr_new_table_set);
 386}
 387
 388static void ip6mr_free_table(struct mr_table *mrt)
 389{
 390        del_timer_sync(&mrt->ipmr_expire_timer);
 391        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
 392                                 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 393        rhltable_destroy(&mrt->mfc_hash);
 394        kfree(mrt);
 395}
 396
 397#ifdef CONFIG_PROC_FS
 398/* The /proc interfaces to multicast routing
 399 * /proc/ip6_mr_cache /proc/ip6_mr_vif
 400 */
 401
 402static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 403        __acquires(mrt_lock)
 404{
 405        struct mr_vif_iter *iter = seq->private;
 406        struct net *net = seq_file_net(seq);
 407        struct mr_table *mrt;
 408
 409        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 410        if (!mrt)
 411                return ERR_PTR(-ENOENT);
 412
 413        iter->mrt = mrt;
 414
 415        read_lock(&mrt_lock);
 416        return mr_vif_seq_start(seq, pos);
 417}
 418
 419static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 420        __releases(mrt_lock)
 421{
 422        read_unlock(&mrt_lock);
 423}
 424
 425static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 426{
 427        struct mr_vif_iter *iter = seq->private;
 428        struct mr_table *mrt = iter->mrt;
 429
 430        if (v == SEQ_START_TOKEN) {
 431                seq_puts(seq,
 432                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 433        } else {
 434                const struct vif_device *vif = v;
 435                const char *name = vif->dev ? vif->dev->name : "none";
 436
 437                seq_printf(seq,
 438                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 439                           vif - mrt->vif_table,
 440                           name, vif->bytes_in, vif->pkt_in,
 441                           vif->bytes_out, vif->pkt_out,
 442                           vif->flags);
 443        }
 444        return 0;
 445}
 446
  /* seq_file operations for the vif listing; start/stop/show are the ip6mr
   * functions above, ->next is the generic mr_vif_seq_next().
   */
  447static const struct seq_operations ip6mr_vif_seq_ops = {
  448        .start = ip6mr_vif_seq_start,
  449        .next  = mr_vif_seq_next,
  450        .stop  = ip6mr_vif_seq_stop,
  451        .show  = ip6mr_vif_seq_show,
  452};
 453
 454static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 455{
 456        struct net *net = seq_file_net(seq);
 457        struct mr_table *mrt;
 458
 459        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 460        if (!mrt)
 461                return ERR_PTR(-ENOENT);
 462
 463        return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
 464}
 465
 466static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 467{
 468        int n;
 469
 470        if (v == SEQ_START_TOKEN) {
 471                seq_puts(seq,
 472                         "Group                            "
 473                         "Origin                           "
 474                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 475        } else {
 476                const struct mfc6_cache *mfc = v;
 477                const struct mr_mfc_iter *it = seq->private;
 478                struct mr_table *mrt = it->mrt;
 479
 480                seq_printf(seq, "%pI6 %pI6 %-3hd",
 481                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 482                           mfc->_c.mfc_parent);
 483
 484                if (it->cache != &mrt->mfc_unres_queue) {
 485                        seq_printf(seq, " %8lu %8lu %8lu",
 486                                   mfc->_c.mfc_un.res.pkt,
 487                                   mfc->_c.mfc_un.res.bytes,
 488                                   mfc->_c.mfc_un.res.wrong_if);
 489                        for (n = mfc->_c.mfc_un.res.minvif;
 490                             n < mfc->_c.mfc_un.res.maxvif; n++) {
 491                                if (VIF_EXISTS(mrt, n) &&
 492                                    mfc->_c.mfc_un.res.ttls[n] < 255)
 493                                        seq_printf(seq,
 494                                                   " %2d:%-3d", n,
 495                                                   mfc->_c.mfc_un.res.ttls[n]);
 496                        }
 497                } else {
 498                        /* unresolved mfc_caches don't contain
 499                         * pkt, bytes and wrong_if values
 500                         */
 501                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 502                }
 503                seq_putc(seq, '\n');
 504        }
 505        return 0;
 506}
 507
  /* seq_file operations for the MFC cache listing; ->next and ->stop are
   * the generic mr_mfc_seq_next()/mr_mfc_seq_stop().
   */
  508static const struct seq_operations ipmr_mfc_seq_ops = {
  509        .start = ipmr_mfc_seq_start,
  510        .next  = mr_mfc_seq_next,
  511        .stop  = mr_mfc_seq_stop,
  512        .show  = ipmr_mfc_seq_show,
  513};
 514#endif
 515
 516#ifdef CONFIG_IPV6_PIMSM_V2
 517
 518static int pim6_rcv(struct sk_buff *skb)
 519{
 520        struct pimreghdr *pim;
 521        struct ipv6hdr   *encap;
 522        struct net_device  *reg_dev = NULL;
 523        struct net *net = dev_net(skb->dev);
 524        struct mr_table *mrt;
 525        struct flowi6 fl6 = {
 526                .flowi6_iif     = skb->dev->ifindex,
 527                .flowi6_mark    = skb->mark,
 528        };
 529        int reg_vif_num;
 530
 531        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 532                goto drop;
 533
 534        pim = (struct pimreghdr *)skb_transport_header(skb);
 535        if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
 536            (pim->flags & PIM_NULL_REGISTER) ||
 537            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 538                             sizeof(*pim), IPPROTO_PIM,
 539                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 540             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 541                goto drop;
 542
 543        /* check if the inner packet is destined to mcast group */
 544        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 545                                   sizeof(*pim));
 546
 547        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 548            encap->payload_len == 0 ||
 549            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 550                goto drop;
 551
 552        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 553                goto drop;
 554        reg_vif_num = mrt->mroute_reg_vif_num;
 555
 556        read_lock(&mrt_lock);
 557        if (reg_vif_num >= 0)
 558                reg_dev = mrt->vif_table[reg_vif_num].dev;
 559        dev_hold(reg_dev);
 560        read_unlock(&mrt_lock);
 561
 562        if (!reg_dev)
 563                goto drop;
 564
 565        skb->mac_header = skb->network_header;
 566        skb_pull(skb, (u8 *)encap - skb->data);
 567        skb_reset_network_header(skb);
 568        skb->protocol = htons(ETH_P_IPV6);
 569        skb->ip_summed = CHECKSUM_NONE;
 570
 571        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 572
 573        netif_rx(skb);
 574
 575        dev_put(reg_dev);
 576        return 0;
 577 drop:
 578        kfree_skb(skb);
 579        return 0;
 580}
 581
  /* inet6 protocol descriptor whose receive handler is pim6_rcv(). */
  582static const struct inet6_protocol pim6_protocol = {
  583        .handler        =       pim6_rcv,
  584};
 585
 586/* Service routines creating virtual interfaces: PIMREG */
 587
 588static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 589                                      struct net_device *dev)
 590{
 591        struct net *net = dev_net(dev);
 592        struct mr_table *mrt;
 593        struct flowi6 fl6 = {
 594                .flowi6_oif     = dev->ifindex,
 595                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 596                .flowi6_mark    = skb->mark,
 597        };
 598
 599        if (!pskb_inet_may_pull(skb))
 600                goto tx_err;
 601
 602        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 603                goto tx_err;
 604
 605        read_lock(&mrt_lock);
 606        dev->stats.tx_bytes += skb->len;
 607        dev->stats.tx_packets++;
 608        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 609        read_unlock(&mrt_lock);
 610        kfree_skb(skb);
 611        return NETDEV_TX_OK;
 612
 613tx_err:
 614        dev->stats.tx_errors++;
 615        kfree_skb(skb);
 616        return NETDEV_TX_OK;
 617}
 618
 619static int reg_vif_get_iflink(const struct net_device *dev)
 620{
 621        return 0;
 622}
 623
  /* Device operations of the PIM register device, installed by
   * reg_vif_setup().
   */
  624static const struct net_device_ops reg_vif_netdev_ops = {
  625        .ndo_start_xmit = reg_vif_xmit,
  626        .ndo_get_iflink = reg_vif_get_iflink,
  627};
 628
 629static void reg_vif_setup(struct net_device *dev)
 630{
 631        dev->type               = ARPHRD_PIMREG;
 632        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 633        dev->flags              = IFF_NOARP;
 634        dev->netdev_ops         = &reg_vif_netdev_ops;
 635        dev->needs_free_netdev  = true;
 636        dev->features           |= NETIF_F_NETNS_LOCAL;
 637}
 638
 639static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
 640{
 641        struct net_device *dev;
 642        char name[IFNAMSIZ];
 643
 644        if (mrt->id == RT6_TABLE_DFLT)
 645                sprintf(name, "pim6reg");
 646        else
 647                sprintf(name, "pim6reg%u", mrt->id);
 648
 649        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 650        if (!dev)
 651                return NULL;
 652
 653        dev_net_set(dev, net);
 654
 655        if (register_netdevice(dev)) {
 656                free_netdev(dev);
 657                return NULL;
 658        }
 659
 660        if (dev_open(dev, NULL))
 661                goto failure;
 662
 663        dev_hold(dev);
 664        return dev;
 665
 666failure:
 667        unregister_netdevice(dev);
 668        return NULL;
 669}
 670#endif
 671
 672static int call_ip6mr_vif_entry_notifiers(struct net *net,
 673                                          enum fib_event_type event_type,
 674                                          struct vif_device *vif,
 675                                          mifi_t vif_index, u32 tb_id)
 676{
 677        return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 678                                     vif, vif_index, tb_id,
 679                                     &net->ipv6.ipmr_seq);
 680}
 681
 682static int call_ip6mr_mfc_entry_notifiers(struct net *net,
 683                                          enum fib_event_type event_type,
 684                                          struct mfc6_cache *mfc, u32 tb_id)
 685{
 686        return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
 687                                     &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
 688}
 689
 690/* Delete a VIF entry */
 691static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
 692                       struct list_head *head)
 693{
 694        struct vif_device *v;
 695        struct net_device *dev;
 696        struct inet6_dev *in6_dev;
 697
 698        if (vifi < 0 || vifi >= mrt->maxvif)
 699                return -EADDRNOTAVAIL;
 700
 701        v = &mrt->vif_table[vifi];
 702
 703        if (VIF_EXISTS(mrt, vifi))
 704                call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
 705                                               FIB_EVENT_VIF_DEL, v, vifi,
 706                                               mrt->id);
 707
 708        write_lock_bh(&mrt_lock);
 709        dev = v->dev;
 710        v->dev = NULL;
 711
 712        if (!dev) {
 713                write_unlock_bh(&mrt_lock);
 714                return -EADDRNOTAVAIL;
 715        }
 716
 717#ifdef CONFIG_IPV6_PIMSM_V2
 718        if (vifi == mrt->mroute_reg_vif_num)
 719                mrt->mroute_reg_vif_num = -1;
 720#endif
 721
 722        if (vifi + 1 == mrt->maxvif) {
 723                int tmp;
 724                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 725                        if (VIF_EXISTS(mrt, tmp))
 726                                break;
 727                }
 728                mrt->maxvif = tmp + 1;
 729        }
 730
 731        write_unlock_bh(&mrt_lock);
 732
 733        dev_set_allmulti(dev, -1);
 734
 735        in6_dev = __in6_dev_get(dev);
 736        if (in6_dev) {
 737                in6_dev->cnf.mc_forwarding--;
 738                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 739                                             NETCONFA_MC_FORWARDING,
 740                                             dev->ifindex, &in6_dev->cnf);
 741        }
 742
 743        if ((v->flags & MIFF_REGISTER) && !notify)
 744                unregister_netdevice_queue(dev, head);
 745
 746        dev_put_track(dev, &v->dev_tracker);
 747        return 0;
 748}
 749
 750static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
 751{
 752        struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
 753
 754        kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
 755}
 756
 757static inline void ip6mr_cache_free(struct mfc6_cache *c)
 758{
 759        call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
 760}
 761
 762/* Destroy an unresolved cache entry, killing queued skbs
 763   and reporting error to netlink readers.
 764 */
 765
 766static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
 767{
 768        struct net *net = read_pnet(&mrt->net);
 769        struct sk_buff *skb;
 770
 771        atomic_dec(&mrt->cache_resolve_queue_len);
 772
 773        while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
 774                if (ipv6_hdr(skb)->version == 0) {
 775                        struct nlmsghdr *nlh = skb_pull(skb,
 776                                                        sizeof(struct ipv6hdr));
 777                        nlh->nlmsg_type = NLMSG_ERROR;
 778                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 779                        skb_trim(skb, nlh->nlmsg_len);
 780                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 781                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 782                } else
 783                        kfree_skb(skb);
 784        }
 785
 786        ip6mr_cache_free(c);
 787}
 788
 789
 790/* Timer process for all the unresolved queue. */
 791
 792static void ipmr_do_expire_process(struct mr_table *mrt)
 793{
 794        unsigned long now = jiffies;
 795        unsigned long expires = 10 * HZ;
 796        struct mr_mfc *c, *next;
 797
 798        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 799                if (time_after(c->mfc_un.unres.expires, now)) {
 800                        /* not yet... */
 801                        unsigned long interval = c->mfc_un.unres.expires - now;
 802                        if (interval < expires)
 803                                expires = interval;
 804                        continue;
 805                }
 806
 807                list_del(&c->list);
 808                mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
 809                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
 810        }
 811
 812        if (!list_empty(&mrt->mfc_unres_queue))
 813                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 814}
 815
 816static void ipmr_expire_process(struct timer_list *t)
 817{
 818        struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
 819
 820        if (!spin_trylock(&mfc_unres_lock)) {
 821                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 822                return;
 823        }
 824
 825        if (!list_empty(&mrt->mfc_unres_queue))
 826                ipmr_do_expire_process(mrt);
 827
 828        spin_unlock(&mfc_unres_lock);
 829}
 830
 831/* Fill oifs list. It is called under write locked mrt_lock. */
 832
 833static void ip6mr_update_thresholds(struct mr_table *mrt,
 834                                    struct mr_mfc *cache,
 835                                    unsigned char *ttls)
 836{
 837        int vifi;
 838
 839        cache->mfc_un.res.minvif = MAXMIFS;
 840        cache->mfc_un.res.maxvif = 0;
 841        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 842
 843        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 844                if (VIF_EXISTS(mrt, vifi) &&
 845                    ttls[vifi] && ttls[vifi] < 255) {
 846                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 847                        if (cache->mfc_un.res.minvif > vifi)
 848                                cache->mfc_un.res.minvif = vifi;
 849                        if (cache->mfc_un.res.maxvif <= vifi)
 850                                cache->mfc_un.res.maxvif = vifi + 1;
 851                }
 852        }
 853        cache->mfc_un.res.lastuse = jiffies;
 854}
 855
 856static int mif6_add(struct net *net, struct mr_table *mrt,
 857                    struct mif6ctl *vifc, int mrtsock)
 858{
 859        int vifi = vifc->mif6c_mifi;
 860        struct vif_device *v = &mrt->vif_table[vifi];
 861        struct net_device *dev;
 862        struct inet6_dev *in6_dev;
 863        int err;
 864
 865        /* Is vif busy ? */
 866        if (VIF_EXISTS(mrt, vifi))
 867                return -EADDRINUSE;
 868
 869        switch (vifc->mif6c_flags) {
 870#ifdef CONFIG_IPV6_PIMSM_V2
 871        case MIFF_REGISTER:
 872                /*
 873                 * Special Purpose VIF in PIM
 874                 * All the packets will be sent to the daemon
 875                 */
 876                if (mrt->mroute_reg_vif_num >= 0)
 877                        return -EADDRINUSE;
 878                dev = ip6mr_reg_vif(net, mrt);
 879                if (!dev)
 880                        return -ENOBUFS;
 881                err = dev_set_allmulti(dev, 1);
 882                if (err) {
 883                        unregister_netdevice(dev);
 884                        dev_put(dev);
 885                        return err;
 886                }
 887                break;
 888#endif
 889        case 0:
 890                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 891                if (!dev)
 892                        return -EADDRNOTAVAIL;
 893                err = dev_set_allmulti(dev, 1);
 894                if (err) {
 895                        dev_put(dev);
 896                        return err;
 897                }
 898                break;
 899        default:
 900                return -EINVAL;
 901        }
 902
 903        in6_dev = __in6_dev_get(dev);
 904        if (in6_dev) {
 905                in6_dev->cnf.mc_forwarding++;
 906                inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
 907                                             NETCONFA_MC_FORWARDING,
 908                                             dev->ifindex, &in6_dev->cnf);
 909        }
 910
 911        /* Fill in the VIF structures */
 912        vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
 913                        vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
 914                        MIFF_REGISTER);
 915
 916        /* And finish update writing critical data */
 917        write_lock_bh(&mrt_lock);
 918        v->dev = dev;
 919        netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
 920#ifdef CONFIG_IPV6_PIMSM_V2
 921        if (v->flags & MIFF_REGISTER)
 922                mrt->mroute_reg_vif_num = vifi;
 923#endif
 924        if (vifi + 1 > mrt->maxvif)
 925                mrt->maxvif = vifi + 1;
 926        write_unlock_bh(&mrt_lock);
 927        call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
 928                                       v, vifi, mrt->id);
 929        return 0;
 930}
 931
 932static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
 933                                           const struct in6_addr *origin,
 934                                           const struct in6_addr *mcastgrp)
 935{
 936        struct mfc6_cache_cmp_arg arg = {
 937                .mf6c_origin = *origin,
 938                .mf6c_mcastgrp = *mcastgrp,
 939        };
 940
 941        return mr_mfc_find(mrt, &arg);
 942}
 943
 944/* Look for a (*,G) entry */
 945static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
 946                                               struct in6_addr *mcastgrp,
 947                                               mifi_t mifi)
 948{
 949        struct mfc6_cache_cmp_arg arg = {
 950                .mf6c_origin = in6addr_any,
 951                .mf6c_mcastgrp = *mcastgrp,
 952        };
 953
 954        if (ipv6_addr_any(mcastgrp))
 955                return mr_mfc_find_any_parent(mrt, mifi);
 956        return mr_mfc_find_any(mrt, mifi, &arg);
 957}
 958
 959/* Look for a (S,G,iif) entry if parent != -1 */
 960static struct mfc6_cache *
 961ip6mr_cache_find_parent(struct mr_table *mrt,
 962                        const struct in6_addr *origin,
 963                        const struct in6_addr *mcastgrp,
 964                        int parent)
 965{
 966        struct mfc6_cache_cmp_arg arg = {
 967                .mf6c_origin = *origin,
 968                .mf6c_mcastgrp = *mcastgrp,
 969        };
 970
 971        return mr_mfc_find_parent(mrt, &arg, parent);
 972}
 973
 974/* Allocate a multicast cache entry */
 975static struct mfc6_cache *ip6mr_cache_alloc(void)
 976{
 977        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 978        if (!c)
 979                return NULL;
 980        c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 981        c->_c.mfc_un.res.minvif = MAXMIFS;
 982        c->_c.free = ip6mr_cache_free_rcu;
 983        refcount_set(&c->_c.mfc_un.res.refcount, 1);
 984        return c;
 985}
 986
 987static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 988{
 989        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 990        if (!c)
 991                return NULL;
 992        skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
 993        c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
 994        return c;
 995}
 996
 997/*
 998 *      A cache entry has gone into a resolved state from queued
 999 */
1000
1001static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
1002                                struct mfc6_cache *uc, struct mfc6_cache *c)
1003{
1004        struct sk_buff *skb;
1005
1006        /*
1007         *      Play the pending entries through our router
1008         */
1009
1010        while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
1011                if (ipv6_hdr(skb)->version == 0) {
1012                        struct nlmsghdr *nlh = skb_pull(skb,
1013                                                        sizeof(struct ipv6hdr));
1014
1015                        if (mr_fill_mroute(mrt, skb, &c->_c,
1016                                           nlmsg_data(nlh)) > 0) {
1017                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1018                        } else {
1019                                nlh->nlmsg_type = NLMSG_ERROR;
1020                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1021                                skb_trim(skb, nlh->nlmsg_len);
1022                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1023                        }
1024                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1025                } else
1026                        ip6_mr_forward(net, mrt, skb->dev, skb, c);
1027        }
1028}
1029
1030/*
1031 *      Bounce a cache query up to pim6sd and netlink.
1032 *
1033 *      Called under mrt_lock.
1034 */
1035
1036static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
1037                              mifi_t mifi, int assert)
1038{
1039        struct sock *mroute6_sk;
1040        struct sk_buff *skb;
1041        struct mrt6msg *msg;
1042        int ret;
1043
1044#ifdef CONFIG_IPV6_PIMSM_V2
1045        if (assert == MRT6MSG_WHOLEPKT)
1046                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1047                                                +sizeof(*msg));
1048        else
1049#endif
1050                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1051
1052        if (!skb)
1053                return -ENOBUFS;
1054
1055        /* I suppose that internal messages
1056         * do not require checksums */
1057
1058        skb->ip_summed = CHECKSUM_UNNECESSARY;
1059
1060#ifdef CONFIG_IPV6_PIMSM_V2
1061        if (assert == MRT6MSG_WHOLEPKT) {
1062                /* Ugly, but we have no choice with this interface.
1063                   Duplicate old header, fix length etc.
1064                   And all this only to mangle msg->im6_msgtype and
1065                   to set msg->im6_mbz to "mbz" :-)
1066                 */
1067                skb_push(skb, -skb_network_offset(pkt));
1068
1069                skb_push(skb, sizeof(*msg));
1070                skb_reset_transport_header(skb);
1071                msg = (struct mrt6msg *)skb_transport_header(skb);
1072                msg->im6_mbz = 0;
1073                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1074                msg->im6_mif = mrt->mroute_reg_vif_num;
1075                msg->im6_pad = 0;
1076                msg->im6_src = ipv6_hdr(pkt)->saddr;
1077                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1078
1079                skb->ip_summed = CHECKSUM_UNNECESSARY;
1080        } else
1081#endif
1082        {
1083        /*
1084         *      Copy the IP header
1085         */
1086
1087        skb_put(skb, sizeof(struct ipv6hdr));
1088        skb_reset_network_header(skb);
1089        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1090
1091        /*
1092         *      Add our header
1093         */
1094        skb_put(skb, sizeof(*msg));
1095        skb_reset_transport_header(skb);
1096        msg = (struct mrt6msg *)skb_transport_header(skb);
1097
1098        msg->im6_mbz = 0;
1099        msg->im6_msgtype = assert;
1100        msg->im6_mif = mifi;
1101        msg->im6_pad = 0;
1102        msg->im6_src = ipv6_hdr(pkt)->saddr;
1103        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1104
1105        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1106        skb->ip_summed = CHECKSUM_UNNECESSARY;
1107        }
1108
1109        rcu_read_lock();
1110        mroute6_sk = rcu_dereference(mrt->mroute_sk);
1111        if (!mroute6_sk) {
1112                rcu_read_unlock();
1113                kfree_skb(skb);
1114                return -EINVAL;
1115        }
1116
1117        mrt6msg_netlink_event(mrt, skb);
1118
1119        /* Deliver to user space multicast routing algorithms */
1120        ret = sock_queue_rcv_skb(mroute6_sk, skb);
1121        rcu_read_unlock();
1122        if (ret < 0) {
1123                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1124                kfree_skb(skb);
1125        }
1126
1127        return ret;
1128}
1129
1130/* Queue a packet for resolution. It gets locked cache entry! */
1131static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
1132                                  struct sk_buff *skb, struct net_device *dev)
1133{
1134        struct mfc6_cache *c;
1135        bool found = false;
1136        int err;
1137
1138        spin_lock_bh(&mfc_unres_lock);
1139        list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
1140                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1141                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1142                        found = true;
1143                        break;
1144                }
1145        }
1146
1147        if (!found) {
1148                /*
1149                 *      Create a new entry if allowable
1150                 */
1151
1152                c = ip6mr_cache_alloc_unres();
1153                if (!c) {
1154                        spin_unlock_bh(&mfc_unres_lock);
1155
1156                        kfree_skb(skb);
1157                        return -ENOBUFS;
1158                }
1159
1160                /* Fill in the new cache entry */
1161                c->_c.mfc_parent = -1;
1162                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1163                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1164
1165                /*
1166                 *      Reflect first query at pim6sd
1167                 */
1168                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1169                if (err < 0) {
1170                        /* If the report failed throw the cache entry
1171                           out - Brad Parker
1172                         */
1173                        spin_unlock_bh(&mfc_unres_lock);
1174
1175                        ip6mr_cache_free(c);
1176                        kfree_skb(skb);
1177                        return err;
1178                }
1179
1180                atomic_inc(&mrt->cache_resolve_queue_len);
1181                list_add(&c->_c.list, &mrt->mfc_unres_queue);
1182                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1183
1184                ipmr_do_expire_process(mrt);
1185        }
1186
1187        /* See if we can append the packet */
1188        if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
1189                kfree_skb(skb);
1190                err = -ENOBUFS;
1191        } else {
1192                if (dev) {
1193                        skb->dev = dev;
1194                        skb->skb_iif = dev->ifindex;
1195                }
1196                skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
1197                err = 0;
1198        }
1199
1200        spin_unlock_bh(&mfc_unres_lock);
1201        return err;
1202}
1203
1204/*
1205 *      MFC6 cache manipulation by user space
1206 */
1207
1208static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
1209                            int parent)
1210{
1211        struct mfc6_cache *c;
1212
1213        /* The entries are added/deleted only under RTNL */
1214        rcu_read_lock();
1215        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1216                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1217        rcu_read_unlock();
1218        if (!c)
1219                return -ENOENT;
1220        rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
1221        list_del_rcu(&c->_c.list);
1222
1223        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1224                                       FIB_EVENT_ENTRY_DEL, c, mrt->id);
1225        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1226        mr_cache_put(&c->_c);
1227        return 0;
1228}
1229
1230static int ip6mr_device_event(struct notifier_block *this,
1231                              unsigned long event, void *ptr)
1232{
1233        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1234        struct net *net = dev_net(dev);
1235        struct mr_table *mrt;
1236        struct vif_device *v;
1237        int ct;
1238
1239        if (event != NETDEV_UNREGISTER)
1240                return NOTIFY_DONE;
1241
1242        ip6mr_for_each_table(mrt, net) {
1243                v = &mrt->vif_table[0];
1244                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1245                        if (v->dev == dev)
1246                                mif6_delete(mrt, ct, 1, NULL);
1247                }
1248        }
1249
1250        return NOTIFY_DONE;
1251}
1252
1253static unsigned int ip6mr_seq_read(struct net *net)
1254{
1255        ASSERT_RTNL();
1256
1257        return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
1258}
1259
1260static int ip6mr_dump(struct net *net, struct notifier_block *nb,
1261                      struct netlink_ext_ack *extack)
1262{
1263        return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
1264                       ip6mr_mr_table_iter, &mrt_lock, extack);
1265}
1266
1267static struct notifier_block ip6_mr_notifier = {
1268        .notifier_call = ip6mr_device_event
1269};
1270
1271static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
1272        .family         = RTNL_FAMILY_IP6MR,
1273        .fib_seq_read   = ip6mr_seq_read,
1274        .fib_dump       = ip6mr_dump,
1275        .owner          = THIS_MODULE,
1276};
1277
1278static int __net_init ip6mr_notifier_init(struct net *net)
1279{
1280        struct fib_notifier_ops *ops;
1281
1282        net->ipv6.ipmr_seq = 0;
1283
1284        ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
1285        if (IS_ERR(ops))
1286                return PTR_ERR(ops);
1287
1288        net->ipv6.ip6mr_notifier_ops = ops;
1289
1290        return 0;
1291}
1292
1293static void __net_exit ip6mr_notifier_exit(struct net *net)
1294{
1295        fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
1296        net->ipv6.ip6mr_notifier_ops = NULL;
1297}
1298
1299/* Setup for IP multicast routing */
1300static int __net_init ip6mr_net_init(struct net *net)
1301{
1302        int err;
1303
1304        err = ip6mr_notifier_init(net);
1305        if (err)
1306                return err;
1307
1308        err = ip6mr_rules_init(net);
1309        if (err < 0)
1310                goto ip6mr_rules_fail;
1311
1312#ifdef CONFIG_PROC_FS
1313        err = -ENOMEM;
1314        if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
1315                        sizeof(struct mr_vif_iter)))
1316                goto proc_vif_fail;
1317        if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
1318                        sizeof(struct mr_mfc_iter)))
1319                goto proc_cache_fail;
1320#endif
1321
1322        return 0;
1323
1324#ifdef CONFIG_PROC_FS
1325proc_cache_fail:
1326        remove_proc_entry("ip6_mr_vif", net->proc_net);
1327proc_vif_fail:
1328        ip6mr_rules_exit(net);
1329#endif
1330ip6mr_rules_fail:
1331        ip6mr_notifier_exit(net);
1332        return err;
1333}
1334
1335static void __net_exit ip6mr_net_exit(struct net *net)
1336{
1337#ifdef CONFIG_PROC_FS
1338        remove_proc_entry("ip6_mr_cache", net->proc_net);
1339        remove_proc_entry("ip6_mr_vif", net->proc_net);
1340#endif
1341        ip6mr_rules_exit(net);
1342        ip6mr_notifier_exit(net);
1343}
1344
1345static struct pernet_operations ip6mr_net_ops = {
1346        .init = ip6mr_net_init,
1347        .exit = ip6mr_net_exit,
1348};
1349
1350int __init ip6_mr_init(void)
1351{
1352        int err;
1353
1354        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1355                                       sizeof(struct mfc6_cache),
1356                                       0, SLAB_HWCACHE_ALIGN,
1357                                       NULL);
1358        if (!mrt_cachep)
1359                return -ENOMEM;
1360
1361        err = register_pernet_subsys(&ip6mr_net_ops);
1362        if (err)
1363                goto reg_pernet_fail;
1364
1365        err = register_netdevice_notifier(&ip6_mr_notifier);
1366        if (err)
1367                goto reg_notif_fail;
1368#ifdef CONFIG_IPV6_PIMSM_V2
1369        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1370                pr_err("%s: can't add PIM protocol\n", __func__);
1371                err = -EAGAIN;
1372                goto add_proto_fail;
1373        }
1374#endif
1375        err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
1376                                   NULL, ip6mr_rtm_dumproute, 0);
1377        if (err == 0)
1378                return 0;
1379
1380#ifdef CONFIG_IPV6_PIMSM_V2
1381        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1382add_proto_fail:
1383        unregister_netdevice_notifier(&ip6_mr_notifier);
1384#endif
1385reg_notif_fail:
1386        unregister_pernet_subsys(&ip6mr_net_ops);
1387reg_pernet_fail:
1388        kmem_cache_destroy(mrt_cachep);
1389        return err;
1390}
1391
1392void ip6_mr_cleanup(void)
1393{
1394        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1395#ifdef CONFIG_IPV6_PIMSM_V2
1396        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1397#endif
1398        unregister_netdevice_notifier(&ip6_mr_notifier);
1399        unregister_pernet_subsys(&ip6mr_net_ops);
1400        kmem_cache_destroy(mrt_cachep);
1401}
1402
1403static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
1404                         struct mf6cctl *mfc, int mrtsock, int parent)
1405{
1406        unsigned char ttls[MAXMIFS];
1407        struct mfc6_cache *uc, *c;
1408        struct mr_mfc *_uc;
1409        bool found;
1410        int i, err;
1411
1412        if (mfc->mf6cc_parent >= MAXMIFS)
1413                return -ENFILE;
1414
1415        memset(ttls, 255, MAXMIFS);
1416        for (i = 0; i < MAXMIFS; i++) {
1417                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1418                        ttls[i] = 1;
1419        }
1420
1421        /* The entries are added/deleted only under RTNL */
1422        rcu_read_lock();
1423        c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
1424                                    &mfc->mf6cc_mcastgrp.sin6_addr, parent);
1425        rcu_read_unlock();
1426        if (c) {
1427                write_lock_bh(&mrt_lock);
1428                c->_c.mfc_parent = mfc->mf6cc_parent;
1429                ip6mr_update_thresholds(mrt, &c->_c, ttls);
1430                if (!mrtsock)
1431                        c->_c.mfc_flags |= MFC_STATIC;
1432                write_unlock_bh(&mrt_lock);
1433                call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
1434                                               c, mrt->id);
1435                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1436                return 0;
1437        }
1438
1439        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1440            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1441                return -EINVAL;
1442
1443        c = ip6mr_cache_alloc();
1444        if (!c)
1445                return -ENOMEM;
1446
1447        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1448        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1449        c->_c.mfc_parent = mfc->mf6cc_parent;
1450        ip6mr_update_thresholds(mrt, &c->_c, ttls);
1451        if (!mrtsock)
1452                c->_c.mfc_flags |= MFC_STATIC;
1453
1454        err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
1455                                  ip6mr_rht_params);
1456        if (err) {
1457                pr_err("ip6mr: rhtable insert error %d\n", err);
1458                ip6mr_cache_free(c);
1459                return err;
1460        }
1461        list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
1462
1463        /* Check to see if we resolved a queued list. If so we
1464         * need to send on the frames and tidy up.
1465         */
1466        found = false;
1467        spin_lock_bh(&mfc_unres_lock);
1468        list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
1469                uc = (struct mfc6_cache *)_uc;
1470                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1471                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1472                        list_del(&_uc->list);
1473                        atomic_dec(&mrt->cache_resolve_queue_len);
1474                        found = true;
1475                        break;
1476                }
1477        }
1478        if (list_empty(&mrt->mfc_unres_queue))
1479                del_timer(&mrt->ipmr_expire_timer);
1480        spin_unlock_bh(&mfc_unres_lock);
1481
1482        if (found) {
1483                ip6mr_cache_resolve(net, mrt, uc, c);
1484                ip6mr_cache_free(uc);
1485        }
1486        call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
1487                                       c, mrt->id);
1488        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1489        return 0;
1490}
1491
1492/*
1493 *      Close the multicast socket, and clear the vif tables etc
1494 */
1495
1496static void mroute_clean_tables(struct mr_table *mrt, int flags)
1497{
1498        struct mr_mfc *c, *tmp;
1499        LIST_HEAD(list);
1500        int i;
1501
1502        /* Shut down all active vif entries */
1503        if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
1504                for (i = 0; i < mrt->maxvif; i++) {
1505                        if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
1506                             !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
1507                            (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
1508                                continue;
1509                        mif6_delete(mrt, i, 0, &list);
1510                }
1511                unregister_netdevice_many(&list);
1512        }
1513
1514        /* Wipe the cache */
1515        if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
1516                list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
1517                        if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
1518                            (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
1519                                continue;
1520                        rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
1521                        list_del_rcu(&c->list);
1522                        call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
1523                                                       FIB_EVENT_ENTRY_DEL,
1524                                                       (struct mfc6_cache *)c, mrt->id);
1525                        mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
1526                        mr_cache_put(c);
1527                }
1528        }
1529
1530        if (flags & MRT6_FLUSH_MFC) {
1531                if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1532                        spin_lock_bh(&mfc_unres_lock);
1533                        list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
1534                                list_del(&c->list);
1535                                mr6_netlink_event(mrt, (struct mfc6_cache *)c,
1536                                                  RTM_DELROUTE);
1537                                ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
1538                        }
1539                        spin_unlock_bh(&mfc_unres_lock);
1540                }
1541        }
1542}
1543
1544static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
1545{
1546        int err = 0;
1547        struct net *net = sock_net(sk);
1548
1549        rtnl_lock();
1550        write_lock_bh(&mrt_lock);
1551        if (rtnl_dereference(mrt->mroute_sk)) {
1552                err = -EADDRINUSE;
1553        } else {
1554                rcu_assign_pointer(mrt->mroute_sk, sk);
1555                sock_set_flag(sk, SOCK_RCU_FREE);
1556                net->ipv6.devconf_all->mc_forwarding++;
1557        }
1558        write_unlock_bh(&mrt_lock);
1559
1560        if (!err)
1561                inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1562                                             NETCONFA_MC_FORWARDING,
1563                                             NETCONFA_IFINDEX_ALL,
1564                                             net->ipv6.devconf_all);
1565        rtnl_unlock();
1566
1567        return err;
1568}
1569
1570int ip6mr_sk_done(struct sock *sk)
1571{
1572        int err = -EACCES;
1573        struct net *net = sock_net(sk);
1574        struct mr_table *mrt;
1575
1576        if (sk->sk_type != SOCK_RAW ||
1577            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1578                return err;
1579
1580        rtnl_lock();
1581        ip6mr_for_each_table(mrt, net) {
1582                if (sk == rtnl_dereference(mrt->mroute_sk)) {
1583                        write_lock_bh(&mrt_lock);
1584                        RCU_INIT_POINTER(mrt->mroute_sk, NULL);
1585                        /* Note that mroute_sk had SOCK_RCU_FREE set,
1586                         * so the RCU grace period before sk freeing
1587                         * is guaranteed by sk_destruct()
1588                         */
1589                        net->ipv6.devconf_all->mc_forwarding--;
1590                        write_unlock_bh(&mrt_lock);
1591                        inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
1592                                                     NETCONFA_MC_FORWARDING,
1593                                                     NETCONFA_IFINDEX_ALL,
1594                                                     net->ipv6.devconf_all);
1595
1596                        mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
1597                        err = 0;
1598                        break;
1599                }
1600        }
1601        rtnl_unlock();
1602
1603        return err;
1604}
1605
1606bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
1607{
1608        struct mr_table *mrt;
1609        struct flowi6 fl6 = {
1610                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1611                .flowi6_oif     = skb->dev->ifindex,
1612                .flowi6_mark    = skb->mark,
1613        };
1614
1615        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1616                return NULL;
1617
1618        return rcu_access_pointer(mrt->mroute_sk);
1619}
1620EXPORT_SYMBOL(mroute6_is_socket);
1621
1622/*
1623 *      Socket options and virtual interface manipulation. The whole
1624 *      virtual interface system is a complete heap, but unfortunately
1625 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1626 *      MOSPF/PIM router set up we can clean this up.
1627 */
1628
1629int ip6_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
1630                          unsigned int optlen)
1631{
1632        int ret, parent = 0;
1633        struct mif6ctl vif;
1634        struct mf6cctl mfc;
1635        mifi_t mifi;
1636        struct net *net = sock_net(sk);
1637        struct mr_table *mrt;
1638
1639        if (sk->sk_type != SOCK_RAW ||
1640            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1641                return -EOPNOTSUPP;
1642
1643        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1644        if (!mrt)
1645                return -ENOENT;
1646
1647        if (optname != MRT6_INIT) {
1648                if (sk != rcu_access_pointer(mrt->mroute_sk) &&
1649                    !ns_capable(net->user_ns, CAP_NET_ADMIN))
1650                        return -EACCES;
1651        }
1652
1653        switch (optname) {
1654        case MRT6_INIT:
1655                if (optlen < sizeof(int))
1656                        return -EINVAL;
1657
1658                return ip6mr_sk_init(mrt, sk);
1659
1660        case MRT6_DONE:
1661                return ip6mr_sk_done(sk);
1662
1663        case MRT6_ADD_MIF:
1664                if (optlen < sizeof(vif))
1665                        return -EINVAL;
1666                if (copy_from_sockptr(&vif, optval, sizeof(vif)))
1667                        return -EFAULT;
1668                if (vif.mif6c_mifi >= MAXMIFS)
1669                        return -ENFILE;
1670                rtnl_lock();
1671                ret = mif6_add(net, mrt, &vif,
1672                               sk == rtnl_dereference(mrt->mroute_sk));
1673                rtnl_unlock();
1674                return ret;
1675
1676        case MRT6_DEL_MIF:
1677                if (optlen < sizeof(mifi_t))
1678                        return -EINVAL;
1679                if (copy_from_sockptr(&mifi, optval, sizeof(mifi_t)))
1680                        return -EFAULT;
1681                rtnl_lock();
1682                ret = mif6_delete(mrt, mifi, 0, NULL);
1683                rtnl_unlock();
1684                return ret;
1685
1686        /*
1687         *      Manipulate the forwarding caches. These live
1688         *      in a sort of kernel/user symbiosis.
1689         */
1690        case MRT6_ADD_MFC:
1691        case MRT6_DEL_MFC:
1692                parent = -1;
1693                fallthrough;
1694        case MRT6_ADD_MFC_PROXY:
1695        case MRT6_DEL_MFC_PROXY:
1696                if (optlen < sizeof(mfc))
1697                        return -EINVAL;
1698                if (copy_from_sockptr(&mfc, optval, sizeof(mfc)))
1699                        return -EFAULT;
1700                if (parent == 0)
1701                        parent = mfc.mf6cc_parent;
1702                rtnl_lock();
1703                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1704                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1705                else
1706                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1707                                            sk ==
1708                                            rtnl_dereference(mrt->mroute_sk),
1709                                            parent);
1710                rtnl_unlock();
1711                return ret;
1712
1713        case MRT6_FLUSH:
1714        {
1715                int flags;
1716
1717                if (optlen != sizeof(flags))
1718                        return -EINVAL;
1719                if (copy_from_sockptr(&flags, optval, sizeof(flags)))
1720                        return -EFAULT;
1721                rtnl_lock();
1722                mroute_clean_tables(mrt, flags);
1723                rtnl_unlock();
1724                return 0;
1725        }
1726
1727        /*
1728         *      Control PIM assert (to activate pim will activate assert)
1729         */
1730        case MRT6_ASSERT:
1731        {
1732                int v;
1733
1734                if (optlen != sizeof(v))
1735                        return -EINVAL;
1736                if (copy_from_sockptr(&v, optval, sizeof(v)))
1737                        return -EFAULT;
1738                mrt->mroute_do_assert = v;
1739                return 0;
1740        }
1741
1742#ifdef CONFIG_IPV6_PIMSM_V2
1743        case MRT6_PIM:
1744        {
1745                int v;
1746
1747                if (optlen != sizeof(v))
1748                        return -EINVAL;
1749                if (copy_from_sockptr(&v, optval, sizeof(v)))
1750                        return -EFAULT;
1751                v = !!v;
1752                rtnl_lock();
1753                ret = 0;
1754                if (v != mrt->mroute_do_pim) {
1755                        mrt->mroute_do_pim = v;
1756                        mrt->mroute_do_assert = v;
1757                }
1758                rtnl_unlock();
1759                return ret;
1760        }
1761
1762#endif
1763#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1764        case MRT6_TABLE:
1765        {
1766                u32 v;
1767
1768                if (optlen != sizeof(u32))
1769                        return -EINVAL;
1770                if (copy_from_sockptr(&v, optval, sizeof(v)))
1771                        return -EFAULT;
1772                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1773                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1774                        return -EINVAL;
1775                if (sk == rcu_access_pointer(mrt->mroute_sk))
1776                        return -EBUSY;
1777
1778                rtnl_lock();
1779                ret = 0;
1780                mrt = ip6mr_new_table(net, v);
1781                if (IS_ERR(mrt))
1782                        ret = PTR_ERR(mrt);
1783                else
1784                        raw6_sk(sk)->ip6mr_table = v;
1785                rtnl_unlock();
1786                return ret;
1787        }
1788#endif
1789        /*
1790         *      Spurious command, or MRT6_VERSION which you cannot
1791         *      set.
1792         */
1793        default:
1794                return -ENOPROTOOPT;
1795        }
1796}
1797
1798/*
1799 *      Getsock opt support for the multicast routing system.
1800 */
1801
1802int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1803                          int __user *optlen)
1804{
1805        int olr;
1806        int val;
1807        struct net *net = sock_net(sk);
1808        struct mr_table *mrt;
1809
1810        if (sk->sk_type != SOCK_RAW ||
1811            inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1812                return -EOPNOTSUPP;
1813
1814        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1815        if (!mrt)
1816                return -ENOENT;
1817
1818        switch (optname) {
1819        case MRT6_VERSION:
1820                val = 0x0305;
1821                break;
1822#ifdef CONFIG_IPV6_PIMSM_V2
1823        case MRT6_PIM:
1824                val = mrt->mroute_do_pim;
1825                break;
1826#endif
1827        case MRT6_ASSERT:
1828                val = mrt->mroute_do_assert;
1829                break;
1830        default:
1831                return -ENOPROTOOPT;
1832        }
1833
1834        if (get_user(olr, optlen))
1835                return -EFAULT;
1836
1837        olr = min_t(int, olr, sizeof(int));
1838        if (olr < 0)
1839                return -EINVAL;
1840
1841        if (put_user(olr, optlen))
1842                return -EFAULT;
1843        if (copy_to_user(optval, &val, olr))
1844                return -EFAULT;
1845        return 0;
1846}
1847
1848/*
1849 *      The IP multicast ioctl support routines.
1850 */
1851
1852int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1853{
1854        struct sioc_sg_req6 sr;
1855        struct sioc_mif_req6 vr;
1856        struct vif_device *vif;
1857        struct mfc6_cache *c;
1858        struct net *net = sock_net(sk);
1859        struct mr_table *mrt;
1860
1861        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1862        if (!mrt)
1863                return -ENOENT;
1864
1865        switch (cmd) {
1866        case SIOCGETMIFCNT_IN6:
1867                if (copy_from_user(&vr, arg, sizeof(vr)))
1868                        return -EFAULT;
1869                if (vr.mifi >= mrt->maxvif)
1870                        return -EINVAL;
1871                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1872                read_lock(&mrt_lock);
1873                vif = &mrt->vif_table[vr.mifi];
1874                if (VIF_EXISTS(mrt, vr.mifi)) {
1875                        vr.icount = vif->pkt_in;
1876                        vr.ocount = vif->pkt_out;
1877                        vr.ibytes = vif->bytes_in;
1878                        vr.obytes = vif->bytes_out;
1879                        read_unlock(&mrt_lock);
1880
1881                        if (copy_to_user(arg, &vr, sizeof(vr)))
1882                                return -EFAULT;
1883                        return 0;
1884                }
1885                read_unlock(&mrt_lock);
1886                return -EADDRNOTAVAIL;
1887        case SIOCGETSGCNT_IN6:
1888                if (copy_from_user(&sr, arg, sizeof(sr)))
1889                        return -EFAULT;
1890
1891                rcu_read_lock();
1892                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1893                if (c) {
1894                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1895                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1896                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1897                        rcu_read_unlock();
1898
1899                        if (copy_to_user(arg, &sr, sizeof(sr)))
1900                                return -EFAULT;
1901                        return 0;
1902                }
1903                rcu_read_unlock();
1904                return -EADDRNOTAVAIL;
1905        default:
1906                return -ENOIOCTLCMD;
1907        }
1908}
1909
1910#ifdef CONFIG_COMPAT
1911struct compat_sioc_sg_req6 {
1912        struct sockaddr_in6 src;
1913        struct sockaddr_in6 grp;
1914        compat_ulong_t pktcnt;
1915        compat_ulong_t bytecnt;
1916        compat_ulong_t wrong_if;
1917};
1918
1919struct compat_sioc_mif_req6 {
1920        mifi_t  mifi;
1921        compat_ulong_t icount;
1922        compat_ulong_t ocount;
1923        compat_ulong_t ibytes;
1924        compat_ulong_t obytes;
1925};
1926
1927int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1928{
1929        struct compat_sioc_sg_req6 sr;
1930        struct compat_sioc_mif_req6 vr;
1931        struct vif_device *vif;
1932        struct mfc6_cache *c;
1933        struct net *net = sock_net(sk);
1934        struct mr_table *mrt;
1935
1936        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1937        if (!mrt)
1938                return -ENOENT;
1939
1940        switch (cmd) {
1941        case SIOCGETMIFCNT_IN6:
1942                if (copy_from_user(&vr, arg, sizeof(vr)))
1943                        return -EFAULT;
1944                if (vr.mifi >= mrt->maxvif)
1945                        return -EINVAL;
1946                vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
1947                read_lock(&mrt_lock);
1948                vif = &mrt->vif_table[vr.mifi];
1949                if (VIF_EXISTS(mrt, vr.mifi)) {
1950                        vr.icount = vif->pkt_in;
1951                        vr.ocount = vif->pkt_out;
1952                        vr.ibytes = vif->bytes_in;
1953                        vr.obytes = vif->bytes_out;
1954                        read_unlock(&mrt_lock);
1955
1956                        if (copy_to_user(arg, &vr, sizeof(vr)))
1957                                return -EFAULT;
1958                        return 0;
1959                }
1960                read_unlock(&mrt_lock);
1961                return -EADDRNOTAVAIL;
1962        case SIOCGETSGCNT_IN6:
1963                if (copy_from_user(&sr, arg, sizeof(sr)))
1964                        return -EFAULT;
1965
1966                rcu_read_lock();
1967                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1968                if (c) {
1969                        sr.pktcnt = c->_c.mfc_un.res.pkt;
1970                        sr.bytecnt = c->_c.mfc_un.res.bytes;
1971                        sr.wrong_if = c->_c.mfc_un.res.wrong_if;
1972                        rcu_read_unlock();
1973
1974                        if (copy_to_user(arg, &sr, sizeof(sr)))
1975                                return -EFAULT;
1976                        return 0;
1977                }
1978                rcu_read_unlock();
1979                return -EADDRNOTAVAIL;
1980        default:
1981                return -ENOIOCTLCMD;
1982        }
1983}
1984#endif
1985
1986static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1987{
1988        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1989                      IPSTATS_MIB_OUTFORWDATAGRAMS);
1990        IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1991                      IPSTATS_MIB_OUTOCTETS, skb->len);
1992        return dst_output(net, sk, skb);
1993}
1994
1995/*
1996 *      Processing handlers for ip6mr_forward
1997 */
1998
1999static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
2000                          struct sk_buff *skb, int vifi)
2001{
2002        struct ipv6hdr *ipv6h;
2003        struct vif_device *vif = &mrt->vif_table[vifi];
2004        struct net_device *dev;
2005        struct dst_entry *dst;
2006        struct flowi6 fl6;
2007
2008        if (!vif->dev)
2009                goto out_free;
2010
2011#ifdef CONFIG_IPV6_PIMSM_V2
2012        if (vif->flags & MIFF_REGISTER) {
2013                vif->pkt_out++;
2014                vif->bytes_out += skb->len;
2015                vif->dev->stats.tx_bytes += skb->len;
2016                vif->dev->stats.tx_packets++;
2017                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2018                goto out_free;
2019        }
2020#endif
2021
2022        ipv6h = ipv6_hdr(skb);
2023
2024        fl6 = (struct flowi6) {
2025                .flowi6_oif = vif->link,
2026                .daddr = ipv6h->daddr,
2027        };
2028
2029        dst = ip6_route_output(net, NULL, &fl6);
2030        if (dst->error) {
2031                dst_release(dst);
2032                goto out_free;
2033        }
2034
2035        skb_dst_drop(skb);
2036        skb_dst_set(skb, dst);
2037
2038        /*
2039         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2040         * not only before forwarding, but after forwarding on all output
2041         * interfaces. It is clear, if mrouter runs a multicasting
2042         * program, it should receive packets not depending to what interface
2043         * program is joined.
2044         * If we will not make it, the program will have to join on all
2045         * interfaces. On the other hand, multihoming host (or router, but
2046         * not mrouter) cannot join to more than one interface - it will
2047         * result in receiving multiple packets.
2048         */
2049        dev = vif->dev;
2050        skb->dev = dev;
2051        vif->pkt_out++;
2052        vif->bytes_out += skb->len;
2053
2054        /* We are about to write */
2055        /* XXX: extension headers? */
2056        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2057                goto out_free;
2058
2059        ipv6h = ipv6_hdr(skb);
2060        ipv6h->hop_limit--;
2061
2062        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2063
2064        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2065                       net, NULL, skb, skb->dev, dev,
2066                       ip6mr_forward2_finish);
2067
2068out_free:
2069        kfree_skb(skb);
2070        return 0;
2071}
2072
2073static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
2074{
2075        int ct;
2076
2077        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2078                if (mrt->vif_table[ct].dev == dev)
2079                        break;
2080        }
2081        return ct;
2082}
2083
2084static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
2085                           struct net_device *dev, struct sk_buff *skb,
2086                           struct mfc6_cache *c)
2087{
2088        int psend = -1;
2089        int vif, ct;
2090        int true_vifi = ip6mr_find_vif(mrt, dev);
2091
2092        vif = c->_c.mfc_parent;
2093        c->_c.mfc_un.res.pkt++;
2094        c->_c.mfc_un.res.bytes += skb->len;
2095        c->_c.mfc_un.res.lastuse = jiffies;
2096
2097        if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
2098                struct mfc6_cache *cache_proxy;
2099
2100                /* For an (*,G) entry, we only check that the incoming
2101                 * interface is part of the static tree.
2102                 */
2103                rcu_read_lock();
2104                cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2105                if (cache_proxy &&
2106                    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
2107                        rcu_read_unlock();
2108                        goto forward;
2109                }
2110                rcu_read_unlock();
2111        }
2112
2113        /*
2114         * Wrong interface: drop packet and (maybe) send PIM assert.
2115         */
2116        if (mrt->vif_table[vif].dev != dev) {
2117                c->_c.mfc_un.res.wrong_if++;
2118
2119                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2120                    /* pimsm uses asserts, when switching from RPT to SPT,
2121                       so that we cannot check that packet arrived on an oif.
2122                       It is bad, but otherwise we would need to move pretty
2123                       large chunk of pimd to kernel. Ough... --ANK
2124                     */
2125                    (mrt->mroute_do_pim ||
2126                     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2127                    time_after(jiffies,
2128                               c->_c.mfc_un.res.last_assert +
2129                               MFC_ASSERT_THRESH)) {
2130                        c->_c.mfc_un.res.last_assert = jiffies;
2131                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2132                }
2133                goto dont_forward;
2134        }
2135
2136forward:
2137        mrt->vif_table[vif].pkt_in++;
2138        mrt->vif_table[vif].bytes_in += skb->len;
2139
2140        /*
2141         *      Forward the frame
2142         */
2143        if (ipv6_addr_any(&c->mf6c_origin) &&
2144            ipv6_addr_any(&c->mf6c_mcastgrp)) {
2145                if (true_vifi >= 0 &&
2146                    true_vifi != c->_c.mfc_parent &&
2147                    ipv6_hdr(skb)->hop_limit >
2148                                c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2149                        /* It's an (*,*) entry and the packet is not coming from
2150                         * the upstream: forward the packet to the upstream
2151                         * only.
2152                         */
2153                        psend = c->_c.mfc_parent;
2154                        goto last_forward;
2155                }
2156                goto dont_forward;
2157        }
2158        for (ct = c->_c.mfc_un.res.maxvif - 1;
2159             ct >= c->_c.mfc_un.res.minvif; ct--) {
2160                /* For (*,G) entry, don't forward to the incoming interface */
2161                if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
2162                    ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
2163                        if (psend != -1) {
2164                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2165                                if (skb2)
2166                                        ip6mr_forward2(net, mrt, skb2, psend);
2167                        }
2168                        psend = ct;
2169                }
2170        }
2171last_forward:
2172        if (psend != -1) {
2173                ip6mr_forward2(net, mrt, skb, psend);
2174                return;
2175        }
2176
2177dont_forward:
2178        kfree_skb(skb);
2179}
2180
2181
2182/*
2183 *      Multicast packets for forwarding arrive here
2184 */
2185
2186int ip6_mr_input(struct sk_buff *skb)
2187{
2188        struct mfc6_cache *cache;
2189        struct net *net = dev_net(skb->dev);
2190        struct mr_table *mrt;
2191        struct flowi6 fl6 = {
2192                .flowi6_iif     = skb->dev->ifindex,
2193                .flowi6_mark    = skb->mark,
2194        };
2195        int err;
2196        struct net_device *dev;
2197
2198        /* skb->dev passed in is the master dev for vrfs.
2199         * Get the proper interface that does have a vif associated with it.
2200         */
2201        dev = skb->dev;
2202        if (netif_is_l3_master(skb->dev)) {
2203                dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2204                if (!dev) {
2205                        kfree_skb(skb);
2206                        return -ENODEV;
2207                }
2208        }
2209
2210        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2211        if (err < 0) {
2212                kfree_skb(skb);
2213                return err;
2214        }
2215
2216        read_lock(&mrt_lock);
2217        cache = ip6mr_cache_find(mrt,
2218                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2219        if (!cache) {
2220                int vif = ip6mr_find_vif(mrt, dev);
2221
2222                if (vif >= 0)
2223                        cache = ip6mr_cache_find_any(mrt,
2224                                                     &ipv6_hdr(skb)->daddr,
2225                                                     vif);
2226        }
2227
2228        /*
2229         *      No usable cache entry
2230         */
2231        if (!cache) {
2232                int vif;
2233
2234                vif = ip6mr_find_vif(mrt, dev);
2235                if (vif >= 0) {
2236                        int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
2237                        read_unlock(&mrt_lock);
2238
2239                        return err;
2240                }
2241                read_unlock(&mrt_lock);
2242                kfree_skb(skb);
2243                return -ENODEV;
2244        }
2245
2246        ip6_mr_forward(net, mrt, dev, skb, cache);
2247
2248        read_unlock(&mrt_lock);
2249
2250        return 0;
2251}
2252
2253int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2254                    u32 portid)
2255{
2256        int err;
2257        struct mr_table *mrt;
2258        struct mfc6_cache *cache;
2259        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2260
2261        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2262        if (!mrt)
2263                return -ENOENT;
2264
2265        read_lock(&mrt_lock);
2266        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2267        if (!cache && skb->dev) {
2268                int vif = ip6mr_find_vif(mrt, skb->dev);
2269
2270                if (vif >= 0)
2271                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2272                                                     vif);
2273        }
2274
2275        if (!cache) {
2276                struct sk_buff *skb2;
2277                struct ipv6hdr *iph;
2278                struct net_device *dev;
2279                int vif;
2280
2281                dev = skb->dev;
2282                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2283                        read_unlock(&mrt_lock);
2284                        return -ENODEV;
2285                }
2286
2287                /* really correct? */
2288                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2289                if (!skb2) {
2290                        read_unlock(&mrt_lock);
2291                        return -ENOMEM;
2292                }
2293
2294                NETLINK_CB(skb2).portid = portid;
2295                skb_reset_transport_header(skb2);
2296
2297                skb_put(skb2, sizeof(struct ipv6hdr));
2298                skb_reset_network_header(skb2);
2299
2300                iph = ipv6_hdr(skb2);
2301                iph->version = 0;
2302                iph->priority = 0;
2303                iph->flow_lbl[0] = 0;
2304                iph->flow_lbl[1] = 0;
2305                iph->flow_lbl[2] = 0;
2306                iph->payload_len = 0;
2307                iph->nexthdr = IPPROTO_NONE;
2308                iph->hop_limit = 0;
2309                iph->saddr = rt->rt6i_src.addr;
2310                iph->daddr = rt->rt6i_dst.addr;
2311
2312                err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
2313                read_unlock(&mrt_lock);
2314
2315                return err;
2316        }
2317
2318        err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2319        read_unlock(&mrt_lock);
2320        return err;
2321}
2322
2323static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2324                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2325                             int flags)
2326{
2327        struct nlmsghdr *nlh;
2328        struct rtmsg *rtm;
2329        int err;
2330
2331        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2332        if (!nlh)
2333                return -EMSGSIZE;
2334
2335        rtm = nlmsg_data(nlh);
2336        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2337        rtm->rtm_dst_len  = 128;
2338        rtm->rtm_src_len  = 128;
2339        rtm->rtm_tos      = 0;
2340        rtm->rtm_table    = mrt->id;
2341        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2342                goto nla_put_failure;
2343        rtm->rtm_type = RTN_MULTICAST;
2344        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2345        if (c->_c.mfc_flags & MFC_STATIC)
2346                rtm->rtm_protocol = RTPROT_STATIC;
2347        else
2348                rtm->rtm_protocol = RTPROT_MROUTED;
2349        rtm->rtm_flags    = 0;
2350
2351        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2352            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2353                goto nla_put_failure;
2354        err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2355        /* do not break the dump if cache is unresolved */
2356        if (err < 0 && err != -ENOENT)
2357                goto nla_put_failure;
2358
2359        nlmsg_end(skb, nlh);
2360        return 0;
2361
2362nla_put_failure:
2363        nlmsg_cancel(skb, nlh);
2364        return -EMSGSIZE;
2365}
2366
2367static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2368                              u32 portid, u32 seq, struct mr_mfc *c,
2369                              int cmd, int flags)
2370{
2371        return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
2372                                 cmd, flags);
2373}
2374
2375static int mr6_msgsize(bool unresolved, int maxvif)
2376{
2377        size_t len =
2378                NLMSG_ALIGN(sizeof(struct rtmsg))
2379                + nla_total_size(4)     /* RTA_TABLE */
2380                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2381                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2382                ;
2383
2384        if (!unresolved)
2385                len = len
2386                      + nla_total_size(4)       /* RTA_IIF */
2387                      + nla_total_size(0)       /* RTA_MULTIPATH */
2388                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2389                                                /* RTA_MFC_STATS */
2390                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2391                ;
2392
2393        return len;
2394}
2395
2396static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
2397                              int cmd)
2398{
2399        struct net *net = read_pnet(&mrt->net);
2400        struct sk_buff *skb;
2401        int err = -ENOBUFS;
2402
2403        skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
2404                        GFP_ATOMIC);
2405        if (!skb)
2406                goto errout;
2407
2408        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2409        if (err < 0)
2410                goto errout;
2411
2412        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2413        return;
2414
2415errout:
2416        kfree_skb(skb);
2417        if (err < 0)
2418                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2419}
2420
2421static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
2422{
2423        size_t len =
2424                NLMSG_ALIGN(sizeof(struct rtgenmsg))
2425                + nla_total_size(1)     /* IP6MRA_CREPORT_MSGTYPE */
2426                + nla_total_size(4)     /* IP6MRA_CREPORT_MIF_ID */
2427                                        /* IP6MRA_CREPORT_SRC_ADDR */
2428                + nla_total_size(sizeof(struct in6_addr))
2429                                        /* IP6MRA_CREPORT_DST_ADDR */
2430                + nla_total_size(sizeof(struct in6_addr))
2431                                        /* IP6MRA_CREPORT_PKT */
2432                + nla_total_size(payloadlen)
2433                ;
2434
2435        return len;
2436}
2437
2438static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
2439{
2440        struct net *net = read_pnet(&mrt->net);
2441        struct nlmsghdr *nlh;
2442        struct rtgenmsg *rtgenm;
2443        struct mrt6msg *msg;
2444        struct sk_buff *skb;
2445        struct nlattr *nla;
2446        int payloadlen;
2447
2448        payloadlen = pkt->len - sizeof(struct mrt6msg);
2449        msg = (struct mrt6msg *)skb_transport_header(pkt);
2450
2451        skb = nlmsg_new(mrt6msg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2452        if (!skb)
2453                goto errout;
2454
2455        nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2456                        sizeof(struct rtgenmsg), 0);
2457        if (!nlh)
2458                goto errout;
2459        rtgenm = nlmsg_data(nlh);
2460        rtgenm->rtgen_family = RTNL_FAMILY_IP6MR;
2461        if (nla_put_u8(skb, IP6MRA_CREPORT_MSGTYPE, msg->im6_msgtype) ||
2462            nla_put_u32(skb, IP6MRA_CREPORT_MIF_ID, msg->im6_mif) ||
2463            nla_put_in6_addr(skb, IP6MRA_CREPORT_SRC_ADDR,
2464                             &msg->im6_src) ||
2465            nla_put_in6_addr(skb, IP6MRA_CREPORT_DST_ADDR,
2466                             &msg->im6_dst))
2467                goto nla_put_failure;
2468
2469        nla = nla_reserve(skb, IP6MRA_CREPORT_PKT, payloadlen);
2470        if (!nla || skb_copy_bits(pkt, sizeof(struct mrt6msg),
2471                                  nla_data(nla), payloadlen))
2472                goto nla_put_failure;
2473
2474        nlmsg_end(skb, nlh);
2475
2476        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE_R, NULL, GFP_ATOMIC);
2477        return;
2478
2479nla_put_failure:
2480        nlmsg_cancel(skb, nlh);
2481errout:
2482        kfree_skb(skb);
2483        rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE_R, -ENOBUFS);
2484}
2485
2486static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2487{
2488        const struct nlmsghdr *nlh = cb->nlh;
2489        struct fib_dump_filter filter = {};
2490        int err;
2491
2492        if (cb->strict_check) {
2493                err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
2494                                            &filter, cb);
2495                if (err < 0)
2496                        return err;
2497        }
2498
2499        if (filter.table_id) {
2500                struct mr_table *mrt;
2501
2502                mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
2503                if (!mrt) {
2504                        if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
2505                                return skb->len;
2506
2507                        NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
2508                        return -ENOENT;
2509                }
2510                err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
2511                                    &mfc_unres_lock, &filter);
2512                return skb->len ? : err;
2513        }
2514
2515        return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
2516                                _ip6mr_fill_mroute, &mfc_unres_lock, &filter);
2517}
2518