linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
  57struct mr6_table {
  58        struct list_head        list;
  59        possible_net_t          net;
  60        u32                     id;
  61        struct sock             *mroute6_sk;
  62        struct timer_list       ipmr_expire_timer;
  63        struct list_head        mfc6_unres_queue;
  64        struct list_head        mfc6_cache_array[MFC6_LINES];
  65        struct mif_device       vif6_table[MAXMIFS];
  66        int                     maxvif;
  67        atomic_t                cache_resolve_queue_len;
  68        bool                    mroute_do_assert;
  69        bool                    mroute_do_pim;
  70#ifdef CONFIG_IPV6_PIMSM_V2
  71        int                     mroute_reg_vif_num;
  72#endif
  73};
  74
  75struct ip6mr_rule {
  76        struct fib_rule         common;
  77};
  78
  79struct ip6mr_result {
  80        struct mr6_table        *mrt;
  81};
  82
  83/* Big lock, protecting vif table, mrt cache and mroute socket state.
  84   Note that the changes are semaphored via rtnl_lock.
  85 */
  86
  87static DEFINE_RWLOCK(mrt_lock);
  88
  89/*
  90 *      Multicast router control variables
  91 */
  92
  93#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  94
  95/* Special spinlock for queue of unresolved entries */
  96static DEFINE_SPINLOCK(mfc_unres_lock);
  97
  98/* We return to original Alan's scheme. Hash table of resolved
  99   entries is changed only in process context and protected
 100   with weak lock mrt_lock. Queue of unresolved entries is protected
 101   with strong spinlock mfc_unres_lock.
 102
 103   In this case data path is free of exclusive locks at all.
 104 */
 105
 106static struct kmem_cache *mrt_cachep __read_mostly;
 107
 108static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 109static void ip6mr_free_table(struct mr6_table *mrt);
 110
 111static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 112                           struct sk_buff *skb, struct mfc6_cache *cache);
 113static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 114                              mifi_t mifi, int assert);
 115static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 116                               struct mfc6_cache *c, struct rtmsg *rtm);
 117static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 118                              int cmd);
 119static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 120                               struct netlink_callback *cb);
 121static void mroute_clean_tables(struct mr6_table *mrt, bool all);
 122static void ipmr_expire_process(unsigned long arg);
 123
 124#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 125#define ip6mr_for_each_table(mrt, net) \
 126        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 127
 128static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 129{
 130        struct mr6_table *mrt;
 131
 132        ip6mr_for_each_table(mrt, net) {
 133                if (mrt->id == id)
 134                        return mrt;
 135        }
 136        return NULL;
 137}
 138
 139static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 140                            struct mr6_table **mrt)
 141{
 142        int err;
 143        struct ip6mr_result res;
 144        struct fib_lookup_arg arg = {
 145                .result = &res,
 146                .flags = FIB_LOOKUP_NOREF,
 147        };
 148
 149        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 150                               flowi6_to_flowi(flp6), 0, &arg);
 151        if (err < 0)
 152                return err;
 153        *mrt = res.mrt;
 154        return 0;
 155}
 156
 157static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 158                             int flags, struct fib_lookup_arg *arg)
 159{
 160        struct ip6mr_result *res = arg->result;
 161        struct mr6_table *mrt;
 162
 163        switch (rule->action) {
 164        case FR_ACT_TO_TBL:
 165                break;
 166        case FR_ACT_UNREACHABLE:
 167                return -ENETUNREACH;
 168        case FR_ACT_PROHIBIT:
 169                return -EACCES;
 170        case FR_ACT_BLACKHOLE:
 171        default:
 172                return -EINVAL;
 173        }
 174
 175        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 176        if (!mrt)
 177                return -EAGAIN;
 178        res->mrt = mrt;
 179        return 0;
 180}
 181
 182static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 183{
 184        return 1;
 185}
 186
 187static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 188        FRA_GENERIC_POLICY,
 189};
 190
 191static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 192                                struct fib_rule_hdr *frh, struct nlattr **tb)
 193{
 194        return 0;
 195}
 196
 197static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 198                              struct nlattr **tb)
 199{
 200        return 1;
 201}
 202
 203static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 204                           struct fib_rule_hdr *frh)
 205{
 206        frh->dst_len = 0;
 207        frh->src_len = 0;
 208        frh->tos     = 0;
 209        return 0;
 210}
 211
 212static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 213        .family         = RTNL_FAMILY_IP6MR,
 214        .rule_size      = sizeof(struct ip6mr_rule),
 215        .addr_size      = sizeof(struct in6_addr),
 216        .action         = ip6mr_rule_action,
 217        .match          = ip6mr_rule_match,
 218        .configure      = ip6mr_rule_configure,
 219        .compare        = ip6mr_rule_compare,
 220        .fill           = ip6mr_rule_fill,
 221        .nlgroup        = RTNLGRP_IPV6_RULE,
 222        .policy         = ip6mr_rule_policy,
 223        .owner          = THIS_MODULE,
 224};
 225
 226static int __net_init ip6mr_rules_init(struct net *net)
 227{
 228        struct fib_rules_ops *ops;
 229        struct mr6_table *mrt;
 230        int err;
 231
 232        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 233        if (IS_ERR(ops))
 234                return PTR_ERR(ops);
 235
 236        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 237
 238        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 239        if (!mrt) {
 240                err = -ENOMEM;
 241                goto err1;
 242        }
 243
 244        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 245        if (err < 0)
 246                goto err2;
 247
 248        net->ipv6.mr6_rules_ops = ops;
 249        return 0;
 250
 251err2:
 252        ip6mr_free_table(mrt);
 253err1:
 254        fib_rules_unregister(ops);
 255        return err;
 256}
 257
 258static void __net_exit ip6mr_rules_exit(struct net *net)
 259{
 260        struct mr6_table *mrt, *next;
 261
 262        rtnl_lock();
 263        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 264                list_del(&mrt->list);
 265                ip6mr_free_table(mrt);
 266        }
 267        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 268        rtnl_unlock();
 269}
 270#else
 271#define ip6mr_for_each_table(mrt, net) \
 272        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 273
 274static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 275{
 276        return net->ipv6.mrt6;
 277}
 278
 279static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 280                            struct mr6_table **mrt)
 281{
 282        *mrt = net->ipv6.mrt6;
 283        return 0;
 284}
 285
 286static int __net_init ip6mr_rules_init(struct net *net)
 287{
 288        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 289        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 290}
 291
 292static void __net_exit ip6mr_rules_exit(struct net *net)
 293{
 294        rtnl_lock();
 295        ip6mr_free_table(net->ipv6.mrt6);
 296        net->ipv6.mrt6 = NULL;
 297        rtnl_unlock();
 298}
 299#endif
 300
 301static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 302{
 303        struct mr6_table *mrt;
 304        unsigned int i;
 305
 306        mrt = ip6mr_get_table(net, id);
 307        if (mrt)
 308                return mrt;
 309
 310        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 311        if (!mrt)
 312                return NULL;
 313        mrt->id = id;
 314        write_pnet(&mrt->net, net);
 315
 316        /* Forwarding cache */
 317        for (i = 0; i < MFC6_LINES; i++)
 318                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 319
 320        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 321
 322        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 323                    (unsigned long)mrt);
 324
 325#ifdef CONFIG_IPV6_PIMSM_V2
 326        mrt->mroute_reg_vif_num = -1;
 327#endif
 328#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 329        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 330#endif
 331        return mrt;
 332}
 333
 334static void ip6mr_free_table(struct mr6_table *mrt)
 335{
 336        del_timer_sync(&mrt->ipmr_expire_timer);
 337        mroute_clean_tables(mrt, true);
 338        kfree(mrt);
 339}
 340
 341#ifdef CONFIG_PROC_FS
 342
 343struct ipmr_mfc_iter {
 344        struct seq_net_private p;
 345        struct mr6_table *mrt;
 346        struct list_head *cache;
 347        int ct;
 348};
 349
 350
 351static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 352                                           struct ipmr_mfc_iter *it, loff_t pos)
 353{
 354        struct mr6_table *mrt = it->mrt;
 355        struct mfc6_cache *mfc;
 356
 357        read_lock(&mrt_lock);
 358        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 359                it->cache = &mrt->mfc6_cache_array[it->ct];
 360                list_for_each_entry(mfc, it->cache, list)
 361                        if (pos-- == 0)
 362                                return mfc;
 363        }
 364        read_unlock(&mrt_lock);
 365
 366        spin_lock_bh(&mfc_unres_lock);
 367        it->cache = &mrt->mfc6_unres_queue;
 368        list_for_each_entry(mfc, it->cache, list)
 369                if (pos-- == 0)
 370                        return mfc;
 371        spin_unlock_bh(&mfc_unres_lock);
 372
 373        it->cache = NULL;
 374        return NULL;
 375}
 376
 377/*
 378 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 379 */
 380
 381struct ipmr_vif_iter {
 382        struct seq_net_private p;
 383        struct mr6_table *mrt;
 384        int ct;
 385};
 386
 387static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 388                                            struct ipmr_vif_iter *iter,
 389                                            loff_t pos)
 390{
 391        struct mr6_table *mrt = iter->mrt;
 392
 393        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 394                if (!MIF_EXISTS(mrt, iter->ct))
 395                        continue;
 396                if (pos-- == 0)
 397                        return &mrt->vif6_table[iter->ct];
 398        }
 399        return NULL;
 400}
 401
 402static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 403        __acquires(mrt_lock)
 404{
 405        struct ipmr_vif_iter *iter = seq->private;
 406        struct net *net = seq_file_net(seq);
 407        struct mr6_table *mrt;
 408
 409        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 410        if (!mrt)
 411                return ERR_PTR(-ENOENT);
 412
 413        iter->mrt = mrt;
 414
 415        read_lock(&mrt_lock);
 416        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 417                : SEQ_START_TOKEN;
 418}
 419
 420static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 421{
 422        struct ipmr_vif_iter *iter = seq->private;
 423        struct net *net = seq_file_net(seq);
 424        struct mr6_table *mrt = iter->mrt;
 425
 426        ++*pos;
 427        if (v == SEQ_START_TOKEN)
 428                return ip6mr_vif_seq_idx(net, iter, 0);
 429
 430        while (++iter->ct < mrt->maxvif) {
 431                if (!MIF_EXISTS(mrt, iter->ct))
 432                        continue;
 433                return &mrt->vif6_table[iter->ct];
 434        }
 435        return NULL;
 436}
 437
 438static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 439        __releases(mrt_lock)
 440{
 441        read_unlock(&mrt_lock);
 442}
 443
 444static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 445{
 446        struct ipmr_vif_iter *iter = seq->private;
 447        struct mr6_table *mrt = iter->mrt;
 448
 449        if (v == SEQ_START_TOKEN) {
 450                seq_puts(seq,
 451                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 452        } else {
 453                const struct mif_device *vif = v;
 454                const char *name = vif->dev ? vif->dev->name : "none";
 455
 456                seq_printf(seq,
 457                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 458                           vif - mrt->vif6_table,
 459                           name, vif->bytes_in, vif->pkt_in,
 460                           vif->bytes_out, vif->pkt_out,
 461                           vif->flags);
 462        }
 463        return 0;
 464}
 465
 466static const struct seq_operations ip6mr_vif_seq_ops = {
 467        .start = ip6mr_vif_seq_start,
 468        .next  = ip6mr_vif_seq_next,
 469        .stop  = ip6mr_vif_seq_stop,
 470        .show  = ip6mr_vif_seq_show,
 471};
 472
 473static int ip6mr_vif_open(struct inode *inode, struct file *file)
 474{
 475        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 476                            sizeof(struct ipmr_vif_iter));
 477}
 478
 479static const struct file_operations ip6mr_vif_fops = {
 480        .owner   = THIS_MODULE,
 481        .open    = ip6mr_vif_open,
 482        .read    = seq_read,
 483        .llseek  = seq_lseek,
 484        .release = seq_release_net,
 485};
 486
 487static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 488{
 489        struct ipmr_mfc_iter *it = seq->private;
 490        struct net *net = seq_file_net(seq);
 491        struct mr6_table *mrt;
 492
 493        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 494        if (!mrt)
 495                return ERR_PTR(-ENOENT);
 496
 497        it->mrt = mrt;
 498        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 499                : SEQ_START_TOKEN;
 500}
 501
 502static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 503{
 504        struct mfc6_cache *mfc = v;
 505        struct ipmr_mfc_iter *it = seq->private;
 506        struct net *net = seq_file_net(seq);
 507        struct mr6_table *mrt = it->mrt;
 508
 509        ++*pos;
 510
 511        if (v == SEQ_START_TOKEN)
 512                return ipmr_mfc_seq_idx(net, seq->private, 0);
 513
 514        if (mfc->list.next != it->cache)
 515                return list_entry(mfc->list.next, struct mfc6_cache, list);
 516
 517        if (it->cache == &mrt->mfc6_unres_queue)
 518                goto end_of_list;
 519
 520        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 521
 522        while (++it->ct < MFC6_LINES) {
 523                it->cache = &mrt->mfc6_cache_array[it->ct];
 524                if (list_empty(it->cache))
 525                        continue;
 526                return list_first_entry(it->cache, struct mfc6_cache, list);
 527        }
 528
 529        /* exhausted cache_array, show unresolved */
 530        read_unlock(&mrt_lock);
 531        it->cache = &mrt->mfc6_unres_queue;
 532        it->ct = 0;
 533
 534        spin_lock_bh(&mfc_unres_lock);
 535        if (!list_empty(it->cache))
 536                return list_first_entry(it->cache, struct mfc6_cache, list);
 537
 538 end_of_list:
 539        spin_unlock_bh(&mfc_unres_lock);
 540        it->cache = NULL;
 541
 542        return NULL;
 543}
 544
 545static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 546{
 547        struct ipmr_mfc_iter *it = seq->private;
 548        struct mr6_table *mrt = it->mrt;
 549
 550        if (it->cache == &mrt->mfc6_unres_queue)
 551                spin_unlock_bh(&mfc_unres_lock);
 552        else if (it->cache == &mrt->mfc6_cache_array[it->ct])
 553                read_unlock(&mrt_lock);
 554}
 555
 556static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 557{
 558        int n;
 559
 560        if (v == SEQ_START_TOKEN) {
 561                seq_puts(seq,
 562                         "Group                            "
 563                         "Origin                           "
 564                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 565        } else {
 566                const struct mfc6_cache *mfc = v;
 567                const struct ipmr_mfc_iter *it = seq->private;
 568                struct mr6_table *mrt = it->mrt;
 569
 570                seq_printf(seq, "%pI6 %pI6 %-3hd",
 571                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 572                           mfc->mf6c_parent);
 573
 574                if (it->cache != &mrt->mfc6_unres_queue) {
 575                        seq_printf(seq, " %8lu %8lu %8lu",
 576                                   mfc->mfc_un.res.pkt,
 577                                   mfc->mfc_un.res.bytes,
 578                                   mfc->mfc_un.res.wrong_if);
 579                        for (n = mfc->mfc_un.res.minvif;
 580                             n < mfc->mfc_un.res.maxvif; n++) {
 581                                if (MIF_EXISTS(mrt, n) &&
 582                                    mfc->mfc_un.res.ttls[n] < 255)
 583                                        seq_printf(seq,
 584                                                   " %2d:%-3d",
 585                                                   n, mfc->mfc_un.res.ttls[n]);
 586                        }
 587                } else {
 588                        /* unresolved mfc_caches don't contain
 589                         * pkt, bytes and wrong_if values
 590                         */
 591                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 592                }
 593                seq_putc(seq, '\n');
 594        }
 595        return 0;
 596}
 597
 598static const struct seq_operations ipmr_mfc_seq_ops = {
 599        .start = ipmr_mfc_seq_start,
 600        .next  = ipmr_mfc_seq_next,
 601        .stop  = ipmr_mfc_seq_stop,
 602        .show  = ipmr_mfc_seq_show,
 603};
 604
 605static int ipmr_mfc_open(struct inode *inode, struct file *file)
 606{
 607        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 608                            sizeof(struct ipmr_mfc_iter));
 609}
 610
 611static const struct file_operations ip6mr_mfc_fops = {
 612        .owner   = THIS_MODULE,
 613        .open    = ipmr_mfc_open,
 614        .read    = seq_read,
 615        .llseek  = seq_lseek,
 616        .release = seq_release_net,
 617};
 618#endif
 619
 620#ifdef CONFIG_IPV6_PIMSM_V2
 621
 622static int pim6_rcv(struct sk_buff *skb)
 623{
 624        struct pimreghdr *pim;
 625        struct ipv6hdr   *encap;
 626        struct net_device  *reg_dev = NULL;
 627        struct net *net = dev_net(skb->dev);
 628        struct mr6_table *mrt;
 629        struct flowi6 fl6 = {
 630                .flowi6_iif     = skb->dev->ifindex,
 631                .flowi6_mark    = skb->mark,
 632        };
 633        int reg_vif_num;
 634
 635        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 636                goto drop;
 637
 638        pim = (struct pimreghdr *)skb_transport_header(skb);
 639        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 640            (pim->flags & PIM_NULL_REGISTER) ||
 641            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 642                             sizeof(*pim), IPPROTO_PIM,
 643                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 644             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 645                goto drop;
 646
 647        /* check if the inner packet is destined to mcast group */
 648        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 649                                   sizeof(*pim));
 650
 651        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 652            encap->payload_len == 0 ||
 653            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 654                goto drop;
 655
 656        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 657                goto drop;
 658        reg_vif_num = mrt->mroute_reg_vif_num;
 659
 660        read_lock(&mrt_lock);
 661        if (reg_vif_num >= 0)
 662                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 663        if (reg_dev)
 664                dev_hold(reg_dev);
 665        read_unlock(&mrt_lock);
 666
 667        if (!reg_dev)
 668                goto drop;
 669
 670        skb->mac_header = skb->network_header;
 671        skb_pull(skb, (u8 *)encap - skb->data);
 672        skb_reset_network_header(skb);
 673        skb->protocol = htons(ETH_P_IPV6);
 674        skb->ip_summed = CHECKSUM_NONE;
 675
 676        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 677
 678        netif_rx(skb);
 679
 680        dev_put(reg_dev);
 681        return 0;
 682 drop:
 683        kfree_skb(skb);
 684        return 0;
 685}
 686
 687static const struct inet6_protocol pim6_protocol = {
 688        .handler        =       pim6_rcv,
 689};
 690
 691/* Service routines creating virtual interfaces: PIMREG */
 692
 693static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 694                                      struct net_device *dev)
 695{
 696        struct net *net = dev_net(dev);
 697        struct mr6_table *mrt;
 698        struct flowi6 fl6 = {
 699                .flowi6_oif     = dev->ifindex,
 700                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 701                .flowi6_mark    = skb->mark,
 702        };
 703        int err;
 704
 705        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 706        if (err < 0) {
 707                kfree_skb(skb);
 708                return err;
 709        }
 710
 711        read_lock(&mrt_lock);
 712        dev->stats.tx_bytes += skb->len;
 713        dev->stats.tx_packets++;
 714        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 715        read_unlock(&mrt_lock);
 716        kfree_skb(skb);
 717        return NETDEV_TX_OK;
 718}
 719
 720static int reg_vif_get_iflink(const struct net_device *dev)
 721{
 722        return 0;
 723}
 724
 725static const struct net_device_ops reg_vif_netdev_ops = {
 726        .ndo_start_xmit = reg_vif_xmit,
 727        .ndo_get_iflink = reg_vif_get_iflink,
 728};
 729
 730static void reg_vif_setup(struct net_device *dev)
 731{
 732        dev->type               = ARPHRD_PIMREG;
 733        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 734        dev->flags              = IFF_NOARP;
 735        dev->netdev_ops         = &reg_vif_netdev_ops;
 736        dev->destructor         = free_netdev;
 737        dev->features           |= NETIF_F_NETNS_LOCAL;
 738}
 739
 740static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 741{
 742        struct net_device *dev;
 743        char name[IFNAMSIZ];
 744
 745        if (mrt->id == RT6_TABLE_DFLT)
 746                sprintf(name, "pim6reg");
 747        else
 748                sprintf(name, "pim6reg%u", mrt->id);
 749
 750        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 751        if (!dev)
 752                return NULL;
 753
 754        dev_net_set(dev, net);
 755
 756        if (register_netdevice(dev)) {
 757                free_netdev(dev);
 758                return NULL;
 759        }
 760
 761        if (dev_open(dev))
 762                goto failure;
 763
 764        dev_hold(dev);
 765        return dev;
 766
 767failure:
 768        unregister_netdevice(dev);
 769        return NULL;
 770}
 771#endif
 772
 773/*
 774 *      Delete a VIF entry
 775 */
 776
 777static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 778{
 779        struct mif_device *v;
 780        struct net_device *dev;
 781        struct inet6_dev *in6_dev;
 782
 783        if (vifi < 0 || vifi >= mrt->maxvif)
 784                return -EADDRNOTAVAIL;
 785
 786        v = &mrt->vif6_table[vifi];
 787
 788        write_lock_bh(&mrt_lock);
 789        dev = v->dev;
 790        v->dev = NULL;
 791
 792        if (!dev) {
 793                write_unlock_bh(&mrt_lock);
 794                return -EADDRNOTAVAIL;
 795        }
 796
 797#ifdef CONFIG_IPV6_PIMSM_V2
 798        if (vifi == mrt->mroute_reg_vif_num)
 799                mrt->mroute_reg_vif_num = -1;
 800#endif
 801
 802        if (vifi + 1 == mrt->maxvif) {
 803                int tmp;
 804                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 805                        if (MIF_EXISTS(mrt, tmp))
 806                                break;
 807                }
 808                mrt->maxvif = tmp + 1;
 809        }
 810
 811        write_unlock_bh(&mrt_lock);
 812
 813        dev_set_allmulti(dev, -1);
 814
 815        in6_dev = __in6_dev_get(dev);
 816        if (in6_dev) {
 817                in6_dev->cnf.mc_forwarding--;
 818                inet6_netconf_notify_devconf(dev_net(dev),
 819                                             NETCONFA_MC_FORWARDING,
 820                                             dev->ifindex, &in6_dev->cnf);
 821        }
 822
 823        if (v->flags & MIFF_REGISTER)
 824                unregister_netdevice_queue(dev, head);
 825
 826        dev_put(dev);
 827        return 0;
 828}
 829
 830static inline void ip6mr_cache_free(struct mfc6_cache *c)
 831{
 832        kmem_cache_free(mrt_cachep, c);
 833}
 834
 835/* Destroy an unresolved cache entry, killing queued skbs
 836   and reporting error to netlink readers.
 837 */
 838
 839static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 840{
 841        struct net *net = read_pnet(&mrt->net);
 842        struct sk_buff *skb;
 843
 844        atomic_dec(&mrt->cache_resolve_queue_len);
 845
 846        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 847                if (ipv6_hdr(skb)->version == 0) {
 848                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 849                        nlh->nlmsg_type = NLMSG_ERROR;
 850                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 851                        skb_trim(skb, nlh->nlmsg_len);
 852                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 853                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 854                } else
 855                        kfree_skb(skb);
 856        }
 857
 858        ip6mr_cache_free(c);
 859}
 860
 861
 862/* Timer process for all the unresolved queue. */
 863
 864static void ipmr_do_expire_process(struct mr6_table *mrt)
 865{
 866        unsigned long now = jiffies;
 867        unsigned long expires = 10 * HZ;
 868        struct mfc6_cache *c, *next;
 869
 870        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 871                if (time_after(c->mfc_un.unres.expires, now)) {
 872                        /* not yet... */
 873                        unsigned long interval = c->mfc_un.unres.expires - now;
 874                        if (interval < expires)
 875                                expires = interval;
 876                        continue;
 877                }
 878
 879                list_del(&c->list);
 880                mr6_netlink_event(mrt, c, RTM_DELROUTE);
 881                ip6mr_destroy_unres(mrt, c);
 882        }
 883
 884        if (!list_empty(&mrt->mfc6_unres_queue))
 885                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 886}
 887
 888static void ipmr_expire_process(unsigned long arg)
 889{
 890        struct mr6_table *mrt = (struct mr6_table *)arg;
 891
 892        if (!spin_trylock(&mfc_unres_lock)) {
 893                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 894                return;
 895        }
 896
 897        if (!list_empty(&mrt->mfc6_unres_queue))
 898                ipmr_do_expire_process(mrt);
 899
 900        spin_unlock(&mfc_unres_lock);
 901}
 902
 903/* Fill oifs list. It is called under write locked mrt_lock. */
 904
 905static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 906                                    unsigned char *ttls)
 907{
 908        int vifi;
 909
 910        cache->mfc_un.res.minvif = MAXMIFS;
 911        cache->mfc_un.res.maxvif = 0;
 912        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 913
 914        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 915                if (MIF_EXISTS(mrt, vifi) &&
 916                    ttls[vifi] && ttls[vifi] < 255) {
 917                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 918                        if (cache->mfc_un.res.minvif > vifi)
 919                                cache->mfc_un.res.minvif = vifi;
 920                        if (cache->mfc_un.res.maxvif <= vifi)
 921                                cache->mfc_un.res.maxvif = vifi + 1;
 922                }
 923        }
 924        cache->mfc_un.res.lastuse = jiffies;
 925}
 926
 927static int mif6_add(struct net *net, struct mr6_table *mrt,
 928                    struct mif6ctl *vifc, int mrtsock)
 929{
 930        int vifi = vifc->mif6c_mifi;
 931        struct mif_device *v = &mrt->vif6_table[vifi];
 932        struct net_device *dev;
 933        struct inet6_dev *in6_dev;
 934        int err;
 935
 936        /* Is vif busy ? */
 937        if (MIF_EXISTS(mrt, vifi))
 938                return -EADDRINUSE;
 939
 940        switch (vifc->mif6c_flags) {
 941#ifdef CONFIG_IPV6_PIMSM_V2
 942        case MIFF_REGISTER:
 943                /*
 944                 * Special Purpose VIF in PIM
 945                 * All the packets will be sent to the daemon
 946                 */
 947                if (mrt->mroute_reg_vif_num >= 0)
 948                        return -EADDRINUSE;
 949                dev = ip6mr_reg_vif(net, mrt);
 950                if (!dev)
 951                        return -ENOBUFS;
 952                err = dev_set_allmulti(dev, 1);
 953                if (err) {
 954                        unregister_netdevice(dev);
 955                        dev_put(dev);
 956                        return err;
 957                }
 958                break;
 959#endif
 960        case 0:
 961                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 962                if (!dev)
 963                        return -EADDRNOTAVAIL;
 964                err = dev_set_allmulti(dev, 1);
 965                if (err) {
 966                        dev_put(dev);
 967                        return err;
 968                }
 969                break;
 970        default:
 971                return -EINVAL;
 972        }
 973
 974        in6_dev = __in6_dev_get(dev);
 975        if (in6_dev) {
 976                in6_dev->cnf.mc_forwarding++;
 977                inet6_netconf_notify_devconf(dev_net(dev),
 978                                             NETCONFA_MC_FORWARDING,
 979                                             dev->ifindex, &in6_dev->cnf);
 980        }
 981
 982        /*
 983         *      Fill in the VIF structures
 984         */
 985        v->rate_limit = vifc->vifc_rate_limit;
 986        v->flags = vifc->mif6c_flags;
 987        if (!mrtsock)
 988                v->flags |= VIFF_STATIC;
 989        v->threshold = vifc->vifc_threshold;
 990        v->bytes_in = 0;
 991        v->bytes_out = 0;
 992        v->pkt_in = 0;
 993        v->pkt_out = 0;
 994        v->link = dev->ifindex;
 995        if (v->flags & MIFF_REGISTER)
 996                v->link = dev_get_iflink(dev);
 997
 998        /* And finish update writing critical data */
 999        write_lock_bh(&mrt_lock);
1000        v->dev = dev;
1001#ifdef CONFIG_IPV6_PIMSM_V2
1002        if (v->flags & MIFF_REGISTER)
1003                mrt->mroute_reg_vif_num = vifi;
1004#endif
1005        if (vifi + 1 > mrt->maxvif)
1006                mrt->maxvif = vifi + 1;
1007        write_unlock_bh(&mrt_lock);
1008        return 0;
1009}
1010
1011static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1012                                           const struct in6_addr *origin,
1013                                           const struct in6_addr *mcastgrp)
1014{
1015        int line = MFC6_HASH(mcastgrp, origin);
1016        struct mfc6_cache *c;
1017
1018        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1019                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1020                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1021                        return c;
1022        }
1023        return NULL;
1024}
1025
1026/* Look for a (*,*,oif) entry */
1027static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1028                                                      mifi_t mifi)
1029{
1030        int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1031        struct mfc6_cache *c;
1032
1033        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1034                if (ipv6_addr_any(&c->mf6c_origin) &&
1035                    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1036                    (c->mfc_un.res.ttls[mifi] < 255))
1037                        return c;
1038
1039        return NULL;
1040}
1041
1042/* Look for a (*,G) entry */
1043static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1044                                               struct in6_addr *mcastgrp,
1045                                               mifi_t mifi)
1046{
1047        int line = MFC6_HASH(mcastgrp, &in6addr_any);
1048        struct mfc6_cache *c, *proxy;
1049
1050        if (ipv6_addr_any(mcastgrp))
1051                goto skip;
1052
1053        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1054                if (ipv6_addr_any(&c->mf6c_origin) &&
1055                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1056                        if (c->mfc_un.res.ttls[mifi] < 255)
1057                                return c;
1058
1059                        /* It's ok if the mifi is part of the static tree */
1060                        proxy = ip6mr_cache_find_any_parent(mrt,
1061                                                            c->mf6c_parent);
1062                        if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1063                                return c;
1064                }
1065
1066skip:
1067        return ip6mr_cache_find_any_parent(mrt, mifi);
1068}
1069
1070/*
1071 *      Allocate a multicast cache entry
1072 */
1073static struct mfc6_cache *ip6mr_cache_alloc(void)
1074{
1075        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1076        if (!c)
1077                return NULL;
1078        c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
1079        c->mfc_un.res.minvif = MAXMIFS;
1080        return c;
1081}
1082
1083static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084{
1085        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086        if (!c)
1087                return NULL;
1088        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090        return c;
1091}
1092
1093/*
1094 *      A cache entry has gone into a resolved state from queued
1095 */
1096
1097static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1098                                struct mfc6_cache *uc, struct mfc6_cache *c)
1099{
1100        struct sk_buff *skb;
1101
1102        /*
1103         *      Play the pending entries through our router
1104         */
1105
1106        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1107                if (ipv6_hdr(skb)->version == 0) {
1108                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1109
1110                        if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1111                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1112                        } else {
1113                                nlh->nlmsg_type = NLMSG_ERROR;
1114                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1115                                skb_trim(skb, nlh->nlmsg_len);
1116                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1117                        }
1118                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1119                } else
1120                        ip6_mr_forward(net, mrt, skb, c);
1121        }
1122}
1123
1124/*
1125 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1126 *      expects the following bizarre scheme.
1127 *
1128 *      Called under mrt_lock.
1129 */
1130
1131static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1132                              mifi_t mifi, int assert)
1133{
1134        struct sk_buff *skb;
1135        struct mrt6msg *msg;
1136        int ret;
1137
1138#ifdef CONFIG_IPV6_PIMSM_V2
1139        if (assert == MRT6MSG_WHOLEPKT)
1140                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1141                                                +sizeof(*msg));
1142        else
1143#endif
1144                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1145
1146        if (!skb)
1147                return -ENOBUFS;
1148
1149        /* I suppose that internal messages
1150         * do not require checksums */
1151
1152        skb->ip_summed = CHECKSUM_UNNECESSARY;
1153
1154#ifdef CONFIG_IPV6_PIMSM_V2
1155        if (assert == MRT6MSG_WHOLEPKT) {
1156                /* Ugly, but we have no choice with this interface.
1157                   Duplicate old header, fix length etc.
1158                   And all this only to mangle msg->im6_msgtype and
1159                   to set msg->im6_mbz to "mbz" :-)
1160                 */
1161                skb_push(skb, -skb_network_offset(pkt));
1162
1163                skb_push(skb, sizeof(*msg));
1164                skb_reset_transport_header(skb);
1165                msg = (struct mrt6msg *)skb_transport_header(skb);
1166                msg->im6_mbz = 0;
1167                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1168                msg->im6_mif = mrt->mroute_reg_vif_num;
1169                msg->im6_pad = 0;
1170                msg->im6_src = ipv6_hdr(pkt)->saddr;
1171                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1172
1173                skb->ip_summed = CHECKSUM_UNNECESSARY;
1174        } else
1175#endif
1176        {
1177        /*
1178         *      Copy the IP header
1179         */
1180
1181        skb_put(skb, sizeof(struct ipv6hdr));
1182        skb_reset_network_header(skb);
1183        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1184
1185        /*
1186         *      Add our header
1187         */
1188        skb_put(skb, sizeof(*msg));
1189        skb_reset_transport_header(skb);
1190        msg = (struct mrt6msg *)skb_transport_header(skb);
1191
1192        msg->im6_mbz = 0;
1193        msg->im6_msgtype = assert;
1194        msg->im6_mif = mifi;
1195        msg->im6_pad = 0;
1196        msg->im6_src = ipv6_hdr(pkt)->saddr;
1197        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1198
1199        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1200        skb->ip_summed = CHECKSUM_UNNECESSARY;
1201        }
1202
1203        if (!mrt->mroute6_sk) {
1204                kfree_skb(skb);
1205                return -EINVAL;
1206        }
1207
1208        /*
1209         *      Deliver to user space multicast routing algorithms
1210         */
1211        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1212        if (ret < 0) {
1213                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1214                kfree_skb(skb);
1215        }
1216
1217        return ret;
1218}
1219
1220/*
1221 *      Queue a packet for resolution. It gets locked cache entry!
1222 */
1223
1224static int
1225ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1226{
1227        bool found = false;
1228        int err;
1229        struct mfc6_cache *c;
1230
1231        spin_lock_bh(&mfc_unres_lock);
1232        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1233                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1234                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1235                        found = true;
1236                        break;
1237                }
1238        }
1239
1240        if (!found) {
1241                /*
1242                 *      Create a new entry if allowable
1243                 */
1244
1245                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1246                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1247                        spin_unlock_bh(&mfc_unres_lock);
1248
1249                        kfree_skb(skb);
1250                        return -ENOBUFS;
1251                }
1252
1253                /*
1254                 *      Fill in the new cache entry
1255                 */
1256                c->mf6c_parent = -1;
1257                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1258                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1259
1260                /*
1261                 *      Reflect first query at pim6sd
1262                 */
1263                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1264                if (err < 0) {
1265                        /* If the report failed throw the cache entry
1266                           out - Brad Parker
1267                         */
1268                        spin_unlock_bh(&mfc_unres_lock);
1269
1270                        ip6mr_cache_free(c);
1271                        kfree_skb(skb);
1272                        return err;
1273                }
1274
1275                atomic_inc(&mrt->cache_resolve_queue_len);
1276                list_add(&c->list, &mrt->mfc6_unres_queue);
1277                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1278
1279                ipmr_do_expire_process(mrt);
1280        }
1281
1282        /*
1283         *      See if we can append the packet
1284         */
1285        if (c->mfc_un.unres.unresolved.qlen > 3) {
1286                kfree_skb(skb);
1287                err = -ENOBUFS;
1288        } else {
1289                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1290                err = 0;
1291        }
1292
1293        spin_unlock_bh(&mfc_unres_lock);
1294        return err;
1295}
1296
1297/*
1298 *      MFC6 cache manipulation by user space
1299 */
1300
1301static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1302                            int parent)
1303{
1304        int line;
1305        struct mfc6_cache *c, *next;
1306
1307        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1308
1309        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1310                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1311                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1312                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1313                    (parent == -1 || parent == c->mf6c_parent)) {
1314                        write_lock_bh(&mrt_lock);
1315                        list_del(&c->list);
1316                        write_unlock_bh(&mrt_lock);
1317
1318                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1319                        ip6mr_cache_free(c);
1320                        return 0;
1321                }
1322        }
1323        return -ENOENT;
1324}
1325
1326static int ip6mr_device_event(struct notifier_block *this,
1327                              unsigned long event, void *ptr)
1328{
1329        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330        struct net *net = dev_net(dev);
1331        struct mr6_table *mrt;
1332        struct mif_device *v;
1333        int ct;
1334        LIST_HEAD(list);
1335
1336        if (event != NETDEV_UNREGISTER)
1337                return NOTIFY_DONE;
1338
1339        ip6mr_for_each_table(mrt, net) {
1340                v = &mrt->vif6_table[0];
1341                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342                        if (v->dev == dev)
1343                                mif6_delete(mrt, ct, &list);
1344                }
1345        }
1346        unregister_netdevice_many(&list);
1347
1348        return NOTIFY_DONE;
1349}
1350
1351static struct notifier_block ip6_mr_notifier = {
1352        .notifier_call = ip6mr_device_event
1353};
1354
1355/*
1356 *      Setup for IP multicast routing
1357 */
1358
1359static int __net_init ip6mr_net_init(struct net *net)
1360{
1361        int err;
1362
1363        err = ip6mr_rules_init(net);
1364        if (err < 0)
1365                goto fail;
1366
1367#ifdef CONFIG_PROC_FS
1368        err = -ENOMEM;
1369        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1370                goto proc_vif_fail;
1371        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1372                goto proc_cache_fail;
1373#endif
1374
1375        return 0;
1376
1377#ifdef CONFIG_PROC_FS
1378proc_cache_fail:
1379        remove_proc_entry("ip6_mr_vif", net->proc_net);
1380proc_vif_fail:
1381        ip6mr_rules_exit(net);
1382#endif
1383fail:
1384        return err;
1385}
1386
1387static void __net_exit ip6mr_net_exit(struct net *net)
1388{
1389#ifdef CONFIG_PROC_FS
1390        remove_proc_entry("ip6_mr_cache", net->proc_net);
1391        remove_proc_entry("ip6_mr_vif", net->proc_net);
1392#endif
1393        ip6mr_rules_exit(net);
1394}
1395
1396static struct pernet_operations ip6mr_net_ops = {
1397        .init = ip6mr_net_init,
1398        .exit = ip6mr_net_exit,
1399};
1400
1401int __init ip6_mr_init(void)
1402{
1403        int err;
1404
1405        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1406                                       sizeof(struct mfc6_cache),
1407                                       0, SLAB_HWCACHE_ALIGN,
1408                                       NULL);
1409        if (!mrt_cachep)
1410                return -ENOMEM;
1411
1412        err = register_pernet_subsys(&ip6mr_net_ops);
1413        if (err)
1414                goto reg_pernet_fail;
1415
1416        err = register_netdevice_notifier(&ip6_mr_notifier);
1417        if (err)
1418                goto reg_notif_fail;
1419#ifdef CONFIG_IPV6_PIMSM_V2
1420        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1421                pr_err("%s: can't add PIM protocol\n", __func__);
1422                err = -EAGAIN;
1423                goto add_proto_fail;
1424        }
1425#endif
1426        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1427                      ip6mr_rtm_dumproute, NULL);
1428        return 0;
1429#ifdef CONFIG_IPV6_PIMSM_V2
1430add_proto_fail:
1431        unregister_netdevice_notifier(&ip6_mr_notifier);
1432#endif
1433reg_notif_fail:
1434        unregister_pernet_subsys(&ip6mr_net_ops);
1435reg_pernet_fail:
1436        kmem_cache_destroy(mrt_cachep);
1437        return err;
1438}
1439
1440void ip6_mr_cleanup(void)
1441{
1442        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1443#ifdef CONFIG_IPV6_PIMSM_V2
1444        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1445#endif
1446        unregister_netdevice_notifier(&ip6_mr_notifier);
1447        unregister_pernet_subsys(&ip6mr_net_ops);
1448        kmem_cache_destroy(mrt_cachep);
1449}
1450
1451static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1452                         struct mf6cctl *mfc, int mrtsock, int parent)
1453{
1454        bool found = false;
1455        int line;
1456        struct mfc6_cache *uc, *c;
1457        unsigned char ttls[MAXMIFS];
1458        int i;
1459
1460        if (mfc->mf6cc_parent >= MAXMIFS)
1461                return -ENFILE;
1462
1463        memset(ttls, 255, MAXMIFS);
1464        for (i = 0; i < MAXMIFS; i++) {
1465                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1466                        ttls[i] = 1;
1467
1468        }
1469
1470        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1471
1472        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1473                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1474                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1475                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1476                    (parent == -1 || parent == mfc->mf6cc_parent)) {
1477                        found = true;
1478                        break;
1479                }
1480        }
1481
1482        if (found) {
1483                write_lock_bh(&mrt_lock);
1484                c->mf6c_parent = mfc->mf6cc_parent;
1485                ip6mr_update_thresholds(mrt, c, ttls);
1486                if (!mrtsock)
1487                        c->mfc_flags |= MFC_STATIC;
1488                write_unlock_bh(&mrt_lock);
1489                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1490                return 0;
1491        }
1492
1493        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1494            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1495                return -EINVAL;
1496
1497        c = ip6mr_cache_alloc();
1498        if (!c)
1499                return -ENOMEM;
1500
1501        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1502        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1503        c->mf6c_parent = mfc->mf6cc_parent;
1504        ip6mr_update_thresholds(mrt, c, ttls);
1505        if (!mrtsock)
1506                c->mfc_flags |= MFC_STATIC;
1507
1508        write_lock_bh(&mrt_lock);
1509        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1510        write_unlock_bh(&mrt_lock);
1511
1512        /*
1513         *      Check to see if we resolved a queued list. If so we
1514         *      need to send on the frames and tidy up.
1515         */
1516        found = false;
1517        spin_lock_bh(&mfc_unres_lock);
1518        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1519                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1520                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1521                        list_del(&uc->list);
1522                        atomic_dec(&mrt->cache_resolve_queue_len);
1523                        found = true;
1524                        break;
1525                }
1526        }
1527        if (list_empty(&mrt->mfc6_unres_queue))
1528                del_timer(&mrt->ipmr_expire_timer);
1529        spin_unlock_bh(&mfc_unres_lock);
1530
1531        if (found) {
1532                ip6mr_cache_resolve(net, mrt, uc, c);
1533                ip6mr_cache_free(uc);
1534        }
1535        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1536        return 0;
1537}
1538
1539/*
1540 *      Close the multicast socket, and clear the vif tables etc
1541 */
1542
1543static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1544{
1545        int i;
1546        LIST_HEAD(list);
1547        struct mfc6_cache *c, *next;
1548
1549        /*
1550         *      Shut down all active vif entries
1551         */
1552        for (i = 0; i < mrt->maxvif; i++) {
1553                if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1554                        continue;
1555                mif6_delete(mrt, i, &list);
1556        }
1557        unregister_netdevice_many(&list);
1558
1559        /*
1560         *      Wipe the cache
1561         */
1562        for (i = 0; i < MFC6_LINES; i++) {
1563                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1564                        if (!all && (c->mfc_flags & MFC_STATIC))
1565                                continue;
1566                        write_lock_bh(&mrt_lock);
1567                        list_del(&c->list);
1568                        write_unlock_bh(&mrt_lock);
1569
1570                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1571                        ip6mr_cache_free(c);
1572                }
1573        }
1574
1575        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1576                spin_lock_bh(&mfc_unres_lock);
1577                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1578                        list_del(&c->list);
1579                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1580                        ip6mr_destroy_unres(mrt, c);
1581                }
1582                spin_unlock_bh(&mfc_unres_lock);
1583        }
1584}
1585
1586static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1587{
1588        int err = 0;
1589        struct net *net = sock_net(sk);
1590
1591        rtnl_lock();
1592        write_lock_bh(&mrt_lock);
1593        if (likely(mrt->mroute6_sk == NULL)) {
1594                mrt->mroute6_sk = sk;
1595                net->ipv6.devconf_all->mc_forwarding++;
1596        } else {
1597                err = -EADDRINUSE;
1598        }
1599        write_unlock_bh(&mrt_lock);
1600
1601        if (!err)
1602                inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1603                                             NETCONFA_IFINDEX_ALL,
1604                                             net->ipv6.devconf_all);
1605        rtnl_unlock();
1606
1607        return err;
1608}
1609
1610int ip6mr_sk_done(struct sock *sk)
1611{
1612        int err = -EACCES;
1613        struct net *net = sock_net(sk);
1614        struct mr6_table *mrt;
1615
1616        rtnl_lock();
1617        ip6mr_for_each_table(mrt, net) {
1618                if (sk == mrt->mroute6_sk) {
1619                        write_lock_bh(&mrt_lock);
1620                        mrt->mroute6_sk = NULL;
1621                        net->ipv6.devconf_all->mc_forwarding--;
1622                        write_unlock_bh(&mrt_lock);
1623                        inet6_netconf_notify_devconf(net,
1624                                                     NETCONFA_MC_FORWARDING,
1625                                                     NETCONFA_IFINDEX_ALL,
1626                                                     net->ipv6.devconf_all);
1627
1628                        mroute_clean_tables(mrt, false);
1629                        err = 0;
1630                        break;
1631                }
1632        }
1633        rtnl_unlock();
1634
1635        return err;
1636}
1637
1638struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1639{
1640        struct mr6_table *mrt;
1641        struct flowi6 fl6 = {
1642                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1643                .flowi6_oif     = skb->dev->ifindex,
1644                .flowi6_mark    = skb->mark,
1645        };
1646
1647        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1648                return NULL;
1649
1650        return mrt->mroute6_sk;
1651}
1652
1653/*
1654 *      Socket options and virtual interface manipulation. The whole
1655 *      virtual interface system is a complete heap, but unfortunately
1656 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1657 *      MOSPF/PIM router set up we can clean this up.
1658 */
1659
1660int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1661{
1662        int ret, parent = 0;
1663        struct mif6ctl vif;
1664        struct mf6cctl mfc;
1665        mifi_t mifi;
1666        struct net *net = sock_net(sk);
1667        struct mr6_table *mrt;
1668
1669        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1670        if (!mrt)
1671                return -ENOENT;
1672
1673        if (optname != MRT6_INIT) {
1674                if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1675                        return -EACCES;
1676        }
1677
1678        switch (optname) {
1679        case MRT6_INIT:
1680                if (sk->sk_type != SOCK_RAW ||
1681                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1682                        return -EOPNOTSUPP;
1683                if (optlen < sizeof(int))
1684                        return -EINVAL;
1685
1686                return ip6mr_sk_init(mrt, sk);
1687
1688        case MRT6_DONE:
1689                return ip6mr_sk_done(sk);
1690
1691        case MRT6_ADD_MIF:
1692                if (optlen < sizeof(vif))
1693                        return -EINVAL;
1694                if (copy_from_user(&vif, optval, sizeof(vif)))
1695                        return -EFAULT;
1696                if (vif.mif6c_mifi >= MAXMIFS)
1697                        return -ENFILE;
1698                rtnl_lock();
1699                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1700                rtnl_unlock();
1701                return ret;
1702
1703        case MRT6_DEL_MIF:
1704                if (optlen < sizeof(mifi_t))
1705                        return -EINVAL;
1706                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1707                        return -EFAULT;
1708                rtnl_lock();
1709                ret = mif6_delete(mrt, mifi, NULL);
1710                rtnl_unlock();
1711                return ret;
1712
1713        /*
1714         *      Manipulate the forwarding caches. These live
1715         *      in a sort of kernel/user symbiosis.
1716         */
1717        case MRT6_ADD_MFC:
1718        case MRT6_DEL_MFC:
1719                parent = -1;
1720        case MRT6_ADD_MFC_PROXY:
1721        case MRT6_DEL_MFC_PROXY:
1722                if (optlen < sizeof(mfc))
1723                        return -EINVAL;
1724                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1725                        return -EFAULT;
1726                if (parent == 0)
1727                        parent = mfc.mf6cc_parent;
1728                rtnl_lock();
1729                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1730                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1731                else
1732                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1733                                            sk == mrt->mroute6_sk, parent);
1734                rtnl_unlock();
1735                return ret;
1736
1737        /*
1738         *      Control PIM assert (to activate pim will activate assert)
1739         */
1740        case MRT6_ASSERT:
1741        {
1742                int v;
1743
1744                if (optlen != sizeof(v))
1745                        return -EINVAL;
1746                if (get_user(v, (int __user *)optval))
1747                        return -EFAULT;
1748                mrt->mroute_do_assert = v;
1749                return 0;
1750        }
1751
1752#ifdef CONFIG_IPV6_PIMSM_V2
1753        case MRT6_PIM:
1754        {
1755                int v;
1756
1757                if (optlen != sizeof(v))
1758                        return -EINVAL;
1759                if (get_user(v, (int __user *)optval))
1760                        return -EFAULT;
1761                v = !!v;
1762                rtnl_lock();
1763                ret = 0;
1764                if (v != mrt->mroute_do_pim) {
1765                        mrt->mroute_do_pim = v;
1766                        mrt->mroute_do_assert = v;
1767                }
1768                rtnl_unlock();
1769                return ret;
1770        }
1771
1772#endif
1773#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1774        case MRT6_TABLE:
1775        {
1776                u32 v;
1777
1778                if (optlen != sizeof(u32))
1779                        return -EINVAL;
1780                if (get_user(v, (u32 __user *)optval))
1781                        return -EFAULT;
1782                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1783                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1784                        return -EINVAL;
1785                if (sk == mrt->mroute6_sk)
1786                        return -EBUSY;
1787
1788                rtnl_lock();
1789                ret = 0;
1790                if (!ip6mr_new_table(net, v))
1791                        ret = -ENOMEM;
1792                raw6_sk(sk)->ip6mr_table = v;
1793                rtnl_unlock();
1794                return ret;
1795        }
1796#endif
1797        /*
1798         *      Spurious command, or MRT6_VERSION which you cannot
1799         *      set.
1800         */
1801        default:
1802                return -ENOPROTOOPT;
1803        }
1804}
1805
1806/*
1807 *      Getsock opt support for the multicast routing system.
1808 */
1809
1810int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1811                          int __user *optlen)
1812{
1813        int olr;
1814        int val;
1815        struct net *net = sock_net(sk);
1816        struct mr6_table *mrt;
1817
1818        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1819        if (!mrt)
1820                return -ENOENT;
1821
1822        switch (optname) {
1823        case MRT6_VERSION:
1824                val = 0x0305;
1825                break;
1826#ifdef CONFIG_IPV6_PIMSM_V2
1827        case MRT6_PIM:
1828                val = mrt->mroute_do_pim;
1829                break;
1830#endif
1831        case MRT6_ASSERT:
1832                val = mrt->mroute_do_assert;
1833                break;
1834        default:
1835                return -ENOPROTOOPT;
1836        }
1837
1838        if (get_user(olr, optlen))
1839                return -EFAULT;
1840
1841        olr = min_t(int, olr, sizeof(int));
1842        if (olr < 0)
1843                return -EINVAL;
1844
1845        if (put_user(olr, optlen))
1846                return -EFAULT;
1847        if (copy_to_user(optval, &val, olr))
1848                return -EFAULT;
1849        return 0;
1850}
1851
1852/*
1853 *      The IP multicast ioctl support routines.
1854 */
1855
1856int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1857{
1858        struct sioc_sg_req6 sr;
1859        struct sioc_mif_req6 vr;
1860        struct mif_device *vif;
1861        struct mfc6_cache *c;
1862        struct net *net = sock_net(sk);
1863        struct mr6_table *mrt;
1864
1865        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1866        if (!mrt)
1867                return -ENOENT;
1868
1869        switch (cmd) {
1870        case SIOCGETMIFCNT_IN6:
1871                if (copy_from_user(&vr, arg, sizeof(vr)))
1872                        return -EFAULT;
1873                if (vr.mifi >= mrt->maxvif)
1874                        return -EINVAL;
1875                read_lock(&mrt_lock);
1876                vif = &mrt->vif6_table[vr.mifi];
1877                if (MIF_EXISTS(mrt, vr.mifi)) {
1878                        vr.icount = vif->pkt_in;
1879                        vr.ocount = vif->pkt_out;
1880                        vr.ibytes = vif->bytes_in;
1881                        vr.obytes = vif->bytes_out;
1882                        read_unlock(&mrt_lock);
1883
1884                        if (copy_to_user(arg, &vr, sizeof(vr)))
1885                                return -EFAULT;
1886                        return 0;
1887                }
1888                read_unlock(&mrt_lock);
1889                return -EADDRNOTAVAIL;
1890        case SIOCGETSGCNT_IN6:
1891                if (copy_from_user(&sr, arg, sizeof(sr)))
1892                        return -EFAULT;
1893
1894                read_lock(&mrt_lock);
1895                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1896                if (c) {
1897                        sr.pktcnt = c->mfc_un.res.pkt;
1898                        sr.bytecnt = c->mfc_un.res.bytes;
1899                        sr.wrong_if = c->mfc_un.res.wrong_if;
1900                        read_unlock(&mrt_lock);
1901
1902                        if (copy_to_user(arg, &sr, sizeof(sr)))
1903                                return -EFAULT;
1904                        return 0;
1905                }
1906                read_unlock(&mrt_lock);
1907                return -EADDRNOTAVAIL;
1908        default:
1909                return -ENOIOCTLCMD;
1910        }
1911}
1912
1913#ifdef CONFIG_COMPAT
1914struct compat_sioc_sg_req6 {
1915        struct sockaddr_in6 src;
1916        struct sockaddr_in6 grp;
1917        compat_ulong_t pktcnt;
1918        compat_ulong_t bytecnt;
1919        compat_ulong_t wrong_if;
1920};
1921
1922struct compat_sioc_mif_req6 {
1923        mifi_t  mifi;
1924        compat_ulong_t icount;
1925        compat_ulong_t ocount;
1926        compat_ulong_t ibytes;
1927        compat_ulong_t obytes;
1928};
1929
1930int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1931{
1932        struct compat_sioc_sg_req6 sr;
1933        struct compat_sioc_mif_req6 vr;
1934        struct mif_device *vif;
1935        struct mfc6_cache *c;
1936        struct net *net = sock_net(sk);
1937        struct mr6_table *mrt;
1938
1939        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1940        if (!mrt)
1941                return -ENOENT;
1942
1943        switch (cmd) {
1944        case SIOCGETMIFCNT_IN6:
1945                if (copy_from_user(&vr, arg, sizeof(vr)))
1946                        return -EFAULT;
1947                if (vr.mifi >= mrt->maxvif)
1948                        return -EINVAL;
1949                read_lock(&mrt_lock);
1950                vif = &mrt->vif6_table[vr.mifi];
1951                if (MIF_EXISTS(mrt, vr.mifi)) {
1952                        vr.icount = vif->pkt_in;
1953                        vr.ocount = vif->pkt_out;
1954                        vr.ibytes = vif->bytes_in;
1955                        vr.obytes = vif->bytes_out;
1956                        read_unlock(&mrt_lock);
1957
1958                        if (copy_to_user(arg, &vr, sizeof(vr)))
1959                                return -EFAULT;
1960                        return 0;
1961                }
1962                read_unlock(&mrt_lock);
1963                return -EADDRNOTAVAIL;
1964        case SIOCGETSGCNT_IN6:
1965                if (copy_from_user(&sr, arg, sizeof(sr)))
1966                        return -EFAULT;
1967
1968                read_lock(&mrt_lock);
1969                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1970                if (c) {
1971                        sr.pktcnt = c->mfc_un.res.pkt;
1972                        sr.bytecnt = c->mfc_un.res.bytes;
1973                        sr.wrong_if = c->mfc_un.res.wrong_if;
1974                        read_unlock(&mrt_lock);
1975
1976                        if (copy_to_user(arg, &sr, sizeof(sr)))
1977                                return -EFAULT;
1978                        return 0;
1979                }
1980                read_unlock(&mrt_lock);
1981                return -EADDRNOTAVAIL;
1982        default:
1983                return -ENOIOCTLCMD;
1984        }
1985}
1986#endif
1987
1988static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1989{
1990        __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1991                        IPSTATS_MIB_OUTFORWDATAGRAMS);
1992        __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
1993                        IPSTATS_MIB_OUTOCTETS, skb->len);
1994        return dst_output(net, sk, skb);
1995}
1996
1997/*
1998 *      Processing handlers for ip6mr_forward
1999 */
2000
2001static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2002                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2003{
2004        struct ipv6hdr *ipv6h;
2005        struct mif_device *vif = &mrt->vif6_table[vifi];
2006        struct net_device *dev;
2007        struct dst_entry *dst;
2008        struct flowi6 fl6;
2009
2010        if (!vif->dev)
2011                goto out_free;
2012
2013#ifdef CONFIG_IPV6_PIMSM_V2
2014        if (vif->flags & MIFF_REGISTER) {
2015                vif->pkt_out++;
2016                vif->bytes_out += skb->len;
2017                vif->dev->stats.tx_bytes += skb->len;
2018                vif->dev->stats.tx_packets++;
2019                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2020                goto out_free;
2021        }
2022#endif
2023
2024        ipv6h = ipv6_hdr(skb);
2025
2026        fl6 = (struct flowi6) {
2027                .flowi6_oif = vif->link,
2028                .daddr = ipv6h->daddr,
2029        };
2030
2031        dst = ip6_route_output(net, NULL, &fl6);
2032        if (dst->error) {
2033                dst_release(dst);
2034                goto out_free;
2035        }
2036
2037        skb_dst_drop(skb);
2038        skb_dst_set(skb, dst);
2039
2040        /*
2041         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2042         * not only before forwarding, but after forwarding on all output
2043         * interfaces. It is clear, if mrouter runs a multicasting
2044         * program, it should receive packets not depending to what interface
2045         * program is joined.
2046         * If we will not make it, the program will have to join on all
2047         * interfaces. On the other hand, multihoming host (or router, but
2048         * not mrouter) cannot join to more than one interface - it will
2049         * result in receiving multiple packets.
2050         */
2051        dev = vif->dev;
2052        skb->dev = dev;
2053        vif->pkt_out++;
2054        vif->bytes_out += skb->len;
2055
2056        /* We are about to write */
2057        /* XXX: extension headers? */
2058        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2059                goto out_free;
2060
2061        ipv6h = ipv6_hdr(skb);
2062        ipv6h->hop_limit--;
2063
2064        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2065
2066        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2067                       net, NULL, skb, skb->dev, dev,
2068                       ip6mr_forward2_finish);
2069
2070out_free:
2071        kfree_skb(skb);
2072        return 0;
2073}
2074
2075static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2076{
2077        int ct;
2078
2079        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2080                if (mrt->vif6_table[ct].dev == dev)
2081                        break;
2082        }
2083        return ct;
2084}
2085
2086static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2087                           struct sk_buff *skb, struct mfc6_cache *cache)
2088{
2089        int psend = -1;
2090        int vif, ct;
2091        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2092
2093        vif = cache->mf6c_parent;
2094        cache->mfc_un.res.pkt++;
2095        cache->mfc_un.res.bytes += skb->len;
2096        cache->mfc_un.res.lastuse = jiffies;
2097
2098        if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2099                struct mfc6_cache *cache_proxy;
2100
2101                /* For an (*,G) entry, we only check that the incoming
2102                 * interface is part of the static tree.
2103                 */
2104                cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2105                if (cache_proxy &&
2106                    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2107                        goto forward;
2108        }
2109
2110        /*
2111         * Wrong interface: drop packet and (maybe) send PIM assert.
2112         */
2113        if (mrt->vif6_table[vif].dev != skb->dev) {
2114                cache->mfc_un.res.wrong_if++;
2115
2116                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2117                    /* pimsm uses asserts, when switching from RPT to SPT,
2118                       so that we cannot check that packet arrived on an oif.
2119                       It is bad, but otherwise we would need to move pretty
2120                       large chunk of pimd to kernel. Ough... --ANK
2121                     */
2122                    (mrt->mroute_do_pim ||
2123                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2124                    time_after(jiffies,
2125                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2126                        cache->mfc_un.res.last_assert = jiffies;
2127                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2128                }
2129                goto dont_forward;
2130        }
2131
2132forward:
2133        mrt->vif6_table[vif].pkt_in++;
2134        mrt->vif6_table[vif].bytes_in += skb->len;
2135
2136        /*
2137         *      Forward the frame
2138         */
2139        if (ipv6_addr_any(&cache->mf6c_origin) &&
2140            ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2141                if (true_vifi >= 0 &&
2142                    true_vifi != cache->mf6c_parent &&
2143                    ipv6_hdr(skb)->hop_limit >
2144                                cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2145                        /* It's an (*,*) entry and the packet is not coming from
2146                         * the upstream: forward the packet to the upstream
2147                         * only.
2148                         */
2149                        psend = cache->mf6c_parent;
2150                        goto last_forward;
2151                }
2152                goto dont_forward;
2153        }
2154        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2155                /* For (*,G) entry, don't forward to the incoming interface */
2156                if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2157                    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2158                        if (psend != -1) {
2159                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2160                                if (skb2)
2161                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2162                        }
2163                        psend = ct;
2164                }
2165        }
2166last_forward:
2167        if (psend != -1) {
2168                ip6mr_forward2(net, mrt, skb, cache, psend);
2169                return;
2170        }
2171
2172dont_forward:
2173        kfree_skb(skb);
2174}
2175
2176
2177/*
2178 *      Multicast packets for forwarding arrive here
2179 */
2180
2181int ip6_mr_input(struct sk_buff *skb)
2182{
2183        struct mfc6_cache *cache;
2184        struct net *net = dev_net(skb->dev);
2185        struct mr6_table *mrt;
2186        struct flowi6 fl6 = {
2187                .flowi6_iif     = skb->dev->ifindex,
2188                .flowi6_mark    = skb->mark,
2189        };
2190        int err;
2191
2192        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2193        if (err < 0) {
2194                kfree_skb(skb);
2195                return err;
2196        }
2197
2198        read_lock(&mrt_lock);
2199        cache = ip6mr_cache_find(mrt,
2200                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2201        if (!cache) {
2202                int vif = ip6mr_find_vif(mrt, skb->dev);
2203
2204                if (vif >= 0)
2205                        cache = ip6mr_cache_find_any(mrt,
2206                                                     &ipv6_hdr(skb)->daddr,
2207                                                     vif);
2208        }
2209
2210        /*
2211         *      No usable cache entry
2212         */
2213        if (!cache) {
2214                int vif;
2215
2216                vif = ip6mr_find_vif(mrt, skb->dev);
2217                if (vif >= 0) {
2218                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2219                        read_unlock(&mrt_lock);
2220
2221                        return err;
2222                }
2223                read_unlock(&mrt_lock);
2224                kfree_skb(skb);
2225                return -ENODEV;
2226        }
2227
2228        ip6_mr_forward(net, mrt, skb, cache);
2229
2230        read_unlock(&mrt_lock);
2231
2232        return 0;
2233}
2234
2235
2236static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2237                               struct mfc6_cache *c, struct rtmsg *rtm)
2238{
2239        struct rta_mfc_stats mfcs;
2240        struct nlattr *mp_attr;
2241        struct rtnexthop *nhp;
2242        unsigned long lastuse;
2243        int ct;
2244
2245        /* If cache is unresolved, don't try to parse IIF and OIF */
2246        if (c->mf6c_parent >= MAXMIFS)
2247                return -ENOENT;
2248
2249        if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2250            nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2251                return -EMSGSIZE;
2252        mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2253        if (!mp_attr)
2254                return -EMSGSIZE;
2255
2256        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2257                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2258                        nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2259                        if (!nhp) {
2260                                nla_nest_cancel(skb, mp_attr);
2261                                return -EMSGSIZE;
2262                        }
2263
2264                        nhp->rtnh_flags = 0;
2265                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2266                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2267                        nhp->rtnh_len = sizeof(*nhp);
2268                }
2269        }
2270
2271        nla_nest_end(skb, mp_attr);
2272
2273        lastuse = READ_ONCE(c->mfc_un.res.lastuse);
2274        lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
2275
2276        mfcs.mfcs_packets = c->mfc_un.res.pkt;
2277        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2278        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2279        if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
2280            nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
2281                              RTA_PAD))
2282                return -EMSGSIZE;
2283
2284        rtm->rtm_type = RTN_MULTICAST;
2285        return 1;
2286}
2287
2288int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
2289                    int nowait, u32 portid)
2290{
2291        int err;
2292        struct mr6_table *mrt;
2293        struct mfc6_cache *cache;
2294        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2295
2296        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2297        if (!mrt)
2298                return -ENOENT;
2299
2300        read_lock(&mrt_lock);
2301        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2302        if (!cache && skb->dev) {
2303                int vif = ip6mr_find_vif(mrt, skb->dev);
2304
2305                if (vif >= 0)
2306                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2307                                                     vif);
2308        }
2309
2310        if (!cache) {
2311                struct sk_buff *skb2;
2312                struct ipv6hdr *iph;
2313                struct net_device *dev;
2314                int vif;
2315
2316                if (nowait) {
2317                        read_unlock(&mrt_lock);
2318                        return -EAGAIN;
2319                }
2320
2321                dev = skb->dev;
2322                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2323                        read_unlock(&mrt_lock);
2324                        return -ENODEV;
2325                }
2326
2327                /* really correct? */
2328                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2329                if (!skb2) {
2330                        read_unlock(&mrt_lock);
2331                        return -ENOMEM;
2332                }
2333
2334                NETLINK_CB(skb2).portid = portid;
2335                skb_reset_transport_header(skb2);
2336
2337                skb_put(skb2, sizeof(struct ipv6hdr));
2338                skb_reset_network_header(skb2);
2339
2340                iph = ipv6_hdr(skb2);
2341                iph->version = 0;
2342                iph->priority = 0;
2343                iph->flow_lbl[0] = 0;
2344                iph->flow_lbl[1] = 0;
2345                iph->flow_lbl[2] = 0;
2346                iph->payload_len = 0;
2347                iph->nexthdr = IPPROTO_NONE;
2348                iph->hop_limit = 0;
2349                iph->saddr = rt->rt6i_src.addr;
2350                iph->daddr = rt->rt6i_dst.addr;
2351
2352                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2353                read_unlock(&mrt_lock);
2354
2355                return err;
2356        }
2357
2358        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2359                cache->mfc_flags |= MFC_NOTIFY;
2360
2361        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2362        read_unlock(&mrt_lock);
2363        return err;
2364}
2365
2366static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2367                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2368                             int flags)
2369{
2370        struct nlmsghdr *nlh;
2371        struct rtmsg *rtm;
2372        int err;
2373
2374        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2375        if (!nlh)
2376                return -EMSGSIZE;
2377
2378        rtm = nlmsg_data(nlh);
2379        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2380        rtm->rtm_dst_len  = 128;
2381        rtm->rtm_src_len  = 128;
2382        rtm->rtm_tos      = 0;
2383        rtm->rtm_table    = mrt->id;
2384        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2385                goto nla_put_failure;
2386        rtm->rtm_type = RTN_MULTICAST;
2387        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2388        if (c->mfc_flags & MFC_STATIC)
2389                rtm->rtm_protocol = RTPROT_STATIC;
2390        else
2391                rtm->rtm_protocol = RTPROT_MROUTED;
2392        rtm->rtm_flags    = 0;
2393
2394        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2395            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2396                goto nla_put_failure;
2397        err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2398        /* do not break the dump if cache is unresolved */
2399        if (err < 0 && err != -ENOENT)
2400                goto nla_put_failure;
2401
2402        nlmsg_end(skb, nlh);
2403        return 0;
2404
2405nla_put_failure:
2406        nlmsg_cancel(skb, nlh);
2407        return -EMSGSIZE;
2408}
2409
2410static int mr6_msgsize(bool unresolved, int maxvif)
2411{
2412        size_t len =
2413                NLMSG_ALIGN(sizeof(struct rtmsg))
2414                + nla_total_size(4)     /* RTA_TABLE */
2415                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2416                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2417                ;
2418
2419        if (!unresolved)
2420                len = len
2421                      + nla_total_size(4)       /* RTA_IIF */
2422                      + nla_total_size(0)       /* RTA_MULTIPATH */
2423                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2424                                                /* RTA_MFC_STATS */
2425                      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2426                ;
2427
2428        return len;
2429}
2430
2431static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2432                              int cmd)
2433{
2434        struct net *net = read_pnet(&mrt->net);
2435        struct sk_buff *skb;
2436        int err = -ENOBUFS;
2437
2438        skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2439                        GFP_ATOMIC);
2440        if (!skb)
2441                goto errout;
2442
2443        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2444        if (err < 0)
2445                goto errout;
2446
2447        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2448        return;
2449
2450errout:
2451        kfree_skb(skb);
2452        if (err < 0)
2453                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2454}
2455
2456static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2457{
2458        struct net *net = sock_net(skb->sk);
2459        struct mr6_table *mrt;
2460        struct mfc6_cache *mfc;
2461        unsigned int t = 0, s_t;
2462        unsigned int h = 0, s_h;
2463        unsigned int e = 0, s_e;
2464
2465        s_t = cb->args[0];
2466        s_h = cb->args[1];
2467        s_e = cb->args[2];
2468
2469        read_lock(&mrt_lock);
2470        ip6mr_for_each_table(mrt, net) {
2471                if (t < s_t)
2472                        goto next_table;
2473                if (t > s_t)
2474                        s_h = 0;
2475                for (h = s_h; h < MFC6_LINES; h++) {
2476                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2477                                if (e < s_e)
2478                                        goto next_entry;
2479                                if (ip6mr_fill_mroute(mrt, skb,
2480                                                      NETLINK_CB(cb->skb).portid,
2481                                                      cb->nlh->nlmsg_seq,
2482                                                      mfc, RTM_NEWROUTE,
2483                                                      NLM_F_MULTI) < 0)
2484                                        goto done;
2485next_entry:
2486                                e++;
2487                        }
2488                        e = s_e = 0;
2489                }
2490                spin_lock_bh(&mfc_unres_lock);
2491                list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2492                        if (e < s_e)
2493                                goto next_entry2;
2494                        if (ip6mr_fill_mroute(mrt, skb,
2495                                              NETLINK_CB(cb->skb).portid,
2496                                              cb->nlh->nlmsg_seq,
2497                                              mfc, RTM_NEWROUTE,
2498                                              NLM_F_MULTI) < 0) {
2499                                spin_unlock_bh(&mfc_unres_lock);
2500                                goto done;
2501                        }
2502next_entry2:
2503                        e++;
2504                }
2505                spin_unlock_bh(&mfc_unres_lock);
2506                e = s_e = 0;
2507                s_h = 0;
2508next_table:
2509                t++;
2510        }
2511done:
2512        read_unlock(&mrt_lock);
2513
2514        cb->args[2] = e;
2515        cb->args[1] = h;
2516        cb->args[0] = t;
2517
2518        return skb->len;
2519}
2520