linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
  57struct mr6_table {
  58        struct list_head        list;
  59        possible_net_t          net;
  60        u32                     id;
  61        struct sock             *mroute6_sk;
  62        struct timer_list       ipmr_expire_timer;
  63        struct list_head        mfc6_unres_queue;
  64        struct list_head        mfc6_cache_array[MFC6_LINES];
  65        struct mif_device       vif6_table[MAXMIFS];
  66        int                     maxvif;
  67        atomic_t                cache_resolve_queue_len;
  68        bool                    mroute_do_assert;
  69        bool                    mroute_do_pim;
  70#ifdef CONFIG_IPV6_PIMSM_V2
  71        int                     mroute_reg_vif_num;
  72#endif
  73};
  74
  75struct ip6mr_rule {
  76        struct fib_rule         common;
  77};
  78
  79struct ip6mr_result {
  80        struct mr6_table        *mrt;
  81};
  82
  83/* Big lock, protecting vif table, mrt cache and mroute socket state.
  84   Note that the changes are semaphored via rtnl_lock.
  85 */
  86
  87static DEFINE_RWLOCK(mrt_lock);
  88
  89/*
  90 *      Multicast router control variables
  91 */
  92
  93#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  94
  95/* Special spinlock for queue of unresolved entries */
  96static DEFINE_SPINLOCK(mfc_unres_lock);
  97
  98/* We return to original Alan's scheme. Hash table of resolved
  99   entries is changed only in process context and protected
 100   with weak lock mrt_lock. Queue of unresolved entries is protected
 101   with strong spinlock mfc_unres_lock.
 102
 103   In this case data path is free of exclusive locks at all.
 104 */
 105
 106static struct kmem_cache *mrt_cachep __read_mostly;
 107
 108static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 109static void ip6mr_free_table(struct mr6_table *mrt);
 110
 111static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 112                           struct sk_buff *skb, struct mfc6_cache *cache);
 113static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 114                              mifi_t mifi, int assert);
 115static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 116                               struct mfc6_cache *c, struct rtmsg *rtm);
 117static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 118                              int cmd);
 119static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 120                               struct netlink_callback *cb);
 121static void mroute_clean_tables(struct mr6_table *mrt, bool all);
 122static void ipmr_expire_process(unsigned long arg);
 123
 124#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 125#define ip6mr_for_each_table(mrt, net) \
 126        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 127
 128static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 129{
 130        struct mr6_table *mrt;
 131
 132        ip6mr_for_each_table(mrt, net) {
 133                if (mrt->id == id)
 134                        return mrt;
 135        }
 136        return NULL;
 137}
 138
 139static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 140                            struct mr6_table **mrt)
 141{
 142        int err;
 143        struct ip6mr_result res;
 144        struct fib_lookup_arg arg = {
 145                .result = &res,
 146                .flags = FIB_LOOKUP_NOREF,
 147        };
 148
 149        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 150                               flowi6_to_flowi(flp6), 0, &arg);
 151        if (err < 0)
 152                return err;
 153        *mrt = res.mrt;
 154        return 0;
 155}
 156
 157static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 158                             int flags, struct fib_lookup_arg *arg)
 159{
 160        struct ip6mr_result *res = arg->result;
 161        struct mr6_table *mrt;
 162
 163        switch (rule->action) {
 164        case FR_ACT_TO_TBL:
 165                break;
 166        case FR_ACT_UNREACHABLE:
 167                return -ENETUNREACH;
 168        case FR_ACT_PROHIBIT:
 169                return -EACCES;
 170        case FR_ACT_BLACKHOLE:
 171        default:
 172                return -EINVAL;
 173        }
 174
 175        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 176        if (!mrt)
 177                return -EAGAIN;
 178        res->mrt = mrt;
 179        return 0;
 180}
 181
 182static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 183{
 184        return 1;
 185}
 186
 187static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 188        FRA_GENERIC_POLICY,
 189};
 190
 191static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 192                                struct fib_rule_hdr *frh, struct nlattr **tb)
 193{
 194        return 0;
 195}
 196
 197static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 198                              struct nlattr **tb)
 199{
 200        return 1;
 201}
 202
 203static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 204                           struct fib_rule_hdr *frh)
 205{
 206        frh->dst_len = 0;
 207        frh->src_len = 0;
 208        frh->tos     = 0;
 209        return 0;
 210}
 211
 212static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 213        .family         = RTNL_FAMILY_IP6MR,
 214        .rule_size      = sizeof(struct ip6mr_rule),
 215        .addr_size      = sizeof(struct in6_addr),
 216        .action         = ip6mr_rule_action,
 217        .match          = ip6mr_rule_match,
 218        .configure      = ip6mr_rule_configure,
 219        .compare        = ip6mr_rule_compare,
 220        .fill           = ip6mr_rule_fill,
 221        .nlgroup        = RTNLGRP_IPV6_RULE,
 222        .policy         = ip6mr_rule_policy,
 223        .owner          = THIS_MODULE,
 224};
 225
 226static int __net_init ip6mr_rules_init(struct net *net)
 227{
 228        struct fib_rules_ops *ops;
 229        struct mr6_table *mrt;
 230        int err;
 231
 232        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 233        if (IS_ERR(ops))
 234                return PTR_ERR(ops);
 235
 236        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 237
 238        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 239        if (!mrt) {
 240                err = -ENOMEM;
 241                goto err1;
 242        }
 243
 244        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 245        if (err < 0)
 246                goto err2;
 247
 248        net->ipv6.mr6_rules_ops = ops;
 249        return 0;
 250
 251err2:
 252        ip6mr_free_table(mrt);
 253err1:
 254        fib_rules_unregister(ops);
 255        return err;
 256}
 257
 258static void __net_exit ip6mr_rules_exit(struct net *net)
 259{
 260        struct mr6_table *mrt, *next;
 261
 262        rtnl_lock();
 263        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 264                list_del(&mrt->list);
 265                ip6mr_free_table(mrt);
 266        }
 267        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 268        rtnl_unlock();
 269}
 270#else
 271#define ip6mr_for_each_table(mrt, net) \
 272        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 273
 274static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 275{
 276        return net->ipv6.mrt6;
 277}
 278
 279static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 280                            struct mr6_table **mrt)
 281{
 282        *mrt = net->ipv6.mrt6;
 283        return 0;
 284}
 285
 286static int __net_init ip6mr_rules_init(struct net *net)
 287{
 288        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 289        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 290}
 291
 292static void __net_exit ip6mr_rules_exit(struct net *net)
 293{
 294        rtnl_lock();
 295        ip6mr_free_table(net->ipv6.mrt6);
 296        net->ipv6.mrt6 = NULL;
 297        rtnl_unlock();
 298}
 299#endif
 300
 301static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 302{
 303        struct mr6_table *mrt;
 304        unsigned int i;
 305
 306        mrt = ip6mr_get_table(net, id);
 307        if (mrt)
 308                return mrt;
 309
 310        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 311        if (!mrt)
 312                return NULL;
 313        mrt->id = id;
 314        write_pnet(&mrt->net, net);
 315
 316        /* Forwarding cache */
 317        for (i = 0; i < MFC6_LINES; i++)
 318                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 319
 320        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 321
 322        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 323                    (unsigned long)mrt);
 324
 325#ifdef CONFIG_IPV6_PIMSM_V2
 326        mrt->mroute_reg_vif_num = -1;
 327#endif
 328#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 329        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 330#endif
 331        return mrt;
 332}
 333
 334static void ip6mr_free_table(struct mr6_table *mrt)
 335{
 336        del_timer_sync(&mrt->ipmr_expire_timer);
 337        mroute_clean_tables(mrt, true);
 338        kfree(mrt);
 339}
 340
 341#ifdef CONFIG_PROC_FS
 342
 343struct ipmr_mfc_iter {
 344        struct seq_net_private p;
 345        struct mr6_table *mrt;
 346        struct list_head *cache;
 347        int ct;
 348};
 349
 350
 351static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 352                                           struct ipmr_mfc_iter *it, loff_t pos)
 353{
 354        struct mr6_table *mrt = it->mrt;
 355        struct mfc6_cache *mfc;
 356
 357        read_lock(&mrt_lock);
 358        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 359                it->cache = &mrt->mfc6_cache_array[it->ct];
 360                list_for_each_entry(mfc, it->cache, list)
 361                        if (pos-- == 0)
 362                                return mfc;
 363        }
 364        read_unlock(&mrt_lock);
 365
 366        spin_lock_bh(&mfc_unres_lock);
 367        it->cache = &mrt->mfc6_unres_queue;
 368        list_for_each_entry(mfc, it->cache, list)
 369                if (pos-- == 0)
 370                        return mfc;
 371        spin_unlock_bh(&mfc_unres_lock);
 372
 373        it->cache = NULL;
 374        return NULL;
 375}
 376
 377/*
 378 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 379 */
 380
 381struct ipmr_vif_iter {
 382        struct seq_net_private p;
 383        struct mr6_table *mrt;
 384        int ct;
 385};
 386
 387static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 388                                            struct ipmr_vif_iter *iter,
 389                                            loff_t pos)
 390{
 391        struct mr6_table *mrt = iter->mrt;
 392
 393        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 394                if (!MIF_EXISTS(mrt, iter->ct))
 395                        continue;
 396                if (pos-- == 0)
 397                        return &mrt->vif6_table[iter->ct];
 398        }
 399        return NULL;
 400}
 401
 402static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 403        __acquires(mrt_lock)
 404{
 405        struct ipmr_vif_iter *iter = seq->private;
 406        struct net *net = seq_file_net(seq);
 407        struct mr6_table *mrt;
 408
 409        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 410        if (!mrt)
 411                return ERR_PTR(-ENOENT);
 412
 413        iter->mrt = mrt;
 414
 415        read_lock(&mrt_lock);
 416        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 417                : SEQ_START_TOKEN;
 418}
 419
 420static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 421{
 422        struct ipmr_vif_iter *iter = seq->private;
 423        struct net *net = seq_file_net(seq);
 424        struct mr6_table *mrt = iter->mrt;
 425
 426        ++*pos;
 427        if (v == SEQ_START_TOKEN)
 428                return ip6mr_vif_seq_idx(net, iter, 0);
 429
 430        while (++iter->ct < mrt->maxvif) {
 431                if (!MIF_EXISTS(mrt, iter->ct))
 432                        continue;
 433                return &mrt->vif6_table[iter->ct];
 434        }
 435        return NULL;
 436}
 437
 438static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 439        __releases(mrt_lock)
 440{
 441        read_unlock(&mrt_lock);
 442}
 443
 444static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 445{
 446        struct ipmr_vif_iter *iter = seq->private;
 447        struct mr6_table *mrt = iter->mrt;
 448
 449        if (v == SEQ_START_TOKEN) {
 450                seq_puts(seq,
 451                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 452        } else {
 453                const struct mif_device *vif = v;
 454                const char *name = vif->dev ? vif->dev->name : "none";
 455
 456                seq_printf(seq,
 457                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 458                           vif - mrt->vif6_table,
 459                           name, vif->bytes_in, vif->pkt_in,
 460                           vif->bytes_out, vif->pkt_out,
 461                           vif->flags);
 462        }
 463        return 0;
 464}
 465
 466static const struct seq_operations ip6mr_vif_seq_ops = {
 467        .start = ip6mr_vif_seq_start,
 468        .next  = ip6mr_vif_seq_next,
 469        .stop  = ip6mr_vif_seq_stop,
 470        .show  = ip6mr_vif_seq_show,
 471};
 472
 473static int ip6mr_vif_open(struct inode *inode, struct file *file)
 474{
 475        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 476                            sizeof(struct ipmr_vif_iter));
 477}
 478
 479static const struct file_operations ip6mr_vif_fops = {
 480        .owner   = THIS_MODULE,
 481        .open    = ip6mr_vif_open,
 482        .read    = seq_read,
 483        .llseek  = seq_lseek,
 484        .release = seq_release_net,
 485};
 486
 487static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 488{
 489        struct ipmr_mfc_iter *it = seq->private;
 490        struct net *net = seq_file_net(seq);
 491        struct mr6_table *mrt;
 492
 493        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 494        if (!mrt)
 495                return ERR_PTR(-ENOENT);
 496
 497        it->mrt = mrt;
 498        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 499                : SEQ_START_TOKEN;
 500}
 501
 502static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 503{
 504        struct mfc6_cache *mfc = v;
 505        struct ipmr_mfc_iter *it = seq->private;
 506        struct net *net = seq_file_net(seq);
 507        struct mr6_table *mrt = it->mrt;
 508
 509        ++*pos;
 510
 511        if (v == SEQ_START_TOKEN)
 512                return ipmr_mfc_seq_idx(net, seq->private, 0);
 513
 514        if (mfc->list.next != it->cache)
 515                return list_entry(mfc->list.next, struct mfc6_cache, list);
 516
 517        if (it->cache == &mrt->mfc6_unres_queue)
 518                goto end_of_list;
 519
 520        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 521
 522        while (++it->ct < MFC6_LINES) {
 523                it->cache = &mrt->mfc6_cache_array[it->ct];
 524                if (list_empty(it->cache))
 525                        continue;
 526                return list_first_entry(it->cache, struct mfc6_cache, list);
 527        }
 528
 529        /* exhausted cache_array, show unresolved */
 530        read_unlock(&mrt_lock);
 531        it->cache = &mrt->mfc6_unres_queue;
 532        it->ct = 0;
 533
 534        spin_lock_bh(&mfc_unres_lock);
 535        if (!list_empty(it->cache))
 536                return list_first_entry(it->cache, struct mfc6_cache, list);
 537
 538 end_of_list:
 539        spin_unlock_bh(&mfc_unres_lock);
 540        it->cache = NULL;
 541
 542        return NULL;
 543}
 544
 545static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 546{
 547        struct ipmr_mfc_iter *it = seq->private;
 548        struct mr6_table *mrt = it->mrt;
 549
 550        if (it->cache == &mrt->mfc6_unres_queue)
 551                spin_unlock_bh(&mfc_unres_lock);
 552        else if (it->cache == &mrt->mfc6_cache_array[it->ct])
 553                read_unlock(&mrt_lock);
 554}
 555
 556static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 557{
 558        int n;
 559
 560        if (v == SEQ_START_TOKEN) {
 561                seq_puts(seq,
 562                         "Group                            "
 563                         "Origin                           "
 564                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 565        } else {
 566                const struct mfc6_cache *mfc = v;
 567                const struct ipmr_mfc_iter *it = seq->private;
 568                struct mr6_table *mrt = it->mrt;
 569
 570                seq_printf(seq, "%pI6 %pI6 %-3hd",
 571                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 572                           mfc->mf6c_parent);
 573
 574                if (it->cache != &mrt->mfc6_unres_queue) {
 575                        seq_printf(seq, " %8lu %8lu %8lu",
 576                                   mfc->mfc_un.res.pkt,
 577                                   mfc->mfc_un.res.bytes,
 578                                   mfc->mfc_un.res.wrong_if);
 579                        for (n = mfc->mfc_un.res.minvif;
 580                             n < mfc->mfc_un.res.maxvif; n++) {
 581                                if (MIF_EXISTS(mrt, n) &&
 582                                    mfc->mfc_un.res.ttls[n] < 255)
 583                                        seq_printf(seq,
 584                                                   " %2d:%-3d",
 585                                                   n, mfc->mfc_un.res.ttls[n]);
 586                        }
 587                } else {
 588                        /* unresolved mfc_caches don't contain
 589                         * pkt, bytes and wrong_if values
 590                         */
 591                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 592                }
 593                seq_putc(seq, '\n');
 594        }
 595        return 0;
 596}
 597
 598static const struct seq_operations ipmr_mfc_seq_ops = {
 599        .start = ipmr_mfc_seq_start,
 600        .next  = ipmr_mfc_seq_next,
 601        .stop  = ipmr_mfc_seq_stop,
 602        .show  = ipmr_mfc_seq_show,
 603};
 604
 605static int ipmr_mfc_open(struct inode *inode, struct file *file)
 606{
 607        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 608                            sizeof(struct ipmr_mfc_iter));
 609}
 610
 611static const struct file_operations ip6mr_mfc_fops = {
 612        .owner   = THIS_MODULE,
 613        .open    = ipmr_mfc_open,
 614        .read    = seq_read,
 615        .llseek  = seq_lseek,
 616        .release = seq_release_net,
 617};
 618#endif
 619
 620#ifdef CONFIG_IPV6_PIMSM_V2
 621
 622static int pim6_rcv(struct sk_buff *skb)
 623{
 624        struct pimreghdr *pim;
 625        struct ipv6hdr   *encap;
 626        struct net_device  *reg_dev = NULL;
 627        struct net *net = dev_net(skb->dev);
 628        struct mr6_table *mrt;
 629        struct flowi6 fl6 = {
 630                .flowi6_iif     = skb->dev->ifindex,
 631                .flowi6_mark    = skb->mark,
 632        };
 633        int reg_vif_num;
 634
 635        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 636                goto drop;
 637
 638        pim = (struct pimreghdr *)skb_transport_header(skb);
 639        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 640            (pim->flags & PIM_NULL_REGISTER) ||
 641            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 642                             sizeof(*pim), IPPROTO_PIM,
 643                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 644             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 645                goto drop;
 646
 647        /* check if the inner packet is destined to mcast group */
 648        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 649                                   sizeof(*pim));
 650
 651        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 652            encap->payload_len == 0 ||
 653            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 654                goto drop;
 655
 656        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 657                goto drop;
 658        reg_vif_num = mrt->mroute_reg_vif_num;
 659
 660        read_lock(&mrt_lock);
 661        if (reg_vif_num >= 0)
 662                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 663        if (reg_dev)
 664                dev_hold(reg_dev);
 665        read_unlock(&mrt_lock);
 666
 667        if (!reg_dev)
 668                goto drop;
 669
 670        skb->mac_header = skb->network_header;
 671        skb_pull(skb, (u8 *)encap - skb->data);
 672        skb_reset_network_header(skb);
 673        skb->protocol = htons(ETH_P_IPV6);
 674        skb->ip_summed = CHECKSUM_NONE;
 675
 676        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 677
 678        netif_rx(skb);
 679
 680        dev_put(reg_dev);
 681        return 0;
 682 drop:
 683        kfree_skb(skb);
 684        return 0;
 685}
 686
 687static const struct inet6_protocol pim6_protocol = {
 688        .handler        =       pim6_rcv,
 689};
 690
 691/* Service routines creating virtual interfaces: PIMREG */
 692
 693static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 694                                      struct net_device *dev)
 695{
 696        struct net *net = dev_net(dev);
 697        struct mr6_table *mrt;
 698        struct flowi6 fl6 = {
 699                .flowi6_oif     = dev->ifindex,
 700                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 701                .flowi6_mark    = skb->mark,
 702        };
 703        int err;
 704
 705        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 706        if (err < 0) {
 707                kfree_skb(skb);
 708                return err;
 709        }
 710
 711        read_lock(&mrt_lock);
 712        dev->stats.tx_bytes += skb->len;
 713        dev->stats.tx_packets++;
 714        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 715        read_unlock(&mrt_lock);
 716        kfree_skb(skb);
 717        return NETDEV_TX_OK;
 718}
 719
 720static int reg_vif_get_iflink(const struct net_device *dev)
 721{
 722        return 0;
 723}
 724
 725static const struct net_device_ops reg_vif_netdev_ops = {
 726        .ndo_start_xmit = reg_vif_xmit,
 727        .ndo_get_iflink = reg_vif_get_iflink,
 728};
 729
 730static void reg_vif_setup(struct net_device *dev)
 731{
 732        dev->type               = ARPHRD_PIMREG;
 733        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 734        dev->flags              = IFF_NOARP;
 735        dev->netdev_ops         = &reg_vif_netdev_ops;
 736        dev->destructor         = free_netdev;
 737        dev->features           |= NETIF_F_NETNS_LOCAL;
 738}
 739
 740static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 741{
 742        struct net_device *dev;
 743        char name[IFNAMSIZ];
 744
 745        if (mrt->id == RT6_TABLE_DFLT)
 746                sprintf(name, "pim6reg");
 747        else
 748                sprintf(name, "pim6reg%u", mrt->id);
 749
 750        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 751        if (!dev)
 752                return NULL;
 753
 754        dev_net_set(dev, net);
 755
 756        if (register_netdevice(dev)) {
 757                free_netdev(dev);
 758                return NULL;
 759        }
 760
 761        if (dev_open(dev))
 762                goto failure;
 763
 764        dev_hold(dev);
 765        return dev;
 766
 767failure:
 768        unregister_netdevice(dev);
 769        return NULL;
 770}
 771#endif
 772
 773/*
 774 *      Delete a VIF entry
 775 */
 776
 777static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 778{
 779        struct mif_device *v;
 780        struct net_device *dev;
 781        struct inet6_dev *in6_dev;
 782
 783        if (vifi < 0 || vifi >= mrt->maxvif)
 784                return -EADDRNOTAVAIL;
 785
 786        v = &mrt->vif6_table[vifi];
 787
 788        write_lock_bh(&mrt_lock);
 789        dev = v->dev;
 790        v->dev = NULL;
 791
 792        if (!dev) {
 793                write_unlock_bh(&mrt_lock);
 794                return -EADDRNOTAVAIL;
 795        }
 796
 797#ifdef CONFIG_IPV6_PIMSM_V2
 798        if (vifi == mrt->mroute_reg_vif_num)
 799                mrt->mroute_reg_vif_num = -1;
 800#endif
 801
 802        if (vifi + 1 == mrt->maxvif) {
 803                int tmp;
 804                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 805                        if (MIF_EXISTS(mrt, tmp))
 806                                break;
 807                }
 808                mrt->maxvif = tmp + 1;
 809        }
 810
 811        write_unlock_bh(&mrt_lock);
 812
 813        dev_set_allmulti(dev, -1);
 814
 815        in6_dev = __in6_dev_get(dev);
 816        if (in6_dev) {
 817                in6_dev->cnf.mc_forwarding--;
 818                inet6_netconf_notify_devconf(dev_net(dev),
 819                                             NETCONFA_MC_FORWARDING,
 820                                             dev->ifindex, &in6_dev->cnf);
 821        }
 822
 823        if (v->flags & MIFF_REGISTER)
 824                unregister_netdevice_queue(dev, head);
 825
 826        dev_put(dev);
 827        return 0;
 828}
 829
 830static inline void ip6mr_cache_free(struct mfc6_cache *c)
 831{
 832        kmem_cache_free(mrt_cachep, c);
 833}
 834
 835/* Destroy an unresolved cache entry, killing queued skbs
 836   and reporting error to netlink readers.
 837 */
 838
 839static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 840{
 841        struct net *net = read_pnet(&mrt->net);
 842        struct sk_buff *skb;
 843
 844        atomic_dec(&mrt->cache_resolve_queue_len);
 845
 846        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 847                if (ipv6_hdr(skb)->version == 0) {
 848                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 849                        nlh->nlmsg_type = NLMSG_ERROR;
 850                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 851                        skb_trim(skb, nlh->nlmsg_len);
 852                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 853                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 854                } else
 855                        kfree_skb(skb);
 856        }
 857
 858        ip6mr_cache_free(c);
 859}
 860
 861
 862/* Timer process for all the unresolved queue. */
 863
 864static void ipmr_do_expire_process(struct mr6_table *mrt)
 865{
 866        unsigned long now = jiffies;
 867        unsigned long expires = 10 * HZ;
 868        struct mfc6_cache *c, *next;
 869
 870        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 871                if (time_after(c->mfc_un.unres.expires, now)) {
 872                        /* not yet... */
 873                        unsigned long interval = c->mfc_un.unres.expires - now;
 874                        if (interval < expires)
 875                                expires = interval;
 876                        continue;
 877                }
 878
 879                list_del(&c->list);
 880                mr6_netlink_event(mrt, c, RTM_DELROUTE);
 881                ip6mr_destroy_unres(mrt, c);
 882        }
 883
 884        if (!list_empty(&mrt->mfc6_unres_queue))
 885                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 886}
 887
 888static void ipmr_expire_process(unsigned long arg)
 889{
 890        struct mr6_table *mrt = (struct mr6_table *)arg;
 891
 892        if (!spin_trylock(&mfc_unres_lock)) {
 893                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 894                return;
 895        }
 896
 897        if (!list_empty(&mrt->mfc6_unres_queue))
 898                ipmr_do_expire_process(mrt);
 899
 900        spin_unlock(&mfc_unres_lock);
 901}
 902
 903/* Fill oifs list. It is called under write locked mrt_lock. */
 904
 905static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 906                                    unsigned char *ttls)
 907{
 908        int vifi;
 909
 910        cache->mfc_un.res.minvif = MAXMIFS;
 911        cache->mfc_un.res.maxvif = 0;
 912        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 913
 914        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 915                if (MIF_EXISTS(mrt, vifi) &&
 916                    ttls[vifi] && ttls[vifi] < 255) {
 917                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 918                        if (cache->mfc_un.res.minvif > vifi)
 919                                cache->mfc_un.res.minvif = vifi;
 920                        if (cache->mfc_un.res.maxvif <= vifi)
 921                                cache->mfc_un.res.maxvif = vifi + 1;
 922                }
 923        }
 924}
 925
 926static int mif6_add(struct net *net, struct mr6_table *mrt,
 927                    struct mif6ctl *vifc, int mrtsock)
 928{
 929        int vifi = vifc->mif6c_mifi;
 930        struct mif_device *v = &mrt->vif6_table[vifi];
 931        struct net_device *dev;
 932        struct inet6_dev *in6_dev;
 933        int err;
 934
 935        /* Is vif busy ? */
 936        if (MIF_EXISTS(mrt, vifi))
 937                return -EADDRINUSE;
 938
 939        switch (vifc->mif6c_flags) {
 940#ifdef CONFIG_IPV6_PIMSM_V2
 941        case MIFF_REGISTER:
 942                /*
 943                 * Special Purpose VIF in PIM
 944                 * All the packets will be sent to the daemon
 945                 */
 946                if (mrt->mroute_reg_vif_num >= 0)
 947                        return -EADDRINUSE;
 948                dev = ip6mr_reg_vif(net, mrt);
 949                if (!dev)
 950                        return -ENOBUFS;
 951                err = dev_set_allmulti(dev, 1);
 952                if (err) {
 953                        unregister_netdevice(dev);
 954                        dev_put(dev);
 955                        return err;
 956                }
 957                break;
 958#endif
 959        case 0:
 960                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 961                if (!dev)
 962                        return -EADDRNOTAVAIL;
 963                err = dev_set_allmulti(dev, 1);
 964                if (err) {
 965                        dev_put(dev);
 966                        return err;
 967                }
 968                break;
 969        default:
 970                return -EINVAL;
 971        }
 972
 973        in6_dev = __in6_dev_get(dev);
 974        if (in6_dev) {
 975                in6_dev->cnf.mc_forwarding++;
 976                inet6_netconf_notify_devconf(dev_net(dev),
 977                                             NETCONFA_MC_FORWARDING,
 978                                             dev->ifindex, &in6_dev->cnf);
 979        }
 980
 981        /*
 982         *      Fill in the VIF structures
 983         */
 984        v->rate_limit = vifc->vifc_rate_limit;
 985        v->flags = vifc->mif6c_flags;
 986        if (!mrtsock)
 987                v->flags |= VIFF_STATIC;
 988        v->threshold = vifc->vifc_threshold;
 989        v->bytes_in = 0;
 990        v->bytes_out = 0;
 991        v->pkt_in = 0;
 992        v->pkt_out = 0;
 993        v->link = dev->ifindex;
 994        if (v->flags & MIFF_REGISTER)
 995                v->link = dev_get_iflink(dev);
 996
 997        /* And finish update writing critical data */
 998        write_lock_bh(&mrt_lock);
 999        v->dev = dev;
1000#ifdef CONFIG_IPV6_PIMSM_V2
1001        if (v->flags & MIFF_REGISTER)
1002                mrt->mroute_reg_vif_num = vifi;
1003#endif
1004        if (vifi + 1 > mrt->maxvif)
1005                mrt->maxvif = vifi + 1;
1006        write_unlock_bh(&mrt_lock);
1007        return 0;
1008}
1009
1010static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011                                           const struct in6_addr *origin,
1012                                           const struct in6_addr *mcastgrp)
1013{
1014        int line = MFC6_HASH(mcastgrp, origin);
1015        struct mfc6_cache *c;
1016
1017        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020                        return c;
1021        }
1022        return NULL;
1023}
1024
1025/* Look for a (*,*,oif) entry */
1026static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1027                                                      mifi_t mifi)
1028{
1029        int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1030        struct mfc6_cache *c;
1031
1032        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1033                if (ipv6_addr_any(&c->mf6c_origin) &&
1034                    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1035                    (c->mfc_un.res.ttls[mifi] < 255))
1036                        return c;
1037
1038        return NULL;
1039}
1040
1041/* Look for a (*,G) entry */
1042static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1043                                               struct in6_addr *mcastgrp,
1044                                               mifi_t mifi)
1045{
1046        int line = MFC6_HASH(mcastgrp, &in6addr_any);
1047        struct mfc6_cache *c, *proxy;
1048
1049        if (ipv6_addr_any(mcastgrp))
1050                goto skip;
1051
1052        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1053                if (ipv6_addr_any(&c->mf6c_origin) &&
1054                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1055                        if (c->mfc_un.res.ttls[mifi] < 255)
1056                                return c;
1057
1058                        /* It's ok if the mifi is part of the static tree */
1059                        proxy = ip6mr_cache_find_any_parent(mrt,
1060                                                            c->mf6c_parent);
1061                        if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1062                                return c;
1063                }
1064
1065skip:
1066        return ip6mr_cache_find_any_parent(mrt, mifi);
1067}
1068
1069/*
1070 *      Allocate a multicast cache entry
1071 */
1072static struct mfc6_cache *ip6mr_cache_alloc(void)
1073{
1074        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1075        if (!c)
1076                return NULL;
1077        c->mfc_un.res.minvif = MAXMIFS;
1078        return c;
1079}
1080
1081static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1082{
1083        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1084        if (!c)
1085                return NULL;
1086        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1087        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1088        return c;
1089}
1090
1091/*
1092 *      A cache entry has gone into a resolved state from queued
1093 */
1094
1095static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1096                                struct mfc6_cache *uc, struct mfc6_cache *c)
1097{
1098        struct sk_buff *skb;
1099
1100        /*
1101         *      Play the pending entries through our router
1102         */
1103
1104        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1105                if (ipv6_hdr(skb)->version == 0) {
1106                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1107
1108                        if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1109                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1110                        } else {
1111                                nlh->nlmsg_type = NLMSG_ERROR;
1112                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1113                                skb_trim(skb, nlh->nlmsg_len);
1114                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1115                        }
1116                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1117                } else
1118                        ip6_mr_forward(net, mrt, skb, c);
1119        }
1120}
1121
1122/*
1123 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1124 *      expects the following bizarre scheme.
1125 *
1126 *      Called under mrt_lock.
1127 */
1128
1129static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1130                              mifi_t mifi, int assert)
1131{
1132        struct sk_buff *skb;
1133        struct mrt6msg *msg;
1134        int ret;
1135
1136#ifdef CONFIG_IPV6_PIMSM_V2
1137        if (assert == MRT6MSG_WHOLEPKT)
1138                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1139                                                +sizeof(*msg));
1140        else
1141#endif
1142                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1143
1144        if (!skb)
1145                return -ENOBUFS;
1146
1147        /* I suppose that internal messages
1148         * do not require checksums */
1149
1150        skb->ip_summed = CHECKSUM_UNNECESSARY;
1151
1152#ifdef CONFIG_IPV6_PIMSM_V2
1153        if (assert == MRT6MSG_WHOLEPKT) {
1154                /* Ugly, but we have no choice with this interface.
1155                   Duplicate old header, fix length etc.
1156                   And all this only to mangle msg->im6_msgtype and
1157                   to set msg->im6_mbz to "mbz" :-)
1158                 */
1159                skb_push(skb, -skb_network_offset(pkt));
1160
1161                skb_push(skb, sizeof(*msg));
1162                skb_reset_transport_header(skb);
1163                msg = (struct mrt6msg *)skb_transport_header(skb);
1164                msg->im6_mbz = 0;
1165                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1166                msg->im6_mif = mrt->mroute_reg_vif_num;
1167                msg->im6_pad = 0;
1168                msg->im6_src = ipv6_hdr(pkt)->saddr;
1169                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1170
1171                skb->ip_summed = CHECKSUM_UNNECESSARY;
1172        } else
1173#endif
1174        {
1175        /*
1176         *      Copy the IP header
1177         */
1178
1179        skb_put(skb, sizeof(struct ipv6hdr));
1180        skb_reset_network_header(skb);
1181        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1182
1183        /*
1184         *      Add our header
1185         */
1186        skb_put(skb, sizeof(*msg));
1187        skb_reset_transport_header(skb);
1188        msg = (struct mrt6msg *)skb_transport_header(skb);
1189
1190        msg->im6_mbz = 0;
1191        msg->im6_msgtype = assert;
1192        msg->im6_mif = mifi;
1193        msg->im6_pad = 0;
1194        msg->im6_src = ipv6_hdr(pkt)->saddr;
1195        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1196
1197        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1198        skb->ip_summed = CHECKSUM_UNNECESSARY;
1199        }
1200
1201        if (!mrt->mroute6_sk) {
1202                kfree_skb(skb);
1203                return -EINVAL;
1204        }
1205
1206        /*
1207         *      Deliver to user space multicast routing algorithms
1208         */
1209        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1210        if (ret < 0) {
1211                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1212                kfree_skb(skb);
1213        }
1214
1215        return ret;
1216}
1217
1218/*
1219 *      Queue a packet for resolution. It gets locked cache entry!
1220 */
1221
1222static int
1223ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1224{
1225        bool found = false;
1226        int err;
1227        struct mfc6_cache *c;
1228
1229        spin_lock_bh(&mfc_unres_lock);
1230        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1231                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1232                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1233                        found = true;
1234                        break;
1235                }
1236        }
1237
1238        if (!found) {
1239                /*
1240                 *      Create a new entry if allowable
1241                 */
1242
1243                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1244                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1245                        spin_unlock_bh(&mfc_unres_lock);
1246
1247                        kfree_skb(skb);
1248                        return -ENOBUFS;
1249                }
1250
1251                /*
1252                 *      Fill in the new cache entry
1253                 */
1254                c->mf6c_parent = -1;
1255                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1256                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1257
1258                /*
1259                 *      Reflect first query at pim6sd
1260                 */
1261                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1262                if (err < 0) {
1263                        /* If the report failed throw the cache entry
1264                           out - Brad Parker
1265                         */
1266                        spin_unlock_bh(&mfc_unres_lock);
1267
1268                        ip6mr_cache_free(c);
1269                        kfree_skb(skb);
1270                        return err;
1271                }
1272
1273                atomic_inc(&mrt->cache_resolve_queue_len);
1274                list_add(&c->list, &mrt->mfc6_unres_queue);
1275                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1276
1277                ipmr_do_expire_process(mrt);
1278        }
1279
1280        /*
1281         *      See if we can append the packet
1282         */
1283        if (c->mfc_un.unres.unresolved.qlen > 3) {
1284                kfree_skb(skb);
1285                err = -ENOBUFS;
1286        } else {
1287                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1288                err = 0;
1289        }
1290
1291        spin_unlock_bh(&mfc_unres_lock);
1292        return err;
1293}
1294
1295/*
1296 *      MFC6 cache manipulation by user space
1297 */
1298
1299static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1300                            int parent)
1301{
1302        int line;
1303        struct mfc6_cache *c, *next;
1304
1305        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1306
1307        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1308                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1309                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1310                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1311                    (parent == -1 || parent == c->mf6c_parent)) {
1312                        write_lock_bh(&mrt_lock);
1313                        list_del(&c->list);
1314                        write_unlock_bh(&mrt_lock);
1315
1316                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1317                        ip6mr_cache_free(c);
1318                        return 0;
1319                }
1320        }
1321        return -ENOENT;
1322}
1323
1324static int ip6mr_device_event(struct notifier_block *this,
1325                              unsigned long event, void *ptr)
1326{
1327        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328        struct net *net = dev_net(dev);
1329        struct mr6_table *mrt;
1330        struct mif_device *v;
1331        int ct;
1332        LIST_HEAD(list);
1333
1334        if (event != NETDEV_UNREGISTER)
1335                return NOTIFY_DONE;
1336
1337        ip6mr_for_each_table(mrt, net) {
1338                v = &mrt->vif6_table[0];
1339                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340                        if (v->dev == dev)
1341                                mif6_delete(mrt, ct, &list);
1342                }
1343        }
1344        unregister_netdevice_many(&list);
1345
1346        return NOTIFY_DONE;
1347}
1348
1349static struct notifier_block ip6_mr_notifier = {
1350        .notifier_call = ip6mr_device_event
1351};
1352
1353/*
1354 *      Setup for IP multicast routing
1355 */
1356
1357static int __net_init ip6mr_net_init(struct net *net)
1358{
1359        int err;
1360
1361        err = ip6mr_rules_init(net);
1362        if (err < 0)
1363                goto fail;
1364
1365#ifdef CONFIG_PROC_FS
1366        err = -ENOMEM;
1367        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1368                goto proc_vif_fail;
1369        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1370                goto proc_cache_fail;
1371#endif
1372
1373        return 0;
1374
1375#ifdef CONFIG_PROC_FS
1376proc_cache_fail:
1377        remove_proc_entry("ip6_mr_vif", net->proc_net);
1378proc_vif_fail:
1379        ip6mr_rules_exit(net);
1380#endif
1381fail:
1382        return err;
1383}
1384
1385static void __net_exit ip6mr_net_exit(struct net *net)
1386{
1387#ifdef CONFIG_PROC_FS
1388        remove_proc_entry("ip6_mr_cache", net->proc_net);
1389        remove_proc_entry("ip6_mr_vif", net->proc_net);
1390#endif
1391        ip6mr_rules_exit(net);
1392}
1393
1394static struct pernet_operations ip6mr_net_ops = {
1395        .init = ip6mr_net_init,
1396        .exit = ip6mr_net_exit,
1397};
1398
1399int __init ip6_mr_init(void)
1400{
1401        int err;
1402
1403        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1404                                       sizeof(struct mfc6_cache),
1405                                       0, SLAB_HWCACHE_ALIGN,
1406                                       NULL);
1407        if (!mrt_cachep)
1408                return -ENOMEM;
1409
1410        err = register_pernet_subsys(&ip6mr_net_ops);
1411        if (err)
1412                goto reg_pernet_fail;
1413
1414        err = register_netdevice_notifier(&ip6_mr_notifier);
1415        if (err)
1416                goto reg_notif_fail;
1417#ifdef CONFIG_IPV6_PIMSM_V2
1418        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1419                pr_err("%s: can't add PIM protocol\n", __func__);
1420                err = -EAGAIN;
1421                goto add_proto_fail;
1422        }
1423#endif
1424        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1425                      ip6mr_rtm_dumproute, NULL);
1426        return 0;
1427#ifdef CONFIG_IPV6_PIMSM_V2
1428add_proto_fail:
1429        unregister_netdevice_notifier(&ip6_mr_notifier);
1430#endif
1431reg_notif_fail:
1432        unregister_pernet_subsys(&ip6mr_net_ops);
1433reg_pernet_fail:
1434        kmem_cache_destroy(mrt_cachep);
1435        return err;
1436}
1437
1438void ip6_mr_cleanup(void)
1439{
1440        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1441#ifdef CONFIG_IPV6_PIMSM_V2
1442        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1443#endif
1444        unregister_netdevice_notifier(&ip6_mr_notifier);
1445        unregister_pernet_subsys(&ip6mr_net_ops);
1446        kmem_cache_destroy(mrt_cachep);
1447}
1448
1449static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1450                         struct mf6cctl *mfc, int mrtsock, int parent)
1451{
1452        bool found = false;
1453        int line;
1454        struct mfc6_cache *uc, *c;
1455        unsigned char ttls[MAXMIFS];
1456        int i;
1457
1458        if (mfc->mf6cc_parent >= MAXMIFS)
1459                return -ENFILE;
1460
1461        memset(ttls, 255, MAXMIFS);
1462        for (i = 0; i < MAXMIFS; i++) {
1463                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1464                        ttls[i] = 1;
1465
1466        }
1467
1468        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1469
1470        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1471                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1472                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1473                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1474                    (parent == -1 || parent == mfc->mf6cc_parent)) {
1475                        found = true;
1476                        break;
1477                }
1478        }
1479
1480        if (found) {
1481                write_lock_bh(&mrt_lock);
1482                c->mf6c_parent = mfc->mf6cc_parent;
1483                ip6mr_update_thresholds(mrt, c, ttls);
1484                if (!mrtsock)
1485                        c->mfc_flags |= MFC_STATIC;
1486                write_unlock_bh(&mrt_lock);
1487                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1488                return 0;
1489        }
1490
1491        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1492            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1493                return -EINVAL;
1494
1495        c = ip6mr_cache_alloc();
1496        if (!c)
1497                return -ENOMEM;
1498
1499        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1500        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1501        c->mf6c_parent = mfc->mf6cc_parent;
1502        ip6mr_update_thresholds(mrt, c, ttls);
1503        if (!mrtsock)
1504                c->mfc_flags |= MFC_STATIC;
1505
1506        write_lock_bh(&mrt_lock);
1507        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1508        write_unlock_bh(&mrt_lock);
1509
1510        /*
1511         *      Check to see if we resolved a queued list. If so we
1512         *      need to send on the frames and tidy up.
1513         */
1514        found = false;
1515        spin_lock_bh(&mfc_unres_lock);
1516        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1517                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1518                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1519                        list_del(&uc->list);
1520                        atomic_dec(&mrt->cache_resolve_queue_len);
1521                        found = true;
1522                        break;
1523                }
1524        }
1525        if (list_empty(&mrt->mfc6_unres_queue))
1526                del_timer(&mrt->ipmr_expire_timer);
1527        spin_unlock_bh(&mfc_unres_lock);
1528
1529        if (found) {
1530                ip6mr_cache_resolve(net, mrt, uc, c);
1531                ip6mr_cache_free(uc);
1532        }
1533        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1534        return 0;
1535}
1536
1537/*
1538 *      Close the multicast socket, and clear the vif tables etc
1539 */
1540
1541static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1542{
1543        int i;
1544        LIST_HEAD(list);
1545        struct mfc6_cache *c, *next;
1546
1547        /*
1548         *      Shut down all active vif entries
1549         */
1550        for (i = 0; i < mrt->maxvif; i++) {
1551                if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1552                        continue;
1553                mif6_delete(mrt, i, &list);
1554        }
1555        unregister_netdevice_many(&list);
1556
1557        /*
1558         *      Wipe the cache
1559         */
1560        for (i = 0; i < MFC6_LINES; i++) {
1561                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1562                        if (!all && (c->mfc_flags & MFC_STATIC))
1563                                continue;
1564                        write_lock_bh(&mrt_lock);
1565                        list_del(&c->list);
1566                        write_unlock_bh(&mrt_lock);
1567
1568                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1569                        ip6mr_cache_free(c);
1570                }
1571        }
1572
1573        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1574                spin_lock_bh(&mfc_unres_lock);
1575                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1576                        list_del(&c->list);
1577                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1578                        ip6mr_destroy_unres(mrt, c);
1579                }
1580                spin_unlock_bh(&mfc_unres_lock);
1581        }
1582}
1583
1584static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1585{
1586        int err = 0;
1587        struct net *net = sock_net(sk);
1588
1589        rtnl_lock();
1590        write_lock_bh(&mrt_lock);
1591        if (likely(mrt->mroute6_sk == NULL)) {
1592                mrt->mroute6_sk = sk;
1593                net->ipv6.devconf_all->mc_forwarding++;
1594                inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1595                                             NETCONFA_IFINDEX_ALL,
1596                                             net->ipv6.devconf_all);
1597        }
1598        else
1599                err = -EADDRINUSE;
1600        write_unlock_bh(&mrt_lock);
1601
1602        rtnl_unlock();
1603
1604        return err;
1605}
1606
1607int ip6mr_sk_done(struct sock *sk)
1608{
1609        int err = -EACCES;
1610        struct net *net = sock_net(sk);
1611        struct mr6_table *mrt;
1612
1613        rtnl_lock();
1614        ip6mr_for_each_table(mrt, net) {
1615                if (sk == mrt->mroute6_sk) {
1616                        write_lock_bh(&mrt_lock);
1617                        mrt->mroute6_sk = NULL;
1618                        net->ipv6.devconf_all->mc_forwarding--;
1619                        inet6_netconf_notify_devconf(net,
1620                                                     NETCONFA_MC_FORWARDING,
1621                                                     NETCONFA_IFINDEX_ALL,
1622                                                     net->ipv6.devconf_all);
1623                        write_unlock_bh(&mrt_lock);
1624
1625                        mroute_clean_tables(mrt, false);
1626                        err = 0;
1627                        break;
1628                }
1629        }
1630        rtnl_unlock();
1631
1632        return err;
1633}
1634
1635struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1636{
1637        struct mr6_table *mrt;
1638        struct flowi6 fl6 = {
1639                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1640                .flowi6_oif     = skb->dev->ifindex,
1641                .flowi6_mark    = skb->mark,
1642        };
1643
1644        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1645                return NULL;
1646
1647        return mrt->mroute6_sk;
1648}
1649
1650/*
1651 *      Socket options and virtual interface manipulation. The whole
1652 *      virtual interface system is a complete heap, but unfortunately
1653 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1654 *      MOSPF/PIM router set up we can clean this up.
1655 */
1656
1657int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1658{
1659        int ret, parent = 0;
1660        struct mif6ctl vif;
1661        struct mf6cctl mfc;
1662        mifi_t mifi;
1663        struct net *net = sock_net(sk);
1664        struct mr6_table *mrt;
1665
1666        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1667        if (!mrt)
1668                return -ENOENT;
1669
1670        if (optname != MRT6_INIT) {
1671                if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1672                        return -EACCES;
1673        }
1674
1675        switch (optname) {
1676        case MRT6_INIT:
1677                if (sk->sk_type != SOCK_RAW ||
1678                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1679                        return -EOPNOTSUPP;
1680                if (optlen < sizeof(int))
1681                        return -EINVAL;
1682
1683                return ip6mr_sk_init(mrt, sk);
1684
1685        case MRT6_DONE:
1686                return ip6mr_sk_done(sk);
1687
1688        case MRT6_ADD_MIF:
1689                if (optlen < sizeof(vif))
1690                        return -EINVAL;
1691                if (copy_from_user(&vif, optval, sizeof(vif)))
1692                        return -EFAULT;
1693                if (vif.mif6c_mifi >= MAXMIFS)
1694                        return -ENFILE;
1695                rtnl_lock();
1696                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1697                rtnl_unlock();
1698                return ret;
1699
1700        case MRT6_DEL_MIF:
1701                if (optlen < sizeof(mifi_t))
1702                        return -EINVAL;
1703                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1704                        return -EFAULT;
1705                rtnl_lock();
1706                ret = mif6_delete(mrt, mifi, NULL);
1707                rtnl_unlock();
1708                return ret;
1709
1710        /*
1711         *      Manipulate the forwarding caches. These live
1712         *      in a sort of kernel/user symbiosis.
1713         */
1714        case MRT6_ADD_MFC:
1715        case MRT6_DEL_MFC:
1716                parent = -1;
1717        case MRT6_ADD_MFC_PROXY:
1718        case MRT6_DEL_MFC_PROXY:
1719                if (optlen < sizeof(mfc))
1720                        return -EINVAL;
1721                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1722                        return -EFAULT;
1723                if (parent == 0)
1724                        parent = mfc.mf6cc_parent;
1725                rtnl_lock();
1726                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1727                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1728                else
1729                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1730                                            sk == mrt->mroute6_sk, parent);
1731                rtnl_unlock();
1732                return ret;
1733
1734        /*
1735         *      Control PIM assert (to activate pim will activate assert)
1736         */
1737        case MRT6_ASSERT:
1738        {
1739                int v;
1740
1741                if (optlen != sizeof(v))
1742                        return -EINVAL;
1743                if (get_user(v, (int __user *)optval))
1744                        return -EFAULT;
1745                mrt->mroute_do_assert = v;
1746                return 0;
1747        }
1748
1749#ifdef CONFIG_IPV6_PIMSM_V2
1750        case MRT6_PIM:
1751        {
1752                int v;
1753
1754                if (optlen != sizeof(v))
1755                        return -EINVAL;
1756                if (get_user(v, (int __user *)optval))
1757                        return -EFAULT;
1758                v = !!v;
1759                rtnl_lock();
1760                ret = 0;
1761                if (v != mrt->mroute_do_pim) {
1762                        mrt->mroute_do_pim = v;
1763                        mrt->mroute_do_assert = v;
1764                }
1765                rtnl_unlock();
1766                return ret;
1767        }
1768
1769#endif
1770#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1771        case MRT6_TABLE:
1772        {
1773                u32 v;
1774
1775                if (optlen != sizeof(u32))
1776                        return -EINVAL;
1777                if (get_user(v, (u32 __user *)optval))
1778                        return -EFAULT;
1779                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1780                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1781                        return -EINVAL;
1782                if (sk == mrt->mroute6_sk)
1783                        return -EBUSY;
1784
1785                rtnl_lock();
1786                ret = 0;
1787                if (!ip6mr_new_table(net, v))
1788                        ret = -ENOMEM;
1789                raw6_sk(sk)->ip6mr_table = v;
1790                rtnl_unlock();
1791                return ret;
1792        }
1793#endif
1794        /*
1795         *      Spurious command, or MRT6_VERSION which you cannot
1796         *      set.
1797         */
1798        default:
1799                return -ENOPROTOOPT;
1800        }
1801}
1802
1803/*
1804 *      Getsock opt support for the multicast routing system.
1805 */
1806
1807int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1808                          int __user *optlen)
1809{
1810        int olr;
1811        int val;
1812        struct net *net = sock_net(sk);
1813        struct mr6_table *mrt;
1814
1815        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1816        if (!mrt)
1817                return -ENOENT;
1818
1819        switch (optname) {
1820        case MRT6_VERSION:
1821                val = 0x0305;
1822                break;
1823#ifdef CONFIG_IPV6_PIMSM_V2
1824        case MRT6_PIM:
1825                val = mrt->mroute_do_pim;
1826                break;
1827#endif
1828        case MRT6_ASSERT:
1829                val = mrt->mroute_do_assert;
1830                break;
1831        default:
1832                return -ENOPROTOOPT;
1833        }
1834
1835        if (get_user(olr, optlen))
1836                return -EFAULT;
1837
1838        olr = min_t(int, olr, sizeof(int));
1839        if (olr < 0)
1840                return -EINVAL;
1841
1842        if (put_user(olr, optlen))
1843                return -EFAULT;
1844        if (copy_to_user(optval, &val, olr))
1845                return -EFAULT;
1846        return 0;
1847}
1848
1849/*
1850 *      The IP multicast ioctl support routines.
1851 */
1852
1853int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1854{
1855        struct sioc_sg_req6 sr;
1856        struct sioc_mif_req6 vr;
1857        struct mif_device *vif;
1858        struct mfc6_cache *c;
1859        struct net *net = sock_net(sk);
1860        struct mr6_table *mrt;
1861
1862        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1863        if (!mrt)
1864                return -ENOENT;
1865
1866        switch (cmd) {
1867        case SIOCGETMIFCNT_IN6:
1868                if (copy_from_user(&vr, arg, sizeof(vr)))
1869                        return -EFAULT;
1870                if (vr.mifi >= mrt->maxvif)
1871                        return -EINVAL;
1872                read_lock(&mrt_lock);
1873                vif = &mrt->vif6_table[vr.mifi];
1874                if (MIF_EXISTS(mrt, vr.mifi)) {
1875                        vr.icount = vif->pkt_in;
1876                        vr.ocount = vif->pkt_out;
1877                        vr.ibytes = vif->bytes_in;
1878                        vr.obytes = vif->bytes_out;
1879                        read_unlock(&mrt_lock);
1880
1881                        if (copy_to_user(arg, &vr, sizeof(vr)))
1882                                return -EFAULT;
1883                        return 0;
1884                }
1885                read_unlock(&mrt_lock);
1886                return -EADDRNOTAVAIL;
1887        case SIOCGETSGCNT_IN6:
1888                if (copy_from_user(&sr, arg, sizeof(sr)))
1889                        return -EFAULT;
1890
1891                read_lock(&mrt_lock);
1892                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1893                if (c) {
1894                        sr.pktcnt = c->mfc_un.res.pkt;
1895                        sr.bytecnt = c->mfc_un.res.bytes;
1896                        sr.wrong_if = c->mfc_un.res.wrong_if;
1897                        read_unlock(&mrt_lock);
1898
1899                        if (copy_to_user(arg, &sr, sizeof(sr)))
1900                                return -EFAULT;
1901                        return 0;
1902                }
1903                read_unlock(&mrt_lock);
1904                return -EADDRNOTAVAIL;
1905        default:
1906                return -ENOIOCTLCMD;
1907        }
1908}
1909
#ifdef CONFIG_COMPAT
/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETSGCNT_IN6.
 * The counters are compat_ulong_t; presumably this mirrors
 * struct sioc_sg_req6 as seen by a compat task (confirm against
 * linux/mroute6.h).
 */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/*
 * Request layout used by ip6mr_compat_ioctl() for SIOCGETMIFCNT_IN6.
 * The counters are compat_ulong_t; presumably this mirrors
 * struct sioc_mif_req6 as seen by a compat task (confirm against
 * linux/mroute6.h).
 */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 * Compat counterpart of ip6mr_ioctl(): same commands and return codes,
 * but the request is read and written using the compat_* layouts above,
 * so counter values are stored into compat_ulong_t fields.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6: {
		struct compat_sioc_mif_req6 mif_req;
		struct mif_device *mif;

		if (copy_from_user(&mif_req, arg, sizeof(mif_req)))
			return -EFAULT;
		if (mif_req.mifi >= mrt->maxvif)
			return -EINVAL;

		/* Snapshot the counters under the lock, copy out after it. */
		read_lock(&mrt_lock);
		if (!MIF_EXISTS(mrt, mif_req.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		mif = &mrt->vif6_table[mif_req.mifi];
		mif_req.icount = mif->pkt_in;
		mif_req.ocount = mif->pkt_out;
		mif_req.ibytes = mif->bytes_in;
		mif_req.obytes = mif->bytes_out;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &mif_req, sizeof(mif_req)))
			return -EFAULT;
		return 0;
	}
	case SIOCGETSGCNT_IN6: {
		struct compat_sioc_sg_req6 sg_req;
		struct mfc6_cache *entry;

		if (copy_from_user(&sg_req, arg, sizeof(sg_req)))
			return -EFAULT;

		read_lock(&mrt_lock);
		entry = ip6mr_cache_find(mrt, &sg_req.src.sin6_addr,
					 &sg_req.grp.sin6_addr);
		if (!entry) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		sg_req.pktcnt = entry->mfc_un.res.pkt;
		sg_req.bytecnt = entry->mfc_un.res.bytes;
		sg_req.wrong_if = entry->mfc_un.res.wrong_if;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &sg_req, sizeof(sg_req)))
			return -EFAULT;
		return 0;
	}
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1984
1985static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1986{
1987        IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
1988                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1989        IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
1990                         IPSTATS_MIB_OUTOCTETS, skb->len);
1991        return dst_output(net, sk, skb);
1992}
1993
1994/*
1995 *      Processing handlers for ip6mr_forward
1996 */
1997
1998static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1999                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2000{
2001        struct ipv6hdr *ipv6h;
2002        struct mif_device *vif = &mrt->vif6_table[vifi];
2003        struct net_device *dev;
2004        struct dst_entry *dst;
2005        struct flowi6 fl6;
2006
2007        if (!vif->dev)
2008                goto out_free;
2009
2010#ifdef CONFIG_IPV6_PIMSM_V2
2011        if (vif->flags & MIFF_REGISTER) {
2012                vif->pkt_out++;
2013                vif->bytes_out += skb->len;
2014                vif->dev->stats.tx_bytes += skb->len;
2015                vif->dev->stats.tx_packets++;
2016                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2017                goto out_free;
2018        }
2019#endif
2020
2021        ipv6h = ipv6_hdr(skb);
2022
2023        fl6 = (struct flowi6) {
2024                .flowi6_oif = vif->link,
2025                .daddr = ipv6h->daddr,
2026        };
2027
2028        dst = ip6_route_output(net, NULL, &fl6);
2029        if (dst->error) {
2030                dst_release(dst);
2031                goto out_free;
2032        }
2033
2034        skb_dst_drop(skb);
2035        skb_dst_set(skb, dst);
2036
2037        /*
2038         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2039         * not only before forwarding, but after forwarding on all output
2040         * interfaces. It is clear, if mrouter runs a multicasting
2041         * program, it should receive packets not depending to what interface
2042         * program is joined.
2043         * If we will not make it, the program will have to join on all
2044         * interfaces. On the other hand, multihoming host (or router, but
2045         * not mrouter) cannot join to more than one interface - it will
2046         * result in receiving multiple packets.
2047         */
2048        dev = vif->dev;
2049        skb->dev = dev;
2050        vif->pkt_out++;
2051        vif->bytes_out += skb->len;
2052
2053        /* We are about to write */
2054        /* XXX: extension headers? */
2055        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2056                goto out_free;
2057
2058        ipv6h = ipv6_hdr(skb);
2059        ipv6h->hop_limit--;
2060
2061        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2062
2063        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2064                       net, NULL, skb, skb->dev, dev,
2065                       ip6mr_forward2_finish);
2066
2067out_free:
2068        kfree_skb(skb);
2069        return 0;
2070}
2071
2072static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073{
2074        int ct;
2075
2076        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077                if (mrt->vif6_table[ct].dev == dev)
2078                        break;
2079        }
2080        return ct;
2081}
2082
2083static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2084                           struct sk_buff *skb, struct mfc6_cache *cache)
2085{
2086        int psend = -1;
2087        int vif, ct;
2088        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2089
2090        vif = cache->mf6c_parent;
2091        cache->mfc_un.res.pkt++;
2092        cache->mfc_un.res.bytes += skb->len;
2093
2094        if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2095                struct mfc6_cache *cache_proxy;
2096
2097                /* For an (*,G) entry, we only check that the incoming
2098                 * interface is part of the static tree.
2099                 */
2100                cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2101                if (cache_proxy &&
2102                    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2103                        goto forward;
2104        }
2105
2106        /*
2107         * Wrong interface: drop packet and (maybe) send PIM assert.
2108         */
2109        if (mrt->vif6_table[vif].dev != skb->dev) {
2110                cache->mfc_un.res.wrong_if++;
2111
2112                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2113                    /* pimsm uses asserts, when switching from RPT to SPT,
2114                       so that we cannot check that packet arrived on an oif.
2115                       It is bad, but otherwise we would need to move pretty
2116                       large chunk of pimd to kernel. Ough... --ANK
2117                     */
2118                    (mrt->mroute_do_pim ||
2119                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2120                    time_after(jiffies,
2121                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2122                        cache->mfc_un.res.last_assert = jiffies;
2123                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2124                }
2125                goto dont_forward;
2126        }
2127
2128forward:
2129        mrt->vif6_table[vif].pkt_in++;
2130        mrt->vif6_table[vif].bytes_in += skb->len;
2131
2132        /*
2133         *      Forward the frame
2134         */
2135        if (ipv6_addr_any(&cache->mf6c_origin) &&
2136            ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2137                if (true_vifi >= 0 &&
2138                    true_vifi != cache->mf6c_parent &&
2139                    ipv6_hdr(skb)->hop_limit >
2140                                cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2141                        /* It's an (*,*) entry and the packet is not coming from
2142                         * the upstream: forward the packet to the upstream
2143                         * only.
2144                         */
2145                        psend = cache->mf6c_parent;
2146                        goto last_forward;
2147                }
2148                goto dont_forward;
2149        }
2150        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2151                /* For (*,G) entry, don't forward to the incoming interface */
2152                if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2153                    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2154                        if (psend != -1) {
2155                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2156                                if (skb2)
2157                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2158                        }
2159                        psend = ct;
2160                }
2161        }
2162last_forward:
2163        if (psend != -1) {
2164                ip6mr_forward2(net, mrt, skb, cache, psend);
2165                return;
2166        }
2167
2168dont_forward:
2169        kfree_skb(skb);
2170}
2171
2172
2173/*
2174 *      Multicast packets for forwarding arrive here
2175 */
2176
2177int ip6_mr_input(struct sk_buff *skb)
2178{
2179        struct mfc6_cache *cache;
2180        struct net *net = dev_net(skb->dev);
2181        struct mr6_table *mrt;
2182        struct flowi6 fl6 = {
2183                .flowi6_iif     = skb->dev->ifindex,
2184                .flowi6_mark    = skb->mark,
2185        };
2186        int err;
2187
2188        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2189        if (err < 0) {
2190                kfree_skb(skb);
2191                return err;
2192        }
2193
2194        read_lock(&mrt_lock);
2195        cache = ip6mr_cache_find(mrt,
2196                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2197        if (!cache) {
2198                int vif = ip6mr_find_vif(mrt, skb->dev);
2199
2200                if (vif >= 0)
2201                        cache = ip6mr_cache_find_any(mrt,
2202                                                     &ipv6_hdr(skb)->daddr,
2203                                                     vif);
2204        }
2205
2206        /*
2207         *      No usable cache entry
2208         */
2209        if (!cache) {
2210                int vif;
2211
2212                vif = ip6mr_find_vif(mrt, skb->dev);
2213                if (vif >= 0) {
2214                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2215                        read_unlock(&mrt_lock);
2216
2217                        return err;
2218                }
2219                read_unlock(&mrt_lock);
2220                kfree_skb(skb);
2221                return -ENODEV;
2222        }
2223
2224        ip6_mr_forward(net, mrt, skb, cache);
2225
2226        read_unlock(&mrt_lock);
2227
2228        return 0;
2229}
2230
2231
2232static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2233                               struct mfc6_cache *c, struct rtmsg *rtm)
2234{
2235        int ct;
2236        struct rtnexthop *nhp;
2237        struct nlattr *mp_attr;
2238        struct rta_mfc_stats mfcs;
2239
2240        /* If cache is unresolved, don't try to parse IIF and OIF */
2241        if (c->mf6c_parent >= MAXMIFS)
2242                return -ENOENT;
2243
2244        if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2245            nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2246                return -EMSGSIZE;
2247        mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2248        if (!mp_attr)
2249                return -EMSGSIZE;
2250
2251        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2252                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2253                        nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2254                        if (!nhp) {
2255                                nla_nest_cancel(skb, mp_attr);
2256                                return -EMSGSIZE;
2257                        }
2258
2259                        nhp->rtnh_flags = 0;
2260                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2261                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2262                        nhp->rtnh_len = sizeof(*nhp);
2263                }
2264        }
2265
2266        nla_nest_end(skb, mp_attr);
2267
2268        mfcs.mfcs_packets = c->mfc_un.res.pkt;
2269        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2270        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2271        if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2272                return -EMSGSIZE;
2273
2274        rtm->rtm_type = RTN_MULTICAST;
2275        return 1;
2276}
2277
2278int ip6mr_get_route(struct net *net,
2279                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2280{
2281        int err;
2282        struct mr6_table *mrt;
2283        struct mfc6_cache *cache;
2284        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2285
2286        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2287        if (!mrt)
2288                return -ENOENT;
2289
2290        read_lock(&mrt_lock);
2291        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2292        if (!cache && skb->dev) {
2293                int vif = ip6mr_find_vif(mrt, skb->dev);
2294
2295                if (vif >= 0)
2296                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2297                                                     vif);
2298        }
2299
2300        if (!cache) {
2301                struct sk_buff *skb2;
2302                struct ipv6hdr *iph;
2303                struct net_device *dev;
2304                int vif;
2305
2306                if (nowait) {
2307                        read_unlock(&mrt_lock);
2308                        return -EAGAIN;
2309                }
2310
2311                dev = skb->dev;
2312                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313                        read_unlock(&mrt_lock);
2314                        return -ENODEV;
2315                }
2316
2317                /* really correct? */
2318                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319                if (!skb2) {
2320                        read_unlock(&mrt_lock);
2321                        return -ENOMEM;
2322                }
2323
2324                skb_reset_transport_header(skb2);
2325
2326                skb_put(skb2, sizeof(struct ipv6hdr));
2327                skb_reset_network_header(skb2);
2328
2329                iph = ipv6_hdr(skb2);
2330                iph->version = 0;
2331                iph->priority = 0;
2332                iph->flow_lbl[0] = 0;
2333                iph->flow_lbl[1] = 0;
2334                iph->flow_lbl[2] = 0;
2335                iph->payload_len = 0;
2336                iph->nexthdr = IPPROTO_NONE;
2337                iph->hop_limit = 0;
2338                iph->saddr = rt->rt6i_src.addr;
2339                iph->daddr = rt->rt6i_dst.addr;
2340
2341                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2342                read_unlock(&mrt_lock);
2343
2344                return err;
2345        }
2346
2347        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2348                cache->mfc_flags |= MFC_NOTIFY;
2349
2350        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2351        read_unlock(&mrt_lock);
2352        return err;
2353}
2354
2355static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2356                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2357                             int flags)
2358{
2359        struct nlmsghdr *nlh;
2360        struct rtmsg *rtm;
2361        int err;
2362
2363        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2364        if (!nlh)
2365                return -EMSGSIZE;
2366
2367        rtm = nlmsg_data(nlh);
2368        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2369        rtm->rtm_dst_len  = 128;
2370        rtm->rtm_src_len  = 128;
2371        rtm->rtm_tos      = 0;
2372        rtm->rtm_table    = mrt->id;
2373        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2374                goto nla_put_failure;
2375        rtm->rtm_type = RTN_MULTICAST;
2376        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2377        if (c->mfc_flags & MFC_STATIC)
2378                rtm->rtm_protocol = RTPROT_STATIC;
2379        else
2380                rtm->rtm_protocol = RTPROT_MROUTED;
2381        rtm->rtm_flags    = 0;
2382
2383        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2384            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2385                goto nla_put_failure;
2386        err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2387        /* do not break the dump if cache is unresolved */
2388        if (err < 0 && err != -ENOENT)
2389                goto nla_put_failure;
2390
2391        nlmsg_end(skb, nlh);
2392        return 0;
2393
2394nla_put_failure:
2395        nlmsg_cancel(skb, nlh);
2396        return -EMSGSIZE;
2397}
2398
2399static int mr6_msgsize(bool unresolved, int maxvif)
2400{
2401        size_t len =
2402                NLMSG_ALIGN(sizeof(struct rtmsg))
2403                + nla_total_size(4)     /* RTA_TABLE */
2404                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2405                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2406                ;
2407
2408        if (!unresolved)
2409                len = len
2410                      + nla_total_size(4)       /* RTA_IIF */
2411                      + nla_total_size(0)       /* RTA_MULTIPATH */
2412                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413                                                /* RTA_MFC_STATS */
2414                      + nla_total_size(sizeof(struct rta_mfc_stats))
2415                ;
2416
2417        return len;
2418}
2419
2420static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2421                              int cmd)
2422{
2423        struct net *net = read_pnet(&mrt->net);
2424        struct sk_buff *skb;
2425        int err = -ENOBUFS;
2426
2427        skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2428                        GFP_ATOMIC);
2429        if (!skb)
2430                goto errout;
2431
2432        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2433        if (err < 0)
2434                goto errout;
2435
2436        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2437        return;
2438
2439errout:
2440        kfree_skb(skb);
2441        if (err < 0)
2442                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2443}
2444
2445static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446{
2447        struct net *net = sock_net(skb->sk);
2448        struct mr6_table *mrt;
2449        struct mfc6_cache *mfc;
2450        unsigned int t = 0, s_t;
2451        unsigned int h = 0, s_h;
2452        unsigned int e = 0, s_e;
2453
2454        s_t = cb->args[0];
2455        s_h = cb->args[1];
2456        s_e = cb->args[2];
2457
2458        read_lock(&mrt_lock);
2459        ip6mr_for_each_table(mrt, net) {
2460                if (t < s_t)
2461                        goto next_table;
2462                if (t > s_t)
2463                        s_h = 0;
2464                for (h = s_h; h < MFC6_LINES; h++) {
2465                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2466                                if (e < s_e)
2467                                        goto next_entry;
2468                                if (ip6mr_fill_mroute(mrt, skb,
2469                                                      NETLINK_CB(cb->skb).portid,
2470                                                      cb->nlh->nlmsg_seq,
2471                                                      mfc, RTM_NEWROUTE,
2472                                                      NLM_F_MULTI) < 0)
2473                                        goto done;
2474next_entry:
2475                                e++;
2476                        }
2477                        e = s_e = 0;
2478                }
2479                spin_lock_bh(&mfc_unres_lock);
2480                list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2481                        if (e < s_e)
2482                                goto next_entry2;
2483                        if (ip6mr_fill_mroute(mrt, skb,
2484                                              NETLINK_CB(cb->skb).portid,
2485                                              cb->nlh->nlmsg_seq,
2486                                              mfc, RTM_NEWROUTE,
2487                                              NLM_F_MULTI) < 0) {
2488                                spin_unlock_bh(&mfc_unres_lock);
2489                                goto done;
2490                        }
2491next_entry2:
2492                        e++;
2493                }
2494                spin_unlock_bh(&mfc_unres_lock);
2495                e = s_e = 0;
2496                s_h = 0;
2497next_table:
2498                t++;
2499        }
2500done:
2501        read_unlock(&mrt_lock);
2502
2503        cb->args[2] = e;
2504        cb->args[1] = h;
2505        cb->args[0] = t;
2506
2507        return skb->len;
2508}
2509