/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
/* One IPv6 multicast routing table.  Resolved-cache and vif state is
 * guarded by mrt_lock, the unresolved queue by mfc_unres_lock (see the
 * "Big lock" comment below); configuration changes run under RTNL.
 */
struct mr6_table {
	struct list_head	list;		/* link in net->ipv6.mr6_tables */
#ifdef CONFIG_NET_NS
	struct net		*net;		/* owning network namespace */
#endif
	u32			id;		/* table id, e.g. RT6_TABLE_DFLT */
	struct sock		*mroute6_sk;	/* controlling mroute socket */
	struct timer_list	ipmr_expire_timer; /* GC for mfc6_unres_queue */
	struct list_head	mfc6_unres_queue;  /* entries awaiting resolution */
	struct list_head	mfc6_cache_array[MFC6_LINES]; /* resolved entries, hashed */
	struct mif_device	vif6_table[MAXMIFS]; /* multicast interfaces */
	int			maxvif;		/* one past highest vif in use */
	atomic_t		cache_resolve_queue_len; /* length of unres queue */
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;	/* register vif index, -1 if none */
#endif
};
  76
/* Policy-routing rule for multicast table selection; no extra fields
 * beyond the generic fib_rule.
 */
struct ip6mr_rule {
	struct fib_rule		common;
};

/* Output of a fib-rules lookup: the matched mr6_table. */
struct ip6mr_result {
	struct mr6_table	*mrt;
};
  84
  85/* Big lock, protecting vif table, mrt cache and mroute socket state.
  86   Note that the changes are semaphored via rtnl_lock.
  87 */
  88
  89static DEFINE_RWLOCK(mrt_lock);
  90
  91/*
  92 *      Multicast router control variables
  93 */
  94
  95#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  96
  97/* Special spinlock for queue of unresolved entries */
  98static DEFINE_SPINLOCK(mfc_unres_lock);
  99
 100/* We return to original Alan's scheme. Hash table of resolved
 101   entries is changed only in process context and protected
 102   with weak lock mrt_lock. Queue of unresolved entries is protected
 103   with strong spinlock mfc_unres_lock.
 104
 105   In this case data path is free of exclusive locks at all.
 106 */
 107
 108static struct kmem_cache *mrt_cachep __read_mostly;
 109
 110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 111static void ip6mr_free_table(struct mr6_table *mrt);
 112
 113static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 114                           struct sk_buff *skb, struct mfc6_cache *cache);
 115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 116                              mifi_t mifi, int assert);
 117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 118                               struct mfc6_cache *c, struct rtmsg *rtm);
 119static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 120                              int cmd);
 121static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 122                               struct netlink_callback *cb);
 123static void mroute_clean_tables(struct mr6_table *mrt);
 124static void ipmr_expire_process(unsigned long arg);
 125
 126#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 127#define ip6mr_for_each_table(mrt, net) \
 128        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 129
 130static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 131{
 132        struct mr6_table *mrt;
 133
 134        ip6mr_for_each_table(mrt, net) {
 135                if (mrt->id == id)
 136                        return mrt;
 137        }
 138        return NULL;
 139}
 140
 141static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 142                            struct mr6_table **mrt)
 143{
 144        int err;
 145        struct ip6mr_result res;
 146        struct fib_lookup_arg arg = {
 147                .result = &res,
 148                .flags = FIB_LOOKUP_NOREF,
 149        };
 150
 151        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 152                               flowi6_to_flowi(flp6), 0, &arg);
 153        if (err < 0)
 154                return err;
 155        *mrt = res.mrt;
 156        return 0;
 157}
 158
 159static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 160                             int flags, struct fib_lookup_arg *arg)
 161{
 162        struct ip6mr_result *res = arg->result;
 163        struct mr6_table *mrt;
 164
 165        switch (rule->action) {
 166        case FR_ACT_TO_TBL:
 167                break;
 168        case FR_ACT_UNREACHABLE:
 169                return -ENETUNREACH;
 170        case FR_ACT_PROHIBIT:
 171                return -EACCES;
 172        case FR_ACT_BLACKHOLE:
 173        default:
 174                return -EINVAL;
 175        }
 176
 177        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 178        if (mrt == NULL)
 179                return -EAGAIN;
 180        res->mrt = mrt;
 181        return 0;
 182}
 183
/* fib_rules .match callback: ip6mr rules carry no selector, so every
 * rule matches every flow.
 */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

/* Netlink attribute policy: only the generic fib_rule attributes. */
static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

/* fib_rules .configure callback: nothing beyond the generic fields to
 * parse, so always succeed.
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

/* fib_rules .compare callback: no private fields, so any two rules with
 * equal generic parts compare equal.
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
 204
 205static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 206                           struct fib_rule_hdr *frh)
 207{
 208        frh->dst_len = 0;
 209        frh->src_len = 0;
 210        frh->tos     = 0;
 211        return 0;
 212}
 213
/* fib_rules ops for the IP6MR family; instantiated per namespace in
 * ip6mr_rules_init().
 */
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
 228
 229static int __net_init ip6mr_rules_init(struct net *net)
 230{
 231        struct fib_rules_ops *ops;
 232        struct mr6_table *mrt;
 233        int err;
 234
 235        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 236        if (IS_ERR(ops))
 237                return PTR_ERR(ops);
 238
 239        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 240
 241        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 242        if (mrt == NULL) {
 243                err = -ENOMEM;
 244                goto err1;
 245        }
 246
 247        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 248        if (err < 0)
 249                goto err2;
 250
 251        net->ipv6.mr6_rules_ops = ops;
 252        return 0;
 253
 254err2:
 255        kfree(mrt);
 256err1:
 257        fib_rules_unregister(ops);
 258        return err;
 259}
 260
 261static void __net_exit ip6mr_rules_exit(struct net *net)
 262{
 263        struct mr6_table *mrt, *next;
 264
 265        rtnl_lock();
 266        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 267                list_del(&mrt->list);
 268                ip6mr_free_table(mrt);
 269        }
 270        rtnl_unlock();
 271        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 272}
 273#else
 274#define ip6mr_for_each_table(mrt, net) \
 275        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 276
/* Single-table build: there is exactly one table per namespace, so the
 * id is ignored.
 */
static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

/* Single-table build: every flow maps to the one table; cannot fail. */
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}
 288
/* Single-table build: just create the default table. */
static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

/* Single-table build: free the default table under RTNL. */
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
 302#endif
 303
/* Return the table with the given id, creating and initialising it if
 * it does not exist yet.  Returns NULL on allocation failure.
 */
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	/* Reuse an existing table with this id if there is one. */
	mrt = ip6mr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	/* Timer is armed later, when entries are queued for resolution. */
	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
 336
/* Destroy a table: stop the expire timer, flush vifs and cache entries,
 * then free the table itself.  Note: does not unlink mrt from the
 * per-namespace table list; callers handle that.
 */
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt);
	kfree(mrt);
}
 343
 344#ifdef CONFIG_PROC_FS
 345
/* Iterator state for /proc mfc cache dumps: current table, the list
 * (hash bucket or unresolved queue) being walked, and the bucket index.
 */
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;	/* current bucket or &mfc6_unres_queue */
	int ct;				/* current hash bucket index */
};
 352
 353
/* Locate the pos'th mfc cache entry, scanning the resolved hash buckets
 * first and then the unresolved queue.  NOTE the lock hand-off: when an
 * entry is returned, the corresponding lock (mrt_lock for resolved,
 * mfc_unres_lock for unresolved) is still held; it is released by
 * ipmr_mfc_seq_next()/ipmr_mfc_seq_stop() based on it->cache.
 */
static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;	/* mrt_lock stays held */
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;	/* mfc_unres_lock stays held */
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
 379
 380/*
 381 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 382 */
 383
/* Iterator state for /proc vif dumps: the table and current vif index. */
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;				/* current index into vif6_table */
};
 389
 390static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 391                                            struct ipmr_vif_iter *iter,
 392                                            loff_t pos)
 393{
 394        struct mr6_table *mrt = iter->mrt;
 395
 396        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 397                if (!MIF_EXISTS(mrt, iter->ct))
 398                        continue;
 399                if (pos-- == 0)
 400                        return &mrt->vif6_table[iter->ct];
 401        }
 402        return NULL;
 403}
 404
/* seq_file .start for /proc vif dump: resolve the default table and
 * take mrt_lock for the whole traversal (released in ..._seq_stop).
 */
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	/* *pos == 0 yields the header token; otherwise resume at pos-1. */
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 422
/* seq_file .next for /proc vif dump: advance to the next live vif,
 * skipping empty slots; mrt_lock is held throughout by start/stop.
 */
static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}
 440
/* seq_file .stop for /proc vif dump: drop the lock taken in ..._start. */
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
 446
/* seq_file .show for /proc vif dump: print the header line for the
 * start token, otherwise one formatted row per mif_device.
 */
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		/* vif - mrt->vif6_table is the vif's index in the table */
		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
 468
/* seq_file plumbing for /proc/net/ip6_mr_vif. */
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};

/* open(): attach the per-net iterator state to the seq_file. */
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner   = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
 489
/* seq_file .start for /proc mfc dump: resolve the default table; the
 * traversal locks are taken lazily inside ipmr_mfc_seq_idx().
 */
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
 504
/* seq_file .next for /proc mfc dump.  Walks the current bucket, then
 * the remaining hash buckets, then switches to the unresolved queue —
 * handing off from mrt_lock (read) to mfc_unres_lock at that point.
 * Whatever lock corresponds to it->cache is held on return of a
 * non-NULL entry; end of iteration leaves both locks released.
 */
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	/* More entries in the current list? */
	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	/* Current list exhausted; if it was the unresolved queue we are done. */
	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

 end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
 547
/* seq_file .stop for /proc mfc dump: release whichever lock the
 * iterator still holds, identified by it->cache (see ipmr_mfc_seq_idx).
 */
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == mrt->mfc6_cache_array)
		read_unlock(&mrt_lock);
}
 558
/* seq_file .show for /proc mfc dump: header for the start token, then
 * one row per cache entry.  Resolved entries get real counters plus a
 * "vif:ttl" pair per forwarding interface; unresolved entries print
 * zero counters since those fields are not valid yet.
 */
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			/* ttls[n] < 255 marks vif n as a forwarding target */
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
 600
/* seq_file plumbing for /proc/net/ip6_mr_cache. */
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

/* open(): attach the per-net iterator state to the seq_file. */
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner   = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
 621#endif
 622
 623#ifdef CONFIG_IPV6_PIMSM_V2
 624
/* Receive handler for IPPROTO_PIM packets.  Validates a PIM REGISTER
 * message (type, flags, checksum), checks that the encapsulated packet
 * is a well-formed IPv6 multicast packet, then re-injects the inner
 * packet through this table's register vif.  Consumes the skb in all
 * cases and always returns 0.
 */
static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr   *encap;
	struct net_device  *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	/* Accept only REGISTER (not NULL-REGISTER); checksum may cover
	 * either just the PIM header or the whole packet.
	 */
	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	/* Hold a reference on the register device before re-injecting. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip the outer headers so the inner IPv6 packet is delivered
	 * as if it arrived on the register vif.
	 */
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
 689
/* Protocol hook registering pim6_rcv() for IPPROTO_PIM. */
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
 693
 694/* Service routines creating virtual interfaces: PIMREG */
 695
/* ndo_start_xmit for the pim6reg device: instead of transmitting, every
 * packet sent to the register vif is reported to the multicast routing
 * daemon as an MRT6MSG_WHOLEPKT message, then freed.
 */
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		/* NOTE(review): returns a negative errno where the
		 * signature promises netdev_tx_t — confirm against the
		 * upstream driver contract.
		 */
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
 722
static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};

/* netdev setup callback for the pim6reg pseudo device. */
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	/* room for an outer IPv6 header plus the 8-byte PIM register header */
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
 736
/* Create, register and bring up the pim6reg device for a table ("pim6reg"
 * for the default table, "pim6reg<id>" otherwise).  Called under RTNL.
 * Returns the device with a reference held, or NULL on failure.
 */
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}
	dev->iflink = 0;

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
 773#endif
 774
 775/*
 776 *      Delete a VIF entry
 777 */
 778
/*
 *	Delete a VIF entry
 */

/* Remove vif 'vifi' from the table: detach its device under mrt_lock,
 * shrink maxvif if the last slot was freed, drop allmulti and the
 * mc_forwarding count, and (for register vifs) queue the device for
 * unregistration on 'head'.  Returns 0 or -EADDRNOTAVAIL.
 */
static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	/* If this was the highest vif, scan down for the new maxvif. */
	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if (v->flags & MIFF_REGISTER)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
 831
/* Return an mfc6_cache entry to its slab cache. */
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}
 836
 837/* Destroy an unresolved cache entry, killing queued skbs
 838   and reporting error to netlink readers.
 839 */
 840
/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		/* "version == 0" marks a queued netlink cache report rather
		 * than a real IPv6 packet: rewrite it into an -ETIMEDOUT
		 * NLMSG_ERROR and unicast it back to the requester.
		 */
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
 862
 863
 864/* Timer process for all the unresolved queue. */
 865
/* Timer process for all the unresolved queue. */

/* Expire timed-out entries on the unresolved queue and re-arm the timer
 * for the soonest remaining expiry (at most 10s out).  Caller holds
 * mfc_unres_lock.
 */
static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;	/* default re-arm interval */
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
 889
/* Timer callback: run the expiry pass if the unresolved-queue lock is
 * free; otherwise retry on the next jiffy rather than spinning in
 * timer context.
 */
static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
 904
 905/* Fill oifs list. It is called under write locked mrt_lock. */
 906
 907static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 908                                    unsigned char *ttls)
 909{
 910        int vifi;
 911
 912        cache->mfc_un.res.minvif = MAXMIFS;
 913        cache->mfc_un.res.maxvif = 0;
 914        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 915
 916        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 917                if (MIF_EXISTS(mrt, vifi) &&
 918                    ttls[vifi] && ttls[vifi] < 255) {
 919                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 920                        if (cache->mfc_un.res.minvif > vifi)
 921                                cache->mfc_un.res.minvif = vifi;
 922                        if (cache->mfc_un.res.maxvif <= vifi)
 923                                cache->mfc_un.res.maxvif = vifi + 1;
 924                }
 925        }
 926}
 927
/* Add a new multicast interface (mif) described by @vifc to table @mrt.
 * Called with RTNL held (see ip6_mroute_setsockopt); mrt_lock is taken
 * only for the final publish of the vif entry.  @mrtsock is true when
 * the request came from the mrouter socket itself; other requesters get
 * VIFF_STATIC entries that survive mroute_clean_tables().
 * Returns 0 or a negative errno.
 */
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		/* Ordinary mif bound to an existing device; the
		 * reference from dev_get_by_index() is kept while the
		 * mif lives.
		 */
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	/* Account the new forwarder on the device and tell netconf
	 * listeners about it.
	 */
	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	/* NOTE(review): register vifs expose iflink instead of ifindex
	 * here -- presumably the underlying device; confirm against
	 * ip6mr_reg_vif().
	 */
	if (v->flags & MIFF_REGISTER)
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
1011
1012static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1013                                           const struct in6_addr *origin,
1014                                           const struct in6_addr *mcastgrp)
1015{
1016        int line = MFC6_HASH(mcastgrp, origin);
1017        struct mfc6_cache *c;
1018
1019        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1020                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1021                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1022                        return c;
1023        }
1024        return NULL;
1025}
1026
1027/* Look for a (*,*,oif) entry */
1028static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1029                                                      mifi_t mifi)
1030{
1031        int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1032        struct mfc6_cache *c;
1033
1034        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1035                if (ipv6_addr_any(&c->mf6c_origin) &&
1036                    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1037                    (c->mfc_un.res.ttls[mifi] < 255))
1038                        return c;
1039
1040        return NULL;
1041}
1042
1043/* Look for a (*,G) entry */
1044static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1045                                               struct in6_addr *mcastgrp,
1046                                               mifi_t mifi)
1047{
1048        int line = MFC6_HASH(mcastgrp, &in6addr_any);
1049        struct mfc6_cache *c, *proxy;
1050
1051        if (ipv6_addr_any(mcastgrp))
1052                goto skip;
1053
1054        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1055                if (ipv6_addr_any(&c->mf6c_origin) &&
1056                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1057                        if (c->mfc_un.res.ttls[mifi] < 255)
1058                                return c;
1059
1060                        /* It's ok if the mifi is part of the static tree */
1061                        proxy = ip6mr_cache_find_any_parent(mrt,
1062                                                            c->mf6c_parent);
1063                        if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1064                                return c;
1065                }
1066
1067skip:
1068        return ip6mr_cache_find_any_parent(mrt, mifi);
1069}
1070
1071/*
1072 *      Allocate a multicast cache entry
1073 */
1074static struct mfc6_cache *ip6mr_cache_alloc(void)
1075{
1076        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1077        if (c == NULL)
1078                return NULL;
1079        c->mfc_un.res.minvif = MAXMIFS;
1080        return c;
1081}
1082
1083static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084{
1085        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086        if (c == NULL)
1087                return NULL;
1088        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090        return c;
1091}
1092
1093/*
1094 *      A cache entry has gone into a resolved state from queued
1095 */
1096
1097static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1098                                struct mfc6_cache *uc, struct mfc6_cache *c)
1099{
1100        struct sk_buff *skb;
1101
1102        /*
1103         *      Play the pending entries through our router
1104         */
1105
1106        while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1107                if (ipv6_hdr(skb)->version == 0) {
1108                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1109
1110                        if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1111                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1112                        } else {
1113                                nlh->nlmsg_type = NLMSG_ERROR;
1114                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1115                                skb_trim(skb, nlh->nlmsg_len);
1116                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1117                        }
1118                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1119                } else
1120                        ip6_mr_forward(net, mrt, skb, c);
1121        }
1122}
1123
1124/*
1125 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1126 *      expects the following bizarre scheme.
1127 *
1128 *      Called under mrt_lock.
1129 */
1130
1131static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1132                              mifi_t mifi, int assert)
1133{
1134        struct sk_buff *skb;
1135        struct mrt6msg *msg;
1136        int ret;
1137
1138#ifdef CONFIG_IPV6_PIMSM_V2
1139        if (assert == MRT6MSG_WHOLEPKT)
1140                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1141                                                +sizeof(*msg));
1142        else
1143#endif
1144                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1145
1146        if (!skb)
1147                return -ENOBUFS;
1148
1149        /* I suppose that internal messages
1150         * do not require checksums */
1151
1152        skb->ip_summed = CHECKSUM_UNNECESSARY;
1153
1154#ifdef CONFIG_IPV6_PIMSM_V2
1155        if (assert == MRT6MSG_WHOLEPKT) {
1156                /* Ugly, but we have no choice with this interface.
1157                   Duplicate old header, fix length etc.
1158                   And all this only to mangle msg->im6_msgtype and
1159                   to set msg->im6_mbz to "mbz" :-)
1160                 */
1161                skb_push(skb, -skb_network_offset(pkt));
1162
1163                skb_push(skb, sizeof(*msg));
1164                skb_reset_transport_header(skb);
1165                msg = (struct mrt6msg *)skb_transport_header(skb);
1166                msg->im6_mbz = 0;
1167                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1168                msg->im6_mif = mrt->mroute_reg_vif_num;
1169                msg->im6_pad = 0;
1170                msg->im6_src = ipv6_hdr(pkt)->saddr;
1171                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1172
1173                skb->ip_summed = CHECKSUM_UNNECESSARY;
1174        } else
1175#endif
1176        {
1177        /*
1178         *      Copy the IP header
1179         */
1180
1181        skb_put(skb, sizeof(struct ipv6hdr));
1182        skb_reset_network_header(skb);
1183        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1184
1185        /*
1186         *      Add our header
1187         */
1188        skb_put(skb, sizeof(*msg));
1189        skb_reset_transport_header(skb);
1190        msg = (struct mrt6msg *)skb_transport_header(skb);
1191
1192        msg->im6_mbz = 0;
1193        msg->im6_msgtype = assert;
1194        msg->im6_mif = mifi;
1195        msg->im6_pad = 0;
1196        msg->im6_src = ipv6_hdr(pkt)->saddr;
1197        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1198
1199        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1200        skb->ip_summed = CHECKSUM_UNNECESSARY;
1201        }
1202
1203        if (mrt->mroute6_sk == NULL) {
1204                kfree_skb(skb);
1205                return -EINVAL;
1206        }
1207
1208        /*
1209         *      Deliver to user space multicast routing algorithms
1210         */
1211        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1212        if (ret < 0) {
1213                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1214                kfree_skb(skb);
1215        }
1216
1217        return ret;
1218}
1219
1220/*
1221 *      Queue a packet for resolution. It gets locked cache entry!
1222 */
1223
1224static int
1225ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1226{
1227        bool found = false;
1228        int err;
1229        struct mfc6_cache *c;
1230
1231        spin_lock_bh(&mfc_unres_lock);
1232        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1233                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1234                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1235                        found = true;
1236                        break;
1237                }
1238        }
1239
1240        if (!found) {
1241                /*
1242                 *      Create a new entry if allowable
1243                 */
1244
1245                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1246                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1247                        spin_unlock_bh(&mfc_unres_lock);
1248
1249                        kfree_skb(skb);
1250                        return -ENOBUFS;
1251                }
1252
1253                /*
1254                 *      Fill in the new cache entry
1255                 */
1256                c->mf6c_parent = -1;
1257                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1258                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1259
1260                /*
1261                 *      Reflect first query at pim6sd
1262                 */
1263                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1264                if (err < 0) {
1265                        /* If the report failed throw the cache entry
1266                           out - Brad Parker
1267                         */
1268                        spin_unlock_bh(&mfc_unres_lock);
1269
1270                        ip6mr_cache_free(c);
1271                        kfree_skb(skb);
1272                        return err;
1273                }
1274
1275                atomic_inc(&mrt->cache_resolve_queue_len);
1276                list_add(&c->list, &mrt->mfc6_unres_queue);
1277                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1278
1279                ipmr_do_expire_process(mrt);
1280        }
1281
1282        /*
1283         *      See if we can append the packet
1284         */
1285        if (c->mfc_un.unres.unresolved.qlen > 3) {
1286                kfree_skb(skb);
1287                err = -ENOBUFS;
1288        } else {
1289                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1290                err = 0;
1291        }
1292
1293        spin_unlock_bh(&mfc_unres_lock);
1294        return err;
1295}
1296
1297/*
1298 *      MFC6 cache manipulation by user space
1299 */
1300
1301static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1302                            int parent)
1303{
1304        int line;
1305        struct mfc6_cache *c, *next;
1306
1307        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1308
1309        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1310                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1311                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1312                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1313                    (parent == -1 || parent == c->mf6c_parent)) {
1314                        write_lock_bh(&mrt_lock);
1315                        list_del(&c->list);
1316                        write_unlock_bh(&mrt_lock);
1317
1318                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1319                        ip6mr_cache_free(c);
1320                        return 0;
1321                }
1322        }
1323        return -ENOENT;
1324}
1325
1326static int ip6mr_device_event(struct notifier_block *this,
1327                              unsigned long event, void *ptr)
1328{
1329        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330        struct net *net = dev_net(dev);
1331        struct mr6_table *mrt;
1332        struct mif_device *v;
1333        int ct;
1334        LIST_HEAD(list);
1335
1336        if (event != NETDEV_UNREGISTER)
1337                return NOTIFY_DONE;
1338
1339        ip6mr_for_each_table(mrt, net) {
1340                v = &mrt->vif6_table[0];
1341                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342                        if (v->dev == dev)
1343                                mif6_delete(mrt, ct, &list);
1344                }
1345        }
1346        unregister_netdevice_many(&list);
1347
1348        return NOTIFY_DONE;
1349}
1350
/* Hooks mif cleanup (ip6mr_device_event) into netdevice notifications. */
static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
1354
1355/*
1356 *      Setup for IP multicast routing
1357 */
1358
1359static int __net_init ip6mr_net_init(struct net *net)
1360{
1361        int err;
1362
1363        err = ip6mr_rules_init(net);
1364        if (err < 0)
1365                goto fail;
1366
1367#ifdef CONFIG_PROC_FS
1368        err = -ENOMEM;
1369        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1370                goto proc_vif_fail;
1371        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1372                goto proc_cache_fail;
1373#endif
1374
1375        return 0;
1376
1377#ifdef CONFIG_PROC_FS
1378proc_cache_fail:
1379        remove_proc_entry("ip6_mr_vif", net->proc_net);
1380proc_vif_fail:
1381        ip6mr_rules_exit(net);
1382#endif
1383fail:
1384        return err;
1385}
1386
/* Per-namespace teardown: remove the proc entries, then the rule tables. */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}
1395
/* Per-network-namespace setup/teardown for ip6mr. */
static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
1400
/* Subsystem init: create the mfc6_cache slab, register the per-netns
 * operations, the netdevice notifier, the PIM protocol handler (when
 * configured) and the RTNL route-dump callback.  Error paths unwind
 * the registrations already made.
 */
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}
1439
/* Subsystem exit: tear down the notifier, per-netns state and the
 * mfc6_cache slab (in that order).
 */
void ip6_mr_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
1446
/* Add or update a resolved MFC entry from user space (called under
 * RTNL, see ip6_mroute_setsockopt).  If an unresolved entry for the
 * same (origin, group) pair is queued, its backlogged packets are
 * replayed through the new route.  @parent is -1 for the non-proxy
 * socket options (match any parent); entries from non-mrouter sockets
 * (!@mrtsock) are marked MFC_STATIC.
 */
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	/* Interfaces selected in mf6cc_ifset forward with ttl 1; all
	 * others are disabled (255).
	 */
	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;

	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		/* Existing entry: refresh parent and thresholds in place */
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	/* No unresolved entries left: the expiry timer is not needed */
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
1534
1535/*
1536 *      Close the multicast socket, and clear the vif tables etc
1537 */
1538
1539static void mroute_clean_tables(struct mr6_table *mrt)
1540{
1541        int i;
1542        LIST_HEAD(list);
1543        struct mfc6_cache *c, *next;
1544
1545        /*
1546         *      Shut down all active vif entries
1547         */
1548        for (i = 0; i < mrt->maxvif; i++) {
1549                if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1550                        mif6_delete(mrt, i, &list);
1551        }
1552        unregister_netdevice_many(&list);
1553
1554        /*
1555         *      Wipe the cache
1556         */
1557        for (i = 0; i < MFC6_LINES; i++) {
1558                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1559                        if (c->mfc_flags & MFC_STATIC)
1560                                continue;
1561                        write_lock_bh(&mrt_lock);
1562                        list_del(&c->list);
1563                        write_unlock_bh(&mrt_lock);
1564
1565                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1566                        ip6mr_cache_free(c);
1567                }
1568        }
1569
1570        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1571                spin_lock_bh(&mfc_unres_lock);
1572                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1573                        list_del(&c->list);
1574                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1575                        ip6mr_destroy_unres(mrt, c);
1576                }
1577                spin_unlock_bh(&mfc_unres_lock);
1578        }
1579}
1580
/* Register @sk as the mrouter socket of table @mrt.  Bumps the
 * all-devices mc_forwarding counter and notifies netconf listeners.
 * Returns -EADDRINUSE if the table already has an mrouter socket.
 */
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	}
	else
		err = -EADDRINUSE;
	write_unlock_bh(&mrt_lock);

	rtnl_unlock();

	return err;
}
1603
/* Detach @sk from the table it is the mrouter socket of, reversing the
 * ip6mr_sk_init() accounting and flushing non-static state via
 * mroute_clean_tables().  Returns -EACCES when @sk owns no table.
 */
int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}
1631
1632struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1633{
1634        struct mr6_table *mrt;
1635        struct flowi6 fl6 = {
1636                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1637                .flowi6_oif     = skb->dev->ifindex,
1638                .flowi6_mark    = skb->mark,
1639        };
1640
1641        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1642                return NULL;
1643
1644        return mrt->mroute6_sk;
1645}
1646
1647/*
1648 *      Socket options and virtual interface manipulation. The whole
1649 *      virtual interface system is a complete heap, but unfortunately
1650 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1651 *      MOSPF/PIM router set up we can clean this up.
1652 */
1653
1654int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1655{
1656        int ret, parent = 0;
1657        struct mif6ctl vif;
1658        struct mf6cctl mfc;
1659        mifi_t mifi;
1660        struct net *net = sock_net(sk);
1661        struct mr6_table *mrt;
1662
1663        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1664        if (mrt == NULL)
1665                return -ENOENT;
1666
1667        if (optname != MRT6_INIT) {
1668                if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1669                        return -EACCES;
1670        }
1671
1672        switch (optname) {
1673        case MRT6_INIT:
1674                if (sk->sk_type != SOCK_RAW ||
1675                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1676                        return -EOPNOTSUPP;
1677                if (optlen < sizeof(int))
1678                        return -EINVAL;
1679
1680                return ip6mr_sk_init(mrt, sk);
1681
1682        case MRT6_DONE:
1683                return ip6mr_sk_done(sk);
1684
1685        case MRT6_ADD_MIF:
1686                if (optlen < sizeof(vif))
1687                        return -EINVAL;
1688                if (copy_from_user(&vif, optval, sizeof(vif)))
1689                        return -EFAULT;
1690                if (vif.mif6c_mifi >= MAXMIFS)
1691                        return -ENFILE;
1692                rtnl_lock();
1693                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1694                rtnl_unlock();
1695                return ret;
1696
1697        case MRT6_DEL_MIF:
1698                if (optlen < sizeof(mifi_t))
1699                        return -EINVAL;
1700                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1701                        return -EFAULT;
1702                rtnl_lock();
1703                ret = mif6_delete(mrt, mifi, NULL);
1704                rtnl_unlock();
1705                return ret;
1706
1707        /*
1708         *      Manipulate the forwarding caches. These live
1709         *      in a sort of kernel/user symbiosis.
1710         */
1711        case MRT6_ADD_MFC:
1712        case MRT6_DEL_MFC:
1713                parent = -1;
1714        case MRT6_ADD_MFC_PROXY:
1715        case MRT6_DEL_MFC_PROXY:
1716                if (optlen < sizeof(mfc))
1717                        return -EINVAL;
1718                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1719                        return -EFAULT;
1720                if (parent == 0)
1721                        parent = mfc.mf6cc_parent;
1722                rtnl_lock();
1723                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1724                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1725                else
1726                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1727                                            sk == mrt->mroute6_sk, parent);
1728                rtnl_unlock();
1729                return ret;
1730
1731        /*
1732         *      Control PIM assert (to activate pim will activate assert)
1733         */
1734        case MRT6_ASSERT:
1735        {
1736                int v;
1737
1738                if (optlen != sizeof(v))
1739                        return -EINVAL;
1740                if (get_user(v, (int __user *)optval))
1741                        return -EFAULT;
1742                mrt->mroute_do_assert = v;
1743                return 0;
1744        }
1745
1746#ifdef CONFIG_IPV6_PIMSM_V2
1747        case MRT6_PIM:
1748        {
1749                int v;
1750
1751                if (optlen != sizeof(v))
1752                        return -EINVAL;
1753                if (get_user(v, (int __user *)optval))
1754                        return -EFAULT;
1755                v = !!v;
1756                rtnl_lock();
1757                ret = 0;
1758                if (v != mrt->mroute_do_pim) {
1759                        mrt->mroute_do_pim = v;
1760                        mrt->mroute_do_assert = v;
1761                }
1762                rtnl_unlock();
1763                return ret;
1764        }
1765
1766#endif
1767#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1768        case MRT6_TABLE:
1769        {
1770                u32 v;
1771
1772                if (optlen != sizeof(u32))
1773                        return -EINVAL;
1774                if (get_user(v, (u32 __user *)optval))
1775                        return -EFAULT;
1776                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1777                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1778                        return -EINVAL;
1779                if (sk == mrt->mroute6_sk)
1780                        return -EBUSY;
1781
1782                rtnl_lock();
1783                ret = 0;
1784                if (!ip6mr_new_table(net, v))
1785                        ret = -ENOMEM;
1786                raw6_sk(sk)->ip6mr_table = v;
1787                rtnl_unlock();
1788                return ret;
1789        }
1790#endif
1791        /*
1792         *      Spurious command, or MRT6_VERSION which you cannot
1793         *      set.
1794         */
1795        default:
1796                return -ENOPROTOOPT;
1797        }
1798}
1799
1800/*
1801 *      Getsock opt support for the multicast routing system.
1802 */
1803
1804int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1805                          int __user *optlen)
1806{
1807        int olr;
1808        int val;
1809        struct net *net = sock_net(sk);
1810        struct mr6_table *mrt;
1811
1812        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1813        if (mrt == NULL)
1814                return -ENOENT;
1815
1816        switch (optname) {
1817        case MRT6_VERSION:
1818                val = 0x0305;
1819                break;
1820#ifdef CONFIG_IPV6_PIMSM_V2
1821        case MRT6_PIM:
1822                val = mrt->mroute_do_pim;
1823                break;
1824#endif
1825        case MRT6_ASSERT:
1826                val = mrt->mroute_do_assert;
1827                break;
1828        default:
1829                return -ENOPROTOOPT;
1830        }
1831
1832        if (get_user(olr, optlen))
1833                return -EFAULT;
1834
1835        olr = min_t(int, olr, sizeof(int));
1836        if (olr < 0)
1837                return -EINVAL;
1838
1839        if (put_user(olr, optlen))
1840                return -EFAULT;
1841        if (copy_to_user(optval, &val, olr))
1842                return -EFAULT;
1843        return 0;
1844}
1845
1846/*
1847 *      The IP multicast ioctl support routines.
1848 */
1849
/*
 *	Read-only ioctls: per-mif and per-(S,G) packet/byte counters for
 *	the table this socket is bound to.
 */
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	/* Table id 0 means "not bound": use the default table. */
	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			/* Snapshot counters under mrt_lock, then drop the
			 * lock before copy_to_user(), which may fault. */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			/* Same pattern: copy out only after unlocking. */
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1906
1907#ifdef CONFIG_COMPAT
/* 32-bit ABI layout of struct sioc_sg_req6: the counters are
 * compat_ulong_t (32 bits) instead of native unsigned long. */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};
1915
/* 32-bit ABI layout of struct sioc_mif_req6 (see above). */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};
1923
/*
 *	compat (32-bit task on 64-bit kernel) counterpart of ip6mr_ioctl():
 *	identical logic, but user copies use the compat struct layouts.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	/* Table id 0 means "not bound": use the default table. */
	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			/* Snapshot counters under mrt_lock, drop the lock
			 * before the (possibly faulting) copy_to_user(). */
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			/* Same pattern: copy out only after unlocking. */
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
1980#endif
1981
1982static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1983{
1984        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1985                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1986        IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1987                         IPSTATS_MIB_OUTOCTETS, skb->len);
1988        return dst_output(skb);
1989}
1990
1991/*
1992 *      Processing handlers for ip6mr_forward
1993 */
1994
/*
 *	Transmit one multicast packet on the single outgoing mif @vifi.
 *	Consumes @skb in every path (queued to the stack or freed here).
 */
static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	/* A register mif never transmits on the wire: account the packet
	 * and deliver it whole to the PIM daemon as a WHOLEPKT upcall. */
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	/* Route the packet out of the mif's link. */
	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	/* Replace the input route with the freshly looked-up output one. */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	/* skb_cow() may have reallocated the header: re-read it before
	 * decrementing the hop limit for this forwarding step. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	/* Traverse the netfilter FORWARD hook; on accept,
	 * ip6mr_forward2_finish() accounts and transmits the packet. */
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
2067
2068static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2069{
2070        int ct;
2071
2072        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2073                if (mrt->vif6_table[ct].dev == dev)
2074                        break;
2075        }
2076        return ct;
2077}
2078
/*
 *	Replicate @skb to every outgoing mif listed in @cache.
 *	Consumes @skb (forwarded, cloned-and-forwarded, or freed).
 */
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;			/* previous mif to send a clone on */
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);	/* arrival mif, -1 if none */

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incomming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    /* Rate-limit asserts to one per MFC_ASSERT_THRESH. */
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	/* Send a clone on each earlier eligible mif, keeping the original
	 * skb for the last one (ttl 255 marks a mif not in the olist). */
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		/* Last (or only) target: hand over the original skb. */
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
2167
2168
2169/*
2170 *      Multicast packets for forwarding arrive here
2171 */
2172
int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	/* Pick the mr6_table for this flow (fib-rules aware). */
	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	/* Exact (S,G) lookup first ... */
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (cache == NULL) {
		/* ... then fall back to a wildcard (*,G)/(*,*) entry,
		 * which requires knowing the arrival mif. */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			/* Queue on the unresolved list; this path consumes
			 * the skb and may report it to the routing daemon. */
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
2226
2227
/*
 *	Append RTA_IIF, RTA_MULTIPATH and RTA_MFC_STATS for cache entry @c
 *	to @skb.  Returns 1 on success, -ENOENT for an unresolved entry,
 *	-EMSGSIZE when the skb runs out of room.
 */
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (mp_attr == NULL)
		return -EMSGSIZE;

	/* One rtnexthop per outgoing mif (ttl < 255 marks membership). */
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (nhp == NULL) {
				/* Roll back the half-built nest. */
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
2273
/*
 *	Fill multicast routing information for an rtnetlink route query.
 *	The queried route is taken from skb's dst (a struct rt6_info).
 *	With @nowait == 0 an unknown (S,G) is pushed onto the unresolved
 *	queue; with @nowait != 0 the call returns -EAGAIN instead.
 */
int ip6mr_get_route(struct net *net,
		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		/* Fall back to a wildcard (*,G)/(*,*) entry. */
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		/* Build a minimal skb carrying only a synthetic IPv6 header
		 * with the queried src/dst, so the entry can sit on the
		 * unresolved queue and trigger an upcall. */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
2350
/*
 *	Build one complete netlink route message (@cmd, typically
 *	RTM_NEWROUTE/RTM_DELROUTE) describing cache entry @c.
 *	Returns the nlmsg_end() result, or -EMSGSIZE on overflow.
 */
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
	    nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2393
2394static int mr6_msgsize(bool unresolved, int maxvif)
2395{
2396        size_t len =
2397                NLMSG_ALIGN(sizeof(struct rtmsg))
2398                + nla_total_size(4)     /* RTA_TABLE */
2399                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2400                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2401                ;
2402
2403        if (!unresolved)
2404                len = len
2405                      + nla_total_size(4)       /* RTA_IIF */
2406                      + nla_total_size(0)       /* RTA_MULTIPATH */
2407                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2408                                                /* RTA_MFC_STATS */
2409                      + nla_total_size(sizeof(struct rta_mfc_stats))
2410                ;
2411
2412        return len;
2413}
2414
/*
 *	Broadcast an RTM_NEWROUTE/RTM_DELROUTE (@cmd) notification for
 *	@mfc to RTNLGRP_IPV6_MROUTE listeners; on failure, record the
 *	error on the multicast group instead.
 */
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	/* An unresolved entry (parent >= MAXMIFS) is sized without the
	 * IIF/multipath/stats attributes. */
	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);		/* kfree_skb(NULL) is a no-op */
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
2439
/*
 *	rtnetlink dump callback: walk every mr6_table, its hashed mfc
 *	cache and its unresolved queue, emitting one message per entry.
 *	Resume state across calls lives in cb->args[0..2]:
 *	table index (t), hash line (h), entry index within line (e).
 */
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;	/* new table: start at hash line 0 */
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				/* Stop when the skb is full; the saved
				 * t/h/e let the next call resume here. */
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		/* Unresolved entries are on a separate list guarded by
		 * mfc_unres_lock. */
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	/* Save resume position for the next invocation. */
	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}
2504