linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/system.h>
  20#include <asm/uaccess.h>
  21#include <linux/types.h>
  22#include <linux/sched.h>
  23#include <linux/errno.h>
  24#include <linux/timer.h>
  25#include <linux/mm.h>
  26#include <linux/kernel.h>
  27#include <linux/fcntl.h>
  28#include <linux/stat.h>
  29#include <linux/socket.h>
  30#include <linux/inet.h>
  31#include <linux/netdevice.h>
  32#include <linux/inetdevice.h>
  33#include <linux/proc_fs.h>
  34#include <linux/seq_file.h>
  35#include <linux/init.h>
  36#include <linux/slab.h>
  37#include <linux/compat.h>
  38#include <net/protocol.h>
  39#include <linux/skbuff.h>
  40#include <net/sock.h>
  41#include <net/raw.h>
  42#include <linux/notifier.h>
  43#include <linux/if_arp.h>
  44#include <net/checksum.h>
  45#include <net/netlink.h>
  46#include <net/fib_rules.h>
  47
  48#include <net/ipv6.h>
  49#include <net/ip6_route.h>
  50#include <linux/mroute6.h>
  51#include <linux/pim.h>
  52#include <net/addrconf.h>
  53#include <linux/netfilter_ipv6.h>
  54#include <net/ip6_checksum.h>
  55
  56struct mr6_table {
  57        struct list_head        list;
  58#ifdef CONFIG_NET_NS
  59        struct net              *net;
  60#endif
  61        u32                     id;
  62        struct sock             *mroute6_sk;
  63        struct timer_list       ipmr_expire_timer;
  64        struct list_head        mfc6_unres_queue;
  65        struct list_head        mfc6_cache_array[MFC6_LINES];
  66        struct mif_device       vif6_table[MAXMIFS];
  67        int                     maxvif;
  68        atomic_t                cache_resolve_queue_len;
  69        int                     mroute_do_assert;
  70        int                     mroute_do_pim;
  71#ifdef CONFIG_IPV6_PIMSM_V2
  72        int                     mroute_reg_vif_num;
  73#endif
  74};
  75
  76struct ip6mr_rule {
  77        struct fib_rule         common;
  78};
  79
  80struct ip6mr_result {
  81        struct mr6_table        *mrt;
  82};
  83
  84/* Big lock, protecting vif table, mrt cache and mroute socket state.
  85   Note that the changes are semaphored via rtnl_lock.
  86 */
  87
  88static DEFINE_RWLOCK(mrt_lock);
  89
  90/*
  91 *      Multicast router control variables
  92 */
  93
  94#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  95
  96/* Special spinlock for queue of unresolved entries */
  97static DEFINE_SPINLOCK(mfc_unres_lock);
  98
  99/* We return to original Alan's scheme. Hash table of resolved
 100   entries is changed only in process context and protected
 101   with weak lock mrt_lock. Queue of unresolved entries is protected
 102   with strong spinlock mfc_unres_lock.
 103
 104   In this case data path is free of exclusive locks at all.
 105 */
 106
 107static struct kmem_cache *mrt_cachep __read_mostly;
 108
 109static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 110static void ip6mr_free_table(struct mr6_table *mrt);
 111
 112static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 113                          struct sk_buff *skb, struct mfc6_cache *cache);
 114static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 115                              mifi_t mifi, int assert);
 116static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 117                               struct mfc6_cache *c, struct rtmsg *rtm);
 118static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 119                               struct netlink_callback *cb);
 120static void mroute_clean_tables(struct mr6_table *mrt);
 121static void ipmr_expire_process(unsigned long arg);
 122
 123#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 124#define ip6mr_for_each_table(mrt, net) \
 125        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 126
 127static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 128{
 129        struct mr6_table *mrt;
 130
 131        ip6mr_for_each_table(mrt, net) {
 132                if (mrt->id == id)
 133                        return mrt;
 134        }
 135        return NULL;
 136}
 137
 138static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
 139                            struct mr6_table **mrt)
 140{
 141        struct ip6mr_result res;
 142        struct fib_lookup_arg arg = { .result = &res, };
 143        int err;
 144
 145        err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
 146        if (err < 0)
 147                return err;
 148        *mrt = res.mrt;
 149        return 0;
 150}
 151
 152static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 153                             int flags, struct fib_lookup_arg *arg)
 154{
 155        struct ip6mr_result *res = arg->result;
 156        struct mr6_table *mrt;
 157
 158        switch (rule->action) {
 159        case FR_ACT_TO_TBL:
 160                break;
 161        case FR_ACT_UNREACHABLE:
 162                return -ENETUNREACH;
 163        case FR_ACT_PROHIBIT:
 164                return -EACCES;
 165        case FR_ACT_BLACKHOLE:
 166        default:
 167                return -EINVAL;
 168        }
 169
 170        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 171        if (mrt == NULL)
 172                return -EAGAIN;
 173        res->mrt = mrt;
 174        return 0;
 175}
 176
 177static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 178{
 179        return 1;
 180}
 181
 182static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 183        FRA_GENERIC_POLICY,
 184};
 185
 186static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 187                                struct fib_rule_hdr *frh, struct nlattr **tb)
 188{
 189        return 0;
 190}
 191
 192static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 193                              struct nlattr **tb)
 194{
 195        return 1;
 196}
 197
 198static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 199                           struct fib_rule_hdr *frh)
 200{
 201        frh->dst_len = 0;
 202        frh->src_len = 0;
 203        frh->tos     = 0;
 204        return 0;
 205}
 206
 207static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
 208        .family         = RTNL_FAMILY_IP6MR,
 209        .rule_size      = sizeof(struct ip6mr_rule),
 210        .addr_size      = sizeof(struct in6_addr),
 211        .action         = ip6mr_rule_action,
 212        .match          = ip6mr_rule_match,
 213        .configure      = ip6mr_rule_configure,
 214        .compare        = ip6mr_rule_compare,
 215        .default_pref   = fib_default_rule_pref,
 216        .fill           = ip6mr_rule_fill,
 217        .nlgroup        = RTNLGRP_IPV6_RULE,
 218        .policy         = ip6mr_rule_policy,
 219        .owner          = THIS_MODULE,
 220};
 221
 222static int __net_init ip6mr_rules_init(struct net *net)
 223{
 224        struct fib_rules_ops *ops;
 225        struct mr6_table *mrt;
 226        int err;
 227
 228        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 229        if (IS_ERR(ops))
 230                return PTR_ERR(ops);
 231
 232        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 233
 234        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 235        if (mrt == NULL) {
 236                err = -ENOMEM;
 237                goto err1;
 238        }
 239
 240        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 241        if (err < 0)
 242                goto err2;
 243
 244        net->ipv6.mr6_rules_ops = ops;
 245        return 0;
 246
 247err2:
 248        kfree(mrt);
 249err1:
 250        fib_rules_unregister(ops);
 251        return err;
 252}
 253
 254static void __net_exit ip6mr_rules_exit(struct net *net)
 255{
 256        struct mr6_table *mrt, *next;
 257
 258        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 259                list_del(&mrt->list);
 260                ip6mr_free_table(mrt);
 261        }
 262        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 263}
 264#else
 265#define ip6mr_for_each_table(mrt, net) \
 266        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 267
 268static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 269{
 270        return net->ipv6.mrt6;
 271}
 272
 273static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
 274                            struct mr6_table **mrt)
 275{
 276        *mrt = net->ipv6.mrt6;
 277        return 0;
 278}
 279
 280static int __net_init ip6mr_rules_init(struct net *net)
 281{
 282        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 283        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 284}
 285
 286static void __net_exit ip6mr_rules_exit(struct net *net)
 287{
 288        ip6mr_free_table(net->ipv6.mrt6);
 289}
 290#endif
 291
 292static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 293{
 294        struct mr6_table *mrt;
 295        unsigned int i;
 296
 297        mrt = ip6mr_get_table(net, id);
 298        if (mrt != NULL)
 299                return mrt;
 300
 301        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 302        if (mrt == NULL)
 303                return NULL;
 304        mrt->id = id;
 305        write_pnet(&mrt->net, net);
 306
 307        /* Forwarding cache */
 308        for (i = 0; i < MFC6_LINES; i++)
 309                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 310
 311        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 312
 313        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 314                    (unsigned long)mrt);
 315
 316#ifdef CONFIG_IPV6_PIMSM_V2
 317        mrt->mroute_reg_vif_num = -1;
 318#endif
 319#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 320        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 321#endif
 322        return mrt;
 323}
 324
 325static void ip6mr_free_table(struct mr6_table *mrt)
 326{
 327        del_timer(&mrt->ipmr_expire_timer);
 328        mroute_clean_tables(mrt);
 329        kfree(mrt);
 330}
 331
 332#ifdef CONFIG_PROC_FS
 333
 334struct ipmr_mfc_iter {
 335        struct seq_net_private p;
 336        struct mr6_table *mrt;
 337        struct list_head *cache;
 338        int ct;
 339};
 340
 341
 342static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 343                                           struct ipmr_mfc_iter *it, loff_t pos)
 344{
 345        struct mr6_table *mrt = it->mrt;
 346        struct mfc6_cache *mfc;
 347
 348        read_lock(&mrt_lock);
 349        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 350                it->cache = &mrt->mfc6_cache_array[it->ct];
 351                list_for_each_entry(mfc, it->cache, list)
 352                        if (pos-- == 0)
 353                                return mfc;
 354        }
 355        read_unlock(&mrt_lock);
 356
 357        spin_lock_bh(&mfc_unres_lock);
 358        it->cache = &mrt->mfc6_unres_queue;
 359        list_for_each_entry(mfc, it->cache, list)
 360                if (pos-- == 0)
 361                        return mfc;
 362        spin_unlock_bh(&mfc_unres_lock);
 363
 364        it->cache = NULL;
 365        return NULL;
 366}
 367
 368/*
 369 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 370 */
 371
 372struct ipmr_vif_iter {
 373        struct seq_net_private p;
 374        struct mr6_table *mrt;
 375        int ct;
 376};
 377
 378static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 379                                            struct ipmr_vif_iter *iter,
 380                                            loff_t pos)
 381{
 382        struct mr6_table *mrt = iter->mrt;
 383
 384        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 385                if (!MIF_EXISTS(mrt, iter->ct))
 386                        continue;
 387                if (pos-- == 0)
 388                        return &mrt->vif6_table[iter->ct];
 389        }
 390        return NULL;
 391}
 392
 393static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 394        __acquires(mrt_lock)
 395{
 396        struct ipmr_vif_iter *iter = seq->private;
 397        struct net *net = seq_file_net(seq);
 398        struct mr6_table *mrt;
 399
 400        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 401        if (mrt == NULL)
 402                return ERR_PTR(-ENOENT);
 403
 404        iter->mrt = mrt;
 405
 406        read_lock(&mrt_lock);
 407        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 408                : SEQ_START_TOKEN;
 409}
 410
 411static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 412{
 413        struct ipmr_vif_iter *iter = seq->private;
 414        struct net *net = seq_file_net(seq);
 415        struct mr6_table *mrt = iter->mrt;
 416
 417        ++*pos;
 418        if (v == SEQ_START_TOKEN)
 419                return ip6mr_vif_seq_idx(net, iter, 0);
 420
 421        while (++iter->ct < mrt->maxvif) {
 422                if (!MIF_EXISTS(mrt, iter->ct))
 423                        continue;
 424                return &mrt->vif6_table[iter->ct];
 425        }
 426        return NULL;
 427}
 428
 429static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 430        __releases(mrt_lock)
 431{
 432        read_unlock(&mrt_lock);
 433}
 434
 435static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 436{
 437        struct ipmr_vif_iter *iter = seq->private;
 438        struct mr6_table *mrt = iter->mrt;
 439
 440        if (v == SEQ_START_TOKEN) {
 441                seq_puts(seq,
 442                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 443        } else {
 444                const struct mif_device *vif = v;
 445                const char *name = vif->dev ? vif->dev->name : "none";
 446
 447                seq_printf(seq,
 448                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 449                           vif - mrt->vif6_table,
 450                           name, vif->bytes_in, vif->pkt_in,
 451                           vif->bytes_out, vif->pkt_out,
 452                           vif->flags);
 453        }
 454        return 0;
 455}
 456
 457static const struct seq_operations ip6mr_vif_seq_ops = {
 458        .start = ip6mr_vif_seq_start,
 459        .next  = ip6mr_vif_seq_next,
 460        .stop  = ip6mr_vif_seq_stop,
 461        .show  = ip6mr_vif_seq_show,
 462};
 463
 464static int ip6mr_vif_open(struct inode *inode, struct file *file)
 465{
 466        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 467                            sizeof(struct ipmr_vif_iter));
 468}
 469
 470static const struct file_operations ip6mr_vif_fops = {
 471        .owner   = THIS_MODULE,
 472        .open    = ip6mr_vif_open,
 473        .read    = seq_read,
 474        .llseek  = seq_lseek,
 475        .release = seq_release_net,
 476};
 477
 478static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 479{
 480        struct ipmr_mfc_iter *it = seq->private;
 481        struct net *net = seq_file_net(seq);
 482        struct mr6_table *mrt;
 483
 484        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 485        if (mrt == NULL)
 486                return ERR_PTR(-ENOENT);
 487
 488        it->mrt = mrt;
 489        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 490                : SEQ_START_TOKEN;
 491}
 492
 493static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 494{
 495        struct mfc6_cache *mfc = v;
 496        struct ipmr_mfc_iter *it = seq->private;
 497        struct net *net = seq_file_net(seq);
 498        struct mr6_table *mrt = it->mrt;
 499
 500        ++*pos;
 501
 502        if (v == SEQ_START_TOKEN)
 503                return ipmr_mfc_seq_idx(net, seq->private, 0);
 504
 505        if (mfc->list.next != it->cache)
 506                return list_entry(mfc->list.next, struct mfc6_cache, list);
 507
 508        if (it->cache == &mrt->mfc6_unres_queue)
 509                goto end_of_list;
 510
 511        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 512
 513        while (++it->ct < MFC6_LINES) {
 514                it->cache = &mrt->mfc6_cache_array[it->ct];
 515                if (list_empty(it->cache))
 516                        continue;
 517                return list_first_entry(it->cache, struct mfc6_cache, list);
 518        }
 519
 520        /* exhausted cache_array, show unresolved */
 521        read_unlock(&mrt_lock);
 522        it->cache = &mrt->mfc6_unres_queue;
 523        it->ct = 0;
 524
 525        spin_lock_bh(&mfc_unres_lock);
 526        if (!list_empty(it->cache))
 527                return list_first_entry(it->cache, struct mfc6_cache, list);
 528
 529 end_of_list:
 530        spin_unlock_bh(&mfc_unres_lock);
 531        it->cache = NULL;
 532
 533        return NULL;
 534}
 535
 536static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 537{
 538        struct ipmr_mfc_iter *it = seq->private;
 539        struct mr6_table *mrt = it->mrt;
 540
 541        if (it->cache == &mrt->mfc6_unres_queue)
 542                spin_unlock_bh(&mfc_unres_lock);
 543        else if (it->cache == mrt->mfc6_cache_array)
 544                read_unlock(&mrt_lock);
 545}
 546
 547static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 548{
 549        int n;
 550
 551        if (v == SEQ_START_TOKEN) {
 552                seq_puts(seq,
 553                         "Group                            "
 554                         "Origin                           "
 555                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 556        } else {
 557                const struct mfc6_cache *mfc = v;
 558                const struct ipmr_mfc_iter *it = seq->private;
 559                struct mr6_table *mrt = it->mrt;
 560
 561                seq_printf(seq, "%pI6 %pI6 %-3hd",
 562                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 563                           mfc->mf6c_parent);
 564
 565                if (it->cache != &mrt->mfc6_unres_queue) {
 566                        seq_printf(seq, " %8lu %8lu %8lu",
 567                                   mfc->mfc_un.res.pkt,
 568                                   mfc->mfc_un.res.bytes,
 569                                   mfc->mfc_un.res.wrong_if);
 570                        for (n = mfc->mfc_un.res.minvif;
 571                             n < mfc->mfc_un.res.maxvif; n++) {
 572                                if (MIF_EXISTS(mrt, n) &&
 573                                    mfc->mfc_un.res.ttls[n] < 255)
 574                                        seq_printf(seq,
 575                                                   " %2d:%-3d",
 576                                                   n, mfc->mfc_un.res.ttls[n]);
 577                        }
 578                } else {
 579                        /* unresolved mfc_caches don't contain
 580                         * pkt, bytes and wrong_if values
 581                         */
 582                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 583                }
 584                seq_putc(seq, '\n');
 585        }
 586        return 0;
 587}
 588
 589static const struct seq_operations ipmr_mfc_seq_ops = {
 590        .start = ipmr_mfc_seq_start,
 591        .next  = ipmr_mfc_seq_next,
 592        .stop  = ipmr_mfc_seq_stop,
 593        .show  = ipmr_mfc_seq_show,
 594};
 595
 596static int ipmr_mfc_open(struct inode *inode, struct file *file)
 597{
 598        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 599                            sizeof(struct ipmr_mfc_iter));
 600}
 601
 602static const struct file_operations ip6mr_mfc_fops = {
 603        .owner   = THIS_MODULE,
 604        .open    = ipmr_mfc_open,
 605        .read    = seq_read,
 606        .llseek  = seq_lseek,
 607        .release = seq_release_net,
 608};
 609#endif
 610
 611#ifdef CONFIG_IPV6_PIMSM_V2
 612
 613static int pim6_rcv(struct sk_buff *skb)
 614{
 615        struct pimreghdr *pim;
 616        struct ipv6hdr   *encap;
 617        struct net_device  *reg_dev = NULL;
 618        struct net *net = dev_net(skb->dev);
 619        struct mr6_table *mrt;
 620        struct flowi fl = {
 621                .iif    = skb->dev->ifindex,
 622                .mark   = skb->mark,
 623        };
 624        int reg_vif_num;
 625
 626        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 627                goto drop;
 628
 629        pim = (struct pimreghdr *)skb_transport_header(skb);
 630        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 631            (pim->flags & PIM_NULL_REGISTER) ||
 632            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 633                             sizeof(*pim), IPPROTO_PIM,
 634                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 635             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 636                goto drop;
 637
 638        /* check if the inner packet is destined to mcast group */
 639        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 640                                   sizeof(*pim));
 641
 642        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 643            encap->payload_len == 0 ||
 644            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 645                goto drop;
 646
 647        if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
 648                goto drop;
 649        reg_vif_num = mrt->mroute_reg_vif_num;
 650
 651        read_lock(&mrt_lock);
 652        if (reg_vif_num >= 0)
 653                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 654        if (reg_dev)
 655                dev_hold(reg_dev);
 656        read_unlock(&mrt_lock);
 657
 658        if (reg_dev == NULL)
 659                goto drop;
 660
 661        skb->mac_header = skb->network_header;
 662        skb_pull(skb, (u8 *)encap - skb->data);
 663        skb_reset_network_header(skb);
 664        skb->protocol = htons(ETH_P_IPV6);
 665        skb->ip_summed = 0;
 666        skb->pkt_type = PACKET_HOST;
 667
 668        skb_tunnel_rx(skb, reg_dev);
 669
 670        netif_rx(skb);
 671
 672        dev_put(reg_dev);
 673        return 0;
 674 drop:
 675        kfree_skb(skb);
 676        return 0;
 677}
 678
 679static const struct inet6_protocol pim6_protocol = {
 680        .handler        =       pim6_rcv,
 681};
 682
 683/* Service routines creating virtual interfaces: PIMREG */
 684
 685static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 686                                      struct net_device *dev)
 687{
 688        struct net *net = dev_net(dev);
 689        struct mr6_table *mrt;
 690        struct flowi fl = {
 691                .oif            = dev->ifindex,
 692                .iif            = skb->skb_iif,
 693                .mark           = skb->mark,
 694        };
 695        int err;
 696
 697        err = ip6mr_fib_lookup(net, &fl, &mrt);
 698        if (err < 0)
 699                return err;
 700
 701        read_lock(&mrt_lock);
 702        dev->stats.tx_bytes += skb->len;
 703        dev->stats.tx_packets++;
 704        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 705        read_unlock(&mrt_lock);
 706        kfree_skb(skb);
 707        return NETDEV_TX_OK;
 708}
 709
 710static const struct net_device_ops reg_vif_netdev_ops = {
 711        .ndo_start_xmit = reg_vif_xmit,
 712};
 713
 714static void reg_vif_setup(struct net_device *dev)
 715{
 716        dev->type               = ARPHRD_PIMREG;
 717        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 718        dev->flags              = IFF_NOARP;
 719        dev->netdev_ops         = &reg_vif_netdev_ops;
 720        dev->destructor         = free_netdev;
 721        dev->features           |= NETIF_F_NETNS_LOCAL;
 722}
 723
 724static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 725{
 726        struct net_device *dev;
 727        char name[IFNAMSIZ];
 728
 729        if (mrt->id == RT6_TABLE_DFLT)
 730                sprintf(name, "pim6reg");
 731        else
 732                sprintf(name, "pim6reg%u", mrt->id);
 733
 734        dev = alloc_netdev(0, name, reg_vif_setup);
 735        if (dev == NULL)
 736                return NULL;
 737
 738        dev_net_set(dev, net);
 739
 740        if (register_netdevice(dev)) {
 741                free_netdev(dev);
 742                return NULL;
 743        }
 744        dev->iflink = 0;
 745
 746        if (dev_open(dev))
 747                goto failure;
 748
 749        dev_hold(dev);
 750        return dev;
 751
 752failure:
 753        /* allow the register to be completed before unregistering. */
 754        rtnl_unlock();
 755        rtnl_lock();
 756
 757        unregister_netdevice(dev);
 758        return NULL;
 759}
 760#endif
 761
 762/*
 763 *      Delete a VIF entry
 764 */
 765
 766static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 767{
 768        struct mif_device *v;
 769        struct net_device *dev;
 770        struct inet6_dev *in6_dev;
 771
 772        if (vifi < 0 || vifi >= mrt->maxvif)
 773                return -EADDRNOTAVAIL;
 774
 775        v = &mrt->vif6_table[vifi];
 776
 777        write_lock_bh(&mrt_lock);
 778        dev = v->dev;
 779        v->dev = NULL;
 780
 781        if (!dev) {
 782                write_unlock_bh(&mrt_lock);
 783                return -EADDRNOTAVAIL;
 784        }
 785
 786#ifdef CONFIG_IPV6_PIMSM_V2
 787        if (vifi == mrt->mroute_reg_vif_num)
 788                mrt->mroute_reg_vif_num = -1;
 789#endif
 790
 791        if (vifi + 1 == mrt->maxvif) {
 792                int tmp;
 793                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 794                        if (MIF_EXISTS(mrt, tmp))
 795                                break;
 796                }
 797                mrt->maxvif = tmp + 1;
 798        }
 799
 800        write_unlock_bh(&mrt_lock);
 801
 802        dev_set_allmulti(dev, -1);
 803
 804        in6_dev = __in6_dev_get(dev);
 805        if (in6_dev)
 806                in6_dev->cnf.mc_forwarding--;
 807
 808        if (v->flags & MIFF_REGISTER)
 809                unregister_netdevice_queue(dev, head);
 810
 811        dev_put(dev);
 812        return 0;
 813}
 814
 815static inline void ip6mr_cache_free(struct mfc6_cache *c)
 816{
 817        kmem_cache_free(mrt_cachep, c);
 818}
 819
 820/* Destroy an unresolved cache entry, killing queued skbs
 821   and reporting error to netlink readers.
 822 */
 823
 824static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 825{
 826        struct net *net = read_pnet(&mrt->net);
 827        struct sk_buff *skb;
 828
 829        atomic_dec(&mrt->cache_resolve_queue_len);
 830
 831        while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 832                if (ipv6_hdr(skb)->version == 0) {
 833                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 834                        nlh->nlmsg_type = NLMSG_ERROR;
 835                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 836                        skb_trim(skb, nlh->nlmsg_len);
 837                        ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 838                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 839                } else
 840                        kfree_skb(skb);
 841        }
 842
 843        ip6mr_cache_free(c);
 844}
 845
 846
 847/* Timer process for all the unresolved queue. */
 848
 849static void ipmr_do_expire_process(struct mr6_table *mrt)
 850{
 851        unsigned long now = jiffies;
 852        unsigned long expires = 10 * HZ;
 853        struct mfc6_cache *c, *next;
 854
 855        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 856                if (time_after(c->mfc_un.unres.expires, now)) {
 857                        /* not yet... */
 858                        unsigned long interval = c->mfc_un.unres.expires - now;
 859                        if (interval < expires)
 860                                expires = interval;
 861                        continue;
 862                }
 863
 864                list_del(&c->list);
 865                ip6mr_destroy_unres(mrt, c);
 866        }
 867
 868        if (!list_empty(&mrt->mfc6_unres_queue))
 869                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 870}
 871
 872static void ipmr_expire_process(unsigned long arg)
 873{
 874        struct mr6_table *mrt = (struct mr6_table *)arg;
 875
 876        if (!spin_trylock(&mfc_unres_lock)) {
 877                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 878                return;
 879        }
 880
 881        if (!list_empty(&mrt->mfc6_unres_queue))
 882                ipmr_do_expire_process(mrt);
 883
 884        spin_unlock(&mfc_unres_lock);
 885}
 886
 887/* Fill oifs list. It is called under write locked mrt_lock. */
 888
 889static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 890                                    unsigned char *ttls)
 891{
 892        int vifi;
 893
 894        cache->mfc_un.res.minvif = MAXMIFS;
 895        cache->mfc_un.res.maxvif = 0;
 896        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 897
 898        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 899                if (MIF_EXISTS(mrt, vifi) &&
 900                    ttls[vifi] && ttls[vifi] < 255) {
 901                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 902                        if (cache->mfc_un.res.minvif > vifi)
 903                                cache->mfc_un.res.minvif = vifi;
 904                        if (cache->mfc_un.res.maxvif <= vifi)
 905                                cache->mfc_un.res.maxvif = vifi + 1;
 906                }
 907        }
 908}
 909
 910static int mif6_add(struct net *net, struct mr6_table *mrt,
 911                    struct mif6ctl *vifc, int mrtsock)
 912{
 913        int vifi = vifc->mif6c_mifi;
 914        struct mif_device *v = &mrt->vif6_table[vifi];
 915        struct net_device *dev;
 916        struct inet6_dev *in6_dev;
 917        int err;
 918
 919        /* Is vif busy ? */
 920        if (MIF_EXISTS(mrt, vifi))
 921                return -EADDRINUSE;
 922
 923        switch (vifc->mif6c_flags) {
 924#ifdef CONFIG_IPV6_PIMSM_V2
 925        case MIFF_REGISTER:
 926                /*
 927                 * Special Purpose VIF in PIM
 928                 * All the packets will be sent to the daemon
 929                 */
 930                if (mrt->mroute_reg_vif_num >= 0)
 931                        return -EADDRINUSE;
 932                dev = ip6mr_reg_vif(net, mrt);
 933                if (!dev)
 934                        return -ENOBUFS;
 935                err = dev_set_allmulti(dev, 1);
 936                if (err) {
 937                        unregister_netdevice(dev);
 938                        dev_put(dev);
 939                        return err;
 940                }
 941                break;
 942#endif
 943        case 0:
 944                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 945                if (!dev)
 946                        return -EADDRNOTAVAIL;
 947                err = dev_set_allmulti(dev, 1);
 948                if (err) {
 949                        dev_put(dev);
 950                        return err;
 951                }
 952                break;
 953        default:
 954                return -EINVAL;
 955        }
 956
 957        in6_dev = __in6_dev_get(dev);
 958        if (in6_dev)
 959                in6_dev->cnf.mc_forwarding++;
 960
 961        /*
 962         *      Fill in the VIF structures
 963         */
 964        v->rate_limit = vifc->vifc_rate_limit;
 965        v->flags = vifc->mif6c_flags;
 966        if (!mrtsock)
 967                v->flags |= VIFF_STATIC;
 968        v->threshold = vifc->vifc_threshold;
 969        v->bytes_in = 0;
 970        v->bytes_out = 0;
 971        v->pkt_in = 0;
 972        v->pkt_out = 0;
 973        v->link = dev->ifindex;
 974        if (v->flags & MIFF_REGISTER)
 975                v->link = dev->iflink;
 976
 977        /* And finish update writing critical data */
 978        write_lock_bh(&mrt_lock);
 979        v->dev = dev;
 980#ifdef CONFIG_IPV6_PIMSM_V2
 981        if (v->flags & MIFF_REGISTER)
 982                mrt->mroute_reg_vif_num = vifi;
 983#endif
 984        if (vifi + 1 > mrt->maxvif)
 985                mrt->maxvif = vifi + 1;
 986        write_unlock_bh(&mrt_lock);
 987        return 0;
 988}
 989
 990static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 991                                           struct in6_addr *origin,
 992                                           struct in6_addr *mcastgrp)
 993{
 994        int line = MFC6_HASH(mcastgrp, origin);
 995        struct mfc6_cache *c;
 996
 997        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 998                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 999                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1000                        return c;
1001        }
1002        return NULL;
1003}
1004
1005/*
1006 *      Allocate a multicast cache entry
1007 */
1008static struct mfc6_cache *ip6mr_cache_alloc(void)
1009{
1010        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1011        if (c == NULL)
1012                return NULL;
1013        c->mfc_un.res.minvif = MAXMIFS;
1014        return c;
1015}
1016
1017static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1018{
1019        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1020        if (c == NULL)
1021                return NULL;
1022        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1023        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1024        return c;
1025}
1026
1027/*
1028 *      A cache entry has gone into a resolved state from queued
1029 */
1030
1031static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1032                                struct mfc6_cache *uc, struct mfc6_cache *c)
1033{
1034        struct sk_buff *skb;
1035
1036        /*
1037         *      Play the pending entries through our router
1038         */
1039
1040        while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1041                if (ipv6_hdr(skb)->version == 0) {
1042                        int err;
1043                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1044
1045                        if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1046                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1047                        } else {
1048                                nlh->nlmsg_type = NLMSG_ERROR;
1049                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1050                                skb_trim(skb, nlh->nlmsg_len);
1051                                ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1052                        }
1053                        err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
1054                } else
1055                        ip6_mr_forward(net, mrt, skb, c);
1056        }
1057}
1058
1059/*
1060 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1061 *      expects the following bizarre scheme.
1062 *
1063 *      Called under mrt_lock.
1064 */
1065
1066static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1067                              mifi_t mifi, int assert)
1068{
1069        struct sk_buff *skb;
1070        struct mrt6msg *msg;
1071        int ret;
1072
1073#ifdef CONFIG_IPV6_PIMSM_V2
1074        if (assert == MRT6MSG_WHOLEPKT)
1075                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1076                                                +sizeof(*msg));
1077        else
1078#endif
1079                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1080
1081        if (!skb)
1082                return -ENOBUFS;
1083
1084        /* I suppose that internal messages
1085         * do not require checksums */
1086
1087        skb->ip_summed = CHECKSUM_UNNECESSARY;
1088
1089#ifdef CONFIG_IPV6_PIMSM_V2
1090        if (assert == MRT6MSG_WHOLEPKT) {
1091                /* Ugly, but we have no choice with this interface.
1092                   Duplicate old header, fix length etc.
1093                   And all this only to mangle msg->im6_msgtype and
1094                   to set msg->im6_mbz to "mbz" :-)
1095                 */
1096                skb_push(skb, -skb_network_offset(pkt));
1097
1098                skb_push(skb, sizeof(*msg));
1099                skb_reset_transport_header(skb);
1100                msg = (struct mrt6msg *)skb_transport_header(skb);
1101                msg->im6_mbz = 0;
1102                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1103                msg->im6_mif = mrt->mroute_reg_vif_num;
1104                msg->im6_pad = 0;
1105                ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1106                ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1107
1108                skb->ip_summed = CHECKSUM_UNNECESSARY;
1109        } else
1110#endif
1111        {
1112        /*
1113         *      Copy the IP header
1114         */
1115
1116        skb_put(skb, sizeof(struct ipv6hdr));
1117        skb_reset_network_header(skb);
1118        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1119
1120        /*
1121         *      Add our header
1122         */
1123        skb_put(skb, sizeof(*msg));
1124        skb_reset_transport_header(skb);
1125        msg = (struct mrt6msg *)skb_transport_header(skb);
1126
1127        msg->im6_mbz = 0;
1128        msg->im6_msgtype = assert;
1129        msg->im6_mif = mifi;
1130        msg->im6_pad = 0;
1131        ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
1132        ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
1133
1134        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1135        skb->ip_summed = CHECKSUM_UNNECESSARY;
1136        }
1137
1138        if (mrt->mroute6_sk == NULL) {
1139                kfree_skb(skb);
1140                return -EINVAL;
1141        }
1142
1143        /*
1144         *      Deliver to user space multicast routing algorithms
1145         */
1146        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1147        if (ret < 0) {
1148                if (net_ratelimit())
1149                        printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
1150                kfree_skb(skb);
1151        }
1152
1153        return ret;
1154}
1155
1156/*
1157 *      Queue a packet for resolution. It gets locked cache entry!
1158 */
1159
1160static int
1161ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1162{
1163        bool found = false;
1164        int err;
1165        struct mfc6_cache *c;
1166
1167        spin_lock_bh(&mfc_unres_lock);
1168        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1169                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1170                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1171                        found = true;
1172                        break;
1173                }
1174        }
1175
1176        if (!found) {
1177                /*
1178                 *      Create a new entry if allowable
1179                 */
1180
1181                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1182                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1183                        spin_unlock_bh(&mfc_unres_lock);
1184
1185                        kfree_skb(skb);
1186                        return -ENOBUFS;
1187                }
1188
1189                /*
1190                 *      Fill in the new cache entry
1191                 */
1192                c->mf6c_parent = -1;
1193                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1194                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1195
1196                /*
1197                 *      Reflect first query at pim6sd
1198                 */
1199                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1200                if (err < 0) {
1201                        /* If the report failed throw the cache entry
1202                           out - Brad Parker
1203                         */
1204                        spin_unlock_bh(&mfc_unres_lock);
1205
1206                        ip6mr_cache_free(c);
1207                        kfree_skb(skb);
1208                        return err;
1209                }
1210
1211                atomic_inc(&mrt->cache_resolve_queue_len);
1212                list_add(&c->list, &mrt->mfc6_unres_queue);
1213
1214                ipmr_do_expire_process(mrt);
1215        }
1216
1217        /*
1218         *      See if we can append the packet
1219         */
1220        if (c->mfc_un.unres.unresolved.qlen > 3) {
1221                kfree_skb(skb);
1222                err = -ENOBUFS;
1223        } else {
1224                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1225                err = 0;
1226        }
1227
1228        spin_unlock_bh(&mfc_unres_lock);
1229        return err;
1230}
1231
1232/*
1233 *      MFC6 cache manipulation by user space
1234 */
1235
1236static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1237{
1238        int line;
1239        struct mfc6_cache *c, *next;
1240
1241        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1242
1243        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1244                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1245                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1246                        write_lock_bh(&mrt_lock);
1247                        list_del(&c->list);
1248                        write_unlock_bh(&mrt_lock);
1249
1250                        ip6mr_cache_free(c);
1251                        return 0;
1252                }
1253        }
1254        return -ENOENT;
1255}
1256
1257static int ip6mr_device_event(struct notifier_block *this,
1258                              unsigned long event, void *ptr)
1259{
1260        struct net_device *dev = ptr;
1261        struct net *net = dev_net(dev);
1262        struct mr6_table *mrt;
1263        struct mif_device *v;
1264        int ct;
1265        LIST_HEAD(list);
1266
1267        if (event != NETDEV_UNREGISTER)
1268                return NOTIFY_DONE;
1269
1270        ip6mr_for_each_table(mrt, net) {
1271                v = &mrt->vif6_table[0];
1272                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1273                        if (v->dev == dev)
1274                                mif6_delete(mrt, ct, &list);
1275                }
1276        }
1277        unregister_netdevice_many(&list);
1278
1279        return NOTIFY_DONE;
1280}
1281
1282static struct notifier_block ip6_mr_notifier = {
1283        .notifier_call = ip6mr_device_event
1284};
1285
1286/*
1287 *      Setup for IP multicast routing
1288 */
1289
1290static int __net_init ip6mr_net_init(struct net *net)
1291{
1292        int err;
1293
1294        err = ip6mr_rules_init(net);
1295        if (err < 0)
1296                goto fail;
1297
1298#ifdef CONFIG_PROC_FS
1299        err = -ENOMEM;
1300        if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1301                goto proc_vif_fail;
1302        if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1303                goto proc_cache_fail;
1304#endif
1305
1306        return 0;
1307
1308#ifdef CONFIG_PROC_FS
1309proc_cache_fail:
1310        proc_net_remove(net, "ip6_mr_vif");
1311proc_vif_fail:
1312        ip6mr_rules_exit(net);
1313#endif
1314fail:
1315        return err;
1316}
1317
1318static void __net_exit ip6mr_net_exit(struct net *net)
1319{
1320#ifdef CONFIG_PROC_FS
1321        proc_net_remove(net, "ip6_mr_cache");
1322        proc_net_remove(net, "ip6_mr_vif");
1323#endif
1324        ip6mr_rules_exit(net);
1325}
1326
1327static struct pernet_operations ip6mr_net_ops = {
1328        .init = ip6mr_net_init,
1329        .exit = ip6mr_net_exit,
1330};
1331
1332int __init ip6_mr_init(void)
1333{
1334        int err;
1335
1336        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1337                                       sizeof(struct mfc6_cache),
1338                                       0, SLAB_HWCACHE_ALIGN,
1339                                       NULL);
1340        if (!mrt_cachep)
1341                return -ENOMEM;
1342
1343        err = register_pernet_subsys(&ip6mr_net_ops);
1344        if (err)
1345                goto reg_pernet_fail;
1346
1347        err = register_netdevice_notifier(&ip6_mr_notifier);
1348        if (err)
1349                goto reg_notif_fail;
1350#ifdef CONFIG_IPV6_PIMSM_V2
1351        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1352                printk(KERN_ERR "ip6_mr_init: can't add PIM protocol\n");
1353                err = -EAGAIN;
1354                goto add_proto_fail;
1355        }
1356#endif
1357        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1358        return 0;
1359#ifdef CONFIG_IPV6_PIMSM_V2
1360add_proto_fail:
1361        unregister_netdevice_notifier(&ip6_mr_notifier);
1362#endif
1363reg_notif_fail:
1364        unregister_pernet_subsys(&ip6mr_net_ops);
1365reg_pernet_fail:
1366        kmem_cache_destroy(mrt_cachep);
1367        return err;
1368}
1369
1370void ip6_mr_cleanup(void)
1371{
1372        unregister_netdevice_notifier(&ip6_mr_notifier);
1373        unregister_pernet_subsys(&ip6mr_net_ops);
1374        kmem_cache_destroy(mrt_cachep);
1375}
1376
1377static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1378                         struct mf6cctl *mfc, int mrtsock)
1379{
1380        bool found = false;
1381        int line;
1382        struct mfc6_cache *uc, *c;
1383        unsigned char ttls[MAXMIFS];
1384        int i;
1385
1386        if (mfc->mf6cc_parent >= MAXMIFS)
1387                return -ENFILE;
1388
1389        memset(ttls, 255, MAXMIFS);
1390        for (i = 0; i < MAXMIFS; i++) {
1391                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1392                        ttls[i] = 1;
1393
1394        }
1395
1396        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1397
1398        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1399                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1400                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1401                        found = true;
1402                        break;
1403                }
1404        }
1405
1406        if (found) {
1407                write_lock_bh(&mrt_lock);
1408                c->mf6c_parent = mfc->mf6cc_parent;
1409                ip6mr_update_thresholds(mrt, c, ttls);
1410                if (!mrtsock)
1411                        c->mfc_flags |= MFC_STATIC;
1412                write_unlock_bh(&mrt_lock);
1413                return 0;
1414        }
1415
1416        if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1417                return -EINVAL;
1418
1419        c = ip6mr_cache_alloc();
1420        if (c == NULL)
1421                return -ENOMEM;
1422
1423        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1424        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1425        c->mf6c_parent = mfc->mf6cc_parent;
1426        ip6mr_update_thresholds(mrt, c, ttls);
1427        if (!mrtsock)
1428                c->mfc_flags |= MFC_STATIC;
1429
1430        write_lock_bh(&mrt_lock);
1431        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1432        write_unlock_bh(&mrt_lock);
1433
1434        /*
1435         *      Check to see if we resolved a queued list. If so we
1436         *      need to send on the frames and tidy up.
1437         */
1438        found = false;
1439        spin_lock_bh(&mfc_unres_lock);
1440        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1441                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1442                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1443                        list_del(&uc->list);
1444                        atomic_dec(&mrt->cache_resolve_queue_len);
1445                        found = true;
1446                        break;
1447                }
1448        }
1449        if (list_empty(&mrt->mfc6_unres_queue))
1450                del_timer(&mrt->ipmr_expire_timer);
1451        spin_unlock_bh(&mfc_unres_lock);
1452
1453        if (found) {
1454                ip6mr_cache_resolve(net, mrt, uc, c);
1455                ip6mr_cache_free(uc);
1456        }
1457        return 0;
1458}
1459
1460/*
1461 *      Close the multicast socket, and clear the vif tables etc
1462 */
1463
1464static void mroute_clean_tables(struct mr6_table *mrt)
1465{
1466        int i;
1467        LIST_HEAD(list);
1468        struct mfc6_cache *c, *next;
1469
1470        /*
1471         *      Shut down all active vif entries
1472         */
1473        for (i = 0; i < mrt->maxvif; i++) {
1474                if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1475                        mif6_delete(mrt, i, &list);
1476        }
1477        unregister_netdevice_many(&list);
1478
1479        /*
1480         *      Wipe the cache
1481         */
1482        for (i = 0; i < MFC6_LINES; i++) {
1483                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1484                        if (c->mfc_flags & MFC_STATIC)
1485                                continue;
1486                        write_lock_bh(&mrt_lock);
1487                        list_del(&c->list);
1488                        write_unlock_bh(&mrt_lock);
1489
1490                        ip6mr_cache_free(c);
1491                }
1492        }
1493
1494        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1495                spin_lock_bh(&mfc_unres_lock);
1496                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1497                        list_del(&c->list);
1498                        ip6mr_destroy_unres(mrt, c);
1499                }
1500                spin_unlock_bh(&mfc_unres_lock);
1501        }
1502}
1503
1504static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1505{
1506        int err = 0;
1507        struct net *net = sock_net(sk);
1508
1509        rtnl_lock();
1510        write_lock_bh(&mrt_lock);
1511        if (likely(mrt->mroute6_sk == NULL)) {
1512                mrt->mroute6_sk = sk;
1513                net->ipv6.devconf_all->mc_forwarding++;
1514        }
1515        else
1516                err = -EADDRINUSE;
1517        write_unlock_bh(&mrt_lock);
1518
1519        rtnl_unlock();
1520
1521        return err;
1522}
1523
1524int ip6mr_sk_done(struct sock *sk)
1525{
1526        int err = -EACCES;
1527        struct net *net = sock_net(sk);
1528        struct mr6_table *mrt;
1529
1530        rtnl_lock();
1531        ip6mr_for_each_table(mrt, net) {
1532                if (sk == mrt->mroute6_sk) {
1533                        write_lock_bh(&mrt_lock);
1534                        mrt->mroute6_sk = NULL;
1535                        net->ipv6.devconf_all->mc_forwarding--;
1536                        write_unlock_bh(&mrt_lock);
1537
1538                        mroute_clean_tables(mrt);
1539                        err = 0;
1540                        break;
1541                }
1542        }
1543        rtnl_unlock();
1544
1545        return err;
1546}
1547
1548struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1549{
1550        struct mr6_table *mrt;
1551        struct flowi fl = {
1552                .iif    = skb->skb_iif,
1553                .oif    = skb->dev->ifindex,
1554                .mark   = skb->mark,
1555        };
1556
1557        if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1558                return NULL;
1559
1560        return mrt->mroute6_sk;
1561}
1562
1563/*
1564 *      Socket options and virtual interface manipulation. The whole
1565 *      virtual interface system is a complete heap, but unfortunately
1566 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1567 *      MOSPF/PIM router set up we can clean this up.
1568 */
1569
1570int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1571{
1572        int ret;
1573        struct mif6ctl vif;
1574        struct mf6cctl mfc;
1575        mifi_t mifi;
1576        struct net *net = sock_net(sk);
1577        struct mr6_table *mrt;
1578
1579        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1580        if (mrt == NULL)
1581                return -ENOENT;
1582
1583        if (optname != MRT6_INIT) {
1584                if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1585                        return -EACCES;
1586        }
1587
1588        switch (optname) {
1589        case MRT6_INIT:
1590                if (sk->sk_type != SOCK_RAW ||
1591                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1592                        return -EOPNOTSUPP;
1593                if (optlen < sizeof(int))
1594                        return -EINVAL;
1595
1596                return ip6mr_sk_init(mrt, sk);
1597
1598        case MRT6_DONE:
1599                return ip6mr_sk_done(sk);
1600
1601        case MRT6_ADD_MIF:
1602                if (optlen < sizeof(vif))
1603                        return -EINVAL;
1604                if (copy_from_user(&vif, optval, sizeof(vif)))
1605                        return -EFAULT;
1606                if (vif.mif6c_mifi >= MAXMIFS)
1607                        return -ENFILE;
1608                rtnl_lock();
1609                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1610                rtnl_unlock();
1611                return ret;
1612
1613        case MRT6_DEL_MIF:
1614                if (optlen < sizeof(mifi_t))
1615                        return -EINVAL;
1616                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1617                        return -EFAULT;
1618                rtnl_lock();
1619                ret = mif6_delete(mrt, mifi, NULL);
1620                rtnl_unlock();
1621                return ret;
1622
1623        /*
1624         *      Manipulate the forwarding caches. These live
1625         *      in a sort of kernel/user symbiosis.
1626         */
1627        case MRT6_ADD_MFC:
1628        case MRT6_DEL_MFC:
1629                if (optlen < sizeof(mfc))
1630                        return -EINVAL;
1631                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1632                        return -EFAULT;
1633                rtnl_lock();
1634                if (optname == MRT6_DEL_MFC)
1635                        ret = ip6mr_mfc_delete(mrt, &mfc);
1636                else
1637                        ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1638                rtnl_unlock();
1639                return ret;
1640
1641        /*
1642         *      Control PIM assert (to activate pim will activate assert)
1643         */
1644        case MRT6_ASSERT:
1645        {
1646                int v;
1647                if (get_user(v, (int __user *)optval))
1648                        return -EFAULT;
1649                mrt->mroute_do_assert = !!v;
1650                return 0;
1651        }
1652
1653#ifdef CONFIG_IPV6_PIMSM_V2
1654        case MRT6_PIM:
1655        {
1656                int v;
1657                if (get_user(v, (int __user *)optval))
1658                        return -EFAULT;
1659                v = !!v;
1660                rtnl_lock();
1661                ret = 0;
1662                if (v != mrt->mroute_do_pim) {
1663                        mrt->mroute_do_pim = v;
1664                        mrt->mroute_do_assert = v;
1665                }
1666                rtnl_unlock();
1667                return ret;
1668        }
1669
1670#endif
1671#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1672        case MRT6_TABLE:
1673        {
1674                u32 v;
1675
1676                if (optlen != sizeof(u32))
1677                        return -EINVAL;
1678                if (get_user(v, (u32 __user *)optval))
1679                        return -EFAULT;
1680                if (sk == mrt->mroute6_sk)
1681                        return -EBUSY;
1682
1683                rtnl_lock();
1684                ret = 0;
1685                if (!ip6mr_new_table(net, v))
1686                        ret = -ENOMEM;
1687                raw6_sk(sk)->ip6mr_table = v;
1688                rtnl_unlock();
1689                return ret;
1690        }
1691#endif
1692        /*
1693         *      Spurious command, or MRT6_VERSION which you cannot
1694         *      set.
1695         */
1696        default:
1697                return -ENOPROTOOPT;
1698        }
1699}
1700
1701/*
1702 *      Getsock opt support for the multicast routing system.
1703 */
1704
1705int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1706                          int __user *optlen)
1707{
1708        int olr;
1709        int val;
1710        struct net *net = sock_net(sk);
1711        struct mr6_table *mrt;
1712
1713        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1714        if (mrt == NULL)
1715                return -ENOENT;
1716
1717        switch (optname) {
1718        case MRT6_VERSION:
1719                val = 0x0305;
1720                break;
1721#ifdef CONFIG_IPV6_PIMSM_V2
1722        case MRT6_PIM:
1723                val = mrt->mroute_do_pim;
1724                break;
1725#endif
1726        case MRT6_ASSERT:
1727                val = mrt->mroute_do_assert;
1728                break;
1729        default:
1730                return -ENOPROTOOPT;
1731        }
1732
1733        if (get_user(olr, optlen))
1734                return -EFAULT;
1735
1736        olr = min_t(int, olr, sizeof(int));
1737        if (olr < 0)
1738                return -EINVAL;
1739
1740        if (put_user(olr, optlen))
1741                return -EFAULT;
1742        if (copy_to_user(optval, &val, olr))
1743                return -EFAULT;
1744        return 0;
1745}
1746
1747/*
1748 *      The IP multicast ioctl support routines.
1749 */
1750
1751int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1752{
1753        struct sioc_sg_req6 sr;
1754        struct sioc_mif_req6 vr;
1755        struct mif_device *vif;
1756        struct mfc6_cache *c;
1757        struct net *net = sock_net(sk);
1758        struct mr6_table *mrt;
1759
1760        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1761        if (mrt == NULL)
1762                return -ENOENT;
1763
1764        switch (cmd) {
1765        case SIOCGETMIFCNT_IN6:
1766                if (copy_from_user(&vr, arg, sizeof(vr)))
1767                        return -EFAULT;
1768                if (vr.mifi >= mrt->maxvif)
1769                        return -EINVAL;
1770                read_lock(&mrt_lock);
1771                vif = &mrt->vif6_table[vr.mifi];
1772                if (MIF_EXISTS(mrt, vr.mifi)) {
1773                        vr.icount = vif->pkt_in;
1774                        vr.ocount = vif->pkt_out;
1775                        vr.ibytes = vif->bytes_in;
1776                        vr.obytes = vif->bytes_out;
1777                        read_unlock(&mrt_lock);
1778
1779                        if (copy_to_user(arg, &vr, sizeof(vr)))
1780                                return -EFAULT;
1781                        return 0;
1782                }
1783                read_unlock(&mrt_lock);
1784                return -EADDRNOTAVAIL;
1785        case SIOCGETSGCNT_IN6:
1786                if (copy_from_user(&sr, arg, sizeof(sr)))
1787                        return -EFAULT;
1788
1789                read_lock(&mrt_lock);
1790                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1791                if (c) {
1792                        sr.pktcnt = c->mfc_un.res.pkt;
1793                        sr.bytecnt = c->mfc_un.res.bytes;
1794                        sr.wrong_if = c->mfc_un.res.wrong_if;
1795                        read_unlock(&mrt_lock);
1796
1797                        if (copy_to_user(arg, &sr, sizeof(sr)))
1798                                return -EFAULT;
1799                        return 0;
1800                }
1801                read_unlock(&mrt_lock);
1802                return -EADDRNOTAVAIL;
1803        default:
1804                return -ENOIOCTLCMD;
1805        }
1806}
1807
#ifdef CONFIG_COMPAT
/* Compat-task layout of the SIOCGETSGCNT_IN6 argument: counters are compat_ulong_t */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat-task layout of the SIOCGETMIFCNT_IN6 argument: counters are compat_ulong_t */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 *	Compat counterpart of ip6mr_ioctl(): same commands, same return
 *	values, but the user structures use compat_ulong_t counters.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 vr;
	struct compat_sioc_sg_req6 sr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (!MIF_EXISTS(mrt, vr.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		vr.icount = vif->pkt_in;
		vr.ocount = vif->pkt_out;
		vr.ibytes = vif->bytes_in;
		vr.obytes = vif->bytes_out;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &vr, sizeof(vr)) ? -EFAULT : 0;

	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c == NULL) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		sr.pktcnt = c->mfc_un.res.pkt;
		sr.bytecnt = c->mfc_un.res.bytes;
		sr.wrong_if = c->mfc_un.res.wrong_if;
		read_unlock(&mrt_lock);

		return copy_to_user(arg, &sr, sizeof(sr)) ? -EFAULT : 0;

	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1882
1883static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1884{
1885        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1886                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1887        return dst_output(skb);
1888}
1889
1890/*
1891 *      Processing handlers for ip6mr_forward
1892 */
1893
1894static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1895                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1896{
1897        struct ipv6hdr *ipv6h;
1898        struct mif_device *vif = &mrt->vif6_table[vifi];
1899        struct net_device *dev;
1900        struct dst_entry *dst;
1901        struct flowi fl;
1902
1903        if (vif->dev == NULL)
1904                goto out_free;
1905
1906#ifdef CONFIG_IPV6_PIMSM_V2
1907        if (vif->flags & MIFF_REGISTER) {
1908                vif->pkt_out++;
1909                vif->bytes_out += skb->len;
1910                vif->dev->stats.tx_bytes += skb->len;
1911                vif->dev->stats.tx_packets++;
1912                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1913                goto out_free;
1914        }
1915#endif
1916
1917        ipv6h = ipv6_hdr(skb);
1918
1919        fl = (struct flowi) {
1920                .oif = vif->link,
1921                .fl6_dst = ipv6h->daddr,
1922        };
1923
1924        dst = ip6_route_output(net, NULL, &fl);
1925        if (!dst)
1926                goto out_free;
1927
1928        skb_dst_drop(skb);
1929        skb_dst_set(skb, dst);
1930
1931        /*
1932         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1933         * not only before forwarding, but after forwarding on all output
1934         * interfaces. It is clear, if mrouter runs a multicasting
1935         * program, it should receive packets not depending to what interface
1936         * program is joined.
1937         * If we will not make it, the program will have to join on all
1938         * interfaces. On the other hand, multihoming host (or router, but
1939         * not mrouter) cannot join to more than one interface - it will
1940         * result in receiving multiple packets.
1941         */
1942        dev = vif->dev;
1943        skb->dev = dev;
1944        vif->pkt_out++;
1945        vif->bytes_out += skb->len;
1946
1947        /* We are about to write */
1948        /* XXX: extension headers? */
1949        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1950                goto out_free;
1951
1952        ipv6h = ipv6_hdr(skb);
1953        ipv6h->hop_limit--;
1954
1955        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1956
1957        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1958                       ip6mr_forward2_finish);
1959
1960out_free:
1961        kfree_skb(skb);
1962        return 0;
1963}
1964
1965static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1966{
1967        int ct;
1968
1969        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1970                if (mrt->vif6_table[ct].dev == dev)
1971                        break;
1972        }
1973        return ct;
1974}
1975
1976static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1977                          struct sk_buff *skb, struct mfc6_cache *cache)
1978{
1979        int psend = -1;
1980        int vif, ct;
1981
1982        vif = cache->mf6c_parent;
1983        cache->mfc_un.res.pkt++;
1984        cache->mfc_un.res.bytes += skb->len;
1985
1986        /*
1987         * Wrong interface: drop packet and (maybe) send PIM assert.
1988         */
1989        if (mrt->vif6_table[vif].dev != skb->dev) {
1990                int true_vifi;
1991
1992                cache->mfc_un.res.wrong_if++;
1993                true_vifi = ip6mr_find_vif(mrt, skb->dev);
1994
1995                if (true_vifi >= 0 && mrt->mroute_do_assert &&
1996                    /* pimsm uses asserts, when switching from RPT to SPT,
1997                       so that we cannot check that packet arrived on an oif.
1998                       It is bad, but otherwise we would need to move pretty
1999                       large chunk of pimd to kernel. Ough... --ANK
2000                     */
2001                    (mrt->mroute_do_pim ||
2002                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2003                    time_after(jiffies,
2004                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2005                        cache->mfc_un.res.last_assert = jiffies;
2006                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2007                }
2008                goto dont_forward;
2009        }
2010
2011        mrt->vif6_table[vif].pkt_in++;
2012        mrt->vif6_table[vif].bytes_in += skb->len;
2013
2014        /*
2015         *      Forward the frame
2016         */
2017        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2018                if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2019                        if (psend != -1) {
2020                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2021                                if (skb2)
2022                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2023                        }
2024                        psend = ct;
2025                }
2026        }
2027        if (psend != -1) {
2028                ip6mr_forward2(net, mrt, skb, cache, psend);
2029                return 0;
2030        }
2031
2032dont_forward:
2033        kfree_skb(skb);
2034        return 0;
2035}
2036
2037
2038/*
2039 *      Multicast packets for forwarding arrive here
2040 */
2041
2042int ip6_mr_input(struct sk_buff *skb)
2043{
2044        struct mfc6_cache *cache;
2045        struct net *net = dev_net(skb->dev);
2046        struct mr6_table *mrt;
2047        struct flowi fl = {
2048                .iif    = skb->dev->ifindex,
2049                .mark   = skb->mark,
2050        };
2051        int err;
2052
2053        err = ip6mr_fib_lookup(net, &fl, &mrt);
2054        if (err < 0)
2055                return err;
2056
2057        read_lock(&mrt_lock);
2058        cache = ip6mr_cache_find(mrt,
2059                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2060
2061        /*
2062         *      No usable cache entry
2063         */
2064        if (cache == NULL) {
2065                int vif;
2066
2067                vif = ip6mr_find_vif(mrt, skb->dev);
2068                if (vif >= 0) {
2069                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2070                        read_unlock(&mrt_lock);
2071
2072                        return err;
2073                }
2074                read_unlock(&mrt_lock);
2075                kfree_skb(skb);
2076                return -ENODEV;
2077        }
2078
2079        ip6_mr_forward(net, mrt, skb, cache);
2080
2081        read_unlock(&mrt_lock);
2082
2083        return 0;
2084}
2085
2086
2087static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2088                               struct mfc6_cache *c, struct rtmsg *rtm)
2089{
2090        int ct;
2091        struct rtnexthop *nhp;
2092        u8 *b = skb_tail_pointer(skb);
2093        struct rtattr *mp_head;
2094
2095        /* If cache is unresolved, don't try to parse IIF and OIF */
2096        if (c->mf6c_parent >= MAXMIFS)
2097                return -ENOENT;
2098
2099        if (MIF_EXISTS(mrt, c->mf6c_parent))
2100                RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
2101
2102        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2103
2104        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2105                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2106                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2107                                goto rtattr_failure;
2108                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2109                        nhp->rtnh_flags = 0;
2110                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2111                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2112                        nhp->rtnh_len = sizeof(*nhp);
2113                }
2114        }
2115        mp_head->rta_type = RTA_MULTIPATH;
2116        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2117        rtm->rtm_type = RTN_MULTICAST;
2118        return 1;
2119
2120rtattr_failure:
2121        nlmsg_trim(skb, b);
2122        return -EMSGSIZE;
2123}
2124
2125int ip6mr_get_route(struct net *net,
2126                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2127{
2128        int err;
2129        struct mr6_table *mrt;
2130        struct mfc6_cache *cache;
2131        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2132
2133        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2134        if (mrt == NULL)
2135                return -ENOENT;
2136
2137        read_lock(&mrt_lock);
2138        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2139
2140        if (!cache) {
2141                struct sk_buff *skb2;
2142                struct ipv6hdr *iph;
2143                struct net_device *dev;
2144                int vif;
2145
2146                if (nowait) {
2147                        read_unlock(&mrt_lock);
2148                        return -EAGAIN;
2149                }
2150
2151                dev = skb->dev;
2152                if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2153                        read_unlock(&mrt_lock);
2154                        return -ENODEV;
2155                }
2156
2157                /* really correct? */
2158                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2159                if (!skb2) {
2160                        read_unlock(&mrt_lock);
2161                        return -ENOMEM;
2162                }
2163
2164                skb_reset_transport_header(skb2);
2165
2166                skb_put(skb2, sizeof(struct ipv6hdr));
2167                skb_reset_network_header(skb2);
2168
2169                iph = ipv6_hdr(skb2);
2170                iph->version = 0;
2171                iph->priority = 0;
2172                iph->flow_lbl[0] = 0;
2173                iph->flow_lbl[1] = 0;
2174                iph->flow_lbl[2] = 0;
2175                iph->payload_len = 0;
2176                iph->nexthdr = IPPROTO_NONE;
2177                iph->hop_limit = 0;
2178                ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
2179                ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
2180
2181                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2182                read_unlock(&mrt_lock);
2183
2184                return err;
2185        }
2186
2187        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2188                cache->mfc_flags |= MFC_NOTIFY;
2189
2190        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2191        read_unlock(&mrt_lock);
2192        return err;
2193}
2194
2195static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2196                             u32 pid, u32 seq, struct mfc6_cache *c)
2197{
2198        struct nlmsghdr *nlh;
2199        struct rtmsg *rtm;
2200
2201        nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2202        if (nlh == NULL)
2203                return -EMSGSIZE;
2204
2205        rtm = nlmsg_data(nlh);
2206        rtm->rtm_family   = RTNL_FAMILY_IPMR;
2207        rtm->rtm_dst_len  = 128;
2208        rtm->rtm_src_len  = 128;
2209        rtm->rtm_tos      = 0;
2210        rtm->rtm_table    = mrt->id;
2211        NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2212        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2213        rtm->rtm_protocol = RTPROT_UNSPEC;
2214        rtm->rtm_flags    = 0;
2215
2216        NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2217        NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2218
2219        if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2220                goto nla_put_failure;
2221
2222        return nlmsg_end(skb, nlh);
2223
2224nla_put_failure:
2225        nlmsg_cancel(skb, nlh);
2226        return -EMSGSIZE;
2227}
2228
2229static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2230{
2231        struct net *net = sock_net(skb->sk);
2232        struct mr6_table *mrt;
2233        struct mfc6_cache *mfc;
2234        unsigned int t = 0, s_t;
2235        unsigned int h = 0, s_h;
2236        unsigned int e = 0, s_e;
2237
2238        s_t = cb->args[0];
2239        s_h = cb->args[1];
2240        s_e = cb->args[2];
2241
2242        read_lock(&mrt_lock);
2243        ip6mr_for_each_table(mrt, net) {
2244                if (t < s_t)
2245                        goto next_table;
2246                if (t > s_t)
2247                        s_h = 0;
2248                for (h = s_h; h < MFC6_LINES; h++) {
2249                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2250                                if (e < s_e)
2251                                        goto next_entry;
2252                                if (ip6mr_fill_mroute(mrt, skb,
2253                                                      NETLINK_CB(cb->skb).pid,
2254                                                      cb->nlh->nlmsg_seq,
2255                                                      mfc) < 0)
2256                                        goto done;
2257next_entry:
2258                                e++;
2259                        }
2260                        e = s_e = 0;
2261                }
2262                s_h = 0;
2263next_table:
2264                t++;
2265        }
2266done:
2267        read_unlock(&mrt_lock);
2268
2269        cb->args[2] = e;
2270        cb->args[1] = h;
2271        cb->args[0] = t;
2272
2273        return skb->len;
2274}
2275