linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
  57struct mr6_table {
  58        struct list_head        list;
  59#ifdef CONFIG_NET_NS
  60        struct net              *net;
  61#endif
  62        u32                     id;
  63        struct sock             *mroute6_sk;
  64        struct timer_list       ipmr_expire_timer;
  65        struct list_head        mfc6_unres_queue;
  66        struct list_head        mfc6_cache_array[MFC6_LINES];
  67        struct mif_device       vif6_table[MAXMIFS];
  68        int                     maxvif;
  69        atomic_t                cache_resolve_queue_len;
  70        bool                    mroute_do_assert;
  71        bool                    mroute_do_pim;
  72#ifdef CONFIG_IPV6_PIMSM_V2
  73        int                     mroute_reg_vif_num;
  74#endif
  75};
  76
  77struct ip6mr_rule {
  78        struct fib_rule         common;
  79};
  80
  81struct ip6mr_result {
  82        struct mr6_table        *mrt;
  83};
  84
  85/* Big lock, protecting vif table, mrt cache and mroute socket state.
  86   Note that the changes are semaphored via rtnl_lock.
  87 */
  88
  89static DEFINE_RWLOCK(mrt_lock);
  90
  91/*
  92 *      Multicast router control variables
  93 */
  94
  95#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  96
  97/* Special spinlock for queue of unresolved entries */
  98static DEFINE_SPINLOCK(mfc_unres_lock);
  99
 100/* We return to original Alan's scheme. Hash table of resolved
 101   entries is changed only in process context and protected
 102   with weak lock mrt_lock. Queue of unresolved entries is protected
 103   with strong spinlock mfc_unres_lock.
 104
 105   In this case data path is free of exclusive locks at all.
 106 */
 107
 108static struct kmem_cache *mrt_cachep __read_mostly;
 109
 110static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 111static void ip6mr_free_table(struct mr6_table *mrt);
 112
 113static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 114                           struct sk_buff *skb, struct mfc6_cache *cache);
 115static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 116                              mifi_t mifi, int assert);
 117static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 118                               struct mfc6_cache *c, struct rtmsg *rtm);
 119static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 120                              int cmd);
 121static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 122                               struct netlink_callback *cb);
 123static void mroute_clean_tables(struct mr6_table *mrt);
 124static void ipmr_expire_process(unsigned long arg);
 125
 126#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 127#define ip6mr_for_each_table(mrt, net) \
 128        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 129
 130static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 131{
 132        struct mr6_table *mrt;
 133
 134        ip6mr_for_each_table(mrt, net) {
 135                if (mrt->id == id)
 136                        return mrt;
 137        }
 138        return NULL;
 139}
 140
 141static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 142                            struct mr6_table **mrt)
 143{
 144        int err;
 145        struct ip6mr_result res;
 146        struct fib_lookup_arg arg = {
 147                .result = &res,
 148                .flags = FIB_LOOKUP_NOREF,
 149        };
 150
 151        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 152                               flowi6_to_flowi(flp6), 0, &arg);
 153        if (err < 0)
 154                return err;
 155        *mrt = res.mrt;
 156        return 0;
 157}
 158
 159static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 160                             int flags, struct fib_lookup_arg *arg)
 161{
 162        struct ip6mr_result *res = arg->result;
 163        struct mr6_table *mrt;
 164
 165        switch (rule->action) {
 166        case FR_ACT_TO_TBL:
 167                break;
 168        case FR_ACT_UNREACHABLE:
 169                return -ENETUNREACH;
 170        case FR_ACT_PROHIBIT:
 171                return -EACCES;
 172        case FR_ACT_BLACKHOLE:
 173        default:
 174                return -EINVAL;
 175        }
 176
 177        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 178        if (mrt == NULL)
 179                return -EAGAIN;
 180        res->mrt = mrt;
 181        return 0;
 182}
 183
 184static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 185{
 186        return 1;
 187}
 188
 189static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 190        FRA_GENERIC_POLICY,
 191};
 192
 193static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 194                                struct fib_rule_hdr *frh, struct nlattr **tb)
 195{
 196        return 0;
 197}
 198
 199static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 200                              struct nlattr **tb)
 201{
 202        return 1;
 203}
 204
 205static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 206                           struct fib_rule_hdr *frh)
 207{
 208        frh->dst_len = 0;
 209        frh->src_len = 0;
 210        frh->tos     = 0;
 211        return 0;
 212}
 213
 214static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 215        .family         = RTNL_FAMILY_IP6MR,
 216        .rule_size      = sizeof(struct ip6mr_rule),
 217        .addr_size      = sizeof(struct in6_addr),
 218        .action         = ip6mr_rule_action,
 219        .match          = ip6mr_rule_match,
 220        .configure      = ip6mr_rule_configure,
 221        .compare        = ip6mr_rule_compare,
 222        .default_pref   = fib_default_rule_pref,
 223        .fill           = ip6mr_rule_fill,
 224        .nlgroup        = RTNLGRP_IPV6_RULE,
 225        .policy         = ip6mr_rule_policy,
 226        .owner          = THIS_MODULE,
 227};
 228
 229static int __net_init ip6mr_rules_init(struct net *net)
 230{
 231        struct fib_rules_ops *ops;
 232        struct mr6_table *mrt;
 233        int err;
 234
 235        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 236        if (IS_ERR(ops))
 237                return PTR_ERR(ops);
 238
 239        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 240
 241        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 242        if (mrt == NULL) {
 243                err = -ENOMEM;
 244                goto err1;
 245        }
 246
 247        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 248        if (err < 0)
 249                goto err2;
 250
 251        net->ipv6.mr6_rules_ops = ops;
 252        return 0;
 253
 254err2:
 255        ip6mr_free_table(mrt);
 256err1:
 257        fib_rules_unregister(ops);
 258        return err;
 259}
 260
 261static void __net_exit ip6mr_rules_exit(struct net *net)
 262{
 263        struct mr6_table *mrt, *next;
 264
 265        rtnl_lock();
 266        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 267                list_del(&mrt->list);
 268                ip6mr_free_table(mrt);
 269        }
 270        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 271        rtnl_unlock();
 272}
 273#else
 274#define ip6mr_for_each_table(mrt, net) \
 275        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 276
 277static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 278{
 279        return net->ipv6.mrt6;
 280}
 281
 282static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 283                            struct mr6_table **mrt)
 284{
 285        *mrt = net->ipv6.mrt6;
 286        return 0;
 287}
 288
 289static int __net_init ip6mr_rules_init(struct net *net)
 290{
 291        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 292        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 293}
 294
 295static void __net_exit ip6mr_rules_exit(struct net *net)
 296{
 297        rtnl_lock();
 298        ip6mr_free_table(net->ipv6.mrt6);
 299        net->ipv6.mrt6 = NULL;
 300        rtnl_unlock();
 301}
 302#endif
 303
 304static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 305{
 306        struct mr6_table *mrt;
 307        unsigned int i;
 308
 309        mrt = ip6mr_get_table(net, id);
 310        if (mrt != NULL)
 311                return mrt;
 312
 313        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 314        if (mrt == NULL)
 315                return NULL;
 316        mrt->id = id;
 317        write_pnet(&mrt->net, net);
 318
 319        /* Forwarding cache */
 320        for (i = 0; i < MFC6_LINES; i++)
 321                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 322
 323        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 324
 325        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 326                    (unsigned long)mrt);
 327
 328#ifdef CONFIG_IPV6_PIMSM_V2
 329        mrt->mroute_reg_vif_num = -1;
 330#endif
 331#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 332        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 333#endif
 334        return mrt;
 335}
 336
 337static void ip6mr_free_table(struct mr6_table *mrt)
 338{
 339        del_timer_sync(&mrt->ipmr_expire_timer);
 340        mroute_clean_tables(mrt);
 341        kfree(mrt);
 342}
 343
 344#ifdef CONFIG_PROC_FS
 345
 346struct ipmr_mfc_iter {
 347        struct seq_net_private p;
 348        struct mr6_table *mrt;
 349        struct list_head *cache;
 350        int ct;
 351};
 352
 353
 354static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 355                                           struct ipmr_mfc_iter *it, loff_t pos)
 356{
 357        struct mr6_table *mrt = it->mrt;
 358        struct mfc6_cache *mfc;
 359
 360        read_lock(&mrt_lock);
 361        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 362                it->cache = &mrt->mfc6_cache_array[it->ct];
 363                list_for_each_entry(mfc, it->cache, list)
 364                        if (pos-- == 0)
 365                                return mfc;
 366        }
 367        read_unlock(&mrt_lock);
 368
 369        spin_lock_bh(&mfc_unres_lock);
 370        it->cache = &mrt->mfc6_unres_queue;
 371        list_for_each_entry(mfc, it->cache, list)
 372                if (pos-- == 0)
 373                        return mfc;
 374        spin_unlock_bh(&mfc_unres_lock);
 375
 376        it->cache = NULL;
 377        return NULL;
 378}
 379
 380/*
 381 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 382 */
 383
 384struct ipmr_vif_iter {
 385        struct seq_net_private p;
 386        struct mr6_table *mrt;
 387        int ct;
 388};
 389
 390static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 391                                            struct ipmr_vif_iter *iter,
 392                                            loff_t pos)
 393{
 394        struct mr6_table *mrt = iter->mrt;
 395
 396        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 397                if (!MIF_EXISTS(mrt, iter->ct))
 398                        continue;
 399                if (pos-- == 0)
 400                        return &mrt->vif6_table[iter->ct];
 401        }
 402        return NULL;
 403}
 404
 405static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 406        __acquires(mrt_lock)
 407{
 408        struct ipmr_vif_iter *iter = seq->private;
 409        struct net *net = seq_file_net(seq);
 410        struct mr6_table *mrt;
 411
 412        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 413        if (mrt == NULL)
 414                return ERR_PTR(-ENOENT);
 415
 416        iter->mrt = mrt;
 417
 418        read_lock(&mrt_lock);
 419        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 420                : SEQ_START_TOKEN;
 421}
 422
 423static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 424{
 425        struct ipmr_vif_iter *iter = seq->private;
 426        struct net *net = seq_file_net(seq);
 427        struct mr6_table *mrt = iter->mrt;
 428
 429        ++*pos;
 430        if (v == SEQ_START_TOKEN)
 431                return ip6mr_vif_seq_idx(net, iter, 0);
 432
 433        while (++iter->ct < mrt->maxvif) {
 434                if (!MIF_EXISTS(mrt, iter->ct))
 435                        continue;
 436                return &mrt->vif6_table[iter->ct];
 437        }
 438        return NULL;
 439}
 440
 441static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 442        __releases(mrt_lock)
 443{
 444        read_unlock(&mrt_lock);
 445}
 446
 447static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 448{
 449        struct ipmr_vif_iter *iter = seq->private;
 450        struct mr6_table *mrt = iter->mrt;
 451
 452        if (v == SEQ_START_TOKEN) {
 453                seq_puts(seq,
 454                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 455        } else {
 456                const struct mif_device *vif = v;
 457                const char *name = vif->dev ? vif->dev->name : "none";
 458
 459                seq_printf(seq,
 460                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 461                           vif - mrt->vif6_table,
 462                           name, vif->bytes_in, vif->pkt_in,
 463                           vif->bytes_out, vif->pkt_out,
 464                           vif->flags);
 465        }
 466        return 0;
 467}
 468
 469static const struct seq_operations ip6mr_vif_seq_ops = {
 470        .start = ip6mr_vif_seq_start,
 471        .next  = ip6mr_vif_seq_next,
 472        .stop  = ip6mr_vif_seq_stop,
 473        .show  = ip6mr_vif_seq_show,
 474};
 475
 476static int ip6mr_vif_open(struct inode *inode, struct file *file)
 477{
 478        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 479                            sizeof(struct ipmr_vif_iter));
 480}
 481
 482static const struct file_operations ip6mr_vif_fops = {
 483        .owner   = THIS_MODULE,
 484        .open    = ip6mr_vif_open,
 485        .read    = seq_read,
 486        .llseek  = seq_lseek,
 487        .release = seq_release_net,
 488};
 489
 490static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 491{
 492        struct ipmr_mfc_iter *it = seq->private;
 493        struct net *net = seq_file_net(seq);
 494        struct mr6_table *mrt;
 495
 496        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 497        if (mrt == NULL)
 498                return ERR_PTR(-ENOENT);
 499
 500        it->mrt = mrt;
 501        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 502                : SEQ_START_TOKEN;
 503}
 504
 505static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 506{
 507        struct mfc6_cache *mfc = v;
 508        struct ipmr_mfc_iter *it = seq->private;
 509        struct net *net = seq_file_net(seq);
 510        struct mr6_table *mrt = it->mrt;
 511
 512        ++*pos;
 513
 514        if (v == SEQ_START_TOKEN)
 515                return ipmr_mfc_seq_idx(net, seq->private, 0);
 516
 517        if (mfc->list.next != it->cache)
 518                return list_entry(mfc->list.next, struct mfc6_cache, list);
 519
 520        if (it->cache == &mrt->mfc6_unres_queue)
 521                goto end_of_list;
 522
 523        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 524
 525        while (++it->ct < MFC6_LINES) {
 526                it->cache = &mrt->mfc6_cache_array[it->ct];
 527                if (list_empty(it->cache))
 528                        continue;
 529                return list_first_entry(it->cache, struct mfc6_cache, list);
 530        }
 531
 532        /* exhausted cache_array, show unresolved */
 533        read_unlock(&mrt_lock);
 534        it->cache = &mrt->mfc6_unres_queue;
 535        it->ct = 0;
 536
 537        spin_lock_bh(&mfc_unres_lock);
 538        if (!list_empty(it->cache))
 539                return list_first_entry(it->cache, struct mfc6_cache, list);
 540
 541 end_of_list:
 542        spin_unlock_bh(&mfc_unres_lock);
 543        it->cache = NULL;
 544
 545        return NULL;
 546}
 547
 548static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 549{
 550        struct ipmr_mfc_iter *it = seq->private;
 551        struct mr6_table *mrt = it->mrt;
 552
 553        if (it->cache == &mrt->mfc6_unres_queue)
 554                spin_unlock_bh(&mfc_unres_lock);
 555        else if (it->cache == mrt->mfc6_cache_array)
 556                read_unlock(&mrt_lock);
 557}
 558
 559static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 560{
 561        int n;
 562
 563        if (v == SEQ_START_TOKEN) {
 564                seq_puts(seq,
 565                         "Group                            "
 566                         "Origin                           "
 567                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 568        } else {
 569                const struct mfc6_cache *mfc = v;
 570                const struct ipmr_mfc_iter *it = seq->private;
 571                struct mr6_table *mrt = it->mrt;
 572
 573                seq_printf(seq, "%pI6 %pI6 %-3hd",
 574                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 575                           mfc->mf6c_parent);
 576
 577                if (it->cache != &mrt->mfc6_unres_queue) {
 578                        seq_printf(seq, " %8lu %8lu %8lu",
 579                                   mfc->mfc_un.res.pkt,
 580                                   mfc->mfc_un.res.bytes,
 581                                   mfc->mfc_un.res.wrong_if);
 582                        for (n = mfc->mfc_un.res.minvif;
 583                             n < mfc->mfc_un.res.maxvif; n++) {
 584                                if (MIF_EXISTS(mrt, n) &&
 585                                    mfc->mfc_un.res.ttls[n] < 255)
 586                                        seq_printf(seq,
 587                                                   " %2d:%-3d",
 588                                                   n, mfc->mfc_un.res.ttls[n]);
 589                        }
 590                } else {
 591                        /* unresolved mfc_caches don't contain
 592                         * pkt, bytes and wrong_if values
 593                         */
 594                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 595                }
 596                seq_putc(seq, '\n');
 597        }
 598        return 0;
 599}
 600
 601static const struct seq_operations ipmr_mfc_seq_ops = {
 602        .start = ipmr_mfc_seq_start,
 603        .next  = ipmr_mfc_seq_next,
 604        .stop  = ipmr_mfc_seq_stop,
 605        .show  = ipmr_mfc_seq_show,
 606};
 607
 608static int ipmr_mfc_open(struct inode *inode, struct file *file)
 609{
 610        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 611                            sizeof(struct ipmr_mfc_iter));
 612}
 613
 614static const struct file_operations ip6mr_mfc_fops = {
 615        .owner   = THIS_MODULE,
 616        .open    = ipmr_mfc_open,
 617        .read    = seq_read,
 618        .llseek  = seq_lseek,
 619        .release = seq_release_net,
 620};
 621#endif
 622
 623#ifdef CONFIG_IPV6_PIMSM_V2
 624
 625static int pim6_rcv(struct sk_buff *skb)
 626{
 627        struct pimreghdr *pim;
 628        struct ipv6hdr   *encap;
 629        struct net_device  *reg_dev = NULL;
 630        struct net *net = dev_net(skb->dev);
 631        struct mr6_table *mrt;
 632        struct flowi6 fl6 = {
 633                .flowi6_iif     = skb->dev->ifindex,
 634                .flowi6_mark    = skb->mark,
 635        };
 636        int reg_vif_num;
 637
 638        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 639                goto drop;
 640
 641        pim = (struct pimreghdr *)skb_transport_header(skb);
 642        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 643            (pim->flags & PIM_NULL_REGISTER) ||
 644            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 645                             sizeof(*pim), IPPROTO_PIM,
 646                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 647             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 648                goto drop;
 649
 650        /* check if the inner packet is destined to mcast group */
 651        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 652                                   sizeof(*pim));
 653
 654        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 655            encap->payload_len == 0 ||
 656            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 657                goto drop;
 658
 659        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 660                goto drop;
 661        reg_vif_num = mrt->mroute_reg_vif_num;
 662
 663        read_lock(&mrt_lock);
 664        if (reg_vif_num >= 0)
 665                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 666        if (reg_dev)
 667                dev_hold(reg_dev);
 668        read_unlock(&mrt_lock);
 669
 670        if (reg_dev == NULL)
 671                goto drop;
 672
 673        skb->mac_header = skb->network_header;
 674        skb_pull(skb, (u8 *)encap - skb->data);
 675        skb_reset_network_header(skb);
 676        skb->protocol = htons(ETH_P_IPV6);
 677        skb->ip_summed = CHECKSUM_NONE;
 678
 679        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 680
 681        netif_rx(skb);
 682
 683        dev_put(reg_dev);
 684        return 0;
 685 drop:
 686        kfree_skb(skb);
 687        return 0;
 688}
 689
 690static const struct inet6_protocol pim6_protocol = {
 691        .handler        =       pim6_rcv,
 692};
 693
 694/* Service routines creating virtual interfaces: PIMREG */
 695
 696static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 697                                      struct net_device *dev)
 698{
 699        struct net *net = dev_net(dev);
 700        struct mr6_table *mrt;
 701        struct flowi6 fl6 = {
 702                .flowi6_oif     = dev->ifindex,
 703                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 704                .flowi6_mark    = skb->mark,
 705        };
 706        int err;
 707
 708        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 709        if (err < 0) {
 710                kfree_skb(skb);
 711                return err;
 712        }
 713
 714        read_lock(&mrt_lock);
 715        dev->stats.tx_bytes += skb->len;
 716        dev->stats.tx_packets++;
 717        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 718        read_unlock(&mrt_lock);
 719        kfree_skb(skb);
 720        return NETDEV_TX_OK;
 721}
 722
 723static const struct net_device_ops reg_vif_netdev_ops = {
 724        .ndo_start_xmit = reg_vif_xmit,
 725};
 726
 727static void reg_vif_setup(struct net_device *dev)
 728{
 729        dev->type               = ARPHRD_PIMREG;
 730        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 731        dev->flags              = IFF_NOARP;
 732        dev->netdev_ops         = &reg_vif_netdev_ops;
 733        dev->destructor         = free_netdev;
 734        dev->features           |= NETIF_F_NETNS_LOCAL;
 735}
 736
 737static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 738{
 739        struct net_device *dev;
 740        char name[IFNAMSIZ];
 741
 742        if (mrt->id == RT6_TABLE_DFLT)
 743                sprintf(name, "pim6reg");
 744        else
 745                sprintf(name, "pim6reg%u", mrt->id);
 746
 747        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 748        if (dev == NULL)
 749                return NULL;
 750
 751        dev_net_set(dev, net);
 752
 753        if (register_netdevice(dev)) {
 754                free_netdev(dev);
 755                return NULL;
 756        }
 757        dev->iflink = 0;
 758
 759        if (dev_open(dev))
 760                goto failure;
 761
 762        dev_hold(dev);
 763        return dev;
 764
 765failure:
 766        /* allow the register to be completed before unregistering. */
 767        rtnl_unlock();
 768        rtnl_lock();
 769
 770        unregister_netdevice(dev);
 771        return NULL;
 772}
 773#endif
 774
 775/*
 776 *      Delete a VIF entry
 777 */
 778
 779static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 780{
 781        struct mif_device *v;
 782        struct net_device *dev;
 783        struct inet6_dev *in6_dev;
 784
 785        if (vifi < 0 || vifi >= mrt->maxvif)
 786                return -EADDRNOTAVAIL;
 787
 788        v = &mrt->vif6_table[vifi];
 789
 790        write_lock_bh(&mrt_lock);
 791        dev = v->dev;
 792        v->dev = NULL;
 793
 794        if (!dev) {
 795                write_unlock_bh(&mrt_lock);
 796                return -EADDRNOTAVAIL;
 797        }
 798
 799#ifdef CONFIG_IPV6_PIMSM_V2
 800        if (vifi == mrt->mroute_reg_vif_num)
 801                mrt->mroute_reg_vif_num = -1;
 802#endif
 803
 804        if (vifi + 1 == mrt->maxvif) {
 805                int tmp;
 806                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 807                        if (MIF_EXISTS(mrt, tmp))
 808                                break;
 809                }
 810                mrt->maxvif = tmp + 1;
 811        }
 812
 813        write_unlock_bh(&mrt_lock);
 814
 815        dev_set_allmulti(dev, -1);
 816
 817        in6_dev = __in6_dev_get(dev);
 818        if (in6_dev) {
 819                in6_dev->cnf.mc_forwarding--;
 820                inet6_netconf_notify_devconf(dev_net(dev),
 821                                             NETCONFA_MC_FORWARDING,
 822                                             dev->ifindex, &in6_dev->cnf);
 823        }
 824
 825        if (v->flags & MIFF_REGISTER)
 826                unregister_netdevice_queue(dev, head);
 827
 828        dev_put(dev);
 829        return 0;
 830}
 831
 832static inline void ip6mr_cache_free(struct mfc6_cache *c)
 833{
 834        kmem_cache_free(mrt_cachep, c);
 835}
 836
 837/* Destroy an unresolved cache entry, killing queued skbs
 838   and reporting error to netlink readers.
 839 */
 840
 841static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 842{
 843        struct net *net = read_pnet(&mrt->net);
 844        struct sk_buff *skb;
 845
 846        atomic_dec(&mrt->cache_resolve_queue_len);
 847
 848        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 849                if (ipv6_hdr(skb)->version == 0) {
 850                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 851                        nlh->nlmsg_type = NLMSG_ERROR;
 852                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 853                        skb_trim(skb, nlh->nlmsg_len);
 854                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 855                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 856                } else
 857                        kfree_skb(skb);
 858        }
 859
 860        ip6mr_cache_free(c);
 861}
 862
 863
 864/* Timer process for all the unresolved queue. */
 865
 866static void ipmr_do_expire_process(struct mr6_table *mrt)
 867{
 868        unsigned long now = jiffies;
 869        unsigned long expires = 10 * HZ;
 870        struct mfc6_cache *c, *next;
 871
 872        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 873                if (time_after(c->mfc_un.unres.expires, now)) {
 874                        /* not yet... */
 875                        unsigned long interval = c->mfc_un.unres.expires - now;
 876                        if (interval < expires)
 877                                expires = interval;
 878                        continue;
 879                }
 880
 881                list_del(&c->list);
 882                mr6_netlink_event(mrt, c, RTM_DELROUTE);
 883                ip6mr_destroy_unres(mrt, c);
 884        }
 885
 886        if (!list_empty(&mrt->mfc6_unres_queue))
 887                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 888}
 889
 890static void ipmr_expire_process(unsigned long arg)
 891{
 892        struct mr6_table *mrt = (struct mr6_table *)arg;
 893
 894        if (!spin_trylock(&mfc_unres_lock)) {
 895                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 896                return;
 897        }
 898
 899        if (!list_empty(&mrt->mfc6_unres_queue))
 900                ipmr_do_expire_process(mrt);
 901
 902        spin_unlock(&mfc_unres_lock);
 903}
 904
 905/* Fill oifs list. It is called under write locked mrt_lock. */
 906
 907static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 908                                    unsigned char *ttls)
 909{
 910        int vifi;
 911
 912        cache->mfc_un.res.minvif = MAXMIFS;
 913        cache->mfc_un.res.maxvif = 0;
 914        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 915
 916        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 917                if (MIF_EXISTS(mrt, vifi) &&
 918                    ttls[vifi] && ttls[vifi] < 255) {
 919                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 920                        if (cache->mfc_un.res.minvif > vifi)
 921                                cache->mfc_un.res.minvif = vifi;
 922                        if (cache->mfc_un.res.maxvif <= vifi)
 923                                cache->mfc_un.res.maxvif = vifi + 1;
 924                }
 925        }
 926}
 927
 928static int mif6_add(struct net *net, struct mr6_table *mrt,
 929                    struct mif6ctl *vifc, int mrtsock)
 930{
 931        int vifi = vifc->mif6c_mifi;
 932        struct mif_device *v = &mrt->vif6_table[vifi];
 933        struct net_device *dev;
 934        struct inet6_dev *in6_dev;
 935        int err;
 936
 937        /* Is vif busy ? */
 938        if (MIF_EXISTS(mrt, vifi))
 939                return -EADDRINUSE;
 940
 941        switch (vifc->mif6c_flags) {
 942#ifdef CONFIG_IPV6_PIMSM_V2
 943        case MIFF_REGISTER:
 944                /*
 945                 * Special Purpose VIF in PIM
 946                 * All the packets will be sent to the daemon
 947                 */
 948                if (mrt->mroute_reg_vif_num >= 0)
 949                        return -EADDRINUSE;
 950                dev = ip6mr_reg_vif(net, mrt);
 951                if (!dev)
 952                        return -ENOBUFS;
 953                err = dev_set_allmulti(dev, 1);
 954                if (err) {
 955                        unregister_netdevice(dev);
 956                        dev_put(dev);
 957                        return err;
 958                }
 959                break;
 960#endif
 961        case 0:
 962                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 963                if (!dev)
 964                        return -EADDRNOTAVAIL;
 965                err = dev_set_allmulti(dev, 1);
 966                if (err) {
 967                        dev_put(dev);
 968                        return err;
 969                }
 970                break;
 971        default:
 972                return -EINVAL;
 973        }
 974
 975        in6_dev = __in6_dev_get(dev);
 976        if (in6_dev) {
 977                in6_dev->cnf.mc_forwarding++;
 978                inet6_netconf_notify_devconf(dev_net(dev),
 979                                             NETCONFA_MC_FORWARDING,
 980                                             dev->ifindex, &in6_dev->cnf);
 981        }
 982
 983        /*
 984         *      Fill in the VIF structures
 985         */
 986        v->rate_limit = vifc->vifc_rate_limit;
 987        v->flags = vifc->mif6c_flags;
 988        if (!mrtsock)
 989                v->flags |= VIFF_STATIC;
 990        v->threshold = vifc->vifc_threshold;
 991        v->bytes_in = 0;
 992        v->bytes_out = 0;
 993        v->pkt_in = 0;
 994        v->pkt_out = 0;
 995        v->link = dev->ifindex;
 996        if (v->flags & MIFF_REGISTER)
 997                v->link = dev->iflink;
 998
 999        /* And finish update writing critical data */
1000        write_lock_bh(&mrt_lock);
1001        v->dev = dev;
1002#ifdef CONFIG_IPV6_PIMSM_V2
1003        if (v->flags & MIFF_REGISTER)
1004                mrt->mroute_reg_vif_num = vifi;
1005#endif
1006        if (vifi + 1 > mrt->maxvif)
1007                mrt->maxvif = vifi + 1;
1008        write_unlock_bh(&mrt_lock);
1009        return 0;
1010}
1011
1012static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1013                                           const struct in6_addr *origin,
1014                                           const struct in6_addr *mcastgrp)
1015{
1016        int line = MFC6_HASH(mcastgrp, origin);
1017        struct mfc6_cache *c;
1018
1019        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1020                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1021                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1022                        return c;
1023        }
1024        return NULL;
1025}
1026
1027/* Look for a (*,*,oif) entry */
1028static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1029                                                      mifi_t mifi)
1030{
1031        int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1032        struct mfc6_cache *c;
1033
1034        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1035                if (ipv6_addr_any(&c->mf6c_origin) &&
1036                    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1037                    (c->mfc_un.res.ttls[mifi] < 255))
1038                        return c;
1039
1040        return NULL;
1041}
1042
1043/* Look for a (*,G) entry */
1044static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1045                                               struct in6_addr *mcastgrp,
1046                                               mifi_t mifi)
1047{
1048        int line = MFC6_HASH(mcastgrp, &in6addr_any);
1049        struct mfc6_cache *c, *proxy;
1050
1051        if (ipv6_addr_any(mcastgrp))
1052                goto skip;
1053
1054        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1055                if (ipv6_addr_any(&c->mf6c_origin) &&
1056                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1057                        if (c->mfc_un.res.ttls[mifi] < 255)
1058                                return c;
1059
1060                        /* It's ok if the mifi is part of the static tree */
1061                        proxy = ip6mr_cache_find_any_parent(mrt,
1062                                                            c->mf6c_parent);
1063                        if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1064                                return c;
1065                }
1066
1067skip:
1068        return ip6mr_cache_find_any_parent(mrt, mifi);
1069}
1070
1071/*
1072 *      Allocate a multicast cache entry
1073 */
1074static struct mfc6_cache *ip6mr_cache_alloc(void)
1075{
1076        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1077        if (c == NULL)
1078                return NULL;
1079        c->mfc_un.res.minvif = MAXMIFS;
1080        return c;
1081}
1082
1083static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1084{
1085        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1086        if (c == NULL)
1087                return NULL;
1088        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1089        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1090        return c;
1091}
1092
1093/*
1094 *      A cache entry has gone into a resolved state from queued
1095 */
1096
1097static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1098                                struct mfc6_cache *uc, struct mfc6_cache *c)
1099{
1100        struct sk_buff *skb;
1101
1102        /*
1103         *      Play the pending entries through our router
1104         */
1105
1106        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1107                if (ipv6_hdr(skb)->version == 0) {
1108                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1109
1110                        if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1111                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1112                        } else {
1113                                nlh->nlmsg_type = NLMSG_ERROR;
1114                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1115                                skb_trim(skb, nlh->nlmsg_len);
1116                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1117                        }
1118                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1119                } else
1120                        ip6_mr_forward(net, mrt, skb, c);
1121        }
1122}
1123
/*
 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *      expects the following bizarre scheme.
 *
 *      Builds a message carrying a struct mrt6msg that describes @pkt and
 *      queues it on the table's control socket (mrt->mroute6_sk).
 *
 *      @mrt:    multicast routing table whose socket receives the report
 *      @pkt:    packet that triggered the report; it is not consumed here
 *      @mifi:   interface index placed in the message (the WHOLEPKT case
 *               uses mrt->mroute_reg_vif_num instead)
 *      @assert: message type; MRT6MSG_WHOLEPKT selects the full-packet form
 *               when CONFIG_IPV6_PIMSM_V2 is enabled
 *
 *      Returns the result of sock_queue_rcv_skb(), -ENOBUFS when no buffer
 *      could be allocated, or -EINVAL when the table has no control socket.
 *
 *      Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
                              mifi_t mifi, int assert)
{
        struct sk_buff *skb;
        struct mrt6msg *msg;
        int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
        /* WHOLEPKT: copy the packet with extra headroom for the mrt6msg. */
        if (assert == MRT6MSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
                                                +sizeof(*msg));
        else
#endif
                /* Otherwise a fresh buffer for IPv6 header plus mrt6msg. */
                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

        /* I suppose that internal messages
         * do not require checksums */

        skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
        if (assert == MRT6MSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix length etc.
                   And all this only to mangle msg->im6_msgtype and
                   to set msg->im6_mbz to "mbz" :-)
                 */
                skb_push(skb, -skb_network_offset(pkt));

                /* The mrt6msg goes in front of the copied packet. */
                skb_push(skb, sizeof(*msg));
                skb_reset_transport_header(skb);
                msg = (struct mrt6msg *)skb_transport_header(skb);
                msg->im6_mbz = 0;
                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
                msg->im6_mif = mrt->mroute_reg_vif_num;
                msg->im6_pad = 0;
                msg->im6_src = ipv6_hdr(pkt)->saddr;
                msg->im6_dst = ipv6_hdr(pkt)->daddr;

                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else
#endif
        {
        /*
         *      Copy the IP header
         */

        skb_put(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

        /*
         *      Add our header
         */
        skb_put(skb, sizeof(*msg));
        skb_reset_transport_header(skb);
        msg = (struct mrt6msg *)skb_transport_header(skb);

        msg->im6_mbz = 0;
        msg->im6_msgtype = assert;
        msg->im6_mif = mifi;
        msg->im6_pad = 0;
        msg->im6_src = ipv6_hdr(pkt)->saddr;
        msg->im6_dst = ipv6_hdr(pkt)->daddr;

        /* Share the triggering packet's dst entry with the report. */
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }

        /* Nobody to report to: drop the message we just built. */
        if (mrt->mroute6_sk == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to user space multicast routing algorithms
         */
        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
        if (ret < 0) {
                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
                kfree_skb(skb);
        }

        return ret;
}
1219
1220/*
1221 *      Queue a packet for resolution. It gets locked cache entry!
1222 */
1223
1224static int
1225ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1226{
1227        bool found = false;
1228        int err;
1229        struct mfc6_cache *c;
1230
1231        spin_lock_bh(&mfc_unres_lock);
1232        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1233                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1234                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1235                        found = true;
1236                        break;
1237                }
1238        }
1239
1240        if (!found) {
1241                /*
1242                 *      Create a new entry if allowable
1243                 */
1244
1245                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1246                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1247                        spin_unlock_bh(&mfc_unres_lock);
1248
1249                        kfree_skb(skb);
1250                        return -ENOBUFS;
1251                }
1252
1253                /*
1254                 *      Fill in the new cache entry
1255                 */
1256                c->mf6c_parent = -1;
1257                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1258                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1259
1260                /*
1261                 *      Reflect first query at pim6sd
1262                 */
1263                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1264                if (err < 0) {
1265                        /* If the report failed throw the cache entry
1266                           out - Brad Parker
1267                         */
1268                        spin_unlock_bh(&mfc_unres_lock);
1269
1270                        ip6mr_cache_free(c);
1271                        kfree_skb(skb);
1272                        return err;
1273                }
1274
1275                atomic_inc(&mrt->cache_resolve_queue_len);
1276                list_add(&c->list, &mrt->mfc6_unres_queue);
1277                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1278
1279                ipmr_do_expire_process(mrt);
1280        }
1281
1282        /*
1283         *      See if we can append the packet
1284         */
1285        if (c->mfc_un.unres.unresolved.qlen > 3) {
1286                kfree_skb(skb);
1287                err = -ENOBUFS;
1288        } else {
1289                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1290                err = 0;
1291        }
1292
1293        spin_unlock_bh(&mfc_unres_lock);
1294        return err;
1295}
1296
1297/*
1298 *      MFC6 cache manipulation by user space
1299 */
1300
1301static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1302                            int parent)
1303{
1304        int line;
1305        struct mfc6_cache *c, *next;
1306
1307        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1308
1309        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1310                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1311                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1312                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1313                    (parent == -1 || parent == c->mf6c_parent)) {
1314                        write_lock_bh(&mrt_lock);
1315                        list_del(&c->list);
1316                        write_unlock_bh(&mrt_lock);
1317
1318                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1319                        ip6mr_cache_free(c);
1320                        return 0;
1321                }
1322        }
1323        return -ENOENT;
1324}
1325
1326static int ip6mr_device_event(struct notifier_block *this,
1327                              unsigned long event, void *ptr)
1328{
1329        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1330        struct net *net = dev_net(dev);
1331        struct mr6_table *mrt;
1332        struct mif_device *v;
1333        int ct;
1334        LIST_HEAD(list);
1335
1336        if (event != NETDEV_UNREGISTER)
1337                return NOTIFY_DONE;
1338
1339        ip6mr_for_each_table(mrt, net) {
1340                v = &mrt->vif6_table[0];
1341                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1342                        if (v->dev == dev)
1343                                mif6_delete(mrt, ct, &list);
1344                }
1345        }
1346        unregister_netdevice_many(&list);
1347
1348        return NOTIFY_DONE;
1349}
1350
/* Netdevice notifier block; routes device events to ip6mr_device_event(). */
static struct notifier_block ip6_mr_notifier = {
        .notifier_call = ip6mr_device_event
};
1354
/*
 *      Setup for IP multicast routing
 *
 *      Per-namespace initialisation: sets up the routing rules and, with
 *      CONFIG_PROC_FS, creates the "ip6_mr_vif" and "ip6_mr_cache" proc
 *      entries.  Returns 0 on success or a negative errno; on failure
 *      everything set up so far is torn down again.
 */

static int __net_init ip6mr_net_init(struct net *net)
{
        int err;

        err = ip6mr_rules_init(net);
        if (err < 0)
                goto fail;

#ifdef CONFIG_PROC_FS
        /* Either proc_create() failing is reported as -ENOMEM. */
        err = -ENOMEM;
        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
                goto proc_vif_fail;
        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
                goto proc_cache_fail;
#endif

        return 0;

        /* Unwind in reverse order of setup. */
#ifdef CONFIG_PROC_FS
proc_cache_fail:
        remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
        ip6mr_rules_exit(net);
#endif
fail:
        return err;
}
1386
/*
 * Per-namespace teardown: removes the proc entries (when CONFIG_PROC_FS
 * is set) and then the routing rules, the reverse of ip6mr_net_init().
 */
static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ip6_mr_cache", net->proc_net);
        remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
        ip6mr_rules_exit(net);
}
1395
/* Per-network-namespace init/exit hooks for IPv6 multicast routing. */
static struct pernet_operations ip6mr_net_ops = {
        .init = ip6mr_net_init,
        .exit = ip6mr_net_exit,
};
1400
/*
 * Subsystem initialisation: creates the cache-entry slab, registers the
 * per-namespace operations and the netdevice notifier, adds the PIM
 * protocol handler (CONFIG_IPV6_PIMSM_V2 only) and registers the
 * RTM_GETROUTE dump handler.
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * completed are undone in reverse order.
 */
int __init ip6_mr_init(void)
{
        int err;

        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
                                       sizeof(struct mfc6_cache),
                                       0, SLAB_HWCACHE_ALIGN,
                                       NULL);
        if (!mrt_cachep)
                return -ENOMEM;

        err = register_pernet_subsys(&ip6mr_net_ops);
        if (err)
                goto reg_pernet_fail;

        err = register_netdevice_notifier(&ip6_mr_notifier);
        if (err)
                goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
                pr_err("%s: can't add PIM protocol\n", __func__);
                err = -EAGAIN;
                goto add_proto_fail;
        }
#endif
        /* NOTE(review): the result of rtnl_register() is not checked here. */
        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
                      ip6mr_rtm_dumproute, NULL);
        return 0;
        /* Error unwinding, newest registration first. */
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
        unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
        unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
        kmem_cache_destroy(mrt_cachep);
        return err;
}
1439
/*
 * Subsystem teardown: undoes everything ip6_mr_init() registered, in
 * reverse order, and finally destroys the cache-entry slab.
 */
void ip6_mr_cleanup(void)
{
        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
        unregister_netdevice_notifier(&ip6_mr_notifier);
        unregister_pernet_subsys(&ip6mr_net_ops);
        kmem_cache_destroy(mrt_cachep);
}
1450
1451static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1452                         struct mf6cctl *mfc, int mrtsock, int parent)
1453{
1454        bool found = false;
1455        int line;
1456        struct mfc6_cache *uc, *c;
1457        unsigned char ttls[MAXMIFS];
1458        int i;
1459
1460        if (mfc->mf6cc_parent >= MAXMIFS)
1461                return -ENFILE;
1462
1463        memset(ttls, 255, MAXMIFS);
1464        for (i = 0; i < MAXMIFS; i++) {
1465                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1466                        ttls[i] = 1;
1467
1468        }
1469
1470        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1471
1472        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1473                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1474                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1475                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1476                    (parent == -1 || parent == mfc->mf6cc_parent)) {
1477                        found = true;
1478                        break;
1479                }
1480        }
1481
1482        if (found) {
1483                write_lock_bh(&mrt_lock);
1484                c->mf6c_parent = mfc->mf6cc_parent;
1485                ip6mr_update_thresholds(mrt, c, ttls);
1486                if (!mrtsock)
1487                        c->mfc_flags |= MFC_STATIC;
1488                write_unlock_bh(&mrt_lock);
1489                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1490                return 0;
1491        }
1492
1493        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1494            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1495                return -EINVAL;
1496
1497        c = ip6mr_cache_alloc();
1498        if (c == NULL)
1499                return -ENOMEM;
1500
1501        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1502        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1503        c->mf6c_parent = mfc->mf6cc_parent;
1504        ip6mr_update_thresholds(mrt, c, ttls);
1505        if (!mrtsock)
1506                c->mfc_flags |= MFC_STATIC;
1507
1508        write_lock_bh(&mrt_lock);
1509        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1510        write_unlock_bh(&mrt_lock);
1511
1512        /*
1513         *      Check to see if we resolved a queued list. If so we
1514         *      need to send on the frames and tidy up.
1515         */
1516        found = false;
1517        spin_lock_bh(&mfc_unres_lock);
1518        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1519                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1520                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1521                        list_del(&uc->list);
1522                        atomic_dec(&mrt->cache_resolve_queue_len);
1523                        found = true;
1524                        break;
1525                }
1526        }
1527        if (list_empty(&mrt->mfc6_unres_queue))
1528                del_timer(&mrt->ipmr_expire_timer);
1529        spin_unlock_bh(&mfc_unres_lock);
1530
1531        if (found) {
1532                ip6mr_cache_resolve(net, mrt, uc, c);
1533                ip6mr_cache_free(uc);
1534        }
1535        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1536        return 0;
1537}
1538
1539/*
1540 *      Close the multicast socket, and clear the vif tables etc
1541 */
1542
1543static void mroute_clean_tables(struct mr6_table *mrt)
1544{
1545        int i;
1546        LIST_HEAD(list);
1547        struct mfc6_cache *c, *next;
1548
1549        /*
1550         *      Shut down all active vif entries
1551         */
1552        for (i = 0; i < mrt->maxvif; i++) {
1553                if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1554                        mif6_delete(mrt, i, &list);
1555        }
1556        unregister_netdevice_many(&list);
1557
1558        /*
1559         *      Wipe the cache
1560         */
1561        for (i = 0; i < MFC6_LINES; i++) {
1562                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1563                        if (c->mfc_flags & MFC_STATIC)
1564                                continue;
1565                        write_lock_bh(&mrt_lock);
1566                        list_del(&c->list);
1567                        write_unlock_bh(&mrt_lock);
1568
1569                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1570                        ip6mr_cache_free(c);
1571                }
1572        }
1573
1574        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1575                spin_lock_bh(&mfc_unres_lock);
1576                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1577                        list_del(&c->list);
1578                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1579                        ip6mr_destroy_unres(mrt, c);
1580                }
1581                spin_unlock_bh(&mfc_unres_lock);
1582        }
1583}
1584
1585static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1586{
1587        int err = 0;
1588        struct net *net = sock_net(sk);
1589
1590        rtnl_lock();
1591        write_lock_bh(&mrt_lock);
1592        if (likely(mrt->mroute6_sk == NULL)) {
1593                mrt->mroute6_sk = sk;
1594                net->ipv6.devconf_all->mc_forwarding++;
1595                inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1596                                             NETCONFA_IFINDEX_ALL,
1597                                             net->ipv6.devconf_all);
1598        }
1599        else
1600                err = -EADDRINUSE;
1601        write_unlock_bh(&mrt_lock);
1602
1603        rtnl_unlock();
1604
1605        return err;
1606}
1607
1608int ip6mr_sk_done(struct sock *sk)
1609{
1610        int err = -EACCES;
1611        struct net *net = sock_net(sk);
1612        struct mr6_table *mrt;
1613
1614        rtnl_lock();
1615        ip6mr_for_each_table(mrt, net) {
1616                if (sk == mrt->mroute6_sk) {
1617                        write_lock_bh(&mrt_lock);
1618                        mrt->mroute6_sk = NULL;
1619                        net->ipv6.devconf_all->mc_forwarding--;
1620                        inet6_netconf_notify_devconf(net,
1621                                                     NETCONFA_MC_FORWARDING,
1622                                                     NETCONFA_IFINDEX_ALL,
1623                                                     net->ipv6.devconf_all);
1624                        write_unlock_bh(&mrt_lock);
1625
1626                        mroute_clean_tables(mrt);
1627                        err = 0;
1628                        break;
1629                }
1630        }
1631        rtnl_unlock();
1632
1633        return err;
1634}
1635
1636struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1637{
1638        struct mr6_table *mrt;
1639        struct flowi6 fl6 = {
1640                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1641                .flowi6_oif     = skb->dev->ifindex,
1642                .flowi6_mark    = skb->mark,
1643        };
1644
1645        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1646                return NULL;
1647
1648        return mrt->mroute6_sk;
1649}
1650
1651/*
1652 *      Socket options and virtual interface manipulation. The whole
1653 *      virtual interface system is a complete heap, but unfortunately
1654 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1655 *      MOSPF/PIM router set up we can clean this up.
1656 */
1657
1658int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1659{
1660        int ret, parent = 0;
1661        struct mif6ctl vif;
1662        struct mf6cctl mfc;
1663        mifi_t mifi;
1664        struct net *net = sock_net(sk);
1665        struct mr6_table *mrt;
1666
1667        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1668        if (mrt == NULL)
1669                return -ENOENT;
1670
1671        if (optname != MRT6_INIT) {
1672                if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1673                        return -EACCES;
1674        }
1675
1676        switch (optname) {
1677        case MRT6_INIT:
1678                if (sk->sk_type != SOCK_RAW ||
1679                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1680                        return -EOPNOTSUPP;
1681                if (optlen < sizeof(int))
1682                        return -EINVAL;
1683
1684                return ip6mr_sk_init(mrt, sk);
1685
1686        case MRT6_DONE:
1687                return ip6mr_sk_done(sk);
1688
1689        case MRT6_ADD_MIF:
1690                if (optlen < sizeof(vif))
1691                        return -EINVAL;
1692                if (copy_from_user(&vif, optval, sizeof(vif)))
1693                        return -EFAULT;
1694                if (vif.mif6c_mifi >= MAXMIFS)
1695                        return -ENFILE;
1696                rtnl_lock();
1697                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1698                rtnl_unlock();
1699                return ret;
1700
1701        case MRT6_DEL_MIF:
1702                if (optlen < sizeof(mifi_t))
1703                        return -EINVAL;
1704                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1705                        return -EFAULT;
1706                rtnl_lock();
1707                ret = mif6_delete(mrt, mifi, NULL);
1708                rtnl_unlock();
1709                return ret;
1710
1711        /*
1712         *      Manipulate the forwarding caches. These live
1713         *      in a sort of kernel/user symbiosis.
1714         */
1715        case MRT6_ADD_MFC:
1716        case MRT6_DEL_MFC:
1717                parent = -1;
1718        case MRT6_ADD_MFC_PROXY:
1719        case MRT6_DEL_MFC_PROXY:
1720                if (optlen < sizeof(mfc))
1721                        return -EINVAL;
1722                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1723                        return -EFAULT;
1724                if (parent == 0)
1725                        parent = mfc.mf6cc_parent;
1726                rtnl_lock();
1727                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1728                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1729                else
1730                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1731                                            sk == mrt->mroute6_sk, parent);
1732                rtnl_unlock();
1733                return ret;
1734
1735        /*
1736         *      Control PIM assert (to activate pim will activate assert)
1737         */
1738        case MRT6_ASSERT:
1739        {
1740                int v;
1741
1742                if (optlen != sizeof(v))
1743                        return -EINVAL;
1744                if (get_user(v, (int __user *)optval))
1745                        return -EFAULT;
1746                mrt->mroute_do_assert = v;
1747                return 0;
1748        }
1749
1750#ifdef CONFIG_IPV6_PIMSM_V2
1751        case MRT6_PIM:
1752        {
1753                int v;
1754
1755                if (optlen != sizeof(v))
1756                        return -EINVAL;
1757                if (get_user(v, (int __user *)optval))
1758                        return -EFAULT;
1759                v = !!v;
1760                rtnl_lock();
1761                ret = 0;
1762                if (v != mrt->mroute_do_pim) {
1763                        mrt->mroute_do_pim = v;
1764                        mrt->mroute_do_assert = v;
1765                }
1766                rtnl_unlock();
1767                return ret;
1768        }
1769
1770#endif
1771#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1772        case MRT6_TABLE:
1773        {
1774                u32 v;
1775
1776                if (optlen != sizeof(u32))
1777                        return -EINVAL;
1778                if (get_user(v, (u32 __user *)optval))
1779                        return -EFAULT;
1780                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1781                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1782                        return -EINVAL;
1783                if (sk == mrt->mroute6_sk)
1784                        return -EBUSY;
1785
1786                rtnl_lock();
1787                ret = 0;
1788                if (!ip6mr_new_table(net, v))
1789                        ret = -ENOMEM;
1790                raw6_sk(sk)->ip6mr_table = v;
1791                rtnl_unlock();
1792                return ret;
1793        }
1794#endif
1795        /*
1796         *      Spurious command, or MRT6_VERSION which you cannot
1797         *      set.
1798         */
1799        default:
1800                return -ENOPROTOOPT;
1801        }
1802}
1803
1804/*
1805 *      Getsock opt support for the multicast routing system.
1806 */
1807
1808int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1809                          int __user *optlen)
1810{
1811        int olr;
1812        int val;
1813        struct net *net = sock_net(sk);
1814        struct mr6_table *mrt;
1815
1816        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1817        if (mrt == NULL)
1818                return -ENOENT;
1819
1820        switch (optname) {
1821        case MRT6_VERSION:
1822                val = 0x0305;
1823                break;
1824#ifdef CONFIG_IPV6_PIMSM_V2
1825        case MRT6_PIM:
1826                val = mrt->mroute_do_pim;
1827                break;
1828#endif
1829        case MRT6_ASSERT:
1830                val = mrt->mroute_do_assert;
1831                break;
1832        default:
1833                return -ENOPROTOOPT;
1834        }
1835
1836        if (get_user(olr, optlen))
1837                return -EFAULT;
1838
1839        olr = min_t(int, olr, sizeof(int));
1840        if (olr < 0)
1841                return -EINVAL;
1842
1843        if (put_user(olr, optlen))
1844                return -EFAULT;
1845        if (copy_to_user(optval, &val, olr))
1846                return -EFAULT;
1847        return 0;
1848}
1849
1850/*
1851 *      The IP multicast ioctl support routines.
1852 */
1853
1854int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1855{
1856        struct sioc_sg_req6 sr;
1857        struct sioc_mif_req6 vr;
1858        struct mif_device *vif;
1859        struct mfc6_cache *c;
1860        struct net *net = sock_net(sk);
1861        struct mr6_table *mrt;
1862
1863        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1864        if (mrt == NULL)
1865                return -ENOENT;
1866
1867        switch (cmd) {
1868        case SIOCGETMIFCNT_IN6:
1869                if (copy_from_user(&vr, arg, sizeof(vr)))
1870                        return -EFAULT;
1871                if (vr.mifi >= mrt->maxvif)
1872                        return -EINVAL;
1873                read_lock(&mrt_lock);
1874                vif = &mrt->vif6_table[vr.mifi];
1875                if (MIF_EXISTS(mrt, vr.mifi)) {
1876                        vr.icount = vif->pkt_in;
1877                        vr.ocount = vif->pkt_out;
1878                        vr.ibytes = vif->bytes_in;
1879                        vr.obytes = vif->bytes_out;
1880                        read_unlock(&mrt_lock);
1881
1882                        if (copy_to_user(arg, &vr, sizeof(vr)))
1883                                return -EFAULT;
1884                        return 0;
1885                }
1886                read_unlock(&mrt_lock);
1887                return -EADDRNOTAVAIL;
1888        case SIOCGETSGCNT_IN6:
1889                if (copy_from_user(&sr, arg, sizeof(sr)))
1890                        return -EFAULT;
1891
1892                read_lock(&mrt_lock);
1893                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1894                if (c) {
1895                        sr.pktcnt = c->mfc_un.res.pkt;
1896                        sr.bytecnt = c->mfc_un.res.bytes;
1897                        sr.wrong_if = c->mfc_un.res.wrong_if;
1898                        read_unlock(&mrt_lock);
1899
1900                        if (copy_to_user(arg, &sr, sizeof(sr)))
1901                                return -EFAULT;
1902                        return 0;
1903                }
1904                read_unlock(&mrt_lock);
1905                return -EADDRNOTAVAIL;
1906        default:
1907                return -ENOIOCTLCMD;
1908        }
1909}
1910
1911#ifdef CONFIG_COMPAT
1912struct compat_sioc_sg_req6 {
1913        struct sockaddr_in6 src;
1914        struct sockaddr_in6 grp;
1915        compat_ulong_t pktcnt;
1916        compat_ulong_t bytecnt;
1917        compat_ulong_t wrong_if;
1918};
1919
1920struct compat_sioc_mif_req6 {
1921        mifi_t  mifi;
1922        compat_ulong_t icount;
1923        compat_ulong_t ocount;
1924        compat_ulong_t ibytes;
1925        compat_ulong_t obytes;
1926};
1927
1928int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1929{
1930        struct compat_sioc_sg_req6 sr;
1931        struct compat_sioc_mif_req6 vr;
1932        struct mif_device *vif;
1933        struct mfc6_cache *c;
1934        struct net *net = sock_net(sk);
1935        struct mr6_table *mrt;
1936
1937        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1938        if (mrt == NULL)
1939                return -ENOENT;
1940
1941        switch (cmd) {
1942        case SIOCGETMIFCNT_IN6:
1943                if (copy_from_user(&vr, arg, sizeof(vr)))
1944                        return -EFAULT;
1945                if (vr.mifi >= mrt->maxvif)
1946                        return -EINVAL;
1947                read_lock(&mrt_lock);
1948                vif = &mrt->vif6_table[vr.mifi];
1949                if (MIF_EXISTS(mrt, vr.mifi)) {
1950                        vr.icount = vif->pkt_in;
1951                        vr.ocount = vif->pkt_out;
1952                        vr.ibytes = vif->bytes_in;
1953                        vr.obytes = vif->bytes_out;
1954                        read_unlock(&mrt_lock);
1955
1956                        if (copy_to_user(arg, &vr, sizeof(vr)))
1957                                return -EFAULT;
1958                        return 0;
1959                }
1960                read_unlock(&mrt_lock);
1961                return -EADDRNOTAVAIL;
1962        case SIOCGETSGCNT_IN6:
1963                if (copy_from_user(&sr, arg, sizeof(sr)))
1964                        return -EFAULT;
1965
1966                read_lock(&mrt_lock);
1967                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1968                if (c) {
1969                        sr.pktcnt = c->mfc_un.res.pkt;
1970                        sr.bytecnt = c->mfc_un.res.bytes;
1971                        sr.wrong_if = c->mfc_un.res.wrong_if;
1972                        read_unlock(&mrt_lock);
1973
1974                        if (copy_to_user(arg, &sr, sizeof(sr)))
1975                                return -EFAULT;
1976                        return 0;
1977                }
1978                read_unlock(&mrt_lock);
1979                return -EADDRNOTAVAIL;
1980        default:
1981                return -ENOIOCTLCMD;
1982        }
1983}
1984#endif
1985
1986static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1987{
1988        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1989                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1990        IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1991                         IPSTATS_MIB_OUTOCTETS, skb->len);
1992        return dst_output(skb);
1993}
1994
1995/*
1996 *      Processing handlers for ip6mr_forward
1997 */
1998
1999static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2000                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2001{
2002        struct ipv6hdr *ipv6h;
2003        struct mif_device *vif = &mrt->vif6_table[vifi];
2004        struct net_device *dev;
2005        struct dst_entry *dst;
2006        struct flowi6 fl6;
2007
2008        if (vif->dev == NULL)
2009                goto out_free;
2010
2011#ifdef CONFIG_IPV6_PIMSM_V2
2012        if (vif->flags & MIFF_REGISTER) {
2013                vif->pkt_out++;
2014                vif->bytes_out += skb->len;
2015                vif->dev->stats.tx_bytes += skb->len;
2016                vif->dev->stats.tx_packets++;
2017                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2018                goto out_free;
2019        }
2020#endif
2021
2022        ipv6h = ipv6_hdr(skb);
2023
2024        fl6 = (struct flowi6) {
2025                .flowi6_oif = vif->link,
2026                .daddr = ipv6h->daddr,
2027        };
2028
2029        dst = ip6_route_output(net, NULL, &fl6);
2030        if (dst->error) {
2031                dst_release(dst);
2032                goto out_free;
2033        }
2034
2035        skb_dst_drop(skb);
2036        skb_dst_set(skb, dst);
2037
2038        /*
2039         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2040         * not only before forwarding, but after forwarding on all output
2041         * interfaces. It is clear, if mrouter runs a multicasting
2042         * program, it should receive packets not depending to what interface
2043         * program is joined.
2044         * If we will not make it, the program will have to join on all
2045         * interfaces. On the other hand, multihoming host (or router, but
2046         * not mrouter) cannot join to more than one interface - it will
2047         * result in receiving multiple packets.
2048         */
2049        dev = vif->dev;
2050        skb->dev = dev;
2051        vif->pkt_out++;
2052        vif->bytes_out += skb->len;
2053
2054        /* We are about to write */
2055        /* XXX: extension headers? */
2056        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2057                goto out_free;
2058
2059        ipv6h = ipv6_hdr(skb);
2060        ipv6h->hop_limit--;
2061
2062        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2063
2064        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2065                       ip6mr_forward2_finish);
2066
2067out_free:
2068        kfree_skb(skb);
2069        return 0;
2070}
2071
2072static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073{
2074        int ct;
2075
2076        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077                if (mrt->vif6_table[ct].dev == dev)
2078                        break;
2079        }
2080        return ct;
2081}
2082
2083static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2084                           struct sk_buff *skb, struct mfc6_cache *cache)
2085{
2086        int psend = -1;
2087        int vif, ct;
2088        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2089
2090        vif = cache->mf6c_parent;
2091        cache->mfc_un.res.pkt++;
2092        cache->mfc_un.res.bytes += skb->len;
2093
2094        if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2095                struct mfc6_cache *cache_proxy;
2096
2097                /* For an (*,G) entry, we only check that the incoming
2098                 * interface is part of the static tree.
2099                 */
2100                cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2101                if (cache_proxy &&
2102                    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2103                        goto forward;
2104        }
2105
2106        /*
2107         * Wrong interface: drop packet and (maybe) send PIM assert.
2108         */
2109        if (mrt->vif6_table[vif].dev != skb->dev) {
2110                cache->mfc_un.res.wrong_if++;
2111
2112                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2113                    /* pimsm uses asserts, when switching from RPT to SPT,
2114                       so that we cannot check that packet arrived on an oif.
2115                       It is bad, but otherwise we would need to move pretty
2116                       large chunk of pimd to kernel. Ough... --ANK
2117                     */
2118                    (mrt->mroute_do_pim ||
2119                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2120                    time_after(jiffies,
2121                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2122                        cache->mfc_un.res.last_assert = jiffies;
2123                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2124                }
2125                goto dont_forward;
2126        }
2127
2128forward:
2129        mrt->vif6_table[vif].pkt_in++;
2130        mrt->vif6_table[vif].bytes_in += skb->len;
2131
2132        /*
2133         *      Forward the frame
2134         */
2135        if (ipv6_addr_any(&cache->mf6c_origin) &&
2136            ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2137                if (true_vifi >= 0 &&
2138                    true_vifi != cache->mf6c_parent &&
2139                    ipv6_hdr(skb)->hop_limit >
2140                                cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2141                        /* It's an (*,*) entry and the packet is not coming from
2142                         * the upstream: forward the packet to the upstream
2143                         * only.
2144                         */
2145                        psend = cache->mf6c_parent;
2146                        goto last_forward;
2147                }
2148                goto dont_forward;
2149        }
2150        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2151                /* For (*,G) entry, don't forward to the incoming interface */
2152                if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2153                    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2154                        if (psend != -1) {
2155                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2156                                if (skb2)
2157                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2158                        }
2159                        psend = ct;
2160                }
2161        }
2162last_forward:
2163        if (psend != -1) {
2164                ip6mr_forward2(net, mrt, skb, cache, psend);
2165                return;
2166        }
2167
2168dont_forward:
2169        kfree_skb(skb);
2170}
2171
2172
2173/*
2174 *      Multicast packets for forwarding arrive here
2175 */
2176
2177int ip6_mr_input(struct sk_buff *skb)
2178{
2179        struct mfc6_cache *cache;
2180        struct net *net = dev_net(skb->dev);
2181        struct mr6_table *mrt;
2182        struct flowi6 fl6 = {
2183                .flowi6_iif     = skb->dev->ifindex,
2184                .flowi6_mark    = skb->mark,
2185        };
2186        int err;
2187
2188        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2189        if (err < 0) {
2190                kfree_skb(skb);
2191                return err;
2192        }
2193
2194        read_lock(&mrt_lock);
2195        cache = ip6mr_cache_find(mrt,
2196                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2197        if (cache == NULL) {
2198                int vif = ip6mr_find_vif(mrt, skb->dev);
2199
2200                if (vif >= 0)
2201                        cache = ip6mr_cache_find_any(mrt,
2202                                                     &ipv6_hdr(skb)->daddr,
2203                                                     vif);
2204        }
2205
2206        /*
2207         *      No usable cache entry
2208         */
2209        if (cache == NULL) {
2210                int vif;
2211
2212                vif = ip6mr_find_vif(mrt, skb->dev);
2213                if (vif >= 0) {
2214                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2215                        read_unlock(&mrt_lock);
2216
2217                        return err;
2218                }
2219                read_unlock(&mrt_lock);
2220                kfree_skb(skb);
2221                return -ENODEV;
2222        }
2223
2224        ip6_mr_forward(net, mrt, skb, cache);
2225
2226        read_unlock(&mrt_lock);
2227
2228        return 0;
2229}
2230
2231
2232static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2233                               struct mfc6_cache *c, struct rtmsg *rtm)
2234{
2235        int ct;
2236        struct rtnexthop *nhp;
2237        struct nlattr *mp_attr;
2238        struct rta_mfc_stats mfcs;
2239
2240        /* If cache is unresolved, don't try to parse IIF and OIF */
2241        if (c->mf6c_parent >= MAXMIFS)
2242                return -ENOENT;
2243
2244        if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2245            nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2246                return -EMSGSIZE;
2247        mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2248        if (mp_attr == NULL)
2249                return -EMSGSIZE;
2250
2251        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2252                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2253                        nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2254                        if (nhp == NULL) {
2255                                nla_nest_cancel(skb, mp_attr);
2256                                return -EMSGSIZE;
2257                        }
2258
2259                        nhp->rtnh_flags = 0;
2260                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2261                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2262                        nhp->rtnh_len = sizeof(*nhp);
2263                }
2264        }
2265
2266        nla_nest_end(skb, mp_attr);
2267
2268        mfcs.mfcs_packets = c->mfc_un.res.pkt;
2269        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2270        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2271        if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2272                return -EMSGSIZE;
2273
2274        rtm->rtm_type = RTN_MULTICAST;
2275        return 1;
2276}
2277
2278int ip6mr_get_route(struct net *net,
2279                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2280{
2281        int err;
2282        struct mr6_table *mrt;
2283        struct mfc6_cache *cache;
2284        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2285
2286        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2287        if (mrt == NULL)
2288                return -ENOENT;
2289
2290        read_lock(&mrt_lock);
2291        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2292        if (!cache && skb->dev) {
2293                int vif = ip6mr_find_vif(mrt, skb->dev);
2294
2295                if (vif >= 0)
2296                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2297                                                     vif);
2298        }
2299
2300        if (!cache) {
2301                struct sk_buff *skb2;
2302                struct ipv6hdr *iph;
2303                struct net_device *dev;
2304                int vif;
2305
2306                if (nowait) {
2307                        read_unlock(&mrt_lock);
2308                        return -EAGAIN;
2309                }
2310
2311                dev = skb->dev;
2312                if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313                        read_unlock(&mrt_lock);
2314                        return -ENODEV;
2315                }
2316
2317                /* really correct? */
2318                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319                if (!skb2) {
2320                        read_unlock(&mrt_lock);
2321                        return -ENOMEM;
2322                }
2323
2324                skb_reset_transport_header(skb2);
2325
2326                skb_put(skb2, sizeof(struct ipv6hdr));
2327                skb_reset_network_header(skb2);
2328
2329                iph = ipv6_hdr(skb2);
2330                iph->version = 0;
2331                iph->priority = 0;
2332                iph->flow_lbl[0] = 0;
2333                iph->flow_lbl[1] = 0;
2334                iph->flow_lbl[2] = 0;
2335                iph->payload_len = 0;
2336                iph->nexthdr = IPPROTO_NONE;
2337                iph->hop_limit = 0;
2338                iph->saddr = rt->rt6i_src.addr;
2339                iph->daddr = rt->rt6i_dst.addr;
2340
2341                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2342                read_unlock(&mrt_lock);
2343
2344                return err;
2345        }
2346
2347        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2348                cache->mfc_flags |= MFC_NOTIFY;
2349
2350        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2351        read_unlock(&mrt_lock);
2352        return err;
2353}
2354
2355static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2356                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2357                             int flags)
2358{
2359        struct nlmsghdr *nlh;
2360        struct rtmsg *rtm;
2361        int err;
2362
2363        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2364        if (nlh == NULL)
2365                return -EMSGSIZE;
2366
2367        rtm = nlmsg_data(nlh);
2368        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2369        rtm->rtm_dst_len  = 128;
2370        rtm->rtm_src_len  = 128;
2371        rtm->rtm_tos      = 0;
2372        rtm->rtm_table    = mrt->id;
2373        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2374                goto nla_put_failure;
2375        rtm->rtm_type = RTN_MULTICAST;
2376        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2377        if (c->mfc_flags & MFC_STATIC)
2378                rtm->rtm_protocol = RTPROT_STATIC;
2379        else
2380                rtm->rtm_protocol = RTPROT_MROUTED;
2381        rtm->rtm_flags    = 0;
2382
2383        if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2384            nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2385                goto nla_put_failure;
2386        err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2387        /* do not break the dump if cache is unresolved */
2388        if (err < 0 && err != -ENOENT)
2389                goto nla_put_failure;
2390
2391        nlmsg_end(skb, nlh);
2392        return 0;
2393
2394nla_put_failure:
2395        nlmsg_cancel(skb, nlh);
2396        return -EMSGSIZE;
2397}
2398
2399static int mr6_msgsize(bool unresolved, int maxvif)
2400{
2401        size_t len =
2402                NLMSG_ALIGN(sizeof(struct rtmsg))
2403                + nla_total_size(4)     /* RTA_TABLE */
2404                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2405                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2406                ;
2407
2408        if (!unresolved)
2409                len = len
2410                      + nla_total_size(4)       /* RTA_IIF */
2411                      + nla_total_size(0)       /* RTA_MULTIPATH */
2412                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413                                                /* RTA_MFC_STATS */
2414                      + nla_total_size(sizeof(struct rta_mfc_stats))
2415                ;
2416
2417        return len;
2418}
2419
2420static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2421                              int cmd)
2422{
2423        struct net *net = read_pnet(&mrt->net);
2424        struct sk_buff *skb;
2425        int err = -ENOBUFS;
2426
2427        skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2428                        GFP_ATOMIC);
2429        if (skb == NULL)
2430                goto errout;
2431
2432        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2433        if (err < 0)
2434                goto errout;
2435
2436        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2437        return;
2438
2439errout:
2440        kfree_skb(skb);
2441        if (err < 0)
2442                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2443}
2444
2445static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446{
2447        struct net *net = sock_net(skb->sk);
2448        struct mr6_table *mrt;
2449        struct mfc6_cache *mfc;
2450        unsigned int t = 0, s_t;
2451        unsigned int h = 0, s_h;
2452        unsigned int e = 0, s_e;
2453
2454        s_t = cb->args[0];
2455        s_h = cb->args[1];
2456        s_e = cb->args[2];
2457
2458        read_lock(&mrt_lock);
2459        ip6mr_for_each_table(mrt, net) {
2460                if (t < s_t)
2461                        goto next_table;
2462                if (t > s_t)
2463                        s_h = 0;
2464                for (h = s_h; h < MFC6_LINES; h++) {
2465                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2466                                if (e < s_e)
2467                                        goto next_entry;
2468                                if (ip6mr_fill_mroute(mrt, skb,
2469                                                      NETLINK_CB(cb->skb).portid,
2470                                                      cb->nlh->nlmsg_seq,
2471                                                      mfc, RTM_NEWROUTE,
2472                                                      NLM_F_MULTI) < 0)
2473                                        goto done;
2474next_entry:
2475                                e++;
2476                        }
2477                        e = s_e = 0;
2478                }
2479                spin_lock_bh(&mfc_unres_lock);
2480                list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2481                        if (e < s_e)
2482                                goto next_entry2;
2483                        if (ip6mr_fill_mroute(mrt, skb,
2484                                              NETLINK_CB(cb->skb).portid,
2485                                              cb->nlh->nlmsg_seq,
2486                                              mfc, RTM_NEWROUTE,
2487                                              NLM_F_MULTI) < 0) {
2488                                spin_unlock_bh(&mfc_unres_lock);
2489                                goto done;
2490                        }
2491next_entry2:
2492                        e++;
2493                }
2494                spin_unlock_bh(&mfc_unres_lock);
2495                e = s_e = 0;
2496                s_h = 0;
2497next_table:
2498                t++;
2499        }
2500done:
2501        read_unlock(&mrt_lock);
2502
2503        cb->args[2] = e;
2504        cb->args[1] = h;
2505        cb->args[0] = t;
2506
2507        return skb->len;
2508}
2509