linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55#include <linux/netconf.h>
  56
  57struct mr6_table {
  58        struct list_head        list;
  59        possible_net_t          net;
  60        u32                     id;
  61        struct sock             *mroute6_sk;
  62        struct timer_list       ipmr_expire_timer;
  63        struct list_head        mfc6_unres_queue;
  64        struct list_head        mfc6_cache_array[MFC6_LINES];
  65        struct mif_device       vif6_table[MAXMIFS];
  66        int                     maxvif;
  67        atomic_t                cache_resolve_queue_len;
  68        bool                    mroute_do_assert;
  69        bool                    mroute_do_pim;
  70#ifdef CONFIG_IPV6_PIMSM_V2
  71        int                     mroute_reg_vif_num;
  72#endif
  73};
  74
  75struct ip6mr_rule {
  76        struct fib_rule         common;
  77};
  78
  79struct ip6mr_result {
  80        struct mr6_table        *mrt;
  81};
  82
  83/* Big lock, protecting vif table, mrt cache and mroute socket state.
  84   Note that the changes are semaphored via rtnl_lock.
  85 */
  86
  87static DEFINE_RWLOCK(mrt_lock);
  88
  89/*
  90 *      Multicast router control variables
  91 */
  92
  93#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  94
  95/* Special spinlock for queue of unresolved entries */
  96static DEFINE_SPINLOCK(mfc_unres_lock);
  97
  98/* We return to original Alan's scheme. Hash table of resolved
  99   entries is changed only in process context and protected
 100   with weak lock mrt_lock. Queue of unresolved entries is protected
 101   with strong spinlock mfc_unres_lock.
 102
 103   In this case data path is free of exclusive locks at all.
 104 */
 105
 106static struct kmem_cache *mrt_cachep __read_mostly;
 107
 108static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 109static void ip6mr_free_table(struct mr6_table *mrt);
 110
 111static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 112                           struct sk_buff *skb, struct mfc6_cache *cache);
 113static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 114                              mifi_t mifi, int assert);
 115static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 116                               struct mfc6_cache *c, struct rtmsg *rtm);
 117static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 118                              int cmd);
 119static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 120                               struct netlink_callback *cb);
 121static void mroute_clean_tables(struct mr6_table *mrt);
 122static void ipmr_expire_process(unsigned long arg);
 123
 124#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 125#define ip6mr_for_each_table(mrt, net) \
 126        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 127
 128static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 129{
 130        struct mr6_table *mrt;
 131
 132        ip6mr_for_each_table(mrt, net) {
 133                if (mrt->id == id)
 134                        return mrt;
 135        }
 136        return NULL;
 137}
 138
 139static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 140                            struct mr6_table **mrt)
 141{
 142        int err;
 143        struct ip6mr_result res;
 144        struct fib_lookup_arg arg = {
 145                .result = &res,
 146                .flags = FIB_LOOKUP_NOREF,
 147        };
 148
 149        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 150                               flowi6_to_flowi(flp6), 0, &arg);
 151        if (err < 0)
 152                return err;
 153        *mrt = res.mrt;
 154        return 0;
 155}
 156
 157static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 158                             int flags, struct fib_lookup_arg *arg)
 159{
 160        struct ip6mr_result *res = arg->result;
 161        struct mr6_table *mrt;
 162
 163        switch (rule->action) {
 164        case FR_ACT_TO_TBL:
 165                break;
 166        case FR_ACT_UNREACHABLE:
 167                return -ENETUNREACH;
 168        case FR_ACT_PROHIBIT:
 169                return -EACCES;
 170        case FR_ACT_BLACKHOLE:
 171        default:
 172                return -EINVAL;
 173        }
 174
 175        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 176        if (!mrt)
 177                return -EAGAIN;
 178        res->mrt = mrt;
 179        return 0;
 180}
 181
 182static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 183{
 184        return 1;
 185}
 186
 187static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 188        FRA_GENERIC_POLICY,
 189};
 190
 191static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 192                                struct fib_rule_hdr *frh, struct nlattr **tb)
 193{
 194        return 0;
 195}
 196
 197static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 198                              struct nlattr **tb)
 199{
 200        return 1;
 201}
 202
 203static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 204                           struct fib_rule_hdr *frh)
 205{
 206        frh->dst_len = 0;
 207        frh->src_len = 0;
 208        frh->tos     = 0;
 209        return 0;
 210}
 211
 212static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 213        .family         = RTNL_FAMILY_IP6MR,
 214        .rule_size      = sizeof(struct ip6mr_rule),
 215        .addr_size      = sizeof(struct in6_addr),
 216        .action         = ip6mr_rule_action,
 217        .match          = ip6mr_rule_match,
 218        .configure      = ip6mr_rule_configure,
 219        .compare        = ip6mr_rule_compare,
 220        .default_pref   = fib_default_rule_pref,
 221        .fill           = ip6mr_rule_fill,
 222        .nlgroup        = RTNLGRP_IPV6_RULE,
 223        .policy         = ip6mr_rule_policy,
 224        .owner          = THIS_MODULE,
 225};
 226
 227static int __net_init ip6mr_rules_init(struct net *net)
 228{
 229        struct fib_rules_ops *ops;
 230        struct mr6_table *mrt;
 231        int err;
 232
 233        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 234        if (IS_ERR(ops))
 235                return PTR_ERR(ops);
 236
 237        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 238
 239        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 240        if (!mrt) {
 241                err = -ENOMEM;
 242                goto err1;
 243        }
 244
 245        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 246        if (err < 0)
 247                goto err2;
 248
 249        net->ipv6.mr6_rules_ops = ops;
 250        return 0;
 251
 252err2:
 253        ip6mr_free_table(mrt);
 254err1:
 255        fib_rules_unregister(ops);
 256        return err;
 257}
 258
 259static void __net_exit ip6mr_rules_exit(struct net *net)
 260{
 261        struct mr6_table *mrt, *next;
 262
 263        rtnl_lock();
 264        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 265                list_del(&mrt->list);
 266                ip6mr_free_table(mrt);
 267        }
 268        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 269        rtnl_unlock();
 270}
 271#else
 272#define ip6mr_for_each_table(mrt, net) \
 273        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 274
 275static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 276{
 277        return net->ipv6.mrt6;
 278}
 279
 280static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 281                            struct mr6_table **mrt)
 282{
 283        *mrt = net->ipv6.mrt6;
 284        return 0;
 285}
 286
 287static int __net_init ip6mr_rules_init(struct net *net)
 288{
 289        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 290        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 291}
 292
 293static void __net_exit ip6mr_rules_exit(struct net *net)
 294{
 295        rtnl_lock();
 296        ip6mr_free_table(net->ipv6.mrt6);
 297        net->ipv6.mrt6 = NULL;
 298        rtnl_unlock();
 299}
 300#endif
 301
 302static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 303{
 304        struct mr6_table *mrt;
 305        unsigned int i;
 306
 307        mrt = ip6mr_get_table(net, id);
 308        if (mrt)
 309                return mrt;
 310
 311        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 312        if (!mrt)
 313                return NULL;
 314        mrt->id = id;
 315        write_pnet(&mrt->net, net);
 316
 317        /* Forwarding cache */
 318        for (i = 0; i < MFC6_LINES; i++)
 319                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 320
 321        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 322
 323        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 324                    (unsigned long)mrt);
 325
 326#ifdef CONFIG_IPV6_PIMSM_V2
 327        mrt->mroute_reg_vif_num = -1;
 328#endif
 329#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 330        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 331#endif
 332        return mrt;
 333}
 334
 335static void ip6mr_free_table(struct mr6_table *mrt)
 336{
 337        del_timer_sync(&mrt->ipmr_expire_timer);
 338        mroute_clean_tables(mrt);
 339        kfree(mrt);
 340}
 341
 342#ifdef CONFIG_PROC_FS
 343
 344struct ipmr_mfc_iter {
 345        struct seq_net_private p;
 346        struct mr6_table *mrt;
 347        struct list_head *cache;
 348        int ct;
 349};
 350
 351
 352static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 353                                           struct ipmr_mfc_iter *it, loff_t pos)
 354{
 355        struct mr6_table *mrt = it->mrt;
 356        struct mfc6_cache *mfc;
 357
 358        read_lock(&mrt_lock);
 359        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 360                it->cache = &mrt->mfc6_cache_array[it->ct];
 361                list_for_each_entry(mfc, it->cache, list)
 362                        if (pos-- == 0)
 363                                return mfc;
 364        }
 365        read_unlock(&mrt_lock);
 366
 367        spin_lock_bh(&mfc_unres_lock);
 368        it->cache = &mrt->mfc6_unres_queue;
 369        list_for_each_entry(mfc, it->cache, list)
 370                if (pos-- == 0)
 371                        return mfc;
 372        spin_unlock_bh(&mfc_unres_lock);
 373
 374        it->cache = NULL;
 375        return NULL;
 376}
 377
/*
 *      The /proc interfaces to multicast routing:
 *      /proc/ip6_mr_cache and /proc/ip6_mr_vif
 */
 381
 382struct ipmr_vif_iter {
 383        struct seq_net_private p;
 384        struct mr6_table *mrt;
 385        int ct;
 386};
 387
 388static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 389                                            struct ipmr_vif_iter *iter,
 390                                            loff_t pos)
 391{
 392        struct mr6_table *mrt = iter->mrt;
 393
 394        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 395                if (!MIF_EXISTS(mrt, iter->ct))
 396                        continue;
 397                if (pos-- == 0)
 398                        return &mrt->vif6_table[iter->ct];
 399        }
 400        return NULL;
 401}
 402
 403static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 404        __acquires(mrt_lock)
 405{
 406        struct ipmr_vif_iter *iter = seq->private;
 407        struct net *net = seq_file_net(seq);
 408        struct mr6_table *mrt;
 409
 410        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 411        if (!mrt)
 412                return ERR_PTR(-ENOENT);
 413
 414        iter->mrt = mrt;
 415
 416        read_lock(&mrt_lock);
 417        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 418                : SEQ_START_TOKEN;
 419}
 420
 421static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 422{
 423        struct ipmr_vif_iter *iter = seq->private;
 424        struct net *net = seq_file_net(seq);
 425        struct mr6_table *mrt = iter->mrt;
 426
 427        ++*pos;
 428        if (v == SEQ_START_TOKEN)
 429                return ip6mr_vif_seq_idx(net, iter, 0);
 430
 431        while (++iter->ct < mrt->maxvif) {
 432                if (!MIF_EXISTS(mrt, iter->ct))
 433                        continue;
 434                return &mrt->vif6_table[iter->ct];
 435        }
 436        return NULL;
 437}
 438
 439static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 440        __releases(mrt_lock)
 441{
 442        read_unlock(&mrt_lock);
 443}
 444
 445static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 446{
 447        struct ipmr_vif_iter *iter = seq->private;
 448        struct mr6_table *mrt = iter->mrt;
 449
 450        if (v == SEQ_START_TOKEN) {
 451                seq_puts(seq,
 452                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 453        } else {
 454                const struct mif_device *vif = v;
 455                const char *name = vif->dev ? vif->dev->name : "none";
 456
 457                seq_printf(seq,
 458                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 459                           vif - mrt->vif6_table,
 460                           name, vif->bytes_in, vif->pkt_in,
 461                           vif->bytes_out, vif->pkt_out,
 462                           vif->flags);
 463        }
 464        return 0;
 465}
 466
 467static const struct seq_operations ip6mr_vif_seq_ops = {
 468        .start = ip6mr_vif_seq_start,
 469        .next  = ip6mr_vif_seq_next,
 470        .stop  = ip6mr_vif_seq_stop,
 471        .show  = ip6mr_vif_seq_show,
 472};
 473
 474static int ip6mr_vif_open(struct inode *inode, struct file *file)
 475{
 476        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 477                            sizeof(struct ipmr_vif_iter));
 478}
 479
 480static const struct file_operations ip6mr_vif_fops = {
 481        .owner   = THIS_MODULE,
 482        .open    = ip6mr_vif_open,
 483        .read    = seq_read,
 484        .llseek  = seq_lseek,
 485        .release = seq_release_net,
 486};
 487
 488static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 489{
 490        struct ipmr_mfc_iter *it = seq->private;
 491        struct net *net = seq_file_net(seq);
 492        struct mr6_table *mrt;
 493
 494        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 495        if (!mrt)
 496                return ERR_PTR(-ENOENT);
 497
 498        it->mrt = mrt;
 499        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 500                : SEQ_START_TOKEN;
 501}
 502
 503static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 504{
 505        struct mfc6_cache *mfc = v;
 506        struct ipmr_mfc_iter *it = seq->private;
 507        struct net *net = seq_file_net(seq);
 508        struct mr6_table *mrt = it->mrt;
 509
 510        ++*pos;
 511
 512        if (v == SEQ_START_TOKEN)
 513                return ipmr_mfc_seq_idx(net, seq->private, 0);
 514
 515        if (mfc->list.next != it->cache)
 516                return list_entry(mfc->list.next, struct mfc6_cache, list);
 517
 518        if (it->cache == &mrt->mfc6_unres_queue)
 519                goto end_of_list;
 520
 521        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 522
 523        while (++it->ct < MFC6_LINES) {
 524                it->cache = &mrt->mfc6_cache_array[it->ct];
 525                if (list_empty(it->cache))
 526                        continue;
 527                return list_first_entry(it->cache, struct mfc6_cache, list);
 528        }
 529
 530        /* exhausted cache_array, show unresolved */
 531        read_unlock(&mrt_lock);
 532        it->cache = &mrt->mfc6_unres_queue;
 533        it->ct = 0;
 534
 535        spin_lock_bh(&mfc_unres_lock);
 536        if (!list_empty(it->cache))
 537                return list_first_entry(it->cache, struct mfc6_cache, list);
 538
 539 end_of_list:
 540        spin_unlock_bh(&mfc_unres_lock);
 541        it->cache = NULL;
 542
 543        return NULL;
 544}
 545
 546static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 547{
 548        struct ipmr_mfc_iter *it = seq->private;
 549        struct mr6_table *mrt = it->mrt;
 550
 551        if (it->cache == &mrt->mfc6_unres_queue)
 552                spin_unlock_bh(&mfc_unres_lock);
 553        else if (it->cache == mrt->mfc6_cache_array)
 554                read_unlock(&mrt_lock);
 555}
 556
 557static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 558{
 559        int n;
 560
 561        if (v == SEQ_START_TOKEN) {
 562                seq_puts(seq,
 563                         "Group                            "
 564                         "Origin                           "
 565                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 566        } else {
 567                const struct mfc6_cache *mfc = v;
 568                const struct ipmr_mfc_iter *it = seq->private;
 569                struct mr6_table *mrt = it->mrt;
 570
 571                seq_printf(seq, "%pI6 %pI6 %-3hd",
 572                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 573                           mfc->mf6c_parent);
 574
 575                if (it->cache != &mrt->mfc6_unres_queue) {
 576                        seq_printf(seq, " %8lu %8lu %8lu",
 577                                   mfc->mfc_un.res.pkt,
 578                                   mfc->mfc_un.res.bytes,
 579                                   mfc->mfc_un.res.wrong_if);
 580                        for (n = mfc->mfc_un.res.minvif;
 581                             n < mfc->mfc_un.res.maxvif; n++) {
 582                                if (MIF_EXISTS(mrt, n) &&
 583                                    mfc->mfc_un.res.ttls[n] < 255)
 584                                        seq_printf(seq,
 585                                                   " %2d:%-3d",
 586                                                   n, mfc->mfc_un.res.ttls[n]);
 587                        }
 588                } else {
 589                        /* unresolved mfc_caches don't contain
 590                         * pkt, bytes and wrong_if values
 591                         */
 592                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 593                }
 594                seq_putc(seq, '\n');
 595        }
 596        return 0;
 597}
 598
 599static const struct seq_operations ipmr_mfc_seq_ops = {
 600        .start = ipmr_mfc_seq_start,
 601        .next  = ipmr_mfc_seq_next,
 602        .stop  = ipmr_mfc_seq_stop,
 603        .show  = ipmr_mfc_seq_show,
 604};
 605
 606static int ipmr_mfc_open(struct inode *inode, struct file *file)
 607{
 608        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 609                            sizeof(struct ipmr_mfc_iter));
 610}
 611
 612static const struct file_operations ip6mr_mfc_fops = {
 613        .owner   = THIS_MODULE,
 614        .open    = ipmr_mfc_open,
 615        .read    = seq_read,
 616        .llseek  = seq_lseek,
 617        .release = seq_release_net,
 618};
 619#endif
 620
 621#ifdef CONFIG_IPV6_PIMSM_V2
 622
 623static int pim6_rcv(struct sk_buff *skb)
 624{
 625        struct pimreghdr *pim;
 626        struct ipv6hdr   *encap;
 627        struct net_device  *reg_dev = NULL;
 628        struct net *net = dev_net(skb->dev);
 629        struct mr6_table *mrt;
 630        struct flowi6 fl6 = {
 631                .flowi6_iif     = skb->dev->ifindex,
 632                .flowi6_mark    = skb->mark,
 633        };
 634        int reg_vif_num;
 635
 636        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 637                goto drop;
 638
 639        pim = (struct pimreghdr *)skb_transport_header(skb);
 640        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 641            (pim->flags & PIM_NULL_REGISTER) ||
 642            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 643                             sizeof(*pim), IPPROTO_PIM,
 644                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 645             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 646                goto drop;
 647
 648        /* check if the inner packet is destined to mcast group */
 649        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 650                                   sizeof(*pim));
 651
 652        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 653            encap->payload_len == 0 ||
 654            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 655                goto drop;
 656
 657        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 658                goto drop;
 659        reg_vif_num = mrt->mroute_reg_vif_num;
 660
 661        read_lock(&mrt_lock);
 662        if (reg_vif_num >= 0)
 663                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 664        if (reg_dev)
 665                dev_hold(reg_dev);
 666        read_unlock(&mrt_lock);
 667
 668        if (!reg_dev)
 669                goto drop;
 670
 671        skb->mac_header = skb->network_header;
 672        skb_pull(skb, (u8 *)encap - skb->data);
 673        skb_reset_network_header(skb);
 674        skb->protocol = htons(ETH_P_IPV6);
 675        skb->ip_summed = CHECKSUM_NONE;
 676
 677        skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
 678
 679        netif_rx(skb);
 680
 681        dev_put(reg_dev);
 682        return 0;
 683 drop:
 684        kfree_skb(skb);
 685        return 0;
 686}
 687
 688static const struct inet6_protocol pim6_protocol = {
 689        .handler        =       pim6_rcv,
 690};
 691
 692/* Service routines creating virtual interfaces: PIMREG */
 693
 694static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 695                                      struct net_device *dev)
 696{
 697        struct net *net = dev_net(dev);
 698        struct mr6_table *mrt;
 699        struct flowi6 fl6 = {
 700                .flowi6_oif     = dev->ifindex,
 701                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
 702                .flowi6_mark    = skb->mark,
 703        };
 704        int err;
 705
 706        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 707        if (err < 0) {
 708                kfree_skb(skb);
 709                return err;
 710        }
 711
 712        read_lock(&mrt_lock);
 713        dev->stats.tx_bytes += skb->len;
 714        dev->stats.tx_packets++;
 715        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 716        read_unlock(&mrt_lock);
 717        kfree_skb(skb);
 718        return NETDEV_TX_OK;
 719}
 720
 721static int reg_vif_get_iflink(const struct net_device *dev)
 722{
 723        return 0;
 724}
 725
 726static const struct net_device_ops reg_vif_netdev_ops = {
 727        .ndo_start_xmit = reg_vif_xmit,
 728        .ndo_get_iflink = reg_vif_get_iflink,
 729};
 730
 731static void reg_vif_setup(struct net_device *dev)
 732{
 733        dev->type               = ARPHRD_PIMREG;
 734        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 735        dev->flags              = IFF_NOARP;
 736        dev->netdev_ops         = &reg_vif_netdev_ops;
 737        dev->destructor         = free_netdev;
 738        dev->features           |= NETIF_F_NETNS_LOCAL;
 739}
 740
 741static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 742{
 743        struct net_device *dev;
 744        char name[IFNAMSIZ];
 745
 746        if (mrt->id == RT6_TABLE_DFLT)
 747                sprintf(name, "pim6reg");
 748        else
 749                sprintf(name, "pim6reg%u", mrt->id);
 750
 751        dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
 752        if (!dev)
 753                return NULL;
 754
 755        dev_net_set(dev, net);
 756
 757        if (register_netdevice(dev)) {
 758                free_netdev(dev);
 759                return NULL;
 760        }
 761
 762        if (dev_open(dev))
 763                goto failure;
 764
 765        dev_hold(dev);
 766        return dev;
 767
 768failure:
 769        /* allow the register to be completed before unregistering. */
 770        rtnl_unlock();
 771        rtnl_lock();
 772
 773        unregister_netdevice(dev);
 774        return NULL;
 775}
 776#endif
 777
 778/*
 779 *      Delete a VIF entry
 780 */
 781
 782static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 783{
 784        struct mif_device *v;
 785        struct net_device *dev;
 786        struct inet6_dev *in6_dev;
 787
 788        if (vifi < 0 || vifi >= mrt->maxvif)
 789                return -EADDRNOTAVAIL;
 790
 791        v = &mrt->vif6_table[vifi];
 792
 793        write_lock_bh(&mrt_lock);
 794        dev = v->dev;
 795        v->dev = NULL;
 796
 797        if (!dev) {
 798                write_unlock_bh(&mrt_lock);
 799                return -EADDRNOTAVAIL;
 800        }
 801
 802#ifdef CONFIG_IPV6_PIMSM_V2
 803        if (vifi == mrt->mroute_reg_vif_num)
 804                mrt->mroute_reg_vif_num = -1;
 805#endif
 806
 807        if (vifi + 1 == mrt->maxvif) {
 808                int tmp;
 809                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 810                        if (MIF_EXISTS(mrt, tmp))
 811                                break;
 812                }
 813                mrt->maxvif = tmp + 1;
 814        }
 815
 816        write_unlock_bh(&mrt_lock);
 817
 818        dev_set_allmulti(dev, -1);
 819
 820        in6_dev = __in6_dev_get(dev);
 821        if (in6_dev) {
 822                in6_dev->cnf.mc_forwarding--;
 823                inet6_netconf_notify_devconf(dev_net(dev),
 824                                             NETCONFA_MC_FORWARDING,
 825                                             dev->ifindex, &in6_dev->cnf);
 826        }
 827
 828        if (v->flags & MIFF_REGISTER)
 829                unregister_netdevice_queue(dev, head);
 830
 831        dev_put(dev);
 832        return 0;
 833}
 834
 835static inline void ip6mr_cache_free(struct mfc6_cache *c)
 836{
 837        kmem_cache_free(mrt_cachep, c);
 838}
 839
 840/* Destroy an unresolved cache entry, killing queued skbs
 841   and reporting error to netlink readers.
 842 */
 843
 844static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 845{
 846        struct net *net = read_pnet(&mrt->net);
 847        struct sk_buff *skb;
 848
 849        atomic_dec(&mrt->cache_resolve_queue_len);
 850
 851        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 852                if (ipv6_hdr(skb)->version == 0) {
 853                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 854                        nlh->nlmsg_type = NLMSG_ERROR;
 855                        nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 856                        skb_trim(skb, nlh->nlmsg_len);
 857                        ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 858                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 859                } else
 860                        kfree_skb(skb);
 861        }
 862
 863        ip6mr_cache_free(c);
 864}
 865
 866
 867/* Timer process for all the unresolved queue. */
 868
 869static void ipmr_do_expire_process(struct mr6_table *mrt)
 870{
 871        unsigned long now = jiffies;
 872        unsigned long expires = 10 * HZ;
 873        struct mfc6_cache *c, *next;
 874
 875        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 876                if (time_after(c->mfc_un.unres.expires, now)) {
 877                        /* not yet... */
 878                        unsigned long interval = c->mfc_un.unres.expires - now;
 879                        if (interval < expires)
 880                                expires = interval;
 881                        continue;
 882                }
 883
 884                list_del(&c->list);
 885                mr6_netlink_event(mrt, c, RTM_DELROUTE);
 886                ip6mr_destroy_unres(mrt, c);
 887        }
 888
 889        if (!list_empty(&mrt->mfc6_unres_queue))
 890                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 891}
 892
 893static void ipmr_expire_process(unsigned long arg)
 894{
 895        struct mr6_table *mrt = (struct mr6_table *)arg;
 896
 897        if (!spin_trylock(&mfc_unres_lock)) {
 898                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 899                return;
 900        }
 901
 902        if (!list_empty(&mrt->mfc6_unres_queue))
 903                ipmr_do_expire_process(mrt);
 904
 905        spin_unlock(&mfc_unres_lock);
 906}
 907
 908/* Fill oifs list. It is called under write locked mrt_lock. */
 909
 910static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 911                                    unsigned char *ttls)
 912{
 913        int vifi;
 914
 915        cache->mfc_un.res.minvif = MAXMIFS;
 916        cache->mfc_un.res.maxvif = 0;
 917        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 918
 919        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 920                if (MIF_EXISTS(mrt, vifi) &&
 921                    ttls[vifi] && ttls[vifi] < 255) {
 922                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 923                        if (cache->mfc_un.res.minvif > vifi)
 924                                cache->mfc_un.res.minvif = vifi;
 925                        if (cache->mfc_un.res.maxvif <= vifi)
 926                                cache->mfc_un.res.maxvif = vifi + 1;
 927                }
 928        }
 929}
 930
 931static int mif6_add(struct net *net, struct mr6_table *mrt,
 932                    struct mif6ctl *vifc, int mrtsock)
 933{
 934        int vifi = vifc->mif6c_mifi;
 935        struct mif_device *v = &mrt->vif6_table[vifi];
 936        struct net_device *dev;
 937        struct inet6_dev *in6_dev;
 938        int err;
 939
 940        /* Is vif busy ? */
 941        if (MIF_EXISTS(mrt, vifi))
 942                return -EADDRINUSE;
 943
 944        switch (vifc->mif6c_flags) {
 945#ifdef CONFIG_IPV6_PIMSM_V2
 946        case MIFF_REGISTER:
 947                /*
 948                 * Special Purpose VIF in PIM
 949                 * All the packets will be sent to the daemon
 950                 */
 951                if (mrt->mroute_reg_vif_num >= 0)
 952                        return -EADDRINUSE;
 953                dev = ip6mr_reg_vif(net, mrt);
 954                if (!dev)
 955                        return -ENOBUFS;
 956                err = dev_set_allmulti(dev, 1);
 957                if (err) {
 958                        unregister_netdevice(dev);
 959                        dev_put(dev);
 960                        return err;
 961                }
 962                break;
 963#endif
 964        case 0:
 965                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 966                if (!dev)
 967                        return -EADDRNOTAVAIL;
 968                err = dev_set_allmulti(dev, 1);
 969                if (err) {
 970                        dev_put(dev);
 971                        return err;
 972                }
 973                break;
 974        default:
 975                return -EINVAL;
 976        }
 977
 978        in6_dev = __in6_dev_get(dev);
 979        if (in6_dev) {
 980                in6_dev->cnf.mc_forwarding++;
 981                inet6_netconf_notify_devconf(dev_net(dev),
 982                                             NETCONFA_MC_FORWARDING,
 983                                             dev->ifindex, &in6_dev->cnf);
 984        }
 985
 986        /*
 987         *      Fill in the VIF structures
 988         */
 989        v->rate_limit = vifc->vifc_rate_limit;
 990        v->flags = vifc->mif6c_flags;
 991        if (!mrtsock)
 992                v->flags |= VIFF_STATIC;
 993        v->threshold = vifc->vifc_threshold;
 994        v->bytes_in = 0;
 995        v->bytes_out = 0;
 996        v->pkt_in = 0;
 997        v->pkt_out = 0;
 998        v->link = dev->ifindex;
 999        if (v->flags & MIFF_REGISTER)
1000                v->link = dev_get_iflink(dev);
1001
1002        /* And finish update writing critical data */
1003        write_lock_bh(&mrt_lock);
1004        v->dev = dev;
1005#ifdef CONFIG_IPV6_PIMSM_V2
1006        if (v->flags & MIFF_REGISTER)
1007                mrt->mroute_reg_vif_num = vifi;
1008#endif
1009        if (vifi + 1 > mrt->maxvif)
1010                mrt->maxvif = vifi + 1;
1011        write_unlock_bh(&mrt_lock);
1012        return 0;
1013}
1014
1015static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1016                                           const struct in6_addr *origin,
1017                                           const struct in6_addr *mcastgrp)
1018{
1019        int line = MFC6_HASH(mcastgrp, origin);
1020        struct mfc6_cache *c;
1021
1022        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1023                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1024                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1025                        return c;
1026        }
1027        return NULL;
1028}
1029
1030/* Look for a (*,*,oif) entry */
1031static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1032                                                      mifi_t mifi)
1033{
1034        int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1035        struct mfc6_cache *c;
1036
1037        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1038                if (ipv6_addr_any(&c->mf6c_origin) &&
1039                    ipv6_addr_any(&c->mf6c_mcastgrp) &&
1040                    (c->mfc_un.res.ttls[mifi] < 255))
1041                        return c;
1042
1043        return NULL;
1044}
1045
1046/* Look for a (*,G) entry */
1047static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1048                                               struct in6_addr *mcastgrp,
1049                                               mifi_t mifi)
1050{
1051        int line = MFC6_HASH(mcastgrp, &in6addr_any);
1052        struct mfc6_cache *c, *proxy;
1053
1054        if (ipv6_addr_any(mcastgrp))
1055                goto skip;
1056
1057        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1058                if (ipv6_addr_any(&c->mf6c_origin) &&
1059                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1060                        if (c->mfc_un.res.ttls[mifi] < 255)
1061                                return c;
1062
1063                        /* It's ok if the mifi is part of the static tree */
1064                        proxy = ip6mr_cache_find_any_parent(mrt,
1065                                                            c->mf6c_parent);
1066                        if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1067                                return c;
1068                }
1069
1070skip:
1071        return ip6mr_cache_find_any_parent(mrt, mifi);
1072}
1073
1074/*
1075 *      Allocate a multicast cache entry
1076 */
1077static struct mfc6_cache *ip6mr_cache_alloc(void)
1078{
1079        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1080        if (!c)
1081                return NULL;
1082        c->mfc_un.res.minvif = MAXMIFS;
1083        return c;
1084}
1085
1086static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1087{
1088        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1089        if (!c)
1090                return NULL;
1091        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1092        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1093        return c;
1094}
1095
1096/*
1097 *      A cache entry has gone into a resolved state from queued
1098 */
1099
1100static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1101                                struct mfc6_cache *uc, struct mfc6_cache *c)
1102{
1103        struct sk_buff *skb;
1104
1105        /*
1106         *      Play the pending entries through our router
1107         */
1108
1109        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1110                if (ipv6_hdr(skb)->version == 0) {
1111                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1112
1113                        if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1114                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1115                        } else {
1116                                nlh->nlmsg_type = NLMSG_ERROR;
1117                                nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1118                                skb_trim(skb, nlh->nlmsg_len);
1119                                ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1120                        }
1121                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1122                } else
1123                        ip6_mr_forward(net, mrt, skb, c);
1124        }
1125}
1126
1127/*
1128 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1129 *      expects the following bizarre scheme.
1130 *
1131 *      Called under mrt_lock.
1132 */
1133
1134static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1135                              mifi_t mifi, int assert)
1136{
1137        struct sk_buff *skb;
1138        struct mrt6msg *msg;
1139        int ret;
1140
1141#ifdef CONFIG_IPV6_PIMSM_V2
1142        if (assert == MRT6MSG_WHOLEPKT)
1143                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1144                                                +sizeof(*msg));
1145        else
1146#endif
1147                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1148
1149        if (!skb)
1150                return -ENOBUFS;
1151
1152        /* I suppose that internal messages
1153         * do not require checksums */
1154
1155        skb->ip_summed = CHECKSUM_UNNECESSARY;
1156
1157#ifdef CONFIG_IPV6_PIMSM_V2
1158        if (assert == MRT6MSG_WHOLEPKT) {
1159                /* Ugly, but we have no choice with this interface.
1160                   Duplicate old header, fix length etc.
1161                   And all this only to mangle msg->im6_msgtype and
1162                   to set msg->im6_mbz to "mbz" :-)
1163                 */
1164                skb_push(skb, -skb_network_offset(pkt));
1165
1166                skb_push(skb, sizeof(*msg));
1167                skb_reset_transport_header(skb);
1168                msg = (struct mrt6msg *)skb_transport_header(skb);
1169                msg->im6_mbz = 0;
1170                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1171                msg->im6_mif = mrt->mroute_reg_vif_num;
1172                msg->im6_pad = 0;
1173                msg->im6_src = ipv6_hdr(pkt)->saddr;
1174                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1175
1176                skb->ip_summed = CHECKSUM_UNNECESSARY;
1177        } else
1178#endif
1179        {
1180        /*
1181         *      Copy the IP header
1182         */
1183
1184        skb_put(skb, sizeof(struct ipv6hdr));
1185        skb_reset_network_header(skb);
1186        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1187
1188        /*
1189         *      Add our header
1190         */
1191        skb_put(skb, sizeof(*msg));
1192        skb_reset_transport_header(skb);
1193        msg = (struct mrt6msg *)skb_transport_header(skb);
1194
1195        msg->im6_mbz = 0;
1196        msg->im6_msgtype = assert;
1197        msg->im6_mif = mifi;
1198        msg->im6_pad = 0;
1199        msg->im6_src = ipv6_hdr(pkt)->saddr;
1200        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1201
1202        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1203        skb->ip_summed = CHECKSUM_UNNECESSARY;
1204        }
1205
1206        if (!mrt->mroute6_sk) {
1207                kfree_skb(skb);
1208                return -EINVAL;
1209        }
1210
1211        /*
1212         *      Deliver to user space multicast routing algorithms
1213         */
1214        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1215        if (ret < 0) {
1216                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1217                kfree_skb(skb);
1218        }
1219
1220        return ret;
1221}
1222
1223/*
1224 *      Queue a packet for resolution. It gets locked cache entry!
1225 */
1226
1227static int
1228ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1229{
1230        bool found = false;
1231        int err;
1232        struct mfc6_cache *c;
1233
1234        spin_lock_bh(&mfc_unres_lock);
1235        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1236                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1237                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1238                        found = true;
1239                        break;
1240                }
1241        }
1242
1243        if (!found) {
1244                /*
1245                 *      Create a new entry if allowable
1246                 */
1247
1248                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1249                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1250                        spin_unlock_bh(&mfc_unres_lock);
1251
1252                        kfree_skb(skb);
1253                        return -ENOBUFS;
1254                }
1255
1256                /*
1257                 *      Fill in the new cache entry
1258                 */
1259                c->mf6c_parent = -1;
1260                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1261                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1262
1263                /*
1264                 *      Reflect first query at pim6sd
1265                 */
1266                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1267                if (err < 0) {
1268                        /* If the report failed throw the cache entry
1269                           out - Brad Parker
1270                         */
1271                        spin_unlock_bh(&mfc_unres_lock);
1272
1273                        ip6mr_cache_free(c);
1274                        kfree_skb(skb);
1275                        return err;
1276                }
1277
1278                atomic_inc(&mrt->cache_resolve_queue_len);
1279                list_add(&c->list, &mrt->mfc6_unres_queue);
1280                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1281
1282                ipmr_do_expire_process(mrt);
1283        }
1284
1285        /*
1286         *      See if we can append the packet
1287         */
1288        if (c->mfc_un.unres.unresolved.qlen > 3) {
1289                kfree_skb(skb);
1290                err = -ENOBUFS;
1291        } else {
1292                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1293                err = 0;
1294        }
1295
1296        spin_unlock_bh(&mfc_unres_lock);
1297        return err;
1298}
1299
1300/*
1301 *      MFC6 cache manipulation by user space
1302 */
1303
1304static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1305                            int parent)
1306{
1307        int line;
1308        struct mfc6_cache *c, *next;
1309
1310        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1311
1312        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1313                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1314                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1315                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1316                    (parent == -1 || parent == c->mf6c_parent)) {
1317                        write_lock_bh(&mrt_lock);
1318                        list_del(&c->list);
1319                        write_unlock_bh(&mrt_lock);
1320
1321                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1322                        ip6mr_cache_free(c);
1323                        return 0;
1324                }
1325        }
1326        return -ENOENT;
1327}
1328
1329static int ip6mr_device_event(struct notifier_block *this,
1330                              unsigned long event, void *ptr)
1331{
1332        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1333        struct net *net = dev_net(dev);
1334        struct mr6_table *mrt;
1335        struct mif_device *v;
1336        int ct;
1337        LIST_HEAD(list);
1338
1339        if (event != NETDEV_UNREGISTER)
1340                return NOTIFY_DONE;
1341
1342        ip6mr_for_each_table(mrt, net) {
1343                v = &mrt->vif6_table[0];
1344                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1345                        if (v->dev == dev)
1346                                mif6_delete(mrt, ct, &list);
1347                }
1348        }
1349        unregister_netdevice_many(&list);
1350
1351        return NOTIFY_DONE;
1352}
1353
1354static struct notifier_block ip6_mr_notifier = {
1355        .notifier_call = ip6mr_device_event
1356};
1357
1358/*
1359 *      Setup for IP multicast routing
1360 */
1361
1362static int __net_init ip6mr_net_init(struct net *net)
1363{
1364        int err;
1365
1366        err = ip6mr_rules_init(net);
1367        if (err < 0)
1368                goto fail;
1369
1370#ifdef CONFIG_PROC_FS
1371        err = -ENOMEM;
1372        if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1373                goto proc_vif_fail;
1374        if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1375                goto proc_cache_fail;
1376#endif
1377
1378        return 0;
1379
1380#ifdef CONFIG_PROC_FS
1381proc_cache_fail:
1382        remove_proc_entry("ip6_mr_vif", net->proc_net);
1383proc_vif_fail:
1384        ip6mr_rules_exit(net);
1385#endif
1386fail:
1387        return err;
1388}
1389
1390static void __net_exit ip6mr_net_exit(struct net *net)
1391{
1392#ifdef CONFIG_PROC_FS
1393        remove_proc_entry("ip6_mr_cache", net->proc_net);
1394        remove_proc_entry("ip6_mr_vif", net->proc_net);
1395#endif
1396        ip6mr_rules_exit(net);
1397}
1398
1399static struct pernet_operations ip6mr_net_ops = {
1400        .init = ip6mr_net_init,
1401        .exit = ip6mr_net_exit,
1402};
1403
1404int __init ip6_mr_init(void)
1405{
1406        int err;
1407
1408        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1409                                       sizeof(struct mfc6_cache),
1410                                       0, SLAB_HWCACHE_ALIGN,
1411                                       NULL);
1412        if (!mrt_cachep)
1413                return -ENOMEM;
1414
1415        err = register_pernet_subsys(&ip6mr_net_ops);
1416        if (err)
1417                goto reg_pernet_fail;
1418
1419        err = register_netdevice_notifier(&ip6_mr_notifier);
1420        if (err)
1421                goto reg_notif_fail;
1422#ifdef CONFIG_IPV6_PIMSM_V2
1423        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1424                pr_err("%s: can't add PIM protocol\n", __func__);
1425                err = -EAGAIN;
1426                goto add_proto_fail;
1427        }
1428#endif
1429        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1430                      ip6mr_rtm_dumproute, NULL);
1431        return 0;
1432#ifdef CONFIG_IPV6_PIMSM_V2
1433add_proto_fail:
1434        unregister_netdevice_notifier(&ip6_mr_notifier);
1435#endif
1436reg_notif_fail:
1437        unregister_pernet_subsys(&ip6mr_net_ops);
1438reg_pernet_fail:
1439        kmem_cache_destroy(mrt_cachep);
1440        return err;
1441}
1442
1443void ip6_mr_cleanup(void)
1444{
1445        rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1446#ifdef CONFIG_IPV6_PIMSM_V2
1447        inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1448#endif
1449        unregister_netdevice_notifier(&ip6_mr_notifier);
1450        unregister_pernet_subsys(&ip6mr_net_ops);
1451        kmem_cache_destroy(mrt_cachep);
1452}
1453
1454static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1455                         struct mf6cctl *mfc, int mrtsock, int parent)
1456{
1457        bool found = false;
1458        int line;
1459        struct mfc6_cache *uc, *c;
1460        unsigned char ttls[MAXMIFS];
1461        int i;
1462
1463        if (mfc->mf6cc_parent >= MAXMIFS)
1464                return -ENFILE;
1465
1466        memset(ttls, 255, MAXMIFS);
1467        for (i = 0; i < MAXMIFS; i++) {
1468                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1469                        ttls[i] = 1;
1470
1471        }
1472
1473        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1474
1475        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1476                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1477                    ipv6_addr_equal(&c->mf6c_mcastgrp,
1478                                    &mfc->mf6cc_mcastgrp.sin6_addr) &&
1479                    (parent == -1 || parent == mfc->mf6cc_parent)) {
1480                        found = true;
1481                        break;
1482                }
1483        }
1484
1485        if (found) {
1486                write_lock_bh(&mrt_lock);
1487                c->mf6c_parent = mfc->mf6cc_parent;
1488                ip6mr_update_thresholds(mrt, c, ttls);
1489                if (!mrtsock)
1490                        c->mfc_flags |= MFC_STATIC;
1491                write_unlock_bh(&mrt_lock);
1492                mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1493                return 0;
1494        }
1495
1496        if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1497            !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1498                return -EINVAL;
1499
1500        c = ip6mr_cache_alloc();
1501        if (!c)
1502                return -ENOMEM;
1503
1504        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1505        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1506        c->mf6c_parent = mfc->mf6cc_parent;
1507        ip6mr_update_thresholds(mrt, c, ttls);
1508        if (!mrtsock)
1509                c->mfc_flags |= MFC_STATIC;
1510
1511        write_lock_bh(&mrt_lock);
1512        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1513        write_unlock_bh(&mrt_lock);
1514
1515        /*
1516         *      Check to see if we resolved a queued list. If so we
1517         *      need to send on the frames and tidy up.
1518         */
1519        found = false;
1520        spin_lock_bh(&mfc_unres_lock);
1521        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1522                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1523                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1524                        list_del(&uc->list);
1525                        atomic_dec(&mrt->cache_resolve_queue_len);
1526                        found = true;
1527                        break;
1528                }
1529        }
1530        if (list_empty(&mrt->mfc6_unres_queue))
1531                del_timer(&mrt->ipmr_expire_timer);
1532        spin_unlock_bh(&mfc_unres_lock);
1533
1534        if (found) {
1535                ip6mr_cache_resolve(net, mrt, uc, c);
1536                ip6mr_cache_free(uc);
1537        }
1538        mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1539        return 0;
1540}
1541
1542/*
1543 *      Close the multicast socket, and clear the vif tables etc
1544 */
1545
1546static void mroute_clean_tables(struct mr6_table *mrt)
1547{
1548        int i;
1549        LIST_HEAD(list);
1550        struct mfc6_cache *c, *next;
1551
1552        /*
1553         *      Shut down all active vif entries
1554         */
1555        for (i = 0; i < mrt->maxvif; i++) {
1556                if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1557                        mif6_delete(mrt, i, &list);
1558        }
1559        unregister_netdevice_many(&list);
1560
1561        /*
1562         *      Wipe the cache
1563         */
1564        for (i = 0; i < MFC6_LINES; i++) {
1565                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1566                        if (c->mfc_flags & MFC_STATIC)
1567                                continue;
1568                        write_lock_bh(&mrt_lock);
1569                        list_del(&c->list);
1570                        write_unlock_bh(&mrt_lock);
1571
1572                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1573                        ip6mr_cache_free(c);
1574                }
1575        }
1576
1577        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1578                spin_lock_bh(&mfc_unres_lock);
1579                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1580                        list_del(&c->list);
1581                        mr6_netlink_event(mrt, c, RTM_DELROUTE);
1582                        ip6mr_destroy_unres(mrt, c);
1583                }
1584                spin_unlock_bh(&mfc_unres_lock);
1585        }
1586}
1587
1588static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1589{
1590        int err = 0;
1591        struct net *net = sock_net(sk);
1592
1593        rtnl_lock();
1594        write_lock_bh(&mrt_lock);
1595        if (likely(mrt->mroute6_sk == NULL)) {
1596                mrt->mroute6_sk = sk;
1597                net->ipv6.devconf_all->mc_forwarding++;
1598                inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1599                                             NETCONFA_IFINDEX_ALL,
1600                                             net->ipv6.devconf_all);
1601        }
1602        else
1603                err = -EADDRINUSE;
1604        write_unlock_bh(&mrt_lock);
1605
1606        rtnl_unlock();
1607
1608        return err;
1609}
1610
1611int ip6mr_sk_done(struct sock *sk)
1612{
1613        int err = -EACCES;
1614        struct net *net = sock_net(sk);
1615        struct mr6_table *mrt;
1616
1617        rtnl_lock();
1618        ip6mr_for_each_table(mrt, net) {
1619                if (sk == mrt->mroute6_sk) {
1620                        write_lock_bh(&mrt_lock);
1621                        mrt->mroute6_sk = NULL;
1622                        net->ipv6.devconf_all->mc_forwarding--;
1623                        inet6_netconf_notify_devconf(net,
1624                                                     NETCONFA_MC_FORWARDING,
1625                                                     NETCONFA_IFINDEX_ALL,
1626                                                     net->ipv6.devconf_all);
1627                        write_unlock_bh(&mrt_lock);
1628
1629                        mroute_clean_tables(mrt);
1630                        err = 0;
1631                        break;
1632                }
1633        }
1634        rtnl_unlock();
1635
1636        return err;
1637}
1638
1639struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1640{
1641        struct mr6_table *mrt;
1642        struct flowi6 fl6 = {
1643                .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1644                .flowi6_oif     = skb->dev->ifindex,
1645                .flowi6_mark    = skb->mark,
1646        };
1647
1648        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1649                return NULL;
1650
1651        return mrt->mroute6_sk;
1652}
1653
1654/*
1655 *      Socket options and virtual interface manipulation. The whole
1656 *      virtual interface system is a complete heap, but unfortunately
1657 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1658 *      MOSPF/PIM router set up we can clean this up.
1659 */
1660
1661int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1662{
1663        int ret, parent = 0;
1664        struct mif6ctl vif;
1665        struct mf6cctl mfc;
1666        mifi_t mifi;
1667        struct net *net = sock_net(sk);
1668        struct mr6_table *mrt;
1669
1670        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1671        if (!mrt)
1672                return -ENOENT;
1673
1674        if (optname != MRT6_INIT) {
1675                if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1676                        return -EACCES;
1677        }
1678
1679        switch (optname) {
1680        case MRT6_INIT:
1681                if (sk->sk_type != SOCK_RAW ||
1682                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1683                        return -EOPNOTSUPP;
1684                if (optlen < sizeof(int))
1685                        return -EINVAL;
1686
1687                return ip6mr_sk_init(mrt, sk);
1688
1689        case MRT6_DONE:
1690                return ip6mr_sk_done(sk);
1691
1692        case MRT6_ADD_MIF:
1693                if (optlen < sizeof(vif))
1694                        return -EINVAL;
1695                if (copy_from_user(&vif, optval, sizeof(vif)))
1696                        return -EFAULT;
1697                if (vif.mif6c_mifi >= MAXMIFS)
1698                        return -ENFILE;
1699                rtnl_lock();
1700                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1701                rtnl_unlock();
1702                return ret;
1703
1704        case MRT6_DEL_MIF:
1705                if (optlen < sizeof(mifi_t))
1706                        return -EINVAL;
1707                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1708                        return -EFAULT;
1709                rtnl_lock();
1710                ret = mif6_delete(mrt, mifi, NULL);
1711                rtnl_unlock();
1712                return ret;
1713
1714        /*
1715         *      Manipulate the forwarding caches. These live
1716         *      in a sort of kernel/user symbiosis.
1717         */
1718        case MRT6_ADD_MFC:
1719        case MRT6_DEL_MFC:
1720                parent = -1;
1721        case MRT6_ADD_MFC_PROXY:
1722        case MRT6_DEL_MFC_PROXY:
1723                if (optlen < sizeof(mfc))
1724                        return -EINVAL;
1725                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1726                        return -EFAULT;
1727                if (parent == 0)
1728                        parent = mfc.mf6cc_parent;
1729                rtnl_lock();
1730                if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1731                        ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1732                else
1733                        ret = ip6mr_mfc_add(net, mrt, &mfc,
1734                                            sk == mrt->mroute6_sk, parent);
1735                rtnl_unlock();
1736                return ret;
1737
1738        /*
1739         *      Control PIM assert (to activate pim will activate assert)
1740         */
1741        case MRT6_ASSERT:
1742        {
1743                int v;
1744
1745                if (optlen != sizeof(v))
1746                        return -EINVAL;
1747                if (get_user(v, (int __user *)optval))
1748                        return -EFAULT;
1749                mrt->mroute_do_assert = v;
1750                return 0;
1751        }
1752
1753#ifdef CONFIG_IPV6_PIMSM_V2
1754        case MRT6_PIM:
1755        {
1756                int v;
1757
1758                if (optlen != sizeof(v))
1759                        return -EINVAL;
1760                if (get_user(v, (int __user *)optval))
1761                        return -EFAULT;
1762                v = !!v;
1763                rtnl_lock();
1764                ret = 0;
1765                if (v != mrt->mroute_do_pim) {
1766                        mrt->mroute_do_pim = v;
1767                        mrt->mroute_do_assert = v;
1768                }
1769                rtnl_unlock();
1770                return ret;
1771        }
1772
1773#endif
1774#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1775        case MRT6_TABLE:
1776        {
1777                u32 v;
1778
1779                if (optlen != sizeof(u32))
1780                        return -EINVAL;
1781                if (get_user(v, (u32 __user *)optval))
1782                        return -EFAULT;
1783                /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1784                if (v != RT_TABLE_DEFAULT && v >= 100000000)
1785                        return -EINVAL;
1786                if (sk == mrt->mroute6_sk)
1787                        return -EBUSY;
1788
1789                rtnl_lock();
1790                ret = 0;
1791                if (!ip6mr_new_table(net, v))
1792                        ret = -ENOMEM;
1793                raw6_sk(sk)->ip6mr_table = v;
1794                rtnl_unlock();
1795                return ret;
1796        }
1797#endif
1798        /*
1799         *      Spurious command, or MRT6_VERSION which you cannot
1800         *      set.
1801         */
1802        default:
1803                return -ENOPROTOOPT;
1804        }
1805}
1806
1807/*
1808 *      Getsock opt support for the multicast routing system.
1809 */
1810
1811int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1812                          int __user *optlen)
1813{
1814        int olr;
1815        int val;
1816        struct net *net = sock_net(sk);
1817        struct mr6_table *mrt;
1818
1819        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1820        if (!mrt)
1821                return -ENOENT;
1822
1823        switch (optname) {
1824        case MRT6_VERSION:
1825                val = 0x0305;
1826                break;
1827#ifdef CONFIG_IPV6_PIMSM_V2
1828        case MRT6_PIM:
1829                val = mrt->mroute_do_pim;
1830                break;
1831#endif
1832        case MRT6_ASSERT:
1833                val = mrt->mroute_do_assert;
1834                break;
1835        default:
1836                return -ENOPROTOOPT;
1837        }
1838
1839        if (get_user(olr, optlen))
1840                return -EFAULT;
1841
1842        olr = min_t(int, olr, sizeof(int));
1843        if (olr < 0)
1844                return -EINVAL;
1845
1846        if (put_user(olr, optlen))
1847                return -EFAULT;
1848        if (copy_to_user(optval, &val, olr))
1849                return -EFAULT;
1850        return 0;
1851}
1852
1853/*
1854 *      The IP multicast ioctl support routines.
1855 */
1856
1857int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1858{
1859        struct sioc_sg_req6 sr;
1860        struct sioc_mif_req6 vr;
1861        struct mif_device *vif;
1862        struct mfc6_cache *c;
1863        struct net *net = sock_net(sk);
1864        struct mr6_table *mrt;
1865
1866        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1867        if (!mrt)
1868                return -ENOENT;
1869
1870        switch (cmd) {
1871        case SIOCGETMIFCNT_IN6:
1872                if (copy_from_user(&vr, arg, sizeof(vr)))
1873                        return -EFAULT;
1874                if (vr.mifi >= mrt->maxvif)
1875                        return -EINVAL;
1876                read_lock(&mrt_lock);
1877                vif = &mrt->vif6_table[vr.mifi];
1878                if (MIF_EXISTS(mrt, vr.mifi)) {
1879                        vr.icount = vif->pkt_in;
1880                        vr.ocount = vif->pkt_out;
1881                        vr.ibytes = vif->bytes_in;
1882                        vr.obytes = vif->bytes_out;
1883                        read_unlock(&mrt_lock);
1884
1885                        if (copy_to_user(arg, &vr, sizeof(vr)))
1886                                return -EFAULT;
1887                        return 0;
1888                }
1889                read_unlock(&mrt_lock);
1890                return -EADDRNOTAVAIL;
1891        case SIOCGETSGCNT_IN6:
1892                if (copy_from_user(&sr, arg, sizeof(sr)))
1893                        return -EFAULT;
1894
1895                read_lock(&mrt_lock);
1896                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1897                if (c) {
1898                        sr.pktcnt = c->mfc_un.res.pkt;
1899                        sr.bytecnt = c->mfc_un.res.bytes;
1900                        sr.wrong_if = c->mfc_un.res.wrong_if;
1901                        read_unlock(&mrt_lock);
1902
1903                        if (copy_to_user(arg, &sr, sizeof(sr)))
1904                                return -EFAULT;
1905                        return 0;
1906                }
1907                read_unlock(&mrt_lock);
1908                return -EADDRNOTAVAIL;
1909        default:
1910                return -ENOIOCTLCMD;
1911        }
1912}
1913
1914#ifdef CONFIG_COMPAT
1915struct compat_sioc_sg_req6 {
1916        struct sockaddr_in6 src;
1917        struct sockaddr_in6 grp;
1918        compat_ulong_t pktcnt;
1919        compat_ulong_t bytecnt;
1920        compat_ulong_t wrong_if;
1921};
1922
1923struct compat_sioc_mif_req6 {
1924        mifi_t  mifi;
1925        compat_ulong_t icount;
1926        compat_ulong_t ocount;
1927        compat_ulong_t ibytes;
1928        compat_ulong_t obytes;
1929};
1930
1931int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1932{
1933        struct compat_sioc_sg_req6 sr;
1934        struct compat_sioc_mif_req6 vr;
1935        struct mif_device *vif;
1936        struct mfc6_cache *c;
1937        struct net *net = sock_net(sk);
1938        struct mr6_table *mrt;
1939
1940        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1941        if (!mrt)
1942                return -ENOENT;
1943
1944        switch (cmd) {
1945        case SIOCGETMIFCNT_IN6:
1946                if (copy_from_user(&vr, arg, sizeof(vr)))
1947                        return -EFAULT;
1948                if (vr.mifi >= mrt->maxvif)
1949                        return -EINVAL;
1950                read_lock(&mrt_lock);
1951                vif = &mrt->vif6_table[vr.mifi];
1952                if (MIF_EXISTS(mrt, vr.mifi)) {
1953                        vr.icount = vif->pkt_in;
1954                        vr.ocount = vif->pkt_out;
1955                        vr.ibytes = vif->bytes_in;
1956                        vr.obytes = vif->bytes_out;
1957                        read_unlock(&mrt_lock);
1958
1959                        if (copy_to_user(arg, &vr, sizeof(vr)))
1960                                return -EFAULT;
1961                        return 0;
1962                }
1963                read_unlock(&mrt_lock);
1964                return -EADDRNOTAVAIL;
1965        case SIOCGETSGCNT_IN6:
1966                if (copy_from_user(&sr, arg, sizeof(sr)))
1967                        return -EFAULT;
1968
1969                read_lock(&mrt_lock);
1970                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1971                if (c) {
1972                        sr.pktcnt = c->mfc_un.res.pkt;
1973                        sr.bytecnt = c->mfc_un.res.bytes;
1974                        sr.wrong_if = c->mfc_un.res.wrong_if;
1975                        read_unlock(&mrt_lock);
1976
1977                        if (copy_to_user(arg, &sr, sizeof(sr)))
1978                                return -EFAULT;
1979                        return 0;
1980                }
1981                read_unlock(&mrt_lock);
1982                return -EADDRNOTAVAIL;
1983        default:
1984                return -ENOIOCTLCMD;
1985        }
1986}
1987#endif
1988
1989static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
1990{
1991        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1992                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1993        IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1994                         IPSTATS_MIB_OUTOCTETS, skb->len);
1995        return dst_output_sk(sk, skb);
1996}
1997
1998/*
1999 *      Processing handlers for ip6mr_forward
2000 */
2001
2002static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
2003                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2004{
2005        struct ipv6hdr *ipv6h;
2006        struct mif_device *vif = &mrt->vif6_table[vifi];
2007        struct net_device *dev;
2008        struct dst_entry *dst;
2009        struct flowi6 fl6;
2010
2011        if (!vif->dev)
2012                goto out_free;
2013
2014#ifdef CONFIG_IPV6_PIMSM_V2
2015        if (vif->flags & MIFF_REGISTER) {
2016                vif->pkt_out++;
2017                vif->bytes_out += skb->len;
2018                vif->dev->stats.tx_bytes += skb->len;
2019                vif->dev->stats.tx_packets++;
2020                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2021                goto out_free;
2022        }
2023#endif
2024
2025        ipv6h = ipv6_hdr(skb);
2026
2027        fl6 = (struct flowi6) {
2028                .flowi6_oif = vif->link,
2029                .daddr = ipv6h->daddr,
2030        };
2031
2032        dst = ip6_route_output(net, NULL, &fl6);
2033        if (dst->error) {
2034                dst_release(dst);
2035                goto out_free;
2036        }
2037
2038        skb_dst_drop(skb);
2039        skb_dst_set(skb, dst);
2040
2041        /*
2042         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2043         * not only before forwarding, but after forwarding on all output
2044         * interfaces. It is clear, if mrouter runs a multicasting
2045         * program, it should receive packets not depending to what interface
2046         * program is joined.
2047         * If we will not make it, the program will have to join on all
2048         * interfaces. On the other hand, multihoming host (or router, but
2049         * not mrouter) cannot join to more than one interface - it will
2050         * result in receiving multiple packets.
2051         */
2052        dev = vif->dev;
2053        skb->dev = dev;
2054        vif->pkt_out++;
2055        vif->bytes_out += skb->len;
2056
2057        /* We are about to write */
2058        /* XXX: extension headers? */
2059        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2060                goto out_free;
2061
2062        ipv6h = ipv6_hdr(skb);
2063        ipv6h->hop_limit--;
2064
2065        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2066
2067        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
2068                       skb->dev, dev,
2069                       ip6mr_forward2_finish);
2070
2071out_free:
2072        kfree_skb(skb);
2073        return 0;
2074}
2075
2076static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2077{
2078        int ct;
2079
2080        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2081                if (mrt->vif6_table[ct].dev == dev)
2082                        break;
2083        }
2084        return ct;
2085}
2086
2087static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2088                           struct sk_buff *skb, struct mfc6_cache *cache)
2089{
2090        int psend = -1;
2091        int vif, ct;
2092        int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2093
2094        vif = cache->mf6c_parent;
2095        cache->mfc_un.res.pkt++;
2096        cache->mfc_un.res.bytes += skb->len;
2097
2098        if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2099                struct mfc6_cache *cache_proxy;
2100
2101                /* For an (*,G) entry, we only check that the incoming
2102                 * interface is part of the static tree.
2103                 */
2104                cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2105                if (cache_proxy &&
2106                    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2107                        goto forward;
2108        }
2109
2110        /*
2111         * Wrong interface: drop packet and (maybe) send PIM assert.
2112         */
2113        if (mrt->vif6_table[vif].dev != skb->dev) {
2114                cache->mfc_un.res.wrong_if++;
2115
2116                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2117                    /* pimsm uses asserts, when switching from RPT to SPT,
2118                       so that we cannot check that packet arrived on an oif.
2119                       It is bad, but otherwise we would need to move pretty
2120                       large chunk of pimd to kernel. Ough... --ANK
2121                     */
2122                    (mrt->mroute_do_pim ||
2123                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2124                    time_after(jiffies,
2125                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2126                        cache->mfc_un.res.last_assert = jiffies;
2127                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2128                }
2129                goto dont_forward;
2130        }
2131
2132forward:
2133        mrt->vif6_table[vif].pkt_in++;
2134        mrt->vif6_table[vif].bytes_in += skb->len;
2135
2136        /*
2137         *      Forward the frame
2138         */
2139        if (ipv6_addr_any(&cache->mf6c_origin) &&
2140            ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2141                if (true_vifi >= 0 &&
2142                    true_vifi != cache->mf6c_parent &&
2143                    ipv6_hdr(skb)->hop_limit >
2144                                cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2145                        /* It's an (*,*) entry and the packet is not coming from
2146                         * the upstream: forward the packet to the upstream
2147                         * only.
2148                         */
2149                        psend = cache->mf6c_parent;
2150                        goto last_forward;
2151                }
2152                goto dont_forward;
2153        }
2154        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2155                /* For (*,G) entry, don't forward to the incoming interface */
2156                if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2157                    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2158                        if (psend != -1) {
2159                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2160                                if (skb2)
2161                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2162                        }
2163                        psend = ct;
2164                }
2165        }
2166last_forward:
2167        if (psend != -1) {
2168                ip6mr_forward2(net, mrt, skb, cache, psend);
2169                return;
2170        }
2171
2172dont_forward:
2173        kfree_skb(skb);
2174}
2175
2176
2177/*
2178 *      Multicast packets for forwarding arrive here
2179 */
2180
2181int ip6_mr_input(struct sk_buff *skb)
2182{
2183        struct mfc6_cache *cache;
2184        struct net *net = dev_net(skb->dev);
2185        struct mr6_table *mrt;
2186        struct flowi6 fl6 = {
2187                .flowi6_iif     = skb->dev->ifindex,
2188                .flowi6_mark    = skb->mark,
2189        };
2190        int err;
2191
2192        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2193        if (err < 0) {
2194                kfree_skb(skb);
2195                return err;
2196        }
2197
2198        read_lock(&mrt_lock);
2199        cache = ip6mr_cache_find(mrt,
2200                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2201        if (!cache) {
2202                int vif = ip6mr_find_vif(mrt, skb->dev);
2203
2204                if (vif >= 0)
2205                        cache = ip6mr_cache_find_any(mrt,
2206                                                     &ipv6_hdr(skb)->daddr,
2207                                                     vif);
2208        }
2209
2210        /*
2211         *      No usable cache entry
2212         */
2213        if (!cache) {
2214                int vif;
2215
2216                vif = ip6mr_find_vif(mrt, skb->dev);
2217                if (vif >= 0) {
2218                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2219                        read_unlock(&mrt_lock);
2220
2221                        return err;
2222                }
2223                read_unlock(&mrt_lock);
2224                kfree_skb(skb);
2225                return -ENODEV;
2226        }
2227
2228        ip6_mr_forward(net, mrt, skb, cache);
2229
2230        read_unlock(&mrt_lock);
2231
2232        return 0;
2233}
2234
2235
2236static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2237                               struct mfc6_cache *c, struct rtmsg *rtm)
2238{
2239        int ct;
2240        struct rtnexthop *nhp;
2241        struct nlattr *mp_attr;
2242        struct rta_mfc_stats mfcs;
2243
2244        /* If cache is unresolved, don't try to parse IIF and OIF */
2245        if (c->mf6c_parent >= MAXMIFS)
2246                return -ENOENT;
2247
2248        if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2249            nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2250                return -EMSGSIZE;
2251        mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2252        if (!mp_attr)
2253                return -EMSGSIZE;
2254
2255        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2256                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2257                        nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2258                        if (!nhp) {
2259                                nla_nest_cancel(skb, mp_attr);
2260                                return -EMSGSIZE;
2261                        }
2262
2263                        nhp->rtnh_flags = 0;
2264                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2265                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2266                        nhp->rtnh_len = sizeof(*nhp);
2267                }
2268        }
2269
2270        nla_nest_end(skb, mp_attr);
2271
2272        mfcs.mfcs_packets = c->mfc_un.res.pkt;
2273        mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2274        mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2275        if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2276                return -EMSGSIZE;
2277
2278        rtm->rtm_type = RTN_MULTICAST;
2279        return 1;
2280}
2281
2282int ip6mr_get_route(struct net *net,
2283                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2284{
2285        int err;
2286        struct mr6_table *mrt;
2287        struct mfc6_cache *cache;
2288        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2289
2290        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2291        if (!mrt)
2292                return -ENOENT;
2293
2294        read_lock(&mrt_lock);
2295        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2296        if (!cache && skb->dev) {
2297                int vif = ip6mr_find_vif(mrt, skb->dev);
2298
2299                if (vif >= 0)
2300                        cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2301                                                     vif);
2302        }
2303
2304        if (!cache) {
2305                struct sk_buff *skb2;
2306                struct ipv6hdr *iph;
2307                struct net_device *dev;
2308                int vif;
2309
2310                if (nowait) {
2311                        read_unlock(&mrt_lock);
2312                        return -EAGAIN;
2313                }
2314
2315                dev = skb->dev;
2316                if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2317                        read_unlock(&mrt_lock);
2318                        return -ENODEV;
2319                }
2320
2321                /* really correct? */
2322                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2323                if (!skb2) {
2324                        read_unlock(&mrt_lock);
2325                        return -ENOMEM;
2326                }
2327
2328                skb_reset_transport_header(skb2);
2329
2330                skb_put(skb2, sizeof(struct ipv6hdr));
2331                skb_reset_network_header(skb2);
2332
2333                iph = ipv6_hdr(skb2);
2334                iph->version = 0;
2335                iph->priority = 0;
2336                iph->flow_lbl[0] = 0;
2337                iph->flow_lbl[1] = 0;
2338                iph->flow_lbl[2] = 0;
2339                iph->payload_len = 0;
2340                iph->nexthdr = IPPROTO_NONE;
2341                iph->hop_limit = 0;
2342                iph->saddr = rt->rt6i_src.addr;
2343                iph->daddr = rt->rt6i_dst.addr;
2344
2345                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2346                read_unlock(&mrt_lock);
2347
2348                return err;
2349        }
2350
2351        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2352                cache->mfc_flags |= MFC_NOTIFY;
2353
2354        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2355        read_unlock(&mrt_lock);
2356        return err;
2357}
2358
2359static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2360                             u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2361                             int flags)
2362{
2363        struct nlmsghdr *nlh;
2364        struct rtmsg *rtm;
2365        int err;
2366
2367        nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2368        if (!nlh)
2369                return -EMSGSIZE;
2370
2371        rtm = nlmsg_data(nlh);
2372        rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2373        rtm->rtm_dst_len  = 128;
2374        rtm->rtm_src_len  = 128;
2375        rtm->rtm_tos      = 0;
2376        rtm->rtm_table    = mrt->id;
2377        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2378                goto nla_put_failure;
2379        rtm->rtm_type = RTN_MULTICAST;
2380        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2381        if (c->mfc_flags & MFC_STATIC)
2382                rtm->rtm_protocol = RTPROT_STATIC;
2383        else
2384                rtm->rtm_protocol = RTPROT_MROUTED;
2385        rtm->rtm_flags    = 0;
2386
2387        if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2388            nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2389                goto nla_put_failure;
2390        err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2391        /* do not break the dump if cache is unresolved */
2392        if (err < 0 && err != -ENOENT)
2393                goto nla_put_failure;
2394
2395        nlmsg_end(skb, nlh);
2396        return 0;
2397
2398nla_put_failure:
2399        nlmsg_cancel(skb, nlh);
2400        return -EMSGSIZE;
2401}
2402
2403static int mr6_msgsize(bool unresolved, int maxvif)
2404{
2405        size_t len =
2406                NLMSG_ALIGN(sizeof(struct rtmsg))
2407                + nla_total_size(4)     /* RTA_TABLE */
2408                + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2409                + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2410                ;
2411
2412        if (!unresolved)
2413                len = len
2414                      + nla_total_size(4)       /* RTA_IIF */
2415                      + nla_total_size(0)       /* RTA_MULTIPATH */
2416                      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2417                                                /* RTA_MFC_STATS */
2418                      + nla_total_size(sizeof(struct rta_mfc_stats))
2419                ;
2420
2421        return len;
2422}
2423
2424static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2425                              int cmd)
2426{
2427        struct net *net = read_pnet(&mrt->net);
2428        struct sk_buff *skb;
2429        int err = -ENOBUFS;
2430
2431        skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2432                        GFP_ATOMIC);
2433        if (!skb)
2434                goto errout;
2435
2436        err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2437        if (err < 0)
2438                goto errout;
2439
2440        rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2441        return;
2442
2443errout:
2444        kfree_skb(skb);
2445        if (err < 0)
2446                rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2447}
2448
2449static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2450{
2451        struct net *net = sock_net(skb->sk);
2452        struct mr6_table *mrt;
2453        struct mfc6_cache *mfc;
2454        unsigned int t = 0, s_t;
2455        unsigned int h = 0, s_h;
2456        unsigned int e = 0, s_e;
2457
2458        s_t = cb->args[0];
2459        s_h = cb->args[1];
2460        s_e = cb->args[2];
2461
2462        read_lock(&mrt_lock);
2463        ip6mr_for_each_table(mrt, net) {
2464                if (t < s_t)
2465                        goto next_table;
2466                if (t > s_t)
2467                        s_h = 0;
2468                for (h = s_h; h < MFC6_LINES; h++) {
2469                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2470                                if (e < s_e)
2471                                        goto next_entry;
2472                                if (ip6mr_fill_mroute(mrt, skb,
2473                                                      NETLINK_CB(cb->skb).portid,
2474                                                      cb->nlh->nlmsg_seq,
2475                                                      mfc, RTM_NEWROUTE,
2476                                                      NLM_F_MULTI) < 0)
2477                                        goto done;
2478next_entry:
2479                                e++;
2480                        }
2481                        e = s_e = 0;
2482                }
2483                spin_lock_bh(&mfc_unres_lock);
2484                list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2485                        if (e < s_e)
2486                                goto next_entry2;
2487                        if (ip6mr_fill_mroute(mrt, skb,
2488                                              NETLINK_CB(cb->skb).portid,
2489                                              cb->nlh->nlmsg_seq,
2490                                              mfc, RTM_NEWROUTE,
2491                                              NLM_F_MULTI) < 0) {
2492                                spin_unlock_bh(&mfc_unres_lock);
2493                                goto done;
2494                        }
2495next_entry2:
2496                        e++;
2497                }
2498                spin_unlock_bh(&mfc_unres_lock);
2499                e = s_e = 0;
2500                s_h = 0;
2501next_table:
2502                t++;
2503        }
2504done:
2505        read_unlock(&mrt_lock);
2506
2507        cb->args[2] = e;
2508        cb->args[1] = h;
2509        cb->args[0] = t;
2510
2511        return skb->len;
2512}
2513