linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60#include <net/nexthop.h>
  61
  62#include <asm/uaccess.h>
  63
  64#ifdef CONFIG_SYSCTL
  65#include <linux/sysctl.h>
  66#endif
  67
  /*
   * Forward declarations of file-local helpers.  Several of them are
   * referenced by the dst_ops tables and route templates further down
   * (e.g. ip6_dst_check, ip6_mtu, ip6_pkt_discard) before any definition
   * has been seen.
   */
  68static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  69                                    const struct in6_addr *dest);
  70static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  71static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  72static unsigned int      ip6_mtu(const struct dst_entry *dst);
  73static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  74static void             ip6_dst_destroy(struct dst_entry *);
  75static void             ip6_dst_ifdown(struct dst_entry *,
  76                                       struct net_device *dev, int how);
  77static int               ip6_dst_gc(struct dst_ops *ops);
  78
  79static int              ip6_pkt_discard(struct sk_buff *skb);
  80static int              ip6_pkt_discard_out(struct sk_buff *skb);
  81static void             ip6_link_failure(struct sk_buff *skb);
  82static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  83                                           struct sk_buff *skb, u32 mtu);
  84static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  85                                        struct sk_buff *skb);
  86
  /* Route Information helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
  87#ifdef CONFIG_IPV6_ROUTE_INFO
  88static struct rt6_info *rt6_add_route_info(struct net *net,
  89                                           const struct in6_addr *prefix, int prefixlen,
  90                                           const struct in6_addr *gwaddr, int ifindex,
  91                                           unsigned int pref);
  92static struct rt6_info *rt6_get_route_info(struct net *net,
  93                                           const struct in6_addr *prefix, int prefixlen,
  94                                           const struct in6_addr *gwaddr, int ifindex);
  95#endif
  96
  97static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
  98{
  99        struct rt6_info *rt = (struct rt6_info *) dst;
 100        struct inet_peer *peer;
 101        u32 *p = NULL;
 102
 103        if (!(rt->dst.flags & DST_HOST))
 104                return NULL;
 105
 106        peer = rt6_get_peer_create(rt);
 107        if (peer) {
 108                u32 *old_p = __DST_METRICS_PTR(old);
 109                unsigned long prev, new;
 110
 111                p = peer->metrics;
 112                if (inet_metrics_new(peer))
 113                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 114
 115                new = (unsigned long) p;
 116                prev = cmpxchg(&dst->_metrics, old, new);
 117
 118                if (prev != old) {
 119                        p = __DST_METRICS_PTR(prev);
 120                        if (prev & DST_METRICS_READ_ONLY)
 121                                p = NULL;
 122                }
 123        }
 124        return p;
 125}
 126
 127static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 128                                             struct sk_buff *skb,
 129                                             const void *daddr)
 130{
 131        struct in6_addr *p = &rt->rt6i_gateway;
 132
 133        if (!ipv6_addr_any(p))
 134                return (const void *) p;
 135        else if (skb)
 136                return &ipv6_hdr(skb)->daddr;
 137        return daddr;
 138}
 139
 140static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 141                                          struct sk_buff *skb,
 142                                          const void *daddr)
 143{
 144        struct rt6_info *rt = (struct rt6_info *) dst;
 145        struct neighbour *n;
 146
 147        daddr = choose_neigh_daddr(rt, skb, daddr);
 148        n = __ipv6_neigh_lookup(dst->dev, daddr);
 149        if (n)
 150                return n;
 151        return neigh_create(&nd_tbl, daddr, dst->dev);
 152}
 153
 154static struct dst_ops ip6_dst_ops_template = {
 155        .family                 =       AF_INET6,
 156        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 157        .gc                     =       ip6_dst_gc,
 158        .gc_thresh              =       1024,
 159        .check                  =       ip6_dst_check,
 160        .default_advmss         =       ip6_default_advmss,
 161        .mtu                    =       ip6_mtu,
 162        .cow_metrics            =       ipv6_cow_metrics,
 163        .destroy                =       ip6_dst_destroy,
 164        .ifdown                 =       ip6_dst_ifdown,
 165        .negative_advice        =       ip6_negative_advice,
 166        .link_failure           =       ip6_link_failure,
 167        .update_pmtu            =       ip6_rt_update_pmtu,
 168        .redirect               =       rt6_do_redirect,
 169        .local_out              =       __ip6_local_out,
 170        .neigh_lookup           =       ip6_neigh_lookup,
 171};
 172
 173static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 174{
 175        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 176
 177        return mtu ? : dst->dev->mtu;
 178}
 179
 180static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 181                                         struct sk_buff *skb, u32 mtu)
 182{
 183}
 184
 /* redirect callback of ip6_dst_blackhole_ops: deliberately a no-op. */
 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
                                       struct sk_buff *skb)
 {
         /* nothing to do */
 }
 189
 190static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 191                                         unsigned long old)
 192{
 193        return NULL;
 194}
 195
 196static struct dst_ops ip6_dst_blackhole_ops = {
 197        .family                 =       AF_INET6,
 198        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 199        .destroy                =       ip6_dst_destroy,
 200        .check                  =       ip6_dst_check,
 201        .mtu                    =       ip6_blackhole_mtu,
 202        .default_advmss         =       ip6_default_advmss,
 203        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 204        .redirect               =       ip6_rt_blackhole_redirect,
 205        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
 206        .neigh_lookup           =       ip6_neigh_lookup,
 207};
 208
 209static const u32 ip6_template_metrics[RTAX_MAX] = {
 210        [RTAX_HOPLIMIT - 1] = 0,
 211};
 212
 213static const struct rt6_info ip6_null_entry_template = {
 214        .dst = {
 215                .__refcnt       = ATOMIC_INIT(1),
 216                .__use          = 1,
 217                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 218                .error          = -ENETUNREACH,
 219                .input          = ip6_pkt_discard,
 220                .output         = ip6_pkt_discard_out,
 221        },
 222        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 223        .rt6i_protocol  = RTPROT_KERNEL,
 224        .rt6i_metric    = ~(u32) 0,
 225        .rt6i_ref       = ATOMIC_INIT(1),
 226};
 227
 228#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 229
 230static int ip6_pkt_prohibit(struct sk_buff *skb);
 231static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 232
 233static const struct rt6_info ip6_prohibit_entry_template = {
 234        .dst = {
 235                .__refcnt       = ATOMIC_INIT(1),
 236                .__use          = 1,
 237                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 238                .error          = -EACCES,
 239                .input          = ip6_pkt_prohibit,
 240                .output         = ip6_pkt_prohibit_out,
 241        },
 242        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 243        .rt6i_protocol  = RTPROT_KERNEL,
 244        .rt6i_metric    = ~(u32) 0,
 245        .rt6i_ref       = ATOMIC_INIT(1),
 246};
 247
 248static const struct rt6_info ip6_blk_hole_entry_template = {
 249        .dst = {
 250                .__refcnt       = ATOMIC_INIT(1),
 251                .__use          = 1,
 252                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 253                .error          = -EINVAL,
 254                .input          = dst_discard,
 255                .output         = dst_discard,
 256        },
 257        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 258        .rt6i_protocol  = RTPROT_KERNEL,
 259        .rt6i_metric    = ~(u32) 0,
 260        .rt6i_ref       = ATOMIC_INIT(1),
 261};
 262
 263#endif
 264
 265/* allocate dst with ip6_dst_ops */
 266static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 267                                             struct net_device *dev,
 268                                             int flags,
 269                                             struct fib6_table *table)
 270{
 271        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 272                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 273
 274        if (rt) {
 275                struct dst_entry *dst = &rt->dst;
 276
 277                memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 278                rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 279                rt->rt6i_genid = rt_genid(net);
 280                INIT_LIST_HEAD(&rt->rt6i_siblings);
 281                rt->rt6i_nsiblings = 0;
 282        }
 283        return rt;
 284}
 285
 286static void ip6_dst_destroy(struct dst_entry *dst)
 287{
 288        struct rt6_info *rt = (struct rt6_info *)dst;
 289        struct inet6_dev *idev = rt->rt6i_idev;
 290        struct dst_entry *from = dst->from;
 291
 292        if (!(rt->dst.flags & DST_HOST))
 293                dst_destroy_metrics_generic(dst);
 294
 295        if (idev) {
 296                rt->rt6i_idev = NULL;
 297                in6_dev_put(idev);
 298        }
 299
 300        dst->from = NULL;
 301        dst_release(from);
 302
 303        if (rt6_has_peer(rt)) {
 304                struct inet_peer *peer = rt6_peer_ptr(rt);
 305                inet_putpeer(peer);
 306        }
 307}
 308
 309void rt6_bind_peer(struct rt6_info *rt, int create)
 310{
 311        struct inet_peer_base *base;
 312        struct inet_peer *peer;
 313
 314        base = inetpeer_base_ptr(rt->_rt6i_peer);
 315        if (!base)
 316                return;
 317
 318        peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 319        if (peer) {
 320                if (!rt6_set_peer(rt, peer))
 321                        inet_putpeer(peer);
 322        }
 323}
 324
 325static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 326                           int how)
 327{
 328        struct rt6_info *rt = (struct rt6_info *)dst;
 329        struct inet6_dev *idev = rt->rt6i_idev;
 330        struct net_device *loopback_dev =
 331                dev_net(dev)->loopback_dev;
 332
 333        if (dev != loopback_dev) {
 334                if (idev && idev->dev == dev) {
 335                        struct inet6_dev *loopback_idev =
 336                                in6_dev_get(loopback_dev);
 337                        if (loopback_idev) {
 338                                rt->rt6i_idev = loopback_idev;
 339                                in6_dev_put(idev);
 340                        }
 341                }
 342        }
 343}
 344
 345static bool rt6_check_expired(const struct rt6_info *rt)
 346{
 347        if (rt->rt6i_flags & RTF_EXPIRES) {
 348                if (time_after(jiffies, rt->dst.expires))
 349                        return true;
 350        } else if (rt->dst.from) {
 351                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 352        }
 353        return false;
 354}
 355
 356static bool rt6_need_strict(const struct in6_addr *daddr)
 357{
 358        return ipv6_addr_type(daddr) &
 359                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 360}
 361
 362/* Multipath route selection:
 363 *   Hash based function using packet header and flowlabel.
 364 * Adapted from fib_info_hashfn()
 365 */
 366static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 367                               const struct flowi6 *fl6)
 368{
 369        unsigned int val = fl6->flowi6_proto;
 370
 371        val ^= ipv6_addr_hash(&fl6->daddr);
 372        val ^= ipv6_addr_hash(&fl6->saddr);
 373
 374        /* Work only if this not encapsulated */
 375        switch (fl6->flowi6_proto) {
 376        case IPPROTO_UDP:
 377        case IPPROTO_TCP:
 378        case IPPROTO_SCTP:
 379                val ^= (__force u16)fl6->fl6_sport;
 380                val ^= (__force u16)fl6->fl6_dport;
 381                break;
 382
 383        case IPPROTO_ICMPV6:
 384                val ^= (__force u16)fl6->fl6_icmp_type;
 385                val ^= (__force u16)fl6->fl6_icmp_code;
 386                break;
 387        }
 388        /* RFC6438 recommands to use flowlabel */
 389        val ^= (__force u32)fl6->flowlabel;
 390
 391        /* Perhaps, we need to tune, this function? */
 392        val = val ^ (val >> 7) ^ (val >> 12);
 393        return val % candidate_count;
 394}
 395
 396static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 397                                             struct flowi6 *fl6)
 398{
 399        struct rt6_info *sibling, *next_sibling;
 400        int route_choosen;
 401
 402        route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 403        /* Don't change the route, if route_choosen == 0
 404         * (siblings does not include ourself)
 405         */
 406        if (route_choosen)
 407                list_for_each_entry_safe(sibling, next_sibling,
 408                                &match->rt6i_siblings, rt6i_siblings) {
 409                        route_choosen--;
 410                        if (route_choosen == 0) {
 411                                match = sibling;
 412                                break;
 413                        }
 414                }
 415        return match;
 416}
 417
 418/*
 419 *      Route lookup. Any table->tb6_lock is implied.
 420 */
 421
 422static inline struct rt6_info *rt6_device_match(struct net *net,
 423                                                    struct rt6_info *rt,
 424                                                    const struct in6_addr *saddr,
 425                                                    int oif,
 426                                                    int flags)
 427{
 428        struct rt6_info *local = NULL;
 429        struct rt6_info *sprt;
 430
 431        if (!oif && ipv6_addr_any(saddr))
 432                goto out;
 433
 434        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 435                struct net_device *dev = sprt->dst.dev;
 436
 437                if (oif) {
 438                        if (dev->ifindex == oif)
 439                                return sprt;
 440                        if (dev->flags & IFF_LOOPBACK) {
 441                                if (!sprt->rt6i_idev ||
 442                                    sprt->rt6i_idev->dev->ifindex != oif) {
 443                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
 444                                                continue;
 445                                        if (local && (!oif ||
 446                                                      local->rt6i_idev->dev->ifindex == oif))
 447                                                continue;
 448                                }
 449                                local = sprt;
 450                        }
 451                } else {
 452                        if (ipv6_chk_addr(net, saddr, dev,
 453                                          flags & RT6_LOOKUP_F_IFACE))
 454                                return sprt;
 455                }
 456        }
 457
 458        if (oif) {
 459                if (local)
 460                        return local;
 461
 462                if (flags & RT6_LOOKUP_F_IFACE)
 463                        return net->ipv6.ip6_null_entry;
 464        }
 465out:
 466        return rt;
 467}
 468
 #ifdef CONFIG_IPV6_ROUTER_PREF
 /*
  * Router Reachability Probing: send a neighbour solicitation for the
  * gateway of @rt.
  *
  * Nothing is sent for a NULL route, a route without RTF_GATEWAY, or a
  * gateway whose neighbour entry is in a NUD_VALID state.  For an
  * existing entry the probe is rate limited: one is only sent once more
  * than the interface's rtr_probe_interval has passed since
  * neigh->updated, which is then reset to now.  The original note says
  * probes MUST be limited to no more than one per minute.
  */
 static void rt6_probe(struct rt6_info *rt)
 {
         struct in6_addr mcaddr;
         struct in6_addr *target;
         struct neighbour *neigh;

         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
                 return;

         rcu_read_lock_bh();
         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
         if (neigh) {
                 bool skip;

                 write_lock(&neigh->lock);
                 skip = (neigh->nud_state & NUD_VALID) ||
                        !time_after(jiffies, neigh->updated +
                                    rt->rt6i_idev->cnf.rtr_probe_interval);
                 if (!skip)
                         neigh->updated = jiffies;
                 write_unlock(&neigh->lock);

                 if (skip)
                         goto unlock_rcu;
         }

         target = (struct in6_addr *)&rt->rt6i_gateway;
         addrconf_addr_solict_mult(target, &mcaddr);
         ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);

 unlock_rcu:
         rcu_read_unlock_bh();
 }
 #else
 /* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
 static inline void rt6_probe(struct rt6_info *rt)
 {
 }
 #endif
 515
 516/*
 517 * Default Router Selection (RFC 2461 6.3.6)
 518 */
 519static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 520{
 521        struct net_device *dev = rt->dst.dev;
 522        if (!oif || dev->ifindex == oif)
 523                return 2;
 524        if ((dev->flags & IFF_LOOPBACK) &&
 525            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 526                return 1;
 527        return 0;
 528}
 529
 530static inline bool rt6_check_neigh(struct rt6_info *rt)
 531{
 532        struct neighbour *neigh;
 533        bool ret = false;
 534
 535        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 536            !(rt->rt6i_flags & RTF_GATEWAY))
 537                return true;
 538
 539        rcu_read_lock_bh();
 540        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 541        if (neigh) {
 542                read_lock(&neigh->lock);
 543                if (neigh->nud_state & NUD_VALID)
 544                        ret = true;
 545#ifdef CONFIG_IPV6_ROUTER_PREF
 546                else if (!(neigh->nud_state & NUD_FAILED))
 547                        ret = true;
 548#endif
 549                read_unlock(&neigh->lock);
 550        }
 551        rcu_read_unlock_bh();
 552
 553        return ret;
 554}
 555
 556static int rt6_score_route(struct rt6_info *rt, int oif,
 557                           int strict)
 558{
 559        int m;
 560
 561        m = rt6_check_dev(rt, oif);
 562        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 563                return -1;
 564#ifdef CONFIG_IPV6_ROUTER_PREF
 565        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 566#endif
 567        if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
 568                return -1;
 569        return m;
 570}
 571
 572static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 573                                   int *mpri, struct rt6_info *match)
 574{
 575        int m;
 576
 577        if (rt6_check_expired(rt))
 578                goto out;
 579
 580        m = rt6_score_route(rt, oif, strict);
 581        if (m < 0)
 582                goto out;
 583
 584        if (m > *mpri) {
 585                if (strict & RT6_LOOKUP_F_REACHABLE)
 586                        rt6_probe(match);
 587                *mpri = m;
 588                match = rt;
 589        } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 590                rt6_probe(rt);
 591        }
 592
 593out:
 594        return match;
 595}
 596
 597static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 598                                     struct rt6_info *rr_head,
 599                                     u32 metric, int oif, int strict)
 600{
 601        struct rt6_info *rt, *match;
 602        int mpri = -1;
 603
 604        match = NULL;
 605        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 606             rt = rt->dst.rt6_next)
 607                match = find_match(rt, oif, strict, &mpri, match);
 608        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 609             rt = rt->dst.rt6_next)
 610                match = find_match(rt, oif, strict, &mpri, match);
 611
 612        return match;
 613}
 614
 615static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 616{
 617        struct rt6_info *match, *rt0;
 618        struct net *net;
 619
 620        rt0 = fn->rr_ptr;
 621        if (!rt0)
 622                fn->rr_ptr = rt0 = fn->leaf;
 623
 624        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 625
 626        if (!match &&
 627            (strict & RT6_LOOKUP_F_REACHABLE)) {
 628                struct rt6_info *next = rt0->dst.rt6_next;
 629
 630                /* no entries matched; do round-robin */
 631                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 632                        next = fn->leaf;
 633
 634                if (next != rt0)
 635                        fn->rr_ptr = next;
 636        }
 637
 638        net = dev_net(rt0->dst.dev);
 639        return match ? match : net->ipv6.ip6_null_entry;
 640}
 641
 #ifdef CONFIG_IPV6_ROUTE_INFO
 /*
  * Process a route information option (@opt, @len bytes) received on
  * @dev from the router @gwaddr.
  *
  * Returns -EINVAL for an option that is too short, has an inconsistent
  * length / prefix_len pair or an invalid preference, and 0 otherwise.
  * A zero lifetime deletes a matching existing route; a non-zero
  * lifetime adds the route or refreshes its preference, and its expiry
  * is set or cleared depending on whether the lifetime is finite.
  */
 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                   const struct in6_addr *gwaddr)
 {
         struct route_info *rinfo = (struct route_info *) opt;
         struct net *net = dev_net(dev);
         struct in6_addr prefix_buf, *prefix;
         unsigned long lifetime;
         unsigned int pref;
         struct rt6_info *rt;

         if (len < sizeof(struct route_info))
                 return -EINVAL;

         /* Sanity check for prefix_len and length */
         if (rinfo->length > 3 || rinfo->prefix_len > 128)
                 return -EINVAL;
         if (rinfo->prefix_len > 64 && rinfo->length < 2)
                 return -EINVAL;
         if (rinfo->prefix_len > 0 && rinfo->length < 1)
                 return -EINVAL;

         pref = rinfo->route_pref;
         if (pref == ICMPV6_ROUTER_PREF_INVALID)
                 return -EINVAL;

         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

         if (rinfo->length == 3) {
                 prefix = (struct in6_addr *)rinfo->prefix;
         } else {
                 /* this function is safe */
                 ipv6_addr_prefix(&prefix_buf,
                                  (struct in6_addr *)rinfo->prefix,
                                  rinfo->prefix_len);
                 prefix = &prefix_buf;
         }

         rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
                                 dev->ifindex);

         if (rt) {
                 if (!lifetime) {
                         ip6_del_rt(rt);
                         rt = NULL;
                 } else {
                         rt->rt6i_flags = RTF_ROUTEINFO |
                                          (rt->rt6i_flags & ~RTF_PREF_MASK) |
                                          RTF_PREF(pref);
                 }
         } else if (lifetime) {
                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
                                         gwaddr, dev->ifindex, pref);
         }

         if (!rt)
                 return 0;

         if (!addrconf_finite_timeout(lifetime))
                 rt6_clean_expires(rt);
         else
                 rt6_set_expires(rt, jiffies + HZ * lifetime);

         ip6_rt_put(rt);
         return 0;
 }
 #endif
 714
 /*
  * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
  *
  * Only acts when the caller's local "rt" is __net's ip6_null_entry.  It
  * then walks upward from the caller's local "fn": at each step it moves
  * to the parent, or, when the parent has a source subtree that is not
  * the node just left, to the result of looking up saddr in that subtree.
  * The walk ends with "goto restart" at the first node flagged RTN_RTINFO,
  * or with "goto out" when the node being left is flagged RTN_TL_ROOT.
  *
  * The expansion site must therefore provide the variables "rt" and "fn"
  * and the labels "restart" and "out".
  */
 715#define BACKTRACK(__net, saddr)                 \
 716do { \
 717        if (rt == __net->ipv6.ip6_null_entry) { \
 718                struct fib6_node *pn; \
 719                while (1) { \
 720                        if (fn->fn_flags & RTN_TL_ROOT) \
 721                                goto out; \
 722                        pn = fn->parent; \
 723                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 724                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 725                        else \
 726                                fn = pn; \
 727                        if (fn->fn_flags & RTN_RTINFO) \
 728                                goto restart; \
 729                } \
 730        } \
 731} while (0)
 732
 733static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 734                                             struct fib6_table *table,
 735                                             struct flowi6 *fl6, int flags)
 736{
 737        struct fib6_node *fn;
 738        struct rt6_info *rt;
 739
 740        read_lock_bh(&table->tb6_lock);
 741        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 742restart:
 743        rt = fn->leaf;
 744        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 745        if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 746                rt = rt6_multipath_select(rt, fl6);
 747        BACKTRACK(net, &fl6->saddr);
 748out:
 749        dst_use(&rt->dst, jiffies);
 750        read_unlock_bh(&table->tb6_lock);
 751        return rt;
 752
 753}
 754
 755struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 756                                    int flags)
 757{
 758        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 759}
 760EXPORT_SYMBOL_GPL(ip6_route_lookup);
 761
 762struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 763                            const struct in6_addr *saddr, int oif, int strict)
 764{
 765        struct flowi6 fl6 = {
 766                .flowi6_oif = oif,
 767                .daddr = *daddr,
 768        };
 769        struct dst_entry *dst;
 770        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 771
 772        if (saddr) {
 773                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 774                flags |= RT6_LOOKUP_F_HAS_SADDR;
 775        }
 776
 777        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 778        if (dst->error == 0)
 779                return (struct rt6_info *) dst;
 780
 781        dst_release(dst);
 782
 783        return NULL;
 784}
 785
 786EXPORT_SYMBOL(rt6_lookup);
 787
 788/* ip6_ins_rt is called with FREE table->tb6_lock.
 789   It takes new route entry, the addition fails by any reason the
 790   route is freed. In any case, if caller does not hold it, it may
 791   be destroyed.
 792 */
 793
 794static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 795{
 796        int err;
 797        struct fib6_table *table;
 798
 799        table = rt->rt6i_table;
 800        write_lock_bh(&table->tb6_lock);
 801        err = fib6_add(&table->tb6_root, rt, info);
 802        write_unlock_bh(&table->tb6_lock);
 803
 804        return err;
 805}
 806
 807int ip6_ins_rt(struct rt6_info *rt)
 808{
 809        struct nl_info info = {
 810                .nl_net = dev_net(rt->dst.dev),
 811        };
 812        return __ip6_ins_rt(rt, &info);
 813}
 814
 815static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 816                                      const struct in6_addr *daddr,
 817                                      const struct in6_addr *saddr)
 818{
 819        struct rt6_info *rt;
 820
 821        /*
 822         *      Clone the route.
 823         */
 824
 825        rt = ip6_rt_copy(ort, daddr);
 826
 827        if (rt) {
 828                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 829                        if (ort->rt6i_dst.plen != 128 &&
 830                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 831                                rt->rt6i_flags |= RTF_ANYCAST;
 832                        rt->rt6i_gateway = *daddr;
 833                }
 834
 835                rt->rt6i_flags |= RTF_CACHE;
 836
 837#ifdef CONFIG_IPV6_SUBTREES
 838                if (rt->rt6i_src.plen && saddr) {
 839                        rt->rt6i_src.addr = *saddr;
 840                        rt->rt6i_src.plen = 128;
 841                }
 842#endif
 843        }
 844
 845        return rt;
 846}
 847
 848static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 849                                        const struct in6_addr *daddr)
 850{
 851        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 852
 853        if (rt)
 854                rt->rt6i_flags |= RTF_CACHE;
 855        return rt;
 856}
 857
 /*
  * Policy-routing lookup of the flow @fl6 in @table for interface @oif.
  *
  * Returns a route with a reference held; this is the namespace's
  * ip6_null_entry when nothing matches or a copy cannot be allocated.
  *
  * The first pass asks rt6_select() for RT6_LOOKUP_F_REACHABLE routers
  * unless forwarding is enabled in devconf_all.  If that pass ends at the
  * "out" label, the lookup is redone once without that flag.
  *
  * A result that is neither the null entry nor already RTF_CACHE is
  * copied with rt6_alloc_cow() or rt6_alloc_clone(), and the copy is
  * inserted with ip6_ins_rt().  A failed insert causes a fresh lookup,
  * bounded by "attempts".
  */
 858static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 859                                      struct flowi6 *fl6, int flags)
 860{
 861        struct fib6_node *fn;
 862        struct rt6_info *rt, *nrt;
 863        int strict = 0;
 864        int attempts = 3;
 865        int err;
 866        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 867
 868        strict |= flags & RT6_LOOKUP_F_IFACE;
 869
 /* Re-entered without the lock after a failed insert. */
 870relookup:
 871        read_lock_bh(&table->tb6_lock);
 872
 /* Re-entered with the lock held, after dropping the reachable flag. */
 873restart_2:
 874        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 875
 /* Also the target of BACKTRACK() once it has moved fn. */
 876restart:
 877        rt = rt6_select(fn, oif, strict | reachable);
 878        if (rt->rt6i_nsiblings && oif == 0)
 879                rt = rt6_multipath_select(rt, fl6);
 880        BACKTRACK(net, &fl6->saddr);
 881        if (rt == net->ipv6.ip6_null_entry ||
 882            rt->rt6i_flags & RTF_CACHE)
 883                goto out;
 884
 /* Pin the route before the table lock is dropped for the copy. */
 885        dst_hold(&rt->dst);
 886        read_unlock_bh(&table->tb6_lock);
 887
 /*
  * Routes with neither RTF_NONEXTHOP nor RTF_GATEWAY get a cow copy,
  * other non-DST_HOST routes a plain clone, and the rest are returned
  * as they are.
  */
 888        if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
 889                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 890        else if (!(rt->dst.flags & DST_HOST))
 891                nrt = rt6_alloc_clone(rt, &fl6->daddr);
 892        else
 893                goto out2;
 894
 /* Swap the reference: drop the original, hold the copy or null entry. */
 895        ip6_rt_put(rt);
 896        rt = nrt ? : net->ipv6.ip6_null_entry;
 897
 898        dst_hold(&rt->dst);
 899        if (nrt) {
 900                err = ip6_ins_rt(nrt);
 901                if (!err)
 902                        goto out2;
 903        }
 904
 /* Retry budget used up: return what we have. */
 905        if (--attempts <= 0)
 906                goto out2;
 907
 908        /*
 909         * Race condition! In the gap, when table->tb6_lock was
 910         * released someone could insert this route.  Relookup.
 911         */
 912        ip6_rt_put(rt);
 913        goto relookup;
 914
 /* Reached with the table read lock still held. */
 915out:
 916        if (reachable) {
 917                reachable = 0;
 918                goto restart_2;
 919        }
 920        dst_hold(&rt->dst);
 921        read_unlock_bh(&table->tb6_lock);
 /* Reached without the lock and with a reference already held on rt. */
 922out2:
 923        rt->dst.lastuse = jiffies;
 924        rt->dst.__use++;
 925
 926        return rt;
 927}
 928
 929static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 930                                            struct flowi6 *fl6, int flags)
 931{
 932        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 933}
 934
 935static struct dst_entry *ip6_route_input_lookup(struct net *net,
 936                                                struct net_device *dev,
 937                                                struct flowi6 *fl6, int flags)
 938{
 939        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 940                flags |= RT6_LOOKUP_F_IFACE;
 941
 942        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 943}
 944
 945void ip6_route_input(struct sk_buff *skb)
 946{
 947        const struct ipv6hdr *iph = ipv6_hdr(skb);
 948        struct net *net = dev_net(skb->dev);
 949        int flags = RT6_LOOKUP_F_HAS_SADDR;
 950        struct flowi6 fl6 = {
 951                .flowi6_iif = skb->dev->ifindex,
 952                .daddr = iph->daddr,
 953                .saddr = iph->saddr,
 954                .flowlabel = ip6_flowinfo(iph),
 955                .flowi6_mark = skb->mark,
 956                .flowi6_proto = iph->nexthdr,
 957        };
 958
 959        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 960}
 961
 962static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 963                                             struct flowi6 *fl6, int flags)
 964{
 965        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 966}
 967
 968struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 969                                    struct flowi6 *fl6)
 970{
 971        int flags = 0;
 972
 973        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 974
 975        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 976                flags |= RT6_LOOKUP_F_IFACE;
 977
 978        if (!ipv6_addr_any(&fl6->saddr))
 979                flags |= RT6_LOOKUP_F_HAS_SADDR;
 980        else if (sk)
 981                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 982
 983        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 984}
 985
 986EXPORT_SYMBOL(ip6_route_output);
 987
 988struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 989{
 990        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 991        struct dst_entry *new = NULL;
 992
 993        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
 994        if (rt) {
 995                new = &rt->dst;
 996
 997                memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
 998                rt6_init_peer(rt, net->ipv6.peers);
 999
1000                new->__use = 1;
1001                new->input = dst_discard;
1002                new->output = dst_discard;
1003
1004                if (dst_metrics_read_only(&ort->dst))
1005                        new->_metrics = ort->dst._metrics;
1006                else
1007                        dst_copy_metrics(new, &ort->dst);
1008                rt->rt6i_idev = ort->rt6i_idev;
1009                if (rt->rt6i_idev)
1010                        in6_dev_hold(rt->rt6i_idev);
1011
1012                rt->rt6i_gateway = ort->rt6i_gateway;
1013                rt->rt6i_flags = ort->rt6i_flags;
1014                rt->rt6i_metric = 0;
1015
1016                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017#ifdef CONFIG_IPV6_SUBTREES
1018                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019#endif
1020
1021                dst_free(new);
1022        }
1023
1024        dst_release(dst_orig);
1025        return new ? new : ERR_PTR(-ENOMEM);
1026}
1027
1028/*
1029 *      Destination cache support functions
1030 */
1031
1032static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033{
1034        struct rt6_info *rt;
1035
1036        rt = (struct rt6_info *) dst;
1037
1038        /* All IPV6 dsts are created with ->obsolete set to the value
1039         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1040         * into this function always.
1041         */
1042        if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043                return NULL;
1044
1045        if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046                return dst;
1047
1048        return NULL;
1049}
1050
1051static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052{
1053        struct rt6_info *rt = (struct rt6_info *) dst;
1054
1055        if (rt) {
1056                if (rt->rt6i_flags & RTF_CACHE) {
1057                        if (rt6_check_expired(rt)) {
1058                                ip6_del_rt(rt);
1059                                dst = NULL;
1060                        }
1061                } else {
1062                        dst_release(dst);
1063                        dst = NULL;
1064                }
1065        }
1066        return dst;
1067}
1068
1069static void ip6_link_failure(struct sk_buff *skb)
1070{
1071        struct rt6_info *rt;
1072
1073        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074
1075        rt = (struct rt6_info *) skb_dst(skb);
1076        if (rt) {
1077                if (rt->rt6i_flags & RTF_CACHE)
1078                        rt6_update_expires(rt, 0);
1079                else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080                        rt->rt6i_node->fn_sernum = -1;
1081        }
1082}
1083
1084static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085                               struct sk_buff *skb, u32 mtu)
1086{
1087        struct rt6_info *rt6 = (struct rt6_info*)dst;
1088
1089        dst_confirm(dst);
1090        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091                struct net *net = dev_net(dst->dev);
1092
1093                rt6->rt6i_flags |= RTF_MODIFIED;
1094                if (mtu < IPV6_MIN_MTU) {
1095                        u32 features = dst_metric(dst, RTAX_FEATURES);
1096                        mtu = IPV6_MIN_MTU;
1097                        features |= RTAX_FEATURE_ALLFRAG;
1098                        dst_metric_set(dst, RTAX_FEATURES, features);
1099                }
1100                dst_metric_set(dst, RTAX_MTU, mtu);
1101                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102        }
1103}
1104
1105void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106                     int oif, u32 mark)
1107{
1108        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109        struct dst_entry *dst;
1110        struct flowi6 fl6;
1111
1112        memset(&fl6, 0, sizeof(fl6));
1113        fl6.flowi6_oif = oif;
1114        fl6.flowi6_mark = mark;
1115        fl6.flowi6_flags = 0;
1116        fl6.daddr = iph->daddr;
1117        fl6.saddr = iph->saddr;
1118        fl6.flowlabel = ip6_flowinfo(iph);
1119
1120        dst = ip6_route_output(net, NULL, &fl6);
1121        if (!dst->error)
1122                ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123        dst_release(dst);
1124}
1125EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
1127void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128{
1129        ip6_update_pmtu(skb, sock_net(sk), mtu,
1130                        sk->sk_bound_dev_if, sk->sk_mark);
1131}
1132EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
1134void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135{
1136        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137        struct dst_entry *dst;
1138        struct flowi6 fl6;
1139
1140        memset(&fl6, 0, sizeof(fl6));
1141        fl6.flowi6_oif = oif;
1142        fl6.flowi6_mark = mark;
1143        fl6.flowi6_flags = 0;
1144        fl6.daddr = iph->daddr;
1145        fl6.saddr = iph->saddr;
1146        fl6.flowlabel = ip6_flowinfo(iph);
1147
1148        dst = ip6_route_output(net, NULL, &fl6);
1149        if (!dst->error)
1150                rt6_do_redirect(dst, NULL, skb);
1151        dst_release(dst);
1152}
1153EXPORT_SYMBOL_GPL(ip6_redirect);
1154
1155void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156{
1157        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158}
1159EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160
1161static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162{
1163        struct net_device *dev = dst->dev;
1164        unsigned int mtu = dst_mtu(dst);
1165        struct net *net = dev_net(dev);
1166
1167        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168
1169        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171
1172        /*
1173         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175         * IPV6_MAXPLEN is also valid and means: "any MSS,
1176         * rely only on pmtu discovery"
1177         */
1178        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179                mtu = IPV6_MAXPLEN;
1180        return mtu;
1181}
1182
1183static unsigned int ip6_mtu(const struct dst_entry *dst)
1184{
1185        struct inet6_dev *idev;
1186        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187
1188        if (mtu)
1189                return mtu;
1190
1191        mtu = IPV6_MIN_MTU;
1192
1193        rcu_read_lock();
1194        idev = __in6_dev_get(dst->dev);
1195        if (idev)
1196                mtu = idev->cnf.mtu6;
1197        rcu_read_unlock();
1198
1199        return mtu;
1200}
1201
1202static struct dst_entry *icmp6_dst_gc_list;
1203static DEFINE_SPINLOCK(icmp6_dst_lock);
1204
1205struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206                                  struct flowi6 *fl6)
1207{
1208        struct dst_entry *dst;
1209        struct rt6_info *rt;
1210        struct inet6_dev *idev = in6_dev_get(dev);
1211        struct net *net = dev_net(dev);
1212
1213        if (unlikely(!idev))
1214                return ERR_PTR(-ENODEV);
1215
1216        rt = ip6_dst_alloc(net, dev, 0, NULL);
1217        if (unlikely(!rt)) {
1218                in6_dev_put(idev);
1219                dst = ERR_PTR(-ENOMEM);
1220                goto out;
1221        }
1222
1223        rt->dst.flags |= DST_HOST;
1224        rt->dst.output  = ip6_output;
1225        atomic_set(&rt->dst.__refcnt, 1);
1226        rt->rt6i_dst.addr = fl6->daddr;
1227        rt->rt6i_dst.plen = 128;
1228        rt->rt6i_idev     = idev;
1229        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230
1231        spin_lock_bh(&icmp6_dst_lock);
1232        rt->dst.next = icmp6_dst_gc_list;
1233        icmp6_dst_gc_list = &rt->dst;
1234        spin_unlock_bh(&icmp6_dst_lock);
1235
1236        fib6_force_start_gc(net);
1237
1238        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239
1240out:
1241        return dst;
1242}
1243
1244int icmp6_dst_gc(void)
1245{
1246        struct dst_entry *dst, **pprev;
1247        int more = 0;
1248
1249        spin_lock_bh(&icmp6_dst_lock);
1250        pprev = &icmp6_dst_gc_list;
1251
1252        while ((dst = *pprev) != NULL) {
1253                if (!atomic_read(&dst->__refcnt)) {
1254                        *pprev = dst->next;
1255                        dst_free(dst);
1256                } else {
1257                        pprev = &dst->next;
1258                        ++more;
1259                }
1260        }
1261
1262        spin_unlock_bh(&icmp6_dst_lock);
1263
1264        return more;
1265}
1266
1267static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268                            void *arg)
1269{
1270        struct dst_entry *dst, **pprev;
1271
1272        spin_lock_bh(&icmp6_dst_lock);
1273        pprev = &icmp6_dst_gc_list;
1274        while ((dst = *pprev) != NULL) {
1275                struct rt6_info *rt = (struct rt6_info *) dst;
1276                if (func(rt, arg)) {
1277                        *pprev = dst->next;
1278                        dst_free(dst);
1279                } else {
1280                        pprev = &dst->next;
1281                }
1282        }
1283        spin_unlock_bh(&icmp6_dst_lock);
1284}
1285
1286static int ip6_dst_gc(struct dst_ops *ops)
1287{
1288        unsigned long now = jiffies;
1289        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295        int entries;
1296
1297        entries = dst_entries_get_fast(ops);
1298        if (time_after(rt_last_gc + rt_min_interval, now) &&
1299            entries <= rt_max_size)
1300                goto out;
1301
1302        net->ipv6.ip6_rt_gc_expire++;
1303        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304        net->ipv6.ip6_rt_last_gc = now;
1305        entries = dst_entries_get_slow(ops);
1306        if (entries < ops->gc_thresh)
1307                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308out:
1309        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310        return entries > rt_max_size;
1311}
1312
1313int ip6_dst_hoplimit(struct dst_entry *dst)
1314{
1315        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316        if (hoplimit == 0) {
1317                struct net_device *dev = dst->dev;
1318                struct inet6_dev *idev;
1319
1320                rcu_read_lock();
1321                idev = __in6_dev_get(dev);
1322                if (idev)
1323                        hoplimit = idev->cnf.hop_limit;
1324                else
1325                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326                rcu_read_unlock();
1327        }
1328        return hoplimit;
1329}
1330EXPORT_SYMBOL(ip6_dst_hoplimit);
1331
1332/*
1333 *
1334 */
1335
1336int ip6_route_add(struct fib6_config *cfg)
1337{
1338        int err;
1339        struct net *net = cfg->fc_nlinfo.nl_net;
1340        struct rt6_info *rt = NULL;
1341        struct net_device *dev = NULL;
1342        struct inet6_dev *idev = NULL;
1343        struct fib6_table *table;
1344        int addr_type;
1345
1346        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347                return -EINVAL;
1348#ifndef CONFIG_IPV6_SUBTREES
1349        if (cfg->fc_src_len)
1350                return -EINVAL;
1351#endif
1352        if (cfg->fc_ifindex) {
1353                err = -ENODEV;
1354                dev = dev_get_by_index(net, cfg->fc_ifindex);
1355                if (!dev)
1356                        goto out;
1357                idev = in6_dev_get(dev);
1358                if (!idev)
1359                        goto out;
1360        }
1361
1362        if (cfg->fc_metric == 0)
1363                cfg->fc_metric = IP6_RT_PRIO_USER;
1364
1365        err = -ENOBUFS;
1366        if (cfg->fc_nlinfo.nlh &&
1367            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368                table = fib6_get_table(net, cfg->fc_table);
1369                if (!table) {
1370                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371                        table = fib6_new_table(net, cfg->fc_table);
1372                }
1373        } else {
1374                table = fib6_new_table(net, cfg->fc_table);
1375        }
1376
1377        if (!table)
1378                goto out;
1379
1380        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381
1382        if (!rt) {
1383                err = -ENOMEM;
1384                goto out;
1385        }
1386
1387        if (cfg->fc_flags & RTF_EXPIRES)
1388                rt6_set_expires(rt, jiffies +
1389                                clock_t_to_jiffies(cfg->fc_expires));
1390        else
1391                rt6_clean_expires(rt);
1392
1393        if (cfg->fc_protocol == RTPROT_UNSPEC)
1394                cfg->fc_protocol = RTPROT_BOOT;
1395        rt->rt6i_protocol = cfg->fc_protocol;
1396
1397        addr_type = ipv6_addr_type(&cfg->fc_dst);
1398
1399        if (addr_type & IPV6_ADDR_MULTICAST)
1400                rt->dst.input = ip6_mc_input;
1401        else if (cfg->fc_flags & RTF_LOCAL)
1402                rt->dst.input = ip6_input;
1403        else
1404                rt->dst.input = ip6_forward;
1405
1406        rt->dst.output = ip6_output;
1407
1408        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409        rt->rt6i_dst.plen = cfg->fc_dst_len;
1410        if (rt->rt6i_dst.plen == 128)
1411               rt->dst.flags |= DST_HOST;
1412
1413        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415                if (!metrics) {
1416                        err = -ENOMEM;
1417                        goto out;
1418                }
1419                dst_init_metrics(&rt->dst, metrics, 0);
1420        }
1421#ifdef CONFIG_IPV6_SUBTREES
1422        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423        rt->rt6i_src.plen = cfg->fc_src_len;
1424#endif
1425
1426        rt->rt6i_metric = cfg->fc_metric;
1427
1428        /* We cannot add true routes via loopback here,
1429           they would result in kernel looping; promote them to reject routes
1430         */
1431        if ((cfg->fc_flags & RTF_REJECT) ||
1432            (dev && (dev->flags & IFF_LOOPBACK) &&
1433             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434             !(cfg->fc_flags & RTF_LOCAL))) {
1435                /* hold loopback dev/idev if we haven't done so. */
1436                if (dev != net->loopback_dev) {
1437                        if (dev) {
1438                                dev_put(dev);
1439                                in6_dev_put(idev);
1440                        }
1441                        dev = net->loopback_dev;
1442                        dev_hold(dev);
1443                        idev = in6_dev_get(dev);
1444                        if (!idev) {
1445                                err = -ENODEV;
1446                                goto out;
1447                        }
1448                }
1449                rt->dst.output = ip6_pkt_discard_out;
1450                rt->dst.input = ip6_pkt_discard;
1451                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1452                switch (cfg->fc_type) {
1453                case RTN_BLACKHOLE:
1454                        rt->dst.error = -EINVAL;
1455                        break;
1456                case RTN_PROHIBIT:
1457                        rt->dst.error = -EACCES;
1458                        break;
1459                case RTN_THROW:
1460                        rt->dst.error = -EAGAIN;
1461                        break;
1462                default:
1463                        rt->dst.error = -ENETUNREACH;
1464                        break;
1465                }
1466                goto install_route;
1467        }
1468
1469        if (cfg->fc_flags & RTF_GATEWAY) {
1470                const struct in6_addr *gw_addr;
1471                int gwa_type;
1472
1473                gw_addr = &cfg->fc_gateway;
1474                rt->rt6i_gateway = *gw_addr;
1475                gwa_type = ipv6_addr_type(gw_addr);
1476
1477                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478                        struct rt6_info *grt;
1479
1480                        /* IPv6 strictly inhibits using not link-local
1481                           addresses as nexthop address.
1482                           Otherwise, router will not able to send redirects.
1483                           It is very good, but in some (rare!) circumstances
1484                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1485                           some exceptions. --ANK
1486                         */
1487                        err = -EINVAL;
1488                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1489                                goto out;
1490
1491                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492
1493                        err = -EHOSTUNREACH;
1494                        if (!grt)
1495                                goto out;
1496                        if (dev) {
1497                                if (dev != grt->dst.dev) {
1498                                        ip6_rt_put(grt);
1499                                        goto out;
1500                                }
1501                        } else {
1502                                dev = grt->dst.dev;
1503                                idev = grt->rt6i_idev;
1504                                dev_hold(dev);
1505                                in6_dev_hold(grt->rt6i_idev);
1506                        }
1507                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1508                                err = 0;
1509                        ip6_rt_put(grt);
1510
1511                        if (err)
1512                                goto out;
1513                }
1514                err = -EINVAL;
1515                if (!dev || (dev->flags & IFF_LOOPBACK))
1516                        goto out;
1517        }
1518
1519        err = -ENODEV;
1520        if (!dev)
1521                goto out;
1522
1523        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525                        err = -EINVAL;
1526                        goto out;
1527                }
1528                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529                rt->rt6i_prefsrc.plen = 128;
1530        } else
1531                rt->rt6i_prefsrc.plen = 0;
1532
1533        rt->rt6i_flags = cfg->fc_flags;
1534
1535install_route:
1536        if (cfg->fc_mx) {
1537                struct nlattr *nla;
1538                int remaining;
1539
1540                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541                        int type = nla_type(nla);
1542
1543                        if (type) {
1544                                if (type > RTAX_MAX) {
1545                                        err = -EINVAL;
1546                                        goto out;
1547                                }
1548
1549                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550                        }
1551                }
1552        }
1553
1554        rt->dst.dev = dev;
1555        rt->rt6i_idev = idev;
1556        rt->rt6i_table = table;
1557
1558        cfg->fc_nlinfo.nl_net = dev_net(dev);
1559
1560        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561
1562out:
1563        if (dev)
1564                dev_put(dev);
1565        if (idev)
1566                in6_dev_put(idev);
1567        if (rt)
1568                dst_free(&rt->dst);
1569        return err;
1570}
1571
1572static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573{
1574        int err;
1575        struct fib6_table *table;
1576        struct net *net = dev_net(rt->dst.dev);
1577
1578        if (rt == net->ipv6.ip6_null_entry) {
1579                err = -ENOENT;
1580                goto out;
1581        }
1582
1583        table = rt->rt6i_table;
1584        write_lock_bh(&table->tb6_lock);
1585        err = fib6_del(rt, info);
1586        write_unlock_bh(&table->tb6_lock);
1587
1588out:
1589        ip6_rt_put(rt);
1590        return err;
1591}
1592
1593int ip6_del_rt(struct rt6_info *rt)
1594{
1595        struct nl_info info = {
1596                .nl_net = dev_net(rt->dst.dev),
1597        };
1598        return __ip6_del_rt(rt, &info);
1599}
1600
1601static int ip6_route_del(struct fib6_config *cfg)
1602{
1603        struct fib6_table *table;
1604        struct fib6_node *fn;
1605        struct rt6_info *rt;
1606        int err = -ESRCH;
1607
1608        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609        if (!table)
1610                return err;
1611
1612        read_lock_bh(&table->tb6_lock);
1613
1614        fn = fib6_locate(&table->tb6_root,
1615                         &cfg->fc_dst, cfg->fc_dst_len,
1616                         &cfg->fc_src, cfg->fc_src_len);
1617
1618        if (fn) {
1619                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620                        if (cfg->fc_ifindex &&
1621                            (!rt->dst.dev ||
1622                             rt->dst.dev->ifindex != cfg->fc_ifindex))
1623                                continue;
1624                        if (cfg->fc_flags & RTF_GATEWAY &&
1625                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626                                continue;
1627                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628                                continue;
1629                        dst_hold(&rt->dst);
1630                        read_unlock_bh(&table->tb6_lock);
1631
1632                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633                }
1634        }
1635        read_unlock_bh(&table->tb6_lock);
1636
1637        return err;
1638}
1639
1640static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641{
            /*
             * Handle a received redirect message carried in @skb for the
             * route @dst.
             *
             * The message is validated first; any failure returns silently
             * (optionally with a rate-limited debug message).  Once accepted,
             * the neighbour entry for the redirect target is updated, a
             * cached copy of the route pointing at the new next hop is
             * inserted, netevent listeners are notified, and a previous
             * RTF_CACHE route is deleted.  @sk is not used in this function.
             */
1642        struct net *net = dev_net(skb->dev);
1643        struct netevent_redirect netevent;
1644        struct rt6_info *rt, *nrt = NULL;
1645        struct ndisc_options ndopts;
1646        struct inet6_dev *in6_dev;
1647        struct neighbour *neigh;
1648        struct rd_msg *msg;
1649        int optlen, on_link;
1650        u8 *lladdr;
1651
            /* Bytes that follow the fixed rd_msg header, i.e. the options. */
1652        optlen = skb->tail - skb->transport_header;
1653        optlen -= sizeof(*msg);
1654
1655        if (optlen < 0) {
1656                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657                return;
1658        }
1659
1660        msg = (struct rd_msg *)icmp6_hdr(skb);
1661
1662        if (ipv6_addr_is_multicast(&msg->dest)) {
1663                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664                return;
1665        }
1666
            /*
             * target == dest means the destination is on-link; otherwise the
             * target must be a link-local unicast address.
             */
1667        on_link = 0;
1668        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669                on_link = 1;
1670        } else if (ipv6_addr_type(&msg->target) !=
1671                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673                return;
1674        }
1675
            /* Ignore redirects when forwarding or when accept_redirects is off. */
1676        in6_dev = __in6_dev_get(skb->dev);
1677        if (!in6_dev)
1678                return;
1679        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680                return;
1681
1682        /* RFC2461 8.1:
1683         *      The IP source address of the Redirect MUST be the same as the current
1684         *      first-hop router for the specified ICMP Destination Address.
1685         */
1686
1687        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689                return;
1690        }
1691
            /* Optional target link-layer address option. */
1692        lladdr = NULL;
1693        if (ndopts.nd_opts_tgt_lladdr) {
1694                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695                                             skb->dev);
1696                if (!lladdr) {
1697                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698                        return;
1699                }
1700        }
1701
1702        rt = (struct rt6_info *) dst;
1703        if (rt == net->ipv6.ip6_null_entry) {
1704                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705                return;
1706        }
1707
1708        /* Redirect received -> path was valid.
1709         * Look, redirects are sent only in response to data packets,
1710         * so that this nexthop apparently is reachable. --ANK
1711         */
1712        dst_confirm(&rt->dst);
1713
            /* NOTE(review): the final argument 1 presumably asks for creation on miss - confirm. */
1714        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715        if (!neigh)
1716                return;
1717
1718        /*
1719         *      We have finally decided to accept it.
1720         */
1721
            /* The router flags are only passed when the target is not on-link. */
1722        neigh_update(neigh, lladdr, NUD_STALE,
1723                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724                     NEIGH_UPDATE_F_OVERRIDE|
1725                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726                                     NEIGH_UPDATE_F_ISROUTER))
1727                     );
1728
            /* New cached host route for the redirected destination. */
1729        nrt = ip6_rt_copy(rt, &msg->dest);
1730        if (!nrt)
1731                goto out;
1732
1733        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734        if (on_link)
1735                nrt->rt6i_flags &= ~RTF_GATEWAY;
1736
1737        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738
            /* NOTE(review): nrt is not freed here on failure; presumably ip6_ins_rt() disposes of it - verify. */
1739        if (ip6_ins_rt(nrt))
1740                goto out;
1741
1742        netevent.old = &rt->dst;
1743        netevent.new = &nrt->dst;
1744        netevent.daddr = &msg->dest;
1745        netevent.neigh = neigh;
1746        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747
            /*
             * Drop the superseded cache entry.  An extra reference is taken
             * first because the delete path releases one on the route.
             */
1748        if (rt->rt6i_flags & RTF_CACHE) {
1749                rt = (struct rt6_info *) dst_clone(&rt->dst);
1750                ip6_del_rt(rt);
1751        }
1752
1753out:
1754        neigh_release(neigh);
1755}
1756
1757/*
1758 *      Misc support functions
1759 */
1760
1761static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762                                    const struct in6_addr *dest)
1763{
1764        struct net *net = dev_net(ort->dst.dev);
1765        struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766                                            ort->rt6i_table);
1767
1768        if (rt) {
1769                rt->dst.input = ort->dst.input;
1770                rt->dst.output = ort->dst.output;
1771                rt->dst.flags |= DST_HOST;
1772
1773                rt->rt6i_dst.addr = *dest;
1774                rt->rt6i_dst.plen = 128;
1775                dst_copy_metrics(&rt->dst, &ort->dst);
1776                rt->dst.error = ort->dst.error;
1777                rt->rt6i_idev = ort->rt6i_idev;
1778                if (rt->rt6i_idev)
1779                        in6_dev_hold(rt->rt6i_idev);
1780                rt->dst.lastuse = jiffies;
1781
1782                rt->rt6i_gateway = ort->rt6i_gateway;
1783                rt->rt6i_flags = ort->rt6i_flags;
1784                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785                    (RTF_DEFAULT | RTF_ADDRCONF))
1786                        rt6_set_from(rt, ort);
1787                rt->rt6i_metric = 0;
1788
1789#ifdef CONFIG_IPV6_SUBTREES
1790                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1791#endif
1792                memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1793                rt->rt6i_table = ort->rt6i_table;
1794        }
1795        return rt;
1796}
1797
1798#ifdef CONFIG_IPV6_ROUTE_INFO
1799static struct rt6_info *rt6_get_route_info(struct net *net,
1800                                           const struct in6_addr *prefix, int prefixlen,
1801                                           const struct in6_addr *gwaddr, int ifindex)
1802{
1803        struct fib6_node *fn;
1804        struct rt6_info *rt = NULL;
1805        struct fib6_table *table;
1806
1807        table = fib6_get_table(net, RT6_TABLE_INFO);
1808        if (!table)
1809                return NULL;
1810
1811        read_lock_bh(&table->tb6_lock);
1812        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1813        if (!fn)
1814                goto out;
1815
1816        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1817                if (rt->dst.dev->ifindex != ifindex)
1818                        continue;
1819                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1820                        continue;
1821                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1822                        continue;
1823                dst_hold(&rt->dst);
1824                break;
1825        }
1826out:
1827        read_unlock_bh(&table->tb6_lock);
1828        return rt;
1829}
1830
1831static struct rt6_info *rt6_add_route_info(struct net *net,
1832                                           const struct in6_addr *prefix, int prefixlen,
1833                                           const struct in6_addr *gwaddr, int ifindex,
1834                                           unsigned int pref)
1835{
1836        struct fib6_config cfg = {
1837                .fc_table       = RT6_TABLE_INFO,
1838                .fc_metric      = IP6_RT_PRIO_USER,
1839                .fc_ifindex     = ifindex,
1840                .fc_dst_len     = prefixlen,
1841                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1842                                  RTF_UP | RTF_PREF(pref),
1843                .fc_nlinfo.portid = 0,
1844                .fc_nlinfo.nlh = NULL,
1845                .fc_nlinfo.nl_net = net,
1846        };
1847
1848        cfg.fc_dst = *prefix;
1849        cfg.fc_gateway = *gwaddr;
1850
1851        /* We should treat it as a default route if prefix length is 0. */
1852        if (!prefixlen)
1853                cfg.fc_flags |= RTF_DEFAULT;
1854
1855        ip6_route_add(&cfg);
1856
1857        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1858}
1859#endif
1860
1861struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1862{
1863        struct rt6_info *rt;
1864        struct fib6_table *table;
1865
1866        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1867        if (!table)
1868                return NULL;
1869
1870        read_lock_bh(&table->tb6_lock);
1871        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1872                if (dev == rt->dst.dev &&
1873                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1874                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1875                        break;
1876        }
1877        if (rt)
1878                dst_hold(&rt->dst);
1879        read_unlock_bh(&table->tb6_lock);
1880        return rt;
1881}
1882
1883struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1884                                     struct net_device *dev,
1885                                     unsigned int pref)
1886{
1887        struct fib6_config cfg = {
1888                .fc_table       = RT6_TABLE_DFLT,
1889                .fc_metric      = IP6_RT_PRIO_USER,
1890                .fc_ifindex     = dev->ifindex,
1891                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1892                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1893                .fc_nlinfo.portid = 0,
1894                .fc_nlinfo.nlh = NULL,
1895                .fc_nlinfo.nl_net = dev_net(dev),
1896        };
1897
1898        cfg.fc_gateway = *gwaddr;
1899
1900        ip6_route_add(&cfg);
1901
1902        return rt6_get_dflt_router(gwaddr, dev);
1903}
1904
1905void rt6_purge_dflt_routers(struct net *net)
1906{
1907        struct rt6_info *rt;
1908        struct fib6_table *table;
1909
1910        /* NOTE: Keep consistent with rt6_get_dflt_router */
1911        table = fib6_get_table(net, RT6_TABLE_DFLT);
1912        if (!table)
1913                return;
1914
1915restart:
1916        read_lock_bh(&table->tb6_lock);
1917        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1918                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1919                    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1920                        dst_hold(&rt->dst);
1921                        read_unlock_bh(&table->tb6_lock);
1922                        ip6_del_rt(rt);
1923                        goto restart;
1924                }
1925        }
1926        read_unlock_bh(&table->tb6_lock);
1927}
1928
1929static void rtmsg_to_fib6_config(struct net *net,
1930                                 struct in6_rtmsg *rtmsg,
1931                                 struct fib6_config *cfg)
1932{
1933        memset(cfg, 0, sizeof(*cfg));
1934
1935        cfg->fc_table = RT6_TABLE_MAIN;
1936        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1937        cfg->fc_metric = rtmsg->rtmsg_metric;
1938        cfg->fc_expires = rtmsg->rtmsg_info;
1939        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1940        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1941        cfg->fc_flags = rtmsg->rtmsg_flags;
1942
1943        cfg->fc_nlinfo.nl_net = net;
1944
1945        cfg->fc_dst = rtmsg->rtmsg_dst;
1946        cfg->fc_src = rtmsg->rtmsg_src;
1947        cfg->fc_gateway = rtmsg->rtmsg_gateway;
1948}
1949
1950int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1951{
1952        struct fib6_config cfg;
1953        struct in6_rtmsg rtmsg;
1954        int err;
1955
1956        switch(cmd) {
1957        case SIOCADDRT:         /* Add a route */
1958        case SIOCDELRT:         /* Delete a route */
1959                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1960                        return -EPERM;
1961                err = copy_from_user(&rtmsg, arg,
1962                                     sizeof(struct in6_rtmsg));
1963                if (err)
1964                        return -EFAULT;
1965
1966                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1967
1968                rtnl_lock();
1969                switch (cmd) {
1970                case SIOCADDRT:
1971                        err = ip6_route_add(&cfg);
1972                        break;
1973                case SIOCDELRT:
1974                        err = ip6_route_del(&cfg);
1975                        break;
1976                default:
1977                        err = -EINVAL;
1978                }
1979                rtnl_unlock();
1980
1981                return err;
1982        }
1983
1984        return -EINVAL;
1985}
1986
1987/*
1988 *      Drop the packet on the floor
1989 */
1990
1991static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1992{
1993        int type;
1994        struct dst_entry *dst = skb_dst(skb);
1995        switch (ipstats_mib_noroutes) {
1996        case IPSTATS_MIB_INNOROUTES:
1997                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1998                if (type == IPV6_ADDR_ANY) {
1999                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2000                                      IPSTATS_MIB_INADDRERRORS);
2001                        break;
2002                }
2003                /* FALLTHROUGH */
2004        case IPSTATS_MIB_OUTNOROUTES:
2005                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006                              ipstats_mib_noroutes);
2007                break;
2008        }
2009        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2010        kfree_skb(skb);
2011        return 0;
2012}
2013
2014static int ip6_pkt_discard(struct sk_buff *skb)
2015{
2016        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2017}
2018
2019static int ip6_pkt_discard_out(struct sk_buff *skb)
2020{
2021        skb->dev = skb_dst(skb)->dev;
2022        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2023}
2024
2025#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2026
2027static int ip6_pkt_prohibit(struct sk_buff *skb)
2028{
2029        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2030}
2031
2032static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2033{
2034        skb->dev = skb_dst(skb)->dev;
2035        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2036}
2037
2038#endif
2039
2040/*
2041 *      Allocate a dst for local (unicast / anycast) address.
2042 */
2043
2044struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2045                                    const struct in6_addr *addr,
2046                                    bool anycast)
2047{
2048        struct net *net = dev_net(idev->dev);
2049        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2050
2051        if (!rt) {
2052                net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2053                return ERR_PTR(-ENOMEM);
2054        }
2055
2056        in6_dev_hold(idev);
2057
2058        rt->dst.flags |= DST_HOST;
2059        rt->dst.input = ip6_input;
2060        rt->dst.output = ip6_output;
2061        rt->rt6i_idev = idev;
2062
2063        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2064        if (anycast)
2065                rt->rt6i_flags |= RTF_ANYCAST;
2066        else
2067                rt->rt6i_flags |= RTF_LOCAL;
2068
2069        rt->rt6i_dst.addr = *addr;
2070        rt->rt6i_dst.plen = 128;
2071        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2072
2073        atomic_set(&rt->dst.__refcnt, 1);
2074
2075        return rt;
2076}
2077
2078int ip6_route_get_saddr(struct net *net,
2079                        struct rt6_info *rt,
2080                        const struct in6_addr *daddr,
2081                        unsigned int prefs,
2082                        struct in6_addr *saddr)
2083{
2084        struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2085        int err = 0;
2086        if (rt->rt6i_prefsrc.plen)
2087                *saddr = rt->rt6i_prefsrc.addr;
2088        else
2089                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2090                                         daddr, prefs, saddr);
2091        return err;
2092}
2093
2094/* remove deleted ip from prefsrc entries: walker argument for fib6_remove_prefsrc() */
2095struct arg_dev_net_ip {
2096        struct net_device *dev;         /* compared with rt->dst.dev; NULL matches any device */
2097        struct net *net;                /* namespace whose ip6_null_entry is left untouched */
2098        struct in6_addr *addr;          /* address compared with each route's rt6i_prefsrc.addr */
2099};
2100
2101static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2102{
2103        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2104        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2105        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2106
2107        if (((void *)rt->dst.dev == dev || !dev) &&
2108            rt != net->ipv6.ip6_null_entry &&
2109            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2110                /* remove prefsrc entry */
2111                rt->rt6i_prefsrc.plen = 0;
2112        }
2113        return 0;
2114}
2115
2116void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2117{
2118        struct net *net = dev_net(ifp->idev->dev);
2119        struct arg_dev_net_ip adni = {
2120                .dev = ifp->idev->dev,
2121                .net = net,
2122                .addr = &ifp->addr,
2123        };
2124        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2125}
2126
2127struct arg_dev_net {                /* walker argument for fib6_ifdown(), filled in by rt6_ifdown() */
2128        struct net_device *dev;         /* compared with rt->dst.dev; NULL matches any device */
2129        struct net *net;                /* namespace whose ip6_null_entry is never selected */
2130};
2131
2132static int fib6_ifdown(struct rt6_info *rt, void *arg)
2133{
2134        const struct arg_dev_net *adn = arg;
2135        const struct net_device *dev = adn->dev;
2136
2137        if ((rt->dst.dev == dev || !dev) &&
2138            rt != adn->net->ipv6.ip6_null_entry)
2139                return -1;
2140
2141        return 0;
2142}
2143
2144void rt6_ifdown(struct net *net, struct net_device *dev)
2145{
2146        struct arg_dev_net adn = {
2147                .dev = dev,
2148                .net = net,
2149        };
2150
2151        fib6_clean_all(net, fib6_ifdown, 0, &adn);
2152        icmp6_clean_all(fib6_ifdown, &adn);
2153}
2154
2155struct rt6_mtu_change_arg {         /* walker argument for rt6_mtu_change_route() */
2156        struct net_device *dev;         /* only routes whose dst.dev is this device are updated */
2157        unsigned int mtu;               /* value stored in RTAX_MTU of the routes that qualify */
2158};
2159
2160static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2161{
2162        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2163        struct inet6_dev *idev;
2164
2165        /* In IPv6 pmtu discovery is not optional,
2166           so that RTAX_MTU lock cannot disable it.
2167           We still use this lock to block changes
2168           caused by addrconf/ndisc.
2169        */
2170
2171        idev = __in6_dev_get(arg->dev);
2172        if (!idev)
2173                return 0;
2174
2175        /* For administrative MTU increase, there is no way to discover
2176           IPv6 PMTU increase, so PMTU increase should be updated here.
2177           Since RFC 1981 doesn't include administrative MTU increase
2178           update PMTU increase is a MUST. (i.e. jumbo frame)
2179         */
2180        /*
2181           If new MTU is less than route PMTU, this new MTU will be the
2182           lowest MTU in the path, update the route PMTU to reflect PMTU
2183           decreases; if new MTU is greater than route PMTU, and the
2184           old MTU is the lowest MTU in the path, update the route PMTU
2185           to reflect the increase. In this case if the other nodes' MTU
2186           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2187           PMTU discouvery.
2188         */
2189        if (rt->dst.dev == arg->dev &&
2190            !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2191            (dst_mtu(&rt->dst) >= arg->mtu ||
2192             (dst_mtu(&rt->dst) < arg->mtu &&
2193              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2194                dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2195        }
2196        return 0;
2197}
2198
2199void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2200{
2201        struct rt6_mtu_change_arg arg = {
2202                .dev = dev,
2203                .mtu = mtu,
2204        };
2205
2206        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2207}
2208
2209static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2210        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2211        [RTA_OIF]               = { .type = NLA_U32 },
2212        [RTA_IIF]               = { .type = NLA_U32 },
2213        [RTA_PRIORITY]          = { .type = NLA_U32 },
2214        [RTA_METRICS]           = { .type = NLA_NESTED },
2215        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2216};
2217
2218static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2219                              struct fib6_config *cfg)
2220{
2221        struct rtmsg *rtm;
2222        struct nlattr *tb[RTA_MAX+1];
2223        int err;
2224
2225        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2226        if (err < 0)
2227                goto errout;
2228
2229        err = -EINVAL;
2230        rtm = nlmsg_data(nlh);
2231        memset(cfg, 0, sizeof(*cfg));
2232
2233        cfg->fc_table = rtm->rtm_table;
2234        cfg->fc_dst_len = rtm->rtm_dst_len;
2235        cfg->fc_src_len = rtm->rtm_src_len;
2236        cfg->fc_flags = RTF_UP;
2237        cfg->fc_protocol = rtm->rtm_protocol;
2238        cfg->fc_type = rtm->rtm_type;
2239
2240        if (rtm->rtm_type == RTN_UNREACHABLE ||
2241            rtm->rtm_type == RTN_BLACKHOLE ||
2242            rtm->rtm_type == RTN_PROHIBIT ||
2243            rtm->rtm_type == RTN_THROW)
2244                cfg->fc_flags |= RTF_REJECT;
2245
2246        if (rtm->rtm_type == RTN_LOCAL)
2247                cfg->fc_flags |= RTF_LOCAL;
2248
2249        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2250        cfg->fc_nlinfo.nlh = nlh;
2251        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2252
2253        if (tb[RTA_GATEWAY]) {
2254                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2255                cfg->fc_flags |= RTF_GATEWAY;
2256        }
2257
2258        if (tb[RTA_DST]) {
2259                int plen = (rtm->rtm_dst_len + 7) >> 3;
2260
2261                if (nla_len(tb[RTA_DST]) < plen)
2262                        goto errout;
2263
2264                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2265        }
2266
2267        if (tb[RTA_SRC]) {
2268                int plen = (rtm->rtm_src_len + 7) >> 3;
2269
2270                if (nla_len(tb[RTA_SRC]) < plen)
2271                        goto errout;
2272
2273                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2274        }
2275
2276        if (tb[RTA_PREFSRC])
2277                nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2278
2279        if (tb[RTA_OIF])
2280                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2281
2282        if (tb[RTA_PRIORITY])
2283                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2284
2285        if (tb[RTA_METRICS]) {
2286                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2287                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2288        }
2289
2290        if (tb[RTA_TABLE])
2291                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2292
2293        if (tb[RTA_MULTIPATH]) {
2294                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2295                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2296        }
2297
2298        err = 0;
2299errout:
2300        return err;
2301}
2302
2303static int ip6_route_multipath(struct fib6_config *cfg, int add)
2304{
2305        struct fib6_config r_cfg;
2306        struct rtnexthop *rtnh;
2307        int remaining;
2308        int attrlen;
2309        int err = 0, last_err = 0;
2310
2311beginning:
2312        rtnh = (struct rtnexthop *)cfg->fc_mp;
2313        remaining = cfg->fc_mp_len;
2314
2315        /* Parse a Multipath Entry */
2316        while (rtnh_ok(rtnh, remaining)) {
2317                memcpy(&r_cfg, cfg, sizeof(*cfg));
2318                if (rtnh->rtnh_ifindex)
2319                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2320
2321                attrlen = rtnh_attrlen(rtnh);
2322                if (attrlen > 0) {
2323                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2324
2325                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2326                        if (nla) {
2327                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2328                                r_cfg.fc_flags |= RTF_GATEWAY;
2329                        }
2330                }
2331                err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2332                if (err) {
2333                        last_err = err;
2334                        /* If we are trying to remove a route, do not stop the
2335                         * loop when ip6_route_del() fails (because next hop is
2336                         * already gone), we should try to remove all next hops.
2337                         */
2338                        if (add) {
2339                                /* If add fails, we should try to delete all
2340                                 * next hops that have been already added.
2341                                 */
2342                                add = 0;
2343                                goto beginning;
2344                        }
2345                }
2346                /* Because each route is added like a single route we remove
2347                 * this flag after the first nexthop (if there is a collision,
2348                 * we have already fail to add the first nexthop:
2349                 * fib6_add_rt2node() has reject it).
2350                 */
2351                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2352                rtnh = rtnh_next(rtnh, &remaining);
2353        }
2354
2355        return last_err;
2356}
2357
2358static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2359{
2360        struct fib6_config cfg;
2361        int err;
2362
2363        err = rtm_to_fib6_config(skb, nlh, &cfg);
2364        if (err < 0)
2365                return err;
2366
2367        if (cfg.fc_mp)
2368                return ip6_route_multipath(&cfg, 0);
2369        else
2370                return ip6_route_del(&cfg);
2371}
2372
2373static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2374{
2375        struct fib6_config cfg;
2376        int err;
2377
2378        err = rtm_to_fib6_config(skb, nlh, &cfg);
2379        if (err < 0)
2380                return err;
2381
2382        if (cfg.fc_mp)
2383                return ip6_route_multipath(&cfg, 1);
2384        else
2385                return ip6_route_add(&cfg);
2386}
2387
2388static inline size_t rt6_nlmsg_size(void)
2389{
2390        return NLMSG_ALIGN(sizeof(struct rtmsg))
2391               + nla_total_size(16) /* RTA_SRC */
2392               + nla_total_size(16) /* RTA_DST */
2393               + nla_total_size(16) /* RTA_GATEWAY */
2394               + nla_total_size(16) /* RTA_PREFSRC */
2395               + nla_total_size(4) /* RTA_TABLE */
2396               + nla_total_size(4) /* RTA_IIF */
2397               + nla_total_size(4) /* RTA_OIF */
2398               + nla_total_size(4) /* RTA_PRIORITY */
2399               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2400               + nla_total_size(sizeof(struct rta_cacheinfo));
2401}
2402
2403static int rt6_fill_node(struct net *net,
2404                         struct sk_buff *skb, struct rt6_info *rt,
2405                         struct in6_addr *dst, struct in6_addr *src,
2406                         int iif, int type, u32 portid, u32 seq,
2407                         int prefix, int nowait, unsigned int flags)
2408{
2409        struct rtmsg *rtm;
2410        struct nlmsghdr *nlh;
2411        long expires;
2412        u32 table;
2413
2414        if (prefix) {   /* user wants prefix routes only */
2415                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2416                        /* success since this is not a prefix route */
2417                        return 1;
2418                }
2419        }
2420
2421        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2422        if (!nlh)
2423                return -EMSGSIZE;
2424
2425        rtm = nlmsg_data(nlh);
2426        rtm->rtm_family = AF_INET6;
2427        rtm->rtm_dst_len = rt->rt6i_dst.plen;
2428        rtm->rtm_src_len = rt->rt6i_src.plen;
2429        rtm->rtm_tos = 0;
2430        if (rt->rt6i_table)
2431                table = rt->rt6i_table->tb6_id;
2432        else
2433                table = RT6_TABLE_UNSPEC;
2434        rtm->rtm_table = table;
2435        if (nla_put_u32(skb, RTA_TABLE, table))
2436                goto nla_put_failure;
2437        if (rt->rt6i_flags & RTF_REJECT) {
2438                switch (rt->dst.error) {
2439                case -EINVAL:
2440                        rtm->rtm_type = RTN_BLACKHOLE;
2441                        break;
2442                case -EACCES:
2443                        rtm->rtm_type = RTN_PROHIBIT;
2444                        break;
2445                case -EAGAIN:
2446                        rtm->rtm_type = RTN_THROW;
2447                        break;
2448                default:
2449                        rtm->rtm_type = RTN_UNREACHABLE;
2450                        break;
2451                }
2452        }
2453        else if (rt->rt6i_flags & RTF_LOCAL)
2454                rtm->rtm_type = RTN_LOCAL;
2455        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2456                rtm->rtm_type = RTN_LOCAL;
2457        else
2458                rtm->rtm_type = RTN_UNICAST;
2459        rtm->rtm_flags = 0;
2460        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2461        rtm->rtm_protocol = rt->rt6i_protocol;
2462        if (rt->rt6i_flags & RTF_DYNAMIC)
2463                rtm->rtm_protocol = RTPROT_REDIRECT;
2464        else if (rt->rt6i_flags & RTF_ADDRCONF) {
2465                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2466                        rtm->rtm_protocol = RTPROT_RA;
2467                else
2468                        rtm->rtm_protocol = RTPROT_KERNEL;
2469        }
2470
2471        if (rt->rt6i_flags & RTF_CACHE)
2472                rtm->rtm_flags |= RTM_F_CLONED;
2473
2474        if (dst) {
2475                if (nla_put(skb, RTA_DST, 16, dst))
2476                        goto nla_put_failure;
2477                rtm->rtm_dst_len = 128;
2478        } else if (rtm->rtm_dst_len)
2479                if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2480                        goto nla_put_failure;
2481#ifdef CONFIG_IPV6_SUBTREES
2482        if (src) {
2483                if (nla_put(skb, RTA_SRC, 16, src))
2484                        goto nla_put_failure;
2485                rtm->rtm_src_len = 128;
2486        } else if (rtm->rtm_src_len &&
2487                   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2488                goto nla_put_failure;
2489#endif
2490        if (iif) {
2491#ifdef CONFIG_IPV6_MROUTE
2492                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2493                        int err = ip6mr_get_route(net, skb, rtm, nowait);
2494                        if (err <= 0) {
2495                                if (!nowait) {
2496                                        if (err == 0)
2497                                                return 0;
2498                                        goto nla_put_failure;
2499                                } else {
2500                                        if (err == -EMSGSIZE)
2501                                                goto nla_put_failure;
2502                                }
2503                        }
2504                } else
2505#endif
2506                        if (nla_put_u32(skb, RTA_IIF, iif))
2507                                goto nla_put_failure;
2508        } else if (dst) {
2509                struct in6_addr saddr_buf;
2510                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2511                    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2512                        goto nla_put_failure;
2513        }
2514
2515        if (rt->rt6i_prefsrc.plen) {
2516                struct in6_addr saddr_buf;
2517                saddr_buf = rt->rt6i_prefsrc.addr;
2518                if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2519                        goto nla_put_failure;
2520        }
2521
2522        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2523                goto nla_put_failure;
2524
2525        if (rt->rt6i_flags & RTF_GATEWAY) {
2526                if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2527                        goto nla_put_failure;
2528        }
2529
2530        if (rt->dst.dev &&
2531            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2532                goto nla_put_failure;
2533        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2534                goto nla_put_failure;
2535
2536        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2537
2538        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2539                goto nla_put_failure;
2540
2541        return nlmsg_end(skb, nlh);
2542
2543nla_put_failure:
2544        nlmsg_cancel(skb, nlh);
2545        return -EMSGSIZE;
2546}
2547
2548int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2549{
2550        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2551        int prefix;
2552
2553        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2554                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2555                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2556        } else
2557                prefix = 0;
2558
2559        return rt6_fill_node(arg->net,
2560                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2561                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2562                     prefix, 0, NLM_F_MULTI);
2563}
2564
2565static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2566{
2567        struct net *net = sock_net(in_skb->sk);
2568        struct nlattr *tb[RTA_MAX+1];
2569        struct rt6_info *rt;
2570        struct sk_buff *skb;
2571        struct rtmsg *rtm;
2572        struct flowi6 fl6;
2573        int err, iif = 0, oif = 0;
2574
2575        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2576        if (err < 0)
2577                goto errout;
2578
2579        err = -EINVAL;
2580        memset(&fl6, 0, sizeof(fl6));
2581
2582        if (tb[RTA_SRC]) {
2583                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2584                        goto errout;
2585
2586                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2587        }
2588
2589        if (tb[RTA_DST]) {
2590                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2591                        goto errout;
2592
2593                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2594        }
2595
2596        if (tb[RTA_IIF])
2597                iif = nla_get_u32(tb[RTA_IIF]);
2598
2599        if (tb[RTA_OIF])
2600                oif = nla_get_u32(tb[RTA_OIF]);
2601
2602        if (iif) {
2603                struct net_device *dev;
2604                int flags = 0;
2605
2606                dev = __dev_get_by_index(net, iif);
2607                if (!dev) {
2608                        err = -ENODEV;
2609                        goto errout;
2610                }
2611
2612                fl6.flowi6_iif = iif;
2613
2614                if (!ipv6_addr_any(&fl6.saddr))
2615                        flags |= RT6_LOOKUP_F_HAS_SADDR;
2616
2617                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2618                                                               flags);
2619        } else {
2620                fl6.flowi6_oif = oif;
2621
2622                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2623        }
2624
2625        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2626        if (!skb) {
2627                ip6_rt_put(rt);
2628                err = -ENOBUFS;
2629                goto errout;
2630        }
2631
2632        /* Reserve room for dummy headers, this skb can pass
2633           through good chunk of routing engine.
2634         */
2635        skb_reset_mac_header(skb);
2636        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2637
2638        skb_dst_set(skb, &rt->dst);
2639
2640        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2641                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2642                            nlh->nlmsg_seq, 0, 0, 0);
2643        if (err < 0) {
2644                kfree_skb(skb);
2645                goto errout;
2646        }
2647
2648        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2649errout:
2650        return err;
2651}
2652
2653void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2654{
2655        struct sk_buff *skb;
2656        struct net *net = info->nl_net;
2657        u32 seq;
2658        int err;
2659
2660        err = -ENOBUFS;
2661        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2662
2663        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2664        if (!skb)
2665                goto errout;
2666
2667        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2668                                event, info->portid, seq, 0, 0, 0);
2669        if (err < 0) {
2670                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2671                WARN_ON(err == -EMSGSIZE);
2672                kfree_skb(skb);
2673                goto errout;
2674        }
2675        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2676                    info->nlh, gfp_any());
2677        return;
2678errout:
2679        if (err < 0)
2680                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2681}
2682
2683static int ip6_route_dev_notify(struct notifier_block *this,
2684                                unsigned long event, void *data)
2685{
2686        struct net_device *dev = (struct net_device *)data;
2687        struct net *net = dev_net(dev);
2688
2689        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2690                net->ipv6.ip6_null_entry->dst.dev = dev;
2691                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2692#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2694                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2695                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2696                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2697#endif
2698        }
2699
2700        return NOTIFY_OK;
2701}
2702
2703/*
2704 *      /proc
2705 */
2706
2707#ifdef CONFIG_PROC_FS
2708
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references
 * this structure; the field meanings are not documented here - confirm
 * against any remaining users before relying on them.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2717
2718static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2719{
2720        struct seq_file *m = p_arg;
2721
2722        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2723
2724#ifdef CONFIG_IPV6_SUBTREES
2725        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2726#else
2727        seq_puts(m, "00000000000000000000000000000000 00 ");
2728#endif
2729        if (rt->rt6i_flags & RTF_GATEWAY) {
2730                seq_printf(m, "%pi6", &rt->rt6i_gateway);
2731        } else {
2732                seq_puts(m, "00000000000000000000000000000000");
2733        }
2734        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2735                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2736                   rt->dst.__use, rt->rt6i_flags,
2737                   rt->dst.dev ? rt->dst.dev->name : "");
2738        return 0;
2739}
2740
2741static int ipv6_route_show(struct seq_file *m, void *v)
2742{
2743        struct net *net = (struct net *)m->private;
2744        fib6_clean_all_ro(net, rt6_info_route, 0, m);
2745        return 0;
2746}
2747
2748static int ipv6_route_open(struct inode *inode, struct file *file)
2749{
2750        return single_open_net(inode, file, ipv6_route_show);
2751}
2752
2753static const struct file_operations ipv6_route_proc_fops = {
2754        .owner          = THIS_MODULE,
2755        .open           = ipv6_route_open,
2756        .read           = seq_read,
2757        .llseek         = seq_lseek,
2758        .release        = single_release_net,
2759};
2760
2761static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2762{
2763        struct net *net = (struct net *)seq->private;
2764        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2765                   net->ipv6.rt6_stats->fib_nodes,
2766                   net->ipv6.rt6_stats->fib_route_nodes,
2767                   net->ipv6.rt6_stats->fib_rt_alloc,
2768                   net->ipv6.rt6_stats->fib_rt_entries,
2769                   net->ipv6.rt6_stats->fib_rt_cache,
2770                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2771                   net->ipv6.rt6_stats->fib_discarded_routes);
2772
2773        return 0;
2774}
2775
2776static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2777{
2778        return single_open_net(inode, file, rt6_stats_seq_show);
2779}
2780
2781static const struct file_operations rt6_stats_seq_fops = {
2782        .owner   = THIS_MODULE,
2783        .open    = rt6_stats_seq_open,
2784        .read    = seq_read,
2785        .llseek  = seq_lseek,
2786        .release = single_release_net,
2787};
2788#endif  /* CONFIG_PROC_FS */
2789
2790#ifdef CONFIG_SYSCTL
2791
2792static
2793int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2794                              void __user *buffer, size_t *lenp, loff_t *ppos)
2795{
2796        struct net *net;
2797        int delay;
2798        if (!write)
2799                return -EINVAL;
2800
2801        net = (struct net *)ctl->extra1;
2802        delay = net->ipv6.sysctl.flush_delay;
2803        proc_dointvec(ctl, write, buffer, lenp, ppos);
2804        fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2805        return 0;
2806}
2807
2808ctl_table ipv6_route_table_template[] = {
2809        {
2810                .procname       =       "flush",
2811                .data           =       &init_net.ipv6.sysctl.flush_delay,
2812                .maxlen         =       sizeof(int),
2813                .mode           =       0200,
2814                .proc_handler   =       ipv6_sysctl_rtcache_flush
2815        },
2816        {
2817                .procname       =       "gc_thresh",
2818                .data           =       &ip6_dst_ops_template.gc_thresh,
2819                .maxlen         =       sizeof(int),
2820                .mode           =       0644,
2821                .proc_handler   =       proc_dointvec,
2822        },
2823        {
2824                .procname       =       "max_size",
2825                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2826                .maxlen         =       sizeof(int),
2827                .mode           =       0644,
2828                .proc_handler   =       proc_dointvec,
2829        },
2830        {
2831                .procname       =       "gc_min_interval",
2832                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2833                .maxlen         =       sizeof(int),
2834                .mode           =       0644,
2835                .proc_handler   =       proc_dointvec_jiffies,
2836        },
2837        {
2838                .procname       =       "gc_timeout",
2839                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2840                .maxlen         =       sizeof(int),
2841                .mode           =       0644,
2842                .proc_handler   =       proc_dointvec_jiffies,
2843        },
2844        {
2845                .procname       =       "gc_interval",
2846                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2847                .maxlen         =       sizeof(int),
2848                .mode           =       0644,
2849                .proc_handler   =       proc_dointvec_jiffies,
2850        },
2851        {
2852                .procname       =       "gc_elasticity",
2853                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2854                .maxlen         =       sizeof(int),
2855                .mode           =       0644,
2856                .proc_handler   =       proc_dointvec,
2857        },
2858        {
2859                .procname       =       "mtu_expires",
2860                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2861                .maxlen         =       sizeof(int),
2862                .mode           =       0644,
2863                .proc_handler   =       proc_dointvec_jiffies,
2864        },
2865        {
2866                .procname       =       "min_adv_mss",
2867                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2868                .maxlen         =       sizeof(int),
2869                .mode           =       0644,
2870                .proc_handler   =       proc_dointvec,
2871        },
2872        {
2873                .procname       =       "gc_min_interval_ms",
2874                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2875                .maxlen         =       sizeof(int),
2876                .mode           =       0644,
2877                .proc_handler   =       proc_dointvec_ms_jiffies,
2878        },
2879        { }
2880};
2881
2882struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2883{
2884        struct ctl_table *table;
2885
2886        table = kmemdup(ipv6_route_table_template,
2887                        sizeof(ipv6_route_table_template),
2888                        GFP_KERNEL);
2889
2890        if (table) {
2891                table[0].data = &net->ipv6.sysctl.flush_delay;
2892                table[0].extra1 = net;
2893                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2894                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2895                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2896                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2897                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2898                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2899                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2900                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2901                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2902
2903                /* Don't export sysctls to unprivileged users */
2904                if (net->user_ns != &init_user_ns)
2905                        table[0].procname = NULL;
2906        }
2907
2908        return table;
2909}
2910#endif
2911
2912static int __net_init ip6_route_net_init(struct net *net)
2913{
2914        int ret = -ENOMEM;
2915
2916        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2917               sizeof(net->ipv6.ip6_dst_ops));
2918
2919        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2920                goto out_ip6_dst_ops;
2921
2922        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2923                                           sizeof(*net->ipv6.ip6_null_entry),
2924                                           GFP_KERNEL);
2925        if (!net->ipv6.ip6_null_entry)
2926                goto out_ip6_dst_entries;
2927        net->ipv6.ip6_null_entry->dst.path =
2928                (struct dst_entry *)net->ipv6.ip6_null_entry;
2929        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2930        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2931                         ip6_template_metrics, true);
2932
2933#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2935                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2936                                               GFP_KERNEL);
2937        if (!net->ipv6.ip6_prohibit_entry)
2938                goto out_ip6_null_entry;
2939        net->ipv6.ip6_prohibit_entry->dst.path =
2940                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2941        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2942        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2943                         ip6_template_metrics, true);
2944
2945        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2946                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2947                                               GFP_KERNEL);
2948        if (!net->ipv6.ip6_blk_hole_entry)
2949                goto out_ip6_prohibit_entry;
2950        net->ipv6.ip6_blk_hole_entry->dst.path =
2951                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2952        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2953        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2954                         ip6_template_metrics, true);
2955#endif
2956
2957        net->ipv6.sysctl.flush_delay = 0;
2958        net->ipv6.sysctl.ip6_rt_max_size = 4096;
2959        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2960        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2961        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2962        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2963        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2964        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2965
2966        net->ipv6.ip6_rt_gc_expire = 30*HZ;
2967
2968        ret = 0;
2969out:
2970        return ret;
2971
2972#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2973out_ip6_prohibit_entry:
2974        kfree(net->ipv6.ip6_prohibit_entry);
2975out_ip6_null_entry:
2976        kfree(net->ipv6.ip6_null_entry);
2977#endif
2978out_ip6_dst_entries:
2979        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2980out_ip6_dst_ops:
2981        goto out;
2982}
2983
2984static void __net_exit ip6_route_net_exit(struct net *net)
2985{
2986        kfree(net->ipv6.ip6_null_entry);
2987#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2988        kfree(net->ipv6.ip6_prohibit_entry);
2989        kfree(net->ipv6.ip6_blk_hole_entry);
2990#endif
2991        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2992}
2993
2994static int __net_init ip6_route_net_init_late(struct net *net)
2995{
2996#ifdef CONFIG_PROC_FS
2997        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2998        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
2999#endif
3000        return 0;
3001}
3002
3003static void __net_exit ip6_route_net_exit_late(struct net *net)
3004{
3005#ifdef CONFIG_PROC_FS
3006        remove_proc_entry("ipv6_route", net->proc_net);
3007        remove_proc_entry("rt6_stats", net->proc_net);
3008#endif
3009}
3010
3011static struct pernet_operations ip6_route_net_ops = {
3012        .init = ip6_route_net_init,
3013        .exit = ip6_route_net_exit,
3014};
3015
3016static int __net_init ipv6_inetpeer_init(struct net *net)
3017{
3018        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3019
3020        if (!bp)
3021                return -ENOMEM;
3022        inet_peer_base_init(bp);
3023        net->ipv6.peers = bp;
3024        return 0;
3025}
3026
3027static void __net_exit ipv6_inetpeer_exit(struct net *net)
3028{
3029        struct inet_peer_base *bp = net->ipv6.peers;
3030
3031        net->ipv6.peers = NULL;
3032        inetpeer_invalidate_tree(bp);
3033        kfree(bp);
3034}
3035
3036static struct pernet_operations ipv6_inetpeer_ops = {
3037        .init   =       ipv6_inetpeer_init,
3038        .exit   =       ipv6_inetpeer_exit,
3039};
3040
3041static struct pernet_operations ip6_route_net_late_ops = {
3042        .init = ip6_route_net_init_late,
3043        .exit = ip6_route_net_exit_late,
3044};
3045
3046static struct notifier_block ip6_route_dev_notifier = {
3047        .notifier_call = ip6_route_dev_notify,
3048        .priority = 0,
3049};
3050
3051int __init ip6_route_init(void)
3052{
3053        int ret;
3054
3055        ret = -ENOMEM;
3056        ip6_dst_ops_template.kmem_cachep =
3057                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3058                                  SLAB_HWCACHE_ALIGN, NULL);
3059        if (!ip6_dst_ops_template.kmem_cachep)
3060                goto out;
3061
3062        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3063        if (ret)
3064                goto out_kmem_cache;
3065
3066        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3067        if (ret)
3068                goto out_dst_entries;
3069
3070        ret = register_pernet_subsys(&ip6_route_net_ops);
3071        if (ret)
3072                goto out_register_inetpeer;
3073
3074        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3075
3076        /* Registering of the loopback is done before this portion of code,
3077         * the loopback reference in rt6_info will not be taken, do it
3078         * manually for init_net */
3079        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3080        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3081  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3082        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3083        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3084        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3085        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3086  #endif
3087        ret = fib6_init();
3088        if (ret)
3089                goto out_register_subsys;
3090
3091        ret = xfrm6_init();
3092        if (ret)
3093                goto out_fib6_init;
3094
3095        ret = fib6_rules_init();
3096        if (ret)
3097                goto xfrm6_init;
3098
3099        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3100        if (ret)
3101                goto fib6_rules_init;
3102
3103        ret = -ENOBUFS;
3104        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3105            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3106            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3107                goto out_register_late_subsys;
3108
3109        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3110        if (ret)
3111                goto out_register_late_subsys;
3112
3113out:
3114        return ret;
3115
3116out_register_late_subsys:
3117        unregister_pernet_subsys(&ip6_route_net_late_ops);
3118fib6_rules_init:
3119        fib6_rules_cleanup();
3120xfrm6_init:
3121        xfrm6_fini();
3122out_fib6_init:
3123        fib6_gc_cleanup();
3124out_register_subsys:
3125        unregister_pernet_subsys(&ip6_route_net_ops);
3126out_register_inetpeer:
3127        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3128out_dst_entries:
3129        dst_entries_destroy(&ip6_dst_blackhole_ops);
3130out_kmem_cache:
3131        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3132        goto out;
3133}
3134
3135void ip6_route_cleanup(void)
3136{
3137        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3138        unregister_pernet_subsys(&ip6_route_net_late_ops);
3139        fib6_rules_cleanup();
3140        xfrm6_fini();
3141        fib6_gc_cleanup();
3142        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3143        unregister_pernet_subsys(&ip6_route_net_ops);
3144        dst_entries_destroy(&ip6_dst_blackhole_ops);
3145        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3146}
3147