linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60#include <net/nexthop.h>
  61
  62#include <asm/uaccess.h>
  63
  64#ifdef CONFIG_SYSCTL
  65#include <linux/sysctl.h>
  66#endif
  67
  68enum rt6_nud_state {
  69        RT6_NUD_FAIL_HARD = -2,
  70        RT6_NUD_FAIL_SOFT = -1,
  71        RT6_NUD_SUCCEED = 1
  72};
  73
  74static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  75                                    const struct in6_addr *dest);
  76static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  77static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  78static unsigned int      ip6_mtu(const struct dst_entry *dst);
  79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  80static void             ip6_dst_destroy(struct dst_entry *);
  81static void             ip6_dst_ifdown(struct dst_entry *,
  82                                       struct net_device *dev, int how);
  83static int               ip6_dst_gc(struct dst_ops *ops);
  84
  85static int              ip6_pkt_discard(struct sk_buff *skb);
  86static int              ip6_pkt_discard_out(struct sk_buff *skb);
  87static void             ip6_link_failure(struct sk_buff *skb);
  88static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  89                                           struct sk_buff *skb, u32 mtu);
  90static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  91                                        struct sk_buff *skb);
  92static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
  93
  94#ifdef CONFIG_IPV6_ROUTE_INFO
  95static struct rt6_info *rt6_add_route_info(struct net *net,
  96                                           const struct in6_addr *prefix, int prefixlen,
  97                                           const struct in6_addr *gwaddr, int ifindex,
  98                                           unsigned int pref);
  99static struct rt6_info *rt6_get_route_info(struct net *net,
 100                                           const struct in6_addr *prefix, int prefixlen,
 101                                           const struct in6_addr *gwaddr, int ifindex);
 102#endif
 103
 104static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 105{
 106        struct rt6_info *rt = (struct rt6_info *) dst;
 107        struct inet_peer *peer;
 108        u32 *p = NULL;
 109
 110        if (!(rt->dst.flags & DST_HOST))
 111                return NULL;
 112
 113        peer = rt6_get_peer_create(rt);
 114        if (peer) {
 115                u32 *old_p = __DST_METRICS_PTR(old);
 116                unsigned long prev, new;
 117
 118                p = peer->metrics;
 119                if (inet_metrics_new(peer))
 120                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 121
 122                new = (unsigned long) p;
 123                prev = cmpxchg(&dst->_metrics, old, new);
 124
 125                if (prev != old) {
 126                        p = __DST_METRICS_PTR(prev);
 127                        if (prev & DST_METRICS_READ_ONLY)
 128                                p = NULL;
 129                }
 130        }
 131        return p;
 132}
 133
 134static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 135                                             struct sk_buff *skb,
 136                                             const void *daddr)
 137{
 138        struct in6_addr *p = &rt->rt6i_gateway;
 139
 140        if (!ipv6_addr_any(p))
 141                return (const void *) p;
 142        else if (skb)
 143                return &ipv6_hdr(skb)->daddr;
 144        return daddr;
 145}
 146
 147static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 148                                          struct sk_buff *skb,
 149                                          const void *daddr)
 150{
 151        struct rt6_info *rt = (struct rt6_info *) dst;
 152        struct neighbour *n;
 153
 154        daddr = choose_neigh_daddr(rt, skb, daddr);
 155        n = __ipv6_neigh_lookup(dst->dev, daddr);
 156        if (n)
 157                return n;
 158        return neigh_create(&nd_tbl, daddr, dst->dev);
 159}
 160
 161static struct dst_ops ip6_dst_ops_template = {
 162        .family                 =       AF_INET6,
 163        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 164        .gc                     =       ip6_dst_gc,
 165        .gc_thresh              =       1024,
 166        .check                  =       ip6_dst_check,
 167        .default_advmss         =       ip6_default_advmss,
 168        .mtu                    =       ip6_mtu,
 169        .cow_metrics            =       ipv6_cow_metrics,
 170        .destroy                =       ip6_dst_destroy,
 171        .ifdown                 =       ip6_dst_ifdown,
 172        .negative_advice        =       ip6_negative_advice,
 173        .link_failure           =       ip6_link_failure,
 174        .update_pmtu            =       ip6_rt_update_pmtu,
 175        .redirect               =       rt6_do_redirect,
 176        .local_out              =       __ip6_local_out,
 177        .neigh_lookup           =       ip6_neigh_lookup,
 178};
 179
 180static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 181{
 182        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 183
 184        return mtu ? : dst->dev->mtu;
 185}
 186
 187static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 188                                         struct sk_buff *skb, u32 mtu)
 189{
 190}
 191
 192static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 193                                      struct sk_buff *skb)
 194{
 195}
 196
 197static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 198                                         unsigned long old)
 199{
 200        return NULL;
 201}
 202
 203static struct dst_ops ip6_dst_blackhole_ops = {
 204        .family                 =       AF_INET6,
 205        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 206        .destroy                =       ip6_dst_destroy,
 207        .check                  =       ip6_dst_check,
 208        .mtu                    =       ip6_blackhole_mtu,
 209        .default_advmss         =       ip6_default_advmss,
 210        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 211        .redirect               =       ip6_rt_blackhole_redirect,
 212        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
 213        .neigh_lookup           =       ip6_neigh_lookup,
 214};
 215
 216static const u32 ip6_template_metrics[RTAX_MAX] = {
 217        [RTAX_HOPLIMIT - 1] = 0,
 218};
 219
 220static const struct rt6_info ip6_null_entry_template = {
 221        .dst = {
 222                .__refcnt       = ATOMIC_INIT(1),
 223                .__use          = 1,
 224                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 225                .error          = -ENETUNREACH,
 226                .input          = ip6_pkt_discard,
 227                .output         = ip6_pkt_discard_out,
 228        },
 229        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 230        .rt6i_protocol  = RTPROT_KERNEL,
 231        .rt6i_metric    = ~(u32) 0,
 232        .rt6i_ref       = ATOMIC_INIT(1),
 233};
 234
 235#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 236
 237static int ip6_pkt_prohibit(struct sk_buff *skb);
 238static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 239
 240static const struct rt6_info ip6_prohibit_entry_template = {
 241        .dst = {
 242                .__refcnt       = ATOMIC_INIT(1),
 243                .__use          = 1,
 244                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 245                .error          = -EACCES,
 246                .input          = ip6_pkt_prohibit,
 247                .output         = ip6_pkt_prohibit_out,
 248        },
 249        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 250        .rt6i_protocol  = RTPROT_KERNEL,
 251        .rt6i_metric    = ~(u32) 0,
 252        .rt6i_ref       = ATOMIC_INIT(1),
 253};
 254
 255static const struct rt6_info ip6_blk_hole_entry_template = {
 256        .dst = {
 257                .__refcnt       = ATOMIC_INIT(1),
 258                .__use          = 1,
 259                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 260                .error          = -EINVAL,
 261                .input          = dst_discard,
 262                .output         = dst_discard,
 263        },
 264        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 265        .rt6i_protocol  = RTPROT_KERNEL,
 266        .rt6i_metric    = ~(u32) 0,
 267        .rt6i_ref       = ATOMIC_INIT(1),
 268};
 269
 270#endif
 271
 272/* allocate dst with ip6_dst_ops */
 273static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 274                                             struct net_device *dev,
 275                                             int flags,
 276                                             struct fib6_table *table)
 277{
 278        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 279                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 280
 281        if (rt) {
 282                struct dst_entry *dst = &rt->dst;
 283
 284                memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 285                rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 286                rt->rt6i_genid = rt_genid(net);
 287                INIT_LIST_HEAD(&rt->rt6i_siblings);
 288                rt->rt6i_nsiblings = 0;
 289        }
 290        return rt;
 291}
 292
 293static void ip6_dst_destroy(struct dst_entry *dst)
 294{
 295        struct rt6_info *rt = (struct rt6_info *)dst;
 296        struct inet6_dev *idev = rt->rt6i_idev;
 297        struct dst_entry *from = dst->from;
 298
 299        if (!(rt->dst.flags & DST_HOST))
 300                dst_destroy_metrics_generic(dst);
 301
 302        if (idev) {
 303                rt->rt6i_idev = NULL;
 304                in6_dev_put(idev);
 305        }
 306
 307        dst->from = NULL;
 308        dst_release(from);
 309
 310        if (rt6_has_peer(rt)) {
 311                struct inet_peer *peer = rt6_peer_ptr(rt);
 312                inet_putpeer(peer);
 313        }
 314}
 315
 316void rt6_bind_peer(struct rt6_info *rt, int create)
 317{
 318        struct inet_peer_base *base;
 319        struct inet_peer *peer;
 320
 321        base = inetpeer_base_ptr(rt->_rt6i_peer);
 322        if (!base)
 323                return;
 324
 325        peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 326        if (peer) {
 327                if (!rt6_set_peer(rt, peer))
 328                        inet_putpeer(peer);
 329        }
 330}
 331
 332static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 333                           int how)
 334{
 335        struct rt6_info *rt = (struct rt6_info *)dst;
 336        struct inet6_dev *idev = rt->rt6i_idev;
 337        struct net_device *loopback_dev =
 338                dev_net(dev)->loopback_dev;
 339
 340        if (dev != loopback_dev) {
 341                if (idev && idev->dev == dev) {
 342                        struct inet6_dev *loopback_idev =
 343                                in6_dev_get(loopback_dev);
 344                        if (loopback_idev) {
 345                                rt->rt6i_idev = loopback_idev;
 346                                in6_dev_put(idev);
 347                        }
 348                }
 349        }
 350}
 351
 352static bool rt6_check_expired(const struct rt6_info *rt)
 353{
 354        if (rt->rt6i_flags & RTF_EXPIRES) {
 355                if (time_after(jiffies, rt->dst.expires))
 356                        return true;
 357        } else if (rt->dst.from) {
 358                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 359        }
 360        return false;
 361}
 362
 363static bool rt6_need_strict(const struct in6_addr *daddr)
 364{
 365        return ipv6_addr_type(daddr) &
 366                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 367}
 368
 369/* Multipath route selection:
 370 *   Hash based function using packet header and flowlabel.
 371 * Adapted from fib_info_hashfn()
 372 */
 373static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 374                               const struct flowi6 *fl6)
 375{
 376        unsigned int val = fl6->flowi6_proto;
 377
 378        val ^= ipv6_addr_hash(&fl6->daddr);
 379        val ^= ipv6_addr_hash(&fl6->saddr);
 380
 381        /* Work only if this not encapsulated */
 382        switch (fl6->flowi6_proto) {
 383        case IPPROTO_UDP:
 384        case IPPROTO_TCP:
 385        case IPPROTO_SCTP:
 386                val ^= (__force u16)fl6->fl6_sport;
 387                val ^= (__force u16)fl6->fl6_dport;
 388                break;
 389
 390        case IPPROTO_ICMPV6:
 391                val ^= (__force u16)fl6->fl6_icmp_type;
 392                val ^= (__force u16)fl6->fl6_icmp_code;
 393                break;
 394        }
 395        /* RFC6438 recommands to use flowlabel */
 396        val ^= (__force u32)fl6->flowlabel;
 397
 398        /* Perhaps, we need to tune, this function? */
 399        val = val ^ (val >> 7) ^ (val >> 12);
 400        return val % candidate_count;
 401}
 402
 403static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 404                                             struct flowi6 *fl6, int oif,
 405                                             int strict)
 406{
 407        struct rt6_info *sibling, *next_sibling;
 408        int route_choosen;
 409
 410        route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 411        /* Don't change the route, if route_choosen == 0
 412         * (siblings does not include ourself)
 413         */
 414        if (route_choosen)
 415                list_for_each_entry_safe(sibling, next_sibling,
 416                                &match->rt6i_siblings, rt6i_siblings) {
 417                        route_choosen--;
 418                        if (route_choosen == 0) {
 419                                if (rt6_score_route(sibling, oif, strict) < 0)
 420                                        break;
 421                                match = sibling;
 422                                break;
 423                        }
 424                }
 425        return match;
 426}
 427
 428/*
 429 *      Route lookup. Any table->tb6_lock is implied.
 430 */
 431
 432static inline struct rt6_info *rt6_device_match(struct net *net,
 433                                                    struct rt6_info *rt,
 434                                                    const struct in6_addr *saddr,
 435                                                    int oif,
 436                                                    int flags)
 437{
 438        struct rt6_info *local = NULL;
 439        struct rt6_info *sprt;
 440
 441        if (!oif && ipv6_addr_any(saddr))
 442                goto out;
 443
 444        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 445                struct net_device *dev = sprt->dst.dev;
 446
 447                if (oif) {
 448                        if (dev->ifindex == oif)
 449                                return sprt;
 450                        if (dev->flags & IFF_LOOPBACK) {
 451                                if (!sprt->rt6i_idev ||
 452                                    sprt->rt6i_idev->dev->ifindex != oif) {
 453                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
 454                                                continue;
 455                                        if (local && (!oif ||
 456                                                      local->rt6i_idev->dev->ifindex == oif))
 457                                                continue;
 458                                }
 459                                local = sprt;
 460                        }
 461                } else {
 462                        if (ipv6_chk_addr(net, saddr, dev,
 463                                          flags & RT6_LOOKUP_F_IFACE))
 464                                return sprt;
 465                }
 466        }
 467
 468        if (oif) {
 469                if (local)
 470                        return local;
 471
 472                if (flags & RT6_LOOKUP_F_IFACE)
 473                        return net->ipv6.ip6_null_entry;
 474        }
 475out:
 476        return rt;
 477}
 478
 479#ifdef CONFIG_IPV6_ROUTER_PREF
 480static void rt6_probe(struct rt6_info *rt)
 481{
 482        struct neighbour *neigh;
 483        /*
 484         * Okay, this does not seem to be appropriate
 485         * for now, however, we need to check if it
 486         * is really so; aka Router Reachability Probing.
 487         *
 488         * Router Reachability Probe MUST be rate-limited
 489         * to no more than one per minute.
 490         */
 491        if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 492                return;
 493        rcu_read_lock_bh();
 494        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 495        if (neigh) {
 496                write_lock(&neigh->lock);
 497                if (neigh->nud_state & NUD_VALID)
 498                        goto out;
 499        }
 500
 501        if (!neigh ||
 502            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 503                struct in6_addr mcaddr;
 504                struct in6_addr *target;
 505
 506                if (neigh) {
 507                        neigh->updated = jiffies;
 508                        write_unlock(&neigh->lock);
 509                }
 510
 511                target = (struct in6_addr *)&rt->rt6i_gateway;
 512                addrconf_addr_solict_mult(target, &mcaddr);
 513                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 514        } else {
 515out:
 516                write_unlock(&neigh->lock);
 517        }
 518        rcu_read_unlock_bh();
 519}
 520#else
 521static inline void rt6_probe(struct rt6_info *rt)
 522{
 523}
 524#endif
 525
 526/*
 527 * Default Router Selection (RFC 2461 6.3.6)
 528 */
 529static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 530{
 531        struct net_device *dev = rt->dst.dev;
 532        if (!oif || dev->ifindex == oif)
 533                return 2;
 534        if ((dev->flags & IFF_LOOPBACK) &&
 535            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 536                return 1;
 537        return 0;
 538}
 539
 540static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 541{
 542        struct neighbour *neigh;
 543        enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 544
 545        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 546            !(rt->rt6i_flags & RTF_GATEWAY))
 547                return RT6_NUD_SUCCEED;
 548
 549        rcu_read_lock_bh();
 550        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 551        if (neigh) {
 552                read_lock(&neigh->lock);
 553                if (neigh->nud_state & NUD_VALID)
 554                        ret = RT6_NUD_SUCCEED;
 555#ifdef CONFIG_IPV6_ROUTER_PREF
 556                else if (!(neigh->nud_state & NUD_FAILED))
 557                        ret = RT6_NUD_SUCCEED;
 558#endif
 559                read_unlock(&neigh->lock);
 560        } else {
 561                ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 562                      RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
 563        }
 564        rcu_read_unlock_bh();
 565
 566        return ret;
 567}
 568
 569static int rt6_score_route(struct rt6_info *rt, int oif,
 570                           int strict)
 571{
 572        int m;
 573
 574        m = rt6_check_dev(rt, oif);
 575        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 576                return RT6_NUD_FAIL_HARD;
 577#ifdef CONFIG_IPV6_ROUTER_PREF
 578        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 579#endif
 580        if (strict & RT6_LOOKUP_F_REACHABLE) {
 581                int n = rt6_check_neigh(rt);
 582                if (n < 0)
 583                        return n;
 584        }
 585        return m;
 586}
 587
 588static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 589                                   int *mpri, struct rt6_info *match,
 590                                   bool *do_rr)
 591{
 592        int m;
 593        bool match_do_rr = false;
 594
 595        if (rt6_check_expired(rt))
 596                goto out;
 597
 598        m = rt6_score_route(rt, oif, strict);
 599        if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
 600                match_do_rr = true;
 601                m = 0; /* lowest valid score */
 602        } else if (m < 0) {
 603                goto out;
 604        }
 605
 606        if (strict & RT6_LOOKUP_F_REACHABLE)
 607                rt6_probe(rt);
 608
 609        if (m > *mpri) {
 610                *do_rr = match_do_rr;
 611                *mpri = m;
 612                match = rt;
 613        }
 614out:
 615        return match;
 616}
 617
 618static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 619                                     struct rt6_info *rr_head,
 620                                     u32 metric, int oif, int strict,
 621                                     bool *do_rr)
 622{
 623        struct rt6_info *rt, *match;
 624        int mpri = -1;
 625
 626        match = NULL;
 627        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 628             rt = rt->dst.rt6_next)
 629                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 630        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 631             rt = rt->dst.rt6_next)
 632                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 633
 634        return match;
 635}
 636
 637static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 638{
 639        struct rt6_info *match, *rt0;
 640        struct net *net;
 641        bool do_rr = false;
 642
 643        rt0 = fn->rr_ptr;
 644        if (!rt0)
 645                fn->rr_ptr = rt0 = fn->leaf;
 646
 647        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 648                             &do_rr);
 649
 650        if (do_rr) {
 651                struct rt6_info *next = rt0->dst.rt6_next;
 652
 653                /* no entries matched; do round-robin */
 654                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 655                        next = fn->leaf;
 656
 657                if (next != rt0)
 658                        fn->rr_ptr = next;
 659        }
 660
 661        net = dev_net(rt0->dst.dev);
 662        return match ? match : net->ipv6.ip6_null_entry;
 663}
 664
 665#ifdef CONFIG_IPV6_ROUTE_INFO
 666int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 667                  const struct in6_addr *gwaddr)
 668{
 669        struct net *net = dev_net(dev);
 670        struct route_info *rinfo = (struct route_info *) opt;
 671        struct in6_addr prefix_buf, *prefix;
 672        unsigned int pref;
 673        unsigned long lifetime;
 674        struct rt6_info *rt;
 675
 676        if (len < sizeof(struct route_info)) {
 677                return -EINVAL;
 678        }
 679
 680        /* Sanity check for prefix_len and length */
 681        if (rinfo->length > 3) {
 682                return -EINVAL;
 683        } else if (rinfo->prefix_len > 128) {
 684                return -EINVAL;
 685        } else if (rinfo->prefix_len > 64) {
 686                if (rinfo->length < 2) {
 687                        return -EINVAL;
 688                }
 689        } else if (rinfo->prefix_len > 0) {
 690                if (rinfo->length < 1) {
 691                        return -EINVAL;
 692                }
 693        }
 694
 695        pref = rinfo->route_pref;
 696        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 697                return -EINVAL;
 698
 699        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 700
 701        if (rinfo->length == 3)
 702                prefix = (struct in6_addr *)rinfo->prefix;
 703        else {
 704                /* this function is safe */
 705                ipv6_addr_prefix(&prefix_buf,
 706                                 (struct in6_addr *)rinfo->prefix,
 707                                 rinfo->prefix_len);
 708                prefix = &prefix_buf;
 709        }
 710
 711        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 712                                dev->ifindex);
 713
 714        if (rt && !lifetime) {
 715                ip6_del_rt(rt);
 716                rt = NULL;
 717        }
 718
 719        if (!rt && lifetime)
 720                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 721                                        pref);
 722        else if (rt)
 723                rt->rt6i_flags = RTF_ROUTEINFO |
 724                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 725
 726        if (rt) {
 727                if (!addrconf_finite_timeout(lifetime))
 728                        rt6_clean_expires(rt);
 729                else
 730                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 731
 732                ip6_rt_put(rt);
 733        }
 734        return 0;
 735}
 736#endif
 737
 738#define BACKTRACK(__net, saddr)                 \
 739do { \
 740        if (rt == __net->ipv6.ip6_null_entry) { \
 741                struct fib6_node *pn; \
 742                while (1) { \
 743                        if (fn->fn_flags & RTN_TL_ROOT) \
 744                                goto out; \
 745                        pn = fn->parent; \
 746                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 747                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 748                        else \
 749                                fn = pn; \
 750                        if (fn->fn_flags & RTN_RTINFO) \
 751                                goto restart; \
 752                } \
 753        } \
 754} while (0)
 755
 756static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 757                                             struct fib6_table *table,
 758                                             struct flowi6 *fl6, int flags)
 759{
 760        struct fib6_node *fn;
 761        struct rt6_info *rt;
 762
 763        read_lock_bh(&table->tb6_lock);
 764        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 765restart:
 766        rt = fn->leaf;
 767        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 768        if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 769                rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 770        BACKTRACK(net, &fl6->saddr);
 771out:
 772        dst_use(&rt->dst, jiffies);
 773        read_unlock_bh(&table->tb6_lock);
 774        return rt;
 775
 776}
 777
 778struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 779                                    int flags)
 780{
 781        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 782}
 783EXPORT_SYMBOL_GPL(ip6_route_lookup);
 784
 785struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 786                            const struct in6_addr *saddr, int oif, int strict)
 787{
 788        struct flowi6 fl6 = {
 789                .flowi6_oif = oif,
 790                .daddr = *daddr,
 791        };
 792        struct dst_entry *dst;
 793        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 794
 795        if (saddr) {
 796                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 797                flags |= RT6_LOOKUP_F_HAS_SADDR;
 798        }
 799
 800        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 801        if (dst->error == 0)
 802                return (struct rt6_info *) dst;
 803
 804        dst_release(dst);
 805
 806        return NULL;
 807}
 808
 809EXPORT_SYMBOL(rt6_lookup);
 810
 811/* ip6_ins_rt is called with FREE table->tb6_lock.
 812   It takes new route entry, the addition fails by any reason the
 813   route is freed. In any case, if caller does not hold it, it may
 814   be destroyed.
 815 */
 816
 817static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 818{
 819        int err;
 820        struct fib6_table *table;
 821
 822        table = rt->rt6i_table;
 823        write_lock_bh(&table->tb6_lock);
 824        err = fib6_add(&table->tb6_root, rt, info);
 825        write_unlock_bh(&table->tb6_lock);
 826
 827        return err;
 828}
 829
 830int ip6_ins_rt(struct rt6_info *rt)
 831{
 832        struct nl_info info = {
 833                .nl_net = dev_net(rt->dst.dev),
 834        };
 835        return __ip6_ins_rt(rt, &info);
 836}
 837
 838static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 839                                      const struct in6_addr *daddr,
 840                                      const struct in6_addr *saddr)
 841{
 842        struct rt6_info *rt;
 843
 844        /*
 845         *      Clone the route.
 846         */
 847
 848        rt = ip6_rt_copy(ort, daddr);
 849
 850        if (rt) {
 851                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 852                        if (ort->rt6i_dst.plen != 128 &&
 853                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 854                                rt->rt6i_flags |= RTF_ANYCAST;
 855                        rt->rt6i_gateway = *daddr;
 856                }
 857
 858                rt->rt6i_flags |= RTF_CACHE;
 859
 860#ifdef CONFIG_IPV6_SUBTREES
 861                if (rt->rt6i_src.plen && saddr) {
 862                        rt->rt6i_src.addr = *saddr;
 863                        rt->rt6i_src.plen = 128;
 864                }
 865#endif
 866        }
 867
 868        return rt;
 869}
 870
 871static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 872                                        const struct in6_addr *daddr)
 873{
 874        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 875
 876        if (rt)
 877                rt->rt6i_flags |= RTF_CACHE;
 878        return rt;
 879}
 880
 881static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 882                                      struct flowi6 *fl6, int flags)
 883{
 884        struct fib6_node *fn;
 885        struct rt6_info *rt, *nrt;
 886        int strict = 0;
 887        int attempts = 3;
 888        int err;
 889        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 890
 891        strict |= flags & RT6_LOOKUP_F_IFACE;
 892
 893relookup:
 894        read_lock_bh(&table->tb6_lock);
 895
 896restart_2:
 897        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 898
 899restart:
 900        rt = rt6_select(fn, oif, strict | reachable);
 901        if (rt->rt6i_nsiblings)
 902                rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
 903        BACKTRACK(net, &fl6->saddr);
 904        if (rt == net->ipv6.ip6_null_entry ||
 905            rt->rt6i_flags & RTF_CACHE)
 906                goto out;
 907
 908        dst_hold(&rt->dst);
 909        read_unlock_bh(&table->tb6_lock);
 910
 911        if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
 912                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 913        else if (!(rt->dst.flags & DST_HOST))
 914                nrt = rt6_alloc_clone(rt, &fl6->daddr);
 915        else
 916                goto out2;
 917
 918        ip6_rt_put(rt);
 919        rt = nrt ? : net->ipv6.ip6_null_entry;
 920
 921        dst_hold(&rt->dst);
 922        if (nrt) {
 923                err = ip6_ins_rt(nrt);
 924                if (!err)
 925                        goto out2;
 926        }
 927
 928        if (--attempts <= 0)
 929                goto out2;
 930
 931        /*
 932         * Race condition! In the gap, when table->tb6_lock was
 933         * released someone could insert this route.  Relookup.
 934         */
 935        ip6_rt_put(rt);
 936        goto relookup;
 937
 938out:
 939        if (reachable) {
 940                reachable = 0;
 941                goto restart_2;
 942        }
 943        dst_hold(&rt->dst);
 944        read_unlock_bh(&table->tb6_lock);
 945out2:
 946        rt->dst.lastuse = jiffies;
 947        rt->dst.__use++;
 948
 949        return rt;
 950}
 951
 952static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 953                                            struct flowi6 *fl6, int flags)
 954{
 955        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 956}
 957
 958static struct dst_entry *ip6_route_input_lookup(struct net *net,
 959                                                struct net_device *dev,
 960                                                struct flowi6 *fl6, int flags)
 961{
 962        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 963                flags |= RT6_LOOKUP_F_IFACE;
 964
 965        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 966}
 967
 968void ip6_route_input(struct sk_buff *skb)
 969{
 970        const struct ipv6hdr *iph = ipv6_hdr(skb);
 971        struct net *net = dev_net(skb->dev);
 972        int flags = RT6_LOOKUP_F_HAS_SADDR;
 973        struct flowi6 fl6 = {
 974                .flowi6_iif = skb->dev->ifindex,
 975                .daddr = iph->daddr,
 976                .saddr = iph->saddr,
 977                .flowlabel = ip6_flowinfo(iph),
 978                .flowi6_mark = skb->mark,
 979                .flowi6_proto = iph->nexthdr,
 980        };
 981
 982        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 983}
 984
 985static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 986                                             struct flowi6 *fl6, int flags)
 987{
 988        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 989}
 990
 991struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 992                                    struct flowi6 *fl6)
 993{
 994        int flags = 0;
 995
 996        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 997
 998        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 999                flags |= RT6_LOOKUP_F_IFACE;
1000
1001        if (!ipv6_addr_any(&fl6->saddr))
1002                flags |= RT6_LOOKUP_F_HAS_SADDR;
1003        else if (sk)
1004                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1005
1006        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1007}
1008
1009EXPORT_SYMBOL(ip6_route_output);
1010
1011struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1012{
1013        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1014        struct dst_entry *new = NULL;
1015
1016        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1017        if (rt) {
1018                new = &rt->dst;
1019
1020                memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1021                rt6_init_peer(rt, net->ipv6.peers);
1022
1023                new->__use = 1;
1024                new->input = dst_discard;
1025                new->output = dst_discard;
1026
1027                if (dst_metrics_read_only(&ort->dst))
1028                        new->_metrics = ort->dst._metrics;
1029                else
1030                        dst_copy_metrics(new, &ort->dst);
1031                rt->rt6i_idev = ort->rt6i_idev;
1032                if (rt->rt6i_idev)
1033                        in6_dev_hold(rt->rt6i_idev);
1034
1035                rt->rt6i_gateway = ort->rt6i_gateway;
1036                rt->rt6i_flags = ort->rt6i_flags;
1037                rt->rt6i_metric = 0;
1038
1039                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1040#ifdef CONFIG_IPV6_SUBTREES
1041                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1042#endif
1043
1044                dst_free(new);
1045        }
1046
1047        dst_release(dst_orig);
1048        return new ? new : ERR_PTR(-ENOMEM);
1049}
1050
1051/*
1052 *      Destination cache support functions
1053 */
1054
1055static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1056{
1057        struct rt6_info *rt;
1058
1059        rt = (struct rt6_info *) dst;
1060
1061        /* All IPV6 dsts are created with ->obsolete set to the value
1062         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1063         * into this function always.
1064         */
1065        if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1066                return NULL;
1067
1068        if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1069                return dst;
1070
1071        return NULL;
1072}
1073
1074static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1075{
1076        struct rt6_info *rt = (struct rt6_info *) dst;
1077
1078        if (rt) {
1079                if (rt->rt6i_flags & RTF_CACHE) {
1080                        if (rt6_check_expired(rt)) {
1081                                ip6_del_rt(rt);
1082                                dst = NULL;
1083                        }
1084                } else {
1085                        dst_release(dst);
1086                        dst = NULL;
1087                }
1088        }
1089        return dst;
1090}
1091
1092static void ip6_link_failure(struct sk_buff *skb)
1093{
1094        struct rt6_info *rt;
1095
1096        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1097
1098        rt = (struct rt6_info *) skb_dst(skb);
1099        if (rt) {
1100                if (rt->rt6i_flags & RTF_CACHE) {
1101                        dst_hold(&rt->dst);
1102                        if (ip6_del_rt(rt))
1103                                dst_free(&rt->dst);
1104                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1105                        rt->rt6i_node->fn_sernum = -1;
1106                }
1107        }
1108}
1109
1110static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1111                               struct sk_buff *skb, u32 mtu)
1112{
1113        struct rt6_info *rt6 = (struct rt6_info*)dst;
1114
1115        dst_confirm(dst);
1116        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1117                struct net *net = dev_net(dst->dev);
1118
1119                rt6->rt6i_flags |= RTF_MODIFIED;
1120                if (mtu < IPV6_MIN_MTU) {
1121                        u32 features = dst_metric(dst, RTAX_FEATURES);
1122                        mtu = IPV6_MIN_MTU;
1123                        features |= RTAX_FEATURE_ALLFRAG;
1124                        dst_metric_set(dst, RTAX_FEATURES, features);
1125                }
1126                dst_metric_set(dst, RTAX_MTU, mtu);
1127                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1128        }
1129}
1130
1131void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1132                     int oif, u32 mark)
1133{
1134        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1135        struct dst_entry *dst;
1136        struct flowi6 fl6;
1137
1138        memset(&fl6, 0, sizeof(fl6));
1139        fl6.flowi6_oif = oif;
1140        fl6.flowi6_mark = mark;
1141        fl6.flowi6_flags = 0;
1142        fl6.daddr = iph->daddr;
1143        fl6.saddr = iph->saddr;
1144        fl6.flowlabel = ip6_flowinfo(iph);
1145
1146        dst = ip6_route_output(net, NULL, &fl6);
1147        if (!dst->error)
1148                ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1149        dst_release(dst);
1150}
1151EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1152
1153void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1154{
1155        ip6_update_pmtu(skb, sock_net(sk), mtu,
1156                        sk->sk_bound_dev_if, sk->sk_mark);
1157}
1158EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1159
1160void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1161{
1162        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1163        struct dst_entry *dst;
1164        struct flowi6 fl6;
1165
1166        memset(&fl6, 0, sizeof(fl6));
1167        fl6.flowi6_oif = oif;
1168        fl6.flowi6_mark = mark;
1169        fl6.flowi6_flags = 0;
1170        fl6.daddr = iph->daddr;
1171        fl6.saddr = iph->saddr;
1172        fl6.flowlabel = ip6_flowinfo(iph);
1173
1174        dst = ip6_route_output(net, NULL, &fl6);
1175        if (!dst->error)
1176                rt6_do_redirect(dst, NULL, skb);
1177        dst_release(dst);
1178}
1179EXPORT_SYMBOL_GPL(ip6_redirect);
1180
1181void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1182                            u32 mark)
1183{
1184        const struct ipv6hdr *iph = ipv6_hdr(skb);
1185        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1186        struct dst_entry *dst;
1187        struct flowi6 fl6;
1188
1189        memset(&fl6, 0, sizeof(fl6));
1190        fl6.flowi6_oif = oif;
1191        fl6.flowi6_mark = mark;
1192        fl6.flowi6_flags = 0;
1193        fl6.daddr = msg->dest;
1194        fl6.saddr = iph->daddr;
1195
1196        dst = ip6_route_output(net, NULL, &fl6);
1197        if (!dst->error)
1198                rt6_do_redirect(dst, NULL, skb);
1199        dst_release(dst);
1200}
1201
1202void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1203{
1204        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1205}
1206EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1207
1208static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1209{
1210        struct net_device *dev = dst->dev;
1211        unsigned int mtu = dst_mtu(dst);
1212        struct net *net = dev_net(dev);
1213
1214        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1215
1216        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1217                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1218
1219        /*
1220         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1221         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1222         * IPV6_MAXPLEN is also valid and means: "any MSS,
1223         * rely only on pmtu discovery"
1224         */
1225        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1226                mtu = IPV6_MAXPLEN;
1227        return mtu;
1228}
1229
1230static unsigned int ip6_mtu(const struct dst_entry *dst)
1231{
1232        struct inet6_dev *idev;
1233        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1234
1235        if (mtu)
1236                return mtu;
1237
1238        mtu = IPV6_MIN_MTU;
1239
1240        rcu_read_lock();
1241        idev = __in6_dev_get(dst->dev);
1242        if (idev)
1243                mtu = idev->cnf.mtu6;
1244        rcu_read_unlock();
1245
1246        return mtu;
1247}
1248
1249static struct dst_entry *icmp6_dst_gc_list;
1250static DEFINE_SPINLOCK(icmp6_dst_lock);
1251
1252struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1253                                  struct flowi6 *fl6)
1254{
1255        struct dst_entry *dst;
1256        struct rt6_info *rt;
1257        struct inet6_dev *idev = in6_dev_get(dev);
1258        struct net *net = dev_net(dev);
1259
1260        if (unlikely(!idev))
1261                return ERR_PTR(-ENODEV);
1262
1263        rt = ip6_dst_alloc(net, dev, 0, NULL);
1264        if (unlikely(!rt)) {
1265                in6_dev_put(idev);
1266                dst = ERR_PTR(-ENOMEM);
1267                goto out;
1268        }
1269
1270        rt->dst.flags |= DST_HOST;
1271        rt->dst.output  = ip6_output;
1272        atomic_set(&rt->dst.__refcnt, 1);
1273        rt->rt6i_dst.addr = fl6->daddr;
1274        rt->rt6i_dst.plen = 128;
1275        rt->rt6i_idev     = idev;
1276        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1277
1278        spin_lock_bh(&icmp6_dst_lock);
1279        rt->dst.next = icmp6_dst_gc_list;
1280        icmp6_dst_gc_list = &rt->dst;
1281        spin_unlock_bh(&icmp6_dst_lock);
1282
1283        fib6_force_start_gc(net);
1284
1285        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1286
1287out:
1288        return dst;
1289}
1290
1291int icmp6_dst_gc(void)
1292{
1293        struct dst_entry *dst, **pprev;
1294        int more = 0;
1295
1296        spin_lock_bh(&icmp6_dst_lock);
1297        pprev = &icmp6_dst_gc_list;
1298
1299        while ((dst = *pprev) != NULL) {
1300                if (!atomic_read(&dst->__refcnt)) {
1301                        *pprev = dst->next;
1302                        dst_free(dst);
1303                } else {
1304                        pprev = &dst->next;
1305                        ++more;
1306                }
1307        }
1308
1309        spin_unlock_bh(&icmp6_dst_lock);
1310
1311        return more;
1312}
1313
1314static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1315                            void *arg)
1316{
1317        struct dst_entry *dst, **pprev;
1318
1319        spin_lock_bh(&icmp6_dst_lock);
1320        pprev = &icmp6_dst_gc_list;
1321        while ((dst = *pprev) != NULL) {
1322                struct rt6_info *rt = (struct rt6_info *) dst;
1323                if (func(rt, arg)) {
1324                        *pprev = dst->next;
1325                        dst_free(dst);
1326                } else {
1327                        pprev = &dst->next;
1328                }
1329        }
1330        spin_unlock_bh(&icmp6_dst_lock);
1331}
1332
1333static int ip6_dst_gc(struct dst_ops *ops)
1334{
1335        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1336        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1337        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1338        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1339        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1340        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1341        int entries;
1342
1343        entries = dst_entries_get_fast(ops);
1344        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1345            entries <= rt_max_size)
1346                goto out;
1347
1348        net->ipv6.ip6_rt_gc_expire++;
1349        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1350        entries = dst_entries_get_slow(ops);
1351        if (entries < ops->gc_thresh)
1352                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1353out:
1354        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1355        return entries > rt_max_size;
1356}
1357
1358int ip6_dst_hoplimit(struct dst_entry *dst)
1359{
1360        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1361        if (hoplimit == 0) {
1362                struct net_device *dev = dst->dev;
1363                struct inet6_dev *idev;
1364
1365                rcu_read_lock();
1366                idev = __in6_dev_get(dev);
1367                if (idev)
1368                        hoplimit = idev->cnf.hop_limit;
1369                else
1370                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1371                rcu_read_unlock();
1372        }
1373        return hoplimit;
1374}
1375EXPORT_SYMBOL(ip6_dst_hoplimit);
1376
1377/*
1378 *
1379 */
1380
1381int ip6_route_add(struct fib6_config *cfg)
1382{
1383        int err;
1384        struct net *net = cfg->fc_nlinfo.nl_net;
1385        struct rt6_info *rt = NULL;
1386        struct net_device *dev = NULL;
1387        struct inet6_dev *idev = NULL;
1388        struct fib6_table *table;
1389        int addr_type;
1390
1391        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1392                return -EINVAL;
1393#ifndef CONFIG_IPV6_SUBTREES
1394        if (cfg->fc_src_len)
1395                return -EINVAL;
1396#endif
1397        if (cfg->fc_ifindex) {
1398                err = -ENODEV;
1399                dev = dev_get_by_index(net, cfg->fc_ifindex);
1400                if (!dev)
1401                        goto out;
1402                idev = in6_dev_get(dev);
1403                if (!idev)
1404                        goto out;
1405        }
1406
1407        if (cfg->fc_metric == 0)
1408                cfg->fc_metric = IP6_RT_PRIO_USER;
1409
1410        err = -ENOBUFS;
1411        if (cfg->fc_nlinfo.nlh &&
1412            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1413                table = fib6_get_table(net, cfg->fc_table);
1414                if (!table) {
1415                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1416                        table = fib6_new_table(net, cfg->fc_table);
1417                }
1418        } else {
1419                table = fib6_new_table(net, cfg->fc_table);
1420        }
1421
1422        if (!table)
1423                goto out;
1424
1425        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1426
1427        if (!rt) {
1428                err = -ENOMEM;
1429                goto out;
1430        }
1431
1432        if (cfg->fc_flags & RTF_EXPIRES)
1433                rt6_set_expires(rt, jiffies +
1434                                clock_t_to_jiffies(cfg->fc_expires));
1435        else
1436                rt6_clean_expires(rt);
1437
1438        if (cfg->fc_protocol == RTPROT_UNSPEC)
1439                cfg->fc_protocol = RTPROT_BOOT;
1440        rt->rt6i_protocol = cfg->fc_protocol;
1441
1442        addr_type = ipv6_addr_type(&cfg->fc_dst);
1443
1444        if (addr_type & IPV6_ADDR_MULTICAST)
1445                rt->dst.input = ip6_mc_input;
1446        else if (cfg->fc_flags & RTF_LOCAL)
1447                rt->dst.input = ip6_input;
1448        else
1449                rt->dst.input = ip6_forward;
1450
1451        rt->dst.output = ip6_output;
1452
1453        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1454        rt->rt6i_dst.plen = cfg->fc_dst_len;
1455        if (rt->rt6i_dst.plen == 128)
1456               rt->dst.flags |= DST_HOST;
1457
1458        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1459                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1460                if (!metrics) {
1461                        err = -ENOMEM;
1462                        goto out;
1463                }
1464                dst_init_metrics(&rt->dst, metrics, 0);
1465        }
1466#ifdef CONFIG_IPV6_SUBTREES
1467        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1468        rt->rt6i_src.plen = cfg->fc_src_len;
1469#endif
1470
1471        rt->rt6i_metric = cfg->fc_metric;
1472
1473        /* We cannot add true routes via loopback here,
1474           they would result in kernel looping; promote them to reject routes
1475         */
1476        if ((cfg->fc_flags & RTF_REJECT) ||
1477            (dev && (dev->flags & IFF_LOOPBACK) &&
1478             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1479             !(cfg->fc_flags & RTF_LOCAL))) {
1480                /* hold loopback dev/idev if we haven't done so. */
1481                if (dev != net->loopback_dev) {
1482                        if (dev) {
1483                                dev_put(dev);
1484                                in6_dev_put(idev);
1485                        }
1486                        dev = net->loopback_dev;
1487                        dev_hold(dev);
1488                        idev = in6_dev_get(dev);
1489                        if (!idev) {
1490                                err = -ENODEV;
1491                                goto out;
1492                        }
1493                }
1494                rt->dst.output = ip6_pkt_discard_out;
1495                rt->dst.input = ip6_pkt_discard;
1496                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1497                switch (cfg->fc_type) {
1498                case RTN_BLACKHOLE:
1499                        rt->dst.error = -EINVAL;
1500                        break;
1501                case RTN_PROHIBIT:
1502                        rt->dst.error = -EACCES;
1503                        break;
1504                case RTN_THROW:
1505                        rt->dst.error = -EAGAIN;
1506                        break;
1507                default:
1508                        rt->dst.error = -ENETUNREACH;
1509                        break;
1510                }
1511                goto install_route;
1512        }
1513
1514        if (cfg->fc_flags & RTF_GATEWAY) {
1515                const struct in6_addr *gw_addr;
1516                int gwa_type;
1517
1518                gw_addr = &cfg->fc_gateway;
1519                rt->rt6i_gateway = *gw_addr;
1520                gwa_type = ipv6_addr_type(gw_addr);
1521
1522                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1523                        struct rt6_info *grt;
1524
1525                        /* IPv6 strictly inhibits using not link-local
1526                           addresses as nexthop address.
1527                           Otherwise, router will not able to send redirects.
1528                           It is very good, but in some (rare!) circumstances
1529                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1530                           some exceptions. --ANK
1531                         */
1532                        err = -EINVAL;
1533                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1534                                goto out;
1535
1536                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1537
1538                        err = -EHOSTUNREACH;
1539                        if (!grt)
1540                                goto out;
1541                        if (dev) {
1542                                if (dev != grt->dst.dev) {
1543                                        ip6_rt_put(grt);
1544                                        goto out;
1545                                }
1546                        } else {
1547                                dev = grt->dst.dev;
1548                                idev = grt->rt6i_idev;
1549                                dev_hold(dev);
1550                                in6_dev_hold(grt->rt6i_idev);
1551                        }
1552                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1553                                err = 0;
1554                        ip6_rt_put(grt);
1555
1556                        if (err)
1557                                goto out;
1558                }
1559                err = -EINVAL;
1560                if (!dev || (dev->flags & IFF_LOOPBACK))
1561                        goto out;
1562        }
1563
1564        err = -ENODEV;
1565        if (!dev)
1566                goto out;
1567
1568        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1569                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1570                        err = -EINVAL;
1571                        goto out;
1572                }
1573                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1574                rt->rt6i_prefsrc.plen = 128;
1575        } else
1576                rt->rt6i_prefsrc.plen = 0;
1577
1578        rt->rt6i_flags = cfg->fc_flags;
1579
1580install_route:
1581        if (cfg->fc_mx) {
1582                struct nlattr *nla;
1583                int remaining;
1584
1585                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1586                        int type = nla_type(nla);
1587
1588                        if (type) {
1589                                if (type > RTAX_MAX) {
1590                                        err = -EINVAL;
1591                                        goto out;
1592                                }
1593
1594                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1595                        }
1596                }
1597        }
1598
1599        rt->dst.dev = dev;
1600        rt->rt6i_idev = idev;
1601        rt->rt6i_table = table;
1602
1603        cfg->fc_nlinfo.nl_net = dev_net(dev);
1604
1605        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1606
1607out:
1608        if (dev)
1609                dev_put(dev);
1610        if (idev)
1611                in6_dev_put(idev);
1612        if (rt)
1613                dst_free(&rt->dst);
1614        return err;
1615}
1616
1617static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1618{
1619        int err;
1620        struct fib6_table *table;
1621        struct net *net = dev_net(rt->dst.dev);
1622
1623        if (rt == net->ipv6.ip6_null_entry) {
1624                err = -ENOENT;
1625                goto out;
1626        }
1627
1628        table = rt->rt6i_table;
1629        write_lock_bh(&table->tb6_lock);
1630        err = fib6_del(rt, info);
1631        write_unlock_bh(&table->tb6_lock);
1632
1633out:
1634        ip6_rt_put(rt);
1635        return err;
1636}
1637
1638int ip6_del_rt(struct rt6_info *rt)
1639{
1640        struct nl_info info = {
1641                .nl_net = dev_net(rt->dst.dev),
1642        };
1643        return __ip6_del_rt(rt, &info);
1644}
1645
1646static int ip6_route_del(struct fib6_config *cfg)
1647{
1648        struct fib6_table *table;
1649        struct fib6_node *fn;
1650        struct rt6_info *rt;
1651        int err = -ESRCH;
1652
1653        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1654        if (!table)
1655                return err;
1656
1657        read_lock_bh(&table->tb6_lock);
1658
1659        fn = fib6_locate(&table->tb6_root,
1660                         &cfg->fc_dst, cfg->fc_dst_len,
1661                         &cfg->fc_src, cfg->fc_src_len);
1662
1663        if (fn) {
1664                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1665                        if (cfg->fc_ifindex &&
1666                            (!rt->dst.dev ||
1667                             rt->dst.dev->ifindex != cfg->fc_ifindex))
1668                                continue;
1669                        if (cfg->fc_flags & RTF_GATEWAY &&
1670                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1671                                continue;
1672                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1673                                continue;
1674                        dst_hold(&rt->dst);
1675                        read_unlock_bh(&table->tb6_lock);
1676
1677                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1678                }
1679        }
1680        read_unlock_bh(&table->tb6_lock);
1681
1682        return err;
1683}
1684
1685static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1686{
1687        struct net *net = dev_net(skb->dev);
1688        struct netevent_redirect netevent;
1689        struct rt6_info *rt, *nrt = NULL;
1690        struct ndisc_options ndopts;
1691        struct inet6_dev *in6_dev;
1692        struct neighbour *neigh;
1693        struct rd_msg *msg;
1694        int optlen, on_link;
1695        u8 *lladdr;
1696
1697        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1698        optlen -= sizeof(*msg);
1699
1700        if (optlen < 0) {
1701                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1702                return;
1703        }
1704
1705        msg = (struct rd_msg *)icmp6_hdr(skb);
1706
1707        if (ipv6_addr_is_multicast(&msg->dest)) {
1708                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1709                return;
1710        }
1711
1712        on_link = 0;
1713        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1714                on_link = 1;
1715        } else if (ipv6_addr_type(&msg->target) !=
1716                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1717                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1718                return;
1719        }
1720
1721        in6_dev = __in6_dev_get(skb->dev);
1722        if (!in6_dev)
1723                return;
1724        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1725                return;
1726
1727        /* RFC2461 8.1:
1728         *      The IP source address of the Redirect MUST be the same as the current
1729         *      first-hop router for the specified ICMP Destination Address.
1730         */
1731
1732        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1733                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1734                return;
1735        }
1736
1737        lladdr = NULL;
1738        if (ndopts.nd_opts_tgt_lladdr) {
1739                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1740                                             skb->dev);
1741                if (!lladdr) {
1742                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1743                        return;
1744                }
1745        }
1746
1747        rt = (struct rt6_info *) dst;
1748        if (rt == net->ipv6.ip6_null_entry) {
1749                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1750                return;
1751        }
1752
1753        /* Redirect received -> path was valid.
1754         * Look, redirects are sent only in response to data packets,
1755         * so that this nexthop apparently is reachable. --ANK
1756         */
1757        dst_confirm(&rt->dst);
1758
1759        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1760        if (!neigh)
1761                return;
1762
1763        /*
1764         *      We have finally decided to accept it.
1765         */
1766
1767        neigh_update(neigh, lladdr, NUD_STALE,
1768                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1769                     NEIGH_UPDATE_F_OVERRIDE|
1770                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1771                                     NEIGH_UPDATE_F_ISROUTER))
1772                     );
1773
1774        nrt = ip6_rt_copy(rt, &msg->dest);
1775        if (!nrt)
1776                goto out;
1777
1778        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1779        if (on_link)
1780                nrt->rt6i_flags &= ~RTF_GATEWAY;
1781
1782        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1783
1784        if (ip6_ins_rt(nrt))
1785                goto out;
1786
1787        netevent.old = &rt->dst;
1788        netevent.new = &nrt->dst;
1789        netevent.daddr = &msg->dest;
1790        netevent.neigh = neigh;
1791        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1792
1793        if (rt->rt6i_flags & RTF_CACHE) {
1794                rt = (struct rt6_info *) dst_clone(&rt->dst);
1795                ip6_del_rt(rt);
1796        }
1797
1798out:
1799        neigh_release(neigh);
1800}
1801
1802/*
1803 *      Misc support functions
1804 */
1805
1806static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1807                                    const struct in6_addr *dest)
1808{
1809        struct net *net = dev_net(ort->dst.dev);
1810        struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1811                                            ort->rt6i_table);
1812
1813        if (rt) {
1814                rt->dst.input = ort->dst.input;
1815                rt->dst.output = ort->dst.output;
1816                rt->dst.flags |= DST_HOST;
1817
1818                rt->rt6i_dst.addr = *dest;
1819                rt->rt6i_dst.plen = 128;
1820                dst_copy_metrics(&rt->dst, &ort->dst);
1821                rt->dst.error = ort->dst.error;
1822                rt->rt6i_idev = ort->rt6i_idev;
1823                if (rt->rt6i_idev)
1824                        in6_dev_hold(rt->rt6i_idev);
1825                rt->dst.lastuse = jiffies;
1826
1827                rt->rt6i_gateway = ort->rt6i_gateway;
1828                rt->rt6i_flags = ort->rt6i_flags;
1829                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1830                    (RTF_DEFAULT | RTF_ADDRCONF))
1831                        rt6_set_from(rt, ort);
1832                rt->rt6i_metric = 0;
1833
1834#ifdef CONFIG_IPV6_SUBTREES
1835                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1836#endif
1837                memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1838                rt->rt6i_table = ort->rt6i_table;
1839        }
1840        return rt;
1841}
1842
1843#ifdef CONFIG_IPV6_ROUTE_INFO
1844static struct rt6_info *rt6_get_route_info(struct net *net,
1845                                           const struct in6_addr *prefix, int prefixlen,
1846                                           const struct in6_addr *gwaddr, int ifindex)
1847{
1848        struct fib6_node *fn;
1849        struct rt6_info *rt = NULL;
1850        struct fib6_table *table;
1851
1852        table = fib6_get_table(net, RT6_TABLE_INFO);
1853        if (!table)
1854                return NULL;
1855
1856        read_lock_bh(&table->tb6_lock);
1857        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1858        if (!fn)
1859                goto out;
1860
1861        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1862                if (rt->dst.dev->ifindex != ifindex)
1863                        continue;
1864                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1865                        continue;
1866                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1867                        continue;
1868                dst_hold(&rt->dst);
1869                break;
1870        }
1871out:
1872        read_unlock_bh(&table->tb6_lock);
1873        return rt;
1874}
1875
1876static struct rt6_info *rt6_add_route_info(struct net *net,
1877                                           const struct in6_addr *prefix, int prefixlen,
1878                                           const struct in6_addr *gwaddr, int ifindex,
1879                                           unsigned int pref)
1880{
1881        struct fib6_config cfg = {
1882                .fc_table       = RT6_TABLE_INFO,
1883                .fc_metric      = IP6_RT_PRIO_USER,
1884                .fc_ifindex     = ifindex,
1885                .fc_dst_len     = prefixlen,
1886                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1887                                  RTF_UP | RTF_PREF(pref),
1888                .fc_nlinfo.portid = 0,
1889                .fc_nlinfo.nlh = NULL,
1890                .fc_nlinfo.nl_net = net,
1891        };
1892
1893        cfg.fc_dst = *prefix;
1894        cfg.fc_gateway = *gwaddr;
1895
1896        /* We should treat it as a default route if prefix length is 0. */
1897        if (!prefixlen)
1898                cfg.fc_flags |= RTF_DEFAULT;
1899
1900        ip6_route_add(&cfg);
1901
1902        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1903}
1904#endif
1905
1906struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1907{
1908        struct rt6_info *rt;
1909        struct fib6_table *table;
1910
1911        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1912        if (!table)
1913                return NULL;
1914
1915        read_lock_bh(&table->tb6_lock);
1916        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1917                if (dev == rt->dst.dev &&
1918                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1919                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1920                        break;
1921        }
1922        if (rt)
1923                dst_hold(&rt->dst);
1924        read_unlock_bh(&table->tb6_lock);
1925        return rt;
1926}
1927
1928struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1929                                     struct net_device *dev,
1930                                     unsigned int pref)
1931{
1932        struct fib6_config cfg = {
1933                .fc_table       = RT6_TABLE_DFLT,
1934                .fc_metric      = IP6_RT_PRIO_USER,
1935                .fc_ifindex     = dev->ifindex,
1936                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1937                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1938                .fc_nlinfo.portid = 0,
1939                .fc_nlinfo.nlh = NULL,
1940                .fc_nlinfo.nl_net = dev_net(dev),
1941        };
1942
1943        cfg.fc_gateway = *gwaddr;
1944
1945        ip6_route_add(&cfg);
1946
1947        return rt6_get_dflt_router(gwaddr, dev);
1948}
1949
1950void rt6_purge_dflt_routers(struct net *net)
1951{
1952        struct rt6_info *rt;
1953        struct fib6_table *table;
1954
1955        /* NOTE: Keep consistent with rt6_get_dflt_router */
1956        table = fib6_get_table(net, RT6_TABLE_DFLT);
1957        if (!table)
1958                return;
1959
1960restart:
1961        read_lock_bh(&table->tb6_lock);
1962        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1963                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1964                    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1965                        dst_hold(&rt->dst);
1966                        read_unlock_bh(&table->tb6_lock);
1967                        ip6_del_rt(rt);
1968                        goto restart;
1969                }
1970        }
1971        read_unlock_bh(&table->tb6_lock);
1972}
1973
1974static void rtmsg_to_fib6_config(struct net *net,
1975                                 struct in6_rtmsg *rtmsg,
1976                                 struct fib6_config *cfg)
1977{
1978        memset(cfg, 0, sizeof(*cfg));
1979
1980        cfg->fc_table = RT6_TABLE_MAIN;
1981        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1982        cfg->fc_metric = rtmsg->rtmsg_metric;
1983        cfg->fc_expires = rtmsg->rtmsg_info;
1984        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1985        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1986        cfg->fc_flags = rtmsg->rtmsg_flags;
1987
1988        cfg->fc_nlinfo.nl_net = net;
1989
1990        cfg->fc_dst = rtmsg->rtmsg_dst;
1991        cfg->fc_src = rtmsg->rtmsg_src;
1992        cfg->fc_gateway = rtmsg->rtmsg_gateway;
1993}
1994
1995int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1996{
1997        struct fib6_config cfg;
1998        struct in6_rtmsg rtmsg;
1999        int err;
2000
2001        switch(cmd) {
2002        case SIOCADDRT:         /* Add a route */
2003        case SIOCDELRT:         /* Delete a route */
2004                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2005                        return -EPERM;
2006                err = copy_from_user(&rtmsg, arg,
2007                                     sizeof(struct in6_rtmsg));
2008                if (err)
2009                        return -EFAULT;
2010
2011                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2012
2013                rtnl_lock();
2014                switch (cmd) {
2015                case SIOCADDRT:
2016                        err = ip6_route_add(&cfg);
2017                        break;
2018                case SIOCDELRT:
2019                        err = ip6_route_del(&cfg);
2020                        break;
2021                default:
2022                        err = -EINVAL;
2023                }
2024                rtnl_unlock();
2025
2026                return err;
2027        }
2028
2029        return -EINVAL;
2030}
2031
2032/*
2033 *      Drop the packet on the floor
2034 */
2035
2036static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2037{
2038        int type;
2039        struct dst_entry *dst = skb_dst(skb);
2040        switch (ipstats_mib_noroutes) {
2041        case IPSTATS_MIB_INNOROUTES:
2042                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2043                if (type == IPV6_ADDR_ANY) {
2044                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2045                                      IPSTATS_MIB_INADDRERRORS);
2046                        break;
2047                }
2048                /* FALLTHROUGH */
2049        case IPSTATS_MIB_OUTNOROUTES:
2050                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2051                              ipstats_mib_noroutes);
2052                break;
2053        }
2054        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2055        kfree_skb(skb);
2056        return 0;
2057}
2058
2059static int ip6_pkt_discard(struct sk_buff *skb)
2060{
2061        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2062}
2063
2064static int ip6_pkt_discard_out(struct sk_buff *skb)
2065{
2066        skb->dev = skb_dst(skb)->dev;
2067        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2068}
2069
2070#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2071
2072static int ip6_pkt_prohibit(struct sk_buff *skb)
2073{
2074        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2075}
2076
2077static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2078{
2079        skb->dev = skb_dst(skb)->dev;
2080        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2081}
2082
2083#endif
2084
2085/*
2086 *      Allocate a dst for local (unicast / anycast) address.
2087 */
2088
2089struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2090                                    const struct in6_addr *addr,
2091                                    bool anycast)
2092{
2093        struct net *net = dev_net(idev->dev);
2094        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2095
2096        if (!rt) {
2097                net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2098                return ERR_PTR(-ENOMEM);
2099        }
2100
2101        in6_dev_hold(idev);
2102
2103        rt->dst.flags |= DST_HOST;
2104        rt->dst.input = ip6_input;
2105        rt->dst.output = ip6_output;
2106        rt->rt6i_idev = idev;
2107
2108        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2109        if (anycast)
2110                rt->rt6i_flags |= RTF_ANYCAST;
2111        else
2112                rt->rt6i_flags |= RTF_LOCAL;
2113
2114        rt->rt6i_dst.addr = *addr;
2115        rt->rt6i_dst.plen = 128;
2116        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2117
2118        atomic_set(&rt->dst.__refcnt, 1);
2119
2120        return rt;
2121}
2122
2123int ip6_route_get_saddr(struct net *net,
2124                        struct rt6_info *rt,
2125                        const struct in6_addr *daddr,
2126                        unsigned int prefs,
2127                        struct in6_addr *saddr)
2128{
2129        struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2130        int err = 0;
2131        if (rt->rt6i_prefsrc.plen)
2132                *saddr = rt->rt6i_prefsrc.addr;
2133        else
2134                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2135                                         daddr, prefs, saddr);
2136        return err;
2137}
2138
2139/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle that rt6_remove_prefsrc() hands to fib6_remove_prefsrc()
 * through the opaque pointer of fib6_clean_all().
 */
2140struct arg_dev_net_ip {
2141        struct net_device *dev;         /* only routes on this device match; NULL matches any */
2142        struct net *net;                /* namespace whose ip6_null_entry is skipped */
2143        struct in6_addr *addr;          /* address compared against each route's prefsrc */
2144};
2145
2146static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2147{
2148        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2149        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2150        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2151
2152        if (((void *)rt->dst.dev == dev || !dev) &&
2153            rt != net->ipv6.ip6_null_entry &&
2154            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2155                /* remove prefsrc entry */
2156                rt->rt6i_prefsrc.plen = 0;
2157        }
2158        return 0;
2159}
2160
2161void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2162{
2163        struct net *net = dev_net(ifp->idev->dev);
2164        struct arg_dev_net_ip adni = {
2165                .dev = ifp->idev->dev,
2166                .net = net,
2167                .addr = &ifp->addr,
2168        };
2169        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2170}
2171
/*
 * Argument bundle that rt6_ifdown() hands to fib6_ifdown() through the
 * opaque pointer of fib6_clean_all() and icmp6_clean_all().
 */
2172struct arg_dev_net {
2173        struct net_device *dev;         /* only routes on this device match; NULL matches any */
2174        struct net *net;                /* namespace whose ip6_null_entry is skipped */
2175};
2176
2177static int fib6_ifdown(struct rt6_info *rt, void *arg)
2178{
2179        const struct arg_dev_net *adn = arg;
2180        const struct net_device *dev = adn->dev;
2181
2182        if ((rt->dst.dev == dev || !dev) &&
2183            rt != adn->net->ipv6.ip6_null_entry)
2184                return -1;
2185
2186        return 0;
2187}
2188
2189void rt6_ifdown(struct net *net, struct net_device *dev)
2190{
2191        struct arg_dev_net adn = {
2192                .dev = dev,
2193                .net = net,
2194        };
2195
2196        fib6_clean_all(net, fib6_ifdown, 0, &adn);
2197        icmp6_clean_all(fib6_ifdown, &adn);
2198}
2199
/*
 * Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route()
 * through the opaque pointer of fib6_clean_all().
 */
2200struct rt6_mtu_change_arg {
2201        struct net_device *dev;         /* device whose routes are updated */
2202        unsigned int mtu;               /* MTU value written into matching routes */
2203};
2204
2205static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2206{
2207        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2208        struct inet6_dev *idev;
2209
2210        /* In IPv6 pmtu discovery is not optional,
2211           so that RTAX_MTU lock cannot disable it.
2212           We still use this lock to block changes
2213           caused by addrconf/ndisc.
2214        */
2215
2216        idev = __in6_dev_get(arg->dev);
2217        if (!idev)
2218                return 0;
2219
2220        /* For administrative MTU increase, there is no way to discover
2221           IPv6 PMTU increase, so PMTU increase should be updated here.
2222           Since RFC 1981 doesn't include administrative MTU increase
2223           update PMTU increase is a MUST. (i.e. jumbo frame)
2224         */
2225        /*
2226           If new MTU is less than route PMTU, this new MTU will be the
2227           lowest MTU in the path, update the route PMTU to reflect PMTU
2228           decreases; if new MTU is greater than route PMTU, and the
2229           old MTU is the lowest MTU in the path, update the route PMTU
2230           to reflect the increase. In this case if the other nodes' MTU
2231           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2232           PMTU discouvery.
2233         */
2234        if (rt->dst.dev == arg->dev &&
2235            !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2236            (dst_mtu(&rt->dst) >= arg->mtu ||
2237             (dst_mtu(&rt->dst) < arg->mtu &&
2238              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2239                dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2240        }
2241        return 0;
2242}
2243
2244void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2245{
2246        struct rt6_mtu_change_arg arg = {
2247                .dev = dev,
2248                .mtu = mtu,
2249        };
2250
2251        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2252}
2253
2254static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2255        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2256        [RTA_OIF]               = { .type = NLA_U32 },
2257        [RTA_IIF]               = { .type = NLA_U32 },
2258        [RTA_PRIORITY]          = { .type = NLA_U32 },
2259        [RTA_METRICS]           = { .type = NLA_NESTED },
2260        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2261};
2262
2263static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2264                              struct fib6_config *cfg)
2265{
2266        struct rtmsg *rtm;
2267        struct nlattr *tb[RTA_MAX+1];
2268        int err;
2269
2270        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2271        if (err < 0)
2272                goto errout;
2273
2274        err = -EINVAL;
2275        rtm = nlmsg_data(nlh);
2276        memset(cfg, 0, sizeof(*cfg));
2277
2278        cfg->fc_table = rtm->rtm_table;
2279        cfg->fc_dst_len = rtm->rtm_dst_len;
2280        cfg->fc_src_len = rtm->rtm_src_len;
2281        cfg->fc_flags = RTF_UP;
2282        cfg->fc_protocol = rtm->rtm_protocol;
2283        cfg->fc_type = rtm->rtm_type;
2284
2285        if (rtm->rtm_type == RTN_UNREACHABLE ||
2286            rtm->rtm_type == RTN_BLACKHOLE ||
2287            rtm->rtm_type == RTN_PROHIBIT ||
2288            rtm->rtm_type == RTN_THROW)
2289                cfg->fc_flags |= RTF_REJECT;
2290
2291        if (rtm->rtm_type == RTN_LOCAL)
2292                cfg->fc_flags |= RTF_LOCAL;
2293
2294        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2295        cfg->fc_nlinfo.nlh = nlh;
2296        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2297
2298        if (tb[RTA_GATEWAY]) {
2299                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2300                cfg->fc_flags |= RTF_GATEWAY;
2301        }
2302
2303        if (tb[RTA_DST]) {
2304                int plen = (rtm->rtm_dst_len + 7) >> 3;
2305
2306                if (nla_len(tb[RTA_DST]) < plen)
2307                        goto errout;
2308
2309                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2310        }
2311
2312        if (tb[RTA_SRC]) {
2313                int plen = (rtm->rtm_src_len + 7) >> 3;
2314
2315                if (nla_len(tb[RTA_SRC]) < plen)
2316                        goto errout;
2317
2318                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2319        }
2320
2321        if (tb[RTA_PREFSRC])
2322                nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2323
2324        if (tb[RTA_OIF])
2325                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2326
2327        if (tb[RTA_PRIORITY])
2328                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2329
2330        if (tb[RTA_METRICS]) {
2331                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2332                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2333        }
2334
2335        if (tb[RTA_TABLE])
2336                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2337
2338        if (tb[RTA_MULTIPATH]) {
2339                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2340                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2341        }
2342
2343        err = 0;
2344errout:
2345        return err;
2346}
2347
2348static int ip6_route_multipath(struct fib6_config *cfg, int add)
2349{
2350        struct fib6_config r_cfg;
2351        struct rtnexthop *rtnh;
2352        int remaining;
2353        int attrlen;
2354        int err = 0, last_err = 0;
2355
2356beginning:
2357        rtnh = (struct rtnexthop *)cfg->fc_mp;
2358        remaining = cfg->fc_mp_len;
2359
2360        /* Parse a Multipath Entry */
2361        while (rtnh_ok(rtnh, remaining)) {
2362                memcpy(&r_cfg, cfg, sizeof(*cfg));
2363                if (rtnh->rtnh_ifindex)
2364                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2365
2366                attrlen = rtnh_attrlen(rtnh);
2367                if (attrlen > 0) {
2368                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2369
2370                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2371                        if (nla) {
2372                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2373                                r_cfg.fc_flags |= RTF_GATEWAY;
2374                        }
2375                }
2376                err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2377                if (err) {
2378                        last_err = err;
2379                        /* If we are trying to remove a route, do not stop the
2380                         * loop when ip6_route_del() fails (because next hop is
2381                         * already gone), we should try to remove all next hops.
2382                         */
2383                        if (add) {
2384                                /* If add fails, we should try to delete all
2385                                 * next hops that have been already added.
2386                                 */
2387                                add = 0;
2388                                goto beginning;
2389                        }
2390                }
2391                /* Because each route is added like a single route we remove
2392                 * this flag after the first nexthop (if there is a collision,
2393                 * we have already fail to add the first nexthop:
2394                 * fib6_add_rt2node() has reject it).
2395                 */
2396                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2397                rtnh = rtnh_next(rtnh, &remaining);
2398        }
2399
2400        return last_err;
2401}
2402
2403static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2404{
2405        struct fib6_config cfg;
2406        int err;
2407
2408        err = rtm_to_fib6_config(skb, nlh, &cfg);
2409        if (err < 0)
2410                return err;
2411
2412        if (cfg.fc_mp)
2413                return ip6_route_multipath(&cfg, 0);
2414        else
2415                return ip6_route_del(&cfg);
2416}
2417
2418static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2419{
2420        struct fib6_config cfg;
2421        int err;
2422
2423        err = rtm_to_fib6_config(skb, nlh, &cfg);
2424        if (err < 0)
2425                return err;
2426
2427        if (cfg.fc_mp)
2428                return ip6_route_multipath(&cfg, 1);
2429        else
2430                return ip6_route_add(&cfg);
2431}
2432
2433static inline size_t rt6_nlmsg_size(void)
2434{
2435        return NLMSG_ALIGN(sizeof(struct rtmsg))
2436               + nla_total_size(16) /* RTA_SRC */
2437               + nla_total_size(16) /* RTA_DST */
2438               + nla_total_size(16) /* RTA_GATEWAY */
2439               + nla_total_size(16) /* RTA_PREFSRC */
2440               + nla_total_size(4) /* RTA_TABLE */
2441               + nla_total_size(4) /* RTA_IIF */
2442               + nla_total_size(4) /* RTA_OIF */
2443               + nla_total_size(4) /* RTA_PRIORITY */
2444               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2445               + nla_total_size(sizeof(struct rta_cacheinfo));
2446}
2447
/*
 * Append one rtnetlink route message describing @rt to @skb.
 *
 * @net:    namespace, passed on to ip6mr_get_route() and
 *          ip6_route_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, emitted as RTA_DST with a reported length of 128
 *          instead of the route's own destination prefix
 * @src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    if non-zero, emitted as RTA_IIF, or, for a multicast route
 *          destination with CONFIG_IPV6_MROUTE, handed to
 *          ip6mr_get_route()
 * @type, @portid, @seq, @flags: passed to nlmsg_put() for the header
 * @prefix: if non-zero, routes without RTF_PREFIX_RT are skipped
 * @nowait: forwarded to ip6mr_get_route(); also selects how its
 *          failures are handled
 *
 * Returns 1 when the route is skipped because of @prefix, -EMSGSIZE when
 * the header or an attribute does not fit (the partial message is
 * cancelled), 0 when ip6mr_get_route() returns 0 and @nowait is not set,
 * and otherwise the value of nlmsg_end().
 */
2448static int rt6_fill_node(struct net *net,
2449                         struct sk_buff *skb, struct rt6_info *rt,
2450                         struct in6_addr *dst, struct in6_addr *src,
2451                         int iif, int type, u32 portid, u32 seq,
2452                         int prefix, int nowait, unsigned int flags)
2453{
2454        struct rtmsg *rtm;
2455        struct nlmsghdr *nlh;
2456        long expires;
2457        u32 table;
2458
2459        if (prefix) {   /* user wants prefix routes only */
2460                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2461                        /* success since this is not a prefix route */
2462                        return 1;
2463                }
2464        }
2465
2466        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2467        if (!nlh)
2468                return -EMSGSIZE;
2469
2470        rtm = nlmsg_data(nlh);
2471        rtm->rtm_family = AF_INET6;
2472        rtm->rtm_dst_len = rt->rt6i_dst.plen;
2473        rtm->rtm_src_len = rt->rt6i_src.plen;
2474        rtm->rtm_tos = 0;
        /* The table id goes into the header field and into RTA_TABLE. */
2475        if (rt->rt6i_table)
2476                table = rt->rt6i_table->tb6_id;
2477        else
2478                table = RT6_TABLE_UNSPEC;
2479        rtm->rtm_table = table;
2480        if (nla_put_u32(skb, RTA_TABLE, table))
2481                goto nla_put_failure;
        /* Reject routes: the route type is derived from the dst error code. */
2482        if (rt->rt6i_flags & RTF_REJECT) {
2483                switch (rt->dst.error) {
2484                case -EINVAL:
2485                        rtm->rtm_type = RTN_BLACKHOLE;
2486                        break;
2487                case -EACCES:
2488                        rtm->rtm_type = RTN_PROHIBIT;
2489                        break;
2490                case -EAGAIN:
2491                        rtm->rtm_type = RTN_THROW;
2492                        break;
2493                default:
2494                        rtm->rtm_type = RTN_UNREACHABLE;
2495                        break;
2496                }
2497        }
2498        else if (rt->rt6i_flags & RTF_LOCAL)
2499                rtm->rtm_type = RTN_LOCAL;
2500        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2501                rtm->rtm_type = RTN_LOCAL;
2502        else
2503                rtm->rtm_type = RTN_UNICAST;
2504        rtm->rtm_flags = 0;
2505        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* The stored protocol is overridden for dynamic and addrconf routes. */
2506        rtm->rtm_protocol = rt->rt6i_protocol;
2507        if (rt->rt6i_flags & RTF_DYNAMIC)
2508                rtm->rtm_protocol = RTPROT_REDIRECT;
2509        else if (rt->rt6i_flags & RTF_ADDRCONF) {
2510                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2511                        rtm->rtm_protocol = RTPROT_RA;
2512                else
2513                        rtm->rtm_protocol = RTPROT_KERNEL;
2514        }
2515
2516        if (rt->rt6i_flags & RTF_CACHE)
2517                rtm->rtm_flags |= RTM_F_CLONED;
2518
        /* An explicit @dst replaces the route's own prefix and is reported as /128. */
2519        if (dst) {
2520                if (nla_put(skb, RTA_DST, 16, dst))
2521                        goto nla_put_failure;
2522                rtm->rtm_dst_len = 128;
2523        } else if (rtm->rtm_dst_len)
2524                if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2525                        goto nla_put_failure;
2526#ifdef CONFIG_IPV6_SUBTREES
2527        if (src) {
2528                if (nla_put(skb, RTA_SRC, 16, src))
2529                        goto nla_put_failure;
2530                rtm->rtm_src_len = 128;
2531        } else if (rtm->rtm_src_len &&
2532                   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2533                goto nla_put_failure;
2534#endif
2535        if (iif) {
2536#ifdef CONFIG_IPV6_MROUTE
2537                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2538                        int err = ip6mr_get_route(net, skb, rtm, nowait);
                        /*
                         * err <= 0: without @nowait, 0 is returned as is
                         * and any error cancels the message; with @nowait
                         * only -EMSGSIZE cancels it and other values fall
                         * through to the rest of the fill.
                         */
2539                        if (err <= 0) {
2540                                if (!nowait) {
2541                                        if (err == 0)
2542                                                return 0;
2543                                        goto nla_put_failure;
2544                                } else {
2545                                        if (err == -EMSGSIZE)
2546                                                goto nla_put_failure;
2547                                }
2548                        }
2549                } else
2550#endif
2551                        if (nla_put_u32(skb, RTA_IIF, iif))
2552                                goto nla_put_failure;
2553        } else if (dst) {
2554                struct in6_addr saddr_buf;
2555                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2556                    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2557                        goto nla_put_failure;
2558        }
2559
        /*
         * NOTE(review): when @iif is 0, @dst is set and the route has a
         * preferred source, RTA_PREFSRC appears to be emitted by the branch
         * above and again here - confirm whether that is intended.
         */
2560        if (rt->rt6i_prefsrc.plen) {
2561                struct in6_addr saddr_buf;
2562                saddr_buf = rt->rt6i_prefsrc.addr;
2563                if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2564                        goto nla_put_failure;
2565        }
2566
2567        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2568                goto nla_put_failure;
2569
2570        if (rt->rt6i_flags & RTF_GATEWAY) {
2571                if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2572                        goto nla_put_failure;
2573        }
2574
2575        if (rt->dst.dev &&
2576            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2577                goto nla_put_failure;
2578        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2579                goto nla_put_failure;
2580
        /* Remaining lifetime in jiffies; 0 when the route has no RTF_EXPIRES. */
2581        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2582
2583        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2584                goto nla_put_failure;
2585
2586        return nlmsg_end(skb, nlh);
2587
2588nla_put_failure:
        /* Drop the partially built message from @skb. */
2589        nlmsg_cancel(skb, nlh);
2590        return -EMSGSIZE;
2591}
2592
2593int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2594{
2595        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2596        int prefix;
2597
2598        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2599                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2600                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2601        } else
2602                prefix = 0;
2603
2604        return rt6_fill_node(arg->net,
2605                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2606                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2607                     prefix, 0, NLM_F_MULTI);
2608}
2609
2610static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2611{
2612        struct net *net = sock_net(in_skb->sk);
2613        struct nlattr *tb[RTA_MAX+1];
2614        struct rt6_info *rt;
2615        struct sk_buff *skb;
2616        struct rtmsg *rtm;
2617        struct flowi6 fl6;
2618        int err, iif = 0, oif = 0;
2619
2620        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2621        if (err < 0)
2622                goto errout;
2623
2624        err = -EINVAL;
2625        memset(&fl6, 0, sizeof(fl6));
2626
2627        if (tb[RTA_SRC]) {
2628                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2629                        goto errout;
2630
2631                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2632        }
2633
2634        if (tb[RTA_DST]) {
2635                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2636                        goto errout;
2637
2638                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2639        }
2640
2641        if (tb[RTA_IIF])
2642                iif = nla_get_u32(tb[RTA_IIF]);
2643
2644        if (tb[RTA_OIF])
2645                oif = nla_get_u32(tb[RTA_OIF]);
2646
2647        if (iif) {
2648                struct net_device *dev;
2649                int flags = 0;
2650
2651                dev = __dev_get_by_index(net, iif);
2652                if (!dev) {
2653                        err = -ENODEV;
2654                        goto errout;
2655                }
2656
2657                fl6.flowi6_iif = iif;
2658
2659                if (!ipv6_addr_any(&fl6.saddr))
2660                        flags |= RT6_LOOKUP_F_HAS_SADDR;
2661
2662                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2663                                                               flags);
2664        } else {
2665                fl6.flowi6_oif = oif;
2666
2667                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2668        }
2669
2670        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2671        if (!skb) {
2672                ip6_rt_put(rt);
2673                err = -ENOBUFS;
2674                goto errout;
2675        }
2676
2677        /* Reserve room for dummy headers, this skb can pass
2678           through good chunk of routing engine.
2679         */
2680        skb_reset_mac_header(skb);
2681        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2682
2683        skb_dst_set(skb, &rt->dst);
2684
2685        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2686                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2687                            nlh->nlmsg_seq, 0, 0, 0);
2688        if (err < 0) {
2689                kfree_skb(skb);
2690                goto errout;
2691        }
2692
2693        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2694errout:
2695        return err;
2696}
2697
2698void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2699{
2700        struct sk_buff *skb;
2701        struct net *net = info->nl_net;
2702        u32 seq;
2703        int err;
2704
2705        err = -ENOBUFS;
2706        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2707
2708        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2709        if (!skb)
2710                goto errout;
2711
2712        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2713                                event, info->portid, seq, 0, 0, 0);
2714        if (err < 0) {
2715                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2716                WARN_ON(err == -EMSGSIZE);
2717                kfree_skb(skb);
2718                goto errout;
2719        }
2720        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2721                    info->nlh, gfp_any());
2722        return;
2723errout:
2724        if (err < 0)
2725                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2726}
2727
2728static int ip6_route_dev_notify(struct notifier_block *this,
2729                                unsigned long event, void *ptr)
2730{
2731        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2732        struct net *net = dev_net(dev);
2733
2734        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2735                net->ipv6.ip6_null_entry->dst.dev = dev;
2736                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2737#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2738                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2739                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2740                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2741                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2742#endif
2743        }
2744
2745        return NOTIFY_OK;
2746}
2747
2748/*
2749 *      /proc
2750 */
2751
2752#ifdef CONFIG_PROC_FS
2753
/*
 * Buffer-cursor style bookkeeping record.
 * NOTE(review): nothing in the nearby /proc code (which is seq_file
 * based) references this struct -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2762
2763static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2764{
2765        struct seq_file *m = p_arg;
2766
2767        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2768
2769#ifdef CONFIG_IPV6_SUBTREES
2770        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2771#else
2772        seq_puts(m, "00000000000000000000000000000000 00 ");
2773#endif
2774        if (rt->rt6i_flags & RTF_GATEWAY) {
2775                seq_printf(m, "%pi6", &rt->rt6i_gateway);
2776        } else {
2777                seq_puts(m, "00000000000000000000000000000000");
2778        }
2779        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2780                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2781                   rt->dst.__use, rt->rt6i_flags,
2782                   rt->dst.dev ? rt->dst.dev->name : "");
2783        return 0;
2784}
2785
2786static int ipv6_route_show(struct seq_file *m, void *v)
2787{
2788        struct net *net = (struct net *)m->private;
2789        fib6_clean_all_ro(net, rt6_info_route, 0, m);
2790        return 0;
2791}
2792
2793static int ipv6_route_open(struct inode *inode, struct file *file)
2794{
2795        return single_open_net(inode, file, ipv6_route_show);
2796}
2797
2798static const struct file_operations ipv6_route_proc_fops = {
2799        .owner          = THIS_MODULE,
2800        .open           = ipv6_route_open,
2801        .read           = seq_read,
2802        .llseek         = seq_lseek,
2803        .release        = single_release_net,
2804};
2805
2806static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2807{
2808        struct net *net = (struct net *)seq->private;
2809        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2810                   net->ipv6.rt6_stats->fib_nodes,
2811                   net->ipv6.rt6_stats->fib_route_nodes,
2812                   net->ipv6.rt6_stats->fib_rt_alloc,
2813                   net->ipv6.rt6_stats->fib_rt_entries,
2814                   net->ipv6.rt6_stats->fib_rt_cache,
2815                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2816                   net->ipv6.rt6_stats->fib_discarded_routes);
2817
2818        return 0;
2819}
2820
2821static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2822{
2823        return single_open_net(inode, file, rt6_stats_seq_show);
2824}
2825
2826static const struct file_operations rt6_stats_seq_fops = {
2827        .owner   = THIS_MODULE,
2828        .open    = rt6_stats_seq_open,
2829        .read    = seq_read,
2830        .llseek  = seq_lseek,
2831        .release = single_release_net,
2832};
2833#endif  /* CONFIG_PROC_FS */
2834
2835#ifdef CONFIG_SYSCTL
2836
2837static
2838int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2839                              void __user *buffer, size_t *lenp, loff_t *ppos)
2840{
2841        struct net *net;
2842        int delay;
2843        if (!write)
2844                return -EINVAL;
2845
2846        net = (struct net *)ctl->extra1;
2847        delay = net->ipv6.sysctl.flush_delay;
2848        proc_dointvec(ctl, write, buffer, lenp, ppos);
2849        fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2850        return 0;
2851}
2852
2853struct ctl_table ipv6_route_table_template[] = {
2854        {
2855                .procname       =       "flush",
2856                .data           =       &init_net.ipv6.sysctl.flush_delay,
2857                .maxlen         =       sizeof(int),
2858                .mode           =       0200,
2859                .proc_handler   =       ipv6_sysctl_rtcache_flush
2860        },
2861        {
2862                .procname       =       "gc_thresh",
2863                .data           =       &ip6_dst_ops_template.gc_thresh,
2864                .maxlen         =       sizeof(int),
2865                .mode           =       0644,
2866                .proc_handler   =       proc_dointvec,
2867        },
2868        {
2869                .procname       =       "max_size",
2870                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2871                .maxlen         =       sizeof(int),
2872                .mode           =       0644,
2873                .proc_handler   =       proc_dointvec,
2874        },
2875        {
2876                .procname       =       "gc_min_interval",
2877                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2878                .maxlen         =       sizeof(int),
2879                .mode           =       0644,
2880                .proc_handler   =       proc_dointvec_jiffies,
2881        },
2882        {
2883                .procname       =       "gc_timeout",
2884                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2885                .maxlen         =       sizeof(int),
2886                .mode           =       0644,
2887                .proc_handler   =       proc_dointvec_jiffies,
2888        },
2889        {
2890                .procname       =       "gc_interval",
2891                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2892                .maxlen         =       sizeof(int),
2893                .mode           =       0644,
2894                .proc_handler   =       proc_dointvec_jiffies,
2895        },
2896        {
2897                .procname       =       "gc_elasticity",
2898                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2899                .maxlen         =       sizeof(int),
2900                .mode           =       0644,
2901                .proc_handler   =       proc_dointvec,
2902        },
2903        {
2904                .procname       =       "mtu_expires",
2905                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2906                .maxlen         =       sizeof(int),
2907                .mode           =       0644,
2908                .proc_handler   =       proc_dointvec_jiffies,
2909        },
2910        {
2911                .procname       =       "min_adv_mss",
2912                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2913                .maxlen         =       sizeof(int),
2914                .mode           =       0644,
2915                .proc_handler   =       proc_dointvec,
2916        },
2917        {
2918                .procname       =       "gc_min_interval_ms",
2919                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2920                .maxlen         =       sizeof(int),
2921                .mode           =       0644,
2922                .proc_handler   =       proc_dointvec_ms_jiffies,
2923        },
2924        { }
2925};
2926
2927struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2928{
2929        struct ctl_table *table;
2930
2931        table = kmemdup(ipv6_route_table_template,
2932                        sizeof(ipv6_route_table_template),
2933                        GFP_KERNEL);
2934
2935        if (table) {
2936                table[0].data = &net->ipv6.sysctl.flush_delay;
2937                table[0].extra1 = net;
2938                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2939                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2940                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2941                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2942                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2943                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2944                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2945                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2946                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2947
2948                /* Don't export sysctls to unprivileged users */
2949                if (net->user_ns != &init_user_ns)
2950                        table[0].procname = NULL;
2951        }
2952
2953        return table;
2954}
2955#endif
2956
2957static int __net_init ip6_route_net_init(struct net *net)
2958{
2959        int ret = -ENOMEM;
2960
2961        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2962               sizeof(net->ipv6.ip6_dst_ops));
2963
2964        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2965                goto out_ip6_dst_ops;
2966
2967        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2968                                           sizeof(*net->ipv6.ip6_null_entry),
2969                                           GFP_KERNEL);
2970        if (!net->ipv6.ip6_null_entry)
2971                goto out_ip6_dst_entries;
2972        net->ipv6.ip6_null_entry->dst.path =
2973                (struct dst_entry *)net->ipv6.ip6_null_entry;
2974        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2975        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2976                         ip6_template_metrics, true);
2977
2978#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2979        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2980                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2981                                               GFP_KERNEL);
2982        if (!net->ipv6.ip6_prohibit_entry)
2983                goto out_ip6_null_entry;
2984        net->ipv6.ip6_prohibit_entry->dst.path =
2985                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2986        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2987        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2988                         ip6_template_metrics, true);
2989
2990        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2991                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2992                                               GFP_KERNEL);
2993        if (!net->ipv6.ip6_blk_hole_entry)
2994                goto out_ip6_prohibit_entry;
2995        net->ipv6.ip6_blk_hole_entry->dst.path =
2996                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2997        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2998        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2999                         ip6_template_metrics, true);
3000#endif
3001
3002        net->ipv6.sysctl.flush_delay = 0;
3003        net->ipv6.sysctl.ip6_rt_max_size = 4096;
3004        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3005        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3006        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3007        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3008        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3009        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3010
3011        net->ipv6.ip6_rt_gc_expire = 30*HZ;
3012
3013        ret = 0;
3014out:
3015        return ret;
3016
3017#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3018out_ip6_prohibit_entry:
3019        kfree(net->ipv6.ip6_prohibit_entry);
3020out_ip6_null_entry:
3021        kfree(net->ipv6.ip6_null_entry);
3022#endif
3023out_ip6_dst_entries:
3024        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3025out_ip6_dst_ops:
3026        goto out;
3027}
3028
3029static void __net_exit ip6_route_net_exit(struct net *net)
3030{
3031        kfree(net->ipv6.ip6_null_entry);
3032#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3033        kfree(net->ipv6.ip6_prohibit_entry);
3034        kfree(net->ipv6.ip6_blk_hole_entry);
3035#endif
3036        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3037}
3038
3039static int __net_init ip6_route_net_init_late(struct net *net)
3040{
3041#ifdef CONFIG_PROC_FS
3042        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3043        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3044#endif
3045        return 0;
3046}
3047
3048static void __net_exit ip6_route_net_exit_late(struct net *net)
3049{
3050#ifdef CONFIG_PROC_FS
3051        remove_proc_entry("ipv6_route", net->proc_net);
3052        remove_proc_entry("rt6_stats", net->proc_net);
3053#endif
3054}
3055
3056static struct pernet_operations ip6_route_net_ops = {
3057        .init = ip6_route_net_init,
3058        .exit = ip6_route_net_exit,
3059};
3060
3061static int __net_init ipv6_inetpeer_init(struct net *net)
3062{
3063        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3064
3065        if (!bp)
3066                return -ENOMEM;
3067        inet_peer_base_init(bp);
3068        net->ipv6.peers = bp;
3069        return 0;
3070}
3071
3072static void __net_exit ipv6_inetpeer_exit(struct net *net)
3073{
3074        struct inet_peer_base *bp = net->ipv6.peers;
3075
3076        net->ipv6.peers = NULL;
3077        inetpeer_invalidate_tree(bp);
3078        kfree(bp);
3079}
3080
3081static struct pernet_operations ipv6_inetpeer_ops = {
3082        .init   =       ipv6_inetpeer_init,
3083        .exit   =       ipv6_inetpeer_exit,
3084};
3085
3086static struct pernet_operations ip6_route_net_late_ops = {
3087        .init = ip6_route_net_init_late,
3088        .exit = ip6_route_net_exit_late,
3089};
3090
3091static struct notifier_block ip6_route_dev_notifier = {
3092        .notifier_call = ip6_route_dev_notify,
3093        .priority = 0,
3094};
3095
3096int __init ip6_route_init(void)
3097{
3098        int ret;
3099
3100        ret = -ENOMEM;
3101        ip6_dst_ops_template.kmem_cachep =
3102                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3103                                  SLAB_HWCACHE_ALIGN, NULL);
3104        if (!ip6_dst_ops_template.kmem_cachep)
3105                goto out;
3106
3107        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3108        if (ret)
3109                goto out_kmem_cache;
3110
3111        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3112        if (ret)
3113                goto out_dst_entries;
3114
3115        ret = register_pernet_subsys(&ip6_route_net_ops);
3116        if (ret)
3117                goto out_register_inetpeer;
3118
3119        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3120
3121        /* Registering of the loopback is done before this portion of code,
3122         * the loopback reference in rt6_info will not be taken, do it
3123         * manually for init_net */
3124        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3125        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3126  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3127        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3128        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3129        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3130        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3131  #endif
3132        ret = fib6_init();
3133        if (ret)
3134                goto out_register_subsys;
3135
3136        ret = xfrm6_init();
3137        if (ret)
3138                goto out_fib6_init;
3139
3140        ret = fib6_rules_init();
3141        if (ret)
3142                goto xfrm6_init;
3143
3144        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3145        if (ret)
3146                goto fib6_rules_init;
3147
3148        ret = -ENOBUFS;
3149        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3150            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3151            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3152                goto out_register_late_subsys;
3153
3154        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3155        if (ret)
3156                goto out_register_late_subsys;
3157
3158out:
3159        return ret;
3160
3161out_register_late_subsys:
3162        unregister_pernet_subsys(&ip6_route_net_late_ops);
3163fib6_rules_init:
3164        fib6_rules_cleanup();
3165xfrm6_init:
3166        xfrm6_fini();
3167out_fib6_init:
3168        fib6_gc_cleanup();
3169out_register_subsys:
3170        unregister_pernet_subsys(&ip6_route_net_ops);
3171out_register_inetpeer:
3172        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3173out_dst_entries:
3174        dst_entries_destroy(&ip6_dst_blackhole_ops);
3175out_kmem_cache:
3176        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3177        goto out;
3178}
3179
3180void ip6_route_cleanup(void)
3181{
3182        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3183        unregister_pernet_subsys(&ip6_route_net_late_ops);
3184        fib6_rules_cleanup();
3185        xfrm6_fini();
3186        fib6_gc_cleanup();
3187        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3188        unregister_pernet_subsys(&ip6_route_net_ops);
3189        dst_entries_destroy(&ip6_dst_blackhole_ops);
3190        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3191}
3192