linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
   /*
    * Outcome of the neighbour reachability check used while scoring a route.
    * Negative values are failures; rt6_score_route() hands them back as-is.
    */
   enum rt6_nud_state {
           /* find_match() drops the route from consideration */
           RT6_NUD_FAIL_HARD = -3,
           /* neighbour is in NUD_FAILED (only set with CONFIG_IPV6_ROUTER_PREF) */
           RT6_NUD_FAIL_PROBE = -2,
           /* find_match() scores the route 0 and asks for round-robin */
           RT6_NUD_FAIL_DO_RR = -1,
           RT6_NUD_SUCCEED = 1
   };
  79
   /*
    * Forward declarations for helpers and dst callbacks that are referenced
    * (for instance by the dst_ops tables and the route entry templates)
    * before their definitions appear.
    */
   static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
   static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
   static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
   static unsigned int      ip6_mtu(const struct dst_entry *dst);
   static struct dst_entry *ip6_negative_advice(struct dst_entry *);
   static void             ip6_dst_destroy(struct dst_entry *);
   static void             ip6_dst_ifdown(struct dst_entry *,
                                          struct net_device *dev, int how);
   static int               ip6_dst_gc(struct dst_ops *ops);

   /* input/output handlers installed in the reject-route templates */
   static int              ip6_pkt_discard(struct sk_buff *skb);
   static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
   static int              ip6_pkt_prohibit(struct sk_buff *skb);
   static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
   static void             ip6_link_failure(struct sk_buff *skb);
   static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
                                              struct sk_buff *skb, u32 mtu);
   static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
                                           struct sk_buff *skb);
   static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
   static int rt6_score_route(struct rt6_info *rt, int oif, int strict);

   /* Helpers used by rt6_route_rcv(); only built with CONFIG_IPV6_ROUTE_INFO. */
   #ifdef CONFIG_IPV6_ROUTE_INFO
   static struct rt6_info *rt6_add_route_info(struct net *net,
                                              const struct in6_addr *prefix, int prefixlen,
                                              const struct in6_addr *gwaddr, int ifindex,
                                              unsigned int pref);
   static struct rt6_info *rt6_get_route_info(struct net *net,
                                              const struct in6_addr *prefix, int prefixlen,
                                              const struct in6_addr *gwaddr, int ifindex);
   #endif
 111
  /*
   * A lock-protected list head. One instance exists per CPU
   * (rt6_uncached_list); rt6_info entries are linked onto it through their
   * rt6i_uncached member.
   */
  struct uncached_list {
          spinlock_t              lock;   /* taken with spin_lock_bh() around list changes and walks */
          struct list_head        head;
  };

  static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 118
 119static void rt6_uncached_list_add(struct rt6_info *rt)
 120{
 121        struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 122
 123        rt->dst.flags |= DST_NOCACHE;
 124        rt->rt6i_uncached_list = ul;
 125
 126        spin_lock_bh(&ul->lock);
 127        list_add_tail(&rt->rt6i_uncached, &ul->head);
 128        spin_unlock_bh(&ul->lock);
 129}
 130
 131static void rt6_uncached_list_del(struct rt6_info *rt)
 132{
 133        if (!list_empty(&rt->rt6i_uncached)) {
 134                struct uncached_list *ul = rt->rt6i_uncached_list;
 135
 136                spin_lock_bh(&ul->lock);
 137                list_del(&rt->rt6i_uncached);
 138                spin_unlock_bh(&ul->lock);
 139        }
 140}
 141
  /*
   * Walk the uncached list of every possible CPU and re-point entries that
   * still reference @dev at the loopback device of @net instead.
   *
   * Both the inet6_dev (rt6i_idev) and the net_device (dst.dev) references
   * are moved independently. Nothing is done when @dev is the loopback
   * device itself.
   */
  static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
  {
          struct net_device *loopback_dev = net->loopback_dev;
          int cpu;

          if (dev == loopback_dev)
                  return;

          for_each_possible_cpu(cpu) {
                  struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
                  struct rt6_info *rt;

                  spin_lock_bh(&ul->lock);
                  list_for_each_entry(rt, &ul->head, rt6i_uncached) {
                          struct inet6_dev *rt_idev = rt->rt6i_idev;
                          struct net_device *rt_dev = rt->dst.dev;

                          /* swap the idev reference: take the new one, then drop the old */
                          if (rt_idev->dev == dev) {
                                  rt->rt6i_idev = in6_dev_get(loopback_dev);
                                  in6_dev_put(rt_idev);
                          }

                          /* same for the device reference held through dst.dev */
                          if (rt_dev == dev) {
                                  rt->dst.dev = loopback_dev;
                                  dev_hold(rt->dst.dev);
                                  dev_put(rt_dev);
                          }
                  }
                  spin_unlock_bh(&ul->lock);
          }
  }
 173
 174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 175{
 176        return dst_metrics_write_ptr(rt->dst.from);
 177}
 178
 179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 180{
 181        struct rt6_info *rt = (struct rt6_info *)dst;
 182
 183        if (rt->rt6i_flags & RTF_PCPU)
 184                return rt6_pcpu_cow_metrics(rt);
 185        else if (rt->rt6i_flags & RTF_CACHE)
 186                return NULL;
 187        else
 188                return dst_cow_metrics_generic(dst, old);
 189}
 190
 191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 192                                             struct sk_buff *skb,
 193                                             const void *daddr)
 194{
 195        struct in6_addr *p = &rt->rt6i_gateway;
 196
 197        if (!ipv6_addr_any(p))
 198                return (const void *) p;
 199        else if (skb)
 200                return &ipv6_hdr(skb)->daddr;
 201        return daddr;
 202}
 203
 204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 205                                          struct sk_buff *skb,
 206                                          const void *daddr)
 207{
 208        struct rt6_info *rt = (struct rt6_info *) dst;
 209        struct neighbour *n;
 210
 211        daddr = choose_neigh_daddr(rt, skb, daddr);
 212        n = __ipv6_neigh_lookup(dst->dev, daddr);
 213        if (n)
 214                return n;
 215        return neigh_create(&nd_tbl, daddr, dst->dev);
 216}
 217
  /*
   * dst_ops callback table for regular IPv6 routes.
   * NOTE(review): __ip6_dst_alloc() allocates from net->ipv6.ip6_dst_ops,
   * presumably a per-namespace copy of this template; the copy itself is
   * not made in this part of the file.
   */
  static struct dst_ops ip6_dst_ops_template = {
          .family                 =       AF_INET6,
          .gc                     =       ip6_dst_gc,
          .gc_thresh              =       1024,
          .check                  =       ip6_dst_check,
          .default_advmss         =       ip6_default_advmss,
          .mtu                    =       ip6_mtu,
          .cow_metrics            =       ipv6_cow_metrics,
          .destroy                =       ip6_dst_destroy,
          .ifdown                 =       ip6_dst_ifdown,
          .negative_advice        =       ip6_negative_advice,
          .link_failure           =       ip6_link_failure,
          .update_pmtu            =       ip6_rt_update_pmtu,
          .redirect               =       rt6_do_redirect,
          .local_out              =       __ip6_local_out,
          .neigh_lookup           =       ip6_neigh_lookup,
  };
 235
 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 237{
 238        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 239
 240        return mtu ? : dst->dev->mtu;
 241}
 242
  /*
   * dst_ops->update_pmtu handler for blackhole dsts: deliberately a no-op,
   * so path-MTU updates leave the dst untouched.
   */
  static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
                                           struct sk_buff *skb, u32 mtu)
  {
  }
 247
  /*
   * dst_ops->redirect handler for blackhole dsts: deliberately a no-op,
   * so redirects leave the dst untouched.
   */
  static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
                                        struct sk_buff *skb)
  {
  }
 252
  /*
   * dst_ops callback table for blackhole dsts. It shares destroy, check,
   * default_advmss and neigh_lookup with the regular IPv6 table, but uses
   * the no-op PMTU/redirect handlers and the generic metrics COW helper.
   */
  static struct dst_ops ip6_dst_blackhole_ops = {
          .family                 =       AF_INET6,
          .destroy                =       ip6_dst_destroy,
          .check                  =       ip6_dst_check,
          .mtu                    =       ip6_blackhole_mtu,
          .default_advmss         =       ip6_default_advmss,
          .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
          .redirect               =       ip6_rt_blackhole_redirect,
          .cow_metrics            =       dst_cow_metrics_generic,
          .neigh_lookup           =       ip6_neigh_lookup,
  };
 264
  /*
   * Read-only metrics array in which every slot is zero; the hop-limit
   * slot is spelled out explicitly.
   * NOTE(review): presumably attached to the template route entries; the
   * user is not in this part of the file.
   */
  static const u32 ip6_template_metrics[RTAX_MAX] = {
          [RTAX_HOPLIMIT - 1] = 0,
  };
 268
  /*
   * Template for the "null" route entry: a reject route without nexthop
   * that reports -ENETUNREACH and discards packets through the
   * ip6_pkt_discard handlers. It carries the largest possible metric.
   */
  static const struct rt6_info ip6_null_entry_template = {
          .dst = {
                  .__refcnt       = ATOMIC_INIT(1),
                  .__use          = 1,
                  .obsolete       = DST_OBSOLETE_FORCE_CHK,
                  .error          = -ENETUNREACH,
                  .input          = ip6_pkt_discard,
                  .output         = ip6_pkt_discard_out,
          },
          .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
          .rt6i_protocol  = RTPROT_KERNEL,
          .rt6i_metric    = ~(u32) 0,
          .rt6i_ref       = ATOMIC_INIT(1),
  };
 283
 284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 285
  /*
   * Template for the "prohibit" route entry (CONFIG_IPV6_MULTIPLE_TABLES
   * only): same shape as the null entry, but it reports -EACCES and uses
   * the ip6_pkt_prohibit handlers.
   */
  static const struct rt6_info ip6_prohibit_entry_template = {
          .dst = {
                  .__refcnt       = ATOMIC_INIT(1),
                  .__use          = 1,
                  .obsolete       = DST_OBSOLETE_FORCE_CHK,
                  .error          = -EACCES,
                  .input          = ip6_pkt_prohibit,
                  .output         = ip6_pkt_prohibit_out,
          },
          .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
          .rt6i_protocol  = RTPROT_KERNEL,
          .rt6i_metric    = ~(u32) 0,
          .rt6i_ref       = ATOMIC_INIT(1),
  };
 300
  /*
   * Template for the "blackhole" route entry (CONFIG_IPV6_MULTIPLE_TABLES
   * only): same shape as the null entry, but it reports -EINVAL and uses
   * the generic dst_discard handlers.
   */
  static const struct rt6_info ip6_blk_hole_entry_template = {
          .dst = {
                  .__refcnt       = ATOMIC_INIT(1),
                  .__use          = 1,
                  .obsolete       = DST_OBSOLETE_FORCE_CHK,
                  .error          = -EINVAL,
                  .input          = dst_discard,
                  .output         = dst_discard_out,
          },
          .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
          .rt6i_protocol  = RTPROT_KERNEL,
          .rt6i_metric    = ~(u32) 0,
          .rt6i_ref       = ATOMIC_INIT(1),
  };
 315
 316#endif
 317
 318static void rt6_info_init(struct rt6_info *rt)
 319{
 320        struct dst_entry *dst = &rt->dst;
 321
 322        memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 323        INIT_LIST_HEAD(&rt->rt6i_siblings);
 324        INIT_LIST_HEAD(&rt->rt6i_uncached);
 325}
 326
 327/* allocate dst with ip6_dst_ops */
 328static struct rt6_info *__ip6_dst_alloc(struct net *net,
 329                                        struct net_device *dev,
 330                                        int flags)
 331{
 332        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 333                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 334
 335        if (rt)
 336                rt6_info_init(rt);
 337
 338        return rt;
 339}
 340
 341struct rt6_info *ip6_dst_alloc(struct net *net,
 342                               struct net_device *dev,
 343                               int flags)
 344{
 345        struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 346
 347        if (rt) {
 348                rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 349                if (rt->rt6i_pcpu) {
 350                        int cpu;
 351
 352                        for_each_possible_cpu(cpu) {
 353                                struct rt6_info **p;
 354
 355                                p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 356                                /* no one shares rt */
 357                                *p =  NULL;
 358                        }
 359                } else {
 360                        dst_destroy((struct dst_entry *)rt);
 361                        return NULL;
 362                }
 363        }
 364
 365        return rt;
 366}
 367EXPORT_SYMBOL(ip6_dst_alloc);
 368
  /*
   * dst_ops->destroy handler: release everything an IPv6 route owns.
   *
   * Frees the metrics and the per-cpu pointer table, unlinks the route from
   * its uncached list, drops the inet6_dev reference and finally releases
   * the dst this route was derived from (dst->from).
   */
  static void ip6_dst_destroy(struct dst_entry *dst)
  {
          struct rt6_info *rt = (struct rt6_info *)dst;
          struct dst_entry *from = dst->from;
          struct inet6_dev *idev;

          dst_destroy_metrics_generic(dst);
          free_percpu(rt->rt6i_pcpu);
          rt6_uncached_list_del(rt);

          idev = rt->rt6i_idev;
          if (idev) {
                  /* clear the pointer before the reference is dropped */
                  rt->rt6i_idev = NULL;
                  in6_dev_put(idev);
          }

          dst->from = NULL;
          dst_release(from);
  }
 388
 389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 390                           int how)
 391{
 392        struct rt6_info *rt = (struct rt6_info *)dst;
 393        struct inet6_dev *idev = rt->rt6i_idev;
 394        struct net_device *loopback_dev =
 395                dev_net(dev)->loopback_dev;
 396
 397        if (dev != loopback_dev) {
 398                if (idev && idev->dev == dev) {
 399                        struct inet6_dev *loopback_idev =
 400                                in6_dev_get(loopback_dev);
 401                        if (loopback_idev) {
 402                                rt->rt6i_idev = loopback_idev;
 403                                in6_dev_put(idev);
 404                        }
 405                }
 406        }
 407}
 408
 409static bool __rt6_check_expired(const struct rt6_info *rt)
 410{
 411        if (rt->rt6i_flags & RTF_EXPIRES)
 412                return time_after(jiffies, rt->dst.expires);
 413        else
 414                return false;
 415}
 416
 417static bool rt6_check_expired(const struct rt6_info *rt)
 418{
 419        if (rt->rt6i_flags & RTF_EXPIRES) {
 420                if (time_after(jiffies, rt->dst.expires))
 421                        return true;
 422        } else if (rt->dst.from) {
 423                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 424        }
 425        return false;
 426}
 427
 428/* Multipath route selection:
 429 *   Hash based function using packet header and flowlabel.
 430 * Adapted from fib_info_hashfn()
 431 */
 432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 433                               const struct flowi6 *fl6)
 434{
 435        return get_hash_from_flowi6(fl6) % candidate_count;
 436}
 437
 438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 439                                             struct flowi6 *fl6, int oif,
 440                                             int strict)
 441{
 442        struct rt6_info *sibling, *next_sibling;
 443        int route_choosen;
 444
 445        route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 446        /* Don't change the route, if route_choosen == 0
 447         * (siblings does not include ourself)
 448         */
 449        if (route_choosen)
 450                list_for_each_entry_safe(sibling, next_sibling,
 451                                &match->rt6i_siblings, rt6i_siblings) {
 452                        route_choosen--;
 453                        if (route_choosen == 0) {
 454                                if (rt6_score_route(sibling, oif, strict) < 0)
 455                                        break;
 456                                match = sibling;
 457                                break;
 458                        }
 459                }
 460        return match;
 461}
 462
 463/*
 464 *      Route lookup. Any table->tb6_lock is implied.
 465 */
 466
  /*
   * Pick, from the route chain starting at @rt, the entry that best fits
   * the requested output interface @oif or, when no @oif is given, the
   * source address @saddr.
   *
   * Returns:
   *  - @rt unchanged when neither @oif nor a specific @saddr is supplied;
   *  - the first entry whose device has ifindex @oif;
   *  - without @oif, the first entry for which ipv6_chk_addr() accepts
   *    @saddr on the entry's device;
   *  - otherwise, with @oif, a loopback-device entry remembered during
   *    the walk, if there is one;
   *  - net->ipv6.ip6_null_entry when @oif is set, nothing matched and
   *    RT6_LOOKUP_F_IFACE is requested;
   *  - @rt in every remaining case.
   */
  static inline struct rt6_info *rt6_device_match(struct net *net,
                                                      struct rt6_info *rt,
                                                      const struct in6_addr *saddr,
                                                      int oif,
                                                      int flags)
  {
          struct rt6_info *local = NULL;
          struct rt6_info *sprt;

          if (!oif && ipv6_addr_any(saddr))
                  goto out;

          for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
                  struct net_device *dev = sprt->dst.dev;

                  if (oif) {
                          if (dev->ifindex == oif)
                                  return sprt;
                          if (dev->flags & IFF_LOOPBACK) {
                                  /* loopback route whose idev is not on @oif */
                                  if (!sprt->rt6i_idev ||
                                      sprt->rt6i_idev->dev->ifindex != oif) {
                                          if (flags & RT6_LOOKUP_F_IFACE)
                                                  continue;
                                          /* keep an earlier candidate that is on @oif */
                                          if (local &&
                                              local->rt6i_idev->dev->ifindex == oif)
                                                  continue;
                                  }
                                  local = sprt;
                          }
                  } else {
                          if (ipv6_chk_addr(net, saddr, dev,
                                            flags & RT6_LOOKUP_F_IFACE))
                                  return sprt;
                  }
          }

          if (oif) {
                  if (local)
                          return local;

                  if (flags & RT6_LOOKUP_F_IFACE)
                          return net->ipv6.ip6_null_entry;
          }
  out:
          return rt;
  }
 513
 514#ifdef CONFIG_IPV6_ROUTER_PREF
  /*
   * Deferred router probe request: queued by rt6_probe() and consumed
   * (and freed) by rt6_probe_deferred().
   */
  struct __rt6_probe_work {
          struct work_struct work;
          struct in6_addr target;         /* gateway address to solicit */
          struct net_device *dev;         /* device to send on; a reference is held until the work has run */
  };
 520
 521static void rt6_probe_deferred(struct work_struct *w)
 522{
 523        struct in6_addr mcaddr;
 524        struct __rt6_probe_work *work =
 525                container_of(w, struct __rt6_probe_work, work);
 526
 527        addrconf_addr_solict_mult(&work->target, &mcaddr);
 528        ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 529        dev_put(work->dev);
 530        kfree(work);
 531}
 532
  /*
   * Queue a deferred neighbour solicitation towards the gateway of @rt.
   *
   * Nothing is done for a NULL or non-gateway route, or when the gateway's
   * neighbour entry is already NUD_VALID. If a neighbour entry exists, a
   * probe is queued only when the entry is still not valid once its lock is
   * held and more than rtr_probe_interval has passed since neigh->updated.
   * If no entry exists, a probe is always attempted. A failed allocation
   * silently skips the probe.
   */
  static void rt6_probe(struct rt6_info *rt)
  {
          struct __rt6_probe_work *work;
          struct neighbour *neigh;
          /*
           * Okay, this does not seem to be appropriate
           * for now, however, we need to check if it
           * is really so; aka Router Reachability Probing.
           *
           * Router Reachability Probe MUST be rate-limited
           * to no more than one per minute.
           */
          if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
                  return;
          rcu_read_lock_bh();
          neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
          if (neigh) {
                  /* unlocked fast path; the state is re-checked under the lock */
                  if (neigh->nud_state & NUD_VALID)
                          goto out;

                  work = NULL;
                  write_lock(&neigh->lock);
                  if (!(neigh->nud_state & NUD_VALID) &&
                      time_after(jiffies,
                                 neigh->updated +
                                 rt->rt6i_idev->cnf.rtr_probe_interval)) {
                          /* GFP_ATOMIC: allocated while neigh->lock is held */
                          work = kmalloc(sizeof(*work), GFP_ATOMIC);
                          if (work)
                                  __neigh_set_probe_once(neigh);
                  }
                  write_unlock(&neigh->lock);
          } else {
                  work = kmalloc(sizeof(*work), GFP_ATOMIC);
          }

          if (work) {
                  INIT_WORK(&work->work, rt6_probe_deferred);
                  work->target = rt->rt6i_gateway;
                  /* reference is dropped again by rt6_probe_deferred() */
                  dev_hold(rt->dst.dev);
                  work->dev = rt->dst.dev;
                  schedule_work(&work->work);
          }

  out:
          rcu_read_unlock_bh();
  }
 579#else
  /* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
  static inline void rt6_probe(struct rt6_info *rt)
  {
  }
 583#endif
 584
 585/*
 586 * Default Router Selection (RFC 2461 6.3.6)
 587 */
 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 589{
 590        struct net_device *dev = rt->dst.dev;
 591        if (!oif || dev->ifindex == oif)
 592                return 2;
 593        if ((dev->flags & IFF_LOOPBACK) &&
 594            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 595                return 1;
 596        return 0;
 597}
 598
  /*
   * Classify the reachability of the route's gateway.
   *
   * Routes without a nexthop or without a gateway always succeed. With a
   * neighbour entry: NUD_VALID succeeds; with CONFIG_IPV6_ROUTER_PREF any
   * state other than NUD_FAILED also succeeds and NUD_FAILED yields
   * RT6_NUD_FAIL_PROBE; without that option a non-valid entry keeps the
   * initial RT6_NUD_FAIL_HARD. Without a neighbour entry the result is
   * RT6_NUD_SUCCEED when router preference is enabled, else
   * RT6_NUD_FAIL_DO_RR.
   */
  static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
  {
          struct neighbour *neigh;
          enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

          if (rt->rt6i_flags & RTF_NONEXTHOP ||
              !(rt->rt6i_flags & RTF_GATEWAY))
                  return RT6_NUD_SUCCEED;

          rcu_read_lock_bh();
          neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
          if (neigh) {
                  read_lock(&neigh->lock);
                  if (neigh->nud_state & NUD_VALID)
                          ret = RT6_NUD_SUCCEED;
  #ifdef CONFIG_IPV6_ROUTER_PREF
                  else if (!(neigh->nud_state & NUD_FAILED))
                          ret = RT6_NUD_SUCCEED;
                  else
                          ret = RT6_NUD_FAIL_PROBE;
  #endif
                  read_unlock(&neigh->lock);
          } else {
                  ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
                        RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
          }
          rcu_read_unlock_bh();

          return ret;
  }
 629
 630static int rt6_score_route(struct rt6_info *rt, int oif,
 631                           int strict)
 632{
 633        int m;
 634
 635        m = rt6_check_dev(rt, oif);
 636        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 637                return RT6_NUD_FAIL_HARD;
 638#ifdef CONFIG_IPV6_ROUTER_PREF
 639        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 640#endif
 641        if (strict & RT6_LOOKUP_F_REACHABLE) {
 642                int n = rt6_check_neigh(rt);
 643                if (n < 0)
 644                        return n;
 645        }
 646        return m;
 647}
 648
 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 650                                   int *mpri, struct rt6_info *match,
 651                                   bool *do_rr)
 652{
 653        int m;
 654        bool match_do_rr = false;
 655        struct inet6_dev *idev = rt->rt6i_idev;
 656        struct net_device *dev = rt->dst.dev;
 657
 658        if (dev && !netif_carrier_ok(dev) &&
 659            idev->cnf.ignore_routes_with_linkdown)
 660                goto out;
 661
 662        if (rt6_check_expired(rt))
 663                goto out;
 664
 665        m = rt6_score_route(rt, oif, strict);
 666        if (m == RT6_NUD_FAIL_DO_RR) {
 667                match_do_rr = true;
 668                m = 0; /* lowest valid score */
 669        } else if (m == RT6_NUD_FAIL_HARD) {
 670                goto out;
 671        }
 672
 673        if (strict & RT6_LOOKUP_F_REACHABLE)
 674                rt6_probe(rt);
 675
 676        /* note that m can be RT6_NUD_FAIL_PROBE at this point */
 677        if (m > *mpri) {
 678                *do_rr = match_do_rr;
 679                *mpri = m;
 680                match = rt;
 681        }
 682out:
 683        return match;
 684}
 685
 686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 687                                     struct rt6_info *rr_head,
 688                                     u32 metric, int oif, int strict,
 689                                     bool *do_rr)
 690{
 691        struct rt6_info *rt, *match, *cont;
 692        int mpri = -1;
 693
 694        match = NULL;
 695        cont = NULL;
 696        for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 697                if (rt->rt6i_metric != metric) {
 698                        cont = rt;
 699                        break;
 700                }
 701
 702                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 703        }
 704
 705        for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 706                if (rt->rt6i_metric != metric) {
 707                        cont = rt;
 708                        break;
 709                }
 710
 711                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 712        }
 713
 714        if (match || !cont)
 715                return match;
 716
 717        for (rt = cont; rt; rt = rt->dst.rt6_next)
 718                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 719
 720        return match;
 721}
 722
 723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 724{
 725        struct rt6_info *match, *rt0;
 726        struct net *net;
 727        bool do_rr = false;
 728
 729        rt0 = fn->rr_ptr;
 730        if (!rt0)
 731                fn->rr_ptr = rt0 = fn->leaf;
 732
 733        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 734                             &do_rr);
 735
 736        if (do_rr) {
 737                struct rt6_info *next = rt0->dst.rt6_next;
 738
 739                /* no entries matched; do round-robin */
 740                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 741                        next = fn->leaf;
 742
 743                if (next != rt0)
 744                        fn->rr_ptr = next;
 745        }
 746
 747        net = dev_net(rt0->dst.dev);
 748        return match ? match : net->ipv6.ip6_null_entry;
 749}
 750
 751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 752{
 753        return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 754}
 755
 756#ifdef CONFIG_IPV6_ROUTE_INFO
 757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 758                  const struct in6_addr *gwaddr)
 759{
 760        struct net *net = dev_net(dev);
 761        struct route_info *rinfo = (struct route_info *) opt;
 762        struct in6_addr prefix_buf, *prefix;
 763        unsigned int pref;
 764        unsigned long lifetime;
 765        struct rt6_info *rt;
 766
 767        if (len < sizeof(struct route_info)) {
 768                return -EINVAL;
 769        }
 770
 771        /* Sanity check for prefix_len and length */
 772        if (rinfo->length > 3) {
 773                return -EINVAL;
 774        } else if (rinfo->prefix_len > 128) {
 775                return -EINVAL;
 776        } else if (rinfo->prefix_len > 64) {
 777                if (rinfo->length < 2) {
 778                        return -EINVAL;
 779                }
 780        } else if (rinfo->prefix_len > 0) {
 781                if (rinfo->length < 1) {
 782                        return -EINVAL;
 783                }
 784        }
 785
 786        pref = rinfo->route_pref;
 787        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 788                return -EINVAL;
 789
 790        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 791
 792        if (rinfo->length == 3)
 793                prefix = (struct in6_addr *)rinfo->prefix;
 794        else {
 795                /* this function is safe */
 796                ipv6_addr_prefix(&prefix_buf,
 797                                 (struct in6_addr *)rinfo->prefix,
 798                                 rinfo->prefix_len);
 799                prefix = &prefix_buf;
 800        }
 801
 802        if (rinfo->prefix_len == 0)
 803                rt = rt6_get_dflt_router(gwaddr, dev);
 804        else
 805                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 806                                        gwaddr, dev->ifindex);
 807
 808        if (rt && !lifetime) {
 809                ip6_del_rt(rt);
 810                rt = NULL;
 811        }
 812
 813        if (!rt && lifetime)
 814                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 815                                        pref);
 816        else if (rt)
 817                rt->rt6i_flags = RTF_ROUTEINFO |
 818                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 819
 820        if (rt) {
 821                if (!addrconf_finite_timeout(lifetime))
 822                        rt6_clean_expires(rt);
 823                else
 824                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 825
 826                ip6_rt_put(rt);
 827        }
 828        return 0;
 829}
 830#endif
 831
 832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 833                                        struct in6_addr *saddr)
 834{
 835        struct fib6_node *pn;
 836        while (1) {
 837                if (fn->fn_flags & RTN_TL_ROOT)
 838                        return NULL;
 839                pn = fn->parent;
 840                if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 841                        fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 842                else
 843                        fn = pn;
 844                if (fn->fn_flags & RTN_RTINFO)
 845                        return fn;
 846        }
 847}
 848
 849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 850                                             struct fib6_table *table,
 851                                             struct flowi6 *fl6, int flags)
 852{
 853        struct fib6_node *fn;
 854        struct rt6_info *rt;
 855
 856        read_lock_bh(&table->tb6_lock);
 857        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 858restart:
 859        rt = fn->leaf;
 860        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 861        if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 862                rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 863        if (rt == net->ipv6.ip6_null_entry) {
 864                fn = fib6_backtrack(fn, &fl6->saddr);
 865                if (fn)
 866                        goto restart;
 867        }
 868        dst_use(&rt->dst, jiffies);
 869        read_unlock_bh(&table->tb6_lock);
 870
 871        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 872
 873        return rt;
 874
 875}
 876
 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 878                                    int flags)
 879{
 880        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 881}
 882EXPORT_SYMBOL_GPL(ip6_route_lookup);
 883
 884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 885                            const struct in6_addr *saddr, int oif, int strict)
 886{
 887        struct flowi6 fl6 = {
 888                .flowi6_oif = oif,
 889                .daddr = *daddr,
 890        };
 891        struct dst_entry *dst;
 892        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 893
 894        if (saddr) {
 895                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 896                flags |= RT6_LOOKUP_F_HAS_SADDR;
 897        }
 898
 899        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 900        if (dst->error == 0)
 901                return (struct rt6_info *) dst;
 902
 903        dst_release(dst);
 904
 905        return NULL;
 906}
 907EXPORT_SYMBOL(rt6_lookup);
 908
 909/* ip6_ins_rt is called with FREE table->tb6_lock.
 910   It takes new route entry, the addition fails by any reason the
 911   route is freed. In any case, if caller does not hold it, it may
 912   be destroyed.
 913 */
 914
 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 916                        struct mx6_config *mxc)
 917{
 918        int err;
 919        struct fib6_table *table;
 920
 921        table = rt->rt6i_table;
 922        write_lock_bh(&table->tb6_lock);
 923        err = fib6_add(&table->tb6_root, rt, info, mxc);
 924        write_unlock_bh(&table->tb6_lock);
 925
 926        return err;
 927}
 928
 929int ip6_ins_rt(struct rt6_info *rt)
 930{
 931        struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
 932        struct mx6_config mxc = { .mx = NULL, };
 933
 934        return __ip6_ins_rt(rt, &info, &mxc);
 935}
 936
 937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 938                                           const struct in6_addr *daddr,
 939                                           const struct in6_addr *saddr)
 940{
 941        struct rt6_info *rt;
 942
 943        /*
 944         *      Clone the route.
 945         */
 946
 947        if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 948                ort = (struct rt6_info *)ort->dst.from;
 949
 950        rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 951
 952        if (!rt)
 953                return NULL;
 954
 955        ip6_rt_copy_init(rt, ort);
 956        rt->rt6i_flags |= RTF_CACHE;
 957        rt->rt6i_metric = 0;
 958        rt->dst.flags |= DST_HOST;
 959        rt->rt6i_dst.addr = *daddr;
 960        rt->rt6i_dst.plen = 128;
 961
 962        if (!rt6_is_gw_or_nonexthop(ort)) {
 963                if (ort->rt6i_dst.plen != 128 &&
 964                    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 965                        rt->rt6i_flags |= RTF_ANYCAST;
 966#ifdef CONFIG_IPV6_SUBTREES
 967                if (rt->rt6i_src.plen && saddr) {
 968                        rt->rt6i_src.addr = *saddr;
 969                        rt->rt6i_src.plen = 128;
 970                }
 971#endif
 972        }
 973
 974        return rt;
 975}
 976
 977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 978{
 979        struct rt6_info *pcpu_rt;
 980
 981        pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 982                                  rt->dst.dev, rt->dst.flags);
 983
 984        if (!pcpu_rt)
 985                return NULL;
 986        ip6_rt_copy_init(pcpu_rt, rt);
 987        pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 988        pcpu_rt->rt6i_flags |= RTF_PCPU;
 989        return pcpu_rt;
 990}
 991
 992/* It should be called with read_lock_bh(&tb6_lock) acquired */
 993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 994{
 995        struct rt6_info *pcpu_rt, **p;
 996
 997        p = this_cpu_ptr(rt->rt6i_pcpu);
 998        pcpu_rt = *p;
 999
1000        if (pcpu_rt) {
1001                dst_hold(&pcpu_rt->dst);
1002                rt6_dst_from_metrics_check(pcpu_rt);
1003        }
1004        return pcpu_rt;
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008{
1009        struct fib6_table *table = rt->rt6i_table;
1010        struct rt6_info *pcpu_rt, *prev, **p;
1011
1012        pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013        if (!pcpu_rt) {
1014                struct net *net = dev_net(rt->dst.dev);
1015
1016                dst_hold(&net->ipv6.ip6_null_entry->dst);
1017                return net->ipv6.ip6_null_entry;
1018        }
1019
1020        read_lock_bh(&table->tb6_lock);
1021        if (rt->rt6i_pcpu) {
1022                p = this_cpu_ptr(rt->rt6i_pcpu);
1023                prev = cmpxchg(p, NULL, pcpu_rt);
1024                if (prev) {
1025                        /* If someone did it before us, return prev instead */
1026                        dst_destroy(&pcpu_rt->dst);
1027                        pcpu_rt = prev;
1028                }
1029        } else {
1030                /* rt has been removed from the fib6 tree
1031                 * before we have a chance to acquire the read_lock.
1032                 * In this case, don't brother to create a pcpu rt
1033                 * since rt is going away anyway.  The next
1034                 * dst_check() will trigger a re-lookup.
1035                 */
1036                dst_destroy(&pcpu_rt->dst);
1037                pcpu_rt = rt;
1038        }
1039        dst_hold(&pcpu_rt->dst);
1040        rt6_dst_from_metrics_check(pcpu_rt);
1041        read_unlock_bh(&table->tb6_lock);
1042        return pcpu_rt;
1043}
1044
1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1046                                      struct flowi6 *fl6, int flags)
1047{
1048        struct fib6_node *fn, *saved_fn;
1049        struct rt6_info *rt;
1050        int strict = 0;
1051
1052        strict |= flags & RT6_LOOKUP_F_IFACE;
1053        if (net->ipv6.devconf_all->forwarding == 0)
1054                strict |= RT6_LOOKUP_F_REACHABLE;
1055
1056        read_lock_bh(&table->tb6_lock);
1057
1058        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1059        saved_fn = fn;
1060
1061        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062                oif = 0;
1063
1064redo_rt6_select:
1065        rt = rt6_select(fn, oif, strict);
1066        if (rt->rt6i_nsiblings)
1067                rt = rt6_multipath_select(rt, fl6, oif, strict);
1068        if (rt == net->ipv6.ip6_null_entry) {
1069                fn = fib6_backtrack(fn, &fl6->saddr);
1070                if (fn)
1071                        goto redo_rt6_select;
1072                else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073                        /* also consider unreachable route */
1074                        strict &= ~RT6_LOOKUP_F_REACHABLE;
1075                        fn = saved_fn;
1076                        goto redo_rt6_select;
1077                }
1078        }
1079
1080
1081        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082                dst_use(&rt->dst, jiffies);
1083                read_unlock_bh(&table->tb6_lock);
1084
1085                rt6_dst_from_metrics_check(rt);
1086
1087                trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088                return rt;
1089        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090                            !(rt->rt6i_flags & RTF_GATEWAY))) {
1091                /* Create a RTF_CACHE clone which will not be
1092                 * owned by the fib6 tree.  It is for the special case where
1093                 * the daddr in the skb during the neighbor look-up is different
1094                 * from the fl6->daddr used to look-up route here.
1095                 */
1096
1097                struct rt6_info *uncached_rt;
1098
1099                dst_use(&rt->dst, jiffies);
1100                read_unlock_bh(&table->tb6_lock);
1101
1102                uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103                dst_release(&rt->dst);
1104
1105                if (uncached_rt)
1106                        rt6_uncached_list_add(uncached_rt);
1107                else
1108                        uncached_rt = net->ipv6.ip6_null_entry;
1109
1110                dst_hold(&uncached_rt->dst);
1111
1112                trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113                return uncached_rt;
1114
1115        } else {
1116                /* Get a percpu copy */
1117
1118                struct rt6_info *pcpu_rt;
1119
1120                rt->dst.lastuse = jiffies;
1121                rt->dst.__use++;
1122                pcpu_rt = rt6_get_pcpu_route(rt);
1123
1124                if (pcpu_rt) {
1125                        read_unlock_bh(&table->tb6_lock);
1126                } else {
1127                        /* We have to do the read_unlock first
1128                         * because rt6_make_pcpu_route() may trigger
1129                         * ip6_dst_gc() which will take the write_lock.
1130                         */
1131                        dst_hold(&rt->dst);
1132                        read_unlock_bh(&table->tb6_lock);
1133                        pcpu_rt = rt6_make_pcpu_route(rt);
1134                        dst_release(&rt->dst);
1135                }
1136
1137                trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138                return pcpu_rt;
1139
1140        }
1141}
1142
1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1144                                            struct flowi6 *fl6, int flags)
1145{
1146        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1147}
1148
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150                                                struct net_device *dev,
1151                                                struct flowi6 *fl6, int flags)
1152{
1153        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154                flags |= RT6_LOOKUP_F_IFACE;
1155
1156        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
1158
1159void ip6_route_input(struct sk_buff *skb)
1160{
1161        const struct ipv6hdr *iph = ipv6_hdr(skb);
1162        struct net *net = dev_net(skb->dev);
1163        int flags = RT6_LOOKUP_F_HAS_SADDR;
1164        struct ip_tunnel_info *tun_info;
1165        struct flowi6 fl6 = {
1166                .flowi6_iif = l3mdev_fib_oif(skb->dev),
1167                .daddr = iph->daddr,
1168                .saddr = iph->saddr,
1169                .flowlabel = ip6_flowinfo(iph),
1170                .flowi6_mark = skb->mark,
1171                .flowi6_proto = iph->nexthdr,
1172        };
1173
1174        tun_info = skb_tunnel_info(skb);
1175        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1176                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1177        skb_dst_drop(skb);
1178        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1179}
1180
1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1182                                             struct flowi6 *fl6, int flags)
1183{
1184        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1185}
1186
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188                                         struct flowi6 *fl6, int flags)
1189{
1190        struct dst_entry *dst;
1191        bool any_src;
1192
1193        dst = l3mdev_rt6_dst_by_oif(net, fl6);
1194        if (dst)
1195                return dst;
1196
1197        fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199        any_src = ipv6_addr_any(&fl6->saddr);
1200        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201            (fl6->flowi6_oif && any_src))
1202                flags |= RT6_LOOKUP_F_IFACE;
1203
1204        if (!any_src)
1205                flags |= RT6_LOOKUP_F_HAS_SADDR;
1206        else if (sk)
1207                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210}
1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1212
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{
1215        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216        struct dst_entry *new = NULL;
1217
1218        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219        if (rt) {
1220                rt6_info_init(rt);
1221
1222                new = &rt->dst;
1223                new->__use = 1;
1224                new->input = dst_discard;
1225                new->output = dst_discard_out;
1226
1227                dst_copy_metrics(new, &ort->dst);
1228                rt->rt6i_idev = ort->rt6i_idev;
1229                if (rt->rt6i_idev)
1230                        in6_dev_hold(rt->rt6i_idev);
1231
1232                rt->rt6i_gateway = ort->rt6i_gateway;
1233                rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234                rt->rt6i_metric = 0;
1235
1236                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241                dst_free(new);
1242        }
1243
1244        dst_release(dst_orig);
1245        return new ? new : ERR_PTR(-ENOMEM);
1246}
1247
1248/*
1249 *      Destination cache support functions
1250 */
1251
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254        if (rt->dst.from &&
1255            dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256                dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261        if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262                return NULL;
1263
1264        if (rt6_check_expired(rt))
1265                return NULL;
1266
1267        return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
1272        if (!__rt6_check_expired(rt) &&
1273            rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274            rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275                return &rt->dst;
1276        else
1277                return NULL;
1278}
1279
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282        struct rt6_info *rt;
1283
1284        rt = (struct rt6_info *) dst;
1285
1286        /* All IPV6 dsts are created with ->obsolete set to the value
1287         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288         * into this function always.
1289         */
1290
1291        rt6_dst_from_metrics_check(rt);
1292
1293        if (rt->rt6i_flags & RTF_PCPU ||
1294            (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295                return rt6_dst_from_check(rt, cookie);
1296        else
1297                return rt6_check(rt, cookie);
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302        struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304        if (rt) {
1305                if (rt->rt6i_flags & RTF_CACHE) {
1306                        if (rt6_check_expired(rt)) {
1307                                ip6_del_rt(rt);
1308                                dst = NULL;
1309                        }
1310                } else {
1311                        dst_release(dst);
1312                        dst = NULL;
1313                }
1314        }
1315        return dst;
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320        struct rt6_info *rt;
1321
1322        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324        rt = (struct rt6_info *) skb_dst(skb);
1325        if (rt) {
1326                if (rt->rt6i_flags & RTF_CACHE) {
1327                        dst_hold(&rt->dst);
1328                        ip6_del_rt(rt);
1329                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330                        rt->rt6i_node->fn_sernum = -1;
1331                }
1332        }
1333}
1334
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337        struct net *net = dev_net(rt->dst.dev);
1338
1339        rt->rt6i_flags |= RTF_MODIFIED;
1340        rt->rt6i_pmtu = mtu;
1341        rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346        return !(rt->rt6i_flags & RTF_CACHE) &&
1347                (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351                                 const struct ipv6hdr *iph, u32 mtu)
1352{
1353        struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355        if (rt6->rt6i_flags & RTF_LOCAL)
1356                return;
1357
1358        dst_confirm(dst);
1359        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360        if (mtu >= dst_mtu(dst))
1361                return;
1362
1363        if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364                rt6_do_update_pmtu(rt6, mtu);
1365        } else {
1366                const struct in6_addr *daddr, *saddr;
1367                struct rt6_info *nrt6;
1368
1369                if (iph) {
1370                        daddr = &iph->daddr;
1371                        saddr = &iph->saddr;
1372                } else if (sk) {
1373                        daddr = &sk->sk_v6_daddr;
1374                        saddr = &inet6_sk(sk)->saddr;
1375                } else {
1376                        return;
1377                }
1378                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379                if (nrt6) {
1380                        rt6_do_update_pmtu(nrt6, mtu);
1381
1382                        /* ip6_ins_rt(nrt6) will bump the
1383                         * rt6->rt6i_node->fn_sernum
1384                         * which will fail the next rt6_check() and
1385                         * invalidate the sk->sk_dst_cache.
1386                         */
1387                        ip6_ins_rt(nrt6);
1388                }
1389        }
1390}
1391
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393                               struct sk_buff *skb, u32 mtu)
1394{
1395        __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399                     int oif, u32 mark)
1400{
1401        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402        struct dst_entry *dst;
1403        struct flowi6 fl6;
1404
1405        memset(&fl6, 0, sizeof(fl6));
1406        fl6.flowi6_oif = oif;
1407        fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408        fl6.daddr = iph->daddr;
1409        fl6.saddr = iph->saddr;
1410        fl6.flowlabel = ip6_flowinfo(iph);
1411
1412        dst = ip6_route_output(net, NULL, &fl6);
1413        if (!dst->error)
1414                __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415        dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421        struct dst_entry *dst;
1422
1423        ip6_update_pmtu(skb, sock_net(sk), mtu,
1424                        sk->sk_bound_dev_if, sk->sk_mark);
1425
1426        dst = __sk_dst_get(sk);
1427        if (!dst || !dst->obsolete ||
1428            dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429                return;
1430
1431        bh_lock_sock(sk);
1432        if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433                ip6_datagram_dst_update(sk, false);
1434        bh_unlock_sock(sk);
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
1438/* Handle redirects */
1439struct ip6rd_flowi {
1440        struct flowi6 fl6;
1441        struct in6_addr gateway;
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445                                             struct fib6_table *table,
1446                                             struct flowi6 *fl6,
1447                                             int flags)
1448{
1449        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450        struct rt6_info *rt;
1451        struct fib6_node *fn;
1452
1453        /* Get the "current" route for this destination and
1454         * check if the redirect has come from approriate router.
1455         *
1456         * RFC 4861 specifies that redirects should only be
1457         * accepted if they come from the nexthop to the target.
1458         * Due to the way the routes are chosen, this notion
1459         * is a bit fuzzy and one might need to check all possible
1460         * routes.
1461         */
1462
1463        read_lock_bh(&table->tb6_lock);
1464        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467                if (rt6_check_expired(rt))
1468                        continue;
1469                if (rt->dst.error)
1470                        break;
1471                if (!(rt->rt6i_flags & RTF_GATEWAY))
1472                        continue;
1473                if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474                        continue;
1475                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476                        continue;
1477                break;
1478        }
1479
1480        if (!rt)
1481                rt = net->ipv6.ip6_null_entry;
1482        else if (rt->dst.error) {
1483                rt = net->ipv6.ip6_null_entry;
1484                goto out;
1485        }
1486
1487        if (rt == net->ipv6.ip6_null_entry) {
1488                fn = fib6_backtrack(fn, &fl6->saddr);
1489                if (fn)
1490                        goto restart;
1491        }
1492
1493out:
1494        dst_hold(&rt->dst);
1495
1496        read_unlock_bh(&table->tb6_lock);
1497
1498        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1499        return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503                                        const struct flowi6 *fl6,
1504                                        const struct in6_addr *gateway)
1505{
1506        int flags = RT6_LOOKUP_F_HAS_SADDR;
1507        struct ip6rd_flowi rdfl;
1508
1509        rdfl.fl6 = *fl6;
1510        rdfl.gateway = *gateway;
1511
1512        return fib6_rule_lookup(net, &rdfl.fl6,
1513                                flags, __ip6_route_redirect);
1514}
1515
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1517{
1518        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519        struct dst_entry *dst;
1520        struct flowi6 fl6;
1521
1522        memset(&fl6, 0, sizeof(fl6));
1523        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524        fl6.flowi6_oif = oif;
1525        fl6.flowi6_mark = mark;
1526        fl6.daddr = iph->daddr;
1527        fl6.saddr = iph->saddr;
1528        fl6.flowlabel = ip6_flowinfo(iph);
1529
1530        dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531        rt6_do_redirect(dst, NULL, skb);
1532        dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537                            u32 mark)
1538{
1539        const struct ipv6hdr *iph = ipv6_hdr(skb);
1540        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541        struct dst_entry *dst;
1542        struct flowi6 fl6;
1543
1544        memset(&fl6, 0, sizeof(fl6));
1545        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1546        fl6.flowi6_oif = oif;
1547        fl6.flowi6_mark = mark;
1548        fl6.daddr = msg->dest;
1549        fl6.saddr = iph->daddr;
1550
1551        dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552        rt6_do_redirect(dst, NULL, skb);
1553        dst_release(dst);
1554}
1555
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1563{
1564        struct net_device *dev = dst->dev;
1565        unsigned int mtu = dst_mtu(dst);
1566        struct net *net = dev_net(dev);
1567
1568        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
1570        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1572
1573        /*
1574         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576         * IPV6_MAXPLEN is also valid and means: "any MSS,
1577         * rely only on pmtu discovery"
1578         */
1579        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580                mtu = IPV6_MAXPLEN;
1581        return mtu;
1582}
1583
1584static unsigned int ip6_mtu(const struct dst_entry *dst)
1585{
1586        const struct rt6_info *rt = (const struct rt6_info *)dst;
1587        unsigned int mtu = rt->rt6i_pmtu;
1588        struct inet6_dev *idev;
1589
1590        if (mtu)
1591                goto out;
1592
1593        mtu = dst_metric_raw(dst, RTAX_MTU);
1594        if (mtu)
1595                goto out;
1596
1597        mtu = IPV6_MIN_MTU;
1598
1599        rcu_read_lock();
1600        idev = __in6_dev_get(dst->dev);
1601        if (idev)
1602                mtu = idev->cnf.mtu6;
1603        rcu_read_unlock();
1604
1605out:
1606        return min_t(unsigned int, mtu, IP6_MAX_MTU);
1607}
1608
1609static struct dst_entry *icmp6_dst_gc_list;
1610static DEFINE_SPINLOCK(icmp6_dst_lock);
1611
1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1613                                  struct flowi6 *fl6)
1614{
1615        struct dst_entry *dst;
1616        struct rt6_info *rt;
1617        struct inet6_dev *idev = in6_dev_get(dev);
1618        struct net *net = dev_net(dev);
1619
1620        if (unlikely(!idev))
1621                return ERR_PTR(-ENODEV);
1622
1623        rt = ip6_dst_alloc(net, dev, 0);
1624        if (unlikely(!rt)) {
1625                in6_dev_put(idev);
1626                dst = ERR_PTR(-ENOMEM);
1627                goto out;
1628        }
1629
1630        rt->dst.flags |= DST_HOST;
1631        rt->dst.output  = ip6_output;
1632        atomic_set(&rt->dst.__refcnt, 1);
1633        rt->rt6i_gateway  = fl6->daddr;
1634        rt->rt6i_dst.addr = fl6->daddr;
1635        rt->rt6i_dst.plen = 128;
1636        rt->rt6i_idev     = idev;
1637        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1638
1639        spin_lock_bh(&icmp6_dst_lock);
1640        rt->dst.next = icmp6_dst_gc_list;
1641        icmp6_dst_gc_list = &rt->dst;
1642        spin_unlock_bh(&icmp6_dst_lock);
1643
1644        fib6_force_start_gc(net);
1645
1646        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1648out:
1649        return dst;
1650}
1651
1652int icmp6_dst_gc(void)
1653{
1654        struct dst_entry *dst, **pprev;
1655        int more = 0;
1656
1657        spin_lock_bh(&icmp6_dst_lock);
1658        pprev = &icmp6_dst_gc_list;
1659
1660        while ((dst = *pprev) != NULL) {
1661                if (!atomic_read(&dst->__refcnt)) {
1662                        *pprev = dst->next;
1663                        dst_free(dst);
1664                } else {
1665                        pprev = &dst->next;
1666                        ++more;
1667                }
1668        }
1669
1670        spin_unlock_bh(&icmp6_dst_lock);
1671
1672        return more;
1673}
1674
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676                            void *arg)
1677{
1678        struct dst_entry *dst, **pprev;
1679
1680        spin_lock_bh(&icmp6_dst_lock);
1681        pprev = &icmp6_dst_gc_list;
1682        while ((dst = *pprev) != NULL) {
1683                struct rt6_info *rt = (struct rt6_info *) dst;
1684                if (func(rt, arg)) {
1685                        *pprev = dst->next;
1686                        dst_free(dst);
1687                } else {
1688                        pprev = &dst->next;
1689                }
1690        }
1691        spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
1694static int ip6_dst_gc(struct dst_ops *ops)
1695{
1696        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1697        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1702        int entries;
1703
1704        entries = dst_entries_get_fast(ops);
1705        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1706            entries <= rt_max_size)
1707                goto out;
1708
1709        net->ipv6.ip6_rt_gc_expire++;
1710        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1711        entries = dst_entries_get_slow(ops);
1712        if (entries < ops->gc_thresh)
1713                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1714out:
1715        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1716        return entries > rt_max_size;
1717}
1718
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720                               const struct fib6_config *cfg)
1721{
1722        bool ecn_ca = false;
1723        struct nlattr *nla;
1724        int remaining;
1725        u32 *mp;
1726
1727        if (!cfg->fc_mx)
1728                return 0;
1729
1730        mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731        if (unlikely(!mp))
1732                return -ENOMEM;
1733
1734        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735                int type = nla_type(nla);
1736                u32 val;
1737
1738                if (!type)
1739                        continue;
1740                if (unlikely(type > RTAX_MAX))
1741                        goto err;
1742
1743                if (type == RTAX_CC_ALGO) {
1744                        char tmp[TCP_CA_NAME_MAX];
1745
1746                        nla_strlcpy(tmp, nla, sizeof(tmp));
1747                        val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1748                        if (val == TCP_CA_UNSPEC)
1749                                goto err;
1750                } else {
1751                        val = nla_get_u32(nla);
1752                }
1753                if (type == RTAX_HOPLIMIT && val > 255)
1754                        val = 255;
1755                if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756                        goto err;
1757
1758                mp[type - 1] = val;
1759                __set_bit(type - 1, mxc->mx_valid);
1760        }
1761
1762        if (ecn_ca) {
1763                __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764                mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1765        }
1766
1767        mxc->mx = mp;
1768        return 0;
1769 err:
1770        kfree(mp);
1771        return -EINVAL;
1772}
1773
1774static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1775{
1776        struct net *net = cfg->fc_nlinfo.nl_net;
1777        struct rt6_info *rt = NULL;
1778        struct net_device *dev = NULL;
1779        struct inet6_dev *idev = NULL;
1780        struct fib6_table *table;
1781        int addr_type;
1782        int err = -EINVAL;
1783
1784        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1785                goto out;
1786#ifndef CONFIG_IPV6_SUBTREES
1787        if (cfg->fc_src_len)
1788                goto out;
1789#endif
1790        if (cfg->fc_ifindex) {
1791                err = -ENODEV;
1792                dev = dev_get_by_index(net, cfg->fc_ifindex);
1793                if (!dev)
1794                        goto out;
1795                idev = in6_dev_get(dev);
1796                if (!idev)
1797                        goto out;
1798        }
1799
1800        if (cfg->fc_metric == 0)
1801                cfg->fc_metric = IP6_RT_PRIO_USER;
1802
1803        err = -ENOBUFS;
1804        if (cfg->fc_nlinfo.nlh &&
1805            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1806                table = fib6_get_table(net, cfg->fc_table);
1807                if (!table) {
1808                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1809                        table = fib6_new_table(net, cfg->fc_table);
1810                }
1811        } else {
1812                table = fib6_new_table(net, cfg->fc_table);
1813        }
1814
1815        if (!table)
1816                goto out;
1817
1818        rt = ip6_dst_alloc(net, NULL,
1819                           (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1820
1821        if (!rt) {
1822                err = -ENOMEM;
1823                goto out;
1824        }
1825
1826        if (cfg->fc_flags & RTF_EXPIRES)
1827                rt6_set_expires(rt, jiffies +
1828                                clock_t_to_jiffies(cfg->fc_expires));
1829        else
1830                rt6_clean_expires(rt);
1831
1832        if (cfg->fc_protocol == RTPROT_UNSPEC)
1833                cfg->fc_protocol = RTPROT_BOOT;
1834        rt->rt6i_protocol = cfg->fc_protocol;
1835
1836        addr_type = ipv6_addr_type(&cfg->fc_dst);
1837
1838        if (addr_type & IPV6_ADDR_MULTICAST)
1839                rt->dst.input = ip6_mc_input;
1840        else if (cfg->fc_flags & RTF_LOCAL)
1841                rt->dst.input = ip6_input;
1842        else
1843                rt->dst.input = ip6_forward;
1844
1845        rt->dst.output = ip6_output;
1846
1847        if (cfg->fc_encap) {
1848                struct lwtunnel_state *lwtstate;
1849
1850                err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1851                                           cfg->fc_encap, AF_INET6, cfg,
1852                                           &lwtstate);
1853                if (err)
1854                        goto out;
1855                rt->dst.lwtstate = lwtstate_get(lwtstate);
1856                if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1857                        rt->dst.lwtstate->orig_output = rt->dst.output;
1858                        rt->dst.output = lwtunnel_output;
1859                }
1860                if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1861                        rt->dst.lwtstate->orig_input = rt->dst.input;
1862                        rt->dst.input = lwtunnel_input;
1863                }
1864        }
1865
1866        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1867        rt->rt6i_dst.plen = cfg->fc_dst_len;
1868        if (rt->rt6i_dst.plen == 128)
1869                rt->dst.flags |= DST_HOST;
1870
1871#ifdef CONFIG_IPV6_SUBTREES
1872        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1873        rt->rt6i_src.plen = cfg->fc_src_len;
1874#endif
1875
1876        rt->rt6i_metric = cfg->fc_metric;
1877
1878        /* We cannot add true routes via loopback here,
1879           they would result in kernel looping; promote them to reject routes
1880         */
1881        if ((cfg->fc_flags & RTF_REJECT) ||
1882            (dev && (dev->flags & IFF_LOOPBACK) &&
1883             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1884             !(cfg->fc_flags & RTF_LOCAL))) {
1885                /* hold loopback dev/idev if we haven't done so. */
1886                if (dev != net->loopback_dev) {
1887                        if (dev) {
1888                                dev_put(dev);
1889                                in6_dev_put(idev);
1890                        }
1891                        dev = net->loopback_dev;
1892                        dev_hold(dev);
1893                        idev = in6_dev_get(dev);
1894                        if (!idev) {
1895                                err = -ENODEV;
1896                                goto out;
1897                        }
1898                }
1899                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1900                switch (cfg->fc_type) {
1901                case RTN_BLACKHOLE:
1902                        rt->dst.error = -EINVAL;
1903                        rt->dst.output = dst_discard_out;
1904                        rt->dst.input = dst_discard;
1905                        break;
1906                case RTN_PROHIBIT:
1907                        rt->dst.error = -EACCES;
1908                        rt->dst.output = ip6_pkt_prohibit_out;
1909                        rt->dst.input = ip6_pkt_prohibit;
1910                        break;
1911                case RTN_THROW:
1912                case RTN_UNREACHABLE:
1913                default:
1914                        rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1915                                        : (cfg->fc_type == RTN_UNREACHABLE)
1916                                        ? -EHOSTUNREACH : -ENETUNREACH;
1917                        rt->dst.output = ip6_pkt_discard_out;
1918                        rt->dst.input = ip6_pkt_discard;
1919                        break;
1920                }
1921                goto install_route;
1922        }
1923
1924        if (cfg->fc_flags & RTF_GATEWAY) {
1925                const struct in6_addr *gw_addr;
1926                int gwa_type;
1927
1928                gw_addr = &cfg->fc_gateway;
1929                gwa_type = ipv6_addr_type(gw_addr);
1930
1931                /* if gw_addr is local we will fail to detect this in case
1932                 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1933                 * will return already-added prefix route via interface that
1934                 * prefix route was assigned to, which might be non-loopback.
1935                 */
1936                err = -EINVAL;
1937                if (ipv6_chk_addr_and_flags(net, gw_addr,
1938                                            gwa_type & IPV6_ADDR_LINKLOCAL ?
1939                                            dev : NULL, 0, 0))
1940                        goto out;
1941
1942                rt->rt6i_gateway = *gw_addr;
1943
1944                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1945                        struct rt6_info *grt;
1946
1947                        /* IPv6 strictly inhibits using not link-local
1948                           addresses as nexthop address.
1949                           Otherwise, router will not able to send redirects.
1950                           It is very good, but in some (rare!) circumstances
1951                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1952                           some exceptions. --ANK
1953                         */
1954                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1955                                goto out;
1956
1957                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1958
1959                        err = -EHOSTUNREACH;
1960                        if (!grt)
1961                                goto out;
1962                        if (dev) {
1963                                if (dev != grt->dst.dev) {
1964                                        ip6_rt_put(grt);
1965                                        goto out;
1966                                }
1967                        } else {
1968                                dev = grt->dst.dev;
1969                                idev = grt->rt6i_idev;
1970                                dev_hold(dev);
1971                                in6_dev_hold(grt->rt6i_idev);
1972                        }
1973                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1974                                err = 0;
1975                        ip6_rt_put(grt);
1976
1977                        if (err)
1978                                goto out;
1979                }
1980                err = -EINVAL;
1981                if (!dev || (dev->flags & IFF_LOOPBACK))
1982                        goto out;
1983        }
1984
1985        err = -ENODEV;
1986        if (!dev)
1987                goto out;
1988
1989        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1990                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1991                        err = -EINVAL;
1992                        goto out;
1993                }
1994                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1995                rt->rt6i_prefsrc.plen = 128;
1996        } else
1997                rt->rt6i_prefsrc.plen = 0;
1998
1999        rt->rt6i_flags = cfg->fc_flags;
2000
2001install_route:
2002        rt->dst.dev = dev;
2003        rt->rt6i_idev = idev;
2004        rt->rt6i_table = table;
2005
2006        cfg->fc_nlinfo.nl_net = dev_net(dev);
2007
2008        return rt;
2009out:
2010        if (dev)
2011                dev_put(dev);
2012        if (idev)
2013                in6_dev_put(idev);
2014        if (rt)
2015                dst_free(&rt->dst);
2016
2017        return ERR_PTR(err);
2018}
2019
2020int ip6_route_add(struct fib6_config *cfg)
2021{
2022        struct mx6_config mxc = { .mx = NULL, };
2023        struct rt6_info *rt;
2024        int err;
2025
2026        rt = ip6_route_info_create(cfg);
2027        if (IS_ERR(rt)) {
2028                err = PTR_ERR(rt);
2029                rt = NULL;
2030                goto out;
2031        }
2032
2033        err = ip6_convert_metrics(&mxc, cfg);
2034        if (err)
2035                goto out;
2036
2037        err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2038
2039        kfree(mxc.mx);
2040
2041        return err;
2042out:
2043        if (rt)
2044                dst_free(&rt->dst);
2045
2046        return err;
2047}
2048
2049static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2050{
2051        int err;
2052        struct fib6_table *table;
2053        struct net *net = dev_net(rt->dst.dev);
2054
2055        if (rt == net->ipv6.ip6_null_entry ||
2056            rt->dst.flags & DST_NOCACHE) {
2057                err = -ENOENT;
2058                goto out;
2059        }
2060
2061        table = rt->rt6i_table;
2062        write_lock_bh(&table->tb6_lock);
2063        err = fib6_del(rt, info);
2064        write_unlock_bh(&table->tb6_lock);
2065
2066out:
2067        ip6_rt_put(rt);
2068        return err;
2069}
2070
2071int ip6_del_rt(struct rt6_info *rt)
2072{
2073        struct nl_info info = {
2074                .nl_net = dev_net(rt->dst.dev),
2075        };
2076        return __ip6_del_rt(rt, &info);
2077}
2078
2079static int ip6_route_del(struct fib6_config *cfg)
2080{
2081        struct fib6_table *table;
2082        struct fib6_node *fn;
2083        struct rt6_info *rt;
2084        int err = -ESRCH;
2085
2086        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2087        if (!table)
2088                return err;
2089
2090        read_lock_bh(&table->tb6_lock);
2091
2092        fn = fib6_locate(&table->tb6_root,
2093                         &cfg->fc_dst, cfg->fc_dst_len,
2094                         &cfg->fc_src, cfg->fc_src_len);
2095
2096        if (fn) {
2097                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2098                        if ((rt->rt6i_flags & RTF_CACHE) &&
2099                            !(cfg->fc_flags & RTF_CACHE))
2100                                continue;
2101                        if (cfg->fc_ifindex &&
2102                            (!rt->dst.dev ||
2103                             rt->dst.dev->ifindex != cfg->fc_ifindex))
2104                                continue;
2105                        if (cfg->fc_flags & RTF_GATEWAY &&
2106                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2107                                continue;
2108                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2109                                continue;
2110                        dst_hold(&rt->dst);
2111                        read_unlock_bh(&table->tb6_lock);
2112
2113                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2114                }
2115        }
2116        read_unlock_bh(&table->tb6_lock);
2117
2118        return err;
2119}
2120
2121static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2122{
2123        struct netevent_redirect netevent;
2124        struct rt6_info *rt, *nrt = NULL;
2125        struct ndisc_options ndopts;
2126        struct inet6_dev *in6_dev;
2127        struct neighbour *neigh;
2128        struct rd_msg *msg;
2129        int optlen, on_link;
2130        u8 *lladdr;
2131
2132        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2133        optlen -= sizeof(*msg);
2134
2135        if (optlen < 0) {
2136                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2137                return;
2138        }
2139
2140        msg = (struct rd_msg *)icmp6_hdr(skb);
2141
2142        if (ipv6_addr_is_multicast(&msg->dest)) {
2143                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2144                return;
2145        }
2146
2147        on_link = 0;
2148        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2149                on_link = 1;
2150        } else if (ipv6_addr_type(&msg->target) !=
2151                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2152                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2153                return;
2154        }
2155
2156        in6_dev = __in6_dev_get(skb->dev);
2157        if (!in6_dev)
2158                return;
2159        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2160                return;
2161
2162        /* RFC2461 8.1:
2163         *      The IP source address of the Redirect MUST be the same as the current
2164         *      first-hop router for the specified ICMP Destination Address.
2165         */
2166
2167        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2168                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2169                return;
2170        }
2171
2172        lladdr = NULL;
2173        if (ndopts.nd_opts_tgt_lladdr) {
2174                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2175                                             skb->dev);
2176                if (!lladdr) {
2177                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2178                        return;
2179                }
2180        }
2181
2182        rt = (struct rt6_info *) dst;
2183        if (rt->rt6i_flags & RTF_REJECT) {
2184                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2185                return;
2186        }
2187
2188        /* Redirect received -> path was valid.
2189         * Look, redirects are sent only in response to data packets,
2190         * so that this nexthop apparently is reachable. --ANK
2191         */
2192        dst_confirm(&rt->dst);
2193
2194        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2195        if (!neigh)
2196                return;
2197
2198        /*
2199         *      We have finally decided to accept it.
2200         */
2201
2202        neigh_update(neigh, lladdr, NUD_STALE,
2203                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2204                     NEIGH_UPDATE_F_OVERRIDE|
2205                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2206                                     NEIGH_UPDATE_F_ISROUTER))
2207                     );
2208
2209        nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2210        if (!nrt)
2211                goto out;
2212
2213        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2214        if (on_link)
2215                nrt->rt6i_flags &= ~RTF_GATEWAY;
2216
2217        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2218
2219        if (ip6_ins_rt(nrt))
2220                goto out;
2221
2222        netevent.old = &rt->dst;
2223        netevent.new = &nrt->dst;
2224        netevent.daddr = &msg->dest;
2225        netevent.neigh = neigh;
2226        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2227
2228        if (rt->rt6i_flags & RTF_CACHE) {
2229                rt = (struct rt6_info *) dst_clone(&rt->dst);
2230                ip6_del_rt(rt);
2231        }
2232
2233out:
2234        neigh_release(neigh);
2235}
2236
2237/*
2238 *      Misc support functions
2239 */
2240
2241static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2242{
2243        BUG_ON(from->dst.from);
2244
2245        rt->rt6i_flags &= ~RTF_EXPIRES;
2246        dst_hold(&from->dst);
2247        rt->dst.from = &from->dst;
2248        dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2249}
2250
2251static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2252{
2253        rt->dst.input = ort->dst.input;
2254        rt->dst.output = ort->dst.output;
2255        rt->rt6i_dst = ort->rt6i_dst;
2256        rt->dst.error = ort->dst.error;
2257        rt->rt6i_idev = ort->rt6i_idev;
2258        if (rt->rt6i_idev)
2259                in6_dev_hold(rt->rt6i_idev);
2260        rt->dst.lastuse = jiffies;
2261        rt->rt6i_gateway = ort->rt6i_gateway;
2262        rt->rt6i_flags = ort->rt6i_flags;
2263        rt6_set_from(rt, ort);
2264        rt->rt6i_metric = ort->rt6i_metric;
2265#ifdef CONFIG_IPV6_SUBTREES
2266        rt->rt6i_src = ort->rt6i_src;
2267#endif
2268        rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2269        rt->rt6i_table = ort->rt6i_table;
2270        rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2271}
2272
2273#ifdef CONFIG_IPV6_ROUTE_INFO
2274static struct rt6_info *rt6_get_route_info(struct net *net,
2275                                           const struct in6_addr *prefix, int prefixlen,
2276                                           const struct in6_addr *gwaddr, int ifindex)
2277{
2278        struct fib6_node *fn;
2279        struct rt6_info *rt = NULL;
2280        struct fib6_table *table;
2281
2282        table = fib6_get_table(net, RT6_TABLE_INFO);
2283        if (!table)
2284                return NULL;
2285
2286        read_lock_bh(&table->tb6_lock);
2287        fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2288        if (!fn)
2289                goto out;
2290
2291        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2292                if (rt->dst.dev->ifindex != ifindex)
2293                        continue;
2294                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2295                        continue;
2296                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2297                        continue;
2298                dst_hold(&rt->dst);
2299                break;
2300        }
2301out:
2302        read_unlock_bh(&table->tb6_lock);
2303        return rt;
2304}
2305
2306static struct rt6_info *rt6_add_route_info(struct net *net,
2307                                           const struct in6_addr *prefix, int prefixlen,
2308                                           const struct in6_addr *gwaddr, int ifindex,
2309                                           unsigned int pref)
2310{
2311        struct fib6_config cfg = {
2312                .fc_metric      = IP6_RT_PRIO_USER,
2313                .fc_ifindex     = ifindex,
2314                .fc_dst_len     = prefixlen,
2315                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2316                                  RTF_UP | RTF_PREF(pref),
2317                .fc_nlinfo.portid = 0,
2318                .fc_nlinfo.nlh = NULL,
2319                .fc_nlinfo.nl_net = net,
2320        };
2321
2322        cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2323        cfg.fc_dst = *prefix;
2324        cfg.fc_gateway = *gwaddr;
2325
2326        /* We should treat it as a default route if prefix length is 0. */
2327        if (!prefixlen)
2328                cfg.fc_flags |= RTF_DEFAULT;
2329
2330        ip6_route_add(&cfg);
2331
2332        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2333}
2334#endif
2335
2336struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2337{
2338        struct rt6_info *rt;
2339        struct fib6_table *table;
2340
2341        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2342        if (!table)
2343                return NULL;
2344
2345        read_lock_bh(&table->tb6_lock);
2346        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2347                if (dev == rt->dst.dev &&
2348                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2349                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2350                        break;
2351        }
2352        if (rt)
2353                dst_hold(&rt->dst);
2354        read_unlock_bh(&table->tb6_lock);
2355        return rt;
2356}
2357
2358struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2359                                     struct net_device *dev,
2360                                     unsigned int pref)
2361{
2362        struct fib6_config cfg = {
2363                .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2364                .fc_metric      = IP6_RT_PRIO_USER,
2365                .fc_ifindex     = dev->ifindex,
2366                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2367                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2368                .fc_nlinfo.portid = 0,
2369                .fc_nlinfo.nlh = NULL,
2370                .fc_nlinfo.nl_net = dev_net(dev),
2371        };
2372
2373        cfg.fc_gateway = *gwaddr;
2374
2375        ip6_route_add(&cfg);
2376
2377        return rt6_get_dflt_router(gwaddr, dev);
2378}
2379
2380void rt6_purge_dflt_routers(struct net *net)
2381{
2382        struct rt6_info *rt;
2383        struct fib6_table *table;
2384
2385        /* NOTE: Keep consistent with rt6_get_dflt_router */
2386        table = fib6_get_table(net, RT6_TABLE_DFLT);
2387        if (!table)
2388                return;
2389
2390restart:
2391        read_lock_bh(&table->tb6_lock);
2392        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2393                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2394                    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2395                        dst_hold(&rt->dst);
2396                        read_unlock_bh(&table->tb6_lock);
2397                        ip6_del_rt(rt);
2398                        goto restart;
2399                }
2400        }
2401        read_unlock_bh(&table->tb6_lock);
2402}
2403
2404static void rtmsg_to_fib6_config(struct net *net,
2405                                 struct in6_rtmsg *rtmsg,
2406                                 struct fib6_config *cfg)
2407{
2408        memset(cfg, 0, sizeof(*cfg));
2409
2410        cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2411                         : RT6_TABLE_MAIN;
2412        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2413        cfg->fc_metric = rtmsg->rtmsg_metric;
2414        cfg->fc_expires = rtmsg->rtmsg_info;
2415        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2416        cfg->fc_src_len = rtmsg->rtmsg_src_len;
2417        cfg->fc_flags = rtmsg->rtmsg_flags;
2418
2419        cfg->fc_nlinfo.nl_net = net;
2420
2421        cfg->fc_dst = rtmsg->rtmsg_dst;
2422        cfg->fc_src = rtmsg->rtmsg_src;
2423        cfg->fc_gateway = rtmsg->rtmsg_gateway;
2424}
2425
2426int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2427{
2428        struct fib6_config cfg;
2429        struct in6_rtmsg rtmsg;
2430        int err;
2431
2432        switch (cmd) {
2433        case SIOCADDRT:         /* Add a route */
2434        case SIOCDELRT:         /* Delete a route */
2435                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2436                        return -EPERM;
2437                err = copy_from_user(&rtmsg, arg,
2438                                     sizeof(struct in6_rtmsg));
2439                if (err)
2440                        return -EFAULT;
2441
2442                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2443
2444                rtnl_lock();
2445                switch (cmd) {
2446                case SIOCADDRT:
2447                        err = ip6_route_add(&cfg);
2448                        break;
2449                case SIOCDELRT:
2450                        err = ip6_route_del(&cfg);
2451                        break;
2452                default:
2453                        err = -EINVAL;
2454                }
2455                rtnl_unlock();
2456
2457                return err;
2458        }
2459
2460        return -EINVAL;
2461}
2462
2463/*
2464 *      Drop the packet on the floor
2465 */
2466
2467static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2468{
2469        int type;
2470        struct dst_entry *dst = skb_dst(skb);
2471        switch (ipstats_mib_noroutes) {
2472        case IPSTATS_MIB_INNOROUTES:
2473                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2474                if (type == IPV6_ADDR_ANY) {
2475                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2476                                      IPSTATS_MIB_INADDRERRORS);
2477                        break;
2478                }
2479                /* FALLTHROUGH */
2480        case IPSTATS_MIB_OUTNOROUTES:
2481                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2482                              ipstats_mib_noroutes);
2483                break;
2484        }
2485        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2486        kfree_skb(skb);
2487        return 0;
2488}
2489
2490static int ip6_pkt_discard(struct sk_buff *skb)
2491{
2492        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2493}
2494
2495static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2496{
2497        skb->dev = skb_dst(skb)->dev;
2498        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2499}
2500
2501static int ip6_pkt_prohibit(struct sk_buff *skb)
2502{
2503        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2504}
2505
2506static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2507{
2508        skb->dev = skb_dst(skb)->dev;
2509        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2510}
2511
2512/*
2513 *      Allocate a dst for local (unicast / anycast) address.
2514 */
2515
2516struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2517                                    const struct in6_addr *addr,
2518                                    bool anycast)
2519{
2520        u32 tb_id;
2521        struct net *net = dev_net(idev->dev);
2522        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2523                                            DST_NOCOUNT);
2524        if (!rt)
2525                return ERR_PTR(-ENOMEM);
2526
2527        in6_dev_hold(idev);
2528
2529        rt->dst.flags |= DST_HOST;
2530        rt->dst.input = ip6_input;
2531        rt->dst.output = ip6_output;
2532        rt->rt6i_idev = idev;
2533
2534        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2535        if (anycast)
2536                rt->rt6i_flags |= RTF_ANYCAST;
2537        else
2538                rt->rt6i_flags |= RTF_LOCAL;
2539
2540        rt->rt6i_gateway  = *addr;
2541        rt->rt6i_dst.addr = *addr;
2542        rt->rt6i_dst.plen = 128;
2543        tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2544        rt->rt6i_table = fib6_get_table(net, tb_id);
2545        rt->dst.flags |= DST_NOCACHE;
2546
2547        atomic_set(&rt->dst.__refcnt, 1);
2548
2549        return rt;
2550}
2551
2552int ip6_route_get_saddr(struct net *net,
2553                        struct rt6_info *rt,
2554                        const struct in6_addr *daddr,
2555                        unsigned int prefs,
2556                        struct in6_addr *saddr)
2557{
2558        struct inet6_dev *idev =
2559                rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2560        int err = 0;
2561        if (rt && rt->rt6i_prefsrc.plen)
2562                *saddr = rt->rt6i_prefsrc.addr;
2563        else
2564                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2565                                         daddr, prefs, saddr);
2566        return err;
2567}
2568
2569/* remove deleted ip from prefsrc entries */
2570struct arg_dev_net_ip {
2571        struct net_device *dev;
2572        struct net *net;
2573        struct in6_addr *addr;
2574};
2575
2576static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2577{
2578        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2579        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2580        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2581
2582        if (((void *)rt->dst.dev == dev || !dev) &&
2583            rt != net->ipv6.ip6_null_entry &&
2584            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2585                /* remove prefsrc entry */
2586                rt->rt6i_prefsrc.plen = 0;
2587        }
2588        return 0;
2589}
2590
2591void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2592{
2593        struct net *net = dev_net(ifp->idev->dev);
2594        struct arg_dev_net_ip adni = {
2595                .dev = ifp->idev->dev,
2596                .net = net,
2597                .addr = &ifp->addr,
2598        };
2599        fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2600}
2601
2602#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2603#define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2604
2605/* Remove routers and update dst entries when gateway turn into host. */
2606static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2607{
2608        struct in6_addr *gateway = (struct in6_addr *)arg;
2609
2610        if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2611             ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2612             ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2613                return -1;
2614        }
2615        return 0;
2616}
2617
2618void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2619{
2620        fib6_clean_all(net, fib6_clean_tohost, gateway);
2621}
2622
2623struct arg_dev_net {
2624        struct net_device *dev;
2625        struct net *net;
2626};
2627
2628static int fib6_ifdown(struct rt6_info *rt, void *arg)
2629{
2630        const struct arg_dev_net *adn = arg;
2631        const struct net_device *dev = adn->dev;
2632
2633        if ((rt->dst.dev == dev || !dev) &&
2634            rt != adn->net->ipv6.ip6_null_entry)
2635                return -1;
2636
2637        return 0;
2638}
2639
2640void rt6_ifdown(struct net *net, struct net_device *dev)
2641{
2642        struct arg_dev_net adn = {
2643                .dev = dev,
2644                .net = net,
2645        };
2646
2647        fib6_clean_all(net, fib6_ifdown, &adn);
2648        icmp6_clean_all(fib6_ifdown, &adn);
2649        if (dev)
2650                rt6_uncached_list_flush_dev(net, dev);
2651}
2652
2653struct rt6_mtu_change_arg {
2654        struct net_device *dev;
2655        unsigned int mtu;
2656};
2657
2658static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2659{
2660        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2661        struct inet6_dev *idev;
2662
2663        /* In IPv6 pmtu discovery is not optional,
2664           so that RTAX_MTU lock cannot disable it.
2665           We still use this lock to block changes
2666           caused by addrconf/ndisc.
2667        */
2668
2669        idev = __in6_dev_get(arg->dev);
2670        if (!idev)
2671                return 0;
2672
2673        /* For administrative MTU increase, there is no way to discover
2674           IPv6 PMTU increase, so PMTU increase should be updated here.
2675           Since RFC 1981 doesn't include administrative MTU increase
2676           update PMTU increase is a MUST. (i.e. jumbo frame)
2677         */
2678        /*
2679           If new MTU is less than route PMTU, this new MTU will be the
2680           lowest MTU in the path, update the route PMTU to reflect PMTU
2681           decreases; if new MTU is greater than route PMTU, and the
2682           old MTU is the lowest MTU in the path, update the route PMTU
2683           to reflect the increase. In this case if the other nodes' MTU
2684           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2685           PMTU discouvery.
2686         */
2687        if (rt->dst.dev == arg->dev &&
2688            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2689                if (rt->rt6i_flags & RTF_CACHE) {
2690                        /* For RTF_CACHE with rt6i_pmtu == 0
2691                         * (i.e. a redirected route),
2692                         * the metrics of its rt->dst.from has already
2693                         * been updated.
2694                         */
2695                        if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2696                                rt->rt6i_pmtu = arg->mtu;
2697                } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2698                           (dst_mtu(&rt->dst) < arg->mtu &&
2699                            dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2700                        dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2701                }
2702        }
2703        return 0;
2704}
2705
2706void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2707{
2708        struct rt6_mtu_change_arg arg = {
2709                .dev = dev,
2710                .mtu = mtu,
2711        };
2712
2713        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2714}
2715
2716static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2717        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2718        [RTA_OIF]               = { .type = NLA_U32 },
2719        [RTA_IIF]               = { .type = NLA_U32 },
2720        [RTA_PRIORITY]          = { .type = NLA_U32 },
2721        [RTA_METRICS]           = { .type = NLA_NESTED },
2722        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2723        [RTA_PREF]              = { .type = NLA_U8 },
2724        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2725        [RTA_ENCAP]             = { .type = NLA_NESTED },
2726        [RTA_EXPIRES]           = { .type = NLA_U32 },
2727};
2728
2729static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2730                              struct fib6_config *cfg)
2731{
2732        struct rtmsg *rtm;
2733        struct nlattr *tb[RTA_MAX+1];
2734        unsigned int pref;
2735        int err;
2736
2737        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2738        if (err < 0)
2739                goto errout;
2740
2741        err = -EINVAL;
2742        rtm = nlmsg_data(nlh);
2743        memset(cfg, 0, sizeof(*cfg));
2744
2745        cfg->fc_table = rtm->rtm_table;
2746        cfg->fc_dst_len = rtm->rtm_dst_len;
2747        cfg->fc_src_len = rtm->rtm_src_len;
2748        cfg->fc_flags = RTF_UP;
2749        cfg->fc_protocol = rtm->rtm_protocol;
2750        cfg->fc_type = rtm->rtm_type;
2751
2752        if (rtm->rtm_type == RTN_UNREACHABLE ||
2753            rtm->rtm_type == RTN_BLACKHOLE ||
2754            rtm->rtm_type == RTN_PROHIBIT ||
2755            rtm->rtm_type == RTN_THROW)
2756                cfg->fc_flags |= RTF_REJECT;
2757
2758        if (rtm->rtm_type == RTN_LOCAL)
2759                cfg->fc_flags |= RTF_LOCAL;
2760
2761        if (rtm->rtm_flags & RTM_F_CLONED)
2762                cfg->fc_flags |= RTF_CACHE;
2763
2764        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2765        cfg->fc_nlinfo.nlh = nlh;
2766        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2767
2768        if (tb[RTA_GATEWAY]) {
2769                cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2770                cfg->fc_flags |= RTF_GATEWAY;
2771        }
2772
2773        if (tb[RTA_DST]) {
2774                int plen = (rtm->rtm_dst_len + 7) >> 3;
2775
2776                if (nla_len(tb[RTA_DST]) < plen)
2777                        goto errout;
2778
2779                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2780        }
2781
2782        if (tb[RTA_SRC]) {
2783                int plen = (rtm->rtm_src_len + 7) >> 3;
2784
2785                if (nla_len(tb[RTA_SRC]) < plen)
2786                        goto errout;
2787
2788                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2789        }
2790
2791        if (tb[RTA_PREFSRC])
2792                cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2793
2794        if (tb[RTA_OIF])
2795                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2796
2797        if (tb[RTA_PRIORITY])
2798                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2799
2800        if (tb[RTA_METRICS]) {
2801                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2802                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2803        }
2804
2805        if (tb[RTA_TABLE])
2806                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2807
2808        if (tb[RTA_MULTIPATH]) {
2809                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2810                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2811        }
2812
2813        if (tb[RTA_PREF]) {
2814                pref = nla_get_u8(tb[RTA_PREF]);
2815                if (pref != ICMPV6_ROUTER_PREF_LOW &&
2816                    pref != ICMPV6_ROUTER_PREF_HIGH)
2817                        pref = ICMPV6_ROUTER_PREF_MEDIUM;
2818                cfg->fc_flags |= RTF_PREF(pref);
2819        }
2820
2821        if (tb[RTA_ENCAP])
2822                cfg->fc_encap = tb[RTA_ENCAP];
2823
2824        if (tb[RTA_ENCAP_TYPE])
2825                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2826
2827        if (tb[RTA_EXPIRES]) {
2828                unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2829
2830                if (addrconf_finite_timeout(timeout)) {
2831                        cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2832                        cfg->fc_flags |= RTF_EXPIRES;
2833                }
2834        }
2835
2836        err = 0;
2837errout:
2838        return err;
2839}
2840
2841struct rt6_nh {
2842        struct rt6_info *rt6_info;
2843        struct fib6_config r_cfg;
2844        struct mx6_config mxc;
2845        struct list_head next;
2846};
2847
2848static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2849{
2850        struct rt6_nh *nh;
2851
2852        list_for_each_entry(nh, rt6_nh_list, next) {
2853                pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2854                        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2855                        nh->r_cfg.fc_ifindex);
2856        }
2857}
2858
2859static int ip6_route_info_append(struct list_head *rt6_nh_list,
2860                                 struct rt6_info *rt, struct fib6_config *r_cfg)
2861{
2862        struct rt6_nh *nh;
2863        struct rt6_info *rtnh;
2864        int err = -EEXIST;
2865
2866        list_for_each_entry(nh, rt6_nh_list, next) {
2867                /* check if rt6_info already exists */
2868                rtnh = nh->rt6_info;
2869
2870                if (rtnh->dst.dev == rt->dst.dev &&
2871                    rtnh->rt6i_idev == rt->rt6i_idev &&
2872                    ipv6_addr_equal(&rtnh->rt6i_gateway,
2873                                    &rt->rt6i_gateway))
2874                        return err;
2875        }
2876
2877        nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2878        if (!nh)
2879                return -ENOMEM;
2880        nh->rt6_info = rt;
2881        err = ip6_convert_metrics(&nh->mxc, r_cfg);
2882        if (err) {
2883                kfree(nh);
2884                return err;
2885        }
2886        memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2887        list_add_tail(&nh->next, rt6_nh_list);
2888
2889        return 0;
2890}
2891
2892static int ip6_route_multipath_add(struct fib6_config *cfg)
2893{
2894        struct fib6_config r_cfg;
2895        struct rtnexthop *rtnh;
2896        struct rt6_info *rt;
2897        struct rt6_nh *err_nh;
2898        struct rt6_nh *nh, *nh_safe;
2899        int remaining;
2900        int attrlen;
2901        int err = 1;
2902        int nhn = 0;
2903        int replace = (cfg->fc_nlinfo.nlh &&
2904                       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2905        LIST_HEAD(rt6_nh_list);
2906
2907        remaining = cfg->fc_mp_len;
2908        rtnh = (struct rtnexthop *)cfg->fc_mp;
2909
2910        /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2911         * rt6_info structs per nexthop
2912         */
2913        while (rtnh_ok(rtnh, remaining)) {
2914                memcpy(&r_cfg, cfg, sizeof(*cfg));
2915                if (rtnh->rtnh_ifindex)
2916                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2917
2918                attrlen = rtnh_attrlen(rtnh);
2919                if (attrlen > 0) {
2920                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2921
2922                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2923                        if (nla) {
2924                                r_cfg.fc_gateway = nla_get_in6_addr(nla);
2925                                r_cfg.fc_flags |= RTF_GATEWAY;
2926                        }
2927                        r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2928                        nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2929                        if (nla)
2930                                r_cfg.fc_encap_type = nla_get_u16(nla);
2931                }
2932
2933                rt = ip6_route_info_create(&r_cfg);
2934                if (IS_ERR(rt)) {
2935                        err = PTR_ERR(rt);
2936                        rt = NULL;
2937                        goto cleanup;
2938                }
2939
2940                err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2941                if (err) {
2942                        dst_free(&rt->dst);
2943                        goto cleanup;
2944                }
2945
2946                rtnh = rtnh_next(rtnh, &remaining);
2947        }
2948
2949        err_nh = NULL;
2950        list_for_each_entry(nh, &rt6_nh_list, next) {
2951                err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2952                /* nh->rt6_info is used or freed at this point, reset to NULL*/
2953                nh->rt6_info = NULL;
2954                if (err) {
2955                        if (replace && nhn)
2956                                ip6_print_replace_route_err(&rt6_nh_list);
2957                        err_nh = nh;
2958                        goto add_errout;
2959                }
2960
2961                /* Because each route is added like a single route we remove
2962                 * these flags after the first nexthop: if there is a collision,
2963                 * we have already failed to add the first nexthop:
2964                 * fib6_add_rt2node() has rejected it; when replacing, old
2965                 * nexthops have been replaced by first new, the rest should
2966                 * be added to it.
2967                 */
2968                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2969                                                     NLM_F_REPLACE);
2970                nhn++;
2971        }
2972
2973        goto cleanup;
2974
2975add_errout:
2976        /* Delete routes that were already added */
2977        list_for_each_entry(nh, &rt6_nh_list, next) {
2978                if (err_nh == nh)
2979                        break;
2980                ip6_route_del(&nh->r_cfg);
2981        }
2982
2983cleanup:
2984        list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2985                if (nh->rt6_info)
2986                        dst_free(&nh->rt6_info->dst);
2987                kfree(nh->mxc.mx);
2988                list_del(&nh->next);
2989                kfree(nh);
2990        }
2991
2992        return err;
2993}
2994
2995static int ip6_route_multipath_del(struct fib6_config *cfg)
2996{
2997        struct fib6_config r_cfg;
2998        struct rtnexthop *rtnh;
2999        int remaining;
3000        int attrlen;
3001        int err = 1, last_err = 0;
3002
3003        remaining = cfg->fc_mp_len;
3004        rtnh = (struct rtnexthop *)cfg->fc_mp;
3005
3006        /* Parse a Multipath Entry */
3007        while (rtnh_ok(rtnh, remaining)) {
3008                memcpy(&r_cfg, cfg, sizeof(*cfg));
3009                if (rtnh->rtnh_ifindex)
3010                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3011
3012                attrlen = rtnh_attrlen(rtnh);
3013                if (attrlen > 0) {
3014                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3015
3016                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3017                        if (nla) {
3018                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3019                                r_cfg.fc_flags |= RTF_GATEWAY;
3020                        }
3021                }
3022                err = ip6_route_del(&r_cfg);
3023                if (err)
3024                        last_err = err;
3025
3026                rtnh = rtnh_next(rtnh, &remaining);
3027        }
3028
3029        return last_err;
3030}
3031
3032static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3033{
3034        struct fib6_config cfg;
3035        int err;
3036
3037        err = rtm_to_fib6_config(skb, nlh, &cfg);
3038        if (err < 0)
3039                return err;
3040
3041        if (cfg.fc_mp)
3042                return ip6_route_multipath_del(&cfg);
3043        else
3044                return ip6_route_del(&cfg);
3045}
3046
3047static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3048{
3049        struct fib6_config cfg;
3050        int err;
3051
3052        err = rtm_to_fib6_config(skb, nlh, &cfg);
3053        if (err < 0)
3054                return err;
3055
3056        if (cfg.fc_mp)
3057                return ip6_route_multipath_add(&cfg);
3058        else
3059                return ip6_route_add(&cfg);
3060}
3061
3062static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3063{
3064        return NLMSG_ALIGN(sizeof(struct rtmsg))
3065               + nla_total_size(16) /* RTA_SRC */
3066               + nla_total_size(16) /* RTA_DST */
3067               + nla_total_size(16) /* RTA_GATEWAY */
3068               + nla_total_size(16) /* RTA_PREFSRC */
3069               + nla_total_size(4) /* RTA_TABLE */
3070               + nla_total_size(4) /* RTA_IIF */
3071               + nla_total_size(4) /* RTA_OIF */
3072               + nla_total_size(4) /* RTA_PRIORITY */
3073               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3074               + nla_total_size(sizeof(struct rta_cacheinfo))
3075               + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3076               + nla_total_size(1) /* RTA_PREF */
3077               + lwtunnel_get_encap_size(rt->dst.lwtstate);
3078}
3079
3080static int rt6_fill_node(struct net *net,
3081                         struct sk_buff *skb, struct rt6_info *rt,
3082                         struct in6_addr *dst, struct in6_addr *src,
3083                         int iif, int type, u32 portid, u32 seq,
3084                         int prefix, int nowait, unsigned int flags)
3085{
3086        u32 metrics[RTAX_MAX];
3087        struct rtmsg *rtm;
3088        struct nlmsghdr *nlh;
3089        long expires;
3090        u32 table;
3091
3092        if (prefix) {   /* user wants prefix routes only */
3093                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3094                        /* success since this is not a prefix route */
3095                        return 1;
3096                }
3097        }
3098
3099        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3100        if (!nlh)
3101                return -EMSGSIZE;
3102
3103        rtm = nlmsg_data(nlh);
3104        rtm->rtm_family = AF_INET6;
3105        rtm->rtm_dst_len = rt->rt6i_dst.plen;
3106        rtm->rtm_src_len = rt->rt6i_src.plen;
3107        rtm->rtm_tos = 0;
3108        if (rt->rt6i_table)
3109                table = rt->rt6i_table->tb6_id;
3110        else
3111                table = RT6_TABLE_UNSPEC;
3112        rtm->rtm_table = table;
3113        if (nla_put_u32(skb, RTA_TABLE, table))
3114                goto nla_put_failure;
3115        if (rt->rt6i_flags & RTF_REJECT) {
3116                switch (rt->dst.error) {
3117                case -EINVAL:
3118                        rtm->rtm_type = RTN_BLACKHOLE;
3119                        break;
3120                case -EACCES:
3121                        rtm->rtm_type = RTN_PROHIBIT;
3122                        break;
3123                case -EAGAIN:
3124                        rtm->rtm_type = RTN_THROW;
3125                        break;
3126                default:
3127                        rtm->rtm_type = RTN_UNREACHABLE;
3128                        break;
3129                }
3130        }
3131        else if (rt->rt6i_flags & RTF_LOCAL)
3132                rtm->rtm_type = RTN_LOCAL;
3133        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3134                rtm->rtm_type = RTN_LOCAL;
3135        else
3136                rtm->rtm_type = RTN_UNICAST;
3137        rtm->rtm_flags = 0;
3138        if (!netif_carrier_ok(rt->dst.dev)) {
3139                rtm->rtm_flags |= RTNH_F_LINKDOWN;
3140                if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3141                        rtm->rtm_flags |= RTNH_F_DEAD;
3142        }
3143        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3144        rtm->rtm_protocol = rt->rt6i_protocol;
3145        if (rt->rt6i_flags & RTF_DYNAMIC)
3146                rtm->rtm_protocol = RTPROT_REDIRECT;
3147        else if (rt->rt6i_flags & RTF_ADDRCONF) {
3148                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3149                        rtm->rtm_protocol = RTPROT_RA;
3150                else
3151                        rtm->rtm_protocol = RTPROT_KERNEL;
3152        }
3153
3154        if (rt->rt6i_flags & RTF_CACHE)
3155                rtm->rtm_flags |= RTM_F_CLONED;
3156
3157        if (dst) {
3158                if (nla_put_in6_addr(skb, RTA_DST, dst))
3159                        goto nla_put_failure;
3160                rtm->rtm_dst_len = 128;
3161        } else if (rtm->rtm_dst_len)
3162                if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3163                        goto nla_put_failure;
3164#ifdef CONFIG_IPV6_SUBTREES
3165        if (src) {
3166                if (nla_put_in6_addr(skb, RTA_SRC, src))
3167                        goto nla_put_failure;
3168                rtm->rtm_src_len = 128;
3169        } else if (rtm->rtm_src_len &&
3170                   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3171                goto nla_put_failure;
3172#endif
3173        if (iif) {
3174#ifdef CONFIG_IPV6_MROUTE
3175                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3176                        int err = ip6mr_get_route(net, skb, rtm, nowait);
3177                        if (err <= 0) {
3178                                if (!nowait) {
3179                                        if (err == 0)
3180                                                return 0;
3181                                        goto nla_put_failure;
3182                                } else {
3183                                        if (err == -EMSGSIZE)
3184                                                goto nla_put_failure;
3185                                }
3186                        }
3187                } else
3188#endif
3189                        if (nla_put_u32(skb, RTA_IIF, iif))
3190                                goto nla_put_failure;
3191        } else if (dst) {
3192                struct in6_addr saddr_buf;
3193                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3194                    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3195                        goto nla_put_failure;
3196        }
3197
3198        if (rt->rt6i_prefsrc.plen) {
3199                struct in6_addr saddr_buf;
3200                saddr_buf = rt->rt6i_prefsrc.addr;
3201                if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3202                        goto nla_put_failure;
3203        }
3204
3205        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3206        if (rt->rt6i_pmtu)
3207                metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3208        if (rtnetlink_put_metrics(skb, metrics) < 0)
3209                goto nla_put_failure;
3210
3211        if (rt->rt6i_flags & RTF_GATEWAY) {
3212                if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3213                        goto nla_put_failure;
3214        }
3215
3216        if (rt->dst.dev &&
3217            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3218                goto nla_put_failure;
3219        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3220                goto nla_put_failure;
3221
3222        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3223
3224        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3225                goto nla_put_failure;
3226
3227        if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3228                goto nla_put_failure;
3229
3230        lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3231
3232        nlmsg_end(skb, nlh);
3233        return 0;
3234
3235nla_put_failure:
3236        nlmsg_cancel(skb, nlh);
3237        return -EMSGSIZE;
3238}
3239
3240int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3241{
3242        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3243        int prefix;
3244
3245        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3246                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3247                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3248        } else
3249                prefix = 0;
3250
3251        return rt6_fill_node(arg->net,
3252                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3253                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3254                     prefix, 0, NLM_F_MULTI);
3255}
3256
3257static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3258{
3259        struct net *net = sock_net(in_skb->sk);
3260        struct nlattr *tb[RTA_MAX+1];
3261        struct rt6_info *rt;
3262        struct sk_buff *skb;
3263        struct rtmsg *rtm;
3264        struct flowi6 fl6;
3265        int err, iif = 0, oif = 0;
3266
3267        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3268        if (err < 0)
3269                goto errout;
3270
3271        err = -EINVAL;
3272        memset(&fl6, 0, sizeof(fl6));
3273
3274        if (tb[RTA_SRC]) {
3275                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3276                        goto errout;
3277
3278                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3279        }
3280
3281        if (tb[RTA_DST]) {
3282                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3283                        goto errout;
3284
3285                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3286        }
3287
3288        if (tb[RTA_IIF])
3289                iif = nla_get_u32(tb[RTA_IIF]);
3290
3291        if (tb[RTA_OIF])
3292                oif = nla_get_u32(tb[RTA_OIF]);
3293
3294        if (tb[RTA_MARK])
3295                fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3296
3297        if (iif) {
3298                struct net_device *dev;
3299                int flags = 0;
3300
3301                dev = __dev_get_by_index(net, iif);
3302                if (!dev) {
3303                        err = -ENODEV;
3304                        goto errout;
3305                }
3306
3307                fl6.flowi6_iif = iif;
3308
3309                if (!ipv6_addr_any(&fl6.saddr))
3310                        flags |= RT6_LOOKUP_F_HAS_SADDR;
3311
3312                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3313                                                               flags);
3314        } else {
3315                fl6.flowi6_oif = oif;
3316
3317                if (netif_index_is_l3_master(net, oif)) {
3318                        fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3319                                           FLOWI_FLAG_SKIP_NH_OIF;
3320                }
3321
3322                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3323        }
3324
3325        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3326        if (!skb) {
3327                ip6_rt_put(rt);
3328                err = -ENOBUFS;
3329                goto errout;
3330        }
3331
3332        /* Reserve room for dummy headers, this skb can pass
3333           through good chunk of routing engine.
3334         */
3335        skb_reset_mac_header(skb);
3336        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3337
3338        skb_dst_set(skb, &rt->dst);
3339
3340        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3341                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3342                            nlh->nlmsg_seq, 0, 0, 0);
3343        if (err < 0) {
3344                kfree_skb(skb);
3345                goto errout;
3346        }
3347
3348        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3349errout:
3350        return err;
3351}
3352
3353void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3354                     unsigned int nlm_flags)
3355{
3356        struct sk_buff *skb;
3357        struct net *net = info->nl_net;
3358        u32 seq;
3359        int err;
3360
3361        err = -ENOBUFS;
3362        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3363
3364        skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3365        if (!skb)
3366                goto errout;
3367
3368        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3369                                event, info->portid, seq, 0, 0, nlm_flags);
3370        if (err < 0) {
3371                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3372                WARN_ON(err == -EMSGSIZE);
3373                kfree_skb(skb);
3374                goto errout;
3375        }
3376        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3377                    info->nlh, gfp_any());
3378        return;
3379errout:
3380        if (err < 0)
3381                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3382}
3383
3384static int ip6_route_dev_notify(struct notifier_block *this,
3385                                unsigned long event, void *ptr)
3386{
3387        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3388        struct net *net = dev_net(dev);
3389
3390        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3391                net->ipv6.ip6_null_entry->dst.dev = dev;
3392                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3393#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3394                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3395                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3396                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3397                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3398#endif
3399        }
3400
3401        return NOTIFY_OK;
3402}
3403
3404/*
3405 *      /proc
3406 */
3407
3408#ifdef CONFIG_PROC_FS
3409
3410static const struct file_operations ipv6_route_proc_fops = {
3411        .owner          = THIS_MODULE,
3412        .open           = ipv6_route_open,
3413        .read           = seq_read,
3414        .llseek         = seq_lseek,
3415        .release        = seq_release_net,
3416};
3417
3418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3419{
3420        struct net *net = (struct net *)seq->private;
3421        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3422                   net->ipv6.rt6_stats->fib_nodes,
3423                   net->ipv6.rt6_stats->fib_route_nodes,
3424                   net->ipv6.rt6_stats->fib_rt_alloc,
3425                   net->ipv6.rt6_stats->fib_rt_entries,
3426                   net->ipv6.rt6_stats->fib_rt_cache,
3427                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3428                   net->ipv6.rt6_stats->fib_discarded_routes);
3429
3430        return 0;
3431}
3432
3433static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3434{
3435        return single_open_net(inode, file, rt6_stats_seq_show);
3436}
3437
3438static const struct file_operations rt6_stats_seq_fops = {
3439        .owner   = THIS_MODULE,
3440        .open    = rt6_stats_seq_open,
3441        .read    = seq_read,
3442        .llseek  = seq_lseek,
3443        .release = single_release_net,
3444};
3445#endif  /* CONFIG_PROC_FS */
3446
3447#ifdef CONFIG_SYSCTL
3448
3449static
3450int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3451                              void __user *buffer, size_t *lenp, loff_t *ppos)
3452{
3453        struct net *net;
3454        int delay;
3455        if (!write)
3456                return -EINVAL;
3457
3458        net = (struct net *)ctl->extra1;
3459        delay = net->ipv6.sysctl.flush_delay;
3460        proc_dointvec(ctl, write, buffer, lenp, ppos);
3461        fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3462        return 0;
3463}
3464
3465struct ctl_table ipv6_route_table_template[] = {
3466        {
3467                .procname       =       "flush",
3468                .data           =       &init_net.ipv6.sysctl.flush_delay,
3469                .maxlen         =       sizeof(int),
3470                .mode           =       0200,
3471                .proc_handler   =       ipv6_sysctl_rtcache_flush
3472        },
3473        {
3474                .procname       =       "gc_thresh",
3475                .data           =       &ip6_dst_ops_template.gc_thresh,
3476                .maxlen         =       sizeof(int),
3477                .mode           =       0644,
3478                .proc_handler   =       proc_dointvec,
3479        },
3480        {
3481                .procname       =       "max_size",
3482                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3483                .maxlen         =       sizeof(int),
3484                .mode           =       0644,
3485                .proc_handler   =       proc_dointvec,
3486        },
3487        {
3488                .procname       =       "gc_min_interval",
3489                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3490                .maxlen         =       sizeof(int),
3491                .mode           =       0644,
3492                .proc_handler   =       proc_dointvec_jiffies,
3493        },
3494        {
3495                .procname       =       "gc_timeout",
3496                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3497                .maxlen         =       sizeof(int),
3498                .mode           =       0644,
3499                .proc_handler   =       proc_dointvec_jiffies,
3500        },
3501        {
3502                .procname       =       "gc_interval",
3503                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3504                .maxlen         =       sizeof(int),
3505                .mode           =       0644,
3506                .proc_handler   =       proc_dointvec_jiffies,
3507        },
3508        {
3509                .procname       =       "gc_elasticity",
3510                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3511                .maxlen         =       sizeof(int),
3512                .mode           =       0644,
3513                .proc_handler   =       proc_dointvec,
3514        },
3515        {
3516                .procname       =       "mtu_expires",
3517                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3518                .maxlen         =       sizeof(int),
3519                .mode           =       0644,
3520                .proc_handler   =       proc_dointvec_jiffies,
3521        },
3522        {
3523                .procname       =       "min_adv_mss",
3524                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3525                .maxlen         =       sizeof(int),
3526                .mode           =       0644,
3527                .proc_handler   =       proc_dointvec,
3528        },
3529        {
3530                .procname       =       "gc_min_interval_ms",
3531                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3532                .maxlen         =       sizeof(int),
3533                .mode           =       0644,
3534                .proc_handler   =       proc_dointvec_ms_jiffies,
3535        },
3536        { }
3537};
3538
3539struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3540{
3541        struct ctl_table *table;
3542
3543        table = kmemdup(ipv6_route_table_template,
3544                        sizeof(ipv6_route_table_template),
3545                        GFP_KERNEL);
3546
3547        if (table) {
3548                table[0].data = &net->ipv6.sysctl.flush_delay;
3549                table[0].extra1 = net;
3550                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3551                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3552                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3553                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3554                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3555                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3556                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3557                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3558                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3559
3560                /* Don't export sysctls to unprivileged users */
3561                if (net->user_ns != &init_user_ns)
3562                        table[0].procname = NULL;
3563        }
3564
3565        return table;
3566}
3567#endif
3568
3569static int __net_init ip6_route_net_init(struct net *net)
3570{
3571        int ret = -ENOMEM;
3572
3573        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3574               sizeof(net->ipv6.ip6_dst_ops));
3575
3576        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3577                goto out_ip6_dst_ops;
3578
3579        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3580                                           sizeof(*net->ipv6.ip6_null_entry),
3581                                           GFP_KERNEL);
3582        if (!net->ipv6.ip6_null_entry)
3583                goto out_ip6_dst_entries;
3584        net->ipv6.ip6_null_entry->dst.path =
3585                (struct dst_entry *)net->ipv6.ip6_null_entry;
3586        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3587        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3588                         ip6_template_metrics, true);
3589
3590#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3591        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3592                                               sizeof(*net->ipv6.ip6_prohibit_entry),
3593                                               GFP_KERNEL);
3594        if (!net->ipv6.ip6_prohibit_entry)
3595                goto out_ip6_null_entry;
3596        net->ipv6.ip6_prohibit_entry->dst.path =
3597                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3598        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3599        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3600                         ip6_template_metrics, true);
3601
3602        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3603                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
3604                                               GFP_KERNEL);
3605        if (!net->ipv6.ip6_blk_hole_entry)
3606                goto out_ip6_prohibit_entry;
3607        net->ipv6.ip6_blk_hole_entry->dst.path =
3608                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3609        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3610        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3611                         ip6_template_metrics, true);
3612#endif
3613
3614        net->ipv6.sysctl.flush_delay = 0;
3615        net->ipv6.sysctl.ip6_rt_max_size = 4096;
3616        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3617        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3618        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3619        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3620        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3621        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3622
3623        net->ipv6.ip6_rt_gc_expire = 30*HZ;
3624
3625        ret = 0;
3626out:
3627        return ret;
3628
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630out_ip6_prohibit_entry:
3631        kfree(net->ipv6.ip6_prohibit_entry);
3632out_ip6_null_entry:
3633        kfree(net->ipv6.ip6_null_entry);
3634#endif
3635out_ip6_dst_entries:
3636        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3637out_ip6_dst_ops:
3638        goto out;
3639}
3640
3641static void __net_exit ip6_route_net_exit(struct net *net)
3642{
3643        kfree(net->ipv6.ip6_null_entry);
3644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3645        kfree(net->ipv6.ip6_prohibit_entry);
3646        kfree(net->ipv6.ip6_blk_hole_entry);
3647#endif
3648        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3649}
3650
3651static int __net_init ip6_route_net_init_late(struct net *net)
3652{
3653#ifdef CONFIG_PROC_FS
3654        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3655        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3656#endif
3657        return 0;
3658}
3659
3660static void __net_exit ip6_route_net_exit_late(struct net *net)
3661{
3662#ifdef CONFIG_PROC_FS
3663        remove_proc_entry("ipv6_route", net->proc_net);
3664        remove_proc_entry("rt6_stats", net->proc_net);
3665#endif
3666}
3667
3668static struct pernet_operations ip6_route_net_ops = {
3669        .init = ip6_route_net_init,
3670        .exit = ip6_route_net_exit,
3671};
3672
3673static int __net_init ipv6_inetpeer_init(struct net *net)
3674{
3675        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3676
3677        if (!bp)
3678                return -ENOMEM;
3679        inet_peer_base_init(bp);
3680        net->ipv6.peers = bp;
3681        return 0;
3682}
3683
3684static void __net_exit ipv6_inetpeer_exit(struct net *net)
3685{
3686        struct inet_peer_base *bp = net->ipv6.peers;
3687
3688        net->ipv6.peers = NULL;
3689        inetpeer_invalidate_tree(bp);
3690        kfree(bp);
3691}
3692
3693static struct pernet_operations ipv6_inetpeer_ops = {
3694        .init   =       ipv6_inetpeer_init,
3695        .exit   =       ipv6_inetpeer_exit,
3696};
3697
3698static struct pernet_operations ip6_route_net_late_ops = {
3699        .init = ip6_route_net_init_late,
3700        .exit = ip6_route_net_exit_late,
3701};
3702
3703static struct notifier_block ip6_route_dev_notifier = {
3704        .notifier_call = ip6_route_dev_notify,
3705        .priority = 0,
3706};
3707
3708int __init ip6_route_init(void)
3709{
3710        int ret;
3711        int cpu;
3712
3713        ret = -ENOMEM;
3714        ip6_dst_ops_template.kmem_cachep =
3715                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3716                                  SLAB_HWCACHE_ALIGN, NULL);
3717        if (!ip6_dst_ops_template.kmem_cachep)
3718                goto out;
3719
3720        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3721        if (ret)
3722                goto out_kmem_cache;
3723
3724        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3725        if (ret)
3726                goto out_dst_entries;
3727
3728        ret = register_pernet_subsys(&ip6_route_net_ops);
3729        if (ret)
3730                goto out_register_inetpeer;
3731
3732        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3733
3734        /* Registering of the loopback is done before this portion of code,
3735         * the loopback reference in rt6_info will not be taken, do it
3736         * manually for init_net */
3737        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3738        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3739  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3740        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3741        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3742        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3743        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3744  #endif
3745        ret = fib6_init();
3746        if (ret)
3747                goto out_register_subsys;
3748
3749        ret = xfrm6_init();
3750        if (ret)
3751                goto out_fib6_init;
3752
3753        ret = fib6_rules_init();
3754        if (ret)
3755                goto xfrm6_init;
3756
3757        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3758        if (ret)
3759                goto fib6_rules_init;
3760
3761        ret = -ENOBUFS;
3762        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3763            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3764            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3765                goto out_register_late_subsys;
3766
3767        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3768        if (ret)
3769                goto out_register_late_subsys;
3770
3771        for_each_possible_cpu(cpu) {
3772                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3773
3774                INIT_LIST_HEAD(&ul->head);
3775                spin_lock_init(&ul->lock);
3776        }
3777
3778out:
3779        return ret;
3780
3781out_register_late_subsys:
3782        unregister_pernet_subsys(&ip6_route_net_late_ops);
3783fib6_rules_init:
3784        fib6_rules_cleanup();
3785xfrm6_init:
3786        xfrm6_fini();
3787out_fib6_init:
3788        fib6_gc_cleanup();
3789out_register_subsys:
3790        unregister_pernet_subsys(&ip6_route_net_ops);
3791out_register_inetpeer:
3792        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3793out_dst_entries:
3794        dst_entries_destroy(&ip6_dst_blackhole_ops);
3795out_kmem_cache:
3796        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3797        goto out;
3798}
3799
3800void ip6_route_cleanup(void)
3801{
3802        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3803        unregister_pernet_subsys(&ip6_route_net_late_ops);
3804        fib6_rules_cleanup();
3805        xfrm6_fini();
3806        fib6_gc_cleanup();
3807        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3808        unregister_pernet_subsys(&ip6_route_net_ops);
3809        dst_entries_destroy(&ip6_dst_blackhole_ops);
3810        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3811}
3812