linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
  /*
   * Verdict of the next-hop neighbour check used while scoring a route.
   * Every failure value is negative; rt6_score_route() returns it unchanged.
   */
enum rt6_nud_state {
        RT6_NUD_FAIL_HARD       = -3,   /* find_match() skips the route */
        RT6_NUD_FAIL_PROBE      = -2,   /* neighbour entry is NUD_FAILED */
        RT6_NUD_FAIL_DO_RR      = -1,   /* no neighbour entry: request round-robin */
        RT6_NUD_SUCCEED         = 1,
};
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int      ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void             ip6_dst_destroy(struct dst_entry *);
  86static void             ip6_dst_ifdown(struct dst_entry *,
  87                                       struct net_device *dev, int how);
  88static int               ip6_dst_gc(struct dst_ops *ops);
  89
  90static int              ip6_pkt_discard(struct sk_buff *skb);
  91static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int              ip6_pkt_prohibit(struct sk_buff *skb);
  93static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void             ip6_link_failure(struct sk_buff *skb);
  95static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96                                           struct sk_buff *skb, u32 mtu);
  97static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98                                        struct sk_buff *skb);
  99static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 /* Route-information helpers used by rt6_route_rcv(); only built with ROUTE_INFO. */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *
rt6_add_route_info(struct net *net,
                   const struct in6_addr *prefix, int prefixlen,
                   const struct in6_addr *gwaddr, int ifindex,
                   unsigned int pref);

static struct rt6_info *
rt6_get_route_info(struct net *net,
                   const struct in6_addr *prefix, int prefixlen,
                   const struct in6_addr *gwaddr, int ifindex);
#endif
 111
 112struct uncached_list {
 113        spinlock_t              lock;
 114        struct list_head        head;
 115};
 116
 117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 118
 119static void rt6_uncached_list_add(struct rt6_info *rt)
 120{
 121        struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 122
 123        rt->dst.flags |= DST_NOCACHE;
 124        rt->rt6i_uncached_list = ul;
 125
 126        spin_lock_bh(&ul->lock);
 127        list_add_tail(&rt->rt6i_uncached, &ul->head);
 128        spin_unlock_bh(&ul->lock);
 129}
 130
 131static void rt6_uncached_list_del(struct rt6_info *rt)
 132{
 133        if (!list_empty(&rt->rt6i_uncached)) {
 134                struct uncached_list *ul = rt->rt6i_uncached_list;
 135
 136                spin_lock_bh(&ul->lock);
 137                list_del(&rt->rt6i_uncached);
 138                spin_unlock_bh(&ul->lock);
 139        }
 140}
 141
 142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 143{
 144        struct net_device *loopback_dev = net->loopback_dev;
 145        int cpu;
 146
 147        if (dev == loopback_dev)
 148                return;
 149
 150        for_each_possible_cpu(cpu) {
 151                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 152                struct rt6_info *rt;
 153
 154                spin_lock_bh(&ul->lock);
 155                list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 156                        struct inet6_dev *rt_idev = rt->rt6i_idev;
 157                        struct net_device *rt_dev = rt->dst.dev;
 158
 159                        if (rt_idev->dev == dev) {
 160                                rt->rt6i_idev = in6_dev_get(loopback_dev);
 161                                in6_dev_put(rt_idev);
 162                        }
 163
 164                        if (rt_dev == dev) {
 165                                rt->dst.dev = loopback_dev;
 166                                dev_hold(rt->dst.dev);
 167                                dev_put(rt_dev);
 168                        }
 169                }
 170                spin_unlock_bh(&ul->lock);
 171        }
 172}
 173
 174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 175{
 176        return dst_metrics_write_ptr(rt->dst.from);
 177}
 178
 179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 180{
 181        struct rt6_info *rt = (struct rt6_info *)dst;
 182
 183        if (rt->rt6i_flags & RTF_PCPU)
 184                return rt6_pcpu_cow_metrics(rt);
 185        else if (rt->rt6i_flags & RTF_CACHE)
 186                return NULL;
 187        else
 188                return dst_cow_metrics_generic(dst, old);
 189}
 190
 191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 192                                             struct sk_buff *skb,
 193                                             const void *daddr)
 194{
 195        struct in6_addr *p = &rt->rt6i_gateway;
 196
 197        if (!ipv6_addr_any(p))
 198                return (const void *) p;
 199        else if (skb)
 200                return &ipv6_hdr(skb)->daddr;
 201        return daddr;
 202}
 203
 204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 205                                          struct sk_buff *skb,
 206                                          const void *daddr)
 207{
 208        struct rt6_info *rt = (struct rt6_info *) dst;
 209        struct neighbour *n;
 210
 211        daddr = choose_neigh_daddr(rt, skb, daddr);
 212        n = __ipv6_neigh_lookup(dst->dev, daddr);
 213        if (n)
 214                return n;
 215        return neigh_create(&nd_tbl, daddr, dst->dev);
 216}
 217
 218static struct dst_ops ip6_dst_ops_template = {
 219        .family                 =       AF_INET6,
 220        .gc                     =       ip6_dst_gc,
 221        .gc_thresh              =       1024,
 222        .check                  =       ip6_dst_check,
 223        .default_advmss         =       ip6_default_advmss,
 224        .mtu                    =       ip6_mtu,
 225        .cow_metrics            =       ipv6_cow_metrics,
 226        .destroy                =       ip6_dst_destroy,
 227        .ifdown                 =       ip6_dst_ifdown,
 228        .negative_advice        =       ip6_negative_advice,
 229        .link_failure           =       ip6_link_failure,
 230        .update_pmtu            =       ip6_rt_update_pmtu,
 231        .redirect               =       rt6_do_redirect,
 232        .local_out              =       __ip6_local_out,
 233        .neigh_lookup           =       ip6_neigh_lookup,
 234};
 235
 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 237{
 238        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 239
 240        return mtu ? : dst->dev->mtu;
 241}
 242
 243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 244                                         struct sk_buff *skb, u32 mtu)
 245{
 246}
 247
 /* redirect callback for blackhole dsts: redirects are ignored. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
                                      struct sk_buff *skb)
{
        /* intentionally empty */
}
 252
 253static struct dst_ops ip6_dst_blackhole_ops = {
 254        .family                 =       AF_INET6,
 255        .destroy                =       ip6_dst_destroy,
 256        .check                  =       ip6_dst_check,
 257        .mtu                    =       ip6_blackhole_mtu,
 258        .default_advmss         =       ip6_default_advmss,
 259        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 260        .redirect               =       ip6_rt_blackhole_redirect,
 261        .cow_metrics            =       dst_cow_metrics_generic,
 262        .neigh_lookup           =       ip6_neigh_lookup,
 263};
 264
 265static const u32 ip6_template_metrics[RTAX_MAX] = {
 266        [RTAX_HOPLIMIT - 1] = 0,
 267};
 268
 269static const struct rt6_info ip6_null_entry_template = {
 270        .dst = {
 271                .__refcnt       = ATOMIC_INIT(1),
 272                .__use          = 1,
 273                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 274                .error          = -ENETUNREACH,
 275                .input          = ip6_pkt_discard,
 276                .output         = ip6_pkt_discard_out,
 277        },
 278        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 279        .rt6i_protocol  = RTPROT_KERNEL,
 280        .rt6i_metric    = ~(u32) 0,
 281        .rt6i_ref       = ATOMIC_INIT(1),
 282};
 283
 284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 285
 286static const struct rt6_info ip6_prohibit_entry_template = {
 287        .dst = {
 288                .__refcnt       = ATOMIC_INIT(1),
 289                .__use          = 1,
 290                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 291                .error          = -EACCES,
 292                .input          = ip6_pkt_prohibit,
 293                .output         = ip6_pkt_prohibit_out,
 294        },
 295        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 296        .rt6i_protocol  = RTPROT_KERNEL,
 297        .rt6i_metric    = ~(u32) 0,
 298        .rt6i_ref       = ATOMIC_INIT(1),
 299};
 300
 301static const struct rt6_info ip6_blk_hole_entry_template = {
 302        .dst = {
 303                .__refcnt       = ATOMIC_INIT(1),
 304                .__use          = 1,
 305                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 306                .error          = -EINVAL,
 307                .input          = dst_discard,
 308                .output         = dst_discard_out,
 309        },
 310        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 311        .rt6i_protocol  = RTPROT_KERNEL,
 312        .rt6i_metric    = ~(u32) 0,
 313        .rt6i_ref       = ATOMIC_INIT(1),
 314};
 315
 316#endif
 317
 318static void rt6_info_init(struct rt6_info *rt)
 319{
 320        struct dst_entry *dst = &rt->dst;
 321
 322        memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 323        INIT_LIST_HEAD(&rt->rt6i_siblings);
 324        INIT_LIST_HEAD(&rt->rt6i_uncached);
 325}
 326
 327/* allocate dst with ip6_dst_ops */
 328static struct rt6_info *__ip6_dst_alloc(struct net *net,
 329                                        struct net_device *dev,
 330                                        int flags)
 331{
 332        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 333                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 334
 335        if (rt)
 336                rt6_info_init(rt);
 337
 338        return rt;
 339}
 340
 341struct rt6_info *ip6_dst_alloc(struct net *net,
 342                               struct net_device *dev,
 343                               int flags)
 344{
 345        struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 346
 347        if (rt) {
 348                rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 349                if (rt->rt6i_pcpu) {
 350                        int cpu;
 351
 352                        for_each_possible_cpu(cpu) {
 353                                struct rt6_info **p;
 354
 355                                p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 356                                /* no one shares rt */
 357                                *p =  NULL;
 358                        }
 359                } else {
 360                        dst_destroy((struct dst_entry *)rt);
 361                        return NULL;
 362                }
 363        }
 364
 365        return rt;
 366}
 367EXPORT_SYMBOL(ip6_dst_alloc);
 368
 369static void ip6_dst_destroy(struct dst_entry *dst)
 370{
 371        struct rt6_info *rt = (struct rt6_info *)dst;
 372        struct dst_entry *from = dst->from;
 373        struct inet6_dev *idev;
 374
 375        dst_destroy_metrics_generic(dst);
 376        free_percpu(rt->rt6i_pcpu);
 377        rt6_uncached_list_del(rt);
 378
 379        idev = rt->rt6i_idev;
 380        if (idev) {
 381                rt->rt6i_idev = NULL;
 382                in6_dev_put(idev);
 383        }
 384
 385        dst->from = NULL;
 386        dst_release(from);
 387}
 388
 389static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 390                           int how)
 391{
 392        struct rt6_info *rt = (struct rt6_info *)dst;
 393        struct inet6_dev *idev = rt->rt6i_idev;
 394        struct net_device *loopback_dev =
 395                dev_net(dev)->loopback_dev;
 396
 397        if (dev != loopback_dev) {
 398                if (idev && idev->dev == dev) {
 399                        struct inet6_dev *loopback_idev =
 400                                in6_dev_get(loopback_dev);
 401                        if (loopback_idev) {
 402                                rt->rt6i_idev = loopback_idev;
 403                                in6_dev_put(idev);
 404                        }
 405                }
 406        }
 407}
 408
 409static bool __rt6_check_expired(const struct rt6_info *rt)
 410{
 411        if (rt->rt6i_flags & RTF_EXPIRES)
 412                return time_after(jiffies, rt->dst.expires);
 413        else
 414                return false;
 415}
 416
 417static bool rt6_check_expired(const struct rt6_info *rt)
 418{
 419        if (rt->rt6i_flags & RTF_EXPIRES) {
 420                if (time_after(jiffies, rt->dst.expires))
 421                        return true;
 422        } else if (rt->dst.from) {
 423                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 424        }
 425        return false;
 426}
 427
 428/* Multipath route selection:
 429 *   Hash based function using packet header and flowlabel.
 430 * Adapted from fib_info_hashfn()
 431 */
 432static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 433                               const struct flowi6 *fl6)
 434{
 435        return get_hash_from_flowi6(fl6) % candidate_count;
 436}
 437
 438static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 439                                             struct flowi6 *fl6, int oif,
 440                                             int strict)
 441{
 442        struct rt6_info *sibling, *next_sibling;
 443        int route_choosen;
 444
 445        route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
 446        /* Don't change the route, if route_choosen == 0
 447         * (siblings does not include ourself)
 448         */
 449        if (route_choosen)
 450                list_for_each_entry_safe(sibling, next_sibling,
 451                                &match->rt6i_siblings, rt6i_siblings) {
 452                        route_choosen--;
 453                        if (route_choosen == 0) {
 454                                if (rt6_score_route(sibling, oif, strict) < 0)
 455                                        break;
 456                                match = sibling;
 457                                break;
 458                        }
 459                }
 460        return match;
 461}
 462
 463/*
 464 *      Route lookup. Any table->tb6_lock is implied.
 465 */
 466
 467static inline struct rt6_info *rt6_device_match(struct net *net,
 468                                                    struct rt6_info *rt,
 469                                                    const struct in6_addr *saddr,
 470                                                    int oif,
 471                                                    int flags)
 472{
 473        struct rt6_info *local = NULL;
 474        struct rt6_info *sprt;
 475
 476        if (!oif && ipv6_addr_any(saddr))
 477                goto out;
 478
 479        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 480                struct net_device *dev = sprt->dst.dev;
 481
 482                if (oif) {
 483                        if (dev->ifindex == oif)
 484                                return sprt;
 485                        if (dev->flags & IFF_LOOPBACK) {
 486                                if (!sprt->rt6i_idev ||
 487                                    sprt->rt6i_idev->dev->ifindex != oif) {
 488                                        if (flags & RT6_LOOKUP_F_IFACE)
 489                                                continue;
 490                                        if (local &&
 491                                            local->rt6i_idev->dev->ifindex == oif)
 492                                                continue;
 493                                }
 494                                local = sprt;
 495                        }
 496                } else {
 497                        if (ipv6_chk_addr(net, saddr, dev,
 498                                          flags & RT6_LOOKUP_F_IFACE))
 499                                return sprt;
 500                }
 501        }
 502
 503        if (oif) {
 504                if (local)
 505                        return local;
 506
 507                if (flags & RT6_LOOKUP_F_IFACE)
 508                        return net->ipv6.ip6_null_entry;
 509        }
 510out:
 511        return rt;
 512}
 513
 514#ifdef CONFIG_IPV6_ROUTER_PREF
 515struct __rt6_probe_work {
 516        struct work_struct work;
 517        struct in6_addr target;
 518        struct net_device *dev;
 519};
 520
 521static void rt6_probe_deferred(struct work_struct *w)
 522{
 523        struct in6_addr mcaddr;
 524        struct __rt6_probe_work *work =
 525                container_of(w, struct __rt6_probe_work, work);
 526
 527        addrconf_addr_solict_mult(&work->target, &mcaddr);
 528        ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 529        dev_put(work->dev);
 530        kfree(work);
 531}
 532
 /*
  * Queue a reachability probe for the gateway of @rt, if one is due.
  *
  * Nothing happens for a NULL @rt or a route without RTF_GATEWAY. Otherwise
  * the gateway's neighbour entry is looked up under rcu_read_lock_bh():
  *   - entry in a NUD_VALID state: no probe;
  *   - entry not valid: a probe is queued only if more than
  *     rtr_probe_interval jiffies passed since neigh->updated; the state is
  *     re-checked and __neigh_set_probe_once() is called under neigh->lock;
  *   - no entry: a probe is always queued.
  * Queuing means allocating a __rt6_probe_work (GFP_ATOMIC; allocation
  * failure silently skips the probe), taking a reference on the device and
  * scheduling rt6_probe_deferred().
  */
 533static void rt6_probe(struct rt6_info *rt)
 534{
 535        struct __rt6_probe_work *work;
 536        struct neighbour *neigh;
 537        /*
 538         * Okay, this does not seem to be appropriate
 539         * for now, however, we need to check if it
 540         * is really so; aka Router Reachability Probing.
 541         *
 542         * Router Reachability Probe MUST be rate-limited
 543         * to no more than one per minute.
 544         */
 545        if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 546                return;
 547        rcu_read_lock_bh();
 548        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 549        if (neigh) {
        /* lockless fast path; "work" is still unset here but "out" never reads it */
 550                if (neigh->nud_state & NUD_VALID)
 551                        goto out;
 552
 553                work = NULL;
 554                write_lock(&neigh->lock);
        /* re-check the state now that the neighbour lock is held */
 555                if (!(neigh->nud_state & NUD_VALID) &&
 556                    time_after(jiffies,
 557                               neigh->updated +
 558                               rt->rt6i_idev->cnf.rtr_probe_interval)) {
 559                        work = kmalloc(sizeof(*work), GFP_ATOMIC);
 560                        if (work)
 561                                __neigh_set_probe_once(neigh);
 562                }
 563                write_unlock(&neigh->lock);
 564        } else {
 565                work = kmalloc(sizeof(*work), GFP_ATOMIC);
 566        }
 567
 568        if (work) {
 569                INIT_WORK(&work->work, rt6_probe_deferred);
 570                work->target = rt->rt6i_gateway;
        /* reference is dropped by rt6_probe_deferred() */
 571                dev_hold(rt->dst.dev);
 572                work->dev = rt->dst.dev;
 573                schedule_work(&work->work);
 574        }
 575
 576out:
 577        rcu_read_unlock_bh();
 578}
 579#else
 /* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
        /* intentionally empty */
}
 583#endif
 584
 585/*
 586 * Default Router Selection (RFC 2461 6.3.6)
 587 */
 588static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 589{
 590        struct net_device *dev = rt->dst.dev;
 591        if (!oif || dev->ifindex == oif)
 592                return 2;
 593        if ((dev->flags & IFF_LOOPBACK) &&
 594            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 595                return 1;
 596        return 0;
 597}
 598
 /*
  * Judge the neighbour state of @rt's gateway for route scoring.
  *
  * Routes flagged RTF_NONEXTHOP, or lacking RTF_GATEWAY, succeed without any
  * lookup. Otherwise the gateway is looked up on rt->dst.dev under
  * rcu_read_lock_bh():
  *   - entry in a NUD_VALID state: RT6_NUD_SUCCEED;
  *   - entry not valid, with CONFIG_IPV6_ROUTER_PREF: RT6_NUD_FAIL_PROBE if
  *     the state is NUD_FAILED, RT6_NUD_SUCCEED for anything else;
  *   - entry not valid, without CONFIG_IPV6_ROUTER_PREF: the initial
  *     RT6_NUD_FAIL_HARD is kept;
  *   - no entry: RT6_NUD_SUCCEED with CONFIG_IPV6_ROUTER_PREF,
  *     RT6_NUD_FAIL_DO_RR without it.
  */
 599static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 600{
 601        struct neighbour *neigh;
 602        enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 603
 604        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 605            !(rt->rt6i_flags & RTF_GATEWAY))
 606                return RT6_NUD_SUCCEED;
 607
 608        rcu_read_lock_bh();
 609        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 610        if (neigh) {
        /* nud_state is sampled under the neighbour's own lock */
 611                read_lock(&neigh->lock);
 612                if (neigh->nud_state & NUD_VALID)
 613                        ret = RT6_NUD_SUCCEED;
 614#ifdef CONFIG_IPV6_ROUTER_PREF
 615                else if (!(neigh->nud_state & NUD_FAILED))
 616                        ret = RT6_NUD_SUCCEED;
 617                else
 618                        ret = RT6_NUD_FAIL_PROBE;
 619#endif
 620                read_unlock(&neigh->lock);
 621        } else {
 622                ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 623                      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 624        }
 625        rcu_read_unlock_bh();
 626
 627        return ret;
 628}
 629
 630static int rt6_score_route(struct rt6_info *rt, int oif,
 631                           int strict)
 632{
 633        int m;
 634
 635        m = rt6_check_dev(rt, oif);
 636        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 637                return RT6_NUD_FAIL_HARD;
 638#ifdef CONFIG_IPV6_ROUTER_PREF
 639        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 640#endif
 641        if (strict & RT6_LOOKUP_F_REACHABLE) {
 642                int n = rt6_check_neigh(rt);
 643                if (n < 0)
 644                        return n;
 645        }
 646        return m;
 647}
 648
 649static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 650                                   int *mpri, struct rt6_info *match,
 651                                   bool *do_rr)
 652{
 653        int m;
 654        bool match_do_rr = false;
 655        struct inet6_dev *idev = rt->rt6i_idev;
 656        struct net_device *dev = rt->dst.dev;
 657
 658        if (dev && !netif_carrier_ok(dev) &&
 659            idev->cnf.ignore_routes_with_linkdown)
 660                goto out;
 661
 662        if (rt6_check_expired(rt))
 663                goto out;
 664
 665        m = rt6_score_route(rt, oif, strict);
 666        if (m == RT6_NUD_FAIL_DO_RR) {
 667                match_do_rr = true;
 668                m = 0; /* lowest valid score */
 669        } else if (m == RT6_NUD_FAIL_HARD) {
 670                goto out;
 671        }
 672
 673        if (strict & RT6_LOOKUP_F_REACHABLE)
 674                rt6_probe(rt);
 675
 676        /* note that m can be RT6_NUD_FAIL_PROBE at this point */
 677        if (m > *mpri) {
 678                *do_rr = match_do_rr;
 679                *mpri = m;
 680                match = rt;
 681        }
 682out:
 683        return match;
 684}
 685
 686static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 687                                     struct rt6_info *rr_head,
 688                                     u32 metric, int oif, int strict,
 689                                     bool *do_rr)
 690{
 691        struct rt6_info *rt, *match, *cont;
 692        int mpri = -1;
 693
 694        match = NULL;
 695        cont = NULL;
 696        for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 697                if (rt->rt6i_metric != metric) {
 698                        cont = rt;
 699                        break;
 700                }
 701
 702                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 703        }
 704
 705        for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 706                if (rt->rt6i_metric != metric) {
 707                        cont = rt;
 708                        break;
 709                }
 710
 711                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 712        }
 713
 714        if (match || !cont)
 715                return match;
 716
 717        for (rt = cont; rt; rt = rt->dst.rt6_next)
 718                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 719
 720        return match;
 721}
 722
 723static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 724{
 725        struct rt6_info *match, *rt0;
 726        struct net *net;
 727        bool do_rr = false;
 728
 729        rt0 = fn->rr_ptr;
 730        if (!rt0)
 731                fn->rr_ptr = rt0 = fn->leaf;
 732
 733        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 734                             &do_rr);
 735
 736        if (do_rr) {
 737                struct rt6_info *next = rt0->dst.rt6_next;
 738
 739                /* no entries matched; do round-robin */
 740                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 741                        next = fn->leaf;
 742
 743                if (next != rt0)
 744                        fn->rr_ptr = next;
 745        }
 746
 747        net = dev_net(rt0->dst.dev);
 748        return match ? match : net->ipv6.ip6_null_entry;
 749}
 750
 751static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 752{
 753        return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 754}
 755
 756#ifdef CONFIG_IPV6_ROUTE_INFO
 757int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 758                  const struct in6_addr *gwaddr)
 759{
 760        struct net *net = dev_net(dev);
 761        struct route_info *rinfo = (struct route_info *) opt;
 762        struct in6_addr prefix_buf, *prefix;
 763        unsigned int pref;
 764        unsigned long lifetime;
 765        struct rt6_info *rt;
 766
 767        if (len < sizeof(struct route_info)) {
 768                return -EINVAL;
 769        }
 770
 771        /* Sanity check for prefix_len and length */
 772        if (rinfo->length > 3) {
 773                return -EINVAL;
 774        } else if (rinfo->prefix_len > 128) {
 775                return -EINVAL;
 776        } else if (rinfo->prefix_len > 64) {
 777                if (rinfo->length < 2) {
 778                        return -EINVAL;
 779                }
 780        } else if (rinfo->prefix_len > 0) {
 781                if (rinfo->length < 1) {
 782                        return -EINVAL;
 783                }
 784        }
 785
 786        pref = rinfo->route_pref;
 787        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 788                return -EINVAL;
 789
 790        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 791
 792        if (rinfo->length == 3)
 793                prefix = (struct in6_addr *)rinfo->prefix;
 794        else {
 795                /* this function is safe */
 796                ipv6_addr_prefix(&prefix_buf,
 797                                 (struct in6_addr *)rinfo->prefix,
 798                                 rinfo->prefix_len);
 799                prefix = &prefix_buf;
 800        }
 801
 802        if (rinfo->prefix_len == 0)
 803                rt = rt6_get_dflt_router(gwaddr, dev);
 804        else
 805                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 806                                        gwaddr, dev->ifindex);
 807
 808        if (rt && !lifetime) {
 809                ip6_del_rt(rt);
 810                rt = NULL;
 811        }
 812
 813        if (!rt && lifetime)
 814                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 815                                        pref);
 816        else if (rt)
 817                rt->rt6i_flags = RTF_ROUTEINFO |
 818                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 819
 820        if (rt) {
 821                if (!addrconf_finite_timeout(lifetime))
 822                        rt6_clean_expires(rt);
 823                else
 824                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 825
 826                ip6_rt_put(rt);
 827        }
 828        return 0;
 829}
 830#endif
 831
 832static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 833                                        struct in6_addr *saddr)
 834{
 835        struct fib6_node *pn;
 836        while (1) {
 837                if (fn->fn_flags & RTN_TL_ROOT)
 838                        return NULL;
 839                pn = fn->parent;
 840                if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 841                        fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 842                else
 843                        fn = pn;
 844                if (fn->fn_flags & RTN_RTINFO)
 845                        return fn;
 846        }
 847}
 848
 849static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 850                                             struct fib6_table *table,
 851                                             struct flowi6 *fl6, int flags)
 852{
 853        struct fib6_node *fn;
 854        struct rt6_info *rt;
 855
 856        read_lock_bh(&table->tb6_lock);
 857        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 858restart:
 859        rt = fn->leaf;
 860        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 861        if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 862                rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 863        if (rt == net->ipv6.ip6_null_entry) {
 864                fn = fib6_backtrack(fn, &fl6->saddr);
 865                if (fn)
 866                        goto restart;
 867        }
 868        dst_use(&rt->dst, jiffies);
 869        read_unlock_bh(&table->tb6_lock);
 870
 871        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 872
 873        return rt;
 874
 875}
 876
 877struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 878                                    int flags)
 879{
 880        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 881}
 882EXPORT_SYMBOL_GPL(ip6_route_lookup);
 883
 884struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 885                            const struct in6_addr *saddr, int oif, int strict)
 886{
 887        struct flowi6 fl6 = {
 888                .flowi6_oif = oif,
 889                .daddr = *daddr,
 890        };
 891        struct dst_entry *dst;
 892        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 893
 894        if (saddr) {
 895                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 896                flags |= RT6_LOOKUP_F_HAS_SADDR;
 897        }
 898
 899        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 900        if (dst->error == 0)
 901                return (struct rt6_info *) dst;
 902
 903        dst_release(dst);
 904
 905        return NULL;
 906}
 907EXPORT_SYMBOL(rt6_lookup);
 908
 909/* ip6_ins_rt is called with FREE table->tb6_lock.
 910   It takes new route entry, the addition fails by any reason the
 911   route is freed. In any case, if caller does not hold it, it may
 912   be destroyed.
 913 */
 914
 915static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 916                        struct mx6_config *mxc)
 917{
 918        int err;
 919        struct fib6_table *table;
 920
 921        table = rt->rt6i_table;
 922        write_lock_bh(&table->tb6_lock);
 923        err = fib6_add(&table->tb6_root, rt, info, mxc);
 924        write_unlock_bh(&table->tb6_lock);
 925
 926        return err;
 927}
 928
 929int ip6_ins_rt(struct rt6_info *rt)
 930{
 931        struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
 932        struct mx6_config mxc = { .mx = NULL, };
 933
 934        return __ip6_ins_rt(rt, &info, &mxc);
 935}
 936
 937static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 938                                           const struct in6_addr *daddr,
 939                                           const struct in6_addr *saddr)
 940{
 941        struct rt6_info *rt;
 942
 943        /*
 944         *      Clone the route.
 945         */
 946
 947        if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 948                ort = (struct rt6_info *)ort->dst.from;
 949
 950        rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 951
 952        if (!rt)
 953                return NULL;
 954
 955        ip6_rt_copy_init(rt, ort);
 956        rt->rt6i_flags |= RTF_CACHE;
 957        rt->rt6i_metric = 0;
 958        rt->dst.flags |= DST_HOST;
 959        rt->rt6i_dst.addr = *daddr;
 960        rt->rt6i_dst.plen = 128;
 961
 962        if (!rt6_is_gw_or_nonexthop(ort)) {
 963                if (ort->rt6i_dst.plen != 128 &&
 964                    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 965                        rt->rt6i_flags |= RTF_ANYCAST;
 966#ifdef CONFIG_IPV6_SUBTREES
 967                if (rt->rt6i_src.plen && saddr) {
 968                        rt->rt6i_src.addr = *saddr;
 969                        rt->rt6i_src.plen = 128;
 970                }
 971#endif
 972        }
 973
 974        return rt;
 975}
 976
 977static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 978{
 979        struct rt6_info *pcpu_rt;
 980
 981        pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 982                                  rt->dst.dev, rt->dst.flags);
 983
 984        if (!pcpu_rt)
 985                return NULL;
 986        ip6_rt_copy_init(pcpu_rt, rt);
 987        pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 988        pcpu_rt->rt6i_flags |= RTF_PCPU;
 989        return pcpu_rt;
 990}
 991
 992/* It should be called with read_lock_bh(&tb6_lock) acquired */
 993static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 994{
 995        struct rt6_info *pcpu_rt, **p;
 996
 997        p = this_cpu_ptr(rt->rt6i_pcpu);
 998        pcpu_rt = *p;
 999
1000        if (pcpu_rt) {
1001                dst_hold(&pcpu_rt->dst);
1002                rt6_dst_from_metrics_check(pcpu_rt);
1003        }
1004        return pcpu_rt;
1005}
1006
1007static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1008{
1009        struct fib6_table *table = rt->rt6i_table;
1010        struct rt6_info *pcpu_rt, *prev, **p;
1011
1012        pcpu_rt = ip6_rt_pcpu_alloc(rt);
1013        if (!pcpu_rt) {
1014                struct net *net = dev_net(rt->dst.dev);
1015
1016                dst_hold(&net->ipv6.ip6_null_entry->dst);
1017                return net->ipv6.ip6_null_entry;
1018        }
1019
1020        read_lock_bh(&table->tb6_lock);
1021        if (rt->rt6i_pcpu) {
1022                p = this_cpu_ptr(rt->rt6i_pcpu);
1023                prev = cmpxchg(p, NULL, pcpu_rt);
1024                if (prev) {
1025                        /* If someone did it before us, return prev instead */
1026                        dst_destroy(&pcpu_rt->dst);
1027                        pcpu_rt = prev;
1028                }
1029        } else {
1030                /* rt has been removed from the fib6 tree
1031                 * before we have a chance to acquire the read_lock.
1032                 * In this case, don't brother to create a pcpu rt
1033                 * since rt is going away anyway.  The next
1034                 * dst_check() will trigger a re-lookup.
1035                 */
1036                dst_destroy(&pcpu_rt->dst);
1037                pcpu_rt = rt;
1038        }
1039        dst_hold(&pcpu_rt->dst);
1040        rt6_dst_from_metrics_check(pcpu_rt);
1041        read_unlock_bh(&table->tb6_lock);
1042        return pcpu_rt;
1043}
1044
1045static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1046                                      struct flowi6 *fl6, int flags)
1047{
1048        struct fib6_node *fn, *saved_fn;
1049        struct rt6_info *rt;
1050        int strict = 0;
1051
1052        strict |= flags & RT6_LOOKUP_F_IFACE;
1053        if (net->ipv6.devconf_all->forwarding == 0)
1054                strict |= RT6_LOOKUP_F_REACHABLE;
1055
1056        read_lock_bh(&table->tb6_lock);
1057
1058        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1059        saved_fn = fn;
1060
1061        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1062                oif = 0;
1063
1064redo_rt6_select:
1065        rt = rt6_select(fn, oif, strict);
1066        if (rt->rt6i_nsiblings)
1067                rt = rt6_multipath_select(rt, fl6, oif, strict);
1068        if (rt == net->ipv6.ip6_null_entry) {
1069                fn = fib6_backtrack(fn, &fl6->saddr);
1070                if (fn)
1071                        goto redo_rt6_select;
1072                else if (strict & RT6_LOOKUP_F_REACHABLE) {
1073                        /* also consider unreachable route */
1074                        strict &= ~RT6_LOOKUP_F_REACHABLE;
1075                        fn = saved_fn;
1076                        goto redo_rt6_select;
1077                }
1078        }
1079
1080
1081        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1082                dst_use(&rt->dst, jiffies);
1083                read_unlock_bh(&table->tb6_lock);
1084
1085                rt6_dst_from_metrics_check(rt);
1086
1087                trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1088                return rt;
1089        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1090                            !(rt->rt6i_flags & RTF_GATEWAY))) {
1091                /* Create a RTF_CACHE clone which will not be
1092                 * owned by the fib6 tree.  It is for the special case where
1093                 * the daddr in the skb during the neighbor look-up is different
1094                 * from the fl6->daddr used to look-up route here.
1095                 */
1096
1097                struct rt6_info *uncached_rt;
1098
1099                dst_use(&rt->dst, jiffies);
1100                read_unlock_bh(&table->tb6_lock);
1101
1102                uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1103                dst_release(&rt->dst);
1104
1105                if (uncached_rt)
1106                        rt6_uncached_list_add(uncached_rt);
1107                else
1108                        uncached_rt = net->ipv6.ip6_null_entry;
1109
1110                dst_hold(&uncached_rt->dst);
1111
1112                trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1113                return uncached_rt;
1114
1115        } else {
1116                /* Get a percpu copy */
1117
1118                struct rt6_info *pcpu_rt;
1119
1120                rt->dst.lastuse = jiffies;
1121                rt->dst.__use++;
1122                pcpu_rt = rt6_get_pcpu_route(rt);
1123
1124                if (pcpu_rt) {
1125                        read_unlock_bh(&table->tb6_lock);
1126                } else {
1127                        /* We have to do the read_unlock first
1128                         * because rt6_make_pcpu_route() may trigger
1129                         * ip6_dst_gc() which will take the write_lock.
1130                         */
1131                        dst_hold(&rt->dst);
1132                        read_unlock_bh(&table->tb6_lock);
1133                        pcpu_rt = rt6_make_pcpu_route(rt);
1134                        dst_release(&rt->dst);
1135                }
1136
1137                trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1138                return pcpu_rt;
1139
1140        }
1141}
1142
1143static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1144                                            struct flowi6 *fl6, int flags)
1145{
1146        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1147}
1148
1149static struct dst_entry *ip6_route_input_lookup(struct net *net,
1150                                                struct net_device *dev,
1151                                                struct flowi6 *fl6, int flags)
1152{
1153        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1154                flags |= RT6_LOOKUP_F_IFACE;
1155
1156        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1157}
1158
1159void ip6_route_input(struct sk_buff *skb)
1160{
1161        const struct ipv6hdr *iph = ipv6_hdr(skb);
1162        struct net *net = dev_net(skb->dev);
1163        int flags = RT6_LOOKUP_F_HAS_SADDR;
1164        struct ip_tunnel_info *tun_info;
1165        struct flowi6 fl6 = {
1166                .flowi6_iif = l3mdev_fib_oif(skb->dev),
1167                .daddr = iph->daddr,
1168                .saddr = iph->saddr,
1169                .flowlabel = ip6_flowinfo(iph),
1170                .flowi6_mark = skb->mark,
1171                .flowi6_proto = iph->nexthdr,
1172        };
1173
1174        tun_info = skb_tunnel_info(skb);
1175        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1176                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1177        skb_dst_drop(skb);
1178        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1179}
1180
1181static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1182                                             struct flowi6 *fl6, int flags)
1183{
1184        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1185}
1186
1187struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1188                                         struct flowi6 *fl6, int flags)
1189{
1190        struct dst_entry *dst;
1191        bool any_src;
1192
1193        dst = l3mdev_get_rt6_dst(net, fl6);
1194        if (dst)
1195                return dst;
1196
1197        fl6->flowi6_iif = LOOPBACK_IFINDEX;
1198
1199        any_src = ipv6_addr_any(&fl6->saddr);
1200        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1201            (fl6->flowi6_oif && any_src))
1202                flags |= RT6_LOOKUP_F_IFACE;
1203
1204        if (!any_src)
1205                flags |= RT6_LOOKUP_F_HAS_SADDR;
1206        else if (sk)
1207                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1208
1209        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1210}
1211EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1212
1213struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1214{
1215        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1216        struct dst_entry *new = NULL;
1217
1218        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1219        if (rt) {
1220                rt6_info_init(rt);
1221
1222                new = &rt->dst;
1223                new->__use = 1;
1224                new->input = dst_discard;
1225                new->output = dst_discard_out;
1226
1227                dst_copy_metrics(new, &ort->dst);
1228                rt->rt6i_idev = ort->rt6i_idev;
1229                if (rt->rt6i_idev)
1230                        in6_dev_hold(rt->rt6i_idev);
1231
1232                rt->rt6i_gateway = ort->rt6i_gateway;
1233                rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1234                rt->rt6i_metric = 0;
1235
1236                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1237#ifdef CONFIG_IPV6_SUBTREES
1238                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1239#endif
1240
1241                dst_free(new);
1242        }
1243
1244        dst_release(dst_orig);
1245        return new ? new : ERR_PTR(-ENOMEM);
1246}
1247
1248/*
1249 *      Destination cache support functions
1250 */
1251
1252static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1253{
1254        if (rt->dst.from &&
1255            dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1256                dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1257}
1258
1259static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1260{
1261        if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1262                return NULL;
1263
1264        if (rt6_check_expired(rt))
1265                return NULL;
1266
1267        return &rt->dst;
1268}
1269
1270static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1271{
1272        if (!__rt6_check_expired(rt) &&
1273            rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1274            rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1275                return &rt->dst;
1276        else
1277                return NULL;
1278}
1279
1280static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1281{
1282        struct rt6_info *rt;
1283
1284        rt = (struct rt6_info *) dst;
1285
1286        /* All IPV6 dsts are created with ->obsolete set to the value
1287         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1288         * into this function always.
1289         */
1290
1291        rt6_dst_from_metrics_check(rt);
1292
1293        if (rt->rt6i_flags & RTF_PCPU ||
1294            (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1295                return rt6_dst_from_check(rt, cookie);
1296        else
1297                return rt6_check(rt, cookie);
1298}
1299
1300static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1301{
1302        struct rt6_info *rt = (struct rt6_info *) dst;
1303
1304        if (rt) {
1305                if (rt->rt6i_flags & RTF_CACHE) {
1306                        if (rt6_check_expired(rt)) {
1307                                ip6_del_rt(rt);
1308                                dst = NULL;
1309                        }
1310                } else {
1311                        dst_release(dst);
1312                        dst = NULL;
1313                }
1314        }
1315        return dst;
1316}
1317
1318static void ip6_link_failure(struct sk_buff *skb)
1319{
1320        struct rt6_info *rt;
1321
1322        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1323
1324        rt = (struct rt6_info *) skb_dst(skb);
1325        if (rt) {
1326                if (rt->rt6i_flags & RTF_CACHE) {
1327                        dst_hold(&rt->dst);
1328                        ip6_del_rt(rt);
1329                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1330                        rt->rt6i_node->fn_sernum = -1;
1331                }
1332        }
1333}
1334
1335static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1336{
1337        struct net *net = dev_net(rt->dst.dev);
1338
1339        rt->rt6i_flags |= RTF_MODIFIED;
1340        rt->rt6i_pmtu = mtu;
1341        rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1342}
1343
1344static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1345{
1346        return !(rt->rt6i_flags & RTF_CACHE) &&
1347                (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1348}
1349
1350static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1351                                 const struct ipv6hdr *iph, u32 mtu)
1352{
1353        struct rt6_info *rt6 = (struct rt6_info *)dst;
1354
1355        if (rt6->rt6i_flags & RTF_LOCAL)
1356                return;
1357
1358        dst_confirm(dst);
1359        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1360        if (mtu >= dst_mtu(dst))
1361                return;
1362
1363        if (!rt6_cache_allowed_for_pmtu(rt6)) {
1364                rt6_do_update_pmtu(rt6, mtu);
1365        } else {
1366                const struct in6_addr *daddr, *saddr;
1367                struct rt6_info *nrt6;
1368
1369                if (iph) {
1370                        daddr = &iph->daddr;
1371                        saddr = &iph->saddr;
1372                } else if (sk) {
1373                        daddr = &sk->sk_v6_daddr;
1374                        saddr = &inet6_sk(sk)->saddr;
1375                } else {
1376                        return;
1377                }
1378                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1379                if (nrt6) {
1380                        rt6_do_update_pmtu(nrt6, mtu);
1381
1382                        /* ip6_ins_rt(nrt6) will bump the
1383                         * rt6->rt6i_node->fn_sernum
1384                         * which will fail the next rt6_check() and
1385                         * invalidate the sk->sk_dst_cache.
1386                         */
1387                        ip6_ins_rt(nrt6);
1388                }
1389        }
1390}
1391
1392static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1393                               struct sk_buff *skb, u32 mtu)
1394{
1395        __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1396}
1397
1398void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1399                     int oif, u32 mark)
1400{
1401        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1402        struct dst_entry *dst;
1403        struct flowi6 fl6;
1404
1405        memset(&fl6, 0, sizeof(fl6));
1406        fl6.flowi6_oif = oif;
1407        fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1408        fl6.daddr = iph->daddr;
1409        fl6.saddr = iph->saddr;
1410        fl6.flowlabel = ip6_flowinfo(iph);
1411
1412        dst = ip6_route_output(net, NULL, &fl6);
1413        if (!dst->error)
1414                __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1415        dst_release(dst);
1416}
1417EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1418
1419void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1420{
1421        struct dst_entry *dst;
1422
1423        ip6_update_pmtu(skb, sock_net(sk), mtu,
1424                        sk->sk_bound_dev_if, sk->sk_mark);
1425
1426        dst = __sk_dst_get(sk);
1427        if (!dst || !dst->obsolete ||
1428            dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1429                return;
1430
1431        bh_lock_sock(sk);
1432        if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1433                ip6_datagram_dst_update(sk, false);
1434        bh_unlock_sock(sk);
1435}
1436EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1437
1438/* Handle redirects */
1439struct ip6rd_flowi {
1440        struct flowi6 fl6;
1441        struct in6_addr gateway;
1442};
1443
1444static struct rt6_info *__ip6_route_redirect(struct net *net,
1445                                             struct fib6_table *table,
1446                                             struct flowi6 *fl6,
1447                                             int flags)
1448{
1449        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1450        struct rt6_info *rt;
1451        struct fib6_node *fn;
1452
1453        /* Get the "current" route for this destination and
1454         * check if the redirect has come from approriate router.
1455         *
1456         * RFC 4861 specifies that redirects should only be
1457         * accepted if they come from the nexthop to the target.
1458         * Due to the way the routes are chosen, this notion
1459         * is a bit fuzzy and one might need to check all possible
1460         * routes.
1461         */
1462
1463        read_lock_bh(&table->tb6_lock);
1464        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1465restart:
1466        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1467                if (rt6_check_expired(rt))
1468                        continue;
1469                if (rt->dst.error)
1470                        break;
1471                if (!(rt->rt6i_flags & RTF_GATEWAY))
1472                        continue;
1473                if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1474                        continue;
1475                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1476                        continue;
1477                break;
1478        }
1479
1480        if (!rt)
1481                rt = net->ipv6.ip6_null_entry;
1482        else if (rt->dst.error) {
1483                rt = net->ipv6.ip6_null_entry;
1484                goto out;
1485        }
1486
1487        if (rt == net->ipv6.ip6_null_entry) {
1488                fn = fib6_backtrack(fn, &fl6->saddr);
1489                if (fn)
1490                        goto restart;
1491        }
1492
1493out:
1494        dst_hold(&rt->dst);
1495
1496        read_unlock_bh(&table->tb6_lock);
1497
1498        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1499        return rt;
1500};
1501
1502static struct dst_entry *ip6_route_redirect(struct net *net,
1503                                        const struct flowi6 *fl6,
1504                                        const struct in6_addr *gateway)
1505{
1506        int flags = RT6_LOOKUP_F_HAS_SADDR;
1507        struct ip6rd_flowi rdfl;
1508
1509        rdfl.fl6 = *fl6;
1510        rdfl.gateway = *gateway;
1511
1512        return fib6_rule_lookup(net, &rdfl.fl6,
1513                                flags, __ip6_route_redirect);
1514}
1515
1516void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1517{
1518        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1519        struct dst_entry *dst;
1520        struct flowi6 fl6;
1521
1522        memset(&fl6, 0, sizeof(fl6));
1523        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1524        fl6.flowi6_oif = oif;
1525        fl6.flowi6_mark = mark;
1526        fl6.daddr = iph->daddr;
1527        fl6.saddr = iph->saddr;
1528        fl6.flowlabel = ip6_flowinfo(iph);
1529
1530        dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1531        rt6_do_redirect(dst, NULL, skb);
1532        dst_release(dst);
1533}
1534EXPORT_SYMBOL_GPL(ip6_redirect);
1535
1536void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1537                            u32 mark)
1538{
1539        const struct ipv6hdr *iph = ipv6_hdr(skb);
1540        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1541        struct dst_entry *dst;
1542        struct flowi6 fl6;
1543
1544        memset(&fl6, 0, sizeof(fl6));
1545        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1546        fl6.flowi6_oif = oif;
1547        fl6.flowi6_mark = mark;
1548        fl6.daddr = msg->dest;
1549        fl6.saddr = iph->daddr;
1550
1551        dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1552        rt6_do_redirect(dst, NULL, skb);
1553        dst_release(dst);
1554}
1555
1556void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1557{
1558        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1559}
1560EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1561
1562static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1563{
1564        struct net_device *dev = dst->dev;
1565        unsigned int mtu = dst_mtu(dst);
1566        struct net *net = dev_net(dev);
1567
1568        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1569
1570        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1571                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1572
1573        /*
1574         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1575         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1576         * IPV6_MAXPLEN is also valid and means: "any MSS,
1577         * rely only on pmtu discovery"
1578         */
1579        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1580                mtu = IPV6_MAXPLEN;
1581        return mtu;
1582}
1583
1584static unsigned int ip6_mtu(const struct dst_entry *dst)
1585{
1586        const struct rt6_info *rt = (const struct rt6_info *)dst;
1587        unsigned int mtu = rt->rt6i_pmtu;
1588        struct inet6_dev *idev;
1589
1590        if (mtu)
1591                goto out;
1592
1593        mtu = dst_metric_raw(dst, RTAX_MTU);
1594        if (mtu)
1595                goto out;
1596
1597        mtu = IPV6_MIN_MTU;
1598
1599        rcu_read_lock();
1600        idev = __in6_dev_get(dst->dev);
1601        if (idev)
1602                mtu = idev->cnf.mtu6;
1603        rcu_read_unlock();
1604
1605out:
1606        return min_t(unsigned int, mtu, IP6_MAX_MTU);
1607}
1608
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * icmp6_dst_alloc(); entries are reaped by icmp6_dst_gc() and
 * icmp6_clean_all().
 */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects every access to icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
1611
1612struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1613                                  struct flowi6 *fl6)
1614{
1615        struct dst_entry *dst;
1616        struct rt6_info *rt;
1617        struct inet6_dev *idev = in6_dev_get(dev);
1618        struct net *net = dev_net(dev);
1619
1620        if (unlikely(!idev))
1621                return ERR_PTR(-ENODEV);
1622
1623        rt = ip6_dst_alloc(net, dev, 0);
1624        if (unlikely(!rt)) {
1625                in6_dev_put(idev);
1626                dst = ERR_PTR(-ENOMEM);
1627                goto out;
1628        }
1629
1630        rt->dst.flags |= DST_HOST;
1631        rt->dst.output  = ip6_output;
1632        atomic_set(&rt->dst.__refcnt, 1);
1633        rt->rt6i_gateway  = fl6->daddr;
1634        rt->rt6i_dst.addr = fl6->daddr;
1635        rt->rt6i_dst.plen = 128;
1636        rt->rt6i_idev     = idev;
1637        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1638
1639        spin_lock_bh(&icmp6_dst_lock);
1640        rt->dst.next = icmp6_dst_gc_list;
1641        icmp6_dst_gc_list = &rt->dst;
1642        spin_unlock_bh(&icmp6_dst_lock);
1643
1644        fib6_force_start_gc(net);
1645
1646        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1647
1648out:
1649        return dst;
1650}
1651
1652int icmp6_dst_gc(void)
1653{
1654        struct dst_entry *dst, **pprev;
1655        int more = 0;
1656
1657        spin_lock_bh(&icmp6_dst_lock);
1658        pprev = &icmp6_dst_gc_list;
1659
1660        while ((dst = *pprev) != NULL) {
1661                if (!atomic_read(&dst->__refcnt)) {
1662                        *pprev = dst->next;
1663                        dst_free(dst);
1664                } else {
1665                        pprev = &dst->next;
1666                        ++more;
1667                }
1668        }
1669
1670        spin_unlock_bh(&icmp6_dst_lock);
1671
1672        return more;
1673}
1674
1675static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1676                            void *arg)
1677{
1678        struct dst_entry *dst, **pprev;
1679
1680        spin_lock_bh(&icmp6_dst_lock);
1681        pprev = &icmp6_dst_gc_list;
1682        while ((dst = *pprev) != NULL) {
1683                struct rt6_info *rt = (struct rt6_info *) dst;
1684                if (func(rt, arg)) {
1685                        *pprev = dst->next;
1686                        dst_free(dst);
1687                } else {
1688                        pprev = &dst->next;
1689                }
1690        }
1691        spin_unlock_bh(&icmp6_dst_lock);
1692}
1693
1694static int ip6_dst_gc(struct dst_ops *ops)
1695{
1696        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1697        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1698        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1699        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1700        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1701        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1702        int entries;
1703
1704        entries = dst_entries_get_fast(ops);
1705        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1706            entries <= rt_max_size)
1707                goto out;
1708
1709        net->ipv6.ip6_rt_gc_expire++;
1710        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1711        entries = dst_entries_get_slow(ops);
1712        if (entries < ops->gc_thresh)
1713                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1714out:
1715        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1716        return entries > rt_max_size;
1717}
1718
1719static int ip6_convert_metrics(struct mx6_config *mxc,
1720                               const struct fib6_config *cfg)
1721{
1722        bool ecn_ca = false;
1723        struct nlattr *nla;
1724        int remaining;
1725        u32 *mp;
1726
1727        if (!cfg->fc_mx)
1728                return 0;
1729
1730        mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1731        if (unlikely(!mp))
1732                return -ENOMEM;
1733
1734        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1735                int type = nla_type(nla);
1736                u32 val;
1737
1738                if (!type)
1739                        continue;
1740                if (unlikely(type > RTAX_MAX))
1741                        goto err;
1742
1743                if (type == RTAX_CC_ALGO) {
1744                        char tmp[TCP_CA_NAME_MAX];
1745
1746                        nla_strlcpy(tmp, nla, sizeof(tmp));
1747                        val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1748                        if (val == TCP_CA_UNSPEC)
1749                                goto err;
1750                } else {
1751                        val = nla_get_u32(nla);
1752                }
1753                if (type == RTAX_HOPLIMIT && val > 255)
1754                        val = 255;
1755                if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1756                        goto err;
1757
1758                mp[type - 1] = val;
1759                __set_bit(type - 1, mxc->mx_valid);
1760        }
1761
1762        if (ecn_ca) {
1763                __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1764                mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1765        }
1766
1767        mxc->mx = mp;
1768        return 0;
1769 err:
1770        kfree(mp);
1771        return -EINVAL;
1772}
1773
1774static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1775                                            struct fib6_config *cfg,
1776                                            const struct in6_addr *gw_addr)
1777{
1778        struct flowi6 fl6 = {
1779                .flowi6_oif = cfg->fc_ifindex,
1780                .daddr = *gw_addr,
1781                .saddr = cfg->fc_prefsrc,
1782        };
1783        struct fib6_table *table;
1784        struct rt6_info *rt;
1785        int flags = RT6_LOOKUP_F_IFACE;
1786
1787        table = fib6_get_table(net, cfg->fc_table);
1788        if (!table)
1789                return NULL;
1790
1791        if (!ipv6_addr_any(&cfg->fc_prefsrc))
1792                flags |= RT6_LOOKUP_F_HAS_SADDR;
1793
1794        rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1795
1796        /* if table lookup failed, fall back to full lookup */
1797        if (rt == net->ipv6.ip6_null_entry) {
1798                ip6_rt_put(rt);
1799                rt = NULL;
1800        }
1801
1802        return rt;
1803}
1804
1805static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1806{
1807        struct net *net = cfg->fc_nlinfo.nl_net;
1808        struct rt6_info *rt = NULL;
1809        struct net_device *dev = NULL;
1810        struct inet6_dev *idev = NULL;
1811        struct fib6_table *table;
1812        int addr_type;
1813        int err = -EINVAL;
1814
1815        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1816                goto out;
1817#ifndef CONFIG_IPV6_SUBTREES
1818        if (cfg->fc_src_len)
1819                goto out;
1820#endif
1821        if (cfg->fc_ifindex) {
1822                err = -ENODEV;
1823                dev = dev_get_by_index(net, cfg->fc_ifindex);
1824                if (!dev)
1825                        goto out;
1826                idev = in6_dev_get(dev);
1827                if (!idev)
1828                        goto out;
1829        }
1830
1831        if (cfg->fc_metric == 0)
1832                cfg->fc_metric = IP6_RT_PRIO_USER;
1833
1834        err = -ENOBUFS;
1835        if (cfg->fc_nlinfo.nlh &&
1836            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1837                table = fib6_get_table(net, cfg->fc_table);
1838                if (!table) {
1839                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1840                        table = fib6_new_table(net, cfg->fc_table);
1841                }
1842        } else {
1843                table = fib6_new_table(net, cfg->fc_table);
1844        }
1845
1846        if (!table)
1847                goto out;
1848
1849        rt = ip6_dst_alloc(net, NULL,
1850                           (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1851
1852        if (!rt) {
1853                err = -ENOMEM;
1854                goto out;
1855        }
1856
1857        if (cfg->fc_flags & RTF_EXPIRES)
1858                rt6_set_expires(rt, jiffies +
1859                                clock_t_to_jiffies(cfg->fc_expires));
1860        else
1861                rt6_clean_expires(rt);
1862
1863        if (cfg->fc_protocol == RTPROT_UNSPEC)
1864                cfg->fc_protocol = RTPROT_BOOT;
1865        rt->rt6i_protocol = cfg->fc_protocol;
1866
1867        addr_type = ipv6_addr_type(&cfg->fc_dst);
1868
1869        if (addr_type & IPV6_ADDR_MULTICAST)
1870                rt->dst.input = ip6_mc_input;
1871        else if (cfg->fc_flags & RTF_LOCAL)
1872                rt->dst.input = ip6_input;
1873        else
1874                rt->dst.input = ip6_forward;
1875
1876        rt->dst.output = ip6_output;
1877
1878        if (cfg->fc_encap) {
1879                struct lwtunnel_state *lwtstate;
1880
1881                err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1882                                           cfg->fc_encap, AF_INET6, cfg,
1883                                           &lwtstate);
1884                if (err)
1885                        goto out;
1886                rt->dst.lwtstate = lwtstate_get(lwtstate);
1887                if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1888                        rt->dst.lwtstate->orig_output = rt->dst.output;
1889                        rt->dst.output = lwtunnel_output;
1890                }
1891                if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1892                        rt->dst.lwtstate->orig_input = rt->dst.input;
1893                        rt->dst.input = lwtunnel_input;
1894                }
1895        }
1896
1897        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1898        rt->rt6i_dst.plen = cfg->fc_dst_len;
1899        if (rt->rt6i_dst.plen == 128)
1900                rt->dst.flags |= DST_HOST;
1901
1902#ifdef CONFIG_IPV6_SUBTREES
1903        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1904        rt->rt6i_src.plen = cfg->fc_src_len;
1905#endif
1906
1907        rt->rt6i_metric = cfg->fc_metric;
1908
1909        /* We cannot add true routes via loopback here,
1910           they would result in kernel looping; promote them to reject routes
1911         */
1912        if ((cfg->fc_flags & RTF_REJECT) ||
1913            (dev && (dev->flags & IFF_LOOPBACK) &&
1914             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1915             !(cfg->fc_flags & RTF_LOCAL))) {
1916                /* hold loopback dev/idev if we haven't done so. */
1917                if (dev != net->loopback_dev) {
1918                        if (dev) {
1919                                dev_put(dev);
1920                                in6_dev_put(idev);
1921                        }
1922                        dev = net->loopback_dev;
1923                        dev_hold(dev);
1924                        idev = in6_dev_get(dev);
1925                        if (!idev) {
1926                                err = -ENODEV;
1927                                goto out;
1928                        }
1929                }
1930                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1931                switch (cfg->fc_type) {
1932                case RTN_BLACKHOLE:
1933                        rt->dst.error = -EINVAL;
1934                        rt->dst.output = dst_discard_out;
1935                        rt->dst.input = dst_discard;
1936                        break;
1937                case RTN_PROHIBIT:
1938                        rt->dst.error = -EACCES;
1939                        rt->dst.output = ip6_pkt_prohibit_out;
1940                        rt->dst.input = ip6_pkt_prohibit;
1941                        break;
1942                case RTN_THROW:
1943                case RTN_UNREACHABLE:
1944                default:
1945                        rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1946                                        : (cfg->fc_type == RTN_UNREACHABLE)
1947                                        ? -EHOSTUNREACH : -ENETUNREACH;
1948                        rt->dst.output = ip6_pkt_discard_out;
1949                        rt->dst.input = ip6_pkt_discard;
1950                        break;
1951                }
1952                goto install_route;
1953        }
1954
1955        if (cfg->fc_flags & RTF_GATEWAY) {
1956                const struct in6_addr *gw_addr;
1957                int gwa_type;
1958
1959                gw_addr = &cfg->fc_gateway;
1960                gwa_type = ipv6_addr_type(gw_addr);
1961
1962                /* if gw_addr is local we will fail to detect this in case
1963                 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1964                 * will return already-added prefix route via interface that
1965                 * prefix route was assigned to, which might be non-loopback.
1966                 */
1967                err = -EINVAL;
1968                if (ipv6_chk_addr_and_flags(net, gw_addr,
1969                                            gwa_type & IPV6_ADDR_LINKLOCAL ?
1970                                            dev : NULL, 0, 0))
1971                        goto out;
1972
1973                rt->rt6i_gateway = *gw_addr;
1974
1975                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1976                        struct rt6_info *grt = NULL;
1977
1978                        /* IPv6 strictly inhibits using not link-local
1979                           addresses as nexthop address.
1980                           Otherwise, router will not able to send redirects.
1981                           It is very good, but in some (rare!) circumstances
1982                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1983                           some exceptions. --ANK
1984                         */
1985                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1986                                goto out;
1987
1988                        if (cfg->fc_table)
1989                                grt = ip6_nh_lookup_table(net, cfg, gw_addr);
1990
1991                        if (!grt)
1992                                grt = rt6_lookup(net, gw_addr, NULL,
1993                                                 cfg->fc_ifindex, 1);
1994
1995                        err = -EHOSTUNREACH;
1996                        if (!grt)
1997                                goto out;
1998                        if (dev) {
1999                                if (dev != grt->dst.dev) {
2000                                        ip6_rt_put(grt);
2001                                        goto out;
2002                                }
2003                        } else {
2004                                dev = grt->dst.dev;
2005                                idev = grt->rt6i_idev;
2006                                dev_hold(dev);
2007                                in6_dev_hold(grt->rt6i_idev);
2008                        }
2009                        if (!(grt->rt6i_flags & RTF_GATEWAY))
2010                                err = 0;
2011                        ip6_rt_put(grt);
2012
2013                        if (err)
2014                                goto out;
2015                }
2016                err = -EINVAL;
2017                if (!dev || (dev->flags & IFF_LOOPBACK))
2018                        goto out;
2019        }
2020
2021        err = -ENODEV;
2022        if (!dev)
2023                goto out;
2024
2025        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2026                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2027                        err = -EINVAL;
2028                        goto out;
2029                }
2030                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2031                rt->rt6i_prefsrc.plen = 128;
2032        } else
2033                rt->rt6i_prefsrc.plen = 0;
2034
2035        rt->rt6i_flags = cfg->fc_flags;
2036
2037install_route:
2038        rt->dst.dev = dev;
2039        rt->rt6i_idev = idev;
2040        rt->rt6i_table = table;
2041
2042        cfg->fc_nlinfo.nl_net = dev_net(dev);
2043
2044        return rt;
2045out:
2046        if (dev)
2047                dev_put(dev);
2048        if (idev)
2049                in6_dev_put(idev);
2050        if (rt)
2051                dst_free(&rt->dst);
2052
2053        return ERR_PTR(err);
2054}
2055
2056int ip6_route_add(struct fib6_config *cfg)
2057{
2058        struct mx6_config mxc = { .mx = NULL, };
2059        struct rt6_info *rt;
2060        int err;
2061
2062        rt = ip6_route_info_create(cfg);
2063        if (IS_ERR(rt)) {
2064                err = PTR_ERR(rt);
2065                rt = NULL;
2066                goto out;
2067        }
2068
2069        err = ip6_convert_metrics(&mxc, cfg);
2070        if (err)
2071                goto out;
2072
2073        err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2074
2075        kfree(mxc.mx);
2076
2077        return err;
2078out:
2079        if (rt)
2080                dst_free(&rt->dst);
2081
2082        return err;
2083}
2084
2085static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2086{
2087        int err;
2088        struct fib6_table *table;
2089        struct net *net = dev_net(rt->dst.dev);
2090
2091        if (rt == net->ipv6.ip6_null_entry ||
2092            rt->dst.flags & DST_NOCACHE) {
2093                err = -ENOENT;
2094                goto out;
2095        }
2096
2097        table = rt->rt6i_table;
2098        write_lock_bh(&table->tb6_lock);
2099        err = fib6_del(rt, info);
2100        write_unlock_bh(&table->tb6_lock);
2101
2102out:
2103        ip6_rt_put(rt);
2104        return err;
2105}
2106
2107int ip6_del_rt(struct rt6_info *rt)
2108{
2109        struct nl_info info = {
2110                .nl_net = dev_net(rt->dst.dev),
2111        };
2112        return __ip6_del_rt(rt, &info);
2113}
2114
2115static int ip6_route_del(struct fib6_config *cfg)
2116{
2117        struct fib6_table *table;
2118        struct fib6_node *fn;
2119        struct rt6_info *rt;
2120        int err = -ESRCH;
2121
2122        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2123        if (!table)
2124                return err;
2125
2126        read_lock_bh(&table->tb6_lock);
2127
2128        fn = fib6_locate(&table->tb6_root,
2129                         &cfg->fc_dst, cfg->fc_dst_len,
2130                         &cfg->fc_src, cfg->fc_src_len);
2131
2132        if (fn) {
2133                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2134                        if ((rt->rt6i_flags & RTF_CACHE) &&
2135                            !(cfg->fc_flags & RTF_CACHE))
2136                                continue;
2137                        if (cfg->fc_ifindex &&
2138                            (!rt->dst.dev ||
2139                             rt->dst.dev->ifindex != cfg->fc_ifindex))
2140                                continue;
2141                        if (cfg->fc_flags & RTF_GATEWAY &&
2142                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2143                                continue;
2144                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2145                                continue;
2146                        dst_hold(&rt->dst);
2147                        read_unlock_bh(&table->tb6_lock);
2148
2149                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2150                }
2151        }
2152        read_unlock_bh(&table->tb6_lock);
2153
2154        return err;
2155}
2156
2157static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2158{
2159        struct netevent_redirect netevent;
2160        struct rt6_info *rt, *nrt = NULL;
2161        struct ndisc_options ndopts;
2162        struct inet6_dev *in6_dev;
2163        struct neighbour *neigh;
2164        struct rd_msg *msg;
2165        int optlen, on_link;
2166        u8 *lladdr;
2167
2168        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2169        optlen -= sizeof(*msg);
2170
2171        if (optlen < 0) {
2172                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2173                return;
2174        }
2175
2176        msg = (struct rd_msg *)icmp6_hdr(skb);
2177
2178        if (ipv6_addr_is_multicast(&msg->dest)) {
2179                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2180                return;
2181        }
2182
2183        on_link = 0;
2184        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2185                on_link = 1;
2186        } else if (ipv6_addr_type(&msg->target) !=
2187                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2188                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2189                return;
2190        }
2191
2192        in6_dev = __in6_dev_get(skb->dev);
2193        if (!in6_dev)
2194                return;
2195        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2196                return;
2197
2198        /* RFC2461 8.1:
2199         *      The IP source address of the Redirect MUST be the same as the current
2200         *      first-hop router for the specified ICMP Destination Address.
2201         */
2202
2203        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2204                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2205                return;
2206        }
2207
2208        lladdr = NULL;
2209        if (ndopts.nd_opts_tgt_lladdr) {
2210                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2211                                             skb->dev);
2212                if (!lladdr) {
2213                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2214                        return;
2215                }
2216        }
2217
2218        rt = (struct rt6_info *) dst;
2219        if (rt->rt6i_flags & RTF_REJECT) {
2220                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2221                return;
2222        }
2223
2224        /* Redirect received -> path was valid.
2225         * Look, redirects are sent only in response to data packets,
2226         * so that this nexthop apparently is reachable. --ANK
2227         */
2228        dst_confirm(&rt->dst);
2229
2230        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2231        if (!neigh)
2232                return;
2233
2234        /*
2235         *      We have finally decided to accept it.
2236         */
2237
2238        neigh_update(neigh, lladdr, NUD_STALE,
2239                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2240                     NEIGH_UPDATE_F_OVERRIDE|
2241                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2242                                     NEIGH_UPDATE_F_ISROUTER))
2243                     );
2244
2245        nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2246        if (!nrt)
2247                goto out;
2248
2249        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2250        if (on_link)
2251                nrt->rt6i_flags &= ~RTF_GATEWAY;
2252
2253        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2254
2255        if (ip6_ins_rt(nrt))
2256                goto out;
2257
2258        netevent.old = &rt->dst;
2259        netevent.new = &nrt->dst;
2260        netevent.daddr = &msg->dest;
2261        netevent.neigh = neigh;
2262        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2263
2264        if (rt->rt6i_flags & RTF_CACHE) {
2265                rt = (struct rt6_info *) dst_clone(&rt->dst);
2266                ip6_del_rt(rt);
2267        }
2268
2269out:
2270        neigh_release(neigh);
2271}
2272
2273/*
2274 *      Misc support functions
2275 */
2276
2277static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2278{
2279        BUG_ON(from->dst.from);
2280
2281        rt->rt6i_flags &= ~RTF_EXPIRES;
2282        dst_hold(&from->dst);
2283        rt->dst.from = &from->dst;
2284        dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2285}
2286
2287static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2288{
2289        rt->dst.input = ort->dst.input;
2290        rt->dst.output = ort->dst.output;
2291        rt->rt6i_dst = ort->rt6i_dst;
2292        rt->dst.error = ort->dst.error;
2293        rt->rt6i_idev = ort->rt6i_idev;
2294        if (rt->rt6i_idev)
2295                in6_dev_hold(rt->rt6i_idev);
2296        rt->dst.lastuse = jiffies;
2297        rt->rt6i_gateway = ort->rt6i_gateway;
2298        rt->rt6i_flags = ort->rt6i_flags;
2299        rt6_set_from(rt, ort);
2300        rt->rt6i_metric = ort->rt6i_metric;
2301#ifdef CONFIG_IPV6_SUBTREES
2302        rt->rt6i_src = ort->rt6i_src;
2303#endif
2304        rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2305        rt->rt6i_table = ort->rt6i_table;
2306        rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2307}
2308
2309#ifdef CONFIG_IPV6_ROUTE_INFO
2310static struct rt6_info *rt6_get_route_info(struct net *net,
2311                                           const struct in6_addr *prefix, int prefixlen,
2312                                           const struct in6_addr *gwaddr, int ifindex)
2313{
2314        struct fib6_node *fn;
2315        struct rt6_info *rt = NULL;
2316        struct fib6_table *table;
2317
2318        table = fib6_get_table(net, RT6_TABLE_INFO);
2319        if (!table)
2320                return NULL;
2321
2322        read_lock_bh(&table->tb6_lock);
2323        fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2324        if (!fn)
2325                goto out;
2326
2327        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2328                if (rt->dst.dev->ifindex != ifindex)
2329                        continue;
2330                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2331                        continue;
2332                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2333                        continue;
2334                dst_hold(&rt->dst);
2335                break;
2336        }
2337out:
2338        read_unlock_bh(&table->tb6_lock);
2339        return rt;
2340}
2341
2342static struct rt6_info *rt6_add_route_info(struct net *net,
2343                                           const struct in6_addr *prefix, int prefixlen,
2344                                           const struct in6_addr *gwaddr, int ifindex,
2345                                           unsigned int pref)
2346{
2347        struct fib6_config cfg = {
2348                .fc_metric      = IP6_RT_PRIO_USER,
2349                .fc_ifindex     = ifindex,
2350                .fc_dst_len     = prefixlen,
2351                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2352                                  RTF_UP | RTF_PREF(pref),
2353                .fc_nlinfo.portid = 0,
2354                .fc_nlinfo.nlh = NULL,
2355                .fc_nlinfo.nl_net = net,
2356        };
2357
2358        cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2359        cfg.fc_dst = *prefix;
2360        cfg.fc_gateway = *gwaddr;
2361
2362        /* We should treat it as a default route if prefix length is 0. */
2363        if (!prefixlen)
2364                cfg.fc_flags |= RTF_DEFAULT;
2365
2366        ip6_route_add(&cfg);
2367
2368        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2369}
2370#endif
2371
2372struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2373{
2374        struct rt6_info *rt;
2375        struct fib6_table *table;
2376
2377        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2378        if (!table)
2379                return NULL;
2380
2381        read_lock_bh(&table->tb6_lock);
2382        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2383                if (dev == rt->dst.dev &&
2384                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2385                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2386                        break;
2387        }
2388        if (rt)
2389                dst_hold(&rt->dst);
2390        read_unlock_bh(&table->tb6_lock);
2391        return rt;
2392}
2393
2394struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2395                                     struct net_device *dev,
2396                                     unsigned int pref)
2397{
2398        struct fib6_config cfg = {
2399                .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2400                .fc_metric      = IP6_RT_PRIO_USER,
2401                .fc_ifindex     = dev->ifindex,
2402                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2403                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2404                .fc_nlinfo.portid = 0,
2405                .fc_nlinfo.nlh = NULL,
2406                .fc_nlinfo.nl_net = dev_net(dev),
2407        };
2408
2409        cfg.fc_gateway = *gwaddr;
2410
2411        ip6_route_add(&cfg);
2412
2413        return rt6_get_dflt_router(gwaddr, dev);
2414}
2415
2416void rt6_purge_dflt_routers(struct net *net)
2417{
2418        struct rt6_info *rt;
2419        struct fib6_table *table;
2420
2421        /* NOTE: Keep consistent with rt6_get_dflt_router */
2422        table = fib6_get_table(net, RT6_TABLE_DFLT);
2423        if (!table)
2424                return;
2425
2426restart:
2427        read_lock_bh(&table->tb6_lock);
2428        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2429                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2430                    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2431                        dst_hold(&rt->dst);
2432                        read_unlock_bh(&table->tb6_lock);
2433                        ip6_del_rt(rt);
2434                        goto restart;
2435                }
2436        }
2437        read_unlock_bh(&table->tb6_lock);
2438}
2439
2440static void rtmsg_to_fib6_config(struct net *net,
2441                                 struct in6_rtmsg *rtmsg,
2442                                 struct fib6_config *cfg)
2443{
2444        memset(cfg, 0, sizeof(*cfg));
2445
2446        cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2447                         : RT6_TABLE_MAIN;
2448        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2449        cfg->fc_metric = rtmsg->rtmsg_metric;
2450        cfg->fc_expires = rtmsg->rtmsg_info;
2451        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2452        cfg->fc_src_len = rtmsg->rtmsg_src_len;
2453        cfg->fc_flags = rtmsg->rtmsg_flags;
2454
2455        cfg->fc_nlinfo.nl_net = net;
2456
2457        cfg->fc_dst = rtmsg->rtmsg_dst;
2458        cfg->fc_src = rtmsg->rtmsg_src;
2459        cfg->fc_gateway = rtmsg->rtmsg_gateway;
2460}
2461
2462int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2463{
2464        struct fib6_config cfg;
2465        struct in6_rtmsg rtmsg;
2466        int err;
2467
2468        switch (cmd) {
2469        case SIOCADDRT:         /* Add a route */
2470        case SIOCDELRT:         /* Delete a route */
2471                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2472                        return -EPERM;
2473                err = copy_from_user(&rtmsg, arg,
2474                                     sizeof(struct in6_rtmsg));
2475                if (err)
2476                        return -EFAULT;
2477
2478                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2479
2480                rtnl_lock();
2481                switch (cmd) {
2482                case SIOCADDRT:
2483                        err = ip6_route_add(&cfg);
2484                        break;
2485                case SIOCDELRT:
2486                        err = ip6_route_del(&cfg);
2487                        break;
2488                default:
2489                        err = -EINVAL;
2490                }
2491                rtnl_unlock();
2492
2493                return err;
2494        }
2495
2496        return -EINVAL;
2497}
2498
2499/*
2500 *      Drop the packet on the floor
2501 */
2502
2503static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2504{
2505        int type;
2506        struct dst_entry *dst = skb_dst(skb);
2507        switch (ipstats_mib_noroutes) {
2508        case IPSTATS_MIB_INNOROUTES:
2509                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2510                if (type == IPV6_ADDR_ANY) {
2511                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2512                                      IPSTATS_MIB_INADDRERRORS);
2513                        break;
2514                }
2515                /* FALLTHROUGH */
2516        case IPSTATS_MIB_OUTNOROUTES:
2517                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2518                              ipstats_mib_noroutes);
2519                break;
2520        }
2521        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2522        kfree_skb(skb);
2523        return 0;
2524}
2525
2526static int ip6_pkt_discard(struct sk_buff *skb)
2527{
2528        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2529}
2530
2531static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2532{
2533        skb->dev = skb_dst(skb)->dev;
2534        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2535}
2536
2537static int ip6_pkt_prohibit(struct sk_buff *skb)
2538{
2539        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2540}
2541
2542static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2543{
2544        skb->dev = skb_dst(skb)->dev;
2545        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2546}
2547
2548/*
2549 *      Allocate a dst for local (unicast / anycast) address.
2550 */
2551
2552struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2553                                    const struct in6_addr *addr,
2554                                    bool anycast)
2555{
2556        u32 tb_id;
2557        struct net *net = dev_net(idev->dev);
2558        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2559                                            DST_NOCOUNT);
2560        if (!rt)
2561                return ERR_PTR(-ENOMEM);
2562
2563        in6_dev_hold(idev);
2564
2565        rt->dst.flags |= DST_HOST;
2566        rt->dst.input = ip6_input;
2567        rt->dst.output = ip6_output;
2568        rt->rt6i_idev = idev;
2569
2570        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2571        if (anycast)
2572                rt->rt6i_flags |= RTF_ANYCAST;
2573        else
2574                rt->rt6i_flags |= RTF_LOCAL;
2575
2576        rt->rt6i_gateway  = *addr;
2577        rt->rt6i_dst.addr = *addr;
2578        rt->rt6i_dst.plen = 128;
2579        tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2580        rt->rt6i_table = fib6_get_table(net, tb_id);
2581        rt->dst.flags |= DST_NOCACHE;
2582
2583        atomic_set(&rt->dst.__refcnt, 1);
2584
2585        return rt;
2586}
2587
2588int ip6_route_get_saddr(struct net *net,
2589                        struct rt6_info *rt,
2590                        const struct in6_addr *daddr,
2591                        unsigned int prefs,
2592                        struct in6_addr *saddr)
2593{
2594        struct inet6_dev *idev =
2595                rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2596        int err = 0;
2597        if (rt && rt->rt6i_prefsrc.plen)
2598                *saddr = rt->rt6i_prefsrc.addr;
2599        else
2600                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2601                                         daddr, prefs, saddr);
2602        return err;
2603}
2604
2605/* remove deleted ip from prefsrc entries */
2606struct arg_dev_net_ip {
2607        struct net_device *dev;
2608        struct net *net;
2609        struct in6_addr *addr;
2610};
2611
2612static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2613{
2614        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2615        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2616        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2617
2618        if (((void *)rt->dst.dev == dev || !dev) &&
2619            rt != net->ipv6.ip6_null_entry &&
2620            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2621                /* remove prefsrc entry */
2622                rt->rt6i_prefsrc.plen = 0;
2623        }
2624        return 0;
2625}
2626
2627void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2628{
2629        struct net *net = dev_net(ifp->idev->dev);
2630        struct arg_dev_net_ip adni = {
2631                .dev = ifp->idev->dev,
2632                .net = net,
2633                .addr = &ifp->addr,
2634        };
2635        fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2636}
2637
2638#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2639#define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2640
2641/* Remove routers and update dst entries when gateway turn into host. */
2642static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2643{
2644        struct in6_addr *gateway = (struct in6_addr *)arg;
2645
2646        if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2647             ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2648             ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2649                return -1;
2650        }
2651        return 0;
2652}
2653
2654void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2655{
2656        fib6_clean_all(net, fib6_clean_tohost, gateway);
2657}
2658
2659struct arg_dev_net {
2660        struct net_device *dev;
2661        struct net *net;
2662};
2663
2664static int fib6_ifdown(struct rt6_info *rt, void *arg)
2665{
2666        const struct arg_dev_net *adn = arg;
2667        const struct net_device *dev = adn->dev;
2668
2669        if ((rt->dst.dev == dev || !dev) &&
2670            rt != adn->net->ipv6.ip6_null_entry)
2671                return -1;
2672
2673        return 0;
2674}
2675
2676void rt6_ifdown(struct net *net, struct net_device *dev)
2677{
2678        struct arg_dev_net adn = {
2679                .dev = dev,
2680                .net = net,
2681        };
2682
2683        fib6_clean_all(net, fib6_ifdown, &adn);
2684        icmp6_clean_all(fib6_ifdown, &adn);
2685        if (dev)
2686                rt6_uncached_list_flush_dev(net, dev);
2687}
2688
2689struct rt6_mtu_change_arg {
2690        struct net_device *dev;
2691        unsigned int mtu;
2692};
2693
2694static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2695{
2696        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2697        struct inet6_dev *idev;
2698
2699        /* In IPv6 pmtu discovery is not optional,
2700           so that RTAX_MTU lock cannot disable it.
2701           We still use this lock to block changes
2702           caused by addrconf/ndisc.
2703        */
2704
2705        idev = __in6_dev_get(arg->dev);
2706        if (!idev)
2707                return 0;
2708
2709        /* For administrative MTU increase, there is no way to discover
2710           IPv6 PMTU increase, so PMTU increase should be updated here.
2711           Since RFC 1981 doesn't include administrative MTU increase
2712           update PMTU increase is a MUST. (i.e. jumbo frame)
2713         */
2714        /*
2715           If new MTU is less than route PMTU, this new MTU will be the
2716           lowest MTU in the path, update the route PMTU to reflect PMTU
2717           decreases; if new MTU is greater than route PMTU, and the
2718           old MTU is the lowest MTU in the path, update the route PMTU
2719           to reflect the increase. In this case if the other nodes' MTU
2720           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2721           PMTU discouvery.
2722         */
2723        if (rt->dst.dev == arg->dev &&
2724            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2725                if (rt->rt6i_flags & RTF_CACHE) {
2726                        /* For RTF_CACHE with rt6i_pmtu == 0
2727                         * (i.e. a redirected route),
2728                         * the metrics of its rt->dst.from has already
2729                         * been updated.
2730                         */
2731                        if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2732                                rt->rt6i_pmtu = arg->mtu;
2733                } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2734                           (dst_mtu(&rt->dst) < arg->mtu &&
2735                            dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2736                        dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2737                }
2738        }
2739        return 0;
2740}
2741
2742void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2743{
2744        struct rt6_mtu_change_arg arg = {
2745                .dev = dev,
2746                .mtu = mtu,
2747        };
2748
2749        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2750}
2751
2752static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2753        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2754        [RTA_OIF]               = { .type = NLA_U32 },
2755        [RTA_IIF]               = { .type = NLA_U32 },
2756        [RTA_PRIORITY]          = { .type = NLA_U32 },
2757        [RTA_METRICS]           = { .type = NLA_NESTED },
2758        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2759        [RTA_PREF]              = { .type = NLA_U8 },
2760        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2761        [RTA_ENCAP]             = { .type = NLA_NESTED },
2762        [RTA_EXPIRES]           = { .type = NLA_U32 },
2763};
2764
2765static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2766                              struct fib6_config *cfg)
2767{
2768        struct rtmsg *rtm;
2769        struct nlattr *tb[RTA_MAX+1];
2770        unsigned int pref;
2771        int err;
2772
2773        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2774        if (err < 0)
2775                goto errout;
2776
2777        err = -EINVAL;
2778        rtm = nlmsg_data(nlh);
2779        memset(cfg, 0, sizeof(*cfg));
2780
2781        cfg->fc_table = rtm->rtm_table;
2782        cfg->fc_dst_len = rtm->rtm_dst_len;
2783        cfg->fc_src_len = rtm->rtm_src_len;
2784        cfg->fc_flags = RTF_UP;
2785        cfg->fc_protocol = rtm->rtm_protocol;
2786        cfg->fc_type = rtm->rtm_type;
2787
2788        if (rtm->rtm_type == RTN_UNREACHABLE ||
2789            rtm->rtm_type == RTN_BLACKHOLE ||
2790            rtm->rtm_type == RTN_PROHIBIT ||
2791            rtm->rtm_type == RTN_THROW)
2792                cfg->fc_flags |= RTF_REJECT;
2793
2794        if (rtm->rtm_type == RTN_LOCAL)
2795                cfg->fc_flags |= RTF_LOCAL;
2796
2797        if (rtm->rtm_flags & RTM_F_CLONED)
2798                cfg->fc_flags |= RTF_CACHE;
2799
2800        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2801        cfg->fc_nlinfo.nlh = nlh;
2802        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2803
2804        if (tb[RTA_GATEWAY]) {
2805                cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2806                cfg->fc_flags |= RTF_GATEWAY;
2807        }
2808
2809        if (tb[RTA_DST]) {
2810                int plen = (rtm->rtm_dst_len + 7) >> 3;
2811
2812                if (nla_len(tb[RTA_DST]) < plen)
2813                        goto errout;
2814
2815                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2816        }
2817
2818        if (tb[RTA_SRC]) {
2819                int plen = (rtm->rtm_src_len + 7) >> 3;
2820
2821                if (nla_len(tb[RTA_SRC]) < plen)
2822                        goto errout;
2823
2824                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2825        }
2826
2827        if (tb[RTA_PREFSRC])
2828                cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2829
2830        if (tb[RTA_OIF])
2831                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2832
2833        if (tb[RTA_PRIORITY])
2834                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2835
2836        if (tb[RTA_METRICS]) {
2837                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2838                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2839        }
2840
2841        if (tb[RTA_TABLE])
2842                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2843
2844        if (tb[RTA_MULTIPATH]) {
2845                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2846                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2847        }
2848
2849        if (tb[RTA_PREF]) {
2850                pref = nla_get_u8(tb[RTA_PREF]);
2851                if (pref != ICMPV6_ROUTER_PREF_LOW &&
2852                    pref != ICMPV6_ROUTER_PREF_HIGH)
2853                        pref = ICMPV6_ROUTER_PREF_MEDIUM;
2854                cfg->fc_flags |= RTF_PREF(pref);
2855        }
2856
2857        if (tb[RTA_ENCAP])
2858                cfg->fc_encap = tb[RTA_ENCAP];
2859
2860        if (tb[RTA_ENCAP_TYPE])
2861                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2862
2863        if (tb[RTA_EXPIRES]) {
2864                unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2865
2866                if (addrconf_finite_timeout(timeout)) {
2867                        cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2868                        cfg->fc_flags |= RTF_EXPIRES;
2869                }
2870        }
2871
2872        err = 0;
2873errout:
2874        return err;
2875}
2876
2877struct rt6_nh {
2878        struct rt6_info *rt6_info;
2879        struct fib6_config r_cfg;
2880        struct mx6_config mxc;
2881        struct list_head next;
2882};
2883
2884static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2885{
2886        struct rt6_nh *nh;
2887
2888        list_for_each_entry(nh, rt6_nh_list, next) {
2889                pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2890                        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2891                        nh->r_cfg.fc_ifindex);
2892        }
2893}
2894
2895static int ip6_route_info_append(struct list_head *rt6_nh_list,
2896                                 struct rt6_info *rt, struct fib6_config *r_cfg)
2897{
2898        struct rt6_nh *nh;
2899        struct rt6_info *rtnh;
2900        int err = -EEXIST;
2901
2902        list_for_each_entry(nh, rt6_nh_list, next) {
2903                /* check if rt6_info already exists */
2904                rtnh = nh->rt6_info;
2905
2906                if (rtnh->dst.dev == rt->dst.dev &&
2907                    rtnh->rt6i_idev == rt->rt6i_idev &&
2908                    ipv6_addr_equal(&rtnh->rt6i_gateway,
2909                                    &rt->rt6i_gateway))
2910                        return err;
2911        }
2912
2913        nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2914        if (!nh)
2915                return -ENOMEM;
2916        nh->rt6_info = rt;
2917        err = ip6_convert_metrics(&nh->mxc, r_cfg);
2918        if (err) {
2919                kfree(nh);
2920                return err;
2921        }
2922        memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2923        list_add_tail(&nh->next, rt6_nh_list);
2924
2925        return 0;
2926}
2927
2928static int ip6_route_multipath_add(struct fib6_config *cfg)
2929{
2930        struct fib6_config r_cfg;
2931        struct rtnexthop *rtnh;
2932        struct rt6_info *rt;
2933        struct rt6_nh *err_nh;
2934        struct rt6_nh *nh, *nh_safe;
2935        int remaining;
2936        int attrlen;
2937        int err = 1;
2938        int nhn = 0;
2939        int replace = (cfg->fc_nlinfo.nlh &&
2940                       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2941        LIST_HEAD(rt6_nh_list);
2942
2943        remaining = cfg->fc_mp_len;
2944        rtnh = (struct rtnexthop *)cfg->fc_mp;
2945
2946        /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2947         * rt6_info structs per nexthop
2948         */
2949        while (rtnh_ok(rtnh, remaining)) {
2950                memcpy(&r_cfg, cfg, sizeof(*cfg));
2951                if (rtnh->rtnh_ifindex)
2952                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2953
2954                attrlen = rtnh_attrlen(rtnh);
2955                if (attrlen > 0) {
2956                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2957
2958                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2959                        if (nla) {
2960                                r_cfg.fc_gateway = nla_get_in6_addr(nla);
2961                                r_cfg.fc_flags |= RTF_GATEWAY;
2962                        }
2963                        r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2964                        nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2965                        if (nla)
2966                                r_cfg.fc_encap_type = nla_get_u16(nla);
2967                }
2968
2969                rt = ip6_route_info_create(&r_cfg);
2970                if (IS_ERR(rt)) {
2971                        err = PTR_ERR(rt);
2972                        rt = NULL;
2973                        goto cleanup;
2974                }
2975
2976                err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2977                if (err) {
2978                        dst_free(&rt->dst);
2979                        goto cleanup;
2980                }
2981
2982                rtnh = rtnh_next(rtnh, &remaining);
2983        }
2984
2985        err_nh = NULL;
2986        list_for_each_entry(nh, &rt6_nh_list, next) {
2987                err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2988                /* nh->rt6_info is used or freed at this point, reset to NULL*/
2989                nh->rt6_info = NULL;
2990                if (err) {
2991                        if (replace && nhn)
2992                                ip6_print_replace_route_err(&rt6_nh_list);
2993                        err_nh = nh;
2994                        goto add_errout;
2995                }
2996
2997                /* Because each route is added like a single route we remove
2998                 * these flags after the first nexthop: if there is a collision,
2999                 * we have already failed to add the first nexthop:
3000                 * fib6_add_rt2node() has rejected it; when replacing, old
3001                 * nexthops have been replaced by first new, the rest should
3002                 * be added to it.
3003                 */
3004                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3005                                                     NLM_F_REPLACE);
3006                nhn++;
3007        }
3008
3009        goto cleanup;
3010
3011add_errout:
3012        /* Delete routes that were already added */
3013        list_for_each_entry(nh, &rt6_nh_list, next) {
3014                if (err_nh == nh)
3015                        break;
3016                ip6_route_del(&nh->r_cfg);
3017        }
3018
3019cleanup:
3020        list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3021                if (nh->rt6_info)
3022                        dst_free(&nh->rt6_info->dst);
3023                kfree(nh->mxc.mx);
3024                list_del(&nh->next);
3025                kfree(nh);
3026        }
3027
3028        return err;
3029}
3030
3031static int ip6_route_multipath_del(struct fib6_config *cfg)
3032{
3033        struct fib6_config r_cfg;
3034        struct rtnexthop *rtnh;
3035        int remaining;
3036        int attrlen;
3037        int err = 1, last_err = 0;
3038
3039        remaining = cfg->fc_mp_len;
3040        rtnh = (struct rtnexthop *)cfg->fc_mp;
3041
3042        /* Parse a Multipath Entry */
3043        while (rtnh_ok(rtnh, remaining)) {
3044                memcpy(&r_cfg, cfg, sizeof(*cfg));
3045                if (rtnh->rtnh_ifindex)
3046                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3047
3048                attrlen = rtnh_attrlen(rtnh);
3049                if (attrlen > 0) {
3050                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3051
3052                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3053                        if (nla) {
3054                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3055                                r_cfg.fc_flags |= RTF_GATEWAY;
3056                        }
3057                }
3058                err = ip6_route_del(&r_cfg);
3059                if (err)
3060                        last_err = err;
3061
3062                rtnh = rtnh_next(rtnh, &remaining);
3063        }
3064
3065        return last_err;
3066}
3067
3068static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3069{
3070        struct fib6_config cfg;
3071        int err;
3072
3073        err = rtm_to_fib6_config(skb, nlh, &cfg);
3074        if (err < 0)
3075                return err;
3076
3077        if (cfg.fc_mp)
3078                return ip6_route_multipath_del(&cfg);
3079        else
3080                return ip6_route_del(&cfg);
3081}
3082
3083static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3084{
3085        struct fib6_config cfg;
3086        int err;
3087
3088        err = rtm_to_fib6_config(skb, nlh, &cfg);
3089        if (err < 0)
3090                return err;
3091
3092        if (cfg.fc_mp)
3093                return ip6_route_multipath_add(&cfg);
3094        else
3095                return ip6_route_add(&cfg);
3096}
3097
3098static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3099{
3100        return NLMSG_ALIGN(sizeof(struct rtmsg))
3101               + nla_total_size(16) /* RTA_SRC */
3102               + nla_total_size(16) /* RTA_DST */
3103               + nla_total_size(16) /* RTA_GATEWAY */
3104               + nla_total_size(16) /* RTA_PREFSRC */
3105               + nla_total_size(4) /* RTA_TABLE */
3106               + nla_total_size(4) /* RTA_IIF */
3107               + nla_total_size(4) /* RTA_OIF */
3108               + nla_total_size(4) /* RTA_PRIORITY */
3109               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3110               + nla_total_size(sizeof(struct rta_cacheinfo))
3111               + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3112               + nla_total_size(1) /* RTA_PREF */
3113               + lwtunnel_get_encap_size(rt->dst.lwtstate);
3114}
3115
3116static int rt6_fill_node(struct net *net,
3117                         struct sk_buff *skb, struct rt6_info *rt,
3118                         struct in6_addr *dst, struct in6_addr *src,
3119                         int iif, int type, u32 portid, u32 seq,
3120                         int prefix, int nowait, unsigned int flags)
3121{
3122        u32 metrics[RTAX_MAX];
3123        struct rtmsg *rtm;
3124        struct nlmsghdr *nlh;
3125        long expires;
3126        u32 table;
3127
3128        if (prefix) {   /* user wants prefix routes only */
3129                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3130                        /* success since this is not a prefix route */
3131                        return 1;
3132                }
3133        }
3134
3135        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3136        if (!nlh)
3137                return -EMSGSIZE;
3138
3139        rtm = nlmsg_data(nlh);
3140        rtm->rtm_family = AF_INET6;
3141        rtm->rtm_dst_len = rt->rt6i_dst.plen;
3142        rtm->rtm_src_len = rt->rt6i_src.plen;
3143        rtm->rtm_tos = 0;
3144        if (rt->rt6i_table)
3145                table = rt->rt6i_table->tb6_id;
3146        else
3147                table = RT6_TABLE_UNSPEC;
3148        rtm->rtm_table = table;
3149        if (nla_put_u32(skb, RTA_TABLE, table))
3150                goto nla_put_failure;
3151        if (rt->rt6i_flags & RTF_REJECT) {
3152                switch (rt->dst.error) {
3153                case -EINVAL:
3154                        rtm->rtm_type = RTN_BLACKHOLE;
3155                        break;
3156                case -EACCES:
3157                        rtm->rtm_type = RTN_PROHIBIT;
3158                        break;
3159                case -EAGAIN:
3160                        rtm->rtm_type = RTN_THROW;
3161                        break;
3162                default:
3163                        rtm->rtm_type = RTN_UNREACHABLE;
3164                        break;
3165                }
3166        }
3167        else if (rt->rt6i_flags & RTF_LOCAL)
3168                rtm->rtm_type = RTN_LOCAL;
3169        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3170                rtm->rtm_type = RTN_LOCAL;
3171        else
3172                rtm->rtm_type = RTN_UNICAST;
3173        rtm->rtm_flags = 0;
3174        if (!netif_carrier_ok(rt->dst.dev)) {
3175                rtm->rtm_flags |= RTNH_F_LINKDOWN;
3176                if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3177                        rtm->rtm_flags |= RTNH_F_DEAD;
3178        }
3179        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3180        rtm->rtm_protocol = rt->rt6i_protocol;
3181        if (rt->rt6i_flags & RTF_DYNAMIC)
3182                rtm->rtm_protocol = RTPROT_REDIRECT;
3183        else if (rt->rt6i_flags & RTF_ADDRCONF) {
3184                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3185                        rtm->rtm_protocol = RTPROT_RA;
3186                else
3187                        rtm->rtm_protocol = RTPROT_KERNEL;
3188        }
3189
3190        if (rt->rt6i_flags & RTF_CACHE)
3191                rtm->rtm_flags |= RTM_F_CLONED;
3192
3193        if (dst) {
3194                if (nla_put_in6_addr(skb, RTA_DST, dst))
3195                        goto nla_put_failure;
3196                rtm->rtm_dst_len = 128;
3197        } else if (rtm->rtm_dst_len)
3198                if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3199                        goto nla_put_failure;
3200#ifdef CONFIG_IPV6_SUBTREES
3201        if (src) {
3202                if (nla_put_in6_addr(skb, RTA_SRC, src))
3203                        goto nla_put_failure;
3204                rtm->rtm_src_len = 128;
3205        } else if (rtm->rtm_src_len &&
3206                   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3207                goto nla_put_failure;
3208#endif
3209        if (iif) {
3210#ifdef CONFIG_IPV6_MROUTE
3211                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3212                        int err = ip6mr_get_route(net, skb, rtm, nowait);
3213                        if (err <= 0) {
3214                                if (!nowait) {
3215                                        if (err == 0)
3216                                                return 0;
3217                                        goto nla_put_failure;
3218                                } else {
3219                                        if (err == -EMSGSIZE)
3220                                                goto nla_put_failure;
3221                                }
3222                        }
3223                } else
3224#endif
3225                        if (nla_put_u32(skb, RTA_IIF, iif))
3226                                goto nla_put_failure;
3227        } else if (dst) {
3228                struct in6_addr saddr_buf;
3229                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3230                    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3231                        goto nla_put_failure;
3232        }
3233
3234        if (rt->rt6i_prefsrc.plen) {
3235                struct in6_addr saddr_buf;
3236                saddr_buf = rt->rt6i_prefsrc.addr;
3237                if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3238                        goto nla_put_failure;
3239        }
3240
3241        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3242        if (rt->rt6i_pmtu)
3243                metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3244        if (rtnetlink_put_metrics(skb, metrics) < 0)
3245                goto nla_put_failure;
3246
3247        if (rt->rt6i_flags & RTF_GATEWAY) {
3248                if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3249                        goto nla_put_failure;
3250        }
3251
3252        if (rt->dst.dev &&
3253            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3254                goto nla_put_failure;
3255        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3256                goto nla_put_failure;
3257
3258        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3259
3260        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3261                goto nla_put_failure;
3262
3263        if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3264                goto nla_put_failure;
3265
3266        lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3267
3268        nlmsg_end(skb, nlh);
3269        return 0;
3270
3271nla_put_failure:
3272        nlmsg_cancel(skb, nlh);
3273        return -EMSGSIZE;
3274}
3275
3276int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3277{
3278        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3279        int prefix;
3280
3281        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3282                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3283                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3284        } else
3285                prefix = 0;
3286
3287        return rt6_fill_node(arg->net,
3288                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3289                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3290                     prefix, 0, NLM_F_MULTI);
3291}
3292
3293static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3294{
3295        struct net *net = sock_net(in_skb->sk);
3296        struct nlattr *tb[RTA_MAX+1];
3297        struct rt6_info *rt;
3298        struct sk_buff *skb;
3299        struct rtmsg *rtm;
3300        struct flowi6 fl6;
3301        int err, iif = 0, oif = 0;
3302
3303        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3304        if (err < 0)
3305                goto errout;
3306
3307        err = -EINVAL;
3308        memset(&fl6, 0, sizeof(fl6));
3309
3310        if (tb[RTA_SRC]) {
3311                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3312                        goto errout;
3313
3314                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3315        }
3316
3317        if (tb[RTA_DST]) {
3318                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3319                        goto errout;
3320
3321                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3322        }
3323
3324        if (tb[RTA_IIF])
3325                iif = nla_get_u32(tb[RTA_IIF]);
3326
3327        if (tb[RTA_OIF])
3328                oif = nla_get_u32(tb[RTA_OIF]);
3329
3330        if (tb[RTA_MARK])
3331                fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3332
3333        if (iif) {
3334                struct net_device *dev;
3335                int flags = 0;
3336
3337                dev = __dev_get_by_index(net, iif);
3338                if (!dev) {
3339                        err = -ENODEV;
3340                        goto errout;
3341                }
3342
3343                fl6.flowi6_iif = iif;
3344
3345                if (!ipv6_addr_any(&fl6.saddr))
3346                        flags |= RT6_LOOKUP_F_HAS_SADDR;
3347
3348                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3349                                                               flags);
3350        } else {
3351                fl6.flowi6_oif = oif;
3352
3353                if (netif_index_is_l3_master(net, oif)) {
3354                        fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3355                                           FLOWI_FLAG_SKIP_NH_OIF;
3356                }
3357
3358                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3359        }
3360
3361        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3362        if (!skb) {
3363                ip6_rt_put(rt);
3364                err = -ENOBUFS;
3365                goto errout;
3366        }
3367
3368        /* Reserve room for dummy headers, this skb can pass
3369           through good chunk of routing engine.
3370         */
3371        skb_reset_mac_header(skb);
3372        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3373
3374        skb_dst_set(skb, &rt->dst);
3375
3376        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3377                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3378                            nlh->nlmsg_seq, 0, 0, 0);
3379        if (err < 0) {
3380                kfree_skb(skb);
3381                goto errout;
3382        }
3383
3384        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3385errout:
3386        return err;
3387}
3388
3389void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3390                     unsigned int nlm_flags)
3391{
3392        struct sk_buff *skb;
3393        struct net *net = info->nl_net;
3394        u32 seq;
3395        int err;
3396
3397        err = -ENOBUFS;
3398        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3399
3400        skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3401        if (!skb)
3402                goto errout;
3403
3404        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3405                                event, info->portid, seq, 0, 0, nlm_flags);
3406        if (err < 0) {
3407                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3408                WARN_ON(err == -EMSGSIZE);
3409                kfree_skb(skb);
3410                goto errout;
3411        }
3412        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3413                    info->nlh, gfp_any());
3414        return;
3415errout:
3416        if (err < 0)
3417                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3418}
3419
3420static int ip6_route_dev_notify(struct notifier_block *this,
3421                                unsigned long event, void *ptr)
3422{
3423        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3424        struct net *net = dev_net(dev);
3425
3426        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3427                net->ipv6.ip6_null_entry->dst.dev = dev;
3428                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3429#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3430                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3431                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3432                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3433                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3434#endif
3435        }
3436
3437        return NOTIFY_OK;
3438}
3439
3440/*
3441 *      /proc
3442 */
3443
3444#ifdef CONFIG_PROC_FS
3445
3446static const struct file_operations ipv6_route_proc_fops = {
3447        .owner          = THIS_MODULE,
3448        .open           = ipv6_route_open,
3449        .read           = seq_read,
3450        .llseek         = seq_lseek,
3451        .release        = seq_release_net,
3452};
3453
3454static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3455{
3456        struct net *net = (struct net *)seq->private;
3457        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3458                   net->ipv6.rt6_stats->fib_nodes,
3459                   net->ipv6.rt6_stats->fib_route_nodes,
3460                   net->ipv6.rt6_stats->fib_rt_alloc,
3461                   net->ipv6.rt6_stats->fib_rt_entries,
3462                   net->ipv6.rt6_stats->fib_rt_cache,
3463                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3464                   net->ipv6.rt6_stats->fib_discarded_routes);
3465
3466        return 0;
3467}
3468
3469static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3470{
3471        return single_open_net(inode, file, rt6_stats_seq_show);
3472}
3473
3474static const struct file_operations rt6_stats_seq_fops = {
3475        .owner   = THIS_MODULE,
3476        .open    = rt6_stats_seq_open,
3477        .read    = seq_read,
3478        .llseek  = seq_lseek,
3479        .release = single_release_net,
3480};
3481#endif  /* CONFIG_PROC_FS */
3482
3483#ifdef CONFIG_SYSCTL
3484
3485static
3486int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3487                              void __user *buffer, size_t *lenp, loff_t *ppos)
3488{
3489        struct net *net;
3490        int delay;
3491        if (!write)
3492                return -EINVAL;
3493
3494        net = (struct net *)ctl->extra1;
3495        delay = net->ipv6.sysctl.flush_delay;
3496        proc_dointvec(ctl, write, buffer, lenp, ppos);
3497        fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3498        return 0;
3499}
3500
3501struct ctl_table ipv6_route_table_template[] = {
3502        {
3503                .procname       =       "flush",
3504                .data           =       &init_net.ipv6.sysctl.flush_delay,
3505                .maxlen         =       sizeof(int),
3506                .mode           =       0200,
3507                .proc_handler   =       ipv6_sysctl_rtcache_flush
3508        },
3509        {
3510                .procname       =       "gc_thresh",
3511                .data           =       &ip6_dst_ops_template.gc_thresh,
3512                .maxlen         =       sizeof(int),
3513                .mode           =       0644,
3514                .proc_handler   =       proc_dointvec,
3515        },
3516        {
3517                .procname       =       "max_size",
3518                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3519                .maxlen         =       sizeof(int),
3520                .mode           =       0644,
3521                .proc_handler   =       proc_dointvec,
3522        },
3523        {
3524                .procname       =       "gc_min_interval",
3525                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3526                .maxlen         =       sizeof(int),
3527                .mode           =       0644,
3528                .proc_handler   =       proc_dointvec_jiffies,
3529        },
3530        {
3531                .procname       =       "gc_timeout",
3532                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3533                .maxlen         =       sizeof(int),
3534                .mode           =       0644,
3535                .proc_handler   =       proc_dointvec_jiffies,
3536        },
3537        {
3538                .procname       =       "gc_interval",
3539                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3540                .maxlen         =       sizeof(int),
3541                .mode           =       0644,
3542                .proc_handler   =       proc_dointvec_jiffies,
3543        },
3544        {
3545                .procname       =       "gc_elasticity",
3546                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3547                .maxlen         =       sizeof(int),
3548                .mode           =       0644,
3549                .proc_handler   =       proc_dointvec,
3550        },
3551        {
3552                .procname       =       "mtu_expires",
3553                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3554                .maxlen         =       sizeof(int),
3555                .mode           =       0644,
3556                .proc_handler   =       proc_dointvec_jiffies,
3557        },
3558        {
3559                .procname       =       "min_adv_mss",
3560                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3561                .maxlen         =       sizeof(int),
3562                .mode           =       0644,
3563                .proc_handler   =       proc_dointvec,
3564        },
3565        {
3566                .procname       =       "gc_min_interval_ms",
3567                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3568                .maxlen         =       sizeof(int),
3569                .mode           =       0644,
3570                .proc_handler   =       proc_dointvec_ms_jiffies,
3571        },
3572        { }
3573};
3574
3575struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3576{
3577        struct ctl_table *table;
3578
3579        table = kmemdup(ipv6_route_table_template,
3580                        sizeof(ipv6_route_table_template),
3581                        GFP_KERNEL);
3582
3583        if (table) {
3584                table[0].data = &net->ipv6.sysctl.flush_delay;
3585                table[0].extra1 = net;
3586                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3587                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3588                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3589                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3590                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3591                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3592                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3593                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3594                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3595
3596                /* Don't export sysctls to unprivileged users */
3597                if (net->user_ns != &init_user_ns)
3598                        table[0].procname = NULL;
3599        }
3600
3601        return table;
3602}
3603#endif
3604
3605static int __net_init ip6_route_net_init(struct net *net)
3606{
3607        int ret = -ENOMEM;
3608
3609        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3610               sizeof(net->ipv6.ip6_dst_ops));
3611
3612        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3613                goto out_ip6_dst_ops;
3614
3615        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3616                                           sizeof(*net->ipv6.ip6_null_entry),
3617                                           GFP_KERNEL);
3618        if (!net->ipv6.ip6_null_entry)
3619                goto out_ip6_dst_entries;
3620        net->ipv6.ip6_null_entry->dst.path =
3621                (struct dst_entry *)net->ipv6.ip6_null_entry;
3622        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3623        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3624                         ip6_template_metrics, true);
3625
3626#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3627        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3628                                               sizeof(*net->ipv6.ip6_prohibit_entry),
3629                                               GFP_KERNEL);
3630        if (!net->ipv6.ip6_prohibit_entry)
3631                goto out_ip6_null_entry;
3632        net->ipv6.ip6_prohibit_entry->dst.path =
3633                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3634        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3635        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3636                         ip6_template_metrics, true);
3637
3638        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3639                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
3640                                               GFP_KERNEL);
3641        if (!net->ipv6.ip6_blk_hole_entry)
3642                goto out_ip6_prohibit_entry;
3643        net->ipv6.ip6_blk_hole_entry->dst.path =
3644                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3645        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3646        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3647                         ip6_template_metrics, true);
3648#endif
3649
3650        net->ipv6.sysctl.flush_delay = 0;
3651        net->ipv6.sysctl.ip6_rt_max_size = 4096;
3652        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3653        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3654        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3655        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3656        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3657        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3658
3659        net->ipv6.ip6_rt_gc_expire = 30*HZ;
3660
3661        ret = 0;
3662out:
3663        return ret;
3664
3665#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3666out_ip6_prohibit_entry:
3667        kfree(net->ipv6.ip6_prohibit_entry);
3668out_ip6_null_entry:
3669        kfree(net->ipv6.ip6_null_entry);
3670#endif
3671out_ip6_dst_entries:
3672        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3673out_ip6_dst_ops:
3674        goto out;
3675}
3676
3677static void __net_exit ip6_route_net_exit(struct net *net)
3678{
3679        kfree(net->ipv6.ip6_null_entry);
3680#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3681        kfree(net->ipv6.ip6_prohibit_entry);
3682        kfree(net->ipv6.ip6_blk_hole_entry);
3683#endif
3684        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3685}
3686
3687static int __net_init ip6_route_net_init_late(struct net *net)
3688{
3689#ifdef CONFIG_PROC_FS
3690        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3691        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3692#endif
3693        return 0;
3694}
3695
3696static void __net_exit ip6_route_net_exit_late(struct net *net)
3697{
3698#ifdef CONFIG_PROC_FS
3699        remove_proc_entry("ipv6_route", net->proc_net);
3700        remove_proc_entry("rt6_stats", net->proc_net);
3701#endif
3702}
3703
3704static struct pernet_operations ip6_route_net_ops = {
3705        .init = ip6_route_net_init,
3706        .exit = ip6_route_net_exit,
3707};
3708
3709static int __net_init ipv6_inetpeer_init(struct net *net)
3710{
3711        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3712
3713        if (!bp)
3714                return -ENOMEM;
3715        inet_peer_base_init(bp);
3716        net->ipv6.peers = bp;
3717        return 0;
3718}
3719
3720static void __net_exit ipv6_inetpeer_exit(struct net *net)
3721{
3722        struct inet_peer_base *bp = net->ipv6.peers;
3723
3724        net->ipv6.peers = NULL;
3725        inetpeer_invalidate_tree(bp);
3726        kfree(bp);
3727}
3728
3729static struct pernet_operations ipv6_inetpeer_ops = {
3730        .init   =       ipv6_inetpeer_init,
3731        .exit   =       ipv6_inetpeer_exit,
3732};
3733
3734static struct pernet_operations ip6_route_net_late_ops = {
3735        .init = ip6_route_net_init_late,
3736        .exit = ip6_route_net_exit_late,
3737};
3738
3739static struct notifier_block ip6_route_dev_notifier = {
3740        .notifier_call = ip6_route_dev_notify,
3741        .priority = 0,
3742};
3743
3744int __init ip6_route_init(void)
3745{
3746        int ret;
3747        int cpu;
3748
3749        ret = -ENOMEM;
3750        ip6_dst_ops_template.kmem_cachep =
3751                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3752                                  SLAB_HWCACHE_ALIGN, NULL);
3753        if (!ip6_dst_ops_template.kmem_cachep)
3754                goto out;
3755
3756        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3757        if (ret)
3758                goto out_kmem_cache;
3759
3760        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3761        if (ret)
3762                goto out_dst_entries;
3763
3764        ret = register_pernet_subsys(&ip6_route_net_ops);
3765        if (ret)
3766                goto out_register_inetpeer;
3767
3768        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3769
3770        /* Registering of the loopback is done before this portion of code,
3771         * the loopback reference in rt6_info will not be taken, do it
3772         * manually for init_net */
3773        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3774        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3775  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3776        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3777        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3778        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3779        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3780  #endif
3781        ret = fib6_init();
3782        if (ret)
3783                goto out_register_subsys;
3784
3785        ret = xfrm6_init();
3786        if (ret)
3787                goto out_fib6_init;
3788
3789        ret = fib6_rules_init();
3790        if (ret)
3791                goto xfrm6_init;
3792
3793        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3794        if (ret)
3795                goto fib6_rules_init;
3796
3797        ret = -ENOBUFS;
3798        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3799            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3800            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3801                goto out_register_late_subsys;
3802
3803        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3804        if (ret)
3805                goto out_register_late_subsys;
3806
3807        for_each_possible_cpu(cpu) {
3808                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3809
3810                INIT_LIST_HEAD(&ul->head);
3811                spin_lock_init(&ul->lock);
3812        }
3813
3814out:
3815        return ret;
3816
3817out_register_late_subsys:
3818        unregister_pernet_subsys(&ip6_route_net_late_ops);
3819fib6_rules_init:
3820        fib6_rules_cleanup();
3821xfrm6_init:
3822        xfrm6_fini();
3823out_fib6_init:
3824        fib6_gc_cleanup();
3825out_register_subsys:
3826        unregister_pernet_subsys(&ip6_route_net_ops);
3827out_register_inetpeer:
3828        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3829out_dst_entries:
3830        dst_entries_destroy(&ip6_dst_blackhole_ops);
3831out_kmem_cache:
3832        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3833        goto out;
3834}
3835
3836void ip6_route_cleanup(void)
3837{
3838        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3839        unregister_pernet_subsys(&ip6_route_net_late_ops);
3840        fib6_rules_cleanup();
3841        xfrm6_fini();
3842        fib6_gc_cleanup();
3843        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3844        unregister_pernet_subsys(&ip6_route_net_ops);
3845        dst_entries_destroy(&ip6_dst_blackhole_ops);
3846        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3847}
3848