linux/net/ipv6/route.c
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/dst_metadata.h>
  58#include <net/xfrm.h>
  59#include <net/netevent.h>
  60#include <net/netlink.h>
  61#include <net/nexthop.h>
  62#include <net/lwtunnel.h>
  63#include <net/ip_tunnels.h>
  64#include <net/l3mdev.h>
  65#include <trace/events/fib6.h>
  66
  67#include <asm/uaccess.h>
  68
  69#ifdef CONFIG_SYSCTL
  70#include <linux/sysctl.h>
  71#endif
  72
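     /* Verdicts from rt6_check_neigh()/rt6_score_route(): SUCCEED means the
      * next hop looks reachable; FAIL_DO_RR tells rt6_select() to
      * round-robin to another router; FAIL_PROBE and FAIL_HARD keep the
      * route from being chosen (FAIL_PROBE still allows a probe).
      */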
  73enum rt6_nud_state {
  74        RT6_NUD_FAIL_HARD = -3,
  75        RT6_NUD_FAIL_PROBE = -2,
  76        RT6_NUD_FAIL_DO_RR = -1,
  77        RT6_NUD_SUCCEED = 1
  78};
  79
  80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
  81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  82static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  83static unsigned int      ip6_mtu(const struct dst_entry *dst);
  84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  85static void             ip6_dst_destroy(struct dst_entry *);
  86static void             ip6_dst_ifdown(struct dst_entry *,
  87                                       struct net_device *dev, int how);
  88static int               ip6_dst_gc(struct dst_ops *ops);
  89
  90static int              ip6_pkt_discard(struct sk_buff *skb);
  91static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  92static int              ip6_pkt_prohibit(struct sk_buff *skb);
  93static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static void             ip6_link_failure(struct sk_buff *skb);
  95static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  96                                           struct sk_buff *skb, u32 mtu);
  97static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  98                                        struct sk_buff *skb);
  99static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 101
 102#ifdef CONFIG_IPV6_ROUTE_INFO
 103static struct rt6_info *rt6_add_route_info(struct net *net,
 104                                           const struct in6_addr *prefix, int prefixlen,
 105                                           const struct in6_addr *gwaddr,
 106                                           struct net_device *dev,
 107                                           unsigned int pref);
 108static struct rt6_info *rt6_get_route_info(struct net *net,
 109                                           const struct in6_addr *prefix, int prefixlen,
 110                                           const struct in6_addr *gwaddr,
 111                                           struct net_device *dev);
 112#endif
 113
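     /* Uncached (DST_NOCACHE) routes are not linked into the fib6 tree, so
      * they are kept on per-cpu lists.  rt6_uncached_list_flush_dev() uses
      * these lists to repoint such routes at the loopback device when
      * their output device disappears.
      */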
 114struct uncached_list {
 115        spinlock_t              lock;
 116        struct list_head        head;
 117};
 118
 119static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 120
 121static void rt6_uncached_list_add(struct rt6_info *rt)
 122{
 123        struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 124
 125        rt->dst.flags |= DST_NOCACHE;
 126        rt->rt6i_uncached_list = ul;
 127
 128        spin_lock_bh(&ul->lock);
 129        list_add_tail(&rt->rt6i_uncached, &ul->head);
 130        spin_unlock_bh(&ul->lock);
 131}
 132
 133static void rt6_uncached_list_del(struct rt6_info *rt)
 134{
 135        if (!list_empty(&rt->rt6i_uncached)) {
 136                struct uncached_list *ul = rt->rt6i_uncached_list;
 137
 138                spin_lock_bh(&ul->lock);
 139                list_del(&rt->rt6i_uncached);
 140                spin_unlock_bh(&ul->lock);
 141        }
 142}
 143
 144static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 145{
 146        struct net_device *loopback_dev = net->loopback_dev;
 147        int cpu;
 148
 149        if (dev == loopback_dev)
 150                return;
 151
 152        for_each_possible_cpu(cpu) {
 153                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 154                struct rt6_info *rt;
 155
 156                spin_lock_bh(&ul->lock);
 157                list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 158                        struct inet6_dev *rt_idev = rt->rt6i_idev;
 159                        struct net_device *rt_dev = rt->dst.dev;
 160
 161                        if (rt_idev->dev == dev) {
 162                                rt->rt6i_idev = in6_dev_get(loopback_dev);
 163                                in6_dev_put(rt_idev);
 164                        }
 165
 166                        if (rt_dev == dev) {
 167                                rt->dst.dev = loopback_dev;
 168                                dev_hold(rt->dst.dev);
 169                                dev_put(rt_dev);
 170                        }
 171                }
 172                spin_unlock_bh(&ul->lock);
 173        }
 174}
 175
 176static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
 177{
 178        return dst_metrics_write_ptr(rt->dst.from);
 179}
 180
 181static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 182{
 183        struct rt6_info *rt = (struct rt6_info *)dst;
 184
 185        if (rt->rt6i_flags & RTF_PCPU)
 186                return rt6_pcpu_cow_metrics(rt);
 187        else if (rt->rt6i_flags & RTF_CACHE)
 188                return NULL;
 189        else
 190                return dst_cow_metrics_generic(dst, old);
 191}
 192
 193static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 194                                             struct sk_buff *skb,
 195                                             const void *daddr)
 196{
 197        struct in6_addr *p = &rt->rt6i_gateway;
 198
 199        if (!ipv6_addr_any(p))
 200                return (const void *) p;
 201        else if (skb)
 202                return &ipv6_hdr(skb)->daddr;
 203        return daddr;
 204}
 205
 206static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 207                                          struct sk_buff *skb,
 208                                          const void *daddr)
 209{
 210        struct rt6_info *rt = (struct rt6_info *) dst;
 211        struct neighbour *n;
 212
 213        daddr = choose_neigh_daddr(rt, skb, daddr);
 214        n = __ipv6_neigh_lookup(dst->dev, daddr);
 215        if (n)
 216                return n;
 217        return neigh_create(&nd_tbl, daddr, dst->dev);
 218}
 219
 220static struct dst_ops ip6_dst_ops_template = {
 221        .family                 =       AF_INET6,
 222        .gc                     =       ip6_dst_gc,
 223        .gc_thresh              =       1024,
 224        .check                  =       ip6_dst_check,
 225        .default_advmss         =       ip6_default_advmss,
 226        .mtu                    =       ip6_mtu,
 227        .cow_metrics            =       ipv6_cow_metrics,
 228        .destroy                =       ip6_dst_destroy,
 229        .ifdown                 =       ip6_dst_ifdown,
 230        .negative_advice        =       ip6_negative_advice,
 231        .link_failure           =       ip6_link_failure,
 232        .update_pmtu            =       ip6_rt_update_pmtu,
 233        .redirect               =       rt6_do_redirect,
 234        .local_out              =       __ip6_local_out,
 235        .neigh_lookup           =       ip6_neigh_lookup,
 236};
 237
 238static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 239{
 240        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 241
 242        return mtu ? : dst->dev->mtu;
 243}
 244
 245static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 246                                         struct sk_buff *skb, u32 mtu)
 247{
 248}
 249
 250static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 251                                      struct sk_buff *skb)
 252{
 253}
 254
 255static struct dst_ops ip6_dst_blackhole_ops = {
 256        .family                 =       AF_INET6,
 257        .destroy                =       ip6_dst_destroy,
 258        .check                  =       ip6_dst_check,
 259        .mtu                    =       ip6_blackhole_mtu,
 260        .default_advmss         =       ip6_default_advmss,
 261        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 262        .redirect               =       ip6_rt_blackhole_redirect,
 263        .cow_metrics            =       dst_cow_metrics_generic,
 264        .neigh_lookup           =       ip6_neigh_lookup,
 265};
 266
 267static const u32 ip6_template_metrics[RTAX_MAX] = {
 268        [RTAX_HOPLIMIT - 1] = 0,
 269};
 270
 271static const struct rt6_info ip6_null_entry_template = {
 272        .dst = {
 273                .__refcnt       = ATOMIC_INIT(1),
 274                .__use          = 1,
 275                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 276                .error          = -ENETUNREACH,
 277                .input          = ip6_pkt_discard,
 278                .output         = ip6_pkt_discard_out,
 279        },
 280        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 281        .rt6i_protocol  = RTPROT_KERNEL,
 282        .rt6i_metric    = ~(u32) 0,
 283        .rt6i_ref       = ATOMIC_INIT(1),
 284};
 285
 286#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 287
 288static const struct rt6_info ip6_prohibit_entry_template = {
 289        .dst = {
 290                .__refcnt       = ATOMIC_INIT(1),
 291                .__use          = 1,
 292                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 293                .error          = -EACCES,
 294                .input          = ip6_pkt_prohibit,
 295                .output         = ip6_pkt_prohibit_out,
 296        },
 297        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 298        .rt6i_protocol  = RTPROT_KERNEL,
 299        .rt6i_metric    = ~(u32) 0,
 300        .rt6i_ref       = ATOMIC_INIT(1),
 301};
 302
 303static const struct rt6_info ip6_blk_hole_entry_template = {
 304        .dst = {
 305                .__refcnt       = ATOMIC_INIT(1),
 306                .__use          = 1,
 307                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 308                .error          = -EINVAL,
 309                .input          = dst_discard,
 310                .output         = dst_discard_out,
 311        },
 312        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 313        .rt6i_protocol  = RTPROT_KERNEL,
 314        .rt6i_metric    = ~(u32) 0,
 315        .rt6i_ref       = ATOMIC_INIT(1),
 316};
 317
 318#endif
 319
 320static void rt6_info_init(struct rt6_info *rt)
 321{
 322        struct dst_entry *dst = &rt->dst;
 323
 324        memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 325        INIT_LIST_HEAD(&rt->rt6i_siblings);
 326        INIT_LIST_HEAD(&rt->rt6i_uncached);
 327}
 328
 329/* allocate dst with ip6_dst_ops */
 330static struct rt6_info *__ip6_dst_alloc(struct net *net,
 331                                        struct net_device *dev,
 332                                        int flags)
 333{
 334        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 335                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 336
 337        if (rt)
 338                rt6_info_init(rt);
 339
 340        return rt;
 341}
 342
 343struct rt6_info *ip6_dst_alloc(struct net *net,
 344                               struct net_device *dev,
 345                               int flags)
 346{
 347        struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
 348
 349        if (rt) {
 350                rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
 351                if (rt->rt6i_pcpu) {
 352                        int cpu;
 353
 354                        for_each_possible_cpu(cpu) {
 355                                struct rt6_info **p;
 356
 357                                p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
 358                                /* no one shares rt */
 359                                *p =  NULL;
 360                        }
 361                } else {
 362                        dst_destroy((struct dst_entry *)rt);
 363                        return NULL;
 364                }
 365        }
 366
 367        return rt;
 368}
 369EXPORT_SYMBOL(ip6_dst_alloc);
 370
 371static void ip6_dst_destroy(struct dst_entry *dst)
 372{
 373        struct rt6_info *rt = (struct rt6_info *)dst;
 374        struct dst_entry *from = dst->from;
 375        struct inet6_dev *idev;
 376
 377        dst_destroy_metrics_generic(dst);
 378        free_percpu(rt->rt6i_pcpu);
 379        rt6_uncached_list_del(rt);
 380
 381        idev = rt->rt6i_idev;
 382        if (idev) {
 383                rt->rt6i_idev = NULL;
 384                in6_dev_put(idev);
 385        }
 386
 387        dst->from = NULL;
 388        dst_release(from);
 389}
 390
 391static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 392                           int how)
 393{
 394        struct rt6_info *rt = (struct rt6_info *)dst;
 395        struct inet6_dev *idev = rt->rt6i_idev;
 396        struct net_device *loopback_dev =
 397                dev_net(dev)->loopback_dev;
 398
 399        if (dev != loopback_dev) {
 400                if (idev && idev->dev == dev) {
 401                        struct inet6_dev *loopback_idev =
 402                                in6_dev_get(loopback_dev);
 403                        if (loopback_idev) {
 404                                rt->rt6i_idev = loopback_idev;
 405                                in6_dev_put(idev);
 406                        }
 407                }
 408        }
 409}
 410
 411static bool __rt6_check_expired(const struct rt6_info *rt)
 412{
 413        if (rt->rt6i_flags & RTF_EXPIRES)
 414                return time_after(jiffies, rt->dst.expires);
 415        else
 416                return false;
 417}
 418
 419static bool rt6_check_expired(const struct rt6_info *rt)
 420{
 421        if (rt->rt6i_flags & RTF_EXPIRES) {
 422                if (time_after(jiffies, rt->dst.expires))
 423                        return true;
 424        } else if (rt->dst.from) {
 425                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 426        }
 427        return false;
 428}
 429
  430/* Multipath route selection:
  431 *   Hash-based function using the packet header and flow label.
  432 * Adapted from fib_info_hashfn().
  433 */
 434static int rt6_info_hash_nhsfn(unsigned int candidate_count,
 435                               const struct flowi6 *fl6)
 436{
 437        return get_hash_from_flowi6(fl6) % candidate_count;
 438}
 439
 440static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
 441                                             struct flowi6 *fl6, int oif,
 442                                             int strict)
 443{
 444        struct rt6_info *sibling, *next_sibling;
 445        int route_choosen;
 446
 447        route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
  448        /* Don't change the route if route_choosen == 0
  449         * (the siblings list does not include this route itself)
  450         */
 451        if (route_choosen)
 452                list_for_each_entry_safe(sibling, next_sibling,
 453                                &match->rt6i_siblings, rt6i_siblings) {
 454                        route_choosen--;
 455                        if (route_choosen == 0) {
 456                                if (rt6_score_route(sibling, oif, strict) < 0)
 457                                        break;
 458                                match = sibling;
 459                                break;
 460                        }
 461                }
 462        return match;
 463}
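     /* Example: with match->rt6i_nsiblings == 2 there are three candidate
      * routes; get_hash_from_flowi6(fl6) % 3 yields 0, 1 or 2.  A value of
      * 0 keeps @match, 1 or 2 walks that many entries down the siblings
      * list; if the selected sibling scores negative, @match is kept.
      */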
 464
 465/*
 466 *      Route lookup. Any table->tb6_lock is implied.
 467 */
 468
 469static inline struct rt6_info *rt6_device_match(struct net *net,
 470                                                    struct rt6_info *rt,
 471                                                    const struct in6_addr *saddr,
 472                                                    int oif,
 473                                                    int flags)
 474{
 475        struct rt6_info *local = NULL;
 476        struct rt6_info *sprt;
 477
 478        if (!oif && ipv6_addr_any(saddr))
 479                goto out;
 480
 481        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 482                struct net_device *dev = sprt->dst.dev;
 483
 484                if (oif) {
 485                        if (dev->ifindex == oif)
 486                                return sprt;
 487                        if (dev->flags & IFF_LOOPBACK) {
 488                                if (!sprt->rt6i_idev ||
 489                                    sprt->rt6i_idev->dev->ifindex != oif) {
 490                                        if (flags & RT6_LOOKUP_F_IFACE)
 491                                                continue;
 492                                        if (local &&
 493                                            local->rt6i_idev->dev->ifindex == oif)
 494                                                continue;
 495                                }
 496                                local = sprt;
 497                        }
 498                } else {
 499                        if (ipv6_chk_addr(net, saddr, dev,
 500                                          flags & RT6_LOOKUP_F_IFACE))
 501                                return sprt;
 502                }
 503        }
 504
 505        if (oif) {
 506                if (local)
 507                        return local;
 508
 509                if (flags & RT6_LOOKUP_F_IFACE)
 510                        return net->ipv6.ip6_null_entry;
 511        }
 512out:
 513        return rt;
 514}
 515
 516#ifdef CONFIG_IPV6_ROUTER_PREF
 517struct __rt6_probe_work {
 518        struct work_struct work;
 519        struct in6_addr target;
 520        struct net_device *dev;
 521};
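     /* The neighbour solicitation itself is sent from a workqueue
      * (rt6_probe_deferred) rather than directly from rt6_probe(), which
      * runs in the packet path under rcu_read_lock_bh() and takes the
      * neighbour lock.
      */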
 522
 523static void rt6_probe_deferred(struct work_struct *w)
 524{
 525        struct in6_addr mcaddr;
 526        struct __rt6_probe_work *work =
 527                container_of(w, struct __rt6_probe_work, work);
 528
 529        addrconf_addr_solict_mult(&work->target, &mcaddr);
 530        ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 531        dev_put(work->dev);
 532        kfree(work);
 533}
 534
 535static void rt6_probe(struct rt6_info *rt)
 536{
 537        struct __rt6_probe_work *work;
 538        struct neighbour *neigh;
  539        /*
  540         * Router Reachability Probing: check whether the
  541         * router for this route is still reachable, even
  542         * though it may not seem necessary at this point.
  543         *
  544         * Router Reachability Probes MUST be rate-limited
  545         * to no more than one per minute.
  546         */
 547        if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
 548                return;
 549        rcu_read_lock_bh();
 550        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 551        if (neigh) {
 552                if (neigh->nud_state & NUD_VALID)
 553                        goto out;
 554
 555                work = NULL;
 556                write_lock(&neigh->lock);
 557                if (!(neigh->nud_state & NUD_VALID) &&
 558                    time_after(jiffies,
 559                               neigh->updated +
 560                               rt->rt6i_idev->cnf.rtr_probe_interval)) {
 561                        work = kmalloc(sizeof(*work), GFP_ATOMIC);
 562                        if (work)
 563                                __neigh_set_probe_once(neigh);
 564                }
 565                write_unlock(&neigh->lock);
 566        } else {
 567                work = kmalloc(sizeof(*work), GFP_ATOMIC);
 568        }
 569
 570        if (work) {
 571                INIT_WORK(&work->work, rt6_probe_deferred);
 572                work->target = rt->rt6i_gateway;
 573                dev_hold(rt->dst.dev);
 574                work->dev = rt->dst.dev;
 575                schedule_work(&work->work);
 576        }
 577
 578out:
 579        rcu_read_unlock_bh();
 580}
 581#else
 582static inline void rt6_probe(struct rt6_info *rt)
 583{
 584}
 585#endif
 586
 587/*
 588 * Default Router Selection (RFC 2461 6.3.6)
 589 */
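     /* rt6_check_dev() scores the outgoing device: 2 for a direct match
      * with @oif (or when no oif is given), 1 for a loopback route whose
      * inet6_dev matches @oif, 0 otherwise.  rt6_score_route() puts the
      * decoded router-preference bits above these two bits.
      */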
 590static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 591{
 592        struct net_device *dev = rt->dst.dev;
 593        if (!oif || dev->ifindex == oif)
 594                return 2;
 595        if ((dev->flags & IFF_LOOPBACK) &&
 596            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 597                return 1;
 598        return 0;
 599}
 600
 601static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
 602{
 603        struct neighbour *neigh;
 604        enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 605
 606        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 607            !(rt->rt6i_flags & RTF_GATEWAY))
 608                return RT6_NUD_SUCCEED;
 609
 610        rcu_read_lock_bh();
 611        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 612        if (neigh) {
 613                read_lock(&neigh->lock);
 614                if (neigh->nud_state & NUD_VALID)
 615                        ret = RT6_NUD_SUCCEED;
 616#ifdef CONFIG_IPV6_ROUTER_PREF
 617                else if (!(neigh->nud_state & NUD_FAILED))
 618                        ret = RT6_NUD_SUCCEED;
 619                else
 620                        ret = RT6_NUD_FAIL_PROBE;
 621#endif
 622                read_unlock(&neigh->lock);
 623        } else {
 624                ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 625                      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 626        }
 627        rcu_read_unlock_bh();
 628
 629        return ret;
 630}
 631
 632static int rt6_score_route(struct rt6_info *rt, int oif,
 633                           int strict)
 634{
 635        int m;
 636
 637        m = rt6_check_dev(rt, oif);
 638        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 639                return RT6_NUD_FAIL_HARD;
 640#ifdef CONFIG_IPV6_ROUTER_PREF
 641        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 642#endif
 643        if (strict & RT6_LOOKUP_F_REACHABLE) {
 644                int n = rt6_check_neigh(rt);
 645                if (n < 0)
 646                        return n;
 647        }
 648        return m;
 649}
 650
 651static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 652                                   int *mpri, struct rt6_info *match,
 653                                   bool *do_rr)
 654{
 655        int m;
 656        bool match_do_rr = false;
 657        struct inet6_dev *idev = rt->rt6i_idev;
 658        struct net_device *dev = rt->dst.dev;
 659
 660        if (dev && !netif_carrier_ok(dev) &&
 661            idev->cnf.ignore_routes_with_linkdown &&
 662            !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 663                goto out;
 664
 665        if (rt6_check_expired(rt))
 666                goto out;
 667
 668        m = rt6_score_route(rt, oif, strict);
 669        if (m == RT6_NUD_FAIL_DO_RR) {
 670                match_do_rr = true;
 671                m = 0; /* lowest valid score */
 672        } else if (m == RT6_NUD_FAIL_HARD) {
 673                goto out;
 674        }
 675
 676        if (strict & RT6_LOOKUP_F_REACHABLE)
 677                rt6_probe(rt);
 678
 679        /* note that m can be RT6_NUD_FAIL_PROBE at this point */
 680        if (m > *mpri) {
 681                *do_rr = match_do_rr;
 682                *mpri = m;
 683                match = rt;
 684        }
 685out:
 686        return match;
 687}
 688
 689static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 690                                     struct rt6_info *rr_head,
 691                                     u32 metric, int oif, int strict,
 692                                     bool *do_rr)
 693{
 694        struct rt6_info *rt, *match, *cont;
 695        int mpri = -1;
 696
 697        match = NULL;
 698        cont = NULL;
 699        for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
 700                if (rt->rt6i_metric != metric) {
 701                        cont = rt;
 702                        break;
 703                }
 704
 705                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 706        }
 707
 708        for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
 709                if (rt->rt6i_metric != metric) {
 710                        cont = rt;
 711                        break;
 712                }
 713
 714                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 715        }
 716
 717        if (match || !cont)
 718                return match;
 719
 720        for (rt = cont; rt; rt = rt->dst.rt6_next)
 721                match = find_match(rt, oif, strict, &mpri, match, do_rr);
 722
 723        return match;
 724}
 725
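     /* rt6_select() picks the best route from the node's leaf list:
      * find_rr_leaf() scores the routes sharing rt0's metric (and only
      * falls back to the remaining routes when nothing matched), and when
      * the chosen route asked for round-robin (RT6_NUD_FAIL_DO_RR)
      * fn->rr_ptr is advanced so the next lookup starts at the following
      * route.
      */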
 726static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 727{
 728        struct rt6_info *match, *rt0;
 729        struct net *net;
 730        bool do_rr = false;
 731
 732        rt0 = fn->rr_ptr;
 733        if (!rt0)
 734                fn->rr_ptr = rt0 = fn->leaf;
 735
 736        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
 737                             &do_rr);
 738
 739        if (do_rr) {
 740                struct rt6_info *next = rt0->dst.rt6_next;
 741
 742                /* no entries matched; do round-robin */
 743                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 744                        next = fn->leaf;
 745
 746                if (next != rt0)
 747                        fn->rr_ptr = next;
 748        }
 749
 750        net = dev_net(rt0->dst.dev);
 751        return match ? match : net->ipv6.ip6_null_entry;
 752}
 753
 754static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
 755{
 756        return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 757}
 758
 759#ifdef CONFIG_IPV6_ROUTE_INFO
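     /* Handle a Route Information option from a Router Advertisement
      * (RFC 4191): validate the option length against the encoded prefix
      * length, then add, refresh or remove the corresponding RTF_ROUTEINFO
      * route depending on the advertised route lifetime and preference.
      */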
 760int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 761                  const struct in6_addr *gwaddr)
 762{
 763        struct net *net = dev_net(dev);
 764        struct route_info *rinfo = (struct route_info *) opt;
 765        struct in6_addr prefix_buf, *prefix;
 766        unsigned int pref;
 767        unsigned long lifetime;
 768        struct rt6_info *rt;
 769
 770        if (len < sizeof(struct route_info)) {
 771                return -EINVAL;
 772        }
 773
 774        /* Sanity check for prefix_len and length */
 775        if (rinfo->length > 3) {
 776                return -EINVAL;
 777        } else if (rinfo->prefix_len > 128) {
 778                return -EINVAL;
 779        } else if (rinfo->prefix_len > 64) {
 780                if (rinfo->length < 2) {
 781                        return -EINVAL;
 782                }
 783        } else if (rinfo->prefix_len > 0) {
 784                if (rinfo->length < 1) {
 785                        return -EINVAL;
 786                }
 787        }
 788
 789        pref = rinfo->route_pref;
 790        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 791                return -EINVAL;
 792
 793        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 794
 795        if (rinfo->length == 3)
 796                prefix = (struct in6_addr *)rinfo->prefix;
 797        else {
 798                /* this function is safe */
 799                ipv6_addr_prefix(&prefix_buf,
 800                                 (struct in6_addr *)rinfo->prefix,
 801                                 rinfo->prefix_len);
 802                prefix = &prefix_buf;
 803        }
 804
 805        if (rinfo->prefix_len == 0)
 806                rt = rt6_get_dflt_router(gwaddr, dev);
 807        else
 808                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 809                                        gwaddr, dev);
 810
 811        if (rt && !lifetime) {
 812                ip6_del_rt(rt);
 813                rt = NULL;
 814        }
 815
 816        if (!rt && lifetime)
 817                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 818                                        dev, pref);
 819        else if (rt)
 820                rt->rt6i_flags = RTF_ROUTEINFO |
 821                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 822
 823        if (rt) {
 824                if (!addrconf_finite_timeout(lifetime))
 825                        rt6_clean_expires(rt);
 826                else
 827                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 828
 829                ip6_rt_put(rt);
 830        }
 831        return 0;
 832}
 833#endif
 834
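     /* Walk back up the fib6 trie after a failed match: climb to the
      * parent node, descending into the parent's source-address subtree
      * when there is one we did not come from, until a node carrying
      * route info (RTN_RTINFO) is found, or give up at the tree root
      * (RTN_TL_ROOT).
      */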
 835static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
 836                                        struct in6_addr *saddr)
 837{
 838        struct fib6_node *pn;
 839        while (1) {
 840                if (fn->fn_flags & RTN_TL_ROOT)
 841                        return NULL;
 842                pn = fn->parent;
 843                if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
 844                        fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
 845                else
 846                        fn = pn;
 847                if (fn->fn_flags & RTN_RTINFO)
 848                        return fn;
 849        }
 850}
 851
 852static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 853                                             struct fib6_table *table,
 854                                             struct flowi6 *fl6, int flags)
 855{
 856        struct fib6_node *fn;
 857        struct rt6_info *rt;
 858
 859        read_lock_bh(&table->tb6_lock);
 860        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 861restart:
 862        rt = fn->leaf;
 863        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 864        if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
 865                rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
 866        if (rt == net->ipv6.ip6_null_entry) {
 867                fn = fib6_backtrack(fn, &fl6->saddr);
 868                if (fn)
 869                        goto restart;
 870        }
 871        dst_use(&rt->dst, jiffies);
 872        read_unlock_bh(&table->tb6_lock);
 873
 874        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
 875
 876        return rt;
 877
 878}
 879
 880struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 881                                    int flags)
 882{
 883        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 884}
 885EXPORT_SYMBOL_GPL(ip6_route_lookup);
 886
 887struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 888                            const struct in6_addr *saddr, int oif, int strict)
 889{
 890        struct flowi6 fl6 = {
 891                .flowi6_oif = oif,
 892                .daddr = *daddr,
 893        };
 894        struct dst_entry *dst;
 895        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 896
 897        if (saddr) {
 898                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 899                flags |= RT6_LOOKUP_F_HAS_SADDR;
 900        }
 901
 902        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 903        if (dst->error == 0)
 904                return (struct rt6_info *) dst;
 905
 906        dst_release(dst);
 907
 908        return NULL;
 909}
 910EXPORT_SYMBOL(rt6_lookup);
 911
  912/* ip6_ins_rt is called with table->tb6_lock NOT held.
  913   It takes a new route entry; if the addition fails for any reason,
  914   the route is freed.  In any case, if the caller does not hold a
  915   reference, the route may be destroyed.
  916 */
 917
 918static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
 919                        struct mx6_config *mxc)
 920{
 921        int err;
 922        struct fib6_table *table;
 923
 924        table = rt->rt6i_table;
 925        write_lock_bh(&table->tb6_lock);
 926        err = fib6_add(&table->tb6_root, rt, info, mxc);
 927        write_unlock_bh(&table->tb6_lock);
 928
 929        return err;
 930}
 931
 932int ip6_ins_rt(struct rt6_info *rt)
 933{
 934        struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
 935        struct mx6_config mxc = { .mx = NULL, };
 936
 937        return __ip6_ins_rt(rt, &info, &mxc);
 938}
 939
 940static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 941                                           const struct in6_addr *daddr,
 942                                           const struct in6_addr *saddr)
 943{
 944        struct rt6_info *rt;
 945
 946        /*
 947         *      Clone the route.
 948         */
 949
 950        if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 951                ort = (struct rt6_info *)ort->dst.from;
 952
 953        rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
 954
 955        if (!rt)
 956                return NULL;
 957
 958        ip6_rt_copy_init(rt, ort);
 959        rt->rt6i_flags |= RTF_CACHE;
 960        rt->rt6i_metric = 0;
 961        rt->dst.flags |= DST_HOST;
 962        rt->rt6i_dst.addr = *daddr;
 963        rt->rt6i_dst.plen = 128;
 964
 965        if (!rt6_is_gw_or_nonexthop(ort)) {
 966                if (ort->rt6i_dst.plen != 128 &&
 967                    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 968                        rt->rt6i_flags |= RTF_ANYCAST;
 969#ifdef CONFIG_IPV6_SUBTREES
 970                if (rt->rt6i_src.plen && saddr) {
 971                        rt->rt6i_src.addr = *saddr;
 972                        rt->rt6i_src.plen = 128;
 973                }
 974#endif
 975        }
 976
 977        return rt;
 978}
 979
 980static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 981{
 982        struct rt6_info *pcpu_rt;
 983
 984        pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
 985                                  rt->dst.dev, rt->dst.flags);
 986
 987        if (!pcpu_rt)
 988                return NULL;
 989        ip6_rt_copy_init(pcpu_rt, rt);
 990        pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
 991        pcpu_rt->rt6i_flags |= RTF_PCPU;
 992        return pcpu_rt;
 993}
 994
 995/* It should be called with read_lock_bh(&tb6_lock) acquired */
 996static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
 997{
 998        struct rt6_info *pcpu_rt, **p;
 999
1000        p = this_cpu_ptr(rt->rt6i_pcpu);
1001        pcpu_rt = *p;
1002
1003        if (pcpu_rt) {
1004                dst_hold(&pcpu_rt->dst);
1005                rt6_dst_from_metrics_check(pcpu_rt);
1006        }
1007        return pcpu_rt;
1008}
1009
1010static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1011{
1012        struct fib6_table *table = rt->rt6i_table;
1013        struct rt6_info *pcpu_rt, *prev, **p;
1014
1015        pcpu_rt = ip6_rt_pcpu_alloc(rt);
1016        if (!pcpu_rt) {
1017                struct net *net = dev_net(rt->dst.dev);
1018
1019                dst_hold(&net->ipv6.ip6_null_entry->dst);
1020                return net->ipv6.ip6_null_entry;
1021        }
1022
1023        read_lock_bh(&table->tb6_lock);
1024        if (rt->rt6i_pcpu) {
1025                p = this_cpu_ptr(rt->rt6i_pcpu);
1026                prev = cmpxchg(p, NULL, pcpu_rt);
1027                if (prev) {
1028                        /* If someone did it before us, return prev instead */
1029                        dst_destroy(&pcpu_rt->dst);
1030                        pcpu_rt = prev;
1031                }
1032        } else {
1033                /* rt has been removed from the fib6 tree
1034                 * before we have a chance to acquire the read_lock.
 1035                 * In this case, don't bother to create a pcpu rt
1036                 * since rt is going away anyway.  The next
1037                 * dst_check() will trigger a re-lookup.
1038                 */
1039                dst_destroy(&pcpu_rt->dst);
1040                pcpu_rt = rt;
1041        }
1042        dst_hold(&pcpu_rt->dst);
1043        rt6_dst_from_metrics_check(pcpu_rt);
1044        read_unlock_bh(&table->tb6_lock);
1045        return pcpu_rt;
1046}
1047
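     /* ip6_pol_route() hands back one of three things: the fib6 entry
      * itself when it is the null entry or already an RTF_CACHE clone, a
      * new uncached RTF_CACHE clone for the FLOWI_FLAG_KNOWN_NH case on a
      * route without a gateway, or (in the common case) a per-cpu copy of
      * the entry created on demand.
      */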
1048struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1049                               int oif, struct flowi6 *fl6, int flags)
1050{
1051        struct fib6_node *fn, *saved_fn;
1052        struct rt6_info *rt;
1053        int strict = 0;
1054
1055        strict |= flags & RT6_LOOKUP_F_IFACE;
1056        strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1057        if (net->ipv6.devconf_all->forwarding == 0)
1058                strict |= RT6_LOOKUP_F_REACHABLE;
1059
1060        read_lock_bh(&table->tb6_lock);
1061
1062        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063        saved_fn = fn;
1064
1065        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1066                oif = 0;
1067
1068redo_rt6_select:
1069        rt = rt6_select(fn, oif, strict);
1070        if (rt->rt6i_nsiblings)
1071                rt = rt6_multipath_select(rt, fl6, oif, strict);
1072        if (rt == net->ipv6.ip6_null_entry) {
1073                fn = fib6_backtrack(fn, &fl6->saddr);
1074                if (fn)
1075                        goto redo_rt6_select;
1076                else if (strict & RT6_LOOKUP_F_REACHABLE) {
1077                        /* also consider unreachable route */
1078                        strict &= ~RT6_LOOKUP_F_REACHABLE;
1079                        fn = saved_fn;
1080                        goto redo_rt6_select;
1081                }
1082        }
1083
1084
1085        if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1086                dst_use(&rt->dst, jiffies);
1087                read_unlock_bh(&table->tb6_lock);
1088
1089                rt6_dst_from_metrics_check(rt);
1090
1091                trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1092                return rt;
1093        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1094                            !(rt->rt6i_flags & RTF_GATEWAY))) {
1095                /* Create a RTF_CACHE clone which will not be
1096                 * owned by the fib6 tree.  It is for the special case where
1097                 * the daddr in the skb during the neighbor look-up is different
 1098                 * from the fl6->daddr used to look up the route here.
1099                 */
1100
1101                struct rt6_info *uncached_rt;
1102
1103                dst_use(&rt->dst, jiffies);
1104                read_unlock_bh(&table->tb6_lock);
1105
1106                uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1107                dst_release(&rt->dst);
1108
1109                if (uncached_rt)
1110                        rt6_uncached_list_add(uncached_rt);
1111                else
1112                        uncached_rt = net->ipv6.ip6_null_entry;
1113
1114                dst_hold(&uncached_rt->dst);
1115
1116                trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
1117                return uncached_rt;
1118
1119        } else {
1120                /* Get a percpu copy */
1121
1122                struct rt6_info *pcpu_rt;
1123
1124                rt->dst.lastuse = jiffies;
1125                rt->dst.__use++;
1126                pcpu_rt = rt6_get_pcpu_route(rt);
1127
1128                if (pcpu_rt) {
1129                        read_unlock_bh(&table->tb6_lock);
1130                } else {
1131                        /* We have to do the read_unlock first
1132                         * because rt6_make_pcpu_route() may trigger
1133                         * ip6_dst_gc() which will take the write_lock.
1134                         */
1135                        dst_hold(&rt->dst);
1136                        read_unlock_bh(&table->tb6_lock);
1137                        pcpu_rt = rt6_make_pcpu_route(rt);
1138                        dst_release(&rt->dst);
1139                }
1140
1141                trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
1142                return pcpu_rt;
1143
1144        }
1145}
1146EXPORT_SYMBOL_GPL(ip6_pol_route);
1147
1148static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1149                                            struct flowi6 *fl6, int flags)
1150{
1151        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1152}
1153
1154struct dst_entry *ip6_route_input_lookup(struct net *net,
1155                                         struct net_device *dev,
1156                                         struct flowi6 *fl6, int flags)
1157{
1158        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1159                flags |= RT6_LOOKUP_F_IFACE;
1160
1161        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1162}
1163EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1164
1165void ip6_route_input(struct sk_buff *skb)
1166{
1167        const struct ipv6hdr *iph = ipv6_hdr(skb);
1168        struct net *net = dev_net(skb->dev);
1169        int flags = RT6_LOOKUP_F_HAS_SADDR;
1170        struct ip_tunnel_info *tun_info;
1171        struct flowi6 fl6 = {
1172                .flowi6_iif = skb->dev->ifindex,
1173                .daddr = iph->daddr,
1174                .saddr = iph->saddr,
1175                .flowlabel = ip6_flowinfo(iph),
1176                .flowi6_mark = skb->mark,
1177                .flowi6_proto = iph->nexthdr,
1178        };
1179
1180        tun_info = skb_tunnel_info(skb);
1181        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1182                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1183        skb_dst_drop(skb);
1184        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1185}
1186
1187static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1188                                             struct flowi6 *fl6, int flags)
1189{
1190        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1191}
1192
1193struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1194                                         struct flowi6 *fl6, int flags)
1195{
1196        bool any_src;
1197
1198        if (rt6_need_strict(&fl6->daddr)) {
1199                struct dst_entry *dst;
1200
1201                dst = l3mdev_link_scope_lookup(net, fl6);
1202                if (dst)
1203                        return dst;
1204        }
1205
1206        fl6->flowi6_iif = LOOPBACK_IFINDEX;
1207
1208        any_src = ipv6_addr_any(&fl6->saddr);
1209        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1210            (fl6->flowi6_oif && any_src))
1211                flags |= RT6_LOOKUP_F_IFACE;
1212
1213        if (!any_src)
1214                flags |= RT6_LOOKUP_F_HAS_SADDR;
1215        else if (sk)
1216                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1217
1218        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1219}
1220EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1221
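     /* ip6_blackhole_route() turns @dst_orig into a standalone copy whose
      * input/output handlers simply discard packets while metrics and
      * addressing are preserved; presumably for callers (e.g. xfrm) that
      * need to keep a flow's dst valid without transmitting through it.
      */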
1222struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1223{
1224        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1225        struct dst_entry *new = NULL;
1226
1227        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1228        if (rt) {
1229                rt6_info_init(rt);
1230
1231                new = &rt->dst;
1232                new->__use = 1;
1233                new->input = dst_discard;
1234                new->output = dst_discard_out;
1235
1236                dst_copy_metrics(new, &ort->dst);
1237                rt->rt6i_idev = ort->rt6i_idev;
1238                if (rt->rt6i_idev)
1239                        in6_dev_hold(rt->rt6i_idev);
1240
1241                rt->rt6i_gateway = ort->rt6i_gateway;
1242                rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1243                rt->rt6i_metric = 0;
1244
1245                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1246#ifdef CONFIG_IPV6_SUBTREES
1247                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1248#endif
1249
1250                dst_free(new);
1251        }
1252
1253        dst_release(dst_orig);
1254        return new ? new : ERR_PTR(-ENOMEM);
1255}
1256
1257/*
1258 *      Destination cache support functions
1259 */
1260
1261static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1262{
1263        if (rt->dst.from &&
1264            dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1265                dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1266}
1267
1268static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1269{
1270        if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1271                return NULL;
1272
1273        if (rt6_check_expired(rt))
1274                return NULL;
1275
1276        return &rt->dst;
1277}
1278
1279static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1280{
1281        if (!__rt6_check_expired(rt) &&
1282            rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1283            rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1284                return &rt->dst;
1285        else
1286                return NULL;
1287}
1288
1289static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1290{
1291        struct rt6_info *rt;
1292
1293        rt = (struct rt6_info *) dst;
1294
 1295        /* All IPv6 dsts are created with ->obsolete set to the value
 1296         * DST_OBSOLETE_FORCE_CHK, which forces validation calls down
 1297         * into this function on every use.
1298         */
1299
1300        rt6_dst_from_metrics_check(rt);
1301
1302        if (rt->rt6i_flags & RTF_PCPU ||
1303            (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1304                return rt6_dst_from_check(rt, cookie);
1305        else
1306                return rt6_check(rt, cookie);
1307}
1308
1309static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1310{
1311        struct rt6_info *rt = (struct rt6_info *) dst;
1312
1313        if (rt) {
1314                if (rt->rt6i_flags & RTF_CACHE) {
1315                        if (rt6_check_expired(rt)) {
1316                                ip6_del_rt(rt);
1317                                dst = NULL;
1318                        }
1319                } else {
1320                        dst_release(dst);
1321                        dst = NULL;
1322                }
1323        }
1324        return dst;
1325}
1326
1327static void ip6_link_failure(struct sk_buff *skb)
1328{
1329        struct rt6_info *rt;
1330
1331        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1332
1333        rt = (struct rt6_info *) skb_dst(skb);
1334        if (rt) {
1335                if (rt->rt6i_flags & RTF_CACHE) {
1336                        dst_hold(&rt->dst);
1337                        ip6_del_rt(rt);
1338                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1339                        rt->rt6i_node->fn_sernum = -1;
1340                }
1341        }
1342}
1343
1344static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1345{
1346        struct net *net = dev_net(rt->dst.dev);
1347
1348        rt->rt6i_flags |= RTF_MODIFIED;
1349        rt->rt6i_pmtu = mtu;
1350        rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1351}
1352
1353static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1354{
1355        return !(rt->rt6i_flags & RTF_CACHE) &&
1356                (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1357}
1358
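     /* PMTU handling: an RTF_CACHE clone (not shared with the fib6 tree)
      * is updated in place; a tree entry or per-cpu copy is instead cloned
      * into a new RTF_CACHE route keyed on the packet's (or socket's)
      * addresses, so the reduced MTU only affects that destination.
      */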
1359static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1360                                 const struct ipv6hdr *iph, u32 mtu)
1361{
1362        struct rt6_info *rt6 = (struct rt6_info *)dst;
1363
1364        if (rt6->rt6i_flags & RTF_LOCAL)
1365                return;
1366
1367        if (dst_metric_locked(dst, RTAX_MTU))
1368                return;
1369
1370        dst_confirm(dst);
1371        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1372        if (mtu >= dst_mtu(dst))
1373                return;
1374
1375        if (!rt6_cache_allowed_for_pmtu(rt6)) {
1376                rt6_do_update_pmtu(rt6, mtu);
1377        } else {
1378                const struct in6_addr *daddr, *saddr;
1379                struct rt6_info *nrt6;
1380
1381                if (iph) {
1382                        daddr = &iph->daddr;
1383                        saddr = &iph->saddr;
1384                } else if (sk) {
1385                        daddr = &sk->sk_v6_daddr;
1386                        saddr = &inet6_sk(sk)->saddr;
1387                } else {
1388                        return;
1389                }
1390                nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1391                if (nrt6) {
1392                        rt6_do_update_pmtu(nrt6, mtu);
1393
1394                        /* ip6_ins_rt(nrt6) will bump the
1395                         * rt6->rt6i_node->fn_sernum
1396                         * which will fail the next rt6_check() and
1397                         * invalidate the sk->sk_dst_cache.
1398                         */
1399                        ip6_ins_rt(nrt6);
1400                }
1401        }
1402}
1403
1404static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1405                               struct sk_buff *skb, u32 mtu)
1406{
1407        __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1408}
1409
1410void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1411                     int oif, u32 mark)
1412{
1413        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1414        struct dst_entry *dst;
1415        struct flowi6 fl6;
1416
1417        memset(&fl6, 0, sizeof(fl6));
1418        fl6.flowi6_oif = oif;
1419        fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1420        fl6.daddr = iph->daddr;
1421        fl6.saddr = iph->saddr;
1422        fl6.flowlabel = ip6_flowinfo(iph);
1423
1424        dst = ip6_route_output(net, NULL, &fl6);
1425        if (!dst->error)
1426                __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1427        dst_release(dst);
1428}
1429EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1430
1431void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1432{
1433        struct dst_entry *dst;
1434
1435        ip6_update_pmtu(skb, sock_net(sk), mtu,
1436                        sk->sk_bound_dev_if, sk->sk_mark);
1437
1438        dst = __sk_dst_get(sk);
1439        if (!dst || !dst->obsolete ||
1440            dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1441                return;
1442
1443        bh_lock_sock(sk);
1444        if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1445                ip6_datagram_dst_update(sk, false);
1446        bh_unlock_sock(sk);
1447}
1448EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1449
1450/* Handle redirects */
1451struct ip6rd_flowi {
1452        struct flowi6 fl6;
1453        struct in6_addr gateway;
1454};
1455
1456static struct rt6_info *__ip6_route_redirect(struct net *net,
1457                                             struct fib6_table *table,
1458                                             struct flowi6 *fl6,
1459                                             int flags)
1460{
1461        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1462        struct rt6_info *rt;
1463        struct fib6_node *fn;
1464
1465        /* Get the "current" route for this destination and
 1466         * check if the redirect has come from the appropriate router.
1467         *
1468         * RFC 4861 specifies that redirects should only be
1469         * accepted if they come from the nexthop to the target.
1470         * Due to the way the routes are chosen, this notion
1471         * is a bit fuzzy and one might need to check all possible
1472         * routes.
1473         */
1474
1475        read_lock_bh(&table->tb6_lock);
1476        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1477restart:
1478        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1479                if (rt6_check_expired(rt))
1480                        continue;
1481                if (rt->dst.error)
1482                        break;
1483                if (!(rt->rt6i_flags & RTF_GATEWAY))
1484                        continue;
1485                if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1486                        continue;
1487                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1488                        continue;
1489                break;
1490        }
1491
1492        if (!rt)
1493                rt = net->ipv6.ip6_null_entry;
1494        else if (rt->dst.error) {
1495                rt = net->ipv6.ip6_null_entry;
1496                goto out;
1497        }
1498
1499        if (rt == net->ipv6.ip6_null_entry) {
1500                fn = fib6_backtrack(fn, &fl6->saddr);
1501                if (fn)
1502                        goto restart;
1503        }
1504
1505out:
1506        dst_hold(&rt->dst);
1507
1508        read_unlock_bh(&table->tb6_lock);
1509
1510        trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
1511        return rt;
1512};
1513
1514static struct dst_entry *ip6_route_redirect(struct net *net,
1515                                        const struct flowi6 *fl6,
1516                                        const struct in6_addr *gateway)
1517{
1518        int flags = RT6_LOOKUP_F_HAS_SADDR;
1519        struct ip6rd_flowi rdfl;
1520
1521        rdfl.fl6 = *fl6;
1522        rdfl.gateway = *gateway;
1523
1524        return fib6_rule_lookup(net, &rdfl.fl6,
1525                                flags, __ip6_route_redirect);
1526}
1527
1528void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1529{
1530        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1531        struct dst_entry *dst;
1532        struct flowi6 fl6;
1533
1534        memset(&fl6, 0, sizeof(fl6));
1535        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1536        fl6.flowi6_oif = oif;
1537        fl6.flowi6_mark = mark;
1538        fl6.daddr = iph->daddr;
1539        fl6.saddr = iph->saddr;
1540        fl6.flowlabel = ip6_flowinfo(iph);
1541
1542        dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1543        rt6_do_redirect(dst, NULL, skb);
1544        dst_release(dst);
1545}
1546EXPORT_SYMBOL_GPL(ip6_redirect);
1547
1548void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1549                            u32 mark)
1550{
1551        const struct ipv6hdr *iph = ipv6_hdr(skb);
1552        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1553        struct dst_entry *dst;
1554        struct flowi6 fl6;
1555
1556        memset(&fl6, 0, sizeof(fl6));
1557        fl6.flowi6_iif = LOOPBACK_IFINDEX;
1558        fl6.flowi6_oif = oif;
1559        fl6.flowi6_mark = mark;
1560        fl6.daddr = msg->dest;
1561        fl6.saddr = iph->daddr;
1562
1563        dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1564        rt6_do_redirect(dst, NULL, skb);
1565        dst_release(dst);
1566}
1567
1568void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1569{
1570        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1571}
1572EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1573
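/* Annotation (added note, not part of the original source): the advertised MSS
 * is derived from the route MTU minus the fixed IPv6 (40 byte) and TCP
 * (20 byte) headers, then clamped between ip6_rt_min_advmss and IPV6_MAXPLEN.
 * For a typical Ethernet MTU of 1500 this yields 1500 - 40 - 20 = 1440.
 */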
1574static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1575{
1576        struct net_device *dev = dst->dev;
1577        unsigned int mtu = dst_mtu(dst);
1578        struct net *net = dev_net(dev);
1579
1580        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1581
1582        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1583                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1584
1585        /*
1586         * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
1587         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1588         * Returning IPV6_MAXPLEN is also valid and means: "any MSS,
1589         * rely only on pmtu discovery".
1590         */
1591        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1592                mtu = IPV6_MAXPLEN;
1593        return mtu;
1594}
1595
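/* Annotation (added note, not part of the original source): ip6_mtu() returns
 * the effective MTU for a dst: the per-route PMTU if one has been learned,
 * otherwise the RTAX_MTU metric, otherwise the device's IPv6 MTU (cnf.mtu6),
 * clamped to IP6_MAX_MTU and reduced by any lwtunnel encapsulation headroom.
 */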
1596static unsigned int ip6_mtu(const struct dst_entry *dst)
1597{
1598        const struct rt6_info *rt = (const struct rt6_info *)dst;
1599        unsigned int mtu = rt->rt6i_pmtu;
1600        struct inet6_dev *idev;
1601
1602        if (mtu)
1603                goto out;
1604
1605        mtu = dst_metric_raw(dst, RTAX_MTU);
1606        if (mtu)
1607                goto out;
1608
1609        mtu = IPV6_MIN_MTU;
1610
1611        rcu_read_lock();
1612        idev = __in6_dev_get(dst->dev);
1613        if (idev)
1614                mtu = idev->cnf.mtu6;
1615        rcu_read_unlock();
1616
1617out:
1618        mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1619
1620        return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
1621}
1622
1623static struct dst_entry *icmp6_dst_gc_list;
1624static DEFINE_SPINLOCK(icmp6_dst_lock);
1625
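/* Annotation (added note, not part of the original source): icmp6_dst_alloc()
 * builds a host route that is never inserted into the FIB.  Instead it is
 * chained on icmp6_dst_gc_list and reclaimed by icmp6_dst_gc() once its
 * refcount drops to zero; it is typically used for locally generated
 * ICMPv6/ndisc packets that must not create FIB state.
 */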
1626struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1627                                  struct flowi6 *fl6)
1628{
1629        struct dst_entry *dst;
1630        struct rt6_info *rt;
1631        struct inet6_dev *idev = in6_dev_get(dev);
1632        struct net *net = dev_net(dev);
1633
1634        if (unlikely(!idev))
1635                return ERR_PTR(-ENODEV);
1636
1637        rt = ip6_dst_alloc(net, dev, 0);
1638        if (unlikely(!rt)) {
1639                in6_dev_put(idev);
1640                dst = ERR_PTR(-ENOMEM);
1641                goto out;
1642        }
1643
1644        rt->dst.flags |= DST_HOST;
1645        rt->dst.output  = ip6_output;
1646        atomic_set(&rt->dst.__refcnt, 1);
1647        rt->rt6i_gateway  = fl6->daddr;
1648        rt->rt6i_dst.addr = fl6->daddr;
1649        rt->rt6i_dst.plen = 128;
1650        rt->rt6i_idev     = idev;
1651        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1652
1653        spin_lock_bh(&icmp6_dst_lock);
1654        rt->dst.next = icmp6_dst_gc_list;
1655        icmp6_dst_gc_list = &rt->dst;
1656        spin_unlock_bh(&icmp6_dst_lock);
1657
1658        fib6_force_start_gc(net);
1659
1660        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1661
1662out:
1663        return dst;
1664}
1665
1666int icmp6_dst_gc(void)
1667{
1668        struct dst_entry *dst, **pprev;
1669        int more = 0;
1670
1671        spin_lock_bh(&icmp6_dst_lock);
1672        pprev = &icmp6_dst_gc_list;
1673
1674        while ((dst = *pprev) != NULL) {
1675                if (!atomic_read(&dst->__refcnt)) {
1676                        *pprev = dst->next;
1677                        dst_free(dst);
1678                } else {
1679                        pprev = &dst->next;
1680                        ++more;
1681                }
1682        }
1683
1684        spin_unlock_bh(&icmp6_dst_lock);
1685
1686        return more;
1687}
1688
1689static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1690                            void *arg)
1691{
1692        struct dst_entry *dst, **pprev;
1693
1694        spin_lock_bh(&icmp6_dst_lock);
1695        pprev = &icmp6_dst_gc_list;
1696        while ((dst = *pprev) != NULL) {
1697                struct rt6_info *rt = (struct rt6_info *) dst;
1698                if (func(rt, arg)) {
1699                        *pprev = dst->next;
1700                        dst_free(dst);
1701                } else {
1702                        pprev = &dst->next;
1703                }
1704        }
1705        spin_unlock_bh(&icmp6_dst_lock);
1706}
1707
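/* Annotation (added note, not part of the original source): ip6_dst_gc() is
 * the dst_ops garbage-collection hook.  It skips work while the entry count is
 * at or below ip6_rt_max_size and the minimum GC interval has not elapsed;
 * otherwise it runs fib6_run_gc() with an expire value that grows on each pass
 * and decays by ip6_rt_gc_elasticity.
 */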
1708static int ip6_dst_gc(struct dst_ops *ops)
1709{
1710        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1711        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1712        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1713        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1714        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1715        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1716        int entries;
1717
1718        entries = dst_entries_get_fast(ops);
1719        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1720            entries <= rt_max_size)
1721                goto out;
1722
1723        net->ipv6.ip6_rt_gc_expire++;
1724        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1725        entries = dst_entries_get_slow(ops);
1726        if (entries < ops->gc_thresh)
1727                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1728out:
1729        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1730        return entries > rt_max_size;
1731}
1732
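/* Annotation (added note, not part of the original source):
 * ip6_convert_metrics() copies the RTA_METRICS nested attributes from a
 * fib6_config into an RTAX_MAX-sized u32 array indexed by (type - 1),
 * translating RTAX_CC_ALGO names into congestion-control keys and clamping
 * RTAX_HOPLIMIT to 255.  The caller owns (and must kfree()) mxc->mx.
 */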
1733static int ip6_convert_metrics(struct mx6_config *mxc,
1734                               const struct fib6_config *cfg)
1735{
1736        bool ecn_ca = false;
1737        struct nlattr *nla;
1738        int remaining;
1739        u32 *mp;
1740
1741        if (!cfg->fc_mx)
1742                return 0;
1743
1744        mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1745        if (unlikely(!mp))
1746                return -ENOMEM;
1747
1748        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1749                int type = nla_type(nla);
1750                u32 val;
1751
1752                if (!type)
1753                        continue;
1754                if (unlikely(type > RTAX_MAX))
1755                        goto err;
1756
1757                if (type == RTAX_CC_ALGO) {
1758                        char tmp[TCP_CA_NAME_MAX];
1759
1760                        nla_strlcpy(tmp, nla, sizeof(tmp));
1761                        val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1762                        if (val == TCP_CA_UNSPEC)
1763                                goto err;
1764                } else {
1765                        val = nla_get_u32(nla);
1766                }
1767                if (type == RTAX_HOPLIMIT && val > 255)
1768                        val = 255;
1769                if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1770                        goto err;
1771
1772                mp[type - 1] = val;
1773                __set_bit(type - 1, mxc->mx_valid);
1774        }
1775
1776        if (ecn_ca) {
1777                __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1778                mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1779        }
1780
1781        mxc->mx = mp;
1782        return 0;
1783 err:
1784        kfree(mp);
1785        return -EINVAL;
1786}
1787
1788static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1789                                            struct fib6_config *cfg,
1790                                            const struct in6_addr *gw_addr)
1791{
1792        struct flowi6 fl6 = {
1793                .flowi6_oif = cfg->fc_ifindex,
1794                .daddr = *gw_addr,
1795                .saddr = cfg->fc_prefsrc,
1796        };
1797        struct fib6_table *table;
1798        struct rt6_info *rt;
1799        int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
1800
1801        table = fib6_get_table(net, cfg->fc_table);
1802        if (!table)
1803                return NULL;
1804
1805        if (!ipv6_addr_any(&cfg->fc_prefsrc))
1806                flags |= RT6_LOOKUP_F_HAS_SADDR;
1807
1808        rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1809
1810        /* if table lookup failed, fall back to full lookup */
1811        if (rt == net->ipv6.ip6_null_entry) {
1812                ip6_rt_put(rt);
1813                rt = NULL;
1814        }
1815
1816        return rt;
1817}
1818
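/* Annotation (added note, not part of the original source):
 * ip6_route_info_create() turns a fib6_config into a fully initialised
 * rt6_info but does not insert it into the FIB; that is left to the caller
 * (ip6_route_add() / ip6_route_multipath_add()).  It validates prefix
 * lengths, resolves the output device, selects or creates the FIB table,
 * builds any lwtunnel state, promotes loopback routes to reject routes and
 * validates gateway nexthops.  On failure it returns an ERR_PTR() value.
 */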
1819static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1820{
1821        struct net *net = cfg->fc_nlinfo.nl_net;
1822        struct rt6_info *rt = NULL;
1823        struct net_device *dev = NULL;
1824        struct inet6_dev *idev = NULL;
1825        struct fib6_table *table;
1826        int addr_type;
1827        int err = -EINVAL;
1828
1829        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1830                goto out;
1831#ifndef CONFIG_IPV6_SUBTREES
1832        if (cfg->fc_src_len)
1833                goto out;
1834#endif
1835        if (cfg->fc_ifindex) {
1836                err = -ENODEV;
1837                dev = dev_get_by_index(net, cfg->fc_ifindex);
1838                if (!dev)
1839                        goto out;
1840                idev = in6_dev_get(dev);
1841                if (!idev)
1842                        goto out;
1843        }
1844
1845        if (cfg->fc_metric == 0)
1846                cfg->fc_metric = IP6_RT_PRIO_USER;
1847
1848        err = -ENOBUFS;
1849        if (cfg->fc_nlinfo.nlh &&
1850            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1851                table = fib6_get_table(net, cfg->fc_table);
1852                if (!table) {
1853                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1854                        table = fib6_new_table(net, cfg->fc_table);
1855                }
1856        } else {
1857                table = fib6_new_table(net, cfg->fc_table);
1858        }
1859
1860        if (!table)
1861                goto out;
1862
1863        rt = ip6_dst_alloc(net, NULL,
1864                           (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1865
1866        if (!rt) {
1867                err = -ENOMEM;
1868                goto out;
1869        }
1870
1871        if (cfg->fc_flags & RTF_EXPIRES)
1872                rt6_set_expires(rt, jiffies +
1873                                clock_t_to_jiffies(cfg->fc_expires));
1874        else
1875                rt6_clean_expires(rt);
1876
1877        if (cfg->fc_protocol == RTPROT_UNSPEC)
1878                cfg->fc_protocol = RTPROT_BOOT;
1879        rt->rt6i_protocol = cfg->fc_protocol;
1880
1881        addr_type = ipv6_addr_type(&cfg->fc_dst);
1882
1883        if (addr_type & IPV6_ADDR_MULTICAST)
1884                rt->dst.input = ip6_mc_input;
1885        else if (cfg->fc_flags & RTF_LOCAL)
1886                rt->dst.input = ip6_input;
1887        else
1888                rt->dst.input = ip6_forward;
1889
1890        rt->dst.output = ip6_output;
1891
1892        if (cfg->fc_encap) {
1893                struct lwtunnel_state *lwtstate;
1894
1895                err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1896                                           cfg->fc_encap, AF_INET6, cfg,
1897                                           &lwtstate);
1898                if (err)
1899                        goto out;
1900                rt->dst.lwtstate = lwtstate_get(lwtstate);
1901                if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1902                        rt->dst.lwtstate->orig_output = rt->dst.output;
1903                        rt->dst.output = lwtunnel_output;
1904                }
1905                if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1906                        rt->dst.lwtstate->orig_input = rt->dst.input;
1907                        rt->dst.input = lwtunnel_input;
1908                }
1909        }
1910
1911        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1912        rt->rt6i_dst.plen = cfg->fc_dst_len;
1913        if (rt->rt6i_dst.plen == 128)
1914                rt->dst.flags |= DST_HOST;
1915
1916#ifdef CONFIG_IPV6_SUBTREES
1917        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1918        rt->rt6i_src.plen = cfg->fc_src_len;
1919#endif
1920
1921        rt->rt6i_metric = cfg->fc_metric;
1922
1923        /* We cannot add true routes via loopback here,
1924           they would result in kernel looping; promote them to reject routes
1925         */
1926        if ((cfg->fc_flags & RTF_REJECT) ||
1927            (dev && (dev->flags & IFF_LOOPBACK) &&
1928             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1929             !(cfg->fc_flags & RTF_LOCAL))) {
1930                /* hold loopback dev/idev if we haven't done so. */
1931                if (dev != net->loopback_dev) {
1932                        if (dev) {
1933                                dev_put(dev);
1934                                in6_dev_put(idev);
1935                        }
1936                        dev = net->loopback_dev;
1937                        dev_hold(dev);
1938                        idev = in6_dev_get(dev);
1939                        if (!idev) {
1940                                err = -ENODEV;
1941                                goto out;
1942                        }
1943                }
1944                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1945                switch (cfg->fc_type) {
1946                case RTN_BLACKHOLE:
1947                        rt->dst.error = -EINVAL;
1948                        rt->dst.output = dst_discard_out;
1949                        rt->dst.input = dst_discard;
1950                        break;
1951                case RTN_PROHIBIT:
1952                        rt->dst.error = -EACCES;
1953                        rt->dst.output = ip6_pkt_prohibit_out;
1954                        rt->dst.input = ip6_pkt_prohibit;
1955                        break;
1956                case RTN_THROW:
1957                case RTN_UNREACHABLE:
1958                default:
1959                        rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1960                                        : (cfg->fc_type == RTN_UNREACHABLE)
1961                                        ? -EHOSTUNREACH : -ENETUNREACH;
1962                        rt->dst.output = ip6_pkt_discard_out;
1963                        rt->dst.input = ip6_pkt_discard;
1964                        break;
1965                }
1966                goto install_route;
1967        }
1968
1969        if (cfg->fc_flags & RTF_GATEWAY) {
1970                const struct in6_addr *gw_addr;
1971                int gwa_type;
1972
1973                gw_addr = &cfg->fc_gateway;
1974                gwa_type = ipv6_addr_type(gw_addr);
1975
1976                /* if gw_addr is local we will fail to detect this while the
1977                 * address is still TENTATIVE (DAD in progress): rt6_lookup()
1978                 * will return the already-added prefix route via the interface
1979                 * the prefix route was assigned to, which might be non-loopback.
1980                 */
1981                err = -EINVAL;
1982                if (ipv6_chk_addr_and_flags(net, gw_addr,
1983                                            gwa_type & IPV6_ADDR_LINKLOCAL ?
1984                                            dev : NULL, 0, 0))
1985                        goto out;
1986
1987                rt->rt6i_gateway = *gw_addr;
1988
1989                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1990                        struct rt6_info *grt = NULL;
1991
1992                        /* IPv6 strictly forbids using non-link-local
1993                           addresses as a nexthop address.
1994                           Otherwise, a router will not be able to send redirects.
1995                           That is generally good, but in some (rare!) circumstances
1996                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1997                           some exceptions. --ANK
1998                         */
1999                        if (!(gwa_type & IPV6_ADDR_UNICAST))
2000                                goto out;
2001
2002                        if (cfg->fc_table) {
2003                                grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2004
2005                                if (grt) {
2006                                        if (grt->rt6i_flags & RTF_GATEWAY ||
2007                                            (dev && dev != grt->dst.dev)) {
2008                                                ip6_rt_put(grt);
2009                                                grt = NULL;
2010                                        }
2011                                }
2012                        }
2013
2014                        if (!grt)
2015                                grt = rt6_lookup(net, gw_addr, NULL,
2016                                                 cfg->fc_ifindex, 1);
2017
2018                        err = -EHOSTUNREACH;
2019                        if (!grt)
2020                                goto out;
2021                        if (dev) {
2022                                if (dev != grt->dst.dev) {
2023                                        ip6_rt_put(grt);
2024                                        goto out;
2025                                }
2026                        } else {
2027                                dev = grt->dst.dev;
2028                                idev = grt->rt6i_idev;
2029                                dev_hold(dev);
2030                                in6_dev_hold(grt->rt6i_idev);
2031                        }
2032                        if (!(grt->rt6i_flags & RTF_GATEWAY))
2033                                err = 0;
2034                        ip6_rt_put(grt);
2035
2036                        if (err)
2037                                goto out;
2038                }
2039                err = -EINVAL;
2040                if (!dev || (dev->flags & IFF_LOOPBACK))
2041                        goto out;
2042        }
2043
2044        err = -ENODEV;
2045        if (!dev)
2046                goto out;
2047
2048        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2049                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2050                        err = -EINVAL;
2051                        goto out;
2052                }
2053                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
2054                rt->rt6i_prefsrc.plen = 128;
2055        } else
2056                rt->rt6i_prefsrc.plen = 0;
2057
2058        rt->rt6i_flags = cfg->fc_flags;
2059
2060install_route:
2061        rt->dst.dev = dev;
2062        rt->rt6i_idev = idev;
2063        rt->rt6i_table = table;
2064
2065        cfg->fc_nlinfo.nl_net = dev_net(dev);
2066
2067        return rt;
2068out:
2069        if (dev)
2070                dev_put(dev);
2071        if (idev)
2072                in6_dev_put(idev);
2073        if (rt)
2074                dst_free(&rt->dst);
2075
2076        return ERR_PTR(err);
2077}
2078
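/* Illustrative sketch (added note, not part of the original source; the
 * variables prefix, gw, dev and net are hypothetical): a caller adding a
 * gatewayed /64 route would typically fill a fib6_config along these lines
 * and pass it to ip6_route_add():
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst		= prefix,
 *		.fc_dst_len	= 64,
 *		.fc_gateway	= gw,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_flags	= RTF_UP | RTF_GATEWAY,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *	err = ip6_route_add(&cfg);
 */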
2079int ip6_route_add(struct fib6_config *cfg)
2080{
2081        struct mx6_config mxc = { .mx = NULL, };
2082        struct rt6_info *rt;
2083        int err;
2084
2085        rt = ip6_route_info_create(cfg);
2086        if (IS_ERR(rt)) {
2087                err = PTR_ERR(rt);
2088                rt = NULL;
2089                goto out;
2090        }
2091
2092        err = ip6_convert_metrics(&mxc, cfg);
2093        if (err)
2094                goto out;
2095
2096        err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2097
2098        kfree(mxc.mx);
2099
2100        return err;
2101out:
2102        if (rt)
2103                dst_free(&rt->dst);
2104
2105        return err;
2106}
2107
2108static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2109{
2110        int err;
2111        struct fib6_table *table;
2112        struct net *net = dev_net(rt->dst.dev);
2113
2114        if (rt == net->ipv6.ip6_null_entry ||
2115            rt->dst.flags & DST_NOCACHE) {
2116                err = -ENOENT;
2117                goto out;
2118        }
2119
2120        table = rt->rt6i_table;
2121        write_lock_bh(&table->tb6_lock);
2122        err = fib6_del(rt, info);
2123        write_unlock_bh(&table->tb6_lock);
2124
2125out:
2126        ip6_rt_put(rt);
2127        return err;
2128}
2129
2130int ip6_del_rt(struct rt6_info *rt)
2131{
2132        struct nl_info info = {
2133                .nl_net = dev_net(rt->dst.dev),
2134        };
2135        return __ip6_del_rt(rt, &info);
2136}
2137
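/* Annotation (added note, not part of the original source): ip6_route_del()
 * locates the fib6_node for the requested prefix and removes the first leaf
 * route matching the optional ifindex, gateway and metric filters; RTF_CACHE
 * clones are skipped unless the request itself carries RTF_CACHE.  Returns
 * -ESRCH if nothing matched.
 */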
2138static int ip6_route_del(struct fib6_config *cfg)
2139{
2140        struct fib6_table *table;
2141        struct fib6_node *fn;
2142        struct rt6_info *rt;
2143        int err = -ESRCH;
2144
2145        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2146        if (!table)
2147                return err;
2148
2149        read_lock_bh(&table->tb6_lock);
2150
2151        fn = fib6_locate(&table->tb6_root,
2152                         &cfg->fc_dst, cfg->fc_dst_len,
2153                         &cfg->fc_src, cfg->fc_src_len);
2154
2155        if (fn) {
2156                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2157                        if ((rt->rt6i_flags & RTF_CACHE) &&
2158                            !(cfg->fc_flags & RTF_CACHE))
2159                                continue;
2160                        if (cfg->fc_ifindex &&
2161                            (!rt->dst.dev ||
2162                             rt->dst.dev->ifindex != cfg->fc_ifindex))
2163                                continue;
2164                        if (cfg->fc_flags & RTF_GATEWAY &&
2165                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2166                                continue;
2167                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2168                                continue;
2169                        dst_hold(&rt->dst);
2170                        read_unlock_bh(&table->tb6_lock);
2171
2172                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2173                }
2174        }
2175        read_unlock_bh(&table->tb6_lock);
2176
2177        return err;
2178}
2179
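/* Annotation (added note, not part of the original source): rt6_do_redirect()
 * processes an ICMPv6 Redirect: it sanity-checks the message and its ND
 * options, updates the neighbour cache for the new first hop via
 * ndisc_update(), clones an RTF_CACHE route pointing at that gateway with
 * ip6_rt_cache_alloc(), inserts it, and raises a NETEVENT_REDIRECT
 * notification so interested subsystems can react.
 */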
2180static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2181{
2182        struct netevent_redirect netevent;
2183        struct rt6_info *rt, *nrt = NULL;
2184        struct ndisc_options ndopts;
2185        struct inet6_dev *in6_dev;
2186        struct neighbour *neigh;
2187        struct rd_msg *msg;
2188        int optlen, on_link;
2189        u8 *lladdr;
2190
2191        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2192        optlen -= sizeof(*msg);
2193
2194        if (optlen < 0) {
2195                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2196                return;
2197        }
2198
2199        msg = (struct rd_msg *)icmp6_hdr(skb);
2200
2201        if (ipv6_addr_is_multicast(&msg->dest)) {
2202                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2203                return;
2204        }
2205
2206        on_link = 0;
2207        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2208                on_link = 1;
2209        } else if (ipv6_addr_type(&msg->target) !=
2210                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2211                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2212                return;
2213        }
2214
2215        in6_dev = __in6_dev_get(skb->dev);
2216        if (!in6_dev)
2217                return;
2218        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2219                return;
2220
2221        /* RFC2461 8.1:
2222         *      The IP source address of the Redirect MUST be the same as the current
2223         *      first-hop router for the specified ICMP Destination Address.
2224         */
2225
2226        if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
2227                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2228                return;
2229        }
2230
2231        lladdr = NULL;
2232        if (ndopts.nd_opts_tgt_lladdr) {
2233                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2234                                             skb->dev);
2235                if (!lladdr) {
2236                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2237                        return;
2238                }
2239        }
2240
2241        rt = (struct rt6_info *) dst;
2242        if (rt->rt6i_flags & RTF_REJECT) {
2243                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2244                return;
2245        }
2246
2247        /* Redirect received -> path was valid.
2248         * Redirects are sent only in response to data packets,
2249         * so this nexthop is apparently reachable. --ANK
2250         */
2251        dst_confirm(&rt->dst);
2252
2253        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2254        if (!neigh)
2255                return;
2256
2257        /*
2258         *      We have finally decided to accept it.
2259         */
2260
2261        ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
2262                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
2263                     NEIGH_UPDATE_F_OVERRIDE|
2264                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2265                                     NEIGH_UPDATE_F_ISROUTER)),
2266                     NDISC_REDIRECT, &ndopts);
2267
2268        nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2269        if (!nrt)
2270                goto out;
2271
2272        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2273        if (on_link)
2274                nrt->rt6i_flags &= ~RTF_GATEWAY;
2275
2276        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2277
2278        if (ip6_ins_rt(nrt))
2279                goto out;
2280
2281        netevent.old = &rt->dst;
2282        netevent.new = &nrt->dst;
2283        netevent.daddr = &msg->dest;
2284        netevent.neigh = neigh;
2285        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2286
2287        if (rt->rt6i_flags & RTF_CACHE) {
2288                rt = (struct rt6_info *) dst_clone(&rt->dst);
2289                ip6_del_rt(rt);
2290        }
2291
2292out:
2293        neigh_release(neigh);
2294}
2295
2296/*
2297 *      Misc support functions
2298 */
2299
2300static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2301{
2302        BUG_ON(from->dst.from);
2303
2304        rt->rt6i_flags &= ~RTF_EXPIRES;
2305        dst_hold(&from->dst);
2306        rt->dst.from = &from->dst;
2307        dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2308}
2309
2310static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2311{
2312        rt->dst.input = ort->dst.input;
2313        rt->dst.output = ort->dst.output;
2314        rt->rt6i_dst = ort->rt6i_dst;
2315        rt->dst.error = ort->dst.error;
2316        rt->rt6i_idev = ort->rt6i_idev;
2317        if (rt->rt6i_idev)
2318                in6_dev_hold(rt->rt6i_idev);
2319        rt->dst.lastuse = jiffies;
2320        rt->rt6i_gateway = ort->rt6i_gateway;
2321        rt->rt6i_flags = ort->rt6i_flags;
2322        rt6_set_from(rt, ort);
2323        rt->rt6i_metric = ort->rt6i_metric;
2324#ifdef CONFIG_IPV6_SUBTREES
2325        rt->rt6i_src = ort->rt6i_src;
2326#endif
2327        rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2328        rt->rt6i_table = ort->rt6i_table;
2329        rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2330}
2331
2332#ifdef CONFIG_IPV6_ROUTE_INFO
2333static struct rt6_info *rt6_get_route_info(struct net *net,
2334                                           const struct in6_addr *prefix, int prefixlen,
2335                                           const struct in6_addr *gwaddr,
2336                                           struct net_device *dev)
2337{
2338        u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2339        int ifindex = dev->ifindex;
2340        struct fib6_node *fn;
2341        struct rt6_info *rt = NULL;
2342        struct fib6_table *table;
2343
2344        table = fib6_get_table(net, tb_id);
2345        if (!table)
2346                return NULL;
2347
2348        read_lock_bh(&table->tb6_lock);
2349        fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2350        if (!fn)
2351                goto out;
2352
2353        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2354                if (rt->dst.dev->ifindex != ifindex)
2355                        continue;
2356                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2357                        continue;
2358                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2359                        continue;
2360                dst_hold(&rt->dst);
2361                break;
2362        }
2363out:
2364        read_unlock_bh(&table->tb6_lock);
2365        return rt;
2366}
2367
2368static struct rt6_info *rt6_add_route_info(struct net *net,
2369                                           const struct in6_addr *prefix, int prefixlen,
2370                                           const struct in6_addr *gwaddr,
2371                                           struct net_device *dev,
2372                                           unsigned int pref)
2373{
2374        struct fib6_config cfg = {
2375                .fc_metric      = IP6_RT_PRIO_USER,
2376                .fc_ifindex     = dev->ifindex,
2377                .fc_dst_len     = prefixlen,
2378                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2379                                  RTF_UP | RTF_PREF(pref),
2380                .fc_nlinfo.portid = 0,
2381                .fc_nlinfo.nlh = NULL,
2382                .fc_nlinfo.nl_net = net,
2383        };
2384
2385        cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2386        cfg.fc_dst = *prefix;
2387        cfg.fc_gateway = *gwaddr;
2388
2389        /* We should treat it as a default route if prefix length is 0. */
2390        if (!prefixlen)
2391                cfg.fc_flags |= RTF_DEFAULT;
2392
2393        ip6_route_add(&cfg);
2394
2395        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2396}
2397#endif
2398
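/* Annotation (added note, not part of the original source):
 * rt6_get_dflt_router() searches the default-route table (or the device's
 * l3mdev table) for the RTF_ADDRCONF | RTF_DEFAULT entry learned from a
 * Router Advertisement whose gateway matches @addr on @dev, taking a
 * reference on it if found.
 */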
2399struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2400{
2401        u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
2402        struct rt6_info *rt;
2403        struct fib6_table *table;
2404
2405        table = fib6_get_table(dev_net(dev), tb_id);
2406        if (!table)
2407                return NULL;
2408
2409        read_lock_bh(&table->tb6_lock);
2410        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2411                if (dev == rt->dst.dev &&
2412                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2413                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
2414                        break;
2415        }
2416        if (rt)
2417                dst_hold(&rt->dst);
2418        read_unlock_bh(&table->tb6_lock);
2419        return rt;
2420}
2421
2422struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2423                                     struct net_device *dev,
2424                                     unsigned int pref)
2425{
2426        struct fib6_config cfg = {
2427                .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2428                .fc_metric      = IP6_RT_PRIO_USER,
2429                .fc_ifindex     = dev->ifindex,
2430                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2431                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2432                .fc_nlinfo.portid = 0,
2433                .fc_nlinfo.nlh = NULL,
2434                .fc_nlinfo.nl_net = dev_net(dev),
2435        };
2436
2437        cfg.fc_gateway = *gwaddr;
2438
2439        if (!ip6_route_add(&cfg)) {
2440                struct fib6_table *table;
2441
2442                table = fib6_get_table(dev_net(dev), cfg.fc_table);
2443                if (table)
2444                        table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2445        }
2446
2447        return rt6_get_dflt_router(gwaddr, dev);
2448}
2449
2450static void __rt6_purge_dflt_routers(struct fib6_table *table)
2451{
2452        struct rt6_info *rt;
2453
2454restart:
2455        read_lock_bh(&table->tb6_lock);
2456        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2457                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2458                    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2459                        dst_hold(&rt->dst);
2460                        read_unlock_bh(&table->tb6_lock);
2461                        ip6_del_rt(rt);
2462                        goto restart;
2463                }
2464        }
2465        read_unlock_bh(&table->tb6_lock);
2466
2467        table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2468}
2469
2470void rt6_purge_dflt_routers(struct net *net)
2471{
2472        struct fib6_table *table;
2473        struct hlist_head *head;
2474        unsigned int h;
2475
2476        rcu_read_lock();
2477
2478        for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2479                head = &net->ipv6.fib_table_hash[h];
2480                hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2481                        if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2482                                __rt6_purge_dflt_routers(table);
2483                }
2484        }
2485
2486        rcu_read_unlock();
2487}
2488
2489static void rtmsg_to_fib6_config(struct net *net,
2490                                 struct in6_rtmsg *rtmsg,
2491                                 struct fib6_config *cfg)
2492{
2493        memset(cfg, 0, sizeof(*cfg));
2494
2495        cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2496                         : RT6_TABLE_MAIN;
2497        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2498        cfg->fc_metric = rtmsg->rtmsg_metric;
2499        cfg->fc_expires = rtmsg->rtmsg_info;
2500        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2501        cfg->fc_src_len = rtmsg->rtmsg_src_len;
2502        cfg->fc_flags = rtmsg->rtmsg_flags;
2503
2504        cfg->fc_nlinfo.nl_net = net;
2505
2506        cfg->fc_dst = rtmsg->rtmsg_dst;
2507        cfg->fc_src = rtmsg->rtmsg_src;
2508        cfg->fc_gateway = rtmsg->rtmsg_gateway;
2509}
2510
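/* Illustrative sketch (added note, not part of the original source; fd,
 * prefix and ifindex are hypothetical): user space reaches SIOCADDRT /
 * SIOCDELRT roughly like this, with an AF_INET6 socket and a struct
 * in6_rtmsg:
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *	rtmsg.rtmsg_dst     = prefix;
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = ifindex;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	ioctl(fd, SIOCADDRT, &rtmsg);
 */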
2511int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2512{
2513        struct fib6_config cfg;
2514        struct in6_rtmsg rtmsg;
2515        int err;
2516
2517        switch (cmd) {
2518        case SIOCADDRT:         /* Add a route */
2519        case SIOCDELRT:         /* Delete a route */
2520                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2521                        return -EPERM;
2522                err = copy_from_user(&rtmsg, arg,
2523                                     sizeof(struct in6_rtmsg));
2524                if (err)
2525                        return -EFAULT;
2526
2527                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2528
2529                rtnl_lock();
2530                switch (cmd) {
2531                case SIOCADDRT:
2532                        err = ip6_route_add(&cfg);
2533                        break;
2534                case SIOCDELRT:
2535                        err = ip6_route_del(&cfg);
2536                        break;
2537                default:
2538                        err = -EINVAL;
2539                }
2540                rtnl_unlock();
2541
2542                return err;
2543        }
2544
2545        return -EINVAL;
2546}
2547
2548/*
2549 *      Drop the packet on the floor
2550 */
2551
2552static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2553{
2554        int type;
2555        struct dst_entry *dst = skb_dst(skb);
2556        switch (ipstats_mib_noroutes) {
2557        case IPSTATS_MIB_INNOROUTES:
2558                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2559                if (type == IPV6_ADDR_ANY) {
2560                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2561                                      IPSTATS_MIB_INADDRERRORS);
2562                        break;
2563                }
2564                /* FALLTHROUGH */
2565        case IPSTATS_MIB_OUTNOROUTES:
2566                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2567                              ipstats_mib_noroutes);
2568                break;
2569        }
2570        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2571        kfree_skb(skb);
2572        return 0;
2573}
2574
2575static int ip6_pkt_discard(struct sk_buff *skb)
2576{
2577        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2578}
2579
2580static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2581{
2582        skb->dev = skb_dst(skb)->dev;
2583        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2584}
2585
2586static int ip6_pkt_prohibit(struct sk_buff *skb)
2587{
2588        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2589}
2590
2591static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2592{
2593        skb->dev = skb_dst(skb)->dev;
2594        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2595}
2596
2597/*
2598 *      Allocate a dst for local (unicast / anycast) address.
2599 */
2600
2601struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2602                                    const struct in6_addr *addr,
2603                                    bool anycast)
2604{
2605        u32 tb_id;
2606        struct net *net = dev_net(idev->dev);
2607        struct net_device *dev = net->loopback_dev;
2608        struct rt6_info *rt;
2609
2610        /* use the L3 master device as the loopback for host routes if the
2611         * device is enslaved and the address is not link-local or multicast
2612         */
2613        if (!rt6_need_strict(addr))
2614                dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2615
2616        rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
2617        if (!rt)
2618                return ERR_PTR(-ENOMEM);
2619
2620        in6_dev_hold(idev);
2621
2622        rt->dst.flags |= DST_HOST;
2623        rt->dst.input = ip6_input;
2624        rt->dst.output = ip6_output;
2625        rt->rt6i_idev = idev;
2626
2627        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2628        if (anycast)
2629                rt->rt6i_flags |= RTF_ANYCAST;
2630        else
2631                rt->rt6i_flags |= RTF_LOCAL;
2632
2633        rt->rt6i_gateway  = *addr;
2634        rt->rt6i_dst.addr = *addr;
2635        rt->rt6i_dst.plen = 128;
2636        tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2637        rt->rt6i_table = fib6_get_table(net, tb_id);
2638        rt->dst.flags |= DST_NOCACHE;
2639
2640        atomic_set(&rt->dst.__refcnt, 1);
2641
2642        return rt;
2643}
2644
2645/* remove a deleted IP address from prefsrc entries */
2646struct arg_dev_net_ip {
2647        struct net_device *dev;
2648        struct net *net;
2649        struct in6_addr *addr;
2650};
2651
2652static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2653{
2654        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2655        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2656        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2657
2658        if (((void *)rt->dst.dev == dev || !dev) &&
2659            rt != net->ipv6.ip6_null_entry &&
2660            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2661                /* remove prefsrc entry */
2662                rt->rt6i_prefsrc.plen = 0;
2663        }
2664        return 0;
2665}
2666
2667void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2668{
2669        struct net *net = dev_net(ifp->idev->dev);
2670        struct arg_dev_net_ip adni = {
2671                .dev = ifp->idev->dev,
2672                .net = net,
2673                .addr = &ifp->addr,
2674        };
2675        fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2676}
2677
2678#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2679#define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2680
2681/* Remove routers and update dst entries when a gateway turns into a host. */
2682static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2683{
2684        struct in6_addr *gateway = (struct in6_addr *)arg;
2685
2686        if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2687             ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2688             ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2689                return -1;
2690        }
2691        return 0;
2692}
2693
2694void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2695{
2696        fib6_clean_all(net, fib6_clean_tohost, gateway);
2697}
2698
2699struct arg_dev_net {
2700        struct net_device *dev;
2701        struct net *net;
2702};
2703
2704static int fib6_ifdown(struct rt6_info *rt, void *arg)
2705{
2706        const struct arg_dev_net *adn = arg;
2707        const struct net_device *dev = adn->dev;
2708
2709        if ((rt->dst.dev == dev || !dev) &&
2710            rt != adn->net->ipv6.ip6_null_entry)
2711                return -1;
2712
2713        return 0;
2714}
2715
2716void rt6_ifdown(struct net *net, struct net_device *dev)
2717{
2718        struct arg_dev_net adn = {
2719                .dev = dev,
2720                .net = net,
2721        };
2722
2723        fib6_clean_all(net, fib6_ifdown, &adn);
2724        icmp6_clean_all(fib6_ifdown, &adn);
2725        if (dev)
2726                rt6_uncached_list_flush_dev(net, dev);
2727}
2728
2729struct rt6_mtu_change_arg {
2730        struct net_device *dev;
2731        unsigned int mtu;
2732};
2733
2734static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2735{
2736        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2737        struct inet6_dev *idev;
2738
2739        /* In IPv6 pmtu discovery is not optional,
2740           so the RTAX_MTU lock cannot disable it.
2741           We still use this lock to block changes
2742           caused by addrconf/ndisc.
2743        */
2744
2745        idev = __in6_dev_get(arg->dev);
2746        if (!idev)
2747                return 0;
2748
2749        /* After an administrative MTU increase, there is no way to discover
2750           an IPv6 PMTU increase, so the PMTU must be updated here.
2751           Since RFC 1981 doesn't cover administrative MTU increases,
2752           updating the PMTU on increase is a MUST (e.g. for jumbo frames).
2753         */
2754        /*
2755           If the new MTU is less than the route PMTU, this new MTU will be
2756           the lowest MTU in the path; update the route PMTU to reflect the
2757           decrease. If the new MTU is greater than the route PMTU, and the
2758           old MTU was the lowest MTU in the path, update the route PMTU to
2759           reflect the increase. In that case, if another node on the path
2760           still has the lowest MTU, its Packet Too Big message will trigger
2761           PMTU discovery again.
2762         */
2763        if (rt->dst.dev == arg->dev &&
2764            dst_metric_raw(&rt->dst, RTAX_MTU) &&
2765            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2766                if (rt->rt6i_flags & RTF_CACHE) {
2767                        /* For RTF_CACHE with rt6i_pmtu == 0
2768                         * (i.e. a redirected route),
2769                         * the metrics of its rt->dst.from have already
2770                         * been updated.
2771                         */
2772                        if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2773                                rt->rt6i_pmtu = arg->mtu;
2774                } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2775                           (dst_mtu(&rt->dst) < arg->mtu &&
2776                            dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2777                        dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2778                }
2779        }
2780        return 0;
2781}
2782
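/* Annotation (added note, not part of the original source): rt6_mtu_change()
 * is called when a device's MTU is changed administratively; it walks the
 * whole FIB with rt6_mtu_change_route() so cached path MTUs on that device
 * follow the new value according to the rules described above.
 */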
2783void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2784{
2785        struct rt6_mtu_change_arg arg = {
2786                .dev = dev,
2787                .mtu = mtu,
2788        };
2789
2790        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2791}
2792
2793static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2794        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2795        [RTA_OIF]               = { .type = NLA_U32 },
2796        [RTA_IIF]               = { .type = NLA_U32 },
2797        [RTA_PRIORITY]          = { .type = NLA_U32 },
2798        [RTA_METRICS]           = { .type = NLA_NESTED },
2799        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2800        [RTA_PREF]              = { .type = NLA_U8 },
2801        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2802        [RTA_ENCAP]             = { .type = NLA_NESTED },
2803        [RTA_EXPIRES]           = { .type = NLA_U32 },
2804};
2805
2806static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2807                              struct fib6_config *cfg)
2808{
2809        struct rtmsg *rtm;
2810        struct nlattr *tb[RTA_MAX+1];
2811        unsigned int pref;
2812        int err;
2813
2814        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2815        if (err < 0)
2816                goto errout;
2817
2818        err = -EINVAL;
2819        rtm = nlmsg_data(nlh);
2820        memset(cfg, 0, sizeof(*cfg));
2821
2822        cfg->fc_table = rtm->rtm_table;
2823        cfg->fc_dst_len = rtm->rtm_dst_len;
2824        cfg->fc_src_len = rtm->rtm_src_len;
2825        cfg->fc_flags = RTF_UP;
2826        cfg->fc_protocol = rtm->rtm_protocol;
2827        cfg->fc_type = rtm->rtm_type;
2828
2829        if (rtm->rtm_type == RTN_UNREACHABLE ||
2830            rtm->rtm_type == RTN_BLACKHOLE ||
2831            rtm->rtm_type == RTN_PROHIBIT ||
2832            rtm->rtm_type == RTN_THROW)
2833                cfg->fc_flags |= RTF_REJECT;
2834
2835        if (rtm->rtm_type == RTN_LOCAL)
2836                cfg->fc_flags |= RTF_LOCAL;
2837
2838        if (rtm->rtm_flags & RTM_F_CLONED)
2839                cfg->fc_flags |= RTF_CACHE;
2840
2841        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2842        cfg->fc_nlinfo.nlh = nlh;
2843        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2844
2845        if (tb[RTA_GATEWAY]) {
2846                cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2847                cfg->fc_flags |= RTF_GATEWAY;
2848        }
2849
2850        if (tb[RTA_DST]) {
2851                int plen = (rtm->rtm_dst_len + 7) >> 3;
2852
2853                if (nla_len(tb[RTA_DST]) < plen)
2854                        goto errout;
2855
2856                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2857        }
2858
2859        if (tb[RTA_SRC]) {
2860                int plen = (rtm->rtm_src_len + 7) >> 3;
2861
2862                if (nla_len(tb[RTA_SRC]) < plen)
2863                        goto errout;
2864
2865                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2866        }
2867
2868        if (tb[RTA_PREFSRC])
2869                cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2870
2871        if (tb[RTA_OIF])
2872                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2873
2874        if (tb[RTA_PRIORITY])
2875                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2876
2877        if (tb[RTA_METRICS]) {
2878                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2879                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2880        }
2881
2882        if (tb[RTA_TABLE])
2883                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2884
2885        if (tb[RTA_MULTIPATH]) {
2886                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2887                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2888        }
2889
2890        if (tb[RTA_PREF]) {
2891                pref = nla_get_u8(tb[RTA_PREF]);
2892                if (pref != ICMPV6_ROUTER_PREF_LOW &&
2893                    pref != ICMPV6_ROUTER_PREF_HIGH)
2894                        pref = ICMPV6_ROUTER_PREF_MEDIUM;
2895                cfg->fc_flags |= RTF_PREF(pref);
2896        }
2897
2898        if (tb[RTA_ENCAP])
2899                cfg->fc_encap = tb[RTA_ENCAP];
2900
2901        if (tb[RTA_ENCAP_TYPE])
2902                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2903
2904        if (tb[RTA_EXPIRES]) {
2905                unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2906
2907                if (addrconf_finite_timeout(timeout)) {
2908                        cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2909                        cfg->fc_flags |= RTF_EXPIRES;
2910                }
2911        }
2912
2913        err = 0;
2914errout:
2915        return err;
2916}
2917
2918struct rt6_nh {
2919        struct rt6_info *rt6_info;
2920        struct fib6_config r_cfg;
2921        struct mx6_config mxc;
2922        struct list_head next;
2923};
2924
2925static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2926{
2927        struct rt6_nh *nh;
2928
2929        list_for_each_entry(nh, rt6_nh_list, next) {
2930                pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2931                        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2932                        nh->r_cfg.fc_ifindex);
2933        }
2934}
2935
2936static int ip6_route_info_append(struct list_head *rt6_nh_list,
2937                                 struct rt6_info *rt, struct fib6_config *r_cfg)
2938{
2939        struct rt6_nh *nh;
2940        struct rt6_info *rtnh;
2941        int err = -EEXIST;
2942
2943        list_for_each_entry(nh, rt6_nh_list, next) {
2944                /* check if rt6_info already exists */
2945                rtnh = nh->rt6_info;
2946
2947                if (rtnh->dst.dev == rt->dst.dev &&
2948                    rtnh->rt6i_idev == rt->rt6i_idev &&
2949                    ipv6_addr_equal(&rtnh->rt6i_gateway,
2950                                    &rt->rt6i_gateway))
2951                        return err;
2952        }
2953
2954        nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2955        if (!nh)
2956                return -ENOMEM;
2957        nh->rt6_info = rt;
2958        err = ip6_convert_metrics(&nh->mxc, r_cfg);
2959        if (err) {
2960                kfree(nh);
2961                return err;
2962        }
2963        memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2964        list_add_tail(&nh->next, rt6_nh_list);
2965
2966        return 0;
2967}
2968
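/* Annotation (added note, not part of the original source):
 * ip6_route_multipath_add() walks the RTA_MULTIPATH attribute as a sequence
 * of struct rtnexthop entries (each optionally carrying its own RTA_GATEWAY,
 * RTA_ENCAP and RTA_ENCAP_TYPE), creates one rt6_info per nexthop, then
 * inserts them one by one; if any insert fails, the nexthops that were
 * already installed are deleted again.
 */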
2969static int ip6_route_multipath_add(struct fib6_config *cfg)
2970{
2971        struct fib6_config r_cfg;
2972        struct rtnexthop *rtnh;
2973        struct rt6_info *rt;
2974        struct rt6_nh *err_nh;
2975        struct rt6_nh *nh, *nh_safe;
2976        int remaining;
2977        int attrlen;
2978        int err = 1;
2979        int nhn = 0;
2980        int replace = (cfg->fc_nlinfo.nlh &&
2981                       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2982        LIST_HEAD(rt6_nh_list);
2983
2984        remaining = cfg->fc_mp_len;
2985        rtnh = (struct rtnexthop *)cfg->fc_mp;
2986
2987        /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2988         * rt6_info structs, one per nexthop
2989         */
2990        while (rtnh_ok(rtnh, remaining)) {
2991                memcpy(&r_cfg, cfg, sizeof(*cfg));
2992                if (rtnh->rtnh_ifindex)
2993                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2994
2995                attrlen = rtnh_attrlen(rtnh);
2996                if (attrlen > 0) {
2997                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2998
2999                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3000                        if (nla) {
3001                                r_cfg.fc_gateway = nla_get_in6_addr(nla);
3002                                r_cfg.fc_flags |= RTF_GATEWAY;
3003                        }
3004                        r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3005                        nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3006                        if (nla)
3007                                r_cfg.fc_encap_type = nla_get_u16(nla);
3008                }
3009
3010                rt = ip6_route_info_create(&r_cfg);
3011                if (IS_ERR(rt)) {
3012                        err = PTR_ERR(rt);
3013                        rt = NULL;
3014                        goto cleanup;
3015                }
3016
3017                err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
3018                if (err) {
3019                        dst_free(&rt->dst);
3020                        goto cleanup;
3021                }
3022
3023                rtnh = rtnh_next(rtnh, &remaining);
3024        }
3025
3026        err_nh = NULL;
3027        list_for_each_entry(nh, &rt6_nh_list, next) {
3028                err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3029                /* nh->rt6_info is used or freed at this point, reset to NULL */
3030                nh->rt6_info = NULL;
3031                if (err) {
3032                        if (replace && nhn)
3033                                ip6_print_replace_route_err(&rt6_nh_list);
3034                        err_nh = nh;
3035                        goto add_errout;
3036                }
3037
3038                /* Because each route is added like a single route, we remove
3039                 * these flags after the first nexthop: if there is a collision,
3040                 * we have already failed to add the first nexthop, since
3041                 * fib6_add_rt2node() has rejected it; when replacing, the old
3042                 * nexthops have been replaced by the first new one, and the
3043                 * rest should be added alongside it.
3044                 */
3045                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3046                                                     NLM_F_REPLACE);
3047                nhn++;
3048        }
3049
3050        goto cleanup;
3051
3052add_errout:
3053        /* Delete routes that were already added */
3054        list_for_each_entry(nh, &rt6_nh_list, next) {
3055                if (err_nh == nh)
3056                        break;
3057                ip6_route_del(&nh->r_cfg);
3058        }
3059
3060cleanup:
3061        list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3062                if (nh->rt6_info)
3063                        dst_free(&nh->rt6_info->dst);
3064                kfree(nh->mxc.mx);
3065                list_del(&nh->next);
3066                kfree(nh);
3067        }
3068
3069        return err;
3070}
3071
3072static int ip6_route_multipath_del(struct fib6_config *cfg)
3073{
3074        struct fib6_config r_cfg;
3075        struct rtnexthop *rtnh;
3076        int remaining;
3077        int attrlen;
3078        int err = 1, last_err = 0;
3079
3080        remaining = cfg->fc_mp_len;
3081        rtnh = (struct rtnexthop *)cfg->fc_mp;
3082
3083        /* Parse a Multipath Entry */
3084        while (rtnh_ok(rtnh, remaining)) {
3085                memcpy(&r_cfg, cfg, sizeof(*cfg));
3086                if (rtnh->rtnh_ifindex)
3087                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3088
3089                attrlen = rtnh_attrlen(rtnh);
3090                if (attrlen > 0) {
3091                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3092
3093                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3094                        if (nla) {
3095                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3096                                r_cfg.fc_flags |= RTF_GATEWAY;
3097                        }
3098                }
3099                err = ip6_route_del(&r_cfg);
3100                if (err)
3101                        last_err = err;
3102
3103                rtnh = rtnh_next(rtnh, &remaining);
3104        }
3105
3106        return last_err;
3107}
3108
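    /* RTM_DELROUTE handler: convert the netlink request into a fib6_config and
     * dispatch to the multipath or single-route delete path.
     */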
3109static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3110{
3111        struct fib6_config cfg;
3112        int err;
3113
3114        err = rtm_to_fib6_config(skb, nlh, &cfg);
3115        if (err < 0)
3116                return err;
3117
3118        if (cfg.fc_mp)
3119                return ip6_route_multipath_del(&cfg);
3120        else
3121                return ip6_route_del(&cfg);
3122}
3123
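    /* RTM_NEWROUTE handler: convert the netlink request into a fib6_config and
     * dispatch to the multipath or single-route add path.
     */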
3124static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3125{
3126        struct fib6_config cfg;
3127        int err;
3128
3129        err = rtm_to_fib6_config(skb, nlh, &cfg);
3130        if (err < 0)
3131                return err;
3132
3133        if (cfg.fc_mp)
3134                return ip6_route_multipath_add(&cfg);
3135        else
3136                return ip6_route_add(&cfg);
3137}
3138
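    /* Worst-case netlink message size for one route, used to size the skb
     * allocated in inet6_rt_notify().
     */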
3139static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3140{
3141        return NLMSG_ALIGN(sizeof(struct rtmsg))
3142               + nla_total_size(16) /* RTA_SRC */
3143               + nla_total_size(16) /* RTA_DST */
3144               + nla_total_size(16) /* RTA_GATEWAY */
3145               + nla_total_size(16) /* RTA_PREFSRC */
3146               + nla_total_size(4) /* RTA_TABLE */
3147               + nla_total_size(4) /* RTA_IIF */
3148               + nla_total_size(4) /* RTA_OIF */
3149               + nla_total_size(4) /* RTA_PRIORITY */
3150               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3151               + nla_total_size(sizeof(struct rta_cacheinfo))
3152               + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3153               + nla_total_size(1) /* RTA_PREF */
3154               + lwtunnel_get_encap_size(rt->dst.lwtstate);
3155}
3156
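    /* Fill one routing netlink message describing @rt into @skb.  Returns 0 on
     * success, 1 when @prefix is set and @rt is not a prefix route (the entry
     * is skipped), or -EMSGSIZE when @skb runs out of room.
     */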
3157static int rt6_fill_node(struct net *net,
3158                         struct sk_buff *skb, struct rt6_info *rt,
3159                         struct in6_addr *dst, struct in6_addr *src,
3160                         int iif, int type, u32 portid, u32 seq,
3161                         int prefix, int nowait, unsigned int flags)
3162{
3163        u32 metrics[RTAX_MAX];
3164        struct rtmsg *rtm;
3165        struct nlmsghdr *nlh;
3166        long expires;
3167        u32 table;
3168
3169        if (prefix) {   /* user wants prefix routes only */
3170                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3171                        /* success since this is not a prefix route */
3172                        return 1;
3173                }
3174        }
3175
3176        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3177        if (!nlh)
3178                return -EMSGSIZE;
3179
3180        rtm = nlmsg_data(nlh);
3181        rtm->rtm_family = AF_INET6;
3182        rtm->rtm_dst_len = rt->rt6i_dst.plen;
3183        rtm->rtm_src_len = rt->rt6i_src.plen;
3184        rtm->rtm_tos = 0;
3185        if (rt->rt6i_table)
3186                table = rt->rt6i_table->tb6_id;
3187        else
3188                table = RT6_TABLE_UNSPEC;
3189        rtm->rtm_table = table;
3190        if (nla_put_u32(skb, RTA_TABLE, table))
3191                goto nla_put_failure;
3192        if (rt->rt6i_flags & RTF_REJECT) {
3193                switch (rt->dst.error) {
3194                case -EINVAL:
3195                        rtm->rtm_type = RTN_BLACKHOLE;
3196                        break;
3197                case -EACCES:
3198                        rtm->rtm_type = RTN_PROHIBIT;
3199                        break;
3200                case -EAGAIN:
3201                        rtm->rtm_type = RTN_THROW;
3202                        break;
3203                default:
3204                        rtm->rtm_type = RTN_UNREACHABLE;
3205                        break;
3206                }
3207        } else if (rt->rt6i_flags & RTF_LOCAL)
3208                rtm->rtm_type = RTN_LOCAL;
3210        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3211                rtm->rtm_type = RTN_LOCAL;
3212        else
3213                rtm->rtm_type = RTN_UNICAST;
3214        rtm->rtm_flags = 0;
3215        if (!netif_carrier_ok(rt->dst.dev)) {
3216                rtm->rtm_flags |= RTNH_F_LINKDOWN;
3217                if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3218                        rtm->rtm_flags |= RTNH_F_DEAD;
3219        }
3220        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3221        rtm->rtm_protocol = rt->rt6i_protocol;
3222        if (rt->rt6i_flags & RTF_DYNAMIC)
3223                rtm->rtm_protocol = RTPROT_REDIRECT;
3224        else if (rt->rt6i_flags & RTF_ADDRCONF) {
3225                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3226                        rtm->rtm_protocol = RTPROT_RA;
3227                else
3228                        rtm->rtm_protocol = RTPROT_KERNEL;
3229        }
3230
3231        if (rt->rt6i_flags & RTF_CACHE)
3232                rtm->rtm_flags |= RTM_F_CLONED;
3233
3234        if (dst) {
3235                if (nla_put_in6_addr(skb, RTA_DST, dst))
3236                        goto nla_put_failure;
3237                rtm->rtm_dst_len = 128;
3238        } else if (rtm->rtm_dst_len)
3239                if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3240                        goto nla_put_failure;
3241#ifdef CONFIG_IPV6_SUBTREES
3242        if (src) {
3243                if (nla_put_in6_addr(skb, RTA_SRC, src))
3244                        goto nla_put_failure;
3245                rtm->rtm_src_len = 128;
3246        } else if (rtm->rtm_src_len &&
3247                   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3248                goto nla_put_failure;
3249#endif
3250        if (iif) {
3251#ifdef CONFIG_IPV6_MROUTE
3252                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3253                        int err = ip6mr_get_route(net, skb, rtm, nowait,
3254                                                  portid);
3255
3256                        if (err <= 0) {
3257                                if (!nowait) {
3258                                        if (err == 0)
3259                                                return 0;
3260                                        goto nla_put_failure;
3261                                } else {
3262                                        if (err == -EMSGSIZE)
3263                                                goto nla_put_failure;
3264                                }
3265                        }
3266                } else
3267#endif
3268                        if (nla_put_u32(skb, RTA_IIF, iif))
3269                                goto nla_put_failure;
3270        } else if (dst) {
3271                struct in6_addr saddr_buf;
3272                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3273                    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3274                        goto nla_put_failure;
3275        }
3276
3277        if (rt->rt6i_prefsrc.plen) {
3278                struct in6_addr saddr_buf;
3279                saddr_buf = rt->rt6i_prefsrc.addr;
3280                if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3281                        goto nla_put_failure;
3282        }
3283
3284        memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3285        if (rt->rt6i_pmtu)
3286                metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3287        if (rtnetlink_put_metrics(skb, metrics) < 0)
3288                goto nla_put_failure;
3289
3290        if (rt->rt6i_flags & RTF_GATEWAY) {
3291                if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3292                        goto nla_put_failure;
3293        }
3294
3295        if (rt->dst.dev &&
3296            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3297                goto nla_put_failure;
3298        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3299                goto nla_put_failure;
3300
3301        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3302
3303        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3304                goto nla_put_failure;
3305
3306        if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3307                goto nla_put_failure;
3308
3309        lwtunnel_fill_encap(skb, rt->dst.lwtstate);
3310
3311        nlmsg_end(skb, nlh);
3312        return 0;
3313
3314nla_put_failure:
3315        nlmsg_cancel(skb, nlh);
3316        return -EMSGSIZE;
3317}
3318
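    /* Per-route callback used when dumping the FIB to user space; honours the
     * RTM_F_PREFIX flag from the request so that only prefix routes are
     * reported when asked for.
     */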
3319int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3320{
3321        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3322        int prefix;
3323
3324        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3325                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3326                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3327        } else
3328                prefix = 0;
3329
3330        return rt6_fill_node(arg->net,
3331                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3332                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3333                     prefix, 0, NLM_F_MULTI);
3334}
3335
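    /* RTM_GETROUTE handler: build a flow from the request attributes, resolve
     * it with an input lookup (when RTA_IIF is given) or an output lookup, and
     * unicast the resulting route back to the requester.
     */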
3336static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3337{
3338        struct net *net = sock_net(in_skb->sk);
3339        struct nlattr *tb[RTA_MAX+1];
3340        struct rt6_info *rt;
3341        struct sk_buff *skb;
3342        struct rtmsg *rtm;
3343        struct flowi6 fl6;
3344        int err, iif = 0, oif = 0;
3345
3346        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3347        if (err < 0)
3348                goto errout;
3349
3350        err = -EINVAL;
3351        memset(&fl6, 0, sizeof(fl6));
3352        rtm = nlmsg_data(nlh);
3353        fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
3354
3355        if (tb[RTA_SRC]) {
3356                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3357                        goto errout;
3358
3359                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3360        }
3361
3362        if (tb[RTA_DST]) {
3363                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3364                        goto errout;
3365
3366                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3367        }
3368
3369        if (tb[RTA_IIF])
3370                iif = nla_get_u32(tb[RTA_IIF]);
3371
3372        if (tb[RTA_OIF])
3373                oif = nla_get_u32(tb[RTA_OIF]);
3374
3375        if (tb[RTA_MARK])
3376                fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3377
3378        if (iif) {
3379                struct net_device *dev;
3380                int flags = 0;
3381
3382                dev = __dev_get_by_index(net, iif);
3383                if (!dev) {
3384                        err = -ENODEV;
3385                        goto errout;
3386                }
3387
3388                fl6.flowi6_iif = iif;
3389
3390                if (!ipv6_addr_any(&fl6.saddr))
3391                        flags |= RT6_LOOKUP_F_HAS_SADDR;
3392
3393                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3394                                                               flags);
3395        } else {
3396                fl6.flowi6_oif = oif;
3397
3398                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3399        }
3400
3401        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3402        if (!skb) {
3403                ip6_rt_put(rt);
3404                err = -ENOBUFS;
3405                goto errout;
3406        }
3407
3408        /* Reserve room for dummy headers; this skb can pass
3409         * through a good chunk of the routing engine.
3410         */
3411        skb_reset_mac_header(skb);
3412        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3413
3414        skb_dst_set(skb, &rt->dst);
3415
3416        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3417                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3418                            nlh->nlmsg_seq, 0, 0, 0);
3419        if (err < 0) {
3420                kfree_skb(skb);
3421                goto errout;
3422        }
3423
3424        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3425errout:
3426        return err;
3427}
3428
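    /* Build an @event notification for @rt and send it to the
     * RTNLGRP_IPV6_ROUTE multicast group.
     */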
3429void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3430                     unsigned int nlm_flags)
3431{
3432        struct sk_buff *skb;
3433        struct net *net = info->nl_net;
3434        u32 seq;
3435        int err;
3436
3437        err = -ENOBUFS;
3438        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3439
3440        skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3441        if (!skb)
3442                goto errout;
3443
3444        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3445                                event, info->portid, seq, 0, 0, nlm_flags);
3446        if (err < 0) {
3447                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3448                WARN_ON(err == -EMSGSIZE);
3449                kfree_skb(skb);
3450                goto errout;
3451        }
3452        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3453                    info->nlh, gfp_any());
3454        return;
3455errout:
3456        if (err < 0)
3457                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3458}
3459
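    /* Netdevice notifier: when the loopback device registers, attach it to the
     * per-namespace special routes (null and, with multiple tables, prohibit
     * and blackhole entries) so they have a valid device and inet6_dev.
     */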
3460static int ip6_route_dev_notify(struct notifier_block *this,
3461                                unsigned long event, void *ptr)
3462{
3463        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3464        struct net *net = dev_net(dev);
3465
3466        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3467                net->ipv6.ip6_null_entry->dst.dev = dev;
3468                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3469#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3470                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3471                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3472                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3473                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3474#endif
3475        }
3476
3477        return NOTIFY_OK;
3478}
3479
3480/*
3481 *      /proc
3482 */
3483
3484#ifdef CONFIG_PROC_FS
3485
3486static const struct file_operations ipv6_route_proc_fops = {
3487        .owner          = THIS_MODULE,
3488        .open           = ipv6_route_open,
3489        .read           = seq_read,
3490        .llseek         = seq_lseek,
3491        .release        = seq_release_net,
3492};
3493
3494static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3495{
3496        struct net *net = (struct net *)seq->private;
3497        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3498                   net->ipv6.rt6_stats->fib_nodes,
3499                   net->ipv6.rt6_stats->fib_route_nodes,
3500                   net->ipv6.rt6_stats->fib_rt_alloc,
3501                   net->ipv6.rt6_stats->fib_rt_entries,
3502                   net->ipv6.rt6_stats->fib_rt_cache,
3503                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3504                   net->ipv6.rt6_stats->fib_discarded_routes);
3505
3506        return 0;
3507}
3508
3509static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3510{
3511        return single_open_net(inode, file, rt6_stats_seq_show);
3512}
3513
3514static const struct file_operations rt6_stats_seq_fops = {
3515        .owner   = THIS_MODULE,
3516        .open    = rt6_stats_seq_open,
3517        .read    = seq_read,
3518        .llseek  = seq_lseek,
3519        .release = single_release_net,
3520};
3521#endif  /* CONFIG_PROC_FS */
3522
3523#ifdef CONFIG_SYSCTL
3524
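    /* Handler for the write-only net.ipv6.route.flush sysctl: any write
     * triggers garbage collection of the routing tables via fib6_run_gc().
     */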
3525static
3526int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3527                              void __user *buffer, size_t *lenp, loff_t *ppos)
3528{
3529        struct net *net;
3530        int delay;
3531        if (!write)
3532                return -EINVAL;
3533
3534        net = (struct net *)ctl->extra1;
3535        delay = net->ipv6.sysctl.flush_delay;
3536        proc_dointvec(ctl, write, buffer, lenp, ppos);
3537        fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3538        return 0;
3539}
3540
3541struct ctl_table ipv6_route_table_template[] = {
3542        {
3543                .procname       =       "flush",
3544                .data           =       &init_net.ipv6.sysctl.flush_delay,
3545                .maxlen         =       sizeof(int),
3546                .mode           =       0200,
3547                .proc_handler   =       ipv6_sysctl_rtcache_flush
3548        },
3549        {
3550                .procname       =       "gc_thresh",
3551                .data           =       &ip6_dst_ops_template.gc_thresh,
3552                .maxlen         =       sizeof(int),
3553                .mode           =       0644,
3554                .proc_handler   =       proc_dointvec,
3555        },
3556        {
3557                .procname       =       "max_size",
3558                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3559                .maxlen         =       sizeof(int),
3560                .mode           =       0644,
3561                .proc_handler   =       proc_dointvec,
3562        },
3563        {
3564                .procname       =       "gc_min_interval",
3565                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3566                .maxlen         =       sizeof(int),
3567                .mode           =       0644,
3568                .proc_handler   =       proc_dointvec_jiffies,
3569        },
3570        {
3571                .procname       =       "gc_timeout",
3572                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3573                .maxlen         =       sizeof(int),
3574                .mode           =       0644,
3575                .proc_handler   =       proc_dointvec_jiffies,
3576        },
3577        {
3578                .procname       =       "gc_interval",
3579                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3580                .maxlen         =       sizeof(int),
3581                .mode           =       0644,
3582                .proc_handler   =       proc_dointvec_jiffies,
3583        },
3584        {
3585                .procname       =       "gc_elasticity",
3586                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3587                .maxlen         =       sizeof(int),
3588                .mode           =       0644,
3589                .proc_handler   =       proc_dointvec,
3590        },
3591        {
3592                .procname       =       "mtu_expires",
3593                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3594                .maxlen         =       sizeof(int),
3595                .mode           =       0644,
3596                .proc_handler   =       proc_dointvec_jiffies,
3597        },
3598        {
3599                .procname       =       "min_adv_mss",
3600                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3601                .maxlen         =       sizeof(int),
3602                .mode           =       0644,
3603                .proc_handler   =       proc_dointvec,
3604        },
3605        {
3606                .procname       =       "gc_min_interval_ms",
3607                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3608                .maxlen         =       sizeof(int),
3609                .mode           =       0644,
3610                .proc_handler   =       proc_dointvec_ms_jiffies,
3611        },
3612        { }
3613};
3614
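    /* Duplicate the sysctl template for a new network namespace and point each
     * entry at the per-namespace data.  The "flush" entry is hidden from
     * namespaces not owned by the initial user namespace.
     */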
3615struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3616{
3617        struct ctl_table *table;
3618
3619        table = kmemdup(ipv6_route_table_template,
3620                        sizeof(ipv6_route_table_template),
3621                        GFP_KERNEL);
3622
3623        if (table) {
3624                table[0].data = &net->ipv6.sysctl.flush_delay;
3625                table[0].extra1 = net;
3626                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3627                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3628                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3629                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3630                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3631                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3632                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3633                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3634                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3635
3636                /* Don't export sysctls to unprivileged users */
3637                if (net->user_ns != &init_user_ns)
3638                        table[0].procname = NULL;
3639        }
3640
3641        return table;
3642}
3643#endif
3644
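    /* Per-namespace initialisation: copy the dst_ops template, allocate the
     * null (and, with multiple tables, prohibit/blackhole) template routes and
     * set the default sysctl values.
     */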
3645static int __net_init ip6_route_net_init(struct net *net)
3646{
3647        int ret = -ENOMEM;
3648
3649        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3650               sizeof(net->ipv6.ip6_dst_ops));
3651
3652        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3653                goto out_ip6_dst_ops;
3654
3655        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3656                                           sizeof(*net->ipv6.ip6_null_entry),
3657                                           GFP_KERNEL);
3658        if (!net->ipv6.ip6_null_entry)
3659                goto out_ip6_dst_entries;
3660        net->ipv6.ip6_null_entry->dst.path =
3661                (struct dst_entry *)net->ipv6.ip6_null_entry;
3662        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3663        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3664                         ip6_template_metrics, true);
3665
3666#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3667        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3668                                               sizeof(*net->ipv6.ip6_prohibit_entry),
3669                                               GFP_KERNEL);
3670        if (!net->ipv6.ip6_prohibit_entry)
3671                goto out_ip6_null_entry;
3672        net->ipv6.ip6_prohibit_entry->dst.path =
3673                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3674        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3675        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3676                         ip6_template_metrics, true);
3677
3678        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3679                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
3680                                               GFP_KERNEL);
3681        if (!net->ipv6.ip6_blk_hole_entry)
3682                goto out_ip6_prohibit_entry;
3683        net->ipv6.ip6_blk_hole_entry->dst.path =
3684                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3685        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3686        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3687                         ip6_template_metrics, true);
3688#endif
3689
3690        net->ipv6.sysctl.flush_delay = 0;
3691        net->ipv6.sysctl.ip6_rt_max_size = 4096;
3692        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3693        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3694        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3695        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3696        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3697        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3698
3699        net->ipv6.ip6_rt_gc_expire = 30*HZ;
3700
3701        ret = 0;
3702out:
3703        return ret;
3704
3705#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3706out_ip6_prohibit_entry:
3707        kfree(net->ipv6.ip6_prohibit_entry);
3708out_ip6_null_entry:
3709        kfree(net->ipv6.ip6_null_entry);
3710#endif
3711out_ip6_dst_entries:
3712        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3713out_ip6_dst_ops:
3714        goto out;
3715}
3716
3717static void __net_exit ip6_route_net_exit(struct net *net)
3718{
3719        kfree(net->ipv6.ip6_null_entry);
3720#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3721        kfree(net->ipv6.ip6_prohibit_entry);
3722        kfree(net->ipv6.ip6_blk_hole_entry);
3723#endif
3724        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3725}
3726
3727static int __net_init ip6_route_net_init_late(struct net *net)
3728{
3729#ifdef CONFIG_PROC_FS
3730        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3731        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3732#endif
3733        return 0;
3734}
3735
3736static void __net_exit ip6_route_net_exit_late(struct net *net)
3737{
3738#ifdef CONFIG_PROC_FS
3739        remove_proc_entry("ipv6_route", net->proc_net);
3740        remove_proc_entry("rt6_stats", net->proc_net);
3741#endif
3742}
3743
3744static struct pernet_operations ip6_route_net_ops = {
3745        .init = ip6_route_net_init,
3746        .exit = ip6_route_net_exit,
3747};
3748
3749static int __net_init ipv6_inetpeer_init(struct net *net)
3750{
3751        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3752
3753        if (!bp)
3754                return -ENOMEM;
3755        inet_peer_base_init(bp);
3756        net->ipv6.peers = bp;
3757        return 0;
3758}
3759
3760static void __net_exit ipv6_inetpeer_exit(struct net *net)
3761{
3762        struct inet_peer_base *bp = net->ipv6.peers;
3763
3764        net->ipv6.peers = NULL;
3765        inetpeer_invalidate_tree(bp);
3766        kfree(bp);
3767}
3768
3769static struct pernet_operations ipv6_inetpeer_ops = {
3770        .init   =       ipv6_inetpeer_init,
3771        .exit   =       ipv6_inetpeer_exit,
3772};
3773
3774static struct pernet_operations ip6_route_net_late_ops = {
3775        .init = ip6_route_net_init_late,
3776        .exit = ip6_route_net_exit_late,
3777};
3778
3779static struct notifier_block ip6_route_dev_notifier = {
3780        .notifier_call = ip6_route_dev_notify,
3781        .priority = 0,
3782};
3783
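    /* Subsystem initialisation: create the rt6_info slab cache, register the
     * pernet operations, fib6 core, xfrm6 and policy rules, the rtnetlink
     * handlers and the netdevice notifier.  Errors are unwound in reverse
     * order; ip6_route_cleanup() undoes the whole sequence.
     */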
3784int __init ip6_route_init(void)
3785{
3786        int ret;
3787        int cpu;
3788
3789        ret = -ENOMEM;
3790        ip6_dst_ops_template.kmem_cachep =
3791                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3792                                  SLAB_HWCACHE_ALIGN, NULL);
3793        if (!ip6_dst_ops_template.kmem_cachep)
3794                goto out;
3795
3796        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3797        if (ret)
3798                goto out_kmem_cache;
3799
3800        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3801        if (ret)
3802                goto out_dst_entries;
3803
3804        ret = register_pernet_subsys(&ip6_route_net_ops);
3805        if (ret)
3806                goto out_register_inetpeer;
3807
3808        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3809
3810        /* The loopback device is registered before this portion of code,
3811         * so the loopback reference in rt6_info has not been taken yet;
3812         * take it manually for init_net. */
3813        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3814        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3815#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3816        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3817        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3818        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3819        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3820#endif
3821        ret = fib6_init();
3822        if (ret)
3823                goto out_register_subsys;
3824
3825        ret = xfrm6_init();
3826        if (ret)
3827                goto out_fib6_init;
3828
3829        ret = fib6_rules_init();
3830        if (ret)
3831                goto xfrm6_init;
3832
3833        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3834        if (ret)
3835                goto fib6_rules_init;
3836
3837        ret = -ENOBUFS;
3838        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3839            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3840            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3841                goto out_register_late_subsys;
3842
3843        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3844        if (ret)
3845                goto out_register_late_subsys;
3846
3847        for_each_possible_cpu(cpu) {
3848                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3849
3850                INIT_LIST_HEAD(&ul->head);
3851                spin_lock_init(&ul->lock);
3852        }
3853
3854out:
3855        return ret;
3856
3857out_register_late_subsys:
3858        unregister_pernet_subsys(&ip6_route_net_late_ops);
3859fib6_rules_init:
3860        fib6_rules_cleanup();
3861xfrm6_init:
3862        xfrm6_fini();
3863out_fib6_init:
3864        fib6_gc_cleanup();
3865out_register_subsys:
3866        unregister_pernet_subsys(&ip6_route_net_ops);
3867out_register_inetpeer:
3868        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3869out_dst_entries:
3870        dst_entries_destroy(&ip6_dst_blackhole_ops);
3871out_kmem_cache:
3872        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3873        goto out;
3874}
3875
3876void ip6_route_cleanup(void)
3877{
3878        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3879        unregister_pernet_subsys(&ip6_route_net_late_ops);
3880        fib6_rules_cleanup();
3881        xfrm6_fini();
3882        fib6_gc_cleanup();
3883        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3884        unregister_pernet_subsys(&ip6_route_net_ops);
3885        dst_entries_destroy(&ip6_dst_blackhole_ops);
3886        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3887}
3888