linux/net/ipv6/route.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Linux INET6 implementation
   4 *      FIB front-end.
   5 *
   6 *      Authors:
   7 *      Pedro Roque             <roque@di.fc.ul.pt>
   8 */
   9
  10/*      Changes:
  11 *
  12 *      YOSHIFUJI Hideaki @USAGI
  13 *              reworked default router selection.
  14 *              - respect outgoing interface
  15 *              - select from (probably) reachable routers (i.e.
  16 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  17 *              - always select the same router if it is (probably)
  18 *              reachable.  otherwise, round-robin the list.
  19 *      Ville Nuorvala
  20 *              Fixed routing subtrees.
  21 */
  22
  23#define pr_fmt(fmt) "IPv6: " fmt
  24
  25#include <linux/capability.h>
  26#include <linux/errno.h>
  27#include <linux/export.h>
  28#include <linux/types.h>
  29#include <linux/times.h>
  30#include <linux/socket.h>
  31#include <linux/sockios.h>
  32#include <linux/net.h>
  33#include <linux/route.h>
  34#include <linux/netdevice.h>
  35#include <linux/in6.h>
  36#include <linux/mroute6.h>
  37#include <linux/init.h>
  38#include <linux/if_arp.h>
  39#include <linux/proc_fs.h>
  40#include <linux/seq_file.h>
  41#include <linux/nsproxy.h>
  42#include <linux/slab.h>
  43#include <linux/jhash.h>
  44#include <net/net_namespace.h>
  45#include <net/snmp.h>
  46#include <net/ipv6.h>
  47#include <net/ip6_fib.h>
  48#include <net/ip6_route.h>
  49#include <net/ndisc.h>
  50#include <net/addrconf.h>
  51#include <net/tcp.h>
  52#include <linux/rtnetlink.h>
  53#include <net/dst.h>
  54#include <net/dst_metadata.h>
  55#include <net/xfrm.h>
  56#include <net/netevent.h>
  57#include <net/netlink.h>
  58#include <net/rtnh.h>
  59#include <net/lwtunnel.h>
  60#include <net/ip_tunnels.h>
  61#include <net/l3mdev.h>
  62#include <net/ip.h>
  63#include <linux/uaccess.h>
  64
  65#ifdef CONFIG_SYSCTL
  66#include <linux/sysctl.h>
  67#endif
  68
  69static int ip6_rt_type_to_error(u8 fib6_type);
  70
  71#define CREATE_TRACE_POINTS
  72#include <trace/events/fib6.h>
  73EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
  74#undef CREATE_TRACE_POINTS
  75
  76enum rt6_nud_state {
  77        RT6_NUD_FAIL_HARD = -3,
  78        RT6_NUD_FAIL_PROBE = -2,
  79        RT6_NUD_FAIL_DO_RR = -1,
  80        RT6_NUD_SUCCEED = 1
  81};
  82
  83static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  84static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  85static unsigned int      ip6_mtu(const struct dst_entry *dst);
  86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  87static void             ip6_dst_destroy(struct dst_entry *);
  88static void             ip6_dst_ifdown(struct dst_entry *,
  89                                       struct net_device *dev, int how);
  90static int               ip6_dst_gc(struct dst_ops *ops);
  91
  92static int              ip6_pkt_discard(struct sk_buff *skb);
  93static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  94static int              ip6_pkt_prohibit(struct sk_buff *skb);
  95static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  96static void             ip6_link_failure(struct sk_buff *skb);
  97static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  98                                           struct sk_buff *skb, u32 mtu);
  99static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
 100                                        struct sk_buff *skb);
 101static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 102                           int strict);
 103static size_t rt6_nlmsg_size(struct fib6_info *rt);
 104static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 105                         struct fib6_info *rt, struct dst_entry *dst,
 106                         struct in6_addr *dest, struct in6_addr *src,
 107                         int iif, int type, u32 portid, u32 seq,
 108                         unsigned int flags);
 109static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
 110                                           const struct in6_addr *daddr,
 111                                           const struct in6_addr *saddr);
 112
 113#ifdef CONFIG_IPV6_ROUTE_INFO
 114static struct fib6_info *rt6_add_route_info(struct net *net,
 115                                           const struct in6_addr *prefix, int prefixlen,
 116                                           const struct in6_addr *gwaddr,
 117                                           struct net_device *dev,
 118                                           unsigned int pref);
 119static struct fib6_info *rt6_get_route_info(struct net *net,
 120                                           const struct in6_addr *prefix, int prefixlen,
 121                                           const struct in6_addr *gwaddr,
 122                                           struct net_device *dev);
 123#endif
 124
 125struct uncached_list {
 126        spinlock_t              lock;
 127        struct list_head        head;
 128};
 129
 130static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
 131
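/* Per-cpu lists of rt6_info dst entries that are not linked into the FIB.
 * Tracking them here lets rt6_uncached_list_flush_dev() re-point their
 * device and inet6_dev references at the loopback device when the
 * underlying device is unregistered.
 */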
 132void rt6_uncached_list_add(struct rt6_info *rt)
 133{
 134        struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
 135
 136        rt->rt6i_uncached_list = ul;
 137
 138        spin_lock_bh(&ul->lock);
 139        list_add_tail(&rt->rt6i_uncached, &ul->head);
 140        spin_unlock_bh(&ul->lock);
 141}
 142
 143void rt6_uncached_list_del(struct rt6_info *rt)
 144{
 145        if (!list_empty(&rt->rt6i_uncached)) {
 146                struct uncached_list *ul = rt->rt6i_uncached_list;
 147                struct net *net = dev_net(rt->dst.dev);
 148
 149                spin_lock_bh(&ul->lock);
 150                list_del(&rt->rt6i_uncached);
 151                atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
 152                spin_unlock_bh(&ul->lock);
 153        }
 154}
 155
 156static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 157{
 158        struct net_device *loopback_dev = net->loopback_dev;
 159        int cpu;
 160
 161        if (dev == loopback_dev)
 162                return;
 163
 164        for_each_possible_cpu(cpu) {
 165                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 166                struct rt6_info *rt;
 167
 168                spin_lock_bh(&ul->lock);
 169                list_for_each_entry(rt, &ul->head, rt6i_uncached) {
 170                        struct inet6_dev *rt_idev = rt->rt6i_idev;
 171                        struct net_device *rt_dev = rt->dst.dev;
 172
 173                        if (rt_idev->dev == dev) {
 174                                rt->rt6i_idev = in6_dev_get(loopback_dev);
 175                                in6_dev_put(rt_idev);
 176                        }
 177
 178                        if (rt_dev == dev) {
 179                                rt->dst.dev = loopback_dev;
 180                                dev_hold(rt->dst.dev);
 181                                dev_put(rt_dev);
 182                        }
 183                }
 184                spin_unlock_bh(&ul->lock);
 185        }
 186}
 187
 188static inline const void *choose_neigh_daddr(const struct in6_addr *p,
 189                                             struct sk_buff *skb,
 190                                             const void *daddr)
 191{
 192        if (!ipv6_addr_any(p))
 193                return (const void *) p;
 194        else if (skb)
 195                return &ipv6_hdr(skb)->daddr;
 196        return daddr;
 197}
 198
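/* Look up (or create) the neighbour entry to use for a route: prefer the
 * gateway address when one is set, otherwise fall back to the destination
 * from the skb, or to the caller-supplied daddr.
 */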
 199struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 200                                   struct net_device *dev,
 201                                   struct sk_buff *skb,
 202                                   const void *daddr)
 203{
 204        struct neighbour *n;
 205
 206        daddr = choose_neigh_daddr(gw, skb, daddr);
 207        n = __ipv6_neigh_lookup(dev, daddr);
 208        if (n)
 209                return n;
 210
 211        n = neigh_create(&nd_tbl, daddr, dev);
 212        return IS_ERR(n) ? NULL : n;
 213}
 214
 215static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
 216                                              struct sk_buff *skb,
 217                                              const void *daddr)
 218{
 219        const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
 220
 221        return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
 222                                dst->dev, skb, daddr);
 223}
 224
 225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 226{
 227        struct net_device *dev = dst->dev;
 228        struct rt6_info *rt = (struct rt6_info *)dst;
 229
 230        daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
 231        if (!daddr)
 232                return;
 233        if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
 234                return;
 235        if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
 236                return;
 237        __ipv6_confirm_neigh(dev, daddr);
 238}
 239
 240static struct dst_ops ip6_dst_ops_template = {
 241        .family                 =       AF_INET6,
 242        .gc                     =       ip6_dst_gc,
 243        .gc_thresh              =       1024,
 244        .check                  =       ip6_dst_check,
 245        .default_advmss         =       ip6_default_advmss,
 246        .mtu                    =       ip6_mtu,
 247        .cow_metrics            =       dst_cow_metrics_generic,
 248        .destroy                =       ip6_dst_destroy,
 249        .ifdown                 =       ip6_dst_ifdown,
 250        .negative_advice        =       ip6_negative_advice,
 251        .link_failure           =       ip6_link_failure,
 252        .update_pmtu            =       ip6_rt_update_pmtu,
 253        .redirect               =       rt6_do_redirect,
 254        .local_out              =       __ip6_local_out,
 255        .neigh_lookup           =       ip6_dst_neigh_lookup,
 256        .confirm_neigh          =       ip6_confirm_neigh,
 257};
 258
 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 260{
 261        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 262
 263        return mtu ? : dst->dev->mtu;
 264}
 265
 266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 267                                         struct sk_buff *skb, u32 mtu)
 268{
 269}
 270
 271static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 272                                      struct sk_buff *skb)
 273{
 274}
 275
 276static struct dst_ops ip6_dst_blackhole_ops = {
 277        .family                 =       AF_INET6,
 278        .destroy                =       ip6_dst_destroy,
 279        .check                  =       ip6_dst_check,
 280        .mtu                    =       ip6_blackhole_mtu,
 281        .default_advmss         =       ip6_default_advmss,
 282        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 283        .redirect               =       ip6_rt_blackhole_redirect,
 284        .cow_metrics            =       dst_cow_metrics_generic,
 285        .neigh_lookup           =       ip6_dst_neigh_lookup,
 286};
 287
 288static const u32 ip6_template_metrics[RTAX_MAX] = {
 289        [RTAX_HOPLIMIT - 1] = 0,
 290};
 291
 292static const struct fib6_info fib6_null_entry_template = {
 293        .fib6_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 294        .fib6_protocol  = RTPROT_KERNEL,
 295        .fib6_metric    = ~(u32)0,
 296        .fib6_ref       = REFCOUNT_INIT(1),
 297        .fib6_type      = RTN_UNREACHABLE,
 298        .fib6_metrics   = (struct dst_metrics *)&dst_default_metrics,
 299};
 300
 301static const struct rt6_info ip6_null_entry_template = {
 302        .dst = {
 303                .__refcnt       = ATOMIC_INIT(1),
 304                .__use          = 1,
 305                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 306                .error          = -ENETUNREACH,
 307                .input          = ip6_pkt_discard,
 308                .output         = ip6_pkt_discard_out,
 309        },
 310        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 311};
 312
 313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 314
 315static const struct rt6_info ip6_prohibit_entry_template = {
 316        .dst = {
 317                .__refcnt       = ATOMIC_INIT(1),
 318                .__use          = 1,
 319                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 320                .error          = -EACCES,
 321                .input          = ip6_pkt_prohibit,
 322                .output         = ip6_pkt_prohibit_out,
 323        },
 324        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 325};
 326
 327static const struct rt6_info ip6_blk_hole_entry_template = {
 328        .dst = {
 329                .__refcnt       = ATOMIC_INIT(1),
 330                .__use          = 1,
 331                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 332                .error          = -EINVAL,
 333                .input          = dst_discard,
 334                .output         = dst_discard_out,
 335        },
 336        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 337};
 338
 339#endif
 340
 341static void rt6_info_init(struct rt6_info *rt)
 342{
 343        struct dst_entry *dst = &rt->dst;
 344
 345        memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 346        INIT_LIST_HEAD(&rt->rt6i_uncached);
 347}
 348
 349/* allocate dst with ip6_dst_ops */
 350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
 351                               int flags)
 352{
 353        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 354                                        1, DST_OBSOLETE_FORCE_CHK, flags);
 355
 356        if (rt) {
 357                rt6_info_init(rt);
 358                atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
 359        }
 360
 361        return rt;
 362}
 363EXPORT_SYMBOL(ip6_dst_alloc);
 364
 365static void ip6_dst_destroy(struct dst_entry *dst)
 366{
 367        struct rt6_info *rt = (struct rt6_info *)dst;
 368        struct fib6_info *from;
 369        struct inet6_dev *idev;
 370
 371        ip_dst_metrics_put(dst);
 372        rt6_uncached_list_del(rt);
 373
 374        idev = rt->rt6i_idev;
 375        if (idev) {
 376                rt->rt6i_idev = NULL;
 377                in6_dev_put(idev);
 378        }
 379
 380        from = xchg((__force struct fib6_info **)&rt->from, NULL);
 381        fib6_info_release(from);
 382}
 383
 384static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 385                           int how)
 386{
 387        struct rt6_info *rt = (struct rt6_info *)dst;
 388        struct inet6_dev *idev = rt->rt6i_idev;
 389        struct net_device *loopback_dev =
 390                dev_net(dev)->loopback_dev;
 391
 392        if (idev && idev->dev != loopback_dev) {
 393                struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
 394                if (loopback_idev) {
 395                        rt->rt6i_idev = loopback_idev;
 396                        in6_dev_put(idev);
 397                }
 398        }
 399}
 400
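/* __rt6_check_expired() only honours the dst's own RTF_EXPIRES timer.
 * rt6_check_expired() additionally treats a cached route as expired when
 * its dst has been obsoleted or its parent fib6_info has expired.
 */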
 401static bool __rt6_check_expired(const struct rt6_info *rt)
 402{
 403        if (rt->rt6i_flags & RTF_EXPIRES)
 404                return time_after(jiffies, rt->dst.expires);
 405        else
 406                return false;
 407}
 408
 409static bool rt6_check_expired(const struct rt6_info *rt)
 410{
 411        struct fib6_info *from;
 412
 413        from = rcu_dereference(rt->from);
 414
 415        if (rt->rt6i_flags & RTF_EXPIRES) {
 416                if (time_after(jiffies, rt->dst.expires))
 417                        return true;
 418        } else if (from) {
 419                return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
 420                        fib6_check_expired(from);
 421        }
 422        return false;
 423}
 424
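/* Multipath nexthop selection: compute (or reuse) the flow hash and walk
 * the sibling list, choosing the first sibling whose upper bound covers
 * the hash value (hash-threshold), provided it scores as usable.
 */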
 425void fib6_select_path(const struct net *net, struct fib6_result *res,
 426                      struct flowi6 *fl6, int oif, bool have_oif_match,
 427                      const struct sk_buff *skb, int strict)
 428{
 429        struct fib6_info *sibling, *next_sibling;
 430        struct fib6_info *match = res->f6i;
 431
 432        if (!match->fib6_nsiblings || have_oif_match)
 433                goto out;
 434
 435        /* We might have already computed the hash for ICMPv6 errors. In such
 436         * case it will always be non-zero. Otherwise now is the time to do it.
 437         */
 438        if (!fl6->mp_hash)
 439                fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
 440
 441        if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
 442                goto out;
 443
 444        list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
 445                                 fib6_siblings) {
 446                const struct fib6_nh *nh = &sibling->fib6_nh;
 447                int nh_upper_bound;
 448
 449                nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
 450                if (fl6->mp_hash > nh_upper_bound)
 451                        continue;
 452                if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
 453                        break;
 454                match = sibling;
 455                break;
 456        }
 457
 458out:
 459        res->f6i = match;
 460        res->nh = &match->fib6_nh;
 461}
 462
 463/*
 464 *      Route lookup. rcu_read_lock() should be held.
 465 */
 466
 467static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 468                               const struct in6_addr *saddr, int oif, int flags)
 469{
 470        const struct net_device *dev;
 471
 472        if (nh->fib_nh_flags & RTNH_F_DEAD)
 473                return false;
 474
 475        dev = nh->fib_nh_dev;
 476        if (oif) {
 477                if (dev->ifindex == oif)
 478                        return true;
 479        } else {
 480                if (ipv6_chk_addr(net, saddr, dev,
 481                                  flags & RT6_LOOKUP_F_IFACE))
 482                        return true;
 483        }
 484
 485        return false;
 486}
 487
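/* Select the fib6_info whose nexthop matches the output interface or owns
 * the source address.  Fall back to the null entry when a strict interface
 * match is required but not found, or when the nexthop is dead.
 */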
 488static void rt6_device_match(struct net *net, struct fib6_result *res,
 489                             const struct in6_addr *saddr, int oif, int flags)
 490{
 491        struct fib6_info *f6i = res->f6i;
 492        struct fib6_info *spf6i;
 493        struct fib6_nh *nh;
 494
 495        if (!oif && ipv6_addr_any(saddr)) {
 496                nh = &f6i->fib6_nh;
 497                if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 498                        goto out;
 499        }
 500
 501        for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 502                nh = &spf6i->fib6_nh;
 503                if (__rt6_device_match(net, nh, saddr, oif, flags)) {
 504                        res->f6i = spf6i;
 505                        goto out;
 506                }
 507        }
 508
 509        if (oif && flags & RT6_LOOKUP_F_IFACE) {
 510                res->f6i = net->ipv6.fib6_null_entry;
 511                nh = &res->f6i->fib6_nh;
 512                goto out;
 513        }
 514
 515        nh = &f6i->fib6_nh;
 516        if (nh->fib_nh_flags & RTNH_F_DEAD) {
 517                res->f6i = net->ipv6.fib6_null_entry;
 518                nh = &res->f6i->fib6_nh;
 519        }
 520out:
 521        res->nh = nh;
 522        res->fib6_type = res->f6i->fib6_type;
 523        res->fib6_flags = res->f6i->fib6_flags;
 524}
 525
 526#ifdef CONFIG_IPV6_ROUTER_PREF
 527struct __rt6_probe_work {
 528        struct work_struct work;
 529        struct in6_addr target;
 530        struct net_device *dev;
 531};
 532
 533static void rt6_probe_deferred(struct work_struct *w)
 534{
 535        struct in6_addr mcaddr;
 536        struct __rt6_probe_work *work =
 537                container_of(w, struct __rt6_probe_work, work);
 538
 539        addrconf_addr_solict_mult(&work->target, &mcaddr);
 540        ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
 541        dev_put(work->dev);
 542        kfree(work);
 543}
 544
 545static void rt6_probe(struct fib6_nh *fib6_nh)
 546{
 547        struct __rt6_probe_work *work = NULL;
 548        const struct in6_addr *nh_gw;
 549        struct neighbour *neigh;
 550        struct net_device *dev;
 551        struct inet6_dev *idev;
 552
 553        /*
  554         * This may not be the most appropriate place for Router
  555         * Reachability Probing, but keep it here until it is
  556         * shown otherwise.
 557         *
 558         * Router Reachability Probe MUST be rate-limited
 559         * to no more than one per minute.
 560         */
 561        if (fib6_nh->fib_nh_gw_family)
 562                return;
 563
 564        nh_gw = &fib6_nh->fib_nh_gw6;
 565        dev = fib6_nh->fib_nh_dev;
 566        rcu_read_lock_bh();
 567        idev = __in6_dev_get(dev);
 568        neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 569        if (neigh) {
 570                if (neigh->nud_state & NUD_VALID)
 571                        goto out;
 572
 573                write_lock(&neigh->lock);
 574                if (!(neigh->nud_state & NUD_VALID) &&
 575                    time_after(jiffies,
 576                               neigh->updated + idev->cnf.rtr_probe_interval)) {
 577                        work = kmalloc(sizeof(*work), GFP_ATOMIC);
 578                        if (work)
 579                                __neigh_set_probe_once(neigh);
 580                }
 581                write_unlock(&neigh->lock);
 582        } else if (time_after(jiffies, fib6_nh->last_probe +
 583                                       idev->cnf.rtr_probe_interval)) {
 584                work = kmalloc(sizeof(*work), GFP_ATOMIC);
 585        }
 586
 587        if (work) {
 588                fib6_nh->last_probe = jiffies;
 589                INIT_WORK(&work->work, rt6_probe_deferred);
 590                work->target = *nh_gw;
 591                dev_hold(dev);
 592                work->dev = dev;
 593                schedule_work(&work->work);
 594        }
 595
 596out:
 597        rcu_read_unlock_bh();
 598}
 599#else
 600static inline void rt6_probe(struct fib6_nh *fib6_nh)
 601{
 602}
 603#endif
 604
 605/*
 606 * Default Router Selection (RFC 2461 6.3.6)
 607 */
 608static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
 609{
 610        enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
 611        struct neighbour *neigh;
 612
 613        rcu_read_lock_bh();
 614        neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 615                                          &fib6_nh->fib_nh_gw6);
 616        if (neigh) {
 617                read_lock(&neigh->lock);
 618                if (neigh->nud_state & NUD_VALID)
 619                        ret = RT6_NUD_SUCCEED;
 620#ifdef CONFIG_IPV6_ROUTER_PREF
 621                else if (!(neigh->nud_state & NUD_FAILED))
 622                        ret = RT6_NUD_SUCCEED;
 623                else
 624                        ret = RT6_NUD_FAIL_PROBE;
 625#endif
 626                read_unlock(&neigh->lock);
 627        } else {
 628                ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
 629                      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
 630        }
 631        rcu_read_unlock_bh();
 632
 633        return ret;
 634}
 635
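/* Score a nexthop for router selection: 2 when the output interface
 * matches (or none was given), plus the decoded router preference.  When
 * reachability is required, a negative RT6_NUD_* code is returned for
 * neighbours that are not usable.
 */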
 636static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 637                           int strict)
 638{
 639        int m = 0;
 640
 641        if (!oif || nh->fib_nh_dev->ifindex == oif)
 642                m = 2;
 643
 644        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 645                return RT6_NUD_FAIL_HARD;
 646#ifdef CONFIG_IPV6_ROUTER_PREF
 647        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
 648#endif
 649        if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 650            !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 651                int n = rt6_check_neigh(nh);
 652                if (n < 0)
 653                        return n;
 654        }
 655        return m;
 656}
 657
 658static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 659                       int oif, int strict, int *mpri, bool *do_rr)
 660{
 661        bool match_do_rr = false;
 662        bool rc = false;
 663        int m;
 664
 665        if (nh->fib_nh_flags & RTNH_F_DEAD)
 666                goto out;
 667
 668        if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 669            nh->fib_nh_flags & RTNH_F_LINKDOWN &&
 670            !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
 671                goto out;
 672
 673        m = rt6_score_route(nh, fib6_flags, oif, strict);
 674        if (m == RT6_NUD_FAIL_DO_RR) {
 675                match_do_rr = true;
 676                m = 0; /* lowest valid score */
 677        } else if (m == RT6_NUD_FAIL_HARD) {
 678                goto out;
 679        }
 680
 681        if (strict & RT6_LOOKUP_F_REACHABLE)
 682                rt6_probe(nh);
 683
 684        /* note that m can be RT6_NUD_FAIL_PROBE at this point */
 685        if (m > *mpri) {
 686                *do_rr = match_do_rr;
 687                *mpri = m;
 688                rc = true;
 689        }
 690out:
 691        return rc;
 692}
 693
 694static void __find_rr_leaf(struct fib6_info *f6i_start,
 695                           struct fib6_info *nomatch, u32 metric,
 696                           struct fib6_result *res, struct fib6_info **cont,
 697                           int oif, int strict, bool *do_rr, int *mpri)
 698{
 699        struct fib6_info *f6i;
 700
 701        for (f6i = f6i_start;
 702             f6i && f6i != nomatch;
 703             f6i = rcu_dereference(f6i->fib6_next)) {
 704                struct fib6_nh *nh;
 705
 706                if (cont && f6i->fib6_metric != metric) {
 707                        *cont = f6i;
 708                        return;
 709                }
 710
 711                if (fib6_check_expired(f6i))
 712                        continue;
 713
 714                nh = &f6i->fib6_nh;
 715                if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
 716                        res->f6i = f6i;
 717                        res->nh = nh;
 718                        res->fib6_flags = f6i->fib6_flags;
 719                        res->fib6_type = f6i->fib6_type;
 720                }
 721        }
 722}
 723
 724static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 725                         struct fib6_info *rr_head, int oif, int strict,
 726                         bool *do_rr, struct fib6_result *res)
 727{
 728        u32 metric = rr_head->fib6_metric;
 729        struct fib6_info *cont = NULL;
 730        int mpri = -1;
 731
 732        __find_rr_leaf(rr_head, NULL, metric, res, &cont,
 733                       oif, strict, do_rr, &mpri);
 734
 735        __find_rr_leaf(leaf, rr_head, metric, res, &cont,
 736                       oif, strict, do_rr, &mpri);
 737
 738        if (res->f6i || !cont)
 739                return;
 740
 741        __find_rr_leaf(cont, NULL, metric, res, NULL,
 742                       oif, strict, do_rr, &mpri);
 743}
 744
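/* Round-robin selection among equal-metric routes: start scoring at
 * fn->rr_ptr, continue over the rest of the leaf list, and advance
 * fn->rr_ptr when the scoring asked for round-robin rotation.
 */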
 745static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 746                       struct fib6_result *res, int strict)
 747{
 748        struct fib6_info *leaf = rcu_dereference(fn->leaf);
 749        struct fib6_info *rt0;
 750        bool do_rr = false;
 751        int key_plen;
 752
  753        /* make sure this function or its helpers set f6i */
 754        res->f6i = NULL;
 755
 756        if (!leaf || leaf == net->ipv6.fib6_null_entry)
 757                goto out;
 758
 759        rt0 = rcu_dereference(fn->rr_ptr);
 760        if (!rt0)
 761                rt0 = leaf;
 762
 763        /* Double check to make sure fn is not an intermediate node
  764         * and fn->leaf does not point to its child's leaf
 765         * (This might happen if all routes under fn are deleted from
 766         * the tree and fib6_repair_tree() is called on the node.)
 767         */
 768        key_plen = rt0->fib6_dst.plen;
 769#ifdef CONFIG_IPV6_SUBTREES
 770        if (rt0->fib6_src.plen)
 771                key_plen = rt0->fib6_src.plen;
 772#endif
 773        if (fn->fn_bit != key_plen)
 774                goto out;
 775
 776        find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
 777        if (do_rr) {
 778                struct fib6_info *next = rcu_dereference(rt0->fib6_next);
 779
 780                /* no entries matched; do round-robin */
 781                if (!next || next->fib6_metric != rt0->fib6_metric)
 782                        next = leaf;
 783
 784                if (next != rt0) {
 785                        spin_lock_bh(&leaf->fib6_table->tb6_lock);
 786                        /* make sure next is not being deleted from the tree */
 787                        if (next->fib6_node)
 788                                rcu_assign_pointer(fn->rr_ptr, next);
 789                        spin_unlock_bh(&leaf->fib6_table->tb6_lock);
 790                }
 791        }
 792
 793out:
 794        if (!res->f6i) {
 795                res->f6i = net->ipv6.fib6_null_entry;
 796                res->nh = &res->f6i->fib6_nh;
 797                res->fib6_flags = res->f6i->fib6_flags;
 798                res->fib6_type = res->f6i->fib6_type;
 799        }
 800}
 801
 802static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
 803{
 804        return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 805               res->nh->fib_nh_gw_family;
 806}
 807
 808#ifdef CONFIG_IPV6_ROUTE_INFO
 809int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 810                  const struct in6_addr *gwaddr)
 811{
 812        struct net *net = dev_net(dev);
 813        struct route_info *rinfo = (struct route_info *) opt;
 814        struct in6_addr prefix_buf, *prefix;
 815        unsigned int pref;
 816        unsigned long lifetime;
 817        struct fib6_info *rt;
 818
 819        if (len < sizeof(struct route_info)) {
 820                return -EINVAL;
 821        }
 822
 823        /* Sanity check for prefix_len and length */
 824        if (rinfo->length > 3) {
 825                return -EINVAL;
 826        } else if (rinfo->prefix_len > 128) {
 827                return -EINVAL;
 828        } else if (rinfo->prefix_len > 64) {
 829                if (rinfo->length < 2) {
 830                        return -EINVAL;
 831                }
 832        } else if (rinfo->prefix_len > 0) {
 833                if (rinfo->length < 1) {
 834                        return -EINVAL;
 835                }
 836        }
 837
 838        pref = rinfo->route_pref;
 839        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 840                return -EINVAL;
 841
 842        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 843
 844        if (rinfo->length == 3)
 845                prefix = (struct in6_addr *)rinfo->prefix;
 846        else {
  847                /* ipv6_addr_prefix() is safe here; prefix_len was validated above */
 848                ipv6_addr_prefix(&prefix_buf,
 849                                 (struct in6_addr *)rinfo->prefix,
 850                                 rinfo->prefix_len);
 851                prefix = &prefix_buf;
 852        }
 853
 854        if (rinfo->prefix_len == 0)
 855                rt = rt6_get_dflt_router(net, gwaddr, dev);
 856        else
 857                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
 858                                        gwaddr, dev);
 859
 860        if (rt && !lifetime) {
 861                ip6_del_rt(net, rt);
 862                rt = NULL;
 863        }
 864
 865        if (!rt && lifetime)
 866                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 867                                        dev, pref);
 868        else if (rt)
 869                rt->fib6_flags = RTF_ROUTEINFO |
 870                                 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 871
 872        if (rt) {
 873                if (!addrconf_finite_timeout(lifetime))
 874                        fib6_clean_expires(rt);
 875                else
 876                        fib6_set_expires(rt, jiffies + HZ * lifetime);
 877
 878                fib6_info_release(rt);
 879        }
 880        return 0;
 881}
 882#endif
 883
 884/*
 885 *      Misc support functions
 886 */
 887
  888/* called with rcu_read_lock() held */
 889static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
 890{
 891        struct net_device *dev = res->nh->fib_nh_dev;
 892
 893        if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
  894                /* For copies of local routes, dst->dev needs to be the
  895                 * device itself if it is an L3 master device, the master
  896                 * device if the device is enslaved, and the loopback device otherwise.
 897                 */
 898                if (netif_is_l3_slave(dev) &&
 899                    !rt6_need_strict(&res->f6i->fib6_dst.addr))
 900                        dev = l3mdev_master_dev_rcu(dev);
 901                else if (!netif_is_l3_master(dev))
 902                        dev = dev_net(dev)->loopback_dev;
  903                /* In the remaining case netif_is_l3_master(dev) is true,
  904                 * and dev itself is what we want returned.
 905                 */
 906        }
 907
 908        return dev;
 909}
 910
 911static const int fib6_prop[RTN_MAX + 1] = {
 912        [RTN_UNSPEC]    = 0,
 913        [RTN_UNICAST]   = 0,
 914        [RTN_LOCAL]     = 0,
 915        [RTN_BROADCAST] = 0,
 916        [RTN_ANYCAST]   = 0,
 917        [RTN_MULTICAST] = 0,
 918        [RTN_BLACKHOLE] = -EINVAL,
 919        [RTN_UNREACHABLE] = -EHOSTUNREACH,
 920        [RTN_PROHIBIT]  = -EACCES,
 921        [RTN_THROW]     = -EAGAIN,
 922        [RTN_NAT]       = -EINVAL,
 923        [RTN_XRESOLVE]  = -EINVAL,
 924};
 925
 926static int ip6_rt_type_to_error(u8 fib6_type)
 927{
 928        return fib6_prop[fib6_type];
 929}
 930
 931static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
 932{
 933        unsigned short flags = 0;
 934
 935        if (rt->dst_nocount)
 936                flags |= DST_NOCOUNT;
 937        if (rt->dst_nopolicy)
 938                flags |= DST_NOPOLICY;
 939        if (rt->dst_host)
 940                flags |= DST_HOST;
 941
 942        return flags;
 943}
 944
 945static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
 946{
 947        rt->dst.error = ip6_rt_type_to_error(fib6_type);
 948
 949        switch (fib6_type) {
 950        case RTN_BLACKHOLE:
 951                rt->dst.output = dst_discard_out;
 952                rt->dst.input = dst_discard;
 953                break;
 954        case RTN_PROHIBIT:
 955                rt->dst.output = ip6_pkt_prohibit_out;
 956                rt->dst.input = ip6_pkt_prohibit;
 957                break;
 958        case RTN_THROW:
 959        case RTN_UNREACHABLE:
 960        default:
 961                rt->dst.output = ip6_pkt_discard_out;
 962                rt->dst.input = ip6_pkt_discard;
 963                break;
 964        }
 965}
 966
 967static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
 968{
 969        struct fib6_info *f6i = res->f6i;
 970
 971        if (res->fib6_flags & RTF_REJECT) {
 972                ip6_rt_init_dst_reject(rt, res->fib6_type);
 973                return;
 974        }
 975
 976        rt->dst.error = 0;
 977        rt->dst.output = ip6_output;
 978
 979        if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
 980                rt->dst.input = ip6_input;
 981        } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
 982                rt->dst.input = ip6_mc_input;
 983        } else {
 984                rt->dst.input = ip6_forward;
 985        }
 986
 987        if (res->nh->fib_nh_lws) {
 988                rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
 989                lwtunnel_set_redirect(&rt->dst);
 990        }
 991
 992        rt->dst.lastuse = jiffies;
 993}
 994
 995/* Caller must already hold reference to @from */
 996static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 997{
 998        rt->rt6i_flags &= ~RTF_EXPIRES;
 999        rcu_assign_pointer(rt->from, from);
1000        ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1001}
1002
1003/* Caller must already hold reference to f6i in result */
1004static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1005{
1006        const struct fib6_nh *nh = res->nh;
1007        const struct net_device *dev = nh->fib_nh_dev;
1008        struct fib6_info *f6i = res->f6i;
1009
1010        ip6_rt_init_dst(rt, res);
1011
1012        rt->rt6i_dst = f6i->fib6_dst;
1013        rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1014        rt->rt6i_flags = res->fib6_flags;
1015        if (nh->fib_nh_gw_family) {
1016                rt->rt6i_gateway = nh->fib_nh_gw6;
1017                rt->rt6i_flags |= RTF_GATEWAY;
1018        }
1019        rt6_set_from(rt, f6i);
1020#ifdef CONFIG_IPV6_SUBTREES
1021        rt->rt6i_src = f6i->fib6_src;
1022#endif
1023}
1024
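/* Walk back up the tree from fn until a node carrying route info
 * (RTN_RTINFO) is found, descending into a parent's source subtree when
 * one exists.  Returns NULL once the table root is reached.
 */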
1025static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1026                                        struct in6_addr *saddr)
1027{
1028        struct fib6_node *pn, *sn;
1029        while (1) {
1030                if (fn->fn_flags & RTN_TL_ROOT)
1031                        return NULL;
1032                pn = rcu_dereference(fn->parent);
1033                sn = FIB6_SUBTREE(pn);
1034                if (sn && sn != fn)
1035                        fn = fib6_node_lookup(sn, NULL, saddr);
1036                else
1037                        fn = pn;
1038                if (fn->fn_flags & RTN_RTINFO)
1039                        return fn;
1040        }
1041}
1042
1043static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1044{
1045        struct rt6_info *rt = *prt;
1046
1047        if (dst_hold_safe(&rt->dst))
1048                return true;
1049        if (net) {
1050                rt = net->ipv6.ip6_null_entry;
1051                dst_hold(&rt->dst);
1052        } else {
1053                rt = NULL;
1054        }
1055        *prt = rt;
1056        return false;
1057}
1058
 1059/* called with rcu_read_lock() held */
1060static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1061{
1062        struct net_device *dev = res->nh->fib_nh_dev;
1063        struct fib6_info *f6i = res->f6i;
1064        unsigned short flags;
1065        struct rt6_info *nrt;
1066
1067        if (!fib6_info_hold_safe(f6i))
1068                goto fallback;
1069
1070        flags = fib6_info_dst_flags(f6i);
1071        nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1072        if (!nrt) {
1073                fib6_info_release(f6i);
1074                goto fallback;
1075        }
1076
1077        ip6_rt_copy_init(nrt, res);
1078        return nrt;
1079
1080fallback:
1081        nrt = dev_net(dev)->ipv6.ip6_null_entry;
1082        dst_hold(&nrt->dst);
1083        return nrt;
1084}
1085
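/* Flow-based lookup under RCU: find the fib node for daddr/saddr, match
 * device and source constraints (backtracking on failure), select a
 * multipath nexthop, then return a cached exception route if one exists,
 * or create a new rt6_info copy otherwise.
 */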
1086static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1087                                             struct fib6_table *table,
1088                                             struct flowi6 *fl6,
1089                                             const struct sk_buff *skb,
1090                                             int flags)
1091{
1092        struct fib6_result res = {};
1093        struct fib6_node *fn;
1094        struct rt6_info *rt;
1095
1096        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1097                flags &= ~RT6_LOOKUP_F_IFACE;
1098
1099        rcu_read_lock();
1100        fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1101restart:
1102        res.f6i = rcu_dereference(fn->leaf);
1103        if (!res.f6i)
1104                res.f6i = net->ipv6.fib6_null_entry;
1105        else
1106                rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
1107                                 flags);
1108
1109        if (res.f6i == net->ipv6.fib6_null_entry) {
1110                fn = fib6_backtrack(fn, &fl6->saddr);
1111                if (fn)
1112                        goto restart;
1113
1114                rt = net->ipv6.ip6_null_entry;
1115                dst_hold(&rt->dst);
1116                goto out;
1117        }
1118
1119        fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1120                         fl6->flowi6_oif != 0, skb, flags);
1121
1122        /* Search through exception table */
1123        rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1124        if (rt) {
1125                if (ip6_hold_safe(net, &rt))
1126                        dst_use_noref(&rt->dst, jiffies);
1127        } else {
1128                rt = ip6_create_rt_rcu(&res);
1129        }
1130
1131out:
1132        trace_fib6_table_lookup(net, &res, table, fl6);
1133
1134        rcu_read_unlock();
1135
1136        return rt;
1137}
1138
1139struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1140                                   const struct sk_buff *skb, int flags)
1141{
1142        return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1143}
1144EXPORT_SYMBOL_GPL(ip6_route_lookup);
1145
1146struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1147                            const struct in6_addr *saddr, int oif,
1148                            const struct sk_buff *skb, int strict)
1149{
1150        struct flowi6 fl6 = {
1151                .flowi6_oif = oif,
1152                .daddr = *daddr,
1153        };
1154        struct dst_entry *dst;
1155        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1156
1157        if (saddr) {
1158                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1159                flags |= RT6_LOOKUP_F_HAS_SADDR;
1160        }
1161
1162        dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1163        if (dst->error == 0)
1164                return (struct rt6_info *) dst;
1165
1166        dst_release(dst);
1167
1168        return NULL;
1169}
1170EXPORT_SYMBOL(rt6_lookup);
1171
 1172/* ip6_ins_rt is called with table->tb6_lock free (not held).
 1173 * It takes a new route entry; if the addition fails for any reason,
 1174 * the route is released.
 1175 * Caller must hold a reference on the route before calling it.
1176 */
1177
1178static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1179                        struct netlink_ext_ack *extack)
1180{
1181        int err;
1182        struct fib6_table *table;
1183
1184        table = rt->fib6_table;
1185        spin_lock_bh(&table->tb6_lock);
1186        err = fib6_add(&table->tb6_root, rt, info, extack);
1187        spin_unlock_bh(&table->tb6_lock);
1188
1189        return err;
1190}
1191
1192int ip6_ins_rt(struct net *net, struct fib6_info *rt)
1193{
1194        struct nl_info info = { .nl_net = net, };
1195
1196        return __ip6_ins_rt(rt, &info, NULL);
1197}
1198
1199static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
1200                                           const struct in6_addr *daddr,
1201                                           const struct in6_addr *saddr)
1202{
1203        struct fib6_info *f6i = res->f6i;
1204        struct net_device *dev;
1205        struct rt6_info *rt;
1206
1207        /*
1208         *      Clone the route.
1209         */
1210
1211        if (!fib6_info_hold_safe(f6i))
1212                return NULL;
1213
1214        dev = ip6_rt_get_dev_rcu(res);
1215        rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1216        if (!rt) {
1217                fib6_info_release(f6i);
1218                return NULL;
1219        }
1220
1221        ip6_rt_copy_init(rt, res);
1222        rt->rt6i_flags |= RTF_CACHE;
1223        rt->dst.flags |= DST_HOST;
1224        rt->rt6i_dst.addr = *daddr;
1225        rt->rt6i_dst.plen = 128;
1226
1227        if (!rt6_is_gw_or_nonexthop(res)) {
1228                if (f6i->fib6_dst.plen != 128 &&
1229                    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
1230                        rt->rt6i_flags |= RTF_ANYCAST;
1231#ifdef CONFIG_IPV6_SUBTREES
1232                if (rt->rt6i_src.plen && saddr) {
1233                        rt->rt6i_src.addr = *saddr;
1234                        rt->rt6i_src.plen = 128;
1235                }
1236#endif
1237        }
1238
1239        return rt;
1240}
1241
1242static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1243{
1244        struct fib6_info *f6i = res->f6i;
1245        unsigned short flags = fib6_info_dst_flags(f6i);
1246        struct net_device *dev;
1247        struct rt6_info *pcpu_rt;
1248
1249        if (!fib6_info_hold_safe(f6i))
1250                return NULL;
1251
1252        rcu_read_lock();
1253        dev = ip6_rt_get_dev_rcu(res);
1254        pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1255        rcu_read_unlock();
1256        if (!pcpu_rt) {
1257                fib6_info_release(f6i);
1258                return NULL;
1259        }
1260        ip6_rt_copy_init(pcpu_rt, res);
1261        pcpu_rt->rt6i_flags |= RTF_PCPU;
1262        return pcpu_rt;
1263}
1264
1265/* It should be called with rcu_read_lock() acquired */
1266static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1267{
1268        struct rt6_info *pcpu_rt, **p;
1269
1270        p = this_cpu_ptr(res->f6i->rt6i_pcpu);
1271        pcpu_rt = *p;
1272
1273        if (pcpu_rt)
1274                ip6_hold_safe(NULL, &pcpu_rt);
1275
1276        return pcpu_rt;
1277}
1278
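/* Allocate a per-cpu copy of the route and install it in the fib6_info's
 * pcpu cache.  If the fib6_info is already being destroyed, drop the
 * 'from' reference immediately so the pcpu dst does not pin it.
 */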
1279static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1280                                            const struct fib6_result *res)
1281{
1282        struct rt6_info *pcpu_rt, *prev, **p;
1283
1284        pcpu_rt = ip6_rt_pcpu_alloc(res);
1285        if (!pcpu_rt) {
1286                dst_hold(&net->ipv6.ip6_null_entry->dst);
1287                return net->ipv6.ip6_null_entry;
1288        }
1289
1290        dst_hold(&pcpu_rt->dst);
1291        p = this_cpu_ptr(res->f6i->rt6i_pcpu);
1292        prev = cmpxchg(p, NULL, pcpu_rt);
1293        BUG_ON(prev);
1294
1295        if (res->f6i->fib6_destroying) {
1296                struct fib6_info *from;
1297
1298                from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
1299                fib6_info_release(from);
1300        }
1301
1302        return pcpu_rt;
1303}
1304
1305/* exception hash table implementation
1306 */
1307static DEFINE_SPINLOCK(rt6_exception_lock);
1308
1309/* Remove rt6_ex from hash table and free the memory
1310 * Caller must hold rt6_exception_lock
1311 */
1312static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1313                                 struct rt6_exception *rt6_ex)
1314{
1315        struct fib6_info *from;
1316        struct net *net;
1317
1318        if (!bucket || !rt6_ex)
1319                return;
1320
1321        net = dev_net(rt6_ex->rt6i->dst.dev);
1322        net->ipv6.rt6_stats->fib_rt_cache--;
1323
 1324        /* Purge the exception completely so its held resources can be released:
 1325         * some socket (sk) caches may keep the dst around for an unlimited time
1326         */
1327        from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
1328        fib6_info_release(from);
1329        dst_dev_put(&rt6_ex->rt6i->dst);
1330
1331        hlist_del_rcu(&rt6_ex->hlist);
1332        dst_release(&rt6_ex->rt6i->dst);
1333        kfree_rcu(rt6_ex, rcu);
1334        WARN_ON_ONCE(!bucket->depth);
1335        bucket->depth--;
1336}
1337
1338/* Remove oldest rt6_ex in bucket and free the memory
1339 * Caller must hold rt6_exception_lock
1340 */
1341static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1342{
1343        struct rt6_exception *rt6_ex, *oldest = NULL;
1344
1345        if (!bucket)
1346                return;
1347
1348        hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1349                if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1350                        oldest = rt6_ex;
1351        }
1352        rt6_remove_exception(bucket, oldest);
1353}
1354
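/* Hash the destination (and, with subtrees, the source) address into one
 * of FIB6_EXCEPTION_BUCKET_SIZE buckets, using a random seed generated
 * once on first use.
 */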
1355static u32 rt6_exception_hash(const struct in6_addr *dst,
1356                              const struct in6_addr *src)
1357{
1358        static u32 seed __read_mostly;
1359        u32 val;
1360
1361        net_get_random_once(&seed, sizeof(seed));
1362        val = jhash(dst, sizeof(*dst), seed);
1363
1364#ifdef CONFIG_IPV6_SUBTREES
1365        if (src)
1366                val = jhash(src, sizeof(*src), val);
1367#endif
1368        return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1369}
1370
1371/* Helper function to find the cached rt in the hash table
1372 * and update bucket pointer to point to the bucket for this
1373 * (daddr, saddr) pair
1374 * Caller must hold rt6_exception_lock
1375 */
1376static struct rt6_exception *
1377__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1378                              const struct in6_addr *daddr,
1379                              const struct in6_addr *saddr)
1380{
1381        struct rt6_exception *rt6_ex;
1382        u32 hval;
1383
1384        if (!(*bucket) || !daddr)
1385                return NULL;
1386
1387        hval = rt6_exception_hash(daddr, saddr);
1388        *bucket += hval;
1389
1390        hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1391                struct rt6_info *rt6 = rt6_ex->rt6i;
1392                bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1393
1394#ifdef CONFIG_IPV6_SUBTREES
1395                if (matched && saddr)
1396                        matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1397#endif
1398                if (matched)
1399                        return rt6_ex;
1400        }
1401        return NULL;
1402}
1403
1404/* Helper function to find the cached rt in the hash table
1405 * and update bucket pointer to point to the bucket for this
1406 * (daddr, saddr) pair
1407 * Caller must hold rcu_read_lock()
1408 */
1409static struct rt6_exception *
1410__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1411                         const struct in6_addr *daddr,
1412                         const struct in6_addr *saddr)
1413{
1414        struct rt6_exception *rt6_ex;
1415        u32 hval;
1416
1417        WARN_ON_ONCE(!rcu_read_lock_held());
1418
1419        if (!(*bucket) || !daddr)
1420                return NULL;
1421
1422        hval = rt6_exception_hash(daddr, saddr);
1423        *bucket += hval;
1424
1425        hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1426                struct rt6_info *rt6 = rt6_ex->rt6i;
1427                bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1428
1429#ifdef CONFIG_IPV6_SUBTREES
1430                if (matched && saddr)
1431                        matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1432#endif
1433                if (matched)
1434                        return rt6_ex;
1435        }
1436        return NULL;
1437}
1438
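/* Effective MTU for a lookup result: the route's configured PMTU if set,
 * otherwise the device's IPv6 MTU (idev->cnf.mtu6), capped at IP6_MAX_MTU
 * and reduced by any lwtunnel encapsulation headroom.
 */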
1439static unsigned int fib6_mtu(const struct fib6_result *res)
1440{
1441        const struct fib6_nh *nh = res->nh;
1442        unsigned int mtu;
1443
1444        if (res->f6i->fib6_pmtu) {
1445                mtu = res->f6i->fib6_pmtu;
1446        } else {
1447                struct net_device *dev = nh->fib_nh_dev;
1448                struct inet6_dev *idev;
1449
1450                rcu_read_lock();
1451                idev = __in6_dev_get(dev);
1452                mtu = idev->cnf.mtu6;
1453                rcu_read_unlock();
1454        }
1455
1456        mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1457
1458        return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1459}
1460
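/* Install @nrt as a cached exception route (e.g. a PMTU or redirect clone)
 * of @res->f6i: replace any existing entry for the same (daddr, saddr),
 * cap the bucket depth, and bump the fib node sernum so stale cached dsts
 * are re-validated.
 */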
1461static int rt6_insert_exception(struct rt6_info *nrt,
1462                                const struct fib6_result *res)
1463{
1464        struct net *net = dev_net(nrt->dst.dev);
1465        struct rt6_exception_bucket *bucket;
1466        struct in6_addr *src_key = NULL;
1467        struct rt6_exception *rt6_ex;
1468        struct fib6_info *f6i = res->f6i;
1469        int err = 0;
1470
1471        spin_lock_bh(&rt6_exception_lock);
1472
1473        if (f6i->exception_bucket_flushed) {
1474                err = -EINVAL;
1475                goto out;
1476        }
1477
1478        bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
1479                                        lockdep_is_held(&rt6_exception_lock));
1480        if (!bucket) {
1481                bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1482                                 GFP_ATOMIC);
1483                if (!bucket) {
1484                        err = -ENOMEM;
1485                        goto out;
1486                }
1487                rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
1488        }
1489
1490#ifdef CONFIG_IPV6_SUBTREES
1491        /* fib6_src.plen != 0 indicates f6i is in subtree
1492         * and exception table is indexed by a hash of
1493         * both fib6_dst and fib6_src.
1494         * Otherwise, the exception table is indexed by
1495         * a hash of only fib6_dst.
1496         */
1497        if (f6i->fib6_src.plen)
1498                src_key = &nrt->rt6i_src.addr;
1499#endif
1500        /* rt6_mtu_change() might lower mtu on f6i.
1501         * Only insert this exception route if its mtu
1502         * is less than f6i's mtu value.
1503         */
1504        if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1505                err = -EINVAL;
1506                goto out;
1507        }
1508
1509        rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1510                                               src_key);
1511        if (rt6_ex)
1512                rt6_remove_exception(bucket, rt6_ex);
1513
1514        rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1515        if (!rt6_ex) {
1516                err = -ENOMEM;
1517                goto out;
1518        }
1519        rt6_ex->rt6i = nrt;
1520        rt6_ex->stamp = jiffies;
1521        hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1522        bucket->depth++;
1523        net->ipv6.rt6_stats->fib_rt_cache++;
1524
1525        if (bucket->depth > FIB6_MAX_DEPTH)
1526                rt6_exception_remove_oldest(bucket);
1527
1528out:
1529        spin_unlock_bh(&rt6_exception_lock);
1530
1531        /* Update fn->fn_sernum to invalidate all cached dst */
1532        if (!err) {
1533                spin_lock_bh(&f6i->fib6_table->tb6_lock);
1534                fib6_update_sernum(net, f6i);
1535                spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1536                fib6_force_start_gc(net);
1537        }
1538
1539        return err;
1540}
1541
1542void rt6_flush_exceptions(struct fib6_info *rt)
1543{
1544        struct rt6_exception_bucket *bucket;
1545        struct rt6_exception *rt6_ex;
1546        struct hlist_node *tmp;
1547        int i;
1548
1549        spin_lock_bh(&rt6_exception_lock);
 1550        /* Prevent rt6_insert_exception() from recreating the bucket list */
1551        rt->exception_bucket_flushed = 1;
1552
1553        bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1554                                    lockdep_is_held(&rt6_exception_lock));
1555        if (!bucket)
1556                goto out;
1557
1558        for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1559                hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1560                        rt6_remove_exception(bucket, rt6_ex);
1561                WARN_ON_ONCE(bucket->depth);
1562                bucket++;
1563        }
1564
1565out:
1566        spin_unlock_bh(&rt6_exception_lock);
1567}
1568
 1569/* Find the cached rt for (daddr, saddr) in the exception table of res->f6i.
 1570 * Caller has to hold rcu_read_lock()
1571 */
1572static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
1573                                           const struct in6_addr *daddr,
1574                                           const struct in6_addr *saddr)
1575{
1576        const struct in6_addr *src_key = NULL;
1577        struct rt6_exception_bucket *bucket;
1578        struct rt6_exception *rt6_ex;
1579        struct rt6_info *ret = NULL;
1580
1581#ifdef CONFIG_IPV6_SUBTREES
1582        /* fib6_src.plen != 0 indicates f6i is in a subtree
1583         * and the exception table is indexed by a hash of
1584         * both fib6_dst and fib6_src.
1585         * However, the src addr used to create the hash
1586         * might not be exactly the passed-in saddr, which
1587         * is a /128 addr from the flow.
1588         * So we need to use f6i->fib6_src to redo the lookup
1589         * if the passed-in saddr does not match anything.
1590         * (See the logic in ip6_rt_cache_alloc() on how
1591         * rt->rt6i_src is updated.)
1592         */
1593        if (res->f6i->fib6_src.plen)
1594                src_key = saddr;
1595find_ex:
1596#endif
1597        bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
1598        rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1599
1600        if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1601                ret = rt6_ex->rt6i;
1602
1603#ifdef CONFIG_IPV6_SUBTREES
1604        /* Use fib6_src as src_key and redo lookup */
1605        if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
1606                src_key = &res->f6i->fib6_src.addr;
1607                goto find_ex;
1608        }
1609#endif
1610
1611        return ret;
1612}
1613
1614/* Remove the passed-in cached rt from the hash table that contains it */
1615static int rt6_remove_exception_rt(struct rt6_info *rt)
1616{
1617        struct rt6_exception_bucket *bucket;
1618        struct in6_addr *src_key = NULL;
1619        struct rt6_exception *rt6_ex;
1620        struct fib6_info *from;
1621        int err;
1622
1623        from = rcu_dereference(rt->from);
1624        if (!from ||
1625            !(rt->rt6i_flags & RTF_CACHE))
1626                return -EINVAL;
1627
1628        if (!rcu_access_pointer(from->rt6i_exception_bucket))
1629                return -ENOENT;
1630
1631        spin_lock_bh(&rt6_exception_lock);
1632        bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1633                                    lockdep_is_held(&rt6_exception_lock));
1634#ifdef CONFIG_IPV6_SUBTREES
1635        /* rt6i_src.plen != 0 indicates 'from' is in a subtree
1636         * and the exception table is indexed by a hash of
1637         * both rt6i_dst and rt6i_src.
1638         * Otherwise, the exception table is indexed by
1639         * a hash of only rt6i_dst.
1640         */
1641        if (from->fib6_src.plen)
1642                src_key = &rt->rt6i_src.addr;
1643#endif
1644        rt6_ex = __rt6_find_exception_spinlock(&bucket,
1645                                               &rt->rt6i_dst.addr,
1646                                               src_key);
1647        if (rt6_ex) {
1648                rt6_remove_exception(bucket, rt6_ex);
1649                err = 0;
1650        } else {
1651                err = -ENOENT;
1652        }
1653
1654        spin_unlock_bh(&rt6_exception_lock);
1655        return err;
1656}
1657
1658/* Find rt6_ex which contains the passed-in rt cache and
1659 * refresh its stamp
1660 */
1661static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1662{
1663        struct rt6_exception_bucket *bucket;
1664        struct in6_addr *src_key = NULL;
1665        struct rt6_exception *rt6_ex;
1666        struct fib6_info *from;
1667
1668        rcu_read_lock();
1669        from = rcu_dereference(rt->from);
1670        if (!from || !(rt->rt6i_flags & RTF_CACHE))
1671                goto unlock;
1672
1673        bucket = rcu_dereference(from->rt6i_exception_bucket);
1674
1675#ifdef CONFIG_IPV6_SUBTREES
1676        /* rt6i_src.plen != 0 indicates 'from' is in a subtree
1677         * and the exception table is indexed by a hash of
1678         * both rt6i_dst and rt6i_src.
1679         * Otherwise, the exception table is indexed by
1680         * a hash of only rt6i_dst.
1681         */
1682        if (from->fib6_src.plen)
1683                src_key = &rt->rt6i_src.addr;
1684#endif
1685        rt6_ex = __rt6_find_exception_rcu(&bucket,
1686                                          &rt->rt6i_dst.addr,
1687                                          src_key);
1688        if (rt6_ex)
1689                rt6_ex->stamp = jiffies;
1690
1691unlock:
1692        rcu_read_unlock();
1693}
1694
1695static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1696                                         struct rt6_info *rt, int mtu)
1697{
1698        /* If the new MTU is lower than the route PMTU, this new MTU will be the
1699         * lowest MTU in the path: always allow updating the route PMTU to
1700         * reflect PMTU decreases.
1701         *
1702         * If the new MTU is higher, and the route PMTU is equal to the local
1703         * MTU, this means the old MTU is the lowest in the path, so allow
1704         * updating it: if other nodes now have lower MTUs, PMTU discovery will
1705         * handle this.
1706         */
1707
1708        if (dst_mtu(&rt->dst) >= mtu)
1709                return true;
1710
1711        if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1712                return true;
1713
1714        return false;
1715}
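
/* Worked example for the rule above (editorial addition; the numbers are
 * only illustrative): assume the route's cached PMTU is 1500.
 *   - New link MTU 1280: 1500 >= 1280, so lowering is always allowed;
 *     this hop has just become the path minimum.
 *   - New link MTU 9000 while the cached PMTU equals idev->cnf.mtu6: the
 *     local link was the bottleneck, so raising is allowed and PMTU
 *     discovery will re-learn any smaller remote limit.
 *   - New link MTU 9000 but the cached PMTU is 1400 and differs from
 *     idev->cnf.mtu6: some remote hop is the bottleneck, so the cached
 *     value is left alone.
 */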
1716
1717static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1718                                       struct fib6_info *rt, int mtu)
1719{
1720        struct rt6_exception_bucket *bucket;
1721        struct rt6_exception *rt6_ex;
1722        int i;
1723
1724        bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1725                                        lockdep_is_held(&rt6_exception_lock));
1726
1727        if (!bucket)
1728                return;
1729
1730        for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1731                hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1732                        struct rt6_info *entry = rt6_ex->rt6i;
1733
1734                        /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1735                         * route), the metrics of its rt->from have already
1736                         * been updated.
1737                         */
1738                        if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1739                            rt6_mtu_change_route_allowed(idev, entry, mtu))
1740                                dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1741                }
1742                bucket++;
1743        }
1744}
1745
1746#define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
1747
1748static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1749                                        struct in6_addr *gateway)
1750{
1751        struct rt6_exception_bucket *bucket;
1752        struct rt6_exception *rt6_ex;
1753        struct hlist_node *tmp;
1754        int i;
1755
1756        if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1757                return;
1758
1759        spin_lock_bh(&rt6_exception_lock);
1760        bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1761                                     lockdep_is_held(&rt6_exception_lock));
1762
1763        if (bucket) {
1764                for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1765                        hlist_for_each_entry_safe(rt6_ex, tmp,
1766                                                  &bucket->chain, hlist) {
1767                                struct rt6_info *entry = rt6_ex->rt6i;
1768
1769                                if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1770                                    RTF_CACHE_GATEWAY &&
1771                                    ipv6_addr_equal(gateway,
1772                                                    &entry->rt6i_gateway)) {
1773                                        rt6_remove_exception(bucket, rt6_ex);
1774                                }
1775                        }
1776                        bucket++;
1777                }
1778        }
1779
1780        spin_unlock_bh(&rt6_exception_lock);
1781}
1782
1783static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1784                                      struct rt6_exception *rt6_ex,
1785                                      struct fib6_gc_args *gc_args,
1786                                      unsigned long now)
1787{
1788        struct rt6_info *rt = rt6_ex->rt6i;
1789
1790        /* We are pruning and obsoleting aged-out and non-gateway exceptions
1791         * even if others still have references to them, so that on the next
1792         * dst_check() such references can be dropped.
1793         * EXPIRES exceptions (e.g. pmtu-generated ones) are pruned when
1794         * expired, independently of their aging, as per RFC 8201 section 4.
1795         */
1796        if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1797                if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1798                        RT6_TRACE("aging clone %p\n", rt);
1799                        rt6_remove_exception(bucket, rt6_ex);
1800                        return;
1801                }
1802        } else if (time_after(jiffies, rt->dst.expires)) {
1803                RT6_TRACE("purging expired route %p\n", rt);
1804                rt6_remove_exception(bucket, rt6_ex);
1805                return;
1806        }
1807
1808        if (rt->rt6i_flags & RTF_GATEWAY) {
1809                struct neighbour *neigh;
1810                __u8 neigh_flags = 0;
1811
1812                neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1813                if (neigh)
1814                        neigh_flags = neigh->flags;
1815
1816                if (!(neigh_flags & NTF_ROUTER)) {
1817                        RT6_TRACE("purging route %p via non-router but gateway\n",
1818                                  rt);
1819                        rt6_remove_exception(bucket, rt6_ex);
1820                        return;
1821                }
1822        }
1823
1824        gc_args->more++;
1825}
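
/* Illustrative note (editorial addition): an exception survives a GC pass
 * only if none of the checks above fired, i.e. a non-RTF_EXPIRES clone is
 * still within gc_args->timeout of its last use, an RTF_EXPIRES clone
 * (e.g. a PMTU entry) has not yet passed dst.expires, and an RTF_GATEWAY
 * clone still has its gateway marked as a router (NTF_ROUTER) in the
 * neighbour table.  Survivors bump gc_args->more so the caller knows
 * there is still work left for later passes.
 */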
1826
1827void rt6_age_exceptions(struct fib6_info *rt,
1828                        struct fib6_gc_args *gc_args,
1829                        unsigned long now)
1830{
1831        struct rt6_exception_bucket *bucket;
1832        struct rt6_exception *rt6_ex;
1833        struct hlist_node *tmp;
1834        int i;
1835
1836        if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1837                return;
1838
1839        rcu_read_lock_bh();
1840        spin_lock(&rt6_exception_lock);
1841        bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1842                                    lockdep_is_held(&rt6_exception_lock));
1843
1844        if (bucket) {
1845                for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1846                        hlist_for_each_entry_safe(rt6_ex, tmp,
1847                                                  &bucket->chain, hlist) {
1848                                rt6_age_examine_exception(bucket, rt6_ex,
1849                                                          gc_args, now);
1850                        }
1851                        bucket++;
1852                }
1853        }
1854        spin_unlock(&rt6_exception_lock);
1855        rcu_read_unlock_bh();
1856}
1857
1858/* must be called with rcu lock held */
1859int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1860                      struct flowi6 *fl6, struct fib6_result *res, int strict)
1861{
1862        struct fib6_node *fn, *saved_fn;
1863
1864        fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1865        saved_fn = fn;
1866
1867        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1868                oif = 0;
1869
1870redo_rt6_select:
1871        rt6_select(net, fn, oif, res, strict);
1872        if (res->f6i == net->ipv6.fib6_null_entry) {
1873                fn = fib6_backtrack(fn, &fl6->saddr);
1874                if (fn)
1875                        goto redo_rt6_select;
1876                else if (strict & RT6_LOOKUP_F_REACHABLE) {
1877                        /* also consider unreachable route */
1878                        strict &= ~RT6_LOOKUP_F_REACHABLE;
1879                        fn = saved_fn;
1880                        goto redo_rt6_select;
1881                }
1882        }
1883
1884        trace_fib6_table_lookup(net, res, table, fl6);
1885
1886        return 0;
1887}
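
/* Editorial summary (not upstream text): the lookup above is a two-stage
 * retry loop.  rt6_select() is first tried on the most specific matching
 * node; while it only yields fib6_null_entry the tree is walked back
 * towards less specific prefixes with fib6_backtrack().  If that still
 * fails and RT6_LOOKUP_F_REACHABLE was set, the flag is dropped and the
 * search restarts from the originally matched node, so a route via an
 * unreachable router is still preferred over no route at all.
 */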
1888
1889struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1890                               int oif, struct flowi6 *fl6,
1891                               const struct sk_buff *skb, int flags)
1892{
1893        struct fib6_result res = {};
1894        struct rt6_info *rt;
1895        int strict = 0;
1896
1897        strict |= flags & RT6_LOOKUP_F_IFACE;
1898        strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1899        if (net->ipv6.devconf_all->forwarding == 0)
1900                strict |= RT6_LOOKUP_F_REACHABLE;
1901
1902        rcu_read_lock();
1903
1904        fib6_table_lookup(net, table, oif, fl6, &res, strict);
1905        if (res.f6i == net->ipv6.fib6_null_entry) {
1906                rt = net->ipv6.ip6_null_entry;
1907                rcu_read_unlock();
1908                dst_hold(&rt->dst);
1909                return rt;
1910        }
1911
1912        fib6_select_path(net, &res, fl6, oif, false, skb, strict);
1913
1914        /* Search through the exception table */
1915        rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
1916        if (rt) {
1917                if (ip6_hold_safe(net, &rt))
1918                        dst_use_noref(&rt->dst, jiffies);
1919
1920                rcu_read_unlock();
1921                return rt;
1922        } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1923                            !res.nh->fib_nh_gw_family)) {
1924                /* Create an RTF_CACHE clone which will not be
1925                 * owned by the fib6 tree.  It is for the special case where
1926                 * the daddr in the skb during the neighbor look-up is different
1927                 * from the fl6->daddr used to look up the route here.
1928                 */
1929                struct rt6_info *uncached_rt;
1930
1931                uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
1932
1933                rcu_read_unlock();
1934
1935                if (uncached_rt) {
1936                        /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1937                         * No need for another dst_hold()
1938                         */
1939                        rt6_uncached_list_add(uncached_rt);
1940                        atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1941                } else {
1942                        uncached_rt = net->ipv6.ip6_null_entry;
1943                        dst_hold(&uncached_rt->dst);
1944                }
1945
1946                return uncached_rt;
1947        } else {
1948                /* Get a percpu copy */
1949
1950                struct rt6_info *pcpu_rt;
1951
1952                local_bh_disable();
1953                pcpu_rt = rt6_get_pcpu_route(&res);
1954
1955                if (!pcpu_rt)
1956                        pcpu_rt = rt6_make_pcpu_route(net, &res);
1957
1958                local_bh_enable();
1959                rcu_read_unlock();
1960
1961                return pcpu_rt;
1962        }
1963}
1964EXPORT_SYMBOL_GPL(ip6_pol_route);
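
/* Illustrative note (editorial addition): ip6_pol_route() can hand back
 * three kinds of dst, in this order of preference:
 *   1. an RTF_CACHE exception found by rt6_find_cached_rt(), e.g. one
 *      created earlier by PMTU or redirect handling;
 *   2. a freshly allocated, uncached RTF_CACHE clone, used only for the
 *      FLOWI_FLAG_KNOWN_NH case on a gatewayless nexthop and tracked on
 *      the uncached list rather than in the fib6 tree;
 *   3. otherwise a per-cpu copy from rt6_get_pcpu_route(), created on
 *      demand by rt6_make_pcpu_route().
 * Each path returns with the dst reference accounted for, so the wrappers
 * ip6_pol_route_input()/ip6_pol_route_output() below can pass the result
 * straight through.
 */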
1965
1966static struct rt6_info *ip6_pol_route_input(struct net *net,
1967                                            struct fib6_table *table,
1968                                            struct flowi6 *fl6,
1969                                            const struct sk_buff *skb,
1970                                            int flags)
1971{
1972        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1973}
1974
1975struct dst_entry *ip6_route_input_lookup(struct net *net,
1976                                         struct net_device *dev,
1977                                         struct flowi6 *fl6,
1978                                         const struct sk_buff *skb,
1979                                         int flags)
1980{
1981        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1982                flags |= RT6_LOOKUP_F_IFACE;
1983
1984        return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1985}
1986EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1987
1988static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1989                                  struct flow_keys *keys,
1990                                  struct flow_keys *flkeys)
1991{
1992        const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1993        const struct ipv6hdr *key_iph = outer_iph;
1994        struct flow_keys *_flkeys = flkeys;
1995        const struct ipv6hdr *inner_iph;
1996        const struct icmp6hdr *icmph;
1997        struct ipv6hdr _inner_iph;
1998        struct icmp6hdr _icmph;
1999
2000        if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2001                goto out;
2002
2003        icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2004                                   sizeof(_icmph), &_icmph);
2005        if (!icmph)
2006                goto out;
2007
2008        if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2009            icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2010            icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2011            icmph->icmp6_type != ICMPV6_PARAMPROB)
2012                goto out;
2013
2014        inner_iph = skb_header_pointer(skb,
2015                                       skb_transport_offset(skb) + sizeof(*icmph),
2016                                       sizeof(_inner_iph), &_inner_iph);
2017        if (!inner_iph)
2018                goto out;
2019
2020        key_iph = inner_iph;
2021        _flkeys = NULL;
2022out:
2023        if (_flkeys) {
2024                keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2025                keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2026                keys->tags.flow_label = _flkeys->tags.flow_label;
2027                keys->basic.ip_proto = _flkeys->basic.ip_proto;
2028        } else {
2029                keys->addrs.v6addrs.src = key_iph->saddr;
2030                keys->addrs.v6addrs.dst = key_iph->daddr;
2031                keys->tags.flow_label = ip6_flowlabel(key_iph);
2032                keys->basic.ip_proto = key_iph->nexthdr;
2033        }
2034}
2035
2036/* if skb is set it will be used and fl6 can be NULL */
2037u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2038                       const struct sk_buff *skb, struct flow_keys *flkeys)
2039{
2040        struct flow_keys hash_keys;
2041        u32 mhash;
2042
2043        switch (ip6_multipath_hash_policy(net)) {
2044        case 0:
2045                memset(&hash_keys, 0, sizeof(hash_keys));
2046                hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2047                if (skb) {
2048                        ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2049                } else {
2050                        hash_keys.addrs.v6addrs.src = fl6->saddr;
2051                        hash_keys.addrs.v6addrs.dst = fl6->daddr;
2052                        hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2053                        hash_keys.basic.ip_proto = fl6->flowi6_proto;
2054                }
2055                break;
2056        case 1:
2057                if (skb) {
2058                        unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2059                        struct flow_keys keys;
2060
2061                        /* short-circuit if we already have an L4 hash present */
2062                        if (skb->l4_hash)
2063                                return skb_get_hash_raw(skb) >> 1;
2064
2065                        memset(&hash_keys, 0, sizeof(hash_keys));
2066
2067                        if (!flkeys) {
2068                                skb_flow_dissect_flow_keys(skb, &keys, flag);
2069                                flkeys = &keys;
2070                        }
2071                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2072                        hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2073                        hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2074                        hash_keys.ports.src = flkeys->ports.src;
2075                        hash_keys.ports.dst = flkeys->ports.dst;
2076                        hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2077                } else {
2078                        memset(&hash_keys, 0, sizeof(hash_keys));
2079                        hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2080                        hash_keys.addrs.v6addrs.src = fl6->saddr;
2081                        hash_keys.addrs.v6addrs.dst = fl6->daddr;
2082                        hash_keys.ports.src = fl6->fl6_sport;
2083                        hash_keys.ports.dst = fl6->fl6_dport;
2084                        hash_keys.basic.ip_proto = fl6->flowi6_proto;
2085                }
2086                break;
2087        }
2088        mhash = flow_hash_from_keys(&hash_keys);
2089
2090        return mhash >> 1;
2091}
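
/* Illustrative note (editorial addition): the two policies above, selected
 * by ip6_multipath_hash_policy() (the net.ipv6.fib_multipath_hash_policy
 * sysctl), feed different fields into flow_hash_from_keys():
 *
 *   policy 0 (L3):      saddr, daddr, flow label, next header
 *   policy 1 (L3 + L4): saddr, daddr, source port, destination port,
 *                       next header; a precomputed skb L4 hash is reused
 *                       directly when present
 *
 * For forwarded ICMPv6 errors, ip6_multipath_l3_keys() hashes the embedded
 * inner header instead of the outer one, so errors belonging to a flow are
 * spread consistently with that flow rather than by the reporting router's
 * addresses.
 */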
2092
2093void ip6_route_input(struct sk_buff *skb)
2094{
2095        const struct ipv6hdr *iph = ipv6_hdr(skb);
2096        struct net *net = dev_net(skb->dev);
2097        int flags = RT6_LOOKUP_F_HAS_SADDR;
2098        struct ip_tunnel_info *tun_info;
2099        struct flowi6 fl6 = {
2100                .flowi6_iif = skb->dev->ifindex,
2101                .daddr = iph->daddr,
2102                .saddr = iph->saddr,
2103                .flowlabel = ip6_flowinfo(iph),
2104                .flowi6_mark = skb->mark,
2105                .flowi6_proto = iph->nexthdr,
2106        };
2107        struct flow_keys *flkeys = NULL, _flkeys;
2108
2109        tun_info = skb_tunnel_info(skb);
2110        if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2111                fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2112
2113        if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2114                flkeys = &_flkeys;
2115
2116        if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2117                fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2118        skb_dst_drop(skb);
2119        skb_dst_set(skb,
2120                    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2121}
2122
2123static struct rt6_info *ip6_pol_route_output(struct net *net,
2124                                             struct fib6_table *table,
2125                                             struct flowi6 *fl6,
2126                                             const struct sk_buff *skb,
2127                                             int flags)
2128{
2129        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2130}
2131
2132struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2133                                         struct flowi6 *fl6, int flags)
2134{
2135        bool any_src;
2136
2137        if (ipv6_addr_type(&fl6->daddr) &
2138            (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
2139                struct dst_entry *dst;
2140
2141                dst = l3mdev_link_scope_lookup(net, fl6);
2142                if (dst)
2143                        return dst;
2144        }
2145
2146        fl6->flowi6_iif = LOOPBACK_IFINDEX;
2147
2148        any_src = ipv6_addr_any(&fl6->saddr);
2149        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2150            (fl6->flowi6_oif && any_src))
2151                flags |= RT6_LOOKUP_F_IFACE;
2152
2153        if (!any_src)
2154                flags |= RT6_LOOKUP_F_HAS_SADDR;
2155        else if (sk)
2156                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2157
2158        return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2159}
2160EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2161
2162struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2163{
2164        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2165        struct net_device *loopback_dev = net->loopback_dev;
2166        struct dst_entry *new = NULL;
2167
2168        rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2169                       DST_OBSOLETE_DEAD, 0);
2170        if (rt) {
2171                rt6_info_init(rt);
2172                atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2173
2174                new = &rt->dst;
2175                new->__use = 1;
2176                new->input = dst_discard;
2177                new->output = dst_discard_out;
2178
2179                dst_copy_metrics(new, &ort->dst);
2180
2181                rt->rt6i_idev = in6_dev_get(loopback_dev);
2182                rt->rt6i_gateway = ort->rt6i_gateway;
2183                rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2184
2185                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2186#ifdef CONFIG_IPV6_SUBTREES
2187                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2188#endif
2189        }
2190
2191        dst_release(dst_orig);
2192        return new ? new : ERR_PTR(-ENOMEM);
2193}
2194
2195/*
2196 *      Destination cache support functions
2197 */
2198
2199static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2200{
2201        u32 rt_cookie = 0;
2202
2203        if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2204                return false;
2205
2206        if (fib6_check_expired(f6i))
2207                return false;
2208
2209        return true;
2210}
2211
2212static struct dst_entry *rt6_check(struct rt6_info *rt,
2213                                   struct fib6_info *from,
2214                                   u32 cookie)
2215{
2216        u32 rt_cookie = 0;
2217
2218        if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
2219            rt_cookie != cookie)
2220                return NULL;
2221
2222        if (rt6_check_expired(rt))
2223                return NULL;
2224
2225        return &rt->dst;
2226}
2227
2228static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2229                                            struct fib6_info *from,
2230                                            u32 cookie)
2231{
2232        if (!__rt6_check_expired(rt) &&
2233            rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2234            fib6_check(from, cookie))
2235                return &rt->dst;
2236        else
2237                return NULL;
2238}
2239
2240static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2241{
2242        struct dst_entry *dst_ret;
2243        struct fib6_info *from;
2244        struct rt6_info *rt;
2245
2246        rt = container_of(dst, struct rt6_info, dst);
2247
2248        rcu_read_lock();
2249
2250        /* All IPv6 dsts are created with ->obsolete set to the value
2251         * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
2252         * down into this function.
2253         */
2254
2255        from = rcu_dereference(rt->from);
2256
2257        if (from && (rt->rt6i_flags & RTF_PCPU ||
2258            unlikely(!list_empty(&rt->rt6i_uncached))))
2259                dst_ret = rt6_dst_from_check(rt, from, cookie);
2260        else
2261                dst_ret = rt6_check(rt, from, cookie);
2262
2263        rcu_read_unlock();
2264
2265        return dst_ret;
2266}
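
/* Illustrative sketch (editorial addition): because every IPv6 dst is born
 * with ->obsolete == DST_OBSOLETE_FORCE_CHK, a cached dst is only trusted
 * after ->check() has compared its cookie against the current
 * fn_sernum-based value.  A caller-side sketch, mirroring
 * ip6_sk_update_pmtu() further below:
 *
 *        dst = __sk_dst_get(sk);
 *        if (dst && dst->obsolete &&
 *            !dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
 *                ... the cached dst is stale and must be looked up again ...
 */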
2267
2268static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2269{
2270        struct rt6_info *rt = (struct rt6_info *) dst;
2271
2272        if (rt) {
2273                if (rt->rt6i_flags & RTF_CACHE) {
2274                        rcu_read_lock();
2275                        if (rt6_check_expired(rt)) {
2276                                rt6_remove_exception_rt(rt);
2277                                dst = NULL;
2278                        }
2279                        rcu_read_unlock();
2280                } else {
2281                        dst_release(dst);
2282                        dst = NULL;
2283                }
2284        }
2285        return dst;
2286}
2287
2288static void ip6_link_failure(struct sk_buff *skb)
2289{
2290        struct rt6_info *rt;
2291
2292        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2293
2294        rt = (struct rt6_info *) skb_dst(skb);
2295        if (rt) {
2296                rcu_read_lock();
2297                if (rt->rt6i_flags & RTF_CACHE) {
2298                        rt6_remove_exception_rt(rt);
2299                } else {
2300                        struct fib6_info *from;
2301                        struct fib6_node *fn;
2302
2303                        from = rcu_dereference(rt->from);
2304                        if (from) {
2305                                fn = rcu_dereference(from->fib6_node);
2306                                if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2307                                        fn->fn_sernum = -1;
2308                        }
2309                }
2310                rcu_read_unlock();
2311        }
2312}
2313
2314static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2315{
2316        if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2317                struct fib6_info *from;
2318
2319                rcu_read_lock();
2320                from = rcu_dereference(rt0->from);
2321                if (from)
2322                        rt0->dst.expires = from->expires;
2323                rcu_read_unlock();
2324        }
2325
2326        dst_set_expires(&rt0->dst, timeout);
2327        rt0->rt6i_flags |= RTF_EXPIRES;
2328}
2329
2330static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2331{
2332        struct net *net = dev_net(rt->dst.dev);
2333
2334        dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2335        rt->rt6i_flags |= RTF_MODIFIED;
2336        rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2337}
2338
2339static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2340{
2341        return !(rt->rt6i_flags & RTF_CACHE) &&
2342                (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
2343}
2344
2345static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2346                                 const struct ipv6hdr *iph, u32 mtu)
2347{
2348        const struct in6_addr *daddr, *saddr;
2349        struct rt6_info *rt6 = (struct rt6_info *)dst;
2350
2351        if (dst_metric_locked(dst, RTAX_MTU))
2352                return;
2353
2354        if (iph) {
2355                daddr = &iph->daddr;
2356                saddr = &iph->saddr;
2357        } else if (sk) {
2358                daddr = &sk->sk_v6_daddr;
2359                saddr = &inet6_sk(sk)->saddr;
2360        } else {
2361                daddr = NULL;
2362                saddr = NULL;
2363        }
2364        dst_confirm_neigh(dst, daddr);
2365        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2366        if (mtu >= dst_mtu(dst))
2367                return;
2368
2369        if (!rt6_cache_allowed_for_pmtu(rt6)) {
2370                rt6_do_update_pmtu(rt6, mtu);
2371                /* update rt6_ex->stamp for cache */
2372                if (rt6->rt6i_flags & RTF_CACHE)
2373                        rt6_update_exception_stamp_rt(rt6);
2374        } else if (daddr) {
2375                struct fib6_result res = {};
2376                struct rt6_info *nrt6;
2377
2378                rcu_read_lock();
2379                res.f6i = rcu_dereference(rt6->from);
2380                if (!res.f6i) {
2381                        rcu_read_unlock();
2382                        return;
2383                }
2384                res.nh = &res.f6i->fib6_nh;
2385                res.fib6_flags = res.f6i->fib6_flags;
2386                res.fib6_type = res.f6i->fib6_type;
2387
2388                nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
2389                if (nrt6) {
2390                        rt6_do_update_pmtu(nrt6, mtu);
2391                        if (rt6_insert_exception(nrt6, &res))
2392                                dst_release_immediate(&nrt6->dst);
2393                }
2394                rcu_read_unlock();
2395        }
2396}
2397
2398static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2399                               struct sk_buff *skb, u32 mtu)
2400{
2401        __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2402}
2403
2404void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2405                     int oif, u32 mark, kuid_t uid)
2406{
2407        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2408        struct dst_entry *dst;
2409        struct flowi6 fl6 = {
2410                .flowi6_oif = oif,
2411                .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2412                .daddr = iph->daddr,
2413                .saddr = iph->saddr,
2414                .flowlabel = ip6_flowinfo(iph),
2415                .flowi6_uid = uid,
2416        };
2417
2418        dst = ip6_route_output(net, NULL, &fl6);
2419        if (!dst->error)
2420                __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
2421        dst_release(dst);
2422}
2423EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2424
2425void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2426{
2427        int oif = sk->sk_bound_dev_if;
2428        struct dst_entry *dst;
2429
2430        if (!oif && skb->dev)
2431                oif = l3mdev_master_ifindex(skb->dev);
2432
2433        ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2434
2435        dst = __sk_dst_get(sk);
2436        if (!dst || !dst->obsolete ||
2437            dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2438                return;
2439
2440        bh_lock_sock(sk);
2441        if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2442                ip6_datagram_dst_update(sk, false);
2443        bh_unlock_sock(sk);
2444}
2445EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2446
2447void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2448                           const struct flowi6 *fl6)
2449{
2450#ifdef CONFIG_IPV6_SUBTREES
2451        struct ipv6_pinfo *np = inet6_sk(sk);
2452#endif
2453
2454        ip6_dst_store(sk, dst,
2455                      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2456                      &sk->sk_v6_daddr : NULL,
2457#ifdef CONFIG_IPV6_SUBTREES
2458                      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2459                      &np->saddr :
2460#endif
2461                      NULL);
2462}
2463
2464static bool ip6_redirect_nh_match(const struct fib6_result *res,
2465                                  struct flowi6 *fl6,
2466                                  const struct in6_addr *gw,
2467                                  struct rt6_info **ret)
2468{
2469        const struct fib6_nh *nh = res->nh;
2470
2471        if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
2472            fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
2473                return false;
2474
2475        /* rt_cache's gateway might be different from its 'parent'
2476         * in the case of an ip redirect.
2477         * So we keep searching in the exception table if the gateway
2478         * is different.
2479         */
2480        if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2481                struct rt6_info *rt_cache;
2482
2483                rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
2484                if (rt_cache &&
2485                    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
2486                        *ret = rt_cache;
2487                        return true;
2488                }
2489                return false;
2490        }
2491        return true;
2492}
2493
2494/* Handle redirects */
2495struct ip6rd_flowi {
2496        struct flowi6 fl6;
2497        struct in6_addr gateway;
2498};
2499
2500static struct rt6_info *__ip6_route_redirect(struct net *net,
2501                                             struct fib6_table *table,
2502                                             struct flowi6 *fl6,
2503                                             const struct sk_buff *skb,
2504                                             int flags)
2505{
2506        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2507        struct rt6_info *ret = NULL;
2508        struct fib6_result res = {};
2509        struct fib6_info *rt;
2510        struct fib6_node *fn;
2511
2512        /* l3mdev_update_flow overrides oif if the device is enslaved; in
2513         * this case we must match on the real ingress device, so reset it
2514         */
2515        if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2516                fl6->flowi6_oif = skb->dev->ifindex;
2517
2518        /* Get the "current" route for this destination and
2519         * check if the redirect has come from the appropriate router.
2520         *
2521         * RFC 4861 specifies that redirects should only be
2522         * accepted if they come from the nexthop to the target.
2523         * Due to the way the routes are chosen, this notion
2524         * is a bit fuzzy and one might need to check all possible
2525         * routes.
2526         */
2527
2528        rcu_read_lock();
2529        fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2530restart:
2531        for_each_fib6_node_rt_rcu(fn) {
2532                res.f6i = rt;
2533                res.nh = &rt->fib6_nh;
2534
2535                if (fib6_check_expired(rt))
2536                        continue;
2537                if (rt->fib6_flags & RTF_REJECT)
2538                        break;
2539                if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
2540                        goto out;
2541        }
2542
2543        if (!rt)
2544                rt = net->ipv6.fib6_null_entry;
2545        else if (rt->fib6_flags & RTF_REJECT) {
2546                ret = net->ipv6.ip6_null_entry;
2547                goto out;
2548        }
2549
2550        if (rt == net->ipv6.fib6_null_entry) {
2551                fn = fib6_backtrack(fn, &fl6->saddr);
2552                if (fn)
2553                        goto restart;
2554        }
2555
2556        res.f6i = rt;
2557        res.nh = &rt->fib6_nh;
2558out:
2559        if (ret) {
2560                ip6_hold_safe(net, &ret);
2561        } else {
2562                res.fib6_flags = res.f6i->fib6_flags;
2563                res.fib6_type = res.f6i->fib6_type;
2564                ret = ip6_create_rt_rcu(&res);
2565        }
2566
2567        rcu_read_unlock();
2568
2569        trace_fib6_table_lookup(net, &res, table, fl6);
2570        return ret;
2571};
2572
2573static struct dst_entry *ip6_route_redirect(struct net *net,
2574                                            const struct flowi6 *fl6,
2575                                            const struct sk_buff *skb,
2576                                            const struct in6_addr *gateway)
2577{
2578        int flags = RT6_LOOKUP_F_HAS_SADDR;
2579        struct ip6rd_flowi rdfl;
2580
2581        rdfl.fl6 = *fl6;
2582        rdfl.gateway = *gateway;
2583
2584        return fib6_rule_lookup(net, &rdfl.fl6, skb,
2585                                flags, __ip6_route_redirect);
2586}
2587
2588void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2589                  kuid_t uid)
2590{
2591        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2592        struct dst_entry *dst;
2593        struct flowi6 fl6 = {
2594                .flowi6_iif = LOOPBACK_IFINDEX,
2595                .flowi6_oif = oif,
2596                .flowi6_mark = mark,
2597                .daddr = iph->daddr,
2598                .saddr = iph->saddr,
2599                .flowlabel = ip6_flowinfo(iph),
2600                .flowi6_uid = uid,
2601        };
2602
2603        dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2604        rt6_do_redirect(dst, NULL, skb);
2605        dst_release(dst);
2606}
2607EXPORT_SYMBOL_GPL(ip6_redirect);
2608
2609void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2610{
2611        const struct ipv6hdr *iph = ipv6_hdr(skb);
2612        const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2613        struct dst_entry *dst;
2614        struct flowi6 fl6 = {
2615                .flowi6_iif = LOOPBACK_IFINDEX,
2616                .flowi6_oif = oif,
2617                .daddr = msg->dest,
2618                .saddr = iph->daddr,
2619                .flowi6_uid = sock_net_uid(net, NULL),
2620        };
2621
2622        dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2623        rt6_do_redirect(dst, NULL, skb);
2624        dst_release(dst);
2625}
2626
2627void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2628{
2629        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2630                     sk->sk_uid);
2631}
2632EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2633
2634static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2635{
2636        struct net_device *dev = dst->dev;
2637        unsigned int mtu = dst_mtu(dst);
2638        struct net *net = dev_net(dev);
2639
2640        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2641
2642        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2643                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2644
2645        /*
2646         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2647         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2648         * IPV6_MAXPLEN is also valid and means: "any MSS,
2649         * rely only on pmtu discovery"
2650         */
2651        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2652                mtu = IPV6_MAXPLEN;
2653        return mtu;
2654}
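
/* Worked example for ip6_default_advmss() (editorial addition): with a dst
 * MTU of 1500 the advertised MSS becomes
 * 1500 - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) = 1500 - 40 - 20 = 1440.
 * The result is clamped upwards to the ip6_rt_min_advmss sysctl, and
 * anything above IPV6_MAXPLEN - sizeof(struct tcphdr) is reported as
 * IPV6_MAXPLEN (65535), the conventional "no fixed MSS, rely on PMTU
 * discovery" value described above.
 */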
2655
2656static unsigned int ip6_mtu(const struct dst_entry *dst)
2657{
2658        struct inet6_dev *idev;
2659        unsigned int mtu;
2660
2661        mtu = dst_metric_raw(dst, RTAX_MTU);
2662        if (mtu)
2663                goto out;
2664
2665        mtu = IPV6_MIN_MTU;
2666
2667        rcu_read_lock();
2668        idev = __in6_dev_get(dst->dev);
2669        if (idev)
2670                mtu = idev->cnf.mtu6;
2671        rcu_read_unlock();
2672
2673out:
2674        mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2675
2676        return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2677}
2678
2679/* MTU selection:
2680 * 1. mtu on route is locked - use it
2681 * 2. mtu from nexthop exception
2682 * 3. mtu from egress device
2683 *
2684 * based on ip6_dst_mtu_forward and exception logic of
2685 * rt6_find_cached_rt; called with rcu_read_lock
2686 */
2687u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2688                      const struct in6_addr *daddr,
2689                      const struct in6_addr *saddr)
2690{
2691        const struct fib6_nh *nh = res->nh;
2692        struct fib6_info *f6i = res->f6i;
2693        struct inet6_dev *idev;
2694        struct rt6_info *rt;
2695        u32 mtu = 0;
2696
2697        if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2698                mtu = f6i->fib6_pmtu;
2699                if (mtu)
2700                        goto out;
2701        }
2702
2703        rt = rt6_find_cached_rt(res, daddr, saddr);
2704        if (unlikely(rt)) {
2705                mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2706        } else {
2707                struct net_device *dev = nh->fib_nh_dev;
2708
2709                mtu = IPV6_MIN_MTU;
2710                idev = __in6_dev_get(dev);
2711                if (idev && idev->cnf.mtu6 > mtu)
2712                        mtu = idev->cnf.mtu6;
2713        }
2714
2715        mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2716out:
2717        return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
2718}
2719
2720struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2721                                  struct flowi6 *fl6)
2722{
2723        struct dst_entry *dst;
2724        struct rt6_info *rt;
2725        struct inet6_dev *idev = in6_dev_get(dev);
2726        struct net *net = dev_net(dev);
2727
2728        if (unlikely(!idev))
2729                return ERR_PTR(-ENODEV);
2730
2731        rt = ip6_dst_alloc(net, dev, 0);
2732        if (unlikely(!rt)) {
2733                in6_dev_put(idev);
2734                dst = ERR_PTR(-ENOMEM);
2735                goto out;
2736        }
2737
2738        rt->dst.flags |= DST_HOST;
2739        rt->dst.input = ip6_input;
2740        rt->dst.output  = ip6_output;
2741        rt->rt6i_gateway  = fl6->daddr;
2742        rt->rt6i_dst.addr = fl6->daddr;
2743        rt->rt6i_dst.plen = 128;
2744        rt->rt6i_idev     = idev;
2745        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2746
2747        /* Add this dst into uncached_list so that rt6_disable_ip() can
2748         * properly release the net_device
2749         */
2750        rt6_uncached_list_add(rt);
2751        atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2752
2753        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2754
2755out:
2756        return dst;
2757}
2758
2759static int ip6_dst_gc(struct dst_ops *ops)
2760{
2761        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2762        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2763        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2764        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2765        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2766        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2767        int entries;
2768
2769        entries = dst_entries_get_fast(ops);
2770        if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2771            entries <= rt_max_size)
2772                goto out;
2773
2774        net->ipv6.ip6_rt_gc_expire++;
2775        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2776        entries = dst_entries_get_slow(ops);
2777        if (entries < ops->gc_thresh)
2778                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2779out:
2780        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2781        return entries > rt_max_size;
2782}
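
/* Illustrative note (editorial addition): ip6_rt_gc_expire is the timeout
 * later handed to fib6_run_gc(), i.e. how long a cached clone may sit
 * unused before it is aged out.  Each forced pass nudges it up by one, but
 * the final line shrinks it geometrically; with rt_elasticity = 9, for
 * example, expire -= expire >> 9 removes roughly 1/512 of the value per
 * call.  Sustained pressure therefore drives the timeout down and makes
 * collection more aggressive, while a successful shrink below gc_thresh
 * resets it to half of gc_timeout and relaxes the GC again.
 */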
2783
2784static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2785                                            struct fib6_config *cfg,
2786                                            const struct in6_addr *gw_addr,
2787                                            u32 tbid, int flags)
2788{
2789        struct flowi6 fl6 = {
2790                .flowi6_oif = cfg->fc_ifindex,
2791                .daddr = *gw_addr,
2792                .saddr = cfg->fc_prefsrc,
2793        };
2794        struct fib6_table *table;
2795        struct rt6_info *rt;
2796
2797        table = fib6_get_table(net, tbid);
2798        if (!table)
2799                return NULL;
2800
2801        if (!ipv6_addr_any(&cfg->fc_prefsrc))
2802                flags |= RT6_LOOKUP_F_HAS_SADDR;
2803
2804        flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2805        rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2806
2807        /* if table lookup failed, fall back to full lookup */
2808        if (rt == net->ipv6.ip6_null_entry) {
2809                ip6_rt_put(rt);
2810                rt = NULL;
2811        }
2812
2813        return rt;
2814}
2815
2816static int ip6_route_check_nh_onlink(struct net *net,
2817                                     struct fib6_config *cfg,
2818                                     const struct net_device *dev,
2819                                     struct netlink_ext_ack *extack)
2820{
2821        u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2822        const struct in6_addr *gw_addr = &cfg->fc_gateway;
2823        u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2824        struct fib6_info *from;
2825        struct rt6_info *grt;
2826        int err;
2827
2828        err = 0;
2829        grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2830        if (grt) {
2831                rcu_read_lock();
2832                from = rcu_dereference(grt->from);
2833                if (!grt->dst.error &&
2834                    /* ignore match if it is the default route */
2835                    from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2836                    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2837                        NL_SET_ERR_MSG(extack,
2838                                       "Nexthop has invalid gateway or device mismatch");
2839                        err = -EINVAL;
2840                }
2841                rcu_read_unlock();
2842
2843                ip6_rt_put(grt);
2844        }
2845
2846        return err;
2847}
2848
2849static int ip6_route_check_nh(struct net *net,
2850                              struct fib6_config *cfg,
2851                              struct net_device **_dev,
2852                              struct inet6_dev **idev)
2853{
2854        const struct in6_addr *gw_addr = &cfg->fc_gateway;
2855        struct net_device *dev = _dev ? *_dev : NULL;
2856        struct rt6_info *grt = NULL;
2857        int err = -EHOSTUNREACH;
2858
2859        if (cfg->fc_table) {
2860                int flags = RT6_LOOKUP_F_IFACE;
2861
2862                grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2863                                          cfg->fc_table, flags);
2864                if (grt) {
2865                        if (grt->rt6i_flags & RTF_GATEWAY ||
2866                            (dev && dev != grt->dst.dev)) {
2867                                ip6_rt_put(grt);
2868                                grt = NULL;
2869                        }
2870                }
2871        }
2872
2873        if (!grt)
2874                grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2875
2876        if (!grt)
2877                goto out;
2878
2879        if (dev) {
2880                if (dev != grt->dst.dev) {
2881                        ip6_rt_put(grt);
2882                        goto out;
2883                }
2884        } else {
2885                *_dev = dev = grt->dst.dev;
2886                *idev = grt->rt6i_idev;
2887                dev_hold(dev);
2888                in6_dev_hold(grt->rt6i_idev);
2889        }
2890
2891        if (!(grt->rt6i_flags & RTF_GATEWAY))
2892                err = 0;
2893
2894        ip6_rt_put(grt);
2895
2896out:
2897        return err;
2898}
2899
2900static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2901                           struct net_device **_dev, struct inet6_dev **idev,
2902                           struct netlink_ext_ack *extack)
2903{
2904        const struct in6_addr *gw_addr = &cfg->fc_gateway;
2905        int gwa_type = ipv6_addr_type(gw_addr);
2906        bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2907        const struct net_device *dev = *_dev;
2908        bool need_addr_check = !dev;
2909        int err = -EINVAL;
2910
2911        /* if gw_addr is local we will fail to detect this in case the
2912         * address is still TENTATIVE (DAD in progress). rt6_lookup()
2913         * will return the already-added prefix route via the interface
2914         * that the prefix route was assigned to, which might be non-loopback.
2915         */
2916        if (dev &&
2917            ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2918                NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2919                goto out;
2920        }
2921
2922        if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2923                /* IPv6 strictly inhibits using non-link-local
2924                 * addresses as nexthop addresses.
2925                 * Otherwise, the router will not be able to send redirects.
2926                 * It is very good, but in some (rare!) circumstances
2927                 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2928                 * some exceptions. --ANK
2929                 * We allow IPv4-mapped nexthops to support RFC4798-type
2930                 * addressing.
2931                 */
2932                if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2933                        NL_SET_ERR_MSG(extack, "Invalid gateway address");
2934                        goto out;
2935                }
2936
2937                if (cfg->fc_flags & RTNH_F_ONLINK)
2938                        err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2939                else
2940                        err = ip6_route_check_nh(net, cfg, _dev, idev);
2941
2942                if (err)
2943                        goto out;
2944        }
2945
2946        /* reload in case device was changed */
2947        dev = *_dev;
2948
2949        err = -EINVAL;
2950        if (!dev) {
2951                NL_SET_ERR_MSG(extack, "Egress device not specified");
2952                goto out;
2953        } else if (dev->flags & IFF_LOOPBACK) {
2954                NL_SET_ERR_MSG(extack,
2955                               "Egress device can not be loopback device for this route");
2956                goto out;
2957        }
2958
2959        /* if we did not check gw_addr above, do so now that the
2960         * egress device has been resolved.
2961         */
2962        if (need_addr_check &&
2963            ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2964                NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2965                goto out;
2966        }
2967
2968        err = 0;
2969out:
2970        return err;
2971}
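
/* Editorial summary (not upstream text) of what ip6_validate_gw() enforces
 * before a gateway route is accepted:
 *   - the gateway must not be one of our own addresses (re-checked if the
 *     egress device was only resolved while validating the nexthop);
 *   - a gateway that is not link-local unicast must still be a unicast or
 *     IPv4-mapped address, and must be resolvable either as an onlink
 *     nexthop (RTNH_F_ONLINK) or through an existing non-gateway route;
 *   - an egress device must be known by the end of the checks, and it may
 *     not be the loopback device.
 */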
2972
2973static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
2974{
2975        if ((flags & RTF_REJECT) ||
2976            (dev && (dev->flags & IFF_LOOPBACK) &&
2977             !(addr_type & IPV6_ADDR_LOOPBACK) &&
2978             !(flags & RTF_LOCAL)))
2979                return true;
2980
2981        return false;
2982}
2983
2984int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
2985                 struct fib6_config *cfg, gfp_t gfp_flags,
2986                 struct netlink_ext_ack *extack)
2987{
2988        struct net_device *dev = NULL;
2989        struct inet6_dev *idev = NULL;
2990        int addr_type;
2991        int err;
2992
2993        fib6_nh->fib_nh_family = AF_INET6;
2994
2995        err = -ENODEV;
2996        if (cfg->fc_ifindex) {
2997                dev = dev_get_by_index(net, cfg->fc_ifindex);
2998                if (!dev)
2999                        goto out;
3000                idev = in6_dev_get(dev);
3001                if (!idev)
3002                        goto out;
3003        }
3004
3005        if (cfg->fc_flags & RTNH_F_ONLINK) {
3006                if (!dev) {
3007                        NL_SET_ERR_MSG(extack,
3008                                       "Nexthop device required for onlink");
3009                        goto out;
3010                }
3011
3012                if (!(dev->flags & IFF_UP)) {
3013                        NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3014                        err = -ENETDOWN;
3015                        goto out;
3016                }
3017
3018                fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
3019        }
3020
3021        fib6_nh->fib_nh_weight = 1;
3022
3023        /* We cannot add true routes via loopback here, as
3024         * they would result in kernel looping; promote them to reject routes.
3025         */
3026        addr_type = ipv6_addr_type(&cfg->fc_dst);
3027        if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
3028                /* hold loopback dev/idev if we haven't done so. */
3029                if (dev != net->loopback_dev) {
3030                        if (dev) {
3031                                dev_put(dev);
3032                                in6_dev_put(idev);
3033                        }
3034                        dev = net->loopback_dev;
3035                        dev_hold(dev);
3036                        idev = in6_dev_get(dev);
3037                        if (!idev) {
3038                                err = -ENODEV;
3039                                goto out;
3040                        }
3041                }
3042                goto set_dev;
3043        }
3044
3045        if (cfg->fc_flags & RTF_GATEWAY) {
3046                err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3047                if (err)
3048                        goto out;
3049
3050                fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3051                fib6_nh->fib_nh_gw_family = AF_INET6;
3052        }
3053
3054        err = -ENODEV;
3055        if (!dev)
3056                goto out;
3057
3058        if (idev->cnf.disable_ipv6) {
3059                NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3060                err = -EACCES;
3061                goto out;
3062        }
3063
3064        if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
3065                NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3066                err = -ENETDOWN;
3067                goto out;
3068        }
3069
3070        if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3071            !netif_carrier_ok(dev))
3072                fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
3073
3074        err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3075                                 cfg->fc_encap_type, cfg, gfp_flags, extack);
3076        if (err)
3077                goto out;
3078set_dev:
3079        fib6_nh->fib_nh_dev = dev;
3080        fib6_nh->fib_nh_oif = dev->ifindex;
3081        err = 0;
3082out:
3083        if (idev)
3084                in6_dev_put(idev);
3085
3086        if (err) {
3087                lwtstate_put(fib6_nh->fib_nh_lws);
3088                fib6_nh->fib_nh_lws = NULL;
3089                if (dev)
3090                        dev_put(dev);
3091        }
3092
3093        return err;
3094}
3095
3096void fib6_nh_release(struct fib6_nh *fib6_nh)
3097{
3098        fib_nh_common_release(&fib6_nh->nh_common);
3099}
3100
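/* Translate a fib6_config into a new fib6_info: validate the request,
 * find or create the target table, copy prefix, metric and flags, and
 * initialise the embedded nexthop.  Returns the new entry or an ERR_PTR().
 */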
3101static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3102                                              gfp_t gfp_flags,
3103                                              struct netlink_ext_ack *extack)
3104{
3105        struct net *net = cfg->fc_nlinfo.nl_net;
3106        struct fib6_info *rt = NULL;
3107        struct fib6_table *table;
3108        int err = -EINVAL;
3109        int addr_type;
3110
3111        /* RTF_PCPU is an internal flag; can not be set by userspace */
3112        if (cfg->fc_flags & RTF_PCPU) {
3113                NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3114                goto out;
3115        }
3116
3117        /* RTF_CACHE is an internal flag; can not be set by userspace */
3118        if (cfg->fc_flags & RTF_CACHE) {
3119                NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3120                goto out;
3121        }
3122
3123        if (cfg->fc_type > RTN_MAX) {
3124                NL_SET_ERR_MSG(extack, "Invalid route type");
3125                goto out;
3126        }
3127
3128        if (cfg->fc_dst_len > 128) {
3129                NL_SET_ERR_MSG(extack, "Invalid prefix length");
3130                goto out;
3131        }
3132        if (cfg->fc_src_len > 128) {
3133                NL_SET_ERR_MSG(extack, "Invalid source address length");
3134                goto out;
3135        }
3136#ifndef CONFIG_IPV6_SUBTREES
3137        if (cfg->fc_src_len) {
3138                NL_SET_ERR_MSG(extack,
3139                               "Specifying source address requires IPV6_SUBTREES to be enabled");
3140                goto out;
3141        }
3142#endif
3143
3144        err = -ENOBUFS;
3145        if (cfg->fc_nlinfo.nlh &&
3146            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3147                table = fib6_get_table(net, cfg->fc_table);
3148                if (!table) {
3149                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3150                        table = fib6_new_table(net, cfg->fc_table);
3151                }
3152        } else {
3153                table = fib6_new_table(net, cfg->fc_table);
3154        }
3155
3156        if (!table)
3157                goto out;
3158
3159        err = -ENOMEM;
3160        rt = fib6_info_alloc(gfp_flags);
3161        if (!rt)
3162                goto out;
3163
3164        rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3165                                               extack);
3166        if (IS_ERR(rt->fib6_metrics)) {
3167                err = PTR_ERR(rt->fib6_metrics);
3168                /* Do not leave garbage there. */
3169                rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3170                goto out;
3171        }
3172
3173        if (cfg->fc_flags & RTF_ADDRCONF)
3174                rt->dst_nocount = true;
3175
3176        if (cfg->fc_flags & RTF_EXPIRES)
3177                fib6_set_expires(rt, jiffies +
3178                                clock_t_to_jiffies(cfg->fc_expires));
3179        else
3180                fib6_clean_expires(rt);
3181
3182        if (cfg->fc_protocol == RTPROT_UNSPEC)
3183                cfg->fc_protocol = RTPROT_BOOT;
3184        rt->fib6_protocol = cfg->fc_protocol;
3185
3186        rt->fib6_table = table;
3187        rt->fib6_metric = cfg->fc_metric;
3188        rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3189        rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
3190
3191        ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3192        rt->fib6_dst.plen = cfg->fc_dst_len;
3193        if (rt->fib6_dst.plen == 128)
3194                rt->dst_host = true;
3195
3196#ifdef CONFIG_IPV6_SUBTREES
3197        ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3198        rt->fib6_src.plen = cfg->fc_src_len;
3199#endif
3200        err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
3201        if (err)
3202                goto out;
3203
3204        /* We cannot add true routes via loopback here;
3205         * they would result in kernel looping. Promote them to reject routes.
3206         */
3207        addr_type = ipv6_addr_type(&cfg->fc_dst);
3208        if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
3209                rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3210
3211        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3212                struct net_device *dev = fib6_info_nh_dev(rt);
3213
3214                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3215                        NL_SET_ERR_MSG(extack, "Invalid source address");
3216                        err = -EINVAL;
3217                        goto out;
3218                }
3219                rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3220                rt->fib6_prefsrc.plen = 128;
3221        } else
3222                rt->fib6_prefsrc.plen = 0;
3223
3224        return rt;
3225out:
3226        fib6_info_release(rt);
3227        return ERR_PTR(err);
3228}
3229
3230int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3231                  struct netlink_ext_ack *extack)
3232{
3233        struct fib6_info *rt;
3234        int err;
3235
3236        rt = ip6_route_info_create(cfg, gfp_flags, extack);
3237        if (IS_ERR(rt))
3238                return PTR_ERR(rt);
3239
3240        err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3241        fib6_info_release(rt);
3242
3243        return err;
3244}
3245
3246static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3247{
3248        struct net *net = info->nl_net;
3249        struct fib6_table *table;
3250        int err;
3251
3252        if (rt == net->ipv6.fib6_null_entry) {
3253                err = -ENOENT;
3254                goto out;
3255        }
3256
3257        table = rt->fib6_table;
3258        spin_lock_bh(&table->tb6_lock);
3259        err = fib6_del(rt, info);
3260        spin_unlock_bh(&table->tb6_lock);
3261
3262out:
3263        fib6_info_release(rt);
3264        return err;
3265}
3266
3267int ip6_del_rt(struct net *net, struct fib6_info *rt)
3268{
3269        struct nl_info info = { .nl_net = net };
3270
3271        return __ip6_del_rt(rt, &info);
3272}
3273
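/* Delete a route and, when fc_delete_all_nh is set, all of its multipath
 * siblings under a single table lock, preferring one RTM_DELROUTE
 * notification that covers every hop.
 */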
3274static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3275{
3276        struct nl_info *info = &cfg->fc_nlinfo;
3277        struct net *net = info->nl_net;
3278        struct sk_buff *skb = NULL;
3279        struct fib6_table *table;
3280        int err = -ENOENT;
3281
3282        if (rt == net->ipv6.fib6_null_entry)
3283                goto out_put;
3284        table = rt->fib6_table;
3285        spin_lock_bh(&table->tb6_lock);
3286
3287        if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3288                struct fib6_info *sibling, *next_sibling;
3289
3290                /* prefer to send a single notification with all hops */
3291                skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3292                if (skb) {
3293                        u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3294
3295                        if (rt6_fill_node(net, skb, rt, NULL,
3296                                          NULL, NULL, 0, RTM_DELROUTE,
3297                                          info->portid, seq, 0) < 0) {
3298                                kfree_skb(skb);
3299                                skb = NULL;
3300                        } else
3301                                info->skip_notify = 1;
3302                }
3303
3304                list_for_each_entry_safe(sibling, next_sibling,
3305                                         &rt->fib6_siblings,
3306                                         fib6_siblings) {
3307                        err = fib6_del(sibling, info);
3308                        if (err)
3309                                goto out_unlock;
3310                }
3311        }
3312
3313        err = fib6_del(rt, info);
3314out_unlock:
3315        spin_unlock_bh(&table->tb6_lock);
3316out_put:
3317        fib6_info_release(rt);
3318
3319        if (skb) {
3320                rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3321                            info->nlh, gfp_any());
3322        }
3323        return err;
3324}
3325
3326static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3327{
3328        int rc = -ESRCH;
3329
3330        if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3331                goto out;
3332
3333        if (cfg->fc_flags & RTF_GATEWAY &&
3334            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3335                goto out;
3336
3337        rc = rt6_remove_exception_rt(rt);
3338out:
3339        return rc;
3340}
3341
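/* Handle a route delete request from userspace: locate the matching
 * fib6_info (or cached exception when RTF_CACHE is set) and remove it,
 * honouring the device, gateway, metric and protocol selectors in @cfg.
 */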
3342static int ip6_route_del(struct fib6_config *cfg,
3343                         struct netlink_ext_ack *extack)
3344{
3345        struct rt6_info *rt_cache;
3346        struct fib6_table *table;
3347        struct fib6_info *rt;
3348        struct fib6_node *fn;
3349        int err = -ESRCH;
3350
3351        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3352        if (!table) {
3353                NL_SET_ERR_MSG(extack, "FIB table does not exist");
3354                return err;
3355        }
3356
3357        rcu_read_lock();
3358
3359        fn = fib6_locate(&table->tb6_root,
3360                         &cfg->fc_dst, cfg->fc_dst_len,
3361                         &cfg->fc_src, cfg->fc_src_len,
3362                         !(cfg->fc_flags & RTF_CACHE));
3363
3364        if (fn) {
3365                for_each_fib6_node_rt_rcu(fn) {
3366                        struct fib6_nh *nh;
3367
3368                        if (cfg->fc_flags & RTF_CACHE) {
3369                                struct fib6_result res = {
3370                                        .f6i = rt,
3371                                };
3372                                int rc;
3373
3374                                rt_cache = rt6_find_cached_rt(&res,
3375                                                              &cfg->fc_dst,
3376                                                              &cfg->fc_src);
3377                                if (rt_cache) {
3378                                        rc = ip6_del_cached_rt(rt_cache, cfg);
3379                                        if (rc != -ESRCH) {
3380                                                rcu_read_unlock();
3381                                                return rc;
3382                                        }
3383                                }
3384                                continue;
3385                        }
3386
3387                        nh = &rt->fib6_nh;
3388                        if (cfg->fc_ifindex &&
3389                            (!nh->fib_nh_dev ||
3390                             nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
3391                                continue;
3392                        if (cfg->fc_flags & RTF_GATEWAY &&
3393                            !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
3394                                continue;
3395                        if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3396                                continue;
3397                        if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3398                                continue;
3399                        if (!fib6_info_hold_safe(rt))
3400                                continue;
3401                        rcu_read_unlock();
3402
3403                        /* if a gateway was specified, only delete the one hop */
3404                        if (cfg->fc_flags & RTF_GATEWAY)
3405                                return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3406
3407                        return __ip6_del_rt_siblings(rt, cfg);
3408                }
3409        }
3410        rcu_read_unlock();
3411
3412        return err;
3413}
3414
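/* Process an ICMPv6 Redirect: validate the message and its ND options,
 * update the neighbour cache for the new first hop and install an
 * RTF_CACHE exception route towards the redirected destination.
 */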
3415static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3416{
3417        struct netevent_redirect netevent;
3418        struct rt6_info *rt, *nrt = NULL;
3419        struct fib6_result res = {};
3420        struct ndisc_options ndopts;
3421        struct inet6_dev *in6_dev;
3422        struct neighbour *neigh;
3423        struct rd_msg *msg;
3424        int optlen, on_link;
3425        u8 *lladdr;
3426
3427        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3428        optlen -= sizeof(*msg);
3429
3430        if (optlen < 0) {
3431                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3432                return;
3433        }
3434
3435        msg = (struct rd_msg *)icmp6_hdr(skb);
3436
3437        if (ipv6_addr_is_multicast(&msg->dest)) {
3438                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3439                return;
3440        }
3441
3442        on_link = 0;
3443        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3444                on_link = 1;
3445        } else if (ipv6_addr_type(&msg->target) !=
3446                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3447                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3448                return;
3449        }
3450
3451        in6_dev = __in6_dev_get(skb->dev);
3452        if (!in6_dev)
3453                return;
3454        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3455                return;
3456
3457        /* RFC2461 8.1:
3458         *      The IP source address of the Redirect MUST be the same as the current
3459         *      first-hop router for the specified ICMP Destination Address.
3460         */
3461
3462        if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3463                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3464                return;
3465        }
3466
3467        lladdr = NULL;
3468        if (ndopts.nd_opts_tgt_lladdr) {
3469                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3470                                             skb->dev);
3471                if (!lladdr) {
3472                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3473                        return;
3474                }
3475        }
3476
3477        rt = (struct rt6_info *) dst;
3478        if (rt->rt6i_flags & RTF_REJECT) {
3479                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3480                return;
3481        }
3482
3483        /* Redirect received -> path was valid.
3484         * Look, redirects are sent only in response to data packets,
3485         * so this nexthop is apparently reachable. --ANK
3486         */
3487        dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3488
3489        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3490        if (!neigh)
3491                return;
3492
3493        /*
3494         *      We have finally decided to accept it.
3495         */
3496
3497        ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3498                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
3499                     NEIGH_UPDATE_F_OVERRIDE|
3500                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3501                                     NEIGH_UPDATE_F_ISROUTER)),
3502                     NDISC_REDIRECT, &ndopts);
3503
3504        rcu_read_lock();
3505        res.f6i = rcu_dereference(rt->from);
3506        if (!res.f6i)
3507                goto out;
3508
3509        res.nh = &res.f6i->fib6_nh;
3510        res.fib6_flags = res.f6i->fib6_flags;
3511        res.fib6_type = res.f6i->fib6_type;
3512        nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
3513        if (!nrt)
3514                goto out;
3515
3516        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3517        if (on_link)
3518                nrt->rt6i_flags &= ~RTF_GATEWAY;
3519
3520        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3521
3522        /* rt6_insert_exception() will take care of duplicated exceptions */
3523        if (rt6_insert_exception(nrt, &res)) {
3524                dst_release_immediate(&nrt->dst);
3525                goto out;
3526        }
3527
3528        netevent.old = &rt->dst;
3529        netevent.new = &nrt->dst;
3530        netevent.daddr = &msg->dest;
3531        netevent.neigh = neigh;
3532        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3533
3534out:
3535        rcu_read_unlock();
3536        neigh_release(neigh);
3537}
3538
3539#ifdef CONFIG_IPV6_ROUTE_INFO
3540static struct fib6_info *rt6_get_route_info(struct net *net,
3541                                           const struct in6_addr *prefix, int prefixlen,
3542                                           const struct in6_addr *gwaddr,
3543                                           struct net_device *dev)
3544{
3545        u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3546        int ifindex = dev->ifindex;
3547        struct fib6_node *fn;
3548        struct fib6_info *rt = NULL;
3549        struct fib6_table *table;
3550
3551        table = fib6_get_table(net, tb_id);
3552        if (!table)
3553                return NULL;
3554
3555        rcu_read_lock();
3556        fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3557        if (!fn)
3558                goto out;
3559
3560        for_each_fib6_node_rt_rcu(fn) {
3561                if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
3562                        continue;
3563                if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3564                    !rt->fib6_nh.fib_nh_gw_family)
3565                        continue;
3566                if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
3567                        continue;
3568                if (!fib6_info_hold_safe(rt))
3569                        continue;
3570                break;
3571        }
3572out:
3573        rcu_read_unlock();
3574        return rt;
3575}
3576
3577static struct fib6_info *rt6_add_route_info(struct net *net,
3578                                           const struct in6_addr *prefix, int prefixlen,
3579                                           const struct in6_addr *gwaddr,
3580                                           struct net_device *dev,
3581                                           unsigned int pref)
3582{
3583        struct fib6_config cfg = {
3584                .fc_metric      = IP6_RT_PRIO_USER,
3585                .fc_ifindex     = dev->ifindex,
3586                .fc_dst_len     = prefixlen,
3587                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3588                                  RTF_UP | RTF_PREF(pref),
3589                .fc_protocol = RTPROT_RA,
3590                .fc_type = RTN_UNICAST,
3591                .fc_nlinfo.portid = 0,
3592                .fc_nlinfo.nlh = NULL,
3593                .fc_nlinfo.nl_net = net,
3594        };
3595
3596        cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3597        cfg.fc_dst = *prefix;
3598        cfg.fc_gateway = *gwaddr;
3599
3600        /* We should treat it as a default route if prefix length is 0. */
3601        if (!prefixlen)
3602                cfg.fc_flags |= RTF_DEFAULT;
3603
3604        ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3605
3606        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3607}
3608#endif
3609
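/* Look up the default route learned via router advertisements from
 * @addr on @dev (RTF_ADDRCONF | RTF_DEFAULT) and return it with a
 * reference held, or NULL if none is installed.
 */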
3610struct fib6_info *rt6_get_dflt_router(struct net *net,
3611                                     const struct in6_addr *addr,
3612                                     struct net_device *dev)
3613{
3614        u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3615        struct fib6_info *rt;
3616        struct fib6_table *table;
3617
3618        table = fib6_get_table(net, tb_id);
3619        if (!table)
3620                return NULL;
3621
3622        rcu_read_lock();
3623        for_each_fib6_node_rt_rcu(&table->tb6_root) {
3624                struct fib6_nh *nh = &rt->fib6_nh;
3625
3626                if (dev == nh->fib_nh_dev &&
3627                    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3628                    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
3629                        break;
3630        }
3631        if (rt && !fib6_info_hold_safe(rt))
3632                rt = NULL;
3633        rcu_read_unlock();
3634        return rt;
3635}
3636
3637struct fib6_info *rt6_add_dflt_router(struct net *net,
3638                                     const struct in6_addr *gwaddr,
3639                                     struct net_device *dev,
3640                                     unsigned int pref)
3641{
3642        struct fib6_config cfg = {
3643                .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3644                .fc_metric      = IP6_RT_PRIO_USER,
3645                .fc_ifindex     = dev->ifindex,
3646                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3647                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3648                .fc_protocol = RTPROT_RA,
3649                .fc_type = RTN_UNICAST,
3650                .fc_nlinfo.portid = 0,
3651                .fc_nlinfo.nlh = NULL,
3652                .fc_nlinfo.nl_net = net,
3653        };
3654
3655        cfg.fc_gateway = *gwaddr;
3656
3657        if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3658                struct fib6_table *table;
3659
3660                table = fib6_get_table(dev_net(dev), cfg.fc_table);
3661                if (table)
3662                        table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3663        }
3664
3665        return rt6_get_dflt_router(net, gwaddr, dev);
3666}
3667
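/* Delete RA-learned routes (RTF_DEFAULT or RTF_ADDRCONF) from @table,
 * skipping interfaces configured with accept_ra == 2, and clear the
 * table's RT6_TABLE_HAS_DFLT_ROUTER flag.
 */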
3668static void __rt6_purge_dflt_routers(struct net *net,
3669                                     struct fib6_table *table)
3670{
3671        struct fib6_info *rt;
3672
3673restart:
3674        rcu_read_lock();
3675        for_each_fib6_node_rt_rcu(&table->tb6_root) {
3676                struct net_device *dev = fib6_info_nh_dev(rt);
3677                struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3678
3679                if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3680                    (!idev || idev->cnf.accept_ra != 2) &&
3681                    fib6_info_hold_safe(rt)) {
3682                        rcu_read_unlock();
3683                        ip6_del_rt(net, rt);
3684                        goto restart;
3685                }
3686        }
3687        rcu_read_unlock();
3688
3689        table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3690}
3691
3692void rt6_purge_dflt_routers(struct net *net)
3693{
3694        struct fib6_table *table;
3695        struct hlist_head *head;
3696        unsigned int h;
3697
3698        rcu_read_lock();
3699
3700        for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3701                head = &net->ipv6.fib_table_hash[h];
3702                hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3703                        if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3704                                __rt6_purge_dflt_routers(net, table);
3705                }
3706        }
3707
3708        rcu_read_unlock();
3709}
3710
3711static void rtmsg_to_fib6_config(struct net *net,
3712                                 struct in6_rtmsg *rtmsg,
3713                                 struct fib6_config *cfg)
3714{
3715        *cfg = (struct fib6_config){
3716                .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3717                         : RT6_TABLE_MAIN,
3718                .fc_ifindex = rtmsg->rtmsg_ifindex,
3719                .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
3720                .fc_expires = rtmsg->rtmsg_info,
3721                .fc_dst_len = rtmsg->rtmsg_dst_len,
3722                .fc_src_len = rtmsg->rtmsg_src_len,
3723                .fc_flags = rtmsg->rtmsg_flags,
3724                .fc_type = rtmsg->rtmsg_type,
3725
3726                .fc_nlinfo.nl_net = net,
3727
3728                .fc_dst = rtmsg->rtmsg_dst,
3729                .fc_src = rtmsg->rtmsg_src,
3730                .fc_gateway = rtmsg->rtmsg_gateway,
3731        };
3732}
3733
3734int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3735{
3736        struct fib6_config cfg;
3737        struct in6_rtmsg rtmsg;
3738        int err;
3739
3740        switch (cmd) {
3741        case SIOCADDRT:         /* Add a route */
3742        case SIOCDELRT:         /* Delete a route */
3743                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3744                        return -EPERM;
3745                err = copy_from_user(&rtmsg, arg,
3746                                     sizeof(struct in6_rtmsg));
3747                if (err)
3748                        return -EFAULT;
3749
3750                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3751
3752                rtnl_lock();
3753                switch (cmd) {
3754                case SIOCADDRT:
3755                        err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3756                        break;
3757                case SIOCDELRT:
3758                        err = ip6_route_del(&cfg, NULL);
3759                        break;
3760                default:
3761                        err = -EINVAL;
3762                }
3763                rtnl_unlock();
3764
3765                return err;
3766        }
3767
3768        return -EINVAL;
3769}
3770
3771/*
3772 *      Drop the packet on the floor
3773 */
3774
3775static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3776{
3777        struct dst_entry *dst = skb_dst(skb);
3778        struct net *net = dev_net(dst->dev);
3779        struct inet6_dev *idev;
3780        int type;
3781
3782        if (netif_is_l3_master(skb->dev) &&
3783            dst->dev == net->loopback_dev)
3784                idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
3785        else
3786                idev = ip6_dst_idev(dst);
3787
3788        switch (ipstats_mib_noroutes) {
3789        case IPSTATS_MIB_INNOROUTES:
3790                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3791                if (type == IPV6_ADDR_ANY) {
3792                        IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
3793                        break;
3794                }
3795                /* FALLTHROUGH */
3796        case IPSTATS_MIB_OUTNOROUTES:
3797                IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
3798                break;
3799        }
3800
3801        /* Start over by dropping the dst for the l3mdev case */
3802        if (netif_is_l3_master(skb->dev))
3803                skb_dst_drop(skb);
3804
3805        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3806        kfree_skb(skb);
3807        return 0;
3808}
3809
3810static int ip6_pkt_discard(struct sk_buff *skb)
3811{
3812        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
3813}
3814
3815static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3816{
3817        skb->dev = skb_dst(skb)->dev;
3818        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3819}
3820
3821static int ip6_pkt_prohibit(struct sk_buff *skb)
3822{
3823        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
3824}
3825
3826static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3827{
3828        skb->dev = skb_dst(skb)->dev;
3829        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3830}
3831
3832/*
3833 *      Allocate a dst for local (unicast / anycast) address.
3834 */
3835
3836struct fib6_info *addrconf_f6i_alloc(struct net *net,
3837                                     struct inet6_dev *idev,
3838                                     const struct in6_addr *addr,
3839                                     bool anycast, gfp_t gfp_flags)
3840{
3841        struct fib6_config cfg = {
3842                .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3843                .fc_ifindex = idev->dev->ifindex,
3844                .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3845                .fc_dst = *addr,
3846                .fc_dst_len = 128,
3847                .fc_protocol = RTPROT_KERNEL,
3848                .fc_nlinfo.nl_net = net,
3849                .fc_ignore_dev_down = true,
3850        };
3851
3852        if (anycast) {
3853                cfg.fc_type = RTN_ANYCAST;
3854                cfg.fc_flags |= RTF_ANYCAST;
3855        } else {
3856                cfg.fc_type = RTN_LOCAL;
3857                cfg.fc_flags |= RTF_LOCAL;
3858        }
3859
3860        return ip6_route_info_create(&cfg, gfp_flags, NULL);
3861}
3862
3863/* remove a deleted IP from prefsrc entries */
3864struct arg_dev_net_ip {
3865        struct net_device *dev;
3866        struct net *net;
3867        struct in6_addr *addr;
3868};
3869
3870static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3871{
3872        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3873        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3874        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3875
3876        if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3877            rt != net->ipv6.fib6_null_entry &&
3878            ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3879                spin_lock_bh(&rt6_exception_lock);
3880                /* remove prefsrc entry */
3881                rt->fib6_prefsrc.plen = 0;
3882                spin_unlock_bh(&rt6_exception_lock);
3883        }
3884        return 0;
3885}
3886
3887void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3888{
3889        struct net *net = dev_net(ifp->idev->dev);
3890        struct arg_dev_net_ip adni = {
3891                .dev = ifp->idev->dev,
3892                .net = net,
3893                .addr = &ifp->addr,
3894        };
3895        fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3896}
3897
3898#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT)
3899
3900/* Remove routers and update dst entries when a gateway turns into a host. */
3901static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3902{
3903        struct in6_addr *gateway = (struct in6_addr *)arg;
3904
3905        if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3906            rt->fib6_nh.fib_nh_gw_family &&
3907            ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3908                return -1;
3909        }
3910
3911        /* Further clean up cached routes in the exception table.
3912         * This is needed because a cached route may have a different
3913         * gateway than its 'parent' in the case of an IP redirect.
3914         */
3915        rt6_exceptions_clean_tohost(rt, gateway);
3916
3917        return 0;
3918}
3919
3920void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3921{
3922        fib6_clean_all(net, fib6_clean_tohost, gateway);
3923}
3924
3925struct arg_netdev_event {
3926        const struct net_device *dev;
3927        union {
3928                unsigned char nh_flags;
3929                unsigned long event;
3930        };
3931};
3932
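/* Walk the fib6_node's leaf list and return the first route with the same
 * metric that qualifies for ECMP, i.e. the head of rt's multipath group.
 */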
3933static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3934{
3935        struct fib6_info *iter;
3936        struct fib6_node *fn;
3937
3938        fn = rcu_dereference_protected(rt->fib6_node,
3939                        lockdep_is_held(&rt->fib6_table->tb6_lock));
3940        iter = rcu_dereference_protected(fn->leaf,
3941                        lockdep_is_held(&rt->fib6_table->tb6_lock));
3942        while (iter) {
3943                if (iter->fib6_metric == rt->fib6_metric &&
3944                    rt6_qualify_for_ecmp(iter))
3945                        return iter;
3946                iter = rcu_dereference_protected(iter->fib6_next,
3947                                lockdep_is_held(&rt->fib6_table->tb6_lock));
3948        }
3949
3950        return NULL;
3951}
3952
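/* A nexthop is unusable if it is marked dead, or if it is link-down and
 * its device is configured to ignore routes on link down.
 */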
3953static bool rt6_is_dead(const struct fib6_info *rt)
3954{
3955        if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3956            (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3957             ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3958                return true;
3959
3960        return false;
3961}
3962
3963static int rt6_multipath_total_weight(const struct fib6_info *rt)
3964{
3965        struct fib6_info *iter;
3966        int total = 0;
3967
3968        if (!rt6_is_dead(rt))
3969                total += rt->fib6_nh.fib_nh_weight;
3970
3971        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3972                if (!rt6_is_dead(iter))
3973                        total += iter->fib6_nh.fib_nh_weight;
3974        }
3975
3976        return total;
3977}
3978
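/* Hash-threshold multipath: assign each live nexthop an upper bound
 * proportional to its cumulative weight, scaled into the [0, 2^31) hash
 * range; dead nexthops get -1 so they are never selected.
 */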
3979static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3980{
3981        int upper_bound = -1;
3982
3983        if (!rt6_is_dead(rt)) {
3984                *weight += rt->fib6_nh.fib_nh_weight;
3985                upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3986                                                    total) - 1;
3987        }
3988        atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3989}
3990
3991static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3992{
3993        struct fib6_info *iter;
3994        int weight = 0;
3995
3996        rt6_upper_bound_set(rt, &weight, total);
3997
3998        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3999                rt6_upper_bound_set(iter, &weight, total);
4000}
4001
4002void rt6_multipath_rebalance(struct fib6_info *rt)
4003{
4004        struct fib6_info *first;
4005        int total;
4006
4007        /* If the entire multipath route was marked for flushing,
4008         * there is no need to rebalance upon the removal of every
4009         * sibling route.
4010         */
4011        if (!rt->fib6_nsiblings || rt->should_flush)
4012                return;
4013
4014        /* During lookup routes are evaluated in order, so we need to
4015         * make sure upper bounds are assigned from the first sibling
4016         * onwards.
4017         */
4018        first = rt6_multipath_first_sibling(rt);
4019        if (WARN_ON_ONCE(!first))
4020                return;
4021
4022        total = rt6_multipath_total_weight(first);
4023        rt6_multipath_upper_bound_set(first, total);
4024}
4025
4026static int fib6_ifup(struct fib6_info *rt, void *p_arg)
4027{
4028        const struct arg_netdev_event *arg = p_arg;
4029        struct net *net = dev_net(arg->dev);
4030
4031        if (rt != net->ipv6.fib6_null_entry &&
4032            rt->fib6_nh.fib_nh_dev == arg->dev) {
4033                rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
4034                fib6_update_sernum_upto_root(net, rt);
4035                rt6_multipath_rebalance(rt);
4036        }
4037
4038        return 0;
4039}
4040
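/* Clear @nh_flags on every nexthop using @dev, e.g. when the device is
 * brought up or regains carrier, and rebalance multipath weights.
 */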
4041void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
4042{
4043        struct arg_netdev_event arg = {
4044                .dev = dev,
4045                {
4046                        .nh_flags = nh_flags,
4047                },
4048        };
4049
4050        if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4051                arg.nh_flags |= RTNH_F_LINKDOWN;
4052
4053        fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4054}
4055
4056static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4057                                   const struct net_device *dev)
4058{
4059        struct fib6_info *iter;
4060
4061        if (rt->fib6_nh.fib_nh_dev == dev)
4062                return true;
4063        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4064                if (iter->fib6_nh.fib_nh_dev == dev)
4065                        return true;
4066
4067        return false;
4068}
4069
4070static void rt6_multipath_flush(struct fib6_info *rt)
4071{
4072        struct fib6_info *iter;
4073
4074        rt->should_flush = 1;
4075        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4076                iter->should_flush = 1;
4077}
4078
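/* Count the nexthops in rt's multipath group that are already dead or
 * that use @down_dev.
 */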
4079static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4080                                             const struct net_device *down_dev)
4081{
4082        struct fib6_info *iter;
4083        unsigned int dead = 0;
4084
4085        if (rt->fib6_nh.fib_nh_dev == down_dev ||
4086            rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
4087                dead++;
4088        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4089                if (iter->fib6_nh.fib_nh_dev == down_dev ||
4090                    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
4091                        dead++;
4092
4093        return dead;
4094}
4095
4096static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4097                                       const struct net_device *dev,
4098                                       unsigned char nh_flags)
4099{
4100        struct fib6_info *iter;
4101
4102        if (rt->fib6_nh.fib_nh_dev == dev)
4103                rt->fib6_nh.fib_nh_flags |= nh_flags;
4104        list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4105                if (iter->fib6_nh.fib_nh_dev == dev)
4106                        iter->fib6_nh.fib_nh_flags |= nh_flags;
4107}
4108
4109/* called with write lock held for table with rt */
4110static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4111{
4112        const struct arg_netdev_event *arg = p_arg;
4113        const struct net_device *dev = arg->dev;
4114        struct net *net = dev_net(dev);
4115
4116        if (rt == net->ipv6.fib6_null_entry)
4117                return 0;
4118
4119        switch (arg->event) {
4120        case NETDEV_UNREGISTER:
4121                return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
4122        case NETDEV_DOWN:
4123                if (rt->should_flush)
4124                        return -1;
4125                if (!rt->fib6_nsiblings)
4126                        return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
4127                if (rt6_multipath_uses_dev(rt, dev)) {
4128                        unsigned int count;
4129
4130                        count = rt6_multipath_dead_count(rt, dev);
4131                        if (rt->fib6_nsiblings + 1 == count) {
4132                                rt6_multipath_flush(rt);
4133                                return -1;
4134                        }
4135                        rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4136                                                   RTNH_F_LINKDOWN);
4137                        fib6_update_sernum(net, rt);
4138                        rt6_multipath_rebalance(rt);
4139                }
4140                return -2;
4141        case NETDEV_CHANGE:
4142                if (rt->fib6_nh.fib_nh_dev != dev ||
4143                    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4144                        break;
4145                rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4146                rt6_multipath_rebalance(rt);
4147                break;
4148        }
4149
4150        return 0;
4151}
4152
4153void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4154{
4155        struct arg_netdev_event arg = {
4156                .dev = dev,
4157                {
4158                        .event = event,
4159                },
4160        };
4161        struct net *net = dev_net(dev);
4162
4163        if (net->ipv6.sysctl.skip_notify_on_dev_down)
4164                fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
4165        else
4166                fib6_clean_all(net, fib6_ifdown, &arg);
4167}
4168
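/* Shut down IPv6 routing state for @dev: sync routes for the event,
 * flush uncached dst entries referencing the device and remove its
 * neighbour cache entries.
 */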
4169void rt6_disable_ip(struct net_device *dev, unsigned long event)
4170{
4171        rt6_sync_down_dev(dev, event);
4172        rt6_uncached_list_flush_dev(dev_net(dev), dev);
4173        neigh_ifdown(&nd_tbl, dev);
4174}
4175
4176struct rt6_mtu_change_arg {
4177        struct net_device *dev;
4178        unsigned int mtu;
4179};
4180
4181static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4182{
4183        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4184        struct inet6_dev *idev;
4185
4186        /* In IPv6 PMTU discovery is not optional,
4187           so the RTAX_MTU lock cannot disable it.
4188           We still use this lock to block changes
4189           caused by addrconf/ndisc.
4190        */
4191
4192        idev = __in6_dev_get(arg->dev);
4193        if (!idev)
4194                return 0;
4195
4196        /* For an administrative MTU increase there is no way to discover
4197           an IPv6 PMTU increase, so the PMTU must be updated here.
4198           Since RFC 1981 doesn't cover administrative MTU increases,
4199           updating the PMTU on increase is a MUST (e.g. jumbo frames).
4200         */
4201        if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4202            !fib6_metric_locked(rt, RTAX_MTU)) {
4203                u32 mtu = rt->fib6_pmtu;
4204
4205                if (mtu >= arg->mtu ||
4206                    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4207                        fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4208
4209                spin_lock_bh(&rt6_exception_lock);
4210                rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4211                spin_unlock_bh(&rt6_exception_lock);
4212        }
4213        return 0;
4214}
4215
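/* Propagate a device MTU change to all routes and cached exceptions
 * using that device.
 */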
4216void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4217{
4218        struct rt6_mtu_change_arg arg = {
4219                .dev = dev,
4220                .mtu = mtu,
4221        };
4222
4223        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4224}
4225
4226static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
4227        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4228        [RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
4229        [RTA_OIF]               = { .type = NLA_U32 },
4230        [RTA_IIF]               = { .type = NLA_U32 },
4231        [RTA_PRIORITY]          = { .type = NLA_U32 },
4232        [RTA_METRICS]           = { .type = NLA_NESTED },
4233        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
4234        [RTA_PREF]              = { .type = NLA_U8 },
4235        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
4236        [RTA_ENCAP]             = { .type = NLA_NESTED },
4237        [RTA_EXPIRES]           = { .type = NLA_U32 },
4238        [RTA_UID]               = { .type = NLA_U32 },
4239        [RTA_MARK]              = { .type = NLA_U32 },
4240        [RTA_TABLE]             = { .type = NLA_U32 },
4241        [RTA_IP_PROTO]          = { .type = NLA_U8 },
4242        [RTA_SPORT]             = { .type = NLA_U16 },
4243        [RTA_DPORT]             = { .type = NLA_U16 },
4244};
4245
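/* Convert an RTM_NEWROUTE/RTM_DELROUTE request into a fib6_config,
 * parsing the rtmsg header and RTA_* attributes against rtm_ipv6_policy.
 */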
4246static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4247                              struct fib6_config *cfg,
4248                              struct netlink_ext_ack *extack)
4249{
4250        struct rtmsg *rtm;
4251        struct nlattr *tb[RTA_MAX+1];
4252        unsigned int pref;
4253        int err;
4254
4255        err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4256                                     rtm_ipv6_policy, extack);
4257        if (err < 0)
4258                goto errout;
4259
4260        err = -EINVAL;
4261        rtm = nlmsg_data(nlh);
4262
4263        *cfg = (struct fib6_config){
4264                .fc_table = rtm->rtm_table,
4265                .fc_dst_len = rtm->rtm_dst_len,
4266                .fc_src_len = rtm->rtm_src_len,
4267                .fc_flags = RTF_UP,
4268                .fc_protocol = rtm->rtm_protocol,
4269                .fc_type = rtm->rtm_type,
4270
4271                .fc_nlinfo.portid = NETLINK_CB(skb).portid,
4272                .fc_nlinfo.nlh = nlh,
4273                .fc_nlinfo.nl_net = sock_net(skb->sk),
4274        };
4275
4276        if (rtm->rtm_type == RTN_UNREACHABLE ||
4277            rtm->rtm_type == RTN_BLACKHOLE ||
4278            rtm->rtm_type == RTN_PROHIBIT ||
4279            rtm->rtm_type == RTN_THROW)
4280                cfg->fc_flags |= RTF_REJECT;
4281
4282        if (rtm->rtm_type == RTN_LOCAL)
4283                cfg->fc_flags |= RTF_LOCAL;
4284
4285        if (rtm->rtm_flags & RTM_F_CLONED)
4286                cfg->fc_flags |= RTF_CACHE;
4287
4288        cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4289
4290        if (tb[RTA_GATEWAY]) {
4291                cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4292                cfg->fc_flags |= RTF_GATEWAY;
4293        }
4294        if (tb[RTA_VIA]) {
4295                NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4296                goto errout;
4297        }
4298
4299        if (tb[RTA_DST]) {
4300                int plen = (rtm->rtm_dst_len + 7) >> 3;
4301
4302                if (nla_len(tb[RTA_DST]) < plen)
4303                        goto errout;
4304
4305                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4306        }
4307
4308        if (tb[RTA_SRC]) {
4309                int plen = (rtm->rtm_src_len + 7) >> 3;
4310
4311                if (nla_len(tb[RTA_SRC]) < plen)
4312                        goto errout;
4313
4314                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4315        }
4316
4317        if (tb[RTA_PREFSRC])
4318                cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4319
4320        if (tb[RTA_OIF])
4321                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4322
4323        if (tb[RTA_PRIORITY])
4324                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4325
4326        if (tb[RTA_METRICS]) {
4327                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4328                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4329        }
4330
4331        if (tb[RTA_TABLE])
4332                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4333
4334        if (tb[RTA_MULTIPATH]) {
4335                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4336                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4337
4338                err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4339                                                     cfg->fc_mp_len, extack);
4340                if (err < 0)
4341                        goto errout;
4342        }
4343
4344        if (tb[RTA_PREF]) {
4345                pref = nla_get_u8(tb[RTA_PREF]);
4346                if (pref != ICMPV6_ROUTER_PREF_LOW &&
4347                    pref != ICMPV6_ROUTER_PREF_HIGH)
4348                        pref = ICMPV6_ROUTER_PREF_MEDIUM;
4349                cfg->fc_flags |= RTF_PREF(pref);
4350        }
4351
4352        if (tb[RTA_ENCAP])
4353                cfg->fc_encap = tb[RTA_ENCAP];
4354
4355        if (tb[RTA_ENCAP_TYPE]) {
4356                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4357
4358                err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4359                if (err < 0)
4360                        goto errout;
4361        }
4362
4363        if (tb[RTA_EXPIRES]) {
4364                unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4365
4366                if (addrconf_finite_timeout(timeout)) {
4367                        cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4368                        cfg->fc_flags |= RTF_EXPIRES;
4369                }
4370        }
4371
4372        err = 0;
4373errout:
4374        return err;
4375}
4376
4377struct rt6_nh {
4378        struct fib6_info *fib6_info;
4379        struct fib6_config r_cfg;
4380        struct list_head next;
4381};
4382
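/* Queue @rt on @rt6_nh_list unless an equivalent nexthop is already
 * present; duplicates are rejected with -EEXIST.
 */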
4383static int ip6_route_info_append(struct net *net,
4384                                 struct list_head *rt6_nh_list,
4385                                 struct fib6_info *rt,
4386                                 struct fib6_config *r_cfg)
4387{
4388        struct rt6_nh *nh;
4389        int err = -EEXIST;
4390
4391        list_for_each_entry(nh, rt6_nh_list, next) {
4392                /* check if fib6_info already exists */
4393                if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4394                        return err;
4395        }
4396
4397        nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4398        if (!nh)
4399                return -ENOMEM;
4400        nh->fib6_info = rt;
4401        memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4402        list_add_tail(&nh->next, rt6_nh_list);
4403
4404        return 0;
4405}
4406
4407static void ip6_route_mpath_notify(struct fib6_info *rt,
4408                                   struct fib6_info *rt_last,
4409                                   struct nl_info *info,
4410                                   __u16 nlflags)
4411{
4412        /* if this is an APPEND route, then rt points to the first route
4413         * inserted and rt_last points to the last route inserted. Userspace
4414         * wants a consistent dump of the route which starts at the first
4415         * nexthop. Since sibling routes are always added at the end of
4416         * the list, find the first sibling of the last route appended.
4417         */
4418        if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4419                rt = list_first_entry(&rt_last->fib6_siblings,
4420                                      struct fib6_info,
4421                                      fib6_siblings);
4422        }
4423
4424        if (rt)
4425                inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4426}
4427
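/* Add or replace an RTA_MULTIPATH route: create one fib6_info per
 * nexthop, insert them one by one, send a single notification covering
 * all hops, and unwind already-inserted nexthops if a later insertion
 * fails.
 */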
4428static int ip6_route_multipath_add(struct fib6_config *cfg,
4429                                   struct netlink_ext_ack *extack)
4430{
4431        struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4432        struct nl_info *info = &cfg->fc_nlinfo;
4433        struct fib6_config r_cfg;
4434        struct rtnexthop *rtnh;
4435        struct fib6_info *rt;
4436        struct rt6_nh *err_nh;
4437        struct rt6_nh *nh, *nh_safe;
4438        __u16 nlflags;
4439        int remaining;
4440        int attrlen;
4441        int err = 1;
4442        int nhn = 0;
4443        int replace = (cfg->fc_nlinfo.nlh &&
4444                       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4445        LIST_HEAD(rt6_nh_list);
4446
4447        nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4448        if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4449                nlflags |= NLM_F_APPEND;
4450
4451        remaining = cfg->fc_mp_len;
4452        rtnh = (struct rtnexthop *)cfg->fc_mp;
4453
4454        /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4455         * fib6_info structs per nexthop
4456         */
4457        while (rtnh_ok(rtnh, remaining)) {
4458                memcpy(&r_cfg, cfg, sizeof(*cfg));
4459                if (rtnh->rtnh_ifindex)
4460                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4461
4462                attrlen = rtnh_attrlen(rtnh);
4463                if (attrlen > 0) {
4464                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4465
4466                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4467                        if (nla) {
4468                                r_cfg.fc_gateway = nla_get_in6_addr(nla);
4469                                r_cfg.fc_flags |= RTF_GATEWAY;
4470                        }
4471                        r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4472                        nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4473                        if (nla)
4474                                r_cfg.fc_encap_type = nla_get_u16(nla);
4475                }
4476
4477                r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4478                rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4479                if (IS_ERR(rt)) {
4480                        err = PTR_ERR(rt);
4481                        rt = NULL;
4482                        goto cleanup;
4483                }
4484                if (!rt6_qualify_for_ecmp(rt)) {
4485                        err = -EINVAL;
4486                        NL_SET_ERR_MSG(extack,
4487                                       "Device only routes can not be added for IPv6 using the multipath API.");
4488                        fib6_info_release(rt);
4489                        goto cleanup;
4490                }
4491
4492                rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4493
4494                err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4495                                            rt, &r_cfg);
4496                if (err) {
4497                        fib6_info_release(rt);
4498                        goto cleanup;
4499                }
4500
4501                rtnh = rtnh_next(rtnh, &remaining);
4502        }
4503
4504        /* for add and replace send one notification with all nexthops.
4505         * Skip the notification in fib6_add_rt2node and send one with
4506         * the full route when done
4507         */
4508        info->skip_notify = 1;
4509
4510        err_nh = NULL;
4511        list_for_each_entry(nh, &rt6_nh_list, next) {
4512                err = __ip6_ins_rt(nh->fib6_info, info, extack);
4513                fib6_info_release(nh->fib6_info);
4514
4515                if (!err) {
4516                        /* save reference to last route successfully inserted */
4517                        rt_last = nh->fib6_info;
4518
4519                        /* save reference to first route for notification */
4520                        if (!rt_notif)
4521                                rt_notif = nh->fib6_info;
4522                }
4523
4524                /* nh->fib6_info is used or freed at this point, reset to NULL */
4525                nh->fib6_info = NULL;
4526                if (err) {
4527                        if (replace && nhn)
4528                                NL_SET_ERR_MSG_MOD(extack,
4529                                                   "multipath route replace failed (check consistency of installed routes)");
4530                        err_nh = nh;
4531                        goto add_errout;
4532                }
4533
4534                /* Because each route is added like a single route, we remove
4535                 * these flags after the first nexthop: if there is a collision,
4536                 * we have already failed to add the first nexthop because
4537                 * fib6_add_rt2node() has rejected it; when replacing, the old
4538                 * nexthops have been replaced by the first new one, and the
4539                 * rest should be appended to it.
4540                 */
4541                cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4542                                                     NLM_F_REPLACE);
4543                nhn++;
4544        }
4545
4546        /* success ... tell user about new route */
4547        ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4548        goto cleanup;
4549
4550add_errout:
4551        /* send notification for routes that were added so that
4552         * the delete notifications sent by ip6_route_del are
4553         * coherent
4554         */
4555        if (rt_notif)
4556                ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4557
4558        /* Delete routes that were already added */
4559        list_for_each_entry(nh, &rt6_nh_list, next) {
4560                if (err_nh == nh)
4561                        break;
4562                ip6_route_del(&nh->r_cfg, extack);
4563        }
4564
4565cleanup:
4566        list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4567                if (nh->fib6_info)
4568                        fib6_info_release(nh->fib6_info);
4569                list_del(&nh->next);
4570                kfree(nh);
4571        }
4572
4573        return err;
4574}
4575
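/* Delete counterpart of ip6_route_multipath_add(): walk the RTA_MULTIPATH
 * nexthops, build a per-nexthop config (interface and, when present, the
 * RTA_GATEWAY of each rtnexthop) and call ip6_route_del() for each one.
 * A failure does not stop the remaining nexthops; the last error seen is
 * returned.
 */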
4576static int ip6_route_multipath_del(struct fib6_config *cfg,
4577                                   struct netlink_ext_ack *extack)
4578{
4579        struct fib6_config r_cfg;
4580        struct rtnexthop *rtnh;
4581        int remaining;
4582        int attrlen;
4583        int err = 1, last_err = 0;
4584
4585        remaining = cfg->fc_mp_len;
4586        rtnh = (struct rtnexthop *)cfg->fc_mp;
4587
4588        /* Parse a Multipath Entry */
4589        while (rtnh_ok(rtnh, remaining)) {
4590                memcpy(&r_cfg, cfg, sizeof(*cfg));
4591                if (rtnh->rtnh_ifindex)
4592                        r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4593
4594                attrlen = rtnh_attrlen(rtnh);
4595                if (attrlen > 0) {
4596                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4597
4598                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4599                        if (nla) {
4600                                nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4601                                r_cfg.fc_flags |= RTF_GATEWAY;
4602                        }
4603                }
4604                err = ip6_route_del(&r_cfg, extack);
4605                if (err)
4606                        last_err = err;
4607
4608                rtnh = rtnh_next(rtnh, &remaining);
4609        }
4610
4611        return last_err;
4612}
4613
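/* RTM_DELROUTE doit handler (registered in ip6_route_init()).  Requests
 * carrying RTA_MULTIPATH are dispatched to ip6_route_multipath_del();
 * otherwise fc_delete_all_nh is set so that ip6_route_del() can remove a
 * matching multipath route together with all of its sibling nexthops.
 */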
4614static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4615                              struct netlink_ext_ack *extack)
4616{
4617        struct fib6_config cfg;
4618        int err;
4619
4620        err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4621        if (err < 0)
4622                return err;
4623
4624        if (cfg.fc_mp)
4625                return ip6_route_multipath_del(&cfg, extack);
4626        else {
4627                cfg.fc_delete_all_nh = 1;
4628                return ip6_route_del(&cfg, extack);
4629        }
4630}
4631
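/* RTM_NEWROUTE doit handler.  A metric (priority) of 0 is replaced with
 * IP6_RT_PRIO_USER (1024, the usual priority of administratively added
 * IPv6 routes), and requests with RTA_MULTIPATH are handed to
 * ip6_route_multipath_add().  For example, something like
 *
 *   ip -6 route add 2001:db8::/64 \
 *           nexthop via fe80::1 dev eth0 nexthop via fe80::2 dev eth1
 *
 * ends up here with cfg.fc_mp set.
 */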
4632static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4633                              struct netlink_ext_ack *extack)
4634{
4635        struct fib6_config cfg;
4636        int err;
4637
4638        err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4639        if (err < 0)
4640                return err;
4641
4642        if (cfg.fc_metric == 0)
4643                cfg.fc_metric = IP6_RT_PRIO_USER;
4644
4645        if (cfg.fc_mp)
4646                return ip6_route_multipath_add(&cfg, extack);
4647        else
4648                return ip6_route_add(&cfg, GFP_KERNEL, extack);
4649}
4650
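/* Upper bound on the netlink message size needed to dump rt, used when
 * sizing route notification skbs (see inet6_rt_notify()).  Multipath
 * routes additionally reserve one rtnexthop, gateway and encap per
 * sibling inside an RTA_MULTIPATH attribute.
 */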
4651static size_t rt6_nlmsg_size(struct fib6_info *rt)
4652{
4653        int nexthop_len = 0;
4654
4655        if (rt->fib6_nsiblings) {
4656                nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
4657                            + NLA_ALIGN(sizeof(struct rtnexthop))
4658                            + nla_total_size(16) /* RTA_GATEWAY */
4659                            + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4660
4661                nexthop_len *= rt->fib6_nsiblings;
4662        }
4663
4664        return NLMSG_ALIGN(sizeof(struct rtmsg))
4665               + nla_total_size(16) /* RTA_SRC */
4666               + nla_total_size(16) /* RTA_DST */
4667               + nla_total_size(16) /* RTA_GATEWAY */
4668               + nla_total_size(16) /* RTA_PREFSRC */
4669               + nla_total_size(4) /* RTA_TABLE */
4670               + nla_total_size(4) /* RTA_IIF */
4671               + nla_total_size(4) /* RTA_OIF */
4672               + nla_total_size(4) /* RTA_PRIORITY */
4673               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4674               + nla_total_size(sizeof(struct rta_cacheinfo))
4675               + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4676               + nla_total_size(1) /* RTA_PREF */
4677               + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
4678               + nexthop_len;
4679}
4680
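/* Fill one route message.  rt is the FIB entry (fib6_info); dst, when
 * non-NULL, is a resolved/cached rt6_info whose destination, source and
 * flags take precedence over the FIB entry's.  dest/src, when set, force
 * host-length (/128) RTA_DST/RTA_SRC attributes, as used by RTM_GETROUTE
 * replies.
 */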
4681static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4682                         struct fib6_info *rt, struct dst_entry *dst,
4683                         struct in6_addr *dest, struct in6_addr *src,
4684                         int iif, int type, u32 portid, u32 seq,
4685                         unsigned int flags)
4686{
4687        struct rt6_info *rt6 = (struct rt6_info *)dst;
4688        struct rt6key *rt6_dst, *rt6_src;
4689        u32 *pmetrics, table, rt6_flags;
4690        struct nlmsghdr *nlh;
4691        struct rtmsg *rtm;
4692        long expires = 0;
4693
4694        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4695        if (!nlh)
4696                return -EMSGSIZE;
4697
4698        if (rt6) {
4699                rt6_dst = &rt6->rt6i_dst;
4700                rt6_src = &rt6->rt6i_src;
4701                rt6_flags = rt6->rt6i_flags;
4702        } else {
4703                rt6_dst = &rt->fib6_dst;
4704                rt6_src = &rt->fib6_src;
4705                rt6_flags = rt->fib6_flags;
4706        }
4707
4708        rtm = nlmsg_data(nlh);
4709        rtm->rtm_family = AF_INET6;
4710        rtm->rtm_dst_len = rt6_dst->plen;
4711        rtm->rtm_src_len = rt6_src->plen;
4712        rtm->rtm_tos = 0;
4713        if (rt->fib6_table)
4714                table = rt->fib6_table->tb6_id;
4715        else
4716                table = RT6_TABLE_UNSPEC;
4717        rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4718        if (nla_put_u32(skb, RTA_TABLE, table))
4719                goto nla_put_failure;
4720
4721        rtm->rtm_type = rt->fib6_type;
4722        rtm->rtm_flags = 0;
4723        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4724        rtm->rtm_protocol = rt->fib6_protocol;
4725
4726        if (rt6_flags & RTF_CACHE)
4727                rtm->rtm_flags |= RTM_F_CLONED;
4728
4729        if (dest) {
4730                if (nla_put_in6_addr(skb, RTA_DST, dest))
4731                        goto nla_put_failure;
4732                rtm->rtm_dst_len = 128;
4733        } else if (rtm->rtm_dst_len)
4734                if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4735                        goto nla_put_failure;
4736#ifdef CONFIG_IPV6_SUBTREES
4737        if (src) {
4738                if (nla_put_in6_addr(skb, RTA_SRC, src))
4739                        goto nla_put_failure;
4740                rtm->rtm_src_len = 128;
4741        } else if (rtm->rtm_src_len &&
4742                   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4743                goto nla_put_failure;
4744#endif
4745        if (iif) {
4746#ifdef CONFIG_IPV6_MROUTE
4747                if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4748                        int err = ip6mr_get_route(net, skb, rtm, portid);
4749
4750                        if (err == 0)
4751                                return 0;
4752                        if (err < 0)
4753                                goto nla_put_failure;
4754                } else
4755#endif
4756                        if (nla_put_u32(skb, RTA_IIF, iif))
4757                                goto nla_put_failure;
4758        } else if (dest) {
4759                struct in6_addr saddr_buf;
4760                if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4761                    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4762                        goto nla_put_failure;
4763        }
4764
4765        if (rt->fib6_prefsrc.plen) {
4766                struct in6_addr saddr_buf;
4767                saddr_buf = rt->fib6_prefsrc.addr;
4768                if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4769                        goto nla_put_failure;
4770        }
4771
4772        pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4773        if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4774                goto nla_put_failure;
4775
4776        if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4777                goto nla_put_failure;
4778
4779        /* For multipath routes, walk the siblings list and add
4780         * each as a nexthop within RTA_MULTIPATH.
4781         */
4782        if (rt6) {
4783                if (rt6_flags & RTF_GATEWAY &&
4784                    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4785                        goto nla_put_failure;
4786
4787                if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4788                        goto nla_put_failure;
4789        } else if (rt->fib6_nsiblings) {
4790                struct fib6_info *sibling, *next_sibling;
4791                struct nlattr *mp;
4792
4793                mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
4794                if (!mp)
4795                        goto nla_put_failure;
4796
4797                if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4798                                    rt->fib6_nh.fib_nh_weight) < 0)
4799                        goto nla_put_failure;
4800
4801                list_for_each_entry_safe(sibling, next_sibling,
4802                                         &rt->fib6_siblings, fib6_siblings) {
4803                        if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4804                                            sibling->fib6_nh.fib_nh_weight) < 0)
4805                                goto nla_put_failure;
4806                }
4807
4808                nla_nest_end(skb, mp);
4809        } else {
4810                unsigned char nh_flags = 0;
4811
4812                if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4813                                     &nh_flags, false) < 0)
4814                        goto nla_put_failure;
4815
4816                rtm->rtm_flags |= nh_flags;
4817        }
4818
4819        if (rt6_flags & RTF_EXPIRES) {
4820                expires = dst ? dst->expires : rt->expires;
4821                expires -= jiffies;
4822        }
4823
4824        if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4825                goto nla_put_failure;
4826
4827        if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4828                goto nla_put_failure;
4829
4831        nlmsg_end(skb, nlh);
4832        return 0;
4833
4834nla_put_failure:
4835        nlmsg_cancel(skb, nlh);
4836        return -EMSGSIZE;
4837}
4838
4839static bool fib6_info_uses_dev(const struct fib6_info *f6i,
4840                               const struct net_device *dev)
4841{
4842        if (f6i->fib6_nh.fib_nh_dev == dev)
4843                return true;
4844
4845        if (f6i->fib6_nsiblings) {
4846                struct fib6_info *sibling, *next_sibling;
4847
4848                list_for_each_entry_safe(sibling, next_sibling,
4849                                         &f6i->fib6_siblings, fib6_siblings) {
4850                        if (sibling->fib6_nh.fib_nh_dev == dev)
4851                                return true;
4852                }
4853        }
4854
4855        return false;
4856}
4857
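/* Per-route callback invoked from the fib6 tree walk during route dumps.
 * Returns 0 for the null entry, a positive value for routes excluded by
 * the dump filter, and the result of rt6_fill_node() otherwise (negative
 * when the skb is full).
 */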
4858int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4859{
4860        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4861        struct fib_dump_filter *filter = &arg->filter;
4862        unsigned int flags = NLM_F_MULTI;
4863        struct net *net = arg->net;
4864
4865        if (rt == net->ipv6.fib6_null_entry)
4866                return 0;
4867
4868        if ((filter->flags & RTM_F_PREFIX) &&
4869            !(rt->fib6_flags & RTF_PREFIX_RT)) {
4870                /* success since this is not a prefix route */
4871                return 1;
4872        }
4873        if (filter->filter_set) {
4874                if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
4875                    (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
4876                    (filter->protocol && rt->fib6_protocol != filter->protocol)) {
4877                        return 1;
4878                }
4879                flags |= NLM_F_DUMP_FILTERED;
4880        }
4881
4882        return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4883                             RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4884                             arg->cb->nlh->nlmsg_seq, flags);
4885}
4886
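/* Validate an RTM_GETROUTE request.  With strict checking enabled on the
 * socket, the header fields and attribute set are restricted to what the
 * handler actually understands; otherwise the request is only run through
 * the (deprecated) lenient attribute parser.
 */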
4887static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
4888                                        const struct nlmsghdr *nlh,
4889                                        struct nlattr **tb,
4890                                        struct netlink_ext_ack *extack)
4891{
4892        struct rtmsg *rtm;
4893        int i, err;
4894
4895        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
4896                NL_SET_ERR_MSG_MOD(extack,
4897                                   "Invalid header for get route request");
4898                return -EINVAL;
4899        }
4900
4901        if (!netlink_strict_get_check(skb))
4902                return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
4903                                              rtm_ipv6_policy, extack);
4904
4905        rtm = nlmsg_data(nlh);
4906        if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
4907            (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
4908            rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
4909            rtm->rtm_type) {
4910                NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
4911                return -EINVAL;
4912        }
4913        if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
4914                NL_SET_ERR_MSG_MOD(extack,
4915                                   "Invalid flags for get route request");
4916                return -EINVAL;
4917        }
4918
4919        err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
4920                                            rtm_ipv6_policy, extack);
4921        if (err)
4922                return err;
4923
4924        if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
4925            (tb[RTA_DST] && !rtm->rtm_dst_len)) {
4926                NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
4927                return -EINVAL;
4928        }
4929
4930        for (i = 0; i <= RTA_MAX; i++) {
4931                if (!tb[i])
4932                        continue;
4933
4934                switch (i) {
4935                case RTA_SRC:
4936                case RTA_DST:
4937                case RTA_IIF:
4938                case RTA_OIF:
4939                case RTA_MARK:
4940                case RTA_UID:
4941                case RTA_SPORT:
4942                case RTA_DPORT:
4943                case RTA_IP_PROTO:
4944                        break;
4945                default:
4946                        NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
4947                        return -EINVAL;
4948                }
4949        }
4950
4951        return 0;
4952}
4953
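/* RTM_GETROUTE doit handler: build a flow from the request attributes,
 * resolve it (input path when RTA_IIF is given, output path otherwise)
 * and unicast a single RTM_NEWROUTE reply.  With RTM_F_FIB_MATCH the
 * matching FIB entry is reported instead of the resolved destination,
 * roughly what "ip -6 route get 2001:db8::1 fibmatch" asks for.
 */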
4954static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4955                              struct netlink_ext_ack *extack)
4956{
4957        struct net *net = sock_net(in_skb->sk);
4958        struct nlattr *tb[RTA_MAX+1];
4959        int err, iif = 0, oif = 0;
4960        struct fib6_info *from;
4961        struct dst_entry *dst;
4962        struct rt6_info *rt;
4963        struct sk_buff *skb;
4964        struct rtmsg *rtm;
4965        struct flowi6 fl6 = {};
4966        bool fibmatch;
4967
4968        err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4969        if (err < 0)
4970                goto errout;
4971
4972        err = -EINVAL;
4973        rtm = nlmsg_data(nlh);
4974        fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4975        fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4976
4977        if (tb[RTA_SRC]) {
4978                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4979                        goto errout;
4980
4981                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4982        }
4983
4984        if (tb[RTA_DST]) {
4985                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4986                        goto errout;
4987
4988                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4989        }
4990
4991        if (tb[RTA_IIF])
4992                iif = nla_get_u32(tb[RTA_IIF]);
4993
4994        if (tb[RTA_OIF])
4995                oif = nla_get_u32(tb[RTA_OIF]);
4996
4997        if (tb[RTA_MARK])
4998                fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4999
5000        if (tb[RTA_UID])
5001                fl6.flowi6_uid = make_kuid(current_user_ns(),
5002                                           nla_get_u32(tb[RTA_UID]));
5003        else
5004                fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
5005
5006        if (tb[RTA_SPORT])
5007                fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
5008
5009        if (tb[RTA_DPORT])
5010                fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
5011
5012        if (tb[RTA_IP_PROTO]) {
5013                err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
5014                                                  &fl6.flowi6_proto, AF_INET6,
5015                                                  extack);
5016                if (err)
5017                        goto errout;
5018        }
5019
5020        if (iif) {
5021                struct net_device *dev;
5022                int flags = 0;
5023
5024                rcu_read_lock();
5025
5026                dev = dev_get_by_index_rcu(net, iif);
5027                if (!dev) {
5028                        rcu_read_unlock();
5029                        err = -ENODEV;
5030                        goto errout;
5031                }
5032
5033                fl6.flowi6_iif = iif;
5034
5035                if (!ipv6_addr_any(&fl6.saddr))
5036                        flags |= RT6_LOOKUP_F_HAS_SADDR;
5037
5038                dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5039
5040                rcu_read_unlock();
5041        } else {
5042                fl6.flowi6_oif = oif;
5043
5044                dst = ip6_route_output(net, NULL, &fl6);
5045        }
5046
5048        rt = container_of(dst, struct rt6_info, dst);
5049        if (rt->dst.error) {
5050                err = rt->dst.error;
5051                ip6_rt_put(rt);
5052                goto errout;
5053        }
5054
5055        if (rt == net->ipv6.ip6_null_entry) {
5056                err = rt->dst.error;
5057                ip6_rt_put(rt);
5058                goto errout;
5059        }
5060
5061        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5062        if (!skb) {
5063                ip6_rt_put(rt);
5064                err = -ENOBUFS;
5065                goto errout;
5066        }
5067
5068        skb_dst_set(skb, &rt->dst);
5069
5070        rcu_read_lock();
5071        from = rcu_dereference(rt->from);
5072        if (from) {
5073                if (fibmatch)
5074                        err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5075                                            iif, RTM_NEWROUTE,
5076                                            NETLINK_CB(in_skb).portid,
5077                                            nlh->nlmsg_seq, 0);
5078                else
5079                        err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5080                                            &fl6.saddr, iif, RTM_NEWROUTE,
5081                                            NETLINK_CB(in_skb).portid,
5082                                            nlh->nlmsg_seq, 0);
5083        } else {
5084                err = -ENETUNREACH;
5085        }
5086        rcu_read_unlock();
5087
5088        if (err < 0) {
5089                kfree_skb(skb);
5090                goto errout;
5091        }
5092
5093        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5094errout:
5095        return err;
5096}
5097
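/* Notify RTNLGRP_IPV6_ROUTE listeners about a FIB change.  The skb is
 * sized with rt6_nlmsg_size(); an -EMSGSIZE from rt6_fill_node() would
 * mean that estimate is wrong, hence the WARN_ON() below.
 */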
5098void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5099                     unsigned int nlm_flags)
5100{
5101        struct sk_buff *skb;
5102        struct net *net = info->nl_net;
5103        u32 seq;
5104        int err;
5105
5106        err = -ENOBUFS;
5107        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5108
5109        skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5110        if (!skb)
5111                goto errout;
5112
5113        err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5114                            event, info->portid, seq, nlm_flags);
5115        if (err < 0) {
5116                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5117                WARN_ON(err == -EMSGSIZE);
5118                kfree_skb(skb);
5119                goto errout;
5120        }
5121        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5122                    info->nlh, gfp_any());
5123        return;
5124errout:
5125        if (err < 0)
5126                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5127}
5128
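/* Netdevice notifier: when the per-netns loopback device registers, point
 * the null (and prohibit/blackhole) template entries at it; on unregister,
 * drop the idev references again so the device can go away.
 */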
5129static int ip6_route_dev_notify(struct notifier_block *this,
5130                                unsigned long event, void *ptr)
5131{
5132        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5133        struct net *net = dev_net(dev);
5134
5135        if (!(dev->flags & IFF_LOOPBACK))
5136                return NOTIFY_OK;
5137
5138        if (event == NETDEV_REGISTER) {
5139                net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
5140                net->ipv6.ip6_null_entry->dst.dev = dev;
5141                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5143                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5144                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5145                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5146                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5147#endif
5148        } else if (event == NETDEV_UNREGISTER &&
5149                   dev->reg_state != NETREG_UNREGISTERED) {
5150                /* NETDEV_UNREGISTER can be fired multiple times by
5151                 * netdev_wait_allrefs(). Make sure we only do this once.
5152                 */
5153                in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5154#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5155                in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5156                in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5157#endif
5158        }
5159
5160        return NOTIFY_OK;
5161}
5162
5163/*
5164 *      /proc
5165 */
5166
5167#ifdef CONFIG_PROC_FS
5168static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5169{
5170        struct net *net = (struct net *)seq->private;
5171        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
5172                   net->ipv6.rt6_stats->fib_nodes,
5173                   net->ipv6.rt6_stats->fib_route_nodes,
5174                   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
5175                   net->ipv6.rt6_stats->fib_rt_entries,
5176                   net->ipv6.rt6_stats->fib_rt_cache,
5177                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
5178                   net->ipv6.rt6_stats->fib_discarded_routes);
5179
5180        return 0;
5181}
5182#endif  /* CONFIG_PROC_FS */
5183
5184#ifdef CONFIG_SYSCTL
5185
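/* Handler for the write-only net.ipv6.route.flush sysctl: any write kicks
 * fib6_run_gc().  Note that "delay" is sampled before proc_dointvec()
 * stores the newly written value, so the previously configured flush_delay
 * governs this flush.  For example, something like
 * "sysctl -w net.ipv6.route.flush=1" triggers a garbage-collection pass
 * over the routing tables.
 */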
5186static
5187int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
5188                              void __user *buffer, size_t *lenp, loff_t *ppos)
5189{
5190        struct net *net;
5191        int delay;
5192        int ret;
5193        if (!write)
5194                return -EINVAL;
5195
5196        net = (struct net *)ctl->extra1;
5197        delay = net->ipv6.sysctl.flush_delay;
5198        ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5199        if (ret)
5200                return ret;
5201
5202        fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5203        return 0;
5204}
5205
5206static int zero;
5207static int one = 1;
5208
5209static struct ctl_table ipv6_route_table_template[] = {
5210        {
5211                .procname       =       "flush",
5212                .data           =       &init_net.ipv6.sysctl.flush_delay,
5213                .maxlen         =       sizeof(int),
5214                .mode           =       0200,
5215                .proc_handler   =       ipv6_sysctl_rtcache_flush
5216        },
5217        {
5218                .procname       =       "gc_thresh",
5219                .data           =       &ip6_dst_ops_template.gc_thresh,
5220                .maxlen         =       sizeof(int),
5221                .mode           =       0644,
5222                .proc_handler   =       proc_dointvec,
5223        },
5224        {
5225                .procname       =       "max_size",
5226                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
5227                .maxlen         =       sizeof(int),
5228                .mode           =       0644,
5229                .proc_handler   =       proc_dointvec,
5230        },
5231        {
5232                .procname       =       "gc_min_interval",
5233                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5234                .maxlen         =       sizeof(int),
5235                .mode           =       0644,
5236                .proc_handler   =       proc_dointvec_jiffies,
5237        },
5238        {
5239                .procname       =       "gc_timeout",
5240                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5241                .maxlen         =       sizeof(int),
5242                .mode           =       0644,
5243                .proc_handler   =       proc_dointvec_jiffies,
5244        },
5245        {
5246                .procname       =       "gc_interval",
5247                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
5248                .maxlen         =       sizeof(int),
5249                .mode           =       0644,
5250                .proc_handler   =       proc_dointvec_jiffies,
5251        },
5252        {
5253                .procname       =       "gc_elasticity",
5254                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5255                .maxlen         =       sizeof(int),
5256                .mode           =       0644,
5257                .proc_handler   =       proc_dointvec,
5258        },
5259        {
5260                .procname       =       "mtu_expires",
5261                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5262                .maxlen         =       sizeof(int),
5263                .mode           =       0644,
5264                .proc_handler   =       proc_dointvec_jiffies,
5265        },
5266        {
5267                .procname       =       "min_adv_mss",
5268                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
5269                .maxlen         =       sizeof(int),
5270                .mode           =       0644,
5271                .proc_handler   =       proc_dointvec,
5272        },
5273        {
5274                .procname       =       "gc_min_interval_ms",
5275                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5276                .maxlen         =       sizeof(int),
5277                .mode           =       0644,
5278                .proc_handler   =       proc_dointvec_ms_jiffies,
5279        },
5280        {
5281                .procname       =       "skip_notify_on_dev_down",
5282                .data           =       &init_net.ipv6.sysctl.skip_notify_on_dev_down,
5283                .maxlen         =       sizeof(int),
5284                .mode           =       0644,
5285                .proc_handler   =       proc_dointvec_minmax,
5286                .extra1         =       &zero,
5287                .extra2         =       &one,
5288        },
5289        { }
5290};
5291
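/* Per-netns copy of the sysctl template above.  The entries end up under
 * /proc/sys/net/ipv6/route/ (flush, gc_thresh, max_size, ...); the .data
 * pointers are re-targeted at the namespace's own fields below, and the
 * write-only "flush" entry is hidden from non-init user namespaces.
 */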
5292struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5293{
5294        struct ctl_table *table;
5295
5296        table = kmemdup(ipv6_route_table_template,
5297                        sizeof(ipv6_route_table_template),
5298                        GFP_KERNEL);
5299
5300        if (table) {
5301                table[0].data = &net->ipv6.sysctl.flush_delay;
5302                table[0].extra1 = net;
5303                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5304                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5305                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5306                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5307                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5308                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5309                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5310                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5311                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5312                table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5313
5314                /* Don't export sysctls to unprivileged users */
5315                if (net->user_ns != &init_user_ns)
5316                        table[0].procname = NULL;
5317        }
5318
5319        return table;
5320}
5321#endif
5322
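/* Per-netns init: clone the dst_ops template, allocate the always-present
 * null (and, with multiple tables, prohibit/blackhole) route entries and
 * set the default values for the route sysctls.  Undone by
 * ip6_route_net_exit().
 */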
5323static int __net_init ip6_route_net_init(struct net *net)
5324{
5325        int ret = -ENOMEM;
5326
5327        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5328               sizeof(net->ipv6.ip6_dst_ops));
5329
5330        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5331                goto out_ip6_dst_ops;
5332
5333        net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5334                                            sizeof(*net->ipv6.fib6_null_entry),
5335                                            GFP_KERNEL);
5336        if (!net->ipv6.fib6_null_entry)
5337                goto out_ip6_dst_entries;
5338
5339        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5340                                           sizeof(*net->ipv6.ip6_null_entry),
5341                                           GFP_KERNEL);
5342        if (!net->ipv6.ip6_null_entry)
5343                goto out_fib6_null_entry;
5344        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5345        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5346                         ip6_template_metrics, true);
5347
5348#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5349        net->ipv6.fib6_has_custom_rules = false;
5350        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5351                                               sizeof(*net->ipv6.ip6_prohibit_entry),
5352                                               GFP_KERNEL);
5353        if (!net->ipv6.ip6_prohibit_entry)
5354                goto out_ip6_null_entry;
5355        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5356        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5357                         ip6_template_metrics, true);
5358
5359        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5360                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
5361                                               GFP_KERNEL);
5362        if (!net->ipv6.ip6_blk_hole_entry)
5363                goto out_ip6_prohibit_entry;
5364        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5365        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5366                         ip6_template_metrics, true);
5367#endif
5368
5369        net->ipv6.sysctl.flush_delay = 0;
5370        net->ipv6.sysctl.ip6_rt_max_size = 4096;
5371        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5372        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5373        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5374        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5375        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5376        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5377        net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5378
5379        net->ipv6.ip6_rt_gc_expire = 30*HZ;
5380
5381        ret = 0;
5382out:
5383        return ret;
5384
5385#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5386out_ip6_prohibit_entry:
5387        kfree(net->ipv6.ip6_prohibit_entry);
5388out_ip6_null_entry:
5389        kfree(net->ipv6.ip6_null_entry);
5390#endif
5391out_fib6_null_entry:
5392        kfree(net->ipv6.fib6_null_entry);
5393out_ip6_dst_entries:
5394        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5395out_ip6_dst_ops:
5396        goto out;
5397}
5398
5399static void __net_exit ip6_route_net_exit(struct net *net)
5400{
5401        kfree(net->ipv6.fib6_null_entry);
5402        kfree(net->ipv6.ip6_null_entry);
5403#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5404        kfree(net->ipv6.ip6_prohibit_entry);
5405        kfree(net->ipv6.ip6_blk_hole_entry);
5406#endif
5407        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5408}
5409
5410static int __net_init ip6_route_net_init_late(struct net *net)
5411{
5412#ifdef CONFIG_PROC_FS
5413        proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5414                        sizeof(struct ipv6_route_iter));
5415        proc_create_net_single("rt6_stats", 0444, net->proc_net,
5416                        rt6_stats_seq_show, NULL);
5417#endif
5418        return 0;
5419}
5420
5421static void __net_exit ip6_route_net_exit_late(struct net *net)
5422{
5423#ifdef CONFIG_PROC_FS
5424        remove_proc_entry("ipv6_route", net->proc_net);
5425        remove_proc_entry("rt6_stats", net->proc_net);
5426#endif
5427}
5428
5429static struct pernet_operations ip6_route_net_ops = {
5430        .init = ip6_route_net_init,
5431        .exit = ip6_route_net_exit,
5432};
5433
5434static int __net_init ipv6_inetpeer_init(struct net *net)
5435{
5436        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5437
5438        if (!bp)
5439                return -ENOMEM;
5440        inet_peer_base_init(bp);
5441        net->ipv6.peers = bp;
5442        return 0;
5443}
5444
5445static void __net_exit ipv6_inetpeer_exit(struct net *net)
5446{
5447        struct inet_peer_base *bp = net->ipv6.peers;
5448
5449        net->ipv6.peers = NULL;
5450        inetpeer_invalidate_tree(bp);
5451        kfree(bp);
5452}
5453
5454static struct pernet_operations ipv6_inetpeer_ops = {
5455        .init   =       ipv6_inetpeer_init,
5456        .exit   =       ipv6_inetpeer_exit,
5457};
5458
5459static struct pernet_operations ip6_route_net_late_ops = {
5460        .init = ip6_route_net_init_late,
5461        .exit = ip6_route_net_exit_late,
5462};
5463
5464static struct notifier_block ip6_route_dev_notifier = {
5465        .notifier_call = ip6_route_dev_notify,
5466        .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5467};
5468
5469void __init ip6_route_init_special_entries(void)
5470{
5471        /* The loopback device is registered before this portion of code
5472         * runs, so the loopback reference in rt6_info is not taken; do it
5473         * manually for init_net. */
5474        init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
5475        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5476        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5477#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5478        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5479        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5480        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5481        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5482#endif
5483}
5484
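/* Module init: set up the dst kmem cache, register the pernet subsystems,
 * the fib6 core, xfrm6 and policy rules, then the RTM_{NEW,DEL,GET}ROUTE
 * rtnetlink handlers and the netdevice notifier.  The error labels below
 * unwind in reverse order.
 */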
5485int __init ip6_route_init(void)
5486{
5487        int ret;
5488        int cpu;
5489
5490        ret = -ENOMEM;
5491        ip6_dst_ops_template.kmem_cachep =
5492                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5493                                  SLAB_HWCACHE_ALIGN, NULL);
5494        if (!ip6_dst_ops_template.kmem_cachep)
5495                goto out;
5496
5497        ret = dst_entries_init(&ip6_dst_blackhole_ops);
5498        if (ret)
5499                goto out_kmem_cache;
5500
5501        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5502        if (ret)
5503                goto out_dst_entries;
5504
5505        ret = register_pernet_subsys(&ip6_route_net_ops);
5506        if (ret)
5507                goto out_register_inetpeer;
5508
5509        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5510
5511        ret = fib6_init();
5512        if (ret)
5513                goto out_register_subsys;
5514
5515        ret = xfrm6_init();
5516        if (ret)
5517                goto out_fib6_init;
5518
5519        ret = fib6_rules_init();
5520        if (ret)
5521                goto xfrm6_init;
5522
5523        ret = register_pernet_subsys(&ip6_route_net_late_ops);
5524        if (ret)
5525                goto fib6_rules_init;
5526
5527        ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5528                                   inet6_rtm_newroute, NULL, 0);
5529        if (ret < 0)
5530                goto out_register_late_subsys;
5531
5532        ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5533                                   inet6_rtm_delroute, NULL, 0);
5534        if (ret < 0)
5535                goto out_register_late_subsys;
5536
5537        ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5538                                   inet6_rtm_getroute, NULL,
5539                                   RTNL_FLAG_DOIT_UNLOCKED);
5540        if (ret < 0)
5541                goto out_register_late_subsys;
5542
5543        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5544        if (ret)
5545                goto out_register_late_subsys;
5546
5547        for_each_possible_cpu(cpu) {
5548                struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5549
5550                INIT_LIST_HEAD(&ul->head);
5551                spin_lock_init(&ul->lock);
5552        }
5553
5554out:
5555        return ret;
5556
5557out_register_late_subsys:
5558        rtnl_unregister_all(PF_INET6);
5559        unregister_pernet_subsys(&ip6_route_net_late_ops);
5560fib6_rules_init:
5561        fib6_rules_cleanup();
5562xfrm6_init:
5563        xfrm6_fini();
5564out_fib6_init:
5565        fib6_gc_cleanup();
5566out_register_subsys:
5567        unregister_pernet_subsys(&ip6_route_net_ops);
5568out_register_inetpeer:
5569        unregister_pernet_subsys(&ipv6_inetpeer_ops);
5570out_dst_entries:
5571        dst_entries_destroy(&ip6_dst_blackhole_ops);
5572out_kmem_cache:
5573        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5574        goto out;
5575}
5576
5577void ip6_route_cleanup(void)
5578{
5579        unregister_netdevice_notifier(&ip6_route_dev_notifier);
5580        unregister_pernet_subsys(&ip6_route_net_late_ops);
5581        fib6_rules_cleanup();
5582        xfrm6_fini();
5583        fib6_gc_cleanup();
5584        unregister_pernet_subsys(&ipv6_inetpeer_ops);
5585        unregister_pernet_subsys(&ip6_route_net_ops);
5586        dst_entries_destroy(&ip6_dst_blackhole_ops);
5587        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5588}
5589