linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#include <linux/capability.h>
  28#include <linux/errno.h>
  29#include <linux/types.h>
  30#include <linux/times.h>
  31#include <linux/socket.h>
  32#include <linux/sockios.h>
  33#include <linux/net.h>
  34#include <linux/route.h>
  35#include <linux/netdevice.h>
  36#include <linux/in6.h>
  37#include <linux/mroute6.h>
  38#include <linux/init.h>
  39#include <linux/if_arp.h>
  40#include <linux/proc_fs.h>
  41#include <linux/seq_file.h>
  42#include <linux/nsproxy.h>
  43#include <net/net_namespace.h>
  44#include <net/snmp.h>
  45#include <net/ipv6.h>
  46#include <net/ip6_fib.h>
  47#include <net/ip6_route.h>
  48#include <net/ndisc.h>
  49#include <net/addrconf.h>
  50#include <net/tcp.h>
  51#include <linux/rtnetlink.h>
  52#include <net/dst.h>
  53#include <net/xfrm.h>
  54#include <net/netevent.h>
  55#include <net/netlink.h>
  56
  57#include <asm/uaccess.h>
  58
  59#ifdef CONFIG_SYSCTL
  60#include <linux/sysctl.h>
  61#endif
  62
  63/* Set to 3 to get tracing. */
  64#define RT6_DEBUG 2
  65
  66#if RT6_DEBUG >= 3
  67#define RDBG(x) printk x
  68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
  69#else
  70#define RDBG(x)
  71#define RT6_TRACE(x...) do { ; } while (0)
  72#endif
  73
  74#define CLONE_OFFLINK_ROUTE 0
  75
  76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
  77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  79static void             ip6_dst_destroy(struct dst_entry *);
  80static void             ip6_dst_ifdown(struct dst_entry *,
  81                                       struct net_device *dev, int how);
  82static int               ip6_dst_gc(struct dst_ops *ops);
  83
  84static int              ip6_pkt_discard(struct sk_buff *skb);
  85static int              ip6_pkt_discard_out(struct sk_buff *skb);
  86static void             ip6_link_failure(struct sk_buff *skb);
  87static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
  88
  89#ifdef CONFIG_IPV6_ROUTE_INFO
  90static struct rt6_info *rt6_add_route_info(struct net *net,
  91                                           struct in6_addr *prefix, int prefixlen,
  92                                           struct in6_addr *gwaddr, int ifindex,
  93                                           unsigned pref);
  94static struct rt6_info *rt6_get_route_info(struct net *net,
  95                                           struct in6_addr *prefix, int prefixlen,
  96                                           struct in6_addr *gwaddr, int ifindex);
  97#endif
  98
  99static struct dst_ops ip6_dst_ops_template = {
 100        .family                 =       AF_INET6,
 101        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 102        .gc                     =       ip6_dst_gc,
 103        .gc_thresh              =       1024,
 104        .check                  =       ip6_dst_check,
 105        .destroy                =       ip6_dst_destroy,
 106        .ifdown                 =       ip6_dst_ifdown,
 107        .negative_advice        =       ip6_negative_advice,
 108        .link_failure           =       ip6_link_failure,
 109        .update_pmtu            =       ip6_rt_update_pmtu,
 110        .local_out              =       __ip6_local_out,
 111        .entries                =       ATOMIC_INIT(0),
 112};
 113
 114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 115{
 116}
 117
 118static struct dst_ops ip6_dst_blackhole_ops = {
 119        .family                 =       AF_INET6,
 120        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 121        .destroy                =       ip6_dst_destroy,
 122        .check                  =       ip6_dst_check,
 123        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 124        .entries                =       ATOMIC_INIT(0),
 125};
 126
 127static struct rt6_info ip6_null_entry_template = {
 128        .u = {
 129                .dst = {
 130                        .__refcnt       = ATOMIC_INIT(1),
 131                        .__use          = 1,
 132                        .obsolete       = -1,
 133                        .error          = -ENETUNREACH,
 134                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 135                        .input          = ip6_pkt_discard,
 136                        .output         = ip6_pkt_discard_out,
 137                }
 138        },
 139        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 140        .rt6i_protocol  = RTPROT_KERNEL,
 141        .rt6i_metric    = ~(u32) 0,
 142        .rt6i_ref       = ATOMIC_INIT(1),
 143};
 144
 145#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 146
 147static int ip6_pkt_prohibit(struct sk_buff *skb);
 148static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 149
 150static struct rt6_info ip6_prohibit_entry_template = {
 151        .u = {
 152                .dst = {
 153                        .__refcnt       = ATOMIC_INIT(1),
 154                        .__use          = 1,
 155                        .obsolete       = -1,
 156                        .error          = -EACCES,
 157                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 158                        .input          = ip6_pkt_prohibit,
 159                        .output         = ip6_pkt_prohibit_out,
 160                }
 161        },
 162        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 163        .rt6i_protocol  = RTPROT_KERNEL,
 164        .rt6i_metric    = ~(u32) 0,
 165        .rt6i_ref       = ATOMIC_INIT(1),
 166};
 167
 168static struct rt6_info ip6_blk_hole_entry_template = {
 169        .u = {
 170                .dst = {
 171                        .__refcnt       = ATOMIC_INIT(1),
 172                        .__use          = 1,
 173                        .obsolete       = -1,
 174                        .error          = -EINVAL,
 175                        .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 176                        .input          = dst_discard,
 177                        .output         = dst_discard,
 178                }
 179        },
 180        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 181        .rt6i_protocol  = RTPROT_KERNEL,
 182        .rt6i_metric    = ~(u32) 0,
 183        .rt6i_ref       = ATOMIC_INIT(1),
 184};
 185
 186#endif
 187
 188/* allocate dst with ip6_dst_ops */
 189static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
 190{
 191        return (struct rt6_info *)dst_alloc(ops);
 192}
 193
 194static void ip6_dst_destroy(struct dst_entry *dst)
 195{
 196        struct rt6_info *rt = (struct rt6_info *)dst;
 197        struct inet6_dev *idev = rt->rt6i_idev;
 198
 199        if (idev != NULL) {
 200                rt->rt6i_idev = NULL;
 201                in6_dev_put(idev);
 202        }
 203}
 204
 205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 206                           int how)
 207{
 208        struct rt6_info *rt = (struct rt6_info *)dst;
 209        struct inet6_dev *idev = rt->rt6i_idev;
 210        struct net_device *loopback_dev =
 211                dev_net(dev)->loopback_dev;
 212
 213        if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
 214                struct inet6_dev *loopback_idev =
 215                        in6_dev_get(loopback_dev);
 216                if (loopback_idev != NULL) {
 217                        rt->rt6i_idev = loopback_idev;
 218                        in6_dev_put(idev);
 219                }
 220        }
 221}
 222
 223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 224{
 225        return (rt->rt6i_flags & RTF_EXPIRES &&
 226                time_after(jiffies, rt->rt6i_expires));
 227}
 228
 229static inline int rt6_need_strict(struct in6_addr *daddr)
 230{
 231        return (ipv6_addr_type(daddr) &
 232                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
 233}
 234
 235/*
 236 *      Route lookup. Any table->tb6_lock is implied.
 237 */
 238
 239static inline struct rt6_info *rt6_device_match(struct net *net,
 240                                                    struct rt6_info *rt,
 241                                                    struct in6_addr *saddr,
 242                                                    int oif,
 243                                                    int flags)
 244{
 245        struct rt6_info *local = NULL;
 246        struct rt6_info *sprt;
 247
 248        if (!oif && ipv6_addr_any(saddr))
 249                goto out;
 250
 251        for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
 252                struct net_device *dev = sprt->rt6i_dev;
 253
 254                if (oif) {
 255                        if (dev->ifindex == oif)
 256                                return sprt;
 257                        if (dev->flags & IFF_LOOPBACK) {
 258                                if (sprt->rt6i_idev == NULL ||
 259                                    sprt->rt6i_idev->dev->ifindex != oif) {
 260                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
 261                                                continue;
 262                                        if (local && (!oif ||
 263                                                      local->rt6i_idev->dev->ifindex == oif))
 264                                                continue;
 265                                }
 266                                local = sprt;
 267                        }
 268                } else {
 269                        if (ipv6_chk_addr(net, saddr, dev,
 270                                          flags & RT6_LOOKUP_F_IFACE))
 271                                return sprt;
 272                }
 273        }
 274
 275        if (oif) {
 276                if (local)
 277                        return local;
 278
 279                if (flags & RT6_LOOKUP_F_IFACE)
 280                        return net->ipv6.ip6_null_entry;
 281        }
 282out:
 283        return rt;
 284}
 285
 286#ifdef CONFIG_IPV6_ROUTER_PREF
 287static void rt6_probe(struct rt6_info *rt)
 288{
 289        struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
 290        /*
 291         * Okay, this does not seem to be appropriate
 292         * for now, however, we need to check if it
 293         * is really so; aka Router Reachability Probing.
 294         *
 295         * Router Reachability Probe MUST be rate-limited
 296         * to no more than one per minute.
 297         */
 298        if (!neigh || (neigh->nud_state & NUD_VALID))
 299                return;
 300        read_lock_bh(&neigh->lock);
 301        if (!(neigh->nud_state & NUD_VALID) &&
 302            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 303                struct in6_addr mcaddr;
 304                struct in6_addr *target;
 305
 306                neigh->updated = jiffies;
 307                read_unlock_bh(&neigh->lock);
 308
 309                target = (struct in6_addr *)&neigh->primary_key;
 310                addrconf_addr_solict_mult(target, &mcaddr);
 311                ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 312        } else
 313                read_unlock_bh(&neigh->lock);
 314}
 315#else
 316static inline void rt6_probe(struct rt6_info *rt)
 317{
 318        return;
 319}
 320#endif
 321
 322/*
 323 * Default Router Selection (RFC 2461 6.3.6)
 324 */
 325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 326{
 327        struct net_device *dev = rt->rt6i_dev;
 328        if (!oif || dev->ifindex == oif)
 329                return 2;
 330        if ((dev->flags & IFF_LOOPBACK) &&
 331            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 332                return 1;
 333        return 0;
 334}
 335
 336static inline int rt6_check_neigh(struct rt6_info *rt)
 337{
 338        struct neighbour *neigh = rt->rt6i_nexthop;
 339        int m;
 340        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 341            !(rt->rt6i_flags & RTF_GATEWAY))
 342                m = 1;
 343        else if (neigh) {
 344                read_lock_bh(&neigh->lock);
 345                if (neigh->nud_state & NUD_VALID)
 346                        m = 2;
 347#ifdef CONFIG_IPV6_ROUTER_PREF
 348                else if (neigh->nud_state & NUD_FAILED)
 349                        m = 0;
 350#endif
 351                else
 352                        m = 1;
 353                read_unlock_bh(&neigh->lock);
 354        } else
 355                m = 0;
 356        return m;
 357}
 358
 359static int rt6_score_route(struct rt6_info *rt, int oif,
 360                           int strict)
 361{
 362        int m, n;
 363
 364        m = rt6_check_dev(rt, oif);
 365        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 366                return -1;
 367#ifdef CONFIG_IPV6_ROUTER_PREF
 368        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 369#endif
 370        n = rt6_check_neigh(rt);
 371        if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 372                return -1;
 373        return m;
 374}
 375
 376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 377                                   int *mpri, struct rt6_info *match)
 378{
 379        int m;
 380
 381        if (rt6_check_expired(rt))
 382                goto out;
 383
 384        m = rt6_score_route(rt, oif, strict);
 385        if (m < 0)
 386                goto out;
 387
 388        if (m > *mpri) {
 389                if (strict & RT6_LOOKUP_F_REACHABLE)
 390                        rt6_probe(match);
 391                *mpri = m;
 392                match = rt;
 393        } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 394                rt6_probe(rt);
 395        }
 396
 397out:
 398        return match;
 399}
 400
 401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 402                                     struct rt6_info *rr_head,
 403                                     u32 metric, int oif, int strict)
 404{
 405        struct rt6_info *rt, *match;
 406        int mpri = -1;
 407
 408        match = NULL;
 409        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 410             rt = rt->u.dst.rt6_next)
 411                match = find_match(rt, oif, strict, &mpri, match);
 412        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 413             rt = rt->u.dst.rt6_next)
 414                match = find_match(rt, oif, strict, &mpri, match);
 415
 416        return match;
 417}
 418
 419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 420{
 421        struct rt6_info *match, *rt0;
 422        struct net *net;
 423
 424        RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 425                  __func__, fn->leaf, oif);
 426
 427        rt0 = fn->rr_ptr;
 428        if (!rt0)
 429                fn->rr_ptr = rt0 = fn->leaf;
 430
 431        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 432
 433        if (!match &&
 434            (strict & RT6_LOOKUP_F_REACHABLE)) {
 435                struct rt6_info *next = rt0->u.dst.rt6_next;
 436
 437                /* no entries matched; do round-robin */
 438                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 439                        next = fn->leaf;
 440
 441                if (next != rt0)
 442                        fn->rr_ptr = next;
 443        }
 444
 445        RT6_TRACE("%s() => %p\n",
 446                  __func__, match);
 447
 448        net = dev_net(rt0->rt6i_dev);
 449        return (match ? match : net->ipv6.ip6_null_entry);
 450}
 451
 452#ifdef CONFIG_IPV6_ROUTE_INFO
 453int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 454                  struct in6_addr *gwaddr)
 455{
 456        struct net *net = dev_net(dev);
 457        struct route_info *rinfo = (struct route_info *) opt;
 458        struct in6_addr prefix_buf, *prefix;
 459        unsigned int pref;
 460        unsigned long lifetime;
 461        struct rt6_info *rt;
 462
 463        if (len < sizeof(struct route_info)) {
 464                return -EINVAL;
 465        }
 466
 467        /* Sanity check for prefix_len and length */
 468        if (rinfo->length > 3) {
 469                return -EINVAL;
 470        } else if (rinfo->prefix_len > 128) {
 471                return -EINVAL;
 472        } else if (rinfo->prefix_len > 64) {
 473                if (rinfo->length < 2) {
 474                        return -EINVAL;
 475                }
 476        } else if (rinfo->prefix_len > 0) {
 477                if (rinfo->length < 1) {
 478                        return -EINVAL;
 479                }
 480        }
 481
 482        pref = rinfo->route_pref;
 483        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 484                return -EINVAL;
 485
 486        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 487
 488        if (rinfo->length == 3)
 489                prefix = (struct in6_addr *)rinfo->prefix;
 490        else {
 491                /* this function is safe */
 492                ipv6_addr_prefix(&prefix_buf,
 493                                 (struct in6_addr *)rinfo->prefix,
 494                                 rinfo->prefix_len);
 495                prefix = &prefix_buf;
 496        }
 497
 498        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 499                                dev->ifindex);
 500
 501        if (rt && !lifetime) {
 502                ip6_del_rt(rt);
 503                rt = NULL;
 504        }
 505
 506        if (!rt && lifetime)
 507                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 508                                        pref);
 509        else if (rt)
 510                rt->rt6i_flags = RTF_ROUTEINFO |
 511                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 512
 513        if (rt) {
 514                if (!addrconf_finite_timeout(lifetime)) {
 515                        rt->rt6i_flags &= ~RTF_EXPIRES;
 516                } else {
 517                        rt->rt6i_expires = jiffies + HZ * lifetime;
 518                        rt->rt6i_flags |= RTF_EXPIRES;
 519                }
 520                dst_release(&rt->u.dst);
 521        }
 522        return 0;
 523}
 524#endif
 525
 526#define BACKTRACK(__net, saddr)                 \
 527do { \
 528        if (rt == __net->ipv6.ip6_null_entry) { \
 529                struct fib6_node *pn; \
 530                while (1) { \
 531                        if (fn->fn_flags & RTN_TL_ROOT) \
 532                                goto out; \
 533                        pn = fn->parent; \
 534                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 535                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 536                        else \
 537                                fn = pn; \
 538                        if (fn->fn_flags & RTN_RTINFO) \
 539                                goto restart; \
 540                } \
 541        } \
 542} while(0)
 543
 544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 545                                             struct fib6_table *table,
 546                                             struct flowi *fl, int flags)
 547{
 548        struct fib6_node *fn;
 549        struct rt6_info *rt;
 550
 551        read_lock_bh(&table->tb6_lock);
 552        fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 553restart:
 554        rt = fn->leaf;
 555        rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 556        BACKTRACK(net, &fl->fl6_src);
 557out:
 558        dst_use(&rt->u.dst, jiffies);
 559        read_unlock_bh(&table->tb6_lock);
 560        return rt;
 561
 562}
 563
 564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 565                            const struct in6_addr *saddr, int oif, int strict)
 566{
 567        struct flowi fl = {
 568                .oif = oif,
 569                .nl_u = {
 570                        .ip6_u = {
 571                                .daddr = *daddr,
 572                        },
 573                },
 574        };
 575        struct dst_entry *dst;
 576        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 577
 578        if (saddr) {
 579                memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
 580                flags |= RT6_LOOKUP_F_HAS_SADDR;
 581        }
 582
 583        dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
 584        if (dst->error == 0)
 585                return (struct rt6_info *) dst;
 586
 587        dst_release(dst);
 588
 589        return NULL;
 590}
 591
 592EXPORT_SYMBOL(rt6_lookup);
 593
 594/* ip6_ins_rt is called with FREE table->tb6_lock.
 595   It takes new route entry, the addition fails by any reason the
 596   route is freed. In any case, if caller does not hold it, it may
 597   be destroyed.
 598 */
 599
 600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 601{
 602        int err;
 603        struct fib6_table *table;
 604
 605        table = rt->rt6i_table;
 606        write_lock_bh(&table->tb6_lock);
 607        err = fib6_add(&table->tb6_root, rt, info);
 608        write_unlock_bh(&table->tb6_lock);
 609
 610        return err;
 611}
 612
 613int ip6_ins_rt(struct rt6_info *rt)
 614{
 615        struct nl_info info = {
 616                .nl_net = dev_net(rt->rt6i_dev),
 617        };
 618        return __ip6_ins_rt(rt, &info);
 619}
 620
 621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
 622                                      struct in6_addr *saddr)
 623{
 624        struct rt6_info *rt;
 625
 626        /*
 627         *      Clone the route.
 628         */
 629
 630        rt = ip6_rt_copy(ort);
 631
 632        if (rt) {
 633                struct neighbour *neigh;
 634                int attempts = !in_softirq();
 635
 636                if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 637                        if (rt->rt6i_dst.plen != 128 &&
 638                            ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
 639                                rt->rt6i_flags |= RTF_ANYCAST;
 640                        ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 641                }
 642
 643                ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 644                rt->rt6i_dst.plen = 128;
 645                rt->rt6i_flags |= RTF_CACHE;
 646                rt->u.dst.flags |= DST_HOST;
 647
 648#ifdef CONFIG_IPV6_SUBTREES
 649                if (rt->rt6i_src.plen && saddr) {
 650                        ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 651                        rt->rt6i_src.plen = 128;
 652                }
 653#endif
 654
 655        retry:
 656                neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 657                if (IS_ERR(neigh)) {
 658                        struct net *net = dev_net(rt->rt6i_dev);
 659                        int saved_rt_min_interval =
 660                                net->ipv6.sysctl.ip6_rt_gc_min_interval;
 661                        int saved_rt_elasticity =
 662                                net->ipv6.sysctl.ip6_rt_gc_elasticity;
 663
 664                        if (attempts-- > 0) {
 665                                net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 666                                net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 667
 668                                ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 669
 670                                net->ipv6.sysctl.ip6_rt_gc_elasticity =
 671                                        saved_rt_elasticity;
 672                                net->ipv6.sysctl.ip6_rt_gc_min_interval =
 673                                        saved_rt_min_interval;
 674                                goto retry;
 675                        }
 676
 677                        if (net_ratelimit())
 678                                printk(KERN_WARNING
 679                                       "Neighbour table overflow.\n");
 680                        dst_free(&rt->u.dst);
 681                        return NULL;
 682                }
 683                rt->rt6i_nexthop = neigh;
 684
 685        }
 686
 687        return rt;
 688}
 689
 690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
 691{
 692        struct rt6_info *rt = ip6_rt_copy(ort);
 693        if (rt) {
 694                ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 695                rt->rt6i_dst.plen = 128;
 696                rt->rt6i_flags |= RTF_CACHE;
 697                rt->u.dst.flags |= DST_HOST;
 698                rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 699        }
 700        return rt;
 701}
 702
 703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 704                                      struct flowi *fl, int flags)
 705{
 706        struct fib6_node *fn;
 707        struct rt6_info *rt, *nrt;
 708        int strict = 0;
 709        int attempts = 3;
 710        int err;
 711        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 712
 713        strict |= flags & RT6_LOOKUP_F_IFACE;
 714
 715relookup:
 716        read_lock_bh(&table->tb6_lock);
 717
 718restart_2:
 719        fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 720
 721restart:
 722        rt = rt6_select(fn, oif, strict | reachable);
 723
 724        BACKTRACK(net, &fl->fl6_src);
 725        if (rt == net->ipv6.ip6_null_entry ||
 726            rt->rt6i_flags & RTF_CACHE)
 727                goto out;
 728
 729        dst_hold(&rt->u.dst);
 730        read_unlock_bh(&table->tb6_lock);
 731
 732        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 733                nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 734        else {
 735#if CLONE_OFFLINK_ROUTE
 736                nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 737#else
 738                goto out2;
 739#endif
 740        }
 741
 742        dst_release(&rt->u.dst);
 743        rt = nrt ? : net->ipv6.ip6_null_entry;
 744
 745        dst_hold(&rt->u.dst);
 746        if (nrt) {
 747                err = ip6_ins_rt(nrt);
 748                if (!err)
 749                        goto out2;
 750        }
 751
 752        if (--attempts <= 0)
 753                goto out2;
 754
 755        /*
 756         * Race condition! In the gap, when table->tb6_lock was
 757         * released someone could insert this route.  Relookup.
 758         */
 759        dst_release(&rt->u.dst);
 760        goto relookup;
 761
 762out:
 763        if (reachable) {
 764                reachable = 0;
 765                goto restart_2;
 766        }
 767        dst_hold(&rt->u.dst);
 768        read_unlock_bh(&table->tb6_lock);
 769out2:
 770        rt->u.dst.lastuse = jiffies;
 771        rt->u.dst.__use++;
 772
 773        return rt;
 774}
 775
 776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 777                                            struct flowi *fl, int flags)
 778{
 779        return ip6_pol_route(net, table, fl->iif, fl, flags);
 780}
 781
 782void ip6_route_input(struct sk_buff *skb)
 783{
 784        struct ipv6hdr *iph = ipv6_hdr(skb);
 785        struct net *net = dev_net(skb->dev);
 786        int flags = RT6_LOOKUP_F_HAS_SADDR;
 787        struct flowi fl = {
 788                .iif = skb->dev->ifindex,
 789                .nl_u = {
 790                        .ip6_u = {
 791                                .daddr = iph->daddr,
 792                                .saddr = iph->saddr,
 793                                .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 794                        },
 795                },
 796                .mark = skb->mark,
 797                .proto = iph->nexthdr,
 798        };
 799
 800        if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 801                flags |= RT6_LOOKUP_F_IFACE;
 802
 803        skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
 804}
 805
 806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 807                                             struct flowi *fl, int flags)
 808{
 809        return ip6_pol_route(net, table, fl->oif, fl, flags);
 810}
 811
 812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
 813                                    struct flowi *fl)
 814{
 815        int flags = 0;
 816
 817        if (rt6_need_strict(&fl->fl6_dst))
 818                flags |= RT6_LOOKUP_F_IFACE;
 819
 820        if (!ipv6_addr_any(&fl->fl6_src))
 821                flags |= RT6_LOOKUP_F_HAS_SADDR;
 822        else if (sk) {
 823                unsigned int prefs = inet6_sk(sk)->srcprefs;
 824                if (prefs & IPV6_PREFER_SRC_TMP)
 825                        flags |= RT6_LOOKUP_F_SRCPREF_TMP;
 826                if (prefs & IPV6_PREFER_SRC_PUBLIC)
 827                        flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
 828                if (prefs & IPV6_PREFER_SRC_COA)
 829                        flags |= RT6_LOOKUP_F_SRCPREF_COA;
 830        }
 831
 832        return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
 833}
 834
 835EXPORT_SYMBOL(ip6_route_output);
 836
 837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
 838{
 839        struct rt6_info *ort = (struct rt6_info *) *dstp;
 840        struct rt6_info *rt = (struct rt6_info *)
 841                dst_alloc(&ip6_dst_blackhole_ops);
 842        struct dst_entry *new = NULL;
 843
 844        if (rt) {
 845                new = &rt->u.dst;
 846
 847                atomic_set(&new->__refcnt, 1);
 848                new->__use = 1;
 849                new->input = dst_discard;
 850                new->output = dst_discard;
 851
 852                memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 853                new->dev = ort->u.dst.dev;
 854                if (new->dev)
 855                        dev_hold(new->dev);
 856                rt->rt6i_idev = ort->rt6i_idev;
 857                if (rt->rt6i_idev)
 858                        in6_dev_hold(rt->rt6i_idev);
 859                rt->rt6i_expires = 0;
 860
 861                ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 862                rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 863                rt->rt6i_metric = 0;
 864
 865                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 866#ifdef CONFIG_IPV6_SUBTREES
 867                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 868#endif
 869
 870                dst_free(new);
 871        }
 872
 873        dst_release(*dstp);
 874        *dstp = new;
 875        return (new ? 0 : -ENOMEM);
 876}
 877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
 878
 879/*
 880 *      Destination cache support functions
 881 */
 882
 883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 884{
 885        struct rt6_info *rt;
 886
 887        rt = (struct rt6_info *) dst;
 888
 889        if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
 890                return dst;
 891
 892        return NULL;
 893}
 894
 895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 896{
 897        struct rt6_info *rt = (struct rt6_info *) dst;
 898
 899        if (rt) {
 900                if (rt->rt6i_flags & RTF_CACHE)
 901                        ip6_del_rt(rt);
 902                else
 903                        dst_release(dst);
 904        }
 905        return NULL;
 906}
 907
 908static void ip6_link_failure(struct sk_buff *skb)
 909{
 910        struct rt6_info *rt;
 911
 912        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
 913
 914        rt = (struct rt6_info *) skb_dst(skb);
 915        if (rt) {
 916                if (rt->rt6i_flags&RTF_CACHE) {
 917                        dst_set_expires(&rt->u.dst, 0);
 918                        rt->rt6i_flags |= RTF_EXPIRES;
 919                } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 920                        rt->rt6i_node->fn_sernum = -1;
 921        }
 922}
 923
 924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 925{
 926        struct rt6_info *rt6 = (struct rt6_info*)dst;
 927
 928        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
 929                rt6->rt6i_flags |= RTF_MODIFIED;
 930                if (mtu < IPV6_MIN_MTU) {
 931                        mtu = IPV6_MIN_MTU;
 932                        dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 933                }
 934                dst->metrics[RTAX_MTU-1] = mtu;
 935                call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 936        }
 937}
 938
 939static int ipv6_get_mtu(struct net_device *dev);
 940
 941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
 942{
 943        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 944
 945        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
 946                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
 947
 948        /*
 949         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
 950         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
 951         * IPV6_MAXPLEN is also valid and means: "any MSS,
 952         * rely only on pmtu discovery"
 953         */
 954        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
 955                mtu = IPV6_MAXPLEN;
 956        return mtu;
 957}
 958
 959static struct dst_entry *icmp6_dst_gc_list;
 960static DEFINE_SPINLOCK(icmp6_dst_lock);
 961
 962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 963                                  struct neighbour *neigh,
 964                                  const struct in6_addr *addr)
 965{
 966        struct rt6_info *rt;
 967        struct inet6_dev *idev = in6_dev_get(dev);
 968        struct net *net = dev_net(dev);
 969
 970        if (unlikely(idev == NULL))
 971                return NULL;
 972
 973        rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
 974        if (unlikely(rt == NULL)) {
 975                in6_dev_put(idev);
 976                goto out;
 977        }
 978
 979        dev_hold(dev);
 980        if (neigh)
 981                neigh_hold(neigh);
 982        else {
 983                neigh = ndisc_get_neigh(dev, addr);
 984                if (IS_ERR(neigh))
 985                        neigh = NULL;
 986        }
 987
 988        rt->rt6i_dev      = dev;
 989        rt->rt6i_idev     = idev;
 990        rt->rt6i_nexthop  = neigh;
 991        atomic_set(&rt->u.dst.__refcnt, 1);
 992        rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
 993        rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
 994        rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
 995        rt->u.dst.output  = ip6_output;
 996
 997#if 0   /* there's no chance to use these for ndisc */
 998        rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 999                                ? DST_HOST
1000                                : 0;
1001        ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002        rt->rt6i_dst.plen = 128;
1003#endif
1004
1005        spin_lock_bh(&icmp6_dst_lock);
1006        rt->u.dst.next = icmp6_dst_gc_list;
1007        icmp6_dst_gc_list = &rt->u.dst;
1008        spin_unlock_bh(&icmp6_dst_lock);
1009
1010        fib6_force_start_gc(net);
1011
1012out:
1013        return &rt->u.dst;
1014}
1015
1016int icmp6_dst_gc(void)
1017{
1018        struct dst_entry *dst, *next, **pprev;
1019        int more = 0;
1020
1021        next = NULL;
1022
1023        spin_lock_bh(&icmp6_dst_lock);
1024        pprev = &icmp6_dst_gc_list;
1025
1026        while ((dst = *pprev) != NULL) {
1027                if (!atomic_read(&dst->__refcnt)) {
1028                        *pprev = dst->next;
1029                        dst_free(dst);
1030                } else {
1031                        pprev = &dst->next;
1032                        ++more;
1033                }
1034        }
1035
1036        spin_unlock_bh(&icmp6_dst_lock);
1037
1038        return more;
1039}
1040
1041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042                            void *arg)
1043{
1044        struct dst_entry *dst, **pprev;
1045
1046        spin_lock_bh(&icmp6_dst_lock);
1047        pprev = &icmp6_dst_gc_list;
1048        while ((dst = *pprev) != NULL) {
1049                struct rt6_info *rt = (struct rt6_info *) dst;
1050                if (func(rt, arg)) {
1051                        *pprev = dst->next;
1052                        dst_free(dst);
1053                } else {
1054                        pprev = &dst->next;
1055                }
1056        }
1057        spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
1060static int ip6_dst_gc(struct dst_ops *ops)
1061{
1062        unsigned long now = jiffies;
1063        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1064        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1069
1070        if (time_after(rt_last_gc + rt_min_interval, now) &&
1071            atomic_read(&ops->entries) <= rt_max_size)
1072                goto out;
1073
1074        net->ipv6.ip6_rt_gc_expire++;
1075        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076        net->ipv6.ip6_rt_last_gc = now;
1077        if (atomic_read(&ops->entries) < ops->gc_thresh)
1078                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1079out:
1080        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081        return (atomic_read(&ops->entries) > rt_max_size);
1082}
1083
1084/* Clean host part of a prefix. Not necessary in radix tree,
1085   but results in cleaner routing tables.
1086
1087   Remove it only when all the things will work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092        int mtu = IPV6_MIN_MTU;
1093        struct inet6_dev *idev;
1094
1095        idev = in6_dev_get(dev);
1096        if (idev) {
1097                mtu = idev->cnf.mtu6;
1098                in6_dev_put(idev);
1099        }
1100        return mtu;
1101}
1102
1103int ip6_dst_hoplimit(struct dst_entry *dst)
1104{
1105        int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106        if (hoplimit < 0) {
1107                struct net_device *dev = dst->dev;
1108                struct inet6_dev *idev = in6_dev_get(dev);
1109                if (idev) {
1110                        hoplimit = idev->cnf.hop_limit;
1111                        in6_dev_put(idev);
1112                } else
1113                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1114        }
1115        return hoplimit;
1116}
1117
1118/*
1119 *
1120 */
1121
1122int ip6_route_add(struct fib6_config *cfg)
1123{
1124        int err;
1125        struct net *net = cfg->fc_nlinfo.nl_net;
1126        struct rt6_info *rt = NULL;
1127        struct net_device *dev = NULL;
1128        struct inet6_dev *idev = NULL;
1129        struct fib6_table *table;
1130        int addr_type;
1131
1132        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1133                return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
1135        if (cfg->fc_src_len)
1136                return -EINVAL;
1137#endif
1138        if (cfg->fc_ifindex) {
1139                err = -ENODEV;
1140                dev = dev_get_by_index(net, cfg->fc_ifindex);
1141                if (!dev)
1142                        goto out;
1143                idev = in6_dev_get(dev);
1144                if (!idev)
1145                        goto out;
1146        }
1147
1148        if (cfg->fc_metric == 0)
1149                cfg->fc_metric = IP6_RT_PRIO_USER;
1150
1151        table = fib6_new_table(net, cfg->fc_table);
1152        if (table == NULL) {
1153                err = -ENOBUFS;
1154                goto out;
1155        }
1156
1157        rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1158
1159        if (rt == NULL) {
1160                err = -ENOMEM;
1161                goto out;
1162        }
1163
1164        rt->u.dst.obsolete = -1;
1165        rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166                                jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167                                0;
1168
1169        if (cfg->fc_protocol == RTPROT_UNSPEC)
1170                cfg->fc_protocol = RTPROT_BOOT;
1171        rt->rt6i_protocol = cfg->fc_protocol;
1172
1173        addr_type = ipv6_addr_type(&cfg->fc_dst);
1174
1175        if (addr_type & IPV6_ADDR_MULTICAST)
1176                rt->u.dst.input = ip6_mc_input;
1177        else
1178                rt->u.dst.input = ip6_forward;
1179
1180        rt->u.dst.output = ip6_output;
1181
1182        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183        rt->rt6i_dst.plen = cfg->fc_dst_len;
1184        if (rt->rt6i_dst.plen == 128)
1185               rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
1188        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189        rt->rt6i_src.plen = cfg->fc_src_len;
1190#endif
1191
1192        rt->rt6i_metric = cfg->fc_metric;
1193
1194        /* We cannot add true routes via loopback here,
1195           they would result in kernel looping; promote them to reject routes
1196         */
1197        if ((cfg->fc_flags & RTF_REJECT) ||
1198            (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199                /* hold loopback dev/idev if we haven't done so. */
1200                if (dev != net->loopback_dev) {
1201                        if (dev) {
1202                                dev_put(dev);
1203                                in6_dev_put(idev);
1204                        }
1205                        dev = net->loopback_dev;
1206                        dev_hold(dev);
1207                        idev = in6_dev_get(dev);
1208                        if (!idev) {
1209                                err = -ENODEV;
1210                                goto out;
1211                        }
1212                }
1213                rt->u.dst.output = ip6_pkt_discard_out;
1214                rt->u.dst.input = ip6_pkt_discard;
1215                rt->u.dst.error = -ENETUNREACH;
1216                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217                goto install_route;
1218        }
1219
1220        if (cfg->fc_flags & RTF_GATEWAY) {
1221                struct in6_addr *gw_addr;
1222                int gwa_type;
1223
1224                gw_addr = &cfg->fc_gateway;
1225                ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1226                gwa_type = ipv6_addr_type(gw_addr);
1227
1228                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229                        struct rt6_info *grt;
1230
1231                        /* IPv6 strictly inhibits using not link-local
1232                           addresses as nexthop address.
1233                           Otherwise, router will not able to send redirects.
1234                           It is very good, but in some (rare!) circumstances
1235                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1236                           some exceptions. --ANK
1237                         */
1238                        err = -EINVAL;
1239                        if (!(gwa_type&IPV6_ADDR_UNICAST))
1240                                goto out;
1241
1242                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1243
1244                        err = -EHOSTUNREACH;
1245                        if (grt == NULL)
1246                                goto out;
1247                        if (dev) {
1248                                if (dev != grt->rt6i_dev) {
1249                                        dst_release(&grt->u.dst);
1250                                        goto out;
1251                                }
1252                        } else {
1253                                dev = grt->rt6i_dev;
1254                                idev = grt->rt6i_idev;
1255                                dev_hold(dev);
1256                                in6_dev_hold(grt->rt6i_idev);
1257                        }
1258                        if (!(grt->rt6i_flags&RTF_GATEWAY))
1259                                err = 0;
1260                        dst_release(&grt->u.dst);
1261
1262                        if (err)
1263                                goto out;
1264                }
1265                err = -EINVAL;
1266                if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267                        goto out;
1268        }
1269
1270        err = -ENODEV;
1271        if (dev == NULL)
1272                goto out;
1273
1274        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1275                rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276                if (IS_ERR(rt->rt6i_nexthop)) {
1277                        err = PTR_ERR(rt->rt6i_nexthop);
1278                        rt->rt6i_nexthop = NULL;
1279                        goto out;
1280                }
1281        }
1282
1283        rt->rt6i_flags = cfg->fc_flags;
1284
1285install_route:
1286        if (cfg->fc_mx) {
1287                struct nlattr *nla;
1288                int remaining;
1289
1290                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1291                        int type = nla_type(nla);
1292
1293                        if (type) {
1294                                if (type > RTAX_MAX) {
1295                                        err = -EINVAL;
1296                                        goto out;
1297                                }
1298
1299                                rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1300                        }
1301                }
1302        }
1303
1304        if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1305                rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1306        if (!dst_mtu(&rt->u.dst))
1307                rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1308        if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1309                rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1310        rt->u.dst.dev = dev;
1311        rt->rt6i_idev = idev;
1312        rt->rt6i_table = table;
1313
1314        cfg->fc_nlinfo.nl_net = dev_net(dev);
1315
1316        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1317
1318out:
1319        if (dev)
1320                dev_put(dev);
1321        if (idev)
1322                in6_dev_put(idev);
1323        if (rt)
1324                dst_free(&rt->u.dst);
1325        return err;
1326}
1327
1328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1329{
1330        int err;
1331        struct fib6_table *table;
1332        struct net *net = dev_net(rt->rt6i_dev);
1333
1334        if (rt == net->ipv6.ip6_null_entry)
1335                return -ENOENT;
1336
1337        table = rt->rt6i_table;
1338        write_lock_bh(&table->tb6_lock);
1339
1340        err = fib6_del(rt, info);
1341        dst_release(&rt->u.dst);
1342
1343        write_unlock_bh(&table->tb6_lock);
1344
1345        return err;
1346}
1347
1348int ip6_del_rt(struct rt6_info *rt)
1349{
1350        struct nl_info info = {
1351                .nl_net = dev_net(rt->rt6i_dev),
1352        };
1353        return __ip6_del_rt(rt, &info);
1354}
1355
1356static int ip6_route_del(struct fib6_config *cfg)
1357{
1358        struct fib6_table *table;
1359        struct fib6_node *fn;
1360        struct rt6_info *rt;
1361        int err = -ESRCH;
1362
1363        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1364        if (table == NULL)
1365                return err;
1366
1367        read_lock_bh(&table->tb6_lock);
1368
1369        fn = fib6_locate(&table->tb6_root,
1370                         &cfg->fc_dst, cfg->fc_dst_len,
1371                         &cfg->fc_src, cfg->fc_src_len);
1372
1373        if (fn) {
1374                for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1375                        if (cfg->fc_ifindex &&
1376                            (rt->rt6i_dev == NULL ||
1377                             rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1378                                continue;
1379                        if (cfg->fc_flags & RTF_GATEWAY &&
1380                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1381                                continue;
1382                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1383                                continue;
1384                        dst_hold(&rt->u.dst);
1385                        read_unlock_bh(&table->tb6_lock);
1386
1387                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1388                }
1389        }
1390        read_unlock_bh(&table->tb6_lock);
1391
1392        return err;
1393}
1394
1395/*
1396 *      Handle redirects
1397 */
1398struct ip6rd_flowi {
1399        struct flowi fl;
1400        struct in6_addr gateway;
1401};
1402
1403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404                                             struct fib6_table *table,
1405                                             struct flowi *fl,
1406                                             int flags)
1407{
1408        struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409        struct rt6_info *rt;
1410        struct fib6_node *fn;
1411
1412        /*
1413         * Get the "current" route for this destination and
1414         * check if the redirect has come from approriate router.
1415         *
1416         * RFC 2461 specifies that redirects should only be
1417         * accepted if they come from the nexthop to the target.
1418         * Due to the way the routes are chosen, this notion
1419         * is a bit fuzzy and one might need to check all possible
1420         * routes.
1421         */
1422
1423        read_lock_bh(&table->tb6_lock);
1424        fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1425restart:
1426        for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1427                /*
1428                 * Current route is on-link; redirect is always invalid.
1429                 *
1430                 * Seems, previous statement is not true. It could
1431                 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432                 * But then router serving it might decide, that we should
1433                 * know truth 8)8) --ANK (980726).
1434                 */
1435                if (rt6_check_expired(rt))
1436                        continue;
1437                if (!(rt->rt6i_flags & RTF_GATEWAY))
1438                        continue;
1439                if (fl->oif != rt->rt6i_dev->ifindex)
1440                        continue;
1441                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1442                        continue;
1443                break;
1444        }
1445
1446        if (!rt)
1447                rt = net->ipv6.ip6_null_entry;
1448        BACKTRACK(net, &fl->fl6_src);
1449out:
1450        dst_hold(&rt->u.dst);
1451
1452        read_unlock_bh(&table->tb6_lock);
1453
1454        return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458                                           struct in6_addr *src,
1459                                           struct in6_addr *gateway,
1460                                           struct net_device *dev)
1461{
1462        int flags = RT6_LOOKUP_F_HAS_SADDR;
1463        struct net *net = dev_net(dev);
1464        struct ip6rd_flowi rdfl = {
1465                .fl = {
1466                        .oif = dev->ifindex,
1467                        .nl_u = {
1468                                .ip6_u = {
1469                                        .daddr = *dest,
1470                                        .saddr = *src,
1471                                },
1472                        },
1473                },
1474                .gateway = *gateway,
1475        };
1476
1477        if (rt6_need_strict(dest))
1478                flags |= RT6_LOOKUP_F_IFACE;
1479
1480        return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1481                                                   flags, __ip6_route_redirect);
1482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485                  struct in6_addr *saddr,
1486                  struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488        struct rt6_info *rt, *nrt = NULL;
1489        struct netevent_redirect netevent;
1490        struct net *net = dev_net(neigh->dev);
1491
1492        rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
1494        if (rt == net->ipv6.ip6_null_entry) {
1495                if (net_ratelimit())
1496                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497                               "for redirect target\n");
1498                goto out;
1499        }
1500
1501        /*
1502         *      We have finally decided to accept it.
1503         */
1504
1505        neigh_update(neigh, lladdr, NUD_STALE,
1506                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507                     NEIGH_UPDATE_F_OVERRIDE|
1508                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509                                     NEIGH_UPDATE_F_ISROUTER))
1510                     );
1511
1512        /*
1513         * Redirect received -> path was valid.
1514         * Look, redirects are sent only in response to data packets,
1515         * so that this nexthop apparently is reachable. --ANK
1516         */
1517        dst_confirm(&rt->u.dst);
1518
1519        /* Duplicate redirect: silently ignore. */
1520        if (neigh == rt->u.dst.neighbour)
1521                goto out;
1522
1523        nrt = ip6_rt_copy(rt);
1524        if (nrt == NULL)
1525                goto out;
1526
1527        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528        if (on_link)
1529                nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531        ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532        nrt->rt6i_dst.plen = 128;
1533        nrt->u.dst.flags |= DST_HOST;
1534
1535        ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536        nrt->rt6i_nexthop = neigh_clone(neigh);
1537        /* Reset pmtu, it may be better */
1538        nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539        nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540                                                        dst_mtu(&nrt->u.dst));
1541
1542        if (ip6_ins_rt(nrt))
1543                goto out;
1544
1545        netevent.old = &rt->u.dst;
1546        netevent.new = &nrt->u.dst;
1547        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1549        if (rt->rt6i_flags&RTF_CACHE) {
1550                ip6_del_rt(rt);
1551                return;
1552        }
1553
1554out:
1555        dst_release(&rt->u.dst);
1556        return;
1557}
1558
1559/*
1560 *      Handle ICMP "packet too big" messages
1561 *      i.e. Path MTU discovery
1562 */
1563
1564void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1565                        struct net_device *dev, u32 pmtu)
1566{
1567        struct rt6_info *rt, *nrt;
1568        struct net *net = dev_net(dev);
1569        int allfrag = 0;
1570
1571        rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1572        if (rt == NULL)
1573                return;
1574
1575        if (pmtu >= dst_mtu(&rt->u.dst))
1576                goto out;
1577
1578        if (pmtu < IPV6_MIN_MTU) {
1579                /*
1580                 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1581                 * MTU (1280) and a fragment header should always be included
1582                 * after a node receiving Too Big message reporting PMTU is
1583                 * less than the IPv6 Minimum Link MTU.
1584                 */
1585                pmtu = IPV6_MIN_MTU;
1586                allfrag = 1;
1587        }
1588
1589        /* New mtu received -> path was valid.
1590           They are sent only in response to data packets,
1591           so that this nexthop apparently is reachable. --ANK
1592         */
1593        dst_confirm(&rt->u.dst);
1594
1595        /* Host route. If it is static, it would be better
1596           not to override it, but add new one, so that
1597           when cache entry will expire old pmtu
1598           would return automatically.
1599         */
1600        if (rt->rt6i_flags & RTF_CACHE) {
1601                rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1602                if (allfrag)
1603                        rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1604                dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1605                rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606                goto out;
1607        }
1608
1609        /* Network route.
1610           Two cases are possible:
1611           1. It is connected route. Action: COW
1612           2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613         */
1614        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1615                nrt = rt6_alloc_cow(rt, daddr, saddr);
1616        else
1617                nrt = rt6_alloc_clone(rt, daddr);
1618
1619        if (nrt) {
1620                nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1621                if (allfrag)
1622                        nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1623
1624                /* According to RFC 1981, detecting PMTU increase shouldn't be
1625                 * happened within 5 mins, the recommended timer is 10 mins.
1626                 * Here this route expiration time is set to ip6_rt_mtu_expires
1627                 * which is 10 mins. After 10 mins the decreased pmtu is expired
1628                 * and detecting PMTU increase will be automatically happened.
1629                 */
1630                dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1631                nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1632
1633                ip6_ins_rt(nrt);
1634        }
1635out:
1636        dst_release(&rt->u.dst);
1637}
1638
1639/*
1640 *      Misc support functions
1641 */
1642
1643static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1644{
1645        struct net *net = dev_net(ort->rt6i_dev);
1646        struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1647
1648        if (rt) {
1649                rt->u.dst.input = ort->u.dst.input;
1650                rt->u.dst.output = ort->u.dst.output;
1651
1652                memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1653                rt->u.dst.error = ort->u.dst.error;
1654                rt->u.dst.dev = ort->u.dst.dev;
1655                if (rt->u.dst.dev)
1656                        dev_hold(rt->u.dst.dev);
1657                rt->rt6i_idev = ort->rt6i_idev;
1658                if (rt->rt6i_idev)
1659                        in6_dev_hold(rt->rt6i_idev);
1660                rt->u.dst.lastuse = jiffies;
1661                rt->rt6i_expires = 0;
1662
1663                ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1664                rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1665                rt->rt6i_metric = 0;
1666
1667                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1668#ifdef CONFIG_IPV6_SUBTREES
1669                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1670#endif
1671                rt->rt6i_table = ort->rt6i_table;
1672        }
1673        return rt;
1674}
1675
1676#ifdef CONFIG_IPV6_ROUTE_INFO
1677static struct rt6_info *rt6_get_route_info(struct net *net,
1678                                           struct in6_addr *prefix, int prefixlen,
1679                                           struct in6_addr *gwaddr, int ifindex)
1680{
1681        struct fib6_node *fn;
1682        struct rt6_info *rt = NULL;
1683        struct fib6_table *table;
1684
1685        table = fib6_get_table(net, RT6_TABLE_INFO);
1686        if (table == NULL)
1687                return NULL;
1688
1689        write_lock_bh(&table->tb6_lock);
1690        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1691        if (!fn)
1692                goto out;
1693
1694        for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1695                if (rt->rt6i_dev->ifindex != ifindex)
1696                        continue;
1697                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1698                        continue;
1699                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1700                        continue;
1701                dst_hold(&rt->u.dst);
1702                break;
1703        }
1704out:
1705        write_unlock_bh(&table->tb6_lock);
1706        return rt;
1707}
1708
1709static struct rt6_info *rt6_add_route_info(struct net *net,
1710                                           struct in6_addr *prefix, int prefixlen,
1711                                           struct in6_addr *gwaddr, int ifindex,
1712                                           unsigned pref)
1713{
1714        struct fib6_config cfg = {
1715                .fc_table       = RT6_TABLE_INFO,
1716                .fc_metric      = IP6_RT_PRIO_USER,
1717                .fc_ifindex     = ifindex,
1718                .fc_dst_len     = prefixlen,
1719                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1720                                  RTF_UP | RTF_PREF(pref),
1721                .fc_nlinfo.pid = 0,
1722                .fc_nlinfo.nlh = NULL,
1723                .fc_nlinfo.nl_net = net,
1724        };
1725
1726        ipv6_addr_copy(&cfg.fc_dst, prefix);
1727        ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1728
1729        /* We should treat it as a default route if prefix length is 0. */
1730        if (!prefixlen)
1731                cfg.fc_flags |= RTF_DEFAULT;
1732
1733        ip6_route_add(&cfg);
1734
1735        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1736}
1737#endif
1738
1739struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1740{
1741        struct rt6_info *rt;
1742        struct fib6_table *table;
1743
1744        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1745        if (table == NULL)
1746                return NULL;
1747
1748        write_lock_bh(&table->tb6_lock);
1749        for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1750                if (dev == rt->rt6i_dev &&
1751                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1752                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1753                        break;
1754        }
1755        if (rt)
1756                dst_hold(&rt->u.dst);
1757        write_unlock_bh(&table->tb6_lock);
1758        return rt;
1759}
1760
1761struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1762                                     struct net_device *dev,
1763                                     unsigned int pref)
1764{
1765        struct fib6_config cfg = {
1766                .fc_table       = RT6_TABLE_DFLT,
1767                .fc_metric      = IP6_RT_PRIO_USER,
1768                .fc_ifindex     = dev->ifindex,
1769                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1770                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1771                .fc_nlinfo.pid = 0,
1772                .fc_nlinfo.nlh = NULL,
1773                .fc_nlinfo.nl_net = dev_net(dev),
1774        };
1775
1776        ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1777
1778        ip6_route_add(&cfg);
1779
1780        return rt6_get_dflt_router(gwaddr, dev);
1781}
1782
1783void rt6_purge_dflt_routers(struct net *net)
1784{
1785        struct rt6_info *rt;
1786        struct fib6_table *table;
1787
1788        /* NOTE: Keep consistent with rt6_get_dflt_router */
1789        table = fib6_get_table(net, RT6_TABLE_DFLT);
1790        if (table == NULL)
1791                return;
1792
1793restart:
1794        read_lock_bh(&table->tb6_lock);
1795        for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1796                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1797                        dst_hold(&rt->u.dst);
1798                        read_unlock_bh(&table->tb6_lock);
1799                        ip6_del_rt(rt);
1800                        goto restart;
1801                }
1802        }
1803        read_unlock_bh(&table->tb6_lock);
1804}
1805
1806static void rtmsg_to_fib6_config(struct net *net,
1807                                 struct in6_rtmsg *rtmsg,
1808                                 struct fib6_config *cfg)
1809{
1810        memset(cfg, 0, sizeof(*cfg));
1811
1812        cfg->fc_table = RT6_TABLE_MAIN;
1813        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1814        cfg->fc_metric = rtmsg->rtmsg_metric;
1815        cfg->fc_expires = rtmsg->rtmsg_info;
1816        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1817        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1818        cfg->fc_flags = rtmsg->rtmsg_flags;
1819
1820        cfg->fc_nlinfo.nl_net = net;
1821
1822        ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1823        ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1824        ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1825}
1826
1827int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1828{
1829        struct fib6_config cfg;
1830        struct in6_rtmsg rtmsg;
1831        int err;
1832
1833        switch(cmd) {
1834        case SIOCADDRT:         /* Add a route */
1835        case SIOCDELRT:         /* Delete a route */
1836                if (!capable(CAP_NET_ADMIN))
1837                        return -EPERM;
1838                err = copy_from_user(&rtmsg, arg,
1839                                     sizeof(struct in6_rtmsg));
1840                if (err)
1841                        return -EFAULT;
1842
1843                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1844
1845                rtnl_lock();
1846                switch (cmd) {
1847                case SIOCADDRT:
1848                        err = ip6_route_add(&cfg);
1849                        break;
1850                case SIOCDELRT:
1851                        err = ip6_route_del(&cfg);
1852                        break;
1853                default:
1854                        err = -EINVAL;
1855                }
1856                rtnl_unlock();
1857
1858                return err;
1859        }
1860
1861        return -EINVAL;
1862}
1863
1864/*
1865 *      Drop the packet on the floor
1866 */
1867
1868static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1869{
1870        int type;
1871        struct dst_entry *dst = skb_dst(skb);
1872        switch (ipstats_mib_noroutes) {
1873        case IPSTATS_MIB_INNOROUTES:
1874                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1875                if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1876                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1877                                      IPSTATS_MIB_INADDRERRORS);
1878                        break;
1879                }
1880                /* FALLTHROUGH */
1881        case IPSTATS_MIB_OUTNOROUTES:
1882                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1883                              ipstats_mib_noroutes);
1884                break;
1885        }
1886        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1887        kfree_skb(skb);
1888        return 0;
1889}
1890
1891static int ip6_pkt_discard(struct sk_buff *skb)
1892{
1893        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1894}
1895
1896static int ip6_pkt_discard_out(struct sk_buff *skb)
1897{
1898        skb->dev = skb_dst(skb)->dev;
1899        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1900}
1901
1902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1903
1904static int ip6_pkt_prohibit(struct sk_buff *skb)
1905{
1906        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1907}
1908
1909static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1910{
1911        skb->dev = skb_dst(skb)->dev;
1912        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1913}
1914
1915#endif
1916
1917/*
1918 *      Allocate a dst for local (unicast / anycast) address.
1919 */
1920
1921struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1922                                    const struct in6_addr *addr,
1923                                    int anycast)
1924{
1925        struct net *net = dev_net(idev->dev);
1926        struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1927        struct neighbour *neigh;
1928
1929        if (rt == NULL)
1930                return ERR_PTR(-ENOMEM);
1931
1932        dev_hold(net->loopback_dev);
1933        in6_dev_hold(idev);
1934
1935        rt->u.dst.flags = DST_HOST;
1936        rt->u.dst.input = ip6_input;
1937        rt->u.dst.output = ip6_output;
1938        rt->rt6i_dev = net->loopback_dev;
1939        rt->rt6i_idev = idev;
1940        rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1941        rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1942        rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1943        rt->u.dst.obsolete = -1;
1944
1945        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1946        if (anycast)
1947                rt->rt6i_flags |= RTF_ANYCAST;
1948        else
1949                rt->rt6i_flags |= RTF_LOCAL;
1950        neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1951        if (IS_ERR(neigh)) {
1952                dst_free(&rt->u.dst);
1953
1954                /* We are casting this because that is the return
1955                 * value type.  But an errno encoded pointer is the
1956                 * same regardless of the underlying pointer type,
1957                 * and that's what we are returning.  So this is OK.
1958                 */
1959                return (struct rt6_info *) neigh;
1960        }
1961        rt->rt6i_nexthop = neigh;
1962
1963        ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1964        rt->rt6i_dst.plen = 128;
1965        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1966
1967        atomic_set(&rt->u.dst.__refcnt, 1);
1968
1969        return rt;
1970}
1971
/* Argument bundle handed to fib6_ifdown() through the walker's void pointer. */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every route */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
1976
1977static int fib6_ifdown(struct rt6_info *rt, void *arg)
1978{
1979        struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1980        struct net *net = ((struct arg_dev_net *)arg)->net;
1981
1982        if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1983            rt != net->ipv6.ip6_null_entry) {
1984                RT6_TRACE("deleted by ifdown %p\n", rt);
1985                return -1;
1986        }
1987        return 0;
1988}
1989
1990void rt6_ifdown(struct net *net, struct net_device *dev)
1991{
1992        struct arg_dev_net adn = {
1993                .dev = dev,
1994                .net = net,
1995        };
1996
1997        fib6_clean_all(net, fib6_ifdown, 0, &adn);
1998        icmp6_clean_all(fib6_ifdown, &adn);
1999}
2000
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2006
2007static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2008{
2009        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2010        struct inet6_dev *idev;
2011        struct net *net = dev_net(arg->dev);
2012
2013        /* In IPv6 pmtu discovery is not optional,
2014           so that RTAX_MTU lock cannot disable it.
2015           We still use this lock to block changes
2016           caused by addrconf/ndisc.
2017        */
2018
2019        idev = __in6_dev_get(arg->dev);
2020        if (idev == NULL)
2021                return 0;
2022
2023        /* For administrative MTU increase, there is no way to discover
2024           IPv6 PMTU increase, so PMTU increase should be updated here.
2025           Since RFC 1981 doesn't include administrative MTU increase
2026           update PMTU increase is a MUST. (i.e. jumbo frame)
2027         */
2028        /*
2029           If new MTU is less than route PMTU, this new MTU will be the
2030           lowest MTU in the path, update the route PMTU to reflect PMTU
2031           decreases; if new MTU is greater than route PMTU, and the
2032           old MTU is the lowest MTU in the path, update the route PMTU
2033           to reflect the increase. In this case if the other nodes' MTU
2034           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2035           PMTU discouvery.
2036         */
2037        if (rt->rt6i_dev == arg->dev &&
2038            !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2039            (dst_mtu(&rt->u.dst) >= arg->mtu ||
2040             (dst_mtu(&rt->u.dst) < arg->mtu &&
2041              dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2042                rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2043                rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2044        }
2045        return 0;
2046}
2047
2048void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2049{
2050        struct rt6_mtu_change_arg arg = {
2051                .dev = dev,
2052                .mtu = mtu,
2053        };
2054
2055        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2056}
2057
2058static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2059        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2060        [RTA_OIF]               = { .type = NLA_U32 },
2061        [RTA_IIF]               = { .type = NLA_U32 },
2062        [RTA_PRIORITY]          = { .type = NLA_U32 },
2063        [RTA_METRICS]           = { .type = NLA_NESTED },
2064};
2065
2066static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2067                              struct fib6_config *cfg)
2068{
2069        struct rtmsg *rtm;
2070        struct nlattr *tb[RTA_MAX+1];
2071        int err;
2072
2073        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2074        if (err < 0)
2075                goto errout;
2076
2077        err = -EINVAL;
2078        rtm = nlmsg_data(nlh);
2079        memset(cfg, 0, sizeof(*cfg));
2080
2081        cfg->fc_table = rtm->rtm_table;
2082        cfg->fc_dst_len = rtm->rtm_dst_len;
2083        cfg->fc_src_len = rtm->rtm_src_len;
2084        cfg->fc_flags = RTF_UP;
2085        cfg->fc_protocol = rtm->rtm_protocol;
2086
2087        if (rtm->rtm_type == RTN_UNREACHABLE)
2088                cfg->fc_flags |= RTF_REJECT;
2089
2090        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2091        cfg->fc_nlinfo.nlh = nlh;
2092        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2093
2094        if (tb[RTA_GATEWAY]) {
2095                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2096                cfg->fc_flags |= RTF_GATEWAY;
2097        }
2098
2099        if (tb[RTA_DST]) {
2100                int plen = (rtm->rtm_dst_len + 7) >> 3;
2101
2102                if (nla_len(tb[RTA_DST]) < plen)
2103                        goto errout;
2104
2105                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2106        }
2107
2108        if (tb[RTA_SRC]) {
2109                int plen = (rtm->rtm_src_len + 7) >> 3;
2110
2111                if (nla_len(tb[RTA_SRC]) < plen)
2112                        goto errout;
2113
2114                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2115        }
2116
2117        if (tb[RTA_OIF])
2118                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2119
2120        if (tb[RTA_PRIORITY])
2121                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2122
2123        if (tb[RTA_METRICS]) {
2124                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2125                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2126        }
2127
2128        if (tb[RTA_TABLE])
2129                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2130
2131        err = 0;
2132errout:
2133        return err;
2134}
2135
2136static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2137{
2138        struct fib6_config cfg;
2139        int err;
2140
2141        err = rtm_to_fib6_config(skb, nlh, &cfg);
2142        if (err < 0)
2143                return err;
2144
2145        return ip6_route_del(&cfg);
2146}
2147
2148static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2149{
2150        struct fib6_config cfg;
2151        int err;
2152
2153        err = rtm_to_fib6_config(skb, nlh, &cfg);
2154        if (err < 0)
2155                return err;
2156
2157        return ip6_route_add(&cfg);
2158}
2159
2160static inline size_t rt6_nlmsg_size(void)
2161{
2162        return NLMSG_ALIGN(sizeof(struct rtmsg))
2163               + nla_total_size(16) /* RTA_SRC */
2164               + nla_total_size(16) /* RTA_DST */
2165               + nla_total_size(16) /* RTA_GATEWAY */
2166               + nla_total_size(16) /* RTA_PREFSRC */
2167               + nla_total_size(4) /* RTA_TABLE */
2168               + nla_total_size(4) /* RTA_IIF */
2169               + nla_total_size(4) /* RTA_OIF */
2170               + nla_total_size(4) /* RTA_PRIORITY */
2171               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2172               + nla_total_size(sizeof(struct rta_cacheinfo));
2173}
2174
2175static int rt6_fill_node(struct net *net,
2176                         struct sk_buff *skb, struct rt6_info *rt,
2177                         struct in6_addr *dst, struct in6_addr *src,
2178                         int iif, int type, u32 pid, u32 seq,
2179                         int prefix, int nowait, unsigned int flags)
2180{
2181        struct rtmsg *rtm;
2182        struct nlmsghdr *nlh;
2183        long expires;
2184        u32 table;
2185
2186        if (prefix) {   /* user wants prefix routes only */
2187                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2188                        /* success since this is not a prefix route */
2189                        return 1;
2190                }
2191        }
2192
2193        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2194        if (nlh == NULL)
2195                return -EMSGSIZE;
2196
2197        rtm = nlmsg_data(nlh);
2198        rtm->rtm_family = AF_INET6;
2199        rtm->rtm_dst_len = rt->rt6i_dst.plen;
2200        rtm->rtm_src_len = rt->rt6i_src.plen;
2201        rtm->rtm_tos = 0;
2202        if (rt->rt6i_table)
2203                table = rt->rt6i_table->tb6_id;
2204        else
2205                table = RT6_TABLE_UNSPEC;
2206        rtm->rtm_table = table;
2207        NLA_PUT_U32(skb, RTA_TABLE, table);
2208        if (rt->rt6i_flags&RTF_REJECT)
2209                rtm->rtm_type = RTN_UNREACHABLE;
2210        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2211                rtm->rtm_type = RTN_LOCAL;
2212        else
2213                rtm->rtm_type = RTN_UNICAST;
2214        rtm->rtm_flags = 0;
2215        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2216        rtm->rtm_protocol = rt->rt6i_protocol;
2217        if (rt->rt6i_flags&RTF_DYNAMIC)
2218                rtm->rtm_protocol = RTPROT_REDIRECT;
2219        else if (rt->rt6i_flags & RTF_ADDRCONF)
2220                rtm->rtm_protocol = RTPROT_KERNEL;
2221        else if (rt->rt6i_flags&RTF_DEFAULT)
2222                rtm->rtm_protocol = RTPROT_RA;
2223
2224        if (rt->rt6i_flags&RTF_CACHE)
2225                rtm->rtm_flags |= RTM_F_CLONED;
2226
2227        if (dst) {
2228                NLA_PUT(skb, RTA_DST, 16, dst);
2229                rtm->rtm_dst_len = 128;
2230        } else if (rtm->rtm_dst_len)
2231                NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2232#ifdef CONFIG_IPV6_SUBTREES
2233        if (src) {
2234                NLA_PUT(skb, RTA_SRC, 16, src);
2235                rtm->rtm_src_len = 128;
2236        } else if (rtm->rtm_src_len)
2237                NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2238#endif
2239        if (iif) {
2240#ifdef CONFIG_IPV6_MROUTE
2241                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2242                        int err = ip6mr_get_route(net, skb, rtm, nowait);
2243                        if (err <= 0) {
2244                                if (!nowait) {
2245                                        if (err == 0)
2246                                                return 0;
2247                                        goto nla_put_failure;
2248                                } else {
2249                                        if (err == -EMSGSIZE)
2250                                                goto nla_put_failure;
2251                                }
2252                        }
2253                } else
2254#endif
2255                        NLA_PUT_U32(skb, RTA_IIF, iif);
2256        } else if (dst) {
2257                struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2258                struct in6_addr saddr_buf;
2259                if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2260                                       dst, 0, &saddr_buf) == 0)
2261                        NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2262        }
2263
2264        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2265                goto nla_put_failure;
2266
2267        if (rt->u.dst.neighbour)
2268                NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2269
2270        if (rt->u.dst.dev)
2271                NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2272
2273        NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2274
2275        if (!(rt->rt6i_flags & RTF_EXPIRES))
2276                expires = 0;
2277        else if (rt->rt6i_expires - jiffies < INT_MAX)
2278                expires = rt->rt6i_expires - jiffies;
2279        else
2280                expires = INT_MAX;
2281
2282        if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2283                               expires, rt->u.dst.error) < 0)
2284                goto nla_put_failure;
2285
2286        return nlmsg_end(skb, nlh);
2287
2288nla_put_failure:
2289        nlmsg_cancel(skb, nlh);
2290        return -EMSGSIZE;
2291}
2292
2293int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2294{
2295        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2296        int prefix;
2297
2298        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2299                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2300                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2301        } else
2302                prefix = 0;
2303
2304        return rt6_fill_node(arg->net,
2305                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2306                     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2307                     prefix, 0, NLM_F_MULTI);
2308}
2309
2310static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2311{
2312        struct net *net = sock_net(in_skb->sk);
2313        struct nlattr *tb[RTA_MAX+1];
2314        struct rt6_info *rt;
2315        struct sk_buff *skb;
2316        struct rtmsg *rtm;
2317        struct flowi fl;
2318        int err, iif = 0;
2319
2320        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2321        if (err < 0)
2322                goto errout;
2323
2324        err = -EINVAL;
2325        memset(&fl, 0, sizeof(fl));
2326
2327        if (tb[RTA_SRC]) {
2328                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2329                        goto errout;
2330
2331                ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2332        }
2333
2334        if (tb[RTA_DST]) {
2335                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2336                        goto errout;
2337
2338                ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2339        }
2340
2341        if (tb[RTA_IIF])
2342                iif = nla_get_u32(tb[RTA_IIF]);
2343
2344        if (tb[RTA_OIF])
2345                fl.oif = nla_get_u32(tb[RTA_OIF]);
2346
2347        if (iif) {
2348                struct net_device *dev;
2349                dev = __dev_get_by_index(net, iif);
2350                if (!dev) {
2351                        err = -ENODEV;
2352                        goto errout;
2353                }
2354        }
2355
2356        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2357        if (skb == NULL) {
2358                err = -ENOBUFS;
2359                goto errout;
2360        }
2361
2362        /* Reserve room for dummy headers, this skb can pass
2363           through good chunk of routing engine.
2364         */
2365        skb_reset_mac_header(skb);
2366        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2367
2368        rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2369        skb_dst_set(skb, &rt->u.dst);
2370
2371        err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2372                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2373                            nlh->nlmsg_seq, 0, 0, 0);
2374        if (err < 0) {
2375                kfree_skb(skb);
2376                goto errout;
2377        }
2378
2379        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2380errout:
2381        return err;
2382}
2383
2384void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2385{
2386        struct sk_buff *skb;
2387        struct net *net = info->nl_net;
2388        u32 seq;
2389        int err;
2390
2391        err = -ENOBUFS;
2392        seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2393
2394        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2395        if (skb == NULL)
2396                goto errout;
2397
2398        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2399                                event, info->pid, seq, 0, 0, 0);
2400        if (err < 0) {
2401                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2402                WARN_ON(err == -EMSGSIZE);
2403                kfree_skb(skb);
2404                goto errout;
2405        }
2406        rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2407                    info->nlh, gfp_any());
2408        return;
2409errout:
2410        if (err < 0)
2411                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2412}
2413
2414static int ip6_route_dev_notify(struct notifier_block *this,
2415                                unsigned long event, void *data)
2416{
2417        struct net_device *dev = (struct net_device *)data;
2418        struct net *net = dev_net(dev);
2419
2420        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2421                net->ipv6.ip6_null_entry->u.dst.dev = dev;
2422                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2423#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2424                net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2425                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2426                net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2427                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2428#endif
2429        }
2430
2431        return NOTIFY_OK;
2432}
2433
2434/*
2435 *      /proc
2436 */
2437
2438#ifdef CONFIG_PROC_FS
2439
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * by the seq_file based /proc code below; presumably both are leftovers from
 * an older buffer-based implementation - confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2450
2451static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2452{
2453        struct seq_file *m = p_arg;
2454
2455        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2456
2457#ifdef CONFIG_IPV6_SUBTREES
2458        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2459#else
2460        seq_puts(m, "00000000000000000000000000000000 00 ");
2461#endif
2462
2463        if (rt->rt6i_nexthop) {
2464                seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2465        } else {
2466                seq_puts(m, "00000000000000000000000000000000");
2467        }
2468        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2469                   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2470                   rt->u.dst.__use, rt->rt6i_flags,
2471                   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2472        return 0;
2473}
2474
2475static int ipv6_route_show(struct seq_file *m, void *v)
2476{
2477        struct net *net = (struct net *)m->private;
2478        fib6_clean_all(net, rt6_info_route, 0, m);
2479        return 0;
2480}
2481
2482static int ipv6_route_open(struct inode *inode, struct file *file)
2483{
2484        return single_open_net(inode, file, ipv6_route_show);
2485}
2486
2487static const struct file_operations ipv6_route_proc_fops = {
2488        .owner          = THIS_MODULE,
2489        .open           = ipv6_route_open,
2490        .read           = seq_read,
2491        .llseek         = seq_lseek,
2492        .release        = single_release_net,
2493};
2494
2495static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2496{
2497        struct net *net = (struct net *)seq->private;
2498        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2499                   net->ipv6.rt6_stats->fib_nodes,
2500                   net->ipv6.rt6_stats->fib_route_nodes,
2501                   net->ipv6.rt6_stats->fib_rt_alloc,
2502                   net->ipv6.rt6_stats->fib_rt_entries,
2503                   net->ipv6.rt6_stats->fib_rt_cache,
2504                   atomic_read(&net->ipv6.ip6_dst_ops.entries),
2505                   net->ipv6.rt6_stats->fib_discarded_routes);
2506
2507        return 0;
2508}
2509
2510static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2511{
2512        return single_open_net(inode, file, rt6_stats_seq_show);
2513}
2514
2515static const struct file_operations rt6_stats_seq_fops = {
2516        .owner   = THIS_MODULE,
2517        .open    = rt6_stats_seq_open,
2518        .read    = seq_read,
2519        .llseek  = seq_lseek,
2520        .release = single_release_net,
2521};
2522#endif  /* CONFIG_PROC_FS */
2523
2524#ifdef CONFIG_SYSCTL
2525
2526static
2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2528                              void __user *buffer, size_t *lenp, loff_t *ppos)
2529{
2530        struct net *net = current->nsproxy->net_ns;
2531        int delay = net->ipv6.sysctl.flush_delay;
2532        if (write) {
2533                proc_dointvec(ctl, write, buffer, lenp, ppos);
2534                fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2535                return 0;
2536        } else
2537                return -EINVAL;
2538}
2539
2540ctl_table ipv6_route_table_template[] = {
2541        {
2542                .procname       =       "flush",
2543                .data           =       &init_net.ipv6.sysctl.flush_delay,
2544                .maxlen         =       sizeof(int),
2545                .mode           =       0200,
2546                .proc_handler   =       ipv6_sysctl_rtcache_flush
2547        },
2548        {
2549                .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2550                .procname       =       "gc_thresh",
2551                .data           =       &ip6_dst_ops_template.gc_thresh,
2552                .maxlen         =       sizeof(int),
2553                .mode           =       0644,
2554                .proc_handler   =       proc_dointvec,
2555        },
2556        {
2557                .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2558                .procname       =       "max_size",
2559                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2560                .maxlen         =       sizeof(int),
2561                .mode           =       0644,
2562                .proc_handler   =       proc_dointvec,
2563        },
2564        {
2565                .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2566                .procname       =       "gc_min_interval",
2567                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2568                .maxlen         =       sizeof(int),
2569                .mode           =       0644,
2570                .proc_handler   =       proc_dointvec_jiffies,
2571                .strategy       =       sysctl_jiffies,
2572        },
2573        {
2574                .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2575                .procname       =       "gc_timeout",
2576                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2577                .maxlen         =       sizeof(int),
2578                .mode           =       0644,
2579                .proc_handler   =       proc_dointvec_jiffies,
2580                .strategy       =       sysctl_jiffies,
2581        },
2582        {
2583                .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2584                .procname       =       "gc_interval",
2585                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2586                .maxlen         =       sizeof(int),
2587                .mode           =       0644,
2588                .proc_handler   =       proc_dointvec_jiffies,
2589                .strategy       =       sysctl_jiffies,
2590        },
2591        {
2592                .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2593                .procname       =       "gc_elasticity",
2594                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2595                .maxlen         =       sizeof(int),
2596                .mode           =       0644,
2597                .proc_handler   =       proc_dointvec_jiffies,
2598                .strategy       =       sysctl_jiffies,
2599        },
2600        {
2601                .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2602                .procname       =       "mtu_expires",
2603                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2604                .maxlen         =       sizeof(int),
2605                .mode           =       0644,
2606                .proc_handler   =       proc_dointvec_jiffies,
2607                .strategy       =       sysctl_jiffies,
2608        },
2609        {
2610                .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2611                .procname       =       "min_adv_mss",
2612                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2613                .maxlen         =       sizeof(int),
2614                .mode           =       0644,
2615                .proc_handler   =       proc_dointvec_jiffies,
2616                .strategy       =       sysctl_jiffies,
2617        },
2618        {
2619                .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2620                .procname       =       "gc_min_interval_ms",
2621                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2622                .maxlen         =       sizeof(int),
2623                .mode           =       0644,
2624                .proc_handler   =       proc_dointvec_ms_jiffies,
2625                .strategy       =       sysctl_ms_jiffies,
2626        },
2627        { .ctl_name = 0 }
2628};
2629
2630struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2631{
2632        struct ctl_table *table;
2633
2634        table = kmemdup(ipv6_route_table_template,
2635                        sizeof(ipv6_route_table_template),
2636                        GFP_KERNEL);
2637
2638        if (table) {
2639                table[0].data = &net->ipv6.sysctl.flush_delay;
2640                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2641                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2642                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2643                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2644                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2645                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2647                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2648        }
2649
2650        return table;
2651}
2652#endif
2653
2654static int ip6_route_net_init(struct net *net)
2655{
2656        int ret = -ENOMEM;
2657
2658        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2659               sizeof(net->ipv6.ip6_dst_ops));
2660
2661        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662                                           sizeof(*net->ipv6.ip6_null_entry),
2663                                           GFP_KERNEL);
2664        if (!net->ipv6.ip6_null_entry)
2665                goto out_ip6_dst_ops;
2666        net->ipv6.ip6_null_entry->u.dst.path =
2667                (struct dst_entry *)net->ipv6.ip6_null_entry;
2668        net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2669
2670#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2671        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2672                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2673                                               GFP_KERNEL);
2674        if (!net->ipv6.ip6_prohibit_entry)
2675                goto out_ip6_null_entry;
2676        net->ipv6.ip6_prohibit_entry->u.dst.path =
2677                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2678        net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2679
2680        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2681                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2682                                               GFP_KERNEL);
2683        if (!net->ipv6.ip6_blk_hole_entry)
2684                goto out_ip6_prohibit_entry;
2685        net->ipv6.ip6_blk_hole_entry->u.dst.path =
2686                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2687        net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2688#endif
2689
2690        net->ipv6.sysctl.flush_delay = 0;
2691        net->ipv6.sysctl.ip6_rt_max_size = 4096;
2692        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2693        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2694        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2695        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2696        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2697        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2698
2699#ifdef CONFIG_PROC_FS
2700        proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2701        proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2702#endif
2703        net->ipv6.ip6_rt_gc_expire = 30*HZ;
2704
2705        ret = 0;
2706out:
2707        return ret;
2708
2709#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710out_ip6_prohibit_entry:
2711        kfree(net->ipv6.ip6_prohibit_entry);
2712out_ip6_null_entry:
2713        kfree(net->ipv6.ip6_null_entry);
2714#endif
2715out_ip6_dst_ops:
2716        goto out;
2717}
2718
2719static void ip6_route_net_exit(struct net *net)
2720{
2721#ifdef CONFIG_PROC_FS
2722        proc_net_remove(net, "ipv6_route");
2723        proc_net_remove(net, "rt6_stats");
2724#endif
2725        kfree(net->ipv6.ip6_null_entry);
2726#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2727        kfree(net->ipv6.ip6_prohibit_entry);
2728        kfree(net->ipv6.ip6_blk_hole_entry);
2729#endif
2730}
2731
2732static struct pernet_operations ip6_route_net_ops = {
2733        .init = ip6_route_net_init,
2734        .exit = ip6_route_net_exit,
2735};
2736
2737static struct notifier_block ip6_route_dev_notifier = {
2738        .notifier_call = ip6_route_dev_notify,
2739        .priority = 0,
2740};
2741
2742int __init ip6_route_init(void)
2743{
2744        int ret;
2745
2746        ret = -ENOMEM;
2747        ip6_dst_ops_template.kmem_cachep =
2748                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2749                                  SLAB_HWCACHE_ALIGN, NULL);
2750        if (!ip6_dst_ops_template.kmem_cachep)
2751                goto out;
2752
2753        ret = register_pernet_subsys(&ip6_route_net_ops);
2754        if (ret)
2755                goto out_kmem_cache;
2756
2757        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2758
2759        /* Registering of the loopback is done before this portion of code,
2760         * the loopback reference in rt6_info will not be taken, do it
2761         * manually for init_net */
2762        init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2763        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2764  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2765        init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2766        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2767        init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2768        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2769  #endif
2770        ret = fib6_init();
2771        if (ret)
2772                goto out_register_subsys;
2773
2774        ret = xfrm6_init();
2775        if (ret)
2776                goto out_fib6_init;
2777
2778        ret = fib6_rules_init();
2779        if (ret)
2780                goto xfrm6_init;
2781
2782        ret = -ENOBUFS;
2783        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2784            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2785            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2786                goto fib6_rules_init;
2787
2788        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2789        if (ret)
2790                goto fib6_rules_init;
2791
2792out:
2793        return ret;
2794
2795fib6_rules_init:
2796        fib6_rules_cleanup();
2797xfrm6_init:
2798        xfrm6_fini();
2799out_fib6_init:
2800        fib6_gc_cleanup();
2801out_register_subsys:
2802        unregister_pernet_subsys(&ip6_route_net_ops);
2803out_kmem_cache:
2804        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2805        goto out;
2806}
2807
/*
 * Tear down the IPv6 routing layer.
 *
 * Runs the same sequence as the complete error unwinding in
 * ip6_route_init(), preceded by the removal of the netdevice notifier.
 * The rtnetlink handlers registered by ip6_route_init() are not
 * unregistered in this function.
 */
void ip6_route_cleanup(void)
{
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
        unregister_pernet_subsys(&ip6_route_net_ops);
        /* The slab cache goes last, after the per-namespace state is gone. */
        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}
2817