linux/net/ipv4/fib_semantics.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 Forwarding Information Base: semantics.
   7 *
   8 * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
   9 *
  10 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  11 *
  12 *              This program is free software; you can redistribute it and/or
  13 *              modify it under the terms of the GNU General Public License
  14 *              as published by the Free Software Foundation; either version
  15 *              2 of the License, or (at your option) any later version.
  16 */
  17
  18#include <asm/uaccess.h>
  19#include <asm/system.h>
  20#include <linux/bitops.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/jiffies.h>
  24#include <linux/mm.h>
  25#include <linux/string.h>
  26#include <linux/socket.h>
  27#include <linux/sockios.h>
  28#include <linux/errno.h>
  29#include <linux/in.h>
  30#include <linux/inet.h>
  31#include <linux/inetdevice.h>
  32#include <linux/netdevice.h>
  33#include <linux/if_arp.h>
  34#include <linux/proc_fs.h>
  35#include <linux/skbuff.h>
  36#include <linux/init.h>
  37
  38#include <net/arp.h>
  39#include <net/ip.h>
  40#include <net/protocol.h>
  41#include <net/route.h>
  42#include <net/tcp.h>
  43#include <net/sock.h>
  44#include <net/ip_fib.h>
  45#include <net/netlink.h>
  46#include <net/nexthop.h>
  47
  48#include "fib_lookup.h"
  49
   /* Debug trace hook; defined with an empty expansion, so uses compile to nothing. */
   50#define FSprintk(a...)
   51
   /*
    * Taken around the hash-list insertions and removals done by
    * fib_create_info(), fib_release_info() and fib_hash_move(), and around
    * the device-hash walk in ip_fib_check_default().
    */
   52static DEFINE_SPINLOCK(fib_info_lock);
   /* Bucket array of fib_info records, indexed by fib_info_hashfn(). */
   53static struct hlist_head *fib_info_hash;
   /* Bucket array of fib_info records with a prefsrc, indexed by fib_laddr_hashfn(). */
   54static struct hlist_head *fib_info_laddrhash;
   /* Number of buckets in each of the two arrays above; 0 until first allocation. */
   55static unsigned int fib_hash_size;
   /* Count of live fib_info records; compared with fib_hash_size to trigger growth. */
   56static unsigned int fib_info_cnt;
   57
   /* Fixed-size hash of nexthops keyed by device ifindex (see fib_devindex_hashfn()). */
   58#define DEVINDEX_HASHBITS 8
   59#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
   60static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
  61
   /*
    * Nexthop iteration helpers.
    *
    * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
    * two locals -- `nhsel` (index) and `nh` (pointer to the current fib_nh,
    * const for for_nexthops, writable for change_nexthops) -- and start a
    * for loop over the nexthops of `fi`.  The scope MUST be closed with
    * endfor_nexthops(fi), which supplies the matching closing brace.
    * Code placed between the loop body and endfor_nexthops() can still read
    * `nhsel` and `nh`.
    */
   62#ifdef CONFIG_IP_ROUTE_MULTIPATH
   63
   /* Not referenced in the part of the file shown here. */
   64static DEFINE_SPINLOCK(fib_multipath_lock);
   65
   /* Multipath: visit all fi->fib_nhs nexthops. */
   66#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
   67for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
   68
   69#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
   70for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
   71
   72#else /* CONFIG_IP_ROUTE_MULTIPATH */
   73
   /*
    * Single-path build: the loop runs exactly once, on the first nexthop.
    * The hope is that gcc optimizes the dummy loop away.
    */
   74/* Hope, that gcc will optimize it to get rid of dummy loop */
   75
   76#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
   77for (nhsel=0; nhsel < 1; nhsel++)
   78
   79#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
   80for (nhsel=0; nhsel < 1; nhsel++)
   81
   82#endif /* CONFIG_IP_ROUTE_MULTIPATH */
   83
   /* Closes the scope opened by for_nexthops()/change_nexthops(). */
   84#define endfor_nexthops(fi) }
  85
  86
  87static const struct
  88{
  89        int     error;
  90        u8      scope;
  91} fib_props[RTN_MAX + 1] = {
  92        {
  93                .error  = 0,
  94                .scope  = RT_SCOPE_NOWHERE,
  95        },      /* RTN_UNSPEC */
  96        {
  97                .error  = 0,
  98                .scope  = RT_SCOPE_UNIVERSE,
  99        },      /* RTN_UNICAST */
 100        {
 101                .error  = 0,
 102                .scope  = RT_SCOPE_HOST,
 103        },      /* RTN_LOCAL */
 104        {
 105                .error  = 0,
 106                .scope  = RT_SCOPE_LINK,
 107        },      /* RTN_BROADCAST */
 108        {
 109                .error  = 0,
 110                .scope  = RT_SCOPE_LINK,
 111        },      /* RTN_ANYCAST */
 112        {
 113                .error  = 0,
 114                .scope  = RT_SCOPE_UNIVERSE,
 115        },      /* RTN_MULTICAST */
 116        {
 117                .error  = -EINVAL,
 118                .scope  = RT_SCOPE_UNIVERSE,
 119        },      /* RTN_BLACKHOLE */
 120        {
 121                .error  = -EHOSTUNREACH,
 122                .scope  = RT_SCOPE_UNIVERSE,
 123        },      /* RTN_UNREACHABLE */
 124        {
 125                .error  = -EACCES,
 126                .scope  = RT_SCOPE_UNIVERSE,
 127        },      /* RTN_PROHIBIT */
 128        {
 129                .error  = -EAGAIN,
 130                .scope  = RT_SCOPE_UNIVERSE,
 131        },      /* RTN_THROW */
 132        {
 133                .error  = -EINVAL,
 134                .scope  = RT_SCOPE_NOWHERE,
 135        },      /* RTN_NAT */
 136        {
 137                .error  = -EINVAL,
 138                .scope  = RT_SCOPE_NOWHERE,
 139        },      /* RTN_XRESOLVE */
 140};
 141
 142
 143/* Release a nexthop info record */
 144
 145void free_fib_info(struct fib_info *fi)
 146{
 147        if (fi->fib_dead == 0) {
 148                printk("Freeing alive fib_info %p\n", fi);
 149                return;
 150        }
 151        change_nexthops(fi) {
 152                if (nh->nh_dev)
 153                        dev_put(nh->nh_dev);
 154                nh->nh_dev = NULL;
 155        } endfor_nexthops(fi);
 156        fib_info_cnt--;
 157        kfree(fi);
 158}
 159
 160void fib_release_info(struct fib_info *fi)
 161{
 162        spin_lock_bh(&fib_info_lock);
 163        if (fi && --fi->fib_treeref == 0) {
 164                hlist_del(&fi->fib_hash);
 165                if (fi->fib_prefsrc)
 166                        hlist_del(&fi->fib_lhash);
 167                change_nexthops(fi) {
 168                        if (!nh->nh_dev)
 169                                continue;
 170                        hlist_del(&nh->nh_hash);
 171                } endfor_nexthops(fi)
 172                fi->fib_dead = 1;
 173                fib_info_put(fi);
 174        }
 175        spin_unlock_bh(&fib_info_lock);
 176}
 177
 178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 179{
 180        const struct fib_nh *onh = ofi->fib_nh;
 181
 182        for_nexthops(fi) {
 183                if (nh->nh_oif != onh->nh_oif ||
 184                    nh->nh_gw  != onh->nh_gw ||
 185                    nh->nh_scope != onh->nh_scope ||
 186#ifdef CONFIG_IP_ROUTE_MULTIPATH
 187                    nh->nh_weight != onh->nh_weight ||
 188#endif
 189#ifdef CONFIG_NET_CLS_ROUTE
 190                    nh->nh_tclassid != onh->nh_tclassid ||
 191#endif
 192                    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
 193                        return -1;
 194                onh++;
 195        } endfor_nexthops(fi);
 196        return 0;
 197}
 198
 199static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 200{
 201        unsigned int mask = (fib_hash_size - 1);
 202        unsigned int val = fi->fib_nhs;
 203
 204        val ^= fi->fib_protocol;
 205        val ^= (__force u32)fi->fib_prefsrc;
 206        val ^= fi->fib_priority;
 207
 208        return (val ^ (val >> 7) ^ (val >> 12)) & mask;
 209}
 210
 211static struct fib_info *fib_find_info(const struct fib_info *nfi)
 212{
 213        struct hlist_head *head;
 214        struct hlist_node *node;
 215        struct fib_info *fi;
 216        unsigned int hash;
 217
 218        hash = fib_info_hashfn(nfi);
 219        head = &fib_info_hash[hash];
 220
 221        hlist_for_each_entry(fi, node, head, fib_hash) {
 222                if (fi->fib_nhs != nfi->fib_nhs)
 223                        continue;
 224                if (nfi->fib_protocol == fi->fib_protocol &&
 225                    nfi->fib_prefsrc == fi->fib_prefsrc &&
 226                    nfi->fib_priority == fi->fib_priority &&
 227                    memcmp(nfi->fib_metrics, fi->fib_metrics,
 228                           sizeof(fi->fib_metrics)) == 0 &&
 229                    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
 230                    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 231                        return fi;
 232        }
 233
 234        return NULL;
 235}
 236
 237static inline unsigned int fib_devindex_hashfn(unsigned int val)
 238{
 239        unsigned int mask = DEVINDEX_HASHSIZE - 1;
 240
 241        return (val ^
 242                (val >> DEVINDEX_HASHBITS) ^
 243                (val >> (DEVINDEX_HASHBITS * 2))) & mask;
 244}
 245
 246/* Check, that the gateway is already configured.
 247   Used only by redirect accept routine.
 248 */
 249
 250int ip_fib_check_default(__be32 gw, struct net_device *dev)
 251{
 252        struct hlist_head *head;
 253        struct hlist_node *node;
 254        struct fib_nh *nh;
 255        unsigned int hash;
 256
 257        spin_lock(&fib_info_lock);
 258
 259        hash = fib_devindex_hashfn(dev->ifindex);
 260        head = &fib_info_devhash[hash];
 261        hlist_for_each_entry(nh, node, head, nh_hash) {
 262                if (nh->nh_dev == dev &&
 263                    nh->nh_gw == gw &&
 264                    !(nh->nh_flags&RTNH_F_DEAD)) {
 265                        spin_unlock(&fib_info_lock);
 266                        return 0;
 267                }
 268        }
 269
 270        spin_unlock(&fib_info_lock);
 271
 272        return -1;
 273}
 274
 275static inline size_t fib_nlmsg_size(struct fib_info *fi)
 276{
 277        size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
 278                         + nla_total_size(4) /* RTA_TABLE */
 279                         + nla_total_size(4) /* RTA_DST */
 280                         + nla_total_size(4) /* RTA_PRIORITY */
 281                         + nla_total_size(4); /* RTA_PREFSRC */
 282
 283        /* space for nested metrics */
 284        payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
 285
 286        if (fi->fib_nhs) {
 287                /* Also handles the special case fib_nhs == 1 */
 288
 289                /* each nexthop is packed in an attribute */
 290                size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
 291
 292                /* may contain flow and gateway attribute */
 293                nhsize += 2 * nla_total_size(4);
 294
 295                /* all nexthops are packed in a nested attribute */
 296                payload += nla_total_size(fi->fib_nhs * nhsize);
 297        }
 298
 299        return payload;
 300}
 301
 302void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 303               int dst_len, u32 tb_id, struct nl_info *info,
 304               unsigned int nlm_flags)
 305{
 306        struct sk_buff *skb;
 307        u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 308        int err = -ENOBUFS;
 309
 310        skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
 311        if (skb == NULL)
 312                goto errout;
 313
 314        err = fib_dump_info(skb, info->pid, seq, event, tb_id,
 315                            fa->fa_type, fa->fa_scope, key, dst_len,
 316                            fa->fa_tos, fa->fa_info, nlm_flags);
 317        if (err < 0) {
 318                /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
 319                WARN_ON(err == -EMSGSIZE);
 320                kfree_skb(skb);
 321                goto errout;
 322        }
 323        err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
 324                          info->nlh, GFP_KERNEL);
 325errout:
 326        if (err < 0)
 327                rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
 328}
 329
 330/* Return the first fib alias matching TOS with
 331 * priority less than or equal to PRIO.
 332 */
 333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
 334{
 335        if (fah) {
 336                struct fib_alias *fa;
 337                list_for_each_entry(fa, fah, fa_list) {
 338                        if (fa->fa_tos > tos)
 339                                continue;
 340                        if (fa->fa_info->fib_priority >= prio ||
 341                            fa->fa_tos < tos)
 342                                return fa;
 343                }
 344        }
 345        return NULL;
 346}
 347
 348int fib_detect_death(struct fib_info *fi, int order,
 349                     struct fib_info **last_resort, int *last_idx, int *dflt)
 350{
 351        struct neighbour *n;
 352        int state = NUD_NONE;
 353
 354        n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
 355        if (n) {
 356                state = n->nud_state;
 357                neigh_release(n);
 358        }
 359        if (state==NUD_REACHABLE)
 360                return 0;
 361        if ((state&NUD_VALID) && order != *dflt)
 362                return 0;
 363        if ((state&NUD_VALID) ||
 364            (*last_idx<0 && order > *dflt)) {
 365                *last_resort = fi;
 366                *last_idx = order;
 367        }
 368        return 1;
 369}
 370
 371#ifdef CONFIG_IP_ROUTE_MULTIPATH
 372
 373static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
 374{
 375        int nhs = 0;
 376
 377        while (rtnh_ok(rtnh, remaining)) {
 378                nhs++;
 379                rtnh = rtnh_next(rtnh, &remaining);
 380        }
 381
 382        /* leftover implies invalid nexthop configuration, discard it */
 383        return remaining > 0 ? 0 : nhs;
 384}
 385
 386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 387                       int remaining, struct fib_config *cfg)
 388{
 389        change_nexthops(fi) {
 390                int attrlen;
 391
 392                if (!rtnh_ok(rtnh, remaining))
 393                        return -EINVAL;
 394
 395                nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
 396                nh->nh_oif = rtnh->rtnh_ifindex;
 397                nh->nh_weight = rtnh->rtnh_hops + 1;
 398
 399                attrlen = rtnh_attrlen(rtnh);
 400                if (attrlen > 0) {
 401                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 402
 403                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 404                        nh->nh_gw = nla ? nla_get_be32(nla) : 0;
 405#ifdef CONFIG_NET_CLS_ROUTE
 406                        nla = nla_find(attrs, attrlen, RTA_FLOW);
 407                        nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 408#endif
 409                }
 410
 411                rtnh = rtnh_next(rtnh, &remaining);
 412        } endfor_nexthops(fi);
 413
 414        return 0;
 415}
 416
 417#endif
 418
 419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 420{
 421#ifdef CONFIG_IP_ROUTE_MULTIPATH
 422        struct rtnexthop *rtnh;
 423        int remaining;
 424#endif
 425
 426        if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 427                return 1;
 428
 429        if (cfg->fc_oif || cfg->fc_gw) {
 430                if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
 431                    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
 432                        return 0;
 433                return 1;
 434        }
 435
 436#ifdef CONFIG_IP_ROUTE_MULTIPATH
 437        if (cfg->fc_mp == NULL)
 438                return 0;
 439
 440        rtnh = cfg->fc_mp;
 441        remaining = cfg->fc_mp_len;
 442
 443        for_nexthops(fi) {
 444                int attrlen;
 445
 446                if (!rtnh_ok(rtnh, remaining))
 447                        return -EINVAL;
 448
 449                if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
 450                        return 1;
 451
 452                attrlen = rtnh_attrlen(rtnh);
 453                if (attrlen < 0) {
 454                        struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 455
 456                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
 457                        if (nla && nla_get_be32(nla) != nh->nh_gw)
 458                                return 1;
 459#ifdef CONFIG_NET_CLS_ROUTE
 460                        nla = nla_find(attrs, attrlen, RTA_FLOW);
 461                        if (nla && nla_get_u32(nla) != nh->nh_tclassid)
 462                                return 1;
 463#endif
 464                }
 465
 466                rtnh = rtnh_next(rtnh, &remaining);
 467        } endfor_nexthops(fi);
 468#endif
 469        return 0;
 470}
 471
 472
 473/*
 474   Picture
 475   -------
 476
  477   Semantics of nexthop are very messy for historical reasons.
 478   We have to take into account, that:
 479   a) gateway can be actually local interface address,
 480      so that gatewayed route is direct.
 481   b) gateway must be on-link address, possibly
 482      described not by an ifaddr, but also by a direct route.
 483   c) If both gateway and interface are specified, they should not
 484      contradict.
 485   d) If we use tunnel routes, gateway could be not on-link.
 486
 487   Attempt to reconcile all of these (alas, self-contradictory) conditions
 488   results in pretty ugly and hairy code with obscure logic.
 489
  490   I chose to generalize it instead, so that the size
 491   of code does not increase practically, but it becomes
 492   much more general.
 493   Every prefix is assigned a "scope" value: "host" is local address,
 494   "link" is direct route,
 495   [ ... "site" ... "interior" ... ]
 496   and "universe" is true gateway route with global meaning.
 497
 498   Every prefix refers to a set of "nexthop"s (gw, oif),
 499   where gw must have narrower scope. This recursion stops
 500   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 501   which means that gw is forced to be on link.
 502
 503   Code is still hairy, but now it is apparently logically
  504   consistent and very flexible. E.g., as a by-product it allows
  505   independent exterior and interior routing processes to
  506   coexist in peace.
 507
 508   Normally it looks as following.
 509
 510   {universe prefix}  -> (gw, oif) [scope link]
 511                          |
 512                          |-> {link prefix} -> (gw, oif) [scope local]
 513                                                |
 514                                                |-> {local prefix} (terminal node)
 515 */
 516
 517static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 518                        struct fib_nh *nh)
 519{
 520        int err;
 521
 522        if (nh->nh_gw) {
 523                struct fib_result res;
 524
 525#ifdef CONFIG_IP_ROUTE_PERVASIVE
 526                if (nh->nh_flags&RTNH_F_PERVASIVE)
 527                        return 0;
 528#endif
 529                if (nh->nh_flags&RTNH_F_ONLINK) {
 530                        struct net_device *dev;
 531
 532                        if (cfg->fc_scope >= RT_SCOPE_LINK)
 533                                return -EINVAL;
 534                        if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
 535                                return -EINVAL;
 536                        if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
 537                                return -ENODEV;
 538                        if (!(dev->flags&IFF_UP))
 539                                return -ENETDOWN;
 540                        nh->nh_dev = dev;
 541                        dev_hold(dev);
 542                        nh->nh_scope = RT_SCOPE_LINK;
 543                        return 0;
 544                }
 545                {
 546                        struct flowi fl = {
 547                                .nl_u = {
 548                                        .ip4_u = {
 549                                                .daddr = nh->nh_gw,
 550                                                .scope = cfg->fc_scope + 1,
 551                                        },
 552                                },
 553                                .oif = nh->nh_oif,
 554                        };
 555
 556                        /* It is not necessary, but requires a bit of thinking */
 557                        if (fl.fl4_scope < RT_SCOPE_LINK)
 558                                fl.fl4_scope = RT_SCOPE_LINK;
 559                        if ((err = fib_lookup(&fl, &res)) != 0)
 560                                return err;
 561                }
 562                err = -EINVAL;
 563                if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
 564                        goto out;
 565                nh->nh_scope = res.scope;
 566                nh->nh_oif = FIB_RES_OIF(res);
 567                if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
 568                        goto out;
 569                dev_hold(nh->nh_dev);
 570                err = -ENETDOWN;
 571                if (!(nh->nh_dev->flags & IFF_UP))
 572                        goto out;
 573                err = 0;
 574out:
 575                fib_res_put(&res);
 576                return err;
 577        } else {
 578                struct in_device *in_dev;
 579
 580                if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
 581                        return -EINVAL;
 582
 583                in_dev = inetdev_by_index(nh->nh_oif);
 584                if (in_dev == NULL)
 585                        return -ENODEV;
 586                if (!(in_dev->dev->flags&IFF_UP)) {
 587                        in_dev_put(in_dev);
 588                        return -ENETDOWN;
 589                }
 590                nh->nh_dev = in_dev->dev;
 591                dev_hold(nh->nh_dev);
 592                nh->nh_scope = RT_SCOPE_HOST;
 593                in_dev_put(in_dev);
 594        }
 595        return 0;
 596}
 597
 598static inline unsigned int fib_laddr_hashfn(__be32 val)
 599{
 600        unsigned int mask = (fib_hash_size - 1);
 601
 602        return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
 603}
 604
 605static struct hlist_head *fib_hash_alloc(int bytes)
 606{
 607        if (bytes <= PAGE_SIZE)
 608                return kmalloc(bytes, GFP_KERNEL);
 609        else
 610                return (struct hlist_head *)
 611                        __get_free_pages(GFP_KERNEL, get_order(bytes));
 612}
 613
 614static void fib_hash_free(struct hlist_head *hash, int bytes)
 615{
 616        if (!hash)
 617                return;
 618
 619        if (bytes <= PAGE_SIZE)
 620                kfree(hash);
 621        else
 622                free_pages((unsigned long) hash, get_order(bytes));
 623}
 624
 625static void fib_hash_move(struct hlist_head *new_info_hash,
 626                          struct hlist_head *new_laddrhash,
 627                          unsigned int new_size)
 628{
 629        struct hlist_head *old_info_hash, *old_laddrhash;
 630        unsigned int old_size = fib_hash_size;
 631        unsigned int i, bytes;
 632
 633        spin_lock_bh(&fib_info_lock);
 634        old_info_hash = fib_info_hash;
 635        old_laddrhash = fib_info_laddrhash;
 636        fib_hash_size = new_size;
 637
 638        for (i = 0; i < old_size; i++) {
 639                struct hlist_head *head = &fib_info_hash[i];
 640                struct hlist_node *node, *n;
 641                struct fib_info *fi;
 642
 643                hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
 644                        struct hlist_head *dest;
 645                        unsigned int new_hash;
 646
 647                        hlist_del(&fi->fib_hash);
 648
 649                        new_hash = fib_info_hashfn(fi);
 650                        dest = &new_info_hash[new_hash];
 651                        hlist_add_head(&fi->fib_hash, dest);
 652                }
 653        }
 654        fib_info_hash = new_info_hash;
 655
 656        for (i = 0; i < old_size; i++) {
 657                struct hlist_head *lhead = &fib_info_laddrhash[i];
 658                struct hlist_node *node, *n;
 659                struct fib_info *fi;
 660
 661                hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
 662                        struct hlist_head *ldest;
 663                        unsigned int new_hash;
 664
 665                        hlist_del(&fi->fib_lhash);
 666
 667                        new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
 668                        ldest = &new_laddrhash[new_hash];
 669                        hlist_add_head(&fi->fib_lhash, ldest);
 670                }
 671        }
 672        fib_info_laddrhash = new_laddrhash;
 673
 674        spin_unlock_bh(&fib_info_lock);
 675
 676        bytes = old_size * sizeof(struct hlist_head *);
 677        fib_hash_free(old_info_hash, bytes);
 678        fib_hash_free(old_laddrhash, bytes);
 679}
 680
 681struct fib_info *fib_create_info(struct fib_config *cfg)
 682{
 683        int err;
 684        struct fib_info *fi = NULL;
 685        struct fib_info *ofi;
 686        int nhs = 1;
 687
 688        /* Fast check to catch the most weird cases */
 689        if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 690                goto err_inval;
 691
 692#ifdef CONFIG_IP_ROUTE_MULTIPATH
 693        if (cfg->fc_mp) {
 694                nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
 695                if (nhs == 0)
 696                        goto err_inval;
 697        }
 698#endif
 699
 700        err = -ENOBUFS;
 701        if (fib_info_cnt >= fib_hash_size) {
 702                unsigned int new_size = fib_hash_size << 1;
 703                struct hlist_head *new_info_hash;
 704                struct hlist_head *new_laddrhash;
 705                unsigned int bytes;
 706
 707                if (!new_size)
 708                        new_size = 1;
 709                bytes = new_size * sizeof(struct hlist_head *);
 710                new_info_hash = fib_hash_alloc(bytes);
 711                new_laddrhash = fib_hash_alloc(bytes);
 712                if (!new_info_hash || !new_laddrhash) {
 713                        fib_hash_free(new_info_hash, bytes);
 714                        fib_hash_free(new_laddrhash, bytes);
 715                } else {
 716                        memset(new_info_hash, 0, bytes);
 717                        memset(new_laddrhash, 0, bytes);
 718
 719                        fib_hash_move(new_info_hash, new_laddrhash, new_size);
 720                }
 721
 722                if (!fib_hash_size)
 723                        goto failure;
 724        }
 725
 726        fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 727        if (fi == NULL)
 728                goto failure;
 729        fib_info_cnt++;
 730
 731        fi->fib_protocol = cfg->fc_protocol;
 732        fi->fib_flags = cfg->fc_flags;
 733        fi->fib_priority = cfg->fc_priority;
 734        fi->fib_prefsrc = cfg->fc_prefsrc;
 735
 736        fi->fib_nhs = nhs;
 737        change_nexthops(fi) {
 738                nh->nh_parent = fi;
 739        } endfor_nexthops(fi)
 740
 741        if (cfg->fc_mx) {
 742                struct nlattr *nla;
 743                int remaining;
 744
 745                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
 746                        int type = nla_type(nla);
 747
 748                        if (type) {
 749                                if (type > RTAX_MAX)
 750                                        goto err_inval;
 751                                fi->fib_metrics[type - 1] = nla_get_u32(nla);
 752                        }
 753                }
 754        }
 755
 756        if (cfg->fc_mp) {
 757#ifdef CONFIG_IP_ROUTE_MULTIPATH
 758                err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
 759                if (err != 0)
 760                        goto failure;
 761                if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
 762                        goto err_inval;
 763                if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
 764                        goto err_inval;
 765#ifdef CONFIG_NET_CLS_ROUTE
 766                if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
 767                        goto err_inval;
 768#endif
 769#else
 770                goto err_inval;
 771#endif
 772        } else {
 773                struct fib_nh *nh = fi->fib_nh;
 774
 775                nh->nh_oif = cfg->fc_oif;
 776                nh->nh_gw = cfg->fc_gw;
 777                nh->nh_flags = cfg->fc_flags;
 778#ifdef CONFIG_NET_CLS_ROUTE
 779                nh->nh_tclassid = cfg->fc_flow;
 780#endif
 781#ifdef CONFIG_IP_ROUTE_MULTIPATH
 782                nh->nh_weight = 1;
 783#endif
 784        }
 785
 786        if (fib_props[cfg->fc_type].error) {
 787                if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
 788                        goto err_inval;
 789                goto link_it;
 790        }
 791
 792        if (cfg->fc_scope > RT_SCOPE_HOST)
 793                goto err_inval;
 794
 795        if (cfg->fc_scope == RT_SCOPE_HOST) {
 796                struct fib_nh *nh = fi->fib_nh;
 797
 798                /* Local address is added. */
 799                if (nhs != 1 || nh->nh_gw)
 800                        goto err_inval;
 801                nh->nh_scope = RT_SCOPE_NOWHERE;
 802                nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
 803                err = -ENODEV;
 804                if (nh->nh_dev == NULL)
 805                        goto failure;
 806        } else {
 807                change_nexthops(fi) {
 808                        if ((err = fib_check_nh(cfg, fi, nh)) != 0)
 809                                goto failure;
 810                } endfor_nexthops(fi)
 811        }
 812
 813        if (fi->fib_prefsrc) {
 814                if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
 815                    fi->fib_prefsrc != cfg->fc_dst)
 816                        if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
 817                                goto err_inval;
 818        }
 819
 820link_it:
 821        if ((ofi = fib_find_info(fi)) != NULL) {
 822                fi->fib_dead = 1;
 823                free_fib_info(fi);
 824                ofi->fib_treeref++;
 825                return ofi;
 826        }
 827
 828        fi->fib_treeref++;
 829        atomic_inc(&fi->fib_clntref);
 830        spin_lock_bh(&fib_info_lock);
 831        hlist_add_head(&fi->fib_hash,
 832                       &fib_info_hash[fib_info_hashfn(fi)]);
 833        if (fi->fib_prefsrc) {
 834                struct hlist_head *head;
 835
 836                head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
 837                hlist_add_head(&fi->fib_lhash, head);
 838        }
 839        change_nexthops(fi) {
 840                struct hlist_head *head;
 841                unsigned int hash;
 842
 843                if (!nh->nh_dev)
 844                        continue;
 845                hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
 846                head = &fib_info_devhash[hash];
 847                hlist_add_head(&nh->nh_hash, head);
 848        } endfor_nexthops(fi)
 849        spin_unlock_bh(&fib_info_lock);
 850        return fi;
 851
 852err_inval:
 853        err = -EINVAL;
 854
 855failure:
 856        if (fi) {
 857                fi->fib_dead = 1;
 858                free_fib_info(fi);
 859        }
 860
 861        return ERR_PTR(err);
 862}
 863
 864/* Note! fib_semantic_match intentionally uses  RCU list functions. */
 865int fib_semantic_match(struct list_head *head, const struct flowi *flp,
 866                       struct fib_result *res, __be32 zone, __be32 mask,
 867                        int prefixlen)
 868{
 869        struct fib_alias *fa;
 870        int nh_sel = 0;
 871
 872        list_for_each_entry_rcu(fa, head, fa_list) {
 873                int err;
 874
 875                if (fa->fa_tos &&
 876                    fa->fa_tos != flp->fl4_tos)
 877                        continue;
 878
 879                if (fa->fa_scope < flp->fl4_scope)
 880                        continue;
 881
 882                fa->fa_state |= FA_S_ACCESSED;
 883
 884                err = fib_props[fa->fa_type].error;
 885                if (err == 0) {
 886                        struct fib_info *fi = fa->fa_info;
 887
 888                        if (fi->fib_flags & RTNH_F_DEAD)
 889                                continue;
 890
 891                        switch (fa->fa_type) {
 892                        case RTN_UNICAST:
 893                        case RTN_LOCAL:
 894                        case RTN_BROADCAST:
 895                        case RTN_ANYCAST:
 896                        case RTN_MULTICAST:
 897                                for_nexthops(fi) {
 898                                        if (nh->nh_flags&RTNH_F_DEAD)
 899                                                continue;
 900                                        if (!flp->oif || flp->oif == nh->nh_oif)
 901                                                break;
 902                                }
 903#ifdef CONFIG_IP_ROUTE_MULTIPATH
 904                                if (nhsel < fi->fib_nhs) {
 905                                        nh_sel = nhsel;
 906                                        goto out_fill_res;
 907                                }
 908#else
 909                                if (nhsel < 1) {
 910                                        goto out_fill_res;
 911                                }
 912#endif
 913                                endfor_nexthops(fi);
 914                                continue;
 915
 916                        default:
 917                                printk(KERN_DEBUG "impossible 102\n");
 918                                return -EINVAL;
 919                        }
 920                }
 921                return err;
 922        }
 923        return 1;
 924
 925out_fill_res:
 926        res->prefixlen = prefixlen;
 927        res->nh_sel = nh_sel;
 928        res->type = fa->fa_type;
 929        res->scope = fa->fa_scope;
 930        res->fi = fa->fa_info;
 931        atomic_inc(&res->fi->fib_clntref);
 932        return 0;
 933}
 934
 935/* Find appropriate source address to this destination */
 936
 937__be32 __fib_res_prefsrc(struct fib_result *res)
 938{
 939        return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
 940}
 941
 942int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 943                  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
 944                  struct fib_info *fi, unsigned int flags)
 945{
 946        struct nlmsghdr *nlh;
 947        struct rtmsg *rtm;
 948
 949        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
 950        if (nlh == NULL)
 951                return -EMSGSIZE;
 952
 953        rtm = nlmsg_data(nlh);
 954        rtm->rtm_family = AF_INET;
 955        rtm->rtm_dst_len = dst_len;
 956        rtm->rtm_src_len = 0;
 957        rtm->rtm_tos = tos;
 958        rtm->rtm_table = tb_id;
 959        NLA_PUT_U32(skb, RTA_TABLE, tb_id);
 960        rtm->rtm_type = type;
 961        rtm->rtm_flags = fi->fib_flags;
 962        rtm->rtm_scope = scope;
 963        rtm->rtm_protocol = fi->fib_protocol;
 964
 965        if (rtm->rtm_dst_len)
 966                NLA_PUT_BE32(skb, RTA_DST, dst);
 967
 968        if (fi->fib_priority)
 969                NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
 970
 971        if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
 972                goto nla_put_failure;
 973
 974        if (fi->fib_prefsrc)
 975                NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
 976
 977        if (fi->fib_nhs == 1) {
 978                if (fi->fib_nh->nh_gw)
 979                        NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
 980
 981                if (fi->fib_nh->nh_oif)
 982                        NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
 983#ifdef CONFIG_NET_CLS_ROUTE
 984                if (fi->fib_nh[0].nh_tclassid)
 985                        NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 986#endif
 987        }
 988#ifdef CONFIG_IP_ROUTE_MULTIPATH
 989        if (fi->fib_nhs > 1) {
 990                struct rtnexthop *rtnh;
 991                struct nlattr *mp;
 992
 993                mp = nla_nest_start(skb, RTA_MULTIPATH);
 994                if (mp == NULL)
 995                        goto nla_put_failure;
 996
 997                for_nexthops(fi) {
 998                        rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
 999                        if (rtnh == NULL)
1000                                goto nla_put_failure;
1001
1002                        rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1003                        rtnh->rtnh_hops = nh->nh_weight - 1;
1004                        rtnh->rtnh_ifindex = nh->nh_oif;
1005
1006                        if (nh->nh_gw)
1007                                NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1008#ifdef CONFIG_NET_CLS_ROUTE
1009                        if (nh->nh_tclassid)
1010                                NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1011#endif
1012                        /* length of rtnetlink header + attributes */
1013                        rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1014                } endfor_nexthops(fi);
1015
1016                nla_nest_end(skb, mp);
1017        }
1018#endif
1019        return nlmsg_end(skb, nlh);
1020
1021nla_put_failure:
1022        nlmsg_cancel(skb, nlh);
1023        return -EMSGSIZE;
1024}
1025
1026/*
1027   Update FIB if:
1028   - local address disappeared -> we must delete all the entries
1029     referring to it.
1030   - device went down -> we must shutdown all nexthops going via it.
1031 */
1032
1033int fib_sync_down(__be32 local, struct net_device *dev, int force)
1034{
1035        int ret = 0;
1036        int scope = RT_SCOPE_NOWHERE;
1037
1038        if (force)
1039                scope = -1;
1040
1041        if (local && fib_info_laddrhash) {
1042                unsigned int hash = fib_laddr_hashfn(local);
1043                struct hlist_head *head = &fib_info_laddrhash[hash];
1044                struct hlist_node *node;
1045                struct fib_info *fi;
1046
1047                hlist_for_each_entry(fi, node, head, fib_lhash) {
1048                        if (fi->fib_prefsrc == local) {
1049                                fi->fib_flags |= RTNH_F_DEAD;
1050                                ret++;
1051                        }
1052                }
1053        }
1054
1055        if (dev) {
1056                struct fib_info *prev_fi = NULL;
1057                unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1058                struct hlist_head *head = &fib_info_devhash[hash];
1059                struct hlist_node *node;
1060                struct fib_nh *nh;
1061
1062                hlist_for_each_entry(nh, node, head, nh_hash) {
1063                        struct fib_info *fi = nh->nh_parent;
1064                        int dead;
1065
1066                        BUG_ON(!fi->fib_nhs);
1067                        if (nh->nh_dev != dev || fi == prev_fi)
1068                                continue;
1069                        prev_fi = fi;
1070                        dead = 0;
1071                        change_nexthops(fi) {
1072                                if (nh->nh_flags&RTNH_F_DEAD)
1073                                        dead++;
1074                                else if (nh->nh_dev == dev &&
1075                                         nh->nh_scope != scope) {
1076                                        nh->nh_flags |= RTNH_F_DEAD;
1077#ifdef CONFIG_IP_ROUTE_MULTIPATH
1078                                        spin_lock_bh(&fib_multipath_lock);
1079                                        fi->fib_power -= nh->nh_power;
1080                                        nh->nh_power = 0;
1081                                        spin_unlock_bh(&fib_multipath_lock);
1082#endif
1083                                        dead++;
1084                                }
1085#ifdef CONFIG_IP_ROUTE_MULTIPATH
1086                                if (force > 1 && nh->nh_dev == dev) {
1087                                        dead = fi->fib_nhs;
1088                                        break;
1089                                }
1090#endif
1091                        } endfor_nexthops(fi)
1092                        if (dead == fi->fib_nhs) {
1093                                fi->fib_flags |= RTNH_F_DEAD;
1094                                ret++;
1095                        }
1096                }
1097        }
1098
1099        return ret;
1100}
1101
1102#ifdef CONFIG_IP_ROUTE_MULTIPATH
1103
1104/*
1105   Dead device goes up. We wake up dead nexthops.
1106   It takes sense only on multipath routes.
1107 */
1108
1109int fib_sync_up(struct net_device *dev)
1110{
1111        struct fib_info *prev_fi;
1112        unsigned int hash;
1113        struct hlist_head *head;
1114        struct hlist_node *node;
1115        struct fib_nh *nh;
1116        int ret;
1117
1118        if (!(dev->flags&IFF_UP))
1119                return 0;
1120
1121        prev_fi = NULL;
1122        hash = fib_devindex_hashfn(dev->ifindex);
1123        head = &fib_info_devhash[hash];
1124        ret = 0;
1125
1126        hlist_for_each_entry(nh, node, head, nh_hash) {
1127                struct fib_info *fi = nh->nh_parent;
1128                int alive;
1129
1130                BUG_ON(!fi->fib_nhs);
1131                if (nh->nh_dev != dev || fi == prev_fi)
1132                        continue;
1133
1134                prev_fi = fi;
1135                alive = 0;
1136                change_nexthops(fi) {
1137                        if (!(nh->nh_flags&RTNH_F_DEAD)) {
1138                                alive++;
1139                                continue;
1140                        }
1141                        if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1142                                continue;
1143                        if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1144                                continue;
1145                        alive++;
1146                        spin_lock_bh(&fib_multipath_lock);
1147                        nh->nh_power = 0;
1148                        nh->nh_flags &= ~RTNH_F_DEAD;
1149                        spin_unlock_bh(&fib_multipath_lock);
1150                } endfor_nexthops(fi)
1151
1152                if (alive > 0) {
1153                        fi->fib_flags &= ~RTNH_F_DEAD;
1154                        ret++;
1155                }
1156        }
1157
1158        return ret;
1159}
1160
1161/*
1162   The algorithm is suboptimal, but it provides really
1163   fair weighted route distribution.
1164 */
1165
1166void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1167{
1168        struct fib_info *fi = res->fi;
1169        int w;
1170
1171        spin_lock_bh(&fib_multipath_lock);
1172        if (fi->fib_power <= 0) {
1173                int power = 0;
1174                change_nexthops(fi) {
1175                        if (!(nh->nh_flags&RTNH_F_DEAD)) {
1176                                power += nh->nh_weight;
1177                                nh->nh_power = nh->nh_weight;
1178                        }
1179                } endfor_nexthops(fi);
1180                fi->fib_power = power;
1181                if (power <= 0) {
1182                        spin_unlock_bh(&fib_multipath_lock);
1183                        /* Race condition: route has just become dead. */
1184                        res->nh_sel = 0;
1185                        return;
1186                }
1187        }
1188
1189
1190        /* w should be random number [0..fi->fib_power-1],
1191           it is pretty bad approximation.
1192         */
1193
1194        w = jiffies % fi->fib_power;
1195
1196        change_nexthops(fi) {
1197                if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1198                        if ((w -= nh->nh_power) <= 0) {
1199                                nh->nh_power--;
1200                                fi->fib_power--;
1201                                res->nh_sel = nhsel;
1202                                spin_unlock_bh(&fib_multipath_lock);
1203                                return;
1204                        }
1205                }
1206        } endfor_nexthops(fi);
1207
1208        /* Race condition: route has just become dead. */
1209        res->nh_sel = 0;
1210        spin_unlock_bh(&fib_multipath_lock);
1211}
1212#endif
1213