linux/net/ipv4/fib_hash.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 FIB: lookup engine and maintenance routines.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 *
  10 *              This program is free software; you can redistribute it and/or
  11 *              modify it under the terms of the GNU General Public License
  12 *              as published by the Free Software Foundation; either version
  13 *              2 of the License, or (at your option) any later version.
  14 */
  15
  16#include <asm/uaccess.h>
  17#include <asm/system.h>
  18#include <linux/bitops.h>
  19#include <linux/types.h>
  20#include <linux/kernel.h>
  21#include <linux/mm.h>
  22#include <linux/string.h>
  23#include <linux/socket.h>
  24#include <linux/sockios.h>
  25#include <linux/errno.h>
  26#include <linux/in.h>
  27#include <linux/inet.h>
  28#include <linux/inetdevice.h>
  29#include <linux/netdevice.h>
  30#include <linux/if_arp.h>
  31#include <linux/proc_fs.h>
  32#include <linux/skbuff.h>
  33#include <linux/netlink.h>
  34#include <linux/init.h>
  35
  36#include <net/net_namespace.h>
  37#include <net/ip.h>
  38#include <net/protocol.h>
  39#include <net/route.h>
  40#include <net/tcp.h>
  41#include <net/sock.h>
  42#include <net/ip_fib.h>
  43
  44#include "fib_lookup.h"
  45
  46static struct kmem_cache *fn_hash_kmem __read_mostly;
  47static struct kmem_cache *fn_alias_kmem __read_mostly;
  48
  49struct fib_node {
  50        struct hlist_node       fn_hash;
  51        struct list_head        fn_alias;
  52        __be32                  fn_key;
  53        struct fib_alias        fn_embedded_alias;
  54};
  55
  56struct fn_zone {
  57        struct fn_zone          *fz_next;       /* Next not empty zone  */
  58        struct hlist_head       *fz_hash;       /* Hash table pointer   */
  59        int                     fz_nent;        /* Number of entries    */
  60
  61        int                     fz_divisor;     /* Hash divisor         */
  62        u32                     fz_hashmask;    /* (fz_divisor - 1)     */
  63#define FZ_HASHMASK(fz)         ((fz)->fz_hashmask)
  64
  65        int                     fz_order;       /* Zone order           */
  66        __be32                  fz_mask;
  67#define FZ_MASK(fz)             ((fz)->fz_mask)
  68};
  69
  70/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
  71 * can be cheaper than memory lookup, so that FZ_* macros are used.
  72 */
  73
  74struct fn_hash {
  75        struct fn_zone  *fn_zones[33];
  76        struct fn_zone  *fn_zone_list;
  77};
  78
  79static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
  80{
  81        u32 h = ntohl(key)>>(32 - fz->fz_order);
  82        h ^= (h>>20);
  83        h ^= (h>>10);
  84        h ^= (h>>5);
  85        h &= FZ_HASHMASK(fz);
  86        return h;
  87}
  88
  89static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
  90{
  91        return dst & FZ_MASK(fz);
  92}
  93
  94static DEFINE_RWLOCK(fib_hash_lock);
  95static unsigned int fib_hash_genid;
  96
  97#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
  98
  99static struct hlist_head *fz_hash_alloc(int divisor)
 100{
 101        unsigned long size = divisor * sizeof(struct hlist_head);
 102
 103        if (size <= PAGE_SIZE) {
 104                return kzalloc(size, GFP_KERNEL);
 105        } else {
 106                return (struct hlist_head *)
 107                        __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
 108        }
 109}
 110
 111/* The fib hash lock must be held when this is called. */
 112static inline void fn_rebuild_zone(struct fn_zone *fz,
 113                                   struct hlist_head *old_ht,
 114                                   int old_divisor)
 115{
 116        int i;
 117
 118        for (i = 0; i < old_divisor; i++) {
 119                struct hlist_node *node, *n;
 120                struct fib_node *f;
 121
 122                hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
 123                        struct hlist_head *new_head;
 124
 125                        hlist_del(&f->fn_hash);
 126
 127                        new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
 128                        hlist_add_head(&f->fn_hash, new_head);
 129                }
 130        }
 131}
 132
 133static void fz_hash_free(struct hlist_head *hash, int divisor)
 134{
 135        unsigned long size = divisor * sizeof(struct hlist_head);
 136
 137        if (size <= PAGE_SIZE)
 138                kfree(hash);
 139        else
 140                free_pages((unsigned long)hash, get_order(size));
 141}
 142
 143static void fn_rehash_zone(struct fn_zone *fz)
 144{
 145        struct hlist_head *ht, *old_ht;
 146        int old_divisor, new_divisor;
 147        u32 new_hashmask;
 148
 149        old_divisor = fz->fz_divisor;
 150
 151        switch (old_divisor) {
 152        case 16:
 153                new_divisor = 256;
 154                break;
 155        case 256:
 156                new_divisor = 1024;
 157                break;
 158        default:
 159                if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
 160                        printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
 161                        return;
 162                }
 163                new_divisor = (old_divisor << 1);
 164                break;
 165        }
 166
 167        new_hashmask = (new_divisor - 1);
 168
 169#if RT_CACHE_DEBUG >= 2
 170        printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n",
 171               fz->fz_order, old_divisor);
 172#endif
 173
 174        ht = fz_hash_alloc(new_divisor);
 175
 176        if (ht) {
 177                write_lock_bh(&fib_hash_lock);
 178                old_ht = fz->fz_hash;
 179                fz->fz_hash = ht;
 180                fz->fz_hashmask = new_hashmask;
 181                fz->fz_divisor = new_divisor;
 182                fn_rebuild_zone(fz, old_ht, old_divisor);
 183                fib_hash_genid++;
 184                write_unlock_bh(&fib_hash_lock);
 185
 186                fz_hash_free(old_ht, old_divisor);
 187        }
 188}
 189
 190static inline void fn_free_node(struct fib_node * f)
 191{
 192        kmem_cache_free(fn_hash_kmem, f);
 193}
 194
 195static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
 196{
 197        fib_release_info(fa->fa_info);
 198        if (fa == &f->fn_embedded_alias)
 199                fa->fa_info = NULL;
 200        else
 201                kmem_cache_free(fn_alias_kmem, fa);
 202}
 203
 204static struct fn_zone *
 205fn_new_zone(struct fn_hash *table, int z)
 206{
 207        int i;
 208        struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL);
 209        if (!fz)
 210                return NULL;
 211
 212        if (z) {
 213                fz->fz_divisor = 16;
 214        } else {
 215                fz->fz_divisor = 1;
 216        }
 217        fz->fz_hashmask = (fz->fz_divisor - 1);
 218        fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
 219        if (!fz->fz_hash) {
 220                kfree(fz);
 221                return NULL;
 222        }
 223        fz->fz_order = z;
 224        fz->fz_mask = inet_make_mask(z);
 225
 226        /* Find the first not empty zone with more specific mask */
 227        for (i=z+1; i<=32; i++)
 228                if (table->fn_zones[i])
 229                        break;
 230        write_lock_bh(&fib_hash_lock);
 231        if (i>32) {
 232                /* No more specific masks, we are the first. */
 233                fz->fz_next = table->fn_zone_list;
 234                table->fn_zone_list = fz;
 235        } else {
 236                fz->fz_next = table->fn_zones[i]->fz_next;
 237                table->fn_zones[i]->fz_next = fz;
 238        }
 239        table->fn_zones[z] = fz;
 240        fib_hash_genid++;
 241        write_unlock_bh(&fib_hash_lock);
 242        return fz;
 243}
 244
 245static int
 246fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 247{
 248        int err;
 249        struct fn_zone *fz;
 250        struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 251
 252        read_lock(&fib_hash_lock);
 253        for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
 254                struct hlist_head *head;
 255                struct hlist_node *node;
 256                struct fib_node *f;
 257                __be32 k = fz_key(flp->fl4_dst, fz);
 258
 259                head = &fz->fz_hash[fn_hash(k, fz)];
 260                hlist_for_each_entry(f, node, head, fn_hash) {
 261                        if (f->fn_key != k)
 262                                continue;
 263
 264                        err = fib_semantic_match(&f->fn_alias,
 265                                                 flp, res,
 266                                                 fz->fz_order);
 267                        if (err <= 0)
 268                                goto out;
 269                }
 270        }
 271        err = 1;
 272out:
 273        read_unlock(&fib_hash_lock);
 274        return err;
 275}
 276
 277static void
 278fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
 279{
 280        int order, last_idx;
 281        struct hlist_node *node;
 282        struct fib_node *f;
 283        struct fib_info *fi = NULL;
 284        struct fib_info *last_resort;
 285        struct fn_hash *t = (struct fn_hash *)tb->tb_data;
 286        struct fn_zone *fz = t->fn_zones[0];
 287
 288        if (fz == NULL)
 289                return;
 290
 291        last_idx = -1;
 292        last_resort = NULL;
 293        order = -1;
 294
 295        read_lock(&fib_hash_lock);
 296        hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
 297                struct fib_alias *fa;
 298
 299                list_for_each_entry(fa, &f->fn_alias, fa_list) {
 300                        struct fib_info *next_fi = fa->fa_info;
 301
 302                        if (fa->fa_scope != res->scope ||
 303                            fa->fa_type != RTN_UNICAST)
 304                                continue;
 305
 306                        if (next_fi->fib_priority > res->fi->fib_priority)
 307                                break;
 308                        if (!next_fi->fib_nh[0].nh_gw ||
 309                            next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
 310                                continue;
 311                        fa->fa_state |= FA_S_ACCESSED;
 312
 313                        if (fi == NULL) {
 314                                if (next_fi != res->fi)
 315                                        break;
 316                        } else if (!fib_detect_death(fi, order, &last_resort,
 317                                                &last_idx, tb->tb_default)) {
 318                                fib_result_assign(res, fi);
 319                                tb->tb_default = order;
 320                                goto out;
 321                        }
 322                        fi = next_fi;
 323                        order++;
 324                }
 325        }
 326
 327        if (order <= 0 || fi == NULL) {
 328                tb->tb_default = -1;
 329                goto out;
 330        }
 331
 332        if (!fib_detect_death(fi, order, &last_resort, &last_idx,
 333                                tb->tb_default)) {
 334                fib_result_assign(res, fi);
 335                tb->tb_default = order;
 336                goto out;
 337        }
 338
 339        if (last_idx >= 0)
 340                fib_result_assign(res, last_resort);
 341        tb->tb_default = last_idx;
 342out:
 343        read_unlock(&fib_hash_lock);
 344}
 345
 346/* Insert node F to FZ. */
 347static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
 348{
 349        struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
 350
 351        hlist_add_head(&f->fn_hash, head);
 352}
 353
 354/* Return the node in FZ matching KEY. */
 355static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
 356{
 357        struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
 358        struct hlist_node *node;
 359        struct fib_node *f;
 360
 361        hlist_for_each_entry(f, node, head, fn_hash) {
 362                if (f->fn_key == key)
 363                        return f;
 364        }
 365
 366        return NULL;
 367}
 368
 369static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
 370{
 371        struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 372        struct fib_node *new_f = NULL;
 373        struct fib_node *f;
 374        struct fib_alias *fa, *new_fa;
 375        struct fn_zone *fz;
 376        struct fib_info *fi;
 377        u8 tos = cfg->fc_tos;
 378        __be32 key;
 379        int err;
 380
 381        if (cfg->fc_dst_len > 32)
 382                return -EINVAL;
 383
 384        fz = table->fn_zones[cfg->fc_dst_len];
 385        if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len)))
 386                return -ENOBUFS;
 387
 388        key = 0;
 389        if (cfg->fc_dst) {
 390                if (cfg->fc_dst & ~FZ_MASK(fz))
 391                        return -EINVAL;
 392                key = fz_key(cfg->fc_dst, fz);
 393        }
 394
 395        fi = fib_create_info(cfg);
 396        if (IS_ERR(fi))
 397                return PTR_ERR(fi);
 398
 399        if (fz->fz_nent > (fz->fz_divisor<<1) &&
 400            fz->fz_divisor < FZ_MAX_DIVISOR &&
 401            (cfg->fc_dst_len == 32 ||
 402             (1 << cfg->fc_dst_len) > fz->fz_divisor))
 403                fn_rehash_zone(fz);
 404
 405        f = fib_find_node(fz, key);
 406
 407        if (!f)
 408                fa = NULL;
 409        else
 410                fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority);
 411
 412        /* Now fa, if non-NULL, points to the first fib alias
 413         * with the same keys [prefix,tos,priority], if such key already
 414         * exists or to the node before which we will insert new one.
 415         *
 416         * If fa is NULL, we will need to allocate a new one and
 417         * insert to the head of f.
 418         *
 419         * If f is NULL, no fib node matched the destination key
 420         * and we need to allocate a new one of those as well.
 421         */
 422
 423        if (fa && fa->fa_tos == tos &&
 424            fa->fa_info->fib_priority == fi->fib_priority) {
 425                struct fib_alias *fa_first, *fa_match;
 426
 427                err = -EEXIST;
 428                if (cfg->fc_nlflags & NLM_F_EXCL)
 429                        goto out;
 430
 431                /* We have 2 goals:
 432                 * 1. Find exact match for type, scope, fib_info to avoid
 433                 * duplicate routes
 434                 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
 435                 */
 436                fa_match = NULL;
 437                fa_first = fa;
 438                fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
 439                list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
 440                        if (fa->fa_tos != tos)
 441                                break;
 442                        if (fa->fa_info->fib_priority != fi->fib_priority)
 443                                break;
 444                        if (fa->fa_type == cfg->fc_type &&
 445                            fa->fa_scope == cfg->fc_scope &&
 446                            fa->fa_info == fi) {
 447                                fa_match = fa;
 448                                break;
 449                        }
 450                }
 451
 452                if (cfg->fc_nlflags & NLM_F_REPLACE) {
 453                        struct fib_info *fi_drop;
 454                        u8 state;
 455
 456                        fa = fa_first;
 457                        if (fa_match) {
 458                                if (fa == fa_match)
 459                                        err = 0;
 460                                goto out;
 461                        }
 462                        write_lock_bh(&fib_hash_lock);
 463                        fi_drop = fa->fa_info;
 464                        fa->fa_info = fi;
 465                        fa->fa_type = cfg->fc_type;
 466                        fa->fa_scope = cfg->fc_scope;
 467                        state = fa->fa_state;
 468                        fa->fa_state &= ~FA_S_ACCESSED;
 469                        fib_hash_genid++;
 470                        write_unlock_bh(&fib_hash_lock);
 471
 472                        fib_release_info(fi_drop);
 473                        if (state & FA_S_ACCESSED)
 474                                rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 475                        rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
 476                                  &cfg->fc_nlinfo, NLM_F_REPLACE);
 477                        return 0;
 478                }
 479
 480                /* Error if we find a perfect match which
 481                 * uses the same scope, type, and nexthop
 482                 * information.
 483                 */
 484                if (fa_match)
 485                        goto out;
 486
 487                if (!(cfg->fc_nlflags & NLM_F_APPEND))
 488                        fa = fa_first;
 489        }
 490
 491        err = -ENOENT;
 492        if (!(cfg->fc_nlflags & NLM_F_CREATE))
 493                goto out;
 494
 495        err = -ENOBUFS;
 496
 497        if (!f) {
 498                new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL);
 499                if (new_f == NULL)
 500                        goto out;
 501
 502                INIT_HLIST_NODE(&new_f->fn_hash);
 503                INIT_LIST_HEAD(&new_f->fn_alias);
 504                new_f->fn_key = key;
 505                f = new_f;
 506        }
 507
 508        new_fa = &f->fn_embedded_alias;
 509        if (new_fa->fa_info != NULL) {
 510                new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 511                if (new_fa == NULL)
 512                        goto out;
 513        }
 514        new_fa->fa_info = fi;
 515        new_fa->fa_tos = tos;
 516        new_fa->fa_type = cfg->fc_type;
 517        new_fa->fa_scope = cfg->fc_scope;
 518        new_fa->fa_state = 0;
 519
 520        /*
 521         * Insert new entry to the list.
 522         */
 523
 524        write_lock_bh(&fib_hash_lock);
 525        if (new_f)
 526                fib_insert_node(fz, new_f);
 527        list_add_tail(&new_fa->fa_list,
 528                 (fa ? &fa->fa_list : &f->fn_alias));
 529        fib_hash_genid++;
 530        write_unlock_bh(&fib_hash_lock);
 531
 532        if (new_f)
 533                fz->fz_nent++;
 534        rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 535
 536        rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
 537                  &cfg->fc_nlinfo, 0);
 538        return 0;
 539
 540out:
 541        if (new_f)
 542                kmem_cache_free(fn_hash_kmem, new_f);
 543        fib_release_info(fi);
 544        return err;
 545}
 546
 547
 548static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
 549{
 550        struct fn_hash *table = (struct fn_hash *)tb->tb_data;
 551        struct fib_node *f;
 552        struct fib_alias *fa, *fa_to_delete;
 553        struct fn_zone *fz;
 554        __be32 key;
 555
 556        if (cfg->fc_dst_len > 32)
 557                return -EINVAL;
 558
 559        if ((fz  = table->fn_zones[cfg->fc_dst_len]) == NULL)
 560                return -ESRCH;
 561
 562        key = 0;
 563        if (cfg->fc_dst) {
 564                if (cfg->fc_dst & ~FZ_MASK(fz))
 565                        return -EINVAL;
 566                key = fz_key(cfg->fc_dst, fz);
 567        }
 568
 569        f = fib_find_node(fz, key);
 570
 571        if (!f)
 572                fa = NULL;
 573        else
 574                fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0);
 575        if (!fa)
 576                return -ESRCH;
 577
 578        fa_to_delete = NULL;
 579        fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
 580        list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
 581                struct fib_info *fi = fa->fa_info;
 582
 583                if (fa->fa_tos != cfg->fc_tos)
 584                        break;
 585
 586                if ((!cfg->fc_type ||
 587                     fa->fa_type == cfg->fc_type) &&
 588                    (cfg->fc_scope == RT_SCOPE_NOWHERE ||
 589                     fa->fa_scope == cfg->fc_scope) &&
 590                    (!cfg->fc_protocol ||
 591                     fi->fib_protocol == cfg->fc_protocol) &&
 592                    fib_nh_match(cfg, fi) == 0) {
 593                        fa_to_delete = fa;
 594                        break;
 595                }
 596        }
 597
 598        if (fa_to_delete) {
 599                int kill_fn;
 600
 601                fa = fa_to_delete;
 602                rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len,
 603                          tb->tb_id, &cfg->fc_nlinfo, 0);
 604
 605                kill_fn = 0;
 606                write_lock_bh(&fib_hash_lock);
 607                list_del(&fa->fa_list);
 608                if (list_empty(&f->fn_alias)) {
 609                        hlist_del(&f->fn_hash);
 610                        kill_fn = 1;
 611                }
 612                fib_hash_genid++;
 613                write_unlock_bh(&fib_hash_lock);
 614
 615                if (fa->fa_state & FA_S_ACCESSED)
 616                        rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
 617                fn_free_alias(fa, f);
 618                if (kill_fn) {
 619                        fn_free_node(f);
 620                        fz->fz_nent--;
 621                }
 622
 623                return 0;
 624        }
 625        return -ESRCH;
 626}
 627
 628static int fn_flush_list(struct fn_zone *fz, int idx)
 629{
 630        struct hlist_head *head = &fz->fz_hash[idx];
 631        struct hlist_node *node, *n;
 632        struct fib_node *f;
 633        int found = 0;
 634
 635        hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
 636                struct fib_alias *fa, *fa_node;
 637                int kill_f;
 638
 639                kill_f = 0;
 640                list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
 641                        struct fib_info *fi = fa->fa_info;
 642
 643                        if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
 644                                write_lock_bh(&fib_hash_lock);
 645                                list_del(&fa->fa_list);
 646                                if (list_empty(&f->fn_alias)) {
 647                                        hlist_del(&f->fn_hash);
 648                                        kill_f = 1;
 649                                }
 650                                fib_hash_genid++;
 651                                write_unlock_bh(&fib_hash_lock);
 652
 653                                fn_free_alias(fa, f);
 654                                found++;
 655                        }
 656                }
 657                if (kill_f) {
 658                        fn_free_node(f);
 659                        fz->fz_nent--;
 660                }
 661        }
 662        return found;
 663}
 664
 665static int fn_hash_flush(struct fib_table *tb)
 666{
 667        struct fn_hash *table = (struct fn_hash *) tb->tb_data;
 668        struct fn_zone *fz;
 669        int found = 0;
 670
 671        for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
 672                int i;
 673
 674                for (i = fz->fz_divisor - 1; i >= 0; i--)
 675                        found += fn_flush_list(fz, i);
 676        }
 677        return found;
 678}
 679
 680
 681static inline int
 682fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
 683                     struct fib_table *tb,
 684                     struct fn_zone *fz,
 685                     struct hlist_head *head)
 686{
 687        struct hlist_node *node;
 688        struct fib_node *f;
 689        int i, s_i;
 690
 691        s_i = cb->args[4];
 692        i = 0;
 693        hlist_for_each_entry(f, node, head, fn_hash) {
 694                struct fib_alias *fa;
 695
 696                list_for_each_entry(fa, &f->fn_alias, fa_list) {
 697                        if (i < s_i)
 698                                goto next;
 699
 700                        if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
 701                                          cb->nlh->nlmsg_seq,
 702                                          RTM_NEWROUTE,
 703                                          tb->tb_id,
 704                                          fa->fa_type,
 705                                          fa->fa_scope,
 706                                          f->fn_key,
 707                                          fz->fz_order,
 708                                          fa->fa_tos,
 709                                          fa->fa_info,
 710                                          NLM_F_MULTI) < 0) {
 711                                cb->args[4] = i;
 712                                return -1;
 713                        }
 714                next:
 715                        i++;
 716                }
 717        }
 718        cb->args[4] = i;
 719        return skb->len;
 720}
 721
 722static inline int
 723fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
 724                   struct fib_table *tb,
 725                   struct fn_zone *fz)
 726{
 727        int h, s_h;
 728
 729        if (fz->fz_hash == NULL)
 730                return skb->len;
 731        s_h = cb->args[3];
 732        for (h = s_h; h < fz->fz_divisor; h++) {
 733                if (hlist_empty(&fz->fz_hash[h]))
 734                        continue;
 735                if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h]) < 0) {
 736                        cb->args[3] = h;
 737                        return -1;
 738                }
 739                memset(&cb->args[4], 0,
 740                       sizeof(cb->args) - 4*sizeof(cb->args[0]));
 741        }
 742        cb->args[3] = h;
 743        return skb->len;
 744}
 745
 746static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
 747{
 748        int m, s_m;
 749        struct fn_zone *fz;
 750        struct fn_hash *table = (struct fn_hash *)tb->tb_data;
 751
 752        s_m = cb->args[2];
 753        read_lock(&fib_hash_lock);
 754        for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
 755                if (m < s_m) continue;
 756                if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
 757                        cb->args[2] = m;
 758                        read_unlock(&fib_hash_lock);
 759                        return -1;
 760                }
 761                memset(&cb->args[3], 0,
 762                       sizeof(cb->args) - 3*sizeof(cb->args[0]));
 763        }
 764        read_unlock(&fib_hash_lock);
 765        cb->args[2] = m;
 766        return skb->len;
 767}
 768
 769void __init fib_hash_init(void)
 770{
 771        fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node),
 772                                         0, SLAB_PANIC, NULL);
 773
 774        fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias),
 775                                          0, SLAB_PANIC, NULL);
 776
 777}
 778
 779struct fib_table *fib_hash_table(u32 id)
 780{
 781        struct fib_table *tb;
 782
 783        tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
 784                     GFP_KERNEL);
 785        if (tb == NULL)
 786                return NULL;
 787
 788        tb->tb_id = id;
 789        tb->tb_default = -1;
 790        tb->tb_lookup = fn_hash_lookup;
 791        tb->tb_insert = fn_hash_insert;
 792        tb->tb_delete = fn_hash_delete;
 793        tb->tb_flush = fn_hash_flush;
 794        tb->tb_select_default = fn_hash_select_default;
 795        tb->tb_dump = fn_hash_dump;
 796        memset(tb->tb_data, 0, sizeof(struct fn_hash));
 797        return tb;
 798}
 799
 800/* ------------------------------------------------------------------------ */
 801#ifdef CONFIG_PROC_FS
 802
 803struct fib_iter_state {
 804        struct seq_net_private p;
 805        struct fn_zone  *zone;
 806        int             bucket;
 807        struct hlist_head *hash_head;
 808        struct fib_node *fn;
 809        struct fib_alias *fa;
 810        loff_t pos;
 811        unsigned int genid;
 812        int valid;
 813};
 814
 815static struct fib_alias *fib_get_first(struct seq_file *seq)
 816{
 817        struct fib_iter_state *iter = seq->private;
 818        struct fib_table *main_table;
 819        struct fn_hash *table;
 820
 821        main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
 822        table = (struct fn_hash *)main_table->tb_data;
 823
 824        iter->bucket    = 0;
 825        iter->hash_head = NULL;
 826        iter->fn        = NULL;
 827        iter->fa        = NULL;
 828        iter->pos       = 0;
 829        iter->genid     = fib_hash_genid;
 830        iter->valid     = 1;
 831
 832        for (iter->zone = table->fn_zone_list; iter->zone;
 833             iter->zone = iter->zone->fz_next) {
 834                int maxslot;
 835
 836                if (!iter->zone->fz_nent)
 837                        continue;
 838
 839                iter->hash_head = iter->zone->fz_hash;
 840                maxslot = iter->zone->fz_divisor;
 841
 842                for (iter->bucket = 0; iter->bucket < maxslot;
 843                     ++iter->bucket, ++iter->hash_head) {
 844                        struct hlist_node *node;
 845                        struct fib_node *fn;
 846
 847                        hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
 848                                struct fib_alias *fa;
 849
 850                                list_for_each_entry(fa, &fn->fn_alias, fa_list) {
 851                                        iter->fn = fn;
 852                                        iter->fa = fa;
 853                                        goto out;
 854                                }
 855                        }
 856                }
 857        }
 858out:
 859        return iter->fa;
 860}
 861
 862static struct fib_alias *fib_get_next(struct seq_file *seq)
 863{
 864        struct fib_iter_state *iter = seq->private;
 865        struct fib_node *fn;
 866        struct fib_alias *fa;
 867
 868        /* Advance FA, if any. */
 869        fn = iter->fn;
 870        fa = iter->fa;
 871        if (fa) {
 872                BUG_ON(!fn);
 873                list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
 874                        iter->fa = fa;
 875                        goto out;
 876                }
 877        }
 878
 879        fa = iter->fa = NULL;
 880
 881        /* Advance FN. */
 882        if (fn) {
 883                struct hlist_node *node = &fn->fn_hash;
 884                hlist_for_each_entry_continue(fn, node, fn_hash) {
 885                        iter->fn = fn;
 886
 887                        list_for_each_entry(fa, &fn->fn_alias, fa_list) {
 888                                iter->fa = fa;
 889                                goto out;
 890                        }
 891                }
 892        }
 893
 894        fn = iter->fn = NULL;
 895
 896        /* Advance hash chain. */
 897        if (!iter->zone)
 898                goto out;
 899
 900        for (;;) {
 901                struct hlist_node *node;
 902                int maxslot;
 903
 904                maxslot = iter->zone->fz_divisor;
 905
 906                while (++iter->bucket < maxslot) {
 907                        iter->hash_head++;
 908
 909                        hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
 910                                list_for_each_entry(fa, &fn->fn_alias, fa_list) {
 911                                        iter->fn = fn;
 912                                        iter->fa = fa;
 913                                        goto out;
 914                                }
 915                        }
 916                }
 917
 918                iter->zone = iter->zone->fz_next;
 919
 920                if (!iter->zone)
 921                        goto out;
 922
 923                iter->bucket = 0;
 924                iter->hash_head = iter->zone->fz_hash;
 925
 926                hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
 927                        list_for_each_entry(fa, &fn->fn_alias, fa_list) {
 928                                iter->fn = fn;
 929                                iter->fa = fa;
 930                                goto out;
 931                        }
 932                }
 933        }
 934out:
 935        iter->pos++;
 936        return fa;
 937}
 938
 939static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
 940{
 941        struct fib_iter_state *iter = seq->private;
 942        struct fib_alias *fa;
 943
 944        if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) {
 945                fa   = iter->fa;
 946                pos -= iter->pos;
 947        } else
 948                fa = fib_get_first(seq);
 949
 950        if (fa)
 951                while (pos && (fa = fib_get_next(seq)))
 952                        --pos;
 953        return pos ? NULL : fa;
 954}
 955
 956static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
 957        __acquires(fib_hash_lock)
 958{
 959        void *v = NULL;
 960
 961        read_lock(&fib_hash_lock);
 962        if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
 963                v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 964        return v;
 965}
 966
 967static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 968{
 969        ++*pos;
 970        return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
 971}
 972
 973static void fib_seq_stop(struct seq_file *seq, void *v)
 974        __releases(fib_hash_lock)
 975{
 976        read_unlock(&fib_hash_lock);
 977}
 978
 979static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
 980{
 981        static const unsigned type2flags[RTN_MAX + 1] = {
 982                [7] = RTF_REJECT, [8] = RTF_REJECT,
 983        };
 984        unsigned flags = type2flags[type];
 985
 986        if (fi && fi->fib_nh->nh_gw)
 987                flags |= RTF_GATEWAY;
 988        if (mask == htonl(0xFFFFFFFF))
 989                flags |= RTF_HOST;
 990        flags |= RTF_UP;
 991        return flags;
 992}
 993
 994/*
 995 *      This outputs /proc/net/route.
 996 *
 997 *      It always works in backward compatibility mode.
 998 *      The format of the file is not supposed to be changed.
 999 */
1000static int fib_seq_show(struct seq_file *seq, void *v)
1001{
1002        struct fib_iter_state *iter;
1003        int len;
1004        __be32 prefix, mask;
1005        unsigned flags;
1006        struct fib_node *f;
1007        struct fib_alias *fa;
1008        struct fib_info *fi;
1009
1010        if (v == SEQ_START_TOKEN) {
1011                seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
1012                           "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1013                           "\tWindow\tIRTT");
1014                goto out;
1015        }
1016
1017        iter    = seq->private;
1018        f       = iter->fn;
1019        fa      = iter->fa;
1020        fi      = fa->fa_info;
1021        prefix  = f->fn_key;
1022        mask    = FZ_MASK(iter->zone);
1023        flags   = fib_flag_trans(fa->fa_type, mask, fi);
1024        if (fi)
1025                seq_printf(seq,
1026                         "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1027                         fi->fib_dev ? fi->fib_dev->name : "*", prefix,
1028                         fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
1029                         mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
1030                         fi->fib_window,
1031                         fi->fib_rtt >> 3, &len);
1032        else
1033                seq_printf(seq,
1034                         "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n",
1035                         prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len);
1036
1037        seq_printf(seq, "%*s\n", 127 - len, "");
1038out:
1039        return 0;
1040}
1041
1042static const struct seq_operations fib_seq_ops = {
1043        .start  = fib_seq_start,
1044        .next   = fib_seq_next,
1045        .stop   = fib_seq_stop,
1046        .show   = fib_seq_show,
1047};
1048
1049static int fib_seq_open(struct inode *inode, struct file *file)
1050{
1051        return seq_open_net(inode, file, &fib_seq_ops,
1052                            sizeof(struct fib_iter_state));
1053}
1054
1055static const struct file_operations fib_seq_fops = {
1056        .owner          = THIS_MODULE,
1057        .open           = fib_seq_open,
1058        .read           = seq_read,
1059        .llseek         = seq_lseek,
1060        .release        = seq_release_net,
1061};
1062
1063int __net_init fib_proc_init(struct net *net)
1064{
1065        if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
1066                return -ENOMEM;
1067        return 0;
1068}
1069
1070void __net_exit fib_proc_exit(struct net *net)
1071{
1072        proc_net_remove(net, "route");
1073}
1074#endif /* CONFIG_PROC_FS */
1075