linux/include/linux/rhashtable.h
<<
>>
Prefs
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
  16
  17#ifndef _LINUX_RHASHTABLE_H
  18#define _LINUX_RHASHTABLE_H
  19
  20#include <linux/atomic.h>
  21#include <linux/compiler.h>
  22#include <linux/err.h>
  23#include <linux/errno.h>
  24#include <linux/jhash.h>
  25#include <linux/list_nulls.h>
  26#include <linux/workqueue.h>
  27#include <linux/mutex.h>
  28#include <linux/rcupdate.h>
  29
/*
 * The end of the chain is marked with a special nulls marker which has
 * the following format:
 *
 * +-------+-----------------------------------------------------+-+
 * | Base  |                      Hash                           |1|
 * +-------+-----------------------------------------------------+-+
 *
 * Base (4 bits) : Reserved to distinguish between multiple tables.
 *                 Specified via &struct rhashtable_params.nulls_base.
 * Hash (27 bits): Full hash (unmasked) of first element added to bucket
 * 1 (1 bit)     : Nulls marker (always set)
 *
 * The remaining bits of the next pointer remain unused for now.
 */
  45#define RHT_BASE_BITS           4
  46#define RHT_HASH_BITS           27
  47#define RHT_BASE_SHIFT          RHT_HASH_BITS
  48
  49/* Base bits plus 1 bit for nulls marker */
  50#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
  51
  52struct rhash_head {
  53        struct rhash_head __rcu         *next;
  54};
  55
  56/**
  57 * struct bucket_table - Table of hash buckets
  58 * @size: Number of hash buckets
  59 * @rehash: Current bucket being rehashed
  60 * @hash_rnd: Random seed to fold into hash
  61 * @locks_mask: Mask to apply before accessing locks[]
  62 * @locks: Array of spinlocks protecting individual buckets
  63 * @walkers: List of active walkers
  64 * @rcu: RCU structure for freeing the table
  65 * @future_tbl: Table under construction during rehashing
  66 * @buckets: size * hash buckets
  67 */
  68struct bucket_table {
  69        unsigned int            size;
  70        unsigned int            rehash;
  71        u32                     hash_rnd;
  72        unsigned int            locks_mask;
  73        spinlock_t              *locks;
  74        struct list_head        walkers;
  75        struct rcu_head         rcu;
  76
  77        struct bucket_table __rcu *future_tbl;
  78
  79        struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
  80};
  81
  82/**
  83 * struct rhashtable_compare_arg - Key for the function rhashtable_compare
  84 * @ht: Hash table
  85 * @key: Key to compare against
  86 */
  87struct rhashtable_compare_arg {
  88        struct rhashtable *ht;
  89        const void *key;
  90};
  91
  92typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
  93typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
  94typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
  95                               const void *obj);
  96
  97struct rhashtable;
  98
  99/**
 100 * struct rhashtable_params - Hash table construction parameters
 101 * @nelem_hint: Hint on number of elements, should be 75% of desired size
 102 * @key_len: Length of key
 103 * @key_offset: Offset of key in struct to be hashed
 104 * @head_offset: Offset of rhash_head in struct to be hashed
 105 * @insecure_max_entries: Maximum number of entries (may be exceeded)
 106 * @max_size: Maximum size while expanding
 107 * @min_size: Minimum size while shrinking
 108 * @nulls_base: Base value to generate nulls marker
 109 * @insecure_elasticity: Set to true to disable chain length checks
 110 * @automatic_shrinking: Enable automatic shrinking of tables
 111 * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
 112 * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
 113 * @obj_hashfn: Function to hash object
 114 * @obj_cmpfn: Function to compare key with object
 115 */
 116struct rhashtable_params {
 117        size_t                  nelem_hint;
 118        size_t                  key_len;
 119        size_t                  key_offset;
 120        size_t                  head_offset;
 121        unsigned int            insecure_max_entries;
 122        unsigned int            max_size;
 123        unsigned int            min_size;
 124        u32                     nulls_base;
 125        bool                    insecure_elasticity;
 126        bool                    automatic_shrinking;
 127        size_t                  locks_mul;
 128        rht_hashfn_t            hashfn;
 129        rht_obj_hashfn_t        obj_hashfn;
 130        rht_obj_cmpfn_t         obj_cmpfn;
 131};
 132
 133/**
 134 * struct rhashtable - Hash table handle
 135 * @tbl: Bucket table
 136 * @nelems: Number of elements in table
 137 * @key_len: Key length for hashfn
 138 * @elasticity: Maximum chain length before rehash
 139 * @p: Configuration parameters
 140 * @run_work: Deferred worker to expand/shrink asynchronously
 141 * @mutex: Mutex to protect current/future table swapping
 142 * @lock: Spin lock to protect walker list
 143 */
 144struct rhashtable {
 145        struct bucket_table __rcu       *tbl;
 146        atomic_t                        nelems;
 147        unsigned int                    key_len;
 148        unsigned int                    elasticity;
 149        struct rhashtable_params        p;
 150        struct work_struct              run_work;
 151        struct mutex                    mutex;
 152        spinlock_t                      lock;
 153};
 154
 155/**
 156 * struct rhashtable_walker - Hash table walker
 157 * @list: List entry on list of walkers
 158 * @tbl: The table that we were walking over
 159 */
 160struct rhashtable_walker {
 161        struct list_head list;
 162        struct bucket_table *tbl;
 163};
 164
 165/**
 166 * struct rhashtable_iter - Hash table iterator, fits into netlink cb
 167 * @ht: Table to iterate through
 168 * @p: Current pointer
 169 * @walker: Associated rhashtable walker
 170 * @slot: Current slot
 171 * @skip: Number of entries to skip in slot
 172 */
 173struct rhashtable_iter {
 174        struct rhashtable *ht;
 175        struct rhash_head *p;
 176        struct rhashtable_walker *walker;
 177        unsigned int slot;
 178        unsigned int skip;
 179};
 180
 181static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
 182{
 183        return NULLS_MARKER(ht->p.nulls_base + hash);
 184}
 185
 186#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
 187        ((ptr) = (typeof(ptr)) rht_marker(ht, hash))
 188
 189static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 190{
 191        return ((unsigned long) ptr & 1);
 192}
 193
 194static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
 195{
 196        return ((unsigned long) ptr) >> 1;
 197}
 198
 199static inline void *rht_obj(const struct rhashtable *ht,
 200                            const struct rhash_head *he)
 201{
 202        return (char *)he - ht->p.head_offset;
 203}
 204
 205static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
 206                                            unsigned int hash)
 207{
 208        return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
 209}
 210
 211static inline unsigned int rht_key_hashfn(
 212        struct rhashtable *ht, const struct bucket_table *tbl,
 213        const void *key, const struct rhashtable_params params)
 214{
 215        unsigned int hash;
 216
 217        /* params must be equal to ht->p if it isn't constant. */
 218        if (!__builtin_constant_p(params.key_len))
 219                hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 220        else if (params.key_len) {
 221                unsigned int key_len = params.key_len;
 222
 223                if (params.hashfn)
 224                        hash = params.hashfn(key, key_len, tbl->hash_rnd);
 225                else if (key_len & (sizeof(u32) - 1))
 226                        hash = jhash(key, key_len, tbl->hash_rnd);
 227                else
 228                        hash = jhash2(key, key_len / sizeof(u32),
 229                                      tbl->hash_rnd);
 230        } else {
 231                unsigned int key_len = ht->p.key_len;
 232
 233                if (params.hashfn)
 234                        hash = params.hashfn(key, key_len, tbl->hash_rnd);
 235                else
 236                        hash = jhash(key, key_len, tbl->hash_rnd);
 237        }
 238
 239        return rht_bucket_index(tbl, hash);
 240}
 241
 242static inline unsigned int rht_head_hashfn(
 243        struct rhashtable *ht, const struct bucket_table *tbl,
 244        const struct rhash_head *he, const struct rhashtable_params params)
 245{
 246        const char *ptr = rht_obj(ht, he);
 247
 248        return likely(params.obj_hashfn) ?
 249               rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
 250                                                            ht->p.key_len,
 251                                                       tbl->hash_rnd)) :
 252               rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
 253}
 254
 255/**
 256 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 257 * @ht:         hash table
 258 * @tbl:        current table
 259 */
 260static inline bool rht_grow_above_75(const struct rhashtable *ht,
 261                                     const struct bucket_table *tbl)
 262{
 263        /* Expand table when exceeding 75% load */
 264        return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
 265               (!ht->p.max_size || tbl->size < ht->p.max_size);
 266}
 267
 268/**
 269 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 270 * @ht:         hash table
 271 * @tbl:        current table
 272 */
 273static inline bool rht_shrink_below_30(const struct rhashtable *ht,
 274                                       const struct bucket_table *tbl)
 275{
 276        /* Shrink table beneath 30% load */
 277        return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
 278               tbl->size > ht->p.min_size;
 279}
 280
 281/**
 282 * rht_grow_above_100 - returns true if nelems > table-size
 283 * @ht:         hash table
 284 * @tbl:        current table
 285 */
 286static inline bool rht_grow_above_100(const struct rhashtable *ht,
 287                                      const struct bucket_table *tbl)
 288{
 289        return atomic_read(&ht->nelems) > tbl->size &&
 290                (!ht->p.max_size || tbl->size < ht->p.max_size);
 291}
 292
 293/**
 294 * rht_grow_above_max - returns true if table is above maximum
 295 * @ht:         hash table
 296 * @tbl:        current table
 297 */
 298static inline bool rht_grow_above_max(const struct rhashtable *ht,
 299                                      const struct bucket_table *tbl)
 300{
 301        return ht->p.insecure_max_entries &&
 302               atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
 303}
 304
 305/* The bucket lock is selected based on the hash and protects mutations
 306 * on a group of hash buckets.
 307 *
 308 * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
 309 * a single lock always covers both buckets which may both contains
 310 * entries which link to the same bucket of the old table during resizing.
 311 * This allows to simplify the locking as locking the bucket in both
 312 * tables during resize always guarantee protection.
 313 *
 314 * IMPORTANT: When holding the bucket lock of both the old and new table
 315 * during expansions and shrinking, the old bucket lock must always be
 316 * acquired first.
 317 */
 318static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
 319                                          unsigned int hash)
 320{
 321        return &tbl->locks[hash & tbl->locks_mask];
 322}
 323
 324#ifdef CONFIG_PROVE_LOCKING
 325int lockdep_rht_mutex_is_held(struct rhashtable *ht);
 326int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
 327#else
 328static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
 329{
 330        return 1;
 331}
 332
 333static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 334                                             u32 hash)
 335{
 336        return 1;
 337}
 338#endif /* CONFIG_PROVE_LOCKING */
 339
 340int rhashtable_init(struct rhashtable *ht,
 341                    const struct rhashtable_params *params);
 342
 343struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
 344                                            const void *key,
 345                                            struct rhash_head *obj,
 346                                            struct bucket_table *old_tbl);
 347int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
 348
 349int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
 350                         gfp_t gfp);
 351void rhashtable_walk_exit(struct rhashtable_iter *iter);
 352int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 353void *rhashtable_walk_next(struct rhashtable_iter *iter);
 354void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 355
 356void rhashtable_free_and_destroy(struct rhashtable *ht,
 357                                 void (*free_fn)(void *ptr, void *arg),
 358                                 void *arg);
 359void rhashtable_destroy(struct rhashtable *ht);
 360
 361#define rht_dereference(p, ht) \
 362        rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 363
 364#define rht_dereference_rcu(p, ht) \
 365        rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
 366
 367#define rht_dereference_bucket(p, tbl, hash) \
 368        rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))
 369
 370#define rht_dereference_bucket_rcu(p, tbl, hash) \
 371        rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
 372
 373#define rht_entry(tpos, pos, member) \
 374        ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 375
 376/**
 377 * rht_for_each_continue - continue iterating over hash chain
 378 * @pos:        the &struct rhash_head to use as a loop cursor.
 379 * @head:       the previous &struct rhash_head to continue from
 380 * @tbl:        the &struct bucket_table
 381 * @hash:       the hash value / bucket index
 382 */
 383#define rht_for_each_continue(pos, head, tbl, hash) \
 384        for (pos = rht_dereference_bucket(head, tbl, hash); \
 385             !rht_is_a_nulls(pos); \
 386             pos = rht_dereference_bucket((pos)->next, tbl, hash))
 387
 388/**
 389 * rht_for_each - iterate over hash chain
 390 * @pos:        the &struct rhash_head to use as a loop cursor.
 391 * @tbl:        the &struct bucket_table
 392 * @hash:       the hash value / bucket index
 393 */
 394#define rht_for_each(pos, tbl, hash) \
 395        rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
 396
 397/**
 398 * rht_for_each_entry_continue - continue iterating over hash chain
 399 * @tpos:       the type * to use as a loop cursor.
 400 * @pos:        the &struct rhash_head to use as a loop cursor.
 401 * @head:       the previous &struct rhash_head to continue from
 402 * @tbl:        the &struct bucket_table
 403 * @hash:       the hash value / bucket index
 404 * @member:     name of the &struct rhash_head within the hashable struct.
 405 */
 406#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \
 407        for (pos = rht_dereference_bucket(head, tbl, hash);             \
 408             (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);    \
 409             pos = rht_dereference_bucket((pos)->next, tbl, hash))
 410
 411/**
 412 * rht_for_each_entry - iterate over hash chain of given type
 413 * @tpos:       the type * to use as a loop cursor.
 414 * @pos:        the &struct rhash_head to use as a loop cursor.
 415 * @tbl:        the &struct bucket_table
 416 * @hash:       the hash value / bucket index
 417 * @member:     name of the &struct rhash_head within the hashable struct.
 418 */
 419#define rht_for_each_entry(tpos, pos, tbl, hash, member)                \
 420        rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],    \
 421                                    tbl, hash, member)
 422
 423/**
 424 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 425 * @tpos:       the type * to use as a loop cursor.
 426 * @pos:        the &struct rhash_head to use as a loop cursor.
 427 * @next:       the &struct rhash_head to use as next in loop cursor.
 428 * @tbl:        the &struct bucket_table
 429 * @hash:       the hash value / bucket index
 430 * @member:     name of the &struct rhash_head within the hashable struct.
 431 *
 432 * This hash chain list-traversal primitive allows for the looped code to
 433 * remove the loop cursor from the list.
 434 */
 435#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)         \
 436        for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
 437             next = !rht_is_a_nulls(pos) ?                                  \
 438                       rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
 439             (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);        \
 440             pos = next,                                                    \
 441             next = !rht_is_a_nulls(pos) ?                                  \
 442                       rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 443
 444/**
 445 * rht_for_each_rcu_continue - continue iterating over rcu hash chain
 446 * @pos:        the &struct rhash_head to use as a loop cursor.
 447 * @head:       the previous &struct rhash_head to continue from
 448 * @tbl:        the &struct bucket_table
 449 * @hash:       the hash value / bucket index
 450 *
 451 * This hash chain list-traversal primitive may safely run concurrently with
 452 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 453 * traversal is guarded by rcu_read_lock().
 454 */
 455#define rht_for_each_rcu_continue(pos, head, tbl, hash)                 \
 456        for (({barrier(); }),                                           \
 457             pos = rht_dereference_bucket_rcu(head, tbl, hash);         \
 458             !rht_is_a_nulls(pos);                                      \
 459             pos = rcu_dereference_raw(pos->next))
 460
 461/**
 462 * rht_for_each_rcu - iterate over rcu hash chain
 463 * @pos:        the &struct rhash_head to use as a loop cursor.
 464 * @tbl:        the &struct bucket_table
 465 * @hash:       the hash value / bucket index
 466 *
 467 * This hash chain list-traversal primitive may safely run concurrently with
 468 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 469 * traversal is guarded by rcu_read_lock().
 470 */
 471#define rht_for_each_rcu(pos, tbl, hash)                                \
 472        rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
 473
 474/**
 475 * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
 476 * @tpos:       the type * to use as a loop cursor.
 477 * @pos:        the &struct rhash_head to use as a loop cursor.
 478 * @head:       the previous &struct rhash_head to continue from
 479 * @tbl:        the &struct bucket_table
 480 * @hash:       the hash value / bucket index
 481 * @member:     name of the &struct rhash_head within the hashable struct.
 482 *
 483 * This hash chain list-traversal primitive may safely run concurrently with
 484 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 485 * traversal is guarded by rcu_read_lock().
 486 */
 487#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
 488        for (({barrier(); }),                                               \
 489             pos = rht_dereference_bucket_rcu(head, tbl, hash);             \
 490             (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);        \
 491             pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
 492
 493/**
 494 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 495 * @tpos:       the type * to use as a loop cursor.
 496 * @pos:        the &struct rhash_head to use as a loop cursor.
 497 * @tbl:        the &struct bucket_table
 498 * @hash:       the hash value / bucket index
 499 * @member:     name of the &struct rhash_head within the hashable struct.
 500 *
 501 * This hash chain list-traversal primitive may safely run concurrently with
 502 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 503 * traversal is guarded by rcu_read_lock().
 504 */
 505#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)            \
 506        rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 507                                        tbl, hash, member)
 508
 509static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
 510                                     const void *obj)
 511{
 512        struct rhashtable *ht = arg->ht;
 513        const char *ptr = obj;
 514
 515        return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
 516}
 517
 518/**
 519 * rhashtable_lookup_fast - search hash table, inlined version
 520 * @ht:         hash table
 521 * @key:        the pointer to the key
 522 * @params:     hash table parameters
 523 *
 524 * Computes the hash value for the key and traverses the bucket chain looking
 525 * for a entry with an identical key. The first matching entry is returned.
 526 *
 527 * Returns the first entry on which the compare function returned true.
 528 */
 529static inline void *rhashtable_lookup_fast(
 530        struct rhashtable *ht, const void *key,
 531        const struct rhashtable_params params)
 532{
 533        struct rhashtable_compare_arg arg = {
 534                .ht = ht,
 535                .key = key,
 536        };
 537        const struct bucket_table *tbl;
 538        struct rhash_head *he;
 539        unsigned int hash;
 540
 541        rcu_read_lock();
 542
 543        tbl = rht_dereference_rcu(ht->tbl, ht);
 544restart:
 545        hash = rht_key_hashfn(ht, tbl, key, params);
 546        rht_for_each_rcu(he, tbl, hash) {
 547                if (params.obj_cmpfn ?
 548                    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
 549                    rhashtable_compare(&arg, rht_obj(ht, he)))
 550                        continue;
 551                rcu_read_unlock();
 552                return rht_obj(ht, he);
 553        }
 554
 555        /* Ensure we see any new tables. */
 556        smp_rmb();
 557
 558        tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 559        if (unlikely(tbl))
 560                goto restart;
 561        rcu_read_unlock();
 562
 563        return NULL;
 564}
 565
 566/* Internal function, please use rhashtable_insert_fast() instead */
 567static inline int __rhashtable_insert_fast(
 568        struct rhashtable *ht, const void *key, struct rhash_head *obj,
 569        const struct rhashtable_params params)
 570{
 571        struct rhashtable_compare_arg arg = {
 572                .ht = ht,
 573                .key = key,
 574        };
 575        struct bucket_table *tbl, *new_tbl;
 576        struct rhash_head *head;
 577        spinlock_t *lock;
 578        unsigned int elasticity;
 579        unsigned int hash;
 580        int err;
 581
 582restart:
 583        rcu_read_lock();
 584
 585        tbl = rht_dereference_rcu(ht->tbl, ht);
 586
 587        /* All insertions must grab the oldest table containing
 588         * the hashed bucket that is yet to be rehashed.
 589         */
 590        for (;;) {
 591                hash = rht_head_hashfn(ht, tbl, obj, params);
 592                lock = rht_bucket_lock(tbl, hash);
 593                spin_lock_bh(lock);
 594
 595                if (tbl->rehash <= hash)
 596                        break;
 597
 598                spin_unlock_bh(lock);
 599                tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 600        }
 601
 602        new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 603        if (unlikely(new_tbl)) {
 604                tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
 605                if (!IS_ERR_OR_NULL(tbl))
 606                        goto slow_path;
 607
 608                err = PTR_ERR(tbl);
 609                goto out;
 610        }
 611
 612        err = -E2BIG;
 613        if (unlikely(rht_grow_above_max(ht, tbl)))
 614                goto out;
 615
 616        if (unlikely(rht_grow_above_100(ht, tbl))) {
 617slow_path:
 618                spin_unlock_bh(lock);
 619                err = rhashtable_insert_rehash(ht, tbl);
 620                rcu_read_unlock();
 621                if (err)
 622                        return err;
 623
 624                goto restart;
 625        }
 626
 627        err = -EEXIST;
 628        elasticity = ht->elasticity;
 629        rht_for_each(head, tbl, hash) {
 630                if (key &&
 631                    unlikely(!(params.obj_cmpfn ?
 632                               params.obj_cmpfn(&arg, rht_obj(ht, head)) :
 633                               rhashtable_compare(&arg, rht_obj(ht, head)))))
 634                        goto out;
 635                if (!--elasticity)
 636                        goto slow_path;
 637        }
 638
 639        err = 0;
 640
 641        head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 642
 643        RCU_INIT_POINTER(obj->next, head);
 644
 645        rcu_assign_pointer(tbl->buckets[hash], obj);
 646
 647        atomic_inc(&ht->nelems);
 648        if (rht_grow_above_75(ht, tbl))
 649                schedule_work(&ht->run_work);
 650
 651out:
 652        spin_unlock_bh(lock);
 653        rcu_read_unlock();
 654
 655        return err;
 656}
 657
 658/**
 659 * rhashtable_insert_fast - insert object into hash table
 660 * @ht:         hash table
 661 * @obj:        pointer to hash head inside object
 662 * @params:     hash table parameters
 663 *
 664 * Will take a per bucket spinlock to protect against mutual mutations
 665 * on the same bucket. Multiple insertions may occur in parallel unless
 666 * they map to the same bucket lock.
 667 *
 668 * It is safe to call this function from atomic context.
 669 *
 670 * Will trigger an automatic deferred table resizing if the size grows
 671 * beyond the watermark indicated by grow_decision() which can be passed
 672 * to rhashtable_init().
 673 */
 674static inline int rhashtable_insert_fast(
 675        struct rhashtable *ht, struct rhash_head *obj,
 676        const struct rhashtable_params params)
 677{
 678        return __rhashtable_insert_fast(ht, NULL, obj, params);
 679}
 680
 681/**
 682 * rhashtable_lookup_insert_fast - lookup and insert object into hash table
 683 * @ht:         hash table
 684 * @obj:        pointer to hash head inside object
 685 * @params:     hash table parameters
 686 *
 687 * Locks down the bucket chain in both the old and new table if a resize
 688 * is in progress to ensure that writers can't remove from the old table
 689 * and can't insert to the new table during the atomic operation of search
 690 * and insertion. Searches for duplicates in both the old and new table if
 691 * a resize is in progress.
 692 *
 693 * This lookup function may only be used for fixed key hash table (key_len
 694 * parameter set). It will BUG() if used inappropriately.
 695 *
 696 * It is safe to call this function from atomic context.
 697 *
 698 * Will trigger an automatic deferred table resizing if the size grows
 699 * beyond the watermark indicated by grow_decision() which can be passed
 700 * to rhashtable_init().
 701 */
 702static inline int rhashtable_lookup_insert_fast(
 703        struct rhashtable *ht, struct rhash_head *obj,
 704        const struct rhashtable_params params)
 705{
 706        const char *key = rht_obj(ht, obj);
 707
 708        BUG_ON(ht->p.obj_hashfn);
 709
 710        return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
 711                                        params);
 712}
 713
 714/**
 715 * rhashtable_lookup_insert_key - search and insert object to hash table
 716 *                                with explicit key
 717 * @ht:         hash table
 718 * @key:        key
 719 * @obj:        pointer to hash head inside object
 720 * @params:     hash table parameters
 721 *
 722 * Locks down the bucket chain in both the old and new table if a resize
 723 * is in progress to ensure that writers can't remove from the old table
 724 * and can't insert to the new table during the atomic operation of search
 725 * and insertion. Searches for duplicates in both the old and new table if
 726 * a resize is in progress.
 727 *
 728 * Lookups may occur in parallel with hashtable mutations and resizing.
 729 *
 730 * Will trigger an automatic deferred table resizing if the size grows
 731 * beyond the watermark indicated by grow_decision() which can be passed
 732 * to rhashtable_init().
 733 *
 734 * Returns zero on success.
 735 */
 736static inline int rhashtable_lookup_insert_key(
 737        struct rhashtable *ht, const void *key, struct rhash_head *obj,
 738        const struct rhashtable_params params)
 739{
 740        BUG_ON(!ht->p.obj_hashfn || !key);
 741
 742        return __rhashtable_insert_fast(ht, key, obj, params);
 743}
 744
 745/* Internal function, please use rhashtable_remove_fast() instead */
 746static inline int __rhashtable_remove_fast(
 747        struct rhashtable *ht, struct bucket_table *tbl,
 748        struct rhash_head *obj, const struct rhashtable_params params)
 749{
 750        struct rhash_head __rcu **pprev;
 751        struct rhash_head *he;
 752        spinlock_t * lock;
 753        unsigned int hash;
 754        int err = -ENOENT;
 755
 756        hash = rht_head_hashfn(ht, tbl, obj, params);
 757        lock = rht_bucket_lock(tbl, hash);
 758
 759        spin_lock_bh(lock);
 760
 761        pprev = &tbl->buckets[hash];
 762        rht_for_each(he, tbl, hash) {
 763                if (he != obj) {
 764                        pprev = &he->next;
 765                        continue;
 766                }
 767
 768                rcu_assign_pointer(*pprev, obj->next);
 769                err = 0;
 770                break;
 771        }
 772
 773        spin_unlock_bh(lock);
 774
 775        return err;
 776}
 777
 778/**
 779 * rhashtable_remove_fast - remove object from hash table
 780 * @ht:         hash table
 781 * @obj:        pointer to hash head inside object
 782 * @params:     hash table parameters
 783 *
 784 * Since the hash chain is single linked, the removal operation needs to
 785 * walk the bucket chain upon removal. The removal operation is thus
 786 * considerable slow if the hash table is not correctly sized.
 787 *
 788 * Will automatically shrink the table via rhashtable_expand() if the
 789 * shrink_decision function specified at rhashtable_init() returns true.
 790 *
 791 * Returns zero on success, -ENOENT if the entry could not be found.
 792 */
 793static inline int rhashtable_remove_fast(
 794        struct rhashtable *ht, struct rhash_head *obj,
 795        const struct rhashtable_params params)
 796{
 797        struct bucket_table *tbl;
 798        int err;
 799
 800        rcu_read_lock();
 801
 802        tbl = rht_dereference_rcu(ht->tbl, ht);
 803
 804        /* Because we have already taken (and released) the bucket
 805         * lock in old_tbl, if we find that future_tbl is not yet
 806         * visible then that guarantees the entry to still be in
 807         * the old tbl if it exists.
 808         */
 809        while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
 810               (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
 811                ;
 812
 813        if (err)
 814                goto out;
 815
 816        atomic_dec(&ht->nelems);
 817        if (unlikely(ht->p.automatic_shrinking &&
 818                     rht_shrink_below_30(ht, tbl)))
 819                schedule_work(&ht->run_work);
 820
 821out:
 822        rcu_read_unlock();
 823
 824        return err;
 825}
 826
 827/* Internal function, please use rhashtable_replace_fast() instead */
 828static inline int __rhashtable_replace_fast(
 829        struct rhashtable *ht, struct bucket_table *tbl,
 830        struct rhash_head *obj_old, struct rhash_head *obj_new,
 831        const struct rhashtable_params params)
 832{
 833        struct rhash_head __rcu **pprev;
 834        struct rhash_head *he;
 835        spinlock_t *lock;
 836        unsigned int hash;
 837        int err = -ENOENT;
 838
 839        /* Minimally, the old and new objects must have same hash
 840         * (which should mean identifiers are the same).
 841         */
 842        hash = rht_head_hashfn(ht, tbl, obj_old, params);
 843        if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
 844                return -EINVAL;
 845
 846        lock = rht_bucket_lock(tbl, hash);
 847
 848        spin_lock_bh(lock);
 849
 850        pprev = &tbl->buckets[hash];
 851        rht_for_each(he, tbl, hash) {
 852                if (he != obj_old) {
 853                        pprev = &he->next;
 854                        continue;
 855                }
 856
 857                rcu_assign_pointer(obj_new->next, obj_old->next);
 858                rcu_assign_pointer(*pprev, obj_new);
 859                err = 0;
 860                break;
 861        }
 862
 863        spin_unlock_bh(lock);
 864
 865        return err;
 866}
 867
 868/**
 869 * rhashtable_replace_fast - replace an object in hash table
 870 * @ht:         hash table
 871 * @obj_old:    pointer to hash head inside object being replaced
 872 * @obj_new:    pointer to hash head inside object which is new
 873 * @params:     hash table parameters
 874 *
 875 * Replacing an object doesn't affect the number of elements in the hash table
 876 * or bucket, so we don't need to worry about shrinking or expanding the
 877 * table here.
 878 *
 879 * Returns zero on success, -ENOENT if the entry could not be found,
 880 * -EINVAL if hash is not the same for the old and new objects.
 881 */
 882static inline int rhashtable_replace_fast(
 883        struct rhashtable *ht, struct rhash_head *obj_old,
 884        struct rhash_head *obj_new,
 885        const struct rhashtable_params params)
 886{
 887        struct bucket_table *tbl;
 888        int err;
 889
 890        rcu_read_lock();
 891
 892        tbl = rht_dereference_rcu(ht->tbl, ht);
 893
 894        /* Because we have already taken (and released) the bucket
 895         * lock in old_tbl, if we find that future_tbl is not yet
 896         * visible then that guarantees the entry to still be in
 897         * the old tbl if it exists.
 898         */
 899        while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
 900                                                obj_new, params)) &&
 901               (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
 902                ;
 903
 904        rcu_read_unlock();
 905
 906        return err;
 907}
 908
 909#endif /* _LINUX_RHASHTABLE_H */
 910