/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/hash.c
 *
 * Implement a hash class for hashing in the Lustre system.
 *
 * Author: YuZhangyong <yzy@clusterfs.com>
 *
 * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
 * - Simplified API and improved documentation
 * - Added per-hash feature flags:
 *   * CFS_HASH_DEBUG additional validation
 *   * CFS_HASH_REHASH dynamic rehashing
 * - Added per-hash statistics
 * - General performance enhancements
 *
 * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
 * - move all stuff to libcfs
 * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
 * - ignore hs_rwlock if without CFS_HASH_REHASH setting
 * - buckets are allocated one by one (instead of as contiguous memory),
 *   to avoid unnecessary cacheline conflicts
 *
 * 2010-03-01: Liang Zhen <zhen.liang@sun.com>
 * - a "bucket" is now a group of hlist_heads; the user can specify bucket
 *   size via bkt_bits of cfs_hash_create(). All hlist_heads in a bucket
 *   share one lock to reduce memory overhead.
 *
 * - support lockless hash, where the caller takes care of locks:
 *   avoids lock overhead for hash tables that are already protected
 *   by locking in the caller for another reason
 *
 * - support both spinlock/rwlock for bucket:
 *   the overhead of spinlock contention is lower than the read/write
 *   contention of an rwlock, so using a spinlock to serialize operations
 *   on a bucket is more reasonable for frequently changed hash tables
 *
 * - support single-lock mode:
 *   one lock protects all hash operations, to avoid the overhead of
 *   multiple locks if the hash table is always small
 *
 * - removed a lot of unnecessary addref & decref on hash elements:
 *   addref & decref are atomic operations in many use-cases, which
 *   are expensive.
 *
 * - support non-blocking cfs_hash_add() and cfs_hash_findadd():
 *   some Lustre use-cases require these functions to be strictly
 *   non-blocking; in those cases we schedule the required rehash on a
 *   different thread.
 *
 * - safer rehash on large hash tables
 *   In the old implementation, the rehash function exclusively locked the
 *   hash table and finished the rehash in one batch; that is dangerous on
 *   SMP systems because rehashing millions of elements can take a long
 *   time. The new rehash implementation can release the lock and relax the
 *   CPU in the middle of a rehash; it is safe for another thread to
 *   search/change the hash table even while it is rehashing.
 *
 * - support two different refcount modes
 *   . hash table has refcount on element
 *   . hash table doesn't change refcount on adding/removing element
 *
 * - support long name hash table (for param-tree)
 *
 * - fix a bug in cfs_hash_rehash_key:
 *   in the old implementation, cfs_hash_rehash_key could screw up the
 *   hash table because @key was overwritten without any protection.
 *   Now the user must define hs_keycpy for rehash-enabled hash tables;
 *   cfs_hash_rehash_key overwrites the hash key inside the lock by
 *   calling hs_keycpy.
 *
 * - better hash iteration:
 *   Now we support both locked iteration & lockless iteration of a hash
 *   table. Also, the user can break the iteration by returning 1 from the
 *   callback.
 */

#include "../../include/linux/libcfs/libcfs.h"
#include <linux/seq_file.h>

#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
static unsigned int warn_on_depth = 8;
module_param(warn_on_depth, uint, 0644);
MODULE_PARM_DESC(warn_on_depth, "warning when hash depth is high.");
#endif

struct cfs_wi_sched *cfs_sched_rehash;

static inline void
cfs_hash_nl_lock(union cfs_hash_lock *lock, int exclusive) {}

static inline void
cfs_hash_nl_unlock(union cfs_hash_lock *lock, int exclusive) {}

static inline void
cfs_hash_spin_lock(union cfs_hash_lock *lock, int exclusive)
	__acquires(&lock->spin)
{
	spin_lock(&lock->spin);
}

static inline void
cfs_hash_spin_unlock(union cfs_hash_lock *lock, int exclusive)
	__releases(&lock->spin)
{
	spin_unlock(&lock->spin);
}

static inline void
cfs_hash_rw_lock(union cfs_hash_lock *lock, int exclusive)
	__acquires(&lock->rw)
{
	if (!exclusive)
		read_lock(&lock->rw);
	else
		write_lock(&lock->rw);
}

static inline void
cfs_hash_rw_unlock(union cfs_hash_lock *lock, int exclusive)
	__releases(&lock->rw)
{
	if (!exclusive)
		read_unlock(&lock->rw);
	else
		write_unlock(&lock->rw);
}

/** No lock hash */
static cfs_hash_lock_ops_t cfs_hash_nl_lops = {
	.hs_lock	= cfs_hash_nl_lock,
	.hs_unlock	= cfs_hash_nl_unlock,
	.hs_bkt_lock	= cfs_hash_nl_lock,
	.hs_bkt_unlock	= cfs_hash_nl_unlock,
};

/** no bucket lock, one spinlock to protect everything */
static cfs_hash_lock_ops_t cfs_hash_nbl_lops = {
	.hs_lock	= cfs_hash_spin_lock,
	.hs_unlock	= cfs_hash_spin_unlock,
	.hs_bkt_lock	= cfs_hash_nl_lock,
	.hs_bkt_unlock	= cfs_hash_nl_unlock,
};

/** spin bucket lock, rehash is enabled */
static cfs_hash_lock_ops_t cfs_hash_bkt_spin_lops = {
	.hs_lock	= cfs_hash_rw_lock,
	.hs_unlock	= cfs_hash_rw_unlock,
	.hs_bkt_lock	= cfs_hash_spin_lock,
	.hs_bkt_unlock	= cfs_hash_spin_unlock,
};

/** rw bucket lock, rehash is enabled */
static cfs_hash_lock_ops_t cfs_hash_bkt_rw_lops = {
	.hs_lock	= cfs_hash_rw_lock,
	.hs_unlock	= cfs_hash_rw_unlock,
	.hs_bkt_lock	= cfs_hash_rw_lock,
	.hs_bkt_unlock	= cfs_hash_rw_unlock,
};

/** spin bucket lock, rehash is disabled */
static cfs_hash_lock_ops_t cfs_hash_nr_bkt_spin_lops = {
	.hs_lock	= cfs_hash_nl_lock,
	.hs_unlock	= cfs_hash_nl_unlock,
	.hs_bkt_lock	= cfs_hash_spin_lock,
	.hs_bkt_unlock	= cfs_hash_spin_unlock,
};

/** rw bucket lock, rehash is disabled */
static cfs_hash_lock_ops_t cfs_hash_nr_bkt_rw_lops = {
	.hs_lock	= cfs_hash_nl_lock,
	.hs_unlock	= cfs_hash_nl_unlock,
	.hs_bkt_lock	= cfs_hash_rw_lock,
	.hs_bkt_unlock	= cfs_hash_rw_unlock,
};

static void
cfs_hash_lock_setup(struct cfs_hash *hs)
{
	if (cfs_hash_with_no_lock(hs)) {
		hs->hs_lops = &cfs_hash_nl_lops;

	} else if (cfs_hash_with_no_bktlock(hs)) {
		hs->hs_lops = &cfs_hash_nbl_lops;
		spin_lock_init(&hs->hs_lock.spin);

	} else if (cfs_hash_with_rehash(hs)) {
		rwlock_init(&hs->hs_lock.rw);

		if (cfs_hash_with_rw_bktlock(hs))
			hs->hs_lops = &cfs_hash_bkt_rw_lops;
		else if (cfs_hash_with_spin_bktlock(hs))
			hs->hs_lops = &cfs_hash_bkt_spin_lops;
		else
			LBUG();
	} else {
		if (cfs_hash_with_rw_bktlock(hs))
			hs->hs_lops = &cfs_hash_nr_bkt_rw_lops;
		else if (cfs_hash_with_spin_bktlock(hs))
			hs->hs_lops = &cfs_hash_nr_bkt_spin_lops;
		else
			LBUG();
	}
}

/**
 * Simple hash head without depth tracking
 * new element is always added to head of hlist
 */
typedef struct {
	struct hlist_head	hh_head;	/**< entries list */
} cfs_hash_head_t;

static int
cfs_hash_hh_hhead_size(struct cfs_hash *hs)
{
	return sizeof(cfs_hash_head_t);
}

static struct hlist_head *
cfs_hash_hh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
{
	cfs_hash_head_t *head = (cfs_hash_head_t *)&bd->bd_bucket->hsb_head[0];

	return &head[bd->bd_offset].hh_head;
}

static int
cfs_hash_hh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	hlist_add_head(hnode, cfs_hash_hh_hhead(hs, bd));
	return -1; /* unknown depth */
}

static int
cfs_hash_hh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	hlist_del_init(hnode);
	return -1; /* unknown depth */
}

/**
 * Simple hash head with depth tracking
 * new element is always added to head of hlist
 */
typedef struct {
	struct hlist_head	hd_head;	/**< entries list */
	unsigned int		hd_depth;	/**< list length */
} cfs_hash_head_dep_t;

static int
cfs_hash_hd_hhead_size(struct cfs_hash *hs)
{
	return sizeof(cfs_hash_head_dep_t);
}

static struct hlist_head *
cfs_hash_hd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
{
	cfs_hash_head_dep_t *head;

	head = (cfs_hash_head_dep_t *)&bd->bd_bucket->hsb_head[0];
	return &head[bd->bd_offset].hd_head;
}

static int
cfs_hash_hd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	cfs_hash_head_dep_t *hh = container_of(cfs_hash_hd_hhead(hs, bd),
					       cfs_hash_head_dep_t, hd_head);
	hlist_add_head(hnode, &hh->hd_head);
	return ++hh->hd_depth;
}

static int
cfs_hash_hd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	cfs_hash_head_dep_t *hh = container_of(cfs_hash_hd_hhead(hs, bd),
					       cfs_hash_head_dep_t, hd_head);
	hlist_del_init(hnode);
	return --hh->hd_depth;
}

/**
 * double links hash head without depth tracking
 * new element is always added to tail of hlist
 */
typedef struct {
	struct hlist_head	dh_head;	/**< entries list */
	struct hlist_node	*dh_tail;	/**< the last entry */
} cfs_hash_dhead_t;

static int
cfs_hash_dh_hhead_size(struct cfs_hash *hs)
{
	return sizeof(cfs_hash_dhead_t);
}

static struct hlist_head *
cfs_hash_dh_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
{
	cfs_hash_dhead_t *head;

	head = (cfs_hash_dhead_t *)&bd->bd_bucket->hsb_head[0];
	return &head[bd->bd_offset].dh_head;
}

static int
cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	cfs_hash_dhead_t *dh = container_of(cfs_hash_dh_hhead(hs, bd),
					    cfs_hash_dhead_t, dh_head);

	if (dh->dh_tail != NULL) /* not empty */
		hlist_add_behind(hnode, dh->dh_tail);
	else /* empty list */
		hlist_add_head(hnode, &dh->dh_head);
	dh->dh_tail = hnode;
	return -1; /* unknown depth */
}

static int
cfs_hash_dh_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnd)
{
	cfs_hash_dhead_t *dh = container_of(cfs_hash_dh_hhead(hs, bd),
					    cfs_hash_dhead_t, dh_head);

	if (hnd->next == NULL) { /* it's the tail */
		dh->dh_tail = (hnd->pprev == &dh->dh_head.first) ? NULL :
			      container_of(hnd->pprev, struct hlist_node, next);
	}
	hlist_del_init(hnd);
	return -1; /* unknown depth */
}

/**
 * double links hash head with depth tracking
 * new element is always added to tail of hlist
 */
typedef struct {
	struct hlist_head	dd_head;	/**< entries list */
	struct hlist_node	*dd_tail;	/**< the last entry */
	unsigned int		dd_depth;	/**< list length */
} cfs_hash_dhead_dep_t;

static int
cfs_hash_dd_hhead_size(struct cfs_hash *hs)
{
	return sizeof(cfs_hash_dhead_dep_t);
}

static struct hlist_head *
cfs_hash_dd_hhead(struct cfs_hash *hs, struct cfs_hash_bd *bd)
{
	cfs_hash_dhead_dep_t *head;

	head = (cfs_hash_dhead_dep_t *)&bd->bd_bucket->hsb_head[0];
	return &head[bd->bd_offset].dd_head;
}

static int
cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnode)
{
	cfs_hash_dhead_dep_t *dh = container_of(cfs_hash_dd_hhead(hs, bd),
						cfs_hash_dhead_dep_t, dd_head);

	if (dh->dd_tail != NULL) /* not empty */
		hlist_add_behind(hnode, dh->dd_tail);
	else /* empty list */
		hlist_add_head(hnode, &dh->dd_head);
	dh->dd_tail = hnode;
	return ++dh->dd_depth;
}

static int
cfs_hash_dd_hnode_del(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		      struct hlist_node *hnd)
{
	cfs_hash_dhead_dep_t *dh = container_of(cfs_hash_dd_hhead(hs, bd),
						cfs_hash_dhead_dep_t, dd_head);

	if (hnd->next == NULL) { /* it's the tail */
		dh->dd_tail = (hnd->pprev == &dh->dd_head.first) ? NULL :
			      container_of(hnd->pprev, struct hlist_node, next);
	}
	hlist_del_init(hnd);
	return --dh->dd_depth;
}

static cfs_hash_hlist_ops_t cfs_hash_hh_hops = {
	.hop_hhead	= cfs_hash_hh_hhead,
	.hop_hhead_size	= cfs_hash_hh_hhead_size,
	.hop_hnode_add	= cfs_hash_hh_hnode_add,
	.hop_hnode_del	= cfs_hash_hh_hnode_del,
};

static cfs_hash_hlist_ops_t cfs_hash_hd_hops = {
	.hop_hhead	= cfs_hash_hd_hhead,
	.hop_hhead_size	= cfs_hash_hd_hhead_size,
	.hop_hnode_add	= cfs_hash_hd_hnode_add,
	.hop_hnode_del	= cfs_hash_hd_hnode_del,
};

static cfs_hash_hlist_ops_t cfs_hash_dh_hops = {
	.hop_hhead	= cfs_hash_dh_hhead,
	.hop_hhead_size	= cfs_hash_dh_hhead_size,
	.hop_hnode_add	= cfs_hash_dh_hnode_add,
	.hop_hnode_del	= cfs_hash_dh_hnode_del,
};

static cfs_hash_hlist_ops_t cfs_hash_dd_hops = {
	.hop_hhead	= cfs_hash_dd_hhead,
	.hop_hhead_size	= cfs_hash_dd_hhead_size,
	.hop_hnode_add	= cfs_hash_dd_hnode_add,
	.hop_hnode_del	= cfs_hash_dd_hnode_del,
};

static void
cfs_hash_hlist_setup(struct cfs_hash *hs)
{
	if (cfs_hash_with_add_tail(hs)) {
		hs->hs_hops = cfs_hash_with_depth(hs) ?
			      &cfs_hash_dd_hops : &cfs_hash_dh_hops;
	} else {
		hs->hs_hops = cfs_hash_with_depth(hs) ?
			      &cfs_hash_hd_hops : &cfs_hash_hh_hops;
	}
}

static void
cfs_hash_bd_from_key(struct cfs_hash *hs, struct cfs_hash_bucket **bkts,
		     unsigned int bits, const void *key, struct cfs_hash_bd *bd)
{
	unsigned int index = cfs_hash_id(hs, key, (1U << bits) - 1);

	LASSERT(bits == hs->hs_cur_bits || bits == hs->hs_rehash_bits);

	bd->bd_bucket = bkts[index & ((1U << (bits - hs->hs_bkt_bits)) - 1)];
	bd->bd_offset = index >> (bits - hs->hs_bkt_bits);
}
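
/*
 * A worked example of the mapping above (illustrative numbers only):
 * with bits = 5 and hs_bkt_bits = 2 there are 2^5 = 32 hash slots,
 * grouped into 2^(5 - 2) = 8 buckets of 2^2 = 4 hlist_heads each.
 * The low (bits - bkt_bits) bits of @index select the bucket and the
 * remaining high bits select the hlist_head inside it, e.g. index 13
 * (0b01101) gives bd_bucket = bkts[13 & 7] = bkts[5] and
 * bd_offset = 13 >> 3 = 1.  Consecutive indices thus land in different
 * buckets, i.e. under different bucket locks.
 */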

void
cfs_hash_bd_get(struct cfs_hash *hs, const void *key, struct cfs_hash_bd *bd)
{
	/* NB: caller should hold hs->hs_rwlock if REHASH is set */
	if (likely(hs->hs_rehash_buckets == NULL)) {
		cfs_hash_bd_from_key(hs, hs->hs_buckets,
				     hs->hs_cur_bits, key, bd);
	} else {
		LASSERT(hs->hs_rehash_bits != 0);
		cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
				     hs->hs_rehash_bits, key, bd);
	}
}
EXPORT_SYMBOL(cfs_hash_bd_get);

static inline void
cfs_hash_bd_dep_record(struct cfs_hash *hs, struct cfs_hash_bd *bd, int dep_cur)
{
	if (likely(dep_cur <= bd->bd_bucket->hsb_depmax))
		return;

	bd->bd_bucket->hsb_depmax = dep_cur;
# if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
	if (likely(warn_on_depth == 0 ||
		   max(warn_on_depth, hs->hs_dep_max) >= dep_cur))
		return;

	spin_lock(&hs->hs_dep_lock);
	hs->hs_dep_max	= dep_cur;
	hs->hs_dep_bkt	= bd->bd_bucket->hsb_index;
	hs->hs_dep_off	= bd->bd_offset;
	hs->hs_dep_bits	= hs->hs_cur_bits;
	spin_unlock(&hs->hs_dep_lock);

	cfs_wi_schedule(cfs_sched_rehash, &hs->hs_dep_wi);
# endif
}

void
cfs_hash_bd_add_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		       struct hlist_node *hnode)
{
	int rc;

	rc = hs->hs_hops->hop_hnode_add(hs, bd, hnode);
	cfs_hash_bd_dep_record(hs, bd, rc);
	bd->bd_bucket->hsb_version++;
	if (unlikely(bd->bd_bucket->hsb_version == 0))
		bd->bd_bucket->hsb_version++;
	bd->bd_bucket->hsb_count++;

	if (cfs_hash_with_counter(hs))
		atomic_inc(&hs->hs_count);
	if (!cfs_hash_with_no_itemref(hs))
		cfs_hash_get(hs, hnode);
}
EXPORT_SYMBOL(cfs_hash_bd_add_locked);

void
cfs_hash_bd_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
		       struct hlist_node *hnode)
{
	hs->hs_hops->hop_hnode_del(hs, bd, hnode);

	LASSERT(bd->bd_bucket->hsb_count > 0);
	bd->bd_bucket->hsb_count--;
	bd->bd_bucket->hsb_version++;
	if (unlikely(bd->bd_bucket->hsb_version == 0))
		bd->bd_bucket->hsb_version++;

	if (cfs_hash_with_counter(hs)) {
		LASSERT(atomic_read(&hs->hs_count) > 0);
		atomic_dec(&hs->hs_count);
	}
	if (!cfs_hash_with_no_itemref(hs))
		cfs_hash_put_locked(hs, hnode);
}
EXPORT_SYMBOL(cfs_hash_bd_del_locked);

void
cfs_hash_bd_move_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd_old,
			struct cfs_hash_bd *bd_new, struct hlist_node *hnode)
{
	struct cfs_hash_bucket *obkt = bd_old->bd_bucket;
	struct cfs_hash_bucket *nbkt = bd_new->bd_bucket;
	int rc;

	if (cfs_hash_bd_compare(bd_old, bd_new) == 0)
		return;

	/* use cfs_hash_bd_hnode_add/del, to avoid atomic & refcount ops
	 * in cfs_hash_bd_del/add_locked */
	hs->hs_hops->hop_hnode_del(hs, bd_old, hnode);
	rc = hs->hs_hops->hop_hnode_add(hs, bd_new, hnode);
	cfs_hash_bd_dep_record(hs, bd_new, rc);

	LASSERT(obkt->hsb_count > 0);
	obkt->hsb_count--;
	obkt->hsb_version++;
	if (unlikely(obkt->hsb_version == 0))
		obkt->hsb_version++;
	nbkt->hsb_count++;
	nbkt->hsb_version++;
	if (unlikely(nbkt->hsb_version == 0))
		nbkt->hsb_version++;
}
EXPORT_SYMBOL(cfs_hash_bd_move_locked);

enum {
	/** always set, for sanity (avoid ZERO intent) */
	CFS_HS_LOOKUP_MASK_FIND	= 1 << 0,
	/** return entry with a ref */
	CFS_HS_LOOKUP_MASK_REF	= 1 << 1,
	/** add entry if not existing */
	CFS_HS_LOOKUP_MASK_ADD	= 1 << 2,
	/** delete entry, ignore other masks */
	CFS_HS_LOOKUP_MASK_DEL	= 1 << 3,
};

typedef enum cfs_hash_lookup_intent {
	/** return item w/o refcount */
	CFS_HS_LOOKUP_IT_PEEK	 = CFS_HS_LOOKUP_MASK_FIND,
	/** return item with refcount */
	CFS_HS_LOOKUP_IT_FIND	 = (CFS_HS_LOOKUP_MASK_FIND |
				    CFS_HS_LOOKUP_MASK_REF),
	/** return item w/o refcount if it exists, otherwise add */
	CFS_HS_LOOKUP_IT_ADD	 = (CFS_HS_LOOKUP_MASK_FIND |
				    CFS_HS_LOOKUP_MASK_ADD),
	/** return item with refcount if it exists, otherwise add */
	CFS_HS_LOOKUP_IT_FINDADD = (CFS_HS_LOOKUP_IT_FIND |
				    CFS_HS_LOOKUP_MASK_ADD),
	/** delete if it exists */
	CFS_HS_LOOKUP_IT_FINDDEL = (CFS_HS_LOOKUP_MASK_FIND |
				    CFS_HS_LOOKUP_MASK_DEL)
} cfs_hash_lookup_intent_t;

static struct hlist_node *
cfs_hash_bd_lookup_intent(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			  const void *key, struct hlist_node *hnode,
			  cfs_hash_lookup_intent_t intent)
{
	struct hlist_head *hhead = cfs_hash_bd_hhead(hs, bd);
	struct hlist_node *ehnode;
	struct hlist_node *match;
	int intent_add = (intent & CFS_HS_LOOKUP_MASK_ADD) != 0;

	/* with this function, we can avoid a lot of useless refcount ops,
	 * which are expensive atomic operations most of the time. */
	match = intent_add ? NULL : hnode;
	hlist_for_each(ehnode, hhead) {
		if (!cfs_hash_keycmp(hs, key, ehnode))
			continue;

		if (match != NULL && match != ehnode) /* can't match */
			continue;

		/* match and ... */
		if ((intent & CFS_HS_LOOKUP_MASK_DEL) != 0) {
			cfs_hash_bd_del_locked(hs, bd, ehnode);
			return ehnode;
		}

		/* caller wants refcount? */
		if ((intent & CFS_HS_LOOKUP_MASK_REF) != 0)
			cfs_hash_get(hs, ehnode);
		return ehnode;
	}
	/* no match item */
	if (!intent_add)
		return NULL;

	LASSERT(hnode != NULL);
	cfs_hash_bd_add_locked(hs, bd, hnode);
	return hnode;
}

struct hlist_node *
cfs_hash_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			  const void *key)
{
	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
					 CFS_HS_LOOKUP_IT_FIND);
}
EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);

struct hlist_node *
cfs_hash_bd_peek_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			const void *key)
{
	return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
					 CFS_HS_LOOKUP_IT_PEEK);
}
EXPORT_SYMBOL(cfs_hash_bd_peek_locked);

struct hlist_node *
cfs_hash_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			   const void *key, struct hlist_node *hnode,
			   int noref)
{
	return cfs_hash_bd_lookup_intent(hs, bd, key, hnode,
					 CFS_HS_LOOKUP_IT_ADD |
					 (!noref * CFS_HS_LOOKUP_MASK_REF));
}
EXPORT_SYMBOL(cfs_hash_bd_findadd_locked);

struct hlist_node *
cfs_hash_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			   const void *key, struct hlist_node *hnode)
{
	/* hnode can be NULL, we find the first item with @key */
	return cfs_hash_bd_lookup_intent(hs, bd, key, hnode,
					 CFS_HS_LOOKUP_IT_FINDDEL);
}
EXPORT_SYMBOL(cfs_hash_bd_finddel_locked);

static void
cfs_hash_multi_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
		       unsigned n, int excl)
{
	struct cfs_hash_bucket *prev = NULL;
	int i;

	/**
	 * bds must be ordered in ascending order by bd->bd_bucket->hsb_index.
	 * NB: it's possible that several bds point to the same bucket but
	 * have different bd::bd_offset, so we need to take care to avoid
	 * deadlock.
	 */
	cfs_hash_for_each_bd(bds, n, i) {
		if (prev == bds[i].bd_bucket)
			continue;

		LASSERT(prev == NULL ||
			prev->hsb_index < bds[i].bd_bucket->hsb_index);
		cfs_hash_bd_lock(hs, &bds[i], excl);
		prev = bds[i].bd_bucket;
	}
}

static void
cfs_hash_multi_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds,
			 unsigned n, int excl)
{
	struct cfs_hash_bucket *prev = NULL;
	int i;

	cfs_hash_for_each_bd(bds, n, i) {
		if (prev != bds[i].bd_bucket) {
			cfs_hash_bd_unlock(hs, &bds[i], excl);
			prev = bds[i].bd_bucket;
		}
	}
}

static struct hlist_node *
cfs_hash_multi_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
				unsigned n, const void *key)
{
	struct hlist_node *ehnode;
	unsigned i;

	cfs_hash_for_each_bd(bds, n, i) {
		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, NULL,
						   CFS_HS_LOOKUP_IT_FIND);
		if (ehnode != NULL)
			return ehnode;
	}
	return NULL;
}

static struct hlist_node *
cfs_hash_multi_bd_findadd_locked(struct cfs_hash *hs,
				 struct cfs_hash_bd *bds, unsigned n,
				 const void *key, struct hlist_node *hnode,
				 int noref)
{
	struct hlist_node *ehnode;
	int intent;
	unsigned i;

	LASSERT(hnode != NULL);
	intent = CFS_HS_LOOKUP_IT_PEEK | (!noref * CFS_HS_LOOKUP_MASK_REF);

	cfs_hash_for_each_bd(bds, n, i) {
		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key,
						   NULL, intent);
		if (ehnode != NULL)
			return ehnode;
	}

	if (i == 1) { /* only one bucket */
		cfs_hash_bd_add_locked(hs, &bds[0], hnode);
	} else {
		struct cfs_hash_bd mybd;

		cfs_hash_bd_get(hs, key, &mybd);
		cfs_hash_bd_add_locked(hs, &mybd, hnode);
	}

	return hnode;
}

static struct hlist_node *
cfs_hash_multi_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
				 unsigned n, const void *key,
				 struct hlist_node *hnode)
{
	struct hlist_node *ehnode;
	unsigned i;

	cfs_hash_for_each_bd(bds, n, i) {
		ehnode = cfs_hash_bd_lookup_intent(hs, &bds[i], key, hnode,
						   CFS_HS_LOOKUP_IT_FINDDEL);
		if (ehnode != NULL)
			return ehnode;
	}
	return NULL;
}

static void
cfs_hash_bd_order(struct cfs_hash_bd *bd1, struct cfs_hash_bd *bd2)
{
	int rc;

	if (bd2->bd_bucket == NULL)
		return;

	if (bd1->bd_bucket == NULL) {
		*bd1 = *bd2;
		bd2->bd_bucket = NULL;
		return;
	}

	rc = cfs_hash_bd_compare(bd1, bd2);
	if (rc == 0) {
		bd2->bd_bucket = NULL;

	} else if (rc > 0) { /* swap bd1 and bd2 */
		struct cfs_hash_bd tmp;

		tmp = *bd2;
		*bd2 = *bd1;
		*bd1 = tmp;
	}
}

void
cfs_hash_dual_bd_get(struct cfs_hash *hs, const void *key,
		     struct cfs_hash_bd *bds)
{
	/* NB: caller should hold hs_lock.rw if REHASH is set */
	cfs_hash_bd_from_key(hs, hs->hs_buckets,
			     hs->hs_cur_bits, key, &bds[0]);
	if (likely(hs->hs_rehash_buckets == NULL)) {
		/* no rehash or not rehashing */
		bds[1].bd_bucket = NULL;
		return;
	}

	LASSERT(hs->hs_rehash_bits != 0);
	cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
			     hs->hs_rehash_bits, key, &bds[1]);

	cfs_hash_bd_order(&bds[0], &bds[1]);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_get);

void
cfs_hash_dual_bd_lock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
{
	cfs_hash_multi_bd_lock(hs, bds, 2, excl);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_lock);

void
cfs_hash_dual_bd_unlock(struct cfs_hash *hs, struct cfs_hash_bd *bds, int excl)
{
	cfs_hash_multi_bd_unlock(hs, bds, 2, excl);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_unlock);

struct hlist_node *
cfs_hash_dual_bd_lookup_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
			       const void *key)
{
	return cfs_hash_multi_bd_lookup_locked(hs, bds, 2, key);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_lookup_locked);

struct hlist_node *
cfs_hash_dual_bd_findadd_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
				const void *key, struct hlist_node *hnode,
				int noref)
{
	return cfs_hash_multi_bd_findadd_locked(hs, bds, 2, key,
						hnode, noref);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_findadd_locked);

struct hlist_node *
cfs_hash_dual_bd_finddel_locked(struct cfs_hash *hs, struct cfs_hash_bd *bds,
				const void *key, struct hlist_node *hnode)
{
	return cfs_hash_multi_bd_finddel_locked(hs, bds, 2, key, hnode);
}
EXPORT_SYMBOL(cfs_hash_dual_bd_finddel_locked);

static void
cfs_hash_buckets_free(struct cfs_hash_bucket **buckets,
		      int bkt_size, int prev_size, int size)
{
	int i;

	for (i = prev_size; i < size; i++) {
		if (buckets[i] != NULL)
			LIBCFS_FREE(buckets[i], bkt_size);
	}

	LIBCFS_FREE(buckets, sizeof(buckets[0]) * size);
}

/*
 * Create or grow bucket memory. Return old_buckets if no allocation was
 * needed, the newly allocated buckets if allocation was needed and
 * successful, and NULL on error.
 */
static struct cfs_hash_bucket **
cfs_hash_buckets_realloc(struct cfs_hash *hs, struct cfs_hash_bucket **old_bkts,
			 unsigned int old_size, unsigned int new_size)
{
	struct cfs_hash_bucket **new_bkts;
	int i;

	LASSERT(old_size == 0 || old_bkts != NULL);

	if (old_bkts != NULL && old_size == new_size)
		return old_bkts;

	LIBCFS_ALLOC(new_bkts, sizeof(new_bkts[0]) * new_size);
	if (new_bkts == NULL)
		return NULL;

	if (old_bkts != NULL) {
		memcpy(new_bkts, old_bkts,
		       min(old_size, new_size) * sizeof(*old_bkts));
	}

	for (i = old_size; i < new_size; i++) {
		struct hlist_head *hhead;
		struct cfs_hash_bd bd;

		LIBCFS_ALLOC(new_bkts[i], cfs_hash_bkt_size(hs));
		if (new_bkts[i] == NULL) {
			cfs_hash_buckets_free(new_bkts, cfs_hash_bkt_size(hs),
					      old_size, new_size);
			return NULL;
		}

		new_bkts[i]->hsb_index   = i;
		new_bkts[i]->hsb_version = 1;  /* shouldn't be zero */
		new_bkts[i]->hsb_depmax  = -1; /* unknown */
		bd.bd_bucket = new_bkts[i];
		cfs_hash_bd_for_each_hlist(hs, &bd, hhead)
			INIT_HLIST_HEAD(hhead);

		if (cfs_hash_with_no_lock(hs) ||
		    cfs_hash_with_no_bktlock(hs))
			continue;

		if (cfs_hash_with_rw_bktlock(hs))
			rwlock_init(&new_bkts[i]->hsb_lock.rw);
		else if (cfs_hash_with_spin_bktlock(hs))
			spin_lock_init(&new_bkts[i]->hsb_lock.spin);
		else
			LBUG(); /* invalid use-case */
	}
	return new_bkts;
}

/**
 * Initialize new libcfs hash, where:
 * @name     - Descriptive hash name
 * @cur_bits - Initial hash table size, in bits
 * @max_bits - Maximum allowed hash table resize, in bits
 * @ops      - Registered hash table operations
 * @flags    - CFS_HASH_REHASH enables dynamic hash resizing
 *	     - CFS_HASH_SORT enables chained hash sort
 */
static int cfs_hash_rehash_worker(cfs_workitem_t *wi);

#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
static int cfs_hash_dep_print(cfs_workitem_t *wi)
{
	struct cfs_hash *hs = container_of(wi, struct cfs_hash, hs_dep_wi);
	int dep;
	int bkt;
	int off;
	int bits;

	spin_lock(&hs->hs_dep_lock);
	dep  = hs->hs_dep_max;
	bkt  = hs->hs_dep_bkt;
	off  = hs->hs_dep_off;
	bits = hs->hs_dep_bits;
	spin_unlock(&hs->hs_dep_lock);

	LCONSOLE_WARN("#### HASH %s (bits: %d): max depth %d at bucket %d/%d\n",
		      hs->hs_name, bits, dep, bkt, off);
	spin_lock(&hs->hs_dep_lock);
	hs->hs_dep_bits = 0; /* mark as workitem done */
	spin_unlock(&hs->hs_dep_lock);
	return 0;
}

static void cfs_hash_depth_wi_init(struct cfs_hash *hs)
{
	spin_lock_init(&hs->hs_dep_lock);
	cfs_wi_init(&hs->hs_dep_wi, hs, cfs_hash_dep_print);
}

static void cfs_hash_depth_wi_cancel(struct cfs_hash *hs)
{
	if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_dep_wi))
		return;

	spin_lock(&hs->hs_dep_lock);
	while (hs->hs_dep_bits != 0) {
		spin_unlock(&hs->hs_dep_lock);
		cond_resched();
		spin_lock(&hs->hs_dep_lock);
	}
	spin_unlock(&hs->hs_dep_lock);
}

#else /* CFS_HASH_DEBUG_LEVEL < CFS_HASH_DEBUG_1 */

static inline void cfs_hash_depth_wi_init(struct cfs_hash *hs) {}
static inline void cfs_hash_depth_wi_cancel(struct cfs_hash *hs) {}

#endif /* CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 */

struct cfs_hash *
cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
		unsigned bkt_bits, unsigned extra_bytes,
		unsigned min_theta, unsigned max_theta,
		cfs_hash_ops_t *ops, unsigned flags)
{
	struct cfs_hash *hs;
	int len;

	CLASSERT(CFS_HASH_THETA_BITS < 15);

	LASSERT(name != NULL);
	LASSERT(ops != NULL);
	LASSERT(ops->hs_key);
	LASSERT(ops->hs_hash);
	LASSERT(ops->hs_object);
	LASSERT(ops->hs_keycmp);
	LASSERT(ops->hs_get != NULL);
	LASSERT(ops->hs_put_locked != NULL);

	if ((flags & CFS_HASH_REHASH) != 0)
		flags |= CFS_HASH_COUNTER; /* must have counter */

	LASSERT(cur_bits > 0);
	LASSERT(cur_bits >= bkt_bits);
	LASSERT(max_bits >= cur_bits && max_bits < 31);
	LASSERT(ergo((flags & CFS_HASH_REHASH) == 0, cur_bits == max_bits));
	LASSERT(ergo((flags & CFS_HASH_REHASH) != 0,
		     (flags & CFS_HASH_NO_LOCK) == 0));
	LASSERT(ergo((flags & CFS_HASH_REHASH_KEY) != 0,
		     ops->hs_keycpy != NULL));

	len = (flags & CFS_HASH_BIGNAME) == 0 ?
	      CFS_HASH_NAME_LEN : CFS_HASH_BIGNAME_LEN;
	LIBCFS_ALLOC(hs, offsetof(struct cfs_hash, hs_name[len]));
	if (hs == NULL)
		return NULL;

	strncpy(hs->hs_name, name, len);
	hs->hs_name[len - 1] = '\0';
	hs->hs_flags = flags;

	atomic_set(&hs->hs_refcount, 1);
	atomic_set(&hs->hs_count, 0);

	cfs_hash_lock_setup(hs);
	cfs_hash_hlist_setup(hs);

	hs->hs_cur_bits = (__u8)cur_bits;
	hs->hs_min_bits = (__u8)cur_bits;
	hs->hs_max_bits = (__u8)max_bits;
	hs->hs_bkt_bits = (__u8)bkt_bits;

	hs->hs_ops	   = ops;
	hs->hs_extra_bytes = extra_bytes;
	hs->hs_rehash_bits = 0;
	cfs_wi_init(&hs->hs_rehash_wi, hs, cfs_hash_rehash_worker);
	cfs_hash_depth_wi_init(hs);

	if (cfs_hash_with_rehash(hs))
		__cfs_hash_set_theta(hs, min_theta, max_theta);

	hs->hs_buckets = cfs_hash_buckets_realloc(hs, NULL, 0,
						  CFS_HASH_NBKT(hs));
	if (hs->hs_buckets != NULL)
		return hs;

	LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[len]));
	return NULL;
}
EXPORT_SYMBOL(cfs_hash_create);
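
/*
 * A minimal usage sketch (illustrative only, not part of this file).
 * "struct foo" and the foo_* callbacks are hypothetical; their exact
 * prototypes are those of cfs_hash_ops_t in libcfs_hash.h, and
 * CFS_HASH_MIN_THETA/CFS_HASH_MAX_THETA/CFS_HASH_DEFAULT are assumed
 * to be the stock helpers from the same header:
 *
 *	struct foo {
 *		__u64			foo_key;
 *		struct hlist_node	foo_hnode;
 *	};
 *
 *	static cfs_hash_ops_t foo_hash_ops = {
 *		.hs_hash	= foo_hash,	  // hash @key into [0, @mask]
 *		.hs_key		= foo_key,	  // hnode -> key
 *		.hs_keycmp	= foo_keycmp,	  // compare @key with hnode's key
 *		.hs_object	= foo_object,	  // hnode -> enclosing foo
 *		.hs_get		= foo_get,	  // take a ref on the object
 *		.hs_put_locked	= foo_put_locked, // drop a ref, bucket lock held
 *	};
 *
 *	hs = cfs_hash_create("foo", 5, 10, 3, 0,
 *			     CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
 *			     &foo_hash_ops, CFS_HASH_DEFAULT);
 *
 * Any flag combination accepted by the LASSERTs above works as well.
 */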

/**
 * Cleanup libcfs hash @hs.
 */
static void
cfs_hash_destroy(struct cfs_hash *hs)
{
	struct hlist_node *hnode;
	struct hlist_node *pos;
	struct cfs_hash_bd bd;
	int i;

	LASSERT(hs != NULL);
	LASSERT(!cfs_hash_is_exiting(hs) &&
		!cfs_hash_is_iterating(hs));

	/**
	 * prohibit further rehashes; no lock is needed because
	 * we are the only (last) one who can change it.
	 */
	hs->hs_exiting = 1;
	if (cfs_hash_with_rehash(hs))
		cfs_hash_rehash_cancel(hs);

	cfs_hash_depth_wi_cancel(hs);
	/* rehash should be done/canceled */
	LASSERT(hs->hs_buckets != NULL &&
		hs->hs_rehash_buckets == NULL);

	cfs_hash_for_each_bucket(hs, &bd, i) {
		struct hlist_head *hhead;

		LASSERT(bd.bd_bucket != NULL);
		/* no need to take this lock, just for consistent code */
		cfs_hash_bd_lock(hs, &bd, 1);

		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
			hlist_for_each_safe(hnode, pos, hhead) {
				LASSERTF(!cfs_hash_with_assert_empty(hs),
					 "hash %s bucket %u(%u) is not empty: %u items left\n",
					 hs->hs_name, bd.bd_bucket->hsb_index,
					 bd.bd_offset, bd.bd_bucket->hsb_count);
				/* can't validate the key, because we
				 * may have interrupted a rehash */
				cfs_hash_bd_del_locked(hs, &bd, hnode);
				cfs_hash_exit(hs, hnode);
			}
		}
		LASSERT(bd.bd_bucket->hsb_count == 0);
		cfs_hash_bd_unlock(hs, &bd, 1);
		cond_resched();
	}

	LASSERT(atomic_read(&hs->hs_count) == 0);

	cfs_hash_buckets_free(hs->hs_buckets, cfs_hash_bkt_size(hs),
			      0, CFS_HASH_NBKT(hs));
	i = cfs_hash_with_bigname(hs) ?
	    CFS_HASH_BIGNAME_LEN : CFS_HASH_NAME_LEN;
	LIBCFS_FREE(hs, offsetof(struct cfs_hash, hs_name[i]));
}

struct cfs_hash *cfs_hash_getref(struct cfs_hash *hs)
{
	if (atomic_inc_not_zero(&hs->hs_refcount))
		return hs;
	return NULL;
}
EXPORT_SYMBOL(cfs_hash_getref);

void cfs_hash_putref(struct cfs_hash *hs)
{
	if (atomic_dec_and_test(&hs->hs_refcount))
		cfs_hash_destroy(hs);
}
EXPORT_SYMBOL(cfs_hash_putref);

static inline int
cfs_hash_rehash_bits(struct cfs_hash *hs)
{
	if (cfs_hash_with_no_lock(hs) ||
	    !cfs_hash_with_rehash(hs))
		return -EOPNOTSUPP;

	if (unlikely(cfs_hash_is_exiting(hs)))
		return -ESRCH;

	if (unlikely(cfs_hash_is_rehashing(hs)))
		return -EALREADY;

	if (unlikely(cfs_hash_is_iterating(hs)))
		return -EAGAIN;

	/* XXX: need to handle case with max_theta != 2.0
	 *      and the case with min_theta != 0.5 */
	if ((hs->hs_cur_bits < hs->hs_max_bits) &&
	    (__cfs_hash_theta(hs) > hs->hs_max_theta))
		return hs->hs_cur_bits + 1;

	if (!cfs_hash_with_shrink(hs))
		return 0;

	if ((hs->hs_cur_bits > hs->hs_min_bits) &&
	    (__cfs_hash_theta(hs) < hs->hs_min_theta))
		return hs->hs_cur_bits - 1;

	return 0;
}
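
/*
 * Illustrative numbers for the grow/shrink decision above, assuming
 * __cfs_hash_theta() reports the average number of items per hash slot
 * (hs_count / 2^hs_cur_bits) in CFS_HASH_THETA_BITS fixed point: with
 * hs_cur_bits = 10 (1024 slots) and 4096 items the load factor is 4.0,
 * so if hs_max_theta encodes 2.0 and hs_cur_bits < hs_max_bits the
 * table grows to 11 bits; with 256 items the load factor is 0.25, and
 * a shrink-enabled table whose hs_min_theta encodes 0.5 shrinks to 9
 * bits.
 */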

/**
 * don't allow inline rehash if:
 * - user wants non-blocking change (add/del) on hash table
 * - too many elements
 */
static inline int
cfs_hash_rehash_inline(struct cfs_hash *hs)
{
	return !cfs_hash_with_nblk_change(hs) &&
	       atomic_read(&hs->hs_count) < CFS_HASH_LOOP_HOG;
}

/**
 * Add item @hnode to libcfs hash @hs using @key.  The registered
 * ops->hs_get function will be called when the item is added.
 */
void
cfs_hash_add(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
{
	struct cfs_hash_bd bd;
	int bits;

	LASSERT(hlist_unhashed(hnode));

	cfs_hash_lock(hs, 0);
	cfs_hash_bd_get_and_lock(hs, key, &bd, 1);

	cfs_hash_key_validate(hs, key, hnode);
	cfs_hash_bd_add_locked(hs, &bd, hnode);

	cfs_hash_bd_unlock(hs, &bd, 1);

	bits = cfs_hash_rehash_bits(hs);
	cfs_hash_unlock(hs, 0);
	if (bits > 0)
		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));
}
EXPORT_SYMBOL(cfs_hash_add);

static struct hlist_node *
cfs_hash_find_or_add(struct cfs_hash *hs, const void *key,
		     struct hlist_node *hnode, int noref)
{
	struct hlist_node *ehnode;
	struct cfs_hash_bd bds[2];
	int bits = 0;

	LASSERT(hlist_unhashed(hnode));

	cfs_hash_lock(hs, 0);
	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);

	cfs_hash_key_validate(hs, key, hnode);
	ehnode = cfs_hash_dual_bd_findadd_locked(hs, bds, key,
						 hnode, noref);
	cfs_hash_dual_bd_unlock(hs, bds, 1);

	if (ehnode == hnode) /* new item added */
		bits = cfs_hash_rehash_bits(hs);
	cfs_hash_unlock(hs, 0);
	if (bits > 0)
		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));

	return ehnode;
}

/**
 * Add item @hnode to libcfs hash @hs using @key.  The registered
 * ops->hs_get function will be called if the item was added.
 * Returns 0 on success or -EALREADY on key collisions.
 */
int
cfs_hash_add_unique(struct cfs_hash *hs, const void *key,
		    struct hlist_node *hnode)
{
	return cfs_hash_find_or_add(hs, key, hnode, 1) != hnode ?
	       -EALREADY : 0;
}
EXPORT_SYMBOL(cfs_hash_add_unique);

/**
 * Add item @hnode to libcfs hash @hs using @key.  If this @key
 * already exists in the hash then ops->hs_get will be called on the
 * conflicting entry and that entry will be returned to the caller.
 * Otherwise ops->hs_get is called on the item which was added.
 */
void *
cfs_hash_findadd_unique(struct cfs_hash *hs, const void *key,
			struct hlist_node *hnode)
{
	hnode = cfs_hash_find_or_add(hs, key, hnode, 0);

	return cfs_hash_object(hs, hnode);
}
EXPORT_SYMBOL(cfs_hash_findadd_unique);

/**
 * Delete item @hnode from the libcfs hash @hs using @key.  The @key
 * is required to ensure the correct hash bucket is locked since there
 * is no direct linkage from the item to the bucket.  The object
 * removed from the hash will be returned and ops->hs_put is called
 * on the removed object.
 */
void *
cfs_hash_del(struct cfs_hash *hs, const void *key, struct hlist_node *hnode)
{
	void *obj = NULL;
	int bits = 0;
	struct cfs_hash_bd bds[2];

	cfs_hash_lock(hs, 0);
	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 1);

	/* NB: do nothing if @hnode is not in hash table */
	if (hnode == NULL || !hlist_unhashed(hnode)) {
		if (bds[1].bd_bucket == NULL && hnode != NULL) {
			cfs_hash_bd_del_locked(hs, &bds[0], hnode);
		} else {
			hnode = cfs_hash_dual_bd_finddel_locked(hs, bds,
								key, hnode);
		}
	}

	if (hnode != NULL) {
		obj  = cfs_hash_object(hs, hnode);
		bits = cfs_hash_rehash_bits(hs);
	}

	cfs_hash_dual_bd_unlock(hs, bds, 1);
	cfs_hash_unlock(hs, 0);
	if (bits > 0)
		cfs_hash_rehash(hs, cfs_hash_rehash_inline(hs));

	return obj;
}
EXPORT_SYMBOL(cfs_hash_del);

/**
 * Delete item given @key in libcfs hash @hs.  The first @key found in
 * the hash will be removed; if the key exists multiple times in the hash
 * @hs this function must be called once per key.  The removed object
 * will be returned and ops->hs_put is called on the removed object.
 */
void *
cfs_hash_del_key(struct cfs_hash *hs, const void *key)
{
	return cfs_hash_del(hs, key, NULL);
}
EXPORT_SYMBOL(cfs_hash_del_key);

/**
 * Lookup an item using @key in the libcfs hash @hs and return it.
 * If the @key is found in the hash hs->hs_get() is called and the
 * matching object is returned.  It is the caller's responsibility
 * to call the counterpart ops->hs_put using the cfs_hash_put() macro
 * when finished with the object.  If the @key was not found
 * in the hash @hs NULL is returned.
 */
void *
cfs_hash_lookup(struct cfs_hash *hs, const void *key)
{
	void *obj = NULL;
	struct hlist_node *hnode;
	struct cfs_hash_bd bds[2];

	cfs_hash_lock(hs, 0);
	cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);

	hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
	if (hnode != NULL)
		obj = cfs_hash_object(hs, hnode);

	cfs_hash_dual_bd_unlock(hs, bds, 0);
	cfs_hash_unlock(hs, 0);

	return obj;
}
EXPORT_SYMBOL(cfs_hash_lookup);
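
/*
 * Sketch of the lookup/put pairing described above ("struct foo" with
 * an embedded foo_hnode is the hypothetical object from the create
 * example):
 *
 *	struct foo *obj = cfs_hash_lookup(hs, &key);
 *
 *	if (obj != NULL) {
 *		... use obj; ops->hs_get already took a reference ...
 *		cfs_hash_put(hs, &obj->foo_hnode);
 *	}
 */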

static void
cfs_hash_for_each_enter(struct cfs_hash *hs)
{
	LASSERT(!cfs_hash_is_exiting(hs));

	if (!cfs_hash_with_rehash(hs))
		return;
	/*
	 * NB: there is a race on cfs_hash::hs_iterating, but it doesn't
	 * matter because it's just an unreliable signal to the rehash
	 * thread; the rehash thread will try to finish the rehash ASAP
	 * when it sees this.
	 */
	hs->hs_iterating = 1;

	cfs_hash_lock(hs, 1);
	hs->hs_iterators++;

	/* NB: iteration is mostly called by service threads; we tend to
	 * cancel a pending rehash request instead of blocking the service
	 * thread, and will relaunch the rehash request after iteration */
	if (cfs_hash_is_rehashing(hs))
		cfs_hash_rehash_cancel_locked(hs);
	cfs_hash_unlock(hs, 1);
}

static void
cfs_hash_for_each_exit(struct cfs_hash *hs)
{
	int remained;
	int bits;

	if (!cfs_hash_with_rehash(hs))
		return;
	cfs_hash_lock(hs, 1);
	remained = --hs->hs_iterators;
	bits = cfs_hash_rehash_bits(hs);
	cfs_hash_unlock(hs, 1);
	/* NB: race on cfs_hash::hs_iterating, see above */
	if (remained == 0)
		hs->hs_iterating = 0;
	if (bits > 0) {
		cfs_hash_rehash(hs, atomic_read(&hs->hs_count) <
				    CFS_HASH_LOOP_HOG);
	}
}

/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 *
 * a) the function may sleep!
 * b) during the callback:
 *    . the bucket lock is held so the callback must never sleep.
 *    . if @remove_safe is true, the user can remove the current item
 *      by calling cfs_hash_bd_del_locked
 */
static __u64
cfs_hash_for_each_tight(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
			void *data, int remove_safe)
{
	struct hlist_node *hnode;
	struct hlist_node *pos;
	struct cfs_hash_bd bd;
	__u64 count = 0;
	int excl = !!remove_safe;
	int loop = 0;
	int i;

	cfs_hash_for_each_enter(hs);

	cfs_hash_lock(hs, 0);
	LASSERT(!cfs_hash_is_rehashing(hs));

	cfs_hash_for_each_bucket(hs, &bd, i) {
		struct hlist_head *hhead;

		cfs_hash_bd_lock(hs, &bd, excl);
		if (func == NULL) { /* only glimpse size */
			count += bd.bd_bucket->hsb_count;
			cfs_hash_bd_unlock(hs, &bd, excl);
			continue;
		}

		cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
			hlist_for_each_safe(hnode, pos, hhead) {
				cfs_hash_bucket_validate(hs, &bd, hnode);
				count++;
				loop++;
				if (func(hs, &bd, hnode, data)) {
					cfs_hash_bd_unlock(hs, &bd, excl);
					goto out;
				}
			}
		}
		cfs_hash_bd_unlock(hs, &bd, excl);
		if (loop < CFS_HASH_LOOP_HOG)
			continue;
		loop = 0;
		cfs_hash_unlock(hs, 0);
		cond_resched();
		cfs_hash_lock(hs, 0);
	}
 out:
	cfs_hash_unlock(hs, 0);

	cfs_hash_for_each_exit(hs);
	return count;
}

typedef struct {
	cfs_hash_cond_opt_cb_t	func;
	void			*arg;
} cfs_hash_cond_arg_t;

static int
cfs_hash_cond_del_locked(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			 struct hlist_node *hnode, void *data)
{
	cfs_hash_cond_arg_t *cond = data;

	if (cond->func(cfs_hash_object(hs, hnode), cond->arg))
		cfs_hash_bd_del_locked(hs, bd, hnode);
	return 0;
}

/**
 * Delete items from the libcfs hash @hs for which @func returns true.
 * The write lock is held during the loop over each bucket to prevent
 * any object from being referenced.
 */
void
cfs_hash_cond_del(struct cfs_hash *hs, cfs_hash_cond_opt_cb_t func, void *data)
{
	cfs_hash_cond_arg_t arg = {
		.func	= func,
		.arg	= data,
	};

	cfs_hash_for_each_tight(hs, cfs_hash_cond_del_locked, &arg, 1);
}
EXPORT_SYMBOL(cfs_hash_cond_del);
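
/*
 * Example predicate for cfs_hash_cond_del() (a sketch; foo_deadline is
 * hypothetical).  As cfs_hash_cond_del_locked() above shows, the
 * callback receives the object (not the hnode) plus the opaque @data
 * pointer, and returns true to delete the item:
 *
 *	static int foo_expired(void *obj, void *arg)
 *	{
 *		struct foo *foo = obj;
 *		__u64 *now = arg;
 *
 *		return foo->foo_deadline <= *now;
 *	}
 *
 *	cfs_hash_cond_del(hs, foo_expired, &now);
 */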

void
cfs_hash_for_each(struct cfs_hash *hs,
		  cfs_hash_for_each_cb_t func, void *data)
{
	cfs_hash_for_each_tight(hs, func, data, 0);
}
EXPORT_SYMBOL(cfs_hash_for_each);
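
/*
 * Example iteration callback (a sketch).  The signature is the
 * cfs_hash_for_each_cb_t used by cfs_hash_for_each_tight() above;
 * return 0 to continue, nonzero to break out of the iteration:
 *
 *	static int foo_count_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
 *				struct hlist_node *hnode, void *data)
 *	{
 *		unsigned long *nr = data;
 *
 *		(*nr)++;
 *		return 0;	// keep going; the bucket lock is held here
 *	}
 *
 *	unsigned long nr = 0;
 *	cfs_hash_for_each(hs, foo_count_cb, &nr);
 */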
1523
1524void
1525cfs_hash_for_each_safe(struct cfs_hash *hs,
1526                       cfs_hash_for_each_cb_t func, void *data) {
1527        cfs_hash_for_each_tight(hs, func, data, 1);
1528}
1529EXPORT_SYMBOL(cfs_hash_for_each_safe);

static int
cfs_hash_peek(struct cfs_hash *hs, struct cfs_hash_bd *bd,
              struct hlist_node *hnode, void *data)
{
        *(int *)data = 0;
        return 1; /* return 1 to break the loop */
}

int
cfs_hash_is_empty(struct cfs_hash *hs)
{
        int empty = 1;

        cfs_hash_for_each_tight(hs, cfs_hash_peek, &empty, 0);
        return empty;
}
EXPORT_SYMBOL(cfs_hash_is_empty);

__u64
cfs_hash_size_get(struct cfs_hash *hs)
{
        return cfs_hash_with_counter(hs) ?
               atomic_read(&hs->hs_count) :
               cfs_hash_for_each_tight(hs, NULL, NULL, 0);
}
EXPORT_SYMBOL(cfs_hash_size_get);

/*
 * cfs_hash_for_each_relax:
 * Iterate the hash table and call @func on each item without
 * holding any lock.  This function cannot guarantee that the
 * iteration will finish if either of these features is enabled:
 *
 *  a. if rehash_key is enabled, an item can be moved from
 *     one bucket to another bucket
 *  b. the user can remove a non-zero-ref item from the hash-table,
 *     so the item can disappear from the hash-table; even worse,
 *     the user may change its key and insert it into another
 *     hash bucket.
 * There is no way to finish the iteration correctly in either of
 * these two cases, so the iteration has to stop when a change is
 * detected.
 */
static int
cfs_hash_for_each_relax(struct cfs_hash *hs, cfs_hash_for_each_cb_t func,
                        void *data)
{
        struct hlist_node *hnode;
        struct hlist_node *tmp;
        struct cfs_hash_bd     bd;
        __u32        version;
        int            count = 0;
        int            stop_on_change;
        int            rc;
        int            i;

        stop_on_change = cfs_hash_with_rehash_key(hs) ||
                         !cfs_hash_with_no_itemref(hs) ||
                         hs->hs_ops->hs_put_locked == NULL;
        cfs_hash_lock(hs, 0);
        LASSERT(!cfs_hash_is_rehashing(hs));

        cfs_hash_for_each_bucket(hs, &bd, i) {
                struct hlist_head *hhead;

                cfs_hash_bd_lock(hs, &bd, 0);
                version = cfs_hash_bd_version_get(&bd);

                cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
                        for (hnode = hhead->first; hnode != NULL;) {
                                cfs_hash_bucket_validate(hs, &bd, hnode);
                                cfs_hash_get(hs, hnode);
                                cfs_hash_bd_unlock(hs, &bd, 0);
                                cfs_hash_unlock(hs, 0);

                                rc = func(hs, &bd, hnode, data);
                                if (stop_on_change)
                                        cfs_hash_put(hs, hnode);
                                cond_resched();
                                count++;

                                cfs_hash_lock(hs, 0);
                                cfs_hash_bd_lock(hs, &bd, 0);
                                if (!stop_on_change) {
                                        tmp = hnode->next;
                                        cfs_hash_put_locked(hs, hnode);
                                        hnode = tmp;
                                } else { /* bucket changed? */
                                        if (version !=
                                            cfs_hash_bd_version_get(&bd))
                                                break;
                                        /* safe to continue because no change */
                                        hnode = hnode->next;
                                }
                                if (rc) /* callback wants to break iteration */
                                        break;
                        }
                }
                cfs_hash_bd_unlock(hs, &bd, 0);
        }
        cfs_hash_unlock(hs, 0);

        return count;
}

int
cfs_hash_for_each_nolock(struct cfs_hash *hs,
                         cfs_hash_for_each_cb_t func, void *data)
{
        if (cfs_hash_with_no_lock(hs) ||
            cfs_hash_with_rehash_key(hs) ||
            !cfs_hash_with_no_itemref(hs))
                return -EOPNOTSUPP;

        if (hs->hs_ops->hs_get == NULL ||
            (hs->hs_ops->hs_put == NULL &&
             hs->hs_ops->hs_put_locked == NULL))
                return -EOPNOTSUPP;

        cfs_hash_for_each_enter(hs);
        cfs_hash_for_each_relax(hs, func, data);
        cfs_hash_for_each_exit(hs);

        return 0;
}
EXPORT_SYMBOL(cfs_hash_for_each_nolock);
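
/*
 * Illustrative sketch (not part of the original file): a callback for
 * cfs_hash_for_each_nolock().  No lock is held across the callback, so
 * it may sleep; the item is pinned via the hash's hs_get/hs_put ops for
 * the duration of the call.  "example_obj" and "example_flush_obj" are
 * hypothetical.
 */
#if 0
static int example_flush_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                            struct hlist_node *hnode, void *data)
{
        struct example_obj *eo = cfs_hash_object(hs, hnode);

        example_flush_obj(eo);  /* may sleep: no lock is held here */
        return 0;               /* 0: keep iterating */
}

static int example_flush_all(struct cfs_hash *hs)
{
        /* returns -EOPNOTSUPP unless the hash has no-itemref semantics,
         * hs_get/hs_put ops, and no rehash-key (see the checks above) */
        return cfs_hash_for_each_nolock(hs, example_flush_cb, NULL);
}
#endif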

/**
 * For each hash bucket in the libcfs hash @hs call the passed callback
 * @func until all the hash buckets are empty.  The passed callback @func
 * or the previously registered callback hs->hs_put must remove the item
 * from the hash.  You may either use the cfs_hash_del() or hlist_del()
 * functions.  No rwlocks will be held during the callback @func, so it
 * is safe to sleep if needed.  This function will not terminate until
 * the hash is empty.  Note that it is still possible to concurrently
 * add new items into the hash.  It is the caller's responsibility to
 * ensure the required locking is in place to prevent concurrent
 * insertions.
 */
int
cfs_hash_for_each_empty(struct cfs_hash *hs,
                        cfs_hash_for_each_cb_t func, void *data)
{
        unsigned  i = 0;

        if (cfs_hash_with_no_lock(hs))
                return -EOPNOTSUPP;

        if (hs->hs_ops->hs_get == NULL ||
            (hs->hs_ops->hs_put == NULL &&
             hs->hs_ops->hs_put_locked == NULL))
                return -EOPNOTSUPP;

        cfs_hash_for_each_enter(hs);
        while (cfs_hash_for_each_relax(hs, func, data)) {
                CDEBUG(D_INFO, "Try to empty hash: %s, loop: %u\n",
                       hs->hs_name, i++);
        }
        cfs_hash_for_each_exit(hs);
        return 0;
}
EXPORT_SYMBOL(cfs_hash_for_each_empty);
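
/*
 * Illustrative sketch (not part of the original file): draining a hash
 * at teardown with cfs_hash_for_each_empty().  Each callback invocation
 * must remove the item, or the loop above never terminates.
 * "example_obj" and its "eo_key" field are hypothetical.
 */
#if 0
static int example_drain_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                            struct hlist_node *hnode, void *data)
{
        struct example_obj *eo = cfs_hash_object(hs, hnode);

        /* remove from the hash; the iterator still holds a reference,
         * so the final drop (via the registered hs_put) is what should
         * free the object, not this callback */
        cfs_hash_del(hs, &eo->eo_key, hnode);
        return 0;
}

static void example_teardown(struct cfs_hash *hs)
{
        cfs_hash_for_each_empty(hs, example_drain_cb, NULL);
}
#endif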

void
cfs_hash_hlist_for_each(struct cfs_hash *hs, unsigned hindex,
                        cfs_hash_for_each_cb_t func, void *data)
{
        struct hlist_head   *hhead;
        struct hlist_node   *hnode;
        struct cfs_hash_bd       bd;

        cfs_hash_for_each_enter(hs);
        cfs_hash_lock(hs, 0);
        if (hindex >= CFS_HASH_NHLIST(hs))
                goto out;

        cfs_hash_bd_index_set(hs, hindex, &bd);

        cfs_hash_bd_lock(hs, &bd, 0);
        hhead = cfs_hash_bd_hhead(hs, &bd);
        hlist_for_each(hnode, hhead) {
                if (func(hs, &bd, hnode, data))
                        break;
        }
        cfs_hash_bd_unlock(hs, &bd, 0);
 out:
        cfs_hash_unlock(hs, 0);
        cfs_hash_for_each_exit(hs);
}
EXPORT_SYMBOL(cfs_hash_hlist_for_each);

/**
 * For each item in the libcfs hash @hs which matches the @key call
 * the passed callback @func and pass to it as an argument each hash
 * item and the private @data.  During the callback the bucket lock
 * is held so the callback must never sleep.
 */
void
cfs_hash_for_each_key(struct cfs_hash *hs, const void *key,
                      cfs_hash_for_each_cb_t func, void *data)
{
        struct hlist_node   *hnode;
        struct cfs_hash_bd       bds[2];
        unsigned            i;

        cfs_hash_lock(hs, 0);

        cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);

        cfs_hash_for_each_bd(bds, 2, i) {
                struct hlist_head *hlist = cfs_hash_bd_hhead(hs, &bds[i]);

                hlist_for_each(hnode, hlist) {
                        cfs_hash_bucket_validate(hs, &bds[i], hnode);

                        if (cfs_hash_keycmp(hs, key, hnode)) {
                                if (func(hs, &bds[i], hnode, data))
                                        break;
                        }
                }
        }

        cfs_hash_dual_bd_unlock(hs, bds, 0);
        cfs_hash_unlock(hs, 0);
}
EXPORT_SYMBOL(cfs_hash_for_each_key);
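
/*
 * Illustrative sketch (not part of the original file): visiting all
 * items that share one key.  The bucket lock is held across the
 * callback, so it must be short and must not sleep.
 */
#if 0
static int example_key_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
                          struct hlist_node *hnode, void *data)
{
        (*(int *)data)++;       /* just count the duplicates */
        return 0;               /* non-zero would stop the walk */
}

static int example_count_key(struct cfs_hash *hs, const void *key)
{
        int n = 0;

        cfs_hash_for_each_key(hs, key, example_key_cb, &n);
        return n;
}
#endif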

void
cfs_hash_rehash_cancel_locked(struct cfs_hash *hs)
{
        int     i;

        /* caller must hold cfs_hash_lock(hs, 1) */
        LASSERT(cfs_hash_with_rehash(hs) &&
                !cfs_hash_with_no_lock(hs));

        if (!cfs_hash_is_rehashing(hs))
                return;

        if (cfs_wi_deschedule(cfs_sched_rehash, &hs->hs_rehash_wi)) {
                hs->hs_rehash_bits = 0;
                return;
        }

        for (i = 2; cfs_hash_is_rehashing(hs); i++) {
                cfs_hash_unlock(hs, 1);
                /* raise console warning while waiting too long */
                CDEBUG(IS_PO2(i >> 3) ? D_WARNING : D_INFO,
                       "hash %s is still rehashing, rescheduled %d\n",
                       hs->hs_name, i - 1);
                cond_resched();
                cfs_hash_lock(hs, 1);
        }
}
EXPORT_SYMBOL(cfs_hash_rehash_cancel_locked);

void
cfs_hash_rehash_cancel(struct cfs_hash *hs)
{
        cfs_hash_lock(hs, 1);
        cfs_hash_rehash_cancel_locked(hs);
        cfs_hash_unlock(hs, 1);
}
EXPORT_SYMBOL(cfs_hash_rehash_cancel);

/**
 * Rehash the libcfs hash @hs to a new size computed by
 * cfs_hash_rehash_bits().  This can be used to grow the hash size when
 * excessive chaining is detected, or to shrink the hash when it is
 * larger than needed.  When the CFS_HASH_REHASH flag is set in @hs the
 * libcfs hash may be dynamically rehashed during addition or removal
 * if the hash's theta value exceeds either the hs->hs_min_theta or
 * hs->hs_max_theta values.  By default these values are tuned to keep
 * the chained hash depth small, and this approach assumes a reasonably
 * uniform hashing function.  The theta thresholds for @hs are tunable
 * via cfs_hash_set_theta().  If @do_rehash is zero the rehash is
 * deferred to the cfs_sched_rehash workitem scheduler and this
 * function returns immediately; otherwise the rehash runs
 * synchronously.
 */
int
cfs_hash_rehash(struct cfs_hash *hs, int do_rehash)
{
        int     rc;

        LASSERT(cfs_hash_with_rehash(hs) && !cfs_hash_with_no_lock(hs));

        cfs_hash_lock(hs, 1);

        rc = cfs_hash_rehash_bits(hs);
        if (rc <= 0) {
                cfs_hash_unlock(hs, 1);
                return rc;
        }

        hs->hs_rehash_bits = rc;
        if (!do_rehash) {
                /* launch and return */
                cfs_wi_schedule(cfs_sched_rehash, &hs->hs_rehash_wi);
                cfs_hash_unlock(hs, 1);
                return 0;
        }

        /* rehash right now */
        cfs_hash_unlock(hs, 1);

        return cfs_hash_rehash_worker(&hs->hs_rehash_wi);
}
EXPORT_SYMBOL(cfs_hash_rehash);
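
/*
 * Illustrative sketch (not part of the original file): forcing an
 * immediate, synchronous rehash after a bulk load, assuming the hash
 * was created with CFS_HASH_REHASH.  Passing 0 instead would only
 * schedule the rehash workitem and return.
 */
#if 0
static void example_bulk_load_done(struct cfs_hash *hs)
{
        /* cfs_hash_rehash_bits() makes this a no-op (rc <= 0) if no
         * resize is currently needed */
        cfs_hash_rehash(hs, 1);
}
#endif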

static int
cfs_hash_rehash_bd(struct cfs_hash *hs, struct cfs_hash_bd *old)
{
        struct cfs_hash_bd      new;
        struct hlist_head  *hhead;
        struct hlist_node  *hnode;
        struct hlist_node  *pos;
        void          *key;
        int             c = 0;

        /* caller holds cfs_hash_lock(hs, 1), so no bucket lock is needed */
        cfs_hash_bd_for_each_hlist(hs, old, hhead) {
                hlist_for_each_safe(hnode, pos, hhead) {
                        key = cfs_hash_key(hs, hnode);
                        LASSERT(key != NULL);
                        /* Validate hnode is in the correct bucket. */
                        cfs_hash_bucket_validate(hs, old, hnode);
                        /*
                         * Delete from old hash bucket; move to new bucket.
                         * ops->hs_key must be defined.
                         */
                        cfs_hash_bd_from_key(hs, hs->hs_rehash_buckets,
                                             hs->hs_rehash_bits, key, &new);
                        cfs_hash_bd_move_locked(hs, old, &new, hnode);
                        c++;
                }
        }

        return c;
}

static int
cfs_hash_rehash_worker(cfs_workitem_t *wi)
{
        struct cfs_hash  *hs = container_of(wi, struct cfs_hash, hs_rehash_wi);
        struct cfs_hash_bucket **bkts;
        struct cfs_hash_bd       bd;
        unsigned int    old_size;
        unsigned int    new_size;
        int              bsize;
        int              count = 0;
        int              rc = 0;
        int              i;

        LASSERT(hs != NULL && cfs_hash_with_rehash(hs));

        cfs_hash_lock(hs, 0);
        LASSERT(cfs_hash_is_rehashing(hs));

        old_size = CFS_HASH_NBKT(hs);
        new_size = CFS_HASH_RH_NBKT(hs);

        cfs_hash_unlock(hs, 0);

        /*
         * hs::hs_rwlock is not needed for hs::hs_buckets here,
         * because nobody but this worker can change the bkt-table.
         */
        bkts = cfs_hash_buckets_realloc(hs, hs->hs_buckets,
                                        old_size, new_size);
        cfs_hash_lock(hs, 1);
        if (bkts == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        if (bkts == hs->hs_buckets) {
                bkts = NULL; /* do nothing */
                goto out;
        }

        rc = __cfs_hash_theta(hs);
        if ((rc >= hs->hs_min_theta) && (rc <= hs->hs_max_theta)) {
                /* free the newly allocated bkt-table */
                old_size = new_size;
                new_size = CFS_HASH_NBKT(hs);
                rc = -EALREADY;
                goto out;
        }

        LASSERT(hs->hs_rehash_buckets == NULL);
        hs->hs_rehash_buckets = bkts;

        rc = 0;
        cfs_hash_for_each_bucket(hs, &bd, i) {
                if (cfs_hash_is_exiting(hs)) {
                        rc = -ESRCH;
                        /* someone wants to destroy the hash, abort now */
                        if (old_size < new_size) /* OK to free old bkt-table */
                                break;
                        /* it's shrinking, need to free the new bkt-table */
                        hs->hs_rehash_buckets = NULL;
                        old_size = new_size;
                        new_size = CFS_HASH_NBKT(hs);
                        goto out;
                }

                count += cfs_hash_rehash_bd(hs, &bd);
                if (count < CFS_HASH_LOOP_HOG ||
                    cfs_hash_is_iterating(hs)) { /* need to finish ASAP */
                        continue;
                }

                count = 0;
                cfs_hash_unlock(hs, 1);
                cond_resched();
                cfs_hash_lock(hs, 1);
        }

        hs->hs_rehash_count++;

        bkts = hs->hs_buckets;
        hs->hs_buckets = hs->hs_rehash_buckets;
        hs->hs_rehash_buckets = NULL;

        hs->hs_cur_bits = hs->hs_rehash_bits;
 out:
        hs->hs_rehash_bits = 0;
        if (rc == -ESRCH) /* never to be scheduled again */
                cfs_wi_exit(cfs_sched_rehash, wi);
        bsize = cfs_hash_bkt_size(hs);
        cfs_hash_unlock(hs, 1);
        /* can't refer to @hs anymore because it could be destroyed */
        if (bkts != NULL)
                cfs_hash_buckets_free(bkts, bsize, new_size, old_size);
        if (rc != 0)
                CDEBUG(D_INFO, "early quit of rehashing: %d\n", rc);
        /* return 1 only if cfs_wi_exit is called */
        return rc == -ESRCH;
}

/**
 * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
 * @old_key must be provided to locate the object's previous location
 * in the hash, and the @new_key will be used to reinsert the object.
 * Use this function instead of a cfs_hash_add() + cfs_hash_del()
 * combo when it is critical that there is no window in time where the
 * object is missing from the hash.  When an object is being rehashed
 * the registered cfs_hash_get() and cfs_hash_put() functions will
 * not be called.
 */
void cfs_hash_rehash_key(struct cfs_hash *hs, const void *old_key,
                         void *new_key, struct hlist_node *hnode)
{
        struct cfs_hash_bd      bds[3];
        struct cfs_hash_bd      old_bds[2];
        struct cfs_hash_bd      new_bd;

        LASSERT(!hlist_unhashed(hnode));

        cfs_hash_lock(hs, 0);

        cfs_hash_dual_bd_get(hs, old_key, old_bds);
        cfs_hash_bd_get(hs, new_key, &new_bd);

        bds[0] = old_bds[0];
        bds[1] = old_bds[1];
        bds[2] = new_bd;

        /* NB: bds[0] and bds[1] are ordered already */
        cfs_hash_bd_order(&bds[1], &bds[2]);
        cfs_hash_bd_order(&bds[0], &bds[1]);

        cfs_hash_multi_bd_lock(hs, bds, 3, 1);
        if (likely(old_bds[1].bd_bucket == NULL)) {
                cfs_hash_bd_move_locked(hs, &old_bds[0], &new_bd, hnode);
        } else {
                cfs_hash_dual_bd_finddel_locked(hs, old_bds, old_key, hnode);
                cfs_hash_bd_add_locked(hs, &new_bd, hnode);
        }
        /* overwrite the key inside the locks, otherwise this may race
         * with other operations, e.g. rehash */
        cfs_hash_keycpy(hs, new_key, hnode);

        cfs_hash_multi_bd_unlock(hs, bds, 3, 1);
        cfs_hash_unlock(hs, 0);
}
EXPORT_SYMBOL(cfs_hash_rehash_key);
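
/*
 * Illustrative sketch (not part of the original file): atomically
 * re-keying an object, assuming the hash was created with a rehash-key
 * capability and an hs_keycpy op so cfs_hash_keycpy() can update the
 * stored key.  The object is never absent from the hash during the
 * move.  "example_obj" and its fields are hypothetical.
 */
#if 0
static void example_rekey(struct cfs_hash *hs, struct example_obj *eo,
                          __u64 new_key)
{
        cfs_hash_rehash_key(hs, &eo->eo_key, &new_key, &eo->eo_hnode);
}
#endif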

void cfs_hash_debug_header(struct seq_file *m)
{
        seq_printf(m, "%-*s   cur   min   max theta t-min t-max flags rehash   count  maxdep maxdepb distribution\n",
                   CFS_HASH_BIGNAME_LEN, "name");
}
EXPORT_SYMBOL(cfs_hash_debug_header);

static struct cfs_hash_bucket **
cfs_hash_full_bkts(struct cfs_hash *hs)
{
        /* NB: caller should hold hs->hs_rwlock if REHASH is set */
        if (hs->hs_rehash_buckets == NULL)
                return hs->hs_buckets;

        LASSERT(hs->hs_rehash_bits != 0);
        return hs->hs_rehash_bits > hs->hs_cur_bits ?
               hs->hs_rehash_buckets : hs->hs_buckets;
}

static unsigned int
cfs_hash_full_nbkt(struct cfs_hash *hs)
{
        /* NB: caller should hold hs->hs_rwlock if REHASH is set */
        if (hs->hs_rehash_buckets == NULL)
                return CFS_HASH_NBKT(hs);

        LASSERT(hs->hs_rehash_bits != 0);
        return hs->hs_rehash_bits > hs->hs_cur_bits ?
               CFS_HASH_RH_NBKT(hs) : CFS_HASH_NBKT(hs);
}

void cfs_hash_debug_str(struct cfs_hash *hs, struct seq_file *m)
{
        int                 dist[8] = { 0, };
        int                 maxdep  = -1;
        int                 maxdepb = -1;
        int                 total   = 0;
        int                 theta;
        int                 i;

        cfs_hash_lock(hs, 0);
        theta = __cfs_hash_theta(hs);

        seq_printf(m, "%-*s %5d %5d %5d %d.%03d %d.%03d %d.%03d  0x%02x %6d ",
                      CFS_HASH_BIGNAME_LEN, hs->hs_name,
                      1 << hs->hs_cur_bits, 1 << hs->hs_min_bits,
                      1 << hs->hs_max_bits,
                      __cfs_hash_theta_int(theta), __cfs_hash_theta_frac(theta),
                      __cfs_hash_theta_int(hs->hs_min_theta),
                      __cfs_hash_theta_frac(hs->hs_min_theta),
                      __cfs_hash_theta_int(hs->hs_max_theta),
                      __cfs_hash_theta_frac(hs->hs_max_theta),
                      hs->hs_flags, hs->hs_rehash_count);

        /*
         * The distribution is a summary of the chained hash depth in
         * each of the libcfs hash buckets.  Each bucket's hsb_count is
         * divided by the hash theta value and used to generate a
         * histogram of the hash distribution.  A uniform hash will
         * result in all hash buckets being close to the average, thus
         * only the first few entries in the histogram will be non-zero.
         * If your hash function results in a non-uniform hash, this
         * will be observable as outlier buckets in the distribution
         * histogram.
         *
         * Uniform hash distribution:      128/128/0/0/0/0/0/0
         * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
         */
        for (i = 0; i < cfs_hash_full_nbkt(hs); i++) {
                struct cfs_hash_bd  bd;

                bd.bd_bucket = cfs_hash_full_bkts(hs)[i];
                cfs_hash_bd_lock(hs, &bd, 0);
                if (maxdep < bd.bd_bucket->hsb_depmax) {
                        maxdep  = bd.bd_bucket->hsb_depmax;
                        maxdepb = ffz(~maxdep);
                }
                total += bd.bd_bucket->hsb_count;
                dist[min(fls(bd.bd_bucket->hsb_count / max(theta, 1)), 7)]++;
                cfs_hash_bd_unlock(hs, &bd, 0);
        }

        seq_printf(m, "%7d %7d %7d ", total, maxdep, maxdepb);
        for (i = 0; i < 8; i++)
                seq_printf(m, "%d%c",  dist[i], (i == 7) ? '\n' : '/');

        cfs_hash_unlock(hs, 0);
}
EXPORT_SYMBOL(cfs_hash_debug_str);
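
/*
 * Illustrative sketch (not part of the original file): wiring the two
 * debug helpers into a seq_file show routine (e.g. one opened with
 * single_open()).  Output is one header line followed by one stats
 * line per hash; "my_hash" is a hypothetical table.
 */
#if 0
static int example_hash_stats_show(struct seq_file *m, void *unused)
{
        cfs_hash_debug_header(m);
        cfs_hash_debug_str(my_hash, m);
        return 0;
}
#endif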