/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	128UL

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif
static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (sizeof(spinlock_t) != 0) {
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
		    gfp == GFP_KERNEL)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
		else
#endif
			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
						   gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl)
		kvfree(tbl->locks);

	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);
	if (tbl == NULL)
		return NULL;

	tbl->size = nbuckets;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}
static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
		rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
	int err = -ENOENT;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	/* Advance to the last entry in the old bucket chain; each call
	 * moves exactly one entry, starting from the tail.
	 */
	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static void rhashtable_rehash_chain(struct rhashtable *ht,
				    unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!rhashtable_rehash_one(ht, old_hash))
		;
	old_tbl->rehash++;
	spin_unlock_bh(old_bucket_lock);
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
		rhashtable_rehash_chain(ht, old_hash);

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
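/**
 * rhashtable_expand - Expand hash table while allowing concurrent lookups
 * @ht:		the hash table to expand
 *
 * A secondary bucket array is allocated and the hash entries are migrated.
 *
 * This function may only be called in a context where it is safe to call
 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */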
static int rhashtable_expand(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	int err;

	ASSERT_RHT_MUTEX(ht);

	old_tbl = rhashtable_last_table(ht, old_tbl);

	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}
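/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., the smallest
 * size would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */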
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int size;
	int err;

	ASSERT_RHT_MUTEX(ht);

	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		rhashtable_expand(ht);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		rhashtable_shrink(ht);

	err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static bool rhashtable_check_elasticity(struct rhashtable *ht,
					struct bucket_table *tbl,
					unsigned int hash)
{
	unsigned int elasticity = ht->elasticity;
	struct rhash_head *head;

	rht_for_each(head, tbl, hash)
		if (!--elasticity)
			return true;

	return false;
}
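/* Allocate a new table and attach it as future_tbl so that an insertion
 * which ran out of room (full table or overlong chain) can be retried
 * there. The size is doubled when the table is above 75% load; at most
 * one such rehash may be pending at a time (-EBUSY otherwise).
 */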
int rhashtable_insert_rehash(struct rhashtable *ht,
			     struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the second table allocation: */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
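/* Slow path of insertion: lock the bucket in the newest table, reject
 * duplicate keys and tables that have grown beyond their limits, and
 * fall back to a rehash when the chain exceeds the elasticity bound.
 * Returns NULL on success, the table to retry on for -EAGAIN, or an
 * ERR_PTR() encoded error otherwise.
 */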
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
					    const void *key,
					    struct rhash_head *obj,
					    struct bucket_table *tbl)
{
	struct rhash_head *head;
	unsigned int hash;
	int err;

	tbl = rhashtable_last_table(ht, tbl);
	hash = head_hashfn(ht, tbl, obj);
	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);

	err = -EEXIST;
	if (key && rhashtable_lookup_fast(ht, key, ht->p))
		goto exit;

	err = -E2BIG;
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto exit;

	err = -EAGAIN;
	if (rhashtable_check_elasticity(ht, tbl, hash) ||
	    rht_grow_above_100(ht, tbl))
		goto exit;

	err = 0;

	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

	RCU_INIT_POINTER(obj->next, head);

	rcu_assign_pointer(tbl->buckets[hash], obj);

	atomic_inc(&ht->nelems);

exit:
	spin_unlock(rht_bucket_lock(tbl, hash));

	if (err == 0)
		return NULL;
	else if (err == -EAGAIN)
		return tbl;
	else
		return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
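/**
 * rhashtable_walk_init - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice.  Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit if this function returns
 * successfully.
 */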
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL);
	if (!iter->walker)
		return -ENOMEM;

	spin_lock(&ht->lock);
	iter->walker->tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
	spin_unlock(&ht->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_init);
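/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_init.
 */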
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker->tbl)
		list_del(&iter->walker->list);
	spin_unlock(&iter->ht->lock);
	kfree(iter->walker);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
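/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk.  Note that we take the RCU lock in all
 * cases including when we return an error.  So you must always call
 * rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if resize event occurred.  Trying again is not
 * necessary as the iterator is reset to the beginning of the table.
 */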
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker->tbl)
		list_del(&iter->walker->list);
	spin_unlock(&ht->lock);

	if (!iter->walker->tbl) {
		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);
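/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if resize event occurred.  Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */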
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker->tbl;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;

	if (p) {
		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			return rht_obj(ht, p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker->tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
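/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk.
 */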
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker->tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker->list, &tbl->walkers);
	else
		iter->walker->tbl = NULL;
	spin_unlock(&ht->lock);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}
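/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 *	.nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */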
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	if (params->insecure_max_entries)
		ht->p.insecure_max_entries =
			rounddown_pow_of_two(params->insecure_max_entries);
	else
		ht->p.insecure_max_entries = ht->p.max_size * 2;

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	/* The maximum (not average) chain length grows with the
	 * size of the hash table, at a rate of (log N)/(log log N).
	 * The value of 16 is selected so that even if the hash
	 * table grew to 2^32 you would not expect the maximum
	 * chain length to exceed it unless we are under attack
	 * (or extremely unlucky).
	 *
	 * As this limit is only to detect attacks, we don't need
	 * to set it to a lower value as you'd need the chain
	 * length to vastly exceed 16 to have any real effect
	 * on the system.
	 */
	if (!params->insecure_elasticity)
		ht->elasticity = 16;

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
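/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of each element
 * @arg:	pointer passed to free_fn
 *
 * Stops an eventual async resize. If defined, invokes free_fn for each
 * element to release resources. Please note that RCU protected
 * readers may still be accessing the elements. Releasing of resources
 * must occur in a compatible manner. Then frees the bucket array.
 *
 * This function will eventually sleep to wait for an async resize
 * to complete. The caller is responsible that no further write
 * operations occur in parallel.
 */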
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	const struct bucket_table *tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			for (pos = rht_dereference(tbl->buckets[i], ht),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				free_fn(rht_obj(ht, pos), arg);
		}
	}

	bucket_table_free(tbl);
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
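/**
 * rhashtable_destroy - destroy hash table
 * @ht:		the hash table to destroy
 *
 * Frees the bucket array without invoking any callback on the remaining
 * elements; equivalent to rhashtable_free_and_destroy(ht, NULL, NULL).
 */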
void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);