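/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Self-resizing hash table with per-bucket spinlocks and RCU-protected
 * lookups.  Table growth and shrinking are carried out incrementally by
 * a deferred worker while lookups, insertions and removals continue.
 */
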
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	32UL

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif

static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket. */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (sizeof(spinlock_t) != 0) {
		tbl->locks = NULL;
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
		    gfp == GFP_KERNEL)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
#endif
		if (gfp != GFP_KERNEL)
			gfp |= __GFP_NOWARN | __GFP_NORETRY;

		if (!tbl->locks)
			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
						   gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl)
		kvfree(tbl->locks);

	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);
	if (tbl == NULL)
		return NULL;

	tbl->size = nbuckets;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
			rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
	int err = -ENOENT;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	/* Advance to the last entry of the old bucket chain. */
	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	/* Link the entry at the head of its bucket in the new table ... */
	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	/* ... then unlink it from the old bucket. */
	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static void rhashtable_rehash_chain(struct rhashtable *ht,
				    unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!rhashtable_rehash_one(ht, old_hash))
		;
	old_tbl->rehash++;
	spin_unlock_bh(old_bucket_lock);
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away.  Deletions
	 * and lookups will be attempted in both tables until the rehash
	 * completes.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
		rhashtable_rehash_chain(ht, old_hash);

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for outstanding readers before freeing the old table.  All new
	 * readers see the new table, so no references to the old one remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}
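
/**
 * rhashtable_expand - Expand hash table while allowing concurrent lookups
 * @ht:		the hash table to expand
 *
 * Allocates a new bucket table twice the size of the current one and
 * attaches it as the future table.  The actual migration of entries is
 * performed later by the deferred rehash worker.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by
 * per-bucket locks, and concurrent RCU protected lookups and traversals.
 */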
static int rhashtable_expand(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	int err;

	ASSERT_RHT_MUTEX(ht);

	old_tbl = rhashtable_last_table(ht, old_tbl);

	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}
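
/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * Allocates a smaller bucket table sized to hold the current number of
 * elements (with slack) and attaches it as the future table; the deferred
 * rehash worker then migrates the entries.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex and that no concurrent table mutations take place.  It is,
 * however, valid to have concurrent lookups if they are RCU protected.
 */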
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;
	int err;

	ASSERT_RHT_MUTEX(ht);

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		rhashtable_expand(ht);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		rhashtable_shrink(ht);

	err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static bool rhashtable_check_elasticity(struct rhashtable *ht,
					struct bucket_table *tbl,
					unsigned int hash)
{
	unsigned int elasticity = ht->elasticity;
	struct rhash_head *head;

	rht_for_each(head, tbl, hash)
		if (!--elasticity)
			return true;

	return false;
}

int rhashtable_insert_rehash(struct rhashtable *ht,
			     struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);

struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
					    const void *key,
					    struct rhash_head *obj,
					    struct bucket_table *tbl)
{
	struct rhash_head *head;
	unsigned int hash;
	int err;

	tbl = rhashtable_last_table(ht, tbl);
	hash = head_hashfn(ht, tbl, obj);
	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);

	err = -EEXIST;
	if (key && rhashtable_lookup_fast(ht, key, ht->p))
		goto exit;

	err = -E2BIG;
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto exit;

	/* An over-long chain or a table above 100% utilisation makes the
	 * caller retry after a rehash.
	 */
	err = -EAGAIN;
	if (rhashtable_check_elasticity(ht, tbl, hash) ||
	    rht_grow_above_100(ht, tbl))
		goto exit;

	err = 0;

	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

	RCU_INIT_POINTER(obj->next, head);

	rcu_assign_pointer(tbl->buckets[hash], obj);

	atomic_inc(&ht->nelems);

exit:
	spin_unlock(rht_bucket_lock(tbl, hash));

	if (err == 0)
		return NULL;
	else if (err == -EAGAIN)
		return tbl;
	else
		return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
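
/**
 * rhashtable_walk_init - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table iterator
 * @gfp:	GFP flags for the walker allocation
 *
 * This function prepares a hash table walk.
 *
 * A walk is not a stable snapshot: objects may be seen twice or missed
 * entirely if the table is resized or modified between
 * rhashtable_walk_stop() and the next rhashtable_walk_start().
 *
 * You must call rhashtable_walk_exit() after this function returns.
 */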
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
			 gfp_t gfp)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	iter->walker = kmalloc(sizeof(*iter->walker), gfp);
	if (!iter->walker)
		return -ENOMEM;

	spin_lock(&ht->lock);
	iter->walker->tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
	spin_unlock(&ht->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_init);
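
/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table iterator
 *
 * This function frees resources allocated by rhashtable_walk_init().
 */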
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker->tbl)
		list_del(&iter->walker->list);
	spin_unlock(&iter->ht->lock);
	kfree(iter->walker);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
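
/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk.  Note that the RCU read lock is taken in all
 * cases, including when an error is returned, so you must always call
 * rhashtable_walk_stop() to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred while the iterator was
 * detached; the walk then restarts from the current table.
 */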
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker->tbl)
		list_del(&iter->walker->list);
	spin_unlock(&ht->lock);

	if (!iter->walker->tbl) {
		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);
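
/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop() when you are finished
 * with the walk.
 *
 * Returns the next object, NULL when the end of the table is reached, or
 * ERR_PTR(-EAGAIN) if a resize event occurred, in which case the iterator
 * continues from the start of the new table.
 */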
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker->tbl;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;

	if (p) {
		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			return rht_obj(ht, p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker->tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
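
/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk and drop the RCU read lock taken by
 * rhashtable_walk_start().
 */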
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker->tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	/* Stay on the walker list only while the table may still be rehashed. */
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker->list, &tbl->walkers);
	else
		iter->walker->tbl = NULL;
	spin_unlock(&ht->lock);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);

static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}
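
/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters.  A table can be configured either with a fixed-length key
 * (key_offset + key_len) or with a custom obj_hashfn/obj_cmpfn pair for
 * variable-length or composite keys.
 *
 * Illustrative fixed-length key configuration (the structure and field
 * names below are made up for the example):
 *
 *	struct test_obj {
 *		int			key;
 *		void			*my_member;
 *		struct rhash_head	node;
 *	};
 *
 *	struct rhashtable_params params = {
 *		.head_offset = offsetof(struct test_obj, node),
 *		.key_offset = offsetof(struct test_obj, key),
 *		.key_len = sizeof(int),
 *		.hashfn = jhash,
 *	};
 *
 * Returns zero on success, -EINVAL on invalid parameters or -ENOMEM if
 * the initial bucket table cannot be allocated.
 */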
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	if (params->insecure_max_entries)
		ht->p.insecure_max_entries =
			rounddown_pow_of_two(params->insecure_max_entries);
	else
		ht->p.insecure_max_entries = ht->p.max_size * 2;

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	/* A chain longer than 16 entries found during insertion is treated
	 * as a sign of a poor hash distribution (or an attack) and forces a
	 * rehash; see rhashtable_check_elasticity().
	 */
	if (!params->insecure_elasticity)
		ht->elasticity = 16;

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
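
/**
 * rhashtable_free_and_destroy - free elements and destroy hash table
 * @ht:		the hash table to destroy
 * @free_fn:	callback to release resources of an element
 * @arg:	pointer passed to @free_fn
 *
 * Cancels any pending deferred resize, then, if @free_fn is given,
 * invokes it for every element before freeing the bucket table.
 *
 * RCU protected readers may still be accessing the elements, so @free_fn
 * must release them in an RCU-compatible manner.  The caller must ensure
 * that no further insertions or removals run in parallel.
 */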
void rhashtable_free_and_destroy(struct rhashtable *ht,
				 void (*free_fn)(void *ptr, void *arg),
				 void *arg)
{
	const struct bucket_table *tbl;
	unsigned int i;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
	if (free_fn) {
		for (i = 0; i < tbl->size; i++) {
			struct rhash_head *pos, *next;

			for (pos = rht_dereference(tbl->buckets[i], ht),
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL;
			     !rht_is_a_nulls(pos);
			     pos = next,
			     next = !rht_is_a_nulls(pos) ?
					rht_dereference(pos->next, ht) : NULL)
				free_fn(rht_obj(ht, pos), arg);
		}
	}

	bucket_table_free(tbl);
	mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);

void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);