/*
 * Slab allocator functions that are independent of the allocator strategy
 * (SLAB, SLUB or SLOB).
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_HARDENED_USERCOPY
bool usercopy_fallback __ro_after_init =
		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
module_param(usercopy_fallback, bool, 0400);
MODULE_PARM_DESC(usercopy_fallback,
		"WARN instead of reject usercopy whitelist violations");
#endif

static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
		    slab_caches_to_rcu_destroy_workfn);

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB | SLAB_KASAN)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_ACCOUNT)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 */
static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = true;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	WARN_ON(strchr(name, ' '));
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, unsigned int size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		if (s)
			kmem_cache_free(s, p[i]);
		else
			kfree(p[i]);
	}
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
			    void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
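
/*
 * Illustrative sketch (not part of this file): how a caller might use the
 * public bulk interface that the generic fallbacks above back.
 * kmem_cache_alloc_bulk() returns the number of objects actually allocated
 * (0 on failure), so the caller only has to check for zero.  The "foo"
 * cache and type below are invented for the example.
 *
 *	struct foo { int a; };
 *	static struct kmem_cache *foo_cache;	// created elsewhere
 *
 *	void foo_batch(void)
 *	{
 *		void *objs[16];
 *		size_t n;
 *
 *		n = kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL,
 *					  ARRAY_SIZE(objs), objs);
 *		if (!n)
 *			return;		// nothing was allocated
 *		// ... use objs[0..n-1] ...
 *		kmem_cache_free_bulk(foo_cache, n, objs);
 *	}
 */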

#ifdef CONFIG_MEMCG_KMEM

LIST_HEAD(slab_root_caches);

void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.root_cache = NULL;
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
	INIT_LIST_HEAD(&s->memcg_params.children);
	s->memcg_params.dying = false;
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (root_cache) {
		s->memcg_params.root_cache = root_cache;
		s->memcg_params.memcg = memcg;
		INIT_LIST_HEAD(&s->memcg_params.children_node);
		INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kvzalloc(sizeof(struct memcg_cache_array) +
		       memcg_nr_cache_ids * sizeof(void *),
		       GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static void free_memcg_params(struct rcu_head *rcu)
{
	struct memcg_cache_array *old;

	old = container_of(rcu, struct memcg_cache_array, rcu);
	kvfree(old);
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	new = kvzalloc(sizeof(struct memcg_cache_array) +
		       new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		call_rcu(&old->rcu, free_memcg_params);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}

void memcg_link_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_add(&s->root_caches_node, &slab_root_caches);
	} else {
		list_add(&s->memcg_params.children_node,
			 &s->memcg_params.root_cache->memcg_params.children);
		list_add(&s->memcg_params.kmem_caches_node,
			 &s->memcg_params.memcg->kmem_caches);
	}
}

static void memcg_unlink_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_del(&s->root_caches_node);
	} else {
		list_del(&s->memcg_params.children_node);
		list_del(&s->memcg_params.kmem_caches_node);
	}
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}

static inline void memcg_unlink_cache(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
static unsigned int calculate_alignment(slab_flags_t flags,
		unsigned int align, unsigned int size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater then use this.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned int ralign;

		ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	if (s->usersize)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
		slab_flags_t flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;

		/*
		 * Check if alignment is compatible.
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}

static struct kmem_cache *create_cache(const char *name,
		unsigned int object_size, unsigned int align,
		slab_flags_t flags, unsigned int useroffset,
		unsigned int usersize, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	if (WARN_ON(useroffset + usersize > object_size))
		useroffset = usersize = 0;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->size = s->object_size = object_size;
	s->align = align;
	s->ctor = ctor;
	s->useroffset = useroffset;
	s->usersize = usersize;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/**
 * kmem_cache_create_usercopy - Create a cache with a region suitable
 * for copying to userspace
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
		unsigned int size, unsigned int align,
		slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize,
		void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err)
		goto out_unlock;

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) {
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	/* Fail closed on bad usersize or useroffset values. */
	if (WARN_ON(!usersize && useroffset) ||
	    WARN_ON(size < usersize || size - usersize < useroffset))
		usersize = useroffset = 0;

	if (!usersize)
		s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size,
			 calculate_alignment(flags, align, size),
			 flags, useroffset, usersize, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create_usercopy);

struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
		slab_flags_t flags, void (*ctor)(void *))
{
	return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
					  ctor);
}
EXPORT_SYMBOL(kmem_cache_create);
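
/*
 * Illustrative sketch (not part of this file): typical driver-side usage of
 * the two creation interfaces above.  "foo" and its fields are invented for
 * the example; only the slab API calls are real.
 *
 *	struct foo {
 *		spinlock_t lock;
 *		char data[64];		// copied to/from userspace
 *	};
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	static int __init foo_init(void)
 *	{
 *		// Whitelist only foo.data for hardened usercopy checks.
 *		foo_cache = kmem_cache_create_usercopy("foo",
 *				sizeof(struct foo), 0, SLAB_HWCACHE_ALIGN,
 *				offsetof(struct foo, data),
 *				sizeof(((struct foo *)0)->data), NULL);
 *		// A cache with no usercopy region would instead use:
 *		//   kmem_cache_create("foo", sizeof(struct foo), 0,
 *		//			SLAB_HWCACHE_ALIGN, NULL);
 *		return foo_cache ? 0 : -ENOMEM;
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		kmem_cache_destroy(foo_cache);
 *	}
 */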

static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
	LIST_HEAD(to_destroy);
	struct kmem_cache *s, *s2;

	/*
	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
	 * @slab_caches_to_rcu_destroy list.  Splice them onto a private
	 * list under slab_mutex, wait for an RCU grace period so that no
	 * reader can still be accessing objects from those caches, and only
	 * then release the kmem_cache structures themselves.
	 */
	mutex_lock(&slab_mutex);
	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
	mutex_unlock(&slab_mutex);

	if (list_empty(&to_destroy))
		return;

	rcu_barrier();

	list_for_each_entry_safe(s, s2, &to_destroy, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

static int shutdown_cache(struct kmem_cache *s)
{
	/* free asan quarantined objects */
	kasan_cache_shutdown(s);

	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	memcg_unlink_cache(s);
	list_del(&s->list);

	if (s->flags & SLAB_TYPESAFE_BY_RCU) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
#endif
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_unlink(s);
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - create a memcg child cache of @root_cache.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits
 * properties from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been offlined while the cache
	 * creation work was pending.
	 */
	if (memcg->kmem_state != KMEM_ONLINE || root_cache->memcg_params.dying)
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
			       css->serial_nr, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->align,
			 root_cache->flags & CACHE_CREATE_MASK,
			 root_cache->useroffset, root_cache->usersize,
			 root_cache->ctor, memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * memory barrier here to ensure nobody will see the kmem_cache
	 * partially initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static void kmemcg_deactivate_workfn(struct work_struct *work)
{
	struct kmem_cache *s = container_of(work, struct kmem_cache,
					    memcg_params.deact_work);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->memcg_params.deact_fn(s);

	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
	css_put(&s->memcg_params.memcg->css);
}

static void kmemcg_deactivate_rcufn(struct rcu_head *head)
{
	struct kmem_cache *s = container_of(head, struct kmem_cache,
					    memcg_params.deact_rcu_head);

	/*
	 * We need to grab blocking locks.  Bounce to ->deact_work.  The
	 * work item shares the space with the RCU head and can't be
	 * initialized earlier.
	 */
	INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
	queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
}

/**
 * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
 *					   sched RCU grace period
 * @s: target kmem_cache
 * @deact_fn: deactivation function to call
 *
 * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
 * held after a sched RCU grace period.  The slab is guaranteed to stay
 * alive until @deact_fn is finished.  This is to be used from
 * __kmemcg_cache_deactivate().
 */
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
					   void (*deact_fn)(struct kmem_cache *))
{
	if (WARN_ON_ONCE(is_root_cache(s)) ||
	    WARN_ON_ONCE(s->memcg_params.deact_fn))
		return;

	if (s->memcg_params.root_cache->memcg_params.dying)
		return;

	/* pin memcg so that @s doesn't get destroyed in the middle */
	css_get(&s->memcg_params.memcg->css);

	s->memcg_params.deact_fn = deact_fn;
	call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmemcg_cache_deactivate(c);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
				 memcg_params.kmem_caches_node) {
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(shutdown_cache(s));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int shutdown_memcg_caches(struct kmem_cache *s)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (shutdown_cache(c))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.children_node, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.children,
				 memcg_params.children_node)
		shutdown_cache(c);

	list_splice(&busy, &s->memcg_params.children);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.children))
		return -EBUSY;
	return 0;
}

static void flush_memcg_workqueue(struct kmem_cache *s)
{
	mutex_lock(&slab_mutex);
	s->memcg_params.dying = true;
	mutex_unlock(&slab_mutex);

	/*
	 * SLUB deactivates the kmem_caches through call_rcu_sched. Make
	 * sure all registered rcu callbacks have been invoked.
	 */
	if (IS_ENABLED(CONFIG_SLUB))
		rcu_barrier_sched();

	/*
	 * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB
	 * deactivates the memcg kmem_caches through workqueue. Make sure all
	 * previous workitems on workqueue are processed.
	 */
	flush_workqueue(memcg_kmem_cache_wq);
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
	return 0;
}

static inline void flush_memcg_workqueue(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s))
		return;

	flush_memcg_workqueue(s);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s);
	if (!err)
		err = shutdown_cache(s);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero return value indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	kasan_cache_shrink(cachep);
	ret = __kmem_cache_shrink(cachep);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	s->useroffset = useroffset;
	s->usersize = usersize;

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%u failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name,
		unsigned int size, slab_flags_t flags,
		unsigned int useroffset, unsigned int usersize)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags, useroffset, usersize);
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slabs sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static u8 size_index[24] __ro_after_init = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline unsigned int size_index_elem(unsigned int bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	unsigned int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
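
/*
 * Worked example (illustrative, assuming the default table above is not
 * patched by setup_kmalloc_cache_index_table()): for a 100-byte request,
 * size <= 192 so the table is used: size_index_elem(100) = (100 - 1) / 8 = 12
 * and size_index[12] == 7, i.e. the kmalloc-128 cache.  For a 300-byte
 * request the index is computed directly: fls(300 - 1) = 9, i.e. kmalloc-512.
 */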

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table
 * is kmalloc-67108864.
 */
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
	{NULL, 0},			{"kmalloc-96", 96},
	{"kmalloc-192", 192},		{"kmalloc-8", 8},
	{"kmalloc-16", 16},		{"kmalloc-32", 32},
	{"kmalloc-64", 64},		{"kmalloc-128", 128},
	{"kmalloc-256", 256},		{"kmalloc-512", 512},
	{"kmalloc-1024", 1024},		{"kmalloc-2048", 2048},
	{"kmalloc-4096", 4096},		{"kmalloc-8192", 8192},
	{"kmalloc-16384", 16384},	{"kmalloc-32768", 32768},
	{"kmalloc-65536", 65536},	{"kmalloc-131072", 131072},
	{"kmalloc-262144", 262144},	{"kmalloc-524288", 524288},
	{"kmalloc-1048576", 1048576},	{"kmalloc-2097152", 2097152},
	{"kmalloc-4194304", 4194304},	{"kmalloc-8388608", 8388608},
	{"kmalloc-16777216", 16777216},	{"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crashes if we map a kernel page for a size
 * smaller than KMALLOC_MIN_SIZE.
 */
void __init setup_kmalloc_cache_index_table(void)
{
	unsigned int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		unsigned int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 byte.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 byte. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}

static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
{
	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
					kmalloc_info[idx].size, flags, 0,
					kmalloc_info[idx].size);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(slab_flags_t flags)
{
	int i;

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i])
			new_kmalloc_cache(i, flags);

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			new_kmalloc_cache(1, flags);
		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			new_kmalloc_cache(2, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			unsigned int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
					    "dma-kmalloc-%u", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags, 0, 0);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree().
 */
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret;
	struct page *page;

	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	ret = page ? page_address(page) : NULL;
	kmemleak_alloc(ret, size, 1, flags);
	kasan_kmalloc_large(ret, size, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order);

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Randomize a generic freelist */
static void freelist_randomize(struct rnd_state *state, unsigned int *list,
			       unsigned int count)
{
	unsigned int rand;
	unsigned int i;

	for (i = 0; i < count; i++)
		list[i] = i;

	/* Fisher-Yates shuffle */
	for (i = count - 1; i > 0; i--) {
		rand = prandom_u32_state(state);
		rand %= (i + 1);
		swap(list[i], list[rand]);
	}
}

/* Create a random sequence per cache */
int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
			    gfp_t gfp)
{
	struct rnd_state state;

	if (count < 2 || cachep->random_seq)
		return 0;

	cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
	if (!cachep->random_seq)
		return -ENOMEM;

	/* Get best entropy at this stage of boot */
	prandom_seed_state(&state, get_random_long());

	freelist_randomize(&state, cachep->random_seq, count);
	return 0;
}

/* Destroy the per-cache random freelist sequence */
void cache_random_seq_destroy(struct kmem_cache *cachep)
{
	kfree(cachep->random_seq);
	cachep->random_seq = NULL;
}
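
/*
 * Worked example (illustrative) of the Fisher-Yates shuffle performed by
 * freelist_randomize() above, for count = 4 and a made-up random sequence
 * rand = {2, 1, 1}:
 *
 *	start:                {0, 1, 2, 3}
 *	i = 3, 2 % 4 = 2  ->  swap list[3], list[2]:  {0, 1, 3, 2}
 *	i = 2, 1 % 3 = 1  ->  swap list[2], list[1]:  {0, 3, 1, 2}
 *	i = 1, 1 % 2 = 1  ->  swap list[1], list[1]:  {0, 3, 1, 2}
 *
 * The resulting permutation of object indexes is stored in
 * cachep->random_seq and consulted by the allocator when laying out the
 * freelist of newly allocated slab pages.
 */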
#endif /* CONFIG_SLAB_FREELIST_RANDOM */

#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (0600)
#else
#define SLABINFO_RIGHTS (0400)
#endif

static void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}

void *slab_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&slab_mutex);
	return seq_list_start(&slab_root_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_root_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache(c, s) {
		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

static void cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
}

static int slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, root_caches_node);

	if (p == slab_root_caches.next)
		print_slabinfo_header(m);
	cache_show(s, m);
	return 0;
}

void dump_unreclaimable_slab(void)
{
	struct kmem_cache *s, *s2;
	struct slabinfo sinfo;

	/*
	 * Here acquiring slab_mutex is risky since we don't prefer to get
	 * sleep in oom path. But, without mutex hold, it may introduce a
	 * risk of crash.
	 * Use mutex_trylock to protect the list traverse, dump nothing
	 * just in case kernel won't honor this risk.
	 */
	if (!mutex_trylock(&slab_mutex)) {
		pr_warn("excessive unreclaimable slab but cannot dump stats\n");
		return;
	}

	pr_info("Unreclaimable slab info:\n");
	pr_info("Name Used Total\n");

	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (!is_root_cache(s) || (s->flags & SLAB_RECLAIM_ACCOUNT))
			continue;

		get_slabinfo(s, &sinfo);

		if (sinfo.num_objs > 0)
			pr_info("%-17s %10luKB %10luKB\n", cache_name(s),
				(sinfo.active_objs * s->size) / 1024,
				(sinfo.num_objs * s->size) / 1024);
	}
	mutex_unlock(&slab_mutex);
}

#if defined(CONFIG_MEMCG)
void *memcg_slab_start(struct seq_file *m, loff_t *pos)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	mutex_lock(&slab_mutex);
	return seq_list_start(&memcg->kmem_caches, *pos);
}

void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	return seq_list_next(p, &memcg->kmem_caches, pos);
}

void memcg_slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

int memcg_slab_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache,
					  memcg_params.kmem_caches_node);
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));

	if (p == memcg->kmem_caches.next)
		print_slabinfo_header(m);
	cache_show(s, m);
	return 0;
}
#endif

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = slab_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = slab_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open = slabinfo_open,
	.read = seq_read,
	.write = slabinfo_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLAB || CONFIG_SLUB_DEBUG */

static __always_inline void *__do_krealloc(const void *p, size_t new_size,
					   gfp_t flags)
{
	void *ret;
	size_t ks = 0;

	if (p)
		ks = ksize(p);

	if (ks >= new_size) {
		kasan_krealloc((void *)p, new_size, flags);
		return (void *)p;
	}

	ret = kmalloc_track_caller(new_size, flags);
	if (ret && p)
		memcpy(ret, p, ks);

	return ret;
}

/**
 * __krealloc - like krealloc() but don't free @p.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * This function is like krealloc() except it never frees the originally
 * allocated buffer. Use this if you don't want to free the buffer immediately
 * like, for example, with RCU.
 */
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
	if (unlikely(!new_size))
		return ZERO_SIZE_PTR;

	return __do_krealloc(p, new_size, flags);
}
EXPORT_SYMBOL(__krealloc);

/**
 * krealloc - reallocate memory. The contents will remain unchanged.
 * @p: object to reallocate memory for.
 * @new_size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * The contents of the object pointed to are preserved up to the
 * lesser of the new and old sizes.  If @p is %NULL, krealloc()
 * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
 * %NULL pointer, the object pointed to is freed.
 */
void *krealloc(const void *p, size_t new_size, gfp_t flags)
{
	void *ret;

	if (unlikely(!new_size)) {
		kfree(p);
		return ZERO_SIZE_PTR;
	}

	ret = __do_krealloc(p, new_size, flags);
	if (ret && p != ret)
		kfree(p);

	return ret;
}
EXPORT_SYMBOL(krealloc);
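
/*
 * Illustrative sketch (not part of this file): growing a buffer with
 * krealloc().  The old pointer must not be used after a successful call;
 * on failure the old buffer is untouched and remains the caller's to free.
 * The helper name and parameters are invented for the example.
 *
 *	static int grow_buf(char **bufp, size_t *lenp, size_t new_len)
 *	{
 *		char *tmp = krealloc(*bufp, new_len, GFP_KERNEL);
 *
 *		if (!tmp)
 *			return -ENOMEM;	// *bufp is still valid here
 *		*bufp = tmp;
 *		*lenp = new_len;
 *		return 0;
 *	}
 */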

/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before freed.
 * If @p is %NULL, kzfree() does nothing.
 *
 * Note: this function zeroes the whole allocated buffer which can be a good
 * deal bigger than the requested buffer size passed to kmalloc(). So be
 * careful when using this function in performance sensitive code.
 */
void kzfree(const void *p)
{
	size_t ks;
	void *mem = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(mem)))
		return;
	ks = ksize(mem);
	memset(mem, 0, ks);
	kfree(mem);
}
EXPORT_SYMBOL(kzfree);

/* Tracepoints definitions. */
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);

int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
	if (__should_failslab(s, gfpflags))
		return -ENOMEM;
	return 0;
}
ALLOW_ERROR_INJECTION(should_failslab, ERRNO);