// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

13#include <linux/mm.h>
14#include <linux/swap.h>
15#include <linux/module.h>
16#include <linux/bit_spinlock.h>
17#include <linux/interrupt.h>
18#include <linux/swab.h>
19#include <linux/bitops.h>
20#include <linux/slab.h>
21#include "slab.h"
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/kasan.h>
25#include <linux/cpu.h>
26#include <linux/cpuset.h>
27#include <linux/mempolicy.h>
28#include <linux/ctype.h>
29#include <linux/debugobjects.h>
30#include <linux/kallsyms.h>
31#include <linux/kfence.h>
32#include <linux/memory.h>
33#include <linux/math64.h>
34#include <linux/fault-inject.h>
35#include <linux/stacktrace.h>
36#include <linux/prefetch.h>
37#include <linux/memcontrol.h>
38#include <linux/random.h>
39#include <kunit/test.h>
40
41#include <linux/debugfs.h>
42#include <trace/events/kmem.h>
43
44#include "internal.h"
45
/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slab_mutex protects the list of all slabs and synchronizes major
 *   metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects:
 *	A. page->freelist	-> List of free objects in a page
 *	B. page->inuse		-> Number of objects in use
 *	C. page->objects	-> Number of objects in the page
 *	D. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list except the per cpu partial list. The processor that froze
 *   the slab is the one who can perform list operations on the page. Other
 *   processors may put objects onto the freelist but the processor that
 *   froze the slab is the only one that can retrieve objects from the
 *   page's freelist.
 *
 *   The list_lock protects the partial and full lists on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor may the number of partial slabs be modified.
 *
 *   Interrupts are disabled during allocation and deallocation in order to
 *   make the slab allocator safe to use in the context of an irq.
 *
 *   SLUB assigns one slab for allocation to each processor. Allocations
 *   only occur from these slabs called cpu slabs.
 *
 *   Slabs with free elements are kept on a partial list and during regular
 *   operations no list for full slabs is used. If an object in a full slab
 *   is freed then the slab will show up again on the partial lists.
 *   Otherwise there is no need to track full slabs unless we have to
 *   determine the number of objects allocated. Debugging support is needed
 *   to do that (SLAB_STORE_USER keeps full slabs on a per-node full list).
 */
121#ifdef CONFIG_SLUB_DEBUG
122#ifdef CONFIG_SLUB_DEBUG_ON
123DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
124#else
125DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
126#endif
127#endif
128
129static inline bool kmem_cache_debug(struct kmem_cache *s)
130{
131 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
132}
133
134void *fixup_red_left(struct kmem_cache *s, void *p)
135{
136 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
137 p += s->red_left_pad;
138
139 return p;
140}
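
/*
 * With SLAB_RED_ZONE, each object is preceded by a left redzone of
 * s->red_left_pad bytes; fixup_red_left() above converts the raw slab
 * address (which points at that redzone) into the address of the object
 * proper.
 */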
141
142static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
143{
144#ifdef CONFIG_SLUB_CPU_PARTIAL
145 return !kmem_cache_debug(s);
146#else
147 return false;
148#endif
149}
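
/*
 * Note: per-cpu partial lists are not used for debug caches. Keeping
 * frozen slabs cached per cpu would let allocations and frees bypass the
 * slow-path consistency checking and full-list tracking that the debug
 * flags rely on.
 */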
150
151
152
153
154
155
156
157
158
/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10
174
175#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
176 SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
192
193#define OO_SHIFT 16
194#define OO_MASK ((1 << OO_SHIFT) - 1)
195#define MAX_OBJS_PER_PAGE 32767

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
207struct track {
208 unsigned long addr;
209#ifdef CONFIG_STACKTRACE
210 unsigned long addrs[TRACK_ADDRS_COUNT];
211#endif
212 int cpu;
213 int pid;
214 unsigned long when;
215};
216
217enum track_item { TRACK_ALLOC, TRACK_FREE };
218
219#ifdef CONFIG_SYSFS
220static int sysfs_slab_add(struct kmem_cache *);
221static int sysfs_slab_alias(struct kmem_cache *, const char *);
222#else
223static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
224static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
225 { return 0; }
226#endif
227
228#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
229static void debugfs_slab_add(struct kmem_cache *);
230#else
231static inline void debugfs_slab_add(struct kmem_cache *s) { }
232#endif
233
234static inline void stat(const struct kmem_cache *s, enum stat_item si)
235{
236#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_add()'s irq-disable overhead.
	 */
	raw_cpu_inc(s->cpu_slab->stat[si]);
242#endif
243}
244
/*
 * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
 * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
 * differ during memory hotplug/hotremove operations.
 * Protected by slab_mutex.
 */
static nodemask_t slab_nodes;
252
/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

/*
 * Returns the freelist pointer value to store (ptr). With hardening
 * enabled this is obfuscated by XORing with a per-cache random value and
 * the swabbed address at which the pointer is stored, so freelist pointers
 * are not directly forgeable by a heap overflow.
 */
262static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
263 unsigned long ptr_addr)
264{
265#ifdef CONFIG_SLAB_FREELIST_HARDENED
266
267
268
269
270
271
272
273
274
275
276 return (void *)((unsigned long)ptr ^ s->random ^
277 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
278#else
279 return ptr;
280#endif
281}
282
/* Returns the freelist pointer recorded at location ptr_addr. */
284static inline void *freelist_dereference(const struct kmem_cache *s,
285 void *ptr_addr)
286{
287 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
288 (unsigned long)ptr_addr);
289}
290
291static inline void *get_freepointer(struct kmem_cache *s, void *object)
292{
293 object = kasan_reset_tag(object);
294 return freelist_dereference(s, object + s->offset);
295}
296
297static void prefetch_freepointer(const struct kmem_cache *s, void *object)
298{
299 prefetch(object + s->offset);
300}
301
302static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
303{
304 unsigned long freepointer_addr;
305 void *p;
306
307 if (!debug_pagealloc_enabled_static())
308 return get_freepointer(s, object);
309
310 object = kasan_reset_tag(object);
311 freepointer_addr = (unsigned long)object + s->offset;
312 copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
313 return freelist_ptr(s, p, freepointer_addr);
314}
315
316static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
317{
318 unsigned long freeptr_addr = (unsigned long)object + s->offset;
319
320#ifdef CONFIG_SLAB_FREELIST_HARDENED
321 BUG_ON(object == fp);
322#endif
323
324 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
325 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
326}
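
/*
 * With CONFIG_SLAB_FREELIST_HARDENED, the stored "next" pointer is
 * ptr ^ s->random ^ swab(ptr_addr): the value kept inside a free object is
 * only meaningful to code that knows both the per-cache random key and the
 * address it was read from. set_freepointer() and get_freepointer() above
 * apply the same transform, so the double XOR restores the plain pointer.
 */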
327
/* Loop over all objects in a slab */
329#define for_each_object(__p, __s, __addr, __objects) \
330 for (__p = fixup_red_left(__s, __addr); \
331 __p < (__addr) + (__objects) * (__s)->size; \
332 __p += (__s)->size)
333
334static inline unsigned int order_objects(unsigned int order, unsigned int size)
335{
336 return ((unsigned int)PAGE_SIZE << order) / size;
337}
338
339static inline struct kmem_cache_order_objects oo_make(unsigned int order,
340 unsigned int size)
341{
342 struct kmem_cache_order_objects x = {
343 (order << OO_SHIFT) + order_objects(order, size)
344 };
345
346 return x;
347}
348
349static inline unsigned int oo_order(struct kmem_cache_order_objects x)
350{
351 return x.x >> OO_SHIFT;
352}
353
354static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
355{
356 return x.x & OO_MASK;
357}
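
/*
 * Worked example of the kmem_cache_order_objects encoding above (assuming
 * 4 KiB pages): for an order-3 slab (8 pages) of 256-byte objects,
 * oo_make(3, 256) stores (3 << OO_SHIFT) + 128, so oo_order() recovers 3
 * and oo_objects() recovers 128. Packing both values into one word keeps
 * the pair compact and readable in a single load.
 */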
358
/*
 * Per slab locking using the pagelock
 */
362static __always_inline void slab_lock(struct page *page)
363{
364 VM_BUG_ON_PAGE(PageTail(page), page);
365 bit_spin_lock(PG_locked, &page->flags);
366}
367
368static __always_inline void slab_unlock(struct page *page)
369{
370 VM_BUG_ON_PAGE(PageTail(page), page);
371 __bit_spin_unlock(PG_locked, &page->flags);
372}
373
/* Interrupts must be disabled (for the fallback code to work right) */
375static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
376 void *freelist_old, unsigned long counters_old,
377 void *freelist_new, unsigned long counters_new,
378 const char *n)
379{
380 VM_BUG_ON(!irqs_disabled());
381#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
383 if (s->flags & __CMPXCHG_DOUBLE) {
384 if (cmpxchg_double(&page->freelist, &page->counters,
385 freelist_old, counters_old,
386 freelist_new, counters_new))
387 return true;
388 } else
389#endif
390 {
391 slab_lock(page);
392 if (page->freelist == freelist_old &&
393 page->counters == counters_old) {
394 page->freelist = freelist_new;
395 page->counters = counters_new;
396 slab_unlock(page);
397 return true;
398 }
399 slab_unlock(page);
400 }
401
402 cpu_relax();
403 stat(s, CMPXCHG_DOUBLE_FAIL);
404
405#ifdef SLUB_DEBUG_CMPXCHG
406 pr_info("%s %s: cmpxchg double redo ", n, s->name);
407#endif
408
409 return false;
410}
411
412static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
413 void *freelist_old, unsigned long counters_old,
414 void *freelist_new, unsigned long counters_new,
415 const char *n)
416{
417#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
419 if (s->flags & __CMPXCHG_DOUBLE) {
420 if (cmpxchg_double(&page->freelist, &page->counters,
421 freelist_old, counters_old,
422 freelist_new, counters_new))
423 return true;
424 } else
425#endif
426 {
427 unsigned long flags;
428
429 local_irq_save(flags);
430 slab_lock(page);
431 if (page->freelist == freelist_old &&
432 page->counters == counters_old) {
433 page->freelist = freelist_new;
434 page->counters = counters_new;
435 slab_unlock(page);
436 local_irq_restore(flags);
437 return true;
438 }
439 slab_unlock(page);
440 local_irq_restore(flags);
441 }
442
443 cpu_relax();
444 stat(s, CMPXCHG_DOUBLE_FAIL);
445
446#ifdef SLUB_DEBUG_CMPXCHG
447 pr_info("%s %s: cmpxchg double redo ", n, s->name);
448#endif
449
450 return false;
451}
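
/*
 * Both helpers above atomically swap the (freelist, counters) pair of a
 * slab page. __cmpxchg_double_slab() expects interrupts to already be
 * disabled; cmpxchg_double_slab() disables them itself around the
 * slab_lock() fallback used when cmpxchg_double is not available.
 */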
452
453#ifdef CONFIG_SLUB_DEBUG
454static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
455static DEFINE_SPINLOCK(object_map_lock);
456
457#if IS_ENABLED(CONFIG_KUNIT)
458static bool slab_add_kunit_errors(void)
459{
460 struct kunit_resource *resource;
461
462 if (likely(!current->kunit_test))
463 return false;
464
465 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
466 if (!resource)
467 return false;
468
469 (*(int *)resource->data)++;
470 kunit_put_resource(resource);
471 return true;
472}
473#else
474static inline bool slab_add_kunit_errors(void) { return false; }
475#endif
476
/*
 * Determine a map of objects in use in a slab.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
483static unsigned long *get_map(struct kmem_cache *s, struct page *page)
484 __acquires(&object_map_lock)
485{
486 void *p;
487 void *addr = page_address(page);
488
489 VM_BUG_ON(!irqs_disabled());
490
491 spin_lock(&object_map_lock);
492
493 bitmap_zero(object_map, page->objects);
494
495 for (p = page->freelist; p; p = get_freepointer(s, p))
496 set_bit(__obj_to_index(s, addr, p), object_map);
497
498 return object_map;
499}
500
501static void put_map(unsigned long *map) __releases(&object_map_lock)
502{
503 VM_BUG_ON(map != object_map);
504 spin_unlock(&object_map_lock);
505}
506
507static inline unsigned int size_from_object(struct kmem_cache *s)
508{
509 if (s->flags & SLAB_RED_ZONE)
510 return s->size - s->red_left_pad;
511
512 return s->size;
513}
514
515static inline void *restore_red_left(struct kmem_cache *s, void *p)
516{
517 if (s->flags & SLAB_RED_ZONE)
518 p -= s->red_left_pad;
519
520 return p;
521}
522
523
524
525
526#if defined(CONFIG_SLUB_DEBUG_ON)
527static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
528#else
529static slab_flags_t slub_debug;
530#endif
531
532static char *slub_debug_string;
533static int disable_higher_order_debug;
534
/*
 * slub is about to manipulate internal object metadata. This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error. metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
541static inline void metadata_access_enable(void)
542{
543 kasan_disable_current();
544}
545
546static inline void metadata_access_disable(void)
547{
548 kasan_enable_current();
549}
550
/*
 * Object debugging
 */

/* Verify that a pointer has an address that is valid within a slab page */
556static inline int check_valid_pointer(struct kmem_cache *s,
557 struct page *page, void *object)
558{
559 void *base;
560
561 if (!object)
562 return 1;
563
564 base = page_address(page);
565 object = kasan_reset_tag(object);
566 object = restore_red_left(s, object);
567 if (object < base || object >= base + page->objects * s->size ||
568 (object - base) % s->size) {
569 return 0;
570 }
571
572 return 1;
573}
574
575static void print_section(char *level, char *text, u8 *addr,
576 unsigned int length)
577{
578 metadata_access_enable();
579 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
580 16, 1, kasan_reset_tag((void *)addr), length, 1);
581 metadata_access_disable();
582}
583
584
585
586
587static inline bool freeptr_outside_object(struct kmem_cache *s)
588{
589 return s->offset >= s->inuse;
590}
591
592
593
594
595
596static inline unsigned int get_info_end(struct kmem_cache *s)
597{
598 if (freeptr_outside_object(s))
599 return s->inuse + sizeof(void *);
600 else
601 return s->inuse;
602}
603
604static struct track *get_track(struct kmem_cache *s, void *object,
605 enum track_item alloc)
606{
607 struct track *p;
608
609 p = object + get_info_end(s);
610
611 return kasan_reset_tag(p + alloc);
612}
613
614static void set_track(struct kmem_cache *s, void *object,
615 enum track_item alloc, unsigned long addr)
616{
617 struct track *p = get_track(s, object, alloc);
618
619 if (addr) {
620#ifdef CONFIG_STACKTRACE
621 unsigned int nr_entries;
622
623 metadata_access_enable();
624 nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
625 TRACK_ADDRS_COUNT, 3);
626 metadata_access_disable();
627
628 if (nr_entries < TRACK_ADDRS_COUNT)
629 p->addrs[nr_entries] = 0;
630#endif
631 p->addr = addr;
632 p->cpu = smp_processor_id();
633 p->pid = current->pid;
634 p->when = jiffies;
635 } else {
636 memset(p, 0, sizeof(struct track));
637 }
638}
639
640static void init_tracking(struct kmem_cache *s, void *object)
641{
642 if (!(s->flags & SLAB_STORE_USER))
643 return;
644
645 set_track(s, object, TRACK_FREE, 0UL);
646 set_track(s, object, TRACK_ALLOC, 0UL);
647}
648
649static void print_track(const char *s, struct track *t, unsigned long pr_time)
650{
651 if (!t->addr)
652 return;
653
654 pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
655 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
656#ifdef CONFIG_STACKTRACE
657 {
658 int i;
659 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
660 if (t->addrs[i])
661 pr_err("\t%pS\n", (void *)t->addrs[i]);
662 else
663 break;
664 }
665#endif
666}
667
668void print_tracking(struct kmem_cache *s, void *object)
669{
670 unsigned long pr_time = jiffies;
671 if (!(s->flags & SLAB_STORE_USER))
672 return;
673
674 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
675 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
676}
677
678static void print_page_info(struct page *page)
679{
680 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n",
681 page, page->objects, page->inuse, page->freelist,
682 page->flags, &page->flags);
683
684}
685
686static void slab_bug(struct kmem_cache *s, char *fmt, ...)
687{
688 struct va_format vaf;
689 va_list args;
690
691 va_start(args, fmt);
692 vaf.fmt = fmt;
693 vaf.va = &args;
694 pr_err("=============================================================================\n");
695 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
696 pr_err("-----------------------------------------------------------------------------\n\n");
697 va_end(args);
698}
699
700__printf(2, 3)
701static void slab_fix(struct kmem_cache *s, char *fmt, ...)
702{
703 struct va_format vaf;
704 va_list args;
705
706 if (slab_add_kunit_errors())
707 return;
708
709 va_start(args, fmt);
710 vaf.fmt = fmt;
711 vaf.va = &args;
712 pr_err("FIX %s: %pV\n", s->name, &vaf);
713 va_end(args);
714}
715
716static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
717 void **freelist, void *nextfree)
718{
719 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
720 !check_valid_pointer(s, page, nextfree) && freelist) {
721 object_err(s, page, *freelist, "Freechain corrupt");
722 *freelist = NULL;
723 slab_fix(s, "Isolate corrupted freechain");
724 return true;
725 }
726
727 return false;
728}
729
730static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
731{
732 unsigned int off;
733 u8 *addr = page_address(page);
734
735 print_tracking(s, p);
736
737 print_page_info(page);
738
739 pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
740 p, p - addr, get_freepointer(s, p));
741
742 if (s->flags & SLAB_RED_ZONE)
743 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
744 s->red_left_pad);
745 else if (p > addr + 16)
746 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
747
748 print_section(KERN_ERR, "Object ", p,
749 min_t(unsigned int, s->object_size, PAGE_SIZE));
750 if (s->flags & SLAB_RED_ZONE)
751 print_section(KERN_ERR, "Redzone ", p + s->object_size,
752 s->inuse - s->object_size);
753
754 off = get_info_end(s);
755
756 if (s->flags & SLAB_STORE_USER)
757 off += 2 * sizeof(struct track);
758
759 off += kasan_metadata_size(s);
760
761 if (off != size_from_object(s))
762
763 print_section(KERN_ERR, "Padding ", p + off,
764 size_from_object(s) - off);
765
766 dump_stack();
767}
768
769void object_err(struct kmem_cache *s, struct page *page,
770 u8 *object, char *reason)
771{
772 if (slab_add_kunit_errors())
773 return;
774
775 slab_bug(s, "%s", reason);
776 print_trailer(s, page, object);
777 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
778}
779
780static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
781 const char *fmt, ...)
782{
783 va_list args;
784 char buf[100];
785
786 if (slab_add_kunit_errors())
787 return;
788
789 va_start(args, fmt);
790 vsnprintf(buf, sizeof(buf), fmt, args);
791 va_end(args);
792 slab_bug(s, "%s", buf);
793 print_page_info(page);
794 dump_stack();
795 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
796}
797
798static void init_object(struct kmem_cache *s, void *object, u8 val)
799{
800 u8 *p = kasan_reset_tag(object);
801
802 if (s->flags & SLAB_RED_ZONE)
803 memset(p - s->red_left_pad, val, s->red_left_pad);
804
805 if (s->flags & __OBJECT_POISON) {
806 memset(p, POISON_FREE, s->object_size - 1);
807 p[s->object_size - 1] = POISON_END;
808 }
809
810 if (s->flags & SLAB_RED_ZONE)
811 memset(p + s->object_size, val, s->inuse - s->object_size);
812}
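
/*
 * init_object() writes the debug byte patterns for a single object: both
 * redzones are filled with the supplied red-zone value (SLUB_RED_INACTIVE
 * 0xbb or SLUB_RED_ACTIVE 0xcc), and with poisoning enabled the object
 * body is set to POISON_FREE (0x6b) with a trailing POISON_END (0xa5).
 */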
813
814static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
815 void *from, void *to)
816{
817 slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
818 memset(from, data, to - from);
819}
820
821static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
822 u8 *object, char *what,
823 u8 *start, unsigned int value, unsigned int bytes)
824{
825 u8 *fault;
826 u8 *end;
827 u8 *addr = page_address(page);
828
829 metadata_access_enable();
830 fault = memchr_inv(kasan_reset_tag(start), value, bytes);
831 metadata_access_disable();
832 if (!fault)
833 return 1;
834
835 end = start + bytes;
836 while (end > fault && end[-1] == value)
837 end--;
838
839 if (slab_add_kunit_errors())
840 goto skip_bug_print;
841
842 slab_bug(s, "%s overwritten", what);
843 pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
844 fault, end - 1, fault - addr,
845 fault[0], value);
846 print_trailer(s, page, object);
847 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
848
849skip_bug_print:
850 restore_bytes(s, what, value, fault, end);
851 return 0;
852}
/*
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed.
 *	If the freepointer may overlay the object then the free
 *	pointer is in the middle of the object.
 *
 *	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 *	0xa5 (POISON_END)
 *
 * object + s->object_size
 *	Padding to reach word boundary. This is also used for Redzoning.
 *	Padding is extended by another word if Redzoning is enabled and
 *	object_size == inuse.
 *
 *	We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
 *	0xcc (SLUB_RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 *	Meta data starts here.
 *
 *	A. Free pointer (if we cannot overwrite object on free)
 *	B. Tracking data for SLAB_STORE_USER
 *	C. Padding to reach the required alignment boundary or at minimum
 *		one word if debugging is on, to be able to detect writes
 *		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 *	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored, and therefore no slab options that rely on these
 * boundaries may be used with merged slabcaches.
 */
892static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
893{
894 unsigned long off = get_info_end(s);
895
896 if (s->flags & SLAB_STORE_USER)
897
898 off += 2 * sizeof(struct track);
899
900 off += kasan_metadata_size(s);
901
902 if (size_from_object(s) == off)
903 return 1;
904
905 return check_bytes_and_report(s, page, p, "Object padding",
906 p + off, POISON_INUSE, size_from_object(s) - off);
907}

/* Check the pad bytes at the end of a slab page */
910static int slab_pad_check(struct kmem_cache *s, struct page *page)
911{
912 u8 *start;
913 u8 *fault;
914 u8 *end;
915 u8 *pad;
916 int length;
917 int remainder;
918
919 if (!(s->flags & SLAB_POISON))
920 return 1;
921
922 start = page_address(page);
923 length = page_size(page);
924 end = start + length;
925 remainder = length % s->size;
926 if (!remainder)
927 return 1;
928
929 pad = end - remainder;
930 metadata_access_enable();
931 fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
932 metadata_access_disable();
933 if (!fault)
934 return 1;
935 while (end > fault && end[-1] == POISON_INUSE)
936 end--;
937
938 slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
939 fault, end - 1, fault - start);
940 print_section(KERN_ERR, "Padding ", pad, remainder);
941
942 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
943 return 0;
944}
945
946static int check_object(struct kmem_cache *s, struct page *page,
947 void *object, u8 val)
948{
949 u8 *p = object;
950 u8 *endobject = object + s->object_size;
951
952 if (s->flags & SLAB_RED_ZONE) {
953 if (!check_bytes_and_report(s, page, object, "Left Redzone",
954 object - s->red_left_pad, val, s->red_left_pad))
955 return 0;
956
957 if (!check_bytes_and_report(s, page, object, "Right Redzone",
958 endobject, val, s->inuse - s->object_size))
959 return 0;
960 } else {
961 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
962 check_bytes_and_report(s, page, p, "Alignment padding",
963 endobject, POISON_INUSE,
964 s->inuse - s->object_size);
965 }
966 }
967
968 if (s->flags & SLAB_POISON) {
969 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
970 (!check_bytes_and_report(s, page, p, "Poison", p,
971 POISON_FREE, s->object_size - 1) ||
972 !check_bytes_and_report(s, page, p, "End Poison",
973 p + s->object_size - 1, POISON_END, 1)))
974 return 0;
975
976
977
978 check_pad_bytes(s, page, p);
979 }
980
981 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
982
983
984
985
986 return 1;
987
988
989 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
990 object_err(s, page, p, "Freepointer corrupt");
991
992
993
994
995
996 set_freepointer(s, p, NULL);
997 return 0;
998 }
999 return 1;
1000}
1001
1002static int check_slab(struct kmem_cache *s, struct page *page)
1003{
1004 int maxobj;
1005
1006 VM_BUG_ON(!irqs_disabled());
1007
1008 if (!PageSlab(page)) {
1009 slab_err(s, page, "Not a valid slab page");
1010 return 0;
1011 }
1012
1013 maxobj = order_objects(compound_order(page), s->size);
1014 if (page->objects > maxobj) {
1015 slab_err(s, page, "objects %u > max %u",
1016 page->objects, maxobj);
1017 return 0;
1018 }
1019 if (page->inuse > page->objects) {
1020 slab_err(s, page, "inuse %u > max %u",
1021 page->inuse, page->objects);
1022 return 0;
1023 }
1024
1025 slab_pad_check(s, page);
1026 return 1;
1027}
1028
/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the chains are in a consistent state.
 */
1033static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
1034{
1035 int nr = 0;
1036 void *fp;
1037 void *object = NULL;
1038 int max_objects;
1039
1040 fp = page->freelist;
1041 while (fp && nr <= page->objects) {
1042 if (fp == search)
1043 return 1;
1044 if (!check_valid_pointer(s, page, fp)) {
1045 if (object) {
1046 object_err(s, page, object,
1047 "Freechain corrupt");
1048 set_freepointer(s, object, NULL);
1049 } else {
1050 slab_err(s, page, "Freepointer corrupt");
1051 page->freelist = NULL;
1052 page->inuse = page->objects;
1053 slab_fix(s, "Freelist cleared");
1054 return 0;
1055 }
1056 break;
1057 }
1058 object = fp;
1059 fp = get_freepointer(s, object);
1060 nr++;
1061 }
1062
1063 max_objects = order_objects(compound_order(page), s->size);
1064 if (max_objects > MAX_OBJS_PER_PAGE)
1065 max_objects = MAX_OBJS_PER_PAGE;
1066
1067 if (page->objects != max_objects) {
1068 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1069 page->objects, max_objects);
1070 page->objects = max_objects;
1071 slab_fix(s, "Number of objects adjusted");
1072 }
1073 if (page->inuse != page->objects - nr) {
1074 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1075 page->inuse, page->objects - nr);
1076 page->inuse = page->objects - nr;
1077 slab_fix(s, "Object count adjusted");
1078 }
1079 return search == NULL;
1080}
1081
1082static void trace(struct kmem_cache *s, struct page *page, void *object,
1083 int alloc)
1084{
1085 if (s->flags & SLAB_TRACE) {
1086 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1087 s->name,
1088 alloc ? "alloc" : "free",
1089 object, page->inuse,
1090 page->freelist);
1091
1092 if (!alloc)
1093 print_section(KERN_INFO, "Object ", (void *)object,
1094 s->object_size);
1095
1096 dump_stack();
1097 }
1098}
1099
/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1103static void add_full(struct kmem_cache *s,
1104 struct kmem_cache_node *n, struct page *page)
1105{
1106 if (!(s->flags & SLAB_STORE_USER))
1107 return;
1108
1109 lockdep_assert_held(&n->list_lock);
1110 list_add(&page->slab_list, &n->full);
1111}
1112
1113static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1114{
1115 if (!(s->flags & SLAB_STORE_USER))
1116 return;
1117
1118 lockdep_assert_held(&n->list_lock);
1119 list_del(&page->slab_list);
1120}
1121
1122
1123static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1124{
1125 struct kmem_cache_node *n = get_node(s, node);
1126
1127 return atomic_long_read(&n->nr_slabs);
1128}
1129
1130static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1131{
1132 return atomic_long_read(&n->nr_slabs);
1133}
1134
1135static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1136{
1137 struct kmem_cache_node *n = get_node(s, node);
1138
	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
1145 if (likely(n)) {
1146 atomic_long_inc(&n->nr_slabs);
1147 atomic_long_add(objects, &n->total_objects);
1148 }
1149}
1150static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1151{
1152 struct kmem_cache_node *n = get_node(s, node);
1153
1154 atomic_long_dec(&n->nr_slabs);
1155 atomic_long_sub(objects, &n->total_objects);
1156}
1157
1158
1159static void setup_object_debug(struct kmem_cache *s, struct page *page,
1160 void *object)
1161{
1162 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1163 return;
1164
1165 init_object(s, object, SLUB_RED_INACTIVE);
1166 init_tracking(s, object);
1167}
1168
1169static
1170void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1171{
1172 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1173 return;
1174
1175 metadata_access_enable();
1176 memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
1177 metadata_access_disable();
1178}
1179
1180static inline int alloc_consistency_checks(struct kmem_cache *s,
1181 struct page *page, void *object)
1182{
1183 if (!check_slab(s, page))
1184 return 0;
1185
1186 if (!check_valid_pointer(s, page, object)) {
1187 object_err(s, page, object, "Freelist Pointer check fails");
1188 return 0;
1189 }
1190
1191 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1192 return 0;
1193
1194 return 1;
1195}
1196
1197static noinline int alloc_debug_processing(struct kmem_cache *s,
1198 struct page *page,
1199 void *object, unsigned long addr)
1200{
1201 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1202 if (!alloc_consistency_checks(s, page, object))
1203 goto bad;
1204 }
1205
1206
1207 if (s->flags & SLAB_STORE_USER)
1208 set_track(s, object, TRACK_ALLOC, addr);
1209 trace(s, page, object, 1);
1210 init_object(s, object, SLUB_RED_ACTIVE);
1211 return 1;
1212
1213bad:
1214 if (PageSlab(page)) {
1215
1216
1217
1218
1219
1220 slab_fix(s, "Marking all objects used");
1221 page->inuse = page->objects;
1222 page->freelist = NULL;
1223 }
1224 return 0;
1225}
1226
1227static inline int free_consistency_checks(struct kmem_cache *s,
1228 struct page *page, void *object, unsigned long addr)
1229{
1230 if (!check_valid_pointer(s, page, object)) {
1231 slab_err(s, page, "Invalid object pointer 0x%p", object);
1232 return 0;
1233 }
1234
1235 if (on_freelist(s, page, object)) {
1236 object_err(s, page, object, "Object already free");
1237 return 0;
1238 }
1239
1240 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1241 return 0;
1242
1243 if (unlikely(s != page->slab_cache)) {
1244 if (!PageSlab(page)) {
1245 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1246 object);
1247 } else if (!page->slab_cache) {
1248 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1249 object);
1250 dump_stack();
1251 } else
1252 object_err(s, page, object,
1253 "page slab pointer corrupt.");
1254 return 0;
1255 }
1256 return 1;
1257}
1258
/* Supports checking bulk free of a constructed freelist */
1260static noinline int free_debug_processing(
1261 struct kmem_cache *s, struct page *page,
1262 void *head, void *tail, int bulk_cnt,
1263 unsigned long addr)
1264{
1265 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1266 void *object = head;
1267 int cnt = 0;
1268 unsigned long flags;
1269 int ret = 0;
1270
1271 spin_lock_irqsave(&n->list_lock, flags);
1272 slab_lock(page);
1273
1274 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1275 if (!check_slab(s, page))
1276 goto out;
1277 }
1278
1279next_object:
1280 cnt++;
1281
1282 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1283 if (!free_consistency_checks(s, page, object, addr))
1284 goto out;
1285 }
1286
1287 if (s->flags & SLAB_STORE_USER)
1288 set_track(s, object, TRACK_FREE, addr);
1289 trace(s, page, object, 0);
1290
1291 init_object(s, object, SLUB_RED_INACTIVE);
1292
1293
1294 if (object != tail) {
1295 object = get_freepointer(s, object);
1296 goto next_object;
1297 }
1298 ret = 1;
1299
1300out:
1301 if (cnt != bulk_cnt)
1302 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1303 bulk_cnt, cnt);
1304
1305 slab_unlock(page);
1306 spin_unlock_irqrestore(&n->list_lock, flags);
1307 if (!ret)
1308 slab_fix(s, "Object at 0x%p not freed", object);
1309 return ret;
1310}
1311
/*
 * Parse a block of slub_debug options. Blocks are delimited by ';'
 *
 * @str:    start of block
 * @flags:  returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
 * @slabs:  return start of list of slabs, or NULL when there's no list
 * @init:   assume this is initial parsing and not per-kmem-create parsing
 *
 * returns the start of the next block if there is any, or NULL
 */
1322static char *
1323parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1324{
1325 bool higher_order_disable = false;
1326
1327
1328 while (*str && *str == ';')
1329 str++;
1330
1331 if (*str == ',') {
1332
1333
1334
1335
1336 *flags = DEBUG_DEFAULT_FLAGS;
1337 goto check_slabs;
1338 }
1339 *flags = 0;
1340
1341
1342 for (; *str && *str != ',' && *str != ';'; str++) {
1343 switch (tolower(*str)) {
1344 case '-':
1345 *flags = 0;
1346 break;
1347 case 'f':
1348 *flags |= SLAB_CONSISTENCY_CHECKS;
1349 break;
1350 case 'z':
1351 *flags |= SLAB_RED_ZONE;
1352 break;
1353 case 'p':
1354 *flags |= SLAB_POISON;
1355 break;
1356 case 'u':
1357 *flags |= SLAB_STORE_USER;
1358 break;
1359 case 't':
1360 *flags |= SLAB_TRACE;
1361 break;
1362 case 'a':
1363 *flags |= SLAB_FAILSLAB;
1364 break;
1365 case 'o':
1366
1367
1368
1369
1370 higher_order_disable = true;
1371 break;
1372 default:
1373 if (init)
1374 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1375 }
1376 }
1377check_slabs:
1378 if (*str == ',')
1379 *slabs = ++str;
1380 else
1381 *slabs = NULL;
1382
1383
1384 while (*str && *str != ';')
1385 str++;
1386
1387
1388 while (*str && *str == ';')
1389 str++;
1390
1391 if (init && higher_order_disable)
1392 disable_higher_order_debug = 1;
1393
1394 if (*str)
1395 return str;
1396 else
1397 return NULL;
1398}
1399
1400static int __init setup_slub_debug(char *str)
1401{
1402 slab_flags_t flags;
1403 slab_flags_t global_flags;
1404 char *saved_str;
1405 char *slab_list;
1406 bool global_slub_debug_changed = false;
1407 bool slab_list_specified = false;
1408
1409 global_flags = DEBUG_DEFAULT_FLAGS;
1410 if (*str++ != '=' || !*str)
1411
1412
1413
1414 goto out;
1415
1416 saved_str = str;
1417 while (str) {
1418 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1419
1420 if (!slab_list) {
1421 global_flags = flags;
1422 global_slub_debug_changed = true;
1423 } else {
1424 slab_list_specified = true;
1425 }
1426 }
1427
1428
1429
1430
1431
1432
1433
1434
1435 if (slab_list_specified) {
1436 if (!global_slub_debug_changed)
1437 global_flags = slub_debug;
1438 slub_debug_string = saved_str;
1439 }
1440out:
1441 slub_debug = global_flags;
1442 if (slub_debug != 0 || slub_debug_string)
1443 static_branch_enable(&slub_debug_enabled);
1444 else
1445 static_branch_disable(&slub_debug_enabled);
1446 if ((static_branch_unlikely(&init_on_alloc) ||
1447 static_branch_unlikely(&init_on_free)) &&
1448 (slub_debug & SLAB_POISON))
1449 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1450 return 1;
1451}
1452
1453__setup("slub_debug", setup_slub_debug);
1454
/*
 * kmem_cache_flags - apply debugging options to the cache
 * @object_size:	the size of an object without meta data
 * @flags:		flags to set
 * @name:		name of the cache
 *
 * Debug option(s) are applied to @flags. In addition to the debug
 * option(s), if a slab name (or multiple) is specified i.e.
 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
 * then only the selected slabs will receive the debug option(s).
 */
1466slab_flags_t kmem_cache_flags(unsigned int object_size,
1467 slab_flags_t flags, const char *name)
1468{
1469 char *iter;
1470 size_t len;
1471 char *next_block;
1472 slab_flags_t block_flags;
1473 slab_flags_t slub_debug_local = slub_debug;
1474
1475
1476
1477
1478
1479
1480 if (flags & SLAB_NOLEAKTRACE)
1481 slub_debug_local &= ~SLAB_STORE_USER;
1482
1483 len = strlen(name);
1484 next_block = slub_debug_string;
1485
1486 while (next_block) {
1487 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1488 if (!iter)
1489 continue;
1490
1491 while (*iter) {
1492 char *end, *glob;
1493 size_t cmplen;
1494
1495 end = strchrnul(iter, ',');
1496 if (next_block && next_block < end)
1497 end = next_block - 1;
1498
1499 glob = strnchr(iter, end - iter, '*');
1500 if (glob)
1501 cmplen = glob - iter;
1502 else
1503 cmplen = max_t(size_t, len, (end - iter));
1504
1505 if (!strncmp(name, iter, cmplen)) {
1506 flags |= block_flags;
1507 return flags;
1508 }
1509
1510 if (!*end || *end == ';')
1511 break;
1512 iter = end + 1;
1513 }
1514 }
1515
1516 return flags | slub_debug_local;
1517}
1518#else
1519static inline void setup_object_debug(struct kmem_cache *s,
1520 struct page *page, void *object) {}
1521static inline
1522void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1523
1524static inline int alloc_debug_processing(struct kmem_cache *s,
1525 struct page *page, void *object, unsigned long addr) { return 0; }
1526
1527static inline int free_debug_processing(
1528 struct kmem_cache *s, struct page *page,
1529 void *head, void *tail, int bulk_cnt,
1530 unsigned long addr) { return 0; }
1531
1532static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1533 { return 1; }
1534static inline int check_object(struct kmem_cache *s, struct page *page,
1535 void *object, u8 val) { return 1; }
1536static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1537 struct page *page) {}
1538static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1539 struct page *page) {}
1540slab_flags_t kmem_cache_flags(unsigned int object_size,
1541 slab_flags_t flags, const char *name)
1542{
1543 return flags;
1544}
1545#define slub_debug 0
1546
1547#define disable_higher_order_debug 0
1548
1549static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1550 { return 0; }
1551static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1552 { return 0; }
1553static inline void inc_slabs_node(struct kmem_cache *s, int node,
1554 int objects) {}
1555static inline void dec_slabs_node(struct kmem_cache *s, int node,
1556 int objects) {}
1557
1558static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1559 void **freelist, void *nextfree)
1560{
1561 return false;
1562}
1563#endif
1564
/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks should all produce no code at all.
 */
1569static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1570{
1571 ptr = kasan_kmalloc_large(ptr, size, flags);
1572
1573 kmemleak_alloc(ptr, size, 1, flags);
1574 return ptr;
1575}
1576
1577static __always_inline void kfree_hook(void *x)
1578{
1579 kmemleak_free(x);
1580 kasan_kfree_large(x);
1581}
1582
1583static __always_inline bool slab_free_hook(struct kmem_cache *s,
1584 void *x, bool init)
1585{
1586 kmemleak_free_recursive(x, s->flags);
1587
1588
1589
1590
1591
1592
1593#ifdef CONFIG_LOCKDEP
1594 {
1595 unsigned long flags;
1596
1597 local_irq_save(flags);
1598 debug_check_no_locks_freed(x, s->object_size);
1599 local_irq_restore(flags);
1600 }
1601#endif
1602 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1603 debug_check_no_obj_freed(x, s->object_size);
1604
1605
1606 if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1607 __kcsan_check_access(x, s->object_size,
1608 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1609
	/*
	 * As memory initialization might be integrated into KASAN,
	 * kasan_slab_free and initialization memset's must be
	 * kept together to avoid discrepancies in behavior.
	 *
	 * The initialization memset's clear the object and the metadata,
	 * but don't touch the SLAB redzone.
	 */
1618 if (init) {
1619 int rsize;
1620
1621 if (!kasan_has_integrated_init())
1622 memset(kasan_reset_tag(x), 0, s->object_size);
1623 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1624 memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1625 s->size - s->inuse - rsize);
1626 }
1627
1628 return kasan_slab_free(s, x, init);
1629}
1630
1631static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1632 void **head, void **tail)
1633{
1634
1635 void *object;
1636 void *next = *head;
1637 void *old_tail = *tail ? *tail : *head;
1638
1639 if (is_kfence_address(next)) {
1640 slab_free_hook(s, next, false);
1641 return true;
1642 }
1643
1644
1645 *head = NULL;
1646 *tail = NULL;
1647
1648 do {
1649 object = next;
1650 next = get_freepointer(s, object);
1651
1652
1653 if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
1654
1655 set_freepointer(s, object, *head);
1656 *head = object;
1657 if (!*tail)
1658 *tail = object;
1659 }
1660 } while (object != old_tail);
1661
1662 if (*head == *tail)
1663 *tail = NULL;
1664
1665 return *head != NULL;
1666}
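
/*
 * slab_free_freelist_hook() rebuilds the freelist so that objects whose
 * reuse must be delayed (e.g. because KASAN has quarantined them) are
 * dropped from the list handed back to the caller; *head/*tail then
 * describe only the objects that may be freed immediately.
 */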
1667
1668static void *setup_object(struct kmem_cache *s, struct page *page,
1669 void *object)
1670{
1671 setup_object_debug(s, page, object);
1672 object = kasan_init_slab_obj(s, object);
1673 if (unlikely(s->ctor)) {
1674 kasan_unpoison_object_data(s, object);
1675 s->ctor(object);
1676 kasan_poison_object_data(s, object);
1677 }
1678 return object;
1679}
1680
/*
 * Slab allocation and freeing
 */
1684static inline struct page *alloc_slab_page(struct kmem_cache *s,
1685 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1686{
1687 struct page *page;
1688 unsigned int order = oo_order(oo);
1689
1690 if (node == NUMA_NO_NODE)
1691 page = alloc_pages(flags, order);
1692 else
1693 page = __alloc_pages_node(node, flags, order);
1694
1695 return page;
1696}
1697
1698#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Pre-initialize the random sequence cache */
1700static int init_cache_random_seq(struct kmem_cache *s)
1701{
1702 unsigned int count = oo_objects(s->oo);
1703 int err;
1704
1705
1706 if (s->random_seq)
1707 return 0;
1708
1709 err = cache_random_seq_create(s, count, GFP_KERNEL);
1710 if (err) {
1711 pr_err("SLUB: Unable to initialize free list for %s\n",
1712 s->name);
1713 return err;
1714 }
1715
1716
1717 if (s->random_seq) {
1718 unsigned int i;
1719
1720 for (i = 0; i < count; i++)
1721 s->random_seq[i] *= s->size;
1722 }
1723 return 0;
1724}
1725
1726
1727static void __init init_freelist_randomization(void)
1728{
1729 struct kmem_cache *s;
1730
1731 mutex_lock(&slab_mutex);
1732
1733 list_for_each_entry(s, &slab_caches, list)
1734 init_cache_random_seq(s);
1735
1736 mutex_unlock(&slab_mutex);
1737}
1738
1739
1740static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1741 unsigned long *pos, void *start,
1742 unsigned long page_limit,
1743 unsigned long freelist_count)
1744{
1745 unsigned int idx;
1746
1747
1748
1749
1750
1751 do {
1752 idx = s->random_seq[*pos];
1753 *pos += 1;
1754 if (*pos >= freelist_count)
1755 *pos = 0;
1756 } while (unlikely(idx >= page_limit));
1757
1758 return (char *)start + idx;
1759}
1760
1761
1762static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1763{
1764 void *start;
1765 void *cur;
1766 void *next;
1767 unsigned long idx, pos, page_limit, freelist_count;
1768
1769 if (page->objects < 2 || !s->random_seq)
1770 return false;
1771
1772 freelist_count = oo_objects(s->oo);
1773 pos = get_random_int() % freelist_count;
1774
1775 page_limit = page->objects * s->size;
1776 start = fixup_red_left(s, page_address(page));
1777
1778
1779 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1780 freelist_count);
1781 cur = setup_object(s, page, cur);
1782 page->freelist = cur;
1783
1784 for (idx = 1; idx < page->objects; idx++) {
1785 next = next_freelist_entry(s, page, &pos, start, page_limit,
1786 freelist_count);
1787 next = setup_object(s, page, next);
1788 set_freepointer(s, cur, next);
1789 cur = next;
1790 }
1791 set_freepointer(s, cur, NULL);
1792
1793 return true;
1794}
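
/*
 * Freelist randomization builds the initial freelist of a new slab in the
 * pre-computed pseudo-random order from s->random_seq rather than in
 * address order, which makes heap layouts harder to predict.
 */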
1795#else
1796static inline int init_cache_random_seq(struct kmem_cache *s)
1797{
1798 return 0;
1799}
1800static inline void init_freelist_randomization(void) { }
1801static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1802{
1803 return false;
1804}
1805#endif
1806
1807static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1808{
1809 struct page *page;
1810 struct kmem_cache_order_objects oo = s->oo;
1811 gfp_t alloc_gfp;
1812 void *start, *p, *next;
1813 int idx;
1814 bool shuffle;
1815
1816 flags &= gfp_allowed_mask;
1817
1818 if (gfpflags_allow_blocking(flags))
1819 local_irq_enable();
1820
1821 flags |= s->allocflags;
1822
	/*
	 * Let the initial higher-order allocation fail under memory pressure
	 * so we fall back to the minimum order allocation.
	 */
1827 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1828 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1829 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1830
1831 page = alloc_slab_page(s, alloc_gfp, node, oo);
1832 if (unlikely(!page)) {
1833 oo = s->min;
1834 alloc_gfp = flags;
		/*
		 * Allocation may have failed due to fragmentation.
		 * Try a lower order alloc if possible.
		 */
1839 page = alloc_slab_page(s, alloc_gfp, node, oo);
1840 if (unlikely(!page))
1841 goto out;
1842 stat(s, ORDER_FALLBACK);
1843 }
1844
1845 page->objects = oo_objects(oo);
1846
1847 account_slab_page(page, oo_order(oo), s, flags);
1848
1849 page->slab_cache = s;
1850 __SetPageSlab(page);
1851 if (page_is_pfmemalloc(page))
1852 SetPageSlabPfmemalloc(page);
1853
1854 kasan_poison_slab(page);
1855
1856 start = page_address(page);
1857
1858 setup_page_debug(s, page, start);
1859
1860 shuffle = shuffle_freelist(s, page);
1861
1862 if (!shuffle) {
1863 start = fixup_red_left(s, start);
1864 start = setup_object(s, page, start);
1865 page->freelist = start;
1866 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1867 next = p + s->size;
1868 next = setup_object(s, page, next);
1869 set_freepointer(s, p, next);
1870 p = next;
1871 }
1872 set_freepointer(s, p, NULL);
1873 }
1874
1875 page->inuse = page->objects;
1876 page->frozen = 1;
1877
1878out:
1879 if (gfpflags_allow_blocking(flags))
1880 local_irq_disable();
1881 if (!page)
1882 return NULL;
1883
1884 inc_slabs_node(s, page_to_nid(page), page->objects);
1885
1886 return page;
1887}
1888
1889static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1890{
1891 if (unlikely(flags & GFP_SLAB_BUG_MASK))
1892 flags = kmalloc_fix_flags(flags);
1893
1894 return allocate_slab(s,
1895 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1896}
1897
1898static void __free_slab(struct kmem_cache *s, struct page *page)
1899{
1900 int order = compound_order(page);
1901 int pages = 1 << order;
1902
1903 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1904 void *p;
1905
1906 slab_pad_check(s, page);
1907 for_each_object(p, s, page_address(page),
1908 page->objects)
1909 check_object(s, page, p, SLUB_RED_INACTIVE);
1910 }
1911
1912 __ClearPageSlabPfmemalloc(page);
1913 __ClearPageSlab(page);
1914
1915 page->slab_cache = NULL;
1916 if (current->reclaim_state)
1917 current->reclaim_state->reclaimed_slab += pages;
1918 unaccount_slab_page(page, order, s);
1919 __free_pages(page, order);
1920}
1921
1922static void rcu_free_slab(struct rcu_head *h)
1923{
1924 struct page *page = container_of(h, struct page, rcu_head);
1925
1926 __free_slab(page->slab_cache, page);
1927}
1928
1929static void free_slab(struct kmem_cache *s, struct page *page)
1930{
1931 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1932 call_rcu(&page->rcu_head, rcu_free_slab);
1933 } else
1934 __free_slab(s, page);
1935}
1936
1937static void discard_slab(struct kmem_cache *s, struct page *page)
1938{
1939 dec_slabs_node(s, page_to_nid(page), page->objects);
1940 free_slab(s, page);
1941}
1942
/*
 * Management of partially allocated slabs.
 */
1946static inline void
1947__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1948{
1949 n->nr_partial++;
1950 if (tail == DEACTIVATE_TO_TAIL)
1951 list_add_tail(&page->slab_list, &n->partial);
1952 else
1953 list_add(&page->slab_list, &n->partial);
1954}
1955
1956static inline void add_partial(struct kmem_cache_node *n,
1957 struct page *page, int tail)
1958{
1959 lockdep_assert_held(&n->list_lock);
1960 __add_partial(n, page, tail);
1961}
1962
1963static inline void remove_partial(struct kmem_cache_node *n,
1964 struct page *page)
1965{
1966 lockdep_assert_held(&n->list_lock);
1967 list_del(&page->slab_list);
1968 n->nr_partial--;
1969}
1970
/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 */
1977static inline void *acquire_slab(struct kmem_cache *s,
1978 struct kmem_cache_node *n, struct page *page,
1979 int mode, int *objects)
1980{
1981 void *freelist;
1982 unsigned long counters;
1983 struct page new;
1984
1985 lockdep_assert_held(&n->list_lock);
1986
	/*
	 * Zap the freelist and set the frozen bit.
	 * The old freelist is the list of objects for the
	 * per cpu allocation list.
	 */
1992 freelist = page->freelist;
1993 counters = page->counters;
1994 new.counters = counters;
1995 *objects = new.objects - new.inuse;
1996 if (mode) {
1997 new.inuse = page->objects;
1998 new.freelist = NULL;
1999 } else {
2000 new.freelist = freelist;
2001 }
2002
2003 VM_BUG_ON(new.frozen);
2004 new.frozen = 1;
2005
2006 if (!__cmpxchg_double_slab(s, page,
2007 freelist, counters,
2008 new.freelist, new.counters,
2009 "acquire_slab"))
2010 return NULL;
2011
2012 remove_partial(n, page);
2013 WARN_ON(!freelist);
2014 return freelist;
2015}
2016
2017static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
2018static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
2019
/*
 * Try to allocate a partial slab from a specific node.
 */
2023static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
2024 struct kmem_cache_cpu *c, gfp_t flags)
2025{
2026 struct page *page, *page2;
2027 void *object = NULL;
2028 unsigned int available = 0;
2029 int objects;
2030
2031
2032
2033
2034
2035
2036
2037 if (!n || !n->nr_partial)
2038 return NULL;
2039
2040 spin_lock(&n->list_lock);
2041 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
2042 void *t;
2043
2044 if (!pfmemalloc_match(page, flags))
2045 continue;
2046
2047 t = acquire_slab(s, n, page, object == NULL, &objects);
2048 if (!t)
2049 break;
2050
2051 available += objects;
2052 if (!object) {
2053 c->page = page;
2054 stat(s, ALLOC_FROM_PARTIAL);
2055 object = t;
2056 } else {
2057 put_cpu_partial(s, page, 0);
2058 stat(s, CPU_PARTIAL_NODE);
2059 }
2060 if (!kmem_cache_has_cpu_partial(s)
2061 || available > slub_cpu_partial(s) / 2)
2062 break;
2063
2064 }
2065 spin_unlock(&n->list_lock);
2066 return object;
2067}
2068
/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
2072static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2073 struct kmem_cache_cpu *c)
2074{
2075#ifdef CONFIG_NUMA
2076 struct zonelist *zonelist;
2077 struct zoneref *z;
2078 struct zone *zone;
2079 enum zone_type highest_zoneidx = gfp_zone(flags);
2080 void *object;
2081 unsigned int cpuset_mems_cookie;
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101 if (!s->remote_node_defrag_ratio ||
2102 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2103 return NULL;
2104
2105 do {
2106 cpuset_mems_cookie = read_mems_allowed_begin();
2107 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2108 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2109 struct kmem_cache_node *n;
2110
2111 n = get_node(s, zone_to_nid(zone));
2112
2113 if (n && cpuset_zone_allowed(zone, flags) &&
2114 n->nr_partial > s->min_partial) {
2115 object = get_partial_node(s, n, c, flags);
2116 if (object) {
2117
2118
2119
2120
2121
2122
2123
2124 return object;
2125 }
2126 }
2127 }
2128 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2129#endif
2130 return NULL;
2131}
2132
/*
 * Get a partial page, lock it and return it.
 */
2136static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2137 struct kmem_cache_cpu *c)
2138{
2139 void *object;
2140 int searchnode = node;
2141
2142 if (node == NUMA_NO_NODE)
2143 searchnode = numa_mem_id();
2144
2145 object = get_partial_node(s, get_node(s, searchnode), c, flags);
2146 if (object || node != NUMA_NO_NODE)
2147 return object;
2148
2149 return get_any_partial(s, flags, c);
2150}
2151
#ifdef CONFIG_PREEMPTION
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
#define TID_STEP 1
#endif
2166
2167static inline unsigned long next_tid(unsigned long tid)
2168{
2169 return tid + TID_STEP;
2170}
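
/*
 * Example: with CONFIG_NR_CPUS=64 and preemption, TID_STEP is 64, so cpu 3
 * cycles through tids 3, 67, 131, ... A tid therefore encodes both the
 * owning cpu (tid % TID_STEP) and the number of transactions performed
 * there (tid / TID_STEP), which is what tid_to_cpu()/tid_to_event() below
 * recover for diagnostics.
 */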
2171
2172#ifdef SLUB_DEBUG_CMPXCHG
2173static inline unsigned int tid_to_cpu(unsigned long tid)
2174{
2175 return tid % TID_STEP;
2176}
2177
2178static inline unsigned long tid_to_event(unsigned long tid)
2179{
2180 return tid / TID_STEP;
2181}
2182#endif
2183
2184static inline unsigned int init_tid(int cpu)
2185{
2186 return cpu;
2187}
2188
2189static inline void note_cmpxchg_failure(const char *n,
2190 const struct kmem_cache *s, unsigned long tid)
2191{
2192#ifdef SLUB_DEBUG_CMPXCHG
2193 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2194
2195 pr_info("%s %s: cmpxchg redo ", n, s->name);
2196
2197#ifdef CONFIG_PREEMPTION
2198 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2199 pr_warn("due to cpu change %d -> %d\n",
2200 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2201 else
2202#endif
2203 if (tid_to_event(tid) != tid_to_event(actual_tid))
2204 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2205 tid_to_event(tid), tid_to_event(actual_tid));
2206 else
2207 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2208 actual_tid, tid, next_tid(tid));
2209#endif
2210 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2211}
2212
2213static void init_kmem_cache_cpus(struct kmem_cache *s)
2214{
2215 int cpu;
2216
2217 for_each_possible_cpu(cpu)
2218 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2219}

/*
 * Remove the cpu slab while merging the actual freelist into the page's
 * freelist. Unfreezes the slab and puts it on the proper node list.
 */
2224static void deactivate_slab(struct kmem_cache *s, struct page *page,
2225 void *freelist, struct kmem_cache_cpu *c)
2226{
2227 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2228 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2229 int lock = 0, free_delta = 0;
2230 enum slab_modes l = M_NONE, m = M_NONE;
2231 void *nextfree, *freelist_iter, *freelist_tail;
2232 int tail = DEACTIVATE_TO_HEAD;
2233 struct page new;
2234 struct page old;
2235
2236 if (page->freelist) {
2237 stat(s, DEACTIVATE_REMOTE_FREES);
2238 tail = DEACTIVATE_TO_TAIL;
2239 }
2240
2241
2242
2243
2244
2245 freelist_tail = NULL;
2246 freelist_iter = freelist;
2247 while (freelist_iter) {
2248 nextfree = get_freepointer(s, freelist_iter);
2249
2250
2251
2252
2253
2254
2255 if (freelist_corrupted(s, page, &freelist_iter, nextfree))
2256 break;
2257
2258 freelist_tail = freelist_iter;
2259 free_delta++;
2260
2261 freelist_iter = nextfree;
2262 }
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280redo:
2281
2282 old.freelist = READ_ONCE(page->freelist);
2283 old.counters = READ_ONCE(page->counters);
2284 VM_BUG_ON(!old.frozen);
2285
2286
2287 new.counters = old.counters;
2288 if (freelist_tail) {
2289 new.inuse -= free_delta;
2290 set_freepointer(s, freelist_tail, old.freelist);
2291 new.freelist = freelist;
2292 } else
2293 new.freelist = old.freelist;
2294
2295 new.frozen = 0;
2296
2297 if (!new.inuse && n->nr_partial >= s->min_partial)
2298 m = M_FREE;
2299 else if (new.freelist) {
2300 m = M_PARTIAL;
2301 if (!lock) {
2302 lock = 1;
2303
2304
2305
2306
2307
2308 spin_lock(&n->list_lock);
2309 }
2310 } else {
2311 m = M_FULL;
2312 if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
2313 lock = 1;
2314
2315
2316
2317
2318
2319 spin_lock(&n->list_lock);
2320 }
2321 }
2322
2323 if (l != m) {
2324 if (l == M_PARTIAL)
2325 remove_partial(n, page);
2326 else if (l == M_FULL)
2327 remove_full(s, n, page);
2328
2329 if (m == M_PARTIAL)
2330 add_partial(n, page, tail);
2331 else if (m == M_FULL)
2332 add_full(s, n, page);
2333 }
2334
2335 l = m;
2336 if (!__cmpxchg_double_slab(s, page,
2337 old.freelist, old.counters,
2338 new.freelist, new.counters,
2339 "unfreezing slab"))
2340 goto redo;
2341
2342 if (lock)
2343 spin_unlock(&n->list_lock);
2344
2345 if (m == M_PARTIAL)
2346 stat(s, tail);
2347 else if (m == M_FULL)
2348 stat(s, DEACTIVATE_FULL);
2349 else if (m == M_FREE) {
2350 stat(s, DEACTIVATE_EMPTY);
2351 discard_slab(s, page);
2352 stat(s, FREE_SLAB);
2353 }
2354
2355 c->page = NULL;
2356 c->freelist = NULL;
2357}
2358
/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 */
2366static void unfreeze_partials(struct kmem_cache *s,
2367 struct kmem_cache_cpu *c)
2368{
2369#ifdef CONFIG_SLUB_CPU_PARTIAL
2370 struct kmem_cache_node *n = NULL, *n2 = NULL;
2371 struct page *page, *discard_page = NULL;
2372
2373 while ((page = slub_percpu_partial(c))) {
2374 struct page new;
2375 struct page old;
2376
2377 slub_set_percpu_partial(c, page);
2378
2379 n2 = get_node(s, page_to_nid(page));
2380 if (n != n2) {
2381 if (n)
2382 spin_unlock(&n->list_lock);
2383
2384 n = n2;
2385 spin_lock(&n->list_lock);
2386 }
2387
2388 do {
2389
2390 old.freelist = page->freelist;
2391 old.counters = page->counters;
2392 VM_BUG_ON(!old.frozen);
2393
2394 new.counters = old.counters;
2395 new.freelist = old.freelist;
2396
2397 new.frozen = 0;
2398
2399 } while (!__cmpxchg_double_slab(s, page,
2400 old.freelist, old.counters,
2401 new.freelist, new.counters,
2402 "unfreezing slab"));
2403
2404 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2405 page->next = discard_page;
2406 discard_page = page;
2407 } else {
2408 add_partial(n, page, DEACTIVATE_TO_TAIL);
2409 stat(s, FREE_ADD_PARTIAL);
2410 }
2411 }
2412
2413 if (n)
2414 spin_unlock(&n->list_lock);
2415
2416 while (discard_page) {
2417 page = discard_page;
2418 discard_page = discard_page->next;
2419
2420 stat(s, DEACTIVATE_EMPTY);
2421 discard_slab(s, page);
2422 stat(s, FREE_SLAB);
2423 }
2424#endif
2425}
2426
/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) into a
 * partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2434static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2435{
2436#ifdef CONFIG_SLUB_CPU_PARTIAL
2437 struct page *oldpage;
2438 int pages;
2439 int pobjects;
2440
2441 preempt_disable();
2442 do {
2443 pages = 0;
2444 pobjects = 0;
2445 oldpage = this_cpu_read(s->cpu_slab->partial);
2446
2447 if (oldpage) {
2448 pobjects = oldpage->pobjects;
2449 pages = oldpage->pages;
2450 if (drain && pobjects > slub_cpu_partial(s)) {
2451 unsigned long flags;
2452
2453
2454
2455
2456 local_irq_save(flags);
2457 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2458 local_irq_restore(flags);
2459 oldpage = NULL;
2460 pobjects = 0;
2461 pages = 0;
2462 stat(s, CPU_PARTIAL_DRAIN);
2463 }
2464 }
2465
2466 pages++;
2467 pobjects += page->objects - page->inuse;
2468
2469 page->pages = pages;
2470 page->pobjects = pobjects;
2471 page->next = oldpage;
2472
2473 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2474 != oldpage);
2475 if (unlikely(!slub_cpu_partial(s))) {
2476 unsigned long flags;
2477
2478 local_irq_save(flags);
2479 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2480 local_irq_restore(flags);
2481 }
2482 preempt_enable();
2483#endif
2484}
2485
2486static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2487{
2488 stat(s, CPUSLAB_FLUSH);
2489 deactivate_slab(s, c->page, c->freelist, c);
2490
2491 c->tid = next_tid(c->tid);
2492}
2493
/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
2499static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2500{
2501 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2502
2503 if (c->page)
2504 flush_slab(s, c);
2505
2506 unfreeze_partials(s, c);
2507}
2508
2509static void flush_cpu_slab(void *d)
2510{
2511 struct kmem_cache *s = d;
2512
2513 __flush_cpu_slab(s, smp_processor_id());
2514}
2515
2516static bool has_cpu_slab(int cpu, void *info)
2517{
2518 struct kmem_cache *s = info;
2519 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2520
2521 return c->page || slub_percpu_partial(c);
2522}
2523
2524static void flush_all(struct kmem_cache *s)
2525{
2526 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
2527}
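
/*
 * flush_all() only sends an IPI to CPUs that actually have a cpu slab or
 * per-cpu partial slabs for this cache (see has_cpu_slab() above), so idle
 * caches do not disturb the other CPUs.
 */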
2528
2529
2530
2531
2532
2533static int slub_cpu_dead(unsigned int cpu)
2534{
2535 struct kmem_cache *s;
2536 unsigned long flags;
2537
2538 mutex_lock(&slab_mutex);
2539 list_for_each_entry(s, &slab_caches, list) {
2540 local_irq_save(flags);
2541 __flush_cpu_slab(s, cpu);
2542 local_irq_restore(flags);
2543 }
2544 mutex_unlock(&slab_mutex);
2545 return 0;
2546}
2547
/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2552static inline int node_match(struct page *page, int node)
2553{
2554#ifdef CONFIG_NUMA
2555 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2556 return 0;
2557#endif
2558 return 1;
2559}
2560
2561#ifdef CONFIG_SLUB_DEBUG
2562static int count_free(struct page *page)
2563{
2564 return page->objects - page->inuse;
2565}
2566
2567static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2568{
2569 return atomic_long_read(&n->total_objects);
2570}
2571#endif
2572
2573#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2574static unsigned long count_partial(struct kmem_cache_node *n,
2575 int (*get_count)(struct page *))
2576{
2577 unsigned long flags;
2578 unsigned long x = 0;
2579 struct page *page;
2580
2581 spin_lock_irqsave(&n->list_lock, flags);
2582 list_for_each_entry(page, &n->partial, slab_list)
2583 x += get_count(page);
2584 spin_unlock_irqrestore(&n->list_lock, flags);
2585 return x;
2586}
2587#endif
2588
2589static noinline void
2590slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2591{
2592#ifdef CONFIG_SLUB_DEBUG
2593 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2594 DEFAULT_RATELIMIT_BURST);
2595 int node;
2596 struct kmem_cache_node *n;
2597
2598 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2599 return;
2600
2601 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2602 nid, gfpflags, &gfpflags);
2603 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2604 s->name, s->object_size, s->size, oo_order(s->oo),
2605 oo_order(s->min));
2606
2607 if (oo_order(s->min) > get_order(s->object_size))
2608 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2609 s->name);
2610
2611 for_each_kmem_cache_node(s, node, n) {
2612 unsigned long nr_slabs;
2613 unsigned long nr_objs;
2614 unsigned long nr_free;
2615
2616 nr_free = count_partial(n, count_free);
2617 nr_slabs = node_nr_slabs(n);
2618 nr_objs = node_nr_objs(n);
2619
2620 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2621 node, nr_slabs, nr_objs, nr_free);
2622 }
2623#endif
2624}
2625
2626static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2627 int node, struct kmem_cache_cpu **pc)
2628{
2629 void *freelist;
2630 struct kmem_cache_cpu *c = *pc;
2631 struct page *page;
2632
2633 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2634
2635 freelist = get_partial(s, flags, node, c);
2636
2637 if (freelist)
2638 return freelist;
2639
2640 page = new_slab(s, flags, node);
2641 if (page) {
2642 c = raw_cpu_ptr(s->cpu_slab);
2643 if (c->page)
2644 flush_slab(s, c);
2645
  /*
   * No other reference to the new page yet, so we can freely
   * take over its freelist without a cmpxchg.
   */
2650 freelist = page->freelist;
2651 page->freelist = NULL;
2652
2653 stat(s, ALLOC_SLAB);
2654 c->page = page;
2655 *pc = c;
2656 }
2657
2658 return freelist;
2659}
2660
2661static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2662{
2663 if (unlikely(PageSlabPfmemalloc(page)))
2664 return gfp_pfmemalloc_allowed(gfpflags);
2665
2666 return true;
2667}
2668
/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupts disabled.
 */
2679static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2680{
2681 struct page new;
2682 unsigned long counters;
2683 void *freelist;
2684
2685 do {
2686 freelist = page->freelist;
2687 counters = page->counters;
2688
2689 new.counters = counters;
2690 VM_BUG_ON(!new.frozen);
2691
2692 new.inuse = page->objects;
2693 new.frozen = freelist != NULL;
2694
2695 } while (!__cmpxchg_double_slab(s, page,
2696 freelist, counters,
2697 NULL, new.counters,
2698 "get_freelist"));
2699
2700 return freelist;
2701}
2702
/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the partial list.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 *
 * This version is used when we know that interrupts are already disabled
 * (which is the case for bulk allocation).
 */
2722static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2723 unsigned long addr, struct kmem_cache_cpu *c)
2724{
2725 void *freelist;
2726 struct page *page;
2727
2728 stat(s, ALLOC_SLOWPATH);
2729
2730 page = c->page;
2731 if (!page) {
  /*
   * If the requested node is not online or has no normal memory,
   * just ignore the node constraint.
   */
2736 if (unlikely(node != NUMA_NO_NODE &&
2737 !node_isset(node, slab_nodes)))
2738 node = NUMA_NO_NODE;
2739 goto new_slab;
2740 }
2741redo:
2742
2743 if (unlikely(!node_match(page, node))) {
  /*
   * Same as above, but node_match() being false already
   * implies node != NUMA_NO_NODE.
   */
2748 if (!node_isset(node, slab_nodes)) {
2749 node = NUMA_NO_NODE;
2750 goto redo;
2751 } else {
2752 stat(s, ALLOC_NODE_MISMATCH);
2753 deactivate_slab(s, page, c->freelist, c);
2754 goto new_slab;
2755 }
2756 }
2757
 /*
  * By rights, we should be searching for a slab page that was
  * PFMEMALLOC but right now, we are losing the pfmemalloc
  * information when the page leaves the per-cpu allocator.
  */
2763 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2764 deactivate_slab(s, page, c->freelist, c);
2765 goto new_slab;
2766 }
2767
2768
2769 freelist = c->freelist;
2770 if (freelist)
2771 goto load_freelist;
2772
2773 freelist = get_freelist(s, page);
2774
2775 if (!freelist) {
2776 c->page = NULL;
2777 stat(s, DEACTIVATE_BYPASS);
2778 goto new_slab;
2779 }
2780
2781 stat(s, ALLOC_REFILL);
2782
2783load_freelist:
 /*
  * freelist is pointing to the list of objects to be used.
  * page is pointing to the page from which the objects are obtained.
  * That page must be frozen for per cpu allocations to work.
  */
2789 VM_BUG_ON(!c->page->frozen);
2790 c->freelist = get_freepointer(s, freelist);
2791 c->tid = next_tid(c->tid);
2792 return freelist;
2793
2794new_slab:
2795
2796 if (slub_percpu_partial(c)) {
2797 page = c->page = slub_percpu_partial(c);
2798 slub_set_percpu_partial(c, page);
2799 stat(s, CPU_PARTIAL_ALLOC);
2800 goto redo;
2801 }
2802
2803 freelist = new_slab_objects(s, gfpflags, node, &c);
2804
2805 if (unlikely(!freelist)) {
2806 slab_out_of_memory(s, gfpflags, node);
2807 return NULL;
2808 }
2809
2810 page = c->page;
2811 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2812 goto load_freelist;
2813
2814
2815 if (kmem_cache_debug(s) &&
2816 !alloc_debug_processing(s, page, freelist, addr))
2817 goto new_slab;
2818
2819 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2820 return freelist;
2821}
2822
/*
 * A wrapper for ___slab_alloc() that disables interrupts and compensates
 * for a possible cpu change by refetching the per cpu area pointer.
 */
2827static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2828 unsigned long addr, struct kmem_cache_cpu *c)
2829{
2830 void *p;
2831 unsigned long flags;
2832
2833 local_irq_save(flags);
2834#ifdef CONFIG_PREEMPTION
 /*
  * We may have been preempted and rescheduled on a different
  * cpu before disabling interrupts. Need to reload the cpu area
  * pointer.
  */
2840 c = this_cpu_ptr(s->cpu_slab);
2841#endif
2842
2843 p = ___slab_alloc(s, gfpflags, node, addr, c);
2844 local_irq_restore(flags);
2845 return p;
2846}
2847
/*
 * If the object has been wiped upon free, make sure it's fully initialized by
 * zeroing out the freelist pointer.
 */
2852static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2853 void *obj)
2854{
2855 if (unlikely(slab_want_init_on_free(s)) && obj)
2856 memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
2857 0, sizeof(void *));
2858}
2859
/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * have the fastpath folded into their functions. So no function call
 * overhead for requests that can be satisfied on the fastpath.
 *
 * The fastpath works by first checking if the lockless freelist can be used.
 * If not then __slab_alloc is called for slow processing.
 *
 * Otherwise we can simply pick the next object from the lockless free list.
 */
2870static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2871 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
2872{
2873 void *object;
2874 struct kmem_cache_cpu *c;
2875 struct page *page;
2876 unsigned long tid;
2877 struct obj_cgroup *objcg = NULL;
2878 bool init = false;
2879
2880 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
2881 if (!s)
2882 return NULL;
2883
2884 object = kfence_alloc(s, orig_size, gfpflags);
2885 if (unlikely(object))
2886 goto out;
2887
2888redo:
 /*
  * Must read kmem_cache cpu data via this cpu ptr. Preemption is
  * enabled. We may switch back and forth between cpus while
  * reading from one cpu area. That does not matter as long
  * as we end up on the original cpu again when doing the cmpxchg.
  *
  * We must guarantee that tid and the kmem_cache_cpu pointer are
  * retrieved on the same cpu. They can differ under CONFIG_PREEMPTION,
  * so re-check until they match.
  */
2899 do {
2900 tid = this_cpu_read(s->cpu_slab->tid);
2901 c = raw_cpu_ptr(s->cpu_slab);
2902 } while (IS_ENABLED(CONFIG_PREEMPTION) &&
2903 unlikely(tid != READ_ONCE(c->tid)));
2904
 /*
  * The irqless object alloc/free algorithm used here depends on the
  * sequence of fetching cpu_slab's data. tid must be fetched before
  * anything else on c, so that the object and page associated with a
  * previous tid cannot be used with the current tid. If tid were
  * fetched first, the object and page could belong to the next tid
  * and the cmpxchg below would fail; in that case we simply retry.
  */
2913 barrier();
2914
 /*
  * The transaction ids are globally unique per cpu and per operation
  * on a per cpu queue. Thus they can be used to guarantee that the
  * cmpxchg_double occurs on the right processor and that there was
  * no operation on the linked list in between.
  */
2921
2922 object = c->freelist;
2923 page = c->page;
2924 if (unlikely(!object || !page || !node_match(page, node))) {
2925 object = __slab_alloc(s, gfpflags, node, addr, c);
2926 } else {
2927 void *next_object = get_freepointer_safe(s, object);
2928
  /*
   * The cmpxchg will only match if there was no additional
   * operation and if we are on the right processor.
   *
   * The cmpxchg does the following atomically (without lock
   * semantics!)
   * 1. Relocate the first pointer to the current per cpu area.
   * 2. Verify that tid and freelist have not been changed.
   * 3. If they were not changed, replace tid and freelist.
   *
   * Since this is without lock semantics the protection is only
   * against code executing on this cpu *not* from access by
   * other cpus.
   */
2943 if (unlikely(!this_cpu_cmpxchg_double(
2944 s->cpu_slab->freelist, s->cpu_slab->tid,
2945 object, tid,
2946 next_object, next_tid(tid)))) {
2947
2948 note_cmpxchg_failure("slab_alloc", s, tid);
2949 goto redo;
2950 }
2951 prefetch_freepointer(s, next_object);
2952 stat(s, ALLOC_FASTPATH);
2953 }
2954
2955 maybe_wipe_obj_freeptr(s, object);
2956 init = slab_want_init_on_alloc(gfpflags, s);
2957
2958out:
2959 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
2960
2961 return object;
2962}
2963
2964static __always_inline void *slab_alloc(struct kmem_cache *s,
2965 gfp_t gfpflags, unsigned long addr, size_t orig_size)
2966{
2967 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
2968}
2969
2970void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2971{
2972 void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
2973
2974 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2975 s->size, gfpflags);
2976
2977 return ret;
2978}
2979EXPORT_SYMBOL(kmem_cache_alloc);
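
/*
 * Illustrative usage sketch (not part of SLUB itself; "struct foo" and the
 * cache name are made-up placeholders): callers pair kmem_cache_alloc() with
 * kmem_cache_free() on a cache they created earlier, e.g.
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL);
 *	f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, f);
 */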
2980
2981#ifdef CONFIG_TRACING
2982void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2983{
2984 void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
2985 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2986 ret = kasan_kmalloc(s, ret, size, gfpflags);
2987 return ret;
2988}
2989EXPORT_SYMBOL(kmem_cache_alloc_trace);
2990#endif
2991
2992#ifdef CONFIG_NUMA
2993void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2994{
2995 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
2996
2997 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2998 s->object_size, s->size, gfpflags, node);
2999
3000 return ret;
3001}
3002EXPORT_SYMBOL(kmem_cache_alloc_node);
3003
3004#ifdef CONFIG_TRACING
3005void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
3006 gfp_t gfpflags,
3007 int node, size_t size)
3008{
3009 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
3010
3011 trace_kmalloc_node(_RET_IP_, ret,
3012 size, s->size, gfpflags, node);
3013
3014 ret = kasan_kmalloc(s, ret, size, gfpflags);
3015 return ret;
3016}
3017EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3018#endif
3019#endif
3020
/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
3029static void __slab_free(struct kmem_cache *s, struct page *page,
3030 void *head, void *tail, int cnt,
3031 unsigned long addr)
3032
3033{
3034 void *prior;
3035 int was_frozen;
3036 struct page new;
3037 unsigned long counters;
3038 struct kmem_cache_node *n = NULL;
3039 unsigned long flags;
3040
3041 stat(s, FREE_SLOWPATH);
3042
3043 if (kfence_free(head))
3044 return;
3045
3046 if (kmem_cache_debug(s) &&
3047 !free_debug_processing(s, page, head, tail, cnt, addr))
3048 return;
3049
3050 do {
3051 if (unlikely(n)) {
3052 spin_unlock_irqrestore(&n->list_lock, flags);
3053 n = NULL;
3054 }
3055 prior = page->freelist;
3056 counters = page->counters;
3057 set_freepointer(s, tail, prior);
3058 new.counters = counters;
3059 was_frozen = new.frozen;
3060 new.inuse -= cnt;
3061 if ((!new.inuse || !prior) && !was_frozen) {
3062
3063 if (kmem_cache_has_cpu_partial(s) && !prior) {

   /*
    * The slab was on no list before and will become partially
    * empty. Instead of moving it to a list now we can defer
    * that and simply freeze it, letting it be placed on the
    * freeing cpu's per cpu partial list below.
    */
3071 new.frozen = 1;
3072
3073 } else {
3074
3075 n = get_node(s, page_to_nid(page));
    /*
     * Speculatively acquire the list_lock.
     * If the cmpxchg does not succeed then we may
     * drop the list_lock without any processing.
     *
     * Otherwise the list_lock will synchronize with
     * other processors updating the list of slabs.
     */
3084 spin_lock_irqsave(&n->list_lock, flags);
3085
3086 }
3087 }
3088
3089 } while (!cmpxchg_double_slab(s, page,
3090 prior, counters,
3091 head, new.counters,
3092 "__slab_free"));
3093
3094 if (likely(!n)) {
3095
3096 if (likely(was_frozen)) {
   /*
    * The list lock was not taken, therefore no list
    * activity can be necessary.
    */
3101 stat(s, FREE_FROZEN);
3102 } else if (new.frozen) {
   /*
    * If we just froze the page then put it onto the
    * per cpu partial list.
    */
3107 put_cpu_partial(s, page, 1);
3108 stat(s, CPU_PARTIAL_FREE);
3109 }
3110
3111 return;
3112 }
3113
3114 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3115 goto slab_empty;
3116
 /*
  * Objects are left in the slab. If it was not on the partial list
  * before then add it.
  */
3121 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3122 remove_full(s, n, page);
3123 add_partial(n, page, DEACTIVATE_TO_TAIL);
3124 stat(s, FREE_ADD_PARTIAL);
3125 }
3126 spin_unlock_irqrestore(&n->list_lock, flags);
3127 return;
3128
3129slab_empty:
3130 if (prior) {
3131
3132
3133
3134 remove_partial(n, page);
3135 stat(s, FREE_REMOVE_PARTIAL);
3136 } else {
3137
3138 remove_full(s, n, page);
3139 }
3140
3141 spin_unlock_irqrestore(&n->list_lock, flags);
3142 stat(s, FREE_SLAB);
3143 discard_slab(s, page);
3144}
3145
/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If the fastpath is not possible then fall back to __slab_free where we
 * deal with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointers are valid)
 * requires some care: head points to the first object and tail to the
 * last one, already linked via their free pointers.
 */
3161static __always_inline void do_slab_free(struct kmem_cache *s,
3162 struct page *page, void *head, void *tail,
3163 int cnt, unsigned long addr)
3164{
3165 void *tail_obj = tail ? : head;
3166 struct kmem_cache_cpu *c;
3167 unsigned long tid;
3168
3169 memcg_slab_free_hook(s, &head, 1);
3170redo:
 /*
  * Determine the current cpu's per cpu slab.
  * The cpu may change afterwards. However that does not matter since
  * data is retrieved via this pointer. If we are on the same cpu
  * during the cmpxchg then the free will succeed.
  */
3177 do {
3178 tid = this_cpu_read(s->cpu_slab->tid);
3179 c = raw_cpu_ptr(s->cpu_slab);
3180 } while (IS_ENABLED(CONFIG_PREEMPTION) &&
3181 unlikely(tid != READ_ONCE(c->tid)));
3182
 /* Same as the comment on barrier() in slab_alloc_node() */
3184 barrier();
3185
3186 if (likely(page == c->page)) {
3187 void **freelist = READ_ONCE(c->freelist);
3188
3189 set_freepointer(s, tail_obj, freelist);
3190
3191 if (unlikely(!this_cpu_cmpxchg_double(
3192 s->cpu_slab->freelist, s->cpu_slab->tid,
3193 freelist, tid,
3194 head, next_tid(tid)))) {
3195
3196 note_cmpxchg_failure("slab_free", s, tid);
3197 goto redo;
3198 }
3199 stat(s, FREE_FASTPATH);
3200 } else
3201 __slab_free(s, page, head, tail_obj, cnt, addr);
3202
3203}
3204
3205static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3206 void *head, void *tail, int cnt,
3207 unsigned long addr)
3208{
 /*
  * With KASAN enabled slab_free_freelist_hook() modifies the freelist
  * to remove objects whose reuse must be delayed.
  */
3213 if (slab_free_freelist_hook(s, &head, &tail))
3214 do_slab_free(s, page, head, tail, cnt, addr);
3215}
3216
3217#ifdef CONFIG_KASAN_GENERIC
3218void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3219{
3220 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3221}
3222#endif
3223
3224void kmem_cache_free(struct kmem_cache *s, void *x)
3225{
3226 s = cache_from_obj(s, x);
3227 if (!s)
3228 return;
3229 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3230 trace_kmem_cache_free(_RET_IP_, x, s->name);
3231}
3232EXPORT_SYMBOL(kmem_cache_free);
3233
3234struct detached_freelist {
3235 struct page *page;
3236 void *tail;
3237 void *freelist;
3238 int cnt;
3239 struct kmem_cache *s;
3240};
3241
3242static inline void free_nonslab_page(struct page *page, void *object)
3243{
3244 unsigned int order = compound_order(page);
3245
3246 VM_BUG_ON_PAGE(!PageCompound(page), page);
3247 kfree_hook(object);
3248 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
3249 __free_pages(page, order);
3250}
3251

/*
 * This function progressively scans the array of free objects (with
 * a limited look ahead) and extracts objects belonging to one slab.
 * It builds a detached freelist directly within the given slab objects.
 * This can happen without any need for synchronization, because the
 * objects are owned by the running process. The freelist is built up
 * as a single linked list in the objects. The idea is that this
 * detached freelist can then be bulk transferred to the real
 * freelist(s), requiring only a single synchronization primitive.
 * Look ahead in the array is limited for performance reasons.
 */
3264static inline
3265int build_detached_freelist(struct kmem_cache *s, size_t size,
3266 void **p, struct detached_freelist *df)
3267{
3268 size_t first_skipped_index = 0;
3269 int lookahead = 3;
3270 void *object;
3271 struct page *page;
3272
3273
3274 df->page = NULL;
3275
3276 do {
3277 object = p[--size];
3278
3279 } while (!object && size);
3280
3281 if (!object)
3282 return 0;
3283
3284 page = virt_to_head_page(object);
3285 if (!s) {
3286
3287 if (unlikely(!PageSlab(page))) {
3288 free_nonslab_page(page, object);
3289 p[size] = NULL;
3290 return size;
3291 }
3292
3293 df->s = page->slab_cache;
3294 } else {
3295 df->s = cache_from_obj(s, object);
3296 }
3297
3298 if (is_kfence_address(object)) {
3299 slab_free_hook(df->s, object, false);
3300 __kfence_free(object);
3301 p[size] = NULL;
3302 return size;
3303 }
3304
3305
3306 df->page = page;
3307 set_freepointer(df->s, object, NULL);
3308 df->tail = object;
3309 df->freelist = object;
3310 p[size] = NULL;
3311 df->cnt = 1;
3312
3313 while (size) {
3314 object = p[--size];
3315 if (!object)
3316 continue;
3317
3318
3319 if (df->page == virt_to_head_page(object)) {
3320
3321 set_freepointer(df->s, object, df->freelist);
3322 df->freelist = object;
3323 df->cnt++;
3324 p[size] = NULL;
3325
3326 continue;
3327 }
3328
3329
3330 if (!--lookahead)
3331 break;
3332
3333 if (!first_skipped_index)
3334 first_skipped_index = size + 1;
3335 }
3336
3337 return first_skipped_index;
3338}
3339
/* Note that interrupts must be enabled when calling this function. */
3341void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3342{
3343 if (WARN_ON(!size))
3344 return;
3345
3346 memcg_slab_free_hook(s, p, size);
3347 do {
3348 struct detached_freelist df;
3349
3350 size = build_detached_freelist(s, size, p, &df);
3351 if (!df.page)
3352 continue;
3353
3354 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3355 } while (likely(size));
3356}
3357EXPORT_SYMBOL(kmem_cache_free_bulk);
3358
/* Note that interrupts must be enabled when calling this function. */
3360int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3361 void **p)
3362{
3363 struct kmem_cache_cpu *c;
3364 int i;
3365 struct obj_cgroup *objcg = NULL;
3366
 /* memcg and kmem_cache debug support and memory initialization */
3368 s = slab_pre_alloc_hook(s, &objcg, size, flags);
3369 if (unlikely(!s))
3370 return false;
3371
 /*
  * Disable local IRQs for the whole batch, which protects against
  * preemption and interrupt handlers invoking the normal fastpath.
  */
3376 local_irq_disable();
3377 c = this_cpu_ptr(s->cpu_slab);
3378
3379 for (i = 0; i < size; i++) {
3380 void *object = kfence_alloc(s, s->object_size, flags);
3381
3382 if (unlikely(object)) {
3383 p[i] = object;
3384 continue;
3385 }
3386
3387 object = c->freelist;
3388 if (unlikely(!object)) {
   /*
    * We may have removed an object from c->freelist using
    * the fastpath in the previous iteration; in that case,
    * c->tid has not been bumped yet.
    * Since ___slab_alloc() may reenable interrupts while
    * allocating memory, we should bump c->tid now.
    */
3396 c->tid = next_tid(c->tid);
3397
   /*
    * Invoking the slow path likely has the side effect of
    * re-populating the per CPU c->freelist.
    */
3402 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3403 _RET_IP_, c);
3404 if (unlikely(!p[i]))
3405 goto error;
3406
3407 c = this_cpu_ptr(s->cpu_slab);
3408 maybe_wipe_obj_freeptr(s, p[i]);
3409
3410 continue;
3411 }
3412 c->freelist = get_freepointer(s, object);
3413 p[i] = object;
3414 maybe_wipe_obj_freeptr(s, p[i]);
3415 }
3416 c->tid = next_tid(c->tid);
3417 local_irq_enable();
3418
 /*
  * memcg and kmem_cache debug support and memory initialization.
  * Done outside of the IRQ disabled fastpath loop.
  */
3423 slab_post_alloc_hook(s, objcg, flags, size, p,
3424 slab_want_init_on_alloc(flags, s));
3425 return i;
3426error:
3427 local_irq_enable();
3428 slab_post_alloc_hook(s, objcg, flags, i, p, false);
3429 __kmem_cache_free_bulk(s, i, p);
3430 return 0;
3431}
3432EXPORT_SYMBOL(kmem_cache_alloc_bulk);
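
/*
 * Illustrative sketch of the bulk API (placeholder identifiers): the bulk
 * calls amortize fastpath overhead by keeping IRQs disabled across the whole
 * batch, e.g.
 *
 *	void *objs[16];
 *	int n;
 *
 *	n = kmem_cache_alloc_bulk(cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs);
 *	if (!n)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(cachep, n, objs);
 *
 * On failure kmem_cache_alloc_bulk() returns 0 after freeing any objects it
 * had already allocated, so the caller never sees a partially filled array.
 */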
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
/*
 * Minimum / Maximum order of slab pages. This influences locking overhead
 * and slab fragmentation. A higher order reduces the number of partial slabs
 * and increases the number of allocations possible without having to take
 * the list_lock, at the cost of more internal fragmentation and a higher
 * chance that the page allocator cannot satisfy the request.
 *
 * All three tunables below can be overridden on the kernel command line.
 */
3454static unsigned int slub_min_order;
3455static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3456static unsigned int slub_min_objects;
3457
/*
 * Calculate the order of allocation given a slab object size.
 *
 * The order of allocation has significant impact on performance and other
 * system components. Generally order 0 allocations should be preferred since
 * order 0 does not cause fragmentation in the page allocator. Larger objects
 * can be problematic to put into order 0 slabs because there may be too much
 * unused space left. We go to a higher order if more than 1/16th of the slab
 * would be wasted.
 *
 * In order to reach satisfactory performance we must ensure that a minimum
 * number of objects is in one slab. Otherwise we may generate too much
 * activity on the partial lists which requires taking the list_lock. This is
 * less of a concern for large slabs though which are rarely used.
 *
 * slub_max_order specifies the order where we begin to stop considering the
 * number of objects in a slab as critical. If we reach slub_max_order then
 * we try to keep the page order as low as possible. So we accept more waste
 * of space in favor of a small page order.
 *
 * Higher order allocations also allow the placement of more objects in a
 * slab and thereby reduce object handling overhead. If the user has
 * requested a higher minimum order then we start with that one instead of
 * the smallest order which will fit the object.
 */
3483static inline unsigned int slab_order(unsigned int size,
3484 unsigned int min_objects, unsigned int max_order,
3485 unsigned int fract_leftover)
3486{
3487 unsigned int min_order = slub_min_order;
3488 unsigned int order;
3489
3490 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3491 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3492
3493 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3494 order <= max_order; order++) {
3495
3496 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3497 unsigned int rem;
3498
3499 rem = slab_size % size;
3500
3501 if (rem <= slab_size / fract_leftover)
3502 break;
3503 }
3504
3505 return order;
3506}
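
/*
 * Worked example (illustrative, assuming 4K pages and slub_min_order == 0):
 * slab_order(size = 700, min_objects = 8, max_order = 3, fract_leftover = 16)
 * starts at order 1, since 8 * 700 = 5600 bytes does not fit in one page.
 * An order-1 slab holds 8192 / 700 = 11 objects and wastes 8192 % 700 = 492
 * bytes, which is within the allowed 8192 / 16 = 512 bytes, so order 1 is
 * returned.
 */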
3507
3508static inline int calculate_order(unsigned int size)
3509{
3510 unsigned int order;
3511 unsigned int min_objects;
3512 unsigned int max_objects;
3513 unsigned int nr_cpus;
3514
 /*
  * Attempt to find the best configuration for a slab. This works by
  * first attempting to generate a layout with the best possible
  * configuration and backing off gradually.
  *
  * First we increase the acceptable waste in a slab. Then we reduce
  * the minimum number of objects required per slab.
  */
3523 min_objects = slub_min_objects;
3524 if (!min_objects) {
  /*
   * Some architectures will only update present cpus when
   * onlining them, so don't trust the number if it's just 1. But
   * we also don't want to use nr_cpu_ids always, as on some other
   * architectures there can be many possible cpus, but never
   * onlined. Here we compromise and scale the minimum number of
   * objects per slab with the logarithm of the number of CPUs,
   * so larger machines get somewhat larger slabs.
   */
3534 nr_cpus = num_present_cpus();
3535 if (nr_cpus <= 1)
3536 nr_cpus = nr_cpu_ids;
3537 min_objects = 4 * (fls(nr_cpus) + 1);
3538 }
3539 max_objects = order_objects(slub_max_order, size);
3540 min_objects = min(min_objects, max_objects);
3541
3542 while (min_objects > 1) {
3543 unsigned int fraction;
3544
3545 fraction = 16;
3546 while (fraction >= 4) {
3547 order = slab_order(size, min_objects,
3548 slub_max_order, fraction);
3549 if (order <= slub_max_order)
3550 return order;
3551 fraction /= 2;
3552 }
3553 min_objects--;
3554 }
3555
 /*
  * We were unable to place multiple objects in a slab. Now
  * let's see if we can place a single object there.
  */
3560 order = slab_order(size, 1, slub_max_order, 1);
3561 if (order <= slub_max_order)
3562 return order;
3563
 /*
  * This slab cannot be placed within slub_max_order; try any order.
  */
3567 order = slab_order(size, 1, MAX_ORDER, 1);
3568 if (order < MAX_ORDER)
3569 return order;
3570 return -ENOSYS;
3571}
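
/*
 * Example of the min_objects heuristic above (illustrative numbers): on a
 * machine with 16 present CPUs and no slub_min_objects= override,
 * fls(16) == 5, so min_objects = 4 * (5 + 1) = 24.  calculate_order() then
 * looks for an order <= slub_max_order that fits at least 24 objects while
 * wasting no more than 1/16 of the slab, relaxing the waste limit to 1/8 and
 * 1/4 and finally reducing min_objects before falling back to a single
 * object per slab.
 */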
3572
3573static void
3574init_kmem_cache_node(struct kmem_cache_node *n)
3575{
3576 n->nr_partial = 0;
3577 spin_lock_init(&n->list_lock);
3578 INIT_LIST_HEAD(&n->partial);
3579#ifdef CONFIG_SLUB_DEBUG
3580 atomic_long_set(&n->nr_slabs, 0);
3581 atomic_long_set(&n->total_objects, 0);
3582 INIT_LIST_HEAD(&n->full);
3583#endif
3584}
3585
3586static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3587{
3588 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3589 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3590
 /*
  * Must align to double word boundary for the double cmpxchg
  * instructions to work; see __pcpu_double_call_return_bool().
  */
3595 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3596 2 * sizeof(void *));
3597
3598 if (!s->cpu_slab)
3599 return 0;
3600
3601 init_kmem_cache_cpus(s);
3602
3603 return 1;
3604}
3605
3606static struct kmem_cache *kmem_cache_node;
3607
/*
 * No kmalloc_node yet so do it by hand. We know that this is the first
 * slab on the node for this slabcache. There are no concurrent accesses
 * possible.
 *
 * Note that this function only works on the kmem_cache_node
 * when allocating for the kmem_cache_node. This is used for bootstrapping
 * memory on a fresh node that has no slab structures yet.
 */
3617static void early_kmem_cache_node_alloc(int node)
3618{
3619 struct page *page;
3620 struct kmem_cache_node *n;
3621
3622 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3623
3624 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3625
3626 BUG_ON(!page);
3627 if (page_to_nid(page) != node) {
3628 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3629 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3630 }
3631
3632 n = page->freelist;
3633 BUG_ON(!n);
3634#ifdef CONFIG_SLUB_DEBUG
3635 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3636 init_tracking(kmem_cache_node, n);
3637#endif
3638 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
3639 page->freelist = get_freepointer(kmem_cache_node, n);
3640 page->inuse = 1;
3641 page->frozen = 0;
3642 kmem_cache_node->node[node] = n;
3643 init_kmem_cache_node(n);
3644 inc_slabs_node(kmem_cache_node, node, page->objects);
3645
 /*
  * No locks need to be taken here as this node has just been
  * initialized and there is no concurrent access.
  */
3650 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3651}
3652
3653static void free_kmem_cache_nodes(struct kmem_cache *s)
3654{
3655 int node;
3656 struct kmem_cache_node *n;
3657
3658 for_each_kmem_cache_node(s, node, n) {
3659 s->node[node] = NULL;
3660 kmem_cache_free(kmem_cache_node, n);
3661 }
3662}
3663
3664void __kmem_cache_release(struct kmem_cache *s)
3665{
3666 cache_random_seq_destroy(s);
3667 free_percpu(s->cpu_slab);
3668 free_kmem_cache_nodes(s);
3669}
3670
3671static int init_kmem_cache_nodes(struct kmem_cache *s)
3672{
3673 int node;
3674
3675 for_each_node_mask(node, slab_nodes) {
3676 struct kmem_cache_node *n;
3677
3678 if (slab_state == DOWN) {
3679 early_kmem_cache_node_alloc(node);
3680 continue;
3681 }
3682 n = kmem_cache_alloc_node(kmem_cache_node,
3683 GFP_KERNEL, node);
3684
3685 if (!n) {
3686 free_kmem_cache_nodes(s);
3687 return 0;
3688 }
3689
3690 init_kmem_cache_node(n);
3691 s->node[node] = n;
3692 }
3693 return 1;
3694}
3695
3696static void set_min_partial(struct kmem_cache *s, unsigned long min)
3697{
3698 if (min < MIN_PARTIAL)
3699 min = MIN_PARTIAL;
3700 else if (min > MAX_PARTIAL)
3701 min = MAX_PARTIAL;
3702 s->min_partial = min;
3703}
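
/*
 * Example (illustrative): kmem_cache_open() below calls
 * set_min_partial(s, ilog2(s->size) / 2).  For s->size == 4096 that keeps a
 * minimum of 12 / 2 = 6 partial slabs per node before empty slabs are
 * discarded; for s->size == 64 the raw value 6 / 2 = 3 is clamped up to
 * MIN_PARTIAL (5).
 */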
3704
3705static void set_cpu_partial(struct kmem_cache *s)
3706{
3707#ifdef CONFIG_SLUB_CPU_PARTIAL
 /*
  * cpu_partial determines the maximum number of objects kept in the
  * per cpu partial lists of a processor.
  *
  * Per cpu partial lists mainly contain slabs that just had one
  * object freed. If they are used for allocation then they can be
  * completely refilled in short order without the need to take the
  * list_lock.
  *
  * The limits below are chosen so that a per cpu partial list holds
  * roughly 30 objects for small object sizes and progressively fewer
  * slabs as the object (and therefore slab) size grows, to bound the
  * amount of memory that can be tied up in per cpu partial slabs.
  *
  * Debug caches never use per cpu partial lists (see
  * kmem_cache_has_cpu_partial()).
  */
3725 if (!kmem_cache_has_cpu_partial(s))
3726 slub_set_cpu_partial(s, 0);
3727 else if (s->size >= PAGE_SIZE)
3728 slub_set_cpu_partial(s, 2);
3729 else if (s->size >= 1024)
3730 slub_set_cpu_partial(s, 6);
3731 else if (s->size >= 256)
3732 slub_set_cpu_partial(s, 13);
3733 else
3734 slub_set_cpu_partial(s, 30);
3735#endif
3736}
3737
/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */
3742static int calculate_sizes(struct kmem_cache *s, int forced_order)
3743{
3744 slab_flags_t flags = s->flags;
3745 unsigned int size = s->object_size;
3746 unsigned int order;
3747
3748
3749
3750
3751
3752
3753 size = ALIGN(size, sizeof(void *));
3754
3755#ifdef CONFIG_SLUB_DEBUG
3756
3757
3758
3759
3760
3761 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3762 !s->ctor)
3763 s->flags |= __OBJECT_POISON;
3764 else
3765 s->flags &= ~__OBJECT_POISON;
3766
3767
3768
3769
3770
3771
3772
3773 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3774 size += sizeof(void *);
3775#endif
3776
3777
3778
3779
3780
3781 s->inuse = size;
3782
3783 if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3784 ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
3785 s->ctor) {
  /*
   * Relocate the free pointer after the object if it is not
   * permitted to overwrite the first word of the object on
   * kmem_cache_free.
   *
   * This is the case if we do RCU, have a constructor or
   * destructor, are poisoning the objects, or are
   * redzoning an object smaller than sizeof(void *).
   *
   * The assumption that s->offset >= s->inuse means the free
   * pointer is outside of the object is used in the
   * freeptr_outside_object() function. If that is no
   * longer true, the function needs to be modified.
   */
3800 s->offset = size;
3801 size += sizeof(void *);
3802 } else {
  /*
   * Store the freelist pointer near the middle of the object to keep
   * it away from the edges of the object, so small over/underflows
   * from neighboring allocations are less likely to corrupt it.
   */
3808 s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
3809 }
3810
3811#ifdef CONFIG_SLUB_DEBUG
3812 if (flags & SLAB_STORE_USER)
3813
3814
3815
3816
3817 size += 2 * sizeof(struct track);
3818#endif
3819
3820 kasan_cache_create(s, &size, &s->flags);
3821#ifdef CONFIG_SLUB_DEBUG
3822 if (flags & SLAB_RED_ZONE) {
  /*
   * Add some empty padding so that we can catch
   * overwrites from earlier objects rather than let
   * tracking information or the free pointer be
   * corrupted if a user writes before the start
   * of the object.
   */
3830 size += sizeof(void *);
3831
3832 s->red_left_pad = sizeof(void *);
3833 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3834 size += s->red_left_pad;
3835 }
3836#endif
3837
3838
3839
3840
3841
3842
3843 size = ALIGN(size, s->align);
3844 s->size = size;
3845 s->reciprocal_size = reciprocal_value(size);
3846 if (forced_order >= 0)
3847 order = forced_order;
3848 else
3849 order = calculate_order(size);
3850
3851 if ((int)order < 0)
3852 return 0;
3853
3854 s->allocflags = 0;
3855 if (order)
3856 s->allocflags |= __GFP_COMP;
3857
3858 if (s->flags & SLAB_CACHE_DMA)
3859 s->allocflags |= GFP_DMA;
3860
3861 if (s->flags & SLAB_CACHE_DMA32)
3862 s->allocflags |= GFP_DMA32;
3863
3864 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3865 s->allocflags |= __GFP_RECLAIMABLE;
3866
3867
3868
3869
3870 s->oo = oo_make(order, size);
3871 s->min = oo_make(get_order(size), size);
3872 if (oo_objects(s->oo) > oo_objects(s->max))
3873 s->max = s->oo;
3874
3875 return !!oo_objects(s->oo);
3876}
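
/*
 * Illustrative result of calculate_sizes() for a plain production cache
 * (no debug flags, no constructor, no KASAN, default word alignment),
 * assuming object_size == 24 on a 64-bit kernel: size stays 24, s->inuse
 * becomes 24, the free pointer is stored inside the object at
 * ALIGN_DOWN(24 / 2, 8) == 8, s->size remains 24 and an order-0 slab holds
 * 4096 / 24 = 170 objects.
 */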
3877
3878static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3879{
3880 s->flags = kmem_cache_flags(s->size, flags, s->name);
3881#ifdef CONFIG_SLAB_FREELIST_HARDENED
3882 s->random = get_random_long();
3883#endif
3884
3885 if (!calculate_sizes(s, -1))
3886 goto error;
3887 if (disable_higher_order_debug) {
  /*
   * Disable debugging flags that store metadata if the min slab
   * order increased.
   */
3892 if (get_order(s->size) > get_order(s->object_size)) {
3893 s->flags &= ~DEBUG_METADATA_FLAGS;
3894 s->offset = 0;
3895 if (!calculate_sizes(s, -1))
3896 goto error;
3897 }
3898 }
3899
3900#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3901 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3902 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3903
3904 s->flags |= __CMPXCHG_DOUBLE;
3905#endif
3906
 /*
  * The larger the object size is, the more pages we want on the partial
  * list to avoid pounding the page allocator excessively.
  */
3911 set_min_partial(s, ilog2(s->size) / 2);
3912
3913 set_cpu_partial(s);
3914
3915#ifdef CONFIG_NUMA
3916 s->remote_node_defrag_ratio = 1000;
3917#endif
3918
3919
3920 if (slab_state >= UP) {
3921 if (init_cache_random_seq(s))
3922 goto error;
3923 }
3924
3925 if (!init_kmem_cache_nodes(s))
3926 goto error;
3927
3928 if (alloc_kmem_cache_cpus(s))
3929 return 0;
3930
3931 free_kmem_cache_nodes(s);
3932error:
3933 return -EINVAL;
3934}
3935
3936static void list_slab_objects(struct kmem_cache *s, struct page *page,
3937 const char *text)
3938{
3939#ifdef CONFIG_SLUB_DEBUG
3940 void *addr = page_address(page);
3941 unsigned long *map;
3942 void *p;
3943
3944 slab_err(s, page, text, s->name);
3945 slab_lock(page);
3946
3947 map = get_map(s, page);
3948 for_each_object(p, s, addr, page->objects) {
3949
3950 if (!test_bit(__obj_to_index(s, addr, p), map)) {
3951 pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
3952 print_tracking(s, p);
3953 }
3954 }
3955 put_map(map);
3956 slab_unlock(page);
3957#endif
3958}
3959
/*
 * Attempt to free all partial slabs on a node.
 * This is called from __kmem_cache_shutdown(). We must take list_lock
 * because sysfs files might still access the partial list after the shutdown.
 */
3965static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3966{
3967 LIST_HEAD(discard);
3968 struct page *page, *h;
3969
3970 BUG_ON(irqs_disabled());
3971 spin_lock_irq(&n->list_lock);
3972 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3973 if (!page->inuse) {
3974 remove_partial(n, page);
3975 list_add(&page->slab_list, &discard);
3976 } else {
3977 list_slab_objects(s, page,
3978 "Objects remaining in %s on __kmem_cache_shutdown()");
3979 }
3980 }
3981 spin_unlock_irq(&n->list_lock);
3982
3983 list_for_each_entry_safe(page, h, &discard, slab_list)
3984 discard_slab(s, page);
3985}
3986
3987bool __kmem_cache_empty(struct kmem_cache *s)
3988{
3989 int node;
3990 struct kmem_cache_node *n;
3991
3992 for_each_kmem_cache_node(s, node, n)
3993 if (n->nr_partial || slabs_node(s, node))
3994 return false;
3995 return true;
3996}
3997
/*
 * Release all resources used by a slab cache.
 */
4001int __kmem_cache_shutdown(struct kmem_cache *s)
4002{
4003 int node;
4004 struct kmem_cache_node *n;
4005
4006 flush_all(s);
4007
4008 for_each_kmem_cache_node(s, node, n) {
4009 free_partial(s, n);
4010 if (n->nr_partial || slabs_node(s, node))
4011 return 1;
4012 }
4013 return 0;
4014}
4015
4016#ifdef CONFIG_PRINTK
4017void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
4018{
4019 void *base;
4020 int __maybe_unused i;
4021 unsigned int objnr;
4022 void *objp;
4023 void *objp0;
4024 struct kmem_cache *s = page->slab_cache;
4025 struct track __maybe_unused *trackp;
4026
4027 kpp->kp_ptr = object;
4028 kpp->kp_page = page;
4029 kpp->kp_slab_cache = s;
4030 base = page_address(page);
4031 objp0 = kasan_reset_tag(object);
4032#ifdef CONFIG_SLUB_DEBUG
4033 objp = restore_red_left(s, objp0);
4034#else
4035 objp = objp0;
4036#endif
4037 objnr = obj_to_index(s, page, objp);
4038 kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
4039 objp = base + s->size * objnr;
4040 kpp->kp_objp = objp;
4041 if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
4042 !(s->flags & SLAB_STORE_USER))
4043 return;
4044#ifdef CONFIG_SLUB_DEBUG
4045 objp = fixup_red_left(s, objp);
4046 trackp = get_track(s, objp, TRACK_ALLOC);
4047 kpp->kp_ret = (void *)trackp->addr;
4048#ifdef CONFIG_STACKTRACE
4049 for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
4050 kpp->kp_stack[i] = (void *)trackp->addrs[i];
4051 if (!kpp->kp_stack[i])
4052 break;
4053 }
4054
4055 trackp = get_track(s, objp, TRACK_FREE);
4056 for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
4057 kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
4058 if (!kpp->kp_free_stack[i])
4059 break;
4060 }
4061#endif
4062#endif
4063}
4064#endif
4065
4066
4067
4068
4069
4070static int __init setup_slub_min_order(char *str)
4071{
4072 get_option(&str, (int *)&slub_min_order);
4073
4074 return 1;
4075}
4076
4077__setup("slub_min_order=", setup_slub_min_order);
4078
4079static int __init setup_slub_max_order(char *str)
4080{
4081 get_option(&str, (int *)&slub_max_order);
4082 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4083
4084 return 1;
4085}
4086
4087__setup("slub_max_order=", setup_slub_max_order);
4088
4089static int __init setup_slub_min_objects(char *str)
4090{
4091 get_option(&str, (int *)&slub_min_objects);
4092
4093 return 1;
4094}
4095
4096__setup("slub_min_objects=", setup_slub_min_objects);
4097
4098void *__kmalloc(size_t size, gfp_t flags)
4099{
4100 struct kmem_cache *s;
4101 void *ret;
4102
4103 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4104 return kmalloc_large(size, flags);
4105
4106 s = kmalloc_slab(size, flags);
4107
4108 if (unlikely(ZERO_OR_NULL_PTR(s)))
4109 return s;
4110
4111 ret = slab_alloc(s, flags, _RET_IP_, size);
4112
4113 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
4114
4115 ret = kasan_kmalloc(s, ret, size, flags);
4116
4117 return ret;
4118}
4119EXPORT_SYMBOL(__kmalloc);
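
/*
 * Callers normally reach __kmalloc() through the kmalloc() wrapper in
 * <linux/slab.h>, which can pick the kmalloc cache at compile time when the
 * size is constant.  Sketch of typical use (placeholder identifiers):
 *
 *	buf = kmalloc(len, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	kfree(buf);
 */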
4120
4121#ifdef CONFIG_NUMA
4122static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4123{
4124 struct page *page;
4125 void *ptr = NULL;
4126 unsigned int order = get_order(size);
4127
4128 flags |= __GFP_COMP;
4129 page = alloc_pages_node(node, flags, order);
4130 if (page) {
4131 ptr = page_address(page);
4132 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4133 PAGE_SIZE << order);
4134 }
4135
4136 return kmalloc_large_node_hook(ptr, size, flags);
4137}
4138
4139void *__kmalloc_node(size_t size, gfp_t flags, int node)
4140{
4141 struct kmem_cache *s;
4142 void *ret;
4143
4144 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4145 ret = kmalloc_large_node(size, flags, node);
4146
4147 trace_kmalloc_node(_RET_IP_, ret,
4148 size, PAGE_SIZE << get_order(size),
4149 flags, node);
4150
4151 return ret;
4152 }
4153
4154 s = kmalloc_slab(size, flags);
4155
4156 if (unlikely(ZERO_OR_NULL_PTR(s)))
4157 return s;
4158
4159 ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
4160
4161 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4162
4163 ret = kasan_kmalloc(s, ret, size, flags);
4164
4165 return ret;
4166}
4167EXPORT_SYMBOL(__kmalloc_node);
4168#endif
4169
4170#ifdef CONFIG_HARDENED_USERCOPY
/*
 * Rejects incorrectly sized objects and objects that are to be copied
 * to/from userspace but do not fall entirely within the containing slab
 * cache's usercopy region.
 *
 * Aborts via usercopy_abort() (or only warns when usercopy_fallback is
 * set and the copy stays within the object) and returns normally when
 * the access is allowed.
 */
4179void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4180 bool to_user)
4181{
4182 struct kmem_cache *s;
4183 unsigned int offset;
4184 size_t object_size;
4185 bool is_kfence = is_kfence_address(ptr);
4186
4187 ptr = kasan_reset_tag(ptr);
4188
4189
4190 s = page->slab_cache;
4191
4192
4193 if (ptr < page_address(page))
4194 usercopy_abort("SLUB object not in SLUB page?!", NULL,
4195 to_user, 0, n);
4196
4197
4198 if (is_kfence)
4199 offset = ptr - kfence_object_start(ptr);
4200 else
4201 offset = (ptr - page_address(page)) % s->size;
4202
4203
4204 if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4205 if (offset < s->red_left_pad)
4206 usercopy_abort("SLUB object in left red zone",
4207 s->name, to_user, offset, n);
4208 offset -= s->red_left_pad;
4209 }
4210
4211
4212 if (offset >= s->useroffset &&
4213 offset - s->useroffset <= s->usersize &&
4214 n <= s->useroffset - offset + s->usersize)
4215 return;
4216
 /*
  * If the copy is still within the allocated object, produce
  * a warning instead of rejecting the copy. This is intended
  * to be a temporary method to find any missing usercopy
  * whitelists.
  */
4223 object_size = slab_ksize(s);
4224 if (usercopy_fallback &&
4225 offset <= object_size && n <= object_size - offset) {
4226 usercopy_warn("SLUB object", s->name, to_user, offset, n);
4227 return;
4228 }
4229
4230 usercopy_abort("SLUB object", s->name, to_user, offset, n);
4231}
4232#endif
4233
4234size_t __ksize(const void *object)
4235{
4236 struct page *page;
4237
4238 if (unlikely(object == ZERO_SIZE_PTR))
4239 return 0;
4240
4241 page = virt_to_head_page(object);
4242
4243 if (unlikely(!PageSlab(page))) {
4244 WARN_ON(!PageCompound(page));
4245 return page_size(page);
4246 }
4247
4248 return slab_ksize(page->slab_cache);
4249}
4250EXPORT_SYMBOL(__ksize);
4251
4252void kfree(const void *x)
4253{
4254 struct page *page;
4255 void *object = (void *)x;
4256
4257 trace_kfree(_RET_IP_, x);
4258
4259 if (unlikely(ZERO_OR_NULL_PTR(x)))
4260 return;
4261
4262 page = virt_to_head_page(x);
4263 if (unlikely(!PageSlab(page))) {
4264 free_nonslab_page(page, object);
4265 return;
4266 }
4267 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4268}
4269EXPORT_SYMBOL(kfree);
4270
4271#define SHRINK_PROMOTE_MAX 32

/*
 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
 * up most to the head of the partial lists. New allocations will then
 * fill those up and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This results in them
 * being allocated from last, increasing the chance that the last objects
 * are freed in them.
 */
4282int __kmem_cache_shrink(struct kmem_cache *s)
4283{
4284 int node;
4285 int i;
4286 struct kmem_cache_node *n;
4287 struct page *page;
4288 struct page *t;
4289 struct list_head discard;
4290 struct list_head promote[SHRINK_PROMOTE_MAX];
4291 unsigned long flags;
4292 int ret = 0;
4293
4294 flush_all(s);
4295 for_each_kmem_cache_node(s, node, n) {
4296 INIT_LIST_HEAD(&discard);
4297 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4298 INIT_LIST_HEAD(promote + i);
4299
4300 spin_lock_irqsave(&n->list_lock, flags);

  /*
   * Build lists of slabs to discard or promote.
   *
   * Note that concurrent frees may occur while we hold the
   * list_lock. page->inuse here is the upper limit.
   */
4308 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4309 int free = page->objects - page->inuse;
4310
4311
4312 barrier();
4313
4314
4315 BUG_ON(free <= 0);
4316
4317 if (free == page->objects) {
4318 list_move(&page->slab_list, &discard);
4319 n->nr_partial--;
4320 } else if (free <= SHRINK_PROMOTE_MAX)
4321 list_move(&page->slab_list, promote + free - 1);
4322 }
4323
4324
4325
4326
4327
4328 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4329 list_splice(promote + i, &n->partial);
4330
4331 spin_unlock_irqrestore(&n->list_lock, flags);
4332
4333
4334 list_for_each_entry_safe(page, t, &discard, slab_list)
4335 discard_slab(s, page);
4336
4337 if (slabs_node(s, node))
4338 ret = 1;
4339 }
4340
4341 return ret;
4342}
4343
4344static int slab_mem_going_offline_callback(void *arg)
4345{
4346 struct kmem_cache *s;
4347
4348 mutex_lock(&slab_mutex);
4349 list_for_each_entry(s, &slab_caches, list)
4350 __kmem_cache_shrink(s);
4351 mutex_unlock(&slab_mutex);
4352
4353 return 0;
4354}
4355
4356static void slab_mem_offline_callback(void *arg)
4357{
4358 struct memory_notify *marg = arg;
4359 int offline_node;
4360
4361 offline_node = marg->status_change_nid_normal;
4362
4363
4364
4365
4366
4367 if (offline_node < 0)
4368 return;
4369
4370 mutex_lock(&slab_mutex);
4371 node_clear(offline_node, slab_nodes);
 /*
  * We no longer free kmem_cache_node structures here, as it would be
  * racy with all get_node() users, and infeasible to protect them with
  * slab_mutex.
  */
4377 mutex_unlock(&slab_mutex);
4378}
4379
4380static int slab_mem_going_online_callback(void *arg)
4381{
4382 struct kmem_cache_node *n;
4383 struct kmem_cache *s;
4384 struct memory_notify *marg = arg;
4385 int nid = marg->status_change_nid_normal;
4386 int ret = 0;
4387
4388
4389
4390
4391
4392 if (nid < 0)
4393 return 0;
4394
4395
4396
4397
4398
4399
4400 mutex_lock(&slab_mutex);
4401 list_for_each_entry(s, &slab_caches, list) {
4402
4403
4404
4405
4406 if (get_node(s, nid))
4407 continue;
4408
4409
4410
4411
4412
4413 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4414 if (!n) {
4415 ret = -ENOMEM;
4416 goto out;
4417 }
4418 init_kmem_cache_node(n);
4419 s->node[nid] = n;
4420 }
4421
4422
4423
4424
4425 node_set(nid, slab_nodes);
4426out:
4427 mutex_unlock(&slab_mutex);
4428 return ret;
4429}
4430
4431static int slab_memory_callback(struct notifier_block *self,
4432 unsigned long action, void *arg)
4433{
4434 int ret = 0;
4435
4436 switch (action) {
4437 case MEM_GOING_ONLINE:
4438 ret = slab_mem_going_online_callback(arg);
4439 break;
4440 case MEM_GOING_OFFLINE:
4441 ret = slab_mem_going_offline_callback(arg);
4442 break;
4443 case MEM_OFFLINE:
4444 case MEM_CANCEL_ONLINE:
4445 slab_mem_offline_callback(arg);
4446 break;
4447 case MEM_ONLINE:
4448 case MEM_CANCEL_OFFLINE:
4449 break;
4450 }
4451 if (ret)
4452 ret = notifier_from_errno(ret);
4453 else
4454 ret = NOTIFY_OK;
4455 return ret;
4456}
4457
4458static struct notifier_block slab_memory_callback_nb = {
4459 .notifier_call = slab_memory_callback,
4460 .priority = SLAB_CALLBACK_PRI,
4461};
4462
4463
4464
4465
4466
4467
/*
 * Used for early kmem_cache structures that were allocated using
 * the page allocator. Allocate them properly then fix up the pointers
 * that may be pointing to the wrong kmem_cache structure.
 */
4473static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4474{
4475 int node;
4476 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4477 struct kmem_cache_node *n;
4478
4479 memcpy(s, static_cache, kmem_cache->object_size);
4480
 /*
  * This runs very early, and only the boot processor is supposed to be
  * up. Even if it weren't true, IRQs are not up so we couldn't fire
  * up anything else.
  */
4486 __flush_cpu_slab(s, smp_processor_id());
4487 for_each_kmem_cache_node(s, node, n) {
4488 struct page *p;
4489
4490 list_for_each_entry(p, &n->partial, slab_list)
4491 p->slab_cache = s;
4492
4493#ifdef CONFIG_SLUB_DEBUG
4494 list_for_each_entry(p, &n->full, slab_list)
4495 p->slab_cache = s;
4496#endif
4497 }
4498 list_add(&s->list, &slab_caches);
4499 return s;
4500}
4501
4502void __init kmem_cache_init(void)
4503{
4504 static __initdata struct kmem_cache boot_kmem_cache,
4505 boot_kmem_cache_node;
4506 int node;
4507
4508 if (debug_guardpage_minorder())
4509 slub_max_order = 0;
4510
4511
4512 if (__slub_debug_enabled())
4513 no_hash_pointers_enable(NULL);
4514
4515 kmem_cache_node = &boot_kmem_cache_node;
4516 kmem_cache = &boot_kmem_cache;
4517
 /*
  * Initialize the nodemask for which we will allocate per node
  * structures. Here we don't need to take slab_mutex yet.
  */
4522 for_each_node_state(node, N_NORMAL_MEMORY)
4523 node_set(node, slab_nodes);
4524
4525 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4526 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4527
4528 register_hotmemory_notifier(&slab_memory_callback_nb);
4529
4530
4531 slab_state = PARTIAL;
4532
4533 create_boot_cache(kmem_cache, "kmem_cache",
4534 offsetof(struct kmem_cache, node) +
4535 nr_node_ids * sizeof(struct kmem_cache_node *),
4536 SLAB_HWCACHE_ALIGN, 0, 0);
4537
4538 kmem_cache = bootstrap(&boot_kmem_cache);
4539 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4540
4541
4542 setup_kmalloc_cache_index_table();
4543 create_kmalloc_caches(0);
4544
4545
4546 init_freelist_randomization();
4547
4548 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4549 slub_cpu_dead);
4550
4551 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4552 cache_line_size(),
4553 slub_min_order, slub_max_order, slub_min_objects,
4554 nr_cpu_ids, nr_node_ids);
4555}
4556
4557void __init kmem_cache_init_late(void)
4558{
4559}
4560
4561struct kmem_cache *
4562__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4563 slab_flags_t flags, void (*ctor)(void *))
4564{
4565 struct kmem_cache *s;
4566
4567 s = find_mergeable(size, align, flags, name, ctor);
4568 if (s) {
4569 s->refcount++;

  /*
   * Adjust the object sizes so that we clear
   * the complete object on kzalloc.
   */
4575 s->object_size = max(s->object_size, size);
4576 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4577
4578 if (sysfs_slab_alias(s, name)) {
4579 s->refcount--;
4580 s = NULL;
4581 }
4582 }
4583
4584 return s;
4585}
4586
4587int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4588{
4589 int err;
4590
4591 err = kmem_cache_open(s, flags);
4592 if (err)
4593 return err;
4594
4595
4596 if (slab_state <= UP)
4597 return 0;
4598
4599 err = sysfs_slab_add(s);
4600 if (err)
4601 __kmem_cache_release(s);
4602
4603 if (s->flags & SLAB_STORE_USER)
4604 debugfs_slab_add(s);
4605
4606 return err;
4607}
4608
4609void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4610{
4611 struct kmem_cache *s;
4612 void *ret;
4613
4614 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4615 return kmalloc_large(size, gfpflags);
4616
4617 s = kmalloc_slab(size, gfpflags);
4618
4619 if (unlikely(ZERO_OR_NULL_PTR(s)))
4620 return s;
4621
4622 ret = slab_alloc(s, gfpflags, caller, size);
4623
4624
4625 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4626
4627 return ret;
4628}
4629EXPORT_SYMBOL(__kmalloc_track_caller);
4630
4631#ifdef CONFIG_NUMA
4632void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4633 int node, unsigned long caller)
4634{
4635 struct kmem_cache *s;
4636 void *ret;
4637
4638 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4639 ret = kmalloc_large_node(size, gfpflags, node);
4640
4641 trace_kmalloc_node(caller, ret,
4642 size, PAGE_SIZE << get_order(size),
4643 gfpflags, node);
4644
4645 return ret;
4646 }
4647
4648 s = kmalloc_slab(size, gfpflags);
4649
4650 if (unlikely(ZERO_OR_NULL_PTR(s)))
4651 return s;
4652
4653 ret = slab_alloc_node(s, gfpflags, node, caller, size);
4654
4655
4656 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4657
4658 return ret;
4659}
4660EXPORT_SYMBOL(__kmalloc_node_track_caller);
4661#endif
4662
4663#ifdef CONFIG_SYSFS
4664static int count_inuse(struct page *page)
4665{
4666 return page->inuse;
4667}
4668
4669static int count_total(struct page *page)
4670{
4671 return page->objects;
4672}
4673#endif
4674
4675#ifdef CONFIG_SLUB_DEBUG
4676static void validate_slab(struct kmem_cache *s, struct page *page)
4677{
4678 void *p;
4679 void *addr = page_address(page);
4680 unsigned long *map;
4681
4682 slab_lock(page);
4683
4684 if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4685 goto unlock;
4686
4687
4688 map = get_map(s, page);
4689 for_each_object(p, s, addr, page->objects) {
4690 u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4691 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4692
4693 if (!check_object(s, page, p, val))
4694 break;
4695 }
4696 put_map(map);
4697unlock:
4698 slab_unlock(page);
4699}
4700
4701static int validate_slab_node(struct kmem_cache *s,
4702 struct kmem_cache_node *n)
4703{
4704 unsigned long count = 0;
4705 struct page *page;
4706 unsigned long flags;
4707
4708 spin_lock_irqsave(&n->list_lock, flags);
4709
4710 list_for_each_entry(page, &n->partial, slab_list) {
4711 validate_slab(s, page);
4712 count++;
4713 }
4714 if (count != n->nr_partial) {
4715 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4716 s->name, count, n->nr_partial);
4717 slab_add_kunit_errors();
4718 }
4719
4720 if (!(s->flags & SLAB_STORE_USER))
4721 goto out;
4722
4723 list_for_each_entry(page, &n->full, slab_list) {
4724 validate_slab(s, page);
4725 count++;
4726 }
4727 if (count != atomic_long_read(&n->nr_slabs)) {
4728 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4729 s->name, count, atomic_long_read(&n->nr_slabs));
4730 slab_add_kunit_errors();
4731 }
4732
4733out:
4734 spin_unlock_irqrestore(&n->list_lock, flags);
4735 return count;
4736}
4737
4738long validate_slab_cache(struct kmem_cache *s)
4739{
4740 int node;
4741 unsigned long count = 0;
4742 struct kmem_cache_node *n;
4743
4744 flush_all(s);
4745 for_each_kmem_cache_node(s, node, n)
4746 count += validate_slab_node(s, n);
4747
4748 return count;
4749}
4750EXPORT_SYMBOL(validate_slab_cache);
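
/*
 * validate_slab_cache() is exported for the SLUB KUnit test and, with
 * CONFIG_SLUB_DEBUG sysfs support, can also be triggered from user space via
 * the "validate" attribute defined further down, e.g. (illustrative cache):
 *
 *	echo 1 > /sys/kernel/slab/kmalloc-64/validate
 *
 * It walks the partial (and, with SLAB_STORE_USER, full) slabs of the cache
 * and reports consistency errors to the kernel log.
 */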
4751
4752#ifdef CONFIG_DEBUG_FS
/*
 * Generate lists of code addresses where slabcache objects are allocated
 * and freed.
 */
4757
4758struct location {
4759 unsigned long count;
4760 unsigned long addr;
4761 long long sum_time;
4762 long min_time;
4763 long max_time;
4764 long min_pid;
4765 long max_pid;
4766 DECLARE_BITMAP(cpus, NR_CPUS);
4767 nodemask_t nodes;
4768};
4769
4770struct loc_track {
4771 unsigned long max;
4772 unsigned long count;
4773 struct location *loc;
4774};
4775
4776static struct dentry *slab_debugfs_root;
4777
4778static void free_loc_track(struct loc_track *t)
4779{
4780 if (t->max)
4781 free_pages((unsigned long)t->loc,
4782 get_order(sizeof(struct location) * t->max));
4783}
4784
4785static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4786{
4787 struct location *l;
4788 int order;
4789
4790 order = get_order(sizeof(struct location) * max);
4791
4792 l = (void *)__get_free_pages(flags, order);
4793 if (!l)
4794 return 0;
4795
4796 if (t->count) {
4797 memcpy(l, t->loc, sizeof(struct location) * t->count);
4798 free_loc_track(t);
4799 }
4800 t->max = max;
4801 t->loc = l;
4802 return 1;
4803}
4804
4805static int add_location(struct loc_track *t, struct kmem_cache *s,
4806 const struct track *track)
4807{
4808 long start, end, pos;
4809 struct location *l;
4810 unsigned long caddr;
4811 unsigned long age = jiffies - track->when;
4812
4813 start = -1;
4814 end = t->count;
4815
4816 for ( ; ; ) {
4817 pos = start + (end - start + 1) / 2;
4818
4819
4820
4821
4822
4823 if (pos == end)
4824 break;
4825
4826 caddr = t->loc[pos].addr;
4827 if (track->addr == caddr) {
4828
4829 l = &t->loc[pos];
4830 l->count++;
4831 if (track->when) {
4832 l->sum_time += age;
4833 if (age < l->min_time)
4834 l->min_time = age;
4835 if (age > l->max_time)
4836 l->max_time = age;
4837
4838 if (track->pid < l->min_pid)
4839 l->min_pid = track->pid;
4840 if (track->pid > l->max_pid)
4841 l->max_pid = track->pid;
4842
4843 cpumask_set_cpu(track->cpu,
4844 to_cpumask(l->cpus));
4845 }
4846 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4847 return 1;
4848 }
4849
4850 if (track->addr < caddr)
4851 end = pos;
4852 else
4853 start = pos;
4854 }
4855
4856
4857
4858
4859 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4860 return 0;
4861
4862 l = t->loc + pos;
4863 if (pos < t->count)
4864 memmove(l + 1, l,
4865 (t->count - pos) * sizeof(struct location));
4866 t->count++;
4867 l->count = 1;
4868 l->addr = track->addr;
4869 l->sum_time = age;
4870 l->min_time = age;
4871 l->max_time = age;
4872 l->min_pid = track->pid;
4873 l->max_pid = track->pid;
4874 cpumask_clear(to_cpumask(l->cpus));
4875 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4876 nodes_clear(l->nodes);
4877 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4878 return 1;
4879}
4880
4881static void process_slab(struct loc_track *t, struct kmem_cache *s,
4882 struct page *page, enum track_item alloc)
4883{
4884 void *addr = page_address(page);
4885 void *p;
4886 unsigned long *map;
4887
4888 map = get_map(s, page);
4889 for_each_object(p, s, addr, page->objects)
4890 if (!test_bit(__obj_to_index(s, addr, p), map))
4891 add_location(t, s, get_track(s, p, alloc));
4892 put_map(map);
4893}
4894#endif
4895#endif
4896
4897#ifdef CONFIG_SYSFS
4898enum slab_stat_type {
4899 SL_ALL,
4900 SL_PARTIAL,
4901 SL_CPU,
4902 SL_OBJECTS,
4903 SL_TOTAL
4904};
4905
4906#define SO_ALL (1 << SL_ALL)
4907#define SO_PARTIAL (1 << SL_PARTIAL)
4908#define SO_CPU (1 << SL_CPU)
4909#define SO_OBJECTS (1 << SL_OBJECTS)
4910#define SO_TOTAL (1 << SL_TOTAL)
4911
4912static ssize_t show_slab_objects(struct kmem_cache *s,
4913 char *buf, unsigned long flags)
4914{
4915 unsigned long total = 0;
4916 int node;
4917 int x;
4918 unsigned long *nodes;
4919 int len = 0;
4920
4921 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4922 if (!nodes)
4923 return -ENOMEM;
4924
4925 if (flags & SO_CPU) {
4926 int cpu;
4927
4928 for_each_possible_cpu(cpu) {
4929 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4930 cpu);
4931 int node;
4932 struct page *page;
4933
4934 page = READ_ONCE(c->page);
4935 if (!page)
4936 continue;
4937
4938 node = page_to_nid(page);
4939 if (flags & SO_TOTAL)
4940 x = page->objects;
4941 else if (flags & SO_OBJECTS)
4942 x = page->inuse;
4943 else
4944 x = 1;
4945
4946 total += x;
4947 nodes[node] += x;
4948
4949 page = slub_percpu_partial_read_once(c);
4950 if (page) {
4951 node = page_to_nid(page);
4952 if (flags & SO_TOTAL)
4953 WARN_ON_ONCE(1);
4954 else if (flags & SO_OBJECTS)
4955 WARN_ON_ONCE(1);
4956 else
4957 x = page->pages;
4958 total += x;
4959 nodes[node] += x;
4960 }
4961 }
4962 }
4963
 /*
  * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
  * already held which will conflict with an existing lock order:
  *
  * mem_hotplug_lock->slab_mutex->kernfs_mutex
  *
  * We don't really need mem_hotplug_lock (to hold off
  * slab_mem_going_offline_callback) here because slab's memory hot
  * unplug code doesn't destroy the kmem_cache->node[] data.
  */
4974
4975#ifdef CONFIG_SLUB_DEBUG
4976 if (flags & SO_ALL) {
4977 struct kmem_cache_node *n;
4978
4979 for_each_kmem_cache_node(s, node, n) {
4980
4981 if (flags & SO_TOTAL)
4982 x = atomic_long_read(&n->total_objects);
4983 else if (flags & SO_OBJECTS)
4984 x = atomic_long_read(&n->total_objects) -
4985 count_partial(n, count_free);
4986 else
4987 x = atomic_long_read(&n->nr_slabs);
4988 total += x;
4989 nodes[node] += x;
4990 }
4991
4992 } else
4993#endif
4994 if (flags & SO_PARTIAL) {
4995 struct kmem_cache_node *n;
4996
4997 for_each_kmem_cache_node(s, node, n) {
4998 if (flags & SO_TOTAL)
4999 x = count_partial(n, count_total);
5000 else if (flags & SO_OBJECTS)
5001 x = count_partial(n, count_inuse);
5002 else
5003 x = n->nr_partial;
5004 total += x;
5005 nodes[node] += x;
5006 }
5007 }
5008
5009 len += sysfs_emit_at(buf, len, "%lu", total);
5010#ifdef CONFIG_NUMA
5011 for (node = 0; node < nr_node_ids; node++) {
5012 if (nodes[node])
5013 len += sysfs_emit_at(buf, len, " N%d=%lu",
5014 node, nodes[node]);
5015 }
5016#endif
5017 len += sysfs_emit_at(buf, len, "\n");
5018 kfree(nodes);
5019
5020 return len;
5021}
5022
5023#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5024#define to_slab(n) container_of(n, struct kmem_cache, kobj)
5025
5026struct slab_attribute {
5027 struct attribute attr;
5028 ssize_t (*show)(struct kmem_cache *s, char *buf);
5029 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5030};
5031
5032#define SLAB_ATTR_RO(_name) \
5033 static struct slab_attribute _name##_attr = \
5034 __ATTR(_name, 0400, _name##_show, NULL)
5035
5036#define SLAB_ATTR(_name) \
5037 static struct slab_attribute _name##_attr = \
5038 __ATTR(_name, 0600, _name##_show, _name##_store)
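
/*
 * For example, SLAB_ATTR_RO(order) below expands to a read-only (0400)
 * "order" attribute backed by order_show(), while SLAB_ATTR(min_partial)
 * additionally wires up min_partial_store() for a writable (0600) file
 * under /sys/kernel/slab/<cache>/.
 */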
5039
5040static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5041{
5042 return sysfs_emit(buf, "%u\n", s->size);
5043}
5044SLAB_ATTR_RO(slab_size);
5045
5046static ssize_t align_show(struct kmem_cache *s, char *buf)
5047{
5048 return sysfs_emit(buf, "%u\n", s->align);
5049}
5050SLAB_ATTR_RO(align);
5051
5052static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5053{
5054 return sysfs_emit(buf, "%u\n", s->object_size);
5055}
5056SLAB_ATTR_RO(object_size);
5057
5058static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5059{
5060 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5061}
5062SLAB_ATTR_RO(objs_per_slab);
5063
5064static ssize_t order_show(struct kmem_cache *s, char *buf)
5065{
5066 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5067}
5068SLAB_ATTR_RO(order);
5069
5070static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5071{
5072 return sysfs_emit(buf, "%lu\n", s->min_partial);
5073}
5074
5075static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5076 size_t length)
5077{
5078 unsigned long min;
5079 int err;
5080
5081 err = kstrtoul(buf, 10, &min);
5082 if (err)
5083 return err;
5084
5085 set_min_partial(s, min);
5086 return length;
5087}
5088SLAB_ATTR(min_partial);
5089
5090static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5091{
5092 return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
5093}
5094
5095static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5096 size_t length)
5097{
5098 unsigned int objects;
5099 int err;
5100
5101 err = kstrtouint(buf, 10, &objects);
5102 if (err)
5103 return err;
5104 if (objects && !kmem_cache_has_cpu_partial(s))
5105 return -EINVAL;
5106
5107 slub_set_cpu_partial(s, objects);
5108 flush_all(s);
5109 return length;
5110}
5111SLAB_ATTR(cpu_partial);
5112
5113static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5114{
5115 if (!s->ctor)
5116 return 0;
5117 return sysfs_emit(buf, "%pS\n", s->ctor);
5118}
5119SLAB_ATTR_RO(ctor);
5120
5121static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5122{
5123 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5124}
5125SLAB_ATTR_RO(aliases);
5126
5127static ssize_t partial_show(struct kmem_cache *s, char *buf)
5128{
5129 return show_slab_objects(s, buf, SO_PARTIAL);
5130}
5131SLAB_ATTR_RO(partial);
5132
5133static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5134{
5135 return show_slab_objects(s, buf, SO_CPU);
5136}
5137SLAB_ATTR_RO(cpu_slabs);
5138
5139static ssize_t objects_show(struct kmem_cache *s, char *buf)
5140{
5141 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5142}
5143SLAB_ATTR_RO(objects);
5144
5145static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5146{
5147 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5148}
5149SLAB_ATTR_RO(objects_partial);
5150
5151static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5152{
5153 int objects = 0;
5154 int pages = 0;
5155 int cpu;
5156 int len = 0;
5157
5158 for_each_online_cpu(cpu) {
5159 struct page *page;
5160
5161 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5162
5163 if (page) {
5164 pages += page->pages;
5165 objects += page->pobjects;
5166 }
5167 }
5168
5169 len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
5170
5171#ifdef CONFIG_SMP
5172 for_each_online_cpu(cpu) {
5173 struct page *page;
5174
5175 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5176 if (page)
5177 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5178 cpu, page->pobjects, page->pages);
5179 }
5180#endif
5181 len += sysfs_emit_at(buf, len, "\n");
5182
5183 return len;
5184}
5185SLAB_ATTR_RO(slabs_cpu_partial);
5186
5187static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5188{
5189 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5190}
5191SLAB_ATTR_RO(reclaim_account);
5192
5193static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5194{
5195 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5196}
5197SLAB_ATTR_RO(hwcache_align);
5198
5199#ifdef CONFIG_ZONE_DMA
5200static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5201{
5202 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5203}
5204SLAB_ATTR_RO(cache_dma);
5205#endif
5206
5207static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5208{
5209 return sysfs_emit(buf, "%u\n", s->usersize);
5210}
5211SLAB_ATTR_RO(usersize);
5212
5213static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5214{
5215 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5216}
5217SLAB_ATTR_RO(destroy_by_rcu);
5218
5219#ifdef CONFIG_SLUB_DEBUG
5220static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5221{
5222 return show_slab_objects(s, buf, SO_ALL);
5223}
5224SLAB_ATTR_RO(slabs);
5225
5226static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5227{
5228 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5229}
5230SLAB_ATTR_RO(total_objects);
5231
5232static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5233{
5234 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5235}
5236SLAB_ATTR_RO(sanity_checks);
5237
5238static ssize_t trace_show(struct kmem_cache *s, char *buf)
5239{
5240 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5241}
5242SLAB_ATTR_RO(trace);
5243
5244static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5245{
5246 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5247}
5248
5249SLAB_ATTR_RO(red_zone);
5250
5251static ssize_t poison_show(struct kmem_cache *s, char *buf)
5252{
5253 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
5254}
5255
5256SLAB_ATTR_RO(poison);
5257
5258static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5259{
5260 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5261}
5262
5263SLAB_ATTR_RO(store_user);
5264
5265static ssize_t validate_show(struct kmem_cache *s, char *buf)
5266{
5267 return 0;
5268}
5269
5270static ssize_t validate_store(struct kmem_cache *s,
5271 const char *buf, size_t length)
5272{
5273 int ret = -EINVAL;
5274
5275 if (buf[0] == '1') {
5276 ret = validate_slab_cache(s);
5277 if (ret >= 0)
5278 ret = length;
5279 }
5280 return ret;
5281}
5282SLAB_ATTR(validate);
5283
5284#endif
5285
5286#ifdef CONFIG_FAILSLAB
5287static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5288{
5289 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5290}
5291SLAB_ATTR_RO(failslab);
5292#endif
5293
5294static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5295{
5296 return 0;
5297}
5298
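/*
 * Writing '1' to 'shrink' calls kmem_cache_shrink() to release empty
 * slabs; any other input is rejected with -EINVAL.
 */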
5299static ssize_t shrink_store(struct kmem_cache *s,
5300 const char *buf, size_t length)
5301{
5302 if (buf[0] == '1')
5303 kmem_cache_shrink(s);
5304 else
5305 return -EINVAL;
5306 return length;
5307}
5308SLAB_ATTR(shrink);
5309
5310#ifdef CONFIG_NUMA
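/*
 * remote_node_defrag_ratio is kept internally scaled by 10 (0-1000) but
 * is shown and set through sysfs as a percentage in the range 0-100.
 */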
5311static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5312{
5313 return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5314}
5315
5316static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5317 const char *buf, size_t length)
5318{
5319 unsigned int ratio;
5320 int err;
5321
5322 err = kstrtouint(buf, 10, &ratio);
5323 if (err)
5324 return err;
5325 if (ratio > 100)
5326 return -ERANGE;
5327
5328 s->remote_node_defrag_ratio = ratio * 10;
5329
5330 return length;
5331}
5332SLAB_ATTR(remote_node_defrag_ratio);
5333#endif
5334
5335#ifdef CONFIG_SLUB_STATS
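/*
 * Emit one statistics counter as "<total> C<cpu>=<value> ...": the sum
 * across online cpus first, then (on SMP) a breakdown for every cpu
 * with a non-zero count.
 */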
5336static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5337{
5338 unsigned long sum = 0;
5339 int cpu;
5340 int len = 0;
5341 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5342
5343 if (!data)
5344 return -ENOMEM;
5345
5346 for_each_online_cpu(cpu) {
		unsigned int x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5348
5349 data[cpu] = x;
5350 sum += x;
5351 }
5352
5353 len += sysfs_emit_at(buf, len, "%lu", sum);
5354
5355#ifdef CONFIG_SMP
5356 for_each_online_cpu(cpu) {
5357 if (data[cpu])
5358 len += sysfs_emit_at(buf, len, " C%d=%u",
5359 cpu, data[cpu]);
5360 }
5361#endif
5362 kfree(data);
5363 len += sysfs_emit_at(buf, len, "\n");
5364
5365 return len;
5366}
5367
5368static void clear_stat(struct kmem_cache *s, enum stat_item si)
5369{
5370 int cpu;
5371
5372 for_each_online_cpu(cpu)
5373 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5374}
5375
5376#define STAT_ATTR(si, text) \
5377static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5378{ \
5379 return show_stat(s, buf, si); \
5380} \
5381static ssize_t text##_store(struct kmem_cache *s, \
5382 const char *buf, size_t length) \
5383{ \
5384 if (buf[0] != '0') \
5385 return -EINVAL; \
5386 clear_stat(s, si); \
5387 return length; \
5388} \
SLAB_ATTR(text);
5390
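/*
 * Each STAT_ATTR() invocation below defines a <name>_show() that prints
 * the per-cpu counter via show_stat() and a <name>_store() that clears
 * it when '0' is written, e.g. STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath)
 * creates the "alloc_fastpath" attribute.
 */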
5391STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5392STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5393STAT_ATTR(FREE_FASTPATH, free_fastpath);
5394STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5395STAT_ATTR(FREE_FROZEN, free_frozen);
5396STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5397STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5398STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5399STAT_ATTR(ALLOC_SLAB, alloc_slab);
5400STAT_ATTR(ALLOC_REFILL, alloc_refill);
5401STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5402STAT_ATTR(FREE_SLAB, free_slab);
5403STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5404STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5405STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5406STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5407STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5408STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5409STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5410STAT_ATTR(ORDER_FALLBACK, order_fallback);
5411STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5412STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5413STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5414STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5415STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5416STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5417#endif
5418
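/*
 * Attributes exported for each cache under /sys/kernel/slab/<cache>/.
 * The array must be terminated by a NULL entry.
 */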
5419static struct attribute *slab_attrs[] = {
5420 &slab_size_attr.attr,
5421 &object_size_attr.attr,
5422 &objs_per_slab_attr.attr,
5423 &order_attr.attr,
5424 &min_partial_attr.attr,
5425 &cpu_partial_attr.attr,
5426 &objects_attr.attr,
5427 &objects_partial_attr.attr,
5428 &partial_attr.attr,
5429 &cpu_slabs_attr.attr,
5430 &ctor_attr.attr,
5431 &aliases_attr.attr,
5432 &align_attr.attr,
5433 &hwcache_align_attr.attr,
5434 &reclaim_account_attr.attr,
5435 &destroy_by_rcu_attr.attr,
5436 &shrink_attr.attr,
5437 &slabs_cpu_partial_attr.attr,
5438#ifdef CONFIG_SLUB_DEBUG
5439 &total_objects_attr.attr,
5440 &slabs_attr.attr,
5441 &sanity_checks_attr.attr,
5442 &trace_attr.attr,
5443 &red_zone_attr.attr,
5444 &poison_attr.attr,
5445 &store_user_attr.attr,
5446 &validate_attr.attr,
5447#endif
5448#ifdef CONFIG_ZONE_DMA
5449 &cache_dma_attr.attr,
5450#endif
5451#ifdef CONFIG_NUMA
5452 &remote_node_defrag_ratio_attr.attr,
5453#endif
5454#ifdef CONFIG_SLUB_STATS
5455 &alloc_fastpath_attr.attr,
5456 &alloc_slowpath_attr.attr,
5457 &free_fastpath_attr.attr,
5458 &free_slowpath_attr.attr,
5459 &free_frozen_attr.attr,
5460 &free_add_partial_attr.attr,
5461 &free_remove_partial_attr.attr,
5462 &alloc_from_partial_attr.attr,
5463 &alloc_slab_attr.attr,
5464 &alloc_refill_attr.attr,
5465 &alloc_node_mismatch_attr.attr,
5466 &free_slab_attr.attr,
5467 &cpuslab_flush_attr.attr,
5468 &deactivate_full_attr.attr,
5469 &deactivate_empty_attr.attr,
5470 &deactivate_to_head_attr.attr,
5471 &deactivate_to_tail_attr.attr,
5472 &deactivate_remote_frees_attr.attr,
5473 &deactivate_bypass_attr.attr,
5474 &order_fallback_attr.attr,
5475 &cmpxchg_double_fail_attr.attr,
5476 &cmpxchg_double_cpu_fail_attr.attr,
5477 &cpu_partial_alloc_attr.attr,
5478 &cpu_partial_free_attr.attr,
5479 &cpu_partial_node_attr.attr,
5480 &cpu_partial_drain_attr.attr,
5481#endif
5482#ifdef CONFIG_FAILSLAB
5483 &failslab_attr.attr,
5484#endif
5485 &usersize_attr.attr,
5486
5487 NULL
5488};
5489
5490static const struct attribute_group slab_attr_group = {
5491 .attrs = slab_attrs,
5492};
5493
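/*
 * sysfs_ops glue: recover the kmem_cache and slab_attribute from the
 * generic kobject/attribute pair and dispatch to the matching
 * show/store handler defined above.
 */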
5494static ssize_t slab_attr_show(struct kobject *kobj,
5495 struct attribute *attr,
5496 char *buf)
5497{
5498 struct slab_attribute *attribute;
5499 struct kmem_cache *s;
5500 int err;
5501
5502 attribute = to_slab_attr(attr);
5503 s = to_slab(kobj);
5504
5505 if (!attribute->show)
5506 return -EIO;
5507
5508 err = attribute->show(s, buf);
5509
5510 return err;
5511}
5512
5513static ssize_t slab_attr_store(struct kobject *kobj,
5514 struct attribute *attr,
5515 const char *buf, size_t len)
5516{
5517 struct slab_attribute *attribute;
5518 struct kmem_cache *s;
5519 int err;
5520
5521 attribute = to_slab_attr(attr);
5522 s = to_slab(kobj);
5523
5524 if (!attribute->store)
5525 return -EIO;
5526
5527 err = attribute->store(s, buf, len);
5528 return err;
5529}
5530
5531static void kmem_cache_release(struct kobject *k)
5532{
5533 slab_kmem_cache_release(to_slab(k));
5534}
5535
5536static const struct sysfs_ops slab_sysfs_ops = {
5537 .show = slab_attr_show,
5538 .store = slab_attr_store,
5539};
5540
5541static struct kobj_type slab_ktype = {
5542 .sysfs_ops = &slab_sysfs_ops,
5543 .release = kmem_cache_release,
5544};
5545
5546static struct kset *slab_kset;
5547
5548static inline struct kset *cache_kset(struct kmem_cache *s)
5549{
5550 return slab_kset;
5551}
5552
5553#define ID_STR_LENGTH 64
5554
/*
 * Create a unique string id for a slab cache:
 *
 * Format :[flags-]size
 */
5559static char *create_unique_id(struct kmem_cache *s)
5560{
5561 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5562 char *p = name;
5563
5564 BUG_ON(!name);
5565
5566 *p++ = ':';
	/*
	 * First flags affecting slabcache operations. We will only
	 * get here for aliasable slabs so we do not need to support
	 * too many flags. The flags here must cover all flags that
	 * are matched during merging to guarantee that the id is
	 * unique.
	 */
5574 if (s->flags & SLAB_CACHE_DMA)
5575 *p++ = 'd';
5576 if (s->flags & SLAB_CACHE_DMA32)
5577 *p++ = 'D';
5578 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5579 *p++ = 'a';
5580 if (s->flags & SLAB_CONSISTENCY_CHECKS)
5581 *p++ = 'F';
5582 if (s->flags & SLAB_ACCOUNT)
5583 *p++ = 'A';
5584 if (p != name + 1)
5585 *p++ = '-';
5586 p += sprintf(p, "%07u", s->size);
5587
5588 BUG_ON(p > name + ID_STR_LENGTH - 1);
5589 return name;
5590}
5591
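/*
 * Register a cache in sysfs. Unmergeable caches use their name directly;
 * mergeable caches get a unique id of the form ":[flags-]size" as the
 * directory name plus an alias symlink carrying the cache name.
 */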
5592static int sysfs_slab_add(struct kmem_cache *s)
5593{
5594 int err;
5595 const char *name;
5596 struct kset *kset = cache_kset(s);
5597 int unmergeable = slab_unmergeable(s);
5598
5599 if (!kset) {
5600 kobject_init(&s->kobj, &slab_ktype);
5601 return 0;
5602 }
5603
5604 if (!unmergeable && disable_higher_order_debug &&
5605 (slub_debug & DEBUG_METADATA_FLAGS))
5606 unmergeable = 1;
5607
5608 if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
5614 sysfs_remove_link(&slab_kset->kobj, s->name);
5615 name = s->name;
5616 } else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
5621 name = create_unique_id(s);
5622 }
5623
5624 s->kobj.kset = kset;
5625 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5626 if (err)
5627 goto out;
5628
5629 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5630 if (err)
5631 goto out_del_kobj;
5632
5633 if (!unmergeable) {
		/* Setup first alias */
5635 sysfs_slab_alias(s, s->name);
5636 }
5637out:
5638 if (!unmergeable)
5639 kfree(name);
5640 return err;
5641out_del_kobj:
5642 kobject_del(&s->kobj);
5643 goto out;
5644}
5645
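/*
 * sysfs_slab_unlink() removes the sysfs directory when a cache is being
 * destroyed; sysfs_slab_release() drops the last kobject reference, which
 * ends up freeing the cache through kmem_cache_release().
 */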
5646void sysfs_slab_unlink(struct kmem_cache *s)
5647{
5648 if (slab_state >= FULL)
5649 kobject_del(&s->kobj);
5650}
5651
5652void sysfs_slab_release(struct kmem_cache *s)
5653{
5654 if (slab_state >= FULL)
5655 kobject_put(&s->kobj);
5656}
5657
/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
5662struct saved_alias {
5663 struct kmem_cache *s;
5664 const char *name;
5665 struct saved_alias *next;
5666};
5667
5668static struct saved_alias *alias_list;
5669
5670static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5671{
5672 struct saved_alias *al;
5673
5674 if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
5678 sysfs_remove_link(&slab_kset->kobj, name);
5679 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5680 }
5681
5682 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5683 if (!al)
5684 return -ENOMEM;
5685
5686 al->s = s;
5687 al->name = name;
5688 al->next = alias_list;
5689 alias_list = al;
5690 return 0;
5691}
5692
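/*
 * Runs once sysfs is up: register every cache created during early boot
 * and create the alias links that were queued on alias_list meanwhile.
 */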
5693static int __init slab_sysfs_init(void)
5694{
5695 struct kmem_cache *s;
5696 int err;
5697
5698 mutex_lock(&slab_mutex);
5699
5700 slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
5701 if (!slab_kset) {
5702 mutex_unlock(&slab_mutex);
5703 pr_err("Cannot register slab subsystem.\n");
5704 return -ENOSYS;
5705 }
5706
5707 slab_state = FULL;
5708
5709 list_for_each_entry(s, &slab_caches, list) {
5710 err = sysfs_slab_add(s);
5711 if (err)
5712 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5713 s->name);
5714 }
5715
5716 while (alias_list) {
5717 struct saved_alias *al = alias_list;
5718
5719 alias_list = alias_list->next;
5720 err = sysfs_slab_alias(al->s, al->name);
5721 if (err)
5722 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5723 al->name);
5724 kfree(al);
5725 }
5726
5727 mutex_unlock(&slab_mutex);
5728 return 0;
5729}
5730
5731__initcall(slab_sysfs_init);
5732#endif
5733
5734#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
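/*
 * debugfs interface: when SLAB_STORE_USER is enabled, each cache gets
 * /sys/kernel/debug/slab/<cache>/alloc_traces and free_traces. Each line
 * is roughly of the form
 *   <count> <call site> age=<min>/<avg>/<max> pid=... cpus=... nodes=...
 */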
5735static int slab_debugfs_show(struct seq_file *seq, void *v)
5736{
5738 struct location *l;
	/* The cursor is the loff_t position pointer, see slab_debugfs_start() */
	unsigned int idx = (unsigned int)*(loff_t *)v;
5740 struct loc_track *t = seq->private;
5741
5742 if (idx < t->count) {
5743 l = &t->loc[idx];
5744
5745 seq_printf(seq, "%7ld ", l->count);
5746
5747 if (l->addr)
5748 seq_printf(seq, "%pS", (void *)l->addr);
5749 else
5750 seq_puts(seq, "<not-available>");
5751
5752 if (l->sum_time != l->min_time) {
5753 seq_printf(seq, " age=%ld/%llu/%ld",
5754 l->min_time, div_u64(l->sum_time, l->count),
5755 l->max_time);
5756 } else
5757 seq_printf(seq, " age=%ld", l->min_time);
5758
5759 if (l->min_pid != l->max_pid)
5760 seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
5761 else
5762 seq_printf(seq, " pid=%ld",
5763 l->min_pid);
5764
5765 if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
5766 seq_printf(seq, " cpus=%*pbl",
5767 cpumask_pr_args(to_cpumask(l->cpus)));
5768
5769 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
5770 seq_printf(seq, " nodes=%*pbl",
5771 nodemask_pr_args(&l->nodes));
5772
5773 seq_puts(seq, "\n");
5774 }
5775
5776 if (!idx && !t->count)
5777 seq_puts(seq, "No data\n");
5778
5779 return 0;
5780}
5781
5782static void slab_debugfs_stop(struct seq_file *seq, void *v)
5783{
5784}
5785
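/*
 * The seq_file cursor is simply the file position: start/next hand back
 * the loff_t pointer itself and show() treats it as an index into the
 * loc_track table.
 */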
5786static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
5787{
5788 struct loc_track *t = seq->private;
5789
5790 v = ppos;
5791 ++*ppos;
5792 if (*ppos <= t->count)
5793 return v;
5794
5795 return NULL;
5796}
5797
5798static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
5799{
5800 return ppos;
5801}
5802
5803static const struct seq_operations slab_debugfs_sops = {
5804 .start = slab_debugfs_start,
5805 .next = slab_debugfs_next,
5806 .stop = slab_debugfs_stop,
5807 .show = slab_debugfs_show,
5808};
5809
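/*
 * Opening one of the trace files snapshots the tracking data: cpu slabs
 * are flushed back to the node lists first, then every partial and full
 * slab on each node is folded into the loc_track table.
 */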
5810static int slab_debug_trace_open(struct inode *inode, struct file *filep)
5811{
5813 struct kmem_cache_node *n;
5814 enum track_item alloc;
5815 int node;
5816 struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
5817 sizeof(struct loc_track));
	struct kmem_cache *s = file_inode(filep)->i_private;

	if (!t)
		return -ENOMEM;

5820 if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
5821 alloc = TRACK_ALLOC;
5822 else
5823 alloc = TRACK_FREE;
5824
	if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
		seq_release_private(inode, filep);
		return -ENOMEM;
	}
5827
	/* Push back cpu slabs */
5829 flush_all(s);
5830
5831 for_each_kmem_cache_node(s, node, n) {
5832 unsigned long flags;
5833 struct page *page;
5834
5835 if (!atomic_long_read(&n->nr_slabs))
5836 continue;
5837
5838 spin_lock_irqsave(&n->list_lock, flags);
5839 list_for_each_entry(page, &n->partial, slab_list)
5840 process_slab(t, s, page, alloc);
5841 list_for_each_entry(page, &n->full, slab_list)
5842 process_slab(t, s, page, alloc);
5843 spin_unlock_irqrestore(&n->list_lock, flags);
5844 }
5845
5846 return 0;
5847}
5848
5849static int slab_debug_trace_release(struct inode *inode, struct file *file)
5850{
5851 struct seq_file *seq = file->private_data;
5852 struct loc_track *t = seq->private;
5853
5854 free_loc_track(t);
5855 return seq_release_private(inode, file);
5856}
5857
5858static const struct file_operations slab_debugfs_fops = {
5859 .open = slab_debug_trace_open,
5860 .read = seq_read,
5861 .llseek = seq_lseek,
5862 .release = slab_debug_trace_release,
5863};
5864
5865static void debugfs_slab_add(struct kmem_cache *s)
5866{
5867 struct dentry *slab_cache_dir;
5868
5869 if (unlikely(!slab_debugfs_root))
5870 return;
5871
5872 slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
5873
5874 debugfs_create_file("alloc_traces", 0400,
5875 slab_cache_dir, s, &slab_debugfs_fops);
5876
5877 debugfs_create_file("free_traces", 0400,
5878 slab_cache_dir, s, &slab_debugfs_fops);
5879}
5880
5881void debugfs_slab_release(struct kmem_cache *s)
5882{
5883 debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
5884}
5885
5886static int __init slab_debugfs_init(void)
5887{
5888 struct kmem_cache *s;
5889
5890 slab_debugfs_root = debugfs_create_dir("slab", NULL);
5891
5892 list_for_each_entry(s, &slab_caches, list)
5893 if (s->flags & SLAB_STORE_USER)
5894 debugfs_slab_add(s);
5895
5896 return 0;
5897
5898}
5899__initcall(slab_debugfs_init);
5900#endif
5901
/*
 * The /proc/slabinfo ABI
 */
5904#ifdef CONFIG_SLUB_DEBUG
5905void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5906{
5907 unsigned long nr_slabs = 0;
5908 unsigned long nr_objs = 0;
5909 unsigned long nr_free = 0;
5910 int node;
5911 struct kmem_cache_node *n;
5912
5913 for_each_kmem_cache_node(s, node, n) {
5914 nr_slabs += node_nr_slabs(n);
5915 nr_objs += node_nr_objs(n);
5916 nr_free += count_partial(n, count_free);
5917 }
5918
5919 sinfo->active_objs = nr_objs - nr_free;
5920 sinfo->num_objs = nr_objs;
5921 sinfo->active_slabs = nr_slabs;
5922 sinfo->num_slabs = nr_slabs;
5923 sinfo->objects_per_slab = oo_objects(s->oo);
5924 sinfo->cache_order = oo_order(s->oo);
5925}
5926
5927void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5928{
5929}
5930
5931ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5932 size_t count, loff_t *ppos)
5933{
5934 return -EIO;
5935}
5936#endif
5937