1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/mm.h>
14#include <linux/swap.h>
15#include <linux/module.h>
16#include <linux/bit_spinlock.h>
17#include <linux/interrupt.h>
18#include <linux/swab.h>
19#include <linux/bitops.h>
20#include <linux/slab.h>
21#include "slab.h"
22#include <linux/proc_fs.h>
23#include <linux/seq_file.h>
24#include <linux/kasan.h>
25#include <linux/cpu.h>
26#include <linux/cpuset.h>
27#include <linux/mempolicy.h>
28#include <linux/ctype.h>
29#include <linux/debugobjects.h>
30#include <linux/kallsyms.h>
31#include <linux/kfence.h>
32#include <linux/memory.h>
33#include <linux/math64.h>
34#include <linux/fault-inject.h>
35#include <linux/stacktrace.h>
36#include <linux/prefetch.h>
37#include <linux/memcontrol.h>
38#include <linux/random.h>
39#include <kunit/test.h>
40
41#include <linux/debugfs.h>
42#include <trace/events/kmem.h>
43
44#include "internal.h"
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163#ifndef CONFIG_PREEMPT_RT
164#define slub_get_cpu_ptr(var) get_cpu_ptr(var)
165#define slub_put_cpu_ptr(var) put_cpu_ptr(var)
166#else
167#define slub_get_cpu_ptr(var) \
168({ \
169 migrate_disable(); \
170 this_cpu_ptr(var); \
171})
172#define slub_put_cpu_ptr(var) \
173do { \
174 (void)(var); \
175 migrate_enable(); \
176} while (0)
177#endif
178
179#ifdef CONFIG_SLUB_DEBUG
180#ifdef CONFIG_SLUB_DEBUG_ON
181DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
182#else
183DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
184#endif
185#endif
186
187static inline bool kmem_cache_debug(struct kmem_cache *s)
188{
189 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
190}
191
192void *fixup_red_left(struct kmem_cache *s, void *p)
193{
194 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
195 p += s->red_left_pad;
196
197 return p;
198}
199
200static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
201{
202#ifdef CONFIG_SLUB_CPU_PARTIAL
203 return !kmem_cache_debug(s);
204#else
205 return false;
206#endif
207}
208
209
210
211
212
213
214
215
216
217
218#undef SLUB_DEBUG_CMPXCHG
219
220
221
222
223
224#define MIN_PARTIAL 5
225
226
227
228
229
230
231#define MAX_PARTIAL 10
232
233#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
234 SLAB_POISON | SLAB_STORE_USER)
235
236
237
238
239
240#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
241 SLAB_TRACE)
242
243
244
245
246
247
248
249#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
250
251#define OO_SHIFT 16
252#define OO_MASK ((1 << OO_SHIFT) - 1)
253#define MAX_OBJS_PER_PAGE 32767
254
255
256
257#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
258
259#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
260
261
262
263
264#define TRACK_ADDRS_COUNT 16
265struct track {
266 unsigned long addr;
267#ifdef CONFIG_STACKTRACE
268 unsigned long addrs[TRACK_ADDRS_COUNT];
269#endif
270 int cpu;
271 int pid;
272 unsigned long when;
273};
274
275enum track_item { TRACK_ALLOC, TRACK_FREE };
276
277#ifdef CONFIG_SYSFS
278static int sysfs_slab_add(struct kmem_cache *);
279static int sysfs_slab_alias(struct kmem_cache *, const char *);
280#else
281static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
282static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
283 { return 0; }
284#endif
285
286#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
287static void debugfs_slab_add(struct kmem_cache *);
288#else
289static inline void debugfs_slab_add(struct kmem_cache *s) { }
290#endif
291
292static inline void stat(const struct kmem_cache *s, enum stat_item si)
293{
294#ifdef CONFIG_SLUB_STATS
295
296
297
298
299 raw_cpu_inc(s->cpu_slab->stat[si]);
300#endif
301}
302
303
304
305
306
307
308
309static nodemask_t slab_nodes;
310
311
312
313
314
315
316
317
318
319
320static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
321 unsigned long ptr_addr)
322{
323#ifdef CONFIG_SLAB_FREELIST_HARDENED
324
325
326
327
328
329
330
331
332
333
334 return (void *)((unsigned long)ptr ^ s->random ^
335 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
336#else
337 return ptr;
338#endif
339}
340
341
342static inline void *freelist_dereference(const struct kmem_cache *s,
343 void *ptr_addr)
344{
345 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
346 (unsigned long)ptr_addr);
347}
348
349static inline void *get_freepointer(struct kmem_cache *s, void *object)
350{
351 object = kasan_reset_tag(object);
352 return freelist_dereference(s, object + s->offset);
353}
354
355static void prefetch_freepointer(const struct kmem_cache *s, void *object)
356{
357 prefetch(object + s->offset);
358}
359
360static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
361{
362 unsigned long freepointer_addr;
363 void *p;
364
365 if (!debug_pagealloc_enabled_static())
366 return get_freepointer(s, object);
367
368 object = kasan_reset_tag(object);
369 freepointer_addr = (unsigned long)object + s->offset;
370 copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
371 return freelist_ptr(s, p, freepointer_addr);
372}
373
374static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
375{
376 unsigned long freeptr_addr = (unsigned long)object + s->offset;
377
378#ifdef CONFIG_SLAB_FREELIST_HARDENED
379 BUG_ON(object == fp);
380#endif
381
382 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
383 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
384}
385
386
387#define for_each_object(__p, __s, __addr, __objects) \
388 for (__p = fixup_red_left(__s, __addr); \
389 __p < (__addr) + (__objects) * (__s)->size; \
390 __p += (__s)->size)
391
392static inline unsigned int order_objects(unsigned int order, unsigned int size)
393{
394 return ((unsigned int)PAGE_SIZE << order) / size;
395}
396
397static inline struct kmem_cache_order_objects oo_make(unsigned int order,
398 unsigned int size)
399{
400 struct kmem_cache_order_objects x = {
401 (order << OO_SHIFT) + order_objects(order, size)
402 };
403
404 return x;
405}
406
407static inline unsigned int oo_order(struct kmem_cache_order_objects x)
408{
409 return x.x >> OO_SHIFT;
410}
411
412static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
413{
414 return x.x & OO_MASK;
415}
416
417
418
419
420static __always_inline void __slab_lock(struct page *page)
421{
422 VM_BUG_ON_PAGE(PageTail(page), page);
423 bit_spin_lock(PG_locked, &page->flags);
424}
425
426static __always_inline void __slab_unlock(struct page *page)
427{
428 VM_BUG_ON_PAGE(PageTail(page), page);
429 __bit_spin_unlock(PG_locked, &page->flags);
430}
431
432static __always_inline void slab_lock(struct page *page, unsigned long *flags)
433{
434 if (IS_ENABLED(CONFIG_PREEMPT_RT))
435 local_irq_save(*flags);
436 __slab_lock(page);
437}
438
439static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
440{
441 __slab_unlock(page);
442 if (IS_ENABLED(CONFIG_PREEMPT_RT))
443 local_irq_restore(*flags);
444}
445
446
447
448
449
450
451static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
452 void *freelist_old, unsigned long counters_old,
453 void *freelist_new, unsigned long counters_new,
454 const char *n)
455{
456 if (!IS_ENABLED(CONFIG_PREEMPT_RT))
457 lockdep_assert_irqs_disabled();
458#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
459 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
460 if (s->flags & __CMPXCHG_DOUBLE) {
461 if (cmpxchg_double(&page->freelist, &page->counters,
462 freelist_old, counters_old,
463 freelist_new, counters_new))
464 return true;
465 } else
466#endif
467 {
468
469 unsigned long flags = 0;
470
471 slab_lock(page, &flags);
472 if (page->freelist == freelist_old &&
473 page->counters == counters_old) {
474 page->freelist = freelist_new;
475 page->counters = counters_new;
476 slab_unlock(page, &flags);
477 return true;
478 }
479 slab_unlock(page, &flags);
480 }
481
482 cpu_relax();
483 stat(s, CMPXCHG_DOUBLE_FAIL);
484
485#ifdef SLUB_DEBUG_CMPXCHG
486 pr_info("%s %s: cmpxchg double redo ", n, s->name);
487#endif
488
489 return false;
490}
491
492static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
493 void *freelist_old, unsigned long counters_old,
494 void *freelist_new, unsigned long counters_new,
495 const char *n)
496{
497#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
498 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
499 if (s->flags & __CMPXCHG_DOUBLE) {
500 if (cmpxchg_double(&page->freelist, &page->counters,
501 freelist_old, counters_old,
502 freelist_new, counters_new))
503 return true;
504 } else
505#endif
506 {
507 unsigned long flags;
508
509 local_irq_save(flags);
510 __slab_lock(page);
511 if (page->freelist == freelist_old &&
512 page->counters == counters_old) {
513 page->freelist = freelist_new;
514 page->counters = counters_new;
515 __slab_unlock(page);
516 local_irq_restore(flags);
517 return true;
518 }
519 __slab_unlock(page);
520 local_irq_restore(flags);
521 }
522
523 cpu_relax();
524 stat(s, CMPXCHG_DOUBLE_FAIL);
525
526#ifdef SLUB_DEBUG_CMPXCHG
527 pr_info("%s %s: cmpxchg double redo ", n, s->name);
528#endif
529
530 return false;
531}
532
533#ifdef CONFIG_SLUB_DEBUG
534static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
535static DEFINE_RAW_SPINLOCK(object_map_lock);
536
537static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
538 struct page *page)
539{
540 void *addr = page_address(page);
541 void *p;
542
543 bitmap_zero(obj_map, page->objects);
544
545 for (p = page->freelist; p; p = get_freepointer(s, p))
546 set_bit(__obj_to_index(s, addr, p), obj_map);
547}
548
549#if IS_ENABLED(CONFIG_KUNIT)
550static bool slab_add_kunit_errors(void)
551{
552 struct kunit_resource *resource;
553
554 if (likely(!current->kunit_test))
555 return false;
556
557 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
558 if (!resource)
559 return false;
560
561 (*(int *)resource->data)++;
562 kunit_put_resource(resource);
563 return true;
564}
565#else
566static inline bool slab_add_kunit_errors(void) { return false; }
567#endif
568
569
570
571
572
573
574
575static unsigned long *get_map(struct kmem_cache *s, struct page *page)
576 __acquires(&object_map_lock)
577{
578 VM_BUG_ON(!irqs_disabled());
579
580 raw_spin_lock(&object_map_lock);
581
582 __fill_map(object_map, s, page);
583
584 return object_map;
585}
586
587static void put_map(unsigned long *map) __releases(&object_map_lock)
588{
589 VM_BUG_ON(map != object_map);
590 raw_spin_unlock(&object_map_lock);
591}
592
593static inline unsigned int size_from_object(struct kmem_cache *s)
594{
595 if (s->flags & SLAB_RED_ZONE)
596 return s->size - s->red_left_pad;
597
598 return s->size;
599}
600
601static inline void *restore_red_left(struct kmem_cache *s, void *p)
602{
603 if (s->flags & SLAB_RED_ZONE)
604 p -= s->red_left_pad;
605
606 return p;
607}
608
609
610
611
612#if defined(CONFIG_SLUB_DEBUG_ON)
613static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
614#else
615static slab_flags_t slub_debug;
616#endif
617
618static char *slub_debug_string;
619static int disable_higher_order_debug;
620
621
622
623
624
625
626
627static inline void metadata_access_enable(void)
628{
629 kasan_disable_current();
630}
631
632static inline void metadata_access_disable(void)
633{
634 kasan_enable_current();
635}
636
637
638
639
640
641
642static inline int check_valid_pointer(struct kmem_cache *s,
643 struct page *page, void *object)
644{
645 void *base;
646
647 if (!object)
648 return 1;
649
650 base = page_address(page);
651 object = kasan_reset_tag(object);
652 object = restore_red_left(s, object);
653 if (object < base || object >= base + page->objects * s->size ||
654 (object - base) % s->size) {
655 return 0;
656 }
657
658 return 1;
659}
660
661static void print_section(char *level, char *text, u8 *addr,
662 unsigned int length)
663{
664 metadata_access_enable();
665 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
666 16, 1, kasan_reset_tag((void *)addr), length, 1);
667 metadata_access_disable();
668}
669
670
671
672
673static inline bool freeptr_outside_object(struct kmem_cache *s)
674{
675 return s->offset >= s->inuse;
676}
677
678
679
680
681
682static inline unsigned int get_info_end(struct kmem_cache *s)
683{
684 if (freeptr_outside_object(s))
685 return s->inuse + sizeof(void *);
686 else
687 return s->inuse;
688}
689
690static struct track *get_track(struct kmem_cache *s, void *object,
691 enum track_item alloc)
692{
693 struct track *p;
694
695 p = object + get_info_end(s);
696
697 return kasan_reset_tag(p + alloc);
698}
699
700static void set_track(struct kmem_cache *s, void *object,
701 enum track_item alloc, unsigned long addr)
702{
703 struct track *p = get_track(s, object, alloc);
704
705 if (addr) {
706#ifdef CONFIG_STACKTRACE
707 unsigned int nr_entries;
708
709 metadata_access_enable();
710 nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
711 TRACK_ADDRS_COUNT, 3);
712 metadata_access_disable();
713
714 if (nr_entries < TRACK_ADDRS_COUNT)
715 p->addrs[nr_entries] = 0;
716#endif
717 p->addr = addr;
718 p->cpu = smp_processor_id();
719 p->pid = current->pid;
720 p->when = jiffies;
721 } else {
722 memset(p, 0, sizeof(struct track));
723 }
724}
725
726static void init_tracking(struct kmem_cache *s, void *object)
727{
728 if (!(s->flags & SLAB_STORE_USER))
729 return;
730
731 set_track(s, object, TRACK_FREE, 0UL);
732 set_track(s, object, TRACK_ALLOC, 0UL);
733}
734
735static void print_track(const char *s, struct track *t, unsigned long pr_time)
736{
737 if (!t->addr)
738 return;
739
740 pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
741 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
742#ifdef CONFIG_STACKTRACE
743 {
744 int i;
745 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
746 if (t->addrs[i])
747 pr_err("\t%pS\n", (void *)t->addrs[i]);
748 else
749 break;
750 }
751#endif
752}
753
754void print_tracking(struct kmem_cache *s, void *object)
755{
756 unsigned long pr_time = jiffies;
757 if (!(s->flags & SLAB_STORE_USER))
758 return;
759
760 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
761 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
762}
763
764static void print_page_info(struct page *page)
765{
766 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%#lx(%pGp)\n",
767 page, page->objects, page->inuse, page->freelist,
768 page->flags, &page->flags);
769
770}
771
772static void slab_bug(struct kmem_cache *s, char *fmt, ...)
773{
774 struct va_format vaf;
775 va_list args;
776
777 va_start(args, fmt);
778 vaf.fmt = fmt;
779 vaf.va = &args;
780 pr_err("=============================================================================\n");
781 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
782 pr_err("-----------------------------------------------------------------------------\n\n");
783 va_end(args);
784}
785
786__printf(2, 3)
787static void slab_fix(struct kmem_cache *s, char *fmt, ...)
788{
789 struct va_format vaf;
790 va_list args;
791
792 if (slab_add_kunit_errors())
793 return;
794
795 va_start(args, fmt);
796 vaf.fmt = fmt;
797 vaf.va = &args;
798 pr_err("FIX %s: %pV\n", s->name, &vaf);
799 va_end(args);
800}
801
802static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
803 void **freelist, void *nextfree)
804{
805 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
806 !check_valid_pointer(s, page, nextfree) && freelist) {
807 object_err(s, page, *freelist, "Freechain corrupt");
808 *freelist = NULL;
809 slab_fix(s, "Isolate corrupted freechain");
810 return true;
811 }
812
813 return false;
814}
815
816static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
817{
818 unsigned int off;
819 u8 *addr = page_address(page);
820
821 print_tracking(s, p);
822
823 print_page_info(page);
824
825 pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
826 p, p - addr, get_freepointer(s, p));
827
828 if (s->flags & SLAB_RED_ZONE)
829 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
830 s->red_left_pad);
831 else if (p > addr + 16)
832 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
833
834 print_section(KERN_ERR, "Object ", p,
835 min_t(unsigned int, s->object_size, PAGE_SIZE));
836 if (s->flags & SLAB_RED_ZONE)
837 print_section(KERN_ERR, "Redzone ", p + s->object_size,
838 s->inuse - s->object_size);
839
840 off = get_info_end(s);
841
842 if (s->flags & SLAB_STORE_USER)
843 off += 2 * sizeof(struct track);
844
845 off += kasan_metadata_size(s);
846
847 if (off != size_from_object(s))
848
849 print_section(KERN_ERR, "Padding ", p + off,
850 size_from_object(s) - off);
851
852 dump_stack();
853}
854
855void object_err(struct kmem_cache *s, struct page *page,
856 u8 *object, char *reason)
857{
858 if (slab_add_kunit_errors())
859 return;
860
861 slab_bug(s, "%s", reason);
862 print_trailer(s, page, object);
863 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
864}
865
866static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
867 const char *fmt, ...)
868{
869 va_list args;
870 char buf[100];
871
872 if (slab_add_kunit_errors())
873 return;
874
875 va_start(args, fmt);
876 vsnprintf(buf, sizeof(buf), fmt, args);
877 va_end(args);
878 slab_bug(s, "%s", buf);
879 print_page_info(page);
880 dump_stack();
881 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
882}
883
884static void init_object(struct kmem_cache *s, void *object, u8 val)
885{
886 u8 *p = kasan_reset_tag(object);
887
888 if (s->flags & SLAB_RED_ZONE)
889 memset(p - s->red_left_pad, val, s->red_left_pad);
890
891 if (s->flags & __OBJECT_POISON) {
892 memset(p, POISON_FREE, s->object_size - 1);
893 p[s->object_size - 1] = POISON_END;
894 }
895
896 if (s->flags & SLAB_RED_ZONE)
897 memset(p + s->object_size, val, s->inuse - s->object_size);
898}
899
900static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
901 void *from, void *to)
902{
903 slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
904 memset(from, data, to - from);
905}
906
907static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
908 u8 *object, char *what,
909 u8 *start, unsigned int value, unsigned int bytes)
910{
911 u8 *fault;
912 u8 *end;
913 u8 *addr = page_address(page);
914
915 metadata_access_enable();
916 fault = memchr_inv(kasan_reset_tag(start), value, bytes);
917 metadata_access_disable();
918 if (!fault)
919 return 1;
920
921 end = start + bytes;
922 while (end > fault && end[-1] == value)
923 end--;
924
925 if (slab_add_kunit_errors())
926 goto skip_bug_print;
927
928 slab_bug(s, "%s overwritten", what);
929 pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
930 fault, end - 1, fault - addr,
931 fault[0], value);
932 print_trailer(s, page, object);
933 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
934
935skip_bug_print:
936 restore_bytes(s, what, value, fault, end);
937 return 0;
938}
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
979{
980 unsigned long off = get_info_end(s);
981
982 if (s->flags & SLAB_STORE_USER)
983
984 off += 2 * sizeof(struct track);
985
986 off += kasan_metadata_size(s);
987
988 if (size_from_object(s) == off)
989 return 1;
990
991 return check_bytes_and_report(s, page, p, "Object padding",
992 p + off, POISON_INUSE, size_from_object(s) - off);
993}
994
995
996static int slab_pad_check(struct kmem_cache *s, struct page *page)
997{
998 u8 *start;
999 u8 *fault;
1000 u8 *end;
1001 u8 *pad;
1002 int length;
1003 int remainder;
1004
1005 if (!(s->flags & SLAB_POISON))
1006 return 1;
1007
1008 start = page_address(page);
1009 length = page_size(page);
1010 end = start + length;
1011 remainder = length % s->size;
1012 if (!remainder)
1013 return 1;
1014
1015 pad = end - remainder;
1016 metadata_access_enable();
1017 fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
1018 metadata_access_disable();
1019 if (!fault)
1020 return 1;
1021 while (end > fault && end[-1] == POISON_INUSE)
1022 end--;
1023
1024 slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
1025 fault, end - 1, fault - start);
1026 print_section(KERN_ERR, "Padding ", pad, remainder);
1027
1028 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
1029 return 0;
1030}
1031
1032static int check_object(struct kmem_cache *s, struct page *page,
1033 void *object, u8 val)
1034{
1035 u8 *p = object;
1036 u8 *endobject = object + s->object_size;
1037
1038 if (s->flags & SLAB_RED_ZONE) {
1039 if (!check_bytes_and_report(s, page, object, "Left Redzone",
1040 object - s->red_left_pad, val, s->red_left_pad))
1041 return 0;
1042
1043 if (!check_bytes_and_report(s, page, object, "Right Redzone",
1044 endobject, val, s->inuse - s->object_size))
1045 return 0;
1046 } else {
1047 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
1048 check_bytes_and_report(s, page, p, "Alignment padding",
1049 endobject, POISON_INUSE,
1050 s->inuse - s->object_size);
1051 }
1052 }
1053
1054 if (s->flags & SLAB_POISON) {
1055 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
1056 (!check_bytes_and_report(s, page, p, "Poison", p,
1057 POISON_FREE, s->object_size - 1) ||
1058 !check_bytes_and_report(s, page, p, "End Poison",
1059 p + s->object_size - 1, POISON_END, 1)))
1060 return 0;
1061
1062
1063
1064 check_pad_bytes(s, page, p);
1065 }
1066
1067 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
1068
1069
1070
1071
1072 return 1;
1073
1074
1075 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
1076 object_err(s, page, p, "Freepointer corrupt");
1077
1078
1079
1080
1081
1082 set_freepointer(s, p, NULL);
1083 return 0;
1084 }
1085 return 1;
1086}
1087
1088static int check_slab(struct kmem_cache *s, struct page *page)
1089{
1090 int maxobj;
1091
1092 if (!PageSlab(page)) {
1093 slab_err(s, page, "Not a valid slab page");
1094 return 0;
1095 }
1096
1097 maxobj = order_objects(compound_order(page), s->size);
1098 if (page->objects > maxobj) {
1099 slab_err(s, page, "objects %u > max %u",
1100 page->objects, maxobj);
1101 return 0;
1102 }
1103 if (page->inuse > page->objects) {
1104 slab_err(s, page, "inuse %u > max %u",
1105 page->inuse, page->objects);
1106 return 0;
1107 }
1108
1109 slab_pad_check(s, page);
1110 return 1;
1111}
1112
1113
1114
1115
1116
1117static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
1118{
1119 int nr = 0;
1120 void *fp;
1121 void *object = NULL;
1122 int max_objects;
1123
1124 fp = page->freelist;
1125 while (fp && nr <= page->objects) {
1126 if (fp == search)
1127 return 1;
1128 if (!check_valid_pointer(s, page, fp)) {
1129 if (object) {
1130 object_err(s, page, object,
1131 "Freechain corrupt");
1132 set_freepointer(s, object, NULL);
1133 } else {
1134 slab_err(s, page, "Freepointer corrupt");
1135 page->freelist = NULL;
1136 page->inuse = page->objects;
1137 slab_fix(s, "Freelist cleared");
1138 return 0;
1139 }
1140 break;
1141 }
1142 object = fp;
1143 fp = get_freepointer(s, object);
1144 nr++;
1145 }
1146
1147 max_objects = order_objects(compound_order(page), s->size);
1148 if (max_objects > MAX_OBJS_PER_PAGE)
1149 max_objects = MAX_OBJS_PER_PAGE;
1150
1151 if (page->objects != max_objects) {
1152 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1153 page->objects, max_objects);
1154 page->objects = max_objects;
1155 slab_fix(s, "Number of objects adjusted");
1156 }
1157 if (page->inuse != page->objects - nr) {
1158 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1159 page->inuse, page->objects - nr);
1160 page->inuse = page->objects - nr;
1161 slab_fix(s, "Object count adjusted");
1162 }
1163 return search == NULL;
1164}
1165
1166static void trace(struct kmem_cache *s, struct page *page, void *object,
1167 int alloc)
1168{
1169 if (s->flags & SLAB_TRACE) {
1170 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1171 s->name,
1172 alloc ? "alloc" : "free",
1173 object, page->inuse,
1174 page->freelist);
1175
1176 if (!alloc)
1177 print_section(KERN_INFO, "Object ", (void *)object,
1178 s->object_size);
1179
1180 dump_stack();
1181 }
1182}
1183
1184
1185
1186
1187static void add_full(struct kmem_cache *s,
1188 struct kmem_cache_node *n, struct page *page)
1189{
1190 if (!(s->flags & SLAB_STORE_USER))
1191 return;
1192
1193 lockdep_assert_held(&n->list_lock);
1194 list_add(&page->slab_list, &n->full);
1195}
1196
1197static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1198{
1199 if (!(s->flags & SLAB_STORE_USER))
1200 return;
1201
1202 lockdep_assert_held(&n->list_lock);
1203 list_del(&page->slab_list);
1204}
1205
1206
1207static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1208{
1209 struct kmem_cache_node *n = get_node(s, node);
1210
1211 return atomic_long_read(&n->nr_slabs);
1212}
1213
1214static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1215{
1216 return atomic_long_read(&n->nr_slabs);
1217}
1218
1219static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1220{
1221 struct kmem_cache_node *n = get_node(s, node);
1222
1223
1224
1225
1226
1227
1228
1229 if (likely(n)) {
1230 atomic_long_inc(&n->nr_slabs);
1231 atomic_long_add(objects, &n->total_objects);
1232 }
1233}
1234static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1235{
1236 struct kmem_cache_node *n = get_node(s, node);
1237
1238 atomic_long_dec(&n->nr_slabs);
1239 atomic_long_sub(objects, &n->total_objects);
1240}
1241
1242
1243static void setup_object_debug(struct kmem_cache *s, struct page *page,
1244 void *object)
1245{
1246 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1247 return;
1248
1249 init_object(s, object, SLUB_RED_INACTIVE);
1250 init_tracking(s, object);
1251}
1252
1253static
1254void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1255{
1256 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1257 return;
1258
1259 metadata_access_enable();
1260 memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
1261 metadata_access_disable();
1262}
1263
1264static inline int alloc_consistency_checks(struct kmem_cache *s,
1265 struct page *page, void *object)
1266{
1267 if (!check_slab(s, page))
1268 return 0;
1269
1270 if (!check_valid_pointer(s, page, object)) {
1271 object_err(s, page, object, "Freelist Pointer check fails");
1272 return 0;
1273 }
1274
1275 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1276 return 0;
1277
1278 return 1;
1279}
1280
1281static noinline int alloc_debug_processing(struct kmem_cache *s,
1282 struct page *page,
1283 void *object, unsigned long addr)
1284{
1285 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1286 if (!alloc_consistency_checks(s, page, object))
1287 goto bad;
1288 }
1289
1290
1291 if (s->flags & SLAB_STORE_USER)
1292 set_track(s, object, TRACK_ALLOC, addr);
1293 trace(s, page, object, 1);
1294 init_object(s, object, SLUB_RED_ACTIVE);
1295 return 1;
1296
1297bad:
1298 if (PageSlab(page)) {
1299
1300
1301
1302
1303
1304 slab_fix(s, "Marking all objects used");
1305 page->inuse = page->objects;
1306 page->freelist = NULL;
1307 }
1308 return 0;
1309}
1310
1311static inline int free_consistency_checks(struct kmem_cache *s,
1312 struct page *page, void *object, unsigned long addr)
1313{
1314 if (!check_valid_pointer(s, page, object)) {
1315 slab_err(s, page, "Invalid object pointer 0x%p", object);
1316 return 0;
1317 }
1318
1319 if (on_freelist(s, page, object)) {
1320 object_err(s, page, object, "Object already free");
1321 return 0;
1322 }
1323
1324 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1325 return 0;
1326
1327 if (unlikely(s != page->slab_cache)) {
1328 if (!PageSlab(page)) {
1329 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1330 object);
1331 } else if (!page->slab_cache) {
1332 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1333 object);
1334 dump_stack();
1335 } else
1336 object_err(s, page, object,
1337 "page slab pointer corrupt.");
1338 return 0;
1339 }
1340 return 1;
1341}
1342
1343
1344static noinline int free_debug_processing(
1345 struct kmem_cache *s, struct page *page,
1346 void *head, void *tail, int bulk_cnt,
1347 unsigned long addr)
1348{
1349 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1350 void *object = head;
1351 int cnt = 0;
1352 unsigned long flags, flags2;
1353 int ret = 0;
1354
1355 spin_lock_irqsave(&n->list_lock, flags);
1356 slab_lock(page, &flags2);
1357
1358 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1359 if (!check_slab(s, page))
1360 goto out;
1361 }
1362
1363next_object:
1364 cnt++;
1365
1366 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1367 if (!free_consistency_checks(s, page, object, addr))
1368 goto out;
1369 }
1370
1371 if (s->flags & SLAB_STORE_USER)
1372 set_track(s, object, TRACK_FREE, addr);
1373 trace(s, page, object, 0);
1374
1375 init_object(s, object, SLUB_RED_INACTIVE);
1376
1377
1378 if (object != tail) {
1379 object = get_freepointer(s, object);
1380 goto next_object;
1381 }
1382 ret = 1;
1383
1384out:
1385 if (cnt != bulk_cnt)
1386 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1387 bulk_cnt, cnt);
1388
1389 slab_unlock(page, &flags2);
1390 spin_unlock_irqrestore(&n->list_lock, flags);
1391 if (!ret)
1392 slab_fix(s, "Object at 0x%p not freed", object);
1393 return ret;
1394}
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406static char *
1407parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1408{
1409 bool higher_order_disable = false;
1410
1411
1412 while (*str && *str == ';')
1413 str++;
1414
1415 if (*str == ',') {
1416
1417
1418
1419
1420 *flags = DEBUG_DEFAULT_FLAGS;
1421 goto check_slabs;
1422 }
1423 *flags = 0;
1424
1425
1426 for (; *str && *str != ',' && *str != ';'; str++) {
1427 switch (tolower(*str)) {
1428 case '-':
1429 *flags = 0;
1430 break;
1431 case 'f':
1432 *flags |= SLAB_CONSISTENCY_CHECKS;
1433 break;
1434 case 'z':
1435 *flags |= SLAB_RED_ZONE;
1436 break;
1437 case 'p':
1438 *flags |= SLAB_POISON;
1439 break;
1440 case 'u':
1441 *flags |= SLAB_STORE_USER;
1442 break;
1443 case 't':
1444 *flags |= SLAB_TRACE;
1445 break;
1446 case 'a':
1447 *flags |= SLAB_FAILSLAB;
1448 break;
1449 case 'o':
1450
1451
1452
1453
1454 higher_order_disable = true;
1455 break;
1456 default:
1457 if (init)
1458 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1459 }
1460 }
1461check_slabs:
1462 if (*str == ',')
1463 *slabs = ++str;
1464 else
1465 *slabs = NULL;
1466
1467
1468 while (*str && *str != ';')
1469 str++;
1470
1471
1472 while (*str && *str == ';')
1473 str++;
1474
1475 if (init && higher_order_disable)
1476 disable_higher_order_debug = 1;
1477
1478 if (*str)
1479 return str;
1480 else
1481 return NULL;
1482}
1483
1484static int __init setup_slub_debug(char *str)
1485{
1486 slab_flags_t flags;
1487 slab_flags_t global_flags;
1488 char *saved_str;
1489 char *slab_list;
1490 bool global_slub_debug_changed = false;
1491 bool slab_list_specified = false;
1492
1493 global_flags = DEBUG_DEFAULT_FLAGS;
1494 if (*str++ != '=' || !*str)
1495
1496
1497
1498 goto out;
1499
1500 saved_str = str;
1501 while (str) {
1502 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1503
1504 if (!slab_list) {
1505 global_flags = flags;
1506 global_slub_debug_changed = true;
1507 } else {
1508 slab_list_specified = true;
1509 }
1510 }
1511
1512
1513
1514
1515
1516
1517
1518
1519 if (slab_list_specified) {
1520 if (!global_slub_debug_changed)
1521 global_flags = slub_debug;
1522 slub_debug_string = saved_str;
1523 }
1524out:
1525 slub_debug = global_flags;
1526 if (slub_debug != 0 || slub_debug_string)
1527 static_branch_enable(&slub_debug_enabled);
1528 else
1529 static_branch_disable(&slub_debug_enabled);
1530 if ((static_branch_unlikely(&init_on_alloc) ||
1531 static_branch_unlikely(&init_on_free)) &&
1532 (slub_debug & SLAB_POISON))
1533 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1534 return 1;
1535}
1536
1537__setup("slub_debug", setup_slub_debug);
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550slab_flags_t kmem_cache_flags(unsigned int object_size,
1551 slab_flags_t flags, const char *name)
1552{
1553 char *iter;
1554 size_t len;
1555 char *next_block;
1556 slab_flags_t block_flags;
1557 slab_flags_t slub_debug_local = slub_debug;
1558
1559
1560
1561
1562
1563
1564 if (flags & SLAB_NOLEAKTRACE)
1565 slub_debug_local &= ~SLAB_STORE_USER;
1566
1567 len = strlen(name);
1568 next_block = slub_debug_string;
1569
1570 while (next_block) {
1571 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1572 if (!iter)
1573 continue;
1574
1575 while (*iter) {
1576 char *end, *glob;
1577 size_t cmplen;
1578
1579 end = strchrnul(iter, ',');
1580 if (next_block && next_block < end)
1581 end = next_block - 1;
1582
1583 glob = strnchr(iter, end - iter, '*');
1584 if (glob)
1585 cmplen = glob - iter;
1586 else
1587 cmplen = max_t(size_t, len, (end - iter));
1588
1589 if (!strncmp(name, iter, cmplen)) {
1590 flags |= block_flags;
1591 return flags;
1592 }
1593
1594 if (!*end || *end == ';')
1595 break;
1596 iter = end + 1;
1597 }
1598 }
1599
1600 return flags | slub_debug_local;
1601}
1602#else
1603static inline void setup_object_debug(struct kmem_cache *s,
1604 struct page *page, void *object) {}
1605static inline
1606void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1607
1608static inline int alloc_debug_processing(struct kmem_cache *s,
1609 struct page *page, void *object, unsigned long addr) { return 0; }
1610
1611static inline int free_debug_processing(
1612 struct kmem_cache *s, struct page *page,
1613 void *head, void *tail, int bulk_cnt,
1614 unsigned long addr) { return 0; }
1615
1616static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1617 { return 1; }
1618static inline int check_object(struct kmem_cache *s, struct page *page,
1619 void *object, u8 val) { return 1; }
1620static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1621 struct page *page) {}
1622static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1623 struct page *page) {}
1624slab_flags_t kmem_cache_flags(unsigned int object_size,
1625 slab_flags_t flags, const char *name)
1626{
1627 return flags;
1628}
1629#define slub_debug 0
1630
1631#define disable_higher_order_debug 0
1632
1633static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1634 { return 0; }
1635static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1636 { return 0; }
1637static inline void inc_slabs_node(struct kmem_cache *s, int node,
1638 int objects) {}
1639static inline void dec_slabs_node(struct kmem_cache *s, int node,
1640 int objects) {}
1641
1642static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1643 void **freelist, void *nextfree)
1644{
1645 return false;
1646}
1647#endif
1648
1649
1650
1651
1652
1653static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1654{
1655 ptr = kasan_kmalloc_large(ptr, size, flags);
1656
1657 kmemleak_alloc(ptr, size, 1, flags);
1658 return ptr;
1659}
1660
1661static __always_inline void kfree_hook(void *x)
1662{
1663 kmemleak_free(x);
1664 kasan_kfree_large(x);
1665}
1666
1667static __always_inline bool slab_free_hook(struct kmem_cache *s,
1668 void *x, bool init)
1669{
1670 kmemleak_free_recursive(x, s->flags);
1671
1672 debug_check_no_locks_freed(x, s->object_size);
1673
1674 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1675 debug_check_no_obj_freed(x, s->object_size);
1676
1677
1678 if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1679 __kcsan_check_access(x, s->object_size,
1680 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690 if (init) {
1691 int rsize;
1692
1693 if (!kasan_has_integrated_init())
1694 memset(kasan_reset_tag(x), 0, s->object_size);
1695 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1696 memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1697 s->size - s->inuse - rsize);
1698 }
1699
1700 return kasan_slab_free(s, x, init);
1701}
1702
1703static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1704 void **head, void **tail,
1705 int *cnt)
1706{
1707
1708 void *object;
1709 void *next = *head;
1710 void *old_tail = *tail ? *tail : *head;
1711
1712 if (is_kfence_address(next)) {
1713 slab_free_hook(s, next, false);
1714 return true;
1715 }
1716
1717
1718 *head = NULL;
1719 *tail = NULL;
1720
1721 do {
1722 object = next;
1723 next = get_freepointer(s, object);
1724
1725
1726 if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
1727
1728 set_freepointer(s, object, *head);
1729 *head = object;
1730 if (!*tail)
1731 *tail = object;
1732 } else {
1733
1734
1735
1736
1737 --(*cnt);
1738 }
1739 } while (object != old_tail);
1740
1741 if (*head == *tail)
1742 *tail = NULL;
1743
1744 return *head != NULL;
1745}
1746
1747static void *setup_object(struct kmem_cache *s, struct page *page,
1748 void *object)
1749{
1750 setup_object_debug(s, page, object);
1751 object = kasan_init_slab_obj(s, object);
1752 if (unlikely(s->ctor)) {
1753 kasan_unpoison_object_data(s, object);
1754 s->ctor(object);
1755 kasan_poison_object_data(s, object);
1756 }
1757 return object;
1758}
1759
1760
1761
1762
1763static inline struct page *alloc_slab_page(struct kmem_cache *s,
1764 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1765{
1766 struct page *page;
1767 unsigned int order = oo_order(oo);
1768
1769 if (node == NUMA_NO_NODE)
1770 page = alloc_pages(flags, order);
1771 else
1772 page = __alloc_pages_node(node, flags, order);
1773
1774 return page;
1775}
1776
1777#ifdef CONFIG_SLAB_FREELIST_RANDOM
1778
1779static int init_cache_random_seq(struct kmem_cache *s)
1780{
1781 unsigned int count = oo_objects(s->oo);
1782 int err;
1783
1784
1785 if (s->random_seq)
1786 return 0;
1787
1788 err = cache_random_seq_create(s, count, GFP_KERNEL);
1789 if (err) {
1790 pr_err("SLUB: Unable to initialize free list for %s\n",
1791 s->name);
1792 return err;
1793 }
1794
1795
1796 if (s->random_seq) {
1797 unsigned int i;
1798
1799 for (i = 0; i < count; i++)
1800 s->random_seq[i] *= s->size;
1801 }
1802 return 0;
1803}
1804
1805
1806static void __init init_freelist_randomization(void)
1807{
1808 struct kmem_cache *s;
1809
1810 mutex_lock(&slab_mutex);
1811
1812 list_for_each_entry(s, &slab_caches, list)
1813 init_cache_random_seq(s);
1814
1815 mutex_unlock(&slab_mutex);
1816}
1817
1818
1819static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1820 unsigned long *pos, void *start,
1821 unsigned long page_limit,
1822 unsigned long freelist_count)
1823{
1824 unsigned int idx;
1825
1826
1827
1828
1829
1830 do {
1831 idx = s->random_seq[*pos];
1832 *pos += 1;
1833 if (*pos >= freelist_count)
1834 *pos = 0;
1835 } while (unlikely(idx >= page_limit));
1836
1837 return (char *)start + idx;
1838}
1839
1840
1841static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1842{
1843 void *start;
1844 void *cur;
1845 void *next;
1846 unsigned long idx, pos, page_limit, freelist_count;
1847
1848 if (page->objects < 2 || !s->random_seq)
1849 return false;
1850
1851 freelist_count = oo_objects(s->oo);
1852 pos = get_random_int() % freelist_count;
1853
1854 page_limit = page->objects * s->size;
1855 start = fixup_red_left(s, page_address(page));
1856
1857
1858 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1859 freelist_count);
1860 cur = setup_object(s, page, cur);
1861 page->freelist = cur;
1862
1863 for (idx = 1; idx < page->objects; idx++) {
1864 next = next_freelist_entry(s, page, &pos, start, page_limit,
1865 freelist_count);
1866 next = setup_object(s, page, next);
1867 set_freepointer(s, cur, next);
1868 cur = next;
1869 }
1870 set_freepointer(s, cur, NULL);
1871
1872 return true;
1873}
1874#else
1875static inline int init_cache_random_seq(struct kmem_cache *s)
1876{
1877 return 0;
1878}
1879static inline void init_freelist_randomization(void) { }
1880static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1881{
1882 return false;
1883}
1884#endif
1885
1886static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1887{
1888 struct page *page;
1889 struct kmem_cache_order_objects oo = s->oo;
1890 gfp_t alloc_gfp;
1891 void *start, *p, *next;
1892 int idx;
1893 bool shuffle;
1894
1895 flags &= gfp_allowed_mask;
1896
1897 flags |= s->allocflags;
1898
1899
1900
1901
1902
1903 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1904 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1905 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1906
1907 page = alloc_slab_page(s, alloc_gfp, node, oo);
1908 if (unlikely(!page)) {
1909 oo = s->min;
1910 alloc_gfp = flags;
1911
1912
1913
1914
1915 page = alloc_slab_page(s, alloc_gfp, node, oo);
1916 if (unlikely(!page))
1917 goto out;
1918 stat(s, ORDER_FALLBACK);
1919 }
1920
1921 page->objects = oo_objects(oo);
1922
1923 account_slab_page(page, oo_order(oo), s, flags);
1924
1925 page->slab_cache = s;
1926 __SetPageSlab(page);
1927 if (page_is_pfmemalloc(page))
1928 SetPageSlabPfmemalloc(page);
1929
1930 kasan_poison_slab(page);
1931
1932 start = page_address(page);
1933
1934 setup_page_debug(s, page, start);
1935
1936 shuffle = shuffle_freelist(s, page);
1937
1938 if (!shuffle) {
1939 start = fixup_red_left(s, start);
1940 start = setup_object(s, page, start);
1941 page->freelist = start;
1942 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1943 next = p + s->size;
1944 next = setup_object(s, page, next);
1945 set_freepointer(s, p, next);
1946 p = next;
1947 }
1948 set_freepointer(s, p, NULL);
1949 }
1950
1951 page->inuse = page->objects;
1952 page->frozen = 1;
1953
1954out:
1955 if (!page)
1956 return NULL;
1957
1958 inc_slabs_node(s, page_to_nid(page), page->objects);
1959
1960 return page;
1961}
1962
1963static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1964{
1965 if (unlikely(flags & GFP_SLAB_BUG_MASK))
1966 flags = kmalloc_fix_flags(flags);
1967
1968 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
1969
1970 return allocate_slab(s,
1971 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1972}
1973
1974static void __free_slab(struct kmem_cache *s, struct page *page)
1975{
1976 int order = compound_order(page);
1977 int pages = 1 << order;
1978
1979 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1980 void *p;
1981
1982 slab_pad_check(s, page);
1983 for_each_object(p, s, page_address(page),
1984 page->objects)
1985 check_object(s, page, p, SLUB_RED_INACTIVE);
1986 }
1987
1988 __ClearPageSlabPfmemalloc(page);
1989 __ClearPageSlab(page);
1990
1991 page->slab_cache = NULL;
1992 if (current->reclaim_state)
1993 current->reclaim_state->reclaimed_slab += pages;
1994 unaccount_slab_page(page, order, s);
1995 __free_pages(page, order);
1996}
1997
1998static void rcu_free_slab(struct rcu_head *h)
1999{
2000 struct page *page = container_of(h, struct page, rcu_head);
2001
2002 __free_slab(page->slab_cache, page);
2003}
2004
2005static void free_slab(struct kmem_cache *s, struct page *page)
2006{
2007 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
2008 call_rcu(&page->rcu_head, rcu_free_slab);
2009 } else
2010 __free_slab(s, page);
2011}
2012
2013static void discard_slab(struct kmem_cache *s, struct page *page)
2014{
2015 dec_slabs_node(s, page_to_nid(page), page->objects);
2016 free_slab(s, page);
2017}
2018
2019
2020
2021
2022static inline void
2023__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
2024{
2025 n->nr_partial++;
2026 if (tail == DEACTIVATE_TO_TAIL)
2027 list_add_tail(&page->slab_list, &n->partial);
2028 else
2029 list_add(&page->slab_list, &n->partial);
2030}
2031
2032static inline void add_partial(struct kmem_cache_node *n,
2033 struct page *page, int tail)
2034{
2035 lockdep_assert_held(&n->list_lock);
2036 __add_partial(n, page, tail);
2037}
2038
2039static inline void remove_partial(struct kmem_cache_node *n,
2040 struct page *page)
2041{
2042 lockdep_assert_held(&n->list_lock);
2043 list_del(&page->slab_list);
2044 n->nr_partial--;
2045}
2046
2047
2048
2049
2050
2051
2052
2053static inline void *acquire_slab(struct kmem_cache *s,
2054 struct kmem_cache_node *n, struct page *page,
2055 int mode, int *objects)
2056{
2057 void *freelist;
2058 unsigned long counters;
2059 struct page new;
2060
2061 lockdep_assert_held(&n->list_lock);
2062
2063
2064
2065
2066
2067
2068 freelist = page->freelist;
2069 counters = page->counters;
2070 new.counters = counters;
2071 *objects = new.objects - new.inuse;
2072 if (mode) {
2073 new.inuse = page->objects;
2074 new.freelist = NULL;
2075 } else {
2076 new.freelist = freelist;
2077 }
2078
2079 VM_BUG_ON(new.frozen);
2080 new.frozen = 1;
2081
2082 if (!__cmpxchg_double_slab(s, page,
2083 freelist, counters,
2084 new.freelist, new.counters,
2085 "acquire_slab"))
2086 return NULL;
2087
2088 remove_partial(n, page);
2089 WARN_ON(!freelist);
2090 return freelist;
2091}
2092
2093#ifdef CONFIG_SLUB_CPU_PARTIAL
2094static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
2095#else
2096static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
2097 int drain) { }
2098#endif
2099static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
2100
2101
2102
2103
2104static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
2105 struct page **ret_page, gfp_t gfpflags)
2106{
2107 struct page *page, *page2;
2108 void *object = NULL;
2109 unsigned int available = 0;
2110 unsigned long flags;
2111 int objects;
2112
2113
2114
2115
2116
2117
2118
2119 if (!n || !n->nr_partial)
2120 return NULL;
2121
2122 spin_lock_irqsave(&n->list_lock, flags);
2123 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
2124 void *t;
2125
2126 if (!pfmemalloc_match(page, gfpflags))
2127 continue;
2128
2129 t = acquire_slab(s, n, page, object == NULL, &objects);
2130 if (!t)
2131 break;
2132
2133 available += objects;
2134 if (!object) {
2135 *ret_page = page;
2136 stat(s, ALLOC_FROM_PARTIAL);
2137 object = t;
2138 } else {
2139 put_cpu_partial(s, page, 0);
2140 stat(s, CPU_PARTIAL_NODE);
2141 }
2142 if (!kmem_cache_has_cpu_partial(s)
2143 || available > slub_cpu_partial(s) / 2)
2144 break;
2145
2146 }
2147 spin_unlock_irqrestore(&n->list_lock, flags);
2148 return object;
2149}
2150
2151
2152
2153
2154static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2155 struct page **ret_page)
2156{
2157#ifdef CONFIG_NUMA
2158 struct zonelist *zonelist;
2159 struct zoneref *z;
2160 struct zone *zone;
2161 enum zone_type highest_zoneidx = gfp_zone(flags);
2162 void *object;
2163 unsigned int cpuset_mems_cookie;
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183 if (!s->remote_node_defrag_ratio ||
2184 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2185 return NULL;
2186
2187 do {
2188 cpuset_mems_cookie = read_mems_allowed_begin();
2189 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2190 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2191 struct kmem_cache_node *n;
2192
2193 n = get_node(s, zone_to_nid(zone));
2194
2195 if (n && cpuset_zone_allowed(zone, flags) &&
2196 n->nr_partial > s->min_partial) {
2197 object = get_partial_node(s, n, ret_page, flags);
2198 if (object) {
2199
2200
2201
2202
2203
2204
2205
2206 return object;
2207 }
2208 }
2209 }
2210 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2211#endif
2212 return NULL;
2213}
2214
2215
2216
2217
2218static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2219 struct page **ret_page)
2220{
2221 void *object;
2222 int searchnode = node;
2223
2224 if (node == NUMA_NO_NODE)
2225 searchnode = numa_mem_id();
2226
2227 object = get_partial_node(s, get_node(s, searchnode), ret_page, flags);
2228 if (object || node != NUMA_NO_NODE)
2229 return object;
2230
2231 return get_any_partial(s, flags, ret_page);
2232}
2233
2234#ifdef CONFIG_PREEMPTION
2235
2236
2237
2238
2239
2240#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2241#else
2242
2243
2244
2245
2246#define TID_STEP 1
2247#endif
2248
2249static inline unsigned long next_tid(unsigned long tid)
2250{
2251 return tid + TID_STEP;
2252}
2253
2254#ifdef SLUB_DEBUG_CMPXCHG
2255static inline unsigned int tid_to_cpu(unsigned long tid)
2256{
2257 return tid % TID_STEP;
2258}
2259
2260static inline unsigned long tid_to_event(unsigned long tid)
2261{
2262 return tid / TID_STEP;
2263}
2264#endif
2265
2266static inline unsigned int init_tid(int cpu)
2267{
2268 return cpu;
2269}
2270
2271static inline void note_cmpxchg_failure(const char *n,
2272 const struct kmem_cache *s, unsigned long tid)
2273{
2274#ifdef SLUB_DEBUG_CMPXCHG
2275 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2276
2277 pr_info("%s %s: cmpxchg redo ", n, s->name);
2278
2279#ifdef CONFIG_PREEMPTION
2280 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2281 pr_warn("due to cpu change %d -> %d\n",
2282 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2283 else
2284#endif
2285 if (tid_to_event(tid) != tid_to_event(actual_tid))
2286 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2287 tid_to_event(tid), tid_to_event(actual_tid));
2288 else
2289 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2290 actual_tid, tid, next_tid(tid));
2291#endif
2292 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2293}
2294
2295static void init_kmem_cache_cpus(struct kmem_cache *s)
2296{
2297 int cpu;
2298 struct kmem_cache_cpu *c;
2299
2300 for_each_possible_cpu(cpu) {
2301 c = per_cpu_ptr(s->cpu_slab, cpu);
2302 local_lock_init(&c->lock);
2303 c->tid = init_tid(cpu);
2304 }
2305}
2306
2307
2308
2309
2310
2311
2312
2313static void deactivate_slab(struct kmem_cache *s, struct page *page,
2314 void *freelist)
2315{
2316 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2317 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2318 int lock = 0, free_delta = 0;
2319 enum slab_modes l = M_NONE, m = M_NONE;
2320 void *nextfree, *freelist_iter, *freelist_tail;
2321 int tail = DEACTIVATE_TO_HEAD;
2322 unsigned long flags = 0;
2323 struct page new;
2324 struct page old;
2325
2326 if (page->freelist) {
2327 stat(s, DEACTIVATE_REMOTE_FREES);
2328 tail = DEACTIVATE_TO_TAIL;
2329 }
2330
2331
2332
2333
2334
2335 freelist_tail = NULL;
2336 freelist_iter = freelist;
2337 while (freelist_iter) {
2338 nextfree = get_freepointer(s, freelist_iter);
2339
2340
2341
2342
2343
2344
2345 if (freelist_corrupted(s, page, &freelist_iter, nextfree))
2346 break;
2347
2348 freelist_tail = freelist_iter;
2349 free_delta++;
2350
2351 freelist_iter = nextfree;
2352 }
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370redo:
2371
2372 old.freelist = READ_ONCE(page->freelist);
2373 old.counters = READ_ONCE(page->counters);
2374 VM_BUG_ON(!old.frozen);
2375
2376
2377 new.counters = old.counters;
2378 if (freelist_tail) {
2379 new.inuse -= free_delta;
2380 set_freepointer(s, freelist_tail, old.freelist);
2381 new.freelist = freelist;
2382 } else
2383 new.freelist = old.freelist;
2384
2385 new.frozen = 0;
2386
2387 if (!new.inuse && n->nr_partial >= s->min_partial)
2388 m = M_FREE;
2389 else if (new.freelist) {
2390 m = M_PARTIAL;
2391 if (!lock) {
2392 lock = 1;
2393
2394
2395
2396
2397
2398 spin_lock_irqsave(&n->list_lock, flags);
2399 }
2400 } else {
2401 m = M_FULL;
2402 if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
2403 lock = 1;
2404
2405
2406
2407
2408
2409 spin_lock_irqsave(&n->list_lock, flags);
2410 }
2411 }
2412
2413 if (l != m) {
2414 if (l == M_PARTIAL)
2415 remove_partial(n, page);
2416 else if (l == M_FULL)
2417 remove_full(s, n, page);
2418
2419 if (m == M_PARTIAL)
2420 add_partial(n, page, tail);
2421 else if (m == M_FULL)
2422 add_full(s, n, page);
2423 }
2424
2425 l = m;
2426 if (!cmpxchg_double_slab(s, page,
2427 old.freelist, old.counters,
2428 new.freelist, new.counters,
2429 "unfreezing slab"))
2430 goto redo;
2431
2432 if (lock)
2433 spin_unlock_irqrestore(&n->list_lock, flags);
2434
2435 if (m == M_PARTIAL)
2436 stat(s, tail);
2437 else if (m == M_FULL)
2438 stat(s, DEACTIVATE_FULL);
2439 else if (m == M_FREE) {
2440 stat(s, DEACTIVATE_EMPTY);
2441 discard_slab(s, page);
2442 stat(s, FREE_SLAB);
2443 }
2444}
2445
2446#ifdef CONFIG_SLUB_CPU_PARTIAL
2447static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
2448{
2449 struct kmem_cache_node *n = NULL, *n2 = NULL;
2450 struct page *page, *discard_page = NULL;
2451 unsigned long flags = 0;
2452
2453 while (partial_page) {
2454 struct page new;
2455 struct page old;
2456
2457 page = partial_page;
2458 partial_page = page->next;
2459
2460 n2 = get_node(s, page_to_nid(page));
2461 if (n != n2) {
2462 if (n)
2463 spin_unlock_irqrestore(&n->list_lock, flags);
2464
2465 n = n2;
2466 spin_lock_irqsave(&n->list_lock, flags);
2467 }
2468
2469 do {
2470
2471 old.freelist = page->freelist;
2472 old.counters = page->counters;
2473 VM_BUG_ON(!old.frozen);
2474
2475 new.counters = old.counters;
2476 new.freelist = old.freelist;
2477
2478 new.frozen = 0;
2479
2480 } while (!__cmpxchg_double_slab(s, page,
2481 old.freelist, old.counters,
2482 new.freelist, new.counters,
2483 "unfreezing slab"));
2484
2485 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2486 page->next = discard_page;
2487 discard_page = page;
2488 } else {
2489 add_partial(n, page, DEACTIVATE_TO_TAIL);
2490 stat(s, FREE_ADD_PARTIAL);
2491 }
2492 }
2493
2494 if (n)
2495 spin_unlock_irqrestore(&n->list_lock, flags);
2496
2497 while (discard_page) {
2498 page = discard_page;
2499 discard_page = discard_page->next;
2500
2501 stat(s, DEACTIVATE_EMPTY);
2502 discard_slab(s, page);
2503 stat(s, FREE_SLAB);
2504 }
2505}
2506
2507
2508
2509
2510static void unfreeze_partials(struct kmem_cache *s)
2511{
2512 struct page *partial_page;
2513 unsigned long flags;
2514
2515 local_lock_irqsave(&s->cpu_slab->lock, flags);
2516 partial_page = this_cpu_read(s->cpu_slab->partial);
2517 this_cpu_write(s->cpu_slab->partial, NULL);
2518 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2519
2520 if (partial_page)
2521 __unfreeze_partials(s, partial_page);
2522}
2523
2524static void unfreeze_partials_cpu(struct kmem_cache *s,
2525 struct kmem_cache_cpu *c)
2526{
2527 struct page *partial_page;
2528
2529 partial_page = slub_percpu_partial(c);
2530 c->partial = NULL;
2531
2532 if (partial_page)
2533 __unfreeze_partials(s, partial_page);
2534}
2535
2536
2537
2538
2539
2540
2541
2542
2543static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2544{
2545 struct page *oldpage;
2546 struct page *page_to_unfreeze = NULL;
2547 unsigned long flags;
2548 int pages = 0;
2549 int pobjects = 0;
2550
2551 local_lock_irqsave(&s->cpu_slab->lock, flags);
2552
2553 oldpage = this_cpu_read(s->cpu_slab->partial);
2554
2555 if (oldpage) {
2556 if (drain && oldpage->pobjects > slub_cpu_partial(s)) {
2557
2558
2559
2560
2561
2562 page_to_unfreeze = oldpage;
2563 oldpage = NULL;
2564 } else {
2565 pobjects = oldpage->pobjects;
2566 pages = oldpage->pages;
2567 }
2568 }
2569
2570 pages++;
2571 pobjects += page->objects - page->inuse;
2572
2573 page->pages = pages;
2574 page->pobjects = pobjects;
2575 page->next = oldpage;
2576
2577 this_cpu_write(s->cpu_slab->partial, page);
2578
2579 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2580
2581 if (page_to_unfreeze) {
2582 __unfreeze_partials(s, page_to_unfreeze);
2583 stat(s, CPU_PARTIAL_DRAIN);
2584 }
2585}
2586
2587#else
2588
2589static inline void unfreeze_partials(struct kmem_cache *s) { }
2590static inline void unfreeze_partials_cpu(struct kmem_cache *s,
2591 struct kmem_cache_cpu *c) { }
2592
2593#endif
2594
2595static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2596{
2597 unsigned long flags;
2598 struct page *page;
2599 void *freelist;
2600
2601 local_lock_irqsave(&s->cpu_slab->lock, flags);
2602
2603 page = c->page;
2604 freelist = c->freelist;
2605
2606 c->page = NULL;
2607 c->freelist = NULL;
2608 c->tid = next_tid(c->tid);
2609
2610 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2611
2612 if (page) {
2613 deactivate_slab(s, page, freelist);
2614 stat(s, CPUSLAB_FLUSH);
2615 }
2616}
2617
2618static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2619{
2620 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2621 void *freelist = c->freelist;
2622 struct page *page = c->page;
2623
2624 c->page = NULL;
2625 c->freelist = NULL;
2626 c->tid = next_tid(c->tid);
2627
2628 if (page) {
2629 deactivate_slab(s, page, freelist);
2630 stat(s, CPUSLAB_FLUSH);
2631 }
2632
2633 unfreeze_partials_cpu(s, c);
2634}
2635
2636struct slub_flush_work {
2637 struct work_struct work;
2638 struct kmem_cache *s;
2639 bool skip;
2640};
2641
2642
2643
2644
2645
2646
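/*
 * Flush the current cpu's slab and percpu partial list. Runs from a
 * per-cpu work item scheduled by flush_all_cpus_locked().
 */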
2647static void flush_cpu_slab(struct work_struct *w)
2648{
2649 struct kmem_cache *s;
2650 struct kmem_cache_cpu *c;
2651 struct slub_flush_work *sfw;
2652
2653 sfw = container_of(w, struct slub_flush_work, work);
2654
2655 s = sfw->s;
2656 c = this_cpu_ptr(s->cpu_slab);
2657
2658 if (c->page)
2659 flush_slab(s, c);
2660
2661 unfreeze_partials(s);
2662}
2663
2664static bool has_cpu_slab(int cpu, struct kmem_cache *s)
2665{
2666 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2667
2668 return c->page || slub_percpu_partial(c);
2669}
2670
2671static DEFINE_MUTEX(flush_lock);
2672static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
2673
2674static void flush_all_cpus_locked(struct kmem_cache *s)
2675{
2676 struct slub_flush_work *sfw;
2677 unsigned int cpu;
2678
2679 lockdep_assert_cpus_held();
2680 mutex_lock(&flush_lock);
2681
2682 for_each_online_cpu(cpu) {
2683 sfw = &per_cpu(slub_flush, cpu);
2684 if (!has_cpu_slab(cpu, s)) {
2685 sfw->skip = true;
2686 continue;
2687 }
2688 INIT_WORK(&sfw->work, flush_cpu_slab);
2689 sfw->skip = false;
2690 sfw->s = s;
2691 schedule_work_on(cpu, &sfw->work);
2692 }
2693
2694 for_each_online_cpu(cpu) {
2695 sfw = &per_cpu(slub_flush, cpu);
2696 if (sfw->skip)
2697 continue;
2698 flush_work(&sfw->work);
2699 }
2700
2701 mutex_unlock(&flush_lock);
2702}
2703
2704static void flush_all(struct kmem_cache *s)
2705{
2706 cpus_read_lock();
2707 flush_all_cpus_locked(s);
2708 cpus_read_unlock();
2709}
2710
2711
2712
2713
2714
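/*
 * CPU hotplug callback: flush the cpu slab and percpu partial list of a
 * cpu that is going away.
 */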
2715static int slub_cpu_dead(unsigned int cpu)
2716{
2717 struct kmem_cache *s;
2718
2719 mutex_lock(&slab_mutex);
2720 list_for_each_entry(s, &slab_caches, list)
2721 __flush_cpu_slab(s, cpu);
2722 mutex_unlock(&slab_mutex);
2723 return 0;
2724}
2725
2726
2727
2728
2729
2730static inline int node_match(struct page *page, int node)
2731{
2732#ifdef CONFIG_NUMA
2733 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2734 return 0;
2735#endif
2736 return 1;
2737}
2738
2739#ifdef CONFIG_SLUB_DEBUG
2740static int count_free(struct page *page)
2741{
2742 return page->objects - page->inuse;
2743}
2744
2745static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2746{
2747 return atomic_long_read(&n->total_objects);
2748}
2749#endif
2750
2751#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2752static unsigned long count_partial(struct kmem_cache_node *n,
2753 int (*get_count)(struct page *))
2754{
2755 unsigned long flags;
2756 unsigned long x = 0;
2757 struct page *page;
2758
2759 spin_lock_irqsave(&n->list_lock, flags);
2760 list_for_each_entry(page, &n->partial, slab_list)
2761 x += get_count(page);
2762 spin_unlock_irqrestore(&n->list_lock, flags);
2763 return x;
2764}
2765#endif
2766
2767static noinline void
2768slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2769{
2770#ifdef CONFIG_SLUB_DEBUG
2771 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2772 DEFAULT_RATELIMIT_BURST);
2773 int node;
2774 struct kmem_cache_node *n;
2775
2776 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2777 return;
2778
2779 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2780 nid, gfpflags, &gfpflags);
2781 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2782 s->name, s->object_size, s->size, oo_order(s->oo),
2783 oo_order(s->min));
2784
2785 if (oo_order(s->min) > get_order(s->object_size))
2786 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2787 s->name);
2788
2789 for_each_kmem_cache_node(s, node, n) {
2790 unsigned long nr_slabs;
2791 unsigned long nr_objs;
2792 unsigned long nr_free;
2793
2794 nr_free = count_partial(n, count_free);
2795 nr_slabs = node_nr_slabs(n);
2796 nr_objs = node_nr_objs(n);
2797
2798 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2799 node, nr_slabs, nr_objs, nr_free);
2800 }
2801#endif
2802}
2803
2804static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2805{
2806 if (unlikely(PageSlabPfmemalloc(page)))
2807 return gfp_pfmemalloc_allowed(gfpflags);
2808
2809 return true;
2810}
2811
2812
2813
2814
2815
2816
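/*
 * A variant of pfmemalloc_match() that tests the raw page flag without
 * asserting PageSlab, for opportunistic checks done before the cpu slab
 * is re-checked under the local lock.
 */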
2817static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
2818{
2819 if (unlikely(__PageSlabPfmemalloc(page)))
2820 return gfp_pfmemalloc_allowed(gfpflags);
2821
2822 return true;
2823}
2824
2825
2826
2827
2828
2829
2830
2831
2832
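/*
 * Grab page->freelist for use as the cpu freelist. The slab stays frozen
 * when the return value is non-NULL; a NULL return means the slab had no
 * free objects left and has been unfrozen.
 */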
2833static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2834{
2835 struct page new;
2836 unsigned long counters;
2837 void *freelist;
2838
2839 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
2840
2841 do {
2842 freelist = page->freelist;
2843 counters = page->counters;
2844
2845 new.counters = counters;
2846 VM_BUG_ON(!new.frozen);
2847
2848 new.inuse = page->objects;
2849 new.frozen = freelist != NULL;
2850
2851 } while (!__cmpxchg_double_slab(s, page,
2852 freelist, counters,
2853 NULL, new.counters,
2854 "get_freelist"));
2855
2856 return freelist;
2857}
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
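/*
 * Slow path of the allocator: the lockless per-cpu freelist is empty, the
 * requested node does not match, or debugging is enabled.
 *
 * Refill the cpu freelist from the cpu slab, pull a slab off the percpu
 * partial list, take one from the node partial lists, or allocate a new
 * slab, in that order. Must be called with the kmem_cache_cpu pointer
 * pinned (preemption or migration disabled), as done by __slab_alloc()
 * and the bulk allocator.
 */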
2878static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2879 unsigned long addr, struct kmem_cache_cpu *c)
2880{
2881 void *freelist;
2882 struct page *page;
2883 unsigned long flags;
2884
2885 stat(s, ALLOC_SLOWPATH);
2886
2887reread_page:
2888
2889 page = READ_ONCE(c->page);
2890 if (!page) {
2891 /*
2892  * If the requested node is not online or has no normal memory,
2893  * just ignore the node constraint.
2894  */
2895 if (unlikely(node != NUMA_NO_NODE &&
2896 !node_isset(node, slab_nodes)))
2897 node = NUMA_NO_NODE;
2898 goto new_slab;
2899 }
2900redo:
2901
2902 if (unlikely(!node_match(page, node))) {
2903 /*
2904  * Same check as above, but node_match() being false already
2905  * implies node != NUMA_NO_NODE here.
2906  */
2907 if (!node_isset(node, slab_nodes)) {
2908 node = NUMA_NO_NODE;
2909 goto redo;
2910 } else {
2911 stat(s, ALLOC_NODE_MISMATCH);
2912 goto deactivate_slab;
2913 }
2914 }
2915
2916 /*
2917  * A task without access to pfmemalloc reserves must not consume
2918  * objects from a slab that was allocated from those reserves;
2919  * deactivate the slab and get another one.
2920  */
2921 if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
2922 goto deactivate_slab;
2923
2924
2925 local_lock_irqsave(&s->cpu_slab->lock, flags);
2926 if (unlikely(page != c->page)) {
2927 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2928 goto reread_page;
2929 }
2930 freelist = c->freelist;
2931 if (freelist)
2932 goto load_freelist;
2933
2934 freelist = get_freelist(s, page);
2935
2936 if (!freelist) {
2937 c->page = NULL;
2938 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2939 stat(s, DEACTIVATE_BYPASS);
2940 goto new_slab;
2941 }
2942
2943 stat(s, ALLOC_REFILL);
2944
2945load_freelist:
2946
2947 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
2948
2949 /*
2950  * freelist is the list of objects to hand out; c->page is the
2951  * page they come from. That page must be frozen for the
2952  * per-cpu fastpath to work.
2953  */
2954 VM_BUG_ON(!c->page->frozen);
2955 c->freelist = get_freepointer(s, freelist);
2956 c->tid = next_tid(c->tid);
2957 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2958 return freelist;
2959
2960deactivate_slab:
2961
2962 local_lock_irqsave(&s->cpu_slab->lock, flags);
2963 if (page != c->page) {
2964 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2965 goto reread_page;
2966 }
2967 freelist = c->freelist;
2968 c->page = NULL;
2969 c->freelist = NULL;
2970 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2971 deactivate_slab(s, page, freelist);
2972
2973new_slab:
2974
2975 if (slub_percpu_partial(c)) {
2976 local_lock_irqsave(&s->cpu_slab->lock, flags);
2977 if (unlikely(c->page)) {
2978 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2979 goto reread_page;
2980 }
2981 if (unlikely(!slub_percpu_partial(c))) {
2982 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2983
2984 goto new_objects;
2985 }
2986
2987 page = c->page = slub_percpu_partial(c);
2988 slub_set_percpu_partial(c, page);
2989 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2990 stat(s, CPU_PARTIAL_ALLOC);
2991 goto redo;
2992 }
2993
2994new_objects:
2995
2996 freelist = get_partial(s, gfpflags, node, &page);
2997 if (freelist)
2998 goto check_new_page;
2999
3000 slub_put_cpu_ptr(s->cpu_slab);
3001 page = new_slab(s, gfpflags, node);
3002 c = slub_get_cpu_ptr(s->cpu_slab);
3003
3004 if (unlikely(!page)) {
3005 slab_out_of_memory(s, gfpflags, node);
3006 return NULL;
3007 }
3008
3009 /*
3010  * No other reference to the new slab exists yet, so its freelist
3011  * can be taken without any cmpxchg protection.
3012  */
3013 freelist = page->freelist;
3014 page->freelist = NULL;
3015
3016 stat(s, ALLOC_SLAB);
3017
3018check_new_page:
3019
3020 if (kmem_cache_debug(s)) {
3021 if (!alloc_debug_processing(s, page, freelist, addr)) {
3022
3023 goto new_slab;
3024 } else {
3025 /*
3026  * For debug caches the freelist is never loaded into the cpu slab,
3027  * so every allocation passes through the checks above.
3028  */
3029 goto return_single;
3030 }
3031 }
3032
3033 if (unlikely(!pfmemalloc_match(page, gfpflags)))
3034 /*
3035  * The slab's pfmemalloc reserve status does not match this
3036  * request: hand out a single object and deactivate the slab.
3037  */
3038 goto return_single;
3039
3040retry_load_page:
3041
3042 local_lock_irqsave(&s->cpu_slab->lock, flags);
3043 if (unlikely(c->page)) {
3044 void *flush_freelist = c->freelist;
3045 struct page *flush_page = c->page;
3046
3047 c->page = NULL;
3048 c->freelist = NULL;
3049 c->tid = next_tid(c->tid);
3050
3051 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3052
3053 deactivate_slab(s, flush_page, flush_freelist);
3054
3055 stat(s, CPUSLAB_FLUSH);
3056
3057 goto retry_load_page;
3058 }
3059 c->page = page;
3060
3061 goto load_freelist;
3062
3063return_single:
3064
3065 deactivate_slab(s, page, get_freepointer(s, freelist));
3066 return freelist;
3067}
3068
3069
3070
3071
3072
3073
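/*
 * Wrapper for ___slab_alloc() for callers that have not yet pinned the
 * per-cpu pointer: disable preemption/migration around the call where the
 * kernel is preemptible.
 */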
3074static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
3075 unsigned long addr, struct kmem_cache_cpu *c)
3076{
3077 void *p;
3078
3079#ifdef CONFIG_PREEMPT_COUNT
3080
3081 /*
3082  * We may have been preempted and rescheduled onto a different cpu
3083  * before reaching this point, so refetch the per-cpu area pointer.
3084  */
3085 c = slub_get_cpu_ptr(s->cpu_slab);
3086#endif
3087
3088 p = ___slab_alloc(s, gfpflags, node, addr, c);
3089#ifdef CONFIG_PREEMPT_COUNT
3090 slub_put_cpu_ptr(s->cpu_slab);
3091#endif
3092 return p;
3093}
3094
3095
3096
3097
3098
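/*
 * Objects wiped at free time still contain the freelist pointer; zero it
 * out on allocation so the object handed out is fully initialized.
 */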
3099static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
3100 void *obj)
3101{
3102 if (unlikely(slab_want_init_on_free(s)) && obj)
3103 memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
3104 0, sizeof(void *));
3105}
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
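/*
 * Inlined fastpath so that the allocation entry points (kmalloc(),
 * kmem_cache_alloc(), ...) get the fastpath folded in without a function
 * call. The fastpath pops an object off the lockless per-cpu freelist with
 * a cmpxchg_double against the transaction id; anything else falls back to
 * __slab_alloc().
 */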
3117static __always_inline void *slab_alloc_node(struct kmem_cache *s,
3118 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
3119{
3120 void *object;
3121 struct kmem_cache_cpu *c;
3122 struct page *page;
3123 unsigned long tid;
3124 struct obj_cgroup *objcg = NULL;
3125 bool init = false;
3126
3127 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
3128 if (!s)
3129 return NULL;
3130
3131 object = kfence_alloc(s, orig_size, gfpflags);
3132 if (unlikely(object))
3133 goto out;
3134
3135redo:
3136
3137
3138
3139
3140
3141
3142 /*
3143  * Read the kmem_cache_cpu pointer and the tid with preemption enabled.
3144  * We may migrate between cpus in between; that is fine as long as the
3145  * tid and cpu_slab used by the final cmpxchg belong to the same cpu,
3146  * and any mismatch is caught by the tid check and retried.
3147  */
3148 c = raw_cpu_ptr(s->cpu_slab);
3149 tid = READ_ONCE(c->tid);
3150
3151
3152
3153
3154 /*
3155  * The irqless alloc/free scheme depends on fetching the tid before
3156  * c->freelist and c->page, so that a stale pair is detected by the
3157  * tid cmpxchg below and simply retried.
3158  */
3159 barrier();
3160
3161
3162
3163 /*
3164  * Transaction ids are unique per cpu and per operation on the per-cpu
3165  * freelist, so the cmpxchg_double can verify both that we are still on
3166  * the right processor and that nobody touched the freelist meanwhile.
3167  */
3168 object = c->freelist;
3169 page = c->page;
3170
3171
3172 /*
3173  * On PREEMPT_RT the lockless fastpath is not usable: the slow path
3174  * protects the per-cpu data with a local_lock instead of disabling
3175  * irqs, so always go through __slab_alloc() there.
3176  */
3177 if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
3178 unlikely(!object || !page || !node_match(page, node))) {
3179 object = __slab_alloc(s, gfpflags, node, addr, c);
3180 } else {
3181 void *next_object = get_freepointer_safe(s, object);
3182
3183
3184
3185
3186 /*
3187  * The cmpxchg_double only succeeds if no other operation happened
3188  * on this cpu's freelist and we are still on the same processor.
3189  * It atomically
3190  *   1. relocates the first pointer to the current per-cpu area,
3191  *   2. verifies that tid and freelist have not changed, and
3192  *   3. if unchanged, installs the new freelist head and tid.
3193  *
3194  * There are no lock semantics: this only protects against code
3195  * running on this cpu, not against access from other cpus.
3196  */
3197 if (unlikely(!this_cpu_cmpxchg_double(
3198 s->cpu_slab->freelist, s->cpu_slab->tid,
3199 object, tid,
3200 next_object, next_tid(tid)))) {
3201
3202 note_cmpxchg_failure("slab_alloc", s, tid);
3203 goto redo;
3204 }
3205 prefetch_freepointer(s, next_object);
3206 stat(s, ALLOC_FASTPATH);
3207 }
3208
3209 maybe_wipe_obj_freeptr(s, object);
3210 init = slab_want_init_on_alloc(gfpflags, s);
3211
3212out:
3213 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
3214
3215 return object;
3216}
3217
3218static __always_inline void *slab_alloc(struct kmem_cache *s,
3219 gfp_t gfpflags, unsigned long addr, size_t orig_size)
3220{
3221 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
3222}
3223
3224void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
3225{
3226 void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
3227
3228 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
3229 s->size, gfpflags);
3230
3231 return ret;
3232}
3233EXPORT_SYMBOL(kmem_cache_alloc);
3234
3235#ifdef CONFIG_TRACING
3236void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
3237{
3238 void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
3239 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
3240 ret = kasan_kmalloc(s, ret, size, gfpflags);
3241 return ret;
3242}
3243EXPORT_SYMBOL(kmem_cache_alloc_trace);
3244#endif
3245
3246#ifdef CONFIG_NUMA
3247void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
3248{
3249 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
3250
3251 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3252 s->object_size, s->size, gfpflags, node);
3253
3254 return ret;
3255}
3256EXPORT_SYMBOL(kmem_cache_alloc_node);
3257
3258#ifdef CONFIG_TRACING
3259void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
3260 gfp_t gfpflags,
3261 int node, size_t size)
3262{
3263 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
3264
3265 trace_kmalloc_node(_RET_IP_, ret,
3266 size, s->size, gfpflags, node);
3267
3268 ret = kasan_kmalloc(s, ret, size, gfpflags);
3269 return ret;
3270}
3271EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3272#endif
3273#endif
3274
3275
3276
3277
3278
3279
3280
3281
3282
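/*
 * Slow path handling for kmem_cache_free()/kfree(): free the object (or a
 * detached freelist of objects) directly into its slab page, moving the
 * slab between the full, percpu partial and node partial lists as needed,
 * and discard the slab once it is empty and the node already holds enough
 * partial slabs.
 */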
3283static void __slab_free(struct kmem_cache *s, struct page *page,
3284 void *head, void *tail, int cnt,
3285 unsigned long addr)
3286
3287{
3288 void *prior;
3289 int was_frozen;
3290 struct page new;
3291 unsigned long counters;
3292 struct kmem_cache_node *n = NULL;
3293 unsigned long flags;
3294
3295 stat(s, FREE_SLOWPATH);
3296
3297 if (kfence_free(head))
3298 return;
3299
3300 if (kmem_cache_debug(s) &&
3301 !free_debug_processing(s, page, head, tail, cnt, addr))
3302 return;
3303
3304 do {
3305 if (unlikely(n)) {
3306 spin_unlock_irqrestore(&n->list_lock, flags);
3307 n = NULL;
3308 }
3309 prior = page->freelist;
3310 counters = page->counters;
3311 set_freepointer(s, tail, prior);
3312 new.counters = counters;
3313 was_frozen = new.frozen;
3314 new.inuse -= cnt;
3315 if ((!new.inuse || !prior) && !was_frozen) {
3316
3317 if (kmem_cache_has_cpu_partial(s) && !prior) {
3318
3319
3320 /*
3321  * The slab was full (on no list) and will now be partially
3322  * empty. Defer the list move by freezing it to a cpu partial
3323  * list instead.
3324  */
3325 new.frozen = 1;
3326
3327 } else {
3328
3329 n = get_node(s, page_to_nid(page));
3330
3331
3332 /*
3333  * Speculatively take the list_lock. If the cmpxchg below
3334  * fails, the lock is dropped again without any list
3335  * processing; otherwise it serializes the list manipulation
3336  * that follows.
3337  */
3338 spin_lock_irqsave(&n->list_lock, flags);
3339
3340 }
3341 }
3342
3343 } while (!cmpxchg_double_slab(s, page,
3344 prior, counters,
3345 head, new.counters,
3346 "__slab_free"));
3347
3348 if (likely(!n)) {
3349
3350 if (likely(was_frozen)) {
3351
3352
3353
3354
3355 stat(s, FREE_FROZEN);
3356 } else if (new.frozen) {
3357
3358
3359
3360
3361 put_cpu_partial(s, page, 1);
3362 stat(s, CPU_PARTIAL_FREE);
3363 }
3364
3365 return;
3366 }
3367
3368 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3369 goto slab_empty;
3370
3371 /*
3372  * Objects remain in the slab. If it was previously full (on no list),
3373  * add it to the node partial list now.
3374  */
3375 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3376 remove_full(s, n, page);
3377 add_partial(n, page, DEACTIVATE_TO_TAIL);
3378 stat(s, FREE_ADD_PARTIAL);
3379 }
3380 spin_unlock_irqrestore(&n->list_lock, flags);
3381 return;
3382
3383slab_empty:
3384 if (prior) {
3385
3386
3387
3388 remove_partial(n, page);
3389 stat(s, FREE_REMOVE_PARTIAL);
3390 } else {
3391
3392 remove_full(s, n, page);
3393 }
3394
3395 spin_unlock_irqrestore(&n->list_lock, flags);
3396 stat(s, FREE_SLAB);
3397 discard_slab(s, page);
3398}
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
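/*
 * Fastpath free with forced inlining, shared by kfree() and
 * kmem_cache_free(). It can also free a whole detached freelist of objects
 * belonging to the same slab page: @head/@tail delimit the list and @cnt is
 * the number of objects on it (tail == NULL means a single object).
 */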
3415static __always_inline void do_slab_free(struct kmem_cache *s,
3416 struct page *page, void *head, void *tail,
3417 int cnt, unsigned long addr)
3418{
3419 void *tail_obj = tail ? : head;
3420 struct kmem_cache_cpu *c;
3421 unsigned long tid;
3422
3423
3424 if (!tail)
3425 memcg_slab_free_hook(s, &head, 1);
3426redo:
3427 /*
3428  * Determine the current cpu's per-cpu slab. The cpu may change
3429  * afterwards, but that does not matter: if we are no longer on the
3430  * same cpu when the cmpxchg runs, the tid check fails and we simply
3431  * retry.
3432  */
3433 c = raw_cpu_ptr(s->cpu_slab);
3434 tid = READ_ONCE(c->tid);
3435
3436
3437 barrier();
3438
3439 if (likely(page == c->page)) {
3440#ifndef CONFIG_PREEMPT_RT
3441 void **freelist = READ_ONCE(c->freelist);
3442
3443 set_freepointer(s, tail_obj, freelist);
3444
3445 if (unlikely(!this_cpu_cmpxchg_double(
3446 s->cpu_slab->freelist, s->cpu_slab->tid,
3447 freelist, tid,
3448 head, next_tid(tid)))) {
3449
3450 note_cmpxchg_failure("slab_free", s, tid);
3451 goto redo;
3452 }
3453#else
3454
3455 /*
3456  * On PREEMPT_RT the lockless fastpath cannot be used: a slowpath
3457  * holding the local_lock is not protected against a fastpath running
3458  * from an irq handler. Take the local_lock here instead of deferring
3459  * to __slab_free(), which would bypass the cpu freelist entirely.
3460  */
3461 void **freelist;
3462
3463 local_lock(&s->cpu_slab->lock);
3464 c = this_cpu_ptr(s->cpu_slab);
3465 if (unlikely(page != c->page)) {
3466 local_unlock(&s->cpu_slab->lock);
3467 goto redo;
3468 }
3469 tid = c->tid;
3470 freelist = c->freelist;
3471
3472 set_freepointer(s, tail_obj, freelist);
3473 c->freelist = head;
3474 c->tid = next_tid(tid);
3475
3476 local_unlock(&s->cpu_slab->lock);
3477#endif
3478 stat(s, FREE_FASTPATH);
3479 } else
3480 __slab_free(s, page, head, tail_obj, cnt, addr);
3481
3482}
3483
3484static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3485 void *head, void *tail, int cnt,
3486 unsigned long addr)
3487{
3488 /*
3489  * With KASAN enabled, slab_free_freelist_hook() may remove objects
3490  * whose reuse must be delayed (quarantine); only free what remains.
3491  */
3492 if (slab_free_freelist_hook(s, &head, &tail, &cnt))
3493 do_slab_free(s, page, head, tail, cnt, addr);
3494}
3495
3496#ifdef CONFIG_KASAN_GENERIC
3497void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3498{
3499 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3500}
3501#endif
3502
3503void kmem_cache_free(struct kmem_cache *s, void *x)
3504{
3505 s = cache_from_obj(s, x);
3506 if (!s)
3507 return;
3508 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3509 trace_kmem_cache_free(_RET_IP_, x, s->name);
3510}
3511EXPORT_SYMBOL(kmem_cache_free);
3512
3513struct detached_freelist {
3514 struct page *page;
3515 void *tail;
3516 void *freelist;
3517 int cnt;
3518 struct kmem_cache *s;
3519};
3520
3521static inline void free_nonslab_page(struct page *page, void *object)
3522{
3523 unsigned int order = compound_order(page);
3524
3525 VM_BUG_ON_PAGE(!PageCompound(page), page);
3526 kfree_hook(object);
3527 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
3528 __free_pages(page, order);
3529}
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
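/*
 * Scan the array of objects to free, with a limited look-ahead, and build a
 * detached freelist of the objects belonging to the same slab page, linked
 * directly through their free pointers. No synchronization is needed since
 * the objects are still owned by the caller. Returns the number of entries
 * of @p left to process (0 once the array has been consumed).
 */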
3543static inline
3544int build_detached_freelist(struct kmem_cache *s, size_t size,
3545 void **p, struct detached_freelist *df)
3546{
3547 size_t first_skipped_index = 0;
3548 int lookahead = 3;
3549 void *object;
3550 struct page *page;
3551
3552
3553 df->page = NULL;
3554
3555 do {
3556 object = p[--size];
3557
3558 } while (!object && size);
3559
3560 if (!object)
3561 return 0;
3562
3563 page = virt_to_head_page(object);
3564 if (!s) {
3565
3566 if (unlikely(!PageSlab(page))) {
3567 free_nonslab_page(page, object);
3568 p[size] = NULL;
3569 return size;
3570 }
3571
3572 df->s = page->slab_cache;
3573 } else {
3574 df->s = cache_from_obj(s, object);
3575 }
3576
3577 if (is_kfence_address(object)) {
3578 slab_free_hook(df->s, object, false);
3579 __kfence_free(object);
3580 p[size] = NULL;
3581 return size;
3582 }
3583
3584
3585 df->page = page;
3586 set_freepointer(df->s, object, NULL);
3587 df->tail = object;
3588 df->freelist = object;
3589 p[size] = NULL;
3590 df->cnt = 1;
3591
3592 while (size) {
3593 object = p[--size];
3594 if (!object)
3595 continue;
3596
3597
3598 if (df->page == virt_to_head_page(object)) {
3599
3600 set_freepointer(df->s, object, df->freelist);
3601 df->freelist = object;
3602 df->cnt++;
3603 p[size] = NULL;
3604
3605 continue;
3606 }
3607
3608
3609 if (!--lookahead)
3610 break;
3611
3612 if (!first_skipped_index)
3613 first_skipped_index = size + 1;
3614 }
3615
3616 return first_skipped_index;
3617}
3618
3619
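/* Note that interrupts must be enabled when calling this function. */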
3620void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3621{
3622 if (WARN_ON(!size))
3623 return;
3624
3625 memcg_slab_free_hook(s, p, size);
3626 do {
3627 struct detached_freelist df;
3628
3629 size = build_detached_freelist(s, size, p, &df);
3630 if (!df.page)
3631 continue;
3632
3633 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3634 } while (likely(size));
3635}
3636EXPORT_SYMBOL(kmem_cache_free_bulk);
3637
3638
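/* Note that interrupts must be enabled when calling this function. */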
3639int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3640 void **p)
3641{
3642 struct kmem_cache_cpu *c;
3643 int i;
3644 struct obj_cgroup *objcg = NULL;
3645
3646
3647 s = slab_pre_alloc_hook(s, &objcg, size, flags);
3648 if (unlikely(!s))
3649 return false;
3650
3651
3652
3653
3654
3655 c = slub_get_cpu_ptr(s->cpu_slab);
3656 local_lock_irq(&s->cpu_slab->lock);
3657
3658 for (i = 0; i < size; i++) {
3659 void *object = kfence_alloc(s, s->object_size, flags);
3660
3661 if (unlikely(object)) {
3662 p[i] = object;
3663 continue;
3664 }
3665
3666 object = c->freelist;
3667 if (unlikely(!object)) {
3668
3669
3670 /*
3671  * Earlier iterations may have taken objects off c->freelist via the
3672  * manual fastpath below without bumping c->tid. ___slab_alloc() can
3673  * re-enable interrupts while allocating, so bump the tid first.
3674  */
3675 c->tid = next_tid(c->tid);
3676
3677 local_unlock_irq(&s->cpu_slab->lock);
3678
3679
3680
3681
3682
3683 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3684 _RET_IP_, c);
3685 if (unlikely(!p[i]))
3686 goto error;
3687
3688 c = this_cpu_ptr(s->cpu_slab);
3689 maybe_wipe_obj_freeptr(s, p[i]);
3690
3691 local_lock_irq(&s->cpu_slab->lock);
3692
3693 continue;
3694 }
3695 c->freelist = get_freepointer(s, object);
3696 p[i] = object;
3697 maybe_wipe_obj_freeptr(s, p[i]);
3698 }
3699 c->tid = next_tid(c->tid);
3700 local_unlock_irq(&s->cpu_slab->lock);
3701 slub_put_cpu_ptr(s->cpu_slab);
3702
3703
3704
3705
3706
3707 slab_post_alloc_hook(s, objcg, flags, size, p,
3708 slab_want_init_on_alloc(flags, s));
3709 return i;
3710error:
3711 slub_put_cpu_ptr(s->cpu_slab);
3712 slab_post_alloc_hook(s, objcg, flags, i, p, false);
3713 __kmem_cache_free_bulk(s, i, p);
3714 return 0;
3715}
3716EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
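/*
 * Minimum / maximum order of slab pages and the minimum number of objects
 * per slab. Higher orders reduce the number of partial slabs and of
 * list_lock acquisitions at the cost of higher-order allocations and more
 * internal fragmentation. All three can be overridden on the command line
 * via slub_min_order=, slub_max_order= and slub_min_objects=.
 */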
3738static unsigned int slub_min_order;
3739static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3740static unsigned int slub_min_objects;
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
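/*
 * Calculate the slab order for a given object size and a required minimum
 * number of objects per slab. The first order is accepted whose leftover
 * space (slab_size % size) is at most 1/fract_leftover of the slab.
 *
 * Illustrative example (assuming 4 KiB pages, size = 700, min_objects = 8,
 * fract_leftover = 16): the loop starts at order 1 because
 * get_order(8 * 700) = 1, and accepts it since 8192 % 700 = 492 is below
 * 8192 / 16 = 512.
 */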
3767static inline unsigned int slab_order(unsigned int size,
3768 unsigned int min_objects, unsigned int max_order,
3769 unsigned int fract_leftover)
3770{
3771 unsigned int min_order = slub_min_order;
3772 unsigned int order;
3773
3774 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3775 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3776
3777 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3778 order <= max_order; order++) {
3779
3780 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3781 unsigned int rem;
3782
3783 rem = slab_size % size;
3784
3785 if (rem <= slab_size / fract_leftover)
3786 break;
3787 }
3788
3789 return order;
3790}
3791
3792static inline int calculate_order(unsigned int size)
3793{
3794 unsigned int order;
3795 unsigned int min_objects;
3796 unsigned int max_objects;
3797 unsigned int nr_cpus;
3798
3799
3800
3801 /*
3802  * Attempt to find the best configuration for a slab: start from the
3803  * ideal layout and back off gradually, first by accepting more wasted
3804  * space per slab (fraction 16 down to 4) and then by reducing the
3805  * minimum number of objects per slab.
3806  */
3807 min_objects = slub_min_objects;
3808 if (!min_objects) {
3809
3810
3811 /*
3812  * Some architectures only update the present cpu mask when
3813  * onlining cpus, so do not trust it when it reads 1; but
3814  * nr_cpu_ids can also be far larger than will ever be onlined.
3815  * Scaling the default object count with fls() of the cpu count
3816  * is a compromise between the two.
3817  */
3818 nr_cpus = num_present_cpus();
3819 if (nr_cpus <= 1)
3820 nr_cpus = nr_cpu_ids;
3821 min_objects = 4 * (fls(nr_cpus) + 1);
3822 }
3823 max_objects = order_objects(slub_max_order, size);
3824 min_objects = min(min_objects, max_objects);
3825
3826 while (min_objects > 1) {
3827 unsigned int fraction;
3828
3829 fraction = 16;
3830 while (fraction >= 4) {
3831 order = slab_order(size, min_objects,
3832 slub_max_order, fraction);
3833 if (order <= slub_max_order)
3834 return order;
3835 fraction /= 2;
3836 }
3837 min_objects--;
3838 }
3839
3840
3841
3842
3843
3844 order = slab_order(size, 1, slub_max_order, 1);
3845 if (order <= slub_max_order)
3846 return order;
3847
3848
3849
3850
3851 order = slab_order(size, 1, MAX_ORDER, 1);
3852 if (order < MAX_ORDER)
3853 return order;
3854 return -ENOSYS;
3855}
3856
3857static void
3858init_kmem_cache_node(struct kmem_cache_node *n)
3859{
3860 n->nr_partial = 0;
3861 spin_lock_init(&n->list_lock);
3862 INIT_LIST_HEAD(&n->partial);
3863#ifdef CONFIG_SLUB_DEBUG
3864 atomic_long_set(&n->nr_slabs, 0);
3865 atomic_long_set(&n->total_objects, 0);
3866 INIT_LIST_HEAD(&n->full);
3867#endif
3868}
3869
3870static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3871{
3872 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3873 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3874
3875
3876
3877
3878
3879 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3880 2 * sizeof(void *));
3881
3882 if (!s->cpu_slab)
3883 return 0;
3884
3885 init_kmem_cache_cpus(s);
3886
3887 return 1;
3888}
3889
3890static struct kmem_cache *kmem_cache_node;
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
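/*
 * No kmalloc_node() is available yet, so carve the kmem_cache_node
 * structure by hand out of the first slab allocated on that node. Only
 * used while bootstrapping the kmem_cache_node cache itself, when no
 * concurrent access is possible.
 */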
3901static void early_kmem_cache_node_alloc(int node)
3902{
3903 struct page *page;
3904 struct kmem_cache_node *n;
3905
3906 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3907
3908 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3909
3910 BUG_ON(!page);
3911 if (page_to_nid(page) != node) {
3912 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3913 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3914 }
3915
3916 n = page->freelist;
3917 BUG_ON(!n);
3918#ifdef CONFIG_SLUB_DEBUG
3919 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3920 init_tracking(kmem_cache_node, n);
3921#endif
3922 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
3923 page->freelist = get_freepointer(kmem_cache_node, n);
3924 page->inuse = 1;
3925 page->frozen = 0;
3926 kmem_cache_node->node[node] = n;
3927 init_kmem_cache_node(n);
3928 inc_slabs_node(kmem_cache_node, node, page->objects);
3929
3930 /*
3931  * No locks are needed here: the node has just been initialized and
3932  * there can be no concurrent access yet.
3933  */
3934 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3935}
3936
3937static void free_kmem_cache_nodes(struct kmem_cache *s)
3938{
3939 int node;
3940 struct kmem_cache_node *n;
3941
3942 for_each_kmem_cache_node(s, node, n) {
3943 s->node[node] = NULL;
3944 kmem_cache_free(kmem_cache_node, n);
3945 }
3946}
3947
3948void __kmem_cache_release(struct kmem_cache *s)
3949{
3950 cache_random_seq_destroy(s);
3951 free_percpu(s->cpu_slab);
3952 free_kmem_cache_nodes(s);
3953}
3954
3955static int init_kmem_cache_nodes(struct kmem_cache *s)
3956{
3957 int node;
3958
3959 for_each_node_mask(node, slab_nodes) {
3960 struct kmem_cache_node *n;
3961
3962 if (slab_state == DOWN) {
3963 early_kmem_cache_node_alloc(node);
3964 continue;
3965 }
3966 n = kmem_cache_alloc_node(kmem_cache_node,
3967 GFP_KERNEL, node);
3968
3969 if (!n) {
3970 free_kmem_cache_nodes(s);
3971 return 0;
3972 }
3973
3974 init_kmem_cache_node(n);
3975 s->node[node] = n;
3976 }
3977 return 1;
3978}
3979
3980static void set_min_partial(struct kmem_cache *s, unsigned long min)
3981{
3982 if (min < MIN_PARTIAL)
3983 min = MIN_PARTIAL;
3984 else if (min > MAX_PARTIAL)
3985 min = MAX_PARTIAL;
3986 s->min_partial = min;
3987}
3988
3989static void set_cpu_partial(struct kmem_cache *s)
3990{
3991#ifdef CONFIG_SLUB_CPU_PARTIAL
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002 /*
4003  * cpu_partial determines the maximum number of objects kept on a
4004  * processor's percpu partial lists. Larger values let more allocations
4005  * and frees be satisfied without the node list_lock, at the cost of
4006  * more memory parked on those lists, so the limit shrinks as the
4007  * object size grows.
4008  */
4009 if (!kmem_cache_has_cpu_partial(s))
4010 slub_set_cpu_partial(s, 0);
4011 else if (s->size >= PAGE_SIZE)
4012 slub_set_cpu_partial(s, 2);
4013 else if (s->size >= 1024)
4014 slub_set_cpu_partial(s, 6);
4015 else if (s->size >= 256)
4016 slub_set_cpu_partial(s, 13);
4017 else
4018 slub_set_cpu_partial(s, 30);
4019#endif
4020}
4021
4022
4023
4024
4025
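/*
 * calculate_sizes() determines the order and the layout of the metadata
 * and data within a slab object.
 */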
4026static int calculate_sizes(struct kmem_cache *s, int forced_order)
4027{
4028 slab_flags_t flags = s->flags;
4029 unsigned int size = s->object_size;
4030 unsigned int order;
4031
4032
4033
4034
4035
4036
4037 size = ALIGN(size, sizeof(void *));
4038
4039#ifdef CONFIG_SLUB_DEBUG
4040
4041
4042
4043
4044
4045 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
4046 !s->ctor)
4047 s->flags |= __OBJECT_POISON;
4048 else
4049 s->flags &= ~__OBJECT_POISON;
4050
4051
4052
4053
4054
4055
4056
4057 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
4058 size += sizeof(void *);
4059#endif
4060
4061
4062
4063
4064
4065 s->inuse = size;
4066
4067 if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
4068 ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
4069 s->ctor) {
4070
4071
4072
4073
4074
4075 /*
4076  * Relocate the free pointer to after the object whenever the first
4077  * word of the object may not be overwritten on free: RCU-typesafe
4078  * caches, caches with a constructor, poisoned objects, and red-zoned
4079  * objects smaller than a word.
4080  *
4081  * freeptr_outside_object() relies on s->offset >= s->inuse meaning
4082  * the free pointer lies outside the object.
4083  */
4084 s->offset = size;
4085 size += sizeof(void *);
4086 } else {
4087
4088
4089
4090
4091
4092 s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
4093 }
4094
4095#ifdef CONFIG_SLUB_DEBUG
4096 if (flags & SLAB_STORE_USER)
4097
4098
4099
4100
4101 size += 2 * sizeof(struct track);
4102#endif
4103
4104 kasan_cache_create(s, &size, &s->flags);
4105#ifdef CONFIG_SLUB_DEBUG
4106 if (flags & SLAB_RED_ZONE) {
4107
4108
4109 /*
4110  * Add padding before the object (the left red zone) so that writes
4111  * before the start of the object are caught instead of silently
4112  * corrupting the previous object's metadata.
4113  */
4114 size += sizeof(void *);
4115
4116 s->red_left_pad = sizeof(void *);
4117 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
4118 size += s->red_left_pad;
4119 }
4120#endif
4121
4122
4123
4124
4125
4126
4127 size = ALIGN(size, s->align);
4128 s->size = size;
4129 s->reciprocal_size = reciprocal_value(size);
4130 if (forced_order >= 0)
4131 order = forced_order;
4132 else
4133 order = calculate_order(size);
4134
4135 if ((int)order < 0)
4136 return 0;
4137
4138 s->allocflags = 0;
4139 if (order)
4140 s->allocflags |= __GFP_COMP;
4141
4142 if (s->flags & SLAB_CACHE_DMA)
4143 s->allocflags |= GFP_DMA;
4144
4145 if (s->flags & SLAB_CACHE_DMA32)
4146 s->allocflags |= GFP_DMA32;
4147
4148 if (s->flags & SLAB_RECLAIM_ACCOUNT)
4149 s->allocflags |= __GFP_RECLAIMABLE;
4150
4151
4152
4153
4154 s->oo = oo_make(order, size);
4155 s->min = oo_make(get_order(size), size);
4156 if (oo_objects(s->oo) > oo_objects(s->max))
4157 s->max = s->oo;
4158
4159 return !!oo_objects(s->oo);
4160}
4161
4162static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
4163{
4164 s->flags = kmem_cache_flags(s->size, flags, s->name);
4165#ifdef CONFIG_SLAB_FREELIST_HARDENED
4166 s->random = get_random_long();
4167#endif
4168
4169 if (!calculate_sizes(s, -1))
4170 goto error;
4171 if (disable_higher_order_debug) {
4172 /*
4173  * If enabling the debug metadata raised the minimum slab order,
4174  * drop those flags and recompute the layout.
4175  */
4176 if (get_order(s->size) > get_order(s->object_size)) {
4177 s->flags &= ~DEBUG_METADATA_FLAGS;
4178 s->offset = 0;
4179 if (!calculate_sizes(s, -1))
4180 goto error;
4181 }
4182 }
4183
4184#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
4185 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
4186 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
4187
4188 s->flags |= __CMPXCHG_DOUBLE;
4189#endif
4190
4191 /*
4192  * The larger the object size, the more slabs we want on the node
4193  * partial lists to avoid pounding the page allocator.
4194  */
4195 set_min_partial(s, ilog2(s->size) / 2);
4196
4197 set_cpu_partial(s);
4198
4199#ifdef CONFIG_NUMA
4200 s->remote_node_defrag_ratio = 1000;
4201#endif
4202
4203
4204 if (slab_state >= UP) {
4205 if (init_cache_random_seq(s))
4206 goto error;
4207 }
4208
4209 if (!init_kmem_cache_nodes(s))
4210 goto error;
4211
4212 if (alloc_kmem_cache_cpus(s))
4213 return 0;
4214
4215error:
4216 __kmem_cache_release(s);
4217 return -EINVAL;
4218}
4219
4220static void list_slab_objects(struct kmem_cache *s, struct page *page,
4221 const char *text)
4222{
4223#ifdef CONFIG_SLUB_DEBUG
4224 void *addr = page_address(page);
4225 unsigned long flags;
4226 unsigned long *map;
4227 void *p;
4228
4229 slab_err(s, page, text, s->name);
4230 slab_lock(page, &flags);
4231
4232 map = get_map(s, page);
4233 for_each_object(p, s, addr, page->objects) {
4234
4235 if (!test_bit(__obj_to_index(s, addr, p), map)) {
4236 pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
4237 print_tracking(s, p);
4238 }
4239 }
4240 put_map(map);
4241 slab_unlock(page, &flags);
4242#endif
4243}
4244
4245
4246
4247
4248
4249
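/*
 * Attempt to free all partial slabs on a node, called during cache
 * shutdown when we must be the last user of the kmem_cache.
 */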
4250static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
4251{
4252 LIST_HEAD(discard);
4253 struct page *page, *h;
4254
4255 BUG_ON(irqs_disabled());
4256 spin_lock_irq(&n->list_lock);
4257 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
4258 if (!page->inuse) {
4259 remove_partial(n, page);
4260 list_add(&page->slab_list, &discard);
4261 } else {
4262 list_slab_objects(s, page,
4263 "Objects remaining in %s on __kmem_cache_shutdown()");
4264 }
4265 }
4266 spin_unlock_irq(&n->list_lock);
4267
4268 list_for_each_entry_safe(page, h, &discard, slab_list)
4269 discard_slab(s, page);
4270}
4271
4272bool __kmem_cache_empty(struct kmem_cache *s)
4273{
4274 int node;
4275 struct kmem_cache_node *n;
4276
4277 for_each_kmem_cache_node(s, node, n)
4278 if (n->nr_partial || slabs_node(s, node))
4279 return false;
4280 return true;
4281}
4282
4283
4284
4285
4286int __kmem_cache_shutdown(struct kmem_cache *s)
4287{
4288 int node;
4289 struct kmem_cache_node *n;
4290
4291 flush_all_cpus_locked(s);
4292
4293 for_each_kmem_cache_node(s, node, n) {
4294 free_partial(s, n);
4295 if (n->nr_partial || slabs_node(s, node))
4296 return 1;
4297 }
4298 return 0;
4299}
4300
4301#ifdef CONFIG_PRINTK
4302void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
4303{
4304 void *base;
4305 int __maybe_unused i;
4306 unsigned int objnr;
4307 void *objp;
4308 void *objp0;
4309 struct kmem_cache *s = page->slab_cache;
4310 struct track __maybe_unused *trackp;
4311
4312 kpp->kp_ptr = object;
4313 kpp->kp_page = page;
4314 kpp->kp_slab_cache = s;
4315 base = page_address(page);
4316 objp0 = kasan_reset_tag(object);
4317#ifdef CONFIG_SLUB_DEBUG
4318 objp = restore_red_left(s, objp0);
4319#else
4320 objp = objp0;
4321#endif
4322 objnr = obj_to_index(s, page, objp);
4323 kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
4324 objp = base + s->size * objnr;
4325 kpp->kp_objp = objp;
4326 if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
4327 !(s->flags & SLAB_STORE_USER))
4328 return;
4329#ifdef CONFIG_SLUB_DEBUG
4330 objp = fixup_red_left(s, objp);
4331 trackp = get_track(s, objp, TRACK_ALLOC);
4332 kpp->kp_ret = (void *)trackp->addr;
4333#ifdef CONFIG_STACKTRACE
4334 for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
4335 kpp->kp_stack[i] = (void *)trackp->addrs[i];
4336 if (!kpp->kp_stack[i])
4337 break;
4338 }
4339
4340 trackp = get_track(s, objp, TRACK_FREE);
4341 for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
4342 kpp->kp_free_stack[i] = (void *)trackp->addrs[i];
4343 if (!kpp->kp_free_stack[i])
4344 break;
4345 }
4346#endif
4347#endif
4348}
4349#endif
4350
4351
4352
4353
4354
4355static int __init setup_slub_min_order(char *str)
4356{
4357 get_option(&str, (int *)&slub_min_order);
4358
4359 return 1;
4360}
4361
4362__setup("slub_min_order=", setup_slub_min_order);
4363
4364static int __init setup_slub_max_order(char *str)
4365{
4366 get_option(&str, (int *)&slub_max_order);
4367 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4368
4369 return 1;
4370}
4371
4372__setup("slub_max_order=", setup_slub_max_order);
4373
4374static int __init setup_slub_min_objects(char *str)
4375{
4376 get_option(&str, (int *)&slub_min_objects);
4377
4378 return 1;
4379}
4380
4381__setup("slub_min_objects=", setup_slub_min_objects);
4382
4383void *__kmalloc(size_t size, gfp_t flags)
4384{
4385 struct kmem_cache *s;
4386 void *ret;
4387
4388 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4389 return kmalloc_large(size, flags);
4390
4391 s = kmalloc_slab(size, flags);
4392
4393 if (unlikely(ZERO_OR_NULL_PTR(s)))
4394 return s;
4395
4396 ret = slab_alloc(s, flags, _RET_IP_, size);
4397
4398 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
4399
4400 ret = kasan_kmalloc(s, ret, size, flags);
4401
4402 return ret;
4403}
4404EXPORT_SYMBOL(__kmalloc);
4405
4406#ifdef CONFIG_NUMA
4407static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4408{
4409 struct page *page;
4410 void *ptr = NULL;
4411 unsigned int order = get_order(size);
4412
4413 flags |= __GFP_COMP;
4414 page = alloc_pages_node(node, flags, order);
4415 if (page) {
4416 ptr = page_address(page);
4417 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4418 PAGE_SIZE << order);
4419 }
4420
4421 return kmalloc_large_node_hook(ptr, size, flags);
4422}
4423
4424void *__kmalloc_node(size_t size, gfp_t flags, int node)
4425{
4426 struct kmem_cache *s;
4427 void *ret;
4428
4429 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4430 ret = kmalloc_large_node(size, flags, node);
4431
4432 trace_kmalloc_node(_RET_IP_, ret,
4433 size, PAGE_SIZE << get_order(size),
4434 flags, node);
4435
4436 return ret;
4437 }
4438
4439 s = kmalloc_slab(size, flags);
4440
4441 if (unlikely(ZERO_OR_NULL_PTR(s)))
4442 return s;
4443
4444 ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
4445
4446 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4447
4448 ret = kasan_kmalloc(s, ret, size, flags);
4449
4450 return ret;
4451}
4452EXPORT_SYMBOL(__kmalloc_node);
4453#endif
4454
4455#ifdef CONFIG_HARDENED_USERCOPY
4456
4457
4458
4459
4460
4461
4462
4463
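/*
 * Validate a usercopy to or from a SLUB object: reject copies whose offset
 * or size falls outside the object, or outside the cache's declared
 * usercopy region.
 */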
4464void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4465 bool to_user)
4466{
4467 struct kmem_cache *s;
4468 unsigned int offset;
4469 size_t object_size;
4470 bool is_kfence = is_kfence_address(ptr);
4471
4472 ptr = kasan_reset_tag(ptr);
4473
4474
4475 s = page->slab_cache;
4476
4477
4478 if (ptr < page_address(page))
4479 usercopy_abort("SLUB object not in SLUB page?!", NULL,
4480 to_user, 0, n);
4481
4482
4483 if (is_kfence)
4484 offset = ptr - kfence_object_start(ptr);
4485 else
4486 offset = (ptr - page_address(page)) % s->size;
4487
4488
4489 if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4490 if (offset < s->red_left_pad)
4491 usercopy_abort("SLUB object in left red zone",
4492 s->name, to_user, offset, n);
4493 offset -= s->red_left_pad;
4494 }
4495
4496
4497 if (offset >= s->useroffset &&
4498 offset - s->useroffset <= s->usersize &&
4499 n <= s->useroffset - offset + s->usersize)
4500 return;
4501
4502
4503
4504
4505
4506
4507
4508 object_size = slab_ksize(s);
4509 if (usercopy_fallback &&
4510 offset <= object_size && n <= object_size - offset) {
4511 usercopy_warn("SLUB object", s->name, to_user, offset, n);
4512 return;
4513 }
4514
4515 usercopy_abort("SLUB object", s->name, to_user, offset, n);
4516}
4517#endif
4518
4519size_t __ksize(const void *object)
4520{
4521 struct page *page;
4522
4523 if (unlikely(object == ZERO_SIZE_PTR))
4524 return 0;
4525
4526 page = virt_to_head_page(object);
4527
4528 if (unlikely(!PageSlab(page))) {
4529 WARN_ON(!PageCompound(page));
4530 return page_size(page);
4531 }
4532
4533 return slab_ksize(page->slab_cache);
4534}
4535EXPORT_SYMBOL(__ksize);
4536
4537void kfree(const void *x)
4538{
4539 struct page *page;
4540 void *object = (void *)x;
4541
4542 trace_kfree(_RET_IP_, x);
4543
4544 if (unlikely(ZERO_OR_NULL_PTR(x)))
4545 return;
4546
4547 page = virt_to_head_page(x);
4548 if (unlikely(!PageSlab(page))) {
4549 free_nonslab_page(page, object);
4550 return;
4551 }
4552 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4553}
4554EXPORT_SYMBOL(kfree);
4555
4556#define SHRINK_PROMOTE_MAX 32
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
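/*
 * Shrink the cache: discard empty slabs and reorder the node partial lists
 * so that the slabs with the most objects in use come first. New
 * allocations then fill those up, while nearly empty slabs drift to the
 * tail where they have the best chance of being emptied and freed.
 */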
4567static int __kmem_cache_do_shrink(struct kmem_cache *s)
4568{
4569 int node;
4570 int i;
4571 struct kmem_cache_node *n;
4572 struct page *page;
4573 struct page *t;
4574 struct list_head discard;
4575 struct list_head promote[SHRINK_PROMOTE_MAX];
4576 unsigned long flags;
4577 int ret = 0;
4578
4579 for_each_kmem_cache_node(s, node, n) {
4580 INIT_LIST_HEAD(&discard);
4581 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4582 INIT_LIST_HEAD(promote + i);
4583
4584 spin_lock_irqsave(&n->list_lock, flags);
4585
4586
4587
4588
4589
4590
4591
4592 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4593 int free = page->objects - page->inuse;
4594
4595
4596 barrier();
4597
4598
4599 BUG_ON(free <= 0);
4600
4601 if (free == page->objects) {
4602 list_move(&page->slab_list, &discard);
4603 n->nr_partial--;
4604 } else if (free <= SHRINK_PROMOTE_MAX)
4605 list_move(&page->slab_list, promote + free - 1);
4606 }
4607
4608
4609
4610
4611
4612 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4613 list_splice(promote + i, &n->partial);
4614
4615 spin_unlock_irqrestore(&n->list_lock, flags);
4616
4617
4618 list_for_each_entry_safe(page, t, &discard, slab_list)
4619 discard_slab(s, page);
4620
4621 if (slabs_node(s, node))
4622 ret = 1;
4623 }
4624
4625 return ret;
4626}
4627
4628int __kmem_cache_shrink(struct kmem_cache *s)
4629{
4630 flush_all(s);
4631 return __kmem_cache_do_shrink(s);
4632}
4633
4634static int slab_mem_going_offline_callback(void *arg)
4635{
4636 struct kmem_cache *s;
4637
4638 mutex_lock(&slab_mutex);
4639 list_for_each_entry(s, &slab_caches, list) {
4640 flush_all_cpus_locked(s);
4641 __kmem_cache_do_shrink(s);
4642 }
4643 mutex_unlock(&slab_mutex);
4644
4645 return 0;
4646}
4647
4648static void slab_mem_offline_callback(void *arg)
4649{
4650 struct memory_notify *marg = arg;
4651 int offline_node;
4652
4653 offline_node = marg->status_change_nid_normal;
4654
4655
4656
4657
4658
4659 if (offline_node < 0)
4660 return;
4661
4662 mutex_lock(&slab_mutex);
4663 node_clear(offline_node, slab_nodes);
4664
4665
4666
4667
4668
4669 mutex_unlock(&slab_mutex);
4670}
4671
4672static int slab_mem_going_online_callback(void *arg)
4673{
4674 struct kmem_cache_node *n;
4675 struct kmem_cache *s;
4676 struct memory_notify *marg = arg;
4677 int nid = marg->status_change_nid_normal;
4678 int ret = 0;
4679
4680
4681
4682
4683
4684 if (nid < 0)
4685 return 0;
4686
4687
4688
4689
4690
4691
4692 mutex_lock(&slab_mutex);
4693 list_for_each_entry(s, &slab_caches, list) {
4694
4695
4696
4697
4698 if (get_node(s, nid))
4699 continue;
4700
4701
4702
4703
4704
4705 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4706 if (!n) {
4707 ret = -ENOMEM;
4708 goto out;
4709 }
4710 init_kmem_cache_node(n);
4711 s->node[nid] = n;
4712 }
4713
4714
4715
4716
4717 node_set(nid, slab_nodes);
4718out:
4719 mutex_unlock(&slab_mutex);
4720 return ret;
4721}
4722
4723static int slab_memory_callback(struct notifier_block *self,
4724 unsigned long action, void *arg)
4725{
4726 int ret = 0;
4727
4728 switch (action) {
4729 case MEM_GOING_ONLINE:
4730 ret = slab_mem_going_online_callback(arg);
4731 break;
4732 case MEM_GOING_OFFLINE:
4733 ret = slab_mem_going_offline_callback(arg);
4734 break;
4735 case MEM_OFFLINE:
4736 case MEM_CANCEL_ONLINE:
4737 slab_mem_offline_callback(arg);
4738 break;
4739 case MEM_ONLINE:
4740 case MEM_CANCEL_OFFLINE:
4741 break;
4742 }
4743 if (ret)
4744 ret = notifier_from_errno(ret);
4745 else
4746 ret = NOTIFY_OK;
4747 return ret;
4748}
4749
4750static struct notifier_block slab_memory_callback_nb = {
4751 .notifier_call = slab_memory_callback,
4752 .priority = SLAB_CALLBACK_PRI,
4753};
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
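/*
 * Replace a statically allocated boot-time kmem_cache structure with one
 * allocated from kmem_cache itself, and repoint the slab pages that still
 * reference the static copy.
 */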
4765static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4766{
4767 int node;
4768 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4769 struct kmem_cache_node *n;
4770
4771 memcpy(s, static_cache, kmem_cache->object_size);
4772
4773
4774
4775
4776
4777
4778 __flush_cpu_slab(s, smp_processor_id());
4779 for_each_kmem_cache_node(s, node, n) {
4780 struct page *p;
4781
4782 list_for_each_entry(p, &n->partial, slab_list)
4783 p->slab_cache = s;
4784
4785#ifdef CONFIG_SLUB_DEBUG
4786 list_for_each_entry(p, &n->full, slab_list)
4787 p->slab_cache = s;
4788#endif
4789 }
4790 list_add(&s->list, &slab_caches);
4791 return s;
4792}
4793
4794void __init kmem_cache_init(void)
4795{
4796 static __initdata struct kmem_cache boot_kmem_cache,
4797 boot_kmem_cache_node;
4798 int node;
4799
4800 if (debug_guardpage_minorder())
4801 slub_max_order = 0;
4802
4803
4804 if (__slub_debug_enabled())
4805 no_hash_pointers_enable(NULL);
4806
4807 kmem_cache_node = &boot_kmem_cache_node;
4808 kmem_cache = &boot_kmem_cache;
4809
4810
4811
4812
4813
4814 for_each_node_state(node, N_NORMAL_MEMORY)
4815 node_set(node, slab_nodes);
4816
4817 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4818 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4819
4820 register_hotmemory_notifier(&slab_memory_callback_nb);
4821
4822
4823 slab_state = PARTIAL;
4824
4825 create_boot_cache(kmem_cache, "kmem_cache",
4826 offsetof(struct kmem_cache, node) +
4827 nr_node_ids * sizeof(struct kmem_cache_node *),
4828 SLAB_HWCACHE_ALIGN, 0, 0);
4829
4830 kmem_cache = bootstrap(&boot_kmem_cache);
4831 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4832
4833
4834 setup_kmalloc_cache_index_table();
4835 create_kmalloc_caches(0);
4836
4837
4838 init_freelist_randomization();
4839
4840 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4841 slub_cpu_dead);
4842
4843 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4844 cache_line_size(),
4845 slub_min_order, slub_max_order, slub_min_objects,
4846 nr_cpu_ids, nr_node_ids);
4847}
4848
4849void __init kmem_cache_init_late(void)
4850{
4851}
4852
4853struct kmem_cache *
4854__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4855 slab_flags_t flags, void (*ctor)(void *))
4856{
4857 struct kmem_cache *s;
4858
4859 s = find_mergeable(size, align, flags, name, ctor);
4860 if (s) {
4861 s->refcount++;
4862
4863
4864
4865
4866
4867 s->object_size = max(s->object_size, size);
4868 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4869
4870 if (sysfs_slab_alias(s, name)) {
4871 s->refcount--;
4872 s = NULL;
4873 }
4874 }
4875
4876 return s;
4877}
4878
4879int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4880{
4881 int err;
4882
4883 err = kmem_cache_open(s, flags);
4884 if (err)
4885 return err;
4886
4887
4888 if (slab_state <= UP)
4889 return 0;
4890
4891 err = sysfs_slab_add(s);
4892 if (err) {
4893 __kmem_cache_release(s);
4894 return err;
4895 }
4896
4897 if (s->flags & SLAB_STORE_USER)
4898 debugfs_slab_add(s);
4899
4900 return 0;
4901}
4902
4903void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4904{
4905 struct kmem_cache *s;
4906 void *ret;
4907
4908 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4909 return kmalloc_large(size, gfpflags);
4910
4911 s = kmalloc_slab(size, gfpflags);
4912
4913 if (unlikely(ZERO_OR_NULL_PTR(s)))
4914 return s;
4915
4916 ret = slab_alloc(s, gfpflags, caller, size);
4917
4918
4919 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4920
4921 return ret;
4922}
4923EXPORT_SYMBOL(__kmalloc_track_caller);
4924
4925#ifdef CONFIG_NUMA
4926void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4927 int node, unsigned long caller)
4928{
4929 struct kmem_cache *s;
4930 void *ret;
4931
4932 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4933 ret = kmalloc_large_node(size, gfpflags, node);
4934
4935 trace_kmalloc_node(caller, ret,
4936 size, PAGE_SIZE << get_order(size),
4937 gfpflags, node);
4938
4939 return ret;
4940 }
4941
4942 s = kmalloc_slab(size, gfpflags);
4943
4944 if (unlikely(ZERO_OR_NULL_PTR(s)))
4945 return s;
4946
4947 ret = slab_alloc_node(s, gfpflags, node, caller, size);
4948
4949
4950 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4951
4952 return ret;
4953}
4954EXPORT_SYMBOL(__kmalloc_node_track_caller);
4955#endif
4956
4957#ifdef CONFIG_SYSFS
4958static int count_inuse(struct page *page)
4959{
4960 return page->inuse;
4961}
4962
4963static int count_total(struct page *page)
4964{
4965 return page->objects;
4966}
4967#endif
4968
4969#ifdef CONFIG_SLUB_DEBUG
4970static void validate_slab(struct kmem_cache *s, struct page *page,
4971 unsigned long *obj_map)
4972{
4973 void *p;
4974 void *addr = page_address(page);
4975 unsigned long flags;
4976
4977 slab_lock(page, &flags);
4978
4979 if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4980 goto unlock;
4981
4982
4983 __fill_map(obj_map, s, page);
4984 for_each_object(p, s, addr, page->objects) {
4985 u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
4986 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4987
4988 if (!check_object(s, page, p, val))
4989 break;
4990 }
4991unlock:
4992 slab_unlock(page, &flags);
4993}
4994
4995static int validate_slab_node(struct kmem_cache *s,
4996 struct kmem_cache_node *n, unsigned long *obj_map)
4997{
4998 unsigned long count = 0;
4999 struct page *page;
5000 unsigned long flags;
5001
5002 spin_lock_irqsave(&n->list_lock, flags);
5003
5004 list_for_each_entry(page, &n->partial, slab_list) {
5005 validate_slab(s, page, obj_map);
5006 count++;
5007 }
5008 if (count != n->nr_partial) {
5009 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
5010 s->name, count, n->nr_partial);
5011 slab_add_kunit_errors();
5012 }
5013
5014 if (!(s->flags & SLAB_STORE_USER))
5015 goto out;
5016
5017 list_for_each_entry(page, &n->full, slab_list) {
5018 validate_slab(s, page, obj_map);
5019 count++;
5020 }
5021 if (count != atomic_long_read(&n->nr_slabs)) {
5022 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
5023 s->name, count, atomic_long_read(&n->nr_slabs));
5024 slab_add_kunit_errors();
5025 }
5026
5027out:
5028 spin_unlock_irqrestore(&n->list_lock, flags);
5029 return count;
5030}
5031
5032long validate_slab_cache(struct kmem_cache *s)
5033{
5034 int node;
5035 unsigned long count = 0;
5036 struct kmem_cache_node *n;
5037 unsigned long *obj_map;
5038
5039 obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5040 if (!obj_map)
5041 return -ENOMEM;
5042
5043 flush_all(s);
5044 for_each_kmem_cache_node(s, node, n)
5045 count += validate_slab_node(s, n, obj_map);
5046
5047 bitmap_free(obj_map);
5048
5049 return count;
5050}
5051EXPORT_SYMBOL(validate_slab_cache);
5052
5053#ifdef CONFIG_DEBUG_FS
5054
5055
5056
5057
5058
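/*
 * Debugfs support: collect lists of the code addresses at which objects of
 * a cache are allocated and freed, with per-call-site statistics.
 */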
5059struct location {
5060 unsigned long count;
5061 unsigned long addr;
5062 long long sum_time;
5063 long min_time;
5064 long max_time;
5065 long min_pid;
5066 long max_pid;
5067 DECLARE_BITMAP(cpus, NR_CPUS);
5068 nodemask_t nodes;
5069};
5070
5071struct loc_track {
5072 unsigned long max;
5073 unsigned long count;
5074 struct location *loc;
5075};
5076
5077static struct dentry *slab_debugfs_root;
5078
5079static void free_loc_track(struct loc_track *t)
5080{
5081 if (t->max)
5082 free_pages((unsigned long)t->loc,
5083 get_order(sizeof(struct location) * t->max));
5084}
5085
5086static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
5087{
5088 struct location *l;
5089 int order;
5090
5091 order = get_order(sizeof(struct location) * max);
5092
5093 l = (void *)__get_free_pages(flags, order);
5094 if (!l)
5095 return 0;
5096
5097 if (t->count) {
5098 memcpy(l, t->loc, sizeof(struct location) * t->count);
5099 free_loc_track(t);
5100 }
5101 t->max = max;
5102 t->loc = l;
5103 return 1;
5104}
5105
5106static int add_location(struct loc_track *t, struct kmem_cache *s,
5107 const struct track *track)
5108{
5109 long start, end, pos;
5110 struct location *l;
5111 unsigned long caddr;
5112 unsigned long age = jiffies - track->when;
5113
5114 start = -1;
5115 end = t->count;
5116
5117 for ( ; ; ) {
5118 pos = start + (end - start + 1) / 2;
5119
5120
5121
5122
5123
5124 if (pos == end)
5125 break;
5126
5127 caddr = t->loc[pos].addr;
5128 if (track->addr == caddr) {
5129
5130 l = &t->loc[pos];
5131 l->count++;
5132 if (track->when) {
5133 l->sum_time += age;
5134 if (age < l->min_time)
5135 l->min_time = age;
5136 if (age > l->max_time)
5137 l->max_time = age;
5138
5139 if (track->pid < l->min_pid)
5140 l->min_pid = track->pid;
5141 if (track->pid > l->max_pid)
5142 l->max_pid = track->pid;
5143
5144 cpumask_set_cpu(track->cpu,
5145 to_cpumask(l->cpus));
5146 }
5147 node_set(page_to_nid(virt_to_page(track)), l->nodes);
5148 return 1;
5149 }
5150
5151 if (track->addr < caddr)
5152 end = pos;
5153 else
5154 start = pos;
5155 }
5156
5157
5158
5159
5160 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
5161 return 0;
5162
5163 l = t->loc + pos;
5164 if (pos < t->count)
5165 memmove(l + 1, l,
5166 (t->count - pos) * sizeof(struct location));
5167 t->count++;
5168 l->count = 1;
5169 l->addr = track->addr;
5170 l->sum_time = age;
5171 l->min_time = age;
5172 l->max_time = age;
5173 l->min_pid = track->pid;
5174 l->max_pid = track->pid;
5175 cpumask_clear(to_cpumask(l->cpus));
5176 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
5177 nodes_clear(l->nodes);
5178 node_set(page_to_nid(virt_to_page(track)), l->nodes);
5179 return 1;
5180}
5181
5182static void process_slab(struct loc_track *t, struct kmem_cache *s,
5183 struct page *page, enum track_item alloc,
5184 unsigned long *obj_map)
5185{
5186 void *addr = page_address(page);
5187 void *p;
5188
5189 __fill_map(obj_map, s, page);
5190
5191 for_each_object(p, s, addr, page->objects)
5192 if (!test_bit(__obj_to_index(s, addr, p), obj_map))
5193 add_location(t, s, get_track(s, p, alloc));
5194}
5195#endif
5196#endif
5197
5198#ifdef CONFIG_SYSFS
5199enum slab_stat_type {
5200 SL_ALL,
5201 SL_PARTIAL,
5202 SL_CPU,
5203 SL_OBJECTS,
5204 SL_TOTAL
5205};
5206
5207#define SO_ALL (1 << SL_ALL)
5208#define SO_PARTIAL (1 << SL_PARTIAL)
5209#define SO_CPU (1 << SL_CPU)
5210#define SO_OBJECTS (1 << SL_OBJECTS)
5211#define SO_TOTAL (1 << SL_TOTAL)
5212
5213static ssize_t show_slab_objects(struct kmem_cache *s,
5214 char *buf, unsigned long flags)
5215{
5216 unsigned long total = 0;
5217 int node;
5218 int x;
5219 unsigned long *nodes;
5220 int len = 0;
5221
5222 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
5223 if (!nodes)
5224 return -ENOMEM;
5225
5226 if (flags & SO_CPU) {
5227 int cpu;
5228
5229 for_each_possible_cpu(cpu) {
5230 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
5231 cpu);
5232 int node;
5233 struct page *page;
5234
5235 page = READ_ONCE(c->page);
5236 if (!page)
5237 continue;
5238
5239 node = page_to_nid(page);
5240 if (flags & SO_TOTAL)
5241 x = page->objects;
5242 else if (flags & SO_OBJECTS)
5243 x = page->inuse;
5244 else
5245 x = 1;
5246
5247 total += x;
5248 nodes[node] += x;
5249
5250 page = slub_percpu_partial_read_once(c);
5251 if (page) {
5252 node = page_to_nid(page);
5253 if (flags & SO_TOTAL)
5254 WARN_ON_ONCE(1);
5255 else if (flags & SO_OBJECTS)
5256 WARN_ON_ONCE(1);
5257 else
5258 x = page->pages;
5259 total += x;
5260 nodes[node] += x;
5261 }
5262 }
5263 }
5264
5265
5266
5267
5268
5269 /*
5270  * No extra locking is taken here: taking mem_hotplug_lock under the
5271  * sysfs locks already held by this read would invert the established
5272  * lock order. That is tolerable because memory hot-unplug does not
5273  * free the kmem_cache->node[] structures (see
5274  * slab_mem_offline_callback()), so the node loops below stay safe.
5275  */
5276#ifdef CONFIG_SLUB_DEBUG
5277 if (flags & SO_ALL) {
5278 struct kmem_cache_node *n;
5279
5280 for_each_kmem_cache_node(s, node, n) {
5281
5282 if (flags & SO_TOTAL)
5283 x = atomic_long_read(&n->total_objects);
5284 else if (flags & SO_OBJECTS)
5285 x = atomic_long_read(&n->total_objects) -
5286 count_partial(n, count_free);
5287 else
5288 x = atomic_long_read(&n->nr_slabs);
5289 total += x;
5290 nodes[node] += x;
5291 }
5292
5293 } else
5294#endif
5295 if (flags & SO_PARTIAL) {
5296 struct kmem_cache_node *n;
5297
5298 for_each_kmem_cache_node(s, node, n) {
5299 if (flags & SO_TOTAL)
5300 x = count_partial(n, count_total);
5301 else if (flags & SO_OBJECTS)
5302 x = count_partial(n, count_inuse);
5303 else
5304 x = n->nr_partial;
5305 total += x;
5306 nodes[node] += x;
5307 }
5308 }
5309
5310 len += sysfs_emit_at(buf, len, "%lu", total);
5311#ifdef CONFIG_NUMA
5312 for (node = 0; node < nr_node_ids; node++) {
5313 if (nodes[node])
5314 len += sysfs_emit_at(buf, len, " N%d=%lu",
5315 node, nodes[node]);
5316 }
5317#endif
5318 len += sysfs_emit_at(buf, len, "\n");
5319 kfree(nodes);
5320
5321 return len;
5322}
5323
5324#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5325#define to_slab(n) container_of(n, struct kmem_cache, kobj)
5326
5327struct slab_attribute {
5328 struct attribute attr;
5329 ssize_t (*show)(struct kmem_cache *s, char *buf);
5330 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5331};
5332
5333#define SLAB_ATTR_RO(_name) \
5334 static struct slab_attribute _name##_attr = \
5335 __ATTR(_name, 0400, _name##_show, NULL)
5336
5337#define SLAB_ATTR(_name) \
5338 static struct slab_attribute _name##_attr = \
5339 __ATTR(_name, 0600, _name##_show, _name##_store)
5340
5341static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5342{
5343 return sysfs_emit(buf, "%u\n", s->size);
5344}
5345SLAB_ATTR_RO(slab_size);
5346
5347static ssize_t align_show(struct kmem_cache *s, char *buf)
5348{
5349 return sysfs_emit(buf, "%u\n", s->align);
5350}
5351SLAB_ATTR_RO(align);
5352
5353static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5354{
5355 return sysfs_emit(buf, "%u\n", s->object_size);
5356}
5357SLAB_ATTR_RO(object_size);
5358
5359static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5360{
5361 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5362}
5363SLAB_ATTR_RO(objs_per_slab);
5364
5365static ssize_t order_show(struct kmem_cache *s, char *buf)
5366{
5367 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5368}
5369SLAB_ATTR_RO(order);
5370
5371static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5372{
5373 return sysfs_emit(buf, "%lu\n", s->min_partial);
5374}
5375
5376static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5377 size_t length)
5378{
5379 unsigned long min;
5380 int err;
5381
5382 err = kstrtoul(buf, 10, &min);
5383 if (err)
5384 return err;
5385
5386 set_min_partial(s, min);
5387 return length;
5388}
5389SLAB_ATTR(min_partial);
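
/*
 * Illustrative use of the min_partial attribute above (cache name and
 * values are examples only):
 *
 *	# cat /sys/kernel/slab/dentry/min_partial
 *	5
 *	# echo 10 > /sys/kernel/slab/dentry/min_partial
 *
 * The value is parsed with kstrtoul(), so non-numeric input is rejected
 * and the error is returned to the writer.
 */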
5390
5391static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5392{
5393 return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
5394}
5395
5396static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5397 size_t length)
5398{
5399 unsigned int objects;
5400 int err;
5401
5402 err = kstrtouint(buf, 10, &objects);
5403 if (err)
5404 return err;
5405 if (objects && !kmem_cache_has_cpu_partial(s))
5406 return -EINVAL;
5407
5408 slub_set_cpu_partial(s, objects);
5409 flush_all(s);
5410 return length;
5411}
5412SLAB_ATTR(cpu_partial);
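
/*
 * Writing to the cpu_partial attribute above resizes the per-cpu partial
 * lists. For example (illustrative):
 *
 *	# echo 0 > /sys/kernel/slab/kmalloc-64/cpu_partial
 *
 * disables per-cpu partial pages for that cache; flush_all() then drains
 * any already queued partial pages back to the per-node lists.
 */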
5413
5414static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5415{
5416 if (!s->ctor)
5417 return 0;
5418 return sysfs_emit(buf, "%pS\n", s->ctor);
5419}
5420SLAB_ATTR_RO(ctor);
5421
5422static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5423{
5424 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5425}
5426SLAB_ATTR_RO(aliases);
5427
5428static ssize_t partial_show(struct kmem_cache *s, char *buf)
5429{
5430 return show_slab_objects(s, buf, SO_PARTIAL);
5431}
5432SLAB_ATTR_RO(partial);
5433
5434static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5435{
5436 return show_slab_objects(s, buf, SO_CPU);
5437}
5438SLAB_ATTR_RO(cpu_slabs);
5439
5440static ssize_t objects_show(struct kmem_cache *s, char *buf)
5441{
5442 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5443}
5444SLAB_ATTR_RO(objects);
5445
5446static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5447{
5448 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5449}
5450SLAB_ATTR_RO(objects_partial);
5451
5452static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5453{
5454 int objects = 0;
5455 int pages = 0;
5456 int cpu;
5457 int len = 0;
5458
5459 for_each_online_cpu(cpu) {
5460 struct page *page;
5461
5462 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5463
5464 if (page) {
5465 pages += page->pages;
5466 objects += page->pobjects;
5467 }
5468 }
5469
5470 len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
5471
5472#ifdef CONFIG_SMP
5473 for_each_online_cpu(cpu) {
5474 struct page *page;
5475
5476 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5477 if (page)
5478 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5479 cpu, page->pobjects, page->pages);
5480 }
5481#endif
5482 len += sysfs_emit_at(buf, len, "\n");
5483
5484 return len;
5485}
5486SLAB_ATTR_RO(slabs_cpu_partial);
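
/*
 * A slabs_cpu_partial line produced above looks like, e.g. (values made up):
 *
 *	132(4) C0=20(1) C1=112(3)
 *
 * i.e. "<objects>(<pages>)" summed over all online CPUs followed, on SMP,
 * by a per-cpu breakdown of the per-cpu partial lists.
 */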
5487
5488static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5489{
5490 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5491}
5492SLAB_ATTR_RO(reclaim_account);
5493
5494static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5495{
5496 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5497}
5498SLAB_ATTR_RO(hwcache_align);
5499
5500#ifdef CONFIG_ZONE_DMA
5501static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5502{
5503 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5504}
5505SLAB_ATTR_RO(cache_dma);
5506#endif
5507
5508static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5509{
5510 return sysfs_emit(buf, "%u\n", s->usersize);
5511}
5512SLAB_ATTR_RO(usersize);
5513
5514static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5515{
5516 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5517}
5518SLAB_ATTR_RO(destroy_by_rcu);
5519
5520#ifdef CONFIG_SLUB_DEBUG
5521static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5522{
5523 return show_slab_objects(s, buf, SO_ALL);
5524}
5525SLAB_ATTR_RO(slabs);
5526
5527static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5528{
5529 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5530}
5531SLAB_ATTR_RO(total_objects);
5532
5533static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5534{
5535 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5536}
5537SLAB_ATTR_RO(sanity_checks);
5538
5539static ssize_t trace_show(struct kmem_cache *s, char *buf)
5540{
5541 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5542}
5543SLAB_ATTR_RO(trace);
5544
static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}
SLAB_ATTR_RO(red_zone);

static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
}
SLAB_ATTR_RO(poison);

static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}
SLAB_ATTR_RO(store_user);
5565
5566static ssize_t validate_show(struct kmem_cache *s, char *buf)
5567{
5568 return 0;
5569}
5570
5571static ssize_t validate_store(struct kmem_cache *s,
5572 const char *buf, size_t length)
5573{
5574 int ret = -EINVAL;
5575
5576 if (buf[0] == '1') {
5577 ret = validate_slab_cache(s);
5578 if (ret >= 0)
5579 ret = length;
5580 }
5581 return ret;
5582}
5583SLAB_ATTR(validate);
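
/*
 * Reading "validate" returns nothing; writing e.g. (illustrative)
 *
 *	# echo 1 > /sys/kernel/slab/kmalloc-64/validate
 *
 * walks every slab of the cache via validate_slab_cache() and reports any
 * inconsistencies through the usual slab error path in the kernel log.
 */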
5584
5585#endif
5586
5587#ifdef CONFIG_FAILSLAB
5588static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5589{
5590 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5591}
5592SLAB_ATTR_RO(failslab);
5593#endif
5594
5595static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5596{
5597 return 0;
5598}
5599
5600static ssize_t shrink_store(struct kmem_cache *s,
5601 const char *buf, size_t length)
5602{
5603 if (buf[0] == '1')
5604 kmem_cache_shrink(s);
5605 else
5606 return -EINVAL;
5607 return length;
5608}
5609SLAB_ATTR(shrink);
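
/*
 * Illustrative use of the shrink attribute above:
 *
 *	# echo 1 > /sys/kernel/slab/dentry/shrink
 *
 * triggers kmem_cache_shrink(), which discards empty slabs and sorts the
 * partial lists; any other value is rejected with -EINVAL.
 */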
5610
5611#ifdef CONFIG_NUMA
5612static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5613{
5614 return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5615}
5616
5617static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5618 const char *buf, size_t length)
5619{
5620 unsigned int ratio;
5621 int err;
5622
5623 err = kstrtouint(buf, 10, &ratio);
5624 if (err)
5625 return err;
5626 if (ratio > 100)
5627 return -ERANGE;
5628
5629 s->remote_node_defrag_ratio = ratio * 10;
5630
5631 return length;
5632}
5633SLAB_ATTR(remote_node_defrag_ratio);
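
/*
 * The ratio is kept internally in tenths of a percent: writing e.g. 20
 * stores 200 and reads back as 20; values above 100 are rejected with
 * -ERANGE.
 */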
5634#endif
5635
5636#ifdef CONFIG_SLUB_STATS
5637static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5638{
5639 unsigned long sum = 0;
5640 int cpu;
5641 int len = 0;
5642 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5643
5644 if (!data)
5645 return -ENOMEM;
5646
5647 for_each_online_cpu(cpu) {
5648 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5649
5650 data[cpu] = x;
5651 sum += x;
5652 }
5653
5654 len += sysfs_emit_at(buf, len, "%lu", sum);
5655
5656#ifdef CONFIG_SMP
5657 for_each_online_cpu(cpu) {
5658 if (data[cpu])
5659 len += sysfs_emit_at(buf, len, " C%d=%u",
5660 cpu, data[cpu]);
5661 }
5662#endif
5663 kfree(data);
5664 len += sysfs_emit_at(buf, len, "\n");
5665
5666 return len;
5667}
5668
5669static void clear_stat(struct kmem_cache *s, enum stat_item si)
5670{
5671 int cpu;
5672
5673 for_each_online_cpu(cpu)
5674 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5675}
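
/*
 * Each STAT_ATTR() below creates a sysfs file whose read side prints the
 * summed event count followed by a per-cpu breakdown, e.g. (illustrative
 * values):
 *
 *	# cat /sys/kernel/slab/kmalloc-64/alloc_fastpath
 *	1523790 C0=761003 C1=762787
 *
 * Writing '0' clears the counters on all online CPUs via clear_stat();
 * any other value is rejected.
 */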
5676
5677#define STAT_ATTR(si, text) \
5678static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5679{ \
5680 return show_stat(s, buf, si); \
5681} \
5682static ssize_t text##_store(struct kmem_cache *s, \
5683 const char *buf, size_t length) \
5684{ \
5685 if (buf[0] != '0') \
5686 return -EINVAL; \
5687 clear_stat(s, si); \
5688 return length; \
5689} \
SLAB_ATTR(text);
5691
5692STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5693STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5694STAT_ATTR(FREE_FASTPATH, free_fastpath);
5695STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5696STAT_ATTR(FREE_FROZEN, free_frozen);
5697STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5698STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5699STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5700STAT_ATTR(ALLOC_SLAB, alloc_slab);
5701STAT_ATTR(ALLOC_REFILL, alloc_refill);
5702STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5703STAT_ATTR(FREE_SLAB, free_slab);
5704STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5705STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5706STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5707STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5708STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5709STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5710STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5711STAT_ATTR(ORDER_FALLBACK, order_fallback);
5712STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5713STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5714STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5715STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5716STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5717STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5718#endif
5719
5720static struct attribute *slab_attrs[] = {
5721 &slab_size_attr.attr,
5722 &object_size_attr.attr,
5723 &objs_per_slab_attr.attr,
5724 &order_attr.attr,
5725 &min_partial_attr.attr,
5726 &cpu_partial_attr.attr,
5727 &objects_attr.attr,
5728 &objects_partial_attr.attr,
5729 &partial_attr.attr,
5730 &cpu_slabs_attr.attr,
5731 &ctor_attr.attr,
5732 &aliases_attr.attr,
5733 &align_attr.attr,
5734 &hwcache_align_attr.attr,
5735 &reclaim_account_attr.attr,
5736 &destroy_by_rcu_attr.attr,
5737 &shrink_attr.attr,
5738 &slabs_cpu_partial_attr.attr,
5739#ifdef CONFIG_SLUB_DEBUG
5740 &total_objects_attr.attr,
5741 &slabs_attr.attr,
5742 &sanity_checks_attr.attr,
5743 &trace_attr.attr,
5744 &red_zone_attr.attr,
5745 &poison_attr.attr,
5746 &store_user_attr.attr,
5747 &validate_attr.attr,
5748#endif
5749#ifdef CONFIG_ZONE_DMA
5750 &cache_dma_attr.attr,
5751#endif
5752#ifdef CONFIG_NUMA
5753 &remote_node_defrag_ratio_attr.attr,
5754#endif
5755#ifdef CONFIG_SLUB_STATS
5756 &alloc_fastpath_attr.attr,
5757 &alloc_slowpath_attr.attr,
5758 &free_fastpath_attr.attr,
5759 &free_slowpath_attr.attr,
5760 &free_frozen_attr.attr,
5761 &free_add_partial_attr.attr,
5762 &free_remove_partial_attr.attr,
5763 &alloc_from_partial_attr.attr,
5764 &alloc_slab_attr.attr,
5765 &alloc_refill_attr.attr,
5766 &alloc_node_mismatch_attr.attr,
5767 &free_slab_attr.attr,
5768 &cpuslab_flush_attr.attr,
5769 &deactivate_full_attr.attr,
5770 &deactivate_empty_attr.attr,
5771 &deactivate_to_head_attr.attr,
5772 &deactivate_to_tail_attr.attr,
5773 &deactivate_remote_frees_attr.attr,
5774 &deactivate_bypass_attr.attr,
5775 &order_fallback_attr.attr,
5776 &cmpxchg_double_fail_attr.attr,
5777 &cmpxchg_double_cpu_fail_attr.attr,
5778 &cpu_partial_alloc_attr.attr,
5779 &cpu_partial_free_attr.attr,
5780 &cpu_partial_node_attr.attr,
5781 &cpu_partial_drain_attr.attr,
5782#endif
5783#ifdef CONFIG_FAILSLAB
5784 &failslab_attr.attr,
5785#endif
5786 &usersize_attr.attr,
5787
5788 NULL
5789};
5790
5791static const struct attribute_group slab_attr_group = {
5792 .attrs = slab_attrs,
5793};
5794
5795static ssize_t slab_attr_show(struct kobject *kobj,
5796 struct attribute *attr,
5797 char *buf)
5798{
5799 struct slab_attribute *attribute;
5800 struct kmem_cache *s;
5801 int err;
5802
5803 attribute = to_slab_attr(attr);
5804 s = to_slab(kobj);
5805
5806 if (!attribute->show)
5807 return -EIO;
5808
5809 err = attribute->show(s, buf);
5810
5811 return err;
5812}
5813
5814static ssize_t slab_attr_store(struct kobject *kobj,
5815 struct attribute *attr,
5816 const char *buf, size_t len)
5817{
5818 struct slab_attribute *attribute;
5819 struct kmem_cache *s;
5820 int err;
5821
5822 attribute = to_slab_attr(attr);
5823 s = to_slab(kobj);
5824
5825 if (!attribute->store)
5826 return -EIO;
5827
5828 err = attribute->store(s, buf, len);
5829 return err;
5830}
5831
5832static void kmem_cache_release(struct kobject *k)
5833{
5834 slab_kmem_cache_release(to_slab(k));
5835}
5836
5837static const struct sysfs_ops slab_sysfs_ops = {
5838 .show = slab_attr_show,
5839 .store = slab_attr_store,
5840};
5841
5842static struct kobj_type slab_ktype = {
5843 .sysfs_ops = &slab_sysfs_ops,
5844 .release = kmem_cache_release,
5845};
5846
5847static struct kset *slab_kset;
5848
5849static inline struct kset *cache_kset(struct kmem_cache *s)
5850{
5851 return slab_kset;
5852}
5853
#define ID_STR_LENGTH 64

/*
 * Format of the generated id:	:[flags-]size
 */
5860static char *create_unique_id(struct kmem_cache *s)
5861{
5862 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5863 char *p = name;
5864
5865 BUG_ON(!name);
5866
	*p++ = ':';

	/*
	 * Encode the flags that affect cache behaviour as single characters,
	 * so that otherwise identically sized caches still get distinct ids.
	 */
5875 if (s->flags & SLAB_CACHE_DMA)
5876 *p++ = 'd';
5877 if (s->flags & SLAB_CACHE_DMA32)
5878 *p++ = 'D';
5879 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5880 *p++ = 'a';
5881 if (s->flags & SLAB_CONSISTENCY_CHECKS)
5882 *p++ = 'F';
5883 if (s->flags & SLAB_ACCOUNT)
5884 *p++ = 'A';
5885 if (p != name + 1)
5886 *p++ = '-';
5887 p += sprintf(p, "%07u", s->size);
5888
5889 BUG_ON(p > name + ID_STR_LENGTH - 1);
5890 return name;
5891}
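
/*
 * Examples of ids generated above (sizes illustrative): a
 * SLAB_RECLAIM_ACCOUNT cache of size 192 gets ":a-0000192", while a cache
 * with none of the relevant flags and size 4096 gets ":0004096" (no '-'
 * separator when no flag characters were emitted). These ids become the
 * sysfs directory names that aliased cache names are symlinked to.
 */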
5892
5893static int sysfs_slab_add(struct kmem_cache *s)
5894{
5895 int err;
5896 const char *name;
5897 struct kset *kset = cache_kset(s);
5898 int unmergeable = slab_unmergeable(s);
5899
5900 if (!kset) {
5901 kobject_init(&s->kobj, &slab_ktype);
5902 return 0;
5903 }
5904
5905 if (!unmergeable && disable_higher_order_debug &&
5906 (slub_debug & DEBUG_METADATA_FLAGS))
5907 unmergeable = 1;
5908
	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
5915 sysfs_remove_link(&slab_kset->kobj, s->name);
5916 name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
5922 name = create_unique_id(s);
5923 }
5924
5925 s->kobj.kset = kset;
5926 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5927 if (err)
5928 goto out;
5929
5930 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5931 if (err)
5932 goto out_del_kobj;
5933
	if (!unmergeable) {
		/* Setup first alias */
5936 sysfs_slab_alias(s, s->name);
5937 }
5938out:
5939 if (!unmergeable)
5940 kfree(name);
5941 return err;
5942out_del_kobj:
5943 kobject_del(&s->kobj);
5944 goto out;
5945}
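
/*
 * The resulting sysfs layout looks roughly like this ("foo" is a made-up
 * mergeable cache name, the id is illustrative):
 *
 *	/sys/kernel/slab/:a-0000192/		kobject named after the unique id
 *	/sys/kernel/slab/foo -> :a-0000192	alias symlink from sysfs_slab_alias()
 *
 * Unmergeable caches (e.g. with debugging enabled) appear directly under
 * their own name and get no alias link.
 */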
5946
5947void sysfs_slab_unlink(struct kmem_cache *s)
5948{
5949 if (slab_state >= FULL)
5950 kobject_del(&s->kobj);
5951}
5952
5953void sysfs_slab_release(struct kmem_cache *s)
5954{
5955 if (slab_state >= FULL)
5956 kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available, lest we lose that information.
 */
5963struct saved_alias {
5964 struct kmem_cache *s;
5965 const char *name;
5966 struct saved_alias *next;
5967};
5968
5969static struct saved_alias *alias_list;
5970
5971static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5972{
5973 struct saved_alias *al;
5974
	if (slab_state == FULL) {
		/*
		 * Sysfs has already been initialized: create the link right
		 * away, removing any stale link of the same name first.
		 */
5979 sysfs_remove_link(&slab_kset->kobj, name);
5980 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5981 }
5982
5983 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5984 if (!al)
5985 return -ENOMEM;
5986
5987 al->s = s;
5988 al->name = name;
5989 al->next = alias_list;
5990 alias_list = al;
5991 return 0;
5992}
5993
5994static int __init slab_sysfs_init(void)
5995{
5996 struct kmem_cache *s;
5997 int err;
5998
5999 mutex_lock(&slab_mutex);
6000
6001 slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
6002 if (!slab_kset) {
6003 mutex_unlock(&slab_mutex);
6004 pr_err("Cannot register slab subsystem.\n");
6005 return -ENOSYS;
6006 }
6007
6008 slab_state = FULL;
6009
6010 list_for_each_entry(s, &slab_caches, list) {
6011 err = sysfs_slab_add(s);
6012 if (err)
6013 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
6014 s->name);
6015 }
6016
6017 while (alias_list) {
6018 struct saved_alias *al = alias_list;
6019
6020 alias_list = alias_list->next;
6021 err = sysfs_slab_alias(al->s, al->name);
6022 if (err)
6023 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
6024 al->name);
6025 kfree(al);
6026 }
6027
6028 mutex_unlock(&slab_mutex);
6029 return 0;
6030}
6031
6032__initcall(slab_sysfs_init);
6033#endif
6034
6035#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
6036static int slab_debugfs_show(struct seq_file *seq, void *v)
{
6039 struct location *l;
6040 unsigned int idx = *(unsigned int *)v;
6041 struct loc_track *t = seq->private;
6042
6043 if (idx < t->count) {
6044 l = &t->loc[idx];
6045
6046 seq_printf(seq, "%7ld ", l->count);
6047
6048 if (l->addr)
6049 seq_printf(seq, "%pS", (void *)l->addr);
6050 else
6051 seq_puts(seq, "<not-available>");
6052
6053 if (l->sum_time != l->min_time) {
6054 seq_printf(seq, " age=%ld/%llu/%ld",
6055 l->min_time, div_u64(l->sum_time, l->count),
6056 l->max_time);
6057 } else
6058 seq_printf(seq, " age=%ld", l->min_time);
6059
6060 if (l->min_pid != l->max_pid)
6061 seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
6062 else
6063 seq_printf(seq, " pid=%ld",
6064 l->min_pid);
6065
6066 if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
6067 seq_printf(seq, " cpus=%*pbl",
6068 cpumask_pr_args(to_cpumask(l->cpus)));
6069
6070 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
6071 seq_printf(seq, " nodes=%*pbl",
6072 nodemask_pr_args(&l->nodes));
6073
6074 seq_puts(seq, "\n");
6075 }
6076
6077 if (!idx && !t->count)
6078 seq_puts(seq, "No data\n");
6079
6080 return 0;
6081}
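
/*
 * A line emitted above then looks like, e.g. (values illustrative):
 *
 *	1534 kmem_cache_alloc_trace+0x.../0x... age=12/380/4522 pid=1-824 cpus=0-3 nodes=0
 *
 * i.e. the number of objects recorded for that call site, the call site
 * itself, min/average/max object age, the pid range, and (when relevant)
 * the cpu and node masks.
 */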
6082
6083static void slab_debugfs_stop(struct seq_file *seq, void *v)
6084{
6085}
6086
6087static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
6088{
6089 struct loc_track *t = seq->private;
6090
6091 v = ppos;
6092 ++*ppos;
6093 if (*ppos <= t->count)
6094 return v;
6095
6096 return NULL;
6097}
6098
6099static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
6100{
6101 return ppos;
6102}
6103
6104static const struct seq_operations slab_debugfs_sops = {
6105 .start = slab_debugfs_start,
6106 .next = slab_debugfs_next,
6107 .stop = slab_debugfs_stop,
6108 .show = slab_debugfs_show,
6109};
6110
6111static int slab_debug_trace_open(struct inode *inode, struct file *filep)
{
6114 struct kmem_cache_node *n;
6115 enum track_item alloc;
6116 int node;
6117 struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
6118 sizeof(struct loc_track));
6119 struct kmem_cache *s = file_inode(filep)->i_private;
6120 unsigned long *obj_map;
6121
6122 if (!t)
6123 return -ENOMEM;
6124
6125 obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
6126 if (!obj_map) {
6127 seq_release_private(inode, filep);
6128 return -ENOMEM;
6129 }
6130
6131 if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
6132 alloc = TRACK_ALLOC;
6133 else
6134 alloc = TRACK_FREE;
6135
6136 if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
6137 bitmap_free(obj_map);
6138 seq_release_private(inode, filep);
6139 return -ENOMEM;
6140 }
6141
6142 for_each_kmem_cache_node(s, node, n) {
6143 unsigned long flags;
6144 struct page *page;
6145
6146 if (!atomic_long_read(&n->nr_slabs))
6147 continue;
6148
6149 spin_lock_irqsave(&n->list_lock, flags);
6150 list_for_each_entry(page, &n->partial, slab_list)
6151 process_slab(t, s, page, alloc, obj_map);
6152 list_for_each_entry(page, &n->full, slab_list)
6153 process_slab(t, s, page, alloc, obj_map);
6154 spin_unlock_irqrestore(&n->list_lock, flags);
6155 }
6156
6157 bitmap_free(obj_map);
6158 return 0;
6159}
6160
6161static int slab_debug_trace_release(struct inode *inode, struct file *file)
6162{
6163 struct seq_file *seq = file->private_data;
6164 struct loc_track *t = seq->private;
6165
6166 free_loc_track(t);
6167 return seq_release_private(inode, file);
6168}
6169
6170static const struct file_operations slab_debugfs_fops = {
6171 .open = slab_debug_trace_open,
6172 .read = seq_read,
6173 .llseek = seq_lseek,
6174 .release = slab_debug_trace_release,
6175};
6176
6177static void debugfs_slab_add(struct kmem_cache *s)
6178{
6179 struct dentry *slab_cache_dir;
6180
6181 if (unlikely(!slab_debugfs_root))
6182 return;
6183
6184 slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
6185
6186 debugfs_create_file("alloc_traces", 0400,
6187 slab_cache_dir, s, &slab_debugfs_fops);
6188
6189 debugfs_create_file("free_traces", 0400,
6190 slab_cache_dir, s, &slab_debugfs_fops);
6191}
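
/*
 * For caches with SLAB_STORE_USER set this results in (path illustrative,
 * assuming debugfs is mounted at /sys/kernel/debug):
 *
 *	/sys/kernel/debug/slab/<cache>/alloc_traces
 *	/sys/kernel/debug/slab/<cache>/free_traces
 */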
6192
6193void debugfs_slab_release(struct kmem_cache *s)
6194{
6195 debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
6196}
6197
6198static int __init slab_debugfs_init(void)
6199{
6200 struct kmem_cache *s;
6201
6202 slab_debugfs_root = debugfs_create_dir("slab", NULL);
6203
6204 list_for_each_entry(s, &slab_caches, list)
6205 if (s->flags & SLAB_STORE_USER)
6206 debugfs_slab_add(s);
6207
	return 0;
}
6211__initcall(slab_debugfs_init);
#endif

/*
 * The /proc/slabinfo ABI
 */
6216#ifdef CONFIG_SLUB_DEBUG
6217void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
6218{
6219 unsigned long nr_slabs = 0;
6220 unsigned long nr_objs = 0;
6221 unsigned long nr_free = 0;
6222 int node;
6223 struct kmem_cache_node *n;
6224
6225 for_each_kmem_cache_node(s, node, n) {
6226 nr_slabs += node_nr_slabs(n);
6227 nr_objs += node_nr_objs(n);
6228 nr_free += count_partial(n, count_free);
6229 }
6230
6231 sinfo->active_objs = nr_objs - nr_free;
6232 sinfo->num_objs = nr_objs;
6233 sinfo->active_slabs = nr_slabs;
6234 sinfo->num_slabs = nr_slabs;
6235 sinfo->objects_per_slab = oo_objects(s->oo);
6236 sinfo->cache_order = oo_order(s->oo);
6237}
6238
6239void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
6240{
6241}
6242
6243ssize_t slabinfo_write(struct file *file, const char __user *buffer,
6244 size_t count, loff_t *ppos)
6245{
6246 return -EIO;
6247}
6248#endif
6249