// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/kfence.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <trace/events/kmem.h>

#include "internal.h"
/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slab_mutex
 *
 *   The role of the slab_mutex is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   implement a cmpxchg_double on the page struct. It protects:
 *	A. page->freelist	-> List of free objects in a page
 *	B. page->inuse		-> Number of objects in use
 *	C. page->objects	-> Number of objects in page
 *	D. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. The
 *   processor that froze the slab is the only one that may perform list
 *   operations on the page; other processors may still put objects onto
 *   the page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor may the number of partial slabs change.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock.)
 *
 *   The list_lock is a centralized lock and thus we avoid taking it as
 *   much as possible. As long as SLUB does not have to handle partial
 *   slabs, operations can continue without any centralized lock; f.e.
 *   allocating a long series of objects that fill up slabs does not
 *   require the list lock.
 *
 *   Interrupts are disabled during allocation and deallocation in order
 *   to make the slab allocator safe to use in the context of an irq.
 *
 * SLUB assigns one slab for allocation to each processor, the "cpu slab".
 * Allocations only occur from these slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab
 * is freed then the slab will show up again on the partial lists.
 * Full slabs are tracked only for debugging, so that all objects can be
 * scanned.
 *
 * Slabs are freed when they become empty. Teardown and setup is minimal,
 * so we rely on the page allocator's per-cpu caches for fast frees and
 * allocs.
 *
 * page->frozen		The slab is frozen and exempt from list processing,
 *			typically because it is dedicated to a specific
 *			processor. Objects may be freed in the slab while
 *			it is frozen, but slab_free will then skip the
 *			usual list operations; it is up to the processor
 *			holding the slab to integrate it back into the
 *			slab lists when it is no longer needed.
 *
 * SLAB_DEBUG_FLAGS	Slab requires special handling due to debug options
 *			set. This moves slab handling out of the fast path
 *			and disables lockless freelists.
 */
118#ifdef CONFIG_SLUB_DEBUG
119#ifdef CONFIG_SLUB_DEBUG_ON
120DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
121#else
122DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
123#endif
124#endif
125
126static inline bool kmem_cache_debug(struct kmem_cache *s)
127{
128 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
129}
130
131void *fixup_red_left(struct kmem_cache *s, void *p)
132{
133 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
134 p += s->red_left_pad;
135
136 return p;
137}
138
139static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
140{
141#ifdef CONFIG_SLUB_CPU_PARTIAL
142 return !kmem_cache_debug(s);
143#else
144 return false;
145#endif
146}

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
207struct track {
208 unsigned long addr;
209#ifdef CONFIG_STACKTRACE
210 unsigned long addrs[TRACK_ADDRS_COUNT];
211#endif
212 int cpu;
213 int pid;
214 unsigned long when;
215};
216
217enum track_item { TRACK_ALLOC, TRACK_FREE };
218
219#ifdef CONFIG_SYSFS
220static int sysfs_slab_add(struct kmem_cache *);
221static int sysfs_slab_alias(struct kmem_cache *, const char *);
222#else
223static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
224static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
225 { return 0; }
226#endif
227
228static inline void stat(const struct kmem_cache *s, enum stat_item si)
229{
230#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_add()'s irq-disable overhead.
	 */
235 raw_cpu_inc(s->cpu_slab->stat[si]);
236#endif
237}
/*
 * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
 * Corresponds to node_states[N_NORMAL_MEMORY], but can temporarily
 * differ during memory hotplug/hotremove operations.
 * Protected by slab_mutex.
 */
static nodemask_t slab_nodes;

/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the address where the pointer is held and a per-cache
 * random number.
 */
256static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
257 unsigned long ptr_addr)
258{
259#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * With hardware/software tag-based KASAN, ptr_addr may carry a tag.
	 * Strip it before folding the address into the stored value so that
	 * tagged and untagged accesses to the same slot decode identically.
	 */
270 return (void *)((unsigned long)ptr ^ s->random ^
271 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
272#else
273 return ptr;
274#endif
275}
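/*
 * Illustrative sketch (hypothetical values, not from the source): with
 * CONFIG_SLAB_FREELIST_HARDENED, storing a free pointer P at slot address A
 * writes  stored = P ^ s->random ^ swab(A).  Reading the same slot back
 * computes  stored ^ s->random ^ swab(A), which XOR-cancels to P, so the
 * transformation is self-inverse as long as the same slot address is used.
 * An attacker overwriting the slot without knowing s->random ends up with a
 * scrambled, likely invalid pointer rather than a controlled one.
 */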
276
277
278static inline void *freelist_dereference(const struct kmem_cache *s,
279 void *ptr_addr)
280{
281 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
282 (unsigned long)ptr_addr);
283}
284
285static inline void *get_freepointer(struct kmem_cache *s, void *object)
286{
287 object = kasan_reset_tag(object);
288 return freelist_dereference(s, object + s->offset);
289}
290
291static void prefetch_freepointer(const struct kmem_cache *s, void *object)
292{
293 prefetch(object + s->offset);
294}
295
296static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
297{
298 unsigned long freepointer_addr;
299 void *p;
300
301 if (!debug_pagealloc_enabled_static())
302 return get_freepointer(s, object);
303
304 freepointer_addr = (unsigned long)object + s->offset;
305 copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
306 return freelist_ptr(s, p, freepointer_addr);
307}
308
309static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
310{
311 unsigned long freeptr_addr = (unsigned long)object + s->offset;
312
313#ifdef CONFIG_SLAB_FREELIST_HARDENED
314 BUG_ON(object == fp);
315#endif
316
317 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
318 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
319}
/* Loop over all objects in a slab */
322#define for_each_object(__p, __s, __addr, __objects) \
323 for (__p = fixup_red_left(__s, __addr); \
324 __p < (__addr) + (__objects) * (__s)->size; \
325 __p += (__s)->size)
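/*
 * Usage sketch: for_each_object() walks every object slot in a slab page,
 * starting past the left red zone when red zoning is enabled.  This is the
 * pattern used by __free_slab() further below:
 *
 *	void *p;
 *
 *	for_each_object(p, s, page_address(page), page->objects)
 *		check_object(s, page, p, SLUB_RED_INACTIVE);
 */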
326
327static inline unsigned int order_objects(unsigned int order, unsigned int size)
328{
329 return ((unsigned int)PAGE_SIZE << order) / size;
330}
331
332static inline struct kmem_cache_order_objects oo_make(unsigned int order,
333 unsigned int size)
334{
335 struct kmem_cache_order_objects x = {
336 (order << OO_SHIFT) + order_objects(order, size)
337 };
338
339 return x;
340}
341
342static inline unsigned int oo_order(struct kmem_cache_order_objects x)
343{
344 return x.x >> OO_SHIFT;
345}
346
347static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
348{
349 return x.x & OO_MASK;
350}
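/*
 * Worked example (assuming 4 KiB pages): for order = 3 and size = 256,
 * order_objects() yields (4096 << 3) / 256 = 128 objects.  oo_make() packs
 * this as x = (3 << 16) + 128 = 0x30080, so oo_order() recovers 3 and
 * oo_objects() recovers 128 from the single word.
 */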
/*
 * Per slab locking using the pagelock
 */
355static __always_inline void slab_lock(struct page *page)
356{
357 VM_BUG_ON_PAGE(PageTail(page), page);
358 bit_spin_lock(PG_locked, &page->flags);
359}
360
361static __always_inline void slab_unlock(struct page *page)
362{
363 VM_BUG_ON_PAGE(PageTail(page), page);
364 __bit_spin_unlock(PG_locked, &page->flags);
365}
/* Interrupts must be disabled (for the fallback code to work right) */
368static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
369 void *freelist_old, unsigned long counters_old,
370 void *freelist_new, unsigned long counters_new,
371 const char *n)
372{
373 VM_BUG_ON(!irqs_disabled());
374#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
375 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
376 if (s->flags & __CMPXCHG_DOUBLE) {
377 if (cmpxchg_double(&page->freelist, &page->counters,
378 freelist_old, counters_old,
379 freelist_new, counters_new))
380 return true;
381 } else
382#endif
383 {
384 slab_lock(page);
385 if (page->freelist == freelist_old &&
386 page->counters == counters_old) {
387 page->freelist = freelist_new;
388 page->counters = counters_new;
389 slab_unlock(page);
390 return true;
391 }
392 slab_unlock(page);
393 }
394
395 cpu_relax();
396 stat(s, CMPXCHG_DOUBLE_FAIL);
397
398#ifdef SLUB_DEBUG_CMPXCHG
399 pr_info("%s %s: cmpxchg double redo ", n, s->name);
400#endif
401
402 return false;
403}
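/*
 * The variant above requires the caller to have interrupts disabled (it is
 * used from paths that already run with irqs off).  cmpxchg_double_slab()
 * below is the stand-alone version that saves and restores interrupts itself
 * around the slab_lock() fallback.
 */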
404
405static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
406 void *freelist_old, unsigned long counters_old,
407 void *freelist_new, unsigned long counters_new,
408 const char *n)
409{
410#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
411 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
412 if (s->flags & __CMPXCHG_DOUBLE) {
413 if (cmpxchg_double(&page->freelist, &page->counters,
414 freelist_old, counters_old,
415 freelist_new, counters_new))
416 return true;
417 } else
418#endif
419 {
420 unsigned long flags;
421
422 local_irq_save(flags);
423 slab_lock(page);
424 if (page->freelist == freelist_old &&
425 page->counters == counters_old) {
426 page->freelist = freelist_new;
427 page->counters = counters_new;
428 slab_unlock(page);
429 local_irq_restore(flags);
430 return true;
431 }
432 slab_unlock(page);
433 local_irq_restore(flags);
434 }
435
436 cpu_relax();
437 stat(s, CMPXCHG_DOUBLE_FAIL);
438
439#ifdef SLUB_DEBUG_CMPXCHG
440 pr_info("%s %s: cmpxchg double redo ", n, s->name);
441#endif
442
443 return false;
444}
445
446#ifdef CONFIG_SLUB_DEBUG
447static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
448static DEFINE_SPINLOCK(object_map_lock);

/*
 * Determine a map of objects in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
456static unsigned long *get_map(struct kmem_cache *s, struct page *page)
457 __acquires(&object_map_lock)
458{
459 void *p;
460 void *addr = page_address(page);
461
462 VM_BUG_ON(!irqs_disabled());
463
464 spin_lock(&object_map_lock);
465
466 bitmap_zero(object_map, page->objects);
467
468 for (p = page->freelist; p; p = get_freepointer(s, p))
469 set_bit(__obj_to_index(s, addr, p), object_map);
470
471 return object_map;
472}
473
474static void put_map(unsigned long *map) __releases(&object_map_lock)
475{
476 VM_BUG_ON(map != object_map);
477 spin_unlock(&object_map_lock);
478}
479
480static inline unsigned int size_from_object(struct kmem_cache *s)
481{
482 if (s->flags & SLAB_RED_ZONE)
483 return s->size - s->red_left_pad;
484
485 return s->size;
486}
487
488static inline void *restore_red_left(struct kmem_cache *s, void *p)
489{
490 if (s->flags & SLAB_RED_ZONE)
491 p -= s->red_left_pad;
492
493 return p;
494}
495
/*
 * Debug settings:
 */
499#if defined(CONFIG_SLUB_DEBUG_ON)
500static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
501#else
502static slab_flags_t slub_debug;
503#endif
504
505static char *slub_debug_string;
506static int disable_higher_order_debug;
507
/*
 * slub is about to manipulate internal object metadata.  This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error.  metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
514static inline void metadata_access_enable(void)
515{
516 kasan_disable_current();
517}
518
519static inline void metadata_access_disable(void)
520{
521 kasan_enable_current();
522}
523
524
525
526
527
528
529static inline int check_valid_pointer(struct kmem_cache *s,
530 struct page *page, void *object)
531{
532 void *base;
533
534 if (!object)
535 return 1;
536
537 base = page_address(page);
538 object = kasan_reset_tag(object);
539 object = restore_red_left(s, object);
540 if (object < base || object >= base + page->objects * s->size ||
541 (object - base) % s->size) {
542 return 0;
543 }
544
545 return 1;
546}
547
548static void print_section(char *level, char *text, u8 *addr,
549 unsigned int length)
550{
551 metadata_access_enable();
552 print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS,
553 16, 1, addr, length, 1);
554 metadata_access_disable();
555}
556
557
558
559
560static inline bool freeptr_outside_object(struct kmem_cache *s)
561{
562 return s->offset >= s->inuse;
563}
564
565
566
567
568
569static inline unsigned int get_info_end(struct kmem_cache *s)
570{
571 if (freeptr_outside_object(s))
572 return s->inuse + sizeof(void *);
573 else
574 return s->inuse;
575}
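/*
 * Example (hypothetical sizes): on a 64-bit kernel, a cache with
 * s->inuse = 64 whose free pointer is stored outside the object
 * (s->offset >= s->inuse) places the tracking/debug metadata at
 * offset 64 + sizeof(void *) = 72; if the free pointer overlays the
 * object instead, the metadata starts right at offset 64.
 */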
576
577static struct track *get_track(struct kmem_cache *s, void *object,
578 enum track_item alloc)
579{
580 struct track *p;
581
582 p = object + get_info_end(s);
583
584 return kasan_reset_tag(p + alloc);
585}
586
587static void set_track(struct kmem_cache *s, void *object,
588 enum track_item alloc, unsigned long addr)
589{
590 struct track *p = get_track(s, object, alloc);
591
592 if (addr) {
593#ifdef CONFIG_STACKTRACE
594 unsigned int nr_entries;
595
596 metadata_access_enable();
597 nr_entries = stack_trace_save(kasan_reset_tag(p->addrs),
598 TRACK_ADDRS_COUNT, 3);
599 metadata_access_disable();
600
601 if (nr_entries < TRACK_ADDRS_COUNT)
602 p->addrs[nr_entries] = 0;
603#endif
604 p->addr = addr;
605 p->cpu = smp_processor_id();
606 p->pid = current->pid;
607 p->when = jiffies;
608 } else {
609 memset(p, 0, sizeof(struct track));
610 }
611}
612
613static void init_tracking(struct kmem_cache *s, void *object)
614{
615 if (!(s->flags & SLAB_STORE_USER))
616 return;
617
618 set_track(s, object, TRACK_FREE, 0UL);
619 set_track(s, object, TRACK_ALLOC, 0UL);
620}
621
622static void print_track(const char *s, struct track *t, unsigned long pr_time)
623{
624 if (!t->addr)
625 return;
626
627 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
628 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
629#ifdef CONFIG_STACKTRACE
630 {
631 int i;
632 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
633 if (t->addrs[i])
634 pr_err("\t%pS\n", (void *)t->addrs[i]);
635 else
636 break;
637 }
638#endif
639}
640
641void print_tracking(struct kmem_cache *s, void *object)
642{
643 unsigned long pr_time = jiffies;
644 if (!(s->flags & SLAB_STORE_USER))
645 return;
646
647 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
648 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
649}
650
651static void print_page_info(struct page *page)
652{
653 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
654 page, page->objects, page->inuse, page->freelist, page->flags);
655
656}
657
658static void slab_bug(struct kmem_cache *s, char *fmt, ...)
659{
660 struct va_format vaf;
661 va_list args;
662
663 va_start(args, fmt);
664 vaf.fmt = fmt;
665 vaf.va = &args;
666 pr_err("=============================================================================\n");
667 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
668 pr_err("-----------------------------------------------------------------------------\n\n");
669
670 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
671 va_end(args);
672}
673
674static void slab_fix(struct kmem_cache *s, char *fmt, ...)
675{
676 struct va_format vaf;
677 va_list args;
678
679 va_start(args, fmt);
680 vaf.fmt = fmt;
681 vaf.va = &args;
682 pr_err("FIX %s: %pV\n", s->name, &vaf);
683 va_end(args);
684}
685
686static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
687 void **freelist, void *nextfree)
688{
689 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
690 !check_valid_pointer(s, page, nextfree) && freelist) {
691 object_err(s, page, *freelist, "Freechain corrupt");
692 *freelist = NULL;
693 slab_fix(s, "Isolate corrupted freechain");
694 return true;
695 }
696
697 return false;
698}
699
700static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
701{
702 unsigned int off;
703 u8 *addr = page_address(page);
704
705 print_tracking(s, p);
706
707 print_page_info(page);
708
709 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
710 p, p - addr, get_freepointer(s, p));
711
712 if (s->flags & SLAB_RED_ZONE)
713 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
714 s->red_left_pad);
715 else if (p > addr + 16)
716 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
717
718 print_section(KERN_ERR, "Object ", p,
719 min_t(unsigned int, s->object_size, PAGE_SIZE));
720 if (s->flags & SLAB_RED_ZONE)
721 print_section(KERN_ERR, "Redzone ", p + s->object_size,
722 s->inuse - s->object_size);
723
724 off = get_info_end(s);
725
726 if (s->flags & SLAB_STORE_USER)
727 off += 2 * sizeof(struct track);
728
729 off += kasan_metadata_size(s);
730
731 if (off != size_from_object(s))
732
733 print_section(KERN_ERR, "Padding ", p + off,
734 size_from_object(s) - off);
735
736 dump_stack();
737}
738
739void object_err(struct kmem_cache *s, struct page *page,
740 u8 *object, char *reason)
741{
742 slab_bug(s, "%s", reason);
743 print_trailer(s, page, object);
744}
745
746static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
747 const char *fmt, ...)
748{
749 va_list args;
750 char buf[100];
751
752 va_start(args, fmt);
753 vsnprintf(buf, sizeof(buf), fmt, args);
754 va_end(args);
755 slab_bug(s, "%s", buf);
756 print_page_info(page);
757 dump_stack();
758}
759
760static void init_object(struct kmem_cache *s, void *object, u8 val)
761{
762 u8 *p = kasan_reset_tag(object);
763
764 if (s->flags & SLAB_RED_ZONE)
765 memset(p - s->red_left_pad, val, s->red_left_pad);
766
767 if (s->flags & __OBJECT_POISON) {
768 memset(p, POISON_FREE, s->object_size - 1);
769 p[s->object_size - 1] = POISON_END;
770 }
771
772 if (s->flags & SLAB_RED_ZONE)
773 memset(p + s->object_size, val, s->inuse - s->object_size);
774}
775
776static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
777 void *from, void *to)
778{
779 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
780 memset(from, data, to - from);
781}
782
783static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
784 u8 *object, char *what,
785 u8 *start, unsigned int value, unsigned int bytes)
786{
787 u8 *fault;
788 u8 *end;
789 u8 *addr = page_address(page);
790
791 metadata_access_enable();
792 fault = memchr_inv(kasan_reset_tag(start), value, bytes);
793 metadata_access_disable();
794 if (!fault)
795 return 1;
796
797 end = start + bytes;
798 while (end > fault && end[-1] == value)
799 end--;
800
801 slab_bug(s, "%s overwritten", what);
802 pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
803 fault, end - 1, fault - addr,
804 fault[0], value);
805 print_trailer(s, page, object);
806
807 restore_bytes(s, what, value, fault, end);
808 return 0;
809}
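/*
 * Illustrative sketch (hypothetical addresses): a single corrupted red-zone
 * byte in an in-use object would be reported by the helper above roughly as
 *
 *	BUG kmalloc-64 (Not tainted): Redzone overwritten
 *	INFO: 0xffff888006abcd40-0xffff888006abcd40 @offset=328.
 *	First byte 0x41 instead of 0xcc
 *
 * followed by the object trailer; the expected value is then restored so
 * that the system can limp on after the report.
 */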
/*
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed.
 *	If the freepointer may overlay the object then the free
 *	pointer is at the middle of the object.
 *
 *	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 *	0xa5 (POISON_END)
 *
 * object + s->object_size
 *	Padding to reach word boundary. This is also used for Redzoning.
 *
 *	We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
 *	0xcc (SLUB_RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 *	Meta data starts here.
 *
 *	A. Free pointer (if we cannot overwrite object on free)
 *	B. Tracking data for SLAB_STORE_USER
 *	C. Padding to reach required alignment boundary or at minimum
 *		one word if debugging is on to be able to detect writes
 *		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 *	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored, so no slab options that rely on these boundaries may be
 * used with merged slabcaches.
 */
849static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
850{
851 unsigned long off = get_info_end(s);
852
853 if (s->flags & SLAB_STORE_USER)
854
855 off += 2 * sizeof(struct track);
856
857 off += kasan_metadata_size(s);
858
859 if (size_from_object(s) == off)
860 return 1;
861
862 return check_bytes_and_report(s, page, p, "Object padding",
863 p + off, POISON_INUSE, size_from_object(s) - off);
864}
865
866
867static int slab_pad_check(struct kmem_cache *s, struct page *page)
868{
869 u8 *start;
870 u8 *fault;
871 u8 *end;
872 u8 *pad;
873 int length;
874 int remainder;
875
876 if (!(s->flags & SLAB_POISON))
877 return 1;
878
879 start = page_address(page);
880 length = page_size(page);
881 end = start + length;
882 remainder = length % s->size;
883 if (!remainder)
884 return 1;
885
886 pad = end - remainder;
887 metadata_access_enable();
888 fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
889 metadata_access_disable();
890 if (!fault)
891 return 1;
892 while (end > fault && end[-1] == POISON_INUSE)
893 end--;
894
895 slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
896 fault, end - 1, fault - start);
897 print_section(KERN_ERR, "Padding ", pad, remainder);
898
899 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
900 return 0;
901}
902
903static int check_object(struct kmem_cache *s, struct page *page,
904 void *object, u8 val)
905{
906 u8 *p = object;
907 u8 *endobject = object + s->object_size;
908
909 if (s->flags & SLAB_RED_ZONE) {
910 if (!check_bytes_and_report(s, page, object, "Redzone",
911 object - s->red_left_pad, val, s->red_left_pad))
912 return 0;
913
914 if (!check_bytes_and_report(s, page, object, "Redzone",
915 endobject, val, s->inuse - s->object_size))
916 return 0;
917 } else {
918 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
919 check_bytes_and_report(s, page, p, "Alignment padding",
920 endobject, POISON_INUSE,
921 s->inuse - s->object_size);
922 }
923 }
924
925 if (s->flags & SLAB_POISON) {
926 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
927 (!check_bytes_and_report(s, page, p, "Poison", p,
928 POISON_FREE, s->object_size - 1) ||
929 !check_bytes_and_report(s, page, p, "Poison",
930 p + s->object_size - 1, POISON_END, 1)))
931 return 0;
932
933
934
935 check_pad_bytes(s, page, p);
936 }
937
938 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
939
940
941
942
943 return 1;
944
945
946 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
947 object_err(s, page, p, "Freepointer corrupt");
948
949
950
951
952
953 set_freepointer(s, p, NULL);
954 return 0;
955 }
956 return 1;
957}
958
959static int check_slab(struct kmem_cache *s, struct page *page)
960{
961 int maxobj;
962
963 VM_BUG_ON(!irqs_disabled());
964
965 if (!PageSlab(page)) {
966 slab_err(s, page, "Not a valid slab page");
967 return 0;
968 }
969
970 maxobj = order_objects(compound_order(page), s->size);
971 if (page->objects > maxobj) {
972 slab_err(s, page, "objects %u > max %u",
973 page->objects, maxobj);
974 return 0;
975 }
976 if (page->inuse > page->objects) {
977 slab_err(s, page, "inuse %u > max %u",
978 page->inuse, page->objects);
979 return 0;
980 }
981
982 slab_pad_check(s, page);
983 return 1;
984}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to make sure that the freelist does not change.
 */
990static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
991{
992 int nr = 0;
993 void *fp;
994 void *object = NULL;
995 int max_objects;
996
997 fp = page->freelist;
998 while (fp && nr <= page->objects) {
999 if (fp == search)
1000 return 1;
1001 if (!check_valid_pointer(s, page, fp)) {
1002 if (object) {
1003 object_err(s, page, object,
1004 "Freechain corrupt");
1005 set_freepointer(s, object, NULL);
1006 } else {
1007 slab_err(s, page, "Freepointer corrupt");
1008 page->freelist = NULL;
1009 page->inuse = page->objects;
1010 slab_fix(s, "Freelist cleared");
1011 return 0;
1012 }
1013 break;
1014 }
1015 object = fp;
1016 fp = get_freepointer(s, object);
1017 nr++;
1018 }
1019
1020 max_objects = order_objects(compound_order(page), s->size);
1021 if (max_objects > MAX_OBJS_PER_PAGE)
1022 max_objects = MAX_OBJS_PER_PAGE;
1023
1024 if (page->objects != max_objects) {
1025 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1026 page->objects, max_objects);
1027 page->objects = max_objects;
1028 slab_fix(s, "Number of objects adjusted.");
1029 }
1030 if (page->inuse != page->objects - nr) {
1031 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1032 page->inuse, page->objects - nr);
1033 page->inuse = page->objects - nr;
1034 slab_fix(s, "Object count adjusted.");
1035 }
1036 return search == NULL;
1037}
1038
1039static void trace(struct kmem_cache *s, struct page *page, void *object,
1040 int alloc)
1041{
1042 if (s->flags & SLAB_TRACE) {
1043 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1044 s->name,
1045 alloc ? "alloc" : "free",
1046 object, page->inuse,
1047 page->freelist);
1048
1049 if (!alloc)
1050 print_section(KERN_INFO, "Object ", (void *)object,
1051 s->object_size);
1052
1053 dump_stack();
1054 }
1055}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1060static void add_full(struct kmem_cache *s,
1061 struct kmem_cache_node *n, struct page *page)
1062{
1063 if (!(s->flags & SLAB_STORE_USER))
1064 return;
1065
1066 lockdep_assert_held(&n->list_lock);
1067 list_add(&page->slab_list, &n->full);
1068}
1069
1070static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1071{
1072 if (!(s->flags & SLAB_STORE_USER))
1073 return;
1074
1075 lockdep_assert_held(&n->list_lock);
1076 list_del(&page->slab_list);
1077}
1078
1079
1080static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1081{
1082 struct kmem_cache_node *n = get_node(s, node);
1083
1084 return atomic_long_read(&n->nr_slabs);
1085}
1086
1087static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1088{
1089 return atomic_long_read(&n->nr_slabs);
1090}
1091
1092static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1093{
1094 struct kmem_cache_node *n = get_node(s, node);
1095
1096
1097
1098
1099
1100
1101
1102 if (likely(n)) {
1103 atomic_long_inc(&n->nr_slabs);
1104 atomic_long_add(objects, &n->total_objects);
1105 }
1106}
1107static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1108{
1109 struct kmem_cache_node *n = get_node(s, node);
1110
1111 atomic_long_dec(&n->nr_slabs);
1112 atomic_long_sub(objects, &n->total_objects);
1113}
1114
1115
1116static void setup_object_debug(struct kmem_cache *s, struct page *page,
1117 void *object)
1118{
1119 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1120 return;
1121
1122 init_object(s, object, SLUB_RED_INACTIVE);
1123 init_tracking(s, object);
1124}
1125
1126static
1127void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1128{
1129 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1130 return;
1131
1132 metadata_access_enable();
1133 memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
1134 metadata_access_disable();
1135}
1136
1137static inline int alloc_consistency_checks(struct kmem_cache *s,
1138 struct page *page, void *object)
1139{
1140 if (!check_slab(s, page))
1141 return 0;
1142
1143 if (!check_valid_pointer(s, page, object)) {
1144 object_err(s, page, object, "Freelist Pointer check fails");
1145 return 0;
1146 }
1147
1148 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1149 return 0;
1150
1151 return 1;
1152}
1153
1154static noinline int alloc_debug_processing(struct kmem_cache *s,
1155 struct page *page,
1156 void *object, unsigned long addr)
1157{
1158 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1159 if (!alloc_consistency_checks(s, page, object))
1160 goto bad;
1161 }
1162
1163
1164 if (s->flags & SLAB_STORE_USER)
1165 set_track(s, object, TRACK_ALLOC, addr);
1166 trace(s, page, object, 1);
1167 init_object(s, object, SLUB_RED_ACTIVE);
1168 return 1;
1169
1170bad:
1171 if (PageSlab(page)) {
1172
1173
1174
1175
1176
1177 slab_fix(s, "Marking all objects used");
1178 page->inuse = page->objects;
1179 page->freelist = NULL;
1180 }
1181 return 0;
1182}
1183
1184static inline int free_consistency_checks(struct kmem_cache *s,
1185 struct page *page, void *object, unsigned long addr)
1186{
1187 if (!check_valid_pointer(s, page, object)) {
1188 slab_err(s, page, "Invalid object pointer 0x%p", object);
1189 return 0;
1190 }
1191
1192 if (on_freelist(s, page, object)) {
1193 object_err(s, page, object, "Object already free");
1194 return 0;
1195 }
1196
1197 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1198 return 0;
1199
1200 if (unlikely(s != page->slab_cache)) {
1201 if (!PageSlab(page)) {
1202 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1203 object);
1204 } else if (!page->slab_cache) {
1205 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1206 object);
1207 dump_stack();
1208 } else
1209 object_err(s, page, object,
1210 "page slab pointer corrupt.");
1211 return 0;
1212 }
1213 return 1;
1214}
1215
1216
1217static noinline int free_debug_processing(
1218 struct kmem_cache *s, struct page *page,
1219 void *head, void *tail, int bulk_cnt,
1220 unsigned long addr)
1221{
1222 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1223 void *object = head;
1224 int cnt = 0;
1225 unsigned long flags;
1226 int ret = 0;
1227
1228 spin_lock_irqsave(&n->list_lock, flags);
1229 slab_lock(page);
1230
1231 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1232 if (!check_slab(s, page))
1233 goto out;
1234 }
1235
1236next_object:
1237 cnt++;
1238
1239 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1240 if (!free_consistency_checks(s, page, object, addr))
1241 goto out;
1242 }
1243
1244 if (s->flags & SLAB_STORE_USER)
1245 set_track(s, object, TRACK_FREE, addr);
1246 trace(s, page, object, 0);
1247
1248 init_object(s, object, SLUB_RED_INACTIVE);
1249
1250
1251 if (object != tail) {
1252 object = get_freepointer(s, object);
1253 goto next_object;
1254 }
1255 ret = 1;
1256
1257out:
1258 if (cnt != bulk_cnt)
1259 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1260 bulk_cnt, cnt);
1261
1262 slab_unlock(page);
1263 spin_unlock_irqrestore(&n->list_lock, flags);
1264 if (!ret)
1265 slab_fix(s, "Object at 0x%p not freed", object);
1266 return ret;
1267}
1268
/*
 * Parse a block of slub_debug options. Blocks are delimited by ';'
 *
 * @str:    start of block
 * @flags:  returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
 * @slabs:  return start of list of slabs, or NULL when there's no list
 * @init:   assume this is initial parsing and not per-kmem-create parsing
 *
 * returns the start of next block if there's any, or NULL
 */
1279static char *
1280parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1281{
1282 bool higher_order_disable = false;
1283
1284
1285 while (*str && *str == ';')
1286 str++;
1287
1288 if (*str == ',') {
1289
1290
1291
1292
1293 *flags = DEBUG_DEFAULT_FLAGS;
1294 goto check_slabs;
1295 }
1296 *flags = 0;
1297
1298
1299 for (; *str && *str != ',' && *str != ';'; str++) {
1300 switch (tolower(*str)) {
1301 case '-':
1302 *flags = 0;
1303 break;
1304 case 'f':
1305 *flags |= SLAB_CONSISTENCY_CHECKS;
1306 break;
1307 case 'z':
1308 *flags |= SLAB_RED_ZONE;
1309 break;
1310 case 'p':
1311 *flags |= SLAB_POISON;
1312 break;
1313 case 'u':
1314 *flags |= SLAB_STORE_USER;
1315 break;
1316 case 't':
1317 *flags |= SLAB_TRACE;
1318 break;
1319 case 'a':
1320 *flags |= SLAB_FAILSLAB;
1321 break;
1322 case 'o':
1323
1324
1325
1326
1327 higher_order_disable = true;
1328 break;
1329 default:
1330 if (init)
1331 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1332 }
1333 }
1334check_slabs:
1335 if (*str == ',')
1336 *slabs = ++str;
1337 else
1338 *slabs = NULL;
1339
1340
1341 while (*str && *str != ';')
1342 str++;
1343
1344
1345 while (*str && *str == ';')
1346 str++;
1347
1348 if (init && higher_order_disable)
1349 disable_higher_order_debug = 1;
1350
1351 if (*str)
1352 return str;
1353 else
1354 return NULL;
1355}
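/*
 * Examples of boot-time strings handled by the parser above (see the
 * kernel's SLUB documentation for the full syntax):
 *
 *	slub_debug=FZP			consistency checks, red zoning and
 *					poisoning for all caches
 *	slub_debug=,dentry		default debug flags, but only for the
 *					dentry cache
 *	slub_debug=U,kmalloc-*;F,dentry	per-cache blocks separated by ';',
 *					with a '*' glob matching cache names
 */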
1356
1357static int __init setup_slub_debug(char *str)
1358{
1359 slab_flags_t flags;
1360 char *saved_str;
1361 char *slab_list;
1362 bool global_slub_debug_changed = false;
1363 bool slab_list_specified = false;
1364
1365 slub_debug = DEBUG_DEFAULT_FLAGS;
1366 if (*str++ != '=' || !*str)
1367
1368
1369
1370 goto out;
1371
1372 saved_str = str;
1373 while (str) {
1374 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1375
1376 if (!slab_list) {
1377 slub_debug = flags;
1378 global_slub_debug_changed = true;
1379 } else {
1380 slab_list_specified = true;
1381 }
1382 }
1383
1384
1385
1386
1387
1388
1389
1390 if (slab_list_specified) {
1391 if (!global_slub_debug_changed)
1392 slub_debug = 0;
1393 slub_debug_string = saved_str;
1394 }
1395out:
1396 if (slub_debug != 0 || slub_debug_string)
1397 static_branch_enable(&slub_debug_enabled);
1398 if ((static_branch_unlikely(&init_on_alloc) ||
1399 static_branch_unlikely(&init_on_free)) &&
1400 (slub_debug & SLAB_POISON))
1401 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1402 return 1;
1403}
1404
1405__setup("slub_debug", setup_slub_debug);

/*
 * kmem_cache_flags - apply debugging options to the cache
 * @object_size:	the size of an object without meta data
 * @flags:		flags to set
 * @name:		name of the cache
 *
 * Debug option(s) are applied to @flags. In addition to the debug
 * option(s), if a slab name (or multiple) is specified i.e.
 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
 * then only the select slabs will receive the debug option(s).
 */
1418slab_flags_t kmem_cache_flags(unsigned int object_size,
1419 slab_flags_t flags, const char *name)
1420{
1421 char *iter;
1422 size_t len;
1423 char *next_block;
1424 slab_flags_t block_flags;
1425 slab_flags_t slub_debug_local = slub_debug;
1426
1427
1428
1429
1430
1431
1432 if (flags & SLAB_NOLEAKTRACE)
1433 slub_debug_local &= ~SLAB_STORE_USER;
1434
1435 len = strlen(name);
1436 next_block = slub_debug_string;
1437
1438 while (next_block) {
1439 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1440 if (!iter)
1441 continue;
1442
1443 while (*iter) {
1444 char *end, *glob;
1445 size_t cmplen;
1446
1447 end = strchrnul(iter, ',');
1448 if (next_block && next_block < end)
1449 end = next_block - 1;
1450
1451 glob = strnchr(iter, end - iter, '*');
1452 if (glob)
1453 cmplen = glob - iter;
1454 else
1455 cmplen = max_t(size_t, len, (end - iter));
1456
1457 if (!strncmp(name, iter, cmplen)) {
1458 flags |= block_flags;
1459 return flags;
1460 }
1461
1462 if (!*end || *end == ';')
1463 break;
1464 iter = end + 1;
1465 }
1466 }
1467
1468 return flags | slub_debug_local;
1469}
1470#else
1471static inline void setup_object_debug(struct kmem_cache *s,
1472 struct page *page, void *object) {}
1473static inline
1474void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1475
1476static inline int alloc_debug_processing(struct kmem_cache *s,
1477 struct page *page, void *object, unsigned long addr) { return 0; }
1478
1479static inline int free_debug_processing(
1480 struct kmem_cache *s, struct page *page,
1481 void *head, void *tail, int bulk_cnt,
1482 unsigned long addr) { return 0; }
1483
1484static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1485 { return 1; }
1486static inline int check_object(struct kmem_cache *s, struct page *page,
1487 void *object, u8 val) { return 1; }
1488static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1489 struct page *page) {}
1490static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1491 struct page *page) {}
1492slab_flags_t kmem_cache_flags(unsigned int object_size,
1493 slab_flags_t flags, const char *name)
1494{
1495 return flags;
1496}
1497#define slub_debug 0
1498
1499#define disable_higher_order_debug 0
1500
1501static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1502 { return 0; }
1503static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1504 { return 0; }
1505static inline void inc_slabs_node(struct kmem_cache *s, int node,
1506 int objects) {}
1507static inline void dec_slabs_node(struct kmem_cache *s, int node,
1508 int objects) {}
1509
1510static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1511 void **freelist, void *nextfree)
1512{
1513 return false;
1514}
1515#endif
1516
1517
1518
1519
1520
1521static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1522{
1523 ptr = kasan_kmalloc_large(ptr, size, flags);
1524
1525 kmemleak_alloc(ptr, size, 1, flags);
1526 return ptr;
1527}
1528
1529static __always_inline void kfree_hook(void *x)
1530{
1531 kmemleak_free(x);
1532 kasan_kfree_large(x);
1533}
1534
1535static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1536{
1537 kmemleak_free_recursive(x, s->flags);
1538
1539
1540
1541
1542
1543
1544#ifdef CONFIG_LOCKDEP
1545 {
1546 unsigned long flags;
1547
1548 local_irq_save(flags);
1549 debug_check_no_locks_freed(x, s->object_size);
1550 local_irq_restore(flags);
1551 }
1552#endif
1553 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1554 debug_check_no_obj_freed(x, s->object_size);
1555
1556
1557 if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1558 __kcsan_check_access(x, s->object_size,
1559 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1560
1561
1562 return kasan_slab_free(s, x);
1563}
1564
1565static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1566 void **head, void **tail)
1567{
1568
1569 void *object;
1570 void *next = *head;
1571 void *old_tail = *tail ? *tail : *head;
1572 int rsize;
1573
1574 if (is_kfence_address(next)) {
1575 slab_free_hook(s, next);
1576 return true;
1577 }
1578
1579
1580 *head = NULL;
1581 *tail = NULL;
1582
1583 do {
1584 object = next;
1585 next = get_freepointer(s, object);
1586
1587 if (slab_want_init_on_free(s)) {
1588
1589
1590
1591
1592 memset(kasan_reset_tag(object), 0, s->object_size);
1593 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1594 : 0;
1595 memset((char *)kasan_reset_tag(object) + s->inuse, 0,
1596 s->size - s->inuse - rsize);
1597
1598 }
1599
1600 if (!slab_free_hook(s, object)) {
1601
1602 set_freepointer(s, object, *head);
1603 *head = object;
1604 if (!*tail)
1605 *tail = object;
1606 }
1607 } while (object != old_tail);
1608
1609 if (*head == *tail)
1610 *tail = NULL;
1611
1612 return *head != NULL;
1613}
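/*
 * Note: the hook above rebuilds the detached freelist in place.  Objects for
 * which slab_free_hook() returns true (e.g. KASAN decided to quarantine them)
 * are simply left out of the new *head/*tail chain, so the caller only ever
 * sees objects that are really ready to be put back on the freelist.
 */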
1614
1615static void *setup_object(struct kmem_cache *s, struct page *page,
1616 void *object)
1617{
1618 setup_object_debug(s, page, object);
1619 object = kasan_init_slab_obj(s, object);
1620 if (unlikely(s->ctor)) {
1621 kasan_unpoison_object_data(s, object);
1622 s->ctor(object);
1623 kasan_poison_object_data(s, object);
1624 }
1625 return object;
1626}

/*
 * Slab allocation and freeing
 */
1631static inline struct page *alloc_slab_page(struct kmem_cache *s,
1632 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1633{
1634 struct page *page;
1635 unsigned int order = oo_order(oo);
1636
1637 if (node == NUMA_NO_NODE)
1638 page = alloc_pages(flags, order);
1639 else
1640 page = __alloc_pages_node(node, flags, order);
1641
1642 return page;
1643}
1644
1645#ifdef CONFIG_SLAB_FREELIST_RANDOM
1646
1647static int init_cache_random_seq(struct kmem_cache *s)
1648{
1649 unsigned int count = oo_objects(s->oo);
1650 int err;
1651
1652
1653 if (s->random_seq)
1654 return 0;
1655
1656 err = cache_random_seq_create(s, count, GFP_KERNEL);
1657 if (err) {
1658 pr_err("SLUB: Unable to initialize free list for %s\n",
1659 s->name);
1660 return err;
1661 }
1662
1663
1664 if (s->random_seq) {
1665 unsigned int i;
1666
1667 for (i = 0; i < count; i++)
1668 s->random_seq[i] *= s->size;
1669 }
1670 return 0;
1671}
1672
1673
1674static void __init init_freelist_randomization(void)
1675{
1676 struct kmem_cache *s;
1677
1678 mutex_lock(&slab_mutex);
1679
1680 list_for_each_entry(s, &slab_caches, list)
1681 init_cache_random_seq(s);
1682
1683 mutex_unlock(&slab_mutex);
1684}
1685
1686
1687static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1688 unsigned long *pos, void *start,
1689 unsigned long page_limit,
1690 unsigned long freelist_count)
1691{
1692 unsigned int idx;
1693
1694
1695
1696
1697
1698 do {
1699 idx = s->random_seq[*pos];
1700 *pos += 1;
1701 if (*pos >= freelist_count)
1702 *pos = 0;
1703 } while (unlikely(idx >= page_limit));
1704
1705 return (char *)start + idx;
1706}
1707
1708
1709static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1710{
1711 void *start;
1712 void *cur;
1713 void *next;
1714 unsigned long idx, pos, page_limit, freelist_count;
1715
1716 if (page->objects < 2 || !s->random_seq)
1717 return false;
1718
1719 freelist_count = oo_objects(s->oo);
1720 pos = get_random_int() % freelist_count;
1721
1722 page_limit = page->objects * s->size;
1723 start = fixup_red_left(s, page_address(page));
1724
1725
1726 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1727 freelist_count);
1728 cur = setup_object(s, page, cur);
1729 page->freelist = cur;
1730
1731 for (idx = 1; idx < page->objects; idx++) {
1732 next = next_freelist_entry(s, page, &pos, start, page_limit,
1733 freelist_count);
1734 next = setup_object(s, page, next);
1735 set_freepointer(s, cur, next);
1736 cur = next;
1737 }
1738 set_freepointer(s, cur, NULL);
1739
1740 return true;
1741}
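/*
 * With CONFIG_SLAB_FREELIST_RANDOM the initial freelist is thus built by
 * walking a precomputed random permutation (s->random_seq, scaled to byte
 * offsets in init_cache_random_seq()) from a random starting position, so
 * the order in which objects are first handed out differs per slab and is
 * hard to predict from one allocation to the next.
 */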
1742#else
1743static inline int init_cache_random_seq(struct kmem_cache *s)
1744{
1745 return 0;
1746}
1747static inline void init_freelist_randomization(void) { }
1748static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1749{
1750 return false;
1751}
1752#endif
1753
1754static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1755{
1756 struct page *page;
1757 struct kmem_cache_order_objects oo = s->oo;
1758 gfp_t alloc_gfp;
1759 void *start, *p, *next;
1760 int idx;
1761 bool shuffle;
1762
1763 flags &= gfp_allowed_mask;
1764
1765 if (gfpflags_allow_blocking(flags))
1766 local_irq_enable();
1767
1768 flags |= s->allocflags;
1769
1770
1771
1772
1773
1774 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1775 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1776 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1777
1778 page = alloc_slab_page(s, alloc_gfp, node, oo);
1779 if (unlikely(!page)) {
1780 oo = s->min;
1781 alloc_gfp = flags;
1782
1783
1784
1785
1786 page = alloc_slab_page(s, alloc_gfp, node, oo);
1787 if (unlikely(!page))
1788 goto out;
1789 stat(s, ORDER_FALLBACK);
1790 }
1791
1792 page->objects = oo_objects(oo);
1793
1794 account_slab_page(page, oo_order(oo), s, flags);
1795
1796 page->slab_cache = s;
1797 __SetPageSlab(page);
1798 if (page_is_pfmemalloc(page))
1799 SetPageSlabPfmemalloc(page);
1800
1801 kasan_poison_slab(page);
1802
1803 start = page_address(page);
1804
1805 setup_page_debug(s, page, start);
1806
1807 shuffle = shuffle_freelist(s, page);
1808
1809 if (!shuffle) {
1810 start = fixup_red_left(s, start);
1811 start = setup_object(s, page, start);
1812 page->freelist = start;
1813 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1814 next = p + s->size;
1815 next = setup_object(s, page, next);
1816 set_freepointer(s, p, next);
1817 p = next;
1818 }
1819 set_freepointer(s, p, NULL);
1820 }
1821
1822 page->inuse = page->objects;
1823 page->frozen = 1;
1824
1825out:
1826 if (gfpflags_allow_blocking(flags))
1827 local_irq_disable();
1828 if (!page)
1829 return NULL;
1830
1831 inc_slabs_node(s, page_to_nid(page), page->objects);
1832
1833 return page;
1834}
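/*
 * Note the two-step strategy above: the preferred (higher) order is tried
 * first with __GFP_NOWARN | __GFP_NORETRY so that a fragmented page
 * allocator fails fast, and only then does the code fall back to the
 * minimum order in s->min, counting the event as ORDER_FALLBACK.
 */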
1835
1836static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1837{
1838 if (unlikely(flags & GFP_SLAB_BUG_MASK))
1839 flags = kmalloc_fix_flags(flags);
1840
1841 return allocate_slab(s,
1842 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1843}
1844
1845static void __free_slab(struct kmem_cache *s, struct page *page)
1846{
1847 int order = compound_order(page);
1848 int pages = 1 << order;
1849
1850 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1851 void *p;
1852
1853 slab_pad_check(s, page);
1854 for_each_object(p, s, page_address(page),
1855 page->objects)
1856 check_object(s, page, p, SLUB_RED_INACTIVE);
1857 }
1858
1859 __ClearPageSlabPfmemalloc(page);
1860 __ClearPageSlab(page);
1861
1862 page->slab_cache = NULL;
1863 if (current->reclaim_state)
1864 current->reclaim_state->reclaimed_slab += pages;
1865 unaccount_slab_page(page, order, s);
1866 __free_pages(page, order);
1867}
1868
1869static void rcu_free_slab(struct rcu_head *h)
1870{
1871 struct page *page = container_of(h, struct page, rcu_head);
1872
1873 __free_slab(page->slab_cache, page);
1874}
1875
1876static void free_slab(struct kmem_cache *s, struct page *page)
1877{
1878 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1879 call_rcu(&page->rcu_head, rcu_free_slab);
1880 } else
1881 __free_slab(s, page);
1882}
1883
1884static void discard_slab(struct kmem_cache *s, struct page *page)
1885{
1886 dec_slabs_node(s, page_to_nid(page), page->objects);
1887 free_slab(s, page);
1888}
1889
1890
1891
1892
1893static inline void
1894__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1895{
1896 n->nr_partial++;
1897 if (tail == DEACTIVATE_TO_TAIL)
1898 list_add_tail(&page->slab_list, &n->partial);
1899 else
1900 list_add(&page->slab_list, &n->partial);
1901}
1902
1903static inline void add_partial(struct kmem_cache_node *n,
1904 struct page *page, int tail)
1905{
1906 lockdep_assert_held(&n->list_lock);
1907 __add_partial(n, page, tail);
1908}
1909
1910static inline void remove_partial(struct kmem_cache_node *n,
1911 struct page *page)
1912{
1913 lockdep_assert_held(&n->list_lock);
1914 list_del(&page->slab_list);
1915 n->nr_partial--;
1916}

/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 */
1924static inline void *acquire_slab(struct kmem_cache *s,
1925 struct kmem_cache_node *n, struct page *page,
1926 int mode, int *objects)
1927{
1928 void *freelist;
1929 unsigned long counters;
1930 struct page new;
1931
1932 lockdep_assert_held(&n->list_lock);
1933
1934
1935
1936
1937
1938
1939 freelist = page->freelist;
1940 counters = page->counters;
1941 new.counters = counters;
1942 *objects = new.objects - new.inuse;
1943 if (mode) {
1944 new.inuse = page->objects;
1945 new.freelist = NULL;
1946 } else {
1947 new.freelist = freelist;
1948 }
1949
1950 VM_BUG_ON(new.frozen);
1951 new.frozen = 1;
1952
1953 if (!__cmpxchg_double_slab(s, page,
1954 freelist, counters,
1955 new.freelist, new.counters,
1956 "acquire_slab"))
1957 return NULL;
1958
1959 remove_partial(n, page);
1960 WARN_ON(!freelist);
1961 return freelist;
1962}
1963
1964static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1965static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
 * Try to allocate a partial slab from a specific node.
 */
1970static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1971 struct kmem_cache_cpu *c, gfp_t flags)
1972{
1973 struct page *page, *page2;
1974 void *object = NULL;
1975 unsigned int available = 0;
1976 int objects;
1977
1978
1979
1980
1981
1982
1983
1984 if (!n || !n->nr_partial)
1985 return NULL;
1986
1987 spin_lock(&n->list_lock);
1988 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1989 void *t;
1990
1991 if (!pfmemalloc_match(page, flags))
1992 continue;
1993
1994 t = acquire_slab(s, n, page, object == NULL, &objects);
1995 if (!t)
1996 break;
1997
1998 available += objects;
1999 if (!object) {
2000 c->page = page;
2001 stat(s, ALLOC_FROM_PARTIAL);
2002 object = t;
2003 } else {
2004 put_cpu_partial(s, page, 0);
2005 stat(s, CPU_PARTIAL_NODE);
2006 }
2007 if (!kmem_cache_has_cpu_partial(s)
2008 || available > slub_cpu_partial(s) / 2)
2009 break;
2010
2011 }
2012 spin_unlock(&n->list_lock);
2013 return object;
2014}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
2019static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2020 struct kmem_cache_cpu *c)
2021{
2022#ifdef CONFIG_NUMA
2023 struct zonelist *zonelist;
2024 struct zoneref *z;
2025 struct zone *zone;
2026 enum zone_type highest_zoneidx = gfp_zone(flags);
2027 void *object;
2028 unsigned int cpuset_mems_cookie;
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048 if (!s->remote_node_defrag_ratio ||
2049 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2050 return NULL;
2051
2052 do {
2053 cpuset_mems_cookie = read_mems_allowed_begin();
2054 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2055 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2056 struct kmem_cache_node *n;
2057
2058 n = get_node(s, zone_to_nid(zone));
2059
2060 if (n && cpuset_zone_allowed(zone, flags) &&
2061 n->nr_partial > s->min_partial) {
2062 object = get_partial_node(s, n, c, flags);
2063 if (object) {
2064
2065
2066
2067
2068
2069
2070
2071 return object;
2072 }
2073 }
2074 }
2075 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2076#endif
2077 return NULL;
2078}

/*
 * Get a partial page, lock it and return it.
 */
2083static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2084 struct kmem_cache_cpu *c)
2085{
2086 void *object;
2087 int searchnode = node;
2088
2089 if (node == NUMA_NO_NODE)
2090 searchnode = numa_mem_id();
2091
2092 object = get_partial_node(s, get_node(s, searchnode), c, flags);
2093 if (object || node != NUMA_NO_NODE)
2094 return object;
2095
2096 return get_any_partial(s, flags, c);
2097}
2098
2099#ifdef CONFIG_PREEMPTION
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
2105#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2106#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
2111#define TID_STEP 1
2112#endif
2113
2114static inline unsigned long next_tid(unsigned long tid)
2115{
2116 return tid + TID_STEP;
2117}
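/*
 * Worked example (assuming CONFIG_PREEMPTION and CONFIG_NR_CPUS = 64):
 * TID_STEP is 64, so CPU 5 starts at tid 5 and moves through 69, 133, ...
 * tid % TID_STEP recovers the owning CPU and tid / TID_STEP counts how many
 * operations that CPU has completed, which is exactly what tid_to_cpu() and
 * tid_to_event() below compute for the debug message.
 */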
2118
2119#ifdef SLUB_DEBUG_CMPXCHG
2120static inline unsigned int tid_to_cpu(unsigned long tid)
2121{
2122 return tid % TID_STEP;
2123}
2124
2125static inline unsigned long tid_to_event(unsigned long tid)
2126{
2127 return tid / TID_STEP;
2128}
2129#endif
2130
2131static inline unsigned int init_tid(int cpu)
2132{
2133 return cpu;
2134}
2135
2136static inline void note_cmpxchg_failure(const char *n,
2137 const struct kmem_cache *s, unsigned long tid)
2138{
2139#ifdef SLUB_DEBUG_CMPXCHG
2140 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2141
2142 pr_info("%s %s: cmpxchg redo ", n, s->name);
2143
2144#ifdef CONFIG_PREEMPTION
2145 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2146 pr_warn("due to cpu change %d -> %d\n",
2147 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2148 else
2149#endif
2150 if (tid_to_event(tid) != tid_to_event(actual_tid))
2151 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2152 tid_to_event(tid), tid_to_event(actual_tid));
2153 else
2154 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2155 actual_tid, tid, next_tid(tid));
2156#endif
2157 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2158}
2159
2160static void init_kmem_cache_cpus(struct kmem_cache *s)
2161{
2162 int cpu;
2163
2164 for_each_possible_cpu(cpu)
2165 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2166}

/*
 * Remove the cpu slab
 */
2171static void deactivate_slab(struct kmem_cache *s, struct page *page,
2172 void *freelist, struct kmem_cache_cpu *c)
2173{
2174 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2175 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2176 int lock = 0, free_delta = 0;
2177 enum slab_modes l = M_NONE, m = M_NONE;
2178 void *nextfree, *freelist_iter, *freelist_tail;
2179 int tail = DEACTIVATE_TO_HEAD;
2180 struct page new;
2181 struct page old;
2182
2183 if (page->freelist) {
2184 stat(s, DEACTIVATE_REMOTE_FREES);
2185 tail = DEACTIVATE_TO_TAIL;
2186 }
2187
2188
2189
2190
2191
2192 freelist_tail = NULL;
2193 freelist_iter = freelist;
2194 while (freelist_iter) {
2195 nextfree = get_freepointer(s, freelist_iter);
2196
2197
2198
2199
2200
2201
2202 if (freelist_corrupted(s, page, &freelist_iter, nextfree))
2203 break;
2204
2205 freelist_tail = freelist_iter;
2206 free_delta++;
2207
2208 freelist_iter = nextfree;
2209 }
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227redo:
2228
2229 old.freelist = READ_ONCE(page->freelist);
2230 old.counters = READ_ONCE(page->counters);
2231 VM_BUG_ON(!old.frozen);
2232
2233
2234 new.counters = old.counters;
2235 if (freelist_tail) {
2236 new.inuse -= free_delta;
2237 set_freepointer(s, freelist_tail, old.freelist);
2238 new.freelist = freelist;
2239 } else
2240 new.freelist = old.freelist;
2241
2242 new.frozen = 0;
2243
2244 if (!new.inuse && n->nr_partial >= s->min_partial)
2245 m = M_FREE;
2246 else if (new.freelist) {
2247 m = M_PARTIAL;
2248 if (!lock) {
2249 lock = 1;
2250
2251
2252
2253
2254
2255 spin_lock(&n->list_lock);
2256 }
2257 } else {
2258 m = M_FULL;
2259 if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
2260 lock = 1;
2261
2262
2263
2264
2265
2266 spin_lock(&n->list_lock);
2267 }
2268 }
2269
2270 if (l != m) {
2271 if (l == M_PARTIAL)
2272 remove_partial(n, page);
2273 else if (l == M_FULL)
2274 remove_full(s, n, page);
2275
2276 if (m == M_PARTIAL)
2277 add_partial(n, page, tail);
2278 else if (m == M_FULL)
2279 add_full(s, n, page);
2280 }
2281
2282 l = m;
2283 if (!__cmpxchg_double_slab(s, page,
2284 old.freelist, old.counters,
2285 new.freelist, new.counters,
2286 "unfreezing slab"))
2287 goto redo;
2288
2289 if (lock)
2290 spin_unlock(&n->list_lock);
2291
2292 if (m == M_PARTIAL)
2293 stat(s, tail);
2294 else if (m == M_FULL)
2295 stat(s, DEACTIVATE_FULL);
2296 else if (m == M_FREE) {
2297 stat(s, DEACTIVATE_EMPTY);
2298 discard_slab(s, page);
2299 stat(s, FREE_SLAB);
2300 }
2301
2302 c->page = NULL;
2303 c->freelist = NULL;
2304}

/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 */
2313static void unfreeze_partials(struct kmem_cache *s,
2314 struct kmem_cache_cpu *c)
2315{
2316#ifdef CONFIG_SLUB_CPU_PARTIAL
2317 struct kmem_cache_node *n = NULL, *n2 = NULL;
2318 struct page *page, *discard_page = NULL;
2319
2320 while ((page = slub_percpu_partial(c))) {
2321 struct page new;
2322 struct page old;
2323
2324 slub_set_percpu_partial(c, page);
2325
2326 n2 = get_node(s, page_to_nid(page));
2327 if (n != n2) {
2328 if (n)
2329 spin_unlock(&n->list_lock);
2330
2331 n = n2;
2332 spin_lock(&n->list_lock);
2333 }
2334
2335 do {
2336
2337 old.freelist = page->freelist;
2338 old.counters = page->counters;
2339 VM_BUG_ON(!old.frozen);
2340
2341 new.counters = old.counters;
2342 new.freelist = old.freelist;
2343
2344 new.frozen = 0;
2345
2346 } while (!__cmpxchg_double_slab(s, page,
2347 old.freelist, old.counters,
2348 new.freelist, new.counters,
2349 "unfreezing slab"));
2350
2351 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2352 page->next = discard_page;
2353 discard_page = page;
2354 } else {
2355 add_partial(n, page, DEACTIVATE_TO_TAIL);
2356 stat(s, FREE_ADD_PARTIAL);
2357 }
2358 }
2359
2360 if (n)
2361 spin_unlock(&n->list_lock);
2362
2363 while (discard_page) {
2364 page = discard_page;
2365 discard_page = discard_page->next;
2366
2367 stat(s, DEACTIVATE_EMPTY);
2368 discard_slab(s, page);
2369 stat(s, FREE_SLAB);
2370 }
2371#endif
2372}

/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) into a
 * partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2381static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2382{
2383#ifdef CONFIG_SLUB_CPU_PARTIAL
2384 struct page *oldpage;
2385 int pages;
2386 int pobjects;
2387
2388 preempt_disable();
2389 do {
2390 pages = 0;
2391 pobjects = 0;
2392 oldpage = this_cpu_read(s->cpu_slab->partial);
2393
2394 if (oldpage) {
2395 pobjects = oldpage->pobjects;
2396 pages = oldpage->pages;
2397 if (drain && pobjects > slub_cpu_partial(s)) {
2398 unsigned long flags;
2399
2400
2401
2402
2403 local_irq_save(flags);
2404 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2405 local_irq_restore(flags);
2406 oldpage = NULL;
2407 pobjects = 0;
2408 pages = 0;
2409 stat(s, CPU_PARTIAL_DRAIN);
2410 }
2411 }
2412
2413 pages++;
2414 pobjects += page->objects - page->inuse;
2415
2416 page->pages = pages;
2417 page->pobjects = pobjects;
2418 page->next = oldpage;
2419
2420 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2421 != oldpage);
2422 if (unlikely(!slub_cpu_partial(s))) {
2423 unsigned long flags;
2424
2425 local_irq_save(flags);
2426 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2427 local_irq_restore(flags);
2428 }
2429 preempt_enable();
2430#endif
2431}
2432
2433static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2434{
2435 stat(s, CPUSLAB_FLUSH);
2436 deactivate_slab(s, c->page, c->freelist, c);
2437
2438 c->tid = next_tid(c->tid);
2439}

/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
2446static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2447{
2448 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2449
2450 if (c->page)
2451 flush_slab(s, c);
2452
2453 unfreeze_partials(s, c);
2454}
2455
2456static void flush_cpu_slab(void *d)
2457{
2458 struct kmem_cache *s = d;
2459
2460 __flush_cpu_slab(s, smp_processor_id());
2461}
2462
2463static bool has_cpu_slab(int cpu, void *info)
2464{
2465 struct kmem_cache *s = info;
2466 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2467
2468 return c->page || slub_percpu_partial(c);
2469}
2470
2471static void flush_all(struct kmem_cache *s)
2472{
2473 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
2474}

/*
 * Use the cpu notifier to insure that the cpu slabs are flushed when
 * necessary.
 */
2480static int slub_cpu_dead(unsigned int cpu)
2481{
2482 struct kmem_cache *s;
2483 unsigned long flags;
2484
2485 mutex_lock(&slab_mutex);
2486 list_for_each_entry(s, &slab_caches, list) {
2487 local_irq_save(flags);
2488 __flush_cpu_slab(s, cpu);
2489 local_irq_restore(flags);
2490 }
2491 mutex_unlock(&slab_mutex);
2492 return 0;
2493}
2494

/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2499static inline int node_match(struct page *page, int node)
2500{
2501#ifdef CONFIG_NUMA
2502 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2503 return 0;
2504#endif
2505 return 1;
2506}
2507
2508#ifdef CONFIG_SLUB_DEBUG
2509static int count_free(struct page *page)
2510{
2511 return page->objects - page->inuse;
2512}
2513
2514static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2515{
2516 return atomic_long_read(&n->total_objects);
2517}
2518#endif
2519
2520#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2521static unsigned long count_partial(struct kmem_cache_node *n,
2522 int (*get_count)(struct page *))
2523{
2524 unsigned long flags;
2525 unsigned long x = 0;
2526 struct page *page;
2527
2528 spin_lock_irqsave(&n->list_lock, flags);
2529 list_for_each_entry(page, &n->partial, slab_list)
2530 x += get_count(page);
2531 spin_unlock_irqrestore(&n->list_lock, flags);
2532 return x;
2533}
2534#endif
2535
2536static noinline void
2537slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2538{
2539#ifdef CONFIG_SLUB_DEBUG
2540 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2541 DEFAULT_RATELIMIT_BURST);
2542 int node;
2543 struct kmem_cache_node *n;
2544
2545 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2546 return;
2547
2548 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2549 nid, gfpflags, &gfpflags);
2550 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2551 s->name, s->object_size, s->size, oo_order(s->oo),
2552 oo_order(s->min));
2553
2554 if (oo_order(s->min) > get_order(s->object_size))
2555 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2556 s->name);
2557
2558 for_each_kmem_cache_node(s, node, n) {
2559 unsigned long nr_slabs;
2560 unsigned long nr_objs;
2561 unsigned long nr_free;
2562
2563 nr_free = count_partial(n, count_free);
2564 nr_slabs = node_nr_slabs(n);
2565 nr_objs = node_nr_objs(n);
2566
2567 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2568 node, nr_slabs, nr_objs, nr_free);
2569 }
2570#endif
2571}
2572
2573static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2574 int node, struct kmem_cache_cpu **pc)
2575{
2576 void *freelist;
2577 struct kmem_cache_cpu *c = *pc;
2578 struct page *page;
2579
2580 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2581
2582 freelist = get_partial(s, flags, node, c);
2583
2584 if (freelist)
2585 return freelist;
2586
2587 page = new_slab(s, flags, node);
2588 if (page) {
2589 c = raw_cpu_ptr(s->cpu_slab);
2590 if (c->page)
2591 flush_slab(s, c);
2592
2593
2594
2595
2596
2597 freelist = page->freelist;
2598 page->freelist = NULL;
2599
2600 stat(s, ALLOC_SLAB);
2601 c->page = page;
2602 *pc = c;
2603 }
2604
2605 return freelist;
2606}
2607
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
{
	if (unlikely(PageSlabPfmemalloc(page)))
		return gfp_pfmemalloc_allowed(gfpflags);

	return true;
}
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
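/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL; if NULL is
 * returned the page has been unfrozen. Called with interrupts disabled.
 */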
static inline void *get_freelist(struct kmem_cache *s, struct page *page)
{
	struct page new;
	unsigned long counters;
	void *freelist;

	do {
		freelist = page->freelist;
		counters = page->counters;

		new.counters = counters;
		VM_BUG_ON(!new.frozen);

		new.inuse = page->objects;
		new.frozen = freelist != NULL;

	} while (!__cmpxchg_double_slab(s, page,
		freelist, counters,
		NULL, new.counters,
		"get_freelist"));

	return freelist;
}
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
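/*
 * Slow path allocation, called with interrupts disabled. The lockless
 * per cpu freelist is empty or the request could not be satisfied from it
 * (wrong node, pfmemalloc mismatch, or debugging is enabled).
 *
 * Try, in order: the current page's regular freelist, the per cpu partial
 * list, the node partial lists, and finally a brand new slab from the
 * page allocator.
 */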
2669static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2670 unsigned long addr, struct kmem_cache_cpu *c)
2671{
2672 void *freelist;
2673 struct page *page;
2674
2675 stat(s, ALLOC_SLOWPATH);
2676
2677 page = c->page;
2678 if (!page) {
2679
2680
2681
2682
2683 if (unlikely(node != NUMA_NO_NODE &&
2684 !node_isset(node, slab_nodes)))
2685 node = NUMA_NO_NODE;
2686 goto new_slab;
2687 }
2688redo:
2689
2690 if (unlikely(!node_match(page, node))) {
2691
2692
2693
2694
2695 if (!node_isset(node, slab_nodes)) {
2696 node = NUMA_NO_NODE;
2697 goto redo;
2698 } else {
2699 stat(s, ALLOC_NODE_MISMATCH);
2700 deactivate_slab(s, page, c->freelist, c);
2701 goto new_slab;
2702 }
2703 }
2704
2705
2706
2707
2708
2709
2710 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2711 deactivate_slab(s, page, c->freelist, c);
2712 goto new_slab;
2713 }
2714
2715
2716 freelist = c->freelist;
2717 if (freelist)
2718 goto load_freelist;
2719
2720 freelist = get_freelist(s, page);
2721
2722 if (!freelist) {
2723 c->page = NULL;
2724 stat(s, DEACTIVATE_BYPASS);
2725 goto new_slab;
2726 }
2727
2728 stat(s, ALLOC_REFILL);
2729
2730load_freelist:
2731
2732
2733
2734
2735
2736 VM_BUG_ON(!c->page->frozen);
2737 c->freelist = get_freepointer(s, freelist);
2738 c->tid = next_tid(c->tid);
2739 return freelist;
2740
2741new_slab:
2742
2743 if (slub_percpu_partial(c)) {
2744 page = c->page = slub_percpu_partial(c);
2745 slub_set_percpu_partial(c, page);
2746 stat(s, CPU_PARTIAL_ALLOC);
2747 goto redo;
2748 }
2749
2750 freelist = new_slab_objects(s, gfpflags, node, &c);
2751
2752 if (unlikely(!freelist)) {
2753 slab_out_of_memory(s, gfpflags, node);
2754 return NULL;
2755 }
2756
2757 page = c->page;
2758 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2759 goto load_freelist;
2760
2761
2762 if (kmem_cache_debug(s) &&
2763 !alloc_debug_processing(s, page, freelist, addr))
2764 goto new_slab;
2765
2766 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2767 return freelist;
2768}
2769
2770
2771
2772
2773
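/*
 * Interrupt disabling wrapper around ___slab_alloc() for callers that may
 * run with interrupts enabled and may have been preempted since reading
 * the per cpu slab pointer.
 */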
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void *p;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPTION
	/*
	 * We may have been preempted and rescheduled on a different
	 * cpu before disabling interrupts. Need to reload the cpu area
	 * pointer.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	p = ___slab_alloc(s, gfpflags, node, addr, c);
	local_irq_restore(flags);
	return p;
}
2794
2795
2796
2797
2798
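/*
 * If the object was wiped on free (init_on_free), re-zero the freelist
 * pointer slot inside it so the object is returned fully initialized.
 */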
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
						   void *obj)
{
	if (unlikely(slab_want_init_on_free(s)) && obj)
		memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
			0, sizeof(void *));
}
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
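/*
 * Allocation fastpath. If the lockless per cpu freelist has an object on a
 * matching node, take it with a single cmpxchg_double on (freelist, tid);
 * otherwise fall back to __slab_alloc(). The tid check detects that we were
 * neither preempted nor migrated between reading the freelist and the
 * cmpxchg.
 */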
2817static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2818 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
2819{
2820 void *object;
2821 struct kmem_cache_cpu *c;
2822 struct page *page;
2823 unsigned long tid;
2824 struct obj_cgroup *objcg = NULL;
2825
2826 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
2827 if (!s)
2828 return NULL;
2829
2830 object = kfence_alloc(s, orig_size, gfpflags);
2831 if (unlikely(object))
2832 goto out;
2833
2834redo:
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845 do {
2846 tid = this_cpu_read(s->cpu_slab->tid);
2847 c = raw_cpu_ptr(s->cpu_slab);
2848 } while (IS_ENABLED(CONFIG_PREEMPTION) &&
2849 unlikely(tid != READ_ONCE(c->tid)));
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859 barrier();
2860
2861
2862
2863
2864
2865
2866
2867
2868 object = c->freelist;
2869 page = c->page;
2870 if (unlikely(!object || !page || !node_match(page, node))) {
2871 object = __slab_alloc(s, gfpflags, node, addr, c);
2872 } else {
2873 void *next_object = get_freepointer_safe(s, object);
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889 if (unlikely(!this_cpu_cmpxchg_double(
2890 s->cpu_slab->freelist, s->cpu_slab->tid,
2891 object, tid,
2892 next_object, next_tid(tid)))) {
2893
2894 note_cmpxchg_failure("slab_alloc", s, tid);
2895 goto redo;
2896 }
2897 prefetch_freepointer(s, next_object);
2898 stat(s, ALLOC_FASTPATH);
2899 }
2900
2901 maybe_wipe_obj_freeptr(s, object);
2902
2903 if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2904 memset(kasan_reset_tag(object), 0, s->object_size);
2905
2906out:
2907 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
2908
2909 return object;
2910}
2911
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
}

void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
	void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);

	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
				s->size, gfpflags);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
2928
2929#ifdef CONFIG_TRACING
2930void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2931{
2932 void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
2933 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2934 ret = kasan_kmalloc(s, ret, size, gfpflags);
2935 return ret;
2936}
2937EXPORT_SYMBOL(kmem_cache_alloc_trace);
2938#endif
2939
2940#ifdef CONFIG_NUMA
2941void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2942{
2943 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
2944
2945 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2946 s->object_size, s->size, gfpflags, node);
2947
2948 return ret;
2949}
2950EXPORT_SYMBOL(kmem_cache_alloc_node);
2951
2952#ifdef CONFIG_TRACING
2953void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2954 gfp_t gfpflags,
2955 int node, size_t size)
2956{
2957 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
2958
2959 trace_kmalloc_node(_RET_IP_, ret,
2960 size, s->size, gfpflags, node);
2961
2962 ret = kasan_kmalloc(s, ret, size, gfpflags);
2963 return ret;
2964}
2965EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2966#endif
2967#endif
2968
2969
2970
2971
2972
2973
2974
2975
2976
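/*
 * Slow path free: link the object(s) back into the page's freelist with a
 * cmpxchg_double. The node's list_lock is only taken when the page must be
 * added to or removed from a partial/full list, or discarded entirely.
 */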
2977static void __slab_free(struct kmem_cache *s, struct page *page,
2978 void *head, void *tail, int cnt,
2979 unsigned long addr)
2980
2981{
2982 void *prior;
2983 int was_frozen;
2984 struct page new;
2985 unsigned long counters;
2986 struct kmem_cache_node *n = NULL;
2987 unsigned long flags;
2988
2989 stat(s, FREE_SLOWPATH);
2990
2991 if (kfence_free(head))
2992 return;
2993
2994 if (kmem_cache_debug(s) &&
2995 !free_debug_processing(s, page, head, tail, cnt, addr))
2996 return;
2997
2998 do {
2999 if (unlikely(n)) {
3000 spin_unlock_irqrestore(&n->list_lock, flags);
3001 n = NULL;
3002 }
3003 prior = page->freelist;
3004 counters = page->counters;
3005 set_freepointer(s, tail, prior);
3006 new.counters = counters;
3007 was_frozen = new.frozen;
3008 new.inuse -= cnt;
3009 if ((!new.inuse || !prior) && !was_frozen) {
3010
3011 if (kmem_cache_has_cpu_partial(s) && !prior) {
3012
3013
3014
3015
3016
3017
3018
3019 new.frozen = 1;
3020
3021 } else {
3022
3023 n = get_node(s, page_to_nid(page));
3024
3025
3026
3027
3028
3029
3030
3031
3032 spin_lock_irqsave(&n->list_lock, flags);
3033
3034 }
3035 }
3036
3037 } while (!cmpxchg_double_slab(s, page,
3038 prior, counters,
3039 head, new.counters,
3040 "__slab_free"));
3041
3042 if (likely(!n)) {
3043
3044 if (likely(was_frozen)) {
3045
3046
3047
3048
3049 stat(s, FREE_FROZEN);
3050 } else if (new.frozen) {
3051
3052
3053
3054
3055 put_cpu_partial(s, page, 1);
3056 stat(s, CPU_PARTIAL_FREE);
3057 }
3058
3059 return;
3060 }
3061
3062 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3063 goto slab_empty;
3064
3065
3066
3067
3068
3069 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3070 remove_full(s, n, page);
3071 add_partial(n, page, DEACTIVATE_TO_TAIL);
3072 stat(s, FREE_ADD_PARTIAL);
3073 }
3074 spin_unlock_irqrestore(&n->list_lock, flags);
3075 return;
3076
3077slab_empty:
3078 if (prior) {
3079
3080
3081
3082 remove_partial(n, page);
3083 stat(s, FREE_REMOVE_PARTIAL);
3084 } else {
3085
3086 remove_full(s, n, page);
3087 }
3088
3089 spin_unlock_irqrestore(&n->list_lock, flags);
3090 stat(s, FREE_SLAB);
3091 discard_slab(s, page);
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
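/*
 * Freeing fastpath, forced inline so that kfree() and kmem_cache_free()
 * contain it directly. If the object(s) belong to the current cpu slab,
 * push them onto the lockless freelist with a cmpxchg_double; otherwise
 * fall back to __slab_free(). head/tail/cnt describe a pre-built freelist
 * of objects that all belong to the same page (bulk free).
 */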
static __always_inline void do_slab_free(struct kmem_cache *s,
				struct page *page, void *head, void *tail,
				int cnt, unsigned long addr)
{
	void *tail_obj = tail ? : head;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	memcg_slab_free_hook(s, &head, 1);
redo:
	/*
	 * Read the tid and the per cpu pointer as a consistent pair; retry
	 * if preemption moved us to another cpu between the two reads.
	 */
	do {
		tid = this_cpu_read(s->cpu_slab->tid);
		c = raw_cpu_ptr(s->cpu_slab);
	} while (IS_ENABLED(CONFIG_PREEMPTION) &&
		 unlikely(tid != READ_ONCE(c->tid)));

	/* Make sure tid is fetched before c->page and c->freelist below. */
	barrier();

	if (likely(page == c->page)) {
		void **freelist = READ_ONCE(c->freelist);

		set_freepointer(s, tail_obj, freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				freelist, tid,
				head, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, head, tail_obj, cnt, addr);

}
3152
static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
				      void *head, void *tail, int cnt,
				      unsigned long addr)
{
	/*
	 * slab_free_freelist_hook() may strip objects from the freelist
	 * (e.g. to delay their reuse); only hand the remainder to
	 * do_slab_free() and skip the free entirely if nothing is left.
	 */
	if (slab_free_freelist_hook(s, &head, &tail))
		do_slab_free(s, page, head, tail, cnt, addr);
}
3164
3165#ifdef CONFIG_KASAN_GENERIC
3166void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3167{
3168 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3169}
3170#endif
3171
3172void kmem_cache_free(struct kmem_cache *s, void *x)
3173{
3174 s = cache_from_obj(s, x);
3175 if (!s)
3176 return;
3177 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3178 trace_kmem_cache_free(_RET_IP_, x, s->name);
3179}
3180EXPORT_SYMBOL(kmem_cache_free);
3181
3182struct detached_freelist {
3183 struct page *page;
3184 void *tail;
3185 void *freelist;
3186 int cnt;
3187 struct kmem_cache *s;
3188};
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
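/*
 * Scan the array of objects to be freed (with a limited lookahead) and link
 * the ones that belong to the same page into a "detached" freelist stored
 * in df. The objects are still owned by the caller, so no synchronization
 * is needed while building the list; the whole list can then be handed to
 * slab_free() in one go. Returns the number of array entries still to be
 * processed; 0 terminates the loop in kmem_cache_free_bulk().
 */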
3202static inline
3203int build_detached_freelist(struct kmem_cache *s, size_t size,
3204 void **p, struct detached_freelist *df)
3205{
3206 size_t first_skipped_index = 0;
3207 int lookahead = 3;
3208 void *object;
3209 struct page *page;
3210
3211
3212 df->page = NULL;
3213
3214 do {
3215 object = p[--size];
3216
3217 } while (!object && size);
3218
3219 if (!object)
3220 return 0;
3221
3222 page = virt_to_head_page(object);
3223 if (!s) {
3224
3225 if (unlikely(!PageSlab(page))) {
3226 BUG_ON(!PageCompound(page));
3227 kfree_hook(object);
3228 __free_pages(page, compound_order(page));
3229 p[size] = NULL;
3230 return size;
3231 }
3232
3233 df->s = page->slab_cache;
3234 } else {
3235 df->s = cache_from_obj(s, object);
3236 }
3237
3238 if (is_kfence_address(object)) {
3239 slab_free_hook(df->s, object);
3240 __kfence_free(object);
3241 p[size] = NULL;
3242 return size;
3243 }
3244
3245
3246 df->page = page;
3247 set_freepointer(df->s, object, NULL);
3248 df->tail = object;
3249 df->freelist = object;
3250 p[size] = NULL;
3251 df->cnt = 1;
3252
3253 while (size) {
3254 object = p[--size];
3255 if (!object)
3256 continue;
3257
3258
3259 if (df->page == virt_to_head_page(object)) {
3260
3261 set_freepointer(df->s, object, df->freelist);
3262 df->freelist = object;
3263 df->cnt++;
3264 p[size] = NULL;
3265
3266 continue;
3267 }
3268
3269
3270 if (!--lookahead)
3271 break;
3272
3273 if (!first_skipped_index)
3274 first_skipped_index = size + 1;
3275 }
3276
3277 return first_skipped_index;
3278}
3279
3280
3281void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3282{
3283 if (WARN_ON(!size))
3284 return;
3285
3286 memcg_slab_free_hook(s, p, size);
3287 do {
3288 struct detached_freelist df;
3289
3290 size = build_detached_freelist(s, size, p, &df);
3291 if (!df.page)
3292 continue;
3293
3294 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3295 } while (likely(size));
3296}
3297EXPORT_SYMBOL(kmem_cache_free_bulk);
3298
3299
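/*
 * Bulk allocation: fill p[0..size-1] with objects from this cache. The per
 * cpu freelist is used with interrupts disabled and refilled via
 * ___slab_alloc(). Interrupts are unconditionally re-enabled on return, so
 * the caller must have them enabled. Returns the number of objects
 * allocated, or 0 on failure (any partial allocation is freed again).
 */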
3300int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3301 void **p)
3302{
3303 struct kmem_cache_cpu *c;
3304 int i;
3305 struct obj_cgroup *objcg = NULL;
3306
3307
3308 s = slab_pre_alloc_hook(s, &objcg, size, flags);
3309 if (unlikely(!s))
3310 return false;
3311
3312
3313
3314
3315
3316 local_irq_disable();
3317 c = this_cpu_ptr(s->cpu_slab);
3318
3319 for (i = 0; i < size; i++) {
3320 void *object = kfence_alloc(s, s->object_size, flags);
3321
3322 if (unlikely(object)) {
3323 p[i] = object;
3324 continue;
3325 }
3326
3327 object = c->freelist;
3328 if (unlikely(!object)) {
3329
3330
3331
3332
3333
3334
3335
3336 c->tid = next_tid(c->tid);
3337
3338
3339
3340
3341
3342 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3343 _RET_IP_, c);
3344 if (unlikely(!p[i]))
3345 goto error;
3346
3347 c = this_cpu_ptr(s->cpu_slab);
3348 maybe_wipe_obj_freeptr(s, p[i]);
3349
3350 continue;
3351 }
3352 c->freelist = get_freepointer(s, object);
3353 p[i] = object;
3354 maybe_wipe_obj_freeptr(s, p[i]);
3355 }
3356 c->tid = next_tid(c->tid);
3357 local_irq_enable();
3358
3359
3360 if (unlikely(slab_want_init_on_alloc(flags, s))) {
3361 int j;
3362
3363 for (j = 0; j < i; j++)
3364 memset(kasan_reset_tag(p[j]), 0, s->object_size);
3365 }
3366
3367
3368 slab_post_alloc_hook(s, objcg, flags, size, p);
3369 return i;
3370error:
3371 local_irq_enable();
3372 slab_post_alloc_hook(s, objcg, flags, i, p);
3373 __kmem_cache_free_bulk(s, i, p);
3374 return 0;
3375}
3376EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
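/*
 * Tunables for slab page order selection: minimum and maximum page order
 * and the minimum number of objects packed into one slab. All three can be
 * overridden on the command line (slub_min_order=, slub_max_order=,
 * slub_min_objects=).
 */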
3398static unsigned int slub_min_order;
3399static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3400static unsigned int slub_min_objects;
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
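/*
 * Calculate the page order for a slab of the given object size. Starting
 * at the order needed to hold min_objects, the order is increased until
 * the space wasted at the end of the slab is at most 1/fract_leftover of
 * the slab size, or until max_order is reached.
 */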
static inline unsigned int slab_order(unsigned int size,
		unsigned int min_objects, unsigned int max_order,
		unsigned int fract_leftover)
{
	unsigned int min_order = slub_min_order;
	unsigned int order;

	if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
		return get_order(size * MAX_OBJS_PER_PAGE) - 1;

	for (order = max(min_order, (unsigned int)get_order(min_objects * size));
			order <= max_order; order++) {

		unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
		unsigned int rem;

		rem = slab_size % size;

		if (rem <= slab_size / fract_leftover)
			break;
	}

	return order;
}
3451
3452static inline int calculate_order(unsigned int size)
3453{
3454 unsigned int order;
3455 unsigned int min_objects;
3456 unsigned int max_objects;
3457 unsigned int nr_cpus;
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467 min_objects = slub_min_objects;
3468 if (!min_objects) {
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478 nr_cpus = num_present_cpus();
3479 if (nr_cpus <= 1)
3480 nr_cpus = nr_cpu_ids;
3481 min_objects = 4 * (fls(nr_cpus) + 1);
3482 }
3483 max_objects = order_objects(slub_max_order, size);
3484 min_objects = min(min_objects, max_objects);
3485
3486 while (min_objects > 1) {
3487 unsigned int fraction;
3488
3489 fraction = 16;
3490 while (fraction >= 4) {
3491 order = slab_order(size, min_objects,
3492 slub_max_order, fraction);
3493 if (order <= slub_max_order)
3494 return order;
3495 fraction /= 2;
3496 }
3497 min_objects--;
3498 }
3499
3500
3501
3502
3503
3504 order = slab_order(size, 1, slub_max_order, 1);
3505 if (order <= slub_max_order)
3506 return order;
3507
3508
3509
3510
3511 order = slab_order(size, 1, MAX_ORDER, 1);
3512 if (order < MAX_ORDER)
3513 return order;
3514 return -ENOSYS;
3515}
3516
3517static void
3518init_kmem_cache_node(struct kmem_cache_node *n)
3519{
3520 n->nr_partial = 0;
3521 spin_lock_init(&n->list_lock);
3522 INIT_LIST_HEAD(&n->partial);
3523#ifdef CONFIG_SLUB_DEBUG
3524 atomic_long_set(&n->nr_slabs, 0);
3525 atomic_long_set(&n->total_objects, 0);
3526 INIT_LIST_HEAD(&n->full);
3527#endif
3528}
3529
3530static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3531{
3532 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3533 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3534
3535
3536
3537
3538
3539 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3540 2 * sizeof(void *));
3541
3542 if (!s->cpu_slab)
3543 return 0;
3544
3545 init_kmem_cache_cpus(s);
3546
3547 return 1;
3548}
3549
3550static struct kmem_cache *kmem_cache_node;
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
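/*
 * Early boot helper: kmem_cache_alloc_node() cannot be used yet, so carve
 * the kmem_cache_node structure for @node directly out of a freshly
 * allocated slab of the kmem_cache_node cache and register that slab as a
 * partial slab of the node.
 */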
3561static void early_kmem_cache_node_alloc(int node)
3562{
3563 struct page *page;
3564 struct kmem_cache_node *n;
3565
3566 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3567
3568 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3569
3570 BUG_ON(!page);
3571 if (page_to_nid(page) != node) {
3572 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3573 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3574 }
3575
3576 n = page->freelist;
3577 BUG_ON(!n);
3578#ifdef CONFIG_SLUB_DEBUG
3579 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3580 init_tracking(kmem_cache_node, n);
3581#endif
3582 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL);
3583 page->freelist = get_freepointer(kmem_cache_node, n);
3584 page->inuse = 1;
3585 page->frozen = 0;
3586 kmem_cache_node->node[node] = n;
3587 init_kmem_cache_node(n);
3588 inc_slabs_node(kmem_cache_node, node, page->objects);
3589
3590
3591
3592
3593
3594 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3595}
3596
3597static void free_kmem_cache_nodes(struct kmem_cache *s)
3598{
3599 int node;
3600 struct kmem_cache_node *n;
3601
3602 for_each_kmem_cache_node(s, node, n) {
3603 s->node[node] = NULL;
3604 kmem_cache_free(kmem_cache_node, n);
3605 }
3606}
3607
3608void __kmem_cache_release(struct kmem_cache *s)
3609{
3610 cache_random_seq_destroy(s);
3611 free_percpu(s->cpu_slab);
3612 free_kmem_cache_nodes(s);
3613}
3614
3615static int init_kmem_cache_nodes(struct kmem_cache *s)
3616{
3617 int node;
3618
3619 for_each_node_mask(node, slab_nodes) {
3620 struct kmem_cache_node *n;
3621
3622 if (slab_state == DOWN) {
3623 early_kmem_cache_node_alloc(node);
3624 continue;
3625 }
3626 n = kmem_cache_alloc_node(kmem_cache_node,
3627 GFP_KERNEL, node);
3628
3629 if (!n) {
3630 free_kmem_cache_nodes(s);
3631 return 0;
3632 }
3633
3634 init_kmem_cache_node(n);
3635 s->node[node] = n;
3636 }
3637 return 1;
3638}
3639
3640static void set_min_partial(struct kmem_cache *s, unsigned long min)
3641{
3642 if (min < MIN_PARTIAL)
3643 min = MIN_PARTIAL;
3644 else if (min > MAX_PARTIAL)
3645 min = MAX_PARTIAL;
3646 s->min_partial = min;
3647}
3648
3649static void set_cpu_partial(struct kmem_cache *s)
3650{
3651#ifdef CONFIG_SLUB_CPU_PARTIAL
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669 if (!kmem_cache_has_cpu_partial(s))
3670 slub_set_cpu_partial(s, 0);
3671 else if (s->size >= PAGE_SIZE)
3672 slub_set_cpu_partial(s, 2);
3673 else if (s->size >= 1024)
3674 slub_set_cpu_partial(s, 6);
3675 else if (s->size >= 256)
3676 slub_set_cpu_partial(s, 13);
3677 else
3678 slub_set_cpu_partial(s, 30);
3679#endif
3680}
3681
3682
3683
3684
3685
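/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */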
3686static int calculate_sizes(struct kmem_cache *s, int forced_order)
3687{
3688 slab_flags_t flags = s->flags;
3689 unsigned int size = s->object_size;
3690 unsigned int freepointer_area;
3691 unsigned int order;
3692
3693
3694
3695
3696
3697
3698 size = ALIGN(size, sizeof(void *));
3699
3700
3701
3702
3703
3704
3705 freepointer_area = size;
3706
3707#ifdef CONFIG_SLUB_DEBUG
3708
3709
3710
3711
3712
3713 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3714 !s->ctor)
3715 s->flags |= __OBJECT_POISON;
3716 else
3717 s->flags &= ~__OBJECT_POISON;
3718
3719
3720
3721
3722
3723
3724
3725 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3726 size += sizeof(void *);
3727#endif
3728
3729
3730
3731
3732
3733 s->inuse = size;
3734
3735 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3736 s->ctor)) {
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750 s->offset = size;
3751 size += sizeof(void *);
3752 } else if (freepointer_area > sizeof(void *)) {
3753
3754
3755
3756
3757
3758 s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
3759 }
3760
3761#ifdef CONFIG_SLUB_DEBUG
3762 if (flags & SLAB_STORE_USER)
3763
3764
3765
3766
3767 size += 2 * sizeof(struct track);
3768#endif
3769
3770 kasan_cache_create(s, &size, &s->flags);
3771#ifdef CONFIG_SLUB_DEBUG
3772 if (flags & SLAB_RED_ZONE) {
3773
3774
3775
3776
3777
3778
3779
3780 size += sizeof(void *);
3781
3782 s->red_left_pad = sizeof(void *);
3783 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3784 size += s->red_left_pad;
3785 }
3786#endif
3787
3788
3789
3790
3791
3792
3793 size = ALIGN(size, s->align);
3794 s->size = size;
3795 s->reciprocal_size = reciprocal_value(size);
3796 if (forced_order >= 0)
3797 order = forced_order;
3798 else
3799 order = calculate_order(size);
3800
3801 if ((int)order < 0)
3802 return 0;
3803
3804 s->allocflags = 0;
3805 if (order)
3806 s->allocflags |= __GFP_COMP;
3807
3808 if (s->flags & SLAB_CACHE_DMA)
3809 s->allocflags |= GFP_DMA;
3810
3811 if (s->flags & SLAB_CACHE_DMA32)
3812 s->allocflags |= GFP_DMA32;
3813
3814 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3815 s->allocflags |= __GFP_RECLAIMABLE;
3816
3817
3818
3819
3820 s->oo = oo_make(order, size);
3821 s->min = oo_make(get_order(size), size);
3822 if (oo_objects(s->oo) > oo_objects(s->max))
3823 s->max = s->oo;
3824
3825 return !!oo_objects(s->oo);
3826}
3827
3828static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3829{
3830 s->flags = kmem_cache_flags(s->size, flags, s->name);
3831#ifdef CONFIG_SLAB_FREELIST_HARDENED
3832 s->random = get_random_long();
3833#endif
3834
3835 if (!calculate_sizes(s, -1))
3836 goto error;
3837 if (disable_higher_order_debug) {
3838
3839
3840
3841
3842 if (get_order(s->size) > get_order(s->object_size)) {
3843 s->flags &= ~DEBUG_METADATA_FLAGS;
3844 s->offset = 0;
3845 if (!calculate_sizes(s, -1))
3846 goto error;
3847 }
3848 }
3849
3850#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3851 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3852 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3853
3854 s->flags |= __CMPXCHG_DOUBLE;
3855#endif
3856
3857
3858
3859
3860
3861 set_min_partial(s, ilog2(s->size) / 2);
3862
3863 set_cpu_partial(s);
3864
3865#ifdef CONFIG_NUMA
3866 s->remote_node_defrag_ratio = 1000;
3867#endif
3868
3869
3870 if (slab_state >= UP) {
3871 if (init_cache_random_seq(s))
3872 goto error;
3873 }
3874
3875 if (!init_kmem_cache_nodes(s))
3876 goto error;
3877
3878 if (alloc_kmem_cache_cpus(s))
3879 return 0;
3880
3881 free_kmem_cache_nodes(s);
3882error:
3883 return -EINVAL;
3884}
3885
3886static void list_slab_objects(struct kmem_cache *s, struct page *page,
3887 const char *text)
3888{
3889#ifdef CONFIG_SLUB_DEBUG
3890 void *addr = page_address(page);
3891 unsigned long *map;
3892 void *p;
3893
3894 slab_err(s, page, text, s->name);
3895 slab_lock(page);
3896
3897 map = get_map(s, page);
3898 for_each_object(p, s, addr, page->objects) {
3899
3900 if (!test_bit(__obj_to_index(s, addr, p), map)) {
3901 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3902 print_tracking(s, p);
3903 }
3904 }
3905 put_map(map);
3906 slab_unlock(page);
3907#endif
3908}
3909
3910
3911
3912
3913
3914
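/*
 * Attempt to free all partial slabs on a node during cache shutdown.
 * Slabs that still contain objects are reported via list_slab_objects().
 */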
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
	LIST_HEAD(discard);
	struct page *page, *h;

	BUG_ON(irqs_disabled());
	spin_lock_irq(&n->list_lock);
	list_for_each_entry_safe(page, h, &n->partial, slab_list) {
		if (!page->inuse) {
			remove_partial(n, page);
			list_add(&page->slab_list, &discard);
		} else {
			list_slab_objects(s, page,
			  "Objects remaining in %s on __kmem_cache_shutdown()");
		}
	}
	spin_unlock_irq(&n->list_lock);

	list_for_each_entry_safe(page, h, &discard, slab_list)
		discard_slab(s, page);
}
3936
3937bool __kmem_cache_empty(struct kmem_cache *s)
3938{
3939 int node;
3940 struct kmem_cache_node *n;
3941
3942 for_each_kmem_cache_node(s, node, n)
3943 if (n->nr_partial || slabs_node(s, node))
3944 return false;
3945 return true;
3946}
3947
3948
3949
3950
3951int __kmem_cache_shutdown(struct kmem_cache *s)
3952{
3953 int node;
3954 struct kmem_cache_node *n;
3955
3956 flush_all(s);
3957
3958 for_each_kmem_cache_node(s, node, n) {
3959 free_partial(s, n);
3960 if (n->nr_partial || slabs_node(s, node))
3961 return 1;
3962 }
3963 return 0;
3964}
3965
3966void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
3967{
3968 void *base;
3969 int __maybe_unused i;
3970 unsigned int objnr;
3971 void *objp;
3972 void *objp0;
3973 struct kmem_cache *s = page->slab_cache;
3974 struct track __maybe_unused *trackp;
3975
3976 kpp->kp_ptr = object;
3977 kpp->kp_page = page;
3978 kpp->kp_slab_cache = s;
3979 base = page_address(page);
3980 objp0 = kasan_reset_tag(object);
3981#ifdef CONFIG_SLUB_DEBUG
3982 objp = restore_red_left(s, objp0);
3983#else
3984 objp = objp0;
3985#endif
3986 objnr = obj_to_index(s, page, objp);
3987 kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
3988 objp = base + s->size * objnr;
3989 kpp->kp_objp = objp;
3990 if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
3991 !(s->flags & SLAB_STORE_USER))
3992 return;
3993#ifdef CONFIG_SLUB_DEBUG
3994 trackp = get_track(s, objp, TRACK_ALLOC);
3995 kpp->kp_ret = (void *)trackp->addr;
3996#ifdef CONFIG_STACKTRACE
3997 for (i = 0; i < KS_ADDRS_COUNT && i < TRACK_ADDRS_COUNT; i++) {
3998 kpp->kp_stack[i] = (void *)trackp->addrs[i];
3999 if (!kpp->kp_stack[i])
4000 break;
4001 }
4002#endif
4003#endif
4004}
4005
4006
4007
4008
4009
4010static int __init setup_slub_min_order(char *str)
4011{
4012 get_option(&str, (int *)&slub_min_order);
4013
4014 return 1;
4015}
4016
4017__setup("slub_min_order=", setup_slub_min_order);
4018
4019static int __init setup_slub_max_order(char *str)
4020{
4021 get_option(&str, (int *)&slub_max_order);
4022 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4023
4024 return 1;
4025}
4026
4027__setup("slub_max_order=", setup_slub_max_order);
4028
4029static int __init setup_slub_min_objects(char *str)
4030{
4031 get_option(&str, (int *)&slub_min_objects);
4032
4033 return 1;
4034}
4035
4036__setup("slub_min_objects=", setup_slub_min_objects);
4037
4038void *__kmalloc(size_t size, gfp_t flags)
4039{
4040 struct kmem_cache *s;
4041 void *ret;
4042
4043 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4044 return kmalloc_large(size, flags);
4045
4046 s = kmalloc_slab(size, flags);
4047
4048 if (unlikely(ZERO_OR_NULL_PTR(s)))
4049 return s;
4050
4051 ret = slab_alloc(s, flags, _RET_IP_, size);
4052
4053 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
4054
4055 ret = kasan_kmalloc(s, ret, size, flags);
4056
4057 return ret;
4058}
4059EXPORT_SYMBOL(__kmalloc);
4060
4061#ifdef CONFIG_NUMA
4062static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4063{
4064 struct page *page;
4065 void *ptr = NULL;
4066 unsigned int order = get_order(size);
4067
4068 flags |= __GFP_COMP;
4069 page = alloc_pages_node(node, flags, order);
4070 if (page) {
4071 ptr = page_address(page);
4072 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4073 PAGE_SIZE << order);
4074 }
4075
4076 return kmalloc_large_node_hook(ptr, size, flags);
4077}
4078
4079void *__kmalloc_node(size_t size, gfp_t flags, int node)
4080{
4081 struct kmem_cache *s;
4082 void *ret;
4083
4084 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4085 ret = kmalloc_large_node(size, flags, node);
4086
4087 trace_kmalloc_node(_RET_IP_, ret,
4088 size, PAGE_SIZE << get_order(size),
4089 flags, node);
4090
4091 return ret;
4092 }
4093
4094 s = kmalloc_slab(size, flags);
4095
4096 if (unlikely(ZERO_OR_NULL_PTR(s)))
4097 return s;
4098
4099 ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
4100
4101 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4102
4103 ret = kasan_kmalloc(s, ret, size, flags);
4104
4105 return ret;
4106}
4107EXPORT_SYMBOL(__kmalloc_node);
4108#endif
4109
4110#ifdef CONFIG_HARDENED_USERCOPY
4111
4112
4113
4114
4115
4116
4117
4118
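/*
 * Validate a usercopy to or from a slab object: reject pointers that do not
 * point into the slab page, ranges that overlap the left red zone, and
 * ranges that do not fall entirely within the cache's usercopy region
 * (unless the fallback mode merely warns for copies that stay inside the
 * object).
 */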
4119void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4120 bool to_user)
4121{
4122 struct kmem_cache *s;
4123 unsigned int offset;
4124 size_t object_size;
4125 bool is_kfence = is_kfence_address(ptr);
4126
4127 ptr = kasan_reset_tag(ptr);
4128
4129
4130 s = page->slab_cache;
4131
4132
4133 if (ptr < page_address(page))
4134 usercopy_abort("SLUB object not in SLUB page?!", NULL,
4135 to_user, 0, n);
4136
4137
4138 if (is_kfence)
4139 offset = ptr - kfence_object_start(ptr);
4140 else
4141 offset = (ptr - page_address(page)) % s->size;
4142
4143
4144 if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4145 if (offset < s->red_left_pad)
4146 usercopy_abort("SLUB object in left red zone",
4147 s->name, to_user, offset, n);
4148 offset -= s->red_left_pad;
4149 }
4150
4151
4152 if (offset >= s->useroffset &&
4153 offset - s->useroffset <= s->usersize &&
4154 n <= s->useroffset - offset + s->usersize)
4155 return;
4156
4157
4158
4159
4160
4161
4162
4163 object_size = slab_ksize(s);
4164 if (usercopy_fallback &&
4165 offset <= object_size && n <= object_size - offset) {
4166 usercopy_warn("SLUB object", s->name, to_user, offset, n);
4167 return;
4168 }
4169
4170 usercopy_abort("SLUB object", s->name, to_user, offset, n);
4171}
4172#endif
4173
4174size_t __ksize(const void *object)
4175{
4176 struct page *page;
4177
4178 if (unlikely(object == ZERO_SIZE_PTR))
4179 return 0;
4180
4181 page = virt_to_head_page(object);
4182
4183 if (unlikely(!PageSlab(page))) {
4184 WARN_ON(!PageCompound(page));
4185 return page_size(page);
4186 }
4187
4188 return slab_ksize(page->slab_cache);
4189}
4190EXPORT_SYMBOL(__ksize);
4191
4192void kfree(const void *x)
4193{
4194 struct page *page;
4195 void *object = (void *)x;
4196
4197 trace_kfree(_RET_IP_, x);
4198
4199 if (unlikely(ZERO_OR_NULL_PTR(x)))
4200 return;
4201
4202 page = virt_to_head_page(x);
4203 if (unlikely(!PageSlab(page))) {
4204 unsigned int order = compound_order(page);
4205
4206 BUG_ON(!PageCompound(page));
4207 kfree_hook(object);
4208 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4209 -(PAGE_SIZE << order));
4210 __free_pages(page, order);
4211 return;
4212 }
4213 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4214}
4215EXPORT_SYMBOL(kfree);
4216
4217#define SHRINK_PROMOTE_MAX 32
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
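/*
 * Discard empty slabs on each node's partial list and move the fullest of
 * the remaining slabs (those with at most SHRINK_PROMOTE_MAX free objects)
 * to the head of the list so they are filled up first. Returns 1 if any
 * node still holds slabs afterwards, 0 otherwise.
 */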
4228int __kmem_cache_shrink(struct kmem_cache *s)
4229{
4230 int node;
4231 int i;
4232 struct kmem_cache_node *n;
4233 struct page *page;
4234 struct page *t;
4235 struct list_head discard;
4236 struct list_head promote[SHRINK_PROMOTE_MAX];
4237 unsigned long flags;
4238 int ret = 0;
4239
4240 flush_all(s);
4241 for_each_kmem_cache_node(s, node, n) {
4242 INIT_LIST_HEAD(&discard);
4243 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4244 INIT_LIST_HEAD(promote + i);
4245
4246 spin_lock_irqsave(&n->list_lock, flags);
4247
4248
4249
4250
4251
4252
4253
4254 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4255 int free = page->objects - page->inuse;
4256
4257
4258 barrier();
4259
4260
4261 BUG_ON(free <= 0);
4262
4263 if (free == page->objects) {
4264 list_move(&page->slab_list, &discard);
4265 n->nr_partial--;
4266 } else if (free <= SHRINK_PROMOTE_MAX)
4267 list_move(&page->slab_list, promote + free - 1);
4268 }
4269
4270
4271
4272
4273
4274 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4275 list_splice(promote + i, &n->partial);
4276
4277 spin_unlock_irqrestore(&n->list_lock, flags);
4278
4279
4280 list_for_each_entry_safe(page, t, &discard, slab_list)
4281 discard_slab(s, page);
4282
4283 if (slabs_node(s, node))
4284 ret = 1;
4285 }
4286
4287 return ret;
4288}
4289
4290static int slab_mem_going_offline_callback(void *arg)
4291{
4292 struct kmem_cache *s;
4293
4294 mutex_lock(&slab_mutex);
4295 list_for_each_entry(s, &slab_caches, list)
4296 __kmem_cache_shrink(s);
4297 mutex_unlock(&slab_mutex);
4298
4299 return 0;
4300}
4301
4302static void slab_mem_offline_callback(void *arg)
4303{
4304 struct memory_notify *marg = arg;
4305 int offline_node;
4306
4307 offline_node = marg->status_change_nid_normal;
4308
4309
4310
4311
4312
4313 if (offline_node < 0)
4314 return;
4315
4316 mutex_lock(&slab_mutex);
4317 node_clear(offline_node, slab_nodes);
4318
4319
4320
4321
4322
4323 mutex_unlock(&slab_mutex);
4324}
4325
4326static int slab_mem_going_online_callback(void *arg)
4327{
4328 struct kmem_cache_node *n;
4329 struct kmem_cache *s;
4330 struct memory_notify *marg = arg;
4331 int nid = marg->status_change_nid_normal;
4332 int ret = 0;
4333
4334
4335
4336
4337
4338 if (nid < 0)
4339 return 0;
4340
4341
4342
4343
4344
4345
4346 mutex_lock(&slab_mutex);
4347 list_for_each_entry(s, &slab_caches, list) {
4348
4349
4350
4351
4352 if (get_node(s, nid))
4353 continue;
4354
4355
4356
4357
4358
4359 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4360 if (!n) {
4361 ret = -ENOMEM;
4362 goto out;
4363 }
4364 init_kmem_cache_node(n);
4365 s->node[nid] = n;
4366 }
4367
4368
4369
4370
4371 node_set(nid, slab_nodes);
4372out:
4373 mutex_unlock(&slab_mutex);
4374 return ret;
4375}
4376
4377static int slab_memory_callback(struct notifier_block *self,
4378 unsigned long action, void *arg)
4379{
4380 int ret = 0;
4381
4382 switch (action) {
4383 case MEM_GOING_ONLINE:
4384 ret = slab_mem_going_online_callback(arg);
4385 break;
4386 case MEM_GOING_OFFLINE:
4387 ret = slab_mem_going_offline_callback(arg);
4388 break;
4389 case MEM_OFFLINE:
4390 case MEM_CANCEL_ONLINE:
4391 slab_mem_offline_callback(arg);
4392 break;
4393 case MEM_ONLINE:
4394 case MEM_CANCEL_OFFLINE:
4395 break;
4396 }
4397 if (ret)
4398 ret = notifier_from_errno(ret);
4399 else
4400 ret = NOTIFY_OK;
4401 return ret;
4402}
4403
4404static struct notifier_block slab_memory_callback_nb = {
4405 .notifier_call = slab_memory_callback,
4406 .priority = SLAB_CALLBACK_PRI,
4407};
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
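/*
 * Used for the early kmem_cache structures that were allocated statically
 * during boot. Allocate a proper kmem_cache, copy the static contents into
 * it and fix up any slab pages that still point at the old structure.
 */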
static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
{
	int node;
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
	struct kmem_cache_node *n;

	memcpy(s, static_cache, kmem_cache->object_size);

	/*
	 * This runs very early in boot, before other CPUs are brought up.
	 * Flush the cpu slab so the per cpu freelist no longer references
	 * the statically allocated cache, then repoint every existing slab
	 * page at the new kmem_cache.
	 */
	__flush_cpu_slab(s, smp_processor_id());
	for_each_kmem_cache_node(s, node, n) {
		struct page *p;

		list_for_each_entry(p, &n->partial, slab_list)
			p->slab_cache = s;

#ifdef CONFIG_SLUB_DEBUG
		list_for_each_entry(p, &n->full, slab_list)
			p->slab_cache = s;
#endif
	}
	list_add(&s->list, &slab_caches);
	return s;
}
4447
4448void __init kmem_cache_init(void)
4449{
4450 static __initdata struct kmem_cache boot_kmem_cache,
4451 boot_kmem_cache_node;
4452 int node;
4453
4454 if (debug_guardpage_minorder())
4455 slub_max_order = 0;
4456
4457 kmem_cache_node = &boot_kmem_cache_node;
4458 kmem_cache = &boot_kmem_cache;
4459
4460
4461
4462
4463
4464 for_each_node_state(node, N_NORMAL_MEMORY)
4465 node_set(node, slab_nodes);
4466
4467 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4468 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4469
4470 register_hotmemory_notifier(&slab_memory_callback_nb);
4471
4472
4473 slab_state = PARTIAL;
4474
4475 create_boot_cache(kmem_cache, "kmem_cache",
4476 offsetof(struct kmem_cache, node) +
4477 nr_node_ids * sizeof(struct kmem_cache_node *),
4478 SLAB_HWCACHE_ALIGN, 0, 0);
4479
4480 kmem_cache = bootstrap(&boot_kmem_cache);
4481 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4482
4483
4484 setup_kmalloc_cache_index_table();
4485 create_kmalloc_caches(0);
4486
4487
4488 init_freelist_randomization();
4489
4490 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4491 slub_cpu_dead);
4492
4493 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4494 cache_line_size(),
4495 slub_min_order, slub_max_order, slub_min_objects,
4496 nr_cpu_ids, nr_node_ids);
4497}
4498
4499void __init kmem_cache_init_late(void)
4500{
4501}
4502
4503struct kmem_cache *
4504__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4505 slab_flags_t flags, void (*ctor)(void *))
4506{
4507 struct kmem_cache *s;
4508
4509 s = find_mergeable(size, align, flags, name, ctor);
4510 if (s) {
4511 s->refcount++;
4512
4513
4514
4515
4516
4517 s->object_size = max(s->object_size, size);
4518 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4519
4520 if (sysfs_slab_alias(s, name)) {
4521 s->refcount--;
4522 s = NULL;
4523 }
4524 }
4525
4526 return s;
4527}
4528
4529int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4530{
4531 int err;
4532
4533 err = kmem_cache_open(s, flags);
4534 if (err)
4535 return err;
4536
4537
4538 if (slab_state <= UP)
4539 return 0;
4540
4541 err = sysfs_slab_add(s);
4542 if (err)
4543 __kmem_cache_release(s);
4544
4545 return err;
4546}
4547
4548void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4549{
4550 struct kmem_cache *s;
4551 void *ret;
4552
4553 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4554 return kmalloc_large(size, gfpflags);
4555
4556 s = kmalloc_slab(size, gfpflags);
4557
4558 if (unlikely(ZERO_OR_NULL_PTR(s)))
4559 return s;
4560
4561 ret = slab_alloc(s, gfpflags, caller, size);
4562
4563
4564 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4565
4566 return ret;
4567}
4568EXPORT_SYMBOL(__kmalloc_track_caller);
4569
4570#ifdef CONFIG_NUMA
4571void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4572 int node, unsigned long caller)
4573{
4574 struct kmem_cache *s;
4575 void *ret;
4576
4577 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4578 ret = kmalloc_large_node(size, gfpflags, node);
4579
4580 trace_kmalloc_node(caller, ret,
4581 size, PAGE_SIZE << get_order(size),
4582 gfpflags, node);
4583
4584 return ret;
4585 }
4586
4587 s = kmalloc_slab(size, gfpflags);
4588
4589 if (unlikely(ZERO_OR_NULL_PTR(s)))
4590 return s;
4591
4592 ret = slab_alloc_node(s, gfpflags, node, caller, size);
4593
4594
4595 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4596
4597 return ret;
4598}
4599EXPORT_SYMBOL(__kmalloc_node_track_caller);
4600#endif
4601
4602#ifdef CONFIG_SYSFS
4603static int count_inuse(struct page *page)
4604{
4605 return page->inuse;
4606}
4607
4608static int count_total(struct page *page)
4609{
4610 return page->objects;
4611}
4612#endif
4613
4614#ifdef CONFIG_SLUB_DEBUG
4615static void validate_slab(struct kmem_cache *s, struct page *page)
4616{
4617 void *p;
4618 void *addr = page_address(page);
4619 unsigned long *map;
4620
4621 slab_lock(page);
4622
4623 if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4624 goto unlock;
4625
4626
4627 map = get_map(s, page);
4628 for_each_object(p, s, addr, page->objects) {
4629 u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4630 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4631
4632 if (!check_object(s, page, p, val))
4633 break;
4634 }
4635 put_map(map);
4636unlock:
4637 slab_unlock(page);
4638}
4639
4640static int validate_slab_node(struct kmem_cache *s,
4641 struct kmem_cache_node *n)
4642{
4643 unsigned long count = 0;
4644 struct page *page;
4645 unsigned long flags;
4646
4647 spin_lock_irqsave(&n->list_lock, flags);
4648
4649 list_for_each_entry(page, &n->partial, slab_list) {
4650 validate_slab(s, page);
4651 count++;
4652 }
4653 if (count != n->nr_partial)
4654 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4655 s->name, count, n->nr_partial);
4656
4657 if (!(s->flags & SLAB_STORE_USER))
4658 goto out;
4659
4660 list_for_each_entry(page, &n->full, slab_list) {
4661 validate_slab(s, page);
4662 count++;
4663 }
4664 if (count != atomic_long_read(&n->nr_slabs))
4665 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4666 s->name, count, atomic_long_read(&n->nr_slabs));
4667
4668out:
4669 spin_unlock_irqrestore(&n->list_lock, flags);
4670 return count;
4671}
4672
4673static long validate_slab_cache(struct kmem_cache *s)
4674{
4675 int node;
4676 unsigned long count = 0;
4677 struct kmem_cache_node *n;
4678
4679 flush_all(s);
4680 for_each_kmem_cache_node(s, node, n)
4681 count += validate_slab_node(s, n);
4682
4683 return count;
4684}
4685
4686
4687
4688
4689
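/*
 * Tracking of code addresses from which slab objects were allocated or
 * freed, used to generate the alloc_calls/free_calls listings.
 */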
4690struct location {
4691 unsigned long count;
4692 unsigned long addr;
4693 long long sum_time;
4694 long min_time;
4695 long max_time;
4696 long min_pid;
4697 long max_pid;
4698 DECLARE_BITMAP(cpus, NR_CPUS);
4699 nodemask_t nodes;
4700};
4701
4702struct loc_track {
4703 unsigned long max;
4704 unsigned long count;
4705 struct location *loc;
4706};
4707
4708static void free_loc_track(struct loc_track *t)
4709{
4710 if (t->max)
4711 free_pages((unsigned long)t->loc,
4712 get_order(sizeof(struct location) * t->max));
4713}
4714
4715static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4716{
4717 struct location *l;
4718 int order;
4719
4720 order = get_order(sizeof(struct location) * max);
4721
4722 l = (void *)__get_free_pages(flags, order);
4723 if (!l)
4724 return 0;
4725
4726 if (t->count) {
4727 memcpy(l, t->loc, sizeof(struct location) * t->count);
4728 free_loc_track(t);
4729 }
4730 t->max = max;
4731 t->loc = l;
4732 return 1;
4733}
4734
4735static int add_location(struct loc_track *t, struct kmem_cache *s,
4736 const struct track *track)
4737{
4738 long start, end, pos;
4739 struct location *l;
4740 unsigned long caddr;
4741 unsigned long age = jiffies - track->when;
4742
4743 start = -1;
4744 end = t->count;
4745
4746 for ( ; ; ) {
4747 pos = start + (end - start + 1) / 2;
4748
4749
4750
4751
4752
4753 if (pos == end)
4754 break;
4755
4756 caddr = t->loc[pos].addr;
4757 if (track->addr == caddr) {
4758
4759 l = &t->loc[pos];
4760 l->count++;
4761 if (track->when) {
4762 l->sum_time += age;
4763 if (age < l->min_time)
4764 l->min_time = age;
4765 if (age > l->max_time)
4766 l->max_time = age;
4767
4768 if (track->pid < l->min_pid)
4769 l->min_pid = track->pid;
4770 if (track->pid > l->max_pid)
4771 l->max_pid = track->pid;
4772
4773 cpumask_set_cpu(track->cpu,
4774 to_cpumask(l->cpus));
4775 }
4776 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4777 return 1;
4778 }
4779
4780 if (track->addr < caddr)
4781 end = pos;
4782 else
4783 start = pos;
4784 }
4785
4786
4787
4788
4789 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4790 return 0;
4791
4792 l = t->loc + pos;
4793 if (pos < t->count)
4794 memmove(l + 1, l,
4795 (t->count - pos) * sizeof(struct location));
4796 t->count++;
4797 l->count = 1;
4798 l->addr = track->addr;
4799 l->sum_time = age;
4800 l->min_time = age;
4801 l->max_time = age;
4802 l->min_pid = track->pid;
4803 l->max_pid = track->pid;
4804 cpumask_clear(to_cpumask(l->cpus));
4805 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4806 nodes_clear(l->nodes);
4807 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4808 return 1;
4809}
4810
4811static void process_slab(struct loc_track *t, struct kmem_cache *s,
4812 struct page *page, enum track_item alloc)
4813{
4814 void *addr = page_address(page);
4815 void *p;
4816 unsigned long *map;
4817
4818 map = get_map(s, page);
4819 for_each_object(p, s, addr, page->objects)
4820 if (!test_bit(__obj_to_index(s, addr, p), map))
4821 add_location(t, s, get_track(s, p, alloc));
4822 put_map(map);
4823}
4824
4825static int list_locations(struct kmem_cache *s, char *buf,
4826 enum track_item alloc)
4827{
4828 int len = 0;
4829 unsigned long i;
4830 struct loc_track t = { 0, 0, NULL };
4831 int node;
4832 struct kmem_cache_node *n;
4833
4834 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4835 GFP_KERNEL)) {
4836 return sysfs_emit(buf, "Out of memory\n");
4837 }
4838
4839 flush_all(s);
4840
4841 for_each_kmem_cache_node(s, node, n) {
4842 unsigned long flags;
4843 struct page *page;
4844
4845 if (!atomic_long_read(&n->nr_slabs))
4846 continue;
4847
4848 spin_lock_irqsave(&n->list_lock, flags);
4849 list_for_each_entry(page, &n->partial, slab_list)
4850 process_slab(&t, s, page, alloc);
4851 list_for_each_entry(page, &n->full, slab_list)
4852 process_slab(&t, s, page, alloc);
4853 spin_unlock_irqrestore(&n->list_lock, flags);
4854 }
4855
4856 for (i = 0; i < t.count; i++) {
4857 struct location *l = &t.loc[i];
4858
4859 len += sysfs_emit_at(buf, len, "%7ld ", l->count);
4860
4861 if (l->addr)
4862 len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
4863 else
4864 len += sysfs_emit_at(buf, len, "<not-available>");
4865
4866 if (l->sum_time != l->min_time)
4867 len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
4868 l->min_time,
4869 (long)div_u64(l->sum_time,
4870 l->count),
4871 l->max_time);
4872 else
4873 len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
4874
4875 if (l->min_pid != l->max_pid)
4876 len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
4877 l->min_pid, l->max_pid);
4878 else
4879 len += sysfs_emit_at(buf, len, " pid=%ld",
4880 l->min_pid);
4881
4882 if (num_online_cpus() > 1 &&
4883 !cpumask_empty(to_cpumask(l->cpus)))
4884 len += sysfs_emit_at(buf, len, " cpus=%*pbl",
4885 cpumask_pr_args(to_cpumask(l->cpus)));
4886
4887 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
4888 len += sysfs_emit_at(buf, len, " nodes=%*pbl",
4889 nodemask_pr_args(&l->nodes));
4890
4891 len += sysfs_emit_at(buf, len, "\n");
4892 }
4893
4894 free_loc_track(&t);
4895 if (!t.count)
4896 len += sysfs_emit_at(buf, len, "No data\n");
4897
4898 return len;
4899}
4900#endif
4901
4902#ifdef SLUB_RESILIENCY_TEST
4903static void __init resiliency_test(void)
4904{
4905 u8 *p;
4906 int type = KMALLOC_NORMAL;
4907
4908 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4909
4910 pr_err("SLUB resiliency testing\n");
4911 pr_err("-----------------------\n");
4912 pr_err("A. Corruption after allocation\n");
4913
4914 p = kzalloc(16, GFP_KERNEL);
4915 p[16] = 0x12;
4916 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4917 p + 16);
4918
4919 validate_slab_cache(kmalloc_caches[type][4]);
4920
4921
4922 p = kzalloc(32, GFP_KERNEL);
4923 p[32 + sizeof(void *)] = 0x34;
4924 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4925 p);
4926 pr_err("If allocated object is overwritten then not detectable\n\n");
4927
4928 validate_slab_cache(kmalloc_caches[type][5]);
4929 p = kzalloc(64, GFP_KERNEL);
4930 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4931 *p = 0x56;
4932 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4933 p);
4934 pr_err("If allocated object is overwritten then not detectable\n\n");
4935 validate_slab_cache(kmalloc_caches[type][6]);
4936
4937 pr_err("\nB. Corruption after free\n");
4938 p = kzalloc(128, GFP_KERNEL);
4939 kfree(p);
4940 *p = 0x78;
4941 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4942 validate_slab_cache(kmalloc_caches[type][7]);
4943
4944 p = kzalloc(256, GFP_KERNEL);
4945 kfree(p);
4946 p[50] = 0x9a;
4947 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4948 validate_slab_cache(kmalloc_caches[type][8]);
4949
4950 p = kzalloc(512, GFP_KERNEL);
4951 kfree(p);
4952 p[512] = 0xab;
4953 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4954 validate_slab_cache(kmalloc_caches[type][9]);
4955}
4956#else
4957#ifdef CONFIG_SYSFS
4958static void resiliency_test(void) {};
4959#endif
4960#endif
4961
4962#ifdef CONFIG_SYSFS
4963enum slab_stat_type {
4964 SL_ALL,
4965 SL_PARTIAL,
4966 SL_CPU,
4967 SL_OBJECTS,
4968 SL_TOTAL
4969};
4970
4971#define SO_ALL (1 << SL_ALL)
4972#define SO_PARTIAL (1 << SL_PARTIAL)
4973#define SO_CPU (1 << SL_CPU)
4974#define SO_OBJECTS (1 << SL_OBJECTS)
4975#define SO_TOTAL (1 << SL_TOTAL)
4976
4977static ssize_t show_slab_objects(struct kmem_cache *s,
4978 char *buf, unsigned long flags)
4979{
4980 unsigned long total = 0;
4981 int node;
4982 int x;
4983 unsigned long *nodes;
4984 int len = 0;
4985
4986 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4987 if (!nodes)
4988 return -ENOMEM;
4989
4990 if (flags & SO_CPU) {
4991 int cpu;
4992
4993 for_each_possible_cpu(cpu) {
4994 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4995 cpu);
4996 int node;
4997 struct page *page;
4998
4999 page = READ_ONCE(c->page);
5000 if (!page)
5001 continue;
5002
5003 node = page_to_nid(page);
5004 if (flags & SO_TOTAL)
5005 x = page->objects;
5006 else if (flags & SO_OBJECTS)
5007 x = page->inuse;
5008 else
5009 x = 1;
5010
5011 total += x;
5012 nodes[node] += x;
5013
5014 page = slub_percpu_partial_read_once(c);
5015 if (page) {
5016 node = page_to_nid(page);
5017 if (flags & SO_TOTAL)
5018 WARN_ON_ONCE(1);
5019 else if (flags & SO_OBJECTS)
5020 WARN_ON_ONCE(1);
5021 else
5022 x = page->pages;
5023 total += x;
5024 nodes[node] += x;
5025 }
5026 }
5027 }
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
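	/*
	 * The node array and its counters below are walked without any
	 * global locking, so the per node numbers are only a snapshot.
	 */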
5040#ifdef CONFIG_SLUB_DEBUG
5041 if (flags & SO_ALL) {
5042 struct kmem_cache_node *n;
5043
5044 for_each_kmem_cache_node(s, node, n) {
5045
5046 if (flags & SO_TOTAL)
5047 x = atomic_long_read(&n->total_objects);
5048 else if (flags & SO_OBJECTS)
5049 x = atomic_long_read(&n->total_objects) -
5050 count_partial(n, count_free);
5051 else
5052 x = atomic_long_read(&n->nr_slabs);
5053 total += x;
5054 nodes[node] += x;
5055 }
5056
5057 } else
5058#endif
5059 if (flags & SO_PARTIAL) {
5060 struct kmem_cache_node *n;
5061
5062 for_each_kmem_cache_node(s, node, n) {
5063 if (flags & SO_TOTAL)
5064 x = count_partial(n, count_total);
5065 else if (flags & SO_OBJECTS)
5066 x = count_partial(n, count_inuse);
5067 else
5068 x = n->nr_partial;
5069 total += x;
5070 nodes[node] += x;
5071 }
5072 }
5073
5074 len += sysfs_emit_at(buf, len, "%lu", total);
5075#ifdef CONFIG_NUMA
5076 for (node = 0; node < nr_node_ids; node++) {
5077 if (nodes[node])
5078 len += sysfs_emit_at(buf, len, " N%d=%lu",
5079 node, nodes[node]);
5080 }
5081#endif
5082 len += sysfs_emit_at(buf, len, "\n");
5083 kfree(nodes);
5084
5085 return len;
5086}
5087
5088#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5089#define to_slab(n) container_of(n, struct kmem_cache, kobj)
5090
5091struct slab_attribute {
5092 struct attribute attr;
5093 ssize_t (*show)(struct kmem_cache *s, char *buf);
5094 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5095};
5096
5097#define SLAB_ATTR_RO(_name) \
5098 static struct slab_attribute _name##_attr = \
5099 __ATTR(_name, 0400, _name##_show, NULL)
5100
5101#define SLAB_ATTR(_name) \
5102 static struct slab_attribute _name##_attr = \
5103 __ATTR(_name, 0600, _name##_show, _name##_store)
5104
5105static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5106{
5107 return sysfs_emit(buf, "%u\n", s->size);
5108}
5109SLAB_ATTR_RO(slab_size);
5110
5111static ssize_t align_show(struct kmem_cache *s, char *buf)
5112{
5113 return sysfs_emit(buf, "%u\n", s->align);
5114}
5115SLAB_ATTR_RO(align);
5116
5117static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5118{
5119 return sysfs_emit(buf, "%u\n", s->object_size);
5120}
5121SLAB_ATTR_RO(object_size);
5122
5123static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5124{
5125 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5126}
5127SLAB_ATTR_RO(objs_per_slab);
5128
5129static ssize_t order_show(struct kmem_cache *s, char *buf)
5130{
5131 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5132}
5133SLAB_ATTR_RO(order);
5134
5135static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5136{
5137 return sysfs_emit(buf, "%lu\n", s->min_partial);
5138}
5139
5140static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5141 size_t length)
5142{
5143 unsigned long min;
5144 int err;
5145
5146 err = kstrtoul(buf, 10, &min);
5147 if (err)
5148 return err;
5149
5150 set_min_partial(s, min);
5151 return length;
5152}
5153SLAB_ATTR(min_partial);
5154
5155static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5156{
5157 return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
5158}
5159
5160static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5161 size_t length)
5162{
5163 unsigned int objects;
5164 int err;
5165
5166 err = kstrtouint(buf, 10, &objects);
5167 if (err)
5168 return err;
5169 if (objects && !kmem_cache_has_cpu_partial(s))
5170 return -EINVAL;
5171
5172 slub_set_cpu_partial(s, objects);
5173 flush_all(s);
5174 return length;
5175}
5176SLAB_ATTR(cpu_partial);
5177
5178static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5179{
5180 if (!s->ctor)
5181 return 0;
5182 return sysfs_emit(buf, "%pS\n", s->ctor);
5183}
5184SLAB_ATTR_RO(ctor);
5185
5186static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5187{
5188 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5189}
5190SLAB_ATTR_RO(aliases);
5191
5192static ssize_t partial_show(struct kmem_cache *s, char *buf)
5193{
5194 return show_slab_objects(s, buf, SO_PARTIAL);
5195}
5196SLAB_ATTR_RO(partial);
5197
5198static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5199{
5200 return show_slab_objects(s, buf, SO_CPU);
5201}
5202SLAB_ATTR_RO(cpu_slabs);
5203
5204static ssize_t objects_show(struct kmem_cache *s, char *buf)
5205{
5206 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5207}
5208SLAB_ATTR_RO(objects);
5209
5210static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5211{
5212 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5213}
5214SLAB_ATTR_RO(objects_partial);
5215
5216static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5217{
5218 int objects = 0;
5219 int pages = 0;
5220 int cpu;
5221 int len = 0;
5222
5223 for_each_online_cpu(cpu) {
5224 struct page *page;
5225
5226 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5227
5228 if (page) {
5229 pages += page->pages;
5230 objects += page->pobjects;
5231 }
5232 }
5233
5234 len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
5235
5236#ifdef CONFIG_SMP
5237 for_each_online_cpu(cpu) {
5238 struct page *page;
5239
5240 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5241 if (page)
5242 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5243 cpu, page->pobjects, page->pages);
5244 }
5245#endif
5246 len += sysfs_emit_at(buf, len, "\n");
5247
5248 return len;
5249}
5250SLAB_ATTR_RO(slabs_cpu_partial);

static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
}
SLAB_ATTR_RO(reclaim_account);

static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
}
SLAB_ATTR_RO(hwcache_align);

#ifdef CONFIG_ZONE_DMA
static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
}
SLAB_ATTR_RO(cache_dma);
#endif

static ssize_t usersize_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%u\n", s->usersize);
}
SLAB_ATTR_RO(usersize);

static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
}
SLAB_ATTR_RO(destroy_by_rcu);

#ifdef CONFIG_SLUB_DEBUG
static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{
	return show_slab_objects(s, buf, SO_ALL);
}
SLAB_ATTR_RO(slabs);

static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
{
	return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
}
SLAB_ATTR_RO(total_objects);

static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
}
SLAB_ATTR_RO(sanity_checks);

static ssize_t trace_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
}
SLAB_ATTR_RO(trace);

static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
}

SLAB_ATTR_RO(red_zone);

static ssize_t poison_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
}

SLAB_ATTR_RO(poison);

static ssize_t store_user_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
}

SLAB_ATTR_RO(store_user);

static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t validate_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int ret = -EINVAL;

	if (buf[0] == '1') {
		ret = validate_slab_cache(s);
		if (ret >= 0)
			ret = length;
	}
	return ret;
}
SLAB_ATTR(validate);
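
/*
 * The validate attribute is write-only in practice: reading yields an
 * empty file, while writing '1' asks validate_slab_cache() to walk and
 * check every slab of the cache. Illustrative usage (cache name is just
 * an example):
 *
 *	# echo 1 > /sys/kernel/slab/kmalloc-64/validate
 *
 * Any value other than '1' is rejected with -EINVAL.
 */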

static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_ALLOC);
}
SLAB_ATTR_RO(alloc_calls);

static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_FREE);
}
SLAB_ATTR_RO(free_calls);
#endif	/* CONFIG_SLUB_DEBUG */

#ifdef CONFIG_FAILSLAB
static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}
SLAB_ATTR_RO(failslab);
#endif

static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	if (buf[0] == '1')
		kmem_cache_shrink(s);
	else
		return -EINVAL;
	return length;
}
SLAB_ATTR(shrink);
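
/*
 * Writing '1' to the shrink attribute asks kmem_cache_shrink() to discard
 * empty slabs and tidy up the partial lists; any other value is rejected
 * with -EINVAL. Illustrative usage (cache name is just an example):
 *
 *	# echo 1 > /sys/kernel/slab/dentry/shrink
 */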

#ifdef CONFIG_NUMA
static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
}

static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	unsigned int ratio;
	int err;

	err = kstrtouint(buf, 10, &ratio);
	if (err)
		return err;
	if (ratio > 100)
		return -ERANGE;

	s->remote_node_defrag_ratio = ratio * 10;

	return length;
}
SLAB_ATTR(remote_node_defrag_ratio);
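
/*
 * The ratio is presented to user space as a percentage (0-100) but kept
 * internally scaled by 10, so a stored value of 100 becomes 1000; values
 * above 100 are rejected with -ERANGE. Illustrative usage (cache name is
 * just an example):
 *
 *	# echo 100 > /sys/kernel/slab/kmalloc-64/remote_node_defrag_ratio
 */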
#endif

#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	unsigned long sum = 0;
	int cpu;
	int len = 0;
	int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned int x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
	}

	len += sysfs_emit_at(buf, len, "%lu", sum);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		if (data[cpu])
			len += sysfs_emit_at(buf, len, " C%d=%u",
					     cpu, data[cpu]);
	}
#endif
	kfree(data);
	len += sysfs_emit_at(buf, len, "\n");

	return len;
}

static void clear_stat(struct kmem_cache *s, enum stat_item si)
{
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
}
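
/*
 * Each statistics file generated below prints the sum over all online cpus
 * first, then on SMP a " C<cpu>=<count>" entry for every cpu with a
 * non-zero count, e.g. "1234 C0=1000 C2=234" (values illustrative only).
 * Writing '0' to a statistics file clears the counter on every online cpu;
 * any other value is rejected with -EINVAL.
 */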

#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);

STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif	/* CONFIG_SLUB_STATS */

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
	&usersize_attr.attr,

	NULL
};

static const struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
	return err;
}

static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
	return slab_kset;
}

#define ID_STR_LENGTH 64

/*
 * Build a unique sysfs directory name for a mergeable cache; the cache's
 * own name is added later as a symlink alias.
 * Format: ":[flags-]size"
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';
	/*
	 * First come the flags that affect slabcache operations. We will
	 * only get here for aliasable slabs, so we do not need to support
	 * too many flags. The flags here must cover all flags that are
	 * matched during merging to guarantee that the id is unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_CACHE_DMA32)
		*p++ = 'D';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07u", s->size);

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}
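
/*
 * Example ids produced above (sizes illustrative only): a plain 192 byte
 * cache becomes ":0000192", while a DMA cache with SLAB_RECLAIM_ACCOUNT
 * set becomes ":da-0000192".
 */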

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	if (!kset) {
		kobject_init(&s->kobj, &slab_ktype);
		return 0;
	}

	if (!unmergeable && disable_higher_order_debug &&
			(slub_debug & DEBUG_METADATA_FLAGS))
		unmergeable = 1;

	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}

void sysfs_slab_unlink(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_del(&s->kobj);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif	/* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
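
/*
 * Note that SLUB has no cheap way to tell which slabs are currently in
 * use, so active_slabs and num_slabs are reported as the same value
 * above; only the object counts distinguish allocated from free space.
 */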

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif	/* CONFIG_SLUB_DEBUG */
