// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/swab.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Overview:
 *
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 * The slab_mutex protects the list of all slab caches and synchronizes
 * major metadata changes to slab cache structures.
 *
 * The slab_lock is a bit spinlock in page->flags that protects the slab's
 * freelist and counters where cmpxchg_double cannot be used (debug caches
 * and architectures without it).
 *
 * SLUB assigns one slab for allocation to each processor.  Allocations are
 * satisfied from a lockless per-cpu freelist; slabs with free objects are
 * kept on per-node partial lists (and, with CONFIG_SLUB_CPU_PARTIAL, on
 * per-cpu partial lists), protected by node->list_lock.  Full slabs are
 * not tracked unless SLAB_STORE_USER debugging is enabled.
 *
 * A slab that is actively used for allocations by a cpu is "frozen"
 * (page->frozen): it is exempt from list management and only the owning
 * cpu may take objects from its lockless freelist; frees from other cpus
 * go to page->freelist instead.
 *
 * The available slub_debug options are described in
 * Documentation/vm/slub.rst.
 */
120#ifdef CONFIG_SLUB_DEBUG
121#ifdef CONFIG_SLUB_DEBUG_ON
122DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
123#else
124DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
125#endif
126#endif
127
128static inline bool kmem_cache_debug(struct kmem_cache *s)
129{
130 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
131}
132
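/*
 * Return the address of the first usable byte of an object, skipping the
 * left red zone when SLAB_RED_ZONE debugging is active.
 */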
133void *fixup_red_left(struct kmem_cache *s, void *p)
134{
135 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
136 p += s->red_left_pad;
137
138 return p;
139}
140
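/*
 * Per-cpu partial slab lists are only used when CONFIG_SLUB_CPU_PARTIAL is
 * enabled and the cache is not being debugged (debug caches need their slabs
 * on the node lists where they can be inspected).
 */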
141static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
142{
143#ifdef CONFIG_SLUB_CPU_PARTIAL
144 return !kmem_cache_debug(s);
145#else
146 return false;
147#endif
148}

/* Enable to test recovery from slab corruption on boot */
159#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
162#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
168#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
175#define MAX_PARTIAL 10
176
177#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
178 SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information.
 */
184#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
185 SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
193#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
194
195#define OO_SHIFT 16
196#define OO_MASK ((1 << OO_SHIFT) - 1)
197#define MAX_OBJS_PER_PAGE 32767

/* Internal SLUB flags */
/* Poison object */
201#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
203#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
208#define TRACK_ADDRS_COUNT 16
209struct track {
210 unsigned long addr;
211#ifdef CONFIG_STACKTRACE
212 unsigned long addrs[TRACK_ADDRS_COUNT];
213#endif
214 int cpu;
215 int pid;
216 unsigned long when;
217};
218
219enum track_item { TRACK_ALLOC, TRACK_FREE };
220
221#ifdef CONFIG_SYSFS
222static int sysfs_slab_add(struct kmem_cache *);
223static int sysfs_slab_alias(struct kmem_cache *, const char *);
224#else
225static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
226static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
227 { return 0; }
228#endif
229
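/* Increment a per-cpu statistics counter; compiled away without CONFIG_SLUB_STATS. */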
230static inline void stat(const struct kmem_cache *s, enum stat_item si)
231{
232#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_inc()'s preempt/irq-disable overhead.
	 */
237 raw_cpu_inc(s->cpu_slab->stat[si]);
238#endif
239}

/*
 * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
 * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
 * differ during memory hotplug/hotremove operations.
 * Protected by slab_mutex.
 */
247static nodemask_t slab_nodes;

/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the address where the pointer is held and a per-cache
 * random number.
 */
258static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
259 unsigned long ptr_addr)
260{
261#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * The stored value is obfuscated with a per-cache random cookie and
	 * the swabbed address of the location it is stored at, so a leaked
	 * freelist entry does not directly reveal a kernel pointer.
	 * kasan_reset_tag() strips the KASAN tag from ptr_addr so that tagged
	 * and untagged callers (e.g. the CONFIG_SLUB_DEBUG code) encode and
	 * decode consistently.
	 */
272 return (void *)((unsigned long)ptr ^ s->random ^
273 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
274#else
275 return ptr;
276#endif
277}
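/*
 * Illustrative example (not from the original source): with hardening
 * enabled, a next-free pointer P stored at address A is recorded as
 * P ^ s->random ^ swab(A); decoding applies the same transformation, so a
 * raw value read out of a slab does not directly disclose a pointer.
 */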

/* Returns the freelist pointer recorded at location ptr_addr. */
280static inline void *freelist_dereference(const struct kmem_cache *s,
281 void *ptr_addr)
282{
283 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
284 (unsigned long)ptr_addr);
285}
286
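/* Return the (decoded) next-free pointer stored inside @object. */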
287static inline void *get_freepointer(struct kmem_cache *s, void *object)
288{
289 return freelist_dereference(s, object + s->offset);
290}
291
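/* Prefetch the freelist entry of @object to hide the cache miss on the next allocation. */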
292static void prefetch_freepointer(const struct kmem_cache *s, void *object)
293{
294 prefetch(object + s->offset);
295}
296
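/*
 * Variant of get_freepointer() for the lockless fastpath: with
 * debug_pagealloc the slab page may already have been unmapped by a racing
 * free, so use a non-faulting read of the freelist entry.
 */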
297static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
298{
299 unsigned long freepointer_addr;
300 void *p;
301
302 if (!debug_pagealloc_enabled_static())
303 return get_freepointer(s, object);
304
305 freepointer_addr = (unsigned long)object + s->offset;
306 probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
307 return freelist_ptr(s, p, freepointer_addr);
308}
309
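/*
 * Encode and store @fp as the next-free pointer of @object. The BUG_ON
 * catches the trivial double-free case where an object would end up
 * pointing at itself.
 */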
310static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
311{
312 unsigned long freeptr_addr = (unsigned long)object + s->offset;
313
314 BUG_ON(object == fp);
315
316 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
317}

/* Loop over all objects in a slab */
320#define for_each_object(__p, __s, __addr, __objects) \
321 for (__p = fixup_red_left(__s, __addr); \
322 __p < (__addr) + (__objects) * (__s)->size; \
323 __p += (__s)->size)
324
325static inline unsigned int order_objects(unsigned int order, unsigned int size)
326{
327 return ((unsigned int)PAGE_SIZE << order) / size;
328}
329
330static inline struct kmem_cache_order_objects oo_make(unsigned int order,
331 unsigned int size)
332{
333 struct kmem_cache_order_objects x = {
334 (order << OO_SHIFT) + order_objects(order, size)
335 };
336
337 return x;
338}
339
340static inline unsigned int oo_order(struct kmem_cache_order_objects x)
341{
342 return x.x >> OO_SHIFT;
343}
344
345static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
346{
347 return x.x & OO_MASK;
348}

/*
 * Per slab locking using the pagelock
 */
353static __always_inline void slab_lock(struct page *page)
354{
355 VM_BUG_ON_PAGE(PageTail(page), page);
356 bit_spin_lock(PG_locked, &page->flags);
357}
358
359static __always_inline void slab_unlock(struct page *page)
360{
361 VM_BUG_ON_PAGE(PageTail(page), page);
362 __bit_spin_unlock(PG_locked, &page->flags);
363}

/* Interrupts must be disabled (for the fallback code to work right) */
366static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
367 void *freelist_old, unsigned long counters_old,
368 void *freelist_new, unsigned long counters_new,
369 const char *n)
370{
371 VM_BUG_ON(!irqs_disabled());
372#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
373 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
374 if (s->flags & __CMPXCHG_DOUBLE) {
375 if (cmpxchg_double(&page->freelist, &page->counters,
376 freelist_old, counters_old,
377 freelist_new, counters_new))
378 return true;
379 } else
380#endif
381 {
382 slab_lock(page);
383 if (page->freelist == freelist_old &&
384 page->counters == counters_old) {
385 page->freelist = freelist_new;
386 page->counters = counters_new;
387 slab_unlock(page);
388 return true;
389 }
390 slab_unlock(page);
391 }
392
393 cpu_relax();
394 stat(s, CMPXCHG_DOUBLE_FAIL);
395
396#ifdef SLUB_DEBUG_CMPXCHG
397 pr_info("%s %s: cmpxchg double redo ", n, s->name);
398#endif
399
400 return false;
401}
402
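/*
 * Same as __cmpxchg_double_slab() but safe to call with interrupts enabled:
 * the slab_lock() fallback path disables interrupts around the update.
 */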
403static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
404 void *freelist_old, unsigned long counters_old,
405 void *freelist_new, unsigned long counters_new,
406 const char *n)
407{
408#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
409 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
410 if (s->flags & __CMPXCHG_DOUBLE) {
411 if (cmpxchg_double(&page->freelist, &page->counters,
412 freelist_old, counters_old,
413 freelist_new, counters_new))
414 return true;
415 } else
416#endif
417 {
418 unsigned long flags;
419
420 local_irq_save(flags);
421 slab_lock(page);
422 if (page->freelist == freelist_old &&
423 page->counters == counters_old) {
424 page->freelist = freelist_new;
425 page->counters = counters_new;
426 slab_unlock(page);
427 local_irq_restore(flags);
428 return true;
429 }
430 slab_unlock(page);
431 local_irq_restore(flags);
432 }
433
434 cpu_relax();
435 stat(s, CMPXCHG_DOUBLE_FAIL);
436
437#ifdef SLUB_DEBUG_CMPXCHG
438 pr_info("%s %s: cmpxchg double redo ", n, s->name);
439#endif
440
441 return false;
442}
443
444#ifdef CONFIG_SLUB_DEBUG
445static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
446static DEFINE_SPINLOCK(object_map_lock);

/*
 * Determine a map of objects in use in a slab.
 *
 * Node listlock must be held to guarantee that the slab does
 * not vanish from under us.
 */
454static unsigned long *get_map(struct kmem_cache *s, struct page *page)
455{
456 void *p;
457 void *addr = page_address(page);
458
459 VM_BUG_ON(!irqs_disabled());
460
461 spin_lock(&object_map_lock);
462
463 bitmap_zero(object_map, page->objects);
464
465 for (p = page->freelist; p; p = get_freepointer(s, p))
466 set_bit(__obj_to_index(s, addr, p), object_map);
467
468 return object_map;
469}
470
471static void put_map(unsigned long *map)
472{
473 VM_BUG_ON(map != object_map);
474 lockdep_assert_held(&object_map_lock);
475
476 spin_unlock(&object_map_lock);
477}
478
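/* Slot size excluding the left red zone that precedes the object. */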
479static inline unsigned int size_from_object(struct kmem_cache *s)
480{
481 if (s->flags & SLAB_RED_ZONE)
482 return s->size - s->red_left_pad;
483
484 return s->size;
485}
486
487static inline void *restore_red_left(struct kmem_cache *s, void *p)
488{
489 if (s->flags & SLAB_RED_ZONE)
490 p -= s->red_left_pad;
491
492 return p;
493}

/*
 * Debug settings:
 */
498#if defined(CONFIG_SLUB_DEBUG_ON)
499static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
500#else
501static slab_flags_t slub_debug;
502#endif
503
504static char *slub_debug_string;
505static int disable_higher_order_debug;

/*
 * slub is about to manipulate internal object metadata.  This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error.  metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
513static inline void metadata_access_enable(void)
514{
515 kasan_disable_current();
516}
517
518static inline void metadata_access_disable(void)
519{
520 kasan_enable_current();
521}

/*
 * Object debugging
 */

/* Verify that a pointer has an address that is valid within a slab page */
528static inline int check_valid_pointer(struct kmem_cache *s,
529 struct page *page, void *object)
530{
531 void *base;
532
533 if (!object)
534 return 1;
535
536 base = page_address(page);
537 object = kasan_reset_tag(object);
538 object = restore_red_left(s, object);
539 if (object < base || object >= base + page->objects * s->size ||
540 (object - base) % s->size) {
541 return 0;
542 }
543
544 return 1;
545}
546
547static void print_section(char *level, char *text, u8 *addr,
548 unsigned int length)
549{
550 metadata_access_enable();
551 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
552 length, 1);
553 metadata_access_disable();
554}

/*
 * The free pointer lives outside the object (see calculate_sizes()) when it
 * must not overwrite the object's contents; in that case s->offset is at or
 * beyond s->inuse.
 */
559static inline bool freeptr_outside_object(struct kmem_cache *s)
560{
561 return s->offset >= s->inuse;
562}

/*
 * Return offset of the end of info block which is inuse + free pointer if we
 * cannot overwrite the object on free.
 */
568static inline unsigned int get_info_end(struct kmem_cache *s)
569{
570 if (freeptr_outside_object(s))
571 return s->inuse + sizeof(void *);
572 else
573 return s->inuse;
574}
575
576static struct track *get_track(struct kmem_cache *s, void *object,
577 enum track_item alloc)
578{
579 struct track *p;
580
581 p = object + get_info_end(s);
582
583 return p + alloc;
584}
585
586static void set_track(struct kmem_cache *s, void *object,
587 enum track_item alloc, unsigned long addr)
588{
589 struct track *p = get_track(s, object, alloc);
590
591 if (addr) {
592#ifdef CONFIG_STACKTRACE
593 unsigned int nr_entries;
594
595 metadata_access_enable();
596 nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
597 metadata_access_disable();
598
599 if (nr_entries < TRACK_ADDRS_COUNT)
600 p->addrs[nr_entries] = 0;
601#endif
602 p->addr = addr;
603 p->cpu = smp_processor_id();
604 p->pid = current->pid;
605 p->when = jiffies;
606 } else {
607 memset(p, 0, sizeof(struct track));
608 }
609}
610
611static void init_tracking(struct kmem_cache *s, void *object)
612{
613 if (!(s->flags & SLAB_STORE_USER))
614 return;
615
616 set_track(s, object, TRACK_FREE, 0UL);
617 set_track(s, object, TRACK_ALLOC, 0UL);
618}
619
620static void print_track(const char *s, struct track *t, unsigned long pr_time)
621{
622 if (!t->addr)
623 return;
624
625 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
626 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
627#ifdef CONFIG_STACKTRACE
628 {
629 int i;
630 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
631 if (t->addrs[i])
632 pr_err("\t%pS\n", (void *)t->addrs[i]);
633 else
634 break;
635 }
636#endif
637}
638
639void print_tracking(struct kmem_cache *s, void *object)
640{
641 unsigned long pr_time = jiffies;
642 if (!(s->flags & SLAB_STORE_USER))
643 return;
644
645 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
646 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
647}
648
649static void print_page_info(struct page *page)
650{
651 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
652 page, page->objects, page->inuse, page->freelist, page->flags);
653
654}
655
656static void slab_bug(struct kmem_cache *s, char *fmt, ...)
657{
658 struct va_format vaf;
659 va_list args;
660
661 va_start(args, fmt);
662 vaf.fmt = fmt;
663 vaf.va = &args;
664 pr_err("=============================================================================\n");
665 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
666 pr_err("-----------------------------------------------------------------------------\n\n");
667
668 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
669 va_end(args);
670}
671
672static void slab_fix(struct kmem_cache *s, char *fmt, ...)
673{
674 struct va_format vaf;
675 va_list args;
676
677 va_start(args, fmt);
678 vaf.fmt = fmt;
679 vaf.va = &args;
680 pr_err("FIX %s: %pV\n", s->name, &vaf);
681 va_end(args);
682}
683
684static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
685 void **freelist, void *nextfree)
686{
687 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
688 !check_valid_pointer(s, page, nextfree) && freelist) {
689 object_err(s, page, *freelist, "Freechain corrupt");
690 *freelist = NULL;
691 slab_fix(s, "Isolate corrupted freechain");
692 return true;
693 }
694
695 return false;
696}
697
698static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
699{
700 unsigned int off;
701 u8 *addr = page_address(page);
702
703 print_tracking(s, p);
704
705 print_page_info(page);
706
707 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
708 p, p - addr, get_freepointer(s, p));
709
710 if (s->flags & SLAB_RED_ZONE)
711 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
712 s->red_left_pad);
713 else if (p > addr + 16)
714 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
715
716 print_section(KERN_ERR, "Object ", p,
717 min_t(unsigned int, s->object_size, PAGE_SIZE));
718 if (s->flags & SLAB_RED_ZONE)
719 print_section(KERN_ERR, "Redzone ", p + s->object_size,
720 s->inuse - s->object_size);
721
722 off = get_info_end(s);
723
724 if (s->flags & SLAB_STORE_USER)
725 off += 2 * sizeof(struct track);
726
727 off += kasan_metadata_size(s);
728
729 if (off != size_from_object(s))
730
731 print_section(KERN_ERR, "Padding ", p + off,
732 size_from_object(s) - off);
733
734 dump_stack();
735}
736
737void object_err(struct kmem_cache *s, struct page *page,
738 u8 *object, char *reason)
739{
740 slab_bug(s, "%s", reason);
741 print_trailer(s, page, object);
742}
743
744static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
745 const char *fmt, ...)
746{
747 va_list args;
748 char buf[100];
749
750 va_start(args, fmt);
751 vsnprintf(buf, sizeof(buf), fmt, args);
752 va_end(args);
753 slab_bug(s, "%s", buf);
754 print_page_info(page);
755 dump_stack();
756}
757
758static void init_object(struct kmem_cache *s, void *object, u8 val)
759{
760 u8 *p = object;
761
762 if (s->flags & SLAB_RED_ZONE)
763 memset(p - s->red_left_pad, val, s->red_left_pad);
764
765 if (s->flags & __OBJECT_POISON) {
766 memset(p, POISON_FREE, s->object_size - 1);
767 p[s->object_size - 1] = POISON_END;
768 }
769
770 if (s->flags & SLAB_RED_ZONE)
771 memset(p + s->object_size, val, s->inuse - s->object_size);
772}
773
774static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
775 void *from, void *to)
776{
777 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
778 memset(from, data, to - from);
779}
780
781static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
782 u8 *object, char *what,
783 u8 *start, unsigned int value, unsigned int bytes)
784{
785 u8 *fault;
786 u8 *end;
787 u8 *addr = page_address(page);
788
789 metadata_access_enable();
790 fault = memchr_inv(start, value, bytes);
791 metadata_access_disable();
792 if (!fault)
793 return 1;
794
795 end = start + bytes;
796 while (end > fault && end[-1] == value)
797 end--;
798
799 slab_bug(s, "%s overwritten", what);
800 pr_err("INFO: 0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
801 fault, end - 1, fault - addr,
802 fault[0], value);
803 print_trailer(s, page, object);
804
805 restore_bytes(s, what, value, fault, end);
806 return 0;
807}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is at the middle of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->object_size
 * 	Padding to reach word boundary. This is also used for Redzoning.
 *
 * 	We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
 * 	0xcc (SLUB_RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary, or at minimum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 * 	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 */
847static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
848{
849 unsigned long off = get_info_end(s);
850
851 if (s->flags & SLAB_STORE_USER)
852
853 off += 2 * sizeof(struct track);
854
855 off += kasan_metadata_size(s);
856
857 if (size_from_object(s) == off)
858 return 1;
859
860 return check_bytes_and_report(s, page, p, "Object padding",
861 p + off, POISON_INUSE, size_from_object(s) - off);
862}
863
864
865static int slab_pad_check(struct kmem_cache *s, struct page *page)
866{
867 u8 *start;
868 u8 *fault;
869 u8 *end;
870 u8 *pad;
871 int length;
872 int remainder;
873
874 if (!(s->flags & SLAB_POISON))
875 return 1;
876
877 start = page_address(page);
878 length = page_size(page);
879 end = start + length;
880 remainder = length % s->size;
881 if (!remainder)
882 return 1;
883
884 pad = end - remainder;
885 metadata_access_enable();
886 fault = memchr_inv(pad, POISON_INUSE, remainder);
887 metadata_access_disable();
888 if (!fault)
889 return 1;
890 while (end > fault && end[-1] == POISON_INUSE)
891 end--;
892
893 slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
894 fault, end - 1, fault - start);
895 print_section(KERN_ERR, "Padding ", pad, remainder);
896
897 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
898 return 0;
899}
900
901static int check_object(struct kmem_cache *s, struct page *page,
902 void *object, u8 val)
903{
904 u8 *p = object;
905 u8 *endobject = object + s->object_size;
906
907 if (s->flags & SLAB_RED_ZONE) {
908 if (!check_bytes_and_report(s, page, object, "Redzone",
909 object - s->red_left_pad, val, s->red_left_pad))
910 return 0;
911
912 if (!check_bytes_and_report(s, page, object, "Redzone",
913 endobject, val, s->inuse - s->object_size))
914 return 0;
915 } else {
916 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
917 check_bytes_and_report(s, page, p, "Alignment padding",
918 endobject, POISON_INUSE,
919 s->inuse - s->object_size);
920 }
921 }
922
923 if (s->flags & SLAB_POISON) {
924 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
925 (!check_bytes_and_report(s, page, p, "Poison", p,
926 POISON_FREE, s->object_size - 1) ||
927 !check_bytes_and_report(s, page, p, "Poison",
928 p + s->object_size - 1, POISON_END, 1)))
929 return 0;
930
931
932
933 check_pad_bytes(s, page, p);
934 }
935
936 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
937
938
939
940
941 return 1;
942
943
944 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
945 object_err(s, page, p, "Freepointer corrupt");
946
947
948
949
950
951 set_freepointer(s, p, NULL);
952 return 0;
953 }
954 return 1;
955}
956
957static int check_slab(struct kmem_cache *s, struct page *page)
958{
959 int maxobj;
960
961 VM_BUG_ON(!irqs_disabled());
962
963 if (!PageSlab(page)) {
964 slab_err(s, page, "Not a valid slab page");
965 return 0;
966 }
967
968 maxobj = order_objects(compound_order(page), s->size);
969 if (page->objects > maxobj) {
970 slab_err(s, page, "objects %u > max %u",
971 page->objects, maxobj);
972 return 0;
973 }
974 if (page->inuse > page->objects) {
975 slab_err(s, page, "inuse %u > max %u",
976 page->inuse, page->objects);
977 return 0;
978 }
979
980 slab_pad_check(s, page);
981 return 1;
982}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to make sure that the freelist does not change.
 */
988static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
989{
990 int nr = 0;
991 void *fp;
992 void *object = NULL;
993 int max_objects;
994
995 fp = page->freelist;
996 while (fp && nr <= page->objects) {
997 if (fp == search)
998 return 1;
999 if (!check_valid_pointer(s, page, fp)) {
1000 if (object) {
1001 object_err(s, page, object,
1002 "Freechain corrupt");
1003 set_freepointer(s, object, NULL);
1004 } else {
1005 slab_err(s, page, "Freepointer corrupt");
1006 page->freelist = NULL;
1007 page->inuse = page->objects;
1008 slab_fix(s, "Freelist cleared");
1009 return 0;
1010 }
1011 break;
1012 }
1013 object = fp;
1014 fp = get_freepointer(s, object);
1015 nr++;
1016 }
1017
1018 max_objects = order_objects(compound_order(page), s->size);
1019 if (max_objects > MAX_OBJS_PER_PAGE)
1020 max_objects = MAX_OBJS_PER_PAGE;
1021
1022 if (page->objects != max_objects) {
1023 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
1024 page->objects, max_objects);
1025 page->objects = max_objects;
1026 slab_fix(s, "Number of objects adjusted.");
1027 }
1028 if (page->inuse != page->objects - nr) {
1029 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1030 page->inuse, page->objects - nr);
1031 page->inuse = page->objects - nr;
1032 slab_fix(s, "Object count adjusted.");
1033 }
1034 return search == NULL;
1035}
1036
1037static void trace(struct kmem_cache *s, struct page *page, void *object,
1038 int alloc)
1039{
1040 if (s->flags & SLAB_TRACE) {
1041 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1042 s->name,
1043 alloc ? "alloc" : "free",
1044 object, page->inuse,
1045 page->freelist);
1046
1047 if (!alloc)
1048 print_section(KERN_INFO, "Object ", (void *)object,
1049 s->object_size);
1050
1051 dump_stack();
1052 }
1053}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1058static void add_full(struct kmem_cache *s,
1059 struct kmem_cache_node *n, struct page *page)
1060{
1061 if (!(s->flags & SLAB_STORE_USER))
1062 return;
1063
1064 lockdep_assert_held(&n->list_lock);
1065 list_add(&page->slab_list, &n->full);
1066}
1067
1068static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1069{
1070 if (!(s->flags & SLAB_STORE_USER))
1071 return;
1072
1073 lockdep_assert_held(&n->list_lock);
1074 list_del(&page->slab_list);
1075}
1076
1077
1078static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1079{
1080 struct kmem_cache_node *n = get_node(s, node);
1081
1082 return atomic_long_read(&n->nr_slabs);
1083}
1084
1085static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1086{
1087 return atomic_long_read(&n->nr_slabs);
1088}
1089
1090static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1091{
1092 struct kmem_cache_node *n = get_node(s, node);

	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg dilemma by
	 * deferring the increment of the count during bootstrap
	 * (see early_kmem_cache_node_alloc).
	 */
1100 if (likely(n)) {
1101 atomic_long_inc(&n->nr_slabs);
1102 atomic_long_add(objects, &n->total_objects);
1103 }
1104}
1105static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1106{
1107 struct kmem_cache_node *n = get_node(s, node);
1108
1109 atomic_long_dec(&n->nr_slabs);
1110 atomic_long_sub(objects, &n->total_objects);
1111}
1112
1113
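/* Object debug checks for alloc/free paths */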
1114static void setup_object_debug(struct kmem_cache *s, struct page *page,
1115 void *object)
1116{
1117 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1118 return;
1119
1120 init_object(s, object, SLUB_RED_INACTIVE);
1121 init_tracking(s, object);
1122}
1123
1124static
1125void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
1126{
1127 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1128 return;
1129
1130 metadata_access_enable();
1131 memset(addr, POISON_INUSE, page_size(page));
1132 metadata_access_disable();
1133}
1134
1135static inline int alloc_consistency_checks(struct kmem_cache *s,
1136 struct page *page, void *object)
1137{
1138 if (!check_slab(s, page))
1139 return 0;
1140
1141 if (!check_valid_pointer(s, page, object)) {
1142 object_err(s, page, object, "Freelist Pointer check fails");
1143 return 0;
1144 }
1145
1146 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1147 return 0;
1148
1149 return 1;
1150}
1151
1152static noinline int alloc_debug_processing(struct kmem_cache *s,
1153 struct page *page,
1154 void *object, unsigned long addr)
1155{
1156 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1157 if (!alloc_consistency_checks(s, page, object))
1158 goto bad;
1159 }
1160
1161
1162 if (s->flags & SLAB_STORE_USER)
1163 set_track(s, object, TRACK_ALLOC, addr);
1164 trace(s, page, object, 1);
1165 init_object(s, object, SLUB_RED_ACTIVE);
1166 return 1;
1167
1168bad:
1169 if (PageSlab(page)) {
1170
1171
1172
1173
1174
1175 slab_fix(s, "Marking all objects used");
1176 page->inuse = page->objects;
1177 page->freelist = NULL;
1178 }
1179 return 0;
1180}
1181
1182static inline int free_consistency_checks(struct kmem_cache *s,
1183 struct page *page, void *object, unsigned long addr)
1184{
1185 if (!check_valid_pointer(s, page, object)) {
1186 slab_err(s, page, "Invalid object pointer 0x%p", object);
1187 return 0;
1188 }
1189
1190 if (on_freelist(s, page, object)) {
1191 object_err(s, page, object, "Object already free");
1192 return 0;
1193 }
1194
1195 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1196 return 0;
1197
1198 if (unlikely(s != page->slab_cache)) {
1199 if (!PageSlab(page)) {
1200 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1201 object);
1202 } else if (!page->slab_cache) {
1203 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1204 object);
1205 dump_stack();
1206 } else
1207 object_err(s, page, object,
1208 "page slab pointer corrupt.");
1209 return 0;
1210 }
1211 return 1;
1212}
1213
1214
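/* Supports checking bulk free of a constructed freelist */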
1215static noinline int free_debug_processing(
1216 struct kmem_cache *s, struct page *page,
1217 void *head, void *tail, int bulk_cnt,
1218 unsigned long addr)
1219{
1220 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1221 void *object = head;
1222 int cnt = 0;
1223 unsigned long uninitialized_var(flags);
1224 int ret = 0;
1225
1226 spin_lock_irqsave(&n->list_lock, flags);
1227 slab_lock(page);
1228
1229 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1230 if (!check_slab(s, page))
1231 goto out;
1232 }
1233
1234next_object:
1235 cnt++;
1236
1237 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1238 if (!free_consistency_checks(s, page, object, addr))
1239 goto out;
1240 }
1241
1242 if (s->flags & SLAB_STORE_USER)
1243 set_track(s, object, TRACK_FREE, addr);
1244 trace(s, page, object, 0);
1245
1246 init_object(s, object, SLUB_RED_INACTIVE);
1247
1248
1249 if (object != tail) {
1250 object = get_freepointer(s, object);
1251 goto next_object;
1252 }
1253 ret = 1;
1254
1255out:
1256 if (cnt != bulk_cnt)
1257 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1258 bulk_cnt, cnt);
1259
1260 slab_unlock(page);
1261 spin_unlock_irqrestore(&n->list_lock, flags);
1262 if (!ret)
1263 slab_fix(s, "Object at 0x%p not freed", object);
1264 return ret;
1265}
1266

/*
 * Parse a block of slub_debug options. Blocks are delimited by ';'
 *
 * @str:    start of block
 * @flags:  returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
 * @slabs:  return start of list of slabs, or NULL when there's no list
 * @init:   assume this is initial parsing and not per-kmem-create parsing
 *
 * returns the start of next block if there's any, or NULL
 */
1277static char *
1278parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1279{
1280 bool higher_order_disable = false;
1281
1282
1283 while (*str && *str == ';')
1284 str++;
1285
1286 if (*str == ',') {
1287
1288
1289
1290
1291 *flags = DEBUG_DEFAULT_FLAGS;
1292 goto check_slabs;
1293 }
1294 *flags = 0;
1295
1296
1297 for (; *str && *str != ',' && *str != ';'; str++) {
1298 switch (tolower(*str)) {
1299 case '-':
1300 *flags = 0;
1301 break;
1302 case 'f':
1303 *flags |= SLAB_CONSISTENCY_CHECKS;
1304 break;
1305 case 'z':
1306 *flags |= SLAB_RED_ZONE;
1307 break;
1308 case 'p':
1309 *flags |= SLAB_POISON;
1310 break;
1311 case 'u':
1312 *flags |= SLAB_STORE_USER;
1313 break;
1314 case 't':
1315 *flags |= SLAB_TRACE;
1316 break;
1317 case 'a':
1318 *flags |= SLAB_FAILSLAB;
1319 break;
1320 case 'o':
1321
1322
1323
1324
1325 higher_order_disable = true;
1326 break;
1327 default:
1328 if (init)
1329 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1330 }
1331 }
1332check_slabs:
1333 if (*str == ',')
1334 *slabs = ++str;
1335 else
1336 *slabs = NULL;
1337
1338
1339 while (*str && *str != ';')
1340 str++;
1341
1342
1343 while (*str && *str == ';')
1344 str++;
1345
1346 if (init && higher_order_disable)
1347 disable_higher_order_debug = 1;
1348
1349 if (*str)
1350 return str;
1351 else
1352 return NULL;
1353}
1354
1355static int __init setup_slub_debug(char *str)
1356{
1357 slab_flags_t flags;
1358 char *saved_str;
1359 char *slab_list;
1360 bool global_slub_debug_changed = false;
1361 bool slab_list_specified = false;
1362
1363 slub_debug = DEBUG_DEFAULT_FLAGS;
1364 if (*str++ != '=' || !*str)
1365
1366
1367
1368 goto out;
1369
1370 saved_str = str;
1371 while (str) {
1372 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1373
1374 if (!slab_list) {
1375 slub_debug = flags;
1376 global_slub_debug_changed = true;
1377 } else {
1378 slab_list_specified = true;
1379 }
1380 }
1381
1382
1383
1384
1385
1386
1387
1388 if (slab_list_specified) {
1389 if (!global_slub_debug_changed)
1390 slub_debug = 0;
1391 slub_debug_string = saved_str;
1392 }
1393out:
1394 if (slub_debug != 0 || slub_debug_string)
1395 static_branch_enable(&slub_debug_enabled);
1396 else
1397 static_branch_disable(&slub_debug_enabled);
1398 if ((static_branch_unlikely(&init_on_alloc) ||
1399 static_branch_unlikely(&init_on_free)) &&
1400 (slub_debug & SLAB_POISON))
1401 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1402 return 1;
1403}
1404
1405__setup("slub_debug", setup_slub_debug);
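/*
 * Example usage on the kernel command line, assuming the standard syntax
 * documented in Documentation/vm/slub.rst (illustrative examples, not taken
 * from the original source):
 *
 *   slub_debug                 enable the default debug options (FZPU)
 *                              for all caches
 *   slub_debug=FZP             sanity checks, red zoning and poisoning only
 *   slub_debug=,dentry         default debug options, but only for the
 *                              dentry cache
 *   slub_debug=F;P,kmalloc-*   ';'-separated blocks: sanity checks globally,
 *                              poisoning for all kmalloc caches
 *   slub_debug=O               switch debugging off for caches whose debug
 *                              metadata would increase the minimum slab order
 */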

/*
 * kmem_cache_flags - apply debugging options to the cache
 * @object_size:	the size of an object without meta data
 * @flags:		flags to set
 * @name:		name of the cache
 *
 * Debug option(s) are applied to @flags. In addition to the debug
 * option(s), if a slab name (or multiple) is specified i.e.
 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
 * then only the select slabs will receive the debug option(s).
 */
1418slab_flags_t kmem_cache_flags(unsigned int object_size,
1419 slab_flags_t flags, const char *name)
1420{
1421 char *iter;
1422 size_t len;
1423 char *next_block;
1424 slab_flags_t block_flags;
1425 slab_flags_t slub_debug_local = slub_debug;
1426
1427
1428
1429
1430
1431
1432 if (flags & SLAB_NOLEAKTRACE)
1433 slub_debug_local &= ~SLAB_STORE_USER;
1434
1435 len = strlen(name);
1436 next_block = slub_debug_string;
1437
1438 while (next_block) {
1439 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1440 if (!iter)
1441 continue;
1442
1443 while (*iter) {
1444 char *end, *glob;
1445 size_t cmplen;
1446
1447 end = strchrnul(iter, ',');
1448 if (next_block && next_block < end)
1449 end = next_block - 1;
1450
1451 glob = strnchr(iter, end - iter, '*');
1452 if (glob)
1453 cmplen = glob - iter;
1454 else
1455 cmplen = max_t(size_t, len, (end - iter));
1456
1457 if (!strncmp(name, iter, cmplen)) {
1458 flags |= block_flags;
1459 return flags;
1460 }
1461
1462 if (!*end || *end == ';')
1463 break;
1464 iter = end + 1;
1465 }
1466 }
1467
1468 return flags | slub_debug_local;
1469}
1470#else
1471static inline void setup_object_debug(struct kmem_cache *s,
1472 struct page *page, void *object) {}
1473static inline
1474void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
1475
1476static inline int alloc_debug_processing(struct kmem_cache *s,
1477 struct page *page, void *object, unsigned long addr) { return 0; }
1478
1479static inline int free_debug_processing(
1480 struct kmem_cache *s, struct page *page,
1481 void *head, void *tail, int bulk_cnt,
1482 unsigned long addr) { return 0; }
1483
1484static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1485 { return 1; }
1486static inline int check_object(struct kmem_cache *s, struct page *page,
1487 void *object, u8 val) { return 1; }
1488static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1489 struct page *page) {}
1490static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1491 struct page *page) {}
1492slab_flags_t kmem_cache_flags(unsigned int object_size,
1493 slab_flags_t flags, const char *name)
1494{
1495 return flags;
1496}
1497#define slub_debug 0
1498
1499#define disable_higher_order_debug 0
1500
1501static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1502 { return 0; }
1503static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1504 { return 0; }
1505static inline void inc_slabs_node(struct kmem_cache *s, int node,
1506 int objects) {}
1507static inline void dec_slabs_node(struct kmem_cache *s, int node,
1508 int objects) {}
1509
1510static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
1511 void **freelist, void *nextfree)
1512{
1513 return false;
1514}
1515#endif

/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
1521static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1522{
1523 ptr = kasan_kmalloc_large(ptr, size, flags);
1524
1525 kmemleak_alloc(ptr, size, 1, flags);
1526 return ptr;
1527}
1528
1529static __always_inline void kfree_hook(void *x)
1530{
1531 kmemleak_free(x);
1532 kasan_kfree_large(x, _RET_IP_);
1533}
1534
1535static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1536{
1537 kmemleak_free_recursive(x, s->flags);
1538
1539
1540
1541
1542
1543
1544#ifdef CONFIG_LOCKDEP
1545 {
1546 unsigned long flags;
1547
1548 local_irq_save(flags);
1549 debug_check_no_locks_freed(x, s->object_size);
1550 local_irq_restore(flags);
1551 }
1552#endif
1553 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1554 debug_check_no_obj_freed(x, s->object_size);
1555
1556
1557 return kasan_slab_free(s, x, _RET_IP_);
1558}
1559
1560static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1561 void **head, void **tail)
1562{
1563
1564 void *object;
1565 void *next = *head;
1566 void *old_tail = *tail ? *tail : *head;
1567 int rsize;
1568
1569
1570 *head = NULL;
1571 *tail = NULL;
1572
1573 do {
1574 object = next;
1575 next = get_freepointer(s, object);
1576
1577 if (slab_want_init_on_free(s)) {
1578
1579
1580
1581
1582 memset(object, 0, s->object_size);
1583 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1584 : 0;
1585 memset((char *)object + s->inuse, 0,
1586 s->size - s->inuse - rsize);
1587
1588 }
1589
1590 if (!slab_free_hook(s, object)) {
1591
1592 set_freepointer(s, object, *head);
1593 *head = object;
1594 if (!*tail)
1595 *tail = object;
1596 }
1597 } while (object != old_tail);
1598
1599 if (*head == *tail)
1600 *tail = NULL;
1601
1602 return *head != NULL;
1603}
1604
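/* Initialize debug metadata for one object and run its constructor, if any. */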
1605static void *setup_object(struct kmem_cache *s, struct page *page,
1606 void *object)
1607{
1608 setup_object_debug(s, page, object);
1609 object = kasan_init_slab_obj(s, object);
1610 if (unlikely(s->ctor)) {
1611 kasan_unpoison_object_data(s, object);
1612 s->ctor(object);
1613 kasan_poison_object_data(s, object);
1614 }
1615 return object;
1616}

/*
 * Slab allocation and freeing
 */
1621static inline struct page *alloc_slab_page(struct kmem_cache *s,
1622 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1623{
1624 struct page *page;
1625 unsigned int order = oo_order(oo);
1626
1627 if (node == NUMA_NO_NODE)
1628 page = alloc_pages(flags, order);
1629 else
1630 page = __alloc_pages_node(node, flags, order);
1631
1632 return page;
1633}
1634
1635#ifdef CONFIG_SLAB_FREELIST_RANDOM
1636
1637static int init_cache_random_seq(struct kmem_cache *s)
1638{
1639 unsigned int count = oo_objects(s->oo);
1640 int err;
1641
1642
1643 if (s->random_seq)
1644 return 0;
1645
1646 err = cache_random_seq_create(s, count, GFP_KERNEL);
1647 if (err) {
1648 pr_err("SLUB: Unable to initialize free list for %s\n",
1649 s->name);
1650 return err;
1651 }
1652
1653
1654 if (s->random_seq) {
1655 unsigned int i;
1656
1657 for (i = 0; i < count; i++)
1658 s->random_seq[i] *= s->size;
1659 }
1660 return 0;
1661}
1662
1663
1664static void __init init_freelist_randomization(void)
1665{
1666 struct kmem_cache *s;
1667
1668 mutex_lock(&slab_mutex);
1669
1670 list_for_each_entry(s, &slab_caches, list)
1671 init_cache_random_seq(s);
1672
1673 mutex_unlock(&slab_mutex);
1674}
1675
1676
1677static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1678 unsigned long *pos, void *start,
1679 unsigned long page_limit,
1680 unsigned long freelist_count)
1681{
1682 unsigned int idx;
1683
1684
1685
1686
1687
1688 do {
1689 idx = s->random_seq[*pos];
1690 *pos += 1;
1691 if (*pos >= freelist_count)
1692 *pos = 0;
1693 } while (unlikely(idx >= page_limit));
1694
1695 return (char *)start + idx;
1696}
1697
1698
1699static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1700{
1701 void *start;
1702 void *cur;
1703 void *next;
1704 unsigned long idx, pos, page_limit, freelist_count;
1705
1706 if (page->objects < 2 || !s->random_seq)
1707 return false;
1708
1709 freelist_count = oo_objects(s->oo);
1710 pos = get_random_int() % freelist_count;
1711
1712 page_limit = page->objects * s->size;
1713 start = fixup_red_left(s, page_address(page));
1714
1715
1716 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1717 freelist_count);
1718 cur = setup_object(s, page, cur);
1719 page->freelist = cur;
1720
1721 for (idx = 1; idx < page->objects; idx++) {
1722 next = next_freelist_entry(s, page, &pos, start, page_limit,
1723 freelist_count);
1724 next = setup_object(s, page, next);
1725 set_freepointer(s, cur, next);
1726 cur = next;
1727 }
1728 set_freepointer(s, cur, NULL);
1729
1730 return true;
1731}
1732#else
1733static inline int init_cache_random_seq(struct kmem_cache *s)
1734{
1735 return 0;
1736}
1737static inline void init_freelist_randomization(void) { }
1738static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1739{
1740 return false;
1741}
1742#endif
1743
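/*
 * Allocate a slab page of the configured order (falling back to the minimum
 * order under memory pressure), initialize its objects and link them into
 * the initial freelist, optionally in randomized order.
 */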
1744static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1745{
1746 struct page *page;
1747 struct kmem_cache_order_objects oo = s->oo;
1748 gfp_t alloc_gfp;
1749 void *start, *p, *next;
1750 int idx;
1751 bool shuffle;
1752
1753 flags &= gfp_allowed_mask;
1754
1755 if (gfpflags_allow_blocking(flags))
1756 local_irq_enable();
1757
1758 flags |= s->allocflags;
1759
1760
1761
1762
1763
1764 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1765 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1766 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1767
1768 page = alloc_slab_page(s, alloc_gfp, node, oo);
1769 if (unlikely(!page)) {
1770 oo = s->min;
1771 alloc_gfp = flags;
1772
1773
1774
1775
1776 page = alloc_slab_page(s, alloc_gfp, node, oo);
1777 if (unlikely(!page))
1778 goto out;
1779 stat(s, ORDER_FALLBACK);
1780 }
1781
1782 page->objects = oo_objects(oo);
1783
1784 account_slab_page(page, oo_order(oo), s, flags);
1785
1786 page->slab_cache = s;
1787 __SetPageSlab(page);
1788 if (page_is_pfmemalloc(page))
1789 SetPageSlabPfmemalloc(page);
1790
1791 kasan_poison_slab(page);
1792
1793 start = page_address(page);
1794
1795 setup_page_debug(s, page, start);
1796
1797 shuffle = shuffle_freelist(s, page);
1798
1799 if (!shuffle) {
1800 start = fixup_red_left(s, start);
1801 start = setup_object(s, page, start);
1802 page->freelist = start;
1803 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1804 next = p + s->size;
1805 next = setup_object(s, page, next);
1806 set_freepointer(s, p, next);
1807 p = next;
1808 }
1809 set_freepointer(s, p, NULL);
1810 }
1811
1812 page->inuse = page->objects;
1813 page->frozen = 1;
1814
1815out:
1816 if (gfpflags_allow_blocking(flags))
1817 local_irq_disable();
1818 if (!page)
1819 return NULL;
1820
1821 inc_slabs_node(s, page_to_nid(page), page->objects);
1822
1823 return page;
1824}
1825
1826static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1827{
1828 if (unlikely(flags & GFP_SLAB_BUG_MASK))
1829 flags = kmalloc_fix_flags(flags);
1830
1831 return allocate_slab(s,
1832 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1833}
1834
1835static void __free_slab(struct kmem_cache *s, struct page *page)
1836{
1837 int order = compound_order(page);
1838 int pages = 1 << order;
1839
1840 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
1841 void *p;
1842
1843 slab_pad_check(s, page);
1844 for_each_object(p, s, page_address(page),
1845 page->objects)
1846 check_object(s, page, p, SLUB_RED_INACTIVE);
1847 }
1848
1849 __ClearPageSlabPfmemalloc(page);
1850 __ClearPageSlab(page);
1851
1852 page->slab_cache = NULL;
1853 if (current->reclaim_state)
1854 current->reclaim_state->reclaimed_slab += pages;
1855 unaccount_slab_page(page, order, s);
1856 __free_pages(page, order);
1857}
1858
1859static void rcu_free_slab(struct rcu_head *h)
1860{
1861 struct page *page = container_of(h, struct page, rcu_head);
1862
1863 __free_slab(page->slab_cache, page);
1864}
1865
1866static void free_slab(struct kmem_cache *s, struct page *page)
1867{
1868 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1869 call_rcu(&page->rcu_head, rcu_free_slab);
1870 } else
1871 __free_slab(s, page);
1872}
1873
1874static void discard_slab(struct kmem_cache *s, struct page *page)
1875{
1876 dec_slabs_node(s, page_to_nid(page), page->objects);
1877 free_slab(s, page);
1878}

/*
 * Management of partially allocated slabs.
 */
1883static inline void
1884__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1885{
1886 n->nr_partial++;
1887 if (tail == DEACTIVATE_TO_TAIL)
1888 list_add_tail(&page->slab_list, &n->partial);
1889 else
1890 list_add(&page->slab_list, &n->partial);
1891}
1892
1893static inline void add_partial(struct kmem_cache_node *n,
1894 struct page *page, int tail)
1895{
1896 lockdep_assert_held(&n->list_lock);
1897 __add_partial(n, page, tail);
1898}
1899
1900static inline void remove_partial(struct kmem_cache_node *n,
1901 struct page *page)
1902{
1903 lockdep_assert_held(&n->list_lock);
1904 list_del(&page->slab_list);
1905 n->nr_partial--;
1906}

/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 */
1914static inline void *acquire_slab(struct kmem_cache *s,
1915 struct kmem_cache_node *n, struct page *page,
1916 int mode, int *objects)
1917{
1918 void *freelist;
1919 unsigned long counters;
1920 struct page new;
1921
1922 lockdep_assert_held(&n->list_lock);
1923
1924
1925
1926
1927
1928
1929 freelist = page->freelist;
1930 counters = page->counters;
1931 new.counters = counters;
1932 *objects = new.objects - new.inuse;
1933 if (mode) {
1934 new.inuse = page->objects;
1935 new.freelist = NULL;
1936 } else {
1937 new.freelist = freelist;
1938 }
1939
1940 VM_BUG_ON(new.frozen);
1941 new.frozen = 1;
1942
1943 if (!__cmpxchg_double_slab(s, page,
1944 freelist, counters,
1945 new.freelist, new.counters,
1946 "acquire_slab"))
1947 return NULL;
1948
1949 remove_partial(n, page);
1950 WARN_ON(!freelist);
1951 return freelist;
1952}
1953
1954static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1955static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);

/*
 * Try to allocate a partial slab from a specific node.
 */
1960static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1961 struct kmem_cache_cpu *c, gfp_t flags)
1962{
1963 struct page *page, *page2;
1964 void *object = NULL;
1965 unsigned int available = 0;
1966 int objects;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * just allocate an empty slab. If we mistakenly try to get a
	 * partial slab and there is none available then get_partial()
	 * will return NULL.
	 */
1974 if (!n || !n->nr_partial)
1975 return NULL;
1976
1977 spin_lock(&n->list_lock);
1978 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1979 void *t;
1980
1981 if (!pfmemalloc_match(page, flags))
1982 continue;
1983
1984 t = acquire_slab(s, n, page, object == NULL, &objects);
1985 if (!t)
1986 break;
1987
1988 available += objects;
1989 if (!object) {
1990 c->page = page;
1991 stat(s, ALLOC_FROM_PARTIAL);
1992 object = t;
1993 } else {
1994 put_cpu_partial(s, page, 0);
1995 stat(s, CPU_PARTIAL_NODE);
1996 }
1997 if (!kmem_cache_has_cpu_partial(s)
1998 || available > slub_cpu_partial(s) / 2)
1999 break;
2000
2001 }
2002 spin_unlock(&n->list_lock);
2003 return object;
2004}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
2009static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2010 struct kmem_cache_cpu *c)
2011{
2012#ifdef CONFIG_NUMA
2013 struct zonelist *zonelist;
2014 struct zoneref *z;
2015 struct zone *zone;
2016 enum zone_type highest_zoneidx = gfp_zone(flags);
2017 void *object;
2018 unsigned int cpuset_mems_cookie;
	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * Scanning all nodes for partial slabs may be expensive, so only do
	 * it with the configured probability.
	 */
2038 if (!s->remote_node_defrag_ratio ||
2039 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2040 return NULL;
2041
2042 do {
2043 cpuset_mems_cookie = read_mems_allowed_begin();
2044 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2045 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2046 struct kmem_cache_node *n;
2047
2048 n = get_node(s, zone_to_nid(zone));
2049
2050 if (n && cpuset_zone_allowed(zone, flags) &&
2051 n->nr_partial > s->min_partial) {
2052 object = get_partial_node(s, n, c, flags);
2053 if (object) {
2054
2055
2056
2057
2058
2059
2060
2061 return object;
2062 }
2063 }
2064 }
2065 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2066#endif
2067 return NULL;
2068}

/*
 * Get a partial page, lock it and return it.
 */
2073static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2074 struct kmem_cache_cpu *c)
2075{
2076 void *object;
2077 int searchnode = node;
2078
2079 if (node == NUMA_NO_NODE)
2080 searchnode = numa_mem_id();
2081
2082 object = get_partial_node(s, get_node(s, searchnode), c, flags);
2083 if (object || node != NUMA_NO_NODE)
2084 return object;
2085
2086 return get_any_partial(s, flags, c);
2087}
2088
2089#ifdef CONFIG_PREEMPT
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
2095#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2096#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
2101#define TID_STEP 1
2102#endif
2103
2104static inline unsigned long next_tid(unsigned long tid)
2105{
2106 return tid + TID_STEP;
2107}
2108
2109static inline unsigned int tid_to_cpu(unsigned long tid)
2110{
2111 return tid % TID_STEP;
2112}
2113
2114static inline unsigned long tid_to_event(unsigned long tid)
2115{
2116 return tid / TID_STEP;
2117}
2118
2119static inline unsigned int init_tid(int cpu)
2120{
2121 return cpu;
2122}
2123
2124static inline void note_cmpxchg_failure(const char *n,
2125 const struct kmem_cache *s, unsigned long tid)
2126{
2127#ifdef SLUB_DEBUG_CMPXCHG
2128 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2129
2130 pr_info("%s %s: cmpxchg redo ", n, s->name);
2131
2132#ifdef CONFIG_PREEMPT
2133 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2134 pr_warn("due to cpu change %d -> %d\n",
2135 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2136 else
2137#endif
2138 if (tid_to_event(tid) != tid_to_event(actual_tid))
2139 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2140 tid_to_event(tid), tid_to_event(actual_tid));
2141 else
2142 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2143 actual_tid, tid, next_tid(tid));
2144#endif
2145 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2146}
2147
2148static void init_kmem_cache_cpus(struct kmem_cache *s)
2149{
2150 int cpu;
2151
2152 for_each_possible_cpu(cpu)
2153 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2154}

/*
 * Remove the cpu slab
 */
2159static void deactivate_slab(struct kmem_cache *s, struct page *page,
2160 void *freelist, struct kmem_cache_cpu *c)
2161{
2162 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2163 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2164 int lock = 0, free_delta = 0;
2165 enum slab_modes l = M_NONE, m = M_NONE;
2166 void *nextfree, *freelist_iter, *freelist_tail;
2167 int tail = DEACTIVATE_TO_HEAD;
2168 struct page new;
2169 struct page old;
2170
2171 if (page->freelist) {
2172 stat(s, DEACTIVATE_REMOTE_FREES);
2173 tail = DEACTIVATE_TO_TAIL;
2174 }

	/*
	 * Stage one: Count the objects on cpu's freelist as free_delta and
	 * remember the last object in freelist_tail for later splicing.
	 */
2180 freelist_tail = NULL;
2181 freelist_iter = freelist;
2182 while (freelist_iter) {
2183 nextfree = get_freepointer(s, freelist_iter);
2184
2185
2186
2187
2188
2189
2190 if (freelist_corrupted(s, page, &freelist_iter, nextfree))
2191 break;
2192
2193 freelist_tail = freelist_iter;
2194 free_delta++;
2195
2196 freelist_iter = nextfree;
2197 }

	/*
	 * Stage two: Unfreeze the page while splicing the per-cpu
	 * freelist to the head of page's freelist.
	 *
	 * Ensure that the page is unfrozen while the list presence
	 * reflects the actual number of objects during unfreeze.
	 *
	 * We setup the list membership and then perform a cmpxchg
	 * with the count. If there is a mismatch then the page
	 * is not unfrozen but the page is on the wrong list.
	 *
	 * Then we restart the process which may have to remove
	 * the page from the list that we just put it on again
	 * because the number of objects in the slab may have
	 * changed.
	 */
2215redo:
2216
2217 old.freelist = READ_ONCE(page->freelist);
2218 old.counters = READ_ONCE(page->counters);
2219 VM_BUG_ON(!old.frozen);
2220
2221
2222 new.counters = old.counters;
2223 if (freelist_tail) {
2224 new.inuse -= free_delta;
2225 set_freepointer(s, freelist_tail, old.freelist);
2226 new.freelist = freelist;
2227 } else
2228 new.freelist = old.freelist;
2229
2230 new.frozen = 0;
2231
2232 if (!new.inuse && n->nr_partial >= s->min_partial)
2233 m = M_FREE;
2234 else if (new.freelist) {
2235 m = M_PARTIAL;
2236 if (!lock) {
2237 lock = 1;
2238
2239
2240
2241
2242
2243 spin_lock(&n->list_lock);
2244 }
2245 } else {
2246 m = M_FULL;
2247 if (kmem_cache_debug_flags(s, SLAB_STORE_USER) && !lock) {
2248 lock = 1;
2249
2250
2251
2252
2253
2254 spin_lock(&n->list_lock);
2255 }
2256 }
2257
2258 if (l != m) {
2259 if (l == M_PARTIAL)
2260 remove_partial(n, page);
2261 else if (l == M_FULL)
2262 remove_full(s, n, page);
2263
2264 if (m == M_PARTIAL)
2265 add_partial(n, page, tail);
2266 else if (m == M_FULL)
2267 add_full(s, n, page);
2268 }
2269
2270 l = m;
2271 if (!__cmpxchg_double_slab(s, page,
2272 old.freelist, old.counters,
2273 new.freelist, new.counters,
2274 "unfreezing slab"))
2275 goto redo;
2276
2277 if (lock)
2278 spin_unlock(&n->list_lock);
2279
2280 if (m == M_PARTIAL)
2281 stat(s, tail);
2282 else if (m == M_FULL)
2283 stat(s, DEACTIVATE_FULL);
2284 else if (m == M_FREE) {
2285 stat(s, DEACTIVATE_EMPTY);
2286 discard_slab(s, page);
2287 stat(s, FREE_SLAB);
2288 }
2289
2290 c->page = NULL;
2291 c->freelist = NULL;
2292}

/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 */
2301static void unfreeze_partials(struct kmem_cache *s,
2302 struct kmem_cache_cpu *c)
2303{
2304#ifdef CONFIG_SLUB_CPU_PARTIAL
2305 struct kmem_cache_node *n = NULL, *n2 = NULL;
2306 struct page *page, *discard_page = NULL;
2307
2308 while ((page = c->partial)) {
2309 struct page new;
2310 struct page old;
2311
2312 c->partial = page->next;
2313
2314 n2 = get_node(s, page_to_nid(page));
2315 if (n != n2) {
2316 if (n)
2317 spin_unlock(&n->list_lock);
2318
2319 n = n2;
2320 spin_lock(&n->list_lock);
2321 }
2322
2323 do {
2324
2325 old.freelist = page->freelist;
2326 old.counters = page->counters;
2327 VM_BUG_ON(!old.frozen);
2328
2329 new.counters = old.counters;
2330 new.freelist = old.freelist;
2331
2332 new.frozen = 0;
2333
2334 } while (!__cmpxchg_double_slab(s, page,
2335 old.freelist, old.counters,
2336 new.freelist, new.counters,
2337 "unfreezing slab"));
2338
2339 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2340 page->next = discard_page;
2341 discard_page = page;
2342 } else {
2343 add_partial(n, page, DEACTIVATE_TO_TAIL);
2344 stat(s, FREE_ADD_PARTIAL);
2345 }
2346 }
2347
2348 if (n)
2349 spin_unlock(&n->list_lock);
2350
2351 while (discard_page) {
2352 page = discard_page;
2353 discard_page = discard_page->next;
2354
2355 stat(s, DEACTIVATE_EMPTY);
2356 discard_slab(s, page);
2357 stat(s, FREE_SLAB);
2358 }
2359#endif
2360}

/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) into a
 * partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2369static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2370{
2371#ifdef CONFIG_SLUB_CPU_PARTIAL
2372 struct page *oldpage;
2373 int pages;
2374 int pobjects;
2375
2376 preempt_disable();
2377 do {
2378 pages = 0;
2379 pobjects = 0;
2380 oldpage = this_cpu_read(s->cpu_slab->partial);
2381
2382 if (oldpage) {
2383 pobjects = oldpage->pobjects;
2384 pages = oldpage->pages;
2385 if (drain && pobjects > s->cpu_partial) {
2386 unsigned long flags;
2387
2388
2389
2390
2391 local_irq_save(flags);
2392 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2393 local_irq_restore(flags);
2394 oldpage = NULL;
2395 pobjects = 0;
2396 pages = 0;
2397 stat(s, CPU_PARTIAL_DRAIN);
2398 }
2399 }
2400
2401 pages++;
2402 pobjects += page->objects - page->inuse;
2403
2404 page->pages = pages;
2405 page->pobjects = pobjects;
2406 page->next = oldpage;
2407
2408 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2409 != oldpage);
2410 if (unlikely(!s->cpu_partial)) {
2411 unsigned long flags;
2412
2413 local_irq_save(flags);
2414 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2415 local_irq_restore(flags);
2416 }
2417 preempt_enable();
2418#endif
2419}
2420
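/* Deactivate the current cpu slab and start a new transaction id. */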
2421static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2422{
2423 stat(s, CPUSLAB_FLUSH);
2424 deactivate_slab(s, c->page, c->freelist, c);
2425
2426 c->tid = next_tid(c->tid);
2427}

/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
2434static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2435{
2436 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2437
2438 if (c->page)
2439 flush_slab(s, c);
2440
2441 unfreeze_partials(s, c);
2442}
2443
2444static void flush_cpu_slab(void *d)
2445{
2446 struct kmem_cache *s = d;
2447
2448 __flush_cpu_slab(s, smp_processor_id());
2449}
2450
2451static bool has_cpu_slab(int cpu, void *info)
2452{
2453 struct kmem_cache *s = info;
2454 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2455
2456 return c->page || slub_percpu_partial(c);
2457}
2458
2459static void flush_all(struct kmem_cache *s)
2460{
2461 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
2462}

/*
 * Use the cpu notifier to insure that the cpu slabs are flushed when
 * necessary.
 */
2468static int slub_cpu_dead(unsigned int cpu)
2469{
2470 struct kmem_cache *s;
2471 unsigned long flags;
2472
2473 mutex_lock(&slab_mutex);
2474 list_for_each_entry(s, &slab_caches, list) {
2475 local_irq_save(flags);
2476 __flush_cpu_slab(s, cpu);
2477 local_irq_restore(flags);
2478 }
2479 mutex_unlock(&slab_mutex);
2480 return 0;
2481}

/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2487static inline int node_match(struct page *page, int node)
2488{
2489#ifdef CONFIG_NUMA
2490 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2491 return 0;
2492#endif
2493 return 1;
2494}
2495
2496#ifdef CONFIG_SLUB_DEBUG
2497static int count_free(struct page *page)
2498{
2499 return page->objects - page->inuse;
2500}
2501
2502static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2503{
2504 return atomic_long_read(&n->total_objects);
2505}
2506#endif
2507
2508#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2509static unsigned long count_partial(struct kmem_cache_node *n,
2510 int (*get_count)(struct page *))
2511{
2512 unsigned long flags;
2513 unsigned long x = 0;
2514 struct page *page;
2515
2516 spin_lock_irqsave(&n->list_lock, flags);
2517 list_for_each_entry(page, &n->partial, slab_list)
2518 x += get_count(page);
2519 spin_unlock_irqrestore(&n->list_lock, flags);
2520 return x;
2521}
2522#endif
2523
2524static noinline void
2525slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2526{
2527#ifdef CONFIG_SLUB_DEBUG
2528 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2529 DEFAULT_RATELIMIT_BURST);
2530 int node;
2531 struct kmem_cache_node *n;
2532
2533 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2534 return;
2535
2536 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2537 nid, gfpflags, &gfpflags);
2538 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2539 s->name, s->object_size, s->size, oo_order(s->oo),
2540 oo_order(s->min));
2541
2542 if (oo_order(s->min) > get_order(s->object_size))
2543 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2544 s->name);
2545
2546 for_each_kmem_cache_node(s, node, n) {
2547 unsigned long nr_slabs;
2548 unsigned long nr_objs;
2549 unsigned long nr_free;
2550
2551 nr_free = count_partial(n, count_free);
2552 nr_slabs = node_nr_slabs(n);
2553 nr_objs = node_nr_objs(n);
2554
2555 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2556 node, nr_slabs, nr_objs, nr_free);
2557 }
2558#endif
2559}
2560
2561static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2562 int node, struct kmem_cache_cpu **pc)
2563{
2564 void *freelist;
2565 struct kmem_cache_cpu *c = *pc;
2566 struct page *page;
2567
2568 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2569
2570 freelist = get_partial(s, flags, node, c);
2571
2572 if (freelist)
2573 return freelist;
2574
2575 page = new_slab(s, flags, node);
2576 if (page) {
2577 c = raw_cpu_ptr(s->cpu_slab);
2578 if (c->page)
2579 flush_slab(s, c);
2580
2581
2582
2583
2584
2585 freelist = page->freelist;
2586 page->freelist = NULL;
2587
2588 stat(s, ALLOC_SLAB);
2589 c->page = page;
2590 *pc = c;
2591 }
2592
2593 return freelist;
2594}
2595
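/*
 * Slabs taken from the emergency (pfmemalloc) reserves may only serve
 * allocations that are themselves entitled to those reserves.
 */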
2596static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2597{
2598 if (unlikely(PageSlabPfmemalloc(page)))
2599 return gfp_pfmemalloc_allowed(gfpflags);
2600
2601 return true;
2602}

/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupt disabled.
 */
2614static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2615{
2616 struct page new;
2617 unsigned long counters;
2618 void *freelist;
2619
2620 do {
2621 freelist = page->freelist;
2622 counters = page->counters;
2623
2624 new.counters = counters;
2625 VM_BUG_ON(!new.frozen);
2626
2627 new.inuse = page->objects;
2628 new.frozen = freelist != NULL;
2629
2630 } while (!__cmpxchg_double_slab(s, page,
2631 freelist, counters,
2632 NULL, new.counters,
2633 "get_freelist"));
2634
2635 return freelist;
2636}
2637
2638/*
2639 * Slow path. The lockless freelist is empty or we need to perform
2640 * debugging duties.
2641 *
2642 * Processing is still very fast if new objects have been freed to the
2643 * regular freelist. In that case we simply take over the regular freelist
2644 * as the lockless freelist and zap the regular freelist.
2645 *
2646 * If that is not working then we fall back to the partial lists. We take the
2647 * first element of the freelist as the object to allocate now and move the
2648 * rest of the freelist to the lockless freelist.
2649 *
2650 * And if we were unable to get a new slab from the partial slab lists then
2651 * we need to allocate a new slab. This is the slowest path since it involves
2652 * a call to the page allocator and the setup of a new slab.
2653 *
2654 * Version of __slab_alloc to use when we know that interrupts are
2655 * already disabled (which is the case for bulk allocation).
2656 */
2657static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2658 unsigned long addr, struct kmem_cache_cpu *c)
2659{
2660 void *freelist;
2661 struct page *page;
2662
2663 stat(s, ALLOC_SLOWPATH);
2664
2665 page = c->page;
2666 if (!page) {
2667
2668
2669
2670
2671 if (unlikely(node != NUMA_NO_NODE &&
2672 !node_isset(node, slab_nodes)))
2673 node = NUMA_NO_NODE;
2674 goto new_slab;
2675 }
2676redo:
2677
2678 if (unlikely(!node_match(page, node))) {
2679
2680
2681
2682
2683 if (!node_isset(node, slab_nodes)) {
2684 node = NUMA_NO_NODE;
2685 goto redo;
2686 } else {
2687 stat(s, ALLOC_NODE_MISMATCH);
2688 deactivate_slab(s, page, c->freelist, c);
2689 goto new_slab;
2690 }
2691 }
2692
2693
2694
2695
2696
2697
2698 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2699 deactivate_slab(s, page, c->freelist, c);
2700 goto new_slab;
2701 }
2702
2703
2704 freelist = c->freelist;
2705 if (freelist)
2706 goto load_freelist;
2707
2708 freelist = get_freelist(s, page);
2709
2710 if (!freelist) {
2711 c->page = NULL;
2712 stat(s, DEACTIVATE_BYPASS);
2713 goto new_slab;
2714 }
2715
2716 stat(s, ALLOC_REFILL);
2717
2718load_freelist:
2719
2720
2721
2722
2723
2724 VM_BUG_ON(!c->page->frozen);
2725 c->freelist = get_freepointer(s, freelist);
2726 c->tid = next_tid(c->tid);
2727 return freelist;
2728
2729new_slab:
2730
2731 if (slub_percpu_partial(c)) {
2732 page = c->page = slub_percpu_partial(c);
2733 slub_set_percpu_partial(c, page);
2734 stat(s, CPU_PARTIAL_ALLOC);
2735 goto redo;
2736 }
2737
2738 freelist = new_slab_objects(s, gfpflags, node, &c);
2739
2740 if (unlikely(!freelist)) {
2741 slab_out_of_memory(s, gfpflags, node);
2742 return NULL;
2743 }
2744
2745 page = c->page;
2746 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2747 goto load_freelist;
2748
2749
2750 if (kmem_cache_debug(s) &&
2751 !alloc_debug_processing(s, page, freelist, addr))
2752 goto new_slab;
2753
2754 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2755 return freelist;
2756}
2757
2758/*
2759 * Wrapper for ___slab_alloc() that disables interrupts and compensates
2760 * for a possible cpu change by refetching the per cpu area pointer.
2761 */
2762static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2763 unsigned long addr, struct kmem_cache_cpu *c)
2764{
2765 void *p;
2766 unsigned long flags;
2767
2768 local_irq_save(flags);
2769#ifdef CONFIG_PREEMPTION
2770
2771
2772
2773
2774
2775 c = this_cpu_ptr(s->cpu_slab);
2776#endif
2777
2778 p = ___slab_alloc(s, gfpflags, node, addr, c);
2779 local_irq_restore(flags);
2780 return p;
2781}
2782
2783/*
2784 * If the object has been wiped upon free, make sure it's fully initialized
2785 * by zeroing out the freelist pointer.
2786 */
2787static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
2788 void *obj)
2789{
2790 if (unlikely(slab_want_init_on_free(s)) && obj)
2791 memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
2792}
2793
2794/*
2795 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2796 * have the fastpath folded into their functions. So no function call
2797 * overhead for requests that can be satisfied on the fastpath.
2798 *
2799 * The fastpath works by first checking if the lockless freelist can be used.
2800 * If not then __slab_alloc is called for slow processing.
2801 *
2802 * Otherwise we can simply pick the next object from the lockless free list.
2803 */
2804static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2805 gfp_t gfpflags, int node, unsigned long addr)
2806{
2807 void *object;
2808 struct kmem_cache_cpu *c;
2809 struct page *page;
2810 unsigned long tid;
2811 struct obj_cgroup *objcg = NULL;
2812
2813 s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
2814 if (!s)
2815 return NULL;
2816redo:
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827 do {
2828 tid = this_cpu_read(s->cpu_slab->tid);
2829 c = raw_cpu_ptr(s->cpu_slab);
2830	} while (IS_ENABLED(CONFIG_PREEMPTION) &&
2831 unlikely(tid != READ_ONCE(c->tid)));
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841 barrier();
2842
2843
2844
2845
2846
2847
2848
2849
2850 object = c->freelist;
2851 page = c->page;
2852 if (unlikely(!object || !page || !node_match(page, node))) {
2853 object = __slab_alloc(s, gfpflags, node, addr, c);
2854 } else {
2855 void *next_object = get_freepointer_safe(s, object);
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871 if (unlikely(!this_cpu_cmpxchg_double(
2872 s->cpu_slab->freelist, s->cpu_slab->tid,
2873 object, tid,
2874 next_object, next_tid(tid)))) {
2875
2876 note_cmpxchg_failure("slab_alloc", s, tid);
2877 goto redo;
2878 }
2879 prefetch_freepointer(s, next_object);
2880 stat(s, ALLOC_FASTPATH);
2881 }
2882
2883 maybe_wipe_obj_freeptr(s, object);
2884
2885 if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2886 memset(object, 0, s->object_size);
2887
2888 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
2889
2890 return object;
2891}
2892
2893static __always_inline void *slab_alloc(struct kmem_cache *s,
2894 gfp_t gfpflags, unsigned long addr)
2895{
2896 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2897}
2898
2899void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2900{
2901 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2902
2903 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2904 s->size, gfpflags);
2905
2906 return ret;
2907}
2908EXPORT_SYMBOL(kmem_cache_alloc);
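
/*
 * Typical caller pattern (an illustrative sketch only, not part of this
 * file; "my_cache" and struct my_obj are made-up names):
 *
 *	cache = kmem_cache_create("my_cache", sizeof(struct my_obj), 0,
 *				  SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(cache, obj);
 */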
2909
2910#ifdef CONFIG_TRACING
2911void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2912{
2913 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2914 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2915 ret = kasan_kmalloc(s, ret, size, gfpflags);
2916 return ret;
2917}
2918EXPORT_SYMBOL(kmem_cache_alloc_trace);
2919#endif
2920
2921#ifdef CONFIG_NUMA
2922void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2923{
2924 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2925
2926 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2927 s->object_size, s->size, gfpflags, node);
2928
2929 return ret;
2930}
2931EXPORT_SYMBOL(kmem_cache_alloc_node);
2932
2933#ifdef CONFIG_TRACING
2934void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2935 gfp_t gfpflags,
2936 int node, size_t size)
2937{
2938 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2939
2940 trace_kmalloc_node(_RET_IP_, ret,
2941 size, s->size, gfpflags, node);
2942
2943 ret = kasan_kmalloc(s, ret, size, gfpflags);
2944 return ret;
2945}
2946EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2947#endif
2948#endif
2949
2950/*
2951 * Slow path handling. This may still be called frequently since objects
2952 * have a longer lifetime than the cpu slabs in most processing loads.
2953 *
2954 * So we still attempt to reduce cache line usage. Just take the slab
2955 * lock and free the item. If there is no additional partial page
2956 * handling required then we can return immediately.
2957 */
2958static void __slab_free(struct kmem_cache *s, struct page *page,
2959 void *head, void *tail, int cnt,
2960 unsigned long addr)
2961
2962{
2963 void *prior;
2964 int was_frozen;
2965 struct page new;
2966 unsigned long counters;
2967 struct kmem_cache_node *n = NULL;
2968	unsigned long flags;
2969
2970 stat(s, FREE_SLOWPATH);
2971
2972 if (kmem_cache_debug(s) &&
2973 !free_debug_processing(s, page, head, tail, cnt, addr))
2974 return;
2975
2976 do {
2977 if (unlikely(n)) {
2978 spin_unlock_irqrestore(&n->list_lock, flags);
2979 n = NULL;
2980 }
2981 prior = page->freelist;
2982 counters = page->counters;
2983 set_freepointer(s, tail, prior);
2984 new.counters = counters;
2985 was_frozen = new.frozen;
2986 new.inuse -= cnt;
2987 if ((!new.inuse || !prior) && !was_frozen) {
2988
2989 if (kmem_cache_has_cpu_partial(s) && !prior) {
2990
2991
2992
2993
2994
2995
2996
2997 new.frozen = 1;
2998
2999 } else {
3000
3001 n = get_node(s, page_to_nid(page));
3002
3003
3004
3005
3006
3007
3008
3009
3010 spin_lock_irqsave(&n->list_lock, flags);
3011
3012 }
3013 }
3014
3015 } while (!cmpxchg_double_slab(s, page,
3016 prior, counters,
3017 head, new.counters,
3018 "__slab_free"));
3019
3020 if (likely(!n)) {
3021
3022 if (likely(was_frozen)) {
3023
3024
3025
3026
3027 stat(s, FREE_FROZEN);
3028 } else if (new.frozen) {
3029
3030
3031
3032
3033 put_cpu_partial(s, page, 1);
3034 stat(s, CPU_PARTIAL_FREE);
3035 }
3036
3037 return;
3038 }
3039
3040 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3041 goto slab_empty;
3042
3043
3044
3045
3046
3047 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3048 remove_full(s, n, page);
3049 add_partial(n, page, DEACTIVATE_TO_TAIL);
3050 stat(s, FREE_ADD_PARTIAL);
3051 }
3052 spin_unlock_irqrestore(&n->list_lock, flags);
3053 return;
3054
3055slab_empty:
3056 if (prior) {
3057
3058
3059
3060 remove_partial(n, page);
3061 stat(s, FREE_REMOVE_PARTIAL);
3062 } else {
3063
3064 remove_full(s, n, page);
3065 }
3066
3067 spin_unlock_irqrestore(&n->list_lock, flags);
3068 stat(s, FREE_SLAB);
3069 discard_slab(s, page);
3070}
3071
3072/*
3073 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
3074 * can perform fastpath freeing without additional function calls.
3075 *
3076 * The fastpath is only possible if we are freeing to the current cpu slab
3077 * of this processor. This is typically the case if we have just allocated
3078 * the item before.
3079 *
3080 * If fastpath is not possible then fall back to __slab_free where we deal
3081 * with all sorts of special processing.
3082 *
3083 * Bulk free of a freelist with several objects (all pointing to the
3084 * same page) is possible by specifying head and tail ptr, plus objects
3085 * count (cnt). Bulk free is indicated by the tail pointer being set.
3086 */
3087static __always_inline void do_slab_free(struct kmem_cache *s,
3088 struct page *page, void *head, void *tail,
3089 int cnt, unsigned long addr)
3090{
3091 void *tail_obj = tail ? : head;
3092 struct kmem_cache_cpu *c;
3093 unsigned long tid;
3094
3095 memcg_slab_free_hook(s, &head, 1);
3096redo:
3097
3098
3099
3100
3101
3102
3103 do {
3104 tid = this_cpu_read(s->cpu_slab->tid);
3105 c = raw_cpu_ptr(s->cpu_slab);
3106	} while (IS_ENABLED(CONFIG_PREEMPTION) &&
3107 unlikely(tid != READ_ONCE(c->tid)));
3108
3109
3110 barrier();
3111
3112 if (likely(page == c->page)) {
3113 void **freelist = READ_ONCE(c->freelist);
3114
3115 set_freepointer(s, tail_obj, freelist);
3116
3117 if (unlikely(!this_cpu_cmpxchg_double(
3118 s->cpu_slab->freelist, s->cpu_slab->tid,
3119 freelist, tid,
3120 head, next_tid(tid)))) {
3121
3122 note_cmpxchg_failure("slab_free", s, tid);
3123 goto redo;
3124 }
3125 stat(s, FREE_FASTPATH);
3126 } else
3127 __slab_free(s, page, head, tail_obj, cnt, addr);
3128
3129}
3130
3131static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3132 void *head, void *tail, int cnt,
3133 unsigned long addr)
3134{
3135
3136
3137
3138
3139 if (slab_free_freelist_hook(s, &head, &tail))
3140 do_slab_free(s, page, head, tail, cnt, addr);
3141}
3142
3143#ifdef CONFIG_KASAN_GENERIC
3144void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3145{
3146 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3147}
3148#endif
3149
3150void kmem_cache_free(struct kmem_cache *s, void *x)
3151{
3152 s = cache_from_obj(s, x);
3153 if (!s)
3154 return;
3155 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3156 trace_kmem_cache_free(_RET_IP_, x, s->name);
3157}
3158EXPORT_SYMBOL(kmem_cache_free);
3159
3160struct detached_freelist {
3161 struct page *page;
3162 void *tail;
3163 void *freelist;
3164 int cnt;
3165 struct kmem_cache *s;
3166};
3167
3168/*
3169 * This function progressively scans the array with free objects (with
3170 * a limited look ahead) and extracts objects belonging to the same
3171 * page. It builds a detached freelist directly within the given
3172 * page/objects. This can happen without any need for
3173 * synchronization, because the objects are owned by the running process.
3174 * The freelist is built up as a single linked list in the objects.
3175 * The idea is, that this detached freelist can then be bulk
3176 * transferred to the real freelist(s), but only requiring a single
3177 * synchronization primitive. Look ahead in the array is limited due
3178 * to performance reasons.
3179 */
3180static inline
3181int build_detached_freelist(struct kmem_cache *s, size_t size,
3182 void **p, struct detached_freelist *df)
3183{
3184 size_t first_skipped_index = 0;
3185 int lookahead = 3;
3186 void *object;
3187 struct page *page;
3188
3189
3190 df->page = NULL;
3191
3192 do {
3193 object = p[--size];
3194
3195 } while (!object && size);
3196
3197 if (!object)
3198 return 0;
3199
3200 page = virt_to_head_page(object);
3201 if (!s) {
3202
3203 if (unlikely(!PageSlab(page))) {
3204 BUG_ON(!PageCompound(page));
3205 kfree_hook(object);
3206 __free_pages(page, compound_order(page));
3207 p[size] = NULL;
3208 return size;
3209 }
3210
3211 df->s = page->slab_cache;
3212 } else {
3213 df->s = cache_from_obj(s, object);
3214 }
3215
3216
3217 df->page = page;
3218 set_freepointer(df->s, object, NULL);
3219 df->tail = object;
3220 df->freelist = object;
3221 p[size] = NULL;
3222 df->cnt = 1;
3223
3224 while (size) {
3225 object = p[--size];
3226 if (!object)
3227 continue;
3228
3229
3230 if (df->page == virt_to_head_page(object)) {
3231
3232 set_freepointer(df->s, object, df->freelist);
3233 df->freelist = object;
3234 df->cnt++;
3235 p[size] = NULL;
3236
3237 continue;
3238 }
3239
3240
3241 if (!--lookahead)
3242 break;
3243
3244 if (!first_skipped_index)
3245 first_skipped_index = size + 1;
3246 }
3247
3248 return first_skipped_index;
3249}
3250
3251
3252void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3253{
3254 if (WARN_ON(!size))
3255 return;
3256
3257 memcg_slab_free_hook(s, p, size);
3258 do {
3259 struct detached_freelist df;
3260
3261 size = build_detached_freelist(s, size, p, &df);
3262 if (!df.page)
3263 continue;
3264
3265 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3266 } while (likely(size));
3267}
3268EXPORT_SYMBOL(kmem_cache_free_bulk);
3269
3270
3271int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3272 void **p)
3273{
3274 struct kmem_cache_cpu *c;
3275 int i;
3276 struct obj_cgroup *objcg = NULL;
3277
3278
3279 s = slab_pre_alloc_hook(s, &objcg, size, flags);
3280 if (unlikely(!s))
3281 return false;
3282
3283
3284
3285
3286
3287 local_irq_disable();
3288 c = this_cpu_ptr(s->cpu_slab);
3289
3290 for (i = 0; i < size; i++) {
3291 void *object = c->freelist;
3292
3293 if (unlikely(!object)) {
3294
3295
3296
3297
3298
3299
3300
3301 c->tid = next_tid(c->tid);
3302
3303
3304
3305
3306
3307 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3308 _RET_IP_, c);
3309 if (unlikely(!p[i]))
3310 goto error;
3311
3312 c = this_cpu_ptr(s->cpu_slab);
3313 maybe_wipe_obj_freeptr(s, p[i]);
3314
3315 continue;
3316 }
3317 c->freelist = get_freepointer(s, object);
3318 p[i] = object;
3319 maybe_wipe_obj_freeptr(s, p[i]);
3320 }
3321 c->tid = next_tid(c->tid);
3322 local_irq_enable();
3323
3324
3325 if (unlikely(slab_want_init_on_alloc(flags, s))) {
3326 int j;
3327
3328 for (j = 0; j < i; j++)
3329 memset(p[j], 0, s->object_size);
3330 }
3331
3332
3333 slab_post_alloc_hook(s, objcg, flags, size, p);
3334 return i;
3335error:
3336 local_irq_enable();
3337 slab_post_alloc_hook(s, objcg, flags, i, p);
3338 __kmem_cache_free_bulk(s, i, p);
3339 return 0;
3340}
3341EXPORT_SYMBOL(kmem_cache_alloc_bulk);
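
/*
 * The bulk API pairs kmem_cache_alloc_bulk() with kmem_cache_free_bulk().
 * Illustrative sketch only (not part of this file; the array length is
 * arbitrary):
 *
 *	void *objs[16];
 *
 *	if (kmem_cache_alloc_bulk(s, GFP_KERNEL, ARRAY_SIZE(objs), objs)) {
 *		... use the objects ...
 *		kmem_cache_free_bulk(s, ARRAY_SIZE(objs), objs);
 *	}
 *
 * On failure kmem_cache_alloc_bulk() returns 0 and has already freed any
 * objects it managed to allocate, so the caller never sees a partially
 * filled array.
 */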
3342
3343
3344/*
3345 * Object placement in a slab is made very easy because we always start at
3346 * offset 0. If we tune the size of the object to the alignment then we can
3347 * get the required alignment by putting one properly sized object after
3348 * another.
3349 *
3350 * Notice that the allocation order determines the sizes of the per cpu
3351 * caches. Each processor has always one slab available for allocations.
3352 * Increasing the allocation order reduces the number of times that slabs
3353 * must be moved on and off the partial lists and therefore influences
3354 * locking overhead.
3355 */
3356
3357/*
3358 * Minimum / Maximum order of slab pages. This influences locking overhead
3359 * and slab fragmentation. A higher order reduces the number of partial slabs
3360 * and increases the number of allocations possible without having to
3361 * take the list_lock.
3362 */
3363static unsigned int slub_min_order;
3364static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3365static unsigned int slub_min_objects;
3366
3367/*
3368 * Calculate the order of allocation given an slab object size.
3369 *
3370 * The order of allocation has significant impact on performance and other
3371 * system components. Generally order 0 allocations should be preferred since
3372 * order 0 does not cause fragmentation in the page allocator. Larger objects
3373 * can be problematic to put into order 0 slabs because there may be too much
3374 * unused space left. We go to a higher order if more than 1/16th of the slab
3375 * would be wasted.
3376 *
3377 * In order to reach satisfactory performance we must ensure that a minimum
3378 * number of objects is in one slab. Otherwise we may generate too much
3379 * activity on the partial lists which requires taking the list_lock. This is
3380 * less a concern for large slabs though which are rarely used.
3381 *
3382 * slub_max_order specifies the order where we begin to stop considering the
3383 * number of objects in a slab as critical. If we reach slub_max_order then
3384 * we try to keep the page order as low as possible. So we accept more waste
3385 * of space in favor of a small page order.
3386 *
3387 * Higher order allocations also allow the placement of more objects in a
3388 * slab and thereby reduce object handling overhead. If the user has
3389 * requested a higher minimum order then we start with that one instead of
3390 * the smallest order which will fit the object.
3391 */
3392static inline unsigned int slab_order(unsigned int size,
3393 unsigned int min_objects, unsigned int max_order,
3394 unsigned int fract_leftover)
3395{
3396 unsigned int min_order = slub_min_order;
3397 unsigned int order;
3398
3399 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3400 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3401
3402 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3403 order <= max_order; order++) {
3404
3405 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3406 unsigned int rem;
3407
3408 rem = slab_size % size;
3409
3410 if (rem <= slab_size / fract_leftover)
3411 break;
3412 }
3413
3414 return order;
3415}
3416
3417static inline int calculate_order(unsigned int size)
3418{
3419 unsigned int order;
3420 unsigned int min_objects;
3421 unsigned int max_objects;
3422 unsigned int nr_cpus;
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432 min_objects = slub_min_objects;
3433 if (!min_objects) {
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443 nr_cpus = num_present_cpus();
3444 if (nr_cpus <= 1)
3445 nr_cpus = nr_cpu_ids;
3446 min_objects = 4 * (fls(nr_cpus) + 1);
3447 }
3448 max_objects = order_objects(slub_max_order, size);
3449 min_objects = min(min_objects, max_objects);
3450
3451 while (min_objects > 1) {
3452 unsigned int fraction;
3453
3454 fraction = 16;
3455 while (fraction >= 4) {
3456 order = slab_order(size, min_objects,
3457 slub_max_order, fraction);
3458 if (order <= slub_max_order)
3459 return order;
3460 fraction /= 2;
3461 }
3462 min_objects--;
3463 }
3464
3465
3466
3467
3468
3469 order = slab_order(size, 1, slub_max_order, 1);
3470 if (order <= slub_max_order)
3471 return order;
3472
3473
3474
3475
3476 order = slab_order(size, 1, MAX_ORDER, 1);
3477 if (order < MAX_ORDER)
3478 return order;
3479 return -ENOSYS;
3480}
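
/*
 * Worked example (assuming 4K pages, the default slub_max_order of
 * PAGE_ALLOC_COSTLY_ORDER == 3 and 16 present CPUs): for a 256 byte object
 * size, min_objects = 4 * (fls(16) + 1) = 24, so slab_order() starts
 * scanning at get_order(24 * 256) = 1. An order-1 (8K) slab holds exactly
 * 32 objects with no leftover space, so order 1 is accepted on the first
 * pass.
 */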
3481
3482static void
3483init_kmem_cache_node(struct kmem_cache_node *n)
3484{
3485 n->nr_partial = 0;
3486 spin_lock_init(&n->list_lock);
3487 INIT_LIST_HEAD(&n->partial);
3488#ifdef CONFIG_SLUB_DEBUG
3489 atomic_long_set(&n->nr_slabs, 0);
3490 atomic_long_set(&n->total_objects, 0);
3491 INIT_LIST_HEAD(&n->full);
3492#endif
3493}
3494
3495static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3496{
3497 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3498 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3499
3500
3501
3502
3503
3504 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3505 2 * sizeof(void *));
3506
3507 if (!s->cpu_slab)
3508 return 0;
3509
3510 init_kmem_cache_cpus(s);
3511
3512 return 1;
3513}
3514
3515static struct kmem_cache *kmem_cache_node;
3516
3517/*
3518 * No kmalloc_node yet so do it by hand. We know that this is the first
3519 * slab on the node for this slabcache. There are no concurrent accesses
3520 * possible.
3521 *
3522 * Note that this function only works on the kmem_cache_node
3523 * when allocating for the kmem_cache_node cache. This is used for
3524 * bootstrapping memory on a fresh node that has no slab structures yet.
3525 */
3526static void early_kmem_cache_node_alloc(int node)
3527{
3528 struct page *page;
3529 struct kmem_cache_node *n;
3530
3531 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3532
3533 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3534
3535 BUG_ON(!page);
3536 if (page_to_nid(page) != node) {
3537 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3538 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3539 }
3540
3541 n = page->freelist;
3542 BUG_ON(!n);
3543#ifdef CONFIG_SLUB_DEBUG
3544 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3545 init_tracking(kmem_cache_node, n);
3546#endif
3547 n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3548 GFP_KERNEL);
3549 page->freelist = get_freepointer(kmem_cache_node, n);
3550 page->inuse = 1;
3551 page->frozen = 0;
3552 kmem_cache_node->node[node] = n;
3553 init_kmem_cache_node(n);
3554 inc_slabs_node(kmem_cache_node, node, page->objects);
3555
3556
3557
3558
3559
3560 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3561}
3562
3563static void free_kmem_cache_nodes(struct kmem_cache *s)
3564{
3565 int node;
3566 struct kmem_cache_node *n;
3567
3568 for_each_kmem_cache_node(s, node, n) {
3569 s->node[node] = NULL;
3570 kmem_cache_free(kmem_cache_node, n);
3571 }
3572}
3573
3574void __kmem_cache_release(struct kmem_cache *s)
3575{
3576 cache_random_seq_destroy(s);
3577 free_percpu(s->cpu_slab);
3578 free_kmem_cache_nodes(s);
3579}
3580
3581static int init_kmem_cache_nodes(struct kmem_cache *s)
3582{
3583 int node;
3584
3585 for_each_node_mask(node, slab_nodes) {
3586 struct kmem_cache_node *n;
3587
3588 if (slab_state == DOWN) {
3589 early_kmem_cache_node_alloc(node);
3590 continue;
3591 }
3592 n = kmem_cache_alloc_node(kmem_cache_node,
3593 GFP_KERNEL, node);
3594
3595 if (!n) {
3596 free_kmem_cache_nodes(s);
3597 return 0;
3598 }
3599
3600 init_kmem_cache_node(n);
3601 s->node[node] = n;
3602 }
3603 return 1;
3604}
3605
3606static void set_min_partial(struct kmem_cache *s, unsigned long min)
3607{
3608 if (min < MIN_PARTIAL)
3609 min = MIN_PARTIAL;
3610 else if (min > MAX_PARTIAL)
3611 min = MAX_PARTIAL;
3612 s->min_partial = min;
3613}
3614
3615static void set_cpu_partial(struct kmem_cache *s)
3616{
3617#ifdef CONFIG_SLUB_CPU_PARTIAL
3618	/*
3619	 * cpu_partial determines the maximum number of objects kept in the
3620	 * per cpu partial lists of a processor.
3621	 *
3622	 * Per cpu partial lists mainly contain slabs that just have one
3623	 * object freed. If they are used for allocation then they can be
3624	 * filled up again with minimal effort. The slab will never hit the
3625	 * per node partial lists and therefore no locking will be required.
3626	 *
3627	 * This setting also determines
3628	 *
3629	 * A) The number of objects from per cpu partial slabs dumped to the
3630	 *    per node list when we reach the limit.
3631	 * B) The number of objects in cpu partial slabs to extract from the
3632	 *    per node list when we run out of per cpu objects. We only fetch
3633	 *    50% to keep some capacity around for frees.
3634	 */
3635 if (!kmem_cache_has_cpu_partial(s))
3636 s->cpu_partial = 0;
3637 else if (s->size >= PAGE_SIZE)
3638 s->cpu_partial = 2;
3639 else if (s->size >= 1024)
3640 s->cpu_partial = 6;
3641 else if (s->size >= 256)
3642 s->cpu_partial = 13;
3643 else
3644 s->cpu_partial = 30;
3645#endif
3646}
3647
3648/*
3649 * calculate_sizes() determines the order and the distribution of data within
3650 * a slab object.
3651 */
3652static int calculate_sizes(struct kmem_cache *s, int forced_order)
3653{
3654 slab_flags_t flags = s->flags;
3655 unsigned int size = s->object_size;
3656 unsigned int freepointer_area;
3657 unsigned int order;
3658
3659
3660
3661
3662
3663
3664 size = ALIGN(size, sizeof(void *));
3665
3666
3667
3668
3669
3670
3671 freepointer_area = size;
3672
3673#ifdef CONFIG_SLUB_DEBUG
3674
3675
3676
3677
3678
3679 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3680 !s->ctor)
3681 s->flags |= __OBJECT_POISON;
3682 else
3683 s->flags &= ~__OBJECT_POISON;
3684
3685
3686
3687
3688
3689
3690
3691 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3692 size += sizeof(void *);
3693#endif
3694
3695
3696
3697
3698
3699 s->inuse = size;
3700
3701 if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3702 ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
3703 s->ctor) {
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718 s->offset = size;
3719 size += sizeof(void *);
3720 } else if (freepointer_area > sizeof(void *)) {
3721
3722
3723
3724
3725
3726 s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
3727 }
3728
3729#ifdef CONFIG_SLUB_DEBUG
3730 if (flags & SLAB_STORE_USER)
3731
3732
3733
3734
3735 size += 2 * sizeof(struct track);
3736#endif
3737
3738 kasan_cache_create(s, &size, &s->flags);
3739#ifdef CONFIG_SLUB_DEBUG
3740 if (flags & SLAB_RED_ZONE) {
3741
3742
3743
3744
3745
3746
3747
3748 size += sizeof(void *);
3749
3750 s->red_left_pad = sizeof(void *);
3751 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3752 size += s->red_left_pad;
3753 }
3754#endif
3755
3756
3757
3758
3759
3760
3761 size = ALIGN(size, s->align);
3762 s->size = size;
3763 s->reciprocal_size = reciprocal_value(size);
3764 if (forced_order >= 0)
3765 order = forced_order;
3766 else
3767 order = calculate_order(size);
3768
3769 if ((int)order < 0)
3770 return 0;
3771
3772 s->allocflags = 0;
3773 if (order)
3774 s->allocflags |= __GFP_COMP;
3775
3776 if (s->flags & SLAB_CACHE_DMA)
3777 s->allocflags |= GFP_DMA;
3778
3779 if (s->flags & SLAB_CACHE_DMA32)
3780 s->allocflags |= GFP_DMA32;
3781
3782 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3783 s->allocflags |= __GFP_RECLAIMABLE;
3784
3785
3786
3787
3788 s->oo = oo_make(order, size);
3789 s->min = oo_make(get_order(size), size);
3790 if (oo_objects(s->oo) > oo_objects(s->max))
3791 s->max = s->oo;
3792
3793 return !!oo_objects(s->oo);
3794}
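
/*
 * Summary of the resulting layout: s->inuse covers the aligned object plus
 * the trailing red zone word (when red zoning is enabled) and is what the
 * allocator itself may write to; s->offset is where the freelist pointer is
 * stored, moved outside the object for caches with a constructor, poisoning
 * or RCU freeing and otherwise placed near the middle of the object;
 * s->size is the full per-object stride including tracking data and the
 * left red zone pad, and is what s->oo and s->min are derived from.
 */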
3795
3796static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3797{
3798 s->flags = kmem_cache_flags(s->size, flags, s->name);
3799#ifdef CONFIG_SLAB_FREELIST_HARDENED
3800 s->random = get_random_long();
3801#endif
3802
3803 if (!calculate_sizes(s, -1))
3804 goto error;
3805 if (disable_higher_order_debug) {
3806
3807
3808
3809
3810 if (get_order(s->size) > get_order(s->object_size)) {
3811 s->flags &= ~DEBUG_METADATA_FLAGS;
3812 s->offset = 0;
3813 if (!calculate_sizes(s, -1))
3814 goto error;
3815 }
3816 }
3817
3818#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3819 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3820 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3821
3822 s->flags |= __CMPXCHG_DOUBLE;
3823#endif
3824
3825
3826
3827
3828
3829 set_min_partial(s, ilog2(s->size) / 2);
3830
3831 set_cpu_partial(s);
3832
3833#ifdef CONFIG_NUMA
3834 s->remote_node_defrag_ratio = 1000;
3835#endif
3836
3837
3838 if (slab_state >= UP) {
3839 if (init_cache_random_seq(s))
3840 goto error;
3841 }
3842
3843 if (!init_kmem_cache_nodes(s))
3844 goto error;
3845
3846 if (alloc_kmem_cache_cpus(s))
3847 return 0;
3848
3849 free_kmem_cache_nodes(s);
3850error:
3851 if (flags & SLAB_PANIC)
3852 panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
3853 s->name, s->size, s->size,
3854 oo_order(s->oo), s->offset, (unsigned long)flags);
3855 return -EINVAL;
3856}
3857
3858static void list_slab_objects(struct kmem_cache *s, struct page *page,
3859 const char *text)
3860{
3861#ifdef CONFIG_SLUB_DEBUG
3862 void *addr = page_address(page);
3863 unsigned long *map;
3864 void *p;
3865
3866 slab_err(s, page, text, s->name);
3867 slab_lock(page);
3868
3869 map = get_map(s, page);
3870 for_each_object(p, s, addr, page->objects) {
3871
3872 if (!test_bit(__obj_to_index(s, addr, p), map)) {
3873 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3874 print_tracking(s, p);
3875 }
3876 }
3877 put_map(map);
3878 slab_unlock(page);
3879#endif
3880}
3881
3882
3883
3884
3885
3886
3887static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3888{
3889 LIST_HEAD(discard);
3890 struct page *page, *h;
3891
3892 BUG_ON(irqs_disabled());
3893 spin_lock_irq(&n->list_lock);
3894 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3895 if (!page->inuse) {
3896 remove_partial(n, page);
3897 list_add(&page->slab_list, &discard);
3898 } else {
3899 list_slab_objects(s, page,
3900 "Objects remaining in %s on __kmem_cache_shutdown()");
3901 }
3902 }
3903 spin_unlock_irq(&n->list_lock);
3904
3905 list_for_each_entry_safe(page, h, &discard, slab_list)
3906 discard_slab(s, page);
3907}
3908
3909bool __kmem_cache_empty(struct kmem_cache *s)
3910{
3911 int node;
3912 struct kmem_cache_node *n;
3913
3914 for_each_kmem_cache_node(s, node, n)
3915 if (n->nr_partial || slabs_node(s, node))
3916 return false;
3917 return true;
3918}
3919
3920
3921
3922
3923int __kmem_cache_shutdown(struct kmem_cache *s)
3924{
3925 int node;
3926 struct kmem_cache_node *n;
3927
3928 flush_all(s);
3929
3930 for_each_kmem_cache_node(s, node, n) {
3931 free_partial(s, n);
3932 if (n->nr_partial || slabs_node(s, node))
3933 return 1;
3934 }
3935 return 0;
3936}
3937
3938
3939/*
3940 * Kernel boot parameters that tune how slab page orders are chosen.
3941 */
3942static int __init setup_slub_min_order(char *str)
3943{
3944 get_option(&str, (int *)&slub_min_order);
3945
3946 return 1;
3947}
3948
3949__setup("slub_min_order=", setup_slub_min_order);
3950
3951static int __init setup_slub_max_order(char *str)
3952{
3953 get_option(&str, (int *)&slub_max_order);
3954 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
3955
3956 return 1;
3957}
3958
3959__setup("slub_max_order=", setup_slub_max_order);
3960
3961static int __init setup_slub_min_objects(char *str)
3962{
3963 get_option(&str, (int *)&slub_min_objects);
3964
3965 return 1;
3966}
3967
3968__setup("slub_min_objects=", setup_slub_min_objects);
3969
3970void *__kmalloc(size_t size, gfp_t flags)
3971{
3972 struct kmem_cache *s;
3973 void *ret;
3974
3975 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3976 return kmalloc_large(size, flags);
3977
3978 s = kmalloc_slab(size, flags);
3979
3980 if (unlikely(ZERO_OR_NULL_PTR(s)))
3981 return s;
3982
3983 ret = slab_alloc(s, flags, _RET_IP_);
3984
3985 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3986
3987 ret = kasan_kmalloc(s, ret, size, flags);
3988
3989 return ret;
3990}
3991EXPORT_SYMBOL(__kmalloc);
3992
3993#ifdef CONFIG_NUMA
3994static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3995{
3996 struct page *page;
3997 void *ptr = NULL;
3998 unsigned int order = get_order(size);
3999
4000 flags |= __GFP_COMP;
4001 page = alloc_pages_node(node, flags, order);
4002 if (page) {
4003 ptr = page_address(page);
4004 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4005 PAGE_SIZE << order);
4006 }
4007
4008 return kmalloc_large_node_hook(ptr, size, flags);
4009}
4010
4011void *__kmalloc_node(size_t size, gfp_t flags, int node)
4012{
4013 struct kmem_cache *s;
4014 void *ret;
4015
4016 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4017 ret = kmalloc_large_node(size, flags, node);
4018
4019 trace_kmalloc_node(_RET_IP_, ret,
4020 size, PAGE_SIZE << get_order(size),
4021 flags, node);
4022
4023 return ret;
4024 }
4025
4026 s = kmalloc_slab(size, flags);
4027
4028 if (unlikely(ZERO_OR_NULL_PTR(s)))
4029 return s;
4030
4031 ret = slab_alloc_node(s, flags, node, _RET_IP_);
4032
4033 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
4034
4035 ret = kasan_kmalloc(s, ret, size, flags);
4036
4037 return ret;
4038}
4039EXPORT_SYMBOL(__kmalloc_node);
4040#endif
4041
4042#ifdef CONFIG_HARDENED_USERCOPY
4043
4044
4045
4046
4047
4048
4049
4050
4051void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
4052 bool to_user)
4053{
4054 struct kmem_cache *s;
4055 unsigned int offset;
4056 size_t object_size;
4057
4058 ptr = kasan_reset_tag(ptr);
4059
4060
4061 s = page->slab_cache;
4062
4063
4064 if (ptr < page_address(page))
4065 usercopy_abort("SLUB object not in SLUB page?!", NULL,
4066 to_user, 0, n);
4067
4068
4069 offset = (ptr - page_address(page)) % s->size;
4070
4071
4072 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4073 if (offset < s->red_left_pad)
4074 usercopy_abort("SLUB object in left red zone",
4075 s->name, to_user, offset, n);
4076 offset -= s->red_left_pad;
4077 }
4078
4079
4080 if (offset >= s->useroffset &&
4081 offset - s->useroffset <= s->usersize &&
4082 n <= s->useroffset - offset + s->usersize)
4083 return;
4084
4085
4086
4087
4088
4089
4090
4091 object_size = slab_ksize(s);
4092 if (usercopy_fallback &&
4093 offset <= object_size && n <= object_size - offset) {
4094 usercopy_warn("SLUB object", s->name, to_user, offset, n);
4095 return;
4096 }
4097
4098 usercopy_abort("SLUB object", s->name, to_user, offset, n);
4099}
4100#endif
4101
4102size_t __ksize(const void *object)
4103{
4104 struct page *page;
4105
4106 if (unlikely(object == ZERO_SIZE_PTR))
4107 return 0;
4108
4109 page = virt_to_head_page(object);
4110
4111 if (unlikely(!PageSlab(page))) {
4112 WARN_ON(!PageCompound(page));
4113 return page_size(page);
4114 }
4115
4116 return slab_ksize(page->slab_cache);
4117}
4118EXPORT_SYMBOL(__ksize);
4119
4120void kfree(const void *x)
4121{
4122 struct page *page;
4123 void *object = (void *)x;
4124
4125 trace_kfree(_RET_IP_, x);
4126
4127 if (unlikely(ZERO_OR_NULL_PTR(x)))
4128 return;
4129
4130 page = virt_to_head_page(x);
4131 if (unlikely(!PageSlab(page))) {
4132 unsigned int order = compound_order(page);
4133
4134 BUG_ON(!PageCompound(page));
4135 kfree_hook(object);
4136 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4137 -(PAGE_SIZE << order));
4138 __free_pages(page, order);
4139 return;
4140 }
4141 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
4142}
4143EXPORT_SYMBOL(kfree);
4144
4145#define SHRINK_PROMOTE_MAX 32
4146
4147/*
4148 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
4149 * up most to the head of the partial lists. New allocations will then
4150 * fill those up and thus they can be removed from the partial lists.
4151 *
4152 * The slabs with the least items are placed last. This results in them
4153 * being allocated from last increasing the chance that the last objects
4154 * are freed in them.
4155 */
4156int __kmem_cache_shrink(struct kmem_cache *s)
4157{
4158 int node;
4159 int i;
4160 struct kmem_cache_node *n;
4161 struct page *page;
4162 struct page *t;
4163 struct list_head discard;
4164 struct list_head promote[SHRINK_PROMOTE_MAX];
4165 unsigned long flags;
4166 int ret = 0;
4167
4168 flush_all(s);
4169 for_each_kmem_cache_node(s, node, n) {
4170 INIT_LIST_HEAD(&discard);
4171 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4172 INIT_LIST_HEAD(promote + i);
4173
4174 spin_lock_irqsave(&n->list_lock, flags);
4175
4176
4177
4178
4179
4180
4181
4182 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
4183 int free = page->objects - page->inuse;
4184
4185
4186 barrier();
4187
4188
4189 BUG_ON(free <= 0);
4190
4191 if (free == page->objects) {
4192 list_move(&page->slab_list, &discard);
4193 n->nr_partial--;
4194 } else if (free <= SHRINK_PROMOTE_MAX)
4195 list_move(&page->slab_list, promote + free - 1);
4196 }
4197
4198
4199
4200
4201
4202 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4203 list_splice(promote + i, &n->partial);
4204
4205 spin_unlock_irqrestore(&n->list_lock, flags);
4206
4207
4208 list_for_each_entry_safe(page, t, &discard, slab_list)
4209 discard_slab(s, page);
4210
4211 if (slabs_node(s, node))
4212 ret = 1;
4213 }
4214
4215 return ret;
4216}
4217
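/*
 * Memory hotplug callbacks: shrink all caches before a node's memory goes
 * offline, drop the node from slab_nodes once it is gone, and make sure
 * every cache has a kmem_cache_node structure before a node with normal
 * memory comes online.
 */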
4218static int slab_mem_going_offline_callback(void *arg)
4219{
4220 struct kmem_cache *s;
4221
4222 mutex_lock(&slab_mutex);
4223 list_for_each_entry(s, &slab_caches, list)
4224 __kmem_cache_shrink(s);
4225 mutex_unlock(&slab_mutex);
4226
4227 return 0;
4228}
4229
4230static void slab_mem_offline_callback(void *arg)
4231{
4232 struct memory_notify *marg = arg;
4233 int offline_node;
4234
4235 offline_node = marg->status_change_nid_normal;
4236
4237
4238
4239
4240
4241 if (offline_node < 0)
4242 return;
4243
4244 mutex_lock(&slab_mutex);
4245 node_clear(offline_node, slab_nodes);
4246
4247
4248
4249
4250
4251 mutex_unlock(&slab_mutex);
4252}
4253
4254static int slab_mem_going_online_callback(void *arg)
4255{
4256 struct kmem_cache_node *n;
4257 struct kmem_cache *s;
4258 struct memory_notify *marg = arg;
4259 int nid = marg->status_change_nid_normal;
4260 int ret = 0;
4261
4262
4263
4264
4265
4266 if (nid < 0)
4267 return 0;
4268
4269
4270
4271
4272
4273
4274 mutex_lock(&slab_mutex);
4275 list_for_each_entry(s, &slab_caches, list) {
4276
4277
4278
4279
4280 if (get_node(s, nid))
4281 continue;
4282
4283
4284
4285
4286
4287 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4288 if (!n) {
4289 ret = -ENOMEM;
4290 goto out;
4291 }
4292 init_kmem_cache_node(n);
4293 s->node[nid] = n;
4294 }
4295
4296
4297
4298
4299 node_set(nid, slab_nodes);
4300out:
4301 mutex_unlock(&slab_mutex);
4302 return ret;
4303}
4304
4305static int slab_memory_callback(struct notifier_block *self,
4306 unsigned long action, void *arg)
4307{
4308 int ret = 0;
4309
4310 switch (action) {
4311 case MEM_GOING_ONLINE:
4312 ret = slab_mem_going_online_callback(arg);
4313 break;
4314 case MEM_GOING_OFFLINE:
4315 ret = slab_mem_going_offline_callback(arg);
4316 break;
4317 case MEM_OFFLINE:
4318 case MEM_CANCEL_ONLINE:
4319 slab_mem_offline_callback(arg);
4320 break;
4321 case MEM_ONLINE:
4322 case MEM_CANCEL_OFFLINE:
4323 break;
4324 }
4325 if (ret)
4326 ret = notifier_from_errno(ret);
4327 else
4328 ret = NOTIFY_OK;
4329 return ret;
4330}
4331
4332static struct notifier_block slab_memory_callback_nb = {
4333 .notifier_call = slab_memory_callback,
4334 .priority = SLAB_CALLBACK_PRI,
4335};
4336
4337
4338
4339
4340
4341
4342/*
4343 * Used for early kmem_cache structures that were allocated using
4344 * the page allocator. Allocate them properly then fix up the pointers
4345 * that may be pointing to the wrong kmem_cache structure.
4346 */
4347static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4348{
4349 int node;
4350 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4351 struct kmem_cache_node *n;
4352
4353 memcpy(s, static_cache, kmem_cache->object_size);
4354
4355
4356
4357
4358
4359
4360 __flush_cpu_slab(s, smp_processor_id());
4361 for_each_kmem_cache_node(s, node, n) {
4362 struct page *p;
4363
4364 list_for_each_entry(p, &n->partial, slab_list)
4365 p->slab_cache = s;
4366
4367#ifdef CONFIG_SLUB_DEBUG
4368 list_for_each_entry(p, &n->full, slab_list)
4369 p->slab_cache = s;
4370#endif
4371 }
4372 list_add(&s->list, &slab_caches);
4373 return s;
4374}
4375
4376void __init kmem_cache_init(void)
4377{
4378 static __initdata struct kmem_cache boot_kmem_cache,
4379 boot_kmem_cache_node;
4380 int node;
4381
4382 if (debug_guardpage_minorder())
4383 slub_max_order = 0;
4384
4385 kmem_cache_node = &boot_kmem_cache_node;
4386 kmem_cache = &boot_kmem_cache;
4387
4388
4389
4390
4391
4392 for_each_node_state(node, N_NORMAL_MEMORY)
4393 node_set(node, slab_nodes);
4394
4395 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4396 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4397
4398 register_hotmemory_notifier(&slab_memory_callback_nb);
4399
4400
4401 slab_state = PARTIAL;
4402
4403 create_boot_cache(kmem_cache, "kmem_cache",
4404 offsetof(struct kmem_cache, node) +
4405 nr_node_ids * sizeof(struct kmem_cache_node *),
4406 SLAB_HWCACHE_ALIGN, 0, 0);
4407
4408 kmem_cache = bootstrap(&boot_kmem_cache);
4409 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4410
4411
4412 setup_kmalloc_cache_index_table();
4413 create_kmalloc_caches(0);
4414
4415
4416 init_freelist_randomization();
4417
4418 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4419 slub_cpu_dead);
4420
4421 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4422 cache_line_size(),
4423 slub_min_order, slub_max_order, slub_min_objects,
4424 nr_cpu_ids, nr_node_ids);
4425}
4426
4427void __init kmem_cache_init_late(void)
4428{
4429}
4430
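/*
 * Cache aliasing: if a compatible cache already exists, find_mergeable()
 * returns it and we merely bump its refcount and create a sysfs alias
 * instead of instantiating a second slab cache.
 */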
4431struct kmem_cache *
4432__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4433 slab_flags_t flags, void (*ctor)(void *))
4434{
4435 struct kmem_cache *s;
4436
4437 s = find_mergeable(size, align, flags, name, ctor);
4438 if (s) {
4439 s->refcount++;
4440
4441
4442
4443
4444
4445 s->object_size = max(s->object_size, size);
4446 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4447
4448 if (sysfs_slab_alias(s, name)) {
4449 s->refcount--;
4450 s = NULL;
4451 }
4452 }
4453
4454 return s;
4455}
4456
4457int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4458{
4459 int err;
4460
4461 err = kmem_cache_open(s, flags);
4462 if (err)
4463 return err;
4464
4465
4466 if (slab_state <= UP)
4467 return 0;
4468
4469 err = sysfs_slab_add(s);
4470 if (err)
4471 __kmem_cache_release(s);
4472
4473 return err;
4474}
4475
4476void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4477{
4478 struct kmem_cache *s;
4479 void *ret;
4480
4481 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4482 return kmalloc_large(size, gfpflags);
4483
4484 s = kmalloc_slab(size, gfpflags);
4485
4486 if (unlikely(ZERO_OR_NULL_PTR(s)))
4487 return s;
4488
4489 ret = slab_alloc(s, gfpflags, caller);
4490
4491
4492 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4493
4494 return ret;
4495}
4496EXPORT_SYMBOL(__kmalloc_track_caller);
4497
4498#ifdef CONFIG_NUMA
4499void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4500 int node, unsigned long caller)
4501{
4502 struct kmem_cache *s;
4503 void *ret;
4504
4505 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4506 ret = kmalloc_large_node(size, gfpflags, node);
4507
4508 trace_kmalloc_node(caller, ret,
4509 size, PAGE_SIZE << get_order(size),
4510 gfpflags, node);
4511
4512 return ret;
4513 }
4514
4515 s = kmalloc_slab(size, gfpflags);
4516
4517 if (unlikely(ZERO_OR_NULL_PTR(s)))
4518 return s;
4519
4520 ret = slab_alloc_node(s, gfpflags, node, caller);
4521
4522
4523 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4524
4525 return ret;
4526}
4527EXPORT_SYMBOL(__kmalloc_node_track_caller);
4528#endif
4529
4530#ifdef CONFIG_SYSFS
4531static int count_inuse(struct page *page)
4532{
4533 return page->inuse;
4534}
4535
4536static int count_total(struct page *page)
4537{
4538 return page->objects;
4539}
4540#endif
4541
4542#ifdef CONFIG_SLUB_DEBUG
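/*
 * On-demand consistency checking, triggered by writing '1' to the
 * "validate" sysfs attribute: walk every partial slab (and every full slab
 * when SLAB_STORE_USER is set) and re-run the object checks.
 */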
4543static void validate_slab(struct kmem_cache *s, struct page *page)
4544{
4545 void *p;
4546 void *addr = page_address(page);
4547 unsigned long *map;
4548
4549 slab_lock(page);
4550
4551 if (!check_slab(s, page) || !on_freelist(s, page, NULL))
4552 goto unlock;
4553
4554
4555 map = get_map(s, page);
4556 for_each_object(p, s, addr, page->objects) {
4557 u8 val = test_bit(__obj_to_index(s, addr, p), map) ?
4558 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
4559
4560 if (!check_object(s, page, p, val))
4561 break;
4562 }
4563 put_map(map);
4564unlock:
4565 slab_unlock(page);
4566}
4567
4568static int validate_slab_node(struct kmem_cache *s,
4569 struct kmem_cache_node *n)
4570{
4571 unsigned long count = 0;
4572 struct page *page;
4573 unsigned long flags;
4574
4575 spin_lock_irqsave(&n->list_lock, flags);
4576
4577 list_for_each_entry(page, &n->partial, slab_list) {
4578 validate_slab(s, page);
4579 count++;
4580 }
4581 if (count != n->nr_partial)
4582 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4583 s->name, count, n->nr_partial);
4584
4585 if (!(s->flags & SLAB_STORE_USER))
4586 goto out;
4587
4588 list_for_each_entry(page, &n->full, slab_list) {
4589 validate_slab(s, page);
4590 count++;
4591 }
4592 if (count != atomic_long_read(&n->nr_slabs))
4593 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4594 s->name, count, atomic_long_read(&n->nr_slabs));
4595
4596out:
4597 spin_unlock_irqrestore(&n->list_lock, flags);
4598 return count;
4599}
4600
4601static long validate_slab_cache(struct kmem_cache *s)
4602{
4603 int node;
4604 unsigned long count = 0;
4605 struct kmem_cache_node *n;
4606
4607 flush_all(s);
4608 for_each_kmem_cache_node(s, node, n)
4609 count += validate_slab_node(s, n);
4610
4611 return count;
4612}
4613
4614/*
4615 * Generate lists of code addresses where slabcache objects are allocated
4616 * and freed.
4617 */
4618struct location {
4619 unsigned long count;
4620 unsigned long addr;
4621 long long sum_time;
4622 long min_time;
4623 long max_time;
4624 long min_pid;
4625 long max_pid;
4626 DECLARE_BITMAP(cpus, NR_CPUS);
4627 nodemask_t nodes;
4628};
4629
4630struct loc_track {
4631 unsigned long max;
4632 unsigned long count;
4633 struct location *loc;
4634};
4635
4636static void free_loc_track(struct loc_track *t)
4637{
4638 if (t->max)
4639 free_pages((unsigned long)t->loc,
4640 get_order(sizeof(struct location) * t->max));
4641}
4642
4643static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4644{
4645 struct location *l;
4646 int order;
4647
4648 order = get_order(sizeof(struct location) * max);
4649
4650 l = (void *)__get_free_pages(flags, order);
4651 if (!l)
4652 return 0;
4653
4654 if (t->count) {
4655 memcpy(l, t->loc, sizeof(struct location) * t->count);
4656 free_loc_track(t);
4657 }
4658 t->max = max;
4659 t->loc = l;
4660 return 1;
4661}
4662
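/*
 * Record one tracking entry in the sorted location table: binary search for
 * an existing entry with the same caller address and update its statistics,
 * otherwise grow the table if necessary and insert a new entry at the
 * appropriate position.
 */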
4663static int add_location(struct loc_track *t, struct kmem_cache *s,
4664 const struct track *track)
4665{
4666 long start, end, pos;
4667 struct location *l;
4668 unsigned long caddr;
4669 unsigned long age = jiffies - track->when;
4670
4671 start = -1;
4672 end = t->count;
4673
4674 for ( ; ; ) {
4675 pos = start + (end - start + 1) / 2;
4676
4677
4678
4679
4680
4681 if (pos == end)
4682 break;
4683
4684 caddr = t->loc[pos].addr;
4685 if (track->addr == caddr) {
4686
4687 l = &t->loc[pos];
4688 l->count++;
4689 if (track->when) {
4690 l->sum_time += age;
4691 if (age < l->min_time)
4692 l->min_time = age;
4693 if (age > l->max_time)
4694 l->max_time = age;
4695
4696 if (track->pid < l->min_pid)
4697 l->min_pid = track->pid;
4698 if (track->pid > l->max_pid)
4699 l->max_pid = track->pid;
4700
4701 cpumask_set_cpu(track->cpu,
4702 to_cpumask(l->cpus));
4703 }
4704 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4705 return 1;
4706 }
4707
4708 if (track->addr < caddr)
4709 end = pos;
4710 else
4711 start = pos;
4712 }
4713
4714
4715
4716
4717 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4718 return 0;
4719
4720 l = t->loc + pos;
4721 if (pos < t->count)
4722 memmove(l + 1, l,
4723 (t->count - pos) * sizeof(struct location));
4724 t->count++;
4725 l->count = 1;
4726 l->addr = track->addr;
4727 l->sum_time = age;
4728 l->min_time = age;
4729 l->max_time = age;
4730 l->min_pid = track->pid;
4731 l->max_pid = track->pid;
4732 cpumask_clear(to_cpumask(l->cpus));
4733 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4734 nodes_clear(l->nodes);
4735 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4736 return 1;
4737}
4738
4739static void process_slab(struct loc_track *t, struct kmem_cache *s,
4740 struct page *page, enum track_item alloc)
4741{
4742 void *addr = page_address(page);
4743 void *p;
4744 unsigned long *map;
4745
4746 map = get_map(s, page);
4747 for_each_object(p, s, addr, page->objects)
4748 if (!test_bit(__obj_to_index(s, addr, p), map))
4749 add_location(t, s, get_track(s, p, alloc));
4750 put_map(map);
4751}
4752
4753static int list_locations(struct kmem_cache *s, char *buf,
4754 enum track_item alloc)
4755{
4756 int len = 0;
4757 unsigned long i;
4758 struct loc_track t = { 0, 0, NULL };
4759 int node;
4760 struct kmem_cache_node *n;
4761
4762 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4763 GFP_KERNEL)) {
4764 return sysfs_emit(buf, "Out of memory\n");
4765 }
4766
4767 flush_all(s);
4768
4769 for_each_kmem_cache_node(s, node, n) {
4770 unsigned long flags;
4771 struct page *page;
4772
4773 if (!atomic_long_read(&n->nr_slabs))
4774 continue;
4775
4776 spin_lock_irqsave(&n->list_lock, flags);
4777 list_for_each_entry(page, &n->partial, slab_list)
4778 process_slab(&t, s, page, alloc);
4779 list_for_each_entry(page, &n->full, slab_list)
4780 process_slab(&t, s, page, alloc);
4781 spin_unlock_irqrestore(&n->list_lock, flags);
4782 }
4783
4784 for (i = 0; i < t.count; i++) {
4785 struct location *l = &t.loc[i];
4786
4787 len += sysfs_emit_at(buf, len, "%7ld ", l->count);
4788
4789 if (l->addr)
4790 len += sysfs_emit_at(buf, len, "%pS", (void *)l->addr);
4791 else
4792 len += sysfs_emit_at(buf, len, "<not-available>");
4793
4794 if (l->sum_time != l->min_time)
4795 len += sysfs_emit_at(buf, len, " age=%ld/%ld/%ld",
4796 l->min_time,
4797 (long)div_u64(l->sum_time,
4798 l->count),
4799 l->max_time);
4800 else
4801 len += sysfs_emit_at(buf, len, " age=%ld", l->min_time);
4802
4803 if (l->min_pid != l->max_pid)
4804 len += sysfs_emit_at(buf, len, " pid=%ld-%ld",
4805 l->min_pid, l->max_pid);
4806 else
4807 len += sysfs_emit_at(buf, len, " pid=%ld",
4808 l->min_pid);
4809
4810 if (num_online_cpus() > 1 &&
4811 !cpumask_empty(to_cpumask(l->cpus)))
4812 len += sysfs_emit_at(buf, len, " cpus=%*pbl",
4813 cpumask_pr_args(to_cpumask(l->cpus)));
4814
4815 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
4816 len += sysfs_emit_at(buf, len, " nodes=%*pbl",
4817 nodemask_pr_args(&l->nodes));
4818
4819 len += sysfs_emit_at(buf, len, "\n");
4820 }
4821
4822 free_loc_track(&t);
4823 if (!t.count)
4824 len += sysfs_emit_at(buf, len, "No data\n");
4825
4826 return len;
4827}
4828#endif
4829
4830#ifdef SLUB_RESILIENCY_TEST
4831static void __init resiliency_test(void)
4832{
4833 u8 *p;
4834 int type = KMALLOC_NORMAL;
4835
4836 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4837
4838 pr_err("SLUB resiliency testing\n");
4839 pr_err("-----------------------\n");
4840 pr_err("A. Corruption after allocation\n");
4841
4842 p = kzalloc(16, GFP_KERNEL);
4843 p[16] = 0x12;
4844 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4845 p + 16);
4846
4847 validate_slab_cache(kmalloc_caches[type][4]);
4848
4849
4850 p = kzalloc(32, GFP_KERNEL);
4851 p[32 + sizeof(void *)] = 0x34;
4852 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4853 p);
4854 pr_err("If allocated object is overwritten then not detectable\n\n");
4855
4856 validate_slab_cache(kmalloc_caches[type][5]);
4857 p = kzalloc(64, GFP_KERNEL);
4858 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4859 *p = 0x56;
4860 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4861 p);
4862 pr_err("If allocated object is overwritten then not detectable\n\n");
4863 validate_slab_cache(kmalloc_caches[type][6]);
4864
4865 pr_err("\nB. Corruption after free\n");
4866 p = kzalloc(128, GFP_KERNEL);
4867 kfree(p);
4868 *p = 0x78;
4869 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4870 validate_slab_cache(kmalloc_caches[type][7]);
4871
4872 p = kzalloc(256, GFP_KERNEL);
4873 kfree(p);
4874 p[50] = 0x9a;
4875 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4876 validate_slab_cache(kmalloc_caches[type][8]);
4877
4878 p = kzalloc(512, GFP_KERNEL);
4879 kfree(p);
4880 p[512] = 0xab;
4881 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4882 validate_slab_cache(kmalloc_caches[type][9]);
4883}
4884#else
4885#ifdef CONFIG_SYSFS
4886static void resiliency_test(void) {};
4887#endif
4888#endif
4889
4890#ifdef CONFIG_SYSFS
4891enum slab_stat_type {
4892 SL_ALL,
4893 SL_PARTIAL,
4894 SL_CPU,
4895 SL_OBJECTS,
4896 SL_TOTAL
4897};
4898
4899#define SO_ALL (1 << SL_ALL)
4900#define SO_PARTIAL (1 << SL_PARTIAL)
4901#define SO_CPU (1 << SL_CPU)
4902#define SO_OBJECTS (1 << SL_OBJECTS)
4903#define SO_TOTAL (1 << SL_TOTAL)
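
/*
 * show_slab_objects() combines these flags: SO_ALL, SO_PARTIAL and SO_CPU
 * select which slabs are counted, while SO_OBJECTS and SO_TOTAL switch the
 * reported unit from slabs to in-use or total objects.
 */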
4904
4905static ssize_t show_slab_objects(struct kmem_cache *s,
4906 char *buf, unsigned long flags)
4907{
4908 unsigned long total = 0;
4909 int node;
4910 int x;
4911 unsigned long *nodes;
4912 int len = 0;
4913
4914 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4915 if (!nodes)
4916 return -ENOMEM;
4917
4918 if (flags & SO_CPU) {
4919 int cpu;
4920
4921 for_each_possible_cpu(cpu) {
4922 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4923 cpu);
4924 int node;
4925 struct page *page;
4926
4927 page = READ_ONCE(c->page);
4928 if (!page)
4929 continue;
4930
4931 node = page_to_nid(page);
4932 if (flags & SO_TOTAL)
4933 x = page->objects;
4934 else if (flags & SO_OBJECTS)
4935 x = page->inuse;
4936 else
4937 x = 1;
4938
4939 total += x;
4940 nodes[node] += x;
4941
4942 page = slub_percpu_partial_read_once(c);
4943 if (page) {
4944 node = page_to_nid(page);
4945 if (flags & SO_TOTAL)
4946 WARN_ON_ONCE(1);
4947 else if (flags & SO_OBJECTS)
4948 WARN_ON_ONCE(1);
4949 else
4950 x = page->pages;
4951 total += x;
4952 nodes[node] += x;
4953 }
4954 }
4955 }
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968#ifdef CONFIG_SLUB_DEBUG
4969 if (flags & SO_ALL) {
4970 struct kmem_cache_node *n;
4971
4972 for_each_kmem_cache_node(s, node, n) {
4973
4974 if (flags & SO_TOTAL)
4975 x = atomic_long_read(&n->total_objects);
4976 else if (flags & SO_OBJECTS)
4977 x = atomic_long_read(&n->total_objects) -
4978 count_partial(n, count_free);
4979 else
4980 x = atomic_long_read(&n->nr_slabs);
4981 total += x;
4982 nodes[node] += x;
4983 }
4984
4985 } else
4986#endif
4987 if (flags & SO_PARTIAL) {
4988 struct kmem_cache_node *n;
4989
4990 for_each_kmem_cache_node(s, node, n) {
4991 if (flags & SO_TOTAL)
4992 x = count_partial(n, count_total);
4993 else if (flags & SO_OBJECTS)
4994 x = count_partial(n, count_inuse);
4995 else
4996 x = n->nr_partial;
4997 total += x;
4998 nodes[node] += x;
4999 }
5000 }
5001
5002 len += sysfs_emit_at(buf, len, "%lu", total);
5003#ifdef CONFIG_NUMA
5004 for (node = 0; node < nr_node_ids; node++) {
5005 if (nodes[node])
5006 len += sysfs_emit_at(buf, len, " N%d=%lu",
5007 node, nodes[node]);
5008 }
5009#endif
5010 len += sysfs_emit_at(buf, len, "\n");
5011 kfree(nodes);
5012
5013 return len;
5014}
5015
5016#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5017#define to_slab(n) container_of(n, struct kmem_cache, kobj)
5018
5019struct slab_attribute {
5020 struct attribute attr;
5021 ssize_t (*show)(struct kmem_cache *s, char *buf);
5022 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5023};
5024
5025#define SLAB_ATTR_RO(_name) \
5026 static struct slab_attribute _name##_attr = \
5027 __ATTR(_name, 0400, _name##_show, NULL)
5028
5029#define SLAB_ATTR(_name) \
5030 static struct slab_attribute _name##_attr = \
5031 __ATTR(_name, 0600, _name##_show, _name##_store)
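
/*
 * Each attribute defined below becomes a file under /sys/kernel/slab/<cache>/.
 * For example, SLAB_ATTR_RO(order) creates a read-only "order" file backed by
 * order_show(), while SLAB_ATTR(min_partial) additionally wires up
 * min_partial_store() for writes.
 */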
5032
5033static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5034{
5035 return sysfs_emit(buf, "%u\n", s->size);
5036}
5037SLAB_ATTR_RO(slab_size);
5038
5039static ssize_t align_show(struct kmem_cache *s, char *buf)
5040{
5041 return sysfs_emit(buf, "%u\n", s->align);
5042}
5043SLAB_ATTR_RO(align);
5044
5045static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5046{
5047 return sysfs_emit(buf, "%u\n", s->object_size);
5048}
5049SLAB_ATTR_RO(object_size);
5050
5051static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5052{
5053 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5054}
5055SLAB_ATTR_RO(objs_per_slab);
5056
5057static ssize_t order_show(struct kmem_cache *s, char *buf)
5058{
5059 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5060}
5061SLAB_ATTR_RO(order);
5062
5063static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5064{
5065 return sysfs_emit(buf, "%lu\n", s->min_partial);
5066}
5067
5068static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5069 size_t length)
5070{
5071 unsigned long min;
5072 int err;
5073
5074 err = kstrtoul(buf, 10, &min);
5075 if (err)
5076 return err;
5077
5078 set_min_partial(s, min);
5079 return length;
5080}
5081SLAB_ATTR(min_partial);
5082
5083static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5084{
5085 return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
5086}
5087
5088static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5089 size_t length)
5090{
5091 unsigned int objects;
5092 int err;
5093
5094 err = kstrtouint(buf, 10, &objects);
5095 if (err)
5096 return err;
5097 if (objects && !kmem_cache_has_cpu_partial(s))
5098 return -EINVAL;
5099
5100 slub_set_cpu_partial(s, objects);
5101 flush_all(s);
5102 return length;
5103}
5104SLAB_ATTR(cpu_partial);
5105
5106static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5107{
5108 if (!s->ctor)
5109 return 0;
5110 return sysfs_emit(buf, "%pS\n", s->ctor);
5111}
5112SLAB_ATTR_RO(ctor);
5113
5114static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5115{
5116 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5117}
5118SLAB_ATTR_RO(aliases);
5119
5120static ssize_t partial_show(struct kmem_cache *s, char *buf)
5121{
5122 return show_slab_objects(s, buf, SO_PARTIAL);
5123}
5124SLAB_ATTR_RO(partial);
5125
5126static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5127{
5128 return show_slab_objects(s, buf, SO_CPU);
5129}
5130SLAB_ATTR_RO(cpu_slabs);
5131
5132static ssize_t objects_show(struct kmem_cache *s, char *buf)
5133{
5134 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5135}
5136SLAB_ATTR_RO(objects);
5137
5138static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5139{
5140 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5141}
5142SLAB_ATTR_RO(objects_partial);
5143
5144static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5145{
5146 int objects = 0;
5147 int pages = 0;
5148 int cpu;
5149 int len = 0;
5150
5151 for_each_online_cpu(cpu) {
5152 struct page *page;
5153
5154 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5155
5156 if (page) {
5157 pages += page->pages;
5158 objects += page->pobjects;
5159 }
5160 }
5161
5162 len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
5163
5164#ifdef CONFIG_SMP
5165 for_each_online_cpu(cpu) {
5166 struct page *page;
5167
5168 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5169 if (page)
5170 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5171 cpu, page->pobjects, page->pages);
5172 }
5173#endif
5174 len += sysfs_emit_at(buf, len, "\n");
5175
5176 return len;
5177}
5178SLAB_ATTR_RO(slabs_cpu_partial);
5179
5180static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5181{
5182 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5183}
5184SLAB_ATTR_RO(reclaim_account);
5185
5186static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5187{
5188 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5189}
5190SLAB_ATTR_RO(hwcache_align);
5191
5192#ifdef CONFIG_ZONE_DMA
5193static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5194{
5195 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5196}
5197SLAB_ATTR_RO(cache_dma);
5198#endif
5199
5200static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5201{
5202 return sysfs_emit(buf, "%u\n", s->usersize);
5203}
5204SLAB_ATTR_RO(usersize);
5205
5206static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5207{
5208 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5209}
5210SLAB_ATTR_RO(destroy_by_rcu);
5211
5212#ifdef CONFIG_SLUB_DEBUG
5213static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5214{
5215 return show_slab_objects(s, buf, SO_ALL);
5216}
5217SLAB_ATTR_RO(slabs);
5218
5219static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5220{
5221 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5222}
5223SLAB_ATTR_RO(total_objects);
5224
5225static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5226{
5227 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5228}
5229SLAB_ATTR_RO(sanity_checks);
5230
5231static ssize_t trace_show(struct kmem_cache *s, char *buf)
5232{
5233 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5234}
5235SLAB_ATTR_RO(trace);
5236
5237static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5238{
5239 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5240}
5241
5242SLAB_ATTR_RO(red_zone);
5243
5244static ssize_t poison_show(struct kmem_cache *s, char *buf)
5245{
5246 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
5247}
5248
5249SLAB_ATTR_RO(poison);
5250
5251static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5252{
5253 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5254}
5255
5256SLAB_ATTR_RO(store_user);
5257
5258static ssize_t validate_show(struct kmem_cache *s, char *buf)
5259{
5260 return 0;
5261}
5262
5263static ssize_t validate_store(struct kmem_cache *s,
5264 const char *buf, size_t length)
5265{
5266 int ret = -EINVAL;
5267
5268 if (buf[0] == '1') {
5269 ret = validate_slab_cache(s);
5270 if (ret >= 0)
5271 ret = length;
5272 }
5273 return ret;
5274}
5275SLAB_ATTR(validate);
5276
static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_ALLOC);
}
SLAB_ATTR_RO(alloc_calls);

static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_FREE);
}
SLAB_ATTR_RO(free_calls);
#endif	/* CONFIG_SLUB_DEBUG */

#ifdef CONFIG_FAILSLAB
static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}
SLAB_ATTR_RO(failslab);
#endif

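/*
 * Writing '1' to the "shrink" attribute releases empty slabs back to the
 * page allocator, for example:
 *	echo 1 > /sys/kernel/slab/kmalloc-64/shrink
 * ("kmalloc-64" is just an example cache name.)
 */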
static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	if (buf[0] == '1')
		kmem_cache_shrink(s);
	else
		return -EINVAL;
	return length;
}
SLAB_ATTR(shrink);

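/*
 * remote_node_defrag_ratio (NUMA only) is presented to user space as a
 * percentage (0-100) but stored internally scaled by a factor of 10,
 * hence the conversions in the handlers below.
 */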
#ifdef CONFIG_NUMA
static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
}

static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	unsigned int ratio;
	int err;

	err = kstrtouint(buf, 10, &ratio);
	if (err)
		return err;
	if (ratio > 100)
		return -ERANGE;

	s->remote_node_defrag_ratio = ratio * 10;

	return length;
}
SLAB_ATTR(remote_node_defrag_ratio);
#endif

#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	unsigned long sum = 0;
	int cpu;
	int len = 0;
	int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
	}

	len += sysfs_emit_at(buf, len, "%lu", sum);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		if (data[cpu])
			len += sysfs_emit_at(buf, len, " C%d=%u",
					     cpu, data[cpu]);
	}
#endif
	kfree(data);
	len += sysfs_emit_at(buf, len, "\n");

	return len;
}

static void clear_stat(struct kmem_cache *s, enum stat_item si)
{
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
}

#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);

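/*
 * One sysfs file per event counter.  Reading shows the total followed by
 * per-cpu " C<n>=<count>" entries; writing '0' clears the counter on all
 * CPUs, e.g.:
 *	echo 0 > /sys/kernel/slab/<cache>/alloc_fastpath
 */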
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif	/* CONFIG_SLUB_STATS */

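/*
 * Every attribute exposed under /sys/kernel/slab/<cache>/.  Attributes not
 * defined in this section (slab_size, order, ctor, ...) are declared
 * earlier in the file.
 */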
static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
	&usersize_attr.attr,

	NULL
};

static const struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

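/*
 * Generic kobject sysfs ->show()/->store() entry points: dispatch to the
 * per-attribute handlers in struct slab_attribute.
 */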
static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
	return err;
}

static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
	return slab_kset;
}

#define ID_STR_LENGTH 64

/*
 * Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';
	/*
	 * Encode the flags that keep otherwise mergeable caches apart,
	 * followed by the object size.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_CACHE_DMA32)
		*p++ = 'D';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07u", s->size);

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}

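/*
 * Register a cache with sysfs.  Unmergeable caches use their own name as
 * the directory name; mergeable caches get a generated ":<flags>-<size>"
 * id plus a symlink per alias (see sysfs_slab_alias() below).
 */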
static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	if (!kset) {
		kobject_init(&s->kobj, &slab_ktype);
		return 0;
	}

	if (!unmergeable && disable_higher_order_debug &&
			(slub_debug & DEBUG_METADATA_FLAGS))
		unmergeable = 1;

	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}

void sysfs_slab_unlink(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_del(&s->kobj);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

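/*
 * Runs once sysfs is up: register every cache created during early boot
 * and create the alias links that were queued on alias_list meanwhile.
 */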
static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif	/* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif	/* CONFIG_SLUB_DEBUG */