/*
 * SLUB slab allocator implementation.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>

#include <trace/events/kmem.h>

#include "internal.h"

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
        return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
        return 0;
#endif
}

void *fixup_red_left(struct kmem_cache *s, void *p)
{
        if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
                p += s->red_left_pad;

        return p;
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
        return !kmem_cache_debug(s);
#else
        return false;
#endif
}

#undef SLUB_RESILIENCY_TEST
#undef SLUB_DEBUG_CMPXCHG

#define MIN_PARTIAL 5

#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
                SLAB_POISON | SLAB_STORE_USER)

#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
                SLAB_TRACE)

#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE 32767

#define __OBJECT_POISON 0x80000000UL
#define __CMPXCHG_DOUBLE 0x40000000UL

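/*
 * When SLAB_STORE_USER is set, two struct track records (TRACK_ALLOC and
 * TRACK_FREE) are kept in each object's metadata area.  They record the
 * caller address, an optional deeper stack trace, the cpu, the pid and a
 * jiffies timestamp of the last allocation and free.
 */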
#define TRACK_ADDRS_COUNT 16
struct track {
        unsigned long addr;
#ifdef CONFIG_STACKTRACE
        unsigned long addrs[TRACK_ADDRS_COUNT];
#endif
        int cpu;
        int pid;
        unsigned long when;
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
                                                        { return 0; }
static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif
225
226static inline void stat(const struct kmem_cache *s, enum stat_item si)
227{
228#ifdef CONFIG_SLUB_STATS
229
230
231
232
233 raw_cpu_inc(s->cpu_slab->stat[si]);
234#endif
235}
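/*
 * The freelist pointer of a free object is stored inside the object at
 * offset s->offset.  get_freepointer()/set_freepointer() follow and set
 * that link; get_freepointer_safe() uses probe_kernel_read() so a fault
 * on a page unmapped by debug_pagealloc does not crash the kernel.
 */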
241static inline void *get_freepointer(struct kmem_cache *s, void *object)
242{
243 return *(void **)(object + s->offset);
244}
245
246static void prefetch_freepointer(const struct kmem_cache *s, void *object)
247{
248 prefetch(object + s->offset);
249}
250
251static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
252{
253 void *p;
254
255 if (!debug_pagealloc_enabled())
256 return get_freepointer(s, object);
257
258 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
259 return p;
260}
261
262static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
263{
264 *(void **)(object + s->offset) = fp;
265}
266
267
268#define for_each_object(__p, __s, __addr, __objects) \
269 for (__p = fixup_red_left(__s, __addr); \
270 __p < (__addr) + (__objects) * (__s)->size; \
271 __p += (__s)->size)
272
273#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
274 for (__p = fixup_red_left(__s, __addr), __idx = 1; \
275 __idx <= __objects; \
276 __p += (__s)->size, __idx++)
277
278
279static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
280{
281 return (p - addr) / s->size;
282}
283
284static inline int order_objects(int order, unsigned long size, int reserved)
285{
286 return ((PAGE_SIZE << order) - reserved) / size;
287}
288
289static inline struct kmem_cache_order_objects oo_make(int order,
290 unsigned long size, int reserved)
291{
292 struct kmem_cache_order_objects x = {
293 (order << OO_SHIFT) + order_objects(order, size, reserved)
294 };
295
296 return x;
297}
298
299static inline int oo_order(struct kmem_cache_order_objects x)
300{
301 return x.x >> OO_SHIFT;
302}
303
304static inline int oo_objects(struct kmem_cache_order_objects x)
305{
306 return x.x & OO_MASK;
307}
308
309
310
311
312static __always_inline void slab_lock(struct page *page)
313{
314 VM_BUG_ON_PAGE(PageTail(page), page);
315 bit_spin_lock(PG_locked, &page->flags);
316}
317
318static __always_inline void slab_unlock(struct page *page)
319{
320 VM_BUG_ON_PAGE(PageTail(page), page);
321 __bit_spin_unlock(PG_locked, &page->flags);
322}
323
324static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
325{
326 struct page tmp;
327 tmp.counters = counters_new;
328
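 /*
  * page->counters overlays the frozen/inuse/objects fields, so decode
  * counters_new through a temporary struct page and copy the fields
  * individually instead of assigning to page->counters directly.
  */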
334 page->frozen = tmp.frozen;
335 page->inuse = tmp.inuse;
336 page->objects = tmp.objects;
337}
338
339
340static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
341 void *freelist_old, unsigned long counters_old,
342 void *freelist_new, unsigned long counters_new,
343 const char *n)
344{
345 VM_BUG_ON(!irqs_disabled());
346#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
347 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
348 if (s->flags & __CMPXCHG_DOUBLE) {
349 if (cmpxchg_double(&page->freelist, &page->counters,
350 freelist_old, counters_old,
351 freelist_new, counters_new))
352 return true;
353 } else
354#endif
355 {
356 slab_lock(page);
357 if (page->freelist == freelist_old &&
358 page->counters == counters_old) {
359 page->freelist = freelist_new;
360 set_page_slub_counters(page, counters_new);
361 slab_unlock(page);
362 return true;
363 }
364 slab_unlock(page);
365 }
366
367 cpu_relax();
368 stat(s, CMPXCHG_DOUBLE_FAIL);
369
370#ifdef SLUB_DEBUG_CMPXCHG
371 pr_info("%s %s: cmpxchg double redo ", n, s->name);
372#endif
373
374 return false;
375}
376
377static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
378 void *freelist_old, unsigned long counters_old,
379 void *freelist_new, unsigned long counters_new,
380 const char *n)
381{
382#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
383 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
384 if (s->flags & __CMPXCHG_DOUBLE) {
385 if (cmpxchg_double(&page->freelist, &page->counters,
386 freelist_old, counters_old,
387 freelist_new, counters_new))
388 return true;
389 } else
390#endif
391 {
392 unsigned long flags;
393
394 local_irq_save(flags);
395 slab_lock(page);
396 if (page->freelist == freelist_old &&
397 page->counters == counters_old) {
398 page->freelist = freelist_new;
399 set_page_slub_counters(page, counters_new);
400 slab_unlock(page);
401 local_irq_restore(flags);
402 return true;
403 }
404 slab_unlock(page);
405 local_irq_restore(flags);
406 }
407
408 cpu_relax();
409 stat(s, CMPXCHG_DOUBLE_FAIL);
410
411#ifdef SLUB_DEBUG_CMPXCHG
412 pr_info("%s %s: cmpxchg double redo ", n, s->name);
413#endif
414
415 return false;
416}
417
418#ifdef CONFIG_SLUB_DEBUG
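/*
 * Build a bitmap of the objects that are currently on the page's
 * freelist, so the debug code can tell free objects from allocated ones.
 */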
425static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
426{
427 void *p;
428 void *addr = page_address(page);
429
430 for (p = page->freelist; p; p = get_freepointer(s, p))
431 set_bit(slab_index(p, s, addr), map);
432}
433
434static inline int size_from_object(struct kmem_cache *s)
435{
436 if (s->flags & SLAB_RED_ZONE)
437 return s->size - s->red_left_pad;
438
439 return s->size;
440}
441
442static inline void *restore_red_left(struct kmem_cache *s, void *p)
443{
444 if (s->flags & SLAB_RED_ZONE)
445 p -= s->red_left_pad;
446
447 return p;
448}
449
450
451
452
453#if defined(CONFIG_SLUB_DEBUG_ON)
454static int slub_debug = DEBUG_DEFAULT_FLAGS;
455#else
456static int slub_debug;
457#endif
458
459static char *slub_debug_slabs;
460static int disable_higher_order_debug;
461
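/*
 * metadata_access_enable()/metadata_access_disable() bracket accesses to
 * SLUB's own debug metadata (red zones, poison bytes, tracking data) and
 * simply suspend KASAN checking for the current task while doing so.
 */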
468static inline void metadata_access_enable(void)
469{
470 kasan_disable_current();
471}
472
473static inline void metadata_access_disable(void)
474{
475 kasan_enable_current();
476}
477
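/*
 * Check that a pointer really is the start of an object on this slab
 * page: after undoing the left red zone it must lie inside the page's
 * object area and be a multiple of s->size from the base address.
 */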
483static inline int check_valid_pointer(struct kmem_cache *s,
484 struct page *page, void *object)
485{
486 void *base;
487
488 if (!object)
489 return 1;
490
491 base = page_address(page);
492 object = restore_red_left(s, object);
493 if (object < base || object >= base + page->objects * s->size ||
494 (object - base) % s->size) {
495 return 0;
496 }
497
498 return 1;
499}
500
501static void print_section(char *level, char *text, u8 *addr,
502 unsigned int length)
503{
504 metadata_access_enable();
505 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
506 length, 1);
507 metadata_access_disable();
508}
509
510static struct track *get_track(struct kmem_cache *s, void *object,
511 enum track_item alloc)
512{
513 struct track *p;
514
515 if (s->offset)
516 p = object + s->offset + sizeof(void *);
517 else
518 p = object + s->inuse;
519
520 return p + alloc;
521}
522
523static void set_track(struct kmem_cache *s, void *object,
524 enum track_item alloc, unsigned long addr)
525{
526 struct track *p = get_track(s, object, alloc);
527
528 if (addr) {
529#ifdef CONFIG_STACKTRACE
530 struct stack_trace trace;
531 int i;
532
533 trace.nr_entries = 0;
534 trace.max_entries = TRACK_ADDRS_COUNT;
535 trace.entries = p->addrs;
536 trace.skip = 3;
537 metadata_access_enable();
538 save_stack_trace(&trace);
539 metadata_access_disable();
540
541
542 if (trace.nr_entries != 0 &&
543 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
544 trace.nr_entries--;
545
546 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
547 p->addrs[i] = 0;
548#endif
549 p->addr = addr;
550 p->cpu = smp_processor_id();
551 p->pid = current->pid;
552 p->when = jiffies;
553 } else
554 memset(p, 0, sizeof(struct track));
555}
556
557static void init_tracking(struct kmem_cache *s, void *object)
558{
559 if (!(s->flags & SLAB_STORE_USER))
560 return;
561
562 set_track(s, object, TRACK_FREE, 0UL);
563 set_track(s, object, TRACK_ALLOC, 0UL);
564}
565
566static void print_track(const char *s, struct track *t)
567{
568 if (!t->addr)
569 return;
570
571 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
572 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
573#ifdef CONFIG_STACKTRACE
574 {
575 int i;
576 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
577 if (t->addrs[i])
578 pr_err("\t%pS\n", (void *)t->addrs[i]);
579 else
580 break;
581 }
582#endif
583}
584
585static void print_tracking(struct kmem_cache *s, void *object)
586{
587 if (!(s->flags & SLAB_STORE_USER))
588 return;
589
590 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
591 print_track("Freed", get_track(s, object, TRACK_FREE));
592}
593
594static void print_page_info(struct page *page)
595{
596 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
597 page, page->objects, page->inuse, page->freelist, page->flags);
598
599}
600
601static void slab_bug(struct kmem_cache *s, char *fmt, ...)
602{
603 struct va_format vaf;
604 va_list args;
605
606 va_start(args, fmt);
607 vaf.fmt = fmt;
608 vaf.va = &args;
609 pr_err("=============================================================================\n");
610 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
611 pr_err("-----------------------------------------------------------------------------\n\n");
612
613 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
614 va_end(args);
615}
616
617static void slab_fix(struct kmem_cache *s, char *fmt, ...)
618{
619 struct va_format vaf;
620 va_list args;
621
622 va_start(args, fmt);
623 vaf.fmt = fmt;
624 vaf.va = &args;
625 pr_err("FIX %s: %pV\n", s->name, &vaf);
626 va_end(args);
627}
628
629static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
630{
631 unsigned int off;
632 u8 *addr = page_address(page);
633
634 print_tracking(s, p);
635
636 print_page_info(page);
637
638 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
639 p, p - addr, get_freepointer(s, p));
640
641 if (s->flags & SLAB_RED_ZONE)
642 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
643 s->red_left_pad);
644 else if (p > addr + 16)
645 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
646
647 print_section(KERN_ERR, "Object ", p,
648 min_t(unsigned long, s->object_size, PAGE_SIZE));
649 if (s->flags & SLAB_RED_ZONE)
650 print_section(KERN_ERR, "Redzone ", p + s->object_size,
651 s->inuse - s->object_size);
652
653 if (s->offset)
654 off = s->offset + sizeof(void *);
655 else
656 off = s->inuse;
657
658 if (s->flags & SLAB_STORE_USER)
659 off += 2 * sizeof(struct track);
660
661 off += kasan_metadata_size(s);
662
663 if (off != size_from_object(s))
664
665 print_section(KERN_ERR, "Padding ", p + off,
666 size_from_object(s) - off);
667
668 dump_stack();
669}
670
671void object_err(struct kmem_cache *s, struct page *page,
672 u8 *object, char *reason)
673{
674 slab_bug(s, "%s", reason);
675 print_trailer(s, page, object);
676}
677
678static void slab_err(struct kmem_cache *s, struct page *page,
679 const char *fmt, ...)
680{
681 va_list args;
682 char buf[100];
683
684 va_start(args, fmt);
685 vsnprintf(buf, sizeof(buf), fmt, args);
686 va_end(args);
687 slab_bug(s, "%s", buf);
688 print_page_info(page);
689 dump_stack();
690}
691
692static void init_object(struct kmem_cache *s, void *object, u8 val)
693{
694 u8 *p = object;
695
696 if (s->flags & SLAB_RED_ZONE)
697 memset(p - s->red_left_pad, val, s->red_left_pad);
698
699 if (s->flags & __OBJECT_POISON) {
700 memset(p, POISON_FREE, s->object_size - 1);
701 p[s->object_size - 1] = POISON_END;
702 }
703
704 if (s->flags & SLAB_RED_ZONE)
705 memset(p + s->object_size, val, s->inuse - s->object_size);
706}
707
708static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
709 void *from, void *to)
710{
711 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
712 memset(from, data, to - from);
713}
714
715static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
716 u8 *object, char *what,
717 u8 *start, unsigned int value, unsigned int bytes)
718{
719 u8 *fault;
720 u8 *end;
721
722 metadata_access_enable();
723 fault = memchr_inv(start, value, bytes);
724 metadata_access_disable();
725 if (!fault)
726 return 1;
727
728 end = start + bytes;
729 while (end > fault && end[-1] == value)
730 end--;
731
732 slab_bug(s, "%s overwritten", what);
733 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
734 fault, end - 1, fault[0], value);
735 print_trailer(s, page, object);
736
737 restore_bytes(s, what, value, fault, end);
738 return 0;
739}
740
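/*
 * Layout of an object slot, as assumed by init_object() and the checks
 * below:
 *
 *   left red zone (s->red_left_pad, only with SLAB_RED_ZONE)
 *   object data, s->object_size bytes (poisoned while free)
 *   right red zone / fill up to s->inuse
 *   free pointer at s->offset, when it lives outside the object
 *   two struct track entries, only with SLAB_STORE_USER
 *   kasan metadata, then padding up to s->size
 *
 * check_pad_bytes() verifies the padding between the end of the metadata
 * and the end of the slot.
 */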
779static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
780{
781 unsigned long off = s->inuse;
782
783 if (s->offset)
784
785 off += sizeof(void *);
786
787 if (s->flags & SLAB_STORE_USER)
788
789 off += 2 * sizeof(struct track);
790
791 off += kasan_metadata_size(s);
792
793 if (size_from_object(s) == off)
794 return 1;
795
796 return check_bytes_and_report(s, page, p, "Object padding",
797 p + off, POISON_INUSE, size_from_object(s) - off);
798}
799
800
801static int slab_pad_check(struct kmem_cache *s, struct page *page)
802{
803 u8 *start;
804 u8 *fault;
805 u8 *end;
806 int length;
807 int remainder;
808
809 if (!(s->flags & SLAB_POISON))
810 return 1;
811
812 start = page_address(page);
813 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
814 end = start + length;
815 remainder = length % s->size;
816 if (!remainder)
817 return 1;
818
819 metadata_access_enable();
820 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
821 metadata_access_disable();
822 if (!fault)
823 return 1;
824 while (end > fault && end[-1] == POISON_INUSE)
825 end--;
826
827 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
828 print_section(KERN_ERR, "Padding ", end - remainder, remainder);
829
830 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
831 return 0;
832}
833
834static int check_object(struct kmem_cache *s, struct page *page,
835 void *object, u8 val)
836{
837 u8 *p = object;
838 u8 *endobject = object + s->object_size;
839
840 if (s->flags & SLAB_RED_ZONE) {
841 if (!check_bytes_and_report(s, page, object, "Redzone",
842 object - s->red_left_pad, val, s->red_left_pad))
843 return 0;
844
845 if (!check_bytes_and_report(s, page, object, "Redzone",
846 endobject, val, s->inuse - s->object_size))
847 return 0;
848 } else {
849 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
850 check_bytes_and_report(s, page, p, "Alignment padding",
851 endobject, POISON_INUSE,
852 s->inuse - s->object_size);
853 }
854 }
855
856 if (s->flags & SLAB_POISON) {
857 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
858 (!check_bytes_and_report(s, page, p, "Poison", p,
859 POISON_FREE, s->object_size - 1) ||
860 !check_bytes_and_report(s, page, p, "Poison",
861 p + s->object_size - 1, POISON_END, 1)))
862 return 0;
863
864
865
866 check_pad_bytes(s, page, p);
867 }
868
869 if (!s->offset && val == SLUB_RED_ACTIVE)
870
871
872
873
874 return 1;
875
876
877 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
878 object_err(s, page, p, "Freepointer corrupt");
879
880
881
882
883
884 set_freepointer(s, p, NULL);
885 return 0;
886 }
887 return 1;
888}
889
890static int check_slab(struct kmem_cache *s, struct page *page)
891{
892 int maxobj;
893
894 VM_BUG_ON(!irqs_disabled());
895
896 if (!PageSlab(page)) {
897 slab_err(s, page, "Not a valid slab page");
898 return 0;
899 }
900
901 maxobj = order_objects(compound_order(page), s->size, s->reserved);
902 if (page->objects > maxobj) {
903 slab_err(s, page, "objects %u > max %u",
904 page->objects, maxobj);
905 return 0;
906 }
907 if (page->inuse > page->objects) {
908 slab_err(s, page, "inuse %u > max %u",
909 page->inuse, page->objects);
910 return 0;
911 }
912
913 slab_pad_check(s, page);
914 return 1;
915}
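/*
 * Determine whether @search is on the page's freelist.  Called with
 * @search == NULL it simply walks and validates the whole freelist,
 * repairing corrupted links and object counts as far as possible.
 */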
921static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
922{
923 int nr = 0;
924 void *fp;
925 void *object = NULL;
926 int max_objects;
927
928 fp = page->freelist;
929 while (fp && nr <= page->objects) {
930 if (fp == search)
931 return 1;
932 if (!check_valid_pointer(s, page, fp)) {
933 if (object) {
934 object_err(s, page, object,
935 "Freechain corrupt");
936 set_freepointer(s, object, NULL);
937 } else {
938 slab_err(s, page, "Freepointer corrupt");
939 page->freelist = NULL;
940 page->inuse = page->objects;
941 slab_fix(s, "Freelist cleared");
942 return 0;
943 }
944 break;
945 }
946 object = fp;
947 fp = get_freepointer(s, object);
948 nr++;
949 }
950
951 max_objects = order_objects(compound_order(page), s->size, s->reserved);
952 if (max_objects > MAX_OBJS_PER_PAGE)
953 max_objects = MAX_OBJS_PER_PAGE;
954
955 if (page->objects != max_objects) {
956 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
957 page->objects, max_objects);
958 page->objects = max_objects;
959 slab_fix(s, "Number of objects adjusted.");
960 }
961 if (page->inuse != page->objects - nr) {
962 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
963 page->inuse, page->objects - nr);
964 page->inuse = page->objects - nr;
965 slab_fix(s, "Object count adjusted.");
966 }
967 return search == NULL;
968}
969
970static void trace(struct kmem_cache *s, struct page *page, void *object,
971 int alloc)
972{
973 if (s->flags & SLAB_TRACE) {
974 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
975 s->name,
976 alloc ? "alloc" : "free",
977 object, page->inuse,
978 page->freelist);
979
980 if (!alloc)
981 print_section(KERN_INFO, "Object ", (void *)object,
982 s->object_size);
983
984 dump_stack();
985 }
986}
987
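/*
 * Tracking of slabs with no free objects: with SLAB_STORE_USER they are
 * kept on the node's full list so they can be inspected for debugging.
 */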
991static void add_full(struct kmem_cache *s,
992 struct kmem_cache_node *n, struct page *page)
993{
994 if (!(s->flags & SLAB_STORE_USER))
995 return;
996
997 lockdep_assert_held(&n->list_lock);
998 list_add(&page->lru, &n->full);
999}
1000
1001static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1002{
1003 if (!(s->flags & SLAB_STORE_USER))
1004 return;
1005
1006 lockdep_assert_held(&n->list_lock);
1007 list_del(&page->lru);
1008}
1009
1010
1011static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1012{
1013 struct kmem_cache_node *n = get_node(s, node);
1014
1015 return atomic_long_read(&n->nr_slabs);
1016}
1017
1018static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1019{
1020 return atomic_long_read(&n->nr_slabs);
1021}
1022
1023static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1024{
1025 struct kmem_cache_node *n = get_node(s, node);
1026
1027
1028
1029
1030
1031
1032
1033 if (likely(n)) {
1034 atomic_long_inc(&n->nr_slabs);
1035 atomic_long_add(objects, &n->total_objects);
1036 }
1037}
1038static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1039{
1040 struct kmem_cache_node *n = get_node(s, node);
1041
1042 atomic_long_dec(&n->nr_slabs);
1043 atomic_long_sub(objects, &n->total_objects);
1044}
1045
1046
1047static void setup_object_debug(struct kmem_cache *s, struct page *page,
1048 void *object)
1049{
1050 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1051 return;
1052
1053 init_object(s, object, SLUB_RED_INACTIVE);
1054 init_tracking(s, object);
1055}
1056
1057static inline int alloc_consistency_checks(struct kmem_cache *s,
1058 struct page *page,
1059 void *object, unsigned long addr)
1060{
1061 if (!check_slab(s, page))
1062 return 0;
1063
1064 if (!check_valid_pointer(s, page, object)) {
1065 object_err(s, page, object, "Freelist Pointer check fails");
1066 return 0;
1067 }
1068
1069 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1070 return 0;
1071
1072 return 1;
1073}
1074
1075static noinline int alloc_debug_processing(struct kmem_cache *s,
1076 struct page *page,
1077 void *object, unsigned long addr)
1078{
1079 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1080 if (!alloc_consistency_checks(s, page, object, addr))
1081 goto bad;
1082 }
1083
1084
1085 if (s->flags & SLAB_STORE_USER)
1086 set_track(s, object, TRACK_ALLOC, addr);
1087 trace(s, page, object, 1);
1088 init_object(s, object, SLUB_RED_ACTIVE);
1089 return 1;
1090
1091bad:
1092 if (PageSlab(page)) {
1093
1094
1095
1096
1097
1098 slab_fix(s, "Marking all objects used");
1099 page->inuse = page->objects;
1100 page->freelist = NULL;
1101 }
1102 return 0;
1103}
1104
1105static inline int free_consistency_checks(struct kmem_cache *s,
1106 struct page *page, void *object, unsigned long addr)
1107{
1108 if (!check_valid_pointer(s, page, object)) {
1109 slab_err(s, page, "Invalid object pointer 0x%p", object);
1110 return 0;
1111 }
1112
1113 if (on_freelist(s, page, object)) {
1114 object_err(s, page, object, "Object already free");
1115 return 0;
1116 }
1117
1118 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1119 return 0;
1120
1121 if (unlikely(s != page->slab_cache)) {
1122 if (!PageSlab(page)) {
1123 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1124 object);
1125 } else if (!page->slab_cache) {
1126 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1127 object);
1128 dump_stack();
1129 } else
1130 object_err(s, page, object,
1131 "page slab pointer corrupt.");
1132 return 0;
1133 }
1134 return 1;
1135}
1136
1137
1138static noinline int free_debug_processing(
1139 struct kmem_cache *s, struct page *page,
1140 void *head, void *tail, int bulk_cnt,
1141 unsigned long addr)
1142{
1143 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1144 void *object = head;
1145 int cnt = 0;
1146 unsigned long uninitialized_var(flags);
1147 int ret = 0;
1148
1149 spin_lock_irqsave(&n->list_lock, flags);
1150 slab_lock(page);
1151
1152 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1153 if (!check_slab(s, page))
1154 goto out;
1155 }
1156
1157next_object:
1158 cnt++;
1159
1160 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1161 if (!free_consistency_checks(s, page, object, addr))
1162 goto out;
1163 }
1164
1165 if (s->flags & SLAB_STORE_USER)
1166 set_track(s, object, TRACK_FREE, addr);
1167 trace(s, page, object, 0);
1168
1169 init_object(s, object, SLUB_RED_INACTIVE);
1170
1171
1172 if (object != tail) {
1173 object = get_freepointer(s, object);
1174 goto next_object;
1175 }
1176 ret = 1;
1177
1178out:
1179 if (cnt != bulk_cnt)
1180 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1181 bulk_cnt, cnt);
1182
1183 slab_unlock(page);
1184 spin_unlock_irqrestore(&n->list_lock, flags);
1185 if (!ret)
1186 slab_fix(s, "Object at 0x%p not freed", object);
1187 return ret;
1188}
1189
1190static int __init setup_slub_debug(char *str)
1191{
1192 slub_debug = DEBUG_DEFAULT_FLAGS;
1193 if (*str++ != '=' || !*str)
1194
1195
1196
1197 goto out;
1198
1199 if (*str == ',')
1200
1201
1202
1203
1204 goto check_slabs;
1205
1206 slub_debug = 0;
1207 if (*str == '-')
1208
1209
1210
1211 goto out;
1212
1213
1214
1215
1216 for (; *str && *str != ','; str++) {
1217 switch (tolower(*str)) {
1218 case 'f':
1219 slub_debug |= SLAB_CONSISTENCY_CHECKS;
1220 break;
1221 case 'z':
1222 slub_debug |= SLAB_RED_ZONE;
1223 break;
1224 case 'p':
1225 slub_debug |= SLAB_POISON;
1226 break;
1227 case 'u':
1228 slub_debug |= SLAB_STORE_USER;
1229 break;
1230 case 't':
1231 slub_debug |= SLAB_TRACE;
1232 break;
1233 case 'a':
1234 slub_debug |= SLAB_FAILSLAB;
1235 break;
1236 case 'o':
1237
1238
1239
1240
1241 disable_higher_order_debug = 1;
1242 break;
1243 default:
1244 pr_err("slub_debug option '%c' unknown. skipped\n",
1245 *str);
1246 }
1247 }
1248
1249check_slabs:
1250 if (*str == ',')
1251 slub_debug_slabs = str + 1;
1252out:
1253 return 1;
1254}
1255
1256__setup("slub_debug", setup_slub_debug);
1257
1258unsigned long kmem_cache_flags(unsigned long object_size,
1259 unsigned long flags, const char *name,
1260 void (*ctor)(void *))
1261{
1262
1263
1264
1265 if (slub_debug && (!slub_debug_slabs || (name &&
1266 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1267 flags |= slub_debug;
1268
1269 return flags;
1270}
1271#else
1272static inline void setup_object_debug(struct kmem_cache *s,
1273 struct page *page, void *object) {}
1274
1275static inline int alloc_debug_processing(struct kmem_cache *s,
1276 struct page *page, void *object, unsigned long addr) { return 0; }
1277
1278static inline int free_debug_processing(
1279 struct kmem_cache *s, struct page *page,
1280 void *head, void *tail, int bulk_cnt,
1281 unsigned long addr) { return 0; }
1282
1283static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1284 { return 1; }
1285static inline int check_object(struct kmem_cache *s, struct page *page,
1286 void *object, u8 val) { return 1; }
1287static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1288 struct page *page) {}
1289static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1290 struct page *page) {}
1291unsigned long kmem_cache_flags(unsigned long object_size,
1292 unsigned long flags, const char *name,
1293 void (*ctor)(void *))
1294{
1295 return flags;
1296}
1297#define slub_debug 0
1298
1299#define disable_higher_order_debug 0
1300
1301static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1302 { return 0; }
1303static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1304 { return 0; }
1305static inline void inc_slabs_node(struct kmem_cache *s, int node,
1306 int objects) {}
1307static inline void dec_slabs_node(struct kmem_cache *s, int node,
1308 int objects) {}
1309
1310#endif
1311
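/*
 * Hooks that forward allocation and free events to kmemleak, kmemcheck,
 * kasan and the other debugging subsystems.
 */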
1316static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1317{
1318 kmemleak_alloc(ptr, size, 1, flags);
1319 kasan_kmalloc_large(ptr, size, flags);
1320}
1321
1322static inline void kfree_hook(const void *x)
1323{
1324 kmemleak_free(x);
1325 kasan_kfree_large(x);
1326}
1327
1328static inline void *slab_free_hook(struct kmem_cache *s, void *x)
1329{
1330 void *freeptr;
1331
1332 kmemleak_free_recursive(x, s->flags);
1333
1339#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1340 {
1341 unsigned long flags;
1342
1343 local_irq_save(flags);
1344 kmemcheck_slab_free(s, x, s->object_size);
1345 debug_check_no_locks_freed(x, s->object_size);
1346 local_irq_restore(flags);
1347 }
1348#endif
1349 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1350 debug_check_no_obj_freed(x, s->object_size);
1351
1352 freeptr = get_freepointer(s, x);
1353
1354
1355
1356
1357 kasan_slab_free(s, x);
1358 return freeptr;
1359}
1360
1361static inline void slab_free_freelist_hook(struct kmem_cache *s,
1362 void *head, void *tail)
1363{
1364
1365
1366
1367
1368#if defined(CONFIG_KMEMCHECK) || \
1369 defined(CONFIG_LOCKDEP) || \
1370 defined(CONFIG_DEBUG_KMEMLEAK) || \
1371 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1372 defined(CONFIG_KASAN)
1373
1374 void *object = head;
1375 void *tail_obj = tail ? : head;
1376 void *freeptr;
1377
1378 do {
1379 freeptr = slab_free_hook(s, object);
1380 } while ((object != tail_obj) && (object = freeptr));
1381#endif
1382}
1383
1384static void setup_object(struct kmem_cache *s, struct page *page,
1385 void *object)
1386{
1387 setup_object_debug(s, page, object);
1388 kasan_init_slab_obj(s, object);
1389 if (unlikely(s->ctor)) {
1390 kasan_unpoison_object_data(s, object);
1391 s->ctor(object);
1392 kasan_poison_object_data(s, object);
1393 }
1394}
1395
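/*
 * Slab page allocation: get pages of the requested order from the page
 * allocator and charge them to the memcg; if the charge fails the pages
 * are freed again and NULL is returned.
 */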
1399static inline struct page *alloc_slab_page(struct kmem_cache *s,
1400 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1401{
1402 struct page *page;
1403 int order = oo_order(oo);
1404
1405 flags |= __GFP_NOTRACK;
1406
1407 if (node == NUMA_NO_NODE)
1408 page = alloc_pages(flags, order);
1409 else
1410 page = __alloc_pages_node(node, flags, order);
1411
1412 if (page && memcg_charge_slab(page, flags, order, s)) {
1413 __free_pages(page, order);
1414 page = NULL;
1415 }
1416
1417 return page;
1418}
1419
1420#ifdef CONFIG_SLAB_FREELIST_RANDOM
1421
1422static int init_cache_random_seq(struct kmem_cache *s)
1423{
1424 int err;
1425 unsigned long i, count = oo_objects(s->oo);
1426
1427
1428 if (s->random_seq)
1429 return 0;
1430
1431 err = cache_random_seq_create(s, count, GFP_KERNEL);
1432 if (err) {
1433 pr_err("SLUB: Unable to initialize free list for %s\n",
1434 s->name);
1435 return err;
1436 }
1437
1438
1439 if (s->random_seq) {
1440 for (i = 0; i < count; i++)
1441 s->random_seq[i] *= s->size;
1442 }
1443 return 0;
1444}
1445
1446
1447static void __init init_freelist_randomization(void)
1448{
1449 struct kmem_cache *s;
1450
1451 mutex_lock(&slab_mutex);
1452
1453 list_for_each_entry(s, &slab_caches, list)
1454 init_cache_random_seq(s);
1455
1456 mutex_unlock(&slab_mutex);
1457}
1458
1459
1460static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1461 unsigned long *pos, void *start,
1462 unsigned long page_limit,
1463 unsigned long freelist_count)
1464{
1465 unsigned int idx;
1466
1467
1468
1469
1470
1471 do {
1472 idx = s->random_seq[*pos];
1473 *pos += 1;
1474 if (*pos >= freelist_count)
1475 *pos = 0;
1476 } while (unlikely(idx >= page_limit));
1477
1478 return (char *)start + idx;
1479}
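/*
 * With CONFIG_SLAB_FREELIST_RANDOM, link the new page's objects into the
 * freelist in the precomputed pseudo-random order from s->random_seq
 * instead of simple address order.
 */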
1482static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1483{
1484 void *start;
1485 void *cur;
1486 void *next;
1487 unsigned long idx, pos, page_limit, freelist_count;
1488
1489 if (page->objects < 2 || !s->random_seq)
1490 return false;
1491
1492 freelist_count = oo_objects(s->oo);
1493 pos = get_random_int() % freelist_count;
1494
1495 page_limit = page->objects * s->size;
1496 start = fixup_red_left(s, page_address(page));
1497
1498
1499 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1500 freelist_count);
1501 page->freelist = cur;
1502
1503 for (idx = 1; idx < page->objects; idx++) {
1504 setup_object(s, page, cur);
1505 next = next_freelist_entry(s, page, &pos, start, page_limit,
1506 freelist_count);
1507 set_freepointer(s, cur, next);
1508 cur = next;
1509 }
1510 setup_object(s, page, cur);
1511 set_freepointer(s, cur, NULL);
1512
1513 return true;
1514}
1515#else
1516static inline int init_cache_random_seq(struct kmem_cache *s)
1517{
1518 return 0;
1519}
1520static inline void init_freelist_randomization(void) { }
1521static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1522{
1523 return false;
1524}
1525#endif
1526
1527static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1528{
1529 struct page *page;
1530 struct kmem_cache_order_objects oo = s->oo;
1531 gfp_t alloc_gfp;
1532 void *start, *p;
1533 int idx, order;
1534 bool shuffle;
1535
1536 flags &= gfp_allowed_mask;
1537
1538 if (gfpflags_allow_blocking(flags))
1539 local_irq_enable();
1540
1541 flags |= s->allocflags;
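 /*
  * Try the preferred (higher) order first, without retries or warnings;
  * on failure fall back to the minimum order (s->min) below.
  */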
1547 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1548 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1549 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1550
1551 page = alloc_slab_page(s, alloc_gfp, node, oo);
1552 if (unlikely(!page)) {
1553 oo = s->min;
1554 alloc_gfp = flags;
1559 page = alloc_slab_page(s, alloc_gfp, node, oo);
1560 if (unlikely(!page))
1561 goto out;
1562 stat(s, ORDER_FALLBACK);
1563 }
1564
1565 if (kmemcheck_enabled &&
1566 !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1567 int pages = 1 << oo_order(oo);
1568
1569 kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
1570
1571
1572
1573
1574
1575 if (s->ctor)
1576 kmemcheck_mark_uninitialized_pages(page, pages);
1577 else
1578 kmemcheck_mark_unallocated_pages(page, pages);
1579 }
1580
1581 page->objects = oo_objects(oo);
1582
1583 order = compound_order(page);
1584 page->slab_cache = s;
1585 __SetPageSlab(page);
1586 if (page_is_pfmemalloc(page))
1587 SetPageSlabPfmemalloc(page);
1588
1589 start = page_address(page);
1590
1591 if (unlikely(s->flags & SLAB_POISON))
1592 memset(start, POISON_INUSE, PAGE_SIZE << order);
1593
1594 kasan_poison_slab(page);
1595
1596 shuffle = shuffle_freelist(s, page);
1597
1598 if (!shuffle) {
1599 for_each_object_idx(p, idx, s, start, page->objects) {
1600 setup_object(s, page, p);
1601 if (likely(idx < page->objects))
1602 set_freepointer(s, p, p + s->size);
1603 else
1604 set_freepointer(s, p, NULL);
1605 }
1606 page->freelist = fixup_red_left(s, start);
1607 }
1608
1609 page->inuse = page->objects;
1610 page->frozen = 1;
1611
1612out:
1613 if (gfpflags_allow_blocking(flags))
1614 local_irq_disable();
1615 if (!page)
1616 return NULL;
1617
1618 mod_lruvec_page_state(page,
1619 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1620 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1621 1 << oo_order(oo));
1622
1623 inc_slabs_node(s, page_to_nid(page), page->objects);
1624
1625 return page;
1626}
1627
1628static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1629{
1630 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1631 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1632 flags &= ~GFP_SLAB_BUG_MASK;
1633 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1634 invalid_mask, &invalid_mask, flags, &flags);
1635 dump_stack();
1636 }
1637
1638 return allocate_slab(s,
1639 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1640}
1641
1642static void __free_slab(struct kmem_cache *s, struct page *page)
1643{
1644 int order = compound_order(page);
1645 int pages = 1 << order;
1646
1647 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1648 void *p;
1649
1650 slab_pad_check(s, page);
1651 for_each_object(p, s, page_address(page),
1652 page->objects)
1653 check_object(s, page, p, SLUB_RED_INACTIVE);
1654 }
1655
1656 kmemcheck_free_shadow(page, compound_order(page));
1657
1658 mod_lruvec_page_state(page,
1659 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1660 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1661 -pages);
1662
1663 __ClearPageSlabPfmemalloc(page);
1664 __ClearPageSlab(page);
1665
1666 page_mapcount_reset(page);
1667 if (current->reclaim_state)
1668 current->reclaim_state->reclaimed_slab += pages;
1669 memcg_uncharge_slab(page, order, s);
1670 __free_pages(page, order);
1671}
1672
1673#define need_reserve_slab_rcu \
1674 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1675
1676static void rcu_free_slab(struct rcu_head *h)
1677{
1678 struct page *page;
1679
1680 if (need_reserve_slab_rcu)
1681 page = virt_to_head_page(h);
1682 else
1683 page = container_of((struct list_head *)h, struct page, lru);
1684
1685 __free_slab(page->slab_cache, page);
1686}
1687
1688static void free_slab(struct kmem_cache *s, struct page *page)
1689{
1690 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1691 struct rcu_head *head;
1692
1693 if (need_reserve_slab_rcu) {
1694 int order = compound_order(page);
1695 int offset = (PAGE_SIZE << order) - s->reserved;
1696
1697 VM_BUG_ON(s->reserved != sizeof(*head));
1698 head = page_address(page) + offset;
1699 } else {
1700 head = &page->rcu_head;
1701 }
1702
1703 call_rcu(head, rcu_free_slab);
1704 } else
1705 __free_slab(s, page);
1706}
1707
1708static void discard_slab(struct kmem_cache *s, struct page *page)
1709{
1710 dec_slabs_node(s, page_to_nid(page), page->objects);
1711 free_slab(s, page);
1712}
1713
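/*
 * Management of the node's partial lists: add_partial() and
 * remove_partial() require n->list_lock to be held.
 */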
1717static inline void
1718__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1719{
1720 n->nr_partial++;
1721 if (tail == DEACTIVATE_TO_TAIL)
1722 list_add_tail(&page->lru, &n->partial);
1723 else
1724 list_add(&page->lru, &n->partial);
1725}
1726
1727static inline void add_partial(struct kmem_cache_node *n,
1728 struct page *page, int tail)
1729{
1730 lockdep_assert_held(&n->list_lock);
1731 __add_partial(n, page, tail);
1732}
1733
1734static inline void remove_partial(struct kmem_cache_node *n,
1735 struct page *page)
1736{
1737 lockdep_assert_held(&n->list_lock);
1738 list_del(&page->lru);
1739 n->nr_partial--;
1740}
1741
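/*
 * Remove a page from the node's partial list and freeze it for per-cpu
 * use.  In "mode" (the first page acquired) the whole freelist is taken
 * over for the cpu slab; otherwise the page keeps its freelist and the
 * caller parks it on the cpu partial list.
 */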
1748static inline void *acquire_slab(struct kmem_cache *s,
1749 struct kmem_cache_node *n, struct page *page,
1750 int mode, int *objects)
1751{
1752 void *freelist;
1753 unsigned long counters;
1754 struct page new;
1755
1756 lockdep_assert_held(&n->list_lock);
1757
1763 freelist = page->freelist;
1764 counters = page->counters;
1765 new.counters = counters;
1766 *objects = new.objects - new.inuse;
1767 if (mode) {
1768 new.inuse = page->objects;
1769 new.freelist = NULL;
1770 } else {
1771 new.freelist = freelist;
1772 }
1773
1774 VM_BUG_ON(new.frozen);
1775 new.frozen = 1;
1776
1777 if (!__cmpxchg_double_slab(s, page,
1778 freelist, counters,
1779 new.freelist, new.counters,
1780 "acquire_slab"))
1781 return NULL;
1782
1783 remove_partial(n, page);
1784 WARN_ON(!freelist);
1785 return freelist;
1786}
1787
1788static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1789static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1790
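/*
 * Try to refill the cpu slab from one node's partial list, and while the
 * list_lock is held also stash a few extra partial pages on the cpu
 * partial list.
 */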
1794static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1795 struct kmem_cache_cpu *c, gfp_t flags)
1796{
1797 struct page *page, *page2;
1798 void *object = NULL;
1799 int available = 0;
1800 int objects;
1801
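 /*
  * Unlocked peek at nr_partial: if the node currently looks empty,
  * do not bother taking the list_lock at all.
  */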
1808 if (!n || !n->nr_partial)
1809 return NULL;
1810
1811 spin_lock(&n->list_lock);
1812 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1813 void *t;
1814
1815 if (!pfmemalloc_match(page, flags))
1816 continue;
1817
1818 t = acquire_slab(s, n, page, object == NULL, &objects);
1819 if (!t)
1820 break;
1821
1822 available += objects;
1823 if (!object) {
1824 c->page = page;
1825 stat(s, ALLOC_FROM_PARTIAL);
1826 object = t;
1827 } else {
1828 put_cpu_partial(s, page, 0);
1829 stat(s, CPU_PARTIAL_NODE);
1830 }
1831 if (!kmem_cache_has_cpu_partial(s)
1832 || available > slub_cpu_partial(s) / 2)
1833 break;
1834
1835 }
1836 spin_unlock(&n->list_lock);
1837 return object;
1838}
1839
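/*
 * Look for a partial slab on any other node, walking the zonelist and
 * respecting cpuset constraints and the cache's remote_node_defrag_ratio
 * throttle.
 */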
1843static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1844 struct kmem_cache_cpu *c)
1845{
1846#ifdef CONFIG_NUMA
1847 struct zonelist *zonelist;
1848 struct zoneref *z;
1849 struct zone *zone;
1850 enum zone_type high_zoneidx = gfp_zone(flags);
1851 void *object;
1852 unsigned int cpuset_mems_cookie;
1853
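 /*
  * remote_node_defrag_ratio rate-limits cross-node scanning: a ratio
  * of N lets roughly N out of every 1024 attempts proceed.
  */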
1872 if (!s->remote_node_defrag_ratio ||
1873 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1874 return NULL;
1875
1876 do {
1877 cpuset_mems_cookie = read_mems_allowed_begin();
1878 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1879 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1880 struct kmem_cache_node *n;
1881
1882 n = get_node(s, zone_to_nid(zone));
1883
1884 if (n && cpuset_zone_allowed(zone, flags) &&
1885 n->nr_partial > s->min_partial) {
1886 object = get_partial_node(s, n, c, flags);
1887 if (object) {
1888
1889
1890
1891
1892
1893
1894
1895 return object;
1896 }
1897 }
1898 }
1899 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1900#endif
1901 return NULL;
1902}
1903
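/*
 * Get a partial page for the cpu slab: prefer the requested (or local)
 * node, and only fall back to other nodes when no specific node was
 * asked for.
 */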
1907static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1908 struct kmem_cache_cpu *c)
1909{
1910 void *object;
1911 int searchnode = node;
1912
1913 if (node == NUMA_NO_NODE)
1914 searchnode = numa_mem_id();
1915 else if (!node_present_pages(node))
1916 searchnode = node_to_mem_node(node);
1917
1918 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1919 if (object || node != NUMA_NO_NODE)
1920 return object;
1921
1922 return get_any_partial(s, flags, c);
1923}
1924
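/*
 * Per-cpu transaction ids: the low bits identify the cpu, so under
 * PREEMPT a tid can never be mistaken for one belonging to another cpu,
 * and the bits above count events.  next_tid(), tid_to_cpu() and
 * tid_to_event() below encode and decode this.
 */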
1925#ifdef CONFIG_PREEMPT
1931#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1932#else
1937#define TID_STEP 1
1938#endif
1939
1940static inline unsigned long next_tid(unsigned long tid)
1941{
1942 return tid + TID_STEP;
1943}
1944
1945static inline unsigned int tid_to_cpu(unsigned long tid)
1946{
1947 return tid % TID_STEP;
1948}
1949
1950static inline unsigned long tid_to_event(unsigned long tid)
1951{
1952 return tid / TID_STEP;
1953}
1954
1955static inline unsigned int init_tid(int cpu)
1956{
1957 return cpu;
1958}
1959
1960static inline void note_cmpxchg_failure(const char *n,
1961 const struct kmem_cache *s, unsigned long tid)
1962{
1963#ifdef SLUB_DEBUG_CMPXCHG
1964 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1965
1966 pr_info("%s %s: cmpxchg redo ", n, s->name);
1967
1968#ifdef CONFIG_PREEMPT
1969 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1970 pr_warn("due to cpu change %d -> %d\n",
1971 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1972 else
1973#endif
1974 if (tid_to_event(tid) != tid_to_event(actual_tid))
1975 pr_warn("due to cpu running other code. Event %ld->%ld\n",
1976 tid_to_event(tid), tid_to_event(actual_tid));
1977 else
1978 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
1979 actual_tid, tid, next_tid(tid));
1980#endif
1981 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1982}
1983
1984static void init_kmem_cache_cpus(struct kmem_cache *s)
1985{
1986 int cpu;
1987
1988 for_each_possible_cpu(cpu)
1989 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1990}
1991
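/*
 * Remove the cpu slab: return the remaining per-cpu free objects to the
 * page's freelist, unfreeze the page and put it on the appropriate node
 * list (partial or full) or discard it, then clear c->page and
 * c->freelist.
 */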
1995static void deactivate_slab(struct kmem_cache *s, struct page *page,
1996 void *freelist, struct kmem_cache_cpu *c)
1997{
1998 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1999 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2000 int lock = 0;
2001 enum slab_modes l = M_NONE, m = M_NONE;
2002 void *nextfree;
2003 int tail = DEACTIVATE_TO_HEAD;
2004 struct page new;
2005 struct page old;
2006
2007 if (page->freelist) {
2008 stat(s, DEACTIVATE_REMOTE_FREES);
2009 tail = DEACTIVATE_TO_TAIL;
2010 }
2011
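 /*
  * Stage one: while the page is still frozen, free all but the last
  * object of the per-cpu freelist back to the page freelist, one
  * cmpxchg per object.
  */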
2020 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2021 void *prior;
2022 unsigned long counters;
2023
2024 do {
2025 prior = page->freelist;
2026 counters = page->counters;
2027 set_freepointer(s, freelist, prior);
2028 new.counters = counters;
2029 new.inuse--;
2030 VM_BUG_ON(!new.frozen);
2031
2032 } while (!__cmpxchg_double_slab(s, page,
2033 prior, counters,
2034 freelist, new.counters,
2035 "drain percpu freelist"));
2036
2037 freelist = nextfree;
2038 }
2039
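 /*
  * Stage two: build the page's new freelist and counters speculatively,
  * decide whether the slab ends up partial, full or free, and move it
  * to the matching node list under n->list_lock; if the final cmpxchg
  * fails, redo the whole decision.
  */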
2054redo:
2055
2056 old.freelist = page->freelist;
2057 old.counters = page->counters;
2058 VM_BUG_ON(!old.frozen);
2059
2060
2061 new.counters = old.counters;
2062 if (freelist) {
2063 new.inuse--;
2064 set_freepointer(s, freelist, old.freelist);
2065 new.freelist = freelist;
2066 } else
2067 new.freelist = old.freelist;
2068
2069 new.frozen = 0;
2070
2071 if (!new.inuse && n->nr_partial >= s->min_partial)
2072 m = M_FREE;
2073 else if (new.freelist) {
2074 m = M_PARTIAL;
2075 if (!lock) {
2076 lock = 1;
2077
2078
2079
2080
2081
2082 spin_lock(&n->list_lock);
2083 }
2084 } else {
2085 m = M_FULL;
2086 if (kmem_cache_debug(s) && !lock) {
2087 lock = 1;
2088
2089
2090
2091
2092
2093 spin_lock(&n->list_lock);
2094 }
2095 }
2096
2097 if (l != m) {
2098
2099 if (l == M_PARTIAL)
2100
2101 remove_partial(n, page);
2102
2103 else if (l == M_FULL)
2104
2105 remove_full(s, n, page);
2106
2107 if (m == M_PARTIAL) {
2108
2109 add_partial(n, page, tail);
2110 stat(s, tail);
2111
2112 } else if (m == M_FULL) {
2113
2114 stat(s, DEACTIVATE_FULL);
2115 add_full(s, n, page);
2116
2117 }
2118 }
2119
2120 l = m;
2121 if (!__cmpxchg_double_slab(s, page,
2122 old.freelist, old.counters,
2123 new.freelist, new.counters,
2124 "unfreezing slab"))
2125 goto redo;
2126
2127 if (lock)
2128 spin_unlock(&n->list_lock);
2129
2130 if (m == M_FREE) {
2131 stat(s, DEACTIVATE_EMPTY);
2132 discard_slab(s, page);
2133 stat(s, FREE_SLAB);
2134 }
2135
2136 c->page = NULL;
2137 c->freelist = NULL;
2138}
2139
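/*
 * Unfreeze every page on the cpu's partial list: return each one to its
 * node's partial list, or discard it if it is empty and the node already
 * holds at least min_partial slabs.  Callers run this with interrupts
 * disabled.
 */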
2147static void unfreeze_partials(struct kmem_cache *s,
2148 struct kmem_cache_cpu *c)
2149{
2150#ifdef CONFIG_SLUB_CPU_PARTIAL
2151 struct kmem_cache_node *n = NULL, *n2 = NULL;
2152 struct page *page, *discard_page = NULL;
2153
2154 while ((page = c->partial)) {
2155 struct page new;
2156 struct page old;
2157
2158 c->partial = page->next;
2159
2160 n2 = get_node(s, page_to_nid(page));
2161 if (n != n2) {
2162 if (n)
2163 spin_unlock(&n->list_lock);
2164
2165 n = n2;
2166 spin_lock(&n->list_lock);
2167 }
2168
2169 do {
2170
2171 old.freelist = page->freelist;
2172 old.counters = page->counters;
2173 VM_BUG_ON(!old.frozen);
2174
2175 new.counters = old.counters;
2176 new.freelist = old.freelist;
2177
2178 new.frozen = 0;
2179
2180 } while (!__cmpxchg_double_slab(s, page,
2181 old.freelist, old.counters,
2182 new.freelist, new.counters,
2183 "unfreezing slab"));
2184
2185 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2186 page->next = discard_page;
2187 discard_page = page;
2188 } else {
2189 add_partial(n, page, DEACTIVATE_TO_TAIL);
2190 stat(s, FREE_ADD_PARTIAL);
2191 }
2192 }
2193
2194 if (n)
2195 spin_unlock(&n->list_lock);
2196
2197 while (discard_page) {
2198 page = discard_page;
2199 discard_page = discard_page->next;
2200
2201 stat(s, DEACTIVATE_EMPTY);
2202 discard_slab(s, page);
2203 stat(s, FREE_SLAB);
2204 }
2205#endif
2206}
2207
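/*
 * Park a just-frozen page on this cpu's partial list.  If draining is
 * allowed and the list already holds more than s->cpu_partial free
 * objects, the current partial pages are first unfrozen back to their
 * nodes.
 */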
2217static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2218{
2219#ifdef CONFIG_SLUB_CPU_PARTIAL
2220 struct page *oldpage;
2221 int pages;
2222 int pobjects;
2223
2224 preempt_disable();
2225 do {
2226 pages = 0;
2227 pobjects = 0;
2228 oldpage = this_cpu_read(s->cpu_slab->partial);
2229
2230 if (oldpage) {
2231 pobjects = oldpage->pobjects;
2232 pages = oldpage->pages;
2233 if (drain && pobjects > s->cpu_partial) {
2234 unsigned long flags;
2235
2236
2237
2238
2239 local_irq_save(flags);
2240 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2241 local_irq_restore(flags);
2242 oldpage = NULL;
2243 pobjects = 0;
2244 pages = 0;
2245 stat(s, CPU_PARTIAL_DRAIN);
2246 }
2247 }
2248
2249 pages++;
2250 pobjects += page->objects - page->inuse;
2251
2252 page->pages = pages;
2253 page->pobjects = pobjects;
2254 page->next = oldpage;
2255
2256 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2257 != oldpage);
2258 if (unlikely(!s->cpu_partial)) {
2259 unsigned long flags;
2260
2261 local_irq_save(flags);
2262 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2263 local_irq_restore(flags);
2264 }
2265 preempt_enable();
2266#endif
2267}
2268
2269static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2270{
2271 stat(s, CPUSLAB_FLUSH);
2272 deactivate_slab(s, c->page, c->freelist, c);
2273
2274 c->tid = next_tid(c->tid);
2275}
2276
2277
2278
2279
2280
2281
2282static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2283{
2284 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2285
2286 if (likely(c)) {
2287 if (c->page)
2288 flush_slab(s, c);
2289
2290 unfreeze_partials(s, c);
2291 }
2292}
2293
2294static void flush_cpu_slab(void *d)
2295{
2296 struct kmem_cache *s = d;
2297
2298 __flush_cpu_slab(s, smp_processor_id());
2299}
2300
2301static bool has_cpu_slab(int cpu, void *info)
2302{
2303 struct kmem_cache *s = info;
2304 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2305
2306 return c->page || slub_percpu_partial(c);
2307}
2308
2309static void flush_all(struct kmem_cache *s)
2310{
2311 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2312}
2313
2314
2315
2316
2317
2318static int slub_cpu_dead(unsigned int cpu)
2319{
2320 struct kmem_cache *s;
2321 unsigned long flags;
2322
2323 mutex_lock(&slab_mutex);
2324 list_for_each_entry(s, &slab_caches, list) {
2325 local_irq_save(flags);
2326 __flush_cpu_slab(s, cpu);
2327 local_irq_restore(flags);
2328 }
2329 mutex_unlock(&slab_mutex);
2330 return 0;
2331}
2332
2333
2334
2335
2336
2337static inline int node_match(struct page *page, int node)
2338{
2339#ifdef CONFIG_NUMA
2340 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2341 return 0;
2342#endif
2343 return 1;
2344}
2345
2346#ifdef CONFIG_SLUB_DEBUG
2347static int count_free(struct page *page)
2348{
2349 return page->objects - page->inuse;
2350}
2351
2352static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2353{
2354 return atomic_long_read(&n->total_objects);
2355}
2356#endif
2357
2358#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2359static unsigned long count_partial(struct kmem_cache_node *n,
2360 int (*get_count)(struct page *))
2361{
2362 unsigned long flags;
2363 unsigned long x = 0;
2364 struct page *page;
2365
2366 spin_lock_irqsave(&n->list_lock, flags);
2367 list_for_each_entry(page, &n->partial, lru)
2368 x += get_count(page);
2369 spin_unlock_irqrestore(&n->list_lock, flags);
2370 return x;
2371}
2372#endif
2373
2374static noinline void
2375slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2376{
2377#ifdef CONFIG_SLUB_DEBUG
2378 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2379 DEFAULT_RATELIMIT_BURST);
2380 int node;
2381 struct kmem_cache_node *n;
2382
2383 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2384 return;
2385
2386 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2387 nid, gfpflags, &gfpflags);
2388 pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n",
2389 s->name, s->object_size, s->size, oo_order(s->oo),
2390 oo_order(s->min));
2391
2392 if (oo_order(s->min) > get_order(s->object_size))
2393 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2394 s->name);
2395
2396 for_each_kmem_cache_node(s, node, n) {
2397 unsigned long nr_slabs;
2398 unsigned long nr_objs;
2399 unsigned long nr_free;
2400
2401 nr_free = count_partial(n, count_free);
2402 nr_slabs = node_nr_slabs(n);
2403 nr_objs = node_nr_objs(n);
2404
2405 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2406 node, nr_slabs, nr_objs, nr_free);
2407 }
2408#endif
2409}
2410
2411static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2412 int node, struct kmem_cache_cpu **pc)
2413{
2414 void *freelist;
2415 struct kmem_cache_cpu *c = *pc;
2416 struct page *page;
2417
2418 freelist = get_partial(s, flags, node, c);
2419
2420 if (freelist)
2421 return freelist;
2422
2423 page = new_slab(s, flags, node);
2424 if (page) {
2425 c = raw_cpu_ptr(s->cpu_slab);
2426 if (c->page)
2427 flush_slab(s, c);
2428
2429
2430
2431
2432
2433 freelist = page->freelist;
2434 page->freelist = NULL;
2435
2436 stat(s, ALLOC_SLAB);
2437 c->page = page;
2438 *pc = c;
2439 } else
2440 freelist = NULL;
2441
2442 return freelist;
2443}
2444
2445static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2446{
2447 if (unlikely(PageSlabPfmemalloc(page)))
2448 return gfp_pfmemalloc_allowed(gfpflags);
2449
2450 return true;
2451}
2452
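/*
 * Atomically take the whole page->freelist for the per-cpu freelist.
 * The page stays frozen if it still had free objects; otherwise it is
 * unfrozen so it can be deactivated.
 */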
2463static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2464{
2465 struct page new;
2466 unsigned long counters;
2467 void *freelist;
2468
2469 do {
2470 freelist = page->freelist;
2471 counters = page->counters;
2472
2473 new.counters = counters;
2474 VM_BUG_ON(!new.frozen);
2475
2476 new.inuse = page->objects;
2477 new.frozen = freelist != NULL;
2478
2479 } while (!__cmpxchg_double_slab(s, page,
2480 freelist, counters,
2481 NULL, new.counters,
2482 "get_freelist"));
2483
2484 return freelist;
2485}
2486
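/*
 * Slow path of the allocator, used when the lockless per-cpu freelist is
 * empty or does not match the requested node.  Refills c->freelist from
 * the current page, a cpu partial page, a node partial list or a freshly
 * allocated slab, in that order.  Runs with interrupts disabled; see
 * __slab_alloc() for the wrapper that disables them.
 */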
2506static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2507 unsigned long addr, struct kmem_cache_cpu *c)
2508{
2509 void *freelist;
2510 struct page *page;
2511
2512 page = c->page;
2513 if (!page)
2514 goto new_slab;
2515redo:
2516
2517 if (unlikely(!node_match(page, node))) {
2518 int searchnode = node;
2519
2520 if (node != NUMA_NO_NODE && !node_present_pages(node))
2521 searchnode = node_to_mem_node(node);
2522
2523 if (unlikely(!node_match(page, searchnode))) {
2524 stat(s, ALLOC_NODE_MISMATCH);
2525 deactivate_slab(s, page, c->freelist, c);
2526 goto new_slab;
2527 }
2528 }
2529
2530
2531
2532
2533
2534
2535 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2536 deactivate_slab(s, page, c->freelist, c);
2537 goto new_slab;
2538 }
2539
2540
2541 freelist = c->freelist;
2542 if (freelist)
2543 goto load_freelist;
2544
2545 freelist = get_freelist(s, page);
2546
2547 if (!freelist) {
2548 c->page = NULL;
2549 stat(s, DEACTIVATE_BYPASS);
2550 goto new_slab;
2551 }
2552
2553 stat(s, ALLOC_REFILL);
2554
2555load_freelist:
2556
2557
2558
2559
2560
2561 VM_BUG_ON(!c->page->frozen);
2562 c->freelist = get_freepointer(s, freelist);
2563 c->tid = next_tid(c->tid);
2564 return freelist;
2565
2566new_slab:
2567
2568 if (slub_percpu_partial(c)) {
2569 page = c->page = slub_percpu_partial(c);
2570 slub_set_percpu_partial(c, page);
2571 stat(s, CPU_PARTIAL_ALLOC);
2572 goto redo;
2573 }
2574
2575 freelist = new_slab_objects(s, gfpflags, node, &c);
2576
2577 if (unlikely(!freelist)) {
2578 slab_out_of_memory(s, gfpflags, node);
2579 return NULL;
2580 }
2581
2582 page = c->page;
2583 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2584 goto load_freelist;
2585
2586
2587 if (kmem_cache_debug(s) &&
2588 !alloc_debug_processing(s, page, freelist, addr))
2589 goto new_slab;
2590
2591 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2592 return freelist;
2593}
2594
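/*
 * Interrupt-disabling wrapper around ___slab_alloc() for the fastpath
 * callers, where the kmem_cache_cpu pointer may be stale under
 * CONFIG_PREEMPT and must be re-read with interrupts off.
 */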
2599static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2600 unsigned long addr, struct kmem_cache_cpu *c)
2601{
2602 void *p;
2603 unsigned long flags;
2604
2605 local_irq_save(flags);
2606#ifdef CONFIG_PREEMPT
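 /*
  * We may have been preempted and rescheduled on a different cpu
  * before interrupts were disabled, so reload the per-cpu pointer.
  */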
2612 c = this_cpu_ptr(s->cpu_slab);
2613#endif
2614
2615 p = ___slab_alloc(s, gfpflags, node, addr, c);
2616 local_irq_restore(flags);
2617 return p;
2618}
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
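/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * get the fastpath folded into their bodies, avoiding a function call for
 * requests that can be satisfied from the lockless per cpu freelist.
 *
 * If the lockless freelist cannot be used, __slab_alloc() is called for
 * slow path processing.
 */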
2630static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2631 gfp_t gfpflags, int node, unsigned long addr)
2632{
2633 void *object;
2634 struct kmem_cache_cpu *c;
2635 struct page *page;
2636 unsigned long tid;
2637
2638 s = slab_pre_alloc_hook(s, gfpflags);
2639 if (!s)
2640 return NULL;
2641redo:
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
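	/*
	 * tid and the per cpu area must be read on the same cpu. With
	 * CONFIG_PREEMPT we may migrate between the two reads, so retry
	 * until both values come from the same cpu. A stale pairing is
	 * harmless: the cmpxchg below checks the tid again and we redo.
	 */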
2652 do {
2653 tid = this_cpu_read(s->cpu_slab->tid);
2654 c = raw_cpu_ptr(s->cpu_slab);
2655 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2656 unlikely(tid != READ_ONCE(c->tid)));
2657
2658
2659
2660
2661
2662
2663
2664
2665
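	/*
	 * The irqless alloc/free algorithm depends on the order in which
	 * the cpu_slab fields are fetched: tid must be read before
	 * c->freelist and c->page so that an object from an older
	 * transaction is never paired with the current tid. If the pairing
	 * is stale the cmpxchg below fails and we simply retry.
	 */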
2666 barrier();
2667
2668
2669
2670
2671
2672
2673
2674
2675 object = c->freelist;
2676 page = c->page;
2677 if (unlikely(!object || !node_match(page, node))) {
2678 object = __slab_alloc(s, gfpflags, node, addr, c);
2679 stat(s, ALLOC_SLOWPATH);
2680 } else {
2681 void *next_object = get_freepointer_safe(s, object);
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
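		/*
		 * The cmpxchg does the following atomically (without lock
		 * semantics!):
		 * 1. Relocate the first pointer to the current per cpu area.
		 * 2. Verify that tid and freelist have not been changed.
		 * 3. If they were not changed, replace tid and freelist.
		 *
		 * This only protects against interference from code running
		 * on this cpu, not against accesses from other cpus.
		 */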
2697 if (unlikely(!this_cpu_cmpxchg_double(
2698 s->cpu_slab->freelist, s->cpu_slab->tid,
2699 object, tid,
2700 next_object, next_tid(tid)))) {
2701
2702 note_cmpxchg_failure("slab_alloc", s, tid);
2703 goto redo;
2704 }
2705 prefetch_freepointer(s, next_object);
2706 stat(s, ALLOC_FASTPATH);
2707 }
2708
2709 if (unlikely(gfpflags & __GFP_ZERO) && object)
2710 memset(object, 0, s->object_size);
2711
2712 slab_post_alloc_hook(s, gfpflags, 1, &object);
2713
2714 return object;
2715}
2716
2717static __always_inline void *slab_alloc(struct kmem_cache *s,
2718 gfp_t gfpflags, unsigned long addr)
2719{
2720 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2721}
2722
2723void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2724{
2725 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2726
2727 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2728 s->size, gfpflags);
2729
2730 return ret;
2731}
2732EXPORT_SYMBOL(kmem_cache_alloc);
2733
2734#ifdef CONFIG_TRACING
2735void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2736{
2737 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2738 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2739 kasan_kmalloc(s, ret, size, gfpflags);
2740 return ret;
2741}
2742EXPORT_SYMBOL(kmem_cache_alloc_trace);
2743#endif
2744
2745#ifdef CONFIG_NUMA
2746void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2747{
2748 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2749
2750 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2751 s->object_size, s->size, gfpflags, node);
2752
2753 return ret;
2754}
2755EXPORT_SYMBOL(kmem_cache_alloc_node);
2756
2757#ifdef CONFIG_TRACING
2758void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2759 gfp_t gfpflags,
2760 int node, size_t size)
2761{
2762 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2763
2764 trace_kmalloc_node(_RET_IP_, ret,
2765 size, s->size, gfpflags, node);
2766
2767 kasan_kmalloc(s, ret, size, gfpflags);
2768 return ret;
2769}
2770EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2771#endif
2772#endif
2773
2774
2775
2776
2777
2778
2779
2780
2781
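/*
 * Slow path handling for frees. This may still be called frequently since
 * objects commonly outlive the cpu slab they were allocated from.
 *
 * Freed objects are linked back into the page's freelist with a
 * cmpxchg_double. The per node list_lock is only taken when the slab has
 * to move between the partial, full and free states.
 */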
2782static void __slab_free(struct kmem_cache *s, struct page *page,
2783 void *head, void *tail, int cnt,
2784 unsigned long addr)
2785
2786{
2787 void *prior;
2788 int was_frozen;
2789 struct page new;
2790 unsigned long counters;
2791 struct kmem_cache_node *n = NULL;
2792 unsigned long uninitialized_var(flags);
2793
2794 stat(s, FREE_SLOWPATH);
2795
2796 if (kmem_cache_debug(s) &&
2797 !free_debug_processing(s, page, head, tail, cnt, addr))
2798 return;
2799
2800 do {
2801 if (unlikely(n)) {
2802 spin_unlock_irqrestore(&n->list_lock, flags);
2803 n = NULL;
2804 }
2805 prior = page->freelist;
2806 counters = page->counters;
2807 set_freepointer(s, tail, prior);
2808 new.counters = counters;
2809 was_frozen = new.frozen;
2810 new.inuse -= cnt;
2811 if ((!new.inuse || !prior) && !was_frozen) {
2812
2813 if (kmem_cache_has_cpu_partial(s) && !prior) {
2814
2815
2816
2817
2818
2819
2820
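				/*
				 * The slab was full and is about to become
				 * partially empty. Freeze it and defer the
				 * list move; put_cpu_partial() below will
				 * queue it on the per cpu partial list.
				 */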
2821 new.frozen = 1;
2822
2823 } else {
2824
2825 n = get_node(s, page_to_nid(page));
2826
2827
2828
2829
2830
2831
2832
2833
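				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed it is
				 * dropped again without any list processing;
				 * otherwise it synchronizes the list update
				 * against other processors.
				 */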
2834 spin_lock_irqsave(&n->list_lock, flags);
2835
2836 }
2837 }
2838
2839 } while (!cmpxchg_double_slab(s, page,
2840 prior, counters,
2841 head, new.counters,
2842 "__slab_free"));
2843
2844 if (likely(!n)) {
2845
2846
2847
2848
2849
2850 if (new.frozen && !was_frozen) {
2851 put_cpu_partial(s, page, 1);
2852 stat(s, CPU_PARTIAL_FREE);
2853 }
2854
2855
2856
2857
2858 if (was_frozen)
2859 stat(s, FREE_FROZEN);
2860 return;
2861 }
2862
2863 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2864 goto slab_empty;
2865
2866
2867
2868
2869
2870 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2871 if (kmem_cache_debug(s))
2872 remove_full(s, n, page);
2873 add_partial(n, page, DEACTIVATE_TO_TAIL);
2874 stat(s, FREE_ADD_PARTIAL);
2875 }
2876 spin_unlock_irqrestore(&n->list_lock, flags);
2877 return;
2878
2879slab_empty:
2880 if (prior) {
2881
2882
2883
2884 remove_partial(n, page);
2885 stat(s, FREE_REMOVE_PARTIAL);
2886 } else {
2887
2888 remove_full(s, n, page);
2889 }
2890
2891 spin_unlock_irqrestore(&n->list_lock, flags);
2892 stat(s, FREE_SLAB);
2893 discard_slab(s, page);
2894}
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
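/*
 * Fastpath for freeing, with forced inlining so that kfree() and
 * kmem_cache_free() get the fastpath folded in without extra calls.
 *
 * The fastpath is only possible when freeing to the current cpu slab;
 * otherwise we fall back to __slab_free().
 *
 * A whole detached freelist (objects all belonging to the same page) can
 * be freed at once by passing head, tail and the object count (cnt).
 */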
2911static __always_inline void do_slab_free(struct kmem_cache *s,
2912 struct page *page, void *head, void *tail,
2913 int cnt, unsigned long addr)
2914{
2915 void *tail_obj = tail ? : head;
2916 struct kmem_cache_cpu *c;
2917 unsigned long tid;
2918redo:
2919
2920
2921
2922
2923
2924
2925 do {
2926 tid = this_cpu_read(s->cpu_slab->tid);
2927 c = raw_cpu_ptr(s->cpu_slab);
2928 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2929 unlikely(tid != READ_ONCE(c->tid)));
2930
2931
2932 barrier();
2933
2934 if (likely(page == c->page)) {
2935 set_freepointer(s, tail_obj, c->freelist);
2936
2937 if (unlikely(!this_cpu_cmpxchg_double(
2938 s->cpu_slab->freelist, s->cpu_slab->tid,
2939 c->freelist, tid,
2940 head, next_tid(tid)))) {
2941
2942 note_cmpxchg_failure("slab_free", s, tid);
2943 goto redo;
2944 }
2945 stat(s, FREE_FASTPATH);
2946 } else
2947 __slab_free(s, page, head, tail_obj, cnt, addr);
2948
2949}
2950
2951static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
2952 void *head, void *tail, int cnt,
2953 unsigned long addr)
2954{
2955 slab_free_freelist_hook(s, head, tail);
2956
2957
2958
2959
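	/*
	 * slab_free_freelist_hook() may have put the objects into the KASAN
	 * quarantine. If so, their actual freeing is deferred and there is
	 * nothing left to do here.
	 */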
2960 if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
2961 return;
2962 do_slab_free(s, page, head, tail, cnt, addr);
2963}
2964
2965#ifdef CONFIG_KASAN
2966void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
2967{
2968 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
2969}
2970#endif
2971
2972void kmem_cache_free(struct kmem_cache *s, void *x)
2973{
2974 s = cache_from_obj(s, x);
2975 if (!s)
2976 return;
2977 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
2978 trace_kmem_cache_free(_RET_IP_, x);
2979}
2980EXPORT_SYMBOL(kmem_cache_free);
2981
2982struct detached_freelist {
2983 struct page *page;
2984 void *tail;
2985 void *freelist;
2986 int cnt;
2987 struct kmem_cache *s;
2988};
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
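/*
 * Scan the array of objects to be freed (with a limited look ahead) and
 * extract the objects that belong to the same page, linking them into a
 * detached freelist built directly within the objects themselves. No
 * synchronization is needed because the objects are still owned by the
 * calling process. The detached freelist is then handed to slab_free(),
 * which transfers it to the real freelist with a single synchronization
 * primitive.
 *
 * Returns the number of array entries that still need processing, or
 * zero when the array has been fully consumed.
 */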
3002static inline
3003int build_detached_freelist(struct kmem_cache *s, size_t size,
3004 void **p, struct detached_freelist *df)
3005{
3006 size_t first_skipped_index = 0;
3007 int lookahead = 3;
3008 void *object;
3009 struct page *page;
3010
3011
3012 df->page = NULL;
3013
3014 do {
3015 object = p[--size];
3016
3017 } while (!object && size);
3018
3019 if (!object)
3020 return 0;
3021
3022 page = virt_to_head_page(object);
3023 if (!s) {
3024
3025 if (unlikely(!PageSlab(page))) {
3026 BUG_ON(!PageCompound(page));
3027 kfree_hook(object);
3028 __free_pages(page, compound_order(page));
3029 p[size] = NULL;
3030 return size;
3031 }
3032
3033 df->s = page->slab_cache;
3034 } else {
3035 df->s = cache_from_obj(s, object);
3036 }
3037
3038
3039 df->page = page;
3040 set_freepointer(df->s, object, NULL);
3041 df->tail = object;
3042 df->freelist = object;
3043 p[size] = NULL;
3044 df->cnt = 1;
3045
3046 while (size) {
3047 object = p[--size];
3048 if (!object)
3049 continue;
3050
3051
3052 if (df->page == virt_to_head_page(object)) {
3053
3054 set_freepointer(df->s, object, df->freelist);
3055 df->freelist = object;
3056 df->cnt++;
3057 p[size] = NULL;
3058
3059 continue;
3060 }
3061
3062
3063 if (!--lookahead)
3064 break;
3065
3066 if (!first_skipped_index)
3067 first_skipped_index = size + 1;
3068 }
3069
3070 return first_skipped_index;
3071}
3072
3073
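/* Note that interrupts must be enabled when calling this function. */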
3074void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3075{
3076 if (WARN_ON(!size))
3077 return;
3078
3079 do {
3080 struct detached_freelist df;
3081
3082 size = build_detached_freelist(s, size, p, &df);
3083 if (!df.page)
3084 continue;
3085
3086 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
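		/* slab_free() handles both slab pages and detached freelists */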
3087 } while (likely(size));
3088}
3089EXPORT_SYMBOL(kmem_cache_free_bulk);
3090
3091
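/* Note that interrupts must be enabled when calling this function. */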
3092int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3093 void **p)
3094{
3095 struct kmem_cache_cpu *c;
3096 int i;
3097
3098
3099 s = slab_pre_alloc_hook(s, flags);
3100 if (unlikely(!s))
3101 return false;
3102
3103
3104
3105
3106
3107 local_irq_disable();
3108 c = this_cpu_ptr(s->cpu_slab);
3109
3110 for (i = 0; i < size; i++) {
3111 void *object = c->freelist;
3112
3113 if (unlikely(!object)) {
3114
3115
3116
3117
3118 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3119 _RET_IP_, c);
3120 if (unlikely(!p[i]))
3121 goto error;
3122
3123 c = this_cpu_ptr(s->cpu_slab);
3124 continue;
3125 }
3126 c->freelist = get_freepointer(s, object);
3127 p[i] = object;
3128 }
3129 c->tid = next_tid(c->tid);
3130 local_irq_enable();
3131
3132
3133 if (unlikely(flags & __GFP_ZERO)) {
3134 int j;
3135
3136 for (j = 0; j < i; j++)
3137 memset(p[j], 0, s->object_size);
3138 }
3139
3140
3141 slab_post_alloc_hook(s, flags, size, p);
3142 return i;
3143error:
3144 local_irq_enable();
3145 slab_post_alloc_hook(s, flags, i, p);
3146 __kmem_cache_free_bulk(s, i, p);
3147 return 0;
3148}
3149EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
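/*
 * Object placement in a slab is simple because we always start at
 * offset 0. If the object size is tuned to the alignment then the
 * required alignment is obtained simply by placing one properly sized
 * object after another.
 *
 * The allocation order determines the size of the per cpu slabs. Higher
 * orders reduce how often slabs must be moved on and off the partial
 * lists (and therefore locking overhead), at the cost of more
 * fragmentation pressure on the page allocator.
 */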
3171static int slub_min_order;
3172static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3173static int slub_min_objects;
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
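/*
 * Calculate the page order for a slab of a given object size.
 *
 * Order 0 allocations are preferred since they do not fragment the page
 * allocator, but larger objects may waste too much space in an order 0
 * slab. Starting from the smallest order that can hold min_objects
 * objects, the order is raised while more than 1/fract_leftover of the
 * slab would be wasted, without ever exceeding max_order.
 */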
3200static inline int slab_order(int size, int min_objects,
3201 int max_order, int fract_leftover, int reserved)
3202{
3203 int order;
3204 int rem;
3205 int min_order = slub_min_order;
3206
3207 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
3208 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3209
3210 for (order = max(min_order, get_order(min_objects * size + reserved));
3211 order <= max_order; order++) {
3212
3213 unsigned long slab_size = PAGE_SIZE << order;
3214
3215 rem = (slab_size - reserved) % size;
3216
3217 if (rem <= slab_size / fract_leftover)
3218 break;
3219 }
3220
3221 return order;
3222}
3223
3224static inline int calculate_order(int size, int reserved)
3225{
3226 int order;
3227 int min_objects;
3228 int fraction;
3229 int max_objects;
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239 min_objects = slub_min_objects;
3240 if (!min_objects)
3241 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3242 max_objects = order_objects(slub_max_order, size, reserved);
3243 min_objects = min(min_objects, max_objects);
3244
3245 while (min_objects > 1) {
3246 fraction = 16;
3247 while (fraction >= 4) {
3248 order = slab_order(size, min_objects,
3249 slub_max_order, fraction, reserved);
3250 if (order <= slub_max_order)
3251 return order;
3252 fraction /= 2;
3253 }
3254 min_objects--;
3255 }
3256
3257
3258
3259
3260
3261 order = slab_order(size, 1, slub_max_order, 1, reserved);
3262 if (order <= slub_max_order)
3263 return order;
3264
3265
3266
3267
3268 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
3269 if (order < MAX_ORDER)
3270 return order;
3271 return -ENOSYS;
3272}
3273
3274static void
3275init_kmem_cache_node(struct kmem_cache_node *n)
3276{
3277 n->nr_partial = 0;
3278 spin_lock_init(&n->list_lock);
3279 INIT_LIST_HEAD(&n->partial);
3280#ifdef CONFIG_SLUB_DEBUG
3281 atomic_long_set(&n->nr_slabs, 0);
3282 atomic_long_set(&n->total_objects, 0);
3283 INIT_LIST_HEAD(&n->full);
3284#endif
3285}
3286
3287static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3288{
3289 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3290 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3291
3292
3293
3294
3295
3296 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3297 2 * sizeof(void *));
3298
3299 if (!s->cpu_slab)
3300 return 0;
3301
3302 init_kmem_cache_cpus(s);
3303
3304 return 1;
3305}
3306
3307static struct kmem_cache *kmem_cache_node;
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
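/*
 * No kmalloc_node() is available yet, so the kmem_cache_node structure is
 * carved out of the first slab allocated on the node by hand. This is the
 * first slab on the node for this cache, so no concurrent accesses are
 * possible.
 *
 * This only works for the kmem_cache_node cache itself and is used to
 * bootstrap per node data on a fresh node that has no slab structures yet.
 */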
3318static void early_kmem_cache_node_alloc(int node)
3319{
3320 struct page *page;
3321 struct kmem_cache_node *n;
3322
3323 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3324
3325 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3326
3327 BUG_ON(!page);
3328 if (page_to_nid(page) != node) {
3329 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3330 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3331 }
3332
3333 n = page->freelist;
3334 BUG_ON(!n);
3335 page->freelist = get_freepointer(kmem_cache_node, n);
3336 page->inuse = 1;
3337 page->frozen = 0;
3338 kmem_cache_node->node[node] = n;
3339#ifdef CONFIG_SLUB_DEBUG
3340 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3341 init_tracking(kmem_cache_node, n);
3342#endif
3343 kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3344 GFP_KERNEL);
3345 init_kmem_cache_node(n);
3346 inc_slabs_node(kmem_cache_node, node, page->objects);
3347
3348
3349
3350
3351
3352 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3353}
3354
3355static void free_kmem_cache_nodes(struct kmem_cache *s)
3356{
3357 int node;
3358 struct kmem_cache_node *n;
3359
3360 for_each_kmem_cache_node(s, node, n) {
3361 kmem_cache_free(kmem_cache_node, n);
3362 s->node[node] = NULL;
3363 }
3364}
3365
3366void __kmem_cache_release(struct kmem_cache *s)
3367{
3368 cache_random_seq_destroy(s);
3369 free_percpu(s->cpu_slab);
3370 free_kmem_cache_nodes(s);
3371}
3372
3373static int init_kmem_cache_nodes(struct kmem_cache *s)
3374{
3375 int node;
3376
3377 for_each_node_state(node, N_NORMAL_MEMORY) {
3378 struct kmem_cache_node *n;
3379
3380 if (slab_state == DOWN) {
3381 early_kmem_cache_node_alloc(node);
3382 continue;
3383 }
3384 n = kmem_cache_alloc_node(kmem_cache_node,
3385 GFP_KERNEL, node);
3386
3387 if (!n) {
3388 free_kmem_cache_nodes(s);
3389 return 0;
3390 }
3391
3392 s->node[node] = n;
3393 init_kmem_cache_node(n);
3394 }
3395 return 1;
3396}
3397
3398static void set_min_partial(struct kmem_cache *s, unsigned long min)
3399{
3400 if (min < MIN_PARTIAL)
3401 min = MIN_PARTIAL;
3402 else if (min > MAX_PARTIAL)
3403 min = MAX_PARTIAL;
3404 s->min_partial = min;
3405}
3406
3407static void set_cpu_partial(struct kmem_cache *s)
3408{
3409#ifdef CONFIG_SLUB_CPU_PARTIAL
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
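	/*
	 * cpu_partial determines the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just had one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. Such slabs never hit the per
	 * node partial lists except when debugging is enabled, in which
	 * case per cpu partial lists are not used at all.
	 */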
3427 if (!kmem_cache_has_cpu_partial(s))
3428 s->cpu_partial = 0;
3429 else if (s->size >= PAGE_SIZE)
3430 s->cpu_partial = 2;
3431 else if (s->size >= 1024)
3432 s->cpu_partial = 6;
3433 else if (s->size >= 256)
3434 s->cpu_partial = 13;
3435 else
3436 s->cpu_partial = 30;
3437#endif
3438}
3439
3440
3441
3442
3443
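/*
 * calculate_sizes() determines the order and the distribution of data
 * within a slab object: poisoning, red zoning, the free pointer offset
 * and the tracking information.
 */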
3444static int calculate_sizes(struct kmem_cache *s, int forced_order)
3445{
3446 unsigned long flags = s->flags;
3447 size_t size = s->object_size;
3448 int order;
3449
3450
3451
3452
3453
3454
3455 size = ALIGN(size, sizeof(void *));
3456
3457#ifdef CONFIG_SLUB_DEBUG
3458
3459
3460
3461
3462
3463 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3464 !s->ctor)
3465 s->flags |= __OBJECT_POISON;
3466 else
3467 s->flags &= ~__OBJECT_POISON;
3468
3469
3470
3471
3472
3473
3474
3475 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3476 size += sizeof(void *);
3477#endif
3478
3479
3480
3481
3482
3483 s->inuse = size;
3484
3485 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3486 s->ctor)) {
3487
3488
3489
3490
3491
3492
3493
3494
3495 s->offset = size;
3496 size += sizeof(void *);
3497 }
3498
3499#ifdef CONFIG_SLUB_DEBUG
3500 if (flags & SLAB_STORE_USER)
3501
3502
3503
3504
3505 size += 2 * sizeof(struct track);
3506#endif
3507
3508 kasan_cache_create(s, &size, &s->flags);
3509#ifdef CONFIG_SLUB_DEBUG
3510 if (flags & SLAB_RED_ZONE) {
3511
3512
3513
3514
3515
3516
3517
3518 size += sizeof(void *);
3519
3520 s->red_left_pad = sizeof(void *);
3521 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3522 size += s->red_left_pad;
3523 }
3524#endif
3525
3526
3527
3528
3529
3530
3531 size = ALIGN(size, s->align);
3532 s->size = size;
3533 if (forced_order >= 0)
3534 order = forced_order;
3535 else
3536 order = calculate_order(size, s->reserved);
3537
3538 if (order < 0)
3539 return 0;
3540
3541 s->allocflags = 0;
3542 if (order)
3543 s->allocflags |= __GFP_COMP;
3544
3545 if (s->flags & SLAB_CACHE_DMA)
3546 s->allocflags |= GFP_DMA;
3547
3548 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3549 s->allocflags |= __GFP_RECLAIMABLE;
3550
3551
3552
3553
3554 s->oo = oo_make(order, size, s->reserved);
3555 s->min = oo_make(get_order(size), size, s->reserved);
3556 if (oo_objects(s->oo) > oo_objects(s->max))
3557 s->max = s->oo;
3558
3559 return !!oo_objects(s->oo);
3560}
3561
3562static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3563{
3564 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3565 s->reserved = 0;
3566
3567 if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
3568 s->reserved = sizeof(struct rcu_head);
3569
3570 if (!calculate_sizes(s, -1))
3571 goto error;
3572 if (disable_higher_order_debug) {
3573
3574
3575
3576
3577 if (get_order(s->size) > get_order(s->object_size)) {
3578 s->flags &= ~DEBUG_METADATA_FLAGS;
3579 s->offset = 0;
3580 if (!calculate_sizes(s, -1))
3581 goto error;
3582 }
3583 }
3584
3585#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3586 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3587 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3588
3589 s->flags |= __CMPXCHG_DOUBLE;
3590#endif
3591
3592
3593
3594
3595
3596 set_min_partial(s, ilog2(s->size) / 2);
3597
3598 set_cpu_partial(s);
3599
3600#ifdef CONFIG_NUMA
3601 s->remote_node_defrag_ratio = 1000;
3602#endif
3603
3604
3605 if (slab_state >= UP) {
3606 if (init_cache_random_seq(s))
3607 goto error;
3608 }
3609
3610 if (!init_kmem_cache_nodes(s))
3611 goto error;
3612
3613 if (alloc_kmem_cache_cpus(s))
3614 return 0;
3615
3616 free_kmem_cache_nodes(s);
3617error:
3618 if (flags & SLAB_PANIC)
3619 panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n",
3620 s->name, (unsigned long)s->size, s->size,
3621 oo_order(s->oo), s->offset, flags);
3622 return -EINVAL;
3623}
3624
3625static void list_slab_objects(struct kmem_cache *s, struct page *page,
3626 const char *text)
3627{
3628#ifdef CONFIG_SLUB_DEBUG
3629 void *addr = page_address(page);
3630 void *p;
3631 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3632 sizeof(long), GFP_ATOMIC);
3633 if (!map)
3634 return;
3635 slab_err(s, page, text, s->name);
3636 slab_lock(page);
3637
3638 get_map(s, page, map);
3639 for_each_object(p, s, addr, page->objects) {
3640
3641 if (!test_bit(slab_index(p, s, addr), map)) {
3642 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3643 print_tracking(s, p);
3644 }
3645 }
3646 slab_unlock(page);
3647 kfree(map);
3648#endif
3649}
3650
3651
3652
3653
3654
3655
3656static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3657{
3658 LIST_HEAD(discard);
3659 struct page *page, *h;
3660
3661 BUG_ON(irqs_disabled());
3662 spin_lock_irq(&n->list_lock);
3663 list_for_each_entry_safe(page, h, &n->partial, lru) {
3664 if (!page->inuse) {
3665 remove_partial(n, page);
3666 list_add(&page->lru, &discard);
3667 } else {
3668 list_slab_objects(s, page,
3669 "Objects remaining in %s on __kmem_cache_shutdown()");
3670 }
3671 }
3672 spin_unlock_irq(&n->list_lock);
3673
3674 list_for_each_entry_safe(page, h, &discard, lru)
3675 discard_slab(s, page);
3676}
3677
3678
3679
3680
3681int __kmem_cache_shutdown(struct kmem_cache *s)
3682{
3683 int node;
3684 struct kmem_cache_node *n;
3685
3686 flush_all(s);
3687
3688 for_each_kmem_cache_node(s, node, n) {
3689 free_partial(s, n);
3690 if (n->nr_partial || slabs_node(s, node))
3691 return 1;
3692 }
3693 sysfs_slab_remove(s);
3694 return 0;
3695}
3696
3697
3698
3699
3700
3701static int __init setup_slub_min_order(char *str)
3702{
3703 get_option(&str, &slub_min_order);
3704
3705 return 1;
3706}
3707
3708__setup("slub_min_order=", setup_slub_min_order);
3709
3710static int __init setup_slub_max_order(char *str)
3711{
3712 get_option(&str, &slub_max_order);
3713 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3714
3715 return 1;
3716}
3717
3718__setup("slub_max_order=", setup_slub_max_order);
3719
3720static int __init setup_slub_min_objects(char *str)
3721{
3722 get_option(&str, &slub_min_objects);
3723
3724 return 1;
3725}
3726
3727__setup("slub_min_objects=", setup_slub_min_objects);
3728
3729void *__kmalloc(size_t size, gfp_t flags)
3730{
3731 struct kmem_cache *s;
3732 void *ret;
3733
3734 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3735 return kmalloc_large(size, flags);
3736
3737 s = kmalloc_slab(size, flags);
3738
3739 if (unlikely(ZERO_OR_NULL_PTR(s)))
3740 return s;
3741
3742 ret = slab_alloc(s, flags, _RET_IP_);
3743
3744 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3745
3746 kasan_kmalloc(s, ret, size, flags);
3747
3748 return ret;
3749}
3750EXPORT_SYMBOL(__kmalloc);
3751
3752#ifdef CONFIG_NUMA
3753static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3754{
3755 struct page *page;
3756 void *ptr = NULL;
3757
3758 flags |= __GFP_COMP | __GFP_NOTRACK;
3759 page = alloc_pages_node(node, flags, get_order(size));
3760 if (page)
3761 ptr = page_address(page);
3762
3763 kmalloc_large_node_hook(ptr, size, flags);
3764 return ptr;
3765}
3766
3767void *__kmalloc_node(size_t size, gfp_t flags, int node)
3768{
3769 struct kmem_cache *s;
3770 void *ret;
3771
3772 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3773 ret = kmalloc_large_node(size, flags, node);
3774
3775 trace_kmalloc_node(_RET_IP_, ret,
3776 size, PAGE_SIZE << get_order(size),
3777 flags, node);
3778
3779 return ret;
3780 }
3781
3782 s = kmalloc_slab(size, flags);
3783
3784 if (unlikely(ZERO_OR_NULL_PTR(s)))
3785 return s;
3786
3787 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3788
3789 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3790
3791 kasan_kmalloc(s, ret, size, flags);
3792
3793 return ret;
3794}
3795EXPORT_SYMBOL(__kmalloc_node);
3796#endif
3797
3798#ifdef CONFIG_HARDENED_USERCOPY
3799
3800
3801
3802
3803
3804
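/*
 * Validate a usercopy of n bytes starting at ptr against the slab cache
 * backing the page, taking the left red zone into account.
 *
 * Returns NULL if the range falls entirely within a single object,
 * otherwise the cache name so the caller can report an error.
 */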
3805const char *__check_heap_object(const void *ptr, unsigned long n,
3806 struct page *page)
3807{
3808 struct kmem_cache *s;
3809 unsigned long offset;
3810 size_t object_size;
3811
3812
3813 s = page->slab_cache;
3814 object_size = slab_ksize(s);
3815
3816
3817 if (ptr < page_address(page))
3818 return s->name;
3819
3820
3821 offset = (ptr - page_address(page)) % s->size;
3822
3823
3824 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3825 if (offset < s->red_left_pad)
3826 return s->name;
3827 offset -= s->red_left_pad;
3828 }
3829
3830
3831 if (offset <= object_size && n <= object_size - offset)
3832 return NULL;
3833
3834 return s->name;
3835}
3836#endif
3837
3838static size_t __ksize(const void *object)
3839{
3840 struct page *page;
3841
3842 if (unlikely(object == ZERO_SIZE_PTR))
3843 return 0;
3844
3845 page = virt_to_head_page(object);
3846
3847 if (unlikely(!PageSlab(page))) {
3848 WARN_ON(!PageCompound(page));
3849 return PAGE_SIZE << compound_order(page);
3850 }
3851
3852 return slab_ksize(page->slab_cache);
3853}
3854
3855size_t ksize(const void *object)
3856{
3857 size_t size = __ksize(object);
3858
3859
3860
3861 kasan_unpoison_shadow(object, size);
3862 return size;
3863}
3864EXPORT_SYMBOL(ksize);
3865
3866void kfree(const void *x)
3867{
3868 struct page *page;
3869 void *object = (void *)x;
3870
3871 trace_kfree(_RET_IP_, x);
3872
3873 if (unlikely(ZERO_OR_NULL_PTR(x)))
3874 return;
3875
3876 page = virt_to_head_page(x);
3877 if (unlikely(!PageSlab(page))) {
3878 BUG_ON(!PageCompound(page));
3879 kfree_hook(x);
3880 __free_pages(page, compound_order(page));
3881 return;
3882 }
3883 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3884}
3885EXPORT_SYMBOL(kfree);
3886
3887#define SHRINK_PROMOTE_MAX 32
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
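/*
 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
 * up most to the head of the partial lists. New allocations will then
 * fill those up and thus they can be removed from the partial lists.
 *
 * The slabs with the fewest objects in use are placed last, increasing
 * the chance that their remaining objects are freed and the whole slab
 * can eventually be discarded.
 */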
3898int __kmem_cache_shrink(struct kmem_cache *s)
3899{
3900 int node;
3901 int i;
3902 struct kmem_cache_node *n;
3903 struct page *page;
3904 struct page *t;
3905 struct list_head discard;
3906 struct list_head promote[SHRINK_PROMOTE_MAX];
3907 unsigned long flags;
3908 int ret = 0;
3909
3910 flush_all(s);
3911 for_each_kmem_cache_node(s, node, n) {
3912 INIT_LIST_HEAD(&discard);
3913 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
3914 INIT_LIST_HEAD(promote + i);
3915
3916 spin_lock_irqsave(&n->list_lock, flags);
3917
3918
3919
3920
3921
3922
3923
3924 list_for_each_entry_safe(page, t, &n->partial, lru) {
3925 int free = page->objects - page->inuse;
3926
3927
3928 barrier();
3929
3930
3931 BUG_ON(free <= 0);
3932
3933 if (free == page->objects) {
3934 list_move(&page->lru, &discard);
3935 n->nr_partial--;
3936 } else if (free <= SHRINK_PROMOTE_MAX)
3937 list_move(&page->lru, promote + free - 1);
3938 }
3939
3940
3941
3942
3943
3944 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
3945 list_splice(promote + i, &n->partial);
3946
3947 spin_unlock_irqrestore(&n->list_lock, flags);
3948
3949
3950 list_for_each_entry_safe(page, t, &discard, lru)
3951 discard_slab(s, page);
3952
3953 if (slabs_node(s, node))
3954 ret = 1;
3955 }
3956
3957 return ret;
3958}
3959
3960#ifdef CONFIG_MEMCG
3961static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
3962{
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975 if (!__kmem_cache_shrink(s))
3976 sysfs_slab_remove(s);
3977}
3978
3979void __kmemcg_cache_deactivate(struct kmem_cache *s)
3980{
3981
3982
3983
3984
3985 slub_set_cpu_partial(s, 0);
3986 s->min_partial = 0;
3987
3988
3989
3990
3991
3992 slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
3993}
3994#endif
3995
3996static int slab_mem_going_offline_callback(void *arg)
3997{
3998 struct kmem_cache *s;
3999
4000 mutex_lock(&slab_mutex);
4001 list_for_each_entry(s, &slab_caches, list)
4002 __kmem_cache_shrink(s);
4003 mutex_unlock(&slab_mutex);
4004
4005 return 0;
4006}
4007
4008static void slab_mem_offline_callback(void *arg)
4009{
4010 struct kmem_cache_node *n;
4011 struct kmem_cache *s;
4012 struct memory_notify *marg = arg;
4013 int offline_node;
4014
4015 offline_node = marg->status_change_nid_normal;
4016
4017
4018
4019
4020
4021 if (offline_node < 0)
4022 return;
4023
4024 mutex_lock(&slab_mutex);
4025 list_for_each_entry(s, &slab_caches, list) {
4026 n = get_node(s, offline_node);
4027 if (n) {
4028
4029
4030
4031
4032
4033
4034 BUG_ON(slabs_node(s, offline_node));
4035
4036 s->node[offline_node] = NULL;
4037 kmem_cache_free(kmem_cache_node, n);
4038 }
4039 }
4040 mutex_unlock(&slab_mutex);
4041}
4042
4043static int slab_mem_going_online_callback(void *arg)
4044{
4045 struct kmem_cache_node *n;
4046 struct kmem_cache *s;
4047 struct memory_notify *marg = arg;
4048 int nid = marg->status_change_nid_normal;
4049 int ret = 0;
4050
4051
4052
4053
4054
4055 if (nid < 0)
4056 return 0;
4057
4058
4059
4060
4061
4062
4063 mutex_lock(&slab_mutex);
4064 list_for_each_entry(s, &slab_caches, list) {
4065
4066
4067
4068
4069
4070 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4071 if (!n) {
4072 ret = -ENOMEM;
4073 goto out;
4074 }
4075 init_kmem_cache_node(n);
4076 s->node[nid] = n;
4077 }
4078out:
4079 mutex_unlock(&slab_mutex);
4080 return ret;
4081}
4082
4083static int slab_memory_callback(struct notifier_block *self,
4084 unsigned long action, void *arg)
4085{
4086 int ret = 0;
4087
4088 switch (action) {
4089 case MEM_GOING_ONLINE:
4090 ret = slab_mem_going_online_callback(arg);
4091 break;
4092 case MEM_GOING_OFFLINE:
4093 ret = slab_mem_going_offline_callback(arg);
4094 break;
4095 case MEM_OFFLINE:
4096 case MEM_CANCEL_ONLINE:
4097 slab_mem_offline_callback(arg);
4098 break;
4099 case MEM_ONLINE:
4100 case MEM_CANCEL_OFFLINE:
4101 break;
4102 }
4103 if (ret)
4104 ret = notifier_from_errno(ret);
4105 else
4106 ret = NOTIFY_OK;
4107 return ret;
4108}
4109
4110static struct notifier_block slab_memory_callback_nb = {
4111 .notifier_call = slab_memory_callback,
4112 .priority = SLAB_CALLBACK_PRI,
4113};
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
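/*
 * Used for the early kmem_cache structures that were statically
 * allocated at boot. Copy them into properly allocated kmem_cache
 * objects and fix up the page->slab_cache pointers that still reference
 * the static copies.
 */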
4125static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4126{
4127 int node;
4128 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4129 struct kmem_cache_node *n;
4130
4131 memcpy(s, static_cache, kmem_cache->object_size);
4132
4133
4134
4135
4136
4137
4138 __flush_cpu_slab(s, smp_processor_id());
4139 for_each_kmem_cache_node(s, node, n) {
4140 struct page *p;
4141
4142 list_for_each_entry(p, &n->partial, lru)
4143 p->slab_cache = s;
4144
4145#ifdef CONFIG_SLUB_DEBUG
4146 list_for_each_entry(p, &n->full, lru)
4147 p->slab_cache = s;
4148#endif
4149 }
4150 slab_init_memcg_params(s);
4151 list_add(&s->list, &slab_caches);
4152 memcg_link_cache(s);
4153 return s;
4154}
4155
4156void __init kmem_cache_init(void)
4157{
4158 static __initdata struct kmem_cache boot_kmem_cache,
4159 boot_kmem_cache_node;
4160
4161 if (debug_guardpage_minorder())
4162 slub_max_order = 0;
4163
4164 kmem_cache_node = &boot_kmem_cache_node;
4165 kmem_cache = &boot_kmem_cache;
4166
4167 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4168 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
4169
4170 register_hotmemory_notifier(&slab_memory_callback_nb);
4171
4172
4173 slab_state = PARTIAL;
4174
4175 create_boot_cache(kmem_cache, "kmem_cache",
4176 offsetof(struct kmem_cache, node) +
4177 nr_node_ids * sizeof(struct kmem_cache_node *),
4178 SLAB_HWCACHE_ALIGN);
4179
4180 kmem_cache = bootstrap(&boot_kmem_cache);
4181
4182
4183
4184
4185
4186
4187 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4188
4189
4190 setup_kmalloc_cache_index_table();
4191 create_kmalloc_caches(0);
4192
4193
4194 init_freelist_randomization();
4195
4196 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4197 slub_cpu_dead);
4198
4199 pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
4200 cache_line_size(),
4201 slub_min_order, slub_max_order, slub_min_objects,
4202 nr_cpu_ids, nr_node_ids);
4203}
4204
4205void __init kmem_cache_init_late(void)
4206{
4207}
4208
4209struct kmem_cache *
4210__kmem_cache_alias(const char *name, size_t size, size_t align,
4211 unsigned long flags, void (*ctor)(void *))
4212{
4213 struct kmem_cache *s, *c;
4214
4215 s = find_mergeable(size, align, flags, name, ctor);
4216 if (s) {
4217 s->refcount++;
4218
4219
4220
4221
4222
4223 s->object_size = max(s->object_size, (int)size);
4224 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
4225
4226 for_each_memcg_cache(c, s) {
4227 c->object_size = s->object_size;
4228 c->inuse = max_t(int, c->inuse,
4229 ALIGN(size, sizeof(void *)));
4230 }
4231
4232 if (sysfs_slab_alias(s, name)) {
4233 s->refcount--;
4234 s = NULL;
4235 }
4236 }
4237
4238 return s;
4239}
4240
4241int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
4242{
4243 int err;
4244
4245 err = kmem_cache_open(s, flags);
4246 if (err)
4247 return err;
4248
4249
4250 if (slab_state <= UP)
4251 return 0;
4252
4253 memcg_propagate_slab_attrs(s);
4254 err = sysfs_slab_add(s);
4255 if (err)
4256 __kmem_cache_release(s);
4257
4258 return err;
4259}
4260
4261void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4262{
4263 struct kmem_cache *s;
4264 void *ret;
4265
4266 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4267 return kmalloc_large(size, gfpflags);
4268
4269 s = kmalloc_slab(size, gfpflags);
4270
4271 if (unlikely(ZERO_OR_NULL_PTR(s)))
4272 return s;
4273
4274 ret = slab_alloc(s, gfpflags, caller);
4275
4276
4277 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4278
4279 return ret;
4280}
4281
4282#ifdef CONFIG_NUMA
4283void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4284 int node, unsigned long caller)
4285{
4286 struct kmem_cache *s;
4287 void *ret;
4288
4289 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4290 ret = kmalloc_large_node(size, gfpflags, node);
4291
4292 trace_kmalloc_node(caller, ret,
4293 size, PAGE_SIZE << get_order(size),
4294 gfpflags, node);
4295
4296 return ret;
4297 }
4298
4299 s = kmalloc_slab(size, gfpflags);
4300
4301 if (unlikely(ZERO_OR_NULL_PTR(s)))
4302 return s;
4303
4304 ret = slab_alloc_node(s, gfpflags, node, caller);
4305
4306
4307 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4308
4309 return ret;
4310}
4311#endif
4312
4313#ifdef CONFIG_SYSFS
4314static int count_inuse(struct page *page)
4315{
4316 return page->inuse;
4317}
4318
4319static int count_total(struct page *page)
4320{
4321 return page->objects;
4322}
4323#endif
4324
4325#ifdef CONFIG_SLUB_DEBUG
4326static int validate_slab(struct kmem_cache *s, struct page *page,
4327 unsigned long *map)
4328{
4329 void *p;
4330 void *addr = page_address(page);
4331
4332 if (!check_slab(s, page) ||
4333 !on_freelist(s, page, NULL))
4334 return 0;
4335
4336
4337 bitmap_zero(map, page->objects);
4338
4339 get_map(s, page, map);
4340 for_each_object(p, s, addr, page->objects) {
4341 if (test_bit(slab_index(p, s, addr), map))
4342 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4343 return 0;
4344 }
4345
4346 for_each_object(p, s, addr, page->objects)
4347 if (!test_bit(slab_index(p, s, addr), map))
4348 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4349 return 0;
4350 return 1;
4351}
4352
4353static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4354 unsigned long *map)
4355{
4356 slab_lock(page);
4357 validate_slab(s, page, map);
4358 slab_unlock(page);
4359}
4360
4361static int validate_slab_node(struct kmem_cache *s,
4362 struct kmem_cache_node *n, unsigned long *map)
4363{
4364 unsigned long count = 0;
4365 struct page *page;
4366 unsigned long flags;
4367
4368 spin_lock_irqsave(&n->list_lock, flags);
4369
4370 list_for_each_entry(page, &n->partial, lru) {
4371 validate_slab_slab(s, page, map);
4372 count++;
4373 }
4374 if (count != n->nr_partial)
4375 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4376 s->name, count, n->nr_partial);
4377
4378 if (!(s->flags & SLAB_STORE_USER))
4379 goto out;
4380
4381 list_for_each_entry(page, &n->full, lru) {
4382 validate_slab_slab(s, page, map);
4383 count++;
4384 }
4385 if (count != atomic_long_read(&n->nr_slabs))
4386 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4387 s->name, count, atomic_long_read(&n->nr_slabs));
4388
4389out:
4390 spin_unlock_irqrestore(&n->list_lock, flags);
4391 return count;
4392}
4393
4394static long validate_slab_cache(struct kmem_cache *s)
4395{
4396 int node;
4397 unsigned long count = 0;
4398 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4399 sizeof(unsigned long), GFP_KERNEL);
4400 struct kmem_cache_node *n;
4401
4402 if (!map)
4403 return -ENOMEM;
4404
4405 flush_all(s);
4406 for_each_kmem_cache_node(s, node, n)
4407 count += validate_slab_node(s, n, map);
4408 kfree(map);
4409 return count;
4410}
4411
4412
4413
4414
4415
4416struct location {
4417 unsigned long count;
4418 unsigned long addr;
4419 long long sum_time;
4420 long min_time;
4421 long max_time;
4422 long min_pid;
4423 long max_pid;
4424 DECLARE_BITMAP(cpus, NR_CPUS);
4425 nodemask_t nodes;
4426};
4427
4428struct loc_track {
4429 unsigned long max;
4430 unsigned long count;
4431 struct location *loc;
4432};
4433
4434static void free_loc_track(struct loc_track *t)
4435{
4436 if (t->max)
4437 free_pages((unsigned long)t->loc,
4438 get_order(sizeof(struct location) * t->max));
4439}
4440
4441static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4442{
4443 struct location *l;
4444 int order;
4445
4446 order = get_order(sizeof(struct location) * max);
4447
4448 l = (void *)__get_free_pages(flags, order);
4449 if (!l)
4450 return 0;
4451
4452 if (t->count) {
4453 memcpy(l, t->loc, sizeof(struct location) * t->count);
4454 free_loc_track(t);
4455 }
4456 t->max = max;
4457 t->loc = l;
4458 return 1;
4459}
4460
4461static int add_location(struct loc_track *t, struct kmem_cache *s,
4462 const struct track *track)
4463{
4464 long start, end, pos;
4465 struct location *l;
4466 unsigned long caddr;
4467 unsigned long age = jiffies - track->when;
4468
4469 start = -1;
4470 end = t->count;
4471
4472 for ( ; ; ) {
4473 pos = start + (end - start + 1) / 2;
4474
4475
4476
4477
4478
4479 if (pos == end)
4480 break;
4481
4482 caddr = t->loc[pos].addr;
4483 if (track->addr == caddr) {
4484
4485 l = &t->loc[pos];
4486 l->count++;
4487 if (track->when) {
4488 l->sum_time += age;
4489 if (age < l->min_time)
4490 l->min_time = age;
4491 if (age > l->max_time)
4492 l->max_time = age;
4493
4494 if (track->pid < l->min_pid)
4495 l->min_pid = track->pid;
4496 if (track->pid > l->max_pid)
4497 l->max_pid = track->pid;
4498
4499 cpumask_set_cpu(track->cpu,
4500 to_cpumask(l->cpus));
4501 }
4502 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4503 return 1;
4504 }
4505
4506 if (track->addr < caddr)
4507 end = pos;
4508 else
4509 start = pos;
4510 }
4511
4512
4513
4514
4515 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4516 return 0;
4517
4518 l = t->loc + pos;
4519 if (pos < t->count)
4520 memmove(l + 1, l,
4521 (t->count - pos) * sizeof(struct location));
4522 t->count++;
4523 l->count = 1;
4524 l->addr = track->addr;
4525 l->sum_time = age;
4526 l->min_time = age;
4527 l->max_time = age;
4528 l->min_pid = track->pid;
4529 l->max_pid = track->pid;
4530 cpumask_clear(to_cpumask(l->cpus));
4531 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4532 nodes_clear(l->nodes);
4533 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4534 return 1;
4535}
4536
4537static void process_slab(struct loc_track *t, struct kmem_cache *s,
4538 struct page *page, enum track_item alloc,
4539 unsigned long *map)
4540{
4541 void *addr = page_address(page);
4542 void *p;
4543
4544 bitmap_zero(map, page->objects);
4545 get_map(s, page, map);
4546
4547 for_each_object(p, s, addr, page->objects)
4548 if (!test_bit(slab_index(p, s, addr), map))
4549 add_location(t, s, get_track(s, p, alloc));
4550}
4551
4552static int list_locations(struct kmem_cache *s, char *buf,
4553 enum track_item alloc)
4554{
4555 int len = 0;
4556 unsigned long i;
4557 struct loc_track t = { 0, 0, NULL };
4558 int node;
4559 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4560 sizeof(unsigned long), GFP_KERNEL);
4561 struct kmem_cache_node *n;
4562
4563 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4564 GFP_TEMPORARY)) {
4565 kfree(map);
4566 return sprintf(buf, "Out of memory\n");
4567 }
4568
4569 flush_all(s);
4570
4571 for_each_kmem_cache_node(s, node, n) {
4572 unsigned long flags;
4573 struct page *page;
4574
4575 if (!atomic_long_read(&n->nr_slabs))
4576 continue;
4577
4578 spin_lock_irqsave(&n->list_lock, flags);
4579 list_for_each_entry(page, &n->partial, lru)
4580 process_slab(&t, s, page, alloc, map);
4581 list_for_each_entry(page, &n->full, lru)
4582 process_slab(&t, s, page, alloc, map);
4583 spin_unlock_irqrestore(&n->list_lock, flags);
4584 }
4585
4586 for (i = 0; i < t.count; i++) {
4587 struct location *l = &t.loc[i];
4588
4589 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4590 break;
4591 len += sprintf(buf + len, "%7ld ", l->count);
4592
4593 if (l->addr)
4594 len += sprintf(buf + len, "%pS", (void *)l->addr);
4595 else
4596 len += sprintf(buf + len, "<not-available>");
4597
4598 if (l->sum_time != l->min_time) {
4599 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4600 l->min_time,
4601 (long)div_u64(l->sum_time, l->count),
4602 l->max_time);
4603 } else
4604 len += sprintf(buf + len, " age=%ld",
4605 l->min_time);
4606
4607 if (l->min_pid != l->max_pid)
4608 len += sprintf(buf + len, " pid=%ld-%ld",
4609 l->min_pid, l->max_pid);
4610 else
4611 len += sprintf(buf + len, " pid=%ld",
4612 l->min_pid);
4613
4614 if (num_online_cpus() > 1 &&
4615 !cpumask_empty(to_cpumask(l->cpus)) &&
4616 len < PAGE_SIZE - 60)
4617 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4618 " cpus=%*pbl",
4619 cpumask_pr_args(to_cpumask(l->cpus)));
4620
4621 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4622 len < PAGE_SIZE - 60)
4623 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4624 " nodes=%*pbl",
4625 nodemask_pr_args(&l->nodes));
4626
4627 len += sprintf(buf + len, "\n");
4628 }
4629
4630 free_loc_track(&t);
4631 kfree(map);
4632 if (!t.count)
4633 len += sprintf(buf, "No data\n");
4634 return len;
4635}
4636#endif
4637
4638#ifdef SLUB_RESILIENCY_TEST
4639static void __init resiliency_test(void)
4640{
4641 u8 *p;
4642
4643 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4644
4645 pr_err("SLUB resiliency testing\n");
4646 pr_err("-----------------------\n");
4647 pr_err("A. Corruption after allocation\n");
4648
4649 p = kzalloc(16, GFP_KERNEL);
4650 p[16] = 0x12;
4651 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4652 p + 16);
4653
4654 validate_slab_cache(kmalloc_caches[4]);
4655
4656
4657 p = kzalloc(32, GFP_KERNEL);
4658 p[32 + sizeof(void *)] = 0x34;
4659 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4660 p);
4661 pr_err("If allocated object is overwritten then not detectable\n\n");
4662
4663 validate_slab_cache(kmalloc_caches[5]);
4664 p = kzalloc(64, GFP_KERNEL);
4665 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4666 *p = 0x56;
4667 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4668 p);
4669 pr_err("If allocated object is overwritten then not detectable\n\n");
4670 validate_slab_cache(kmalloc_caches[6]);
4671
4672 pr_err("\nB. Corruption after free\n");
4673 p = kzalloc(128, GFP_KERNEL);
4674 kfree(p);
4675 *p = 0x78;
4676 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4677 validate_slab_cache(kmalloc_caches[7]);
4678
4679 p = kzalloc(256, GFP_KERNEL);
4680 kfree(p);
4681 p[50] = 0x9a;
4682 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4683 validate_slab_cache(kmalloc_caches[8]);
4684
4685 p = kzalloc(512, GFP_KERNEL);
4686 kfree(p);
4687 p[512] = 0xab;
4688 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4689 validate_slab_cache(kmalloc_caches[9]);
4690}
4691#else
4692#ifdef CONFIG_SYSFS
4693static void resiliency_test(void) {}
4694#endif
4695#endif
4696
4697#ifdef CONFIG_SYSFS
4698enum slab_stat_type {
4699 SL_ALL,
4700 SL_PARTIAL,
4701 SL_CPU,
4702 SL_OBJECTS,
4703 SL_TOTAL
4704};
4705
4706#define SO_ALL (1 << SL_ALL)
4707#define SO_PARTIAL (1 << SL_PARTIAL)
4708#define SO_CPU (1 << SL_CPU)
4709#define SO_OBJECTS (1 << SL_OBJECTS)
4710#define SO_TOTAL (1 << SL_TOTAL)
4711
4712#ifdef CONFIG_MEMCG
4713static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4714
4715static int __init setup_slub_memcg_sysfs(char *str)
4716{
4717 int v;
4718
4719 if (get_option(&str, &v) > 0)
4720 memcg_sysfs_enabled = v;
4721
4722 return 1;
4723}
4724
4725__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4726#endif
4727
4728static ssize_t show_slab_objects(struct kmem_cache *s,
4729 char *buf, unsigned long flags)
4730{
4731 unsigned long total = 0;
4732 int node;
4733 int x;
4734 unsigned long *nodes;
4735
4736 nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4737 if (!nodes)
4738 return -ENOMEM;
4739
4740 if (flags & SO_CPU) {
4741 int cpu;
4742
4743 for_each_possible_cpu(cpu) {
4744 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4745 cpu);
4746 int node;
4747 struct page *page;
4748
4749 page = READ_ONCE(c->page);
4750 if (!page)
4751 continue;
4752
4753 node = page_to_nid(page);
4754 if (flags & SO_TOTAL)
4755 x = page->objects;
4756 else if (flags & SO_OBJECTS)
4757 x = page->inuse;
4758 else
4759 x = 1;
4760
4761 total += x;
4762 nodes[node] += x;
4763
4764 page = slub_percpu_partial_read_once(c);
4765 if (page) {
4766 node = page_to_nid(page);
4767 if (flags & SO_TOTAL)
4768 WARN_ON_ONCE(1);
4769 else if (flags & SO_OBJECTS)
4770 WARN_ON_ONCE(1);
4771 else
4772 x = page->pages;
4773 total += x;
4774 nodes[node] += x;
4775 }
4776 }
4777 }
4778
4779 get_online_mems();
4780#ifdef CONFIG_SLUB_DEBUG
4781 if (flags & SO_ALL) {
4782 struct kmem_cache_node *n;
4783
4784 for_each_kmem_cache_node(s, node, n) {
4785
4786 if (flags & SO_TOTAL)
4787 x = atomic_long_read(&n->total_objects);
4788 else if (flags & SO_OBJECTS)
4789 x = atomic_long_read(&n->total_objects) -
4790 count_partial(n, count_free);
4791 else
4792 x = atomic_long_read(&n->nr_slabs);
4793 total += x;
4794 nodes[node] += x;
4795 }
4796
4797 } else
4798#endif
4799 if (flags & SO_PARTIAL) {
4800 struct kmem_cache_node *n;
4801
4802 for_each_kmem_cache_node(s, node, n) {
4803 if (flags & SO_TOTAL)
4804 x = count_partial(n, count_total);
4805 else if (flags & SO_OBJECTS)
4806 x = count_partial(n, count_inuse);
4807 else
4808 x = n->nr_partial;
4809 total += x;
4810 nodes[node] += x;
4811 }
4812 }
4813 x = sprintf(buf, "%lu", total);
4814#ifdef CONFIG_NUMA
4815 for (node = 0; node < nr_node_ids; node++)
4816 if (nodes[node])
4817 x += sprintf(buf + x, " N%d=%lu",
4818 node, nodes[node]);
4819#endif
4820 put_online_mems();
4821 kfree(nodes);
4822 return x + sprintf(buf + x, "\n");
4823}
4824
4825#ifdef CONFIG_SLUB_DEBUG
4826static int any_slab_objects(struct kmem_cache *s)
4827{
4828 int node;
4829 struct kmem_cache_node *n;
4830
4831 for_each_kmem_cache_node(s, node, n)
4832 if (atomic_long_read(&n->total_objects))
4833 return 1;
4834
4835 return 0;
4836}
4837#endif
4838
4839#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4840#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4841
4842struct slab_attribute {
4843 struct attribute attr;
4844 ssize_t (*show)(struct kmem_cache *s, char *buf);
4845 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4846};
4847
4848#define SLAB_ATTR_RO(_name) \
4849 static struct slab_attribute _name##_attr = \
4850 __ATTR(_name, 0400, _name##_show, NULL)
4851
4852#define SLAB_ATTR(_name) \
4853 static struct slab_attribute _name##_attr = \
4854 __ATTR(_name, 0600, _name##_show, _name##_store)
4855
4856static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4857{
4858 return sprintf(buf, "%d\n", s->size);
4859}
4860SLAB_ATTR_RO(slab_size);
4861
4862static ssize_t align_show(struct kmem_cache *s, char *buf)
4863{
4864 return sprintf(buf, "%d\n", s->align);
4865}
4866SLAB_ATTR_RO(align);
4867
4868static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4869{
4870 return sprintf(buf, "%d\n", s->object_size);
4871}
4872SLAB_ATTR_RO(object_size);
4873
4874static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4875{
4876 return sprintf(buf, "%d\n", oo_objects(s->oo));
4877}
4878SLAB_ATTR_RO(objs_per_slab);
4879
4880static ssize_t order_store(struct kmem_cache *s,
4881 const char *buf, size_t length)
4882{
4883 unsigned long order;
4884 int err;
4885
4886 err = kstrtoul(buf, 10, &order);
4887 if (err)
4888 return err;
4889
4890 if (order > slub_max_order || order < slub_min_order)
4891 return -EINVAL;
4892
4893 calculate_sizes(s, order);
4894 return length;
4895}
4896
4897static ssize_t order_show(struct kmem_cache *s, char *buf)
4898{
4899 return sprintf(buf, "%d\n", oo_order(s->oo));
4900}
4901SLAB_ATTR(order);
4902
4903static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4904{
4905 return sprintf(buf, "%lu\n", s->min_partial);
4906}
4907
4908static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4909 size_t length)
4910{
4911 unsigned long min;
4912 int err;
4913
4914 err = kstrtoul(buf, 10, &min);
4915 if (err)
4916 return err;
4917
4918 set_min_partial(s, min);
4919 return length;
4920}
4921SLAB_ATTR(min_partial);
4922
4923static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4924{
4925 return sprintf(buf, "%u\n", slub_cpu_partial(s));
4926}
4927
4928static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4929 size_t length)
4930{
4931 unsigned long objects;
4932 int err;
4933
4934 err = kstrtoul(buf, 10, &objects);
4935 if (err)
4936 return err;
4937 if (objects && !kmem_cache_has_cpu_partial(s))
4938 return -EINVAL;
4939
4940 slub_set_cpu_partial(s, objects);
4941 flush_all(s);
4942 return length;
4943}
4944SLAB_ATTR(cpu_partial);
4945
4946static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4947{
4948 if (!s->ctor)
4949 return 0;
4950 return sprintf(buf, "%pS\n", s->ctor);
4951}
4952SLAB_ATTR_RO(ctor);
4953
4954static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4955{
4956 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
4957}
4958SLAB_ATTR_RO(aliases);
4959
4960static ssize_t partial_show(struct kmem_cache *s, char *buf)
4961{
4962 return show_slab_objects(s, buf, SO_PARTIAL);
4963}
4964SLAB_ATTR_RO(partial);
4965
4966static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4967{
4968 return show_slab_objects(s, buf, SO_CPU);
4969}
4970SLAB_ATTR_RO(cpu_slabs);
4971
4972static ssize_t objects_show(struct kmem_cache *s, char *buf)
4973{
4974 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4975}
4976SLAB_ATTR_RO(objects);
4977
4978static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4979{
4980 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4981}
4982SLAB_ATTR_RO(objects_partial);
4983
4984static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4985{
4986 int objects = 0;
4987 int pages = 0;
4988 int cpu;
4989 int len;
4990
4991 for_each_online_cpu(cpu) {
4992 struct page *page;
4993
4994 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
4995
4996 if (page) {
4997 pages += page->pages;
4998 objects += page->pobjects;
4999 }
5000 }
5001
5002 len = sprintf(buf, "%d(%d)", objects, pages);
5003
5004#ifdef CONFIG_SMP
5005 for_each_online_cpu(cpu) {
5006 struct page *page;
5007
5008 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5009
5010 if (page && len < PAGE_SIZE - 20)
5011 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5012 page->pobjects, page->pages);
5013 }
5014#endif
5015 return len + sprintf(buf + len, "\n");
5016}
5017SLAB_ATTR_RO(slabs_cpu_partial);
5018
5019static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5020{
5021 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5022}
5023
5024static ssize_t reclaim_account_store(struct kmem_cache *s,
5025 const char *buf, size_t length)
5026{
5027 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5028 if (buf[0] == '1')
5029 s->flags |= SLAB_RECLAIM_ACCOUNT;
5030 return length;
5031}
5032SLAB_ATTR(reclaim_account);
5033
5034static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5035{
5036 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5037}
5038SLAB_ATTR_RO(hwcache_align);
5039
5040#ifdef CONFIG_ZONE_DMA
5041static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5042{
5043 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5044}
5045SLAB_ATTR_RO(cache_dma);
5046#endif
5047
5048static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5049{
5050 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5051}
5052SLAB_ATTR_RO(destroy_by_rcu);
5053
5054static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5055{
5056 return sprintf(buf, "%d\n", s->reserved);
5057}
5058SLAB_ATTR_RO(reserved);
5059
5060#ifdef CONFIG_SLUB_DEBUG
5061static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5062{
5063 return show_slab_objects(s, buf, SO_ALL);
5064}
5065SLAB_ATTR_RO(slabs);
5066
5067static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5068{
5069 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5070}
5071SLAB_ATTR_RO(total_objects);
5072
5073static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5074{
5075 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5076}
5077
5078static ssize_t sanity_checks_store(struct kmem_cache *s,
5079 const char *buf, size_t length)
5080{
5081 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5082 if (buf[0] == '1') {
5083 s->flags &= ~__CMPXCHG_DOUBLE;
5084 s->flags |= SLAB_CONSISTENCY_CHECKS;
5085 }
5086 return length;
5087}
5088SLAB_ATTR(sanity_checks);
5089
5090static ssize_t trace_show(struct kmem_cache *s, char *buf)
5091{
5092 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5093}
5094
5095static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5096 size_t length)
5097{
5098
5099
5100
5101
5102
5103 if (s->refcount > 1)
5104 return -EINVAL;
5105
5106 s->flags &= ~SLAB_TRACE;
5107 if (buf[0] == '1') {
5108 s->flags &= ~__CMPXCHG_DOUBLE;
5109 s->flags |= SLAB_TRACE;
5110 }
5111 return length;
5112}
5113SLAB_ATTR(trace);
5114
5115static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5116{
5117 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5118}
5119
5120static ssize_t red_zone_store(struct kmem_cache *s,
5121 const char *buf, size_t length)
5122{
5123 if (any_slab_objects(s))
5124 return -EBUSY;
5125
5126 s->flags &= ~SLAB_RED_ZONE;
5127 if (buf[0] == '1') {
5128 s->flags |= SLAB_RED_ZONE;
5129 }
5130 calculate_sizes(s, -1);
5131 return length;
5132}
5133SLAB_ATTR(red_zone);
5134
5135static ssize_t poison_show(struct kmem_cache *s, char *buf)
5136{
5137 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5138}
5139
5140static ssize_t poison_store(struct kmem_cache *s,
5141 const char *buf, size_t length)
5142{
5143 if (any_slab_objects(s))
5144 return -EBUSY;
5145
5146 s->flags &= ~SLAB_POISON;
5147 if (buf[0] == '1') {
5148 s->flags |= SLAB_POISON;
5149 }
5150 calculate_sizes(s, -1);
5151 return length;
5152}
5153SLAB_ATTR(poison);
5154
5155static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5156{
5157 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5158}
5159
5160static ssize_t store_user_store(struct kmem_cache *s,
5161 const char *buf, size_t length)
5162{
5163 if (any_slab_objects(s))
5164 return -EBUSY;
5165
5166 s->flags &= ~SLAB_STORE_USER;
5167 if (buf[0] == '1') {
5168 s->flags &= ~__CMPXCHG_DOUBLE;
5169 s->flags |= SLAB_STORE_USER;
5170 }
5171 calculate_sizes(s, -1);
5172 return length;
5173}
5174SLAB_ATTR(store_user);
5175
5176static ssize_t validate_show(struct kmem_cache *s, char *buf)
5177{
5178 return 0;
5179}
5180
5181static ssize_t validate_store(struct kmem_cache *s,
5182 const char *buf, size_t length)
5183{
5184 int ret = -EINVAL;
5185
5186 if (buf[0] == '1') {
5187 ret = validate_slab_cache(s);
5188 if (ret >= 0)
5189 ret = length;
5190 }
5191 return ret;
5192}
5193SLAB_ATTR(validate);
5194
5195static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5196{
5197 if (!(s->flags & SLAB_STORE_USER))
5198 return -ENOSYS;
5199 return list_locations(s, buf, TRACK_ALLOC);
5200}
5201SLAB_ATTR_RO(alloc_calls);
5202
5203static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5204{
5205 if (!(s->flags & SLAB_STORE_USER))
5206 return -ENOSYS;
5207 return list_locations(s, buf, TRACK_FREE);
5208}
5209SLAB_ATTR_RO(free_calls);
5210#endif
5211
5212#ifdef CONFIG_FAILSLAB
5213static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5214{
5215 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5216}
5217
5218static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5219 size_t length)
5220{
5221 if (s->refcount > 1)
5222 return -EINVAL;
5223
5224 s->flags &= ~SLAB_FAILSLAB;
5225 if (buf[0] == '1')
5226 s->flags |= SLAB_FAILSLAB;
5227 return length;
5228}
5229SLAB_ATTR(failslab);
5230#endif
5231
5232static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5233{
5234 return 0;
5235}
5236
5237static ssize_t shrink_store(struct kmem_cache *s,
5238 const char *buf, size_t length)
5239{
5240 if (buf[0] == '1')
5241 kmem_cache_shrink(s);
5242 else
5243 return -EINVAL;
5244 return length;
5245}
5246SLAB_ATTR(shrink);

#ifdef CONFIG_NUMA
static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
}

static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	unsigned long ratio;
	int err;

	err = kstrtoul(buf, 10, &ratio);
	if (err)
		return err;

	if (ratio <= 100)
		s->remote_node_defrag_ratio = ratio * 10;

	return length;
}
SLAB_ATTR(remote_node_defrag_ratio);
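
/*
 * The ratio is presented to user space as a percentage (0-100) but is
 * kept internally scaled by ten; it controls how eagerly get_any_partial()
 * steals partial slabs from remote NUMA nodes instead of allocating a new
 * slab locally.  Values above 100 are silently ignored.
 */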
#endif

#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	unsigned long sum = 0;
	int cpu;
	int len;
	int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
	}

	len = sprintf(buf, "%lu", sum);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		if (data[cpu] && len < PAGE_SIZE - 20)
			len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
	}
#endif
	kfree(data);
	return len + sprintf(buf + len, "\n");
}
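
/*
 * A statistics file therefore reads as the global total followed by the
 * non-zero per-cpu contributions, e.g. "4526 C0=2315 C1=2211" on a
 * two-CPU system (the per-cpu breakdown is only compiled in on SMP).
 * The "len < PAGE_SIZE - 20" check keeps the output within the single
 * page that sysfs provides for the buffer.
 */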

static void clear_stat(struct kmem_cache *s, enum stat_item si)
{
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
}

#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);

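/*
 * Each STAT_ATTR() thus creates a read/write attribute: reading prints
 * the summed per-cpu event counters via show_stat(), and writing "0"
 * (and only "0") clears them with clear_stat().  The attributes map
 * one-to-one to the enum stat_item events listed below.
 */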
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif

	NULL
};

static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};
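
/*
 * slab_attr_group is registered on each cache's kobject by
 * sysfs_slab_add() below, so every entry in slab_attrs[] shows up as a
 * file under /sys/kernel/slab/<cache>/ (for example
 * /sys/kernel/slab/kmalloc-64/order).
 */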

static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
#ifdef CONFIG_MEMCG
	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
		struct kmem_cache *c;

		mutex_lock(&slab_mutex);
		if (s->max_attr_size < len)
			s->max_attr_size = len;

		/*
		 * Attribute writes to a root cache are propagated to all of
		 * its memcg child caches on a best-effort basis: the return
		 * value reflects only the root cache, because most stores
		 * have permanent effects and cannot be meaningfully rolled
		 * back for individual children.  max_attr_size is remembered
		 * so that memcg_propagate_slab_attrs() knows how large a
		 * buffer it may need when new children are created later.
		 */
		for_each_memcg_cache(c, s)
			attribute->store(c, buf, len);
		mutex_unlock(&slab_mutex);
	}
#endif
	return err;
}

static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	int i;
	char *buffer = NULL;
	struct kmem_cache *root_cache;

	if (is_root_cache(s))
		return;

	root_cache = s->memcg_params.root_cache;

	/*
	 * No attribute was ever written to the root cache, so there is
	 * nothing to copy to this new child.
	 */
	if (!root_cache->max_attr_size)
		return;

	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
		char mbuf[64];
		char *buf;
		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
		ssize_t len;

		if (!attr || !attr->store || !attr->show)
			continue;

		/*
		 * Most attribute values are tiny, so the small on-stack
		 * buffer is normally enough.  Sysfs allows writes of up to a
		 * page, however, so if the largest value written to the root
		 * cache does not fit in mbuf, fall back to allocating a full
		 * page once and reuse it for the remaining attributes.
		 */
		if (buffer)
			buf = buffer;
		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
			buf = mbuf;
		else {
			buffer = (char *) get_zeroed_page(GFP_KERNEL);
			if (WARN_ON(!buffer))
				continue;
			buf = buffer;
		}

		len = attr->show(root_cache, buf);
		if (len > 0)
			attr->store(s, buf, len);
	}

	if (buffer)
		free_page((unsigned long)buffer);
#endif
}

static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};
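
/*
 * The kobject embedded in struct kmem_cache uses slab_ktype, so the
 * final kobject_put() (see sysfs_slab_release() below) ends up in
 * kmem_cache_release(), which hands the cache to
 * slab_kmem_cache_release() for the actual freeing.
 */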

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	if (!is_root_cache(s))
		return s->memcg_params.root_cache->memcg_kset;
#endif
	return slab_kset;
}

#define ID_STR_LENGTH 64

/*
 * Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';

	/*
	 * Encode the flags that influence slab merging first.  Only
	 * mergeable caches ever get here, so the handful of flags that
	 * are compared during merging is enough to make the id unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (!(s->flags & SLAB_NOTRACK))
		*p++ = 't';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07d", s->size);

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}
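
/*
 * For example, a SLAB_CACHE_DMA cache without SLAB_NOTRACK and with
 * s->size == 192 gets the id ":dt-0000192", while a cache with none of
 * the flags set is simply ":0000192".  These ids become the kobject
 * names that the human-readable cache names are symlinked to (see
 * sysfs_slab_add() below).
 */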

static void sysfs_slab_remove_workfn(struct work_struct *work)
{
	struct kmem_cache *s =
		container_of(work, struct kmem_cache, kobj_remove_work);

	if (!s->kobj.state_in_sysfs)
		/*
		 * For a memcg cache, this may be called both during
		 * deactivation and again on shutdown.  If the kobject has
		 * already been taken out of sysfs there is nothing left to
		 * do except drop the reference taken in sysfs_slab_remove().
		 */
		goto out;

#ifdef CONFIG_MEMCG
	kset_unregister(s->memcg_kset);
#endif
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
out:
	kobject_put(&s->kobj);
}

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);

	if (!kset) {
		kobject_init(&s->kobj, &slab_ktype);
		return 0;
	}

	if (unmergeable) {
		/*
		 * Unmergeable caches can use their name directly.  This is
		 * typically the case for debug configurations and makes
		 * duplicate names easy to catch.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Mergeable caches get a unique id as the kobject name so
		 * that the human-readable names can be symlinked to it.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

#ifdef CONFIG_MEMCG
	if (is_root_cache(s) && memcg_sysfs_enabled) {
		s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
		if (!s->memcg_kset) {
			err = -ENOMEM;
			goto out_del_kobj;
		}
	}
#endif

	kobject_uevent(&s->kobj, KOBJ_ADD);
	if (!unmergeable) {
		/* Set up the first alias, pointing the cache name at the id. */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}

static void sysfs_slab_remove(struct kmem_cache *s)
{
	if (slab_state < FULL)
		/*
		 * Sysfs has not been set up yet, so there is nothing to
		 * remove.
		 */
		return;

	kobject_get(&s->kobj);
	schedule_work(&s->kobj_remove_work);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_put(&s->kobj);
}

/*
 * Aliases requested before sysfs is available are buffered here and
 * registered later from slab_sysfs_init(), so the information is not
 * lost during early boot.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link from an earlier cache with the
		 * same name, remove it before creating the new one.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
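
/*
 * The two hooks below are intentionally stubs for SLUB: per-cache
 * statistics and tunables are exposed through /sys/kernel/slab instead,
 * so /proc/slabinfo shows no extra stats and rejects writes.
 */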

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif
