// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 *   slab_mutex
 *
 *   The role of the slab_mutex is to protect the list of all the slabs
 *   and to synchronize major metadata changes to slab cache structures.
 *
 *   The slab_lock is only used for debugging and on arches that do not
 *   have the ability to do a cmpxchg_double. It only protects:
 *	A. page->freelist	-> List of free objects in a page
 *	B. page->inuse		-> Number of objects in use
 *	C. page->objects	-> Number of objects in page
 *	D. page->frozen		-> frozen state
 *
 *   If a slab is frozen then it is exempt from list management. It is not
 *   on any list. The processor that froze the slab is the one who can
 *   perform list operations on the page. Other processors may put objects
 *   onto the freelist but the processor that froze the slab is the only
 *   one that can retrieve objects from the page's freelist.
 *
 *   The list_lock protects the partial and full list on each node and
 *   the partial slab counter. If taken then no new slabs may be added or
 *   removed from the lists nor may the number of partial slabs be modified.
 *   (Note that the total number of slabs is an atomic value that may be
 *   modified without taking the list lock).
 *
 *   SLUB assigns one slab for allocation to each processor.
 *   Allocations only occur from these slabs called cpu slabs.
 *
 *   Slabs with free elements are kept on a partial list and during regular
 *   operations no list for full slabs is used. If an object in a full slab
 *   is freed then the slab will show up again on the partial lists.
 *   We track full slabs for debugging purposes though because otherwise we
 *   cannot scan all objects.
 *
 *   Slabs are freed when they become empty. Teardown and setup is
 *   minimal so we rely on the page allocator's per cpu caches for
 *   fast frees and allocs.
 */

/********************************************************************
 *			Core slab cache functions
 *******************************************************************/

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

void *fixup_red_left(struct kmem_cache *s, void *p)
{
	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
		p += s->red_left_pad;

	return p;
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab.  These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid the overhead of disabling preemption/interrupts here.
	 */
	raw_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

/*
 * Returns the freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the pointer address and a per-cache random number.
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * When CONFIG_SLAB_FREELIST_HARDENED is enabled, XOR the stored
	 * pointer with s->random and with the (untagged) address it is
	 * stored at, so a leaked freelist entry does not disclose usable
	 * kernel addresses. kasan_reset_tag() makes the encoding identical
	 * whether the caller passed a tagged or an untagged ptr_addr.
	 */
	return (void *)((unsigned long)ptr ^ s->random ^
			(unsigned long)kasan_reset_tag((void *)ptr_addr));
#else
	return ptr;
#endif
}

/* Returns the freelist pointer recorded at location ptr_addr. */
static inline void *freelist_dereference(const struct kmem_cache *s,
					 void *ptr_addr)
{
	return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
			    (unsigned long)ptr_addr);
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return freelist_dereference(s, object + s->offset);
}

static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
	prefetch(object + s->offset);
}

/*
 * Like get_freepointer(), but when DEBUG_PAGEALLOC is enabled the object's
 * page may be unmapped, so read the free pointer with probe_kernel_read()
 * instead of risking a fault.
 */
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	unsigned long freepointer_addr;
	void *p;

	if (!debug_pagealloc_enabled())
		return get_freepointer(s, object);

	freepointer_addr = (unsigned long)object + s->offset;
	probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
	return freelist_ptr(s, p, freepointer_addr);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	unsigned long freeptr_addr = (unsigned long)object + s->offset;

#ifdef CONFIG_SLAB_FREELIST_HARDENED
	BUG_ON(object == fp); /* naive detection of double free or corruption */
#endif

	*(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = fixup_red_left(__s, __addr); \
		__p < (__addr) + (__objects) * (__s)->size; \
		__p += (__s)->size)

/* Determine object index from a given position */
static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (kasan_reset_tag(p) - addr) / s->size;
}

static inline unsigned int order_objects(unsigned int order, unsigned int size)
{
	return ((unsigned int)PAGE_SIZE << order) / size;
}

static inline struct kmem_cache_order_objects oo_make(unsigned int order,
		unsigned int size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size)
	};

	return x;
}

static inline unsigned int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	__bit_spin_unlock(PG_locked, &page->flags);
}

/* Interrupts must be disabled (for the fallback code to work right) */
365static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
366 void *freelist_old, unsigned long counters_old,
367 void *freelist_new, unsigned long counters_new,
368 const char *n)
369{
370 VM_BUG_ON(!irqs_disabled());
371#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
372 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
373 if (s->flags & __CMPXCHG_DOUBLE) {
374 if (cmpxchg_double(&page->freelist, &page->counters,
375 freelist_old, counters_old,
376 freelist_new, counters_new))
377 return true;
378 } else
379#endif
380 {
381 slab_lock(page);
382 if (page->freelist == freelist_old &&
383 page->counters == counters_old) {
384 page->freelist = freelist_new;
385 page->counters = counters_new;
386 slab_unlock(page);
387 return true;
388 }
389 slab_unlock(page);
390 }
391
392 cpu_relax();
393 stat(s, CMPXCHG_DOUBLE_FAIL);
394
395#ifdef SLUB_DEBUG_CMPXCHG
396 pr_info("%s %s: cmpxchg double redo ", n, s->name);
397#endif
398
399 return false;
400}
401
402static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
403 void *freelist_old, unsigned long counters_old,
404 void *freelist_new, unsigned long counters_new,
405 const char *n)
406{
407#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
408 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
409 if (s->flags & __CMPXCHG_DOUBLE) {
410 if (cmpxchg_double(&page->freelist, &page->counters,
411 freelist_old, counters_old,
412 freelist_new, counters_new))
413 return true;
414 } else
415#endif
416 {
417 unsigned long flags;
418
419 local_irq_save(flags);
420 slab_lock(page);
421 if (page->freelist == freelist_old &&
422 page->counters == counters_old) {
423 page->freelist = freelist_new;
424 page->counters = counters_new;
425 slab_unlock(page);
426 local_irq_restore(flags);
427 return true;
428 }
429 slab_unlock(page);
430 local_irq_restore(flags);
431 }
432
433 cpu_relax();
434 stat(s, CMPXCHG_DOUBLE_FAIL);
435
436#ifdef SLUB_DEBUG_CMPXCHG
437 pr_info("%s %s: cmpxchg double redo ", n, s->name);
438#endif
439
440 return false;
441}
442
443#ifdef CONFIG_SLUB_DEBUG
/*
 * Determine a map of objects in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
450static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
451{
452 void *p;
453 void *addr = page_address(page);
454
455 for (p = page->freelist; p; p = get_freepointer(s, p))
456 set_bit(slab_index(p, s, addr), map);
457}
458
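/*
 * With SLAB_RED_ZONE, s->size includes the left redzone pad placed before
 * the object; this returns the per-object size without that pad.
 */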
459static inline unsigned int size_from_object(struct kmem_cache *s)
460{
461 if (s->flags & SLAB_RED_ZONE)
462 return s->size - s->red_left_pad;
463
464 return s->size;
465}
466
467static inline void *restore_red_left(struct kmem_cache *s, void *p)
468{
469 if (s->flags & SLAB_RED_ZONE)
470 p -= s->red_left_pad;
471
472 return p;
473}
474
/*
 * Debug settings:
 */
478#if defined(CONFIG_SLUB_DEBUG_ON)
479static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
480#else
481static slab_flags_t slub_debug;
482#endif
483
484static char *slub_debug_slabs;
485static int disable_higher_order_debug;
486
/*
 * slub is about to manipulate internal object metadata.  This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error.  metadata_access_enable() is used
 * to tell kasan that these accesses are OK.
 */
493static inline void metadata_access_enable(void)
494{
495 kasan_disable_current();
496}
497
498static inline void metadata_access_disable(void)
499{
500 kasan_enable_current();
501}
502
503
504
505
506
507
508static inline int check_valid_pointer(struct kmem_cache *s,
509 struct page *page, void *object)
510{
511 void *base;
512
513 if (!object)
514 return 1;
515
516 base = page_address(page);
517 object = kasan_reset_tag(object);
518 object = restore_red_left(s, object);
519 if (object < base || object >= base + page->objects * s->size ||
520 (object - base) % s->size) {
521 return 0;
522 }
523
524 return 1;
525}
526
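/*
 * Hex-dump a range of object/metadata memory; KASAN checking is suspended
 * around the access since the range may include redzones and padding.
 */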
527static void print_section(char *level, char *text, u8 *addr,
528 unsigned int length)
529{
530 metadata_access_enable();
531 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
532 length, 1);
533 metadata_access_disable();
534}
535
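/*
 * Tracking data is placed after the free pointer when the free pointer is
 * stored outside the object (s->offset != 0), otherwise right after the
 * object proper (at s->inuse).
 */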
536static struct track *get_track(struct kmem_cache *s, void *object,
537 enum track_item alloc)
538{
539 struct track *p;
540
541 if (s->offset)
542 p = object + s->offset + sizeof(void *);
543 else
544 p = object + s->inuse;
545
546 return p + alloc;
547}
548
549static void set_track(struct kmem_cache *s, void *object,
550 enum track_item alloc, unsigned long addr)
551{
552 struct track *p = get_track(s, object, alloc);
553
554 if (addr) {
555#ifdef CONFIG_STACKTRACE
556 unsigned int nr_entries;
557
558 metadata_access_enable();
559 nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
560 metadata_access_disable();
561
562 if (nr_entries < TRACK_ADDRS_COUNT)
563 p->addrs[nr_entries] = 0;
564#endif
565 p->addr = addr;
566 p->cpu = smp_processor_id();
567 p->pid = current->pid;
568 p->when = jiffies;
569 } else {
570 memset(p, 0, sizeof(struct track));
571 }
572}
573
574static void init_tracking(struct kmem_cache *s, void *object)
575{
576 if (!(s->flags & SLAB_STORE_USER))
577 return;
578
579 set_track(s, object, TRACK_FREE, 0UL);
580 set_track(s, object, TRACK_ALLOC, 0UL);
581}
582
583static void print_track(const char *s, struct track *t, unsigned long pr_time)
584{
585 if (!t->addr)
586 return;
587
588 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
589 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
590#ifdef CONFIG_STACKTRACE
591 {
592 int i;
593 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
594 if (t->addrs[i])
595 pr_err("\t%pS\n", (void *)t->addrs[i]);
596 else
597 break;
598 }
599#endif
600}
601
602static void print_tracking(struct kmem_cache *s, void *object)
603{
604 unsigned long pr_time = jiffies;
605 if (!(s->flags & SLAB_STORE_USER))
606 return;
607
608 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
609 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
610}
611
612static void print_page_info(struct page *page)
613{
614 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
615 page, page->objects, page->inuse, page->freelist, page->flags);
616
617}
618
619static void slab_bug(struct kmem_cache *s, char *fmt, ...)
620{
621 struct va_format vaf;
622 va_list args;
623
624 va_start(args, fmt);
625 vaf.fmt = fmt;
626 vaf.va = &args;
627 pr_err("=============================================================================\n");
628 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
629 pr_err("-----------------------------------------------------------------------------\n\n");
630
631 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
632 va_end(args);
633}
634
635static void slab_fix(struct kmem_cache *s, char *fmt, ...)
636{
637 struct va_format vaf;
638 va_list args;
639
640 va_start(args, fmt);
641 vaf.fmt = fmt;
642 vaf.va = &args;
643 pr_err("FIX %s: %pV\n", s->name, &vaf);
644 va_end(args);
645}
646
647static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
648{
649 unsigned int off;
650 u8 *addr = page_address(page);
651
652 print_tracking(s, p);
653
654 print_page_info(page);
655
656 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
657 p, p - addr, get_freepointer(s, p));
658
659 if (s->flags & SLAB_RED_ZONE)
660 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
661 s->red_left_pad);
662 else if (p > addr + 16)
663 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
664
665 print_section(KERN_ERR, "Object ", p,
666 min_t(unsigned int, s->object_size, PAGE_SIZE));
667 if (s->flags & SLAB_RED_ZONE)
668 print_section(KERN_ERR, "Redzone ", p + s->object_size,
669 s->inuse - s->object_size);
670
671 if (s->offset)
672 off = s->offset + sizeof(void *);
673 else
674 off = s->inuse;
675
676 if (s->flags & SLAB_STORE_USER)
677 off += 2 * sizeof(struct track);
678
679 off += kasan_metadata_size(s);
680
681 if (off != size_from_object(s))
682
683 print_section(KERN_ERR, "Padding ", p + off,
684 size_from_object(s) - off);
685
686 dump_stack();
687}
688
689void object_err(struct kmem_cache *s, struct page *page,
690 u8 *object, char *reason)
691{
692 slab_bug(s, "%s", reason);
693 print_trailer(s, page, object);
694}
695
696static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
697 const char *fmt, ...)
698{
699 va_list args;
700 char buf[100];
701
702 va_start(args, fmt);
703 vsnprintf(buf, sizeof(buf), fmt, args);
704 va_end(args);
705 slab_bug(s, "%s", buf);
706 print_page_info(page);
707 dump_stack();
708}
709
710static void init_object(struct kmem_cache *s, void *object, u8 val)
711{
712 u8 *p = object;
713
714 if (s->flags & SLAB_RED_ZONE)
715 memset(p - s->red_left_pad, val, s->red_left_pad);
716
717 if (s->flags & __OBJECT_POISON) {
718 memset(p, POISON_FREE, s->object_size - 1);
719 p[s->object_size - 1] = POISON_END;
720 }
721
722 if (s->flags & SLAB_RED_ZONE)
723 memset(p + s->object_size, val, s->inuse - s->object_size);
724}
725
726static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
727 void *from, void *to)
728{
729 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
730 memset(from, data, to - from);
731}
732
733static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
734 u8 *object, char *what,
735 u8 *start, unsigned int value, unsigned int bytes)
736{
737 u8 *fault;
738 u8 *end;
739
740 metadata_access_enable();
741 fault = memchr_inv(start, value, bytes);
742 metadata_access_disable();
743 if (!fault)
744 return 1;
745
746 end = start + bytes;
747 while (end > fault && end[-1] == value)
748 end--;
749
750 slab_bug(s, "%s overwritten", what);
751 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
752 fault, end - 1, fault[0], value);
753 print_trailer(s, page, object);
754
755 restore_bytes(s, what, value, fault, end);
756 return 0;
757}
758
/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->object_size
 * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended by another word if Redzoning is enabled and
 * 	object_size == inuse.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at minimum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored. And therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */
797static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
798{
799 unsigned long off = s->inuse;
800
801 if (s->offset)
802
803 off += sizeof(void *);
804
805 if (s->flags & SLAB_STORE_USER)
806
807 off += 2 * sizeof(struct track);
808
809 off += kasan_metadata_size(s);
810
811 if (size_from_object(s) == off)
812 return 1;
813
814 return check_bytes_and_report(s, page, p, "Object padding",
815 p + off, POISON_INUSE, size_from_object(s) - off);
816}
817
818
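/* Check the pad bytes at the end of a slab page */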
819static int slab_pad_check(struct kmem_cache *s, struct page *page)
820{
821 u8 *start;
822 u8 *fault;
823 u8 *end;
824 u8 *pad;
825 int length;
826 int remainder;
827
828 if (!(s->flags & SLAB_POISON))
829 return 1;
830
831 start = page_address(page);
832 length = PAGE_SIZE << compound_order(page);
833 end = start + length;
834 remainder = length % s->size;
835 if (!remainder)
836 return 1;
837
838 pad = end - remainder;
839 metadata_access_enable();
840 fault = memchr_inv(pad, POISON_INUSE, remainder);
841 metadata_access_disable();
842 if (!fault)
843 return 1;
844 while (end > fault && end[-1] == POISON_INUSE)
845 end--;
846
847 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
848 print_section(KERN_ERR, "Padding ", pad, remainder);
849
850 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
851 return 0;
852}
853
854static int check_object(struct kmem_cache *s, struct page *page,
855 void *object, u8 val)
856{
857 u8 *p = object;
858 u8 *endobject = object + s->object_size;
859
860 if (s->flags & SLAB_RED_ZONE) {
861 if (!check_bytes_and_report(s, page, object, "Redzone",
862 object - s->red_left_pad, val, s->red_left_pad))
863 return 0;
864
865 if (!check_bytes_and_report(s, page, object, "Redzone",
866 endobject, val, s->inuse - s->object_size))
867 return 0;
868 } else {
869 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
870 check_bytes_and_report(s, page, p, "Alignment padding",
871 endobject, POISON_INUSE,
872 s->inuse - s->object_size);
873 }
874 }
875
876 if (s->flags & SLAB_POISON) {
877 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
878 (!check_bytes_and_report(s, page, p, "Poison", p,
879 POISON_FREE, s->object_size - 1) ||
880 !check_bytes_and_report(s, page, p, "Poison",
881 p + s->object_size - 1, POISON_END, 1)))
882 return 0;
883
884
885
886 check_pad_bytes(s, page, p);
887 }
888
889 if (!s->offset && val == SLUB_RED_ACTIVE)
890
891
892
893
894 return 1;
895
896
897 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
898 object_err(s, page, p, "Freepointer corrupt");
899
900
901
902
903
904 set_freepointer(s, p, NULL);
905 return 0;
906 }
907 return 1;
908}
909
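/*
 * Consistency checks applied to the slab page as a whole (valid PageSlab,
 * sane object counts) before individual objects are examined.
 */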
910static int check_slab(struct kmem_cache *s, struct page *page)
911{
912 int maxobj;
913
914 VM_BUG_ON(!irqs_disabled());
915
916 if (!PageSlab(page)) {
917 slab_err(s, page, "Not a valid slab page");
918 return 0;
919 }
920
921 maxobj = order_objects(compound_order(page), s->size);
922 if (page->objects > maxobj) {
923 slab_err(s, page, "objects %u > max %u",
924 page->objects, maxobj);
925 return 0;
926 }
927 if (page->inuse > page->objects) {
928 slab_err(s, page, "inuse %u > max %u",
929 page->inuse, page->objects);
930 return 0;
931 }
932
933 slab_pad_check(s, page);
934 return 1;
935}
936
/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the chains are in a consistent state.
 */
941static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
942{
943 int nr = 0;
944 void *fp;
945 void *object = NULL;
946 int max_objects;
947
948 fp = page->freelist;
949 while (fp && nr <= page->objects) {
950 if (fp == search)
951 return 1;
952 if (!check_valid_pointer(s, page, fp)) {
953 if (object) {
954 object_err(s, page, object,
955 "Freechain corrupt");
956 set_freepointer(s, object, NULL);
957 } else {
958 slab_err(s, page, "Freepointer corrupt");
959 page->freelist = NULL;
960 page->inuse = page->objects;
961 slab_fix(s, "Freelist cleared");
962 return 0;
963 }
964 break;
965 }
966 object = fp;
967 fp = get_freepointer(s, object);
968 nr++;
969 }
970
971 max_objects = order_objects(compound_order(page), s->size);
972 if (max_objects > MAX_OBJS_PER_PAGE)
973 max_objects = MAX_OBJS_PER_PAGE;
974
975 if (page->objects != max_objects) {
976 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
977 page->objects, max_objects);
978 page->objects = max_objects;
979 slab_fix(s, "Number of objects adjusted.");
980 }
981 if (page->inuse != page->objects - nr) {
982 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
983 page->inuse, page->objects - nr);
984 page->inuse = page->objects - nr;
985 slab_fix(s, "Object count adjusted.");
986 }
987 return search == NULL;
988}
989
990static void trace(struct kmem_cache *s, struct page *page, void *object,
991 int alloc)
992{
993 if (s->flags & SLAB_TRACE) {
994 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
995 s->name,
996 alloc ? "alloc" : "free",
997 object, page->inuse,
998 page->freelist);
999
1000 if (!alloc)
1001 print_section(KERN_INFO, "Object ", (void *)object,
1002 s->object_size);
1003
1004 dump_stack();
1005 }
1006}
1007
/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1011static void add_full(struct kmem_cache *s,
1012 struct kmem_cache_node *n, struct page *page)
1013{
1014 if (!(s->flags & SLAB_STORE_USER))
1015 return;
1016
1017 lockdep_assert_held(&n->list_lock);
1018 list_add(&page->slab_list, &n->full);
1019}
1020
1021static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1022{
1023 if (!(s->flags & SLAB_STORE_USER))
1024 return;
1025
1026 lockdep_assert_held(&n->list_lock);
1027 list_del(&page->slab_list);
1028}
1029
1030
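/* Tracking of the number of slabs for debugging purposes */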
1031static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1032{
1033 struct kmem_cache_node *n = get_node(s, node);
1034
1035 return atomic_long_read(&n->nr_slabs);
1036}
1037
1038static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1039{
1040 return atomic_long_read(&n->nr_slabs);
1041}
1042
1043static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1044{
1045 struct kmem_cache_node *n = get_node(s, node);

	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
1053 if (likely(n)) {
1054 atomic_long_inc(&n->nr_slabs);
1055 atomic_long_add(objects, &n->total_objects);
1056 }
1057}
1058static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1059{
1060 struct kmem_cache_node *n = get_node(s, node);
1061
1062 atomic_long_dec(&n->nr_slabs);
1063 atomic_long_sub(objects, &n->total_objects);
1064}
1065
1066
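/* Object debug checks for alloc/free paths */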
1067static void setup_object_debug(struct kmem_cache *s, struct page *page,
1068 void *object)
1069{
1070 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1071 return;
1072
1073 init_object(s, object, SLUB_RED_INACTIVE);
1074 init_tracking(s, object);
1075}
1076
1077static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
1078{
1079 if (!(s->flags & SLAB_POISON))
1080 return;
1081
1082 metadata_access_enable();
1083 memset(addr, POISON_INUSE, PAGE_SIZE << order);
1084 metadata_access_disable();
1085}
1086
1087static inline int alloc_consistency_checks(struct kmem_cache *s,
1088 struct page *page, void *object)
1089{
1090 if (!check_slab(s, page))
1091 return 0;
1092
1093 if (!check_valid_pointer(s, page, object)) {
1094 object_err(s, page, object, "Freelist Pointer check fails");
1095 return 0;
1096 }
1097
1098 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1099 return 0;
1100
1101 return 1;
1102}
1103
1104static noinline int alloc_debug_processing(struct kmem_cache *s,
1105 struct page *page,
1106 void *object, unsigned long addr)
1107{
1108 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1109 if (!alloc_consistency_checks(s, page, object))
1110 goto bad;
1111 }
1112
1113
1114 if (s->flags & SLAB_STORE_USER)
1115 set_track(s, object, TRACK_ALLOC, addr);
1116 trace(s, page, object, 1);
1117 init_object(s, object, SLUB_RED_ACTIVE);
1118 return 1;
1119
1120bad:
1121 if (PageSlab(page)) {
		/*
		 * If this is a slab page then lets do the best we can
		 * to avoid issues in the future. Marking all objects
		 * as used avoids touching the remaining objects.
		 */
1127 slab_fix(s, "Marking all objects used");
1128 page->inuse = page->objects;
1129 page->freelist = NULL;
1130 }
1131 return 0;
1132}
1133
1134static inline int free_consistency_checks(struct kmem_cache *s,
1135 struct page *page, void *object, unsigned long addr)
1136{
1137 if (!check_valid_pointer(s, page, object)) {
1138 slab_err(s, page, "Invalid object pointer 0x%p", object);
1139 return 0;
1140 }
1141
1142 if (on_freelist(s, page, object)) {
1143 object_err(s, page, object, "Object already free");
1144 return 0;
1145 }
1146
1147 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1148 return 0;
1149
1150 if (unlikely(s != page->slab_cache)) {
1151 if (!PageSlab(page)) {
1152 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1153 object);
1154 } else if (!page->slab_cache) {
1155 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1156 object);
1157 dump_stack();
1158 } else
1159 object_err(s, page, object,
1160 "page slab pointer corrupt.");
1161 return 0;
1162 }
1163 return 1;
1164}
1165
1166
1167static noinline int free_debug_processing(
1168 struct kmem_cache *s, struct page *page,
1169 void *head, void *tail, int bulk_cnt,
1170 unsigned long addr)
1171{
1172 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1173 void *object = head;
1174 int cnt = 0;
1175 unsigned long uninitialized_var(flags);
1176 int ret = 0;
1177
1178 spin_lock_irqsave(&n->list_lock, flags);
1179 slab_lock(page);
1180
1181 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1182 if (!check_slab(s, page))
1183 goto out;
1184 }
1185
1186next_object:
1187 cnt++;
1188
1189 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1190 if (!free_consistency_checks(s, page, object, addr))
1191 goto out;
1192 }
1193
1194 if (s->flags & SLAB_STORE_USER)
1195 set_track(s, object, TRACK_FREE, addr);
1196 trace(s, page, object, 0);
1197
1198 init_object(s, object, SLUB_RED_INACTIVE);
1199
1200
1201 if (object != tail) {
1202 object = get_freepointer(s, object);
1203 goto next_object;
1204 }
1205 ret = 1;
1206
1207out:
1208 if (cnt != bulk_cnt)
1209 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1210 bulk_cnt, cnt);
1211
1212 slab_unlock(page);
1213 spin_unlock_irqrestore(&n->list_lock, flags);
1214 if (!ret)
1215 slab_fix(s, "Object at 0x%p not freed", object);
1216 return ret;
1217}
1218
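/*
 * Parse the slub_debug= boot option. A few illustrative forms (see the
 * kernel's SLUB documentation for the full list):
 *   slub_debug            enable full debugging for all slabs
 *   slub_debug=FZP        sanity checks, red zoning and poisoning
 *   slub_debug=,dentry    full debugging, but only for the dentry cache
 */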
1219static int __init setup_slub_debug(char *str)
1220{
1221 slub_debug = DEBUG_DEFAULT_FLAGS;
1222 if (*str++ != '=' || !*str)
1223
1224
1225
1226 goto out;
1227
1228 if (*str == ',')
1229
1230
1231
1232
1233 goto check_slabs;
1234
1235 slub_debug = 0;
1236 if (*str == '-')
1237
1238
1239
1240 goto out;
1241
1242
1243
1244
1245 for (; *str && *str != ','; str++) {
1246 switch (tolower(*str)) {
1247 case 'f':
1248 slub_debug |= SLAB_CONSISTENCY_CHECKS;
1249 break;
1250 case 'z':
1251 slub_debug |= SLAB_RED_ZONE;
1252 break;
1253 case 'p':
1254 slub_debug |= SLAB_POISON;
1255 break;
1256 case 'u':
1257 slub_debug |= SLAB_STORE_USER;
1258 break;
1259 case 't':
1260 slub_debug |= SLAB_TRACE;
1261 break;
1262 case 'a':
1263 slub_debug |= SLAB_FAILSLAB;
1264 break;
1265 case 'o':
1266
1267
1268
1269
1270 disable_higher_order_debug = 1;
1271 break;
1272 default:
1273 pr_err("slub_debug option '%c' unknown. skipped\n",
1274 *str);
1275 }
1276 }
1277
1278check_slabs:
1279 if (*str == ',')
1280 slub_debug_slabs = str + 1;
1281out:
1282 if ((static_branch_unlikely(&init_on_alloc) ||
1283 static_branch_unlikely(&init_on_free)) &&
1284 (slub_debug & SLAB_POISON))
1285 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1286 return 1;
1287}
1288
1289__setup("slub_debug", setup_slub_debug);

/*
 * kmem_cache_flags - apply debugging options to the cache
 * @object_size:	the size of an object without meta data
 * @flags:		flags to set
 * @name:		name of the cache
 * @ctor:		constructor function
 *
 * Debug option(s) are applied to @flags. In addition to the debug
 * option(s), if a slab name (or multiple) is specified i.e.
 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
 * then only the select slabs will receive the debug option(s).
 */
1303slab_flags_t kmem_cache_flags(unsigned int object_size,
1304 slab_flags_t flags, const char *name,
1305 void (*ctor)(void *))
1306{
1307 char *iter;
1308 size_t len;
1309
1310
1311 if (!slub_debug_slabs)
1312 return flags | slub_debug;
1313
1314 len = strlen(name);
1315 iter = slub_debug_slabs;
1316 while (*iter) {
1317 char *end, *glob;
1318 size_t cmplen;
1319
1320 end = strchrnul(iter, ',');
1321
1322 glob = strnchr(iter, end - iter, '*');
1323 if (glob)
1324 cmplen = glob - iter;
1325 else
1326 cmplen = max_t(size_t, len, (end - iter));
1327
1328 if (!strncmp(name, iter, cmplen)) {
1329 flags |= slub_debug;
1330 break;
1331 }
1332
1333 if (!*end)
1334 break;
1335 iter = end + 1;
1336 }
1337
1338 return flags;
1339}
1340#else
1341static inline void setup_object_debug(struct kmem_cache *s,
1342 struct page *page, void *object) {}
1343static inline void setup_page_debug(struct kmem_cache *s,
1344 void *addr, int order) {}
1345
1346static inline int alloc_debug_processing(struct kmem_cache *s,
1347 struct page *page, void *object, unsigned long addr) { return 0; }
1348
1349static inline int free_debug_processing(
1350 struct kmem_cache *s, struct page *page,
1351 void *head, void *tail, int bulk_cnt,
1352 unsigned long addr) { return 0; }
1353
1354static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1355 { return 1; }
1356static inline int check_object(struct kmem_cache *s, struct page *page,
1357 void *object, u8 val) { return 1; }
1358static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1359 struct page *page) {}
1360static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1361 struct page *page) {}
1362slab_flags_t kmem_cache_flags(unsigned int object_size,
1363 slab_flags_t flags, const char *name,
1364 void (*ctor)(void *))
1365{
1366 return flags;
1367}
1368#define slub_debug 0
1369
1370#define disable_higher_order_debug 0
1371
1372static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1373 { return 0; }
1374static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1375 { return 0; }
1376static inline void inc_slabs_node(struct kmem_cache *s, int node,
1377 int objects) {}
1378static inline void dec_slabs_node(struct kmem_cache *s, int node,
1379 int objects) {}
1380
1381#endif
1382
/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
1387static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1388{
1389 ptr = kasan_kmalloc_large(ptr, size, flags);
1390
1391 kmemleak_alloc(ptr, size, 1, flags);
1392 return ptr;
1393}
1394
1395static __always_inline void kfree_hook(void *x)
1396{
1397 kmemleak_free(x);
1398 kasan_kfree_large(x, _RET_IP_);
1399}
1400
1401static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1402{
1403 kmemleak_free_recursive(x, s->flags);

	/*
	 * Trouble is that we may no longer disable interrupts in the fast path
	 * So in order to make the debug calls that expect irqs to be
	 * disabled we need to disable interrupts temporarily.
	 */
1410#ifdef CONFIG_LOCKDEP
1411 {
1412 unsigned long flags;
1413
1414 local_irq_save(flags);
1415 debug_check_no_locks_freed(x, s->object_size);
1416 local_irq_restore(flags);
1417 }
1418#endif
1419 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1420 debug_check_no_obj_freed(x, s->object_size);
1421
1422
1423 return kasan_slab_free(s, x, _RET_IP_);
1424}
1425
1426static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1427 void **head, void **tail)
1428{
1429
1430 void *object;
1431 void *next = *head;
1432 void *old_tail = *tail ? *tail : *head;
1433 int rsize;
1434
1435 if (slab_want_init_on_free(s)) {
1436 void *p = NULL;
1437
1438 do {
1439 object = next;
1440 next = get_freepointer(s, object);
1441
1442
1443
1444
1445 memset(object, 0, s->object_size);
1446 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad
1447 : 0;
1448 memset((char *)object + s->inuse, 0,
1449 s->size - s->inuse - rsize);
1450 set_freepointer(s, object, p);
1451 p = object;
1452 } while (object != old_tail);
1453 }
1454
1455
1456
1457
1458
1459#if defined(CONFIG_LOCKDEP) || \
1460 defined(CONFIG_DEBUG_KMEMLEAK) || \
1461 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1462 defined(CONFIG_KASAN)
1463
1464 next = *head;
1465
1466
1467 *head = NULL;
1468 *tail = NULL;
1469
1470 do {
1471 object = next;
1472 next = get_freepointer(s, object);
1473
1474 if (!slab_free_hook(s, object)) {
1475
1476 set_freepointer(s, object, *head);
1477 *head = object;
1478 if (!*tail)
1479 *tail = object;
1480 }
1481 } while (object != old_tail);
1482
1483 if (*head == *tail)
1484 *tail = NULL;
1485
1486 return *head != NULL;
1487#else
1488 return true;
1489#endif
1490}
1491
1492static void *setup_object(struct kmem_cache *s, struct page *page,
1493 void *object)
1494{
1495 setup_object_debug(s, page, object);
1496 object = kasan_init_slab_obj(s, object);
1497 if (unlikely(s->ctor)) {
1498 kasan_unpoison_object_data(s, object);
1499 s->ctor(object);
1500 kasan_poison_object_data(s, object);
1501 }
1502 return object;
1503}
1504
/*
 * Slab allocation and freeing
 */
1508static inline struct page *alloc_slab_page(struct kmem_cache *s,
1509 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1510{
1511 struct page *page;
1512 unsigned int order = oo_order(oo);
1513
1514 if (node == NUMA_NO_NODE)
1515 page = alloc_pages(flags, order);
1516 else
1517 page = __alloc_pages_node(node, flags, order);
1518
1519 if (page && charge_slab_page(page, flags, order, s)) {
1520 __free_pages(page, order);
1521 page = NULL;
1522 }
1523
1524 return page;
1525}
1526
1527#ifdef CONFIG_SLAB_FREELIST_RANDOM
1528
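/* Pre-initialize the random sequence cache */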
1529static int init_cache_random_seq(struct kmem_cache *s)
1530{
1531 unsigned int count = oo_objects(s->oo);
1532 int err;
1533
1534
1535 if (s->random_seq)
1536 return 0;
1537
1538 err = cache_random_seq_create(s, count, GFP_KERNEL);
1539 if (err) {
1540 pr_err("SLUB: Unable to initialize free list for %s\n",
1541 s->name);
1542 return err;
1543 }
1544
1545
1546 if (s->random_seq) {
1547 unsigned int i;
1548
1549 for (i = 0; i < count; i++)
1550 s->random_seq[i] *= s->size;
1551 }
1552 return 0;
1553}
1554
1555
1556static void __init init_freelist_randomization(void)
1557{
1558 struct kmem_cache *s;
1559
1560 mutex_lock(&slab_mutex);
1561
1562 list_for_each_entry(s, &slab_caches, list)
1563 init_cache_random_seq(s);
1564
1565 mutex_unlock(&slab_mutex);
1566}
1567
1568
1569static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1570 unsigned long *pos, void *start,
1571 unsigned long page_limit,
1572 unsigned long freelist_count)
1573{
1574 unsigned int idx;
1575
1576
1577
1578
1579
1580 do {
1581 idx = s->random_seq[*pos];
1582 *pos += 1;
1583 if (*pos >= freelist_count)
1584 *pos = 0;
1585 } while (unlikely(idx >= page_limit));
1586
1587 return (char *)start + idx;
1588}
1589
1590
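/* Shuffle the singly linked freelist based on a random pre-computed sequence */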
1591static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1592{
1593 void *start;
1594 void *cur;
1595 void *next;
1596 unsigned long idx, pos, page_limit, freelist_count;
1597
1598 if (page->objects < 2 || !s->random_seq)
1599 return false;
1600
1601 freelist_count = oo_objects(s->oo);
1602 pos = get_random_int() % freelist_count;
1603
1604 page_limit = page->objects * s->size;
1605 start = fixup_red_left(s, page_address(page));
1606
1607
1608 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1609 freelist_count);
1610 cur = setup_object(s, page, cur);
1611 page->freelist = cur;
1612
1613 for (idx = 1; idx < page->objects; idx++) {
1614 next = next_freelist_entry(s, page, &pos, start, page_limit,
1615 freelist_count);
1616 next = setup_object(s, page, next);
1617 set_freepointer(s, cur, next);
1618 cur = next;
1619 }
1620 set_freepointer(s, cur, NULL);
1621
1622 return true;
1623}
1624#else
1625static inline int init_cache_random_seq(struct kmem_cache *s)
1626{
1627 return 0;
1628}
1629static inline void init_freelist_randomization(void) { }
1630static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1631{
1632 return false;
1633}
1634#endif
1635
1636static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1637{
1638 struct page *page;
1639 struct kmem_cache_order_objects oo = s->oo;
1640 gfp_t alloc_gfp;
1641 void *start, *p, *next;
1642 int idx, order;
1643 bool shuffle;
1644
1645 flags &= gfp_allowed_mask;
1646
1647 if (gfpflags_allow_blocking(flags))
1648 local_irq_enable();
1649
1650 flags |= s->allocflags;
1651
	/*
	 * Let the initial higher-order allocation fail under memory pressure
	 * so we fall-back to the minimum order allocation.
	 */
1656 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1657 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1658 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1659
1660 page = alloc_slab_page(s, alloc_gfp, node, oo);
1661 if (unlikely(!page)) {
1662 oo = s->min;
1663 alloc_gfp = flags;
		/*
		 * Allocation may have failed due to fragmentation.
		 * Try a lower order alloc if possible
		 */
1668 page = alloc_slab_page(s, alloc_gfp, node, oo);
1669 if (unlikely(!page))
1670 goto out;
1671 stat(s, ORDER_FALLBACK);
1672 }
1673
1674 page->objects = oo_objects(oo);
1675
1676 order = compound_order(page);
1677 page->slab_cache = s;
1678 __SetPageSlab(page);
1679 if (page_is_pfmemalloc(page))
1680 SetPageSlabPfmemalloc(page);
1681
1682 kasan_poison_slab(page);
1683
1684 start = page_address(page);
1685
1686 setup_page_debug(s, start, order);
1687
1688 shuffle = shuffle_freelist(s, page);
1689
1690 if (!shuffle) {
1691 start = fixup_red_left(s, start);
1692 start = setup_object(s, page, start);
1693 page->freelist = start;
1694 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1695 next = p + s->size;
1696 next = setup_object(s, page, next);
1697 set_freepointer(s, p, next);
1698 p = next;
1699 }
1700 set_freepointer(s, p, NULL);
1701 }
1702
1703 page->inuse = page->objects;
1704 page->frozen = 1;
1705
1706out:
1707 if (gfpflags_allow_blocking(flags))
1708 local_irq_disable();
1709 if (!page)
1710 return NULL;
1711
1712 inc_slabs_node(s, page_to_nid(page), page->objects);
1713
1714 return page;
1715}
1716
1717static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1718{
1719 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1720 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1721 flags &= ~GFP_SLAB_BUG_MASK;
1722 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1723 invalid_mask, &invalid_mask, flags, &flags);
1724 dump_stack();
1725 }
1726
1727 return allocate_slab(s,
1728 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1729}
1730
1731static void __free_slab(struct kmem_cache *s, struct page *page)
1732{
1733 int order = compound_order(page);
1734 int pages = 1 << order;
1735
1736 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1737 void *p;
1738
1739 slab_pad_check(s, page);
1740 for_each_object(p, s, page_address(page),
1741 page->objects)
1742 check_object(s, page, p, SLUB_RED_INACTIVE);
1743 }
1744
1745 __ClearPageSlabPfmemalloc(page);
1746 __ClearPageSlab(page);
1747
1748 page->mapping = NULL;
1749 if (current->reclaim_state)
1750 current->reclaim_state->reclaimed_slab += pages;
1751 uncharge_slab_page(page, order, s);
1752 __free_pages(page, order);
1753}
1754
1755static void rcu_free_slab(struct rcu_head *h)
1756{
1757 struct page *page = container_of(h, struct page, rcu_head);
1758
1759 __free_slab(page->slab_cache, page);
1760}
1761
1762static void free_slab(struct kmem_cache *s, struct page *page)
1763{
1764 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1765 call_rcu(&page->rcu_head, rcu_free_slab);
1766 } else
1767 __free_slab(s, page);
1768}
1769
1770static void discard_slab(struct kmem_cache *s, struct page *page)
1771{
1772 dec_slabs_node(s, page_to_nid(page), page->objects);
1773 free_slab(s, page);
1774}
1775
/*
 * Management of partially allocated slabs.
 */
1779static inline void
1780__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1781{
1782 n->nr_partial++;
1783 if (tail == DEACTIVATE_TO_TAIL)
1784 list_add_tail(&page->slab_list, &n->partial);
1785 else
1786 list_add(&page->slab_list, &n->partial);
1787}
1788
1789static inline void add_partial(struct kmem_cache_node *n,
1790 struct page *page, int tail)
1791{
1792 lockdep_assert_held(&n->list_lock);
1793 __add_partial(n, page, tail);
1794}
1795
1796static inline void remove_partial(struct kmem_cache_node *n,
1797 struct page *page)
1798{
1799 lockdep_assert_held(&n->list_lock);
1800 list_del(&page->slab_list);
1801 n->nr_partial--;
1802}
1803
/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 */
1810static inline void *acquire_slab(struct kmem_cache *s,
1811 struct kmem_cache_node *n, struct page *page,
1812 int mode, int *objects)
1813{
1814 void *freelist;
1815 unsigned long counters;
1816 struct page new;
1817
1818 lockdep_assert_held(&n->list_lock);
1819
1820
1821
1822
1823
1824
1825 freelist = page->freelist;
1826 counters = page->counters;
1827 new.counters = counters;
1828 *objects = new.objects - new.inuse;
1829 if (mode) {
1830 new.inuse = page->objects;
1831 new.freelist = NULL;
1832 } else {
1833 new.freelist = freelist;
1834 }
1835
1836 VM_BUG_ON(new.frozen);
1837 new.frozen = 1;
1838
1839 if (!__cmpxchg_double_slab(s, page,
1840 freelist, counters,
1841 new.freelist, new.counters,
1842 "acquire_slab"))
1843 return NULL;
1844
1845 remove_partial(n, page);
1846 WARN_ON(!freelist);
1847 return freelist;
1848}
1849
1850static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1851static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1852
/*
 * Try to allocate a partial slab from a specific node.
 */
1856static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1857 struct kmem_cache_cpu *c, gfp_t flags)
1858{
1859 struct page *page, *page2;
1860 void *object = NULL;
1861 unsigned int available = 0;
1862 int objects;
1863
	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * just allocate an empty slab. If we mistakenly try to get a
	 * partial slab and there is none available then get_partial()
	 * will return NULL.
	 */
1870 if (!n || !n->nr_partial)
1871 return NULL;
1872
1873 spin_lock(&n->list_lock);
1874 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1875 void *t;
1876
1877 if (!pfmemalloc_match(page, flags))
1878 continue;
1879
1880 t = acquire_slab(s, n, page, object == NULL, &objects);
1881 if (!t)
1882 break;
1883
1884 available += objects;
1885 if (!object) {
1886 c->page = page;
1887 stat(s, ALLOC_FROM_PARTIAL);
1888 object = t;
1889 } else {
1890 put_cpu_partial(s, page, 0);
1891 stat(s, CPU_PARTIAL_NODE);
1892 }
1893 if (!kmem_cache_has_cpu_partial(s)
1894 || available > slub_cpu_partial(s) / 2)
1895 break;
1896
1897 }
1898 spin_unlock(&n->list_lock);
1899 return object;
1900}
1901
/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
1905static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1906 struct kmem_cache_cpu *c)
1907{
1908#ifdef CONFIG_NUMA
1909 struct zonelist *zonelist;
1910 struct zoneref *z;
1911 struct zone *zone;
1912 enum zone_type high_zoneidx = gfp_zone(flags);
1913 void *object;
1914 unsigned int cpuset_mems_cookie;
1915
	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * If /sys/kernel/slab/xx/remote_node_defrag_ratio is set to 100
	 * (which makes defrag_ratio = 1000) then every (well almost)
	 * allocation will first attempt to defrag slab caches on other nodes.
	 * This means scanning over all nodes to look for partial slabs which
	 * may be expensive if we do it every time we are trying to find a slab
	 * with available objects.
	 */
1934 if (!s->remote_node_defrag_ratio ||
1935 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1936 return NULL;
1937
1938 do {
1939 cpuset_mems_cookie = read_mems_allowed_begin();
1940 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1941 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1942 struct kmem_cache_node *n;
1943
1944 n = get_node(s, zone_to_nid(zone));
1945
1946 if (n && cpuset_zone_allowed(zone, flags) &&
1947 n->nr_partial > s->min_partial) {
1948 object = get_partial_node(s, n, c, flags);
1949 if (object) {
1950
1951
1952
1953
1954
1955
1956
1957 return object;
1958 }
1959 }
1960 }
1961 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1962#endif
1963 return NULL;
1964}
1965
1966
1967
1968
1969static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1970 struct kmem_cache_cpu *c)
1971{
1972 void *object;
1973 int searchnode = node;
1974
1975 if (node == NUMA_NO_NODE)
1976 searchnode = numa_mem_id();
1977 else if (!node_present_pages(node))
1978 searchnode = node_to_mem_node(node);
1979
1980 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1981 if (object || node != NUMA_NO_NODE)
1982 return object;
1983
1984 return get_any_partial(s, flags, c);
1985}
1986
#ifdef CONFIG_PREEMPT
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
#define TID_STEP 1
#endif
2001
2002static inline unsigned long next_tid(unsigned long tid)
2003{
2004 return tid + TID_STEP;
2005}
2006
2007static inline unsigned int tid_to_cpu(unsigned long tid)
2008{
2009 return tid % TID_STEP;
2010}
2011
2012static inline unsigned long tid_to_event(unsigned long tid)
2013{
2014 return tid / TID_STEP;
2015}
2016
2017static inline unsigned int init_tid(int cpu)
2018{
2019 return cpu;
2020}
2021
2022static inline void note_cmpxchg_failure(const char *n,
2023 const struct kmem_cache *s, unsigned long tid)
2024{
2025#ifdef SLUB_DEBUG_CMPXCHG
2026 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2027
2028 pr_info("%s %s: cmpxchg redo ", n, s->name);
2029
2030#ifdef CONFIG_PREEMPT
2031 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2032 pr_warn("due to cpu change %d -> %d\n",
2033 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2034 else
2035#endif
2036 if (tid_to_event(tid) != tid_to_event(actual_tid))
2037 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2038 tid_to_event(tid), tid_to_event(actual_tid));
2039 else
2040 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2041 actual_tid, tid, next_tid(tid));
2042#endif
2043 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2044}
2045
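/*
 * Seed each cpu's transaction id with its cpu number so that tids stay
 * globally unique as they are advanced by TID_STEP.
 */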
2046static void init_kmem_cache_cpus(struct kmem_cache *s)
2047{
2048 int cpu;
2049
2050 for_each_possible_cpu(cpu)
2051 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2052}
2053
/*
 * Remove the cpu slab
 */
2057static void deactivate_slab(struct kmem_cache *s, struct page *page,
2058 void *freelist, struct kmem_cache_cpu *c)
2059{
2060 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2061 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2062 int lock = 0;
2063 enum slab_modes l = M_NONE, m = M_NONE;
2064 void *nextfree;
2065 int tail = DEACTIVATE_TO_HEAD;
2066 struct page new;
2067 struct page old;
2068
2069 if (page->freelist) {
2070 stat(s, DEACTIVATE_REMOTE_FREES);
2071 tail = DEACTIVATE_TO_TAIL;
2072 }
2073
	/*
	 * Stage one: Free all available per cpu objects back
	 * to the page freelist while it is still frozen. Leave the
	 * last one.
	 *
	 * There is no need to take the list->lock because the page
	 * is still frozen.
	 */
2082 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2083 void *prior;
2084 unsigned long counters;
2085
2086 do {
2087 prior = page->freelist;
2088 counters = page->counters;
2089 set_freepointer(s, freelist, prior);
2090 new.counters = counters;
2091 new.inuse--;
2092 VM_BUG_ON(!new.frozen);
2093
2094 } while (!__cmpxchg_double_slab(s, page,
2095 prior, counters,
2096 freelist, new.counters,
2097 "drain percpu freelist"));
2098
2099 freelist = nextfree;
2100 }
2101
	/*
	 * Stage two: Ensure that the page is unfrozen while the
	 * list presence reflects the actual number of objects
	 * during unfreeze.
	 *
	 * We setup the list membership and then perform a cmpxchg
	 * with the count. If there is a mismatch then the page
	 * is not unfrozen but the page is on the wrong list.
	 *
	 * Then we restart the process which may have to remove
	 * the page from the list that we just put it on again
	 * because the number of objects in the slab may have
	 * changed.
	 */
2116redo:
2117
2118 old.freelist = page->freelist;
2119 old.counters = page->counters;
2120 VM_BUG_ON(!old.frozen);
2121
2122
2123 new.counters = old.counters;
2124 if (freelist) {
2125 new.inuse--;
2126 set_freepointer(s, freelist, old.freelist);
2127 new.freelist = freelist;
2128 } else
2129 new.freelist = old.freelist;
2130
2131 new.frozen = 0;
2132
2133 if (!new.inuse && n->nr_partial >= s->min_partial)
2134 m = M_FREE;
2135 else if (new.freelist) {
2136 m = M_PARTIAL;
2137 if (!lock) {
2138 lock = 1;
2139
2140
2141
2142
2143
2144 spin_lock(&n->list_lock);
2145 }
2146 } else {
2147 m = M_FULL;
2148 if (kmem_cache_debug(s) && !lock) {
2149 lock = 1;
2150
2151
2152
2153
2154
2155 spin_lock(&n->list_lock);
2156 }
2157 }
2158
2159 if (l != m) {
2160 if (l == M_PARTIAL)
2161 remove_partial(n, page);
2162 else if (l == M_FULL)
2163 remove_full(s, n, page);
2164
2165 if (m == M_PARTIAL)
2166 add_partial(n, page, tail);
2167 else if (m == M_FULL)
2168 add_full(s, n, page);
2169 }
2170
2171 l = m;
2172 if (!__cmpxchg_double_slab(s, page,
2173 old.freelist, old.counters,
2174 new.freelist, new.counters,
2175 "unfreezing slab"))
2176 goto redo;
2177
2178 if (lock)
2179 spin_unlock(&n->list_lock);
2180
2181 if (m == M_PARTIAL)
2182 stat(s, tail);
2183 else if (m == M_FULL)
2184 stat(s, DEACTIVATE_FULL);
2185 else if (m == M_FREE) {
2186 stat(s, DEACTIVATE_EMPTY);
2187 discard_slab(s, page);
2188 stat(s, FREE_SLAB);
2189 }
2190
2191 c->page = NULL;
2192 c->freelist = NULL;
2193}
2194
/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 */
2202static void unfreeze_partials(struct kmem_cache *s,
2203 struct kmem_cache_cpu *c)
2204{
2205#ifdef CONFIG_SLUB_CPU_PARTIAL
2206 struct kmem_cache_node *n = NULL, *n2 = NULL;
2207 struct page *page, *discard_page = NULL;
2208
2209 while ((page = c->partial)) {
2210 struct page new;
2211 struct page old;
2212
2213 c->partial = page->next;
2214
2215 n2 = get_node(s, page_to_nid(page));
2216 if (n != n2) {
2217 if (n)
2218 spin_unlock(&n->list_lock);
2219
2220 n = n2;
2221 spin_lock(&n->list_lock);
2222 }
2223
2224 do {
2225
2226 old.freelist = page->freelist;
2227 old.counters = page->counters;
2228 VM_BUG_ON(!old.frozen);
2229
2230 new.counters = old.counters;
2231 new.freelist = old.freelist;
2232
2233 new.frozen = 0;
2234
2235 } while (!__cmpxchg_double_slab(s, page,
2236 old.freelist, old.counters,
2237 new.freelist, new.counters,
2238 "unfreezing slab"));
2239
2240 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2241 page->next = discard_page;
2242 discard_page = page;
2243 } else {
2244 add_partial(n, page, DEACTIVATE_TO_TAIL);
2245 stat(s, FREE_ADD_PARTIAL);
2246 }
2247 }
2248
2249 if (n)
2250 spin_unlock(&n->list_lock);
2251
2252 while (discard_page) {
2253 page = discard_page;
2254 discard_page = discard_page->next;
2255
2256 stat(s, DEACTIVATE_EMPTY);
2257 discard_slab(s, page);
2258 stat(s, FREE_SLAB);
2259 }
2260#endif
2261}
2262
/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) into a
 * partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2270static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2271{
2272#ifdef CONFIG_SLUB_CPU_PARTIAL
2273 struct page *oldpage;
2274 int pages;
2275 int pobjects;
2276
2277 preempt_disable();
2278 do {
2279 pages = 0;
2280 pobjects = 0;
2281 oldpage = this_cpu_read(s->cpu_slab->partial);
2282
2283 if (oldpage) {
2284 pobjects = oldpage->pobjects;
2285 pages = oldpage->pages;
2286 if (drain && pobjects > s->cpu_partial) {
2287 unsigned long flags;
2288
2289
2290
2291
2292 local_irq_save(flags);
2293 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2294 local_irq_restore(flags);
2295 oldpage = NULL;
2296 pobjects = 0;
2297 pages = 0;
2298 stat(s, CPU_PARTIAL_DRAIN);
2299 }
2300 }
2301
2302 pages++;
2303 pobjects += page->objects - page->inuse;
2304
2305 page->pages = pages;
2306 page->pobjects = pobjects;
2307 page->next = oldpage;
2308
2309 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2310 != oldpage);
2311 if (unlikely(!s->cpu_partial)) {
2312 unsigned long flags;
2313
2314 local_irq_save(flags);
2315 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2316 local_irq_restore(flags);
2317 }
2318 preempt_enable();
2319#endif
2320}
2321
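/*
 * Deactivate the current cpu slab and advance the tid so that in-flight
 * fastpath transactions against the old slab fail their cmpxchg.
 */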
2322static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2323{
2324 stat(s, CPUSLAB_FLUSH);
2325 deactivate_slab(s, c->page, c->freelist, c);
2326
2327 c->tid = next_tid(c->tid);
2328}
2329
2330
2331
2332
2333
2334
2335static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2336{
2337 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2338
2339 if (c->page)
2340 flush_slab(s, c);
2341
2342 unfreeze_partials(s, c);
2343}
2344
2345static void flush_cpu_slab(void *d)
2346{
2347 struct kmem_cache *s = d;
2348
2349 __flush_cpu_slab(s, smp_processor_id());
2350}
2351
2352static bool has_cpu_slab(int cpu, void *info)
2353{
2354 struct kmem_cache *s = info;
2355 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2356
2357 return c->page || slub_percpu_partial(c);
2358}
2359
2360static void flush_all(struct kmem_cache *s)
2361{
2362 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2363}
2364
2365
2366
2367
2368
2369static int slub_cpu_dead(unsigned int cpu)
2370{
2371 struct kmem_cache *s;
2372 unsigned long flags;
2373
2374 mutex_lock(&slab_mutex);
2375 list_for_each_entry(s, &slab_caches, list) {
2376 local_irq_save(flags);
2377 __flush_cpu_slab(s, cpu);
2378 local_irq_restore(flags);
2379 }
2380 mutex_unlock(&slab_mutex);
2381 return 0;
2382}
2383
2384
2385
2386
2387
2388static inline int node_match(struct page *page, int node)
2389{
2390#ifdef CONFIG_NUMA
2391 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2392 return 0;
2393#endif
2394 return 1;
2395}
2396
2397#ifdef CONFIG_SLUB_DEBUG
2398static int count_free(struct page *page)
2399{
2400 return page->objects - page->inuse;
2401}
2402
2403static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2404{
2405 return atomic_long_read(&n->total_objects);
2406}
2407#endif
2408
2409#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2410static unsigned long count_partial(struct kmem_cache_node *n,
2411 int (*get_count)(struct page *))
2412{
2413 unsigned long flags;
2414 unsigned long x = 0;
2415 struct page *page;
2416
2417 spin_lock_irqsave(&n->list_lock, flags);
2418 list_for_each_entry(page, &n->partial, slab_list)
2419 x += get_count(page);
2420 spin_unlock_irqrestore(&n->list_lock, flags);
2421 return x;
2422}
2423#endif
2424
2425static noinline void
2426slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2427{
2428#ifdef CONFIG_SLUB_DEBUG
2429 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2430 DEFAULT_RATELIMIT_BURST);
2431 int node;
2432 struct kmem_cache_node *n;
2433
2434 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2435 return;
2436
2437 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2438 nid, gfpflags, &gfpflags);
2439 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2440 s->name, s->object_size, s->size, oo_order(s->oo),
2441 oo_order(s->min));
2442
2443 if (oo_order(s->min) > get_order(s->object_size))
2444 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2445 s->name);
2446
2447 for_each_kmem_cache_node(s, node, n) {
2448 unsigned long nr_slabs;
2449 unsigned long nr_objs;
2450 unsigned long nr_free;
2451
2452 nr_free = count_partial(n, count_free);
2453 nr_slabs = node_nr_slabs(n);
2454 nr_objs = node_nr_objs(n);
2455
2456 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2457 node, nr_slabs, nr_objs, nr_free);
2458 }
2459#endif
2460}
2461
2462static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2463 int node, struct kmem_cache_cpu **pc)
2464{
2465 void *freelist;
2466 struct kmem_cache_cpu *c = *pc;
2467 struct page *page;
2468
2469 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2470
2471 freelist = get_partial(s, flags, node, c);
2472
2473 if (freelist)
2474 return freelist;
2475
2476 page = new_slab(s, flags, node);
2477 if (page) {
2478 c = raw_cpu_ptr(s->cpu_slab);
2479 if (c->page)
2480 flush_slab(s, c);
2481
2482
2483
2484
2485
2486 freelist = page->freelist;
2487 page->freelist = NULL;
2488
2489 stat(s, ALLOC_SLAB);
2490 c->page = page;
2491 *pc = c;
2492 }
2493
2494 return freelist;
2495}
2496
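/*
 * Slabs allocated from pfmemalloc reserves are only handed out to requests
 * that are themselves allowed to dip into those reserves.
 */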
2497static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2498{
2499 if (unlikely(PageSlabPfmemalloc(page)))
2500 return gfp_pfmemalloc_allowed(gfpflags);
2501
2502 return true;
2503}
2504
/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupts disabled.
 */
2515static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2516{
2517 struct page new;
2518 unsigned long counters;
2519 void *freelist;
2520
2521 do {
2522 freelist = page->freelist;
2523 counters = page->counters;
2524
2525 new.counters = counters;
2526 VM_BUG_ON(!new.frozen);
2527
2528 new.inuse = page->objects;
2529 new.frozen = freelist != NULL;
2530
2531 } while (!__cmpxchg_double_slab(s, page,
2532 freelist, counters,
2533 NULL, new.counters,
2534 "get_freelist"));
2535
2536 return freelist;
2537}
2538
/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 *
 * Version of __slab_alloc to use when we know that interrupts are
 * disabled (which is the case for bulk allocation).
 */
2558static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2559 unsigned long addr, struct kmem_cache_cpu *c)
2560{
2561 void *freelist;
2562 struct page *page;
2563
2564 page = c->page;
2565 if (!page)
2566 goto new_slab;
2567redo:
2568
2569 if (unlikely(!node_match(page, node))) {
2570 int searchnode = node;
2571
2572 if (node != NUMA_NO_NODE && !node_present_pages(node))
2573 searchnode = node_to_mem_node(node);
2574
2575 if (unlikely(!node_match(page, searchnode))) {
2576 stat(s, ALLOC_NODE_MISMATCH);
2577 deactivate_slab(s, page, c->freelist, c);
2578 goto new_slab;
2579 }
2580 }
2581
2582
2583
2584
2585
2586
2587 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2588 deactivate_slab(s, page, c->freelist, c);
2589 goto new_slab;
2590 }
2591
2592
2593 freelist = c->freelist;
2594 if (freelist)
2595 goto load_freelist;
2596
2597 freelist = get_freelist(s, page);
2598
2599 if (!freelist) {
2600 c->page = NULL;
2601 stat(s, DEACTIVATE_BYPASS);
2602 goto new_slab;
2603 }
2604
2605 stat(s, ALLOC_REFILL);
2606
2607load_freelist:
2608
2609
2610
2611
2612
2613 VM_BUG_ON(!c->page->frozen);
2614 c->freelist = get_freepointer(s, freelist);
2615 c->tid = next_tid(c->tid);
2616 return freelist;
2617
2618new_slab:
2619
2620 if (slub_percpu_partial(c)) {
2621 page = c->page = slub_percpu_partial(c);
2622 slub_set_percpu_partial(c, page);
2623 stat(s, CPU_PARTIAL_ALLOC);
2624 goto redo;
2625 }
2626
2627 freelist = new_slab_objects(s, gfpflags, node, &c);
2628
2629 if (unlikely(!freelist)) {
2630 slab_out_of_memory(s, gfpflags, node);
2631 return NULL;
2632 }
2633
2634 page = c->page;
2635 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2636 goto load_freelist;
2637
2638
2639 if (kmem_cache_debug(s) &&
2640 !alloc_debug_processing(s, page, freelist, addr))
2641 goto new_slab;
2642
2643 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2644 return freelist;
2645}
2646
2647
2648
2649
2650
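/*
 * Another one that disables interrupts and compensates for possible
 * cpu changes by refetching the per cpu area pointer.
 */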
2651static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2652 unsigned long addr, struct kmem_cache_cpu *c)
2653{
2654 void *p;
2655 unsigned long flags;
2656
2657 local_irq_save(flags);
2658#ifdef CONFIG_PREEMPT
2659
2660
2661
2662
2663
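	/*
	 * We may have been preempted and rescheduled on a different
	 * cpu before disabling interrupts. Need to reload the cpu area
	 * pointer.
	 */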
2664 c = this_cpu_ptr(s->cpu_slab);
2665#endif
2666
2667 p = ___slab_alloc(s, gfpflags, node, addr, c);
2668 local_irq_restore(flags);
2669 return p;
2670}
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
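/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * have the fastpath folded into their functions. So no function call
 * overhead for requests that can be satisfied on the fastpath.
 *
 * The fastpath works by first checking if the lockless freelist can be used.
 * If not then __slab_alloc is called for slow processing.
 *
 * Otherwise we can simply pick the next object from the lockless free list.
 */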
2682static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2683 gfp_t gfpflags, int node, unsigned long addr)
2684{
2685 void *object;
2686 struct kmem_cache_cpu *c;
2687 struct page *page;
2688 unsigned long tid;
2689
2690 s = slab_pre_alloc_hook(s, gfpflags);
2691 if (!s)
2692 return NULL;
2693redo:
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704 do {
2705 tid = this_cpu_read(s->cpu_slab->tid);
2706 c = raw_cpu_ptr(s->cpu_slab);
2707 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2708 unlikely(tid != READ_ONCE(c->tid)));
2709
2710
2711
2712
2713
2714
2715
2716
2717
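	/*
	 * Irqless object alloc/free algorithm used here depends on sequence
	 * of fetching cpu_slab's data. tid should be fetched before anything
	 * on c to guarantee that object and page associated with previous tid
	 * won't be used with current tid. If we fetch tid first, object and
	 * page could be the ones associated with the next tid and our
	 * alloc/free request will fail. In this case, we will retry. So, no
	 * problem.
	 */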
2718 barrier();
2719
2720
2721
2722
2723
2724
2725
2726
2727 object = c->freelist;
2728 page = c->page;
2729 if (unlikely(!object || !node_match(page, node))) {
2730 object = __slab_alloc(s, gfpflags, node, addr, c);
2731 stat(s, ALLOC_SLOWPATH);
2732 } else {
2733 void *next_object = get_freepointer_safe(s, object);
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
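		/*
		 * The cmpxchg will only match if there was no additional
		 * operation and if we are on the right processor.
		 *
		 * The cmpxchg does the following atomically (without lock
		 * semantics!)
		 * 1. Relocate first pointer to the current per cpu area.
		 * 2. Verify that tid and freelist have not been changed
		 * 3. If they were not changed replace tid and freelist
		 *
		 * Since this is without lock semantics the protection is only
		 * against code executing on this cpu *not* from access by
		 * other cpus.
		 */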
2749 if (unlikely(!this_cpu_cmpxchg_double(
2750 s->cpu_slab->freelist, s->cpu_slab->tid,
2751 object, tid,
2752 next_object, next_tid(tid)))) {
2753
2754 note_cmpxchg_failure("slab_alloc", s, tid);
2755 goto redo;
2756 }
2757 prefetch_freepointer(s, next_object);
2758 stat(s, ALLOC_FASTPATH);
2759 }
2760
2761
2762
2763
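	/*
	 * If the object has been wiped upon free, make sure it's fully
	 * initialized by zeroing out the freelist pointer.
	 */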
2764 if (unlikely(slab_want_init_on_free(s)) && object)
2765 memset(object + s->offset, 0, sizeof(void *));
2766
2767 if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
2768 memset(object, 0, s->object_size);
2769
2770 slab_post_alloc_hook(s, gfpflags, 1, &object);
2771
2772 return object;
2773}
2774
2775static __always_inline void *slab_alloc(struct kmem_cache *s,
2776 gfp_t gfpflags, unsigned long addr)
2777{
2778 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2779}
2780
2781void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2782{
2783 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2784
2785 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2786 s->size, gfpflags);
2787
2788 return ret;
2789}
2790EXPORT_SYMBOL(kmem_cache_alloc);
2791
2792#ifdef CONFIG_TRACING
2793void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2794{
2795 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2796 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2797 ret = kasan_kmalloc(s, ret, size, gfpflags);
2798 return ret;
2799}
2800EXPORT_SYMBOL(kmem_cache_alloc_trace);
2801#endif
2802
2803#ifdef CONFIG_NUMA
2804void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2805{
2806 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2807
2808 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2809 s->object_size, s->size, gfpflags, node);
2810
2811 return ret;
2812}
2813EXPORT_SYMBOL(kmem_cache_alloc_node);
2814
2815#ifdef CONFIG_TRACING
2816void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2817 gfp_t gfpflags,
2818 int node, size_t size)
2819{
2820 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2821
2822 trace_kmalloc_node(_RET_IP_, ret,
2823 size, s->size, gfpflags, node);
2824
2825 ret = kasan_kmalloc(s, ret, size, gfpflags);
2826 return ret;
2827}
2828EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2829#endif
2830#endif
2831
2832
2833
2834
2835
2836
2837
2838
2839
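/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */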
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *head, void *tail, int cnt,
			unsigned long addr)
{
2845 void *prior;
2846 int was_frozen;
2847 struct page new;
2848 unsigned long counters;
2849 struct kmem_cache_node *n = NULL;
2850 unsigned long uninitialized_var(flags);
2851
2852 stat(s, FREE_SLOWPATH);
2853
2854 if (kmem_cache_debug(s) &&
2855 !free_debug_processing(s, page, head, tail, cnt, addr))
2856 return;
2857
2858 do {
2859 if (unlikely(n)) {
2860 spin_unlock_irqrestore(&n->list_lock, flags);
2861 n = NULL;
2862 }
2863 prior = page->freelist;
2864 counters = page->counters;
2865 set_freepointer(s, tail, prior);
2866 new.counters = counters;
2867 was_frozen = new.frozen;
2868 new.inuse -= cnt;
2869 if ((!new.inuse || !prior) && !was_frozen) {
2870
2871 if (kmem_cache_has_cpu_partial(s) && !prior) {
2872
2873
2874
2875
2876
2877
2878
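				/*
				 * Slab was on no list before and will be
				 * partially empty.
				 * We can defer the list move and instead
				 * freeze it.
				 */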
2879 new.frozen = 1;
2880
2881 } else {
2882
2883 n = get_node(s, page_to_nid(page));
2884
2885
2886
2887
2888
2889
2890
2891
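				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */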
2892 spin_lock_irqsave(&n->list_lock, flags);
2893
2894 }
2895 }
2896
2897 } while (!cmpxchg_double_slab(s, page,
2898 prior, counters,
2899 head, new.counters,
2900 "__slab_free"));
2901
2902 if (likely(!n)) {
2903
2904
2905
2906
2907
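		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */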
2908 if (new.frozen && !was_frozen) {
2909 put_cpu_partial(s, page, 1);
2910 stat(s, CPU_PARTIAL_FREE);
2911 }
2912
2913
2914
2915
2916 if (was_frozen)
2917 stat(s, FREE_FROZEN);
2918 return;
2919 }
2920
2921 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2922 goto slab_empty;
2923
2924
2925
2926
2927
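	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */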
2928 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2929 remove_full(s, n, page);
2930 add_partial(n, page, DEACTIVATE_TO_TAIL);
2931 stat(s, FREE_ADD_PARTIAL);
2932 }
2933 spin_unlock_irqrestore(&n->list_lock, flags);
2934 return;
2935
2936slab_empty:
2937 if (prior) {
2938
2939
2940
2941 remove_partial(n, page);
2942 stat(s, FREE_REMOVE_PARTIAL);
2943 } else {
2944
2945 remove_full(s, n, page);
2946 }
2947
2948 spin_unlock_irqrestore(&n->list_lock, flags);
2949 stat(s, FREE_SLAB);
2950 discard_slab(s, page);
2951}
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
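/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically is the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 *
 * Bulk free of a freelist with several objects (all pointing to the same
 * page) is possible by specifying head and tail pointers, plus an object
 * count (cnt).
 */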
2968static __always_inline void do_slab_free(struct kmem_cache *s,
2969 struct page *page, void *head, void *tail,
2970 int cnt, unsigned long addr)
2971{
2972 void *tail_obj = tail ? : head;
2973 struct kmem_cache_cpu *c;
2974 unsigned long tid;
2975redo:
2976
2977
2978
2979
2980
2981
2982 do {
2983 tid = this_cpu_read(s->cpu_slab->tid);
2984 c = raw_cpu_ptr(s->cpu_slab);
2985 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2986 unlikely(tid != READ_ONCE(c->tid)));
2987
2988
2989 barrier();
2990
2991 if (likely(page == c->page)) {
2992 set_freepointer(s, tail_obj, c->freelist);
2993
2994 if (unlikely(!this_cpu_cmpxchg_double(
2995 s->cpu_slab->freelist, s->cpu_slab->tid,
2996 c->freelist, tid,
2997 head, next_tid(tid)))) {
2998
2999 note_cmpxchg_failure("slab_free", s, tid);
3000 goto redo;
3001 }
3002 stat(s, FREE_FASTPATH);
3003 } else
3004 __slab_free(s, page, head, tail_obj, cnt, addr);
3005
3006}
3007
3008static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
3009 void *head, void *tail, int cnt,
3010 unsigned long addr)
3011{
3012
3013
3014
3015
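	/*
	 * With KASAN enabled slab_free_freelist_hook modifies the freelist
	 * to remove objects whose reuse must be delayed.
	 */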
3016 if (slab_free_freelist_hook(s, &head, &tail))
3017 do_slab_free(s, page, head, tail, cnt, addr);
3018}
3019
3020#ifdef CONFIG_KASAN_GENERIC
3021void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3022{
3023 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3024}
3025#endif
3026
3027void kmem_cache_free(struct kmem_cache *s, void *x)
3028{
3029 s = cache_from_obj(s, x);
3030 if (!s)
3031 return;
3032 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3033 trace_kmem_cache_free(_RET_IP_, x);
3034}
3035EXPORT_SYMBOL(kmem_cache_free);
3036
3037struct detached_freelist {
3038 struct page *page;
3039 void *tail;
3040 void *freelist;
3041 int cnt;
3042 struct kmem_cache *s;
3043};
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
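/*
 * This function progressively scans the array with free objects (with
 * a limited look ahead) and extracts objects belonging to the same page.
 * It builds a detached freelist directly within the given page/objects.
 * This can happen without any need for synchronization, because the objects
 * are owned by the running process. The freelist is built up as a single
 * linked list in the objects. The idea is that this detached freelist can
 * then be bulk transferred to the real freelist(s), requiring only a single
 * synchronization primitive. Look ahead in the array is limited for
 * performance reasons.
 */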
3057static inline
3058int build_detached_freelist(struct kmem_cache *s, size_t size,
3059 void **p, struct detached_freelist *df)
3060{
3061 size_t first_skipped_index = 0;
3062 int lookahead = 3;
3063 void *object;
3064 struct page *page;
3065
3066
3067 df->page = NULL;
3068
3069 do {
3070 object = p[--size];
3071
3072 } while (!object && size);
3073
3074 if (!object)
3075 return 0;
3076
3077 page = virt_to_head_page(object);
3078 if (!s) {
3079
3080 if (unlikely(!PageSlab(page))) {
3081 BUG_ON(!PageCompound(page));
3082 kfree_hook(object);
3083 __free_pages(page, compound_order(page));
3084 p[size] = NULL;
3085 return size;
3086 }
3087
3088 df->s = page->slab_cache;
3089 } else {
3090 df->s = cache_from_obj(s, object);
3091 }
3092
3093
3094 df->page = page;
3095 set_freepointer(df->s, object, NULL);
3096 df->tail = object;
3097 df->freelist = object;
3098 p[size] = NULL;
3099 df->cnt = 1;
3100
3101 while (size) {
3102 object = p[--size];
3103 if (!object)
3104 continue;
3105
3106
3107 if (df->page == virt_to_head_page(object)) {
3108
3109 set_freepointer(df->s, object, df->freelist);
3110 df->freelist = object;
3111 df->cnt++;
3112 p[size] = NULL;
3113
3114 continue;
3115 }
3116
3117
3118 if (!--lookahead)
3119 break;
3120
3121 if (!first_skipped_index)
3122 first_skipped_index = size + 1;
3123 }
3124
3125 return first_skipped_index;
3126}
3127
3128
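/* Note that interrupts must be enabled when calling this function. */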
3129void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3130{
3131 if (WARN_ON(!size))
3132 return;
3133
3134 do {
3135 struct detached_freelist df;
3136
3137 size = build_detached_freelist(s, size, p, &df);
3138 if (!df.page)
3139 continue;
3140
		slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3142 } while (likely(size));
3143}
3144EXPORT_SYMBOL(kmem_cache_free_bulk);
3145
3146
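/* Note that interrupts must be enabled when calling this function. */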
3147int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3148 void **p)
3149{
3150 struct kmem_cache_cpu *c;
3151 int i;
3152
3153
3154 s = slab_pre_alloc_hook(s, flags);
3155 if (unlikely(!s))
		return 0;
3157
3158
3159
3160
3161
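	/*
	 * Drain objects in the per cpu slab, while disabling local
	 * IRQs, which protects against PREEMPT and interrupt
	 * handlers invoking the normal fastpath.
	 */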
3162 local_irq_disable();
3163 c = this_cpu_ptr(s->cpu_slab);
3164
3165 for (i = 0; i < size; i++) {
3166 void *object = c->freelist;
3167
3168 if (unlikely(!object)) {
3169
3170
3171
3172
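			/*
			 * Invoking the slow path may re-enable interrupts
			 * while allocating, so the per cpu pointer can
			 * change: refetch c after the call.
			 */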
3173 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3174 _RET_IP_, c);
3175 if (unlikely(!p[i]))
3176 goto error;
3177
3178 c = this_cpu_ptr(s->cpu_slab);
3179 continue;
3180 }
3181 c->freelist = get_freepointer(s, object);
3182 p[i] = object;
3183 }
3184 c->tid = next_tid(c->tid);
3185 local_irq_enable();
3186
3187
3188 if (unlikely(slab_want_init_on_alloc(flags, s))) {
3189 int j;
3190
3191 for (j = 0; j < i; j++)
3192 memset(p[j], 0, s->object_size);
3193 }
3194
3195
3196 slab_post_alloc_hook(s, flags, size, p);
3197 return i;
3198error:
3199 local_irq_enable();
3200 slab_post_alloc_hook(s, flags, i, p);
3201 __kmem_cache_free_bulk(s, i, p);
3202 return 0;
3203}
3204EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
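/*
 * Minimum / Maximum order of slab pages. This influences locking overhead
 * and slab fragmentation/overhead. A higher order reduces the number of
 * partial slabs and thus the locking overhead on the partial lists.
 */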
3226static unsigned int slub_min_order;
3227static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3228static unsigned int slub_min_objects;
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
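/*
 * Calculate the order of allocation given a slab object size.
 *
 * Generally order 0 allocations should be preferred since they do not cause
 * fragmentation in the page allocator, but larger objects can leave too much
 * unused space in an order 0 slab. We go to a higher order if more than
 * 1/fract_leftover of the slab would be wasted.
 *
 * We also want a minimum number of objects per slab; otherwise we generate
 * too much activity on the partial lists, which requires taking the
 * list_lock.
 */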
3255static inline unsigned int slab_order(unsigned int size,
3256 unsigned int min_objects, unsigned int max_order,
3257 unsigned int fract_leftover)
3258{
3259 unsigned int min_order = slub_min_order;
3260 unsigned int order;
3261
3262 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3263 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3264
3265 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3266 order <= max_order; order++) {
3267
3268 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3269 unsigned int rem;
3270
3271 rem = slab_size % size;
3272
3273 if (rem <= slab_size / fract_leftover)
3274 break;
3275 }
3276
3277 return order;
3278}
3279
3280static inline int calculate_order(unsigned int size)
3281{
3282 unsigned int order;
3283 unsigned int min_objects;
3284 unsigned int max_objects;
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294 min_objects = slub_min_objects;
3295 if (!min_objects)
3296 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3297 max_objects = order_objects(slub_max_order, size);
3298 min_objects = min(min_objects, max_objects);
3299
3300 while (min_objects > 1) {
3301 unsigned int fraction;
3302
3303 fraction = 16;
3304 while (fraction >= 4) {
3305 order = slab_order(size, min_objects,
3306 slub_max_order, fraction);
3307 if (order <= slub_max_order)
3308 return order;
3309 fraction /= 2;
3310 }
3311 min_objects--;
3312 }
3313
3314
3315
3316
3317
3318 order = slab_order(size, 1, slub_max_order, 1);
3319 if (order <= slub_max_order)
3320 return order;
3321
3322
3323
3324
3325 order = slab_order(size, 1, MAX_ORDER, 1);
3326 if (order < MAX_ORDER)
3327 return order;
3328 return -ENOSYS;
3329}
3330
3331static void
3332init_kmem_cache_node(struct kmem_cache_node *n)
3333{
3334 n->nr_partial = 0;
3335 spin_lock_init(&n->list_lock);
3336 INIT_LIST_HEAD(&n->partial);
3337#ifdef CONFIG_SLUB_DEBUG
3338 atomic_long_set(&n->nr_slabs, 0);
3339 atomic_long_set(&n->total_objects, 0);
3340 INIT_LIST_HEAD(&n->full);
3341#endif
3342}
3343
3344static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3345{
3346 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3347 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3348
3349
3350
3351
3352
3353 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3354 2 * sizeof(void *));
3355
3356 if (!s->cpu_slab)
3357 return 0;
3358
3359 init_kmem_cache_cpus(s);
3360
3361 return 1;
3362}
3363
3364static struct kmem_cache *kmem_cache_node;
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
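/*
 * No kmalloc_node yet so do it by hand. We know that this is the first
 * slab on the node for this slabcache. There are no concurrent accesses
 * possible.
 *
 * Note that this function only works on the kmem_cache_node
 * when allocating for the kmem_cache_node. This is used for bootstrapping
 * memory on a fresh node that has no slab structures yet.
 */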
3375static void early_kmem_cache_node_alloc(int node)
3376{
3377 struct page *page;
3378 struct kmem_cache_node *n;
3379
3380 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3381
3382 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3383
3384 BUG_ON(!page);
3385 if (page_to_nid(page) != node) {
3386 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3387 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3388 }
3389
3390 n = page->freelist;
3391 BUG_ON(!n);
3392#ifdef CONFIG_SLUB_DEBUG
3393 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3394 init_tracking(kmem_cache_node, n);
3395#endif
3396 n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3397 GFP_KERNEL);
3398 page->freelist = get_freepointer(kmem_cache_node, n);
3399 page->inuse = 1;
3400 page->frozen = 0;
3401 kmem_cache_node->node[node] = n;
3402 init_kmem_cache_node(n);
3403 inc_slabs_node(kmem_cache_node, node, page->objects);
3404
3405
3406
3407
3408
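	/*
	 * No locks need to be taken here as it has just been
	 * initialized and there is no concurrent access.
	 */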
3409 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3410}
3411
3412static void free_kmem_cache_nodes(struct kmem_cache *s)
3413{
3414 int node;
3415 struct kmem_cache_node *n;
3416
3417 for_each_kmem_cache_node(s, node, n) {
3418 s->node[node] = NULL;
3419 kmem_cache_free(kmem_cache_node, n);
3420 }
3421}
3422
3423void __kmem_cache_release(struct kmem_cache *s)
3424{
3425 cache_random_seq_destroy(s);
3426 free_percpu(s->cpu_slab);
3427 free_kmem_cache_nodes(s);
3428}
3429
3430static int init_kmem_cache_nodes(struct kmem_cache *s)
3431{
3432 int node;
3433
3434 for_each_node_state(node, N_NORMAL_MEMORY) {
3435 struct kmem_cache_node *n;
3436
3437 if (slab_state == DOWN) {
3438 early_kmem_cache_node_alloc(node);
3439 continue;
3440 }
3441 n = kmem_cache_alloc_node(kmem_cache_node,
3442 GFP_KERNEL, node);
3443
3444 if (!n) {
3445 free_kmem_cache_nodes(s);
3446 return 0;
3447 }
3448
3449 init_kmem_cache_node(n);
3450 s->node[node] = n;
3451 }
3452 return 1;
3453}
3454
3455static void set_min_partial(struct kmem_cache *s, unsigned long min)
3456{
3457 if (min < MIN_PARTIAL)
3458 min = MIN_PARTIAL;
3459 else if (min > MAX_PARTIAL)
3460 min = MAX_PARTIAL;
3461 s->min_partial = min;
3462}
3463
3464static void set_cpu_partial(struct kmem_cache *s)
3465{
3466#ifdef CONFIG_SLUB_CPU_PARTIAL
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
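	/*
	 * cpu_partial determines the maximum number of objects kept in the
	 * per cpu partial lists of a processor.
	 *
	 * Per cpu partial lists mainly contain slabs that just have one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * This setting also determines
	 *
	 * A) The number of objects from per cpu partial slabs dumped to the
	 *    per node list when we reach the limit.
	 * B) The number of objects in cpu partial slabs to extract from the
	 *    per node list when we run out of per cpu objects. We only fetch
	 *    50% to keep some capacity around for frees.
	 */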
3484 if (!kmem_cache_has_cpu_partial(s))
3485 s->cpu_partial = 0;
3486 else if (s->size >= PAGE_SIZE)
3487 s->cpu_partial = 2;
3488 else if (s->size >= 1024)
3489 s->cpu_partial = 6;
3490 else if (s->size >= 256)
3491 s->cpu_partial = 13;
3492 else
3493 s->cpu_partial = 30;
3494#endif
3495}
3496
3497
3498
3499
3500
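/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */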
3501static int calculate_sizes(struct kmem_cache *s, int forced_order)
3502{
3503 slab_flags_t flags = s->flags;
3504 unsigned int size = s->object_size;
3505 unsigned int order;
3506
3507
3508
3509
3510
3511
3512 size = ALIGN(size, sizeof(void *));
3513
3514#ifdef CONFIG_SLUB_DEBUG
3515
3516
3517
3518
3519
3520 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3521 !s->ctor)
3522 s->flags |= __OBJECT_POISON;
3523 else
3524 s->flags &= ~__OBJECT_POISON;
3525
3526
3527
3528
3529
3530
3531
3532 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3533 size += sizeof(void *);
3534#endif
3535
3536
3537
3538
3539
3540 s->inuse = size;
3541
3542 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3543 s->ctor)) {
3544
3545
3546
3547
3548
3549
3550
3551
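		/*
		 * Relocate the free pointer after the object if it is not
		 * permitted to overwrite the first word of the object on
		 * kmem_cache_free.
		 *
		 * This is the case if we do RCU, have a constructor or
		 * are poisoning the objects.
		 */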
3552 s->offset = size;
3553 size += sizeof(void *);
3554 }
3555
3556#ifdef CONFIG_SLUB_DEBUG
3557 if (flags & SLAB_STORE_USER)
3558
3559
3560
3561
3562 size += 2 * sizeof(struct track);
3563#endif
3564
3565 kasan_cache_create(s, &size, &s->flags);
3566#ifdef CONFIG_SLUB_DEBUG
3567 if (flags & SLAB_RED_ZONE) {
3568
3569
3570
3571
3572
3573
3574
3575 size += sizeof(void *);
3576
3577 s->red_left_pad = sizeof(void *);
3578 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3579 size += s->red_left_pad;
3580 }
3581#endif
3582
3583
3584
3585
3586
3587
3588 size = ALIGN(size, s->align);
3589 s->size = size;
3590 if (forced_order >= 0)
3591 order = forced_order;
3592 else
3593 order = calculate_order(size);
3594
3595 if ((int)order < 0)
3596 return 0;
3597
3598 s->allocflags = 0;
3599 if (order)
3600 s->allocflags |= __GFP_COMP;
3601
3602 if (s->flags & SLAB_CACHE_DMA)
3603 s->allocflags |= GFP_DMA;
3604
3605 if (s->flags & SLAB_CACHE_DMA32)
3606 s->allocflags |= GFP_DMA32;
3607
3608 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3609 s->allocflags |= __GFP_RECLAIMABLE;
3610
3611
3612
3613
3614 s->oo = oo_make(order, size);
3615 s->min = oo_make(get_order(size), size);
3616 if (oo_objects(s->oo) > oo_objects(s->max))
3617 s->max = s->oo;
3618
3619 return !!oo_objects(s->oo);
3620}
3621
3622static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3623{
3624 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3625#ifdef CONFIG_SLAB_FREELIST_HARDENED
3626 s->random = get_random_long();
3627#endif
3628
3629 if (!calculate_sizes(s, -1))
3630 goto error;
3631 if (disable_higher_order_debug) {
3632
3633
3634
3635
3636 if (get_order(s->size) > get_order(s->object_size)) {
3637 s->flags &= ~DEBUG_METADATA_FLAGS;
3638 s->offset = 0;
3639 if (!calculate_sizes(s, -1))
3640 goto error;
3641 }
3642 }
3643
3644#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3645 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3646 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3647
3648 s->flags |= __CMPXCHG_DOUBLE;
3649#endif
3650
3651
3652
3653
3654
3655 set_min_partial(s, ilog2(s->size) / 2);
3656
3657 set_cpu_partial(s);
3658
3659#ifdef CONFIG_NUMA
3660 s->remote_node_defrag_ratio = 1000;
3661#endif
3662
3663
3664 if (slab_state >= UP) {
3665 if (init_cache_random_seq(s))
3666 goto error;
3667 }
3668
3669 if (!init_kmem_cache_nodes(s))
3670 goto error;
3671
3672 if (alloc_kmem_cache_cpus(s))
3673 return 0;
3674
3675 free_kmem_cache_nodes(s);
3676error:
3677 return -EINVAL;
3678}
3679
3680static void list_slab_objects(struct kmem_cache *s, struct page *page,
3681 const char *text)
3682{
3683#ifdef CONFIG_SLUB_DEBUG
3684 void *addr = page_address(page);
3685 void *p;
3686 unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
3687 if (!map)
3688 return;
3689 slab_err(s, page, text, s->name);
3690 slab_lock(page);
3691
3692 get_map(s, page, map);
3693 for_each_object(p, s, addr, page->objects) {
3694
3695 if (!test_bit(slab_index(p, s, addr), map)) {
3696 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3697 print_tracking(s, p);
3698 }
3699 }
3700 slab_unlock(page);
3701 bitmap_free(map);
3702#endif
3703}
3704
3705
3706
3707
3708
3709
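/*
 * Attempt to free all partial slabs on a node.
 * This is called from __kmem_cache_shutdown(). We must take list_lock
 * because sysfs files may still access the partial list after shutdown
 * has started.
 */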
3710static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3711{
3712 LIST_HEAD(discard);
3713 struct page *page, *h;
3714
3715 BUG_ON(irqs_disabled());
3716 spin_lock_irq(&n->list_lock);
3717 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3718 if (!page->inuse) {
3719 remove_partial(n, page);
3720 list_add(&page->slab_list, &discard);
3721 } else {
3722 list_slab_objects(s, page,
3723 "Objects remaining in %s on __kmem_cache_shutdown()");
3724 }
3725 }
3726 spin_unlock_irq(&n->list_lock);
3727
3728 list_for_each_entry_safe(page, h, &discard, slab_list)
3729 discard_slab(s, page);
3730}
3731
3732bool __kmem_cache_empty(struct kmem_cache *s)
3733{
3734 int node;
3735 struct kmem_cache_node *n;
3736
3737 for_each_kmem_cache_node(s, node, n)
3738 if (n->nr_partial || slabs_node(s, node))
3739 return false;
3740 return true;
3741}
3742
3743
3744
3745
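/*
 * Release all resources used by a slab cache.
 */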
3746int __kmem_cache_shutdown(struct kmem_cache *s)
3747{
3748 int node;
3749 struct kmem_cache_node *n;
3750
3751 flush_all(s);
3752
3753 for_each_kmem_cache_node(s, node, n) {
3754 free_partial(s, n);
3755 if (n->nr_partial || slabs_node(s, node))
3756 return 1;
3757 }
3758 sysfs_slab_remove(s);
3759 return 0;
3760}
3761
3762
3763
3764
3765
3766static int __init setup_slub_min_order(char *str)
3767{
3768 get_option(&str, (int *)&slub_min_order);
3769
3770 return 1;
3771}
3772
3773__setup("slub_min_order=", setup_slub_min_order);
3774
3775static int __init setup_slub_max_order(char *str)
3776{
3777 get_option(&str, (int *)&slub_max_order);
3778 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
3779
3780 return 1;
3781}
3782
3783__setup("slub_max_order=", setup_slub_max_order);
3784
3785static int __init setup_slub_min_objects(char *str)
3786{
3787 get_option(&str, (int *)&slub_min_objects);
3788
3789 return 1;
3790}
3791
3792__setup("slub_min_objects=", setup_slub_min_objects);
3793
3794void *__kmalloc(size_t size, gfp_t flags)
3795{
3796 struct kmem_cache *s;
3797 void *ret;
3798
3799 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3800 return kmalloc_large(size, flags);
3801
3802 s = kmalloc_slab(size, flags);
3803
3804 if (unlikely(ZERO_OR_NULL_PTR(s)))
3805 return s;
3806
3807 ret = slab_alloc(s, flags, _RET_IP_);
3808
3809 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3810
3811 ret = kasan_kmalloc(s, ret, size, flags);
3812
3813 return ret;
3814}
3815EXPORT_SYMBOL(__kmalloc);
3816
3817#ifdef CONFIG_NUMA
3818static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3819{
3820 struct page *page;
3821 void *ptr = NULL;
3822
3823 flags |= __GFP_COMP;
3824 page = alloc_pages_node(node, flags, get_order(size));
3825 if (page)
3826 ptr = page_address(page);
3827
3828 return kmalloc_large_node_hook(ptr, size, flags);
3829}
3830
3831void *__kmalloc_node(size_t size, gfp_t flags, int node)
3832{
3833 struct kmem_cache *s;
3834 void *ret;
3835
3836 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3837 ret = kmalloc_large_node(size, flags, node);
3838
3839 trace_kmalloc_node(_RET_IP_, ret,
3840 size, PAGE_SIZE << get_order(size),
3841 flags, node);
3842
3843 return ret;
3844 }
3845
3846 s = kmalloc_slab(size, flags);
3847
3848 if (unlikely(ZERO_OR_NULL_PTR(s)))
3849 return s;
3850
3851 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3852
3853 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3854
3855 ret = kasan_kmalloc(s, ret, size, flags);
3856
3857 return ret;
3858}
3859EXPORT_SYMBOL(__kmalloc_node);
3860#endif
3861
3862#ifdef CONFIG_HARDENED_USERCOPY
3863
3864
3865
3866
3867
3868
3869
3870
3871void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
3872 bool to_user)
3873{
3874 struct kmem_cache *s;
3875 unsigned int offset;
3876 size_t object_size;
3877
3878 ptr = kasan_reset_tag(ptr);
3879
3880
3881 s = page->slab_cache;
3882
3883
3884 if (ptr < page_address(page))
3885 usercopy_abort("SLUB object not in SLUB page?!", NULL,
3886 to_user, 0, n);
3887
3888
3889 offset = (ptr - page_address(page)) % s->size;
3890
3891
3892 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3893 if (offset < s->red_left_pad)
3894 usercopy_abort("SLUB object in left red zone",
3895 s->name, to_user, offset, n);
3896 offset -= s->red_left_pad;
3897 }
3898
3899
3900 if (offset >= s->useroffset &&
3901 offset - s->useroffset <= s->usersize &&
3902 n <= s->useroffset - offset + s->usersize)
3903 return;
3904
3905
3906
3907
3908
3909
3910
3911 object_size = slab_ksize(s);
3912 if (usercopy_fallback &&
3913 offset <= object_size && n <= object_size - offset) {
3914 usercopy_warn("SLUB object", s->name, to_user, offset, n);
3915 return;
3916 }
3917
3918 usercopy_abort("SLUB object", s->name, to_user, offset, n);
3919}
3920#endif
3921
3922size_t __ksize(const void *object)
3923{
3924 struct page *page;
3925
3926 if (unlikely(object == ZERO_SIZE_PTR))
3927 return 0;
3928
3929 page = virt_to_head_page(object);
3930
3931 if (unlikely(!PageSlab(page))) {
3932 WARN_ON(!PageCompound(page));
3933 return PAGE_SIZE << compound_order(page);
3934 }
3935
3936 return slab_ksize(page->slab_cache);
3937}
3938EXPORT_SYMBOL(__ksize);
3939
3940void kfree(const void *x)
3941{
3942 struct page *page;
3943 void *object = (void *)x;
3944
3945 trace_kfree(_RET_IP_, x);
3946
3947 if (unlikely(ZERO_OR_NULL_PTR(x)))
3948 return;
3949
3950 page = virt_to_head_page(x);
3951 if (unlikely(!PageSlab(page))) {
3952 BUG_ON(!PageCompound(page));
3953 kfree_hook(object);
3954 __free_pages(page, compound_order(page));
3955 return;
3956 }
3957 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3958}
3959EXPORT_SYMBOL(kfree);
3960
3961#define SHRINK_PROMOTE_MAX 32
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
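/*
 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
 * up most to the head of the partial lists. New allocations will then
 * fill those up and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This results in them
 * being allocated from last, increasing the chance that the last objects
 * are freed in them.
 */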
3972int __kmem_cache_shrink(struct kmem_cache *s)
3973{
3974 int node;
3975 int i;
3976 struct kmem_cache_node *n;
3977 struct page *page;
3978 struct page *t;
3979 struct list_head discard;
3980 struct list_head promote[SHRINK_PROMOTE_MAX];
3981 unsigned long flags;
3982 int ret = 0;
3983
3984 flush_all(s);
3985 for_each_kmem_cache_node(s, node, n) {
3986 INIT_LIST_HEAD(&discard);
3987 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
3988 INIT_LIST_HEAD(promote + i);
3989
3990 spin_lock_irqsave(&n->list_lock, flags);
3991
3992
3993
3994
3995
3996
3997
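		/*
		 * Build lists of slabs to discard or promote.
		 *
		 * Note that concurrent frees may occur while we hold the
		 * list_lock. page->inuse here is the upper limit.
		 */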
3998 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
3999 int free = page->objects - page->inuse;
4000
4001
4002 barrier();
4003
4004
4005 BUG_ON(free <= 0);
4006
4007 if (free == page->objects) {
4008 list_move(&page->slab_list, &discard);
4009 n->nr_partial--;
4010 } else if (free <= SHRINK_PROMOTE_MAX)
4011 list_move(&page->slab_list, promote + free - 1);
4012 }
4013
4014
4015
4016
4017
4018 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4019 list_splice(promote + i, &n->partial);
4020
4021 spin_unlock_irqrestore(&n->list_lock, flags);
4022
4023
4024 list_for_each_entry_safe(page, t, &discard, slab_list)
4025 discard_slab(s, page);
4026
4027 if (slabs_node(s, node))
4028 ret = 1;
4029 }
4030
4031 return ret;
4032}
4033
4034#ifdef CONFIG_MEMCG
4035void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
4036{
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049 if (!__kmem_cache_shrink(s))
4050 sysfs_slab_remove(s);
4051}
4052
4053void __kmemcg_cache_deactivate(struct kmem_cache *s)
4054{
4055
4056
4057
4058
4059 slub_set_cpu_partial(s, 0);
4060 s->min_partial = 0;
4061}
4062#endif
4063
4064static int slab_mem_going_offline_callback(void *arg)
4065{
4066 struct kmem_cache *s;
4067
4068 mutex_lock(&slab_mutex);
4069 list_for_each_entry(s, &slab_caches, list)
4070 __kmem_cache_shrink(s);
4071 mutex_unlock(&slab_mutex);
4072
4073 return 0;
4074}
4075
4076static void slab_mem_offline_callback(void *arg)
4077{
4078 struct kmem_cache_node *n;
4079 struct kmem_cache *s;
4080 struct memory_notify *marg = arg;
4081 int offline_node;
4082
4083 offline_node = marg->status_change_nid_normal;
4084
4085
4086
4087
4088
4089 if (offline_node < 0)
4090 return;
4091
4092 mutex_lock(&slab_mutex);
4093 list_for_each_entry(s, &slab_caches, list) {
4094 n = get_node(s, offline_node);
4095 if (n) {
4096
4097
4098
4099
4100
4101
4102 BUG_ON(slabs_node(s, offline_node));
4103
4104 s->node[offline_node] = NULL;
4105 kmem_cache_free(kmem_cache_node, n);
4106 }
4107 }
4108 mutex_unlock(&slab_mutex);
4109}
4110
4111static int slab_mem_going_online_callback(void *arg)
4112{
4113 struct kmem_cache_node *n;
4114 struct kmem_cache *s;
4115 struct memory_notify *marg = arg;
4116 int nid = marg->status_change_nid_normal;
4117 int ret = 0;
4118
4119
4120
4121
4122
4123 if (nid < 0)
4124 return 0;
4125
4126
4127
4128
4129
4130
4131 mutex_lock(&slab_mutex);
4132 list_for_each_entry(s, &slab_caches, list) {
4133
4134
4135
4136
4137
4138 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4139 if (!n) {
4140 ret = -ENOMEM;
4141 goto out;
4142 }
4143 init_kmem_cache_node(n);
4144 s->node[nid] = n;
4145 }
4146out:
4147 mutex_unlock(&slab_mutex);
4148 return ret;
4149}
4150
4151static int slab_memory_callback(struct notifier_block *self,
4152 unsigned long action, void *arg)
4153{
4154 int ret = 0;
4155
4156 switch (action) {
4157 case MEM_GOING_ONLINE:
4158 ret = slab_mem_going_online_callback(arg);
4159 break;
4160 case MEM_GOING_OFFLINE:
4161 ret = slab_mem_going_offline_callback(arg);
4162 break;
4163 case MEM_OFFLINE:
4164 case MEM_CANCEL_ONLINE:
4165 slab_mem_offline_callback(arg);
4166 break;
4167 case MEM_ONLINE:
4168 case MEM_CANCEL_OFFLINE:
4169 break;
4170 }
4171 if (ret)
4172 ret = notifier_from_errno(ret);
4173 else
4174 ret = NOTIFY_OK;
4175 return ret;
4176}
4177
4178static struct notifier_block slab_memory_callback_nb = {
4179 .notifier_call = slab_memory_callback,
4180 .priority = SLAB_CALLBACK_PRI,
4181};
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
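/*
 * Used for early kmem_cache structures that were allocated using
 * the page allocator. Allocate them properly then fix up the pointers
 * that may be pointing to the wrong kmem_cache structure.
 */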
4193static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4194{
4195 int node;
4196 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4197 struct kmem_cache_node *n;
4198
4199 memcpy(s, static_cache, kmem_cache->object_size);
4200
4201
4202
4203
4204
4205
4206 __flush_cpu_slab(s, smp_processor_id());
4207 for_each_kmem_cache_node(s, node, n) {
4208 struct page *p;
4209
4210 list_for_each_entry(p, &n->partial, slab_list)
4211 p->slab_cache = s;
4212
4213#ifdef CONFIG_SLUB_DEBUG
4214 list_for_each_entry(p, &n->full, slab_list)
4215 p->slab_cache = s;
4216#endif
4217 }
4218 slab_init_memcg_params(s);
4219 list_add(&s->list, &slab_caches);
4220 memcg_link_cache(s, NULL);
4221 return s;
4222}
4223
4224void __init kmem_cache_init(void)
4225{
4226 static __initdata struct kmem_cache boot_kmem_cache,
4227 boot_kmem_cache_node;
4228
4229 if (debug_guardpage_minorder())
4230 slub_max_order = 0;
4231
4232 kmem_cache_node = &boot_kmem_cache_node;
4233 kmem_cache = &boot_kmem_cache;
4234
4235 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4236 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4237
4238 register_hotmemory_notifier(&slab_memory_callback_nb);
4239
4240
4241 slab_state = PARTIAL;
4242
4243 create_boot_cache(kmem_cache, "kmem_cache",
4244 offsetof(struct kmem_cache, node) +
4245 nr_node_ids * sizeof(struct kmem_cache_node *),
4246 SLAB_HWCACHE_ALIGN, 0, 0);
4247
4248 kmem_cache = bootstrap(&boot_kmem_cache);
4249 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4250
4251
4252 setup_kmalloc_cache_index_table();
4253 create_kmalloc_caches(0);
4254
4255
4256 init_freelist_randomization();
4257
4258 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4259 slub_cpu_dead);
4260
4261 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4262 cache_line_size(),
4263 slub_min_order, slub_max_order, slub_min_objects,
4264 nr_cpu_ids, nr_node_ids);
4265}
4266
4267void __init kmem_cache_init_late(void)
4268{
4269}
4270
4271struct kmem_cache *
4272__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4273 slab_flags_t flags, void (*ctor)(void *))
4274{
4275 struct kmem_cache *s, *c;
4276
4277 s = find_mergeable(size, align, flags, name, ctor);
4278 if (s) {
4279 s->refcount++;
4280
4281
4282
4283
4284
4285 s->object_size = max(s->object_size, size);
4286 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4287
4288 for_each_memcg_cache(c, s) {
4289 c->object_size = s->object_size;
4290 c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4291 }
4292
4293 if (sysfs_slab_alias(s, name)) {
4294 s->refcount--;
4295 s = NULL;
4296 }
4297 }
4298
4299 return s;
4300}
4301
4302int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4303{
4304 int err;
4305
4306 err = kmem_cache_open(s, flags);
4307 if (err)
4308 return err;
4309
4310
4311 if (slab_state <= UP)
4312 return 0;
4313
4314 memcg_propagate_slab_attrs(s);
4315 err = sysfs_slab_add(s);
4316 if (err)
4317 __kmem_cache_release(s);
4318
4319 return err;
4320}
4321
4322void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4323{
4324 struct kmem_cache *s;
4325 void *ret;
4326
4327 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4328 return kmalloc_large(size, gfpflags);
4329
4330 s = kmalloc_slab(size, gfpflags);
4331
4332 if (unlikely(ZERO_OR_NULL_PTR(s)))
4333 return s;
4334
4335 ret = slab_alloc(s, gfpflags, caller);
4336
4337
4338 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4339
4340 return ret;
4341}
4342
4343#ifdef CONFIG_NUMA
4344void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4345 int node, unsigned long caller)
4346{
4347 struct kmem_cache *s;
4348 void *ret;
4349
4350 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4351 ret = kmalloc_large_node(size, gfpflags, node);
4352
4353 trace_kmalloc_node(caller, ret,
4354 size, PAGE_SIZE << get_order(size),
4355 gfpflags, node);
4356
4357 return ret;
4358 }
4359
4360 s = kmalloc_slab(size, gfpflags);
4361
4362 if (unlikely(ZERO_OR_NULL_PTR(s)))
4363 return s;
4364
4365 ret = slab_alloc_node(s, gfpflags, node, caller);
4366
4367
4368 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4369
4370 return ret;
4371}
4372#endif
4373
4374#ifdef CONFIG_SYSFS
4375static int count_inuse(struct page *page)
4376{
4377 return page->inuse;
4378}
4379
4380static int count_total(struct page *page)
4381{
4382 return page->objects;
4383}
4384#endif
4385
4386#ifdef CONFIG_SLUB_DEBUG
4387static int validate_slab(struct kmem_cache *s, struct page *page,
4388 unsigned long *map)
4389{
4390 void *p;
4391 void *addr = page_address(page);
4392
4393 if (!check_slab(s, page) ||
4394 !on_freelist(s, page, NULL))
4395 return 0;
4396
4397
4398 bitmap_zero(map, page->objects);
4399
4400 get_map(s, page, map);
4401 for_each_object(p, s, addr, page->objects) {
4402 if (test_bit(slab_index(p, s, addr), map))
4403 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4404 return 0;
4405 }
4406
4407 for_each_object(p, s, addr, page->objects)
4408 if (!test_bit(slab_index(p, s, addr), map))
4409 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4410 return 0;
4411 return 1;
4412}
4413
4414static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4415 unsigned long *map)
4416{
4417 slab_lock(page);
4418 validate_slab(s, page, map);
4419 slab_unlock(page);
4420}
4421
4422static int validate_slab_node(struct kmem_cache *s,
4423 struct kmem_cache_node *n, unsigned long *map)
4424{
4425 unsigned long count = 0;
4426 struct page *page;
4427 unsigned long flags;
4428
4429 spin_lock_irqsave(&n->list_lock, flags);
4430
4431 list_for_each_entry(page, &n->partial, slab_list) {
4432 validate_slab_slab(s, page, map);
4433 count++;
4434 }
4435 if (count != n->nr_partial)
4436 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4437 s->name, count, n->nr_partial);
4438
4439 if (!(s->flags & SLAB_STORE_USER))
4440 goto out;
4441
4442 list_for_each_entry(page, &n->full, slab_list) {
4443 validate_slab_slab(s, page, map);
4444 count++;
4445 }
4446 if (count != atomic_long_read(&n->nr_slabs))
4447 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4448 s->name, count, atomic_long_read(&n->nr_slabs));
4449
4450out:
4451 spin_unlock_irqrestore(&n->list_lock, flags);
4452 return count;
4453}
4454
4455static long validate_slab_cache(struct kmem_cache *s)
4456{
4457 int node;
4458 unsigned long count = 0;
4459 struct kmem_cache_node *n;
4460 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4461
4462 if (!map)
4463 return -ENOMEM;
4464
4465 flush_all(s);
4466 for_each_kmem_cache_node(s, node, n)
4467 count += validate_slab_node(s, n, map);
4468 bitmap_free(map);
4469 return count;
4470}
4471
4472
4473
4474
4475
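/*
 * Generate lists of code addresses where slabcache objects are allocated
 * and freed.
 */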
4476struct location {
4477 unsigned long count;
4478 unsigned long addr;
4479 long long sum_time;
4480 long min_time;
4481 long max_time;
4482 long min_pid;
4483 long max_pid;
4484 DECLARE_BITMAP(cpus, NR_CPUS);
4485 nodemask_t nodes;
4486};
4487
4488struct loc_track {
4489 unsigned long max;
4490 unsigned long count;
4491 struct location *loc;
4492};
4493
4494static void free_loc_track(struct loc_track *t)
4495{
4496 if (t->max)
4497 free_pages((unsigned long)t->loc,
4498 get_order(sizeof(struct location) * t->max));
4499}
4500
4501static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4502{
4503 struct location *l;
4504 int order;
4505
4506 order = get_order(sizeof(struct location) * max);
4507
4508 l = (void *)__get_free_pages(flags, order);
4509 if (!l)
4510 return 0;
4511
4512 if (t->count) {
4513 memcpy(l, t->loc, sizeof(struct location) * t->count);
4514 free_loc_track(t);
4515 }
4516 t->max = max;
4517 t->loc = l;
4518 return 1;
4519}
4520
4521static int add_location(struct loc_track *t, struct kmem_cache *s,
4522 const struct track *track)
4523{
4524 long start, end, pos;
4525 struct location *l;
4526 unsigned long caddr;
4527 unsigned long age = jiffies - track->when;
4528
4529 start = -1;
4530 end = t->count;
4531
4532 for ( ; ; ) {
4533 pos = start + (end - start + 1) / 2;
4534
4535
4536
4537
4538
4539 if (pos == end)
4540 break;
4541
4542 caddr = t->loc[pos].addr;
4543 if (track->addr == caddr) {
4544
4545 l = &t->loc[pos];
4546 l->count++;
4547 if (track->when) {
4548 l->sum_time += age;
4549 if (age < l->min_time)
4550 l->min_time = age;
4551 if (age > l->max_time)
4552 l->max_time = age;
4553
4554 if (track->pid < l->min_pid)
4555 l->min_pid = track->pid;
4556 if (track->pid > l->max_pid)
4557 l->max_pid = track->pid;
4558
4559 cpumask_set_cpu(track->cpu,
4560 to_cpumask(l->cpus));
4561 }
4562 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4563 return 1;
4564 }
4565
4566 if (track->addr < caddr)
4567 end = pos;
4568 else
4569 start = pos;
4570 }
4571
4572
4573
4574
4575 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4576 return 0;
4577
4578 l = t->loc + pos;
4579 if (pos < t->count)
4580 memmove(l + 1, l,
4581 (t->count - pos) * sizeof(struct location));
4582 t->count++;
4583 l->count = 1;
4584 l->addr = track->addr;
4585 l->sum_time = age;
4586 l->min_time = age;
4587 l->max_time = age;
4588 l->min_pid = track->pid;
4589 l->max_pid = track->pid;
4590 cpumask_clear(to_cpumask(l->cpus));
4591 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4592 nodes_clear(l->nodes);
4593 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4594 return 1;
4595}
4596
4597static void process_slab(struct loc_track *t, struct kmem_cache *s,
4598 struct page *page, enum track_item alloc,
4599 unsigned long *map)
4600{
4601 void *addr = page_address(page);
4602 void *p;
4603
4604 bitmap_zero(map, page->objects);
4605 get_map(s, page, map);
4606
4607 for_each_object(p, s, addr, page->objects)
4608 if (!test_bit(slab_index(p, s, addr), map))
4609 add_location(t, s, get_track(s, p, alloc));
4610}
4611
4612static int list_locations(struct kmem_cache *s, char *buf,
4613 enum track_item alloc)
4614{
4615 int len = 0;
4616 unsigned long i;
4617 struct loc_track t = { 0, 0, NULL };
4618 int node;
4619 struct kmem_cache_node *n;
4620 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4621
4622 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4623 GFP_KERNEL)) {
4624 bitmap_free(map);
4625 return sprintf(buf, "Out of memory\n");
4626 }
4627
4628 flush_all(s);
4629
4630 for_each_kmem_cache_node(s, node, n) {
4631 unsigned long flags;
4632 struct page *page;
4633
4634 if (!atomic_long_read(&n->nr_slabs))
4635 continue;
4636
4637 spin_lock_irqsave(&n->list_lock, flags);
4638 list_for_each_entry(page, &n->partial, slab_list)
4639 process_slab(&t, s, page, alloc, map);
4640 list_for_each_entry(page, &n->full, slab_list)
4641 process_slab(&t, s, page, alloc, map);
4642 spin_unlock_irqrestore(&n->list_lock, flags);
4643 }
4644
4645 for (i = 0; i < t.count; i++) {
4646 struct location *l = &t.loc[i];
4647
4648 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4649 break;
4650 len += sprintf(buf + len, "%7ld ", l->count);
4651
4652 if (l->addr)
4653 len += sprintf(buf + len, "%pS", (void *)l->addr);
4654 else
4655 len += sprintf(buf + len, "<not-available>");
4656
4657 if (l->sum_time != l->min_time) {
4658 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4659 l->min_time,
4660 (long)div_u64(l->sum_time, l->count),
4661 l->max_time);
4662 } else
4663 len += sprintf(buf + len, " age=%ld",
4664 l->min_time);
4665
4666 if (l->min_pid != l->max_pid)
4667 len += sprintf(buf + len, " pid=%ld-%ld",
4668 l->min_pid, l->max_pid);
4669 else
4670 len += sprintf(buf + len, " pid=%ld",
4671 l->min_pid);
4672
4673 if (num_online_cpus() > 1 &&
4674 !cpumask_empty(to_cpumask(l->cpus)) &&
4675 len < PAGE_SIZE - 60)
4676 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4677 " cpus=%*pbl",
4678 cpumask_pr_args(to_cpumask(l->cpus)));
4679
4680 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4681 len < PAGE_SIZE - 60)
4682 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4683 " nodes=%*pbl",
4684 nodemask_pr_args(&l->nodes));
4685
4686 len += sprintf(buf + len, "\n");
4687 }
4688
4689 free_loc_track(&t);
4690 bitmap_free(map);
4691 if (!t.count)
4692 len += sprintf(buf, "No data\n");
4693 return len;
4694}
4695#endif
4696
4697#ifdef SLUB_RESILIENCY_TEST
4698static void __init resiliency_test(void)
4699{
4700 u8 *p;
4701 int type = KMALLOC_NORMAL;
4702
4703 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4704
4705 pr_err("SLUB resiliency testing\n");
4706 pr_err("-----------------------\n");
4707 pr_err("A. Corruption after allocation\n");
4708
4709 p = kzalloc(16, GFP_KERNEL);
4710 p[16] = 0x12;
4711 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4712 p + 16);
4713
4714 validate_slab_cache(kmalloc_caches[type][4]);
4715
4716
4717 p = kzalloc(32, GFP_KERNEL);
4718 p[32 + sizeof(void *)] = 0x34;
4719 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4720 p);
4721 pr_err("If allocated object is overwritten then not detectable\n\n");
4722
4723 validate_slab_cache(kmalloc_caches[type][5]);
4724 p = kzalloc(64, GFP_KERNEL);
4725 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4726 *p = 0x56;
4727 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4728 p);
4729 pr_err("If allocated object is overwritten then not detectable\n\n");
4730 validate_slab_cache(kmalloc_caches[type][6]);
4731
4732 pr_err("\nB. Corruption after free\n");
4733 p = kzalloc(128, GFP_KERNEL);
4734 kfree(p);
4735 *p = 0x78;
4736 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4737 validate_slab_cache(kmalloc_caches[type][7]);
4738
4739 p = kzalloc(256, GFP_KERNEL);
4740 kfree(p);
4741 p[50] = 0x9a;
4742 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4743 validate_slab_cache(kmalloc_caches[type][8]);
4744
4745 p = kzalloc(512, GFP_KERNEL);
4746 kfree(p);
4747 p[512] = 0xab;
4748 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4749 validate_slab_cache(kmalloc_caches[type][9]);
4750}
4751#else
4752#ifdef CONFIG_SYSFS
static void resiliency_test(void) {}
4754#endif
4755#endif
4756
4757#ifdef CONFIG_SYSFS
4758enum slab_stat_type {
4759 SL_ALL,
4760 SL_PARTIAL,
4761 SL_CPU,
4762 SL_OBJECTS,
4763 SL_TOTAL
4764};
4765
4766#define SO_ALL (1 << SL_ALL)
4767#define SO_PARTIAL (1 << SL_PARTIAL)
4768#define SO_CPU (1 << SL_CPU)
4769#define SO_OBJECTS (1 << SL_OBJECTS)
4770#define SO_TOTAL (1 << SL_TOTAL)
4771
4772#ifdef CONFIG_MEMCG
4773static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4774
4775static int __init setup_slub_memcg_sysfs(char *str)
4776{
4777 int v;
4778
4779 if (get_option(&str, &v) > 0)
4780 memcg_sysfs_enabled = v;
4781
4782 return 1;
4783}
4784
4785__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4786#endif
4787
4788static ssize_t show_slab_objects(struct kmem_cache *s,
4789 char *buf, unsigned long flags)
4790{
4791 unsigned long total = 0;
4792 int node;
4793 int x;
4794 unsigned long *nodes;
4795
4796 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4797 if (!nodes)
4798 return -ENOMEM;
4799
4800 if (flags & SO_CPU) {
4801 int cpu;
4802
4803 for_each_possible_cpu(cpu) {
4804 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4805 cpu);
4806 int node;
4807 struct page *page;
4808
4809 page = READ_ONCE(c->page);
4810 if (!page)
4811 continue;
4812
4813 node = page_to_nid(page);
4814 if (flags & SO_TOTAL)
4815 x = page->objects;
4816 else if (flags & SO_OBJECTS)
4817 x = page->inuse;
4818 else
4819 x = 1;
4820
4821 total += x;
4822 nodes[node] += x;
4823
4824 page = slub_percpu_partial_read_once(c);
4825 if (page) {
4826 node = page_to_nid(page);
4827 if (flags & SO_TOTAL)
4828 WARN_ON_ONCE(1);
4829 else if (flags & SO_OBJECTS)
4830 WARN_ON_ONCE(1);
4831 else
4832 x = page->pages;
4833 total += x;
4834 nodes[node] += x;
4835 }
4836 }
4837 }
4838
4839 get_online_mems();
4840#ifdef CONFIG_SLUB_DEBUG
4841 if (flags & SO_ALL) {
4842 struct kmem_cache_node *n;
4843
4844 for_each_kmem_cache_node(s, node, n) {
4845
4846 if (flags & SO_TOTAL)
4847 x = atomic_long_read(&n->total_objects);
4848 else if (flags & SO_OBJECTS)
4849 x = atomic_long_read(&n->total_objects) -
4850 count_partial(n, count_free);
4851 else
4852 x = atomic_long_read(&n->nr_slabs);
4853 total += x;
4854 nodes[node] += x;
4855 }
4856
4857 } else
4858#endif
4859 if (flags & SO_PARTIAL) {
4860 struct kmem_cache_node *n;
4861
4862 for_each_kmem_cache_node(s, node, n) {
4863 if (flags & SO_TOTAL)
4864 x = count_partial(n, count_total);
4865 else if (flags & SO_OBJECTS)
4866 x = count_partial(n, count_inuse);
4867 else
4868 x = n->nr_partial;
4869 total += x;
4870 nodes[node] += x;
4871 }
4872 }
4873 x = sprintf(buf, "%lu", total);
4874#ifdef CONFIG_NUMA
4875 for (node = 0; node < nr_node_ids; node++)
4876 if (nodes[node])
4877 x += sprintf(buf + x, " N%d=%lu",
4878 node, nodes[node]);
4879#endif
4880 put_online_mems();
4881 kfree(nodes);
4882 return x + sprintf(buf + x, "\n");
4883}
4884
4885#ifdef CONFIG_SLUB_DEBUG
4886static int any_slab_objects(struct kmem_cache *s)
4887{
4888 int node;
4889 struct kmem_cache_node *n;
4890
4891 for_each_kmem_cache_node(s, node, n)
4892 if (atomic_long_read(&n->total_objects))
4893 return 1;
4894
4895 return 0;
4896}
4897#endif
4898
4899#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4900#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4901
4902struct slab_attribute {
4903 struct attribute attr;
4904 ssize_t (*show)(struct kmem_cache *s, char *buf);
4905 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4906};
4907
4908#define SLAB_ATTR_RO(_name) \
4909 static struct slab_attribute _name##_attr = \
4910 __ATTR(_name, 0400, _name##_show, NULL)
4911
4912#define SLAB_ATTR(_name) \
4913 static struct slab_attribute _name##_attr = \
4914 __ATTR(_name, 0600, _name##_show, _name##_store)
4915
4916static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4917{
4918 return sprintf(buf, "%u\n", s->size);
4919}
4920SLAB_ATTR_RO(slab_size);
4921
4922static ssize_t align_show(struct kmem_cache *s, char *buf)
4923{
4924 return sprintf(buf, "%u\n", s->align);
4925}
4926SLAB_ATTR_RO(align);
4927
4928static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4929{
4930 return sprintf(buf, "%u\n", s->object_size);
4931}
4932SLAB_ATTR_RO(object_size);
4933
4934static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4935{
4936 return sprintf(buf, "%u\n", oo_objects(s->oo));
4937}
4938SLAB_ATTR_RO(objs_per_slab);
4939
4940static ssize_t order_store(struct kmem_cache *s,
4941 const char *buf, size_t length)
4942{
4943 unsigned int order;
4944 int err;
4945
4946 err = kstrtouint(buf, 10, &order);
4947 if (err)
4948 return err;
4949
4950 if (order > slub_max_order || order < slub_min_order)
4951 return -EINVAL;
4952
4953 calculate_sizes(s, order);
4954 return length;
4955}
4956
4957static ssize_t order_show(struct kmem_cache *s, char *buf)
4958{
4959 return sprintf(buf, "%u\n", oo_order(s->oo));
4960}
4961SLAB_ATTR(order);
4962
4963static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4964{
4965 return sprintf(buf, "%lu\n", s->min_partial);
4966}
4967
4968static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4969 size_t length)
4970{
4971 unsigned long min;
4972 int err;
4973
4974 err = kstrtoul(buf, 10, &min);
4975 if (err)
4976 return err;
4977
4978 set_min_partial(s, min);
4979 return length;
4980}
4981SLAB_ATTR(min_partial);
4982
4983static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4984{
4985 return sprintf(buf, "%u\n", slub_cpu_partial(s));
4986}
4987
4988static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4989 size_t length)
4990{
4991 unsigned int objects;
4992 int err;
4993
4994 err = kstrtouint(buf, 10, &objects);
4995 if (err)
4996 return err;
4997 if (objects && !kmem_cache_has_cpu_partial(s))
4998 return -EINVAL;
4999
5000 slub_set_cpu_partial(s, objects);
5001 flush_all(s);
5002 return length;
5003}
5004SLAB_ATTR(cpu_partial);
5005
5006static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5007{
5008 if (!s->ctor)
5009 return 0;
5010 return sprintf(buf, "%pS\n", s->ctor);
5011}
5012SLAB_ATTR_RO(ctor);
5013
5014static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5015{
5016 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5017}
5018SLAB_ATTR_RO(aliases);
5019
5020static ssize_t partial_show(struct kmem_cache *s, char *buf)
5021{
5022 return show_slab_objects(s, buf, SO_PARTIAL);
5023}
5024SLAB_ATTR_RO(partial);
5025
5026static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5027{
5028 return show_slab_objects(s, buf, SO_CPU);
5029}
5030SLAB_ATTR_RO(cpu_slabs);
5031
5032static ssize_t objects_show(struct kmem_cache *s, char *buf)
5033{
5034 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5035}
5036SLAB_ATTR_RO(objects);
5037
5038static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5039{
5040 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5041}
5042SLAB_ATTR_RO(objects_partial);
5043
5044static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5045{
5046 int objects = 0;
5047 int pages = 0;
5048 int cpu;
5049 int len;
5050
5051 for_each_online_cpu(cpu) {
5052 struct page *page;
5053
5054 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5055
5056 if (page) {
5057 pages += page->pages;
5058 objects += page->pobjects;
5059 }
5060 }
5061
5062 len = sprintf(buf, "%d(%d)", objects, pages);
5063
5064#ifdef CONFIG_SMP
5065 for_each_online_cpu(cpu) {
5066 struct page *page;
5067
5068 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5069
5070 if (page && len < PAGE_SIZE - 20)
5071 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5072 page->pobjects, page->pages);
5073 }
5074#endif
5075 return len + sprintf(buf + len, "\n");
5076}
5077SLAB_ATTR_RO(slabs_cpu_partial);
5078
5079static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5080{
5081 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5082}
5083
5084static ssize_t reclaim_account_store(struct kmem_cache *s,
5085 const char *buf, size_t length)
5086{
5087 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5088 if (buf[0] == '1')
5089 s->flags |= SLAB_RECLAIM_ACCOUNT;
5090 return length;
5091}
5092SLAB_ATTR(reclaim_account);
5093
5094static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5095{
5096 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5097}
5098SLAB_ATTR_RO(hwcache_align);
5099
5100#ifdef CONFIG_ZONE_DMA
5101static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5102{
5103 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5104}
5105SLAB_ATTR_RO(cache_dma);
5106#endif
5107
5108static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5109{
5110 return sprintf(buf, "%u\n", s->usersize);
5111}
5112SLAB_ATTR_RO(usersize);
5113
5114static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5115{
5116 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5117}
5118SLAB_ATTR_RO(destroy_by_rcu);
5119
5120#ifdef CONFIG_SLUB_DEBUG
5121static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5122{
5123 return show_slab_objects(s, buf, SO_ALL);
5124}
5125SLAB_ATTR_RO(slabs);
5126
5127static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5128{
5129 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5130}
5131SLAB_ATTR_RO(total_objects);
5132
5133static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5134{
5135 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5136}
5137
5138static ssize_t sanity_checks_store(struct kmem_cache *s,
5139 const char *buf, size_t length)
5140{
5141 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5142 if (buf[0] == '1') {
5143 s->flags &= ~__CMPXCHG_DOUBLE;
5144 s->flags |= SLAB_CONSISTENCY_CHECKS;
5145 }
5146 return length;
5147}
5148SLAB_ATTR(sanity_checks);
5149
5150static ssize_t trace_show(struct kmem_cache *s, char *buf)
5151{
5152 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5153}
5154
5155static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5156 size_t length)
5157{
5158
5159
5160
5161
5162
5163 if (s->refcount > 1)
5164 return -EINVAL;
5165
5166 s->flags &= ~SLAB_TRACE;
5167 if (buf[0] == '1') {
5168 s->flags &= ~__CMPXCHG_DOUBLE;
5169 s->flags |= SLAB_TRACE;
5170 }
5171 return length;
5172}
5173SLAB_ATTR(trace);
5174
5175static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5176{
5177 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5178}
5179
5180static ssize_t red_zone_store(struct kmem_cache *s,
5181 const char *buf, size_t length)
5182{
5183 if (any_slab_objects(s))
5184 return -EBUSY;
5185
5186 s->flags &= ~SLAB_RED_ZONE;
5187 if (buf[0] == '1') {
5188 s->flags |= SLAB_RED_ZONE;
5189 }
5190 calculate_sizes(s, -1);
5191 return length;
5192}
5193SLAB_ATTR(red_zone);
5194
5195static ssize_t poison_show(struct kmem_cache *s, char *buf)
5196{
5197 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5198}
5199
5200static ssize_t poison_store(struct kmem_cache *s,
5201 const char *buf, size_t length)
5202{
5203 if (any_slab_objects(s))
5204 return -EBUSY;
5205
5206 s->flags &= ~SLAB_POISON;
5207 if (buf[0] == '1') {
5208 s->flags |= SLAB_POISON;
5209 }
5210 calculate_sizes(s, -1);
5211 return length;
5212}
5213SLAB_ATTR(poison);
5214
5215static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5216{
5217 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5218}
5219
5220static ssize_t store_user_store(struct kmem_cache *s,
5221 const char *buf, size_t length)
5222{
5223 if (any_slab_objects(s))
5224 return -EBUSY;
5225
5226 s->flags &= ~SLAB_STORE_USER;
5227 if (buf[0] == '1') {
5228 s->flags &= ~__CMPXCHG_DOUBLE;
5229 s->flags |= SLAB_STORE_USER;
5230 }
5231 calculate_sizes(s, -1);
5232 return length;
5233}
5234SLAB_ATTR(store_user);
5235
5236static ssize_t validate_show(struct kmem_cache *s, char *buf)
5237{
5238 return 0;
5239}
5240
5241static ssize_t validate_store(struct kmem_cache *s,
5242 const char *buf, size_t length)
5243{
5244 int ret = -EINVAL;
5245
5246 if (buf[0] == '1') {
5247 ret = validate_slab_cache(s);
5248 if (ret >= 0)
5249 ret = length;
5250 }
5251 return ret;
5252}
5253SLAB_ATTR(validate);
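/*
 * Illustrative usage (not part of the original source): writing "1" runs
 * validate_slab_cache(), which walks the partial (and, with SLAB_STORE_USER,
 * the full) slabs of every node and reports inconsistencies to the kernel
 * log; any other value is rejected with -EINVAL:
 *
 *   # echo 1 > /sys/kernel/slab/<cache>/validate
 */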
5254
5255static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5256{
5257 if (!(s->flags & SLAB_STORE_USER))
5258 return -ENOSYS;
5259 return list_locations(s, buf, TRACK_ALLOC);
5260}
5261SLAB_ATTR_RO(alloc_calls);
5262
5263static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5264{
5265 if (!(s->flags & SLAB_STORE_USER))
5266 return -ENOSYS;
5267 return list_locations(s, buf, TRACK_FREE);
5268}
5269SLAB_ATTR_RO(free_calls);
5270#endif
5271
5272#ifdef CONFIG_FAILSLAB
5273static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5274{
5275 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5276}
5277
5278static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5279 size_t length)
5280{
5281 if (s->refcount > 1)
5282 return -EINVAL;
5283
5284 s->flags &= ~SLAB_FAILSLAB;
5285 if (buf[0] == '1')
5286 s->flags |= SLAB_FAILSLAB;
5287 return length;
5288}
5289SLAB_ATTR(failslab);
5290#endif
5291
5292static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5293{
5294 return 0;
5295}
5296
5297static ssize_t shrink_store(struct kmem_cache *s,
5298 const char *buf, size_t length)
5299{
5300 if (buf[0] == '1')
5301 kmem_cache_shrink(s);
5302 else
5303 return -EINVAL;
5304 return length;
5305}
5306SLAB_ATTR(shrink);
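/*
 * Illustrative usage: writing "1" calls kmem_cache_shrink() to release empty
 * slabs and compact the partial lists; any other value returns -EINVAL, and
 * reading the attribute always yields an empty string:
 *
 *   # echo 1 > /sys/kernel/slab/<cache>/shrink
 */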
5307
5308#ifdef CONFIG_NUMA
5309static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5310{
5311 return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5312}
5313
5314static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5315 const char *buf, size_t length)
5316{
5317 unsigned int ratio;
5318 int err;
5319
5320 err = kstrtouint(buf, 10, &ratio);
5321 if (err)
5322 return err;
5323 if (ratio > 100)
5324 return -ERANGE;
5325
5326 s->remote_node_defrag_ratio = ratio * 10;
5327
5328 return length;
5329}
5330SLAB_ATTR(remote_node_defrag_ratio);
5331#endif
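/*
 * Worked example (illustrative): on CONFIG_NUMA builds the defrag ratio is
 * stored internally scaled by 10, so writing "50" stores 500 while a
 * subsequent read prints 50 again; values above 100 fail with -ERANGE:
 *
 *   # echo 50 > /sys/kernel/slab/<cache>/remote_node_defrag_ratio
 *   # cat /sys/kernel/slab/<cache>/remote_node_defrag_ratio
 *   50
 */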
5332
5333#ifdef CONFIG_SLUB_STATS
5334static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5335{
5336 unsigned long sum = 0;
5337 int cpu;
5338 int len;
5339 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5340
5341 if (!data)
5342 return -ENOMEM;
5343
5344 for_each_online_cpu(cpu) {
5345 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5346
5347 data[cpu] = x;
5348 sum += x;
5349 }
5350
5351 len = sprintf(buf, "%lu", sum);
5352
5353#ifdef CONFIG_SMP
5354 for_each_online_cpu(cpu) {
5355 if (data[cpu] && len < PAGE_SIZE - 20)
5356 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5357 }
5358#endif
5359 kfree(data);
5360 return len + sprintf(buf + len, "\n");
5361}
5362
5363static void clear_stat(struct kmem_cache *s, enum stat_item si)
5364{
5365 int cpu;
5366
5367 for_each_online_cpu(cpu)
5368 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5369}
5370
5371#define STAT_ATTR(si, text) \
5372static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5373{ \
5374 return show_stat(s, buf, si); \
5375} \
5376static ssize_t text##_store(struct kmem_cache *s, \
5377 const char *buf, size_t length) \
5378{ \
5379 if (buf[0] != '0') \
5380 return -EINVAL; \
5381 clear_stat(s, si); \
5382 return length; \
5383} \
SLAB_ATTR(text);

5386STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5387STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5388STAT_ATTR(FREE_FASTPATH, free_fastpath);
5389STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5390STAT_ATTR(FREE_FROZEN, free_frozen);
5391STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5392STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5393STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5394STAT_ATTR(ALLOC_SLAB, alloc_slab);
5395STAT_ATTR(ALLOC_REFILL, alloc_refill);
5396STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5397STAT_ATTR(FREE_SLAB, free_slab);
5398STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5399STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5400STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5401STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5402STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5403STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5404STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5405STAT_ATTR(ORDER_FALLBACK, order_fallback);
5406STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5407STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5408STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5409STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5410STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5411STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5412#endif
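/*
 * Illustrative example (sample values): each STAT_ATTR() above creates a
 * per-cache file whose read shows the summed counter followed by the
 * non-zero per-CPU contributions, and whose write of "0" clears the
 * counters (anything else returns -EINVAL).  On a two-CPU machine:
 *
 *   # cat /sys/kernel/slab/<cache>/alloc_fastpath
 *   4253 C0=2101 C1=2152
 *   # echo 0 > /sys/kernel/slab/<cache>/alloc_fastpath
 */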
5413
5414static struct attribute *slab_attrs[] = {
5415 &slab_size_attr.attr,
5416 &object_size_attr.attr,
5417 &objs_per_slab_attr.attr,
5418 &order_attr.attr,
5419 &min_partial_attr.attr,
5420 &cpu_partial_attr.attr,
5421 &objects_attr.attr,
5422 &objects_partial_attr.attr,
5423 &partial_attr.attr,
5424 &cpu_slabs_attr.attr,
5425 &ctor_attr.attr,
5426 &aliases_attr.attr,
5427 &align_attr.attr,
5428 &hwcache_align_attr.attr,
5429 &reclaim_account_attr.attr,
5430 &destroy_by_rcu_attr.attr,
5431 &shrink_attr.attr,
5432 &slabs_cpu_partial_attr.attr,
5433#ifdef CONFIG_SLUB_DEBUG
5434 &total_objects_attr.attr,
5435 &slabs_attr.attr,
5436 &sanity_checks_attr.attr,
5437 &trace_attr.attr,
5438 &red_zone_attr.attr,
5439 &poison_attr.attr,
5440 &store_user_attr.attr,
5441 &validate_attr.attr,
5442 &alloc_calls_attr.attr,
5443 &free_calls_attr.attr,
5444#endif
5445#ifdef CONFIG_ZONE_DMA
5446 &cache_dma_attr.attr,
5447#endif
5448#ifdef CONFIG_NUMA
5449 &remote_node_defrag_ratio_attr.attr,
5450#endif
5451#ifdef CONFIG_SLUB_STATS
5452 &alloc_fastpath_attr.attr,
5453 &alloc_slowpath_attr.attr,
5454 &free_fastpath_attr.attr,
5455 &free_slowpath_attr.attr,
5456 &free_frozen_attr.attr,
5457 &free_add_partial_attr.attr,
5458 &free_remove_partial_attr.attr,
5459 &alloc_from_partial_attr.attr,
5460 &alloc_slab_attr.attr,
5461 &alloc_refill_attr.attr,
5462 &alloc_node_mismatch_attr.attr,
5463 &free_slab_attr.attr,
5464 &cpuslab_flush_attr.attr,
5465 &deactivate_full_attr.attr,
5466 &deactivate_empty_attr.attr,
5467 &deactivate_to_head_attr.attr,
5468 &deactivate_to_tail_attr.attr,
5469 &deactivate_remote_frees_attr.attr,
5470 &deactivate_bypass_attr.attr,
5471 &order_fallback_attr.attr,
5472 &cmpxchg_double_fail_attr.attr,
5473 &cmpxchg_double_cpu_fail_attr.attr,
5474 &cpu_partial_alloc_attr.attr,
5475 &cpu_partial_free_attr.attr,
5476 &cpu_partial_node_attr.attr,
5477 &cpu_partial_drain_attr.attr,
5478#endif
5479#ifdef CONFIG_FAILSLAB
5480 &failslab_attr.attr,
5481#endif
5482 &usersize_attr.attr,
5483
5484 NULL
5485};
5486
5487static const struct attribute_group slab_attr_group = {
5488 .attrs = slab_attrs,
5489};
5490
5491static ssize_t slab_attr_show(struct kobject *kobj,
5492 struct attribute *attr,
5493 char *buf)
5494{
5495 struct slab_attribute *attribute;
5496 struct kmem_cache *s;
5497 int err;
5498
5499 attribute = to_slab_attr(attr);
5500 s = to_slab(kobj);
5501
5502 if (!attribute->show)
5503 return -EIO;
5504
5505 err = attribute->show(s, buf);
5506
5507 return err;
5508}
5509
5510static ssize_t slab_attr_store(struct kobject *kobj,
5511 struct attribute *attr,
5512 const char *buf, size_t len)
5513{
5514 struct slab_attribute *attribute;
5515 struct kmem_cache *s;
5516 int err;
5517
5518 attribute = to_slab_attr(attr);
5519 s = to_slab(kobj);
5520
5521 if (!attribute->store)
5522 return -EIO;
5523
5524 err = attribute->store(s, buf, len);
5525#ifdef CONFIG_MEMCG
5526 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5527 struct kmem_cache *c;
5528
5529 mutex_lock(&slab_mutex);
5530 if (s->max_attr_size < len)
5531 s->max_attr_size = len;
		/*
		 * This is a best effort propagation, so this function's return
		 * value will be determined by the parent cache only. This is
		 * basically because not all attributes will have a well
		 * defined semantics for rollbacks - most of the actions will
		 * have permanent effects.
		 *
		 * Returning the error value of any of the children that fail
		 * is not 100% defined, in the sense that users seeing the
		 * error code won't be able to know anything about the state
		 * of the cache.
		 *
		 * Only returning the error code for the parent cache at least
		 * has well defined semantics. The cache being written to
		 * directly is the parent cache.
		 */
5550 for_each_memcg_cache(c, s)
5551 attribute->store(c, buf, len);
5552 mutex_unlock(&slab_mutex);
5553 }
5554#endif
5555 return err;
5556}
5557
5558static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5559{
5560#ifdef CONFIG_MEMCG
5561 int i;
5562 char *buffer = NULL;
5563 struct kmem_cache *root_cache;
5564
5565 if (is_root_cache(s))
5566 return;
5567
5568 root_cache = s->memcg_params.root_cache;
5569
	/*
	 * This means the root cache had no attribute written to it yet, so
	 * there is no point in copying default values around.
	 */
5574 if (!root_cache->max_attr_size)
5575 return;
5576
5577 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5578 char mbuf[64];
5579 char *buf;
5580 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5581 ssize_t len;
5582
5583 if (!attr || !attr->store || !attr->show)
5584 continue;
5585
		/*
		 * It is really bad that we have to allocate here, so we will
		 * do it only as a fallback. If we actually allocate, though,
		 * we can just use the allocated buffer until the end.
		 *
		 * Most of the slub attributes will tend to be very small in
		 * size, but sysfs allows buffers up to a page, so they can
		 * theoretically happen.
		 */
5595 if (buffer)
5596 buf = buffer;
5597 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
5598 buf = mbuf;
5599 else {
5600 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5601 if (WARN_ON(!buffer))
5602 continue;
5603 buf = buffer;
5604 }
5605
5606 len = attr->show(root_cache, buf);
5607 if (len > 0)
5608 attr->store(s, buf, len);
5609 }
5610
5611 if (buffer)
5612 free_page((unsigned long)buffer);
5613#endif
5614}
5615
5616static void kmem_cache_release(struct kobject *k)
5617{
5618 slab_kmem_cache_release(to_slab(k));
5619}
5620
5621static const struct sysfs_ops slab_sysfs_ops = {
5622 .show = slab_attr_show,
5623 .store = slab_attr_store,
5624};
5625
5626static struct kobj_type slab_ktype = {
5627 .sysfs_ops = &slab_sysfs_ops,
5628 .release = kmem_cache_release,
5629};
5630
5631static int uevent_filter(struct kset *kset, struct kobject *kobj)
5632{
5633 struct kobj_type *ktype = get_ktype(kobj);
5634
5635 if (ktype == &slab_ktype)
5636 return 1;
5637 return 0;
5638}
5639
5640static const struct kset_uevent_ops slab_uevent_ops = {
5641 .filter = uevent_filter,
5642};
5643
5644static struct kset *slab_kset;
5645
5646static inline struct kset *cache_kset(struct kmem_cache *s)
5647{
5648#ifdef CONFIG_MEMCG
5649 if (!is_root_cache(s))
5650 return s->memcg_params.root_cache->memcg_kset;
5651#endif
5652 return slab_kset;
5653}
5654
5655#define ID_STR_LENGTH 64
5656
/* Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
5661static char *create_unique_id(struct kmem_cache *s)
5662{
5663 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5664 char *p = name;
5665
5666 BUG_ON(!name);
5667
5668 *p++ = ':';
5669
	/*
	 * First come the flags affecting slabcache operations. We only
	 * get here for aliasable slabs, so we do not need to support
	 * anything else.
	 */
5676 if (s->flags & SLAB_CACHE_DMA)
5677 *p++ = 'd';
5678 if (s->flags & SLAB_CACHE_DMA32)
5679 *p++ = 'D';
5680 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5681 *p++ = 'a';
5682 if (s->flags & SLAB_CONSISTENCY_CHECKS)
5683 *p++ = 'F';
5684 if (s->flags & SLAB_ACCOUNT)
5685 *p++ = 'A';
5686 if (p != name + 1)
5687 *p++ = '-';
5688 p += sprintf(p, "%07u", s->size);
5689
5690 BUG_ON(p > name + ID_STR_LENGTH - 1);
5691 return name;
5692}
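/*
 * Illustrative example (sample values): for a mergeable cache of size 256
 * with SLAB_CACHE_DMA and SLAB_ACCOUNT set, the id generated above would be
 * ":dA-0000256".  When no flag character applies, the '-' separator is
 * omitted and the id is just the size, e.g. ":0000192".
 */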
5693
5694static void sysfs_slab_remove_workfn(struct work_struct *work)
5695{
5696 struct kmem_cache *s =
5697 container_of(work, struct kmem_cache, kobj_remove_work);
5698
5699 if (!s->kobj.state_in_sysfs)
		/*
		 * For a memcg cache, this may be called during
		 * deactivation and again on shutdown. Remove only once.
		 * A cache is never shut down before deactivation is
		 * complete, so no need to worry about synchronization.
		 */
5706 goto out;
5707
5708#ifdef CONFIG_MEMCG
5709 kset_unregister(s->memcg_kset);
5710#endif
5711 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5712out:
5713 kobject_put(&s->kobj);
5714}
5715
5716static int sysfs_slab_add(struct kmem_cache *s)
5717{
5718 int err;
5719 const char *name;
5720 struct kset *kset = cache_kset(s);
5721 int unmergeable = slab_unmergeable(s);
5722
5723 INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);
5724
5725 if (!kset) {
5726 kobject_init(&s->kobj, &slab_ktype);
5727 return 0;
5728 }
5729
5730 if (!unmergeable && disable_higher_order_debug &&
5731 (slub_debug & DEBUG_METADATA_FLAGS))
5732 unmergeable = 1;
5733
5734 if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
5740 sysfs_remove_link(&slab_kset->kobj, s->name);
5741 name = s->name;
5742 } else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
5747 name = create_unique_id(s);
5748 }
5749
5750 s->kobj.kset = kset;
5751 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5752 if (err)
5753 goto out;
5754
5755 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5756 if (err)
5757 goto out_del_kobj;
5758
5759#ifdef CONFIG_MEMCG
5760 if (is_root_cache(s) && memcg_sysfs_enabled) {
5761 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5762 if (!s->memcg_kset) {
5763 err = -ENOMEM;
5764 goto out_del_kobj;
5765 }
5766 }
5767#endif
5768
5769 kobject_uevent(&s->kobj, KOBJ_ADD);
5770 if (!unmergeable) {
		/* Setup first alias */
5772 sysfs_slab_alias(s, s->name);
5773 }
5774out:
5775 if (!unmergeable)
5776 kfree(name);
5777 return err;
5778out_del_kobj:
5779 kobject_del(&s->kobj);
5780 goto out;
5781}
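/*
 * Illustrative note (not part of the original source): after sysfs_slab_add()
 * a cache appears under /sys/kernel/slab/.  An unmergeable cache (typically
 * a debug configuration) is registered under its proper name, while a
 * mergeable cache is registered under the id from create_unique_id() with
 * its proper name added as a symlink by sysfs_slab_alias(), so several
 * aliases may point at one kobject:
 *
 *   $ ls -l /sys/kernel/slab/
 *   some_cache -> :A-0000064          (hypothetical alias entry)
 */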
5782
5783static void sysfs_slab_remove(struct kmem_cache *s)
5784{
5785 if (slab_state < FULL)
		/*
		 * Sysfs has not been setup yet so no need to remove the
		 * cache from sysfs.
		 */
5790 return;
5791
5792 kobject_get(&s->kobj);
5793 schedule_work(&s->kobj_remove_work);
5794}
5795
5796void sysfs_slab_unlink(struct kmem_cache *s)
5797{
5798 if (slab_state >= FULL)
5799 kobject_del(&s->kobj);
5800}
5801
5802void sysfs_slab_release(struct kmem_cache *s)
5803{
5804 if (slab_state >= FULL)
5805 kobject_put(&s->kobj);
5806}
5807
/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
5812struct saved_alias {
5813 struct kmem_cache *s;
5814 const char *name;
5815 struct saved_alias *next;
5816};
5817
5818static struct saved_alias *alias_list;
5819
5820static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5821{
5822 struct saved_alias *al;
5823
5824 if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
5828 sysfs_remove_link(&slab_kset->kobj, name);
5829 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5830 }
5831
5832 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5833 if (!al)
5834 return -ENOMEM;
5835
5836 al->s = s;
5837 al->name = name;
5838 al->next = alias_list;
5839 alias_list = al;
5840 return 0;
5841}
5842
5843static int __init slab_sysfs_init(void)
5844{
5845 struct kmem_cache *s;
5846 int err;
5847
5848 mutex_lock(&slab_mutex);
5849
5850 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5851 if (!slab_kset) {
5852 mutex_unlock(&slab_mutex);
5853 pr_err("Cannot register slab subsystem.\n");
5854 return -ENOSYS;
5855 }
5856
5857 slab_state = FULL;
5858
5859 list_for_each_entry(s, &slab_caches, list) {
5860 err = sysfs_slab_add(s);
5861 if (err)
5862 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5863 s->name);
5864 }
5865
5866 while (alias_list) {
5867 struct saved_alias *al = alias_list;
5868
5869 alias_list = alias_list->next;
5870 err = sysfs_slab_alias(al->s, al->name);
5871 if (err)
5872 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5873 al->name);
5874 kfree(al);
5875 }
5876
5877 mutex_unlock(&slab_mutex);
5878 resiliency_test();
5879 return 0;
5880}
5881
5882__initcall(slab_sysfs_init);
5883#endif
5884
/*
 * The /proc/slabinfo ABI
 */
5888#ifdef CONFIG_SLUB_DEBUG
5889void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5890{
5891 unsigned long nr_slabs = 0;
5892 unsigned long nr_objs = 0;
5893 unsigned long nr_free = 0;
5894 int node;
5895 struct kmem_cache_node *n;
5896
5897 for_each_kmem_cache_node(s, node, n) {
5898 nr_slabs += node_nr_slabs(n);
5899 nr_objs += node_nr_objs(n);
5900 nr_free += count_partial(n, count_free);
5901 }
5902
5903 sinfo->active_objs = nr_objs - nr_free;
5904 sinfo->num_objs = nr_objs;
5905 sinfo->active_slabs = nr_slabs;
5906 sinfo->num_slabs = nr_slabs;
5907 sinfo->objects_per_slab = oo_objects(s->oo);
5908 sinfo->cache_order = oo_order(s->oo);
5909}
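/*
 * Illustrative note: get_slabinfo() supplies the per-cache counters that the
 * common /proc/slabinfo code prints, one line per cache in roughly this
 * layout (sample, "slabinfo - version: 2.1"):
 *
 *   <name> <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> ...
 *
 * The stubs below keep the ABI but do nothing: SLUB prints no extra stats
 * and rejects tuning writes with -EIO, since its knobs live under
 * /sys/kernel/slab/ instead.
 */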
5910
5911void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5912{
5913}
5914
5915ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5916 size_t count, loff_t *ppos)
5917{
5918 return -EIO;
5919}
5920#endif
5921