// SPDX-License-Identifier: GPL-2.0
/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 */

#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 * The slab_lock is only used for debugging and on arches that do not
 * have the ability to do a cmpxchg_double. It only protects the
 * metadata of a slab (page->freelist and page->counters).
 *
 * The list_lock protects the partial and full lists on each node and
 * the partial slab counter. The cpu slab and the slabs on the per cpu
 * partial list are not on any node list; they are manipulated with
 * lockless cmpxchg operations on the freelist and the transaction id
 * (tid).
 *
 * A slab owned by a cpu is "frozen": the owning cpu can allocate from
 * it without taking the list_lock, and objects freed to a frozen slab
 * by other cpus are simply chained onto page->freelist. Unfreezing
 * (deactivate_slab()/unfreeze_partials()) moves the slab back to the
 * node partial or full lists, or discards it once it is empty.
 *
 * SLUB assigns one slab for allocation to each processor. Allocations
 * only occur from these slabs, called cpu slabs.
 *
 * PageSlab marks a page as a slab page. PageSlabPfmemalloc marks slabs
 * that were allocated from emergency reserves and may only be used to
 * satisfy allocations that are themselves allowed to use those reserves.
 *
 * Debugging (CONFIG_SLUB_DEBUG) adds consistency checks, red zoning,
 * poisoning and allocation/free tracking, controlled by the slub_debug
 * boot parameter parsed in setup_slub_debug() below.
 */

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

void *fixup_red_left(struct kmem_cache *s, void *p)
{
	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
		p += s->red_left_pad;

	return p;
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/*
 * Debug and tuning knobs for the allocator follow.
 */

/* Enable to run a resiliency test against slab corruption (debug aid) */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debugging flags cannot be used together with the fast
 * cmpxchg_double based freelist update: the metadata they maintain
 * must be updated under the slab lock.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab.  These
 * get disabled when slub_debug=O is used and a cache's min order increases
 * with metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };
215
216#ifdef CONFIG_SYSFS
217static int sysfs_slab_add(struct kmem_cache *);
218static int sysfs_slab_alias(struct kmem_cache *, const char *);
219static void memcg_propagate_slab_attrs(struct kmem_cache *s);
220static void sysfs_slab_remove(struct kmem_cache *s);
221#else
222static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
223static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
224 { return 0; }
225static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
226static inline void sysfs_slab_remove(struct kmem_cache *s) { }
227#endif
228
229static inline void stat(const struct kmem_cache *s, enum stat_item si)
230{
231#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid the preempt-safe but slower this_cpu_inc().
	 */
236 raw_cpu_inc(s->cpu_slab->stat[si]);
237#endif
238}

/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of a per-cache random number and the address of the free
 * pointer slot, to make it harder to forge heap pointers from a leaked
 * freelist value.
 */
249static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
250 unsigned long ptr_addr)
251{
252#ifdef CONFIG_SLAB_FREELIST_HARDENED
	/*
	 * When CONFIG_KASAN_SW_TAGS is enabled, ptr_addr might carry a KASAN
	 * tag. set_freepointer() and get_freepointer() normally see the same
	 * tag, but some SLUB_DEBUG paths pass untagged pointers, so reset the
	 * tag here to keep the XOR obfuscation symmetric.
	 */
263 return (void *)((unsigned long)ptr ^ s->random ^
264 (unsigned long)kasan_reset_tag((void *)ptr_addr));
265#else
266 return ptr;
267#endif
268}

/* Returns the freelist pointer recorded at location ptr_addr. */
271static inline void *freelist_dereference(const struct kmem_cache *s,
272 void *ptr_addr)
273{
274 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
275 (unsigned long)ptr_addr);
276}
277
278static inline void *get_freepointer(struct kmem_cache *s, void *object)
279{
280 return freelist_dereference(s, object + s->offset);
281}
282
283static void prefetch_freepointer(const struct kmem_cache *s, void *object)
284{
285 prefetch(object + s->offset);
286}
287
288static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
289{
290 unsigned long freepointer_addr;
291 void *p;
292
293 if (!debug_pagealloc_enabled())
294 return get_freepointer(s, object);
295
296 freepointer_addr = (unsigned long)object + s->offset;
297 probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
298 return freelist_ptr(s, p, freepointer_addr);
299}
300
301static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
302{
303 unsigned long freeptr_addr = (unsigned long)object + s->offset;
304
305#ifdef CONFIG_SLAB_FREELIST_HARDENED
306 BUG_ON(object == fp);
307#endif
308
309 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
310}
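
/*
 * Summary of the hardening scheme above: with CONFIG_SLAB_FREELIST_HARDENED
 * the value stored at ptr_addr is
 *
 *	stored = ptr ^ s->random ^ ptr_addr
 *
 * Because XOR is its own inverse, freelist_dereference() recovers ptr by
 * applying the same transform to the stored value, while an attacker who
 * leaks a stored value cannot forge a useful pointer without also knowing
 * both s->random and the slot address.
 */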

/* Loop over all objects in a slab */
313#define for_each_object(__p, __s, __addr, __objects) \
314 for (__p = fixup_red_left(__s, __addr); \
315 __p < (__addr) + (__objects) * (__s)->size; \
316 __p += (__s)->size)

/* Determine object index from a given position */
319static inline unsigned int slab_index(void *p, struct kmem_cache *s, void *addr)
320{
321 return (kasan_reset_tag(p) - addr) / s->size;
322}
323
324static inline unsigned int order_objects(unsigned int order, unsigned int size)
325{
326 return ((unsigned int)PAGE_SIZE << order) / size;
327}
328
329static inline struct kmem_cache_order_objects oo_make(unsigned int order,
330 unsigned int size)
331{
332 struct kmem_cache_order_objects x = {
333 (order << OO_SHIFT) + order_objects(order, size)
334 };
335
336 return x;
337}
338
339static inline unsigned int oo_order(struct kmem_cache_order_objects x)
340{
341 return x.x >> OO_SHIFT;
342}
343
344static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
345{
346 return x.x & OO_MASK;
347}
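
/*
 * Example of the order/objects packing used above: for a cache with
 * size = 256 bytes using order-1 slabs (two 4 KiB pages, assuming 4 KiB
 * PAGE_SIZE), order_objects(1, 256) = 32 and oo_make(1, 256) stores
 * (1 << OO_SHIFT) + 32, from which oo_order() and oo_objects() recover
 * 1 and 32 respectively.
 */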

/*
 * Per slab locking using the pagelock
 */
352static __always_inline void slab_lock(struct page *page)
353{
354 VM_BUG_ON_PAGE(PageTail(page), page);
355 bit_spin_lock(PG_locked, &page->flags);
356}
357
358static __always_inline void slab_unlock(struct page *page)
359{
360 VM_BUG_ON_PAGE(PageTail(page), page);
361 __bit_spin_unlock(PG_locked, &page->flags);
362}

/* Interrupts must be disabled (for the fallback code to work right) */
365static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
366 void *freelist_old, unsigned long counters_old,
367 void *freelist_new, unsigned long counters_new,
368 const char *n)
369{
370 VM_BUG_ON(!irqs_disabled());
371#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
372 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
373 if (s->flags & __CMPXCHG_DOUBLE) {
374 if (cmpxchg_double(&page->freelist, &page->counters,
375 freelist_old, counters_old,
376 freelist_new, counters_new))
377 return true;
378 } else
379#endif
380 {
381 slab_lock(page);
382 if (page->freelist == freelist_old &&
383 page->counters == counters_old) {
384 page->freelist = freelist_new;
385 page->counters = counters_new;
386 slab_unlock(page);
387 return true;
388 }
389 slab_unlock(page);
390 }
391
392 cpu_relax();
393 stat(s, CMPXCHG_DOUBLE_FAIL);
394
395#ifdef SLUB_DEBUG_CMPXCHG
396 pr_info("%s %s: cmpxchg double redo ", n, s->name);
397#endif
398
399 return false;
400}
401
402static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
403 void *freelist_old, unsigned long counters_old,
404 void *freelist_new, unsigned long counters_new,
405 const char *n)
406{
407#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
408 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
409 if (s->flags & __CMPXCHG_DOUBLE) {
410 if (cmpxchg_double(&page->freelist, &page->counters,
411 freelist_old, counters_old,
412 freelist_new, counters_new))
413 return true;
414 } else
415#endif
416 {
417 unsigned long flags;
418
419 local_irq_save(flags);
420 slab_lock(page);
421 if (page->freelist == freelist_old &&
422 page->counters == counters_old) {
423 page->freelist = freelist_new;
424 page->counters = counters_new;
425 slab_unlock(page);
426 local_irq_restore(flags);
427 return true;
428 }
429 slab_unlock(page);
430 local_irq_restore(flags);
431 }
432
433 cpu_relax();
434 stat(s, CMPXCHG_DOUBLE_FAIL);
435
436#ifdef SLUB_DEBUG_CMPXCHG
437 pr_info("%s %s: cmpxchg double redo ", n, s->name);
438#endif
439
440 return false;
441}
442
443#ifdef CONFIG_SLUB_DEBUG

/*
 * Determine a map of objects in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
450static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
451{
452 void *p;
453 void *addr = page_address(page);
454
455 for (p = page->freelist; p; p = get_freepointer(s, p))
456 set_bit(slab_index(p, s, addr), map);
457}
458
459static inline unsigned int size_from_object(struct kmem_cache *s)
460{
461 if (s->flags & SLAB_RED_ZONE)
462 return s->size - s->red_left_pad;
463
464 return s->size;
465}
466
467static inline void *restore_red_left(struct kmem_cache *s, void *p)
468{
469 if (s->flags & SLAB_RED_ZONE)
470 p -= s->red_left_pad;
471
472 return p;
473}

/*
 * Debug settings:
 */
478#if defined(CONFIG_SLUB_DEBUG_ON)
479static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
480#else
481static slab_flags_t slub_debug;
482#endif
483
484static char *slub_debug_slabs;
485static int disable_higher_order_debug;

/*
 * slub is about to manipulate internal object metadata.  The metadata lies
 * outside the object proper, so KASAN would normally flag the access;
 * temporarily disable KASAN reporting while metadata is read or written.
 */
493static inline void metadata_access_enable(void)
494{
495 kasan_disable_current();
496}
497
498static inline void metadata_access_disable(void)
499{
500 kasan_enable_current();
501}

/*
 * Object debugging
 */

508static inline int check_valid_pointer(struct kmem_cache *s,
509 struct page *page, void *object)
510{
511 void *base;
512
513 if (!object)
514 return 1;
515
516 base = page_address(page);
517 object = kasan_reset_tag(object);
518 object = restore_red_left(s, object);
519 if (object < base || object >= base + page->objects * s->size ||
520 (object - base) % s->size) {
521 return 0;
522 }
523
524 return 1;
525}
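
/*
 * For example (ignoring red-zone padding), with s->size = 512 and a slab
 * base of 0xffff888012340000, 0xffff888012340a00 is a valid object address
 * (offset 0xa00 is a multiple of 512 and below page->objects * 512), while
 * 0xffff888012340a80 fails the modulo check above and is rejected.
 */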
526
527static void print_section(char *level, char *text, u8 *addr,
528 unsigned int length)
529{
530 metadata_access_enable();
531 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
532 length, 1);
533 metadata_access_disable();
534}
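
/*
 * Allocation/free tracking: when SLAB_STORE_USER is set, two struct track
 * records (one for the last allocation, one for the last free) are stored
 * in the metadata area past the object - after the free pointer if the
 * cache uses an external free pointer (s->offset), otherwise at s->inuse.
 */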
535
536static struct track *get_track(struct kmem_cache *s, void *object,
537 enum track_item alloc)
538{
539 struct track *p;
540
541 if (s->offset)
542 p = object + s->offset + sizeof(void *);
543 else
544 p = object + s->inuse;
545
546 return p + alloc;
547}
548
549static void set_track(struct kmem_cache *s, void *object,
550 enum track_item alloc, unsigned long addr)
551{
552 struct track *p = get_track(s, object, alloc);
553
554 if (addr) {
555#ifdef CONFIG_STACKTRACE
556 unsigned int nr_entries;
557
558 metadata_access_enable();
559 nr_entries = stack_trace_save(p->addrs, TRACK_ADDRS_COUNT, 3);
560 metadata_access_disable();
561
562 if (nr_entries < TRACK_ADDRS_COUNT)
563 p->addrs[nr_entries] = 0;
564#endif
565 p->addr = addr;
566 p->cpu = smp_processor_id();
567 p->pid = current->pid;
568 p->when = jiffies;
569 } else {
570 memset(p, 0, sizeof(struct track));
571 }
572}
573
574static void init_tracking(struct kmem_cache *s, void *object)
575{
576 if (!(s->flags & SLAB_STORE_USER))
577 return;
578
579 set_track(s, object, TRACK_FREE, 0UL);
580 set_track(s, object, TRACK_ALLOC, 0UL);
581}
582
583static void print_track(const char *s, struct track *t, unsigned long pr_time)
584{
585 if (!t->addr)
586 return;
587
588 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
589 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
590#ifdef CONFIG_STACKTRACE
591 {
592 int i;
593 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
594 if (t->addrs[i])
595 pr_err("\t%pS\n", (void *)t->addrs[i]);
596 else
597 break;
598 }
599#endif
600}
601
602static void print_tracking(struct kmem_cache *s, void *object)
603{
604 unsigned long pr_time = jiffies;
605 if (!(s->flags & SLAB_STORE_USER))
606 return;
607
608 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
609 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
610}
611
612static void print_page_info(struct page *page)
613{
614 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
615 page, page->objects, page->inuse, page->freelist, page->flags);
616
617}
618
619static void slab_bug(struct kmem_cache *s, char *fmt, ...)
620{
621 struct va_format vaf;
622 va_list args;
623
624 va_start(args, fmt);
625 vaf.fmt = fmt;
626 vaf.va = &args;
627 pr_err("=============================================================================\n");
628 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
629 pr_err("-----------------------------------------------------------------------------\n\n");
630
631 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
632 va_end(args);
633}
634
635static void slab_fix(struct kmem_cache *s, char *fmt, ...)
636{
637 struct va_format vaf;
638 va_list args;
639
640 va_start(args, fmt);
641 vaf.fmt = fmt;
642 vaf.va = &args;
643 pr_err("FIX %s: %pV\n", s->name, &vaf);
644 va_end(args);
645}
646
647static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
648{
649 unsigned int off;
650 u8 *addr = page_address(page);
651
652 print_tracking(s, p);
653
654 print_page_info(page);
655
656 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
657 p, p - addr, get_freepointer(s, p));
658
659 if (s->flags & SLAB_RED_ZONE)
660 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
661 s->red_left_pad);
662 else if (p > addr + 16)
663 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
664
665 print_section(KERN_ERR, "Object ", p,
666 min_t(unsigned int, s->object_size, PAGE_SIZE));
667 if (s->flags & SLAB_RED_ZONE)
668 print_section(KERN_ERR, "Redzone ", p + s->object_size,
669 s->inuse - s->object_size);
670
671 if (s->offset)
672 off = s->offset + sizeof(void *);
673 else
674 off = s->inuse;
675
676 if (s->flags & SLAB_STORE_USER)
677 off += 2 * sizeof(struct track);
678
679 off += kasan_metadata_size(s);
680
681 if (off != size_from_object(s))
		/* Beginning of the filler is the free pointer */
683 print_section(KERN_ERR, "Padding ", p + off,
684 size_from_object(s) - off);
685
686 dump_stack();
687}
688
689void object_err(struct kmem_cache *s, struct page *page,
690 u8 *object, char *reason)
691{
692 slab_bug(s, "%s", reason);
693 print_trailer(s, page, object);
694}
695
696static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
697 const char *fmt, ...)
698{
699 va_list args;
700 char buf[100];
701
702 va_start(args, fmt);
703 vsnprintf(buf, sizeof(buf), fmt, args);
704 va_end(args);
705 slab_bug(s, "%s", buf);
706 print_page_info(page);
707 dump_stack();
708}
709
710static void init_object(struct kmem_cache *s, void *object, u8 val)
711{
712 u8 *p = object;
713
714 if (s->flags & SLAB_RED_ZONE)
715 memset(p - s->red_left_pad, val, s->red_left_pad);
716
717 if (s->flags & __OBJECT_POISON) {
718 memset(p, POISON_FREE, s->object_size - 1);
719 p[s->object_size - 1] = POISON_END;
720 }
721
722 if (s->flags & SLAB_RED_ZONE)
723 memset(p + s->object_size, val, s->inuse - s->object_size);
724}
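
/*
 * With poisoning enabled the object payload is filled with POISON_FREE
 * (0x6b) bytes and terminated with POISON_END (0xa5); for example a freed
 * 16-byte object reads 6b 6b ... 6b a5.  Red zones on either side are
 * filled with the SLUB_RED_INACTIVE/SLUB_RED_ACTIVE value passed in as
 * "val", so any stray write into these areas is detectable later.
 */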
725
726static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
727 void *from, void *to)
728{
729 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
730 memset(from, data, to - from);
731}
732
733static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
734 u8 *object, char *what,
735 u8 *start, unsigned int value, unsigned int bytes)
736{
737 u8 *fault;
738 u8 *end;
739
740 metadata_access_enable();
741 fault = memchr_inv(start, value, bytes);
742 metadata_access_disable();
743 if (!fault)
744 return 1;
745
746 end = start + bytes;
747 while (end > fault && end[-1] == value)
748 end--;
749
750 slab_bug(s, "%s overwritten", what);
751 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
752 fault, end - 1, fault[0], value);
753 print_trailer(s, page, object);
754
755 restore_bytes(s, what, value, fault, end);
756 return 0;
757}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 *	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->object_size
 * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended by another word if Redzoning is enabled and
 * 	object_size == inuse.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at minimum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored. And therefore no slab options that rely on these
 * boundaries may be used with merged slabcaches.
 */
797static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
798{
799 unsigned long off = s->inuse;
800
801 if (s->offset)
802
803 off += sizeof(void *);
804
805 if (s->flags & SLAB_STORE_USER)
806
807 off += 2 * sizeof(struct track);
808
809 off += kasan_metadata_size(s);
810
811 if (size_from_object(s) == off)
812 return 1;
813
814 return check_bytes_and_report(s, page, p, "Object padding",
815 p + off, POISON_INUSE, size_from_object(s) - off);
816}

/* Check the pad bytes at the end of a slab page */
819static int slab_pad_check(struct kmem_cache *s, struct page *page)
820{
821 u8 *start;
822 u8 *fault;
823 u8 *end;
824 u8 *pad;
825 int length;
826 int remainder;
827
828 if (!(s->flags & SLAB_POISON))
829 return 1;
830
831 start = page_address(page);
832 length = PAGE_SIZE << compound_order(page);
833 end = start + length;
834 remainder = length % s->size;
835 if (!remainder)
836 return 1;
837
838 pad = end - remainder;
839 metadata_access_enable();
840 fault = memchr_inv(pad, POISON_INUSE, remainder);
841 metadata_access_disable();
842 if (!fault)
843 return 1;
844 while (end > fault && end[-1] == POISON_INUSE)
845 end--;
846
847 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
848 print_section(KERN_ERR, "Padding ", pad, remainder);
849
850 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
851 return 0;
852}
853
854static int check_object(struct kmem_cache *s, struct page *page,
855 void *object, u8 val)
856{
857 u8 *p = object;
858 u8 *endobject = object + s->object_size;
859
860 if (s->flags & SLAB_RED_ZONE) {
861 if (!check_bytes_and_report(s, page, object, "Redzone",
862 object - s->red_left_pad, val, s->red_left_pad))
863 return 0;
864
865 if (!check_bytes_and_report(s, page, object, "Redzone",
866 endobject, val, s->inuse - s->object_size))
867 return 0;
868 } else {
869 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
870 check_bytes_and_report(s, page, p, "Alignment padding",
871 endobject, POISON_INUSE,
872 s->inuse - s->object_size);
873 }
874 }
875
876 if (s->flags & SLAB_POISON) {
877 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
878 (!check_bytes_and_report(s, page, p, "Poison", p,
879 POISON_FREE, s->object_size - 1) ||
880 !check_bytes_and_report(s, page, p, "Poison",
881 p + s->object_size - 1, POISON_END, 1)))
882 return 0;
		/*
		 * check_pad_bytes cleans up on its own.
		 */
886 check_pad_bytes(s, page, p);
887 }
888
889 if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
894 return 1;

	/* Check free pointer validity */
897 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
898 object_err(s, page, p, "Freepointer corrupt");
		/*
		 * No choice but to zap it and thus lose the remainder
		 * of the free objects in this slab. May cause
		 * another error because the object count is now wrong.
		 */
904 set_freepointer(s, p, NULL);
905 return 0;
906 }
907 return 1;
908}
909
910static int check_slab(struct kmem_cache *s, struct page *page)
911{
912 int maxobj;
913
914 VM_BUG_ON(!irqs_disabled());
915
916 if (!PageSlab(page)) {
917 slab_err(s, page, "Not a valid slab page");
918 return 0;
919 }
920
921 maxobj = order_objects(compound_order(page), s->size);
922 if (page->objects > maxobj) {
923 slab_err(s, page, "objects %u > max %u",
924 page->objects, maxobj);
925 return 0;
926 }
927 if (page->inuse > page->objects) {
928 slab_err(s, page, "inuse %u > max %u",
929 page->inuse, page->objects);
930 return 0;
931 }
932
933 slab_pad_check(s, page);
934 return 1;
935}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the chains are in a consistent state.
 */
941static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
942{
943 int nr = 0;
944 void *fp;
945 void *object = NULL;
946 int max_objects;
947
948 fp = page->freelist;
949 while (fp && nr <= page->objects) {
950 if (fp == search)
951 return 1;
952 if (!check_valid_pointer(s, page, fp)) {
953 if (object) {
954 object_err(s, page, object,
955 "Freechain corrupt");
956 set_freepointer(s, object, NULL);
957 } else {
958 slab_err(s, page, "Freepointer corrupt");
959 page->freelist = NULL;
960 page->inuse = page->objects;
961 slab_fix(s, "Freelist cleared");
962 return 0;
963 }
964 break;
965 }
966 object = fp;
967 fp = get_freepointer(s, object);
968 nr++;
969 }
970
971 max_objects = order_objects(compound_order(page), s->size);
972 if (max_objects > MAX_OBJS_PER_PAGE)
973 max_objects = MAX_OBJS_PER_PAGE;
974
975 if (page->objects != max_objects) {
976 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
977 page->objects, max_objects);
978 page->objects = max_objects;
979 slab_fix(s, "Number of objects adjusted.");
980 }
981 if (page->inuse != page->objects - nr) {
982 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
983 page->inuse, page->objects - nr);
984 page->inuse = page->objects - nr;
985 slab_fix(s, "Object count adjusted.");
986 }
987 return search == NULL;
988}
989
990static void trace(struct kmem_cache *s, struct page *page, void *object,
991 int alloc)
992{
993 if (s->flags & SLAB_TRACE) {
994 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
995 s->name,
996 alloc ? "alloc" : "free",
997 object, page->inuse,
998 page->freelist);
999
1000 if (!alloc)
1001 print_section(KERN_INFO, "Object ", (void *)object,
1002 s->object_size);
1003
1004 dump_stack();
1005 }
1006}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1011static void add_full(struct kmem_cache *s,
1012 struct kmem_cache_node *n, struct page *page)
1013{
1014 if (!(s->flags & SLAB_STORE_USER))
1015 return;
1016
1017 lockdep_assert_held(&n->list_lock);
1018 list_add(&page->slab_list, &n->full);
1019}
1020
1021static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1022{
1023 if (!(s->flags & SLAB_STORE_USER))
1024 return;
1025
1026 lockdep_assert_held(&n->list_lock);
1027 list_del(&page->slab_list);
1028}

/* Tracking of the number of slabs for debugging purposes */
1031static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1032{
1033 struct kmem_cache_node *n = get_node(s, node);
1034
1035 return atomic_long_read(&n->nr_slabs);
1036}
1037
1038static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1039{
1040 return atomic_long_read(&n->nr_slabs);
1041}
1042
1043static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1044{
1045 struct kmem_cache_node *n = get_node(s, node);

	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
1053 if (likely(n)) {
1054 atomic_long_inc(&n->nr_slabs);
1055 atomic_long_add(objects, &n->total_objects);
1056 }
1057}
1058static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1059{
1060 struct kmem_cache_node *n = get_node(s, node);
1061
1062 atomic_long_dec(&n->nr_slabs);
1063 atomic_long_sub(objects, &n->total_objects);
1064}

/* Object debug checks for alloc/free paths */
1067static void setup_object_debug(struct kmem_cache *s, struct page *page,
1068 void *object)
1069{
1070 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1071 return;
1072
1073 init_object(s, object, SLUB_RED_INACTIVE);
1074 init_tracking(s, object);
1075}
1076
1077static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
1078{
1079 if (!(s->flags & SLAB_POISON))
1080 return;
1081
1082 metadata_access_enable();
1083 memset(addr, POISON_INUSE, PAGE_SIZE << order);
1084 metadata_access_disable();
1085}
1086
1087static inline int alloc_consistency_checks(struct kmem_cache *s,
1088 struct page *page, void *object)
1089{
1090 if (!check_slab(s, page))
1091 return 0;
1092
1093 if (!check_valid_pointer(s, page, object)) {
1094 object_err(s, page, object, "Freelist Pointer check fails");
1095 return 0;
1096 }
1097
1098 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1099 return 0;
1100
1101 return 1;
1102}
1103
1104static noinline int alloc_debug_processing(struct kmem_cache *s,
1105 struct page *page,
1106 void *object, unsigned long addr)
1107{
1108 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1109 if (!alloc_consistency_checks(s, page, object))
1110 goto bad;
1111 }

	/* Success. Perform special debug activities for allocs */
1114 if (s->flags & SLAB_STORE_USER)
1115 set_track(s, object, TRACK_ALLOC, addr);
1116 trace(s, page, object, 1);
1117 init_object(s, object, SLUB_RED_ACTIVE);
1118 return 1;
1119
1120bad:
1121 if (PageSlab(page)) {
		/*
		 * If this is a slab page then lets do the best we can
		 * to avoid issues in the future. Marking all objects
		 * as used avoids touching the remaining objects.
		 */
1127 slab_fix(s, "Marking all objects used");
1128 page->inuse = page->objects;
1129 page->freelist = NULL;
1130 }
1131 return 0;
1132}
1133
1134static inline int free_consistency_checks(struct kmem_cache *s,
1135 struct page *page, void *object, unsigned long addr)
1136{
1137 if (!check_valid_pointer(s, page, object)) {
1138 slab_err(s, page, "Invalid object pointer 0x%p", object);
1139 return 0;
1140 }
1141
1142 if (on_freelist(s, page, object)) {
1143 object_err(s, page, object, "Object already free");
1144 return 0;
1145 }
1146
1147 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1148 return 0;
1149
1150 if (unlikely(s != page->slab_cache)) {
1151 if (!PageSlab(page)) {
1152 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1153 object);
1154 } else if (!page->slab_cache) {
1155 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1156 object);
1157 dump_stack();
1158 } else
1159 object_err(s, page, object,
1160 "page slab pointer corrupt.");
1161 return 0;
1162 }
1163 return 1;
1164}

/* Supports checking bulk free of a constructed freelist */
1167static noinline int free_debug_processing(
1168 struct kmem_cache *s, struct page *page,
1169 void *head, void *tail, int bulk_cnt,
1170 unsigned long addr)
1171{
1172 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1173 void *object = head;
1174 int cnt = 0;
1175 unsigned long uninitialized_var(flags);
1176 int ret = 0;
1177
1178 spin_lock_irqsave(&n->list_lock, flags);
1179 slab_lock(page);
1180
1181 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1182 if (!check_slab(s, page))
1183 goto out;
1184 }
1185
1186next_object:
1187 cnt++;
1188
1189 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1190 if (!free_consistency_checks(s, page, object, addr))
1191 goto out;
1192 }
1193
1194 if (s->flags & SLAB_STORE_USER)
1195 set_track(s, object, TRACK_FREE, addr);
1196 trace(s, page, object, 0);
1197
1198 init_object(s, object, SLUB_RED_INACTIVE);

	/* Reached end of constructed freelist yet? */
1201 if (object != tail) {
1202 object = get_freepointer(s, object);
1203 goto next_object;
1204 }
1205 ret = 1;
1206
1207out:
1208 if (cnt != bulk_cnt)
1209 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1210 bulk_cnt, cnt);
1211
1212 slab_unlock(page);
1213 spin_unlock_irqrestore(&n->list_lock, flags);
1214 if (!ret)
1215 slab_fix(s, "Object at 0x%p not freed", object);
1216 return ret;
1217}
1218
1219static int __init setup_slub_debug(char *str)
1220{
1221 slub_debug = DEBUG_DEFAULT_FLAGS;
1222 if (*str++ != '=' || !*str)
		/*
		 * No options specified. Switch on full debugging.
		 */
1226 goto out;
1227
1228 if (*str == ',')
		/*
		 * No options but restriction on slabs. This means full
		 * debugging for slabs matching a pattern.
		 */
1233 goto check_slabs;
1234
1235 slub_debug = 0;
1236 if (*str == '-')
		/*
		 * Switch off all debugging measures.
		 */
1240 goto out;

	/*
	 * Determine which debug features should be switched on
	 */
1245 for (; *str && *str != ','; str++) {
1246 switch (tolower(*str)) {
1247 case 'f':
1248 slub_debug |= SLAB_CONSISTENCY_CHECKS;
1249 break;
1250 case 'z':
1251 slub_debug |= SLAB_RED_ZONE;
1252 break;
1253 case 'p':
1254 slub_debug |= SLAB_POISON;
1255 break;
1256 case 'u':
1257 slub_debug |= SLAB_STORE_USER;
1258 break;
1259 case 't':
1260 slub_debug |= SLAB_TRACE;
1261 break;
1262 case 'a':
1263 slub_debug |= SLAB_FAILSLAB;
1264 break;
1265 case 'o':
			/*
			 * Avoid enabling debugging on caches if its minimum
			 * order would increase as a result of the extra
			 * debug metadata.
			 */
1270 disable_higher_order_debug = 1;
1271 break;
1272 default:
1273 pr_err("slub_debug option '%c' unknown. skipped\n",
1274 *str);
1275 }
1276 }
1277
1278check_slabs:
1279 if (*str == ',')
1280 slub_debug_slabs = str + 1;
1281out:
1282 return 1;
1283}
1284
1285__setup("slub_debug", setup_slub_debug);
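
/*
 * Example boot parameters (illustrative), as parsed above:
 *
 *	slub_debug		enable all default debug flags for every cache
 *	slub_debug=,dentry	default debug flags, but only for "dentry"
 *	slub_debug=FZ		sanity checks plus red zoning for every cache
 *	slub_debug=P,kmalloc-*	poisoning for all kmalloc caches
 *	slub_debug=O		do not raise the minimum order for debugging
 *	slub_debug=-		switch all debugging off
 */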

/*
 * kmem_cache_flags - apply debugging options to the cache
 * @object_size:	the size of an object without meta data
 * @flags:		flags to set
 * @name:		name of the cache
 * @ctor:		constructor function
 *
 * Debug option(s) are applied to @flags. In addition to the debug
 * option(s), if a slab name (or multiple) is specified i.e.
 * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
 * then only the selected slabs will receive the debug option(s).
 */
1299slab_flags_t kmem_cache_flags(unsigned int object_size,
1300 slab_flags_t flags, const char *name,
1301 void (*ctor)(void *))
1302{
1303 char *iter;
1304 size_t len;

	/* No per-slab restriction list: apply the global debug flags. */
1307 if (!slub_debug_slabs)
1308 return flags | slub_debug;
1309
1310 len = strlen(name);
1311 iter = slub_debug_slabs;
1312 while (*iter) {
1313 char *end, *glob;
1314 size_t cmplen;
1315
1316 end = strchr(iter, ',');
1317 if (!end)
1318 end = iter + strlen(iter);
1319
1320 glob = strnchr(iter, end - iter, '*');
1321 if (glob)
1322 cmplen = glob - iter;
1323 else
1324 cmplen = max_t(size_t, len, (end - iter));
1325
1326 if (!strncmp(name, iter, cmplen)) {
1327 flags |= slub_debug;
1328 break;
1329 }
1330
1331 if (!*end)
1332 break;
1333 iter = end + 1;
1334 }
1335
1336 return flags;
1337}
1338#else
1339static inline void setup_object_debug(struct kmem_cache *s,
1340 struct page *page, void *object) {}
1341static inline void setup_page_debug(struct kmem_cache *s,
1342 void *addr, int order) {}
1343
1344static inline int alloc_debug_processing(struct kmem_cache *s,
1345 struct page *page, void *object, unsigned long addr) { return 0; }
1346
1347static inline int free_debug_processing(
1348 struct kmem_cache *s, struct page *page,
1349 void *head, void *tail, int bulk_cnt,
1350 unsigned long addr) { return 0; }
1351
1352static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1353 { return 1; }
1354static inline int check_object(struct kmem_cache *s, struct page *page,
1355 void *object, u8 val) { return 1; }
1356static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1357 struct page *page) {}
1358static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1359 struct page *page) {}
1360slab_flags_t kmem_cache_flags(unsigned int object_size,
1361 slab_flags_t flags, const char *name,
1362 void (*ctor)(void *))
1363{
1364 return flags;
1365}
1366#define slub_debug 0
1367
1368#define disable_higher_order_debug 0
1369
1370static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1371 { return 0; }
1372static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1373 { return 0; }
1374static inline void inc_slabs_node(struct kmem_cache *s, int node,
1375 int objects) {}
1376static inline void dec_slabs_node(struct kmem_cache *s, int node,
1377 int objects) {}
1378
1379#endif

/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
1385static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1386{
1387 ptr = kasan_kmalloc_large(ptr, size, flags);
1388
1389 kmemleak_alloc(ptr, size, 1, flags);
1390 return ptr;
1391}
1392
1393static __always_inline void kfree_hook(void *x)
1394{
1395 kmemleak_free(x);
1396 kasan_kfree_large(x, _RET_IP_);
1397}
1398
1399static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
1400{
1401 kmemleak_free_recursive(x, s->flags);

	/*
	 * Trouble is that we may no longer disable interrupts in the fast path
	 * So in order to make the debug calls that expect irqs to be
	 * disabled we need to disable interrupts temporarily.
	 */
1408#ifdef CONFIG_LOCKDEP
1409 {
1410 unsigned long flags;
1411
1412 local_irq_save(flags);
1413 debug_check_no_locks_freed(x, s->object_size);
1414 local_irq_restore(flags);
1415 }
1416#endif
1417 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1418 debug_check_no_obj_freed(x, s->object_size);

	/* KASAN might put x into memory quarantine, delaying its reuse */
1421 return kasan_slab_free(s, x, _RET_IP_);
1422}
1423
1424static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1425 void **head, void **tail)
1426{
/*
 * Compiler cannot detect this function can be removed if slab_free_hook()
 * evaluates to nothing.  Thus, catch all relevant config debug options here.
 */
1431#if defined(CONFIG_LOCKDEP) || \
1432 defined(CONFIG_DEBUG_KMEMLEAK) || \
1433 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1434 defined(CONFIG_KASAN)
1435
1436 void *object;
1437 void *next = *head;
1438 void *old_tail = *tail ? *tail : *head;

	/* Head and tail of the reconstructed freelist */
1441 *head = NULL;
1442 *tail = NULL;
1443
1444 do {
1445 object = next;
1446 next = get_freepointer(s, object);
1447
1448 if (!slab_free_hook(s, object)) {
			/* Move object to the new freelist */
1450 set_freepointer(s, object, *head);
1451 *head = object;
1452 if (!*tail)
1453 *tail = object;
1454 }
1455 } while (object != old_tail);
1456
1457 if (*head == *tail)
1458 *tail = NULL;
1459
1460 return *head != NULL;
1461#else
1462 return true;
1463#endif
1464}
1465
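/*
 * setup_object() runs the cache constructor (if any) on a fresh object:
 * the object is temporarily unpoisoned so the ctor may write to it, then
 * poisoned again so that any access before the real allocation is caught
 * by KASAN.
 */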
1466static void *setup_object(struct kmem_cache *s, struct page *page,
1467 void *object)
1468{
1469 setup_object_debug(s, page, object);
1470 object = kasan_init_slab_obj(s, object);
1471 if (unlikely(s->ctor)) {
1472 kasan_unpoison_object_data(s, object);
1473 s->ctor(object);
1474 kasan_poison_object_data(s, object);
1475 }
1476 return object;
1477}

/*
 * Slab allocation and freeing
 */
1482static inline struct page *alloc_slab_page(struct kmem_cache *s,
1483 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1484{
1485 struct page *page;
1486 unsigned int order = oo_order(oo);
1487
1488 if (node == NUMA_NO_NODE)
1489 page = alloc_pages(flags, order);
1490 else
1491 page = __alloc_pages_node(node, flags, order);
1492
1493 if (page && memcg_charge_slab(page, flags, order, s)) {
1494 __free_pages(page, order);
1495 page = NULL;
1496 }
1497
1498 return page;
1499}
1500
1501#ifdef CONFIG_SLAB_FREELIST_RANDOM
/* Pre-initialize the random sequence cache */
1503static int init_cache_random_seq(struct kmem_cache *s)
1504{
1505 unsigned int count = oo_objects(s->oo);
1506 int err;

	/* Bailout if already initialised */
1509 if (s->random_seq)
1510 return 0;
1511
1512 err = cache_random_seq_create(s, count, GFP_KERNEL);
1513 if (err) {
1514 pr_err("SLUB: Unable to initialize free list for %s\n",
1515 s->name);
1516 return err;
1517 }

	/* Transform to an offset on the set of pages */
1520 if (s->random_seq) {
1521 unsigned int i;
1522
1523 for (i = 0; i < count; i++)
1524 s->random_seq[i] *= s->size;
1525 }
1526 return 0;
1527}

/* Initialize each random sequence freelist per cache */
1530static void __init init_freelist_randomization(void)
1531{
1532 struct kmem_cache *s;
1533
1534 mutex_lock(&slab_mutex);
1535
1536 list_for_each_entry(s, &slab_caches, list)
1537 init_cache_random_seq(s);
1538
1539 mutex_unlock(&slab_mutex);
1540}

/* Get the next entry from the pre-computed randomized freelist sequence */
1543static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1544 unsigned long *pos, void *start,
1545 unsigned long page_limit,
1546 unsigned long freelist_count)
1547{
1548 unsigned int idx;

	/*
	 * If the target page allocation failed, the number of objects on the
	 * page might be smaller than the usual size defined by the cache.
	 */
1554 do {
1555 idx = s->random_seq[*pos];
1556 *pos += 1;
1557 if (*pos >= freelist_count)
1558 *pos = 0;
1559 } while (unlikely(idx >= page_limit));
1560
1561 return (char *)start + idx;
1562}

/* Shuffle the single linked freelist based on a random pre-computed sequence */
1565static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1566{
1567 void *start;
1568 void *cur;
1569 void *next;
1570 unsigned long idx, pos, page_limit, freelist_count;
1571
1572 if (page->objects < 2 || !s->random_seq)
1573 return false;
1574
1575 freelist_count = oo_objects(s->oo);
1576 pos = get_random_int() % freelist_count;
1577
1578 page_limit = page->objects * s->size;
1579 start = fixup_red_left(s, page_address(page));

	/* First entry is used as the base of the freelist */
1582 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1583 freelist_count);
1584 cur = setup_object(s, page, cur);
1585 page->freelist = cur;
1586
1587 for (idx = 1; idx < page->objects; idx++) {
1588 next = next_freelist_entry(s, page, &pos, start, page_limit,
1589 freelist_count);
1590 next = setup_object(s, page, next);
1591 set_freepointer(s, cur, next);
1592 cur = next;
1593 }
1594 set_freepointer(s, cur, NULL);
1595
1596 return true;
1597}
1598#else
1599static inline int init_cache_random_seq(struct kmem_cache *s)
1600{
1601 return 0;
1602}
1603static inline void init_freelist_randomization(void) { }
1604static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1605{
1606 return false;
1607}
1608#endif
1609
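/*
 * allocate_slab() first tries the cache's preferred order (s->oo) with
 * relaxed flags (__GFP_NOWARN | __GFP_NORETRY, no __GFP_NOFAIL) and falls
 * back to the minimum order (s->min) with the caller's full flags if that
 * fails.  The new slab is returned fully built: objects are linked into
 * page->freelist (optionally in randomized order), page->inuse is set to
 * the object count and the slab is marked frozen for the allocating cpu.
 */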
1610static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1611{
1612 struct page *page;
1613 struct kmem_cache_order_objects oo = s->oo;
1614 gfp_t alloc_gfp;
1615 void *start, *p, *next;
1616 int idx, order;
1617 bool shuffle;
1618
1619 flags &= gfp_allowed_mask;
1620
1621 if (gfpflags_allow_blocking(flags))
1622 local_irq_enable();
1623
1624 flags |= s->allocflags;

	/*
	 * Let the initial higher-order allocation fail under memory pressure
	 * so we fall-back to the minimum order allocation.
	 */
1630 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1631 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1632 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1633
1634 page = alloc_slab_page(s, alloc_gfp, node, oo);
1635 if (unlikely(!page)) {
1636 oo = s->min;
1637 alloc_gfp = flags;
		/*
		 * Allocation may have failed due to fragmentation.
		 * Try a lower order alloc if possible
		 */
1642 page = alloc_slab_page(s, alloc_gfp, node, oo);
1643 if (unlikely(!page))
1644 goto out;
1645 stat(s, ORDER_FALLBACK);
1646 }
1647
1648 page->objects = oo_objects(oo);
1649
1650 order = compound_order(page);
1651 page->slab_cache = s;
1652 __SetPageSlab(page);
1653 if (page_is_pfmemalloc(page))
1654 SetPageSlabPfmemalloc(page);
1655
1656 kasan_poison_slab(page);
1657
1658 start = page_address(page);
1659
1660 setup_page_debug(s, start, order);
1661
1662 shuffle = shuffle_freelist(s, page);
1663
1664 if (!shuffle) {
1665 start = fixup_red_left(s, start);
1666 start = setup_object(s, page, start);
1667 page->freelist = start;
1668 for (idx = 0, p = start; idx < page->objects - 1; idx++) {
1669 next = p + s->size;
1670 next = setup_object(s, page, next);
1671 set_freepointer(s, p, next);
1672 p = next;
1673 }
1674 set_freepointer(s, p, NULL);
1675 }
1676
1677 page->inuse = page->objects;
1678 page->frozen = 1;
1679
1680out:
1681 if (gfpflags_allow_blocking(flags))
1682 local_irq_disable();
1683 if (!page)
1684 return NULL;
1685
1686 mod_lruvec_page_state(page,
1687 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1688 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1689 1 << oo_order(oo));
1690
1691 inc_slabs_node(s, page_to_nid(page), page->objects);
1692
1693 return page;
1694}
1695
1696static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1697{
1698 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1699 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1700 flags &= ~GFP_SLAB_BUG_MASK;
1701 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1702 invalid_mask, &invalid_mask, flags, &flags);
1703 dump_stack();
1704 }
1705
1706 return allocate_slab(s,
1707 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1708}
1709
1710static void __free_slab(struct kmem_cache *s, struct page *page)
1711{
1712 int order = compound_order(page);
1713 int pages = 1 << order;
1714
1715 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1716 void *p;
1717
1718 slab_pad_check(s, page);
1719 for_each_object(p, s, page_address(page),
1720 page->objects)
1721 check_object(s, page, p, SLUB_RED_INACTIVE);
1722 }
1723
1724 mod_lruvec_page_state(page,
1725 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1726 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1727 -pages);
1728
1729 __ClearPageSlabPfmemalloc(page);
1730 __ClearPageSlab(page);
1731
1732 page->mapping = NULL;
1733 if (current->reclaim_state)
1734 current->reclaim_state->reclaimed_slab += pages;
1735 memcg_uncharge_slab(page, order, s);
1736 __free_pages(page, order);
1737}
1738
1739static void rcu_free_slab(struct rcu_head *h)
1740{
1741 struct page *page = container_of(h, struct page, rcu_head);
1742
1743 __free_slab(page->slab_cache, page);
1744}
1745
1746static void free_slab(struct kmem_cache *s, struct page *page)
1747{
1748 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1749 call_rcu(&page->rcu_head, rcu_free_slab);
1750 } else
1751 __free_slab(s, page);
1752}
1753
1754static void discard_slab(struct kmem_cache *s, struct page *page)
1755{
1756 dec_slabs_node(s, page_to_nid(page), page->objects);
1757 free_slab(s, page);
1758}

/*
 * Management of partially allocated slabs.
 */
1763static inline void
1764__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1765{
1766 n->nr_partial++;
1767 if (tail == DEACTIVATE_TO_TAIL)
1768 list_add_tail(&page->slab_list, &n->partial);
1769 else
1770 list_add(&page->slab_list, &n->partial);
1771}
1772
1773static inline void add_partial(struct kmem_cache_node *n,
1774 struct page *page, int tail)
1775{
1776 lockdep_assert_held(&n->list_lock);
1777 __add_partial(n, page, tail);
1778}
1779
1780static inline void remove_partial(struct kmem_cache_node *n,
1781 struct page *page)
1782{
1783 lockdep_assert_held(&n->list_lock);
1784 list_del(&page->slab_list);
1785 n->nr_partial--;
1786}

/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 */
1794static inline void *acquire_slab(struct kmem_cache *s,
1795 struct kmem_cache_node *n, struct page *page,
1796 int mode, int *objects)
1797{
1798 void *freelist;
1799 unsigned long counters;
1800 struct page new;
1801
1802 lockdep_assert_held(&n->list_lock);

	/*
	 * Zap the freelist and set the frozen bit.
	 * The old freelist is the list of objects for the
	 * per cpu allocation list.
	 */
1809 freelist = page->freelist;
1810 counters = page->counters;
1811 new.counters = counters;
1812 *objects = new.objects - new.inuse;
1813 if (mode) {
1814 new.inuse = page->objects;
1815 new.freelist = NULL;
1816 } else {
1817 new.freelist = freelist;
1818 }
1819
1820 VM_BUG_ON(new.frozen);
1821 new.frozen = 1;
1822
1823 if (!__cmpxchg_double_slab(s, page,
1824 freelist, counters,
1825 new.freelist, new.counters,
1826 "acquire_slab"))
1827 return NULL;
1828
1829 remove_partial(n, page);
1830 WARN_ON(!freelist);
1831 return freelist;
1832}
1833
1834static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1835static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);

/*
 * Try to allocate a partial slab from a specific node.
 */
1840static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1841 struct kmem_cache_cpu *c, gfp_t flags)
1842{
1843 struct page *page, *page2;
1844 void *object = NULL;
1845 unsigned int available = 0;
1846 int objects;

	/*
	 * Racy check. If we mistakenly see no partial slabs then we
	 * just allocate an empty slab. If we mistakenly try to get a
	 * partial slab and there is none available then the caller
	 * simply falls back to allocating a new slab.
	 */
1854 if (!n || !n->nr_partial)
1855 return NULL;
1856
1857 spin_lock(&n->list_lock);
1858 list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
1859 void *t;
1860
1861 if (!pfmemalloc_match(page, flags))
1862 continue;
1863
1864 t = acquire_slab(s, n, page, object == NULL, &objects);
1865 if (!t)
1866 break;
1867
1868 available += objects;
1869 if (!object) {
1870 c->page = page;
1871 stat(s, ALLOC_FROM_PARTIAL);
1872 object = t;
1873 } else {
1874 put_cpu_partial(s, page, 0);
1875 stat(s, CPU_PARTIAL_NODE);
1876 }
1877 if (!kmem_cache_has_cpu_partial(s)
1878 || available > slub_cpu_partial(s) / 2)
1879 break;
1880
1881 }
1882 spin_unlock(&n->list_lock);
1883 return object;
1884}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
1889static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1890 struct kmem_cache_cpu *c)
1891{
1892#ifdef CONFIG_NUMA
1893 struct zonelist *zonelist;
1894 struct zoneref *z;
1895 struct zone *zone;
1896 enum zone_type high_zoneidx = gfp_zone(flags);
1897 void *object;
1898 unsigned int cpuset_mems_cookie;

	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * If /sys/kernel/slab/<cache>/remote_node_defrag_ratio is set high
	 * then almost every allocation will first attempt to defrag slab
	 * caches on other nodes. This means scanning over all nodes to look
	 * for partial slabs, which may be expensive if done every time we
	 * are trying to find a slab with available objects.
	 */
1918 if (!s->remote_node_defrag_ratio ||
1919 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1920 return NULL;
1921
1922 do {
1923 cpuset_mems_cookie = read_mems_allowed_begin();
1924 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1925 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1926 struct kmem_cache_node *n;
1927
1928 n = get_node(s, zone_to_nid(zone));
1929
1930 if (n && cpuset_zone_allowed(zone, flags) &&
1931 n->nr_partial > s->min_partial) {
1932 object = get_partial_node(s, n, c, flags);
1933 if (object) {
					/*
					 * Don't check read_mems_allowed_retry()
					 * here - if mems_allowed was updated in
					 * parallel, that was a harmless race
					 * between allocation and the cpuset
					 * update
					 */
1941 return object;
1942 }
1943 }
1944 }
1945 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1946#endif
1947 return NULL;
1948}

/*
 * Get a partial page, lock it and return it.
 */
1953static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1954 struct kmem_cache_cpu *c)
1955{
1956 void *object;
1957 int searchnode = node;
1958
1959 if (node == NUMA_NO_NODE)
1960 searchnode = numa_mem_id();
1961 else if (!node_present_pages(node))
1962 searchnode = node_to_mem_node(node);
1963
1964 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1965 if (object || node != NUMA_NO_NODE)
1966 return object;
1967
1968 return get_any_partial(s, flags, c);
1969}
1970
1971#ifdef CONFIG_PREEMPT
/*
 * Calculate the next globally unique transaction id for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
1977#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1978#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
1983#define TID_STEP 1
1984#endif
1985
1986static inline unsigned long next_tid(unsigned long tid)
1987{
1988 return tid + TID_STEP;
1989}
1990
1991static inline unsigned int tid_to_cpu(unsigned long tid)
1992{
1993 return tid % TID_STEP;
1994}
1995
1996static inline unsigned long tid_to_event(unsigned long tid)
1997{
1998 return tid / TID_STEP;
1999}
2000
2001static inline unsigned int init_tid(int cpu)
2002{
2003 return cpu;
2004}
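
/*
 * Example (illustrative): with CONFIG_PREEMPT and CONFIG_NR_CPUS = 64,
 * TID_STEP is 64, so cpu 3 starts with tid 3.  Each successful fastpath
 * allocation or free advances the tid by 64 via next_tid(), so tid % 64
 * always identifies the cpu and tid / 64 counts the operations, which is
 * what note_cmpxchg_failure() uses to tell a cpu migration apart from an
 * interleaved operation on the same cpu.
 */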
2005
2006static inline void note_cmpxchg_failure(const char *n,
2007 const struct kmem_cache *s, unsigned long tid)
2008{
2009#ifdef SLUB_DEBUG_CMPXCHG
2010 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2011
2012 pr_info("%s %s: cmpxchg redo ", n, s->name);
2013
2014#ifdef CONFIG_PREEMPT
2015 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2016 pr_warn("due to cpu change %d -> %d\n",
2017 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2018 else
2019#endif
2020 if (tid_to_event(tid) != tid_to_event(actual_tid))
2021 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2022 tid_to_event(tid), tid_to_event(actual_tid));
2023 else
2024 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2025 actual_tid, tid, next_tid(tid));
2026#endif
2027 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2028}
2029
2030static void init_kmem_cache_cpus(struct kmem_cache *s)
2031{
2032 int cpu;
2033
2034 for_each_possible_cpu(cpu)
2035 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2036}

/*
 * Remove the cpu slab
 */
2041static void deactivate_slab(struct kmem_cache *s, struct page *page,
2042 void *freelist, struct kmem_cache_cpu *c)
2043{
2044 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2045 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2046 int lock = 0;
2047 enum slab_modes l = M_NONE, m = M_NONE;
2048 void *nextfree;
2049 int tail = DEACTIVATE_TO_HEAD;
2050 struct page new;
2051 struct page old;
2052
2053 if (page->freelist) {
2054 stat(s, DEACTIVATE_REMOTE_FREES);
2055 tail = DEACTIVATE_TO_TAIL;
2056 }

	/*
	 * Stage one: Free all available per cpu objects back
	 * to the page freelist while it is still frozen. Leave the
	 * last one.
	 *
	 * There is no need to take the list->lock because the page
	 * is still frozen.
	 */
2066 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2067 void *prior;
2068 unsigned long counters;
2069
2070 do {
2071 prior = page->freelist;
2072 counters = page->counters;
2073 set_freepointer(s, freelist, prior);
2074 new.counters = counters;
2075 new.inuse--;
2076 VM_BUG_ON(!new.frozen);
2077
2078 } while (!__cmpxchg_double_slab(s, page,
2079 prior, counters,
2080 freelist, new.counters,
2081 "drain percpu freelist"));
2082
2083 freelist = nextfree;
2084 }

	/*
	 * Stage two: Ensure that the page is unfrozen while the
	 * list presence reflects the actual number of objects
	 * during unfreeze.
	 *
	 * We setup the list membership and then perform a cmpxchg
	 * with the count. If there is a mismatch then the page
	 * is not unfrozen but the page is on the wrong list.
	 *
	 * Then we restart the process which may have to remove
	 * the page from the list that we just put it on again
	 * because the cmpxchg may fail to replace the freelist and
	 * counters if it is the wrong one.
	 */
2100redo:
2101
2102 old.freelist = page->freelist;
2103 old.counters = page->counters;
2104 VM_BUG_ON(!old.frozen);
2105
2106
2107 new.counters = old.counters;
2108 if (freelist) {
2109 new.inuse--;
2110 set_freepointer(s, freelist, old.freelist);
2111 new.freelist = freelist;
2112 } else
2113 new.freelist = old.freelist;
2114
2115 new.frozen = 0;
2116
2117 if (!new.inuse && n->nr_partial >= s->min_partial)
2118 m = M_FREE;
2119 else if (new.freelist) {
2120 m = M_PARTIAL;
2121 if (!lock) {
2122 lock = 1;
			/*
			 * Taking the spinlock removes the possibility
			 * that acquire_slab() will see a slab page that
			 * is frozen
			 */
2128 spin_lock(&n->list_lock);
2129 }
2130 } else {
2131 m = M_FULL;
2132 if (kmem_cache_debug(s) && !lock) {
2133 lock = 1;
			/*
			 * This also ensures that the scanning of full
			 * slabs from diagnostic functions will not see
			 * any frozen slabs.
			 */
2139 spin_lock(&n->list_lock);
2140 }
2141 }
2142
2143 if (l != m) {
2144 if (l == M_PARTIAL)
2145 remove_partial(n, page);
2146 else if (l == M_FULL)
2147 remove_full(s, n, page);
2148
2149 if (m == M_PARTIAL)
2150 add_partial(n, page, tail);
2151 else if (m == M_FULL)
2152 add_full(s, n, page);
2153 }
2154
2155 l = m;
2156 if (!__cmpxchg_double_slab(s, page,
2157 old.freelist, old.counters,
2158 new.freelist, new.counters,
2159 "unfreezing slab"))
2160 goto redo;
2161
2162 if (lock)
2163 spin_unlock(&n->list_lock);
2164
2165 if (m == M_PARTIAL)
2166 stat(s, tail);
2167 else if (m == M_FULL)
2168 stat(s, DEACTIVATE_FULL);
2169 else if (m == M_FREE) {
2170 stat(s, DEACTIVATE_EMPTY);
2171 discard_slab(s, page);
2172 stat(s, FREE_SLAB);
2173 }
2174
2175 c->page = NULL;
2176 c->freelist = NULL;
2177}

/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the cpu using c (or some other guarantee must be there
 * to guarantee no concurrent accesses).
 */
2186static void unfreeze_partials(struct kmem_cache *s,
2187 struct kmem_cache_cpu *c)
2188{
2189#ifdef CONFIG_SLUB_CPU_PARTIAL
2190 struct kmem_cache_node *n = NULL, *n2 = NULL;
2191 struct page *page, *discard_page = NULL;
2192
2193 while ((page = c->partial)) {
2194 struct page new;
2195 struct page old;
2196
2197 c->partial = page->next;
2198
2199 n2 = get_node(s, page_to_nid(page));
2200 if (n != n2) {
2201 if (n)
2202 spin_unlock(&n->list_lock);
2203
2204 n = n2;
2205 spin_lock(&n->list_lock);
2206 }
2207
2208 do {
2209
2210 old.freelist = page->freelist;
2211 old.counters = page->counters;
2212 VM_BUG_ON(!old.frozen);
2213
2214 new.counters = old.counters;
2215 new.freelist = old.freelist;
2216
2217 new.frozen = 0;
2218
2219 } while (!__cmpxchg_double_slab(s, page,
2220 old.freelist, old.counters,
2221 new.freelist, new.counters,
2222 "unfreezing slab"));
2223
2224 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2225 page->next = discard_page;
2226 discard_page = page;
2227 } else {
2228 add_partial(n, page, DEACTIVATE_TO_TAIL);
2229 stat(s, FREE_ADD_PARTIAL);
2230 }
2231 }
2232
2233 if (n)
2234 spin_unlock(&n->list_lock);
2235
2236 while (discard_page) {
2237 page = discard_page;
2238 discard_page = discard_page->next;
2239
2240 stat(s, DEACTIVATE_EMPTY);
2241 discard_slab(s, page);
2242 stat(s, FREE_SLAB);
2243 }
2244#endif
2245}

/*
 * Put a page that was just frozen (in __slab_free|get_partial_node) into a
 * partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2254static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2255{
2256#ifdef CONFIG_SLUB_CPU_PARTIAL
2257 struct page *oldpage;
2258 int pages;
2259 int pobjects;
2260
2261 preempt_disable();
2262 do {
2263 pages = 0;
2264 pobjects = 0;
2265 oldpage = this_cpu_read(s->cpu_slab->partial);
2266
2267 if (oldpage) {
2268 pobjects = oldpage->pobjects;
2269 pages = oldpage->pages;
2270 if (drain && pobjects > s->cpu_partial) {
2271 unsigned long flags;
				/*
				 * partial array is full. Move the existing
				 * set to the per node partial list.
				 */
2276 local_irq_save(flags);
2277 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2278 local_irq_restore(flags);
2279 oldpage = NULL;
2280 pobjects = 0;
2281 pages = 0;
2282 stat(s, CPU_PARTIAL_DRAIN);
2283 }
2284 }
2285
2286 pages++;
2287 pobjects += page->objects - page->inuse;
2288
2289 page->pages = pages;
2290 page->pobjects = pobjects;
2291 page->next = oldpage;
2292
2293 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2294 != oldpage);
2295 if (unlikely(!s->cpu_partial)) {
2296 unsigned long flags;
2297
2298 local_irq_save(flags);
2299 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2300 local_irq_restore(flags);
2301 }
2302 preempt_enable();
2303#endif
2304}
2305
2306static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2307{
2308 stat(s, CPUSLAB_FLUSH);
2309 deactivate_slab(s, c->page, c->freelist, c);
2310
2311 c->tid = next_tid(c->tid);
2312}

/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
2319static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2320{
2321 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2322
2323 if (c->page)
2324 flush_slab(s, c);
2325
2326 unfreeze_partials(s, c);
2327}
2328
2329static void flush_cpu_slab(void *d)
2330{
2331 struct kmem_cache *s = d;
2332
2333 __flush_cpu_slab(s, smp_processor_id());
2334}
2335
2336static bool has_cpu_slab(int cpu, void *info)
2337{
2338 struct kmem_cache *s = info;
2339 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2340
2341 return c->page || slub_percpu_partial(c);
2342}
2343
2344static void flush_all(struct kmem_cache *s)
2345{
2346 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2347}

/*
 * Use the cpu notifier to insure that the cpu slabs are flushed when
 * necessary.
 */
2353static int slub_cpu_dead(unsigned int cpu)
2354{
2355 struct kmem_cache *s;
2356 unsigned long flags;
2357
2358 mutex_lock(&slab_mutex);
2359 list_for_each_entry(s, &slab_caches, list) {
2360 local_irq_save(flags);
2361 __flush_cpu_slab(s, cpu);
2362 local_irq_restore(flags);
2363 }
2364 mutex_unlock(&slab_mutex);
2365 return 0;
2366}

/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2372static inline int node_match(struct page *page, int node)
2373{
2374#ifdef CONFIG_NUMA
2375 if (node != NUMA_NO_NODE && page_to_nid(page) != node)
2376 return 0;
2377#endif
2378 return 1;
2379}
2380
2381#ifdef CONFIG_SLUB_DEBUG
2382static int count_free(struct page *page)
2383{
2384 return page->objects - page->inuse;
2385}
2386
2387static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2388{
2389 return atomic_long_read(&n->total_objects);
2390}
2391#endif
2392
2393#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2394static unsigned long count_partial(struct kmem_cache_node *n,
2395 int (*get_count)(struct page *))
2396{
2397 unsigned long flags;
2398 unsigned long x = 0;
2399 struct page *page;
2400
2401 spin_lock_irqsave(&n->list_lock, flags);
2402 list_for_each_entry(page, &n->partial, slab_list)
2403 x += get_count(page);
2404 spin_unlock_irqrestore(&n->list_lock, flags);
2405 return x;
2406}
2407#endif
2408
2409static noinline void
2410slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2411{
2412#ifdef CONFIG_SLUB_DEBUG
2413 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2414 DEFAULT_RATELIMIT_BURST);
2415 int node;
2416 struct kmem_cache_node *n;
2417
2418 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2419 return;
2420
2421 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2422 nid, gfpflags, &gfpflags);
2423 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2424 s->name, s->object_size, s->size, oo_order(s->oo),
2425 oo_order(s->min));
2426
2427 if (oo_order(s->min) > get_order(s->object_size))
2428 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2429 s->name);
2430
2431 for_each_kmem_cache_node(s, node, n) {
2432 unsigned long nr_slabs;
2433 unsigned long nr_objs;
2434 unsigned long nr_free;
2435
2436 nr_free = count_partial(n, count_free);
2437 nr_slabs = node_nr_slabs(n);
2438 nr_objs = node_nr_objs(n);
2439
2440 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2441 node, nr_slabs, nr_objs, nr_free);
2442 }
2443#endif
2444}
2445
2446static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2447 int node, struct kmem_cache_cpu **pc)
2448{
2449 void *freelist;
2450 struct kmem_cache_cpu *c = *pc;
2451 struct page *page;
2452
2453 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2454
2455 freelist = get_partial(s, flags, node, c);
2456
2457 if (freelist)
2458 return freelist;
2459
2460 page = new_slab(s, flags, node);
2461 if (page) {
2462 c = raw_cpu_ptr(s->cpu_slab);
2463 if (c->page)
2464 flush_slab(s, c);

		/*
		 * No other reference to the page yet so we can
		 * muck around with it freely without cmpxchg
		 */
2470 freelist = page->freelist;
2471 page->freelist = NULL;
2472
2473 stat(s, ALLOC_SLAB);
2474 c->page = page;
2475 *pc = c;
2476 }
2477
2478 return freelist;
2479}
2480
2481static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2482{
2483 if (unlikely(PageSlabPfmemalloc(page)))
2484 return gfp_pfmemalloc_allowed(gfpflags);
2485
2486 return true;
2487}

/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupt disabled.
 */
2499static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2500{
2501 struct page new;
2502 unsigned long counters;
2503 void *freelist;
2504
2505 do {
2506 freelist = page->freelist;
2507 counters = page->counters;
2508
2509 new.counters = counters;
2510 VM_BUG_ON(!new.frozen);
2511
2512 new.inuse = page->objects;
2513 new.frozen = freelist != NULL;
2514
2515 } while (!__cmpxchg_double_slab(s, page,
2516 freelist, counters,
2517 NULL, new.counters,
2518 "get_freelist"));
2519
2520 return freelist;
2521}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 *
 * Version of __slab_alloc to use when we know that interrupts are
 * already disabled (which is the case for bulk allocation).
 */
2542static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2543 unsigned long addr, struct kmem_cache_cpu *c)
2544{
2545 void *freelist;
2546 struct page *page;
2547
2548 page = c->page;
2549 if (!page)
2550 goto new_slab;
2551redo:
2552
2553 if (unlikely(!node_match(page, node))) {
2554 int searchnode = node;
2555
2556 if (node != NUMA_NO_NODE && !node_present_pages(node))
2557 searchnode = node_to_mem_node(node);
2558
2559 if (unlikely(!node_match(page, searchnode))) {
2560 stat(s, ALLOC_NODE_MISMATCH);
2561 deactivate_slab(s, page, c->freelist, c);
2562 goto new_slab;
2563 }
2564 }

	/*
	 * By rights, we should be searching for a slab page that was
	 * PFMEMALLOC but right now, we are losing the pfmemalloc
	 * information when the page leaves the per-cpu allocator
	 */
2571 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2572 deactivate_slab(s, page, c->freelist, c);
2573 goto new_slab;
2574 }

	/* must check again c->freelist in case of cpu migration or IRQ */
2577 freelist = c->freelist;
2578 if (freelist)
2579 goto load_freelist;
2580
2581 freelist = get_freelist(s, page);
2582
2583 if (!freelist) {
2584 c->page = NULL;
2585 stat(s, DEACTIVATE_BYPASS);
2586 goto new_slab;
2587 }
2588
2589 stat(s, ALLOC_REFILL);
2590
2591load_freelist:
	/*
	 * freelist is pointing to the list of objects to be used.
	 * page is pointing to the page from which the objects are obtained.
	 * That page must be frozen for per cpu allocations to work.
	 */
2597 VM_BUG_ON(!c->page->frozen);
2598 c->freelist = get_freepointer(s, freelist);
2599 c->tid = next_tid(c->tid);
2600 return freelist;
2601
2602new_slab:
2603
2604 if (slub_percpu_partial(c)) {
2605 page = c->page = slub_percpu_partial(c);
2606 slub_set_percpu_partial(c, page);
2607 stat(s, CPU_PARTIAL_ALLOC);
2608 goto redo;
2609 }
2610
2611 freelist = new_slab_objects(s, gfpflags, node, &c);
2612
2613 if (unlikely(!freelist)) {
2614 slab_out_of_memory(s, gfpflags, node);
2615 return NULL;
2616 }
2617
2618 page = c->page;
2619 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2620 goto load_freelist;

	/* Only entered in the debug case */
2623 if (kmem_cache_debug(s) &&
2624 !alloc_debug_processing(s, page, freelist, addr))
2625 goto new_slab;
2626
2627 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2628 return freelist;
2629}
2630
2631
2632
2633
2634
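/*
 * Interrupt-disabling wrapper around ___slab_alloc(), used when the
 * fastpath has to enter the slow path.
 */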
2635static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2636 unsigned long addr, struct kmem_cache_cpu *c)
2637{
2638 void *p;
2639 unsigned long flags;
2640
2641 local_irq_save(flags);
2642#ifdef CONFIG_PREEMPT
2643
2644
2645
2646
2647
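	/*
	 * With preemption enabled we may have been migrated to another cpu
	 * since the cpu_slab pointer was read, so reload it now that
	 * interrupts are off.
	 */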
2648 c = this_cpu_ptr(s->cpu_slab);
2649#endif
2650
2651 p = ___slab_alloc(s, gfpflags, node, addr, c);
2652 local_irq_restore(flags);
2653 return p;
2654}
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
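/*
 * Fastpath entry point for allocation. Objects are taken from the lockless
 * per cpu freelist with a cmpxchg of the (freelist, tid) pair; only when
 * the freelist is empty or the node does not match do we enter the slow
 * path via __slab_alloc().
 */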
2666static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2667 gfp_t gfpflags, int node, unsigned long addr)
2668{
2669 void *object;
2670 struct kmem_cache_cpu *c;
2671 struct page *page;
2672 unsigned long tid;
2673
2674 s = slab_pre_alloc_hook(s, gfpflags);
2675 if (!s)
2676 return NULL;
2677redo:
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
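	/*
	 * The transaction id and the per cpu slab pointer must be read on
	 * the same cpu. With preemption enabled we may migrate between the
	 * two reads, so retry until both come from one cpu.
	 */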
2688 do {
2689 tid = this_cpu_read(s->cpu_slab->tid);
2690 c = raw_cpu_ptr(s->cpu_slab);
2691 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2692 unlikely(tid != READ_ONCE(c->tid)));
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702 barrier();
2703
2704
2705
2706
2707
2708
2709
2710
2711 object = c->freelist;
2712 page = c->page;
2713 if (unlikely(!object || !node_match(page, node))) {
2714 object = __slab_alloc(s, gfpflags, node, addr, c);
2715 stat(s, ALLOC_SLOWPATH);
2716 } else {
2717 void *next_object = get_freepointer_safe(s, object);
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
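		/*
		 * The cmpxchg only succeeds if both freelist and tid are
		 * unchanged, so the update is safe against interrupts and
		 * preemption without disabling either. The tid is advanced so
		 * a stale freelist can never be reinstalled.
		 */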
2733 if (unlikely(!this_cpu_cmpxchg_double(
2734 s->cpu_slab->freelist, s->cpu_slab->tid,
2735 object, tid,
2736 next_object, next_tid(tid)))) {
2737
2738 note_cmpxchg_failure("slab_alloc", s, tid);
2739 goto redo;
2740 }
2741 prefetch_freepointer(s, next_object);
2742 stat(s, ALLOC_FASTPATH);
2743 }
2744
2745 if (unlikely(gfpflags & __GFP_ZERO) && object)
2746 memset(object, 0, s->object_size);
2747
2748 slab_post_alloc_hook(s, gfpflags, 1, &object);
2749
2750 return object;
2751}
2752
2753static __always_inline void *slab_alloc(struct kmem_cache *s,
2754 gfp_t gfpflags, unsigned long addr)
2755{
2756 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2757}
2758
2759void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2760{
2761 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2762
2763 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2764 s->size, gfpflags);
2765
2766 return ret;
2767}
2768EXPORT_SYMBOL(kmem_cache_alloc);
2769
2770#ifdef CONFIG_TRACING
2771void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2772{
2773 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2774 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2775 ret = kasan_kmalloc(s, ret, size, gfpflags);
2776 return ret;
2777}
2778EXPORT_SYMBOL(kmem_cache_alloc_trace);
2779#endif
2780
2781#ifdef CONFIG_NUMA
2782void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2783{
2784 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2785
2786 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2787 s->object_size, s->size, gfpflags, node);
2788
2789 return ret;
2790}
2791EXPORT_SYMBOL(kmem_cache_alloc_node);
2792
2793#ifdef CONFIG_TRACING
2794void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2795 gfp_t gfpflags,
2796 int node, size_t size)
2797{
2798 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2799
2800 trace_kmalloc_node(_RET_IP_, ret,
2801 size, s->size, gfpflags, node);
2802
2803 ret = kasan_kmalloc(s, ret, size, gfpflags);
2804 return ret;
2805}
2806EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2807#endif
2808#endif
2809
2810
2811
2812
2813
2814
2815
2816
2817
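/*
 * Slow path free. The object(s) are linked back into the page's freelist
 * with a cmpxchg_double. The node list_lock is only taken when the slab
 * may have to be added to or removed from the partial/full lists, or when
 * the now empty slab must be discarded.
 */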
2818static void __slab_free(struct kmem_cache *s, struct page *page,
2819 void *head, void *tail, int cnt,
2820 unsigned long addr)
2822{
2823 void *prior;
2824 int was_frozen;
2825 struct page new;
2826 unsigned long counters;
2827 struct kmem_cache_node *n = NULL;
2828 unsigned long uninitialized_var(flags);
2829
2830 stat(s, FREE_SLOWPATH);
2831
2832 if (kmem_cache_debug(s) &&
2833 !free_debug_processing(s, page, head, tail, cnt, addr))
2834 return;
2835
2836 do {
2837 if (unlikely(n)) {
2838 spin_unlock_irqrestore(&n->list_lock, flags);
2839 n = NULL;
2840 }
2841 prior = page->freelist;
2842 counters = page->counters;
2843 set_freepointer(s, tail, prior);
2844 new.counters = counters;
2845 was_frozen = new.frozen;
2846 new.inuse -= cnt;
2847 if ((!new.inuse || !prior) && !was_frozen) {
2848
2849 if (kmem_cache_has_cpu_partial(s) && !prior) {
2850
2851
2852
2853
2854
2855
2856
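				/*
				 * The slab was full. Instead of putting it on
				 * the node partial list, freeze it so the
				 * cmpxchg below hands it to this cpu and it
				 * can be queued on the per cpu partial list
				 * without taking the list_lock.
				 */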
2857 new.frozen = 1;
2858
2859 } else {
2860
2861 n = get_node(s, page_to_nid(page));
2862
2863
2864
2865
2866
2867
2868
2869
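				/*
				 * Take the list_lock speculatively: if the
				 * cmpxchg below fails, the lock is dropped at
				 * the top of the loop and the attempt is
				 * retried.
				 */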
2870 spin_lock_irqsave(&n->list_lock, flags);
2871
2872 }
2873 }
2874
2875 } while (!cmpxchg_double_slab(s, page,
2876 prior, counters,
2877 head, new.counters,
2878 "__slab_free"));
2879
2880 if (likely(!n)) {
2881
2882
2883
2884
2885
2886 if (new.frozen && !was_frozen) {
2887 put_cpu_partial(s, page, 1);
2888 stat(s, CPU_PARTIAL_FREE);
2889 }
2890
2891
2892
2893
2894 if (was_frozen)
2895 stat(s, FREE_FROZEN);
2896 return;
2897 }
2898
2899 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2900 goto slab_empty;
2901
2902
2903
2904
2905
2906 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2907 remove_full(s, n, page);
2908 add_partial(n, page, DEACTIVATE_TO_TAIL);
2909 stat(s, FREE_ADD_PARTIAL);
2910 }
2911 spin_unlock_irqrestore(&n->list_lock, flags);
2912 return;
2913
2914slab_empty:
2915 if (prior) {
2916
2917
2918
2919 remove_partial(n, page);
2920 stat(s, FREE_REMOVE_PARTIAL);
2921 } else {
2922
2923 remove_full(s, n, page);
2924 }
2925
2926 spin_unlock_irqrestore(&n->list_lock, flags);
2927 stat(s, FREE_SLAB);
2928 discard_slab(s, page);
2929}
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
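/*
 * Fastpath free with forced inlining. If the object belongs to the current
 * cpu slab it is pushed onto the lockless freelist with a cmpxchg of the
 * (freelist, tid) pair; otherwise we take the slow path in __slab_free().
 */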
2946static __always_inline void do_slab_free(struct kmem_cache *s,
2947 struct page *page, void *head, void *tail,
2948 int cnt, unsigned long addr)
2949{
2950 void *tail_obj = tail ? : head;
2951 struct kmem_cache_cpu *c;
2952 unsigned long tid;
2953redo:
2954
2955
2956
2957
2958
2959
2960 do {
2961 tid = this_cpu_read(s->cpu_slab->tid);
2962 c = raw_cpu_ptr(s->cpu_slab);
2963 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2964 unlikely(tid != READ_ONCE(c->tid)));
2965
2966
2967 barrier();
2968
2969 if (likely(page == c->page)) {
2970 set_freepointer(s, tail_obj, c->freelist);
2971
2972 if (unlikely(!this_cpu_cmpxchg_double(
2973 s->cpu_slab->freelist, s->cpu_slab->tid,
2974 c->freelist, tid,
2975 head, next_tid(tid)))) {
2976
2977 note_cmpxchg_failure("slab_free", s, tid);
2978 goto redo;
2979 }
2980 stat(s, FREE_FASTPATH);
2981 } else
2982 __slab_free(s, page, head, tail_obj, cnt, addr);
2983
2984}
2985
2986static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
2987 void *head, void *tail, int cnt,
2988 unsigned long addr)
2989{
2990
2991
2992
2993
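	/*
	 * The free hooks may remove objects from the freelist (e.g. when
	 * KASAN delays their reuse); only free what is left of the list.
	 */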
2994 if (slab_free_freelist_hook(s, &head, &tail))
2995 do_slab_free(s, page, head, tail, cnt, addr);
2996}
2997
2998#ifdef CONFIG_KASAN_GENERIC
2999void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3000{
3001 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
3002}
3003#endif
3004
3005void kmem_cache_free(struct kmem_cache *s, void *x)
3006{
3007 s = cache_from_obj(s, x);
3008 if (!s)
3009 return;
3010 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
3011 trace_kmem_cache_free(_RET_IP_, x);
3012}
3013EXPORT_SYMBOL(kmem_cache_free);
3014
3015struct detached_freelist {
3016 struct page *page;
3017 void *tail;
3018 void *freelist;
3019 int cnt;
3020 struct kmem_cache *s;
3021};
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
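/*
 * Walk the array p[] backwards and build a freelist of all objects that
 * belong to the same page, NULLing out the array slots that were consumed.
 * Objects from large (non-slab) allocations are returned straight to the
 * page allocator. A small lookahead past objects from other pages is
 * tolerated. Returns the array size to use for the next iteration, or 0
 * when everything has been handled.
 */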
3035static inline
3036int build_detached_freelist(struct kmem_cache *s, size_t size,
3037 void **p, struct detached_freelist *df)
3038{
3039 size_t first_skipped_index = 0;
3040 int lookahead = 3;
3041 void *object;
3042 struct page *page;
3043
3044
3045 df->page = NULL;
3046
3047 do {
3048 object = p[--size];
3049
3050 } while (!object && size);
3051
3052 if (!object)
3053 return 0;
3054
3055 page = virt_to_head_page(object);
3056 if (!s) {
3057
3058 if (unlikely(!PageSlab(page))) {
3059 BUG_ON(!PageCompound(page));
3060 kfree_hook(object);
3061 __free_pages(page, compound_order(page));
3062 p[size] = NULL;
3063 return size;
3064 }
3065
3066 df->s = page->slab_cache;
3067 } else {
3068 df->s = cache_from_obj(s, object);
3069 }
3070
3071
3072 df->page = page;
3073 set_freepointer(df->s, object, NULL);
3074 df->tail = object;
3075 df->freelist = object;
3076 p[size] = NULL;
3077 df->cnt = 1;
3078
3079 while (size) {
3080 object = p[--size];
3081 if (!object)
3082 continue;
3083
3084
3085 if (df->page == virt_to_head_page(object)) {
3086
3087 set_freepointer(df->s, object, df->freelist);
3088 df->freelist = object;
3089 df->cnt++;
3090 p[size] = NULL;
3091
3092 continue;
3093 }
3094
3095
3096 if (!--lookahead)
3097 break;
3098
3099 if (!first_skipped_index)
3100 first_skipped_index = size + 1;
3101 }
3102
3103 return first_skipped_index;
3104}
3105
3106
3107void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3108{
3109 if (WARN_ON(!size))
3110 return;
3111
3112 do {
3113 struct detached_freelist df;
3114
3115 size = build_detached_freelist(s, size, p, &df);
3116 if (!df.page)
3117 continue;
3118
3119 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
3120 } while (likely(size));
3121}
3122EXPORT_SYMBOL(kmem_cache_free_bulk);
3123
3124
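/*
 * Bulk allocation: fill p[] with size objects. Objects are taken straight
 * from the per cpu freelist with interrupts disabled; when it runs dry the
 * irq-disabled slow path ___slab_alloc() refills it. Returns the number of
 * objects allocated, or 0 on failure (partially allocated objects are
 * freed again).
 */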
3125int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3126 void **p)
3127{
3128 struct kmem_cache_cpu *c;
3129 int i;
3130
3131
3132 s = slab_pre_alloc_hook(s, flags);
3133 if (unlikely(!s))
3134 return false;
3135
3136
3137
3138
3139
3140 local_irq_disable();
3141 c = this_cpu_ptr(s->cpu_slab);
3142
3143 for (i = 0; i < size; i++) {
3144 void *object = c->freelist;
3145
3146 if (unlikely(!object)) {
3147
3148
3149
3150
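			/*
			 * Invoking the slow path will usually repopulate the
			 * per cpu freelist as a side effect, so reload c
			 * afterwards and continue from the freelist.
			 */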
3151 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3152 _RET_IP_, c);
3153 if (unlikely(!p[i]))
3154 goto error;
3155
3156 c = this_cpu_ptr(s->cpu_slab);
3157 continue;
3158 }
3159 c->freelist = get_freepointer(s, object);
3160 p[i] = object;
3161 }
3162 c->tid = next_tid(c->tid);
3163 local_irq_enable();
3164
3165
3166 if (unlikely(flags & __GFP_ZERO)) {
3167 int j;
3168
3169 for (j = 0; j < i; j++)
3170 memset(p[j], 0, s->object_size);
3171 }
3172
3173
3174 slab_post_alloc_hook(s, flags, size, p);
3175 return i;
3176error:
3177 local_irq_enable();
3178 slab_post_alloc_hook(s, flags, i, p);
3179 __kmem_cache_free_bulk(s, i, p);
3180 return 0;
3181}
3182EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
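/*
 * Minimum / maximum order of slab pages and minimum number of objects per
 * slab. These trade off page allocator pressure and internal fragmentation
 * against how often the partial lists (and their list_lock) are touched.
 */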
3204static unsigned int slub_min_order;
3205static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3206static unsigned int slub_min_objects;
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
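/*
 * Calculate the order of allocation for a slab of objects of a given size.
 *
 * Pick the smallest order >= slub_min_order that fits at least min_objects
 * objects of the given size and wastes no more than 1/fract_leftover of the
 * slab in left-over space, without exceeding max_order.
 */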
3233static inline unsigned int slab_order(unsigned int size,
3234 unsigned int min_objects, unsigned int max_order,
3235 unsigned int fract_leftover)
3236{
3237 unsigned int min_order = slub_min_order;
3238 unsigned int order;
3239
3240 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3241 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3242
3243 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3244 order <= max_order; order++) {
3245
3246 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3247 unsigned int rem;
3248
3249 rem = slab_size % size;
3250
3251 if (rem <= slab_size / fract_leftover)
3252 break;
3253 }
3254
3255 return order;
3256}
3257
3258static inline int calculate_order(unsigned int size)
3259{
3260 unsigned int order;
3261 unsigned int min_objects;
3262 unsigned int max_objects;
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272 min_objects = slub_min_objects;
3273 if (!min_objects)
3274 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3275 max_objects = order_objects(slub_max_order, size);
3276 min_objects = min(min_objects, max_objects);
3277
3278 while (min_objects > 1) {
3279 unsigned int fraction;
3280
3281 fraction = 16;
3282 while (fraction >= 4) {
3283 order = slab_order(size, min_objects,
3284 slub_max_order, fraction);
3285 if (order <= slub_max_order)
3286 return order;
3287 fraction /= 2;
3288 }
3289 min_objects--;
3290 }
3291
3292
3293
3294
3295
3296 order = slab_order(size, 1, slub_max_order, 1);
3297 if (order <= slub_max_order)
3298 return order;
3299
3300
3301
3302
3303 order = slab_order(size, 1, MAX_ORDER, 1);
3304 if (order < MAX_ORDER)
3305 return order;
3306 return -ENOSYS;
3307}
3308
3309static void
3310init_kmem_cache_node(struct kmem_cache_node *n)
3311{
3312 n->nr_partial = 0;
3313 spin_lock_init(&n->list_lock);
3314 INIT_LIST_HEAD(&n->partial);
3315#ifdef CONFIG_SLUB_DEBUG
3316 atomic_long_set(&n->nr_slabs, 0);
3317 atomic_long_set(&n->total_objects, 0);
3318 INIT_LIST_HEAD(&n->full);
3319#endif
3320}
3321
3322static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3323{
3324 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3325 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3326
3327
3328
3329
3330
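	/*
	 * Align the per cpu structures to a double word boundary so that the
	 * freelist/tid pair can be updated with cmpxchg_double.
	 */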
3331 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3332 2 * sizeof(void *));
3333
3334 if (!s->cpu_slab)
3335 return 0;
3336
3337 init_kmem_cache_cpus(s);
3338
3339 return 1;
3340}
3341
3342static struct kmem_cache *kmem_cache_node;
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
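/*
 * No kmalloc_node is available yet, so do it by hand: carve the
 * kmem_cache_node structure for this node out of the first slab allocated
 * for the kmem_cache_node cache itself. Used only for early boot strapping.
 */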
3353static void early_kmem_cache_node_alloc(int node)
3354{
3355 struct page *page;
3356 struct kmem_cache_node *n;
3357
3358 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3359
3360 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3361
3362 BUG_ON(!page);
3363 if (page_to_nid(page) != node) {
3364 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3365 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3366 }
3367
3368 n = page->freelist;
3369 BUG_ON(!n);
3370#ifdef CONFIG_SLUB_DEBUG
3371 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3372 init_tracking(kmem_cache_node, n);
3373#endif
3374 n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3375 GFP_KERNEL);
3376 page->freelist = get_freepointer(kmem_cache_node, n);
3377 page->inuse = 1;
3378 page->frozen = 0;
3379 kmem_cache_node->node[node] = n;
3380 init_kmem_cache_node(n);
3381 inc_slabs_node(kmem_cache_node, node, page->objects);
3382
3383
3384
3385
3386
3387 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3388}
3389
3390static void free_kmem_cache_nodes(struct kmem_cache *s)
3391{
3392 int node;
3393 struct kmem_cache_node *n;
3394
3395 for_each_kmem_cache_node(s, node, n) {
3396 s->node[node] = NULL;
3397 kmem_cache_free(kmem_cache_node, n);
3398 }
3399}
3400
3401void __kmem_cache_release(struct kmem_cache *s)
3402{
3403 cache_random_seq_destroy(s);
3404 free_percpu(s->cpu_slab);
3405 free_kmem_cache_nodes(s);
3406}
3407
3408static int init_kmem_cache_nodes(struct kmem_cache *s)
3409{
3410 int node;
3411
3412 for_each_node_state(node, N_NORMAL_MEMORY) {
3413 struct kmem_cache_node *n;
3414
3415 if (slab_state == DOWN) {
3416 early_kmem_cache_node_alloc(node);
3417 continue;
3418 }
3419 n = kmem_cache_alloc_node(kmem_cache_node,
3420 GFP_KERNEL, node);
3421
3422 if (!n) {
3423 free_kmem_cache_nodes(s);
3424 return 0;
3425 }
3426
3427 init_kmem_cache_node(n);
3428 s->node[node] = n;
3429 }
3430 return 1;
3431}
3432
3433static void set_min_partial(struct kmem_cache *s, unsigned long min)
3434{
3435 if (min < MIN_PARTIAL)
3436 min = MIN_PARTIAL;
3437 else if (min > MAX_PARTIAL)
3438 min = MAX_PARTIAL;
3439 s->min_partial = min;
3440}
3441
3442static void set_cpu_partial(struct kmem_cache *s)
3443{
3444#ifdef CONFIG_SLUB_CPU_PARTIAL
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
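	/*
	 * cpu_partial determines how many objects may be kept on a cpu's
	 * partial list before the surplus is moved to the node partial list.
	 * Larger objects get smaller limits to bound the memory held per cpu.
	 */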
3462 if (!kmem_cache_has_cpu_partial(s))
3463 s->cpu_partial = 0;
3464 else if (s->size >= PAGE_SIZE)
3465 s->cpu_partial = 2;
3466 else if (s->size >= 1024)
3467 s->cpu_partial = 6;
3468 else if (s->size >= 256)
3469 s->cpu_partial = 13;
3470 else
3471 s->cpu_partial = 30;
3472#endif
3473}
3474
3475
3476
3477
3478
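/*
 * calculate_sizes() determines the layout of an object: word alignment,
 * poisoning and red zoning metadata, placement of the free pointer,
 * alloc/free tracking, and finally the page order and number of objects
 * per slab.
 */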
3479static int calculate_sizes(struct kmem_cache *s, int forced_order)
3480{
3481 slab_flags_t flags = s->flags;
3482 unsigned int size = s->object_size;
3483 unsigned int order;
3484
3485
3486
3487
3488
3489
3490 size = ALIGN(size, sizeof(void *));
3491
3492#ifdef CONFIG_SLUB_DEBUG
3493
3494
3495
3496
3497
3498 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3499 !s->ctor)
3500 s->flags |= __OBJECT_POISON;
3501 else
3502 s->flags &= ~__OBJECT_POISON;
3503
3504
3505
3506
3507
3508
3509
3510 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3511 size += sizeof(void *);
3512#endif
3513
3514
3515
3516
3517
3518 s->inuse = size;
3519
3520 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3521 s->ctor)) {
3522
3523
3524
3525
3526
3527
3528
3529
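		/*
		 * Relocate the free pointer after the object if the object is
		 * poisoned, RCU-freed or has a constructor: in those cases the
		 * object contents must stay intact while the object sits on a
		 * freelist, so the pointer cannot share space with them.
		 */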
3530 s->offset = size;
3531 size += sizeof(void *);
3532 }
3533
3534#ifdef CONFIG_SLUB_DEBUG
3535 if (flags & SLAB_STORE_USER)
3536
3537
3538
3539
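		/*
		 * Reserve room after the object (and free pointer) for the
		 * alloc and free track records.
		 */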
3540 size += 2 * sizeof(struct track);
3541#endif
3542
3543 kasan_cache_create(s, &size, &s->flags);
3544#ifdef CONFIG_SLUB_DEBUG
3545 if (flags & SLAB_RED_ZONE) {
3546
3547
3548
3549
3550
3551
3552
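		/*
		 * Add an extra word of padding after the object so that an
		 * overwrite from a neighbouring object hits the red zone
		 * instead of the tracking data or free pointer, and reserve
		 * an aligned left red zone in front of the object.
		 */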
3553 size += sizeof(void *);
3554
3555 s->red_left_pad = sizeof(void *);
3556 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3557 size += s->red_left_pad;
3558 }
3559#endif
3560
3561
3562
3563
3564
3565
3566 size = ALIGN(size, s->align);
3567 s->size = size;
3568 if (forced_order >= 0)
3569 order = forced_order;
3570 else
3571 order = calculate_order(size);
3572
3573 if ((int)order < 0)
3574 return 0;
3575
3576 s->allocflags = 0;
3577 if (order)
3578 s->allocflags |= __GFP_COMP;
3579
3580 if (s->flags & SLAB_CACHE_DMA)
3581 s->allocflags |= GFP_DMA;
3582
3583 if (s->flags & SLAB_CACHE_DMA32)
3584 s->allocflags |= GFP_DMA32;
3585
3586 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3587 s->allocflags |= __GFP_RECLAIMABLE;
3588
3589
3590
3591
3592 s->oo = oo_make(order, size);
3593 s->min = oo_make(get_order(size), size);
3594 if (oo_objects(s->oo) > oo_objects(s->max))
3595 s->max = s->oo;
3596
3597 return !!oo_objects(s->oo);
3598}
3599
3600static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3601{
3602 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3603#ifdef CONFIG_SLAB_FREELIST_HARDENED
3604 s->random = get_random_long();
3605#endif
3606
3607 if (!calculate_sizes(s, -1))
3608 goto error;
3609 if (disable_higher_order_debug) {
3610
3611
3612
3613
3614 if (get_order(s->size) > get_order(s->object_size)) {
3615 s->flags &= ~DEBUG_METADATA_FLAGS;
3616 s->offset = 0;
3617 if (!calculate_sizes(s, -1))
3618 goto error;
3619 }
3620 }
3621
3622#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3623 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3624 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3625
3626 s->flags |= __CMPXCHG_DOUBLE;
3627#endif
3628
3629
3630
3631
3632
3633 set_min_partial(s, ilog2(s->size) / 2);
3634
3635 set_cpu_partial(s);
3636
3637#ifdef CONFIG_NUMA
3638 s->remote_node_defrag_ratio = 1000;
3639#endif
3640
3641
3642 if (slab_state >= UP) {
3643 if (init_cache_random_seq(s))
3644 goto error;
3645 }
3646
3647 if (!init_kmem_cache_nodes(s))
3648 goto error;
3649
3650 if (alloc_kmem_cache_cpus(s))
3651 return 0;
3652
3653 free_kmem_cache_nodes(s);
3654error:
3655 if (flags & SLAB_PANIC)
3656 panic("Cannot create slab %s size=%u realsize=%u order=%u offset=%u flags=%lx\n",
3657 s->name, s->size, s->size,
3658 oo_order(s->oo), s->offset, (unsigned long)flags);
3659 return -EINVAL;
3660}
3661
3662static void list_slab_objects(struct kmem_cache *s, struct page *page,
3663 const char *text)
3664{
3665#ifdef CONFIG_SLUB_DEBUG
3666 void *addr = page_address(page);
3667 void *p;
3668 unsigned long *map = bitmap_zalloc(page->objects, GFP_ATOMIC);
3669 if (!map)
3670 return;
3671 slab_err(s, page, text, s->name);
3672 slab_lock(page);
3673
3674 get_map(s, page, map);
3675 for_each_object(p, s, addr, page->objects) {
3676
3677 if (!test_bit(slab_index(p, s, addr), map)) {
3678 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3679 print_tracking(s, p);
3680 }
3681 }
3682 slab_unlock(page);
3683 bitmap_free(map);
3684#endif
3685}
3686
3687
3688
3689
3690
3691
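/*
 * Attempt to free all partial slabs on a node. Called from
 * __kmem_cache_shutdown(); slabs that still hold objects are only reported
 * via list_slab_objects().
 */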
3692static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3693{
3694 LIST_HEAD(discard);
3695 struct page *page, *h;
3696
3697 BUG_ON(irqs_disabled());
3698 spin_lock_irq(&n->list_lock);
3699 list_for_each_entry_safe(page, h, &n->partial, slab_list) {
3700 if (!page->inuse) {
3701 remove_partial(n, page);
3702 list_add(&page->slab_list, &discard);
3703 } else {
3704 list_slab_objects(s, page,
3705 "Objects remaining in %s on __kmem_cache_shutdown()");
3706 }
3707 }
3708 spin_unlock_irq(&n->list_lock);
3709
3710 list_for_each_entry_safe(page, h, &discard, slab_list)
3711 discard_slab(s, page);
3712}
3713
3714bool __kmem_cache_empty(struct kmem_cache *s)
3715{
3716 int node;
3717 struct kmem_cache_node *n;
3718
3719 for_each_kmem_cache_node(s, node, n)
3720 if (n->nr_partial || slabs_node(s, node))
3721 return false;
3722 return true;
3723}
3724
3725
3726
3727
3728int __kmem_cache_shutdown(struct kmem_cache *s)
3729{
3730 int node;
3731 struct kmem_cache_node *n;
3732
3733 flush_all(s);
3734
3735 for_each_kmem_cache_node(s, node, n) {
3736 free_partial(s, n);
3737 if (n->nr_partial || slabs_node(s, node))
3738 return 1;
3739 }
3740 sysfs_slab_remove(s);
3741 return 0;
3742}
3743
3744
3745
3746
3747
3748static int __init setup_slub_min_order(char *str)
3749{
3750 get_option(&str, (int *)&slub_min_order);
3751
3752 return 1;
3753}
3754
3755__setup("slub_min_order=", setup_slub_min_order);
3756
3757static int __init setup_slub_max_order(char *str)
3758{
3759 get_option(&str, (int *)&slub_max_order);
3760 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
3761
3762 return 1;
3763}
3764
3765__setup("slub_max_order=", setup_slub_max_order);
3766
3767static int __init setup_slub_min_objects(char *str)
3768{
3769 get_option(&str, (int *)&slub_min_objects);
3770
3771 return 1;
3772}
3773
3774__setup("slub_min_objects=", setup_slub_min_objects);
3775
3776void *__kmalloc(size_t size, gfp_t flags)
3777{
3778 struct kmem_cache *s;
3779 void *ret;
3780
3781 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3782 return kmalloc_large(size, flags);
3783
3784 s = kmalloc_slab(size, flags);
3785
3786 if (unlikely(ZERO_OR_NULL_PTR(s)))
3787 return s;
3788
3789 ret = slab_alloc(s, flags, _RET_IP_);
3790
3791 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3792
3793 ret = kasan_kmalloc(s, ret, size, flags);
3794
3795 return ret;
3796}
3797EXPORT_SYMBOL(__kmalloc);
3798
3799#ifdef CONFIG_NUMA
3800static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3801{
3802 struct page *page;
3803 void *ptr = NULL;
3804
3805 flags |= __GFP_COMP;
3806 page = alloc_pages_node(node, flags, get_order(size));
3807 if (page)
3808 ptr = page_address(page);
3809
3810 return kmalloc_large_node_hook(ptr, size, flags);
3811}
3812
3813void *__kmalloc_node(size_t size, gfp_t flags, int node)
3814{
3815 struct kmem_cache *s;
3816 void *ret;
3817
3818 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3819 ret = kmalloc_large_node(size, flags, node);
3820
3821 trace_kmalloc_node(_RET_IP_, ret,
3822 size, PAGE_SIZE << get_order(size),
3823 flags, node);
3824
3825 return ret;
3826 }
3827
3828 s = kmalloc_slab(size, flags);
3829
3830 if (unlikely(ZERO_OR_NULL_PTR(s)))
3831 return s;
3832
3833 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3834
3835 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3836
3837 ret = kasan_kmalloc(s, ret, size, flags);
3838
3839 return ret;
3840}
3841EXPORT_SYMBOL(__kmalloc_node);
3842#endif
3843
3844#ifdef CONFIG_HARDENED_USERCOPY
3845
3846
3847
3848
3849
3850
3851
3852
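/*
 * Hardened usercopy: reject copies to/from user space that do not fall
 * entirely within a single object's usercopy region, or that touch the
 * red zone or other metadata.
 */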
3853void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
3854 bool to_user)
3855{
3856 struct kmem_cache *s;
3857 unsigned int offset;
3858 size_t object_size;
3859
3860 ptr = kasan_reset_tag(ptr);
3861
3862
3863 s = page->slab_cache;
3864
3865
3866 if (ptr < page_address(page))
3867 usercopy_abort("SLUB object not in SLUB page?!", NULL,
3868 to_user, 0, n);
3869
3870
3871 offset = (ptr - page_address(page)) % s->size;
3872
3873
3874 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3875 if (offset < s->red_left_pad)
3876 usercopy_abort("SLUB object in left red zone",
3877 s->name, to_user, offset, n);
3878 offset -= s->red_left_pad;
3879 }
3880
3881
3882 if (offset >= s->useroffset &&
3883 offset - s->useroffset <= s->usersize &&
3884 n <= s->useroffset - offset + s->usersize)
3885 return;
3886
3887
3888
3889
3890
3891
3892
3893 object_size = slab_ksize(s);
3894 if (usercopy_fallback &&
3895 offset <= object_size && n <= object_size - offset) {
3896 usercopy_warn("SLUB object", s->name, to_user, offset, n);
3897 return;
3898 }
3899
3900 usercopy_abort("SLUB object", s->name, to_user, offset, n);
3901}
3902#endif
3903
3904static size_t __ksize(const void *object)
3905{
3906 struct page *page;
3907
3908 if (unlikely(object == ZERO_SIZE_PTR))
3909 return 0;
3910
3911 page = virt_to_head_page(object);
3912
3913 if (unlikely(!PageSlab(page))) {
3914 WARN_ON(!PageCompound(page));
3915 return PAGE_SIZE << compound_order(page);
3916 }
3917
3918 return slab_ksize(page->slab_cache);
3919}
3920
3921size_t ksize(const void *object)
3922{
3923 size_t size = __ksize(object);
3924
3925
3926
3927 kasan_unpoison_shadow(object, size);
3928 return size;
3929}
3930EXPORT_SYMBOL(ksize);
3931
3932void kfree(const void *x)
3933{
3934 struct page *page;
3935 void *object = (void *)x;
3936
3937 trace_kfree(_RET_IP_, x);
3938
3939 if (unlikely(ZERO_OR_NULL_PTR(x)))
3940 return;
3941
3942 page = virt_to_head_page(x);
3943 if (unlikely(!PageSlab(page))) {
3944 BUG_ON(!PageCompound(page));
3945 kfree_hook(object);
3946 __free_pages(page, compound_order(page));
3947 return;
3948 }
3949 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3950}
3951EXPORT_SYMBOL(kfree);
3952
3953#define SHRINK_PROMOTE_MAX 32
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
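/*
 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
 * up most to the head of the partial lists. New allocations will then
 * fill those up and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This results in them
 * being allocated from last, increasing the chance that the last objects
 * are freed in them.
 */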
3964int __kmem_cache_shrink(struct kmem_cache *s)
3965{
3966 int node;
3967 int i;
3968 struct kmem_cache_node *n;
3969 struct page *page;
3970 struct page *t;
3971 struct list_head discard;
3972 struct list_head promote[SHRINK_PROMOTE_MAX];
3973 unsigned long flags;
3974 int ret = 0;
3975
3976 flush_all(s);
3977 for_each_kmem_cache_node(s, node, n) {
3978 INIT_LIST_HEAD(&discard);
3979 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
3980 INIT_LIST_HEAD(promote + i);
3981
3982 spin_lock_irqsave(&n->list_lock, flags);
3983
3984
3985
3986
3987
3988
3989
3990 list_for_each_entry_safe(page, t, &n->partial, slab_list) {
3991 int free = page->objects - page->inuse;
3992
3993
3994 barrier();
3995
3996
3997 BUG_ON(free <= 0);
3998
3999 if (free == page->objects) {
4000 list_move(&page->slab_list, &discard);
4001 n->nr_partial--;
4002 } else if (free <= SHRINK_PROMOTE_MAX)
4003 list_move(&page->slab_list, promote + free - 1);
4004 }
4005
4006
4007
4008
4009
4010 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4011 list_splice(promote + i, &n->partial);
4012
4013 spin_unlock_irqrestore(&n->list_lock, flags);
4014
4015
4016 list_for_each_entry_safe(page, t, &discard, slab_list)
4017 discard_slab(s, page);
4018
4019 if (slabs_node(s, node))
4020 ret = 1;
4021 }
4022
4023 return ret;
4024}
4025
4026#ifdef CONFIG_MEMCG
4027static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
4028{
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041 if (!__kmem_cache_shrink(s))
4042 sysfs_slab_remove(s);
4043}
4044
4045void __kmemcg_cache_deactivate(struct kmem_cache *s)
4046{
4047
4048
4049
4050
4051 slub_set_cpu_partial(s, 0);
4052 s->min_partial = 0;
4053
4054
4055
4056
4057
4058 slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
4059}
4060#endif
4061
4062static int slab_mem_going_offline_callback(void *arg)
4063{
4064 struct kmem_cache *s;
4065
4066 mutex_lock(&slab_mutex);
4067 list_for_each_entry(s, &slab_caches, list)
4068 __kmem_cache_shrink(s);
4069 mutex_unlock(&slab_mutex);
4070
4071 return 0;
4072}
4073
4074static void slab_mem_offline_callback(void *arg)
4075{
4076 struct kmem_cache_node *n;
4077 struct kmem_cache *s;
4078 struct memory_notify *marg = arg;
4079 int offline_node;
4080
4081 offline_node = marg->status_change_nid_normal;
4082
4083
4084
4085
4086
4087 if (offline_node < 0)
4088 return;
4089
4090 mutex_lock(&slab_mutex);
4091 list_for_each_entry(s, &slab_caches, list) {
4092 n = get_node(s, offline_node);
4093 if (n) {
4094
4095
4096
4097
4098
4099
4100 BUG_ON(slabs_node(s, offline_node));
4101
4102 s->node[offline_node] = NULL;
4103 kmem_cache_free(kmem_cache_node, n);
4104 }
4105 }
4106 mutex_unlock(&slab_mutex);
4107}
4108
4109static int slab_mem_going_online_callback(void *arg)
4110{
4111 struct kmem_cache_node *n;
4112 struct kmem_cache *s;
4113 struct memory_notify *marg = arg;
4114 int nid = marg->status_change_nid_normal;
4115 int ret = 0;
4116
4117
4118
4119
4120
4121 if (nid < 0)
4122 return 0;
4123
4124
4125
4126
4127
4128
4129 mutex_lock(&slab_mutex);
4130 list_for_each_entry(s, &slab_caches, list) {
4131
4132
4133
4134
4135
4136 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4137 if (!n) {
4138 ret = -ENOMEM;
4139 goto out;
4140 }
4141 init_kmem_cache_node(n);
4142 s->node[nid] = n;
4143 }
4144out:
4145 mutex_unlock(&slab_mutex);
4146 return ret;
4147}
4148
4149static int slab_memory_callback(struct notifier_block *self,
4150 unsigned long action, void *arg)
4151{
4152 int ret = 0;
4153
4154 switch (action) {
4155 case MEM_GOING_ONLINE:
4156 ret = slab_mem_going_online_callback(arg);
4157 break;
4158 case MEM_GOING_OFFLINE:
4159 ret = slab_mem_going_offline_callback(arg);
4160 break;
4161 case MEM_OFFLINE:
4162 case MEM_CANCEL_ONLINE:
4163 slab_mem_offline_callback(arg);
4164 break;
4165 case MEM_ONLINE:
4166 case MEM_CANCEL_OFFLINE:
4167 break;
4168 }
4169 if (ret)
4170 ret = notifier_from_errno(ret);
4171 else
4172 ret = NOTIFY_OK;
4173 return ret;
4174}
4175
4176static struct notifier_block slab_memory_callback_nb = {
4177 .notifier_call = slab_memory_callback,
4178 .priority = SLAB_CALLBACK_PRI,
4179};
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
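/*
 * Used for early kmem_cache structures that were allocated statically
 * during boot. Allocate them properly, copy the contents over and fix up
 * the slab_cache back pointers that still reference the static structure.
 */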
4191static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4192{
4193 int node;
4194 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4195 struct kmem_cache_node *n;
4196
4197 memcpy(s, static_cache, kmem_cache->object_size);
4198
4199
4200
4201
4202
4203
4204 __flush_cpu_slab(s, smp_processor_id());
4205 for_each_kmem_cache_node(s, node, n) {
4206 struct page *p;
4207
4208 list_for_each_entry(p, &n->partial, slab_list)
4209 p->slab_cache = s;
4210
4211#ifdef CONFIG_SLUB_DEBUG
4212 list_for_each_entry(p, &n->full, slab_list)
4213 p->slab_cache = s;
4214#endif
4215 }
4216 slab_init_memcg_params(s);
4217 list_add(&s->list, &slab_caches);
4218 memcg_link_cache(s);
4219 return s;
4220}
4221
4222void __init kmem_cache_init(void)
4223{
4224 static __initdata struct kmem_cache boot_kmem_cache,
4225 boot_kmem_cache_node;
4226
4227 if (debug_guardpage_minorder())
4228 slub_max_order = 0;
4229
4230 kmem_cache_node = &boot_kmem_cache_node;
4231 kmem_cache = &boot_kmem_cache;
4232
4233 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4234 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4235
4236 register_hotmemory_notifier(&slab_memory_callback_nb);
4237
4238
4239 slab_state = PARTIAL;
4240
4241 create_boot_cache(kmem_cache, "kmem_cache",
4242 offsetof(struct kmem_cache, node) +
4243 nr_node_ids * sizeof(struct kmem_cache_node *),
4244 SLAB_HWCACHE_ALIGN, 0, 0);
4245
4246 kmem_cache = bootstrap(&boot_kmem_cache);
4247 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4248
4249
4250 setup_kmalloc_cache_index_table();
4251 create_kmalloc_caches(0);
4252
4253
4254 init_freelist_randomization();
4255
4256 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4257 slub_cpu_dead);
4258
4259 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4260 cache_line_size(),
4261 slub_min_order, slub_max_order, slub_min_objects,
4262 nr_cpu_ids, nr_node_ids);
4263}
4264
4265void __init kmem_cache_init_late(void)
4266{
4267}
4268
4269struct kmem_cache *
4270__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4271 slab_flags_t flags, void (*ctor)(void *))
4272{
4273 struct kmem_cache *s, *c;
4274
4275 s = find_mergeable(size, align, flags, name, ctor);
4276 if (s) {
4277 s->refcount++;
4278
4279
4280
4281
4282
4283 s->object_size = max(s->object_size, size);
4284 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4285
4286 for_each_memcg_cache(c, s) {
4287 c->object_size = s->object_size;
4288 c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
4289 }
4290
4291 if (sysfs_slab_alias(s, name)) {
4292 s->refcount--;
4293 s = NULL;
4294 }
4295 }
4296
4297 return s;
4298}
4299
4300int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4301{
4302 int err;
4303
4304 err = kmem_cache_open(s, flags);
4305 if (err)
4306 return err;
4307
4308
4309 if (slab_state <= UP)
4310 return 0;
4311
4312 memcg_propagate_slab_attrs(s);
4313 err = sysfs_slab_add(s);
4314 if (err)
4315 __kmem_cache_release(s);
4316
4317 return err;
4318}
4319
4320void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4321{
4322 struct kmem_cache *s;
4323 void *ret;
4324
4325 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4326 return kmalloc_large(size, gfpflags);
4327
4328 s = kmalloc_slab(size, gfpflags);
4329
4330 if (unlikely(ZERO_OR_NULL_PTR(s)))
4331 return s;
4332
4333 ret = slab_alloc(s, gfpflags, caller);
4334
4335
4336 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4337
4338 return ret;
4339}
4340
4341#ifdef CONFIG_NUMA
4342void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4343 int node, unsigned long caller)
4344{
4345 struct kmem_cache *s;
4346 void *ret;
4347
4348 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4349 ret = kmalloc_large_node(size, gfpflags, node);
4350
4351 trace_kmalloc_node(caller, ret,
4352 size, PAGE_SIZE << get_order(size),
4353 gfpflags, node);
4354
4355 return ret;
4356 }
4357
4358 s = kmalloc_slab(size, gfpflags);
4359
4360 if (unlikely(ZERO_OR_NULL_PTR(s)))
4361 return s;
4362
4363 ret = slab_alloc_node(s, gfpflags, node, caller);
4364
4365
4366 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4367
4368 return ret;
4369}
4370#endif
4371
4372#ifdef CONFIG_SYSFS
4373static int count_inuse(struct page *page)
4374{
4375 return page->inuse;
4376}
4377
4378static int count_total(struct page *page)
4379{
4380 return page->objects;
4381}
4382#endif
4383
4384#ifdef CONFIG_SLUB_DEBUG
4385static int validate_slab(struct kmem_cache *s, struct page *page,
4386 unsigned long *map)
4387{
4388 void *p;
4389 void *addr = page_address(page);
4390
4391 if (!check_slab(s, page) ||
4392 !on_freelist(s, page, NULL))
4393 return 0;
4394
4395
4396 bitmap_zero(map, page->objects);
4397
4398 get_map(s, page, map);
4399 for_each_object(p, s, addr, page->objects) {
4400 if (test_bit(slab_index(p, s, addr), map))
4401 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4402 return 0;
4403 }
4404
4405 for_each_object(p, s, addr, page->objects)
4406 if (!test_bit(slab_index(p, s, addr), map))
4407 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4408 return 0;
4409 return 1;
4410}
4411
4412static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4413 unsigned long *map)
4414{
4415 slab_lock(page);
4416 validate_slab(s, page, map);
4417 slab_unlock(page);
4418}
4419
4420static int validate_slab_node(struct kmem_cache *s,
4421 struct kmem_cache_node *n, unsigned long *map)
4422{
4423 unsigned long count = 0;
4424 struct page *page;
4425 unsigned long flags;
4426
4427 spin_lock_irqsave(&n->list_lock, flags);
4428
4429 list_for_each_entry(page, &n->partial, slab_list) {
4430 validate_slab_slab(s, page, map);
4431 count++;
4432 }
4433 if (count != n->nr_partial)
4434 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4435 s->name, count, n->nr_partial);
4436
4437 if (!(s->flags & SLAB_STORE_USER))
4438 goto out;
4439
4440 list_for_each_entry(page, &n->full, slab_list) {
4441 validate_slab_slab(s, page, map);
4442 count++;
4443 }
4444 if (count != atomic_long_read(&n->nr_slabs))
4445 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4446 s->name, count, atomic_long_read(&n->nr_slabs));
4447
4448out:
4449 spin_unlock_irqrestore(&n->list_lock, flags);
4450 return count;
4451}
4452
4453static long validate_slab_cache(struct kmem_cache *s)
4454{
4455 int node;
4456 unsigned long count = 0;
4457 struct kmem_cache_node *n;
4458 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4459
4460 if (!map)
4461 return -ENOMEM;
4462
4463 flush_all(s);
4464 for_each_kmem_cache_node(s, node, n)
4465 count += validate_slab_node(s, n, map);
4466 bitmap_free(map);
4467 return count;
4468}
4469
4470
4471
4472
4473
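/*
 * Data structures for the alloc_calls / free_calls sysfs files: one
 * struct location per distinct call site, with counts, object age and
 * pid/cpu/node ranges.
 */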
4474struct location {
4475 unsigned long count;
4476 unsigned long addr;
4477 long long sum_time;
4478 long min_time;
4479 long max_time;
4480 long min_pid;
4481 long max_pid;
4482 DECLARE_BITMAP(cpus, NR_CPUS);
4483 nodemask_t nodes;
4484};
4485
4486struct loc_track {
4487 unsigned long max;
4488 unsigned long count;
4489 struct location *loc;
4490};
4491
4492static void free_loc_track(struct loc_track *t)
4493{
4494 if (t->max)
4495 free_pages((unsigned long)t->loc,
4496 get_order(sizeof(struct location) * t->max));
4497}
4498
4499static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4500{
4501 struct location *l;
4502 int order;
4503
4504 order = get_order(sizeof(struct location) * max);
4505
4506 l = (void *)__get_free_pages(flags, order);
4507 if (!l)
4508 return 0;
4509
4510 if (t->count) {
4511 memcpy(l, t->loc, sizeof(struct location) * t->count);
4512 free_loc_track(t);
4513 }
4514 t->max = max;
4515 t->loc = l;
4516 return 1;
4517}
4518
4519static int add_location(struct loc_track *t, struct kmem_cache *s,
4520 const struct track *track)
4521{
4522 long start, end, pos;
4523 struct location *l;
4524 unsigned long caddr;
4525 unsigned long age = jiffies - track->when;
4526
4527 start = -1;
4528 end = t->count;
4529
4530 for ( ; ; ) {
4531 pos = start + (end - start + 1) / 2;
4532
4533
4534
4535
4536
4537 if (pos == end)
4538 break;
4539
4540 caddr = t->loc[pos].addr;
4541 if (track->addr == caddr) {
4542
4543 l = &t->loc[pos];
4544 l->count++;
4545 if (track->when) {
4546 l->sum_time += age;
4547 if (age < l->min_time)
4548 l->min_time = age;
4549 if (age > l->max_time)
4550 l->max_time = age;
4551
4552 if (track->pid < l->min_pid)
4553 l->min_pid = track->pid;
4554 if (track->pid > l->max_pid)
4555 l->max_pid = track->pid;
4556
4557 cpumask_set_cpu(track->cpu,
4558 to_cpumask(l->cpus));
4559 }
4560 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4561 return 1;
4562 }
4563
4564 if (track->addr < caddr)
4565 end = pos;
4566 else
4567 start = pos;
4568 }
4569
4570
4571
4572
4573 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4574 return 0;
4575
4576 l = t->loc + pos;
4577 if (pos < t->count)
4578 memmove(l + 1, l,
4579 (t->count - pos) * sizeof(struct location));
4580 t->count++;
4581 l->count = 1;
4582 l->addr = track->addr;
4583 l->sum_time = age;
4584 l->min_time = age;
4585 l->max_time = age;
4586 l->min_pid = track->pid;
4587 l->max_pid = track->pid;
4588 cpumask_clear(to_cpumask(l->cpus));
4589 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4590 nodes_clear(l->nodes);
4591 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4592 return 1;
4593}
4594
4595static void process_slab(struct loc_track *t, struct kmem_cache *s,
4596 struct page *page, enum track_item alloc,
4597 unsigned long *map)
4598{
4599 void *addr = page_address(page);
4600 void *p;
4601
4602 bitmap_zero(map, page->objects);
4603 get_map(s, page, map);
4604
4605 for_each_object(p, s, addr, page->objects)
4606 if (!test_bit(slab_index(p, s, addr), map))
4607 add_location(t, s, get_track(s, p, alloc));
4608}
4609
4610static int list_locations(struct kmem_cache *s, char *buf,
4611 enum track_item alloc)
4612{
4613 int len = 0;
4614 unsigned long i;
4615 struct loc_track t = { 0, 0, NULL };
4616 int node;
4617 struct kmem_cache_node *n;
4618 unsigned long *map = bitmap_alloc(oo_objects(s->max), GFP_KERNEL);
4619
4620 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4621 GFP_KERNEL)) {
4622 bitmap_free(map);
4623 return sprintf(buf, "Out of memory\n");
4624 }
4625
4626 flush_all(s);
4627
4628 for_each_kmem_cache_node(s, node, n) {
4629 unsigned long flags;
4630 struct page *page;
4631
4632 if (!atomic_long_read(&n->nr_slabs))
4633 continue;
4634
4635 spin_lock_irqsave(&n->list_lock, flags);
4636 list_for_each_entry(page, &n->partial, slab_list)
4637 process_slab(&t, s, page, alloc, map);
4638 list_for_each_entry(page, &n->full, slab_list)
4639 process_slab(&t, s, page, alloc, map);
4640 spin_unlock_irqrestore(&n->list_lock, flags);
4641 }
4642
4643 for (i = 0; i < t.count; i++) {
4644 struct location *l = &t.loc[i];
4645
4646 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4647 break;
4648 len += sprintf(buf + len, "%7ld ", l->count);
4649
4650 if (l->addr)
4651 len += sprintf(buf + len, "%pS", (void *)l->addr);
4652 else
4653 len += sprintf(buf + len, "<not-available>");
4654
4655 if (l->sum_time != l->min_time) {
4656 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4657 l->min_time,
4658 (long)div_u64(l->sum_time, l->count),
4659 l->max_time);
4660 } else
4661 len += sprintf(buf + len, " age=%ld",
4662 l->min_time);
4663
4664 if (l->min_pid != l->max_pid)
4665 len += sprintf(buf + len, " pid=%ld-%ld",
4666 l->min_pid, l->max_pid);
4667 else
4668 len += sprintf(buf + len, " pid=%ld",
4669 l->min_pid);
4670
4671 if (num_online_cpus() > 1 &&
4672 !cpumask_empty(to_cpumask(l->cpus)) &&
4673 len < PAGE_SIZE - 60)
4674 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4675 " cpus=%*pbl",
4676 cpumask_pr_args(to_cpumask(l->cpus)));
4677
4678 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4679 len < PAGE_SIZE - 60)
4680 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4681 " nodes=%*pbl",
4682 nodemask_pr_args(&l->nodes));
4683
4684 len += sprintf(buf + len, "\n");
4685 }
4686
4687 free_loc_track(&t);
4688 bitmap_free(map);
4689 if (!t.count)
4690 len += sprintf(buf, "No data\n");
4691 return len;
4692}
4693#endif
4694
4695#ifdef SLUB_RESILIENCY_TEST
4696static void __init resiliency_test(void)
4697{
4698 u8 *p;
4699 int type = KMALLOC_NORMAL;
4700
4701 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4702
4703 pr_err("SLUB resiliency testing\n");
4704 pr_err("-----------------------\n");
4705 pr_err("A. Corruption after allocation\n");
4706
4707 p = kzalloc(16, GFP_KERNEL);
4708 p[16] = 0x12;
4709 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4710 p + 16);
4711
4712 validate_slab_cache(kmalloc_caches[type][4]);
4713
4714
4715 p = kzalloc(32, GFP_KERNEL);
4716 p[32 + sizeof(void *)] = 0x34;
4717 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4718 p);
4719 pr_err("If allocated object is overwritten then not detectable\n\n");
4720
4721 validate_slab_cache(kmalloc_caches[type][5]);
4722 p = kzalloc(64, GFP_KERNEL);
4723 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4724 *p = 0x56;
4725 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4726 p);
4727 pr_err("If allocated object is overwritten then not detectable\n\n");
4728 validate_slab_cache(kmalloc_caches[type][6]);
4729
4730 pr_err("\nB. Corruption after free\n");
4731 p = kzalloc(128, GFP_KERNEL);
4732 kfree(p);
4733 *p = 0x78;
4734 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4735 validate_slab_cache(kmalloc_caches[type][7]);
4736
4737 p = kzalloc(256, GFP_KERNEL);
4738 kfree(p);
4739 p[50] = 0x9a;
4740 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4741 validate_slab_cache(kmalloc_caches[type][8]);
4742
4743 p = kzalloc(512, GFP_KERNEL);
4744 kfree(p);
4745 p[512] = 0xab;
4746 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4747 validate_slab_cache(kmalloc_caches[type][9]);
4748}
4749#else
4750#ifdef CONFIG_SYSFS
4751static void resiliency_test(void) {};
4752#endif
4753#endif
4754
4755#ifdef CONFIG_SYSFS
4756enum slab_stat_type {
4757 SL_ALL,
4758 SL_PARTIAL,
4759 SL_CPU,
4760 SL_OBJECTS,
4761 SL_TOTAL
4762};
4763
4764#define SO_ALL (1 << SL_ALL)
4765#define SO_PARTIAL (1 << SL_PARTIAL)
4766#define SO_CPU (1 << SL_CPU)
4767#define SO_OBJECTS (1 << SL_OBJECTS)
4768#define SO_TOTAL (1 << SL_TOTAL)
4769
4770#ifdef CONFIG_MEMCG
4771static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4772
4773static int __init setup_slub_memcg_sysfs(char *str)
4774{
4775 int v;
4776
4777 if (get_option(&str, &v) > 0)
4778 memcg_sysfs_enabled = v;
4779
4780 return 1;
4781}
4782
4783__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4784#endif
4785
4786static ssize_t show_slab_objects(struct kmem_cache *s,
4787 char *buf, unsigned long flags)
4788{
4789 unsigned long total = 0;
4790 int node;
4791 int x;
4792 unsigned long *nodes;
4793
4794 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
4795 if (!nodes)
4796 return -ENOMEM;
4797
4798 if (flags & SO_CPU) {
4799 int cpu;
4800
4801 for_each_possible_cpu(cpu) {
4802 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4803 cpu);
4804 int node;
4805 struct page *page;
4806
4807 page = READ_ONCE(c->page);
4808 if (!page)
4809 continue;
4810
4811 node = page_to_nid(page);
4812 if (flags & SO_TOTAL)
4813 x = page->objects;
4814 else if (flags & SO_OBJECTS)
4815 x = page->inuse;
4816 else
4817 x = 1;
4818
4819 total += x;
4820 nodes[node] += x;
4821
4822 page = slub_percpu_partial_read_once(c);
4823 if (page) {
4824 node = page_to_nid(page);
4825 if (flags & SO_TOTAL)
4826 WARN_ON_ONCE(1);
4827 else if (flags & SO_OBJECTS)
4828 WARN_ON_ONCE(1);
4829 else
4830 x = page->pages;
4831 total += x;
4832 nodes[node] += x;
4833 }
4834 }
4835 }
4836
4837 get_online_mems();
4838#ifdef CONFIG_SLUB_DEBUG
4839 if (flags & SO_ALL) {
4840 struct kmem_cache_node *n;
4841
4842 for_each_kmem_cache_node(s, node, n) {
4843
4844 if (flags & SO_TOTAL)
4845 x = atomic_long_read(&n->total_objects);
4846 else if (flags & SO_OBJECTS)
4847 x = atomic_long_read(&n->total_objects) -
4848 count_partial(n, count_free);
4849 else
4850 x = atomic_long_read(&n->nr_slabs);
4851 total += x;
4852 nodes[node] += x;
4853 }
4854
4855 } else
4856#endif
4857 if (flags & SO_PARTIAL) {
4858 struct kmem_cache_node *n;
4859
4860 for_each_kmem_cache_node(s, node, n) {
4861 if (flags & SO_TOTAL)
4862 x = count_partial(n, count_total);
4863 else if (flags & SO_OBJECTS)
4864 x = count_partial(n, count_inuse);
4865 else
4866 x = n->nr_partial;
4867 total += x;
4868 nodes[node] += x;
4869 }
4870 }
4871 x = sprintf(buf, "%lu", total);
4872#ifdef CONFIG_NUMA
4873 for (node = 0; node < nr_node_ids; node++)
4874 if (nodes[node])
4875 x += sprintf(buf + x, " N%d=%lu",
4876 node, nodes[node]);
4877#endif
4878 put_online_mems();
4879 kfree(nodes);
4880 return x + sprintf(buf + x, "\n");
4881}
4882
4883#ifdef CONFIG_SLUB_DEBUG
4884static int any_slab_objects(struct kmem_cache *s)
4885{
4886 int node;
4887 struct kmem_cache_node *n;
4888
4889 for_each_kmem_cache_node(s, node, n)
4890 if (atomic_long_read(&n->total_objects))
4891 return 1;
4892
4893 return 0;
4894}
4895#endif
4896
4897#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4898#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4899
4900struct slab_attribute {
4901 struct attribute attr;
4902 ssize_t (*show)(struct kmem_cache *s, char *buf);
4903 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4904};
4905
4906#define SLAB_ATTR_RO(_name) \
4907 static struct slab_attribute _name##_attr = \
4908 __ATTR(_name, 0400, _name##_show, NULL)
4909
4910#define SLAB_ATTR(_name) \
4911 static struct slab_attribute _name##_attr = \
4912 __ATTR(_name, 0600, _name##_show, _name##_store)
4913
4914static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4915{
4916 return sprintf(buf, "%u\n", s->size);
4917}
4918SLAB_ATTR_RO(slab_size);
4919
4920static ssize_t align_show(struct kmem_cache *s, char *buf)
4921{
4922 return sprintf(buf, "%u\n", s->align);
4923}
4924SLAB_ATTR_RO(align);
4925
4926static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4927{
4928 return sprintf(buf, "%u\n", s->object_size);
4929}
4930SLAB_ATTR_RO(object_size);
4931
4932static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4933{
4934 return sprintf(buf, "%u\n", oo_objects(s->oo));
4935}
4936SLAB_ATTR_RO(objs_per_slab);
4937
4938static ssize_t order_store(struct kmem_cache *s,
4939 const char *buf, size_t length)
4940{
4941 unsigned int order;
4942 int err;
4943
4944 err = kstrtouint(buf, 10, &order);
4945 if (err)
4946 return err;
4947
4948 if (order > slub_max_order || order < slub_min_order)
4949 return -EINVAL;
4950
4951 calculate_sizes(s, order);
4952 return length;
4953}
4954
4955static ssize_t order_show(struct kmem_cache *s, char *buf)
4956{
4957 return sprintf(buf, "%u\n", oo_order(s->oo));
4958}
4959SLAB_ATTR(order);
4960
4961static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4962{
4963 return sprintf(buf, "%lu\n", s->min_partial);
4964}
4965
4966static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4967 size_t length)
4968{
4969 unsigned long min;
4970 int err;
4971
4972 err = kstrtoul(buf, 10, &min);
4973 if (err)
4974 return err;
4975
4976 set_min_partial(s, min);
4977 return length;
4978}
4979SLAB_ATTR(min_partial);
4980
4981static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4982{
4983 return sprintf(buf, "%u\n", slub_cpu_partial(s));
4984}
4985
4986static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4987 size_t length)
4988{
4989 unsigned int objects;
4990 int err;
4991
4992 err = kstrtouint(buf, 10, &objects);
4993 if (err)
4994 return err;
4995 if (objects && !kmem_cache_has_cpu_partial(s))
4996 return -EINVAL;
4997
4998 slub_set_cpu_partial(s, objects);
4999 flush_all(s);
5000 return length;
5001}
5002SLAB_ATTR(cpu_partial);
5003
5004static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5005{
5006 if (!s->ctor)
5007 return 0;
5008 return sprintf(buf, "%pS\n", s->ctor);
5009}
5010SLAB_ATTR_RO(ctor);
5011
5012static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5013{
5014 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5015}
5016SLAB_ATTR_RO(aliases);
5017
5018static ssize_t partial_show(struct kmem_cache *s, char *buf)
5019{
5020 return show_slab_objects(s, buf, SO_PARTIAL);
5021}
5022SLAB_ATTR_RO(partial);
5023
5024static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5025{
5026 return show_slab_objects(s, buf, SO_CPU);
5027}
5028SLAB_ATTR_RO(cpu_slabs);
5029
5030static ssize_t objects_show(struct kmem_cache *s, char *buf)
5031{
5032 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5033}
5034SLAB_ATTR_RO(objects);
5035
5036static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5037{
5038 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5039}
5040SLAB_ATTR_RO(objects_partial);
5041
5042static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5043{
5044 int objects = 0;
5045 int pages = 0;
5046 int cpu;
5047 int len;
5048
5049 for_each_online_cpu(cpu) {
5050 struct page *page;
5051
5052 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5053
5054 if (page) {
5055 pages += page->pages;
5056 objects += page->pobjects;
5057 }
5058 }
5059
5060 len = sprintf(buf, "%d(%d)", objects, pages);
5061
5062#ifdef CONFIG_SMP
5063 for_each_online_cpu(cpu) {
5064 struct page *page;
5065
5066 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5067
5068 if (page && len < PAGE_SIZE - 20)
5069 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5070 page->pobjects, page->pages);
5071 }
5072#endif
5073 return len + sprintf(buf + len, "\n");
5074}
5075SLAB_ATTR_RO(slabs_cpu_partial);
5076
5077static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5078{
5079 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5080}
5081
5082static ssize_t reclaim_account_store(struct kmem_cache *s,
5083 const char *buf, size_t length)
5084{
5085 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5086 if (buf[0] == '1')
5087 s->flags |= SLAB_RECLAIM_ACCOUNT;
5088 return length;
5089}
5090SLAB_ATTR(reclaim_account);
5091
5092static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5093{
5094 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5095}
5096SLAB_ATTR_RO(hwcache_align);
5097
5098#ifdef CONFIG_ZONE_DMA
5099static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5100{
5101 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5102}
5103SLAB_ATTR_RO(cache_dma);
5104#endif
5105
5106static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5107{
5108 return sprintf(buf, "%u\n", s->usersize);
5109}
5110SLAB_ATTR_RO(usersize);
5111
5112static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5113{
5114 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5115}
5116SLAB_ATTR_RO(destroy_by_rcu);
5117
5118#ifdef CONFIG_SLUB_DEBUG
5119static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5120{
5121 return show_slab_objects(s, buf, SO_ALL);
5122}
5123SLAB_ATTR_RO(slabs);
5124
5125static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5126{
5127 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5128}
5129SLAB_ATTR_RO(total_objects);
5130
5131static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5132{
5133 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5134}
5135
5136static ssize_t sanity_checks_store(struct kmem_cache *s,
5137 const char *buf, size_t length)
5138{
5139 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5140 if (buf[0] == '1') {
5141 s->flags &= ~__CMPXCHG_DOUBLE;
5142 s->flags |= SLAB_CONSISTENCY_CHECKS;
5143 }
5144 return length;
5145}
5146SLAB_ATTR(sanity_checks);
5147
5148static ssize_t trace_show(struct kmem_cache *s, char *buf)
5149{
5150 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5151}
5152
5153static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5154 size_t length)
5155{
5156
5157
5158
5159
5160
5161 if (s->refcount > 1)
5162 return -EINVAL;
5163
5164 s->flags &= ~SLAB_TRACE;
5165 if (buf[0] == '1') {
5166 s->flags &= ~__CMPXCHG_DOUBLE;
5167 s->flags |= SLAB_TRACE;
5168 }
5169 return length;
5170}
5171SLAB_ATTR(trace);
5172
5173static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5174{
5175 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5176}
5177
5178static ssize_t red_zone_store(struct kmem_cache *s,
5179 const char *buf, size_t length)
5180{
5181 if (any_slab_objects(s))
5182 return -EBUSY;
5183
5184 s->flags &= ~SLAB_RED_ZONE;
5185 if (buf[0] == '1') {
5186 s->flags |= SLAB_RED_ZONE;
5187 }
5188 calculate_sizes(s, -1);
5189 return length;
5190}
5191SLAB_ATTR(red_zone);
5192
5193static ssize_t poison_show(struct kmem_cache *s, char *buf)
5194{
5195 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5196}
5197
5198static ssize_t poison_store(struct kmem_cache *s,
5199 const char *buf, size_t length)
5200{
5201 if (any_slab_objects(s))
5202 return -EBUSY;
5203
5204 s->flags &= ~SLAB_POISON;
5205 if (buf[0] == '1') {
5206 s->flags |= SLAB_POISON;
5207 }
5208 calculate_sizes(s, -1);
5209 return length;
5210}
5211SLAB_ATTR(poison);
5212
5213static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5214{
5215 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5216}
5217
5218static ssize_t store_user_store(struct kmem_cache *s,
5219 const char *buf, size_t length)
5220{
5221 if (any_slab_objects(s))
5222 return -EBUSY;
5223
5224 s->flags &= ~SLAB_STORE_USER;
5225 if (buf[0] == '1') {
5226 s->flags &= ~__CMPXCHG_DOUBLE;
5227 s->flags |= SLAB_STORE_USER;
5228 }
5229 calculate_sizes(s, -1);
5230 return length;
5231}
5232SLAB_ATTR(store_user);

static ssize_t validate_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t validate_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	int ret = -EINVAL;

	if (buf[0] == '1') {
		ret = validate_slab_cache(s);
		if (ret >= 0)
			ret = length;
	}
	return ret;
}
SLAB_ATTR(validate);
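
/*
 * validate is write-only in practice: reading it returns an empty string,
 * while writing '1' walks every slab of the cache and verifies its metadata.
 * Hypothetical usage (cache name is an example only):
 *
 *	# echo 1 > /sys/kernel/slab/my_cache/validate
 */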

static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_ALLOC);
}
SLAB_ATTR_RO(alloc_calls);

static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_FREE);
}
SLAB_ATTR_RO(free_calls);
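
/*
 * alloc_calls and free_calls only have data when SLAB_STORE_USER is active,
 * e.g. after booting with slub_debug=U (optionally restricted to a cache
 * list, slub_debug=U,<caches>); otherwise they return -ENOSYS.
 */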
#endif

#ifdef CONFIG_FAILSLAB
static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}

static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
							size_t length)
{
	if (s->refcount > 1)
		return -EINVAL;

	s->flags &= ~SLAB_FAILSLAB;
	if (buf[0] == '1')
		s->flags |= SLAB_FAILSLAB;
	return length;
}
SLAB_ATTR(failslab);
#endif

static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	if (buf[0] == '1')
		kmem_cache_shrink(s);
	else
		return -EINVAL;
	return length;
}
SLAB_ATTR(shrink);
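
/*
 * Writing '1' to shrink invokes kmem_cache_shrink(), which discards empty
 * slabs and sorts the remaining partial slabs by the number of objects in
 * use; any other value is rejected with -EINVAL.
 */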

#ifdef CONFIG_NUMA
static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%u\n", s->remote_node_defrag_ratio / 10);
}

static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	unsigned int ratio;
	int err;

	err = kstrtouint(buf, 10, &ratio);
	if (err)
		return err;
	if (ratio > 100)
		return -ERANGE;

	s->remote_node_defrag_ratio = ratio * 10;

	return length;
}
SLAB_ATTR(remote_node_defrag_ratio);
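
/*
 * The ratio is exposed as a percentage (0-100) but stored internally scaled
 * by 10, hence the divide in the show path and the multiply in the store
 * path: writing 20 stores 200 and reads back as 20.
 */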
#endif

#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	unsigned long sum = 0;
	int cpu;
	int len;
	int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
	}

	len = sprintf(buf, "%lu", sum);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		if (data[cpu] && len < PAGE_SIZE - 20)
			len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
	}
#endif
	kfree(data);
	return len + sprintf(buf + len, "\n");
}
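
/*
 * The resulting line is the total followed by per-cpu breakdowns, e.g.
 * "4021 C0=1987 C2=2034"; CPUs whose counter is zero are omitted and the
 * output is clamped so it stays within one page.
 */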

static void clear_stat(struct kmem_cache *s, enum stat_item si)
{
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
}

#define STAT_ATTR(si, text) 					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);						\

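/*
 * Every statistics file defined below can be reset by writing '0' to it;
 * a hypothetical example for the allocation fast path counter:
 *
 *	# echo 0 > /sys/kernel/slab/my_cache/alloc_fastpath
 */
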
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
	&usersize_attr.attr,

	NULL
};

static const struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
#ifdef CONFIG_MEMCG
	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
		struct kmem_cache *c;

		mutex_lock(&slab_mutex);
		if (s->max_attr_size < len)
			s->max_attr_size = len;
		/*
		 * This is a best effort propagation, so this function's
		 * return value is determined by the parent cache only: not
		 * all attributes have well defined update semantics, so the
		 * children could return different values for the same write.
		 *
		 * Returning the error of any child that fails would not tell
		 * the user anything useful about the state of the cache.
		 * Only the error code of the parent cache has well defined
		 * semantics: the cache written to directly either failed or
		 * succeeded, and on success we loop through the descendants
		 * with best-effort propagation.
		 */
		for_each_memcg_cache(c, s)
			attribute->store(c, buf, len);
		mutex_unlock(&slab_mutex);
	}
#endif
	return err;
}

static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	int i;
	char *buffer = NULL;
	struct kmem_cache *root_cache;

	if (is_root_cache(s))
		return;

	root_cache = s->memcg_params.root_cache;

	/*
	 * A zero max_attr_size means the root cache never had an attribute
	 * written, so there is no point in copying default values around.
	 */
	if (!root_cache->max_attr_size)
		return;

	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
		char mbuf[64];
		char *buf;
		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
		ssize_t len;

		if (!attr || !attr->store || !attr->show)
			continue;

		/*
		 * Allocating here is unfortunate, so do it only as a
		 * fallback; once allocated, the buffer is reused until the
		 * end of the loop.
		 *
		 * Most slub attributes are very small, but sysfs allows
		 * buffers up to a page, so large values can theoretically
		 * happen.
		 */
		if (buffer)
			buf = buffer;
		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
			buf = mbuf;
		else {
			buffer = (char *) get_zeroed_page(GFP_KERNEL);
			if (WARN_ON(!buffer))
				continue;
			buf = buffer;
		}

		len = attr->show(root_cache, buf);
		if (len > 0)
			attr->store(s, buf, len);
	}

	if (buffer)
		free_page((unsigned long)buffer);
#endif
}

static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	if (!is_root_cache(s))
		return s->memcg_params.root_cache->memcg_kset;
#endif
	return slab_kset;
}

#define ID_STR_LENGTH 64

/*
 * Create a unique string id for a slab cache.
 *
 * Format:	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';
	/*
	 * First the flags affecting slabcache operations. We only get here
	 * for aliasable slabs, so we do not need to support the mixed
	 * (debug) slabs.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_CACHE_DMA32)
		*p++ = 'D';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07u", s->size);

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}
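
/*
 * For illustration only: a DMA cache of size 192 with no other flags set
 * gets the id ":d-0000192", while a cache with none of the flags above is
 * simply named ":0000192" (no '-' separator is emitted in that case).
 */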

static void sysfs_slab_remove_workfn(struct work_struct *work)
{
	struct kmem_cache *s =
		container_of(work, struct kmem_cache, kobj_remove_work);

	if (!s->kobj.state_in_sysfs)
		/*
		 * For a memcg cache, this may be called during
		 * deactivation and again on shutdown.  Remove only once.
		 * A cache is never shut down before deactivation is
		 * complete, so no need to worry about synchronization.
		 */
		goto out;

#ifdef CONFIG_MEMCG
	kset_unregister(s->memcg_kset);
#endif
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
out:
	kobject_put(&s->kobj);
}

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);

	if (!kset) {
		kobject_init(&s->kobj, &slab_ktype);
		return 0;
	}

	if (!unmergeable && disable_higher_order_debug &&
			(slub_debug & DEBUG_METADATA_FLAGS))
		unmergeable = 1;

	if (unmergeable) {
		/*
		 * The slabcache can never be merged, so we can use the name
		 * proper. This is typically the case for debug situations,
		 * and it lets us catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

#ifdef CONFIG_MEMCG
	if (is_root_cache(s) && memcg_sysfs_enabled) {
		s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
		if (!s->memcg_kset) {
			err = -ENOMEM;
			goto out_del_kobj;
		}
	}
#endif

	kobject_uevent(&s->kobj, KOBJ_ADD);
	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}
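
/*
 * Net effect, sketched for a mergeable cache: the kobject itself is
 * registered under a unique id such as ":0000256", and the human readable
 * cache name becomes a symlink to it in /sys/kernel/slab/, so several merged
 * caches can share one directory of attributes.
 */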

static void sysfs_slab_remove(struct kmem_cache *s)
{
	if (slab_state < FULL)
		/*
		 * Sysfs has not been setup yet so no need to remove the
		 * kobj
		 */
		return;

	kobject_get(&s->kobj);
	schedule_work(&s->kobj_remove_work);
}

void sysfs_slab_unlink(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_del(&s->kobj);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
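
/*
 * Note that SLUB does not track "active" slabs separately, so active_slabs
 * is reported equal to num_slabs; only the object counts distinguish used
 * from free.
 */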

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif
