/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>
#include <linux/random.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 * The slab_lock is a bit spin lock in the page flags and protects the
 * freelist and the counters of a slab.  The list_lock of a node protects
 * its partial and full lists.
 *
 * Slabs that are bound to a cpu are "frozen": they are exempt from list
 * processing, and frees to a frozen slab only update its freelist and
 * counters.  Only the owner of a frozen slab may retrieve objects from it
 * or relinquish it.
 *
 * Slabs of caches with debugging enabled bypass the cmpxchg based fast
 * paths so that the consistency checks can run under the slab lock.
 */

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

void *fixup_red_left(struct kmem_cache *s, void *p)
{
	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
		p += s->red_left_pad;

	return p;
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/*
 * Enable to test recovery from slab corruption on boot.
 */
#undef SLUB_RESILIENCY_TEST

/*
 * Enable to log cmpxchg failures.
 */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use CMPXCHG because there might be consistency
 * issues when checking or reading debug information.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debugging flags that require metadata to be stored in the slab.  These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
/* Poison object */
#define __OBJECT_POISON		((slab_flags_t __force)0x80000000U)
/* Use cmpxchg_double */
#define __CMPXCHG_DOUBLE	((slab_flags_t __force)0x40000000U)

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
static void sysfs_slab_remove(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	/*
	 * The rmw is racy on a preemptible kernel but this is acceptable, so
	 * avoid this_cpu_add()'s irq-disable overhead.
	 */
	raw_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

/*
 * Returns freelist pointer (ptr). With hardening, this is obfuscated
 * with an XOR of the pointer value, the location it is stored at and a
 * per-cache random number, so that a corrupted or leaked freelist entry
 * is much harder to exploit.
 */
static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
				 unsigned long ptr_addr)
{
#ifdef CONFIG_SLAB_FREELIST_HARDENED
	return (void *)((unsigned long)ptr ^ s->random ^ ptr_addr);
#else
	return ptr;
#endif
}

/* Returns the freelist pointer recorded at location ptr_addr. */
static inline void *freelist_dereference(const struct kmem_cache *s,
					 void *ptr_addr)
{
	return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
			    (unsigned long)ptr_addr);
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return freelist_dereference(s, object + s->offset);
}

static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
	if (object)
		prefetch(freelist_dereference(s, object + s->offset));
}

static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	unsigned long freepointer_addr;
	void *p;

	if (!debug_pagealloc_enabled())
		return get_freepointer(s, object);

	freepointer_addr = (unsigned long)object + s->offset;
	probe_kernel_read(&p, (void **)freepointer_addr, sizeof(p));
	return freelist_ptr(s, p, freepointer_addr);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	unsigned long freeptr_addr = (unsigned long)object + s->offset;

#ifdef CONFIG_SLAB_FREELIST_HARDENED
	BUG_ON(object == fp); /* naive detection of double free */
#endif

	*(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = fixup_red_left(__s, __addr); \
		__p < (__addr) + (__objects) * (__s)->size; \
		__p += (__s)->size)

#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
	for (__p = fixup_red_left(__s, __addr), __idx = 1; \
		__idx <= __objects; \
		__p += (__s)->size, __idx++)

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline int order_objects(int order, unsigned long size, int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
		unsigned long size, int reserved)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size, reserved)
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	__bit_spin_unlock(PG_locked, &page->flags);
}

static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
{
	struct page tmp;
	tmp.counters = counters_new;
	/*
	 * page->counters can cover frozen/inuse/objects as well
	 * as page->_refcount.  If we assign to ->counters directly
	 * we run the risk of losing updates to page->_refcount, so
	 * be careful and only assign to the fields we need.
	 */
	page->frozen  = tmp.frozen;
	page->inuse   = tmp.inuse;
	page->objects = tmp.objects;
}
373
/* Interrupts must be disabled (for the fallback code to work right) */
375static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
376 void *freelist_old, unsigned long counters_old,
377 void *freelist_new, unsigned long counters_new,
378 const char *n)
379{
380 VM_BUG_ON(!irqs_disabled());
381#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
383 if (s->flags & __CMPXCHG_DOUBLE) {
384 if (cmpxchg_double(&page->freelist, &page->counters,
385 freelist_old, counters_old,
386 freelist_new, counters_new))
387 return true;
388 } else
389#endif
390 {
391 slab_lock(page);
392 if (page->freelist == freelist_old &&
393 page->counters == counters_old) {
394 page->freelist = freelist_new;
395 set_page_slub_counters(page, counters_new);
396 slab_unlock(page);
397 return true;
398 }
399 slab_unlock(page);
400 }
401
402 cpu_relax();
403 stat(s, CMPXCHG_DOUBLE_FAIL);
404
405#ifdef SLUB_DEBUG_CMPXCHG
406 pr_info("%s %s: cmpxchg double redo ", n, s->name);
407#endif
408
409 return false;
410}
411
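
/*
 * Interrupt-safe variant of __cmpxchg_double_slab(): the fallback path
 * that takes the slab_lock disables interrupts around the update itself,
 * so callers do not need interrupts disabled (unlike
 * __cmpxchg_double_slab() above, which requires it).
 */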
412static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
413 void *freelist_old, unsigned long counters_old,
414 void *freelist_new, unsigned long counters_new,
415 const char *n)
416{
417#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
419 if (s->flags & __CMPXCHG_DOUBLE) {
420 if (cmpxchg_double(&page->freelist, &page->counters,
421 freelist_old, counters_old,
422 freelist_new, counters_new))
423 return true;
424 } else
425#endif
426 {
427 unsigned long flags;
428
429 local_irq_save(flags);
430 slab_lock(page);
431 if (page->freelist == freelist_old &&
432 page->counters == counters_old) {
433 page->freelist = freelist_new;
434 set_page_slub_counters(page, counters_new);
435 slab_unlock(page);
436 local_irq_restore(flags);
437 return true;
438 }
439 slab_unlock(page);
440 local_irq_restore(flags);
441 }
442
443 cpu_relax();
444 stat(s, CMPXCHG_DOUBLE_FAIL);
445
446#ifdef SLUB_DEBUG_CMPXCHG
447 pr_info("%s %s: cmpxchg double redo ", n, s->name);
448#endif
449
450 return false;
451}
452
453#ifdef CONFIG_SLUB_DEBUG
/*
 * Determine a map of object in use on a page.
 *
 * Node listlock must be held to guarantee that the page does
 * not vanish from under us.
 */
460static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
461{
462 void *p;
463 void *addr = page_address(page);
464
465 for (p = page->freelist; p; p = get_freepointer(s, p))
466 set_bit(slab_index(p, s, addr), map);
467}
468
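/*
 * With SLAB_RED_ZONE the object pointer handed out is offset by
 * s->red_left_pad (see fixup_red_left()).  The helpers below convert
 * between that external view and the real start/size of the allocated
 * area.
 */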
469static inline int size_from_object(struct kmem_cache *s)
470{
471 if (s->flags & SLAB_RED_ZONE)
472 return s->size - s->red_left_pad;
473
474 return s->size;
475}
476
477static inline void *restore_red_left(struct kmem_cache *s, void *p)
478{
479 if (s->flags & SLAB_RED_ZONE)
480 p -= s->red_left_pad;
481
482 return p;
483}
484
/*
 * Debug settings:
 */
488#if defined(CONFIG_SLUB_DEBUG_ON)
489static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
490#else
491static slab_flags_t slub_debug;
492#endif
493
494static char *slub_debug_slabs;
495static int disable_higher_order_debug;
496
/*
 * slub is about to manipulate internal object metadata.  This memory lies
 * outside the range of the allocated object, so accessing it would normally
 * be reported by kasan as a bounds error.  metadata_access_enable() is used
 * to mark this range as valid.
 */
503static inline void metadata_access_enable(void)
504{
505 kasan_disable_current();
506}
507
508static inline void metadata_access_disable(void)
509{
510 kasan_enable_current();
511}
512
/*
 * Object debugging
 */

/* Verify that a pointer has an address that is valid within a slab page */
518static inline int check_valid_pointer(struct kmem_cache *s,
519 struct page *page, void *object)
520{
521 void *base;
522
523 if (!object)
524 return 1;
525
526 base = page_address(page);
527 object = restore_red_left(s, object);
528 if (object < base || object >= base + page->objects * s->size ||
529 (object - base) % s->size) {
530 return 0;
531 }
532
533 return 1;
534}
535
536static void print_section(char *level, char *text, u8 *addr,
537 unsigned int length)
538{
539 metadata_access_enable();
540 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
541 length, 1);
542 metadata_access_disable();
543}
544
545static struct track *get_track(struct kmem_cache *s, void *object,
546 enum track_item alloc)
547{
548 struct track *p;
549
550 if (s->offset)
551 p = object + s->offset + sizeof(void *);
552 else
553 p = object + s->inuse;
554
555 return p + alloc;
556}
557
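/*
 * Record who last allocated or freed an object: the caller address, cpu,
 * pid and timestamp, plus the full call stack when CONFIG_STACKTRACE is
 * enabled.  Passing addr == 0 clears the track instead.
 */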
558static void set_track(struct kmem_cache *s, void *object,
559 enum track_item alloc, unsigned long addr)
560{
561 struct track *p = get_track(s, object, alloc);
562
563 if (addr) {
564#ifdef CONFIG_STACKTRACE
565 struct stack_trace trace;
566 int i;
567
568 trace.nr_entries = 0;
569 trace.max_entries = TRACK_ADDRS_COUNT;
570 trace.entries = p->addrs;
571 trace.skip = 3;
572 metadata_access_enable();
573 save_stack_trace(&trace);
574 metadata_access_disable();
575
576
577 if (trace.nr_entries != 0 &&
578 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
579 trace.nr_entries--;
580
581 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
582 p->addrs[i] = 0;
583#endif
584 p->addr = addr;
585 p->cpu = smp_processor_id();
586 p->pid = current->pid;
587 p->when = jiffies;
588 } else
589 memset(p, 0, sizeof(struct track));
590}
591
592static void init_tracking(struct kmem_cache *s, void *object)
593{
594 if (!(s->flags & SLAB_STORE_USER))
595 return;
596
597 set_track(s, object, TRACK_FREE, 0UL);
598 set_track(s, object, TRACK_ALLOC, 0UL);
599}
600
601static void print_track(const char *s, struct track *t)
602{
603 if (!t->addr)
604 return;
605
606 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
607 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
608#ifdef CONFIG_STACKTRACE
609 {
610 int i;
611 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
612 if (t->addrs[i])
613 pr_err("\t%pS\n", (void *)t->addrs[i]);
614 else
615 break;
616 }
617#endif
618}
619
620static void print_tracking(struct kmem_cache *s, void *object)
621{
622 if (!(s->flags & SLAB_STORE_USER))
623 return;
624
625 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
626 print_track("Freed", get_track(s, object, TRACK_FREE));
627}
628
629static void print_page_info(struct page *page)
630{
631 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
632 page, page->objects, page->inuse, page->freelist, page->flags);
633
634}
635
636static void slab_bug(struct kmem_cache *s, char *fmt, ...)
637{
638 struct va_format vaf;
639 va_list args;
640
641 va_start(args, fmt);
642 vaf.fmt = fmt;
643 vaf.va = &args;
644 pr_err("=============================================================================\n");
645 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
646 pr_err("-----------------------------------------------------------------------------\n\n");
647
648 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
649 va_end(args);
650}
651
652static void slab_fix(struct kmem_cache *s, char *fmt, ...)
653{
654 struct va_format vaf;
655 va_list args;
656
657 va_start(args, fmt);
658 vaf.fmt = fmt;
659 vaf.va = &args;
660 pr_err("FIX %s: %pV\n", s->name, &vaf);
661 va_end(args);
662}
663
664static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
665{
666 unsigned int off;
667 u8 *addr = page_address(page);
668
669 print_tracking(s, p);
670
671 print_page_info(page);
672
673 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
674 p, p - addr, get_freepointer(s, p));
675
676 if (s->flags & SLAB_RED_ZONE)
677 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
678 s->red_left_pad);
679 else if (p > addr + 16)
680 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
681
682 print_section(KERN_ERR, "Object ", p,
683 min_t(unsigned long, s->object_size, PAGE_SIZE));
684 if (s->flags & SLAB_RED_ZONE)
685 print_section(KERN_ERR, "Redzone ", p + s->object_size,
686 s->inuse - s->object_size);
687
688 if (s->offset)
689 off = s->offset + sizeof(void *);
690 else
691 off = s->inuse;
692
693 if (s->flags & SLAB_STORE_USER)
694 off += 2 * sizeof(struct track);
695
696 off += kasan_metadata_size(s);
697
698 if (off != size_from_object(s))
699
700 print_section(KERN_ERR, "Padding ", p + off,
701 size_from_object(s) - off);
702
703 dump_stack();
704}
705
706void object_err(struct kmem_cache *s, struct page *page,
707 u8 *object, char *reason)
708{
709 slab_bug(s, "%s", reason);
710 print_trailer(s, page, object);
711}
712
713static void slab_err(struct kmem_cache *s, struct page *page,
714 const char *fmt, ...)
715{
716 va_list args;
717 char buf[100];
718
719 va_start(args, fmt);
720 vsnprintf(buf, sizeof(buf), fmt, args);
721 va_end(args);
722 slab_bug(s, "%s", buf);
723 print_page_info(page);
724 dump_stack();
725}
726
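/*
 * Lay down the debug byte patterns for one object: the left red zone,
 * the poison pattern over the object itself (POISON_FREE terminated by
 * POISON_END) and the right red zone.  val is SLUB_RED_INACTIVE or
 * SLUB_RED_ACTIVE depending on the object state.
 */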
727static void init_object(struct kmem_cache *s, void *object, u8 val)
728{
729 u8 *p = object;
730
731 if (s->flags & SLAB_RED_ZONE)
732 memset(p - s->red_left_pad, val, s->red_left_pad);
733
734 if (s->flags & __OBJECT_POISON) {
735 memset(p, POISON_FREE, s->object_size - 1);
736 p[s->object_size - 1] = POISON_END;
737 }
738
739 if (s->flags & SLAB_RED_ZONE)
740 memset(p + s->object_size, val, s->inuse - s->object_size);
741}
742
743static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
744 void *from, void *to)
745{
746 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
747 memset(from, data, to - from);
748}
749
750static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
751 u8 *object, char *what,
752 u8 *start, unsigned int value, unsigned int bytes)
753{
754 u8 *fault;
755 u8 *end;
756
757 metadata_access_enable();
758 fault = memchr_inv(start, value, bytes);
759 metadata_access_disable();
760 if (!fault)
761 return 1;
762
763 end = start + bytes;
764 while (end > fault && end[-1] == value)
765 end--;
766
767 slab_bug(s, "%s overwritten", what);
768 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
769 fault, end - 1, fault[0], value);
770 print_trailer(s, page, object);
771
772 restore_bytes(s, what, value, fault, end);
773 return 0;
774}

/*
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed.  Poisoning uses 0x6b
 *	(POISON_FREE) and the last byte is 0xa5 (POISON_END).
 *
 * object + s->object_size
 *	Padding to reach word boundary.  This is also used for Redzoning:
 *	filled with SLUB_RED_INACTIVE for inactive objects and with
 *	SLUB_RED_ACTIVE for objects in use.
 *
 * object + s->inuse
 *	Meta data starts here:
 *	A. Free pointer (if we cannot overwrite the object on free)
 *	B. Tracking data for SLAB_STORE_USER
 *	C. Padding to the required alignment boundary, filled with
 *	   POISON_INUSE (0x5a)
 *
 * object + s->size
 *	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored, so no debug options that rely on these boundaries may be
 * used with merged slabcaches.
 */
814static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
815{
816 unsigned long off = s->inuse;
817
818 if (s->offset)
819
820 off += sizeof(void *);
821
822 if (s->flags & SLAB_STORE_USER)
823
824 off += 2 * sizeof(struct track);
825
826 off += kasan_metadata_size(s);
827
828 if (size_from_object(s) == off)
829 return 1;
830
831 return check_bytes_and_report(s, page, p, "Object padding",
832 p + off, POISON_INUSE, size_from_object(s) - off);
833}
834
/* Check the pad bytes at the end of a slab page */
836static int slab_pad_check(struct kmem_cache *s, struct page *page)
837{
838 u8 *start;
839 u8 *fault;
840 u8 *end;
841 u8 *pad;
842 int length;
843 int remainder;
844
845 if (!(s->flags & SLAB_POISON))
846 return 1;
847
848 start = page_address(page);
849 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
850 end = start + length;
851 remainder = length % s->size;
852 if (!remainder)
853 return 1;
854
855 pad = end - remainder;
856 metadata_access_enable();
857 fault = memchr_inv(pad, POISON_INUSE, remainder);
858 metadata_access_disable();
859 if (!fault)
860 return 1;
861 while (end > fault && end[-1] == POISON_INUSE)
862 end--;
863
864 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
865 print_section(KERN_ERR, "Padding ", pad, remainder);
866
867 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
868 return 0;
869}
870
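/*
 * Verify the debug state of a single object: both red zones, the poison
 * pattern (for free objects) and the free pointer.  On failure the object
 * is reported and, where possible, repaired so that operation can continue.
 */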
871static int check_object(struct kmem_cache *s, struct page *page,
872 void *object, u8 val)
873{
874 u8 *p = object;
875 u8 *endobject = object + s->object_size;
876
877 if (s->flags & SLAB_RED_ZONE) {
878 if (!check_bytes_and_report(s, page, object, "Redzone",
879 object - s->red_left_pad, val, s->red_left_pad))
880 return 0;
881
882 if (!check_bytes_and_report(s, page, object, "Redzone",
883 endobject, val, s->inuse - s->object_size))
884 return 0;
885 } else {
886 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
887 check_bytes_and_report(s, page, p, "Alignment padding",
888 endobject, POISON_INUSE,
889 s->inuse - s->object_size);
890 }
891 }
892
893 if (s->flags & SLAB_POISON) {
894 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
895 (!check_bytes_and_report(s, page, p, "Poison", p,
896 POISON_FREE, s->object_size - 1) ||
897 !check_bytes_and_report(s, page, p, "Poison",
898 p + s->object_size - 1, POISON_END, 1)))
899 return 0;
900
901
902
903 check_pad_bytes(s, page, p);
904 }
905
906 if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
		return 1;

	/* Check free pointer validity */
914 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
915 object_err(s, page, p, "Freepointer corrupt");
916
917
918
919
920
921 set_freepointer(s, p, NULL);
922 return 0;
923 }
924 return 1;
925}
926
927static int check_slab(struct kmem_cache *s, struct page *page)
928{
929 int maxobj;
930
931 VM_BUG_ON(!irqs_disabled());
932
933 if (!PageSlab(page)) {
934 slab_err(s, page, "Not a valid slab page");
935 return 0;
936 }
937
938 maxobj = order_objects(compound_order(page), s->size, s->reserved);
939 if (page->objects > maxobj) {
940 slab_err(s, page, "objects %u > max %u",
941 page->objects, maxobj);
942 return 0;
943 }
944 if (page->inuse > page->objects) {
945 slab_err(s, page, "inuse %u > max %u",
946 page->inuse, page->objects);
947 return 0;
948 }
949
950 slab_pad_check(s, page);
951 return 1;
952}
953
/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the object does not vanish from under us.
 */
958static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
959{
960 int nr = 0;
961 void *fp;
962 void *object = NULL;
963 int max_objects;
964
965 fp = page->freelist;
966 while (fp && nr <= page->objects) {
967 if (fp == search)
968 return 1;
969 if (!check_valid_pointer(s, page, fp)) {
970 if (object) {
971 object_err(s, page, object,
972 "Freechain corrupt");
973 set_freepointer(s, object, NULL);
974 } else {
975 slab_err(s, page, "Freepointer corrupt");
976 page->freelist = NULL;
977 page->inuse = page->objects;
978 slab_fix(s, "Freelist cleared");
979 return 0;
980 }
981 break;
982 }
983 object = fp;
984 fp = get_freepointer(s, object);
985 nr++;
986 }
987
988 max_objects = order_objects(compound_order(page), s->size, s->reserved);
989 if (max_objects > MAX_OBJS_PER_PAGE)
990 max_objects = MAX_OBJS_PER_PAGE;
991
992 if (page->objects != max_objects) {
993 slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
994 page->objects, max_objects);
995 page->objects = max_objects;
996 slab_fix(s, "Number of objects adjusted.");
997 }
998 if (page->inuse != page->objects - nr) {
999 slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
1000 page->inuse, page->objects - nr);
1001 page->inuse = page->objects - nr;
1002 slab_fix(s, "Object count adjusted.");
1003 }
1004 return search == NULL;
1005}
1006
1007static void trace(struct kmem_cache *s, struct page *page, void *object,
1008 int alloc)
1009{
1010 if (s->flags & SLAB_TRACE) {
1011 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1012 s->name,
1013 alloc ? "alloc" : "free",
1014 object, page->inuse,
1015 page->freelist);
1016
1017 if (!alloc)
1018 print_section(KERN_INFO, "Object ", (void *)object,
1019 s->object_size);
1020
1021 dump_stack();
1022 }
1023}
1024
/*
 * Tracking of fully allocated slabs for debugging purposes.
 */
1028static void add_full(struct kmem_cache *s,
1029 struct kmem_cache_node *n, struct page *page)
1030{
1031 if (!(s->flags & SLAB_STORE_USER))
1032 return;
1033
1034 lockdep_assert_held(&n->list_lock);
1035 list_add(&page->lru, &n->full);
1036}
1037
1038static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1039{
1040 if (!(s->flags & SLAB_STORE_USER))
1041 return;
1042
1043 lockdep_assert_held(&n->list_lock);
1044 list_del(&page->lru);
1045}
1046
1047
1048static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1049{
1050 struct kmem_cache_node *n = get_node(s, node);
1051
1052 return atomic_long_read(&n->nr_slabs);
1053}
1054
1055static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1056{
1057 return atomic_long_read(&n->nr_slabs);
1058}
1059
1060static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1061{
1062 struct kmem_cache_node *n = get_node(s, node);
1063
	/*
	 * May be called early in order to allocate a kmem_cache_node
	 * structure. Solve the chicken-egg dilemma by deferring the
	 * increment of the count during bootstrap (see
	 * early_kmem_cache_node_alloc).
	 */
1070 if (likely(n)) {
1071 atomic_long_inc(&n->nr_slabs);
1072 atomic_long_add(objects, &n->total_objects);
1073 }
1074}
1075static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1076{
1077 struct kmem_cache_node *n = get_node(s, node);
1078
1079 atomic_long_dec(&n->nr_slabs);
1080 atomic_long_sub(objects, &n->total_objects);
1081}
1082
1083
1084static void setup_object_debug(struct kmem_cache *s, struct page *page,
1085 void *object)
1086{
1087 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1088 return;
1089
1090 init_object(s, object, SLUB_RED_INACTIVE);
1091 init_tracking(s, object);
1092}
1093
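/*
 * Consistency checks run on every allocation when SLAB_CONSISTENCY_CHECKS
 * is set: the slab itself, the object pointer and the object's debug
 * metadata must all look sane before the object is handed out.
 */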
1094static inline int alloc_consistency_checks(struct kmem_cache *s,
1095 struct page *page,
1096 void *object, unsigned long addr)
1097{
1098 if (!check_slab(s, page))
1099 return 0;
1100
1101 if (!check_valid_pointer(s, page, object)) {
1102 object_err(s, page, object, "Freelist Pointer check fails");
1103 return 0;
1104 }
1105
1106 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1107 return 0;
1108
1109 return 1;
1110}
1111
1112static noinline int alloc_debug_processing(struct kmem_cache *s,
1113 struct page *page,
1114 void *object, unsigned long addr)
1115{
1116 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1117 if (!alloc_consistency_checks(s, page, object, addr))
1118 goto bad;
1119 }
1120
1121
1122 if (s->flags & SLAB_STORE_USER)
1123 set_track(s, object, TRACK_ALLOC, addr);
1124 trace(s, page, object, 1);
1125 init_object(s, object, SLUB_RED_ACTIVE);
1126 return 1;
1127
1128bad:
1129 if (PageSlab(page)) {
		/*
		 * If this is a slab page then lets do the best we can
		 * to avoid issues in the future. Marking all objects
		 * as used avoids touching the remaining objects.
		 */
1135 slab_fix(s, "Marking all objects used");
1136 page->inuse = page->objects;
1137 page->freelist = NULL;
1138 }
1139 return 0;
1140}
1141
1142static inline int free_consistency_checks(struct kmem_cache *s,
1143 struct page *page, void *object, unsigned long addr)
1144{
1145 if (!check_valid_pointer(s, page, object)) {
1146 slab_err(s, page, "Invalid object pointer 0x%p", object);
1147 return 0;
1148 }
1149
1150 if (on_freelist(s, page, object)) {
1151 object_err(s, page, object, "Object already free");
1152 return 0;
1153 }
1154
1155 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1156 return 0;
1157
1158 if (unlikely(s != page->slab_cache)) {
1159 if (!PageSlab(page)) {
1160 slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
1161 object);
1162 } else if (!page->slab_cache) {
1163 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1164 object);
1165 dump_stack();
1166 } else
1167 object_err(s, page, object,
1168 "page slab pointer corrupt.");
1169 return 0;
1170 }
1171 return 1;
1172}
1173
1174
1175static noinline int free_debug_processing(
1176 struct kmem_cache *s, struct page *page,
1177 void *head, void *tail, int bulk_cnt,
1178 unsigned long addr)
1179{
1180 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1181 void *object = head;
1182 int cnt = 0;
1183 unsigned long uninitialized_var(flags);
1184 int ret = 0;
1185
1186 spin_lock_irqsave(&n->list_lock, flags);
1187 slab_lock(page);
1188
1189 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1190 if (!check_slab(s, page))
1191 goto out;
1192 }
1193
1194next_object:
1195 cnt++;
1196
1197 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1198 if (!free_consistency_checks(s, page, object, addr))
1199 goto out;
1200 }
1201
1202 if (s->flags & SLAB_STORE_USER)
1203 set_track(s, object, TRACK_FREE, addr);
1204 trace(s, page, object, 0);
1205
1206 init_object(s, object, SLUB_RED_INACTIVE);
1207
1208
1209 if (object != tail) {
1210 object = get_freepointer(s, object);
1211 goto next_object;
1212 }
1213 ret = 1;
1214
1215out:
1216 if (cnt != bulk_cnt)
1217 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1218 bulk_cnt, cnt);
1219
1220 slab_unlock(page);
1221 spin_unlock_irqrestore(&n->list_lock, flags);
1222 if (!ret)
1223 slab_fix(s, "Object at 0x%p not freed", object);
1224 return ret;
1225}
1226
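/*
 * Parse the slub_debug boot parameter.
 *
 * Format: slub_debug=<flags>[,<slab name prefix>]
 * where <flags> is any combination of
 *	F	sanity checks (SLAB_CONSISTENCY_CHECKS)
 *	Z	red zoning
 *	P	poisoning
 *	U	user tracking
 *	T	tracing
 *	A	failslab fault injection
 *	O	switch off debugging for caches that would otherwise need a
 *		higher minimum slab order
 *
 * "slub_debug" alone enables the default debug flags for all caches and
 * "slub_debug=-" disables debugging entirely.  The optional slab list is
 * matched as a name prefix; for example slub_debug=FZ,dentry enables
 * sanity checks and red zoning only for caches whose name starts with
 * "dentry".
 */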
1227static int __init setup_slub_debug(char *str)
1228{
1229 slub_debug = DEBUG_DEFAULT_FLAGS;
1230 if (*str++ != '=' || !*str)
1231
1232
1233
1234 goto out;
1235
1236 if (*str == ',')
1237
1238
1239
1240
1241 goto check_slabs;
1242
1243 slub_debug = 0;
1244 if (*str == '-')
1245
1246
1247
1248 goto out;
1249
1250
1251
1252
1253 for (; *str && *str != ','; str++) {
1254 switch (tolower(*str)) {
1255 case 'f':
1256 slub_debug |= SLAB_CONSISTENCY_CHECKS;
1257 break;
1258 case 'z':
1259 slub_debug |= SLAB_RED_ZONE;
1260 break;
1261 case 'p':
1262 slub_debug |= SLAB_POISON;
1263 break;
1264 case 'u':
1265 slub_debug |= SLAB_STORE_USER;
1266 break;
1267 case 't':
1268 slub_debug |= SLAB_TRACE;
1269 break;
1270 case 'a':
1271 slub_debug |= SLAB_FAILSLAB;
1272 break;
1273 case 'o':
1274
1275
1276
1277
1278 disable_higher_order_debug = 1;
1279 break;
1280 default:
1281 pr_err("slub_debug option '%c' unknown. skipped\n",
1282 *str);
1283 }
1284 }
1285
1286check_slabs:
1287 if (*str == ',')
1288 slub_debug_slabs = str + 1;
1289out:
1290 return 1;
1291}
1292
1293__setup("slub_debug", setup_slub_debug);
1294
1295slab_flags_t kmem_cache_flags(unsigned long object_size,
1296 slab_flags_t flags, const char *name,
1297 void (*ctor)(void *))
1298{
1299
1300
1301
1302 if (slub_debug && (!slub_debug_slabs || (name &&
1303 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1304 flags |= slub_debug;
1305
1306 return flags;
1307}
1308#else
1309static inline void setup_object_debug(struct kmem_cache *s,
1310 struct page *page, void *object) {}
1311
1312static inline int alloc_debug_processing(struct kmem_cache *s,
1313 struct page *page, void *object, unsigned long addr) { return 0; }
1314
1315static inline int free_debug_processing(
1316 struct kmem_cache *s, struct page *page,
1317 void *head, void *tail, int bulk_cnt,
1318 unsigned long addr) { return 0; }
1319
1320static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1321 { return 1; }
1322static inline int check_object(struct kmem_cache *s, struct page *page,
1323 void *object, u8 val) { return 1; }
1324static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1325 struct page *page) {}
1326static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1327 struct page *page) {}
1328slab_flags_t kmem_cache_flags(unsigned long object_size,
1329 slab_flags_t flags, const char *name,
1330 void (*ctor)(void *))
1331{
1332 return flags;
1333}
1334#define slub_debug 0
1335
1336#define disable_higher_order_debug 0
1337
1338static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1339 { return 0; }
1340static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1341 { return 0; }
1342static inline void inc_slabs_node(struct kmem_cache *s, int node,
1343 int objects) {}
1344static inline void dec_slabs_node(struct kmem_cache *s, int node,
1345 int objects) {}
1346
1347#endif
1348
/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
1353static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1354{
1355 kmemleak_alloc(ptr, size, 1, flags);
1356 kasan_kmalloc_large(ptr, size, flags);
1357}
1358
1359static __always_inline void kfree_hook(void *x)
1360{
1361 kmemleak_free(x);
1362 kasan_kfree_large(x, _RET_IP_);
1363}
1364
1365static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
1366{
1367 void *freeptr;
1368
1369 kmemleak_free_recursive(x, s->flags);
1370
1371
1372
1373
1374
1375
1376#ifdef CONFIG_LOCKDEP
1377 {
1378 unsigned long flags;
1379
1380 local_irq_save(flags);
1381 debug_check_no_locks_freed(x, s->object_size);
1382 local_irq_restore(flags);
1383 }
1384#endif
1385 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1386 debug_check_no_obj_freed(x, s->object_size);
1387
1388 freeptr = get_freepointer(s, x);
1389
1390
1391
1392
1393 kasan_slab_free(s, x, _RET_IP_);
1394 return freeptr;
1395}
1396
1397static inline void slab_free_freelist_hook(struct kmem_cache *s,
1398 void *head, void *tail)
1399{
1400
1401
1402
1403
1404#if defined(CONFIG_LOCKDEP) || \
1405 defined(CONFIG_DEBUG_KMEMLEAK) || \
1406 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1407 defined(CONFIG_KASAN)
1408
1409 void *object = head;
1410 void *tail_obj = tail ? : head;
1411 void *freeptr;
1412
1413 do {
1414 freeptr = slab_free_hook(s, object);
1415 } while ((object != tail_obj) && (object = freeptr));
1416#endif
1417}
1418
1419static void setup_object(struct kmem_cache *s, struct page *page,
1420 void *object)
1421{
1422 setup_object_debug(s, page, object);
1423 kasan_init_slab_obj(s, object);
1424 if (unlikely(s->ctor)) {
1425 kasan_unpoison_object_data(s, object);
1426 s->ctor(object);
1427 kasan_poison_object_data(s, object);
1428 }
1429}
1430
/*
 * Slab allocation and freeing
 */
1434static inline struct page *alloc_slab_page(struct kmem_cache *s,
1435 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1436{
1437 struct page *page;
1438 int order = oo_order(oo);
1439
1440 if (node == NUMA_NO_NODE)
1441 page = alloc_pages(flags, order);
1442 else
1443 page = __alloc_pages_node(node, flags, order);
1444
1445 if (page && memcg_charge_slab(page, flags, order, s)) {
1446 __free_pages(page, order);
1447 page = NULL;
1448 }
1449
1450 return page;
1451}
1452
1453#ifdef CONFIG_SLAB_FREELIST_RANDOM
1454
1455static int init_cache_random_seq(struct kmem_cache *s)
1456{
1457 int err;
1458 unsigned long i, count = oo_objects(s->oo);
1459
1460
1461 if (s->random_seq)
1462 return 0;
1463
1464 err = cache_random_seq_create(s, count, GFP_KERNEL);
1465 if (err) {
1466 pr_err("SLUB: Unable to initialize free list for %s\n",
1467 s->name);
1468 return err;
1469 }
1470
1471
1472 if (s->random_seq) {
1473 for (i = 0; i < count; i++)
1474 s->random_seq[i] *= s->size;
1475 }
1476 return 0;
1477}
1478
1479
1480static void __init init_freelist_randomization(void)
1481{
1482 struct kmem_cache *s;
1483
1484 mutex_lock(&slab_mutex);
1485
1486 list_for_each_entry(s, &slab_caches, list)
1487 init_cache_random_seq(s);
1488
1489 mutex_unlock(&slab_mutex);
1490}
1491
1492
1493static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
1494 unsigned long *pos, void *start,
1495 unsigned long page_limit,
1496 unsigned long freelist_count)
1497{
1498 unsigned int idx;
1499
1500
1501
1502
1503
1504 do {
1505 idx = s->random_seq[*pos];
1506 *pos += 1;
1507 if (*pos >= freelist_count)
1508 *pos = 0;
1509 } while (unlikely(idx >= page_limit));
1510
1511 return (char *)start + idx;
1512}
1513
1514
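/*
 * Shuffle the freelist of a freshly allocated slab using the precomputed
 * random sequence.  Returns false (leaving the caller to build a linear
 * freelist) if the slab holds fewer than two objects or the cache has no
 * random sequence.
 */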
1515static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1516{
1517 void *start;
1518 void *cur;
1519 void *next;
1520 unsigned long idx, pos, page_limit, freelist_count;
1521
1522 if (page->objects < 2 || !s->random_seq)
1523 return false;
1524
1525 freelist_count = oo_objects(s->oo);
1526 pos = get_random_int() % freelist_count;
1527
1528 page_limit = page->objects * s->size;
1529 start = fixup_red_left(s, page_address(page));
1530
1531
1532 cur = next_freelist_entry(s, page, &pos, start, page_limit,
1533 freelist_count);
1534 page->freelist = cur;
1535
1536 for (idx = 1; idx < page->objects; idx++) {
1537 setup_object(s, page, cur);
1538 next = next_freelist_entry(s, page, &pos, start, page_limit,
1539 freelist_count);
1540 set_freepointer(s, cur, next);
1541 cur = next;
1542 }
1543 setup_object(s, page, cur);
1544 set_freepointer(s, cur, NULL);
1545
1546 return true;
1547}
1548#else
1549static inline int init_cache_random_seq(struct kmem_cache *s)
1550{
1551 return 0;
1552}
1553static inline void init_freelist_randomization(void) { }
1554static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
1555{
1556 return false;
1557}
1558#endif
1559
1560static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1561{
1562 struct page *page;
1563 struct kmem_cache_order_objects oo = s->oo;
1564 gfp_t alloc_gfp;
1565 void *start, *p;
1566 int idx, order;
1567 bool shuffle;
1568
1569 flags &= gfp_allowed_mask;
1570
1571 if (gfpflags_allow_blocking(flags))
1572 local_irq_enable();
1573
1574 flags |= s->allocflags;
1575
1576
1577
1578
1579
1580 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1581 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1582 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
1583
1584 page = alloc_slab_page(s, alloc_gfp, node, oo);
1585 if (unlikely(!page)) {
1586 oo = s->min;
1587 alloc_gfp = flags;
1588
1589
1590
1591
1592 page = alloc_slab_page(s, alloc_gfp, node, oo);
1593 if (unlikely(!page))
1594 goto out;
1595 stat(s, ORDER_FALLBACK);
1596 }
1597
1598 page->objects = oo_objects(oo);
1599
1600 order = compound_order(page);
1601 page->slab_cache = s;
1602 __SetPageSlab(page);
1603 if (page_is_pfmemalloc(page))
1604 SetPageSlabPfmemalloc(page);
1605
1606 start = page_address(page);
1607
1608 if (unlikely(s->flags & SLAB_POISON))
1609 memset(start, POISON_INUSE, PAGE_SIZE << order);
1610
1611 kasan_poison_slab(page);
1612
1613 shuffle = shuffle_freelist(s, page);
1614
1615 if (!shuffle) {
1616 for_each_object_idx(p, idx, s, start, page->objects) {
1617 setup_object(s, page, p);
1618 if (likely(idx < page->objects))
1619 set_freepointer(s, p, p + s->size);
1620 else
1621 set_freepointer(s, p, NULL);
1622 }
1623 page->freelist = fixup_red_left(s, start);
1624 }
1625
1626 page->inuse = page->objects;
1627 page->frozen = 1;
1628
1629out:
1630 if (gfpflags_allow_blocking(flags))
1631 local_irq_disable();
1632 if (!page)
1633 return NULL;
1634
1635 mod_lruvec_page_state(page,
1636 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1637 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1638 1 << oo_order(oo));
1639
1640 inc_slabs_node(s, page_to_nid(page), page->objects);
1641
1642 return page;
1643}
1644
1645static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1646{
1647 if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
1648 gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;
1649 flags &= ~GFP_SLAB_BUG_MASK;
1650 pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
1651 invalid_mask, &invalid_mask, flags, &flags);
1652 dump_stack();
1653 }
1654
1655 return allocate_slab(s,
1656 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1657}
1658
1659static void __free_slab(struct kmem_cache *s, struct page *page)
1660{
1661 int order = compound_order(page);
1662 int pages = 1 << order;
1663
1664 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1665 void *p;
1666
1667 slab_pad_check(s, page);
1668 for_each_object(p, s, page_address(page),
1669 page->objects)
1670 check_object(s, page, p, SLUB_RED_INACTIVE);
1671 }
1672
1673 mod_lruvec_page_state(page,
1674 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1675 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1676 -pages);
1677
1678 __ClearPageSlabPfmemalloc(page);
1679 __ClearPageSlab(page);
1680
1681 page_mapcount_reset(page);
1682 if (current->reclaim_state)
1683 current->reclaim_state->reclaimed_slab += pages;
1684 memcg_uncharge_slab(page, order, s);
1685 __free_pages(page, order);
1686}
1687
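/*
 * For SLAB_TYPESAFE_BY_RCU caches the rcu_head used to defer freeing is
 * normally stored in the struct page itself (in the union with the lru
 * list_head).  If struct rcu_head is larger than that list_head, space for
 * it is instead reserved at the end of the slab (s->reserved), which is
 * what need_reserve_slab_rcu detects.
 */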
1688#define need_reserve_slab_rcu \
1689 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1690
1691static void rcu_free_slab(struct rcu_head *h)
1692{
1693 struct page *page;
1694
1695 if (need_reserve_slab_rcu)
1696 page = virt_to_head_page(h);
1697 else
1698 page = container_of((struct list_head *)h, struct page, lru);
1699
1700 __free_slab(page->slab_cache, page);
1701}
1702
1703static void free_slab(struct kmem_cache *s, struct page *page)
1704{
1705 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
1706 struct rcu_head *head;
1707
1708 if (need_reserve_slab_rcu) {
1709 int order = compound_order(page);
1710 int offset = (PAGE_SIZE << order) - s->reserved;
1711
1712 VM_BUG_ON(s->reserved != sizeof(*head));
1713 head = page_address(page) + offset;
1714 } else {
1715 head = &page->rcu_head;
1716 }
1717
1718 call_rcu(head, rcu_free_slab);
1719 } else
1720 __free_slab(s, page);
1721}
1722
1723static void discard_slab(struct kmem_cache *s, struct page *page)
1724{
1725 dec_slabs_node(s, page_to_nid(page), page->objects);
1726 free_slab(s, page);
1727}
1728
/*
 * Management of partially allocated slabs.
 */
1732static inline void
1733__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1734{
1735 n->nr_partial++;
1736 if (tail == DEACTIVATE_TO_TAIL)
1737 list_add_tail(&page->lru, &n->partial);
1738 else
1739 list_add(&page->lru, &n->partial);
1740}
1741
1742static inline void add_partial(struct kmem_cache_node *n,
1743 struct page *page, int tail)
1744{
1745 lockdep_assert_held(&n->list_lock);
1746 __add_partial(n, page, tail);
1747}
1748
1749static inline void remove_partial(struct kmem_cache_node *n,
1750 struct page *page)
1751{
1752 lockdep_assert_held(&n->list_lock);
1753 list_del(&page->lru);
1754 n->nr_partial--;
1755}
1756
1757
1758
1759
1760
1761
1762
1763static inline void *acquire_slab(struct kmem_cache *s,
1764 struct kmem_cache_node *n, struct page *page,
1765 int mode, int *objects)
1766{
1767 void *freelist;
1768 unsigned long counters;
1769 struct page new;
1770
1771 lockdep_assert_held(&n->list_lock);
1772
1773
1774
1775
1776
1777
1778 freelist = page->freelist;
1779 counters = page->counters;
1780 new.counters = counters;
1781 *objects = new.objects - new.inuse;
1782 if (mode) {
1783 new.inuse = page->objects;
1784 new.freelist = NULL;
1785 } else {
1786 new.freelist = freelist;
1787 }
1788
1789 VM_BUG_ON(new.frozen);
1790 new.frozen = 1;
1791
1792 if (!__cmpxchg_double_slab(s, page,
1793 freelist, counters,
1794 new.freelist, new.counters,
1795 "acquire_slab"))
1796 return NULL;
1797
1798 remove_partial(n, page);
1799 WARN_ON(!freelist);
1800 return freelist;
1801}
1802
1803static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1804static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1805
/*
 * Try to allocate a partial slab from a specific node.
 */
1809static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1810 struct kmem_cache_cpu *c, gfp_t flags)
1811{
1812 struct page *page, *page2;
1813 void *object = NULL;
1814 int available = 0;
1815 int objects;
1816
1817
1818
1819
1820
1821
1822
1823 if (!n || !n->nr_partial)
1824 return NULL;
1825
1826 spin_lock(&n->list_lock);
1827 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1828 void *t;
1829
1830 if (!pfmemalloc_match(page, flags))
1831 continue;
1832
1833 t = acquire_slab(s, n, page, object == NULL, &objects);
1834 if (!t)
1835 break;
1836
1837 available += objects;
1838 if (!object) {
1839 c->page = page;
1840 stat(s, ALLOC_FROM_PARTIAL);
1841 object = t;
1842 } else {
1843 put_cpu_partial(s, page, 0);
1844 stat(s, CPU_PARTIAL_NODE);
1845 }
1846 if (!kmem_cache_has_cpu_partial(s)
1847 || available > slub_cpu_partial(s) / 2)
1848 break;
1849
1850 }
1851 spin_unlock(&n->list_lock);
1852 return object;
1853}
1854
1855
1856
1857
1858static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1859 struct kmem_cache_cpu *c)
1860{
1861#ifdef CONFIG_NUMA
1862 struct zonelist *zonelist;
1863 struct zoneref *z;
1864 struct zone *zone;
1865 enum zone_type high_zoneidx = gfp_zone(flags);
1866 void *object;
1867 unsigned int cpuset_mems_cookie;
1868
	/*
	 * The defrag ratio allows a configuration of the tradeoffs between
	 * inter node defragmentation and node local allocations. A lower
	 * defrag_ratio increases the tendency to do local allocations
	 * instead of attempting to obtain partial slabs from other nodes.
	 *
	 * If the defrag_ratio is set to 0 then kmalloc() always
	 * returns node local objects. If the ratio is higher then kmalloc()
	 * may return off node objects because partial slabs are obtained
	 * from other nodes and filled up.
	 *
	 * Scanning all nodes for partial slabs can be expensive, so it is
	 * only attempted with a probability derived from the ratio.
	 */
1887 if (!s->remote_node_defrag_ratio ||
1888 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1889 return NULL;
1890
1891 do {
1892 cpuset_mems_cookie = read_mems_allowed_begin();
1893 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1894 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1895 struct kmem_cache_node *n;
1896
1897 n = get_node(s, zone_to_nid(zone));
1898
1899 if (n && cpuset_zone_allowed(zone, flags) &&
1900 n->nr_partial > s->min_partial) {
1901 object = get_partial_node(s, n, c, flags);
1902 if (object) {
1903
1904
1905
1906
1907
1908
1909
1910 return object;
1911 }
1912 }
1913 }
1914 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1915#endif
1916 return NULL;
1917}
1918
1919
1920
1921
1922static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1923 struct kmem_cache_cpu *c)
1924{
1925 void *object;
1926 int searchnode = node;
1927
1928 if (node == NUMA_NO_NODE)
1929 searchnode = numa_mem_id();
1930 else if (!node_present_pages(node))
1931 searchnode = node_to_mem_node(node);
1932
1933 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1934 if (object || node != NUMA_NO_NODE)
1935 return object;
1936
1937 return get_any_partial(s, flags, c);
1938}
1939
1940#ifdef CONFIG_PREEMPT
1941
1942
1943
1944
1945
1946#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1947#else
1948
1949
1950
1951
1952#define TID_STEP 1
1953#endif
1954
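/*
 * Per cpu transaction ids: init_tid() seeds each cpu's tid with its cpu
 * number and next_tid() advances it by TID_STEP.  With CONFIG_PREEMPT,
 * TID_STEP is at least the number of cpus, so tid % TID_STEP recovers the
 * cpu and tid / TID_STEP counts operations; otherwise TID_STEP is 1 and
 * the tid is a pure event counter.  A cmpxchg on (freelist, tid) in the
 * fastpath therefore fails both when another operation ran on this cpu
 * and when we migrated to a different cpu.
 */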
1955static inline unsigned long next_tid(unsigned long tid)
1956{
1957 return tid + TID_STEP;
1958}
1959
1960static inline unsigned int tid_to_cpu(unsigned long tid)
1961{
1962 return tid % TID_STEP;
1963}
1964
1965static inline unsigned long tid_to_event(unsigned long tid)
1966{
1967 return tid / TID_STEP;
1968}
1969
1970static inline unsigned int init_tid(int cpu)
1971{
1972 return cpu;
1973}
1974
1975static inline void note_cmpxchg_failure(const char *n,
1976 const struct kmem_cache *s, unsigned long tid)
1977{
1978#ifdef SLUB_DEBUG_CMPXCHG
1979 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1980
1981 pr_info("%s %s: cmpxchg redo ", n, s->name);
1982
1983#ifdef CONFIG_PREEMPT
1984 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1985 pr_warn("due to cpu change %d -> %d\n",
1986 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1987 else
1988#endif
1989 if (tid_to_event(tid) != tid_to_event(actual_tid))
1990 pr_warn("due to cpu running other code. Event %ld->%ld\n",
1991 tid_to_event(tid), tid_to_event(actual_tid));
1992 else
1993 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
1994 actual_tid, tid, next_tid(tid));
1995#endif
1996 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1997}
1998
1999static void init_kmem_cache_cpus(struct kmem_cache *s)
2000{
2001 int cpu;
2002
2003 for_each_possible_cpu(cpu)
2004 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
2005}
2006
2007
2008
2009
2010static void deactivate_slab(struct kmem_cache *s, struct page *page,
2011 void *freelist, struct kmem_cache_cpu *c)
2012{
2013 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
2014 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
2015 int lock = 0;
2016 enum slab_modes l = M_NONE, m = M_NONE;
2017 void *nextfree;
2018 int tail = DEACTIVATE_TO_HEAD;
2019 struct page new;
2020 struct page old;
2021
2022 if (page->freelist) {
2023 stat(s, DEACTIVATE_REMOTE_FREES);
2024 tail = DEACTIVATE_TO_TAIL;
2025 }
2026
	/*
	 * Stage one: Free all available per cpu objects back
	 * to the page freelist while it is still frozen. Leave the
	 * last one.
	 *
	 * There is no need to take the list->lock because the page
	 * is still frozen.
	 */
2035 while (freelist && (nextfree = get_freepointer(s, freelist))) {
2036 void *prior;
2037 unsigned long counters;
2038
2039 do {
2040 prior = page->freelist;
2041 counters = page->counters;
2042 set_freepointer(s, freelist, prior);
2043 new.counters = counters;
2044 new.inuse--;
2045 VM_BUG_ON(!new.frozen);
2046
2047 } while (!__cmpxchg_double_slab(s, page,
2048 prior, counters,
2049 freelist, new.counters,
2050 "drain percpu freelist"));
2051
2052 freelist = nextfree;
2053 }
2054
	/*
	 * Stage two: Ensure that the page is unfrozen while the
	 * list presence reflects the actual number of objects
	 * during unfreeze.
	 *
	 * We setup the list membership and then perform a cmpxchg
	 * with the count. If there is a mismatch then the page
	 * is not unfrozen but the page is on the wrong list.
	 *
	 * Then we restart the process which may have to remove
	 * the page from the list that we just put it on again
	 * because the cmpxchg may fail to replace the page in
	 * the right list with the right information.
	 */
2069redo:
2070
2071 old.freelist = page->freelist;
2072 old.counters = page->counters;
2073 VM_BUG_ON(!old.frozen);
2074
2075
2076 new.counters = old.counters;
2077 if (freelist) {
2078 new.inuse--;
2079 set_freepointer(s, freelist, old.freelist);
2080 new.freelist = freelist;
2081 } else
2082 new.freelist = old.freelist;
2083
2084 new.frozen = 0;
2085
2086 if (!new.inuse && n->nr_partial >= s->min_partial)
2087 m = M_FREE;
2088 else if (new.freelist) {
2089 m = M_PARTIAL;
2090 if (!lock) {
2091 lock = 1;
2092
2093
2094
2095
2096
2097 spin_lock(&n->list_lock);
2098 }
2099 } else {
2100 m = M_FULL;
2101 if (kmem_cache_debug(s) && !lock) {
2102 lock = 1;
2103
2104
2105
2106
2107
2108 spin_lock(&n->list_lock);
2109 }
2110 }
2111
2112 if (l != m) {
2113
2114 if (l == M_PARTIAL)
2115
2116 remove_partial(n, page);
2117
2118 else if (l == M_FULL)
2119
2120 remove_full(s, n, page);
2121
2122 if (m == M_PARTIAL) {
2123
2124 add_partial(n, page, tail);
2125 stat(s, tail);
2126
2127 } else if (m == M_FULL) {
2128
2129 stat(s, DEACTIVATE_FULL);
2130 add_full(s, n, page);
2131
2132 }
2133 }
2134
2135 l = m;
2136 if (!__cmpxchg_double_slab(s, page,
2137 old.freelist, old.counters,
2138 new.freelist, new.counters,
2139 "unfreezing slab"))
2140 goto redo;
2141
2142 if (lock)
2143 spin_unlock(&n->list_lock);
2144
2145 if (m == M_FREE) {
2146 stat(s, DEACTIVATE_EMPTY);
2147 discard_slab(s, page);
2148 stat(s, FREE_SLAB);
2149 }
2150
2151 c->page = NULL;
2152 c->freelist = NULL;
2153}
2154
/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled, as it may put
 * slabs back onto the node partial lists and discard empty ones.
 */
2162static void unfreeze_partials(struct kmem_cache *s,
2163 struct kmem_cache_cpu *c)
2164{
2165#ifdef CONFIG_SLUB_CPU_PARTIAL
2166 struct kmem_cache_node *n = NULL, *n2 = NULL;
2167 struct page *page, *discard_page = NULL;
2168
2169 while ((page = c->partial)) {
2170 struct page new;
2171 struct page old;
2172
2173 c->partial = page->next;
2174
2175 n2 = get_node(s, page_to_nid(page));
2176 if (n != n2) {
2177 if (n)
2178 spin_unlock(&n->list_lock);
2179
2180 n = n2;
2181 spin_lock(&n->list_lock);
2182 }
2183
2184 do {
2185
2186 old.freelist = page->freelist;
2187 old.counters = page->counters;
2188 VM_BUG_ON(!old.frozen);
2189
2190 new.counters = old.counters;
2191 new.freelist = old.freelist;
2192
2193 new.frozen = 0;
2194
2195 } while (!__cmpxchg_double_slab(s, page,
2196 old.freelist, old.counters,
2197 new.freelist, new.counters,
2198 "unfreezing slab"));
2199
2200 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2201 page->next = discard_page;
2202 discard_page = page;
2203 } else {
2204 add_partial(n, page, DEACTIVATE_TO_TAIL);
2205 stat(s, FREE_ADD_PARTIAL);
2206 }
2207 }
2208
2209 if (n)
2210 spin_unlock(&n->list_lock);
2211
2212 while (discard_page) {
2213 page = discard_page;
2214 discard_page = discard_page->next;
2215
2216 stat(s, DEACTIVATE_EMPTY);
2217 discard_slab(s, page);
2218 stat(s, FREE_SLAB);
2219 }
2220#endif
2221}
2222
/*
 * Put a page that was just frozen (in __slab_free() or get_partial_node())
 * into a partial page slot if available.
 *
 * If we did not find a slot then simply move all the partials to the
 * per node partial list.
 */
2230static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2231{
2232#ifdef CONFIG_SLUB_CPU_PARTIAL
2233 struct page *oldpage;
2234 int pages;
2235 int pobjects;
2236
2237 preempt_disable();
2238 do {
2239 pages = 0;
2240 pobjects = 0;
2241 oldpage = this_cpu_read(s->cpu_slab->partial);
2242
2243 if (oldpage) {
2244 pobjects = oldpage->pobjects;
2245 pages = oldpage->pages;
2246 if (drain && pobjects > s->cpu_partial) {
2247 unsigned long flags;
2248
2249
2250
2251
2252 local_irq_save(flags);
2253 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2254 local_irq_restore(flags);
2255 oldpage = NULL;
2256 pobjects = 0;
2257 pages = 0;
2258 stat(s, CPU_PARTIAL_DRAIN);
2259 }
2260 }
2261
2262 pages++;
2263 pobjects += page->objects - page->inuse;
2264
2265 page->pages = pages;
2266 page->pobjects = pobjects;
2267 page->next = oldpage;
2268
2269 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2270 != oldpage);
2271 if (unlikely(!s->cpu_partial)) {
2272 unsigned long flags;
2273
2274 local_irq_save(flags);
2275 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2276 local_irq_restore(flags);
2277 }
2278 preempt_enable();
2279#endif
2280}
2281
2282static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2283{
2284 stat(s, CPUSLAB_FLUSH);
2285 deactivate_slab(s, c->page, c->freelist, c);
2286
2287 c->tid = next_tid(c->tid);
2288}
2289
2290
2291
2292
2293
2294
2295static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2296{
2297 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2298
2299 if (likely(c)) {
2300 if (c->page)
2301 flush_slab(s, c);
2302
2303 unfreeze_partials(s, c);
2304 }
2305}
2306
2307static void flush_cpu_slab(void *d)
2308{
2309 struct kmem_cache *s = d;
2310
2311 __flush_cpu_slab(s, smp_processor_id());
2312}
2313
2314static bool has_cpu_slab(int cpu, void *info)
2315{
2316 struct kmem_cache *s = info;
2317 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2318
2319 return c->page || slub_percpu_partial(c);
2320}
2321
2322static void flush_all(struct kmem_cache *s)
2323{
2324 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2325}
2326
/*
 * Use the cpu notifier to insure that the cpu slabs are flushed when
 * necessary.
 */
2331static int slub_cpu_dead(unsigned int cpu)
2332{
2333 struct kmem_cache *s;
2334 unsigned long flags;
2335
2336 mutex_lock(&slab_mutex);
2337 list_for_each_entry(s, &slab_caches, list) {
2338 local_irq_save(flags);
2339 __flush_cpu_slab(s, cpu);
2340 local_irq_restore(flags);
2341 }
2342 mutex_unlock(&slab_mutex);
2343 return 0;
2344}
2345
/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2350static inline int node_match(struct page *page, int node)
2351{
2352#ifdef CONFIG_NUMA
2353 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2354 return 0;
2355#endif
2356 return 1;
2357}
2358
2359#ifdef CONFIG_SLUB_DEBUG
2360static int count_free(struct page *page)
2361{
2362 return page->objects - page->inuse;
2363}
2364
2365static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2366{
2367 return atomic_long_read(&n->total_objects);
2368}
2369#endif
2370
2371#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2372static unsigned long count_partial(struct kmem_cache_node *n,
2373 int (*get_count)(struct page *))
2374{
2375 unsigned long flags;
2376 unsigned long x = 0;
2377 struct page *page;
2378
2379 spin_lock_irqsave(&n->list_lock, flags);
2380 list_for_each_entry(page, &n->partial, lru)
2381 x += get_count(page);
2382 spin_unlock_irqrestore(&n->list_lock, flags);
2383 return x;
2384}
2385#endif
2386
2387static noinline void
2388slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2389{
2390#ifdef CONFIG_SLUB_DEBUG
2391 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2392 DEFAULT_RATELIMIT_BURST);
2393 int node;
2394 struct kmem_cache_node *n;
2395
2396 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2397 return;
2398
2399 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2400 nid, gfpflags, &gfpflags);
2401 pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n",
2402 s->name, s->object_size, s->size, oo_order(s->oo),
2403 oo_order(s->min));
2404
2405 if (oo_order(s->min) > get_order(s->object_size))
2406 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2407 s->name);
2408
2409 for_each_kmem_cache_node(s, node, n) {
2410 unsigned long nr_slabs;
2411 unsigned long nr_objs;
2412 unsigned long nr_free;
2413
2414 nr_free = count_partial(n, count_free);
2415 nr_slabs = node_nr_slabs(n);
2416 nr_objs = node_nr_objs(n);
2417
2418 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2419 node, nr_slabs, nr_objs, nr_free);
2420 }
2421#endif
2422}
2423
2424static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2425 int node, struct kmem_cache_cpu **pc)
2426{
2427 void *freelist;
2428 struct kmem_cache_cpu *c = *pc;
2429 struct page *page;
2430
2431 freelist = get_partial(s, flags, node, c);
2432
2433 if (freelist)
2434 return freelist;
2435
2436 page = new_slab(s, flags, node);
2437 if (page) {
2438 c = raw_cpu_ptr(s->cpu_slab);
2439 if (c->page)
2440 flush_slab(s, c);
2441
2442
2443
2444
2445
2446 freelist = page->freelist;
2447 page->freelist = NULL;
2448
2449 stat(s, ALLOC_SLAB);
2450 c->page = page;
2451 *pc = c;
2452 } else
2453 freelist = NULL;
2454
2455 return freelist;
2456}
2457
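/*
 * Slabs allocated from the pfmemalloc reserves are marked with
 * PageSlabPfmemalloc and may only satisfy allocations that are themselves
 * entitled to dip into those reserves.
 */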
2458static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2459{
2460 if (unlikely(PageSlabPfmemalloc(page)))
2461 return gfp_pfmemalloc_allowed(gfpflags);
2462
2463 return true;
2464}
2465
/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupt disabled.
 */
2476static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2477{
2478 struct page new;
2479 unsigned long counters;
2480 void *freelist;
2481
2482 do {
2483 freelist = page->freelist;
2484 counters = page->counters;
2485
2486 new.counters = counters;
2487 VM_BUG_ON(!new.frozen);
2488
2489 new.inuse = page->objects;
2490 new.frozen = freelist != NULL;
2491
2492 } while (!__cmpxchg_double_slab(s, page,
2493 freelist, counters,
2494 NULL, new.counters,
2495 "get_freelist"));
2496
2497 return freelist;
2498}
2499
/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 *
 * Version of __slab_alloc to use when we know that interrupts are
 * already disabled (which is the case for bulk allocation).
 */
2519static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2520 unsigned long addr, struct kmem_cache_cpu *c)
2521{
2522 void *freelist;
2523 struct page *page;
2524
2525 page = c->page;
2526 if (!page)
2527 goto new_slab;
2528redo:
2529
2530 if (unlikely(!node_match(page, node))) {
2531 int searchnode = node;
2532
2533 if (node != NUMA_NO_NODE && !node_present_pages(node))
2534 searchnode = node_to_mem_node(node);
2535
2536 if (unlikely(!node_match(page, searchnode))) {
2537 stat(s, ALLOC_NODE_MISMATCH);
2538 deactivate_slab(s, page, c->freelist, c);
2539 goto new_slab;
2540 }
2541 }
2542
2543
2544
2545
2546
2547
2548 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2549 deactivate_slab(s, page, c->freelist, c);
2550 goto new_slab;
2551 }
2552
2553
2554 freelist = c->freelist;
2555 if (freelist)
2556 goto load_freelist;
2557
2558 freelist = get_freelist(s, page);
2559
2560 if (!freelist) {
2561 c->page = NULL;
2562 stat(s, DEACTIVATE_BYPASS);
2563 goto new_slab;
2564 }
2565
2566 stat(s, ALLOC_REFILL);
2567
2568load_freelist:
	/*
	 * freelist is pointing to the list of objects to be used.
	 * page is pointing to the page from which the objects are obtained.
	 * That page must be frozen for per cpu allocations to work.
	 */
2574 VM_BUG_ON(!c->page->frozen);
2575 c->freelist = get_freepointer(s, freelist);
2576 c->tid = next_tid(c->tid);
2577 return freelist;
2578
2579new_slab:
2580
2581 if (slub_percpu_partial(c)) {
2582 page = c->page = slub_percpu_partial(c);
2583 slub_set_percpu_partial(c, page);
2584 stat(s, CPU_PARTIAL_ALLOC);
2585 goto redo;
2586 }
2587
2588 freelist = new_slab_objects(s, gfpflags, node, &c);
2589
2590 if (unlikely(!freelist)) {
2591 slab_out_of_memory(s, gfpflags, node);
2592 return NULL;
2593 }
2594
2595 page = c->page;
2596 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2597 goto load_freelist;
2598
2599
2600 if (kmem_cache_debug(s) &&
2601 !alloc_debug_processing(s, page, freelist, addr))
2602 goto new_slab;
2603
2604 deactivate_slab(s, page, get_freepointer(s, freelist), c);
2605 return freelist;
2606}
2607
2608/*
2609 * Another one that disables interrupts and compensates for possible
2610 * cpu changes by refetching the per cpu area pointer.
2611 */
2612static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2613 unsigned long addr, struct kmem_cache_cpu *c)
2614{
2615 void *p;
2616 unsigned long flags;
2617
2618 local_irq_save(flags);
2619#ifdef CONFIG_PREEMPT
2620 /*
2621 * We may have been preempted and rescheduled on a different
2622 * cpu before disabling interrupts. Need to reload cpu area
2623 * pointer.
2624 */
2625 c = this_cpu_ptr(s->cpu_slab);
2626#endif
2627
2628 p = ___slab_alloc(s, gfpflags, node, addr, c);
2629 local_irq_restore(flags);
2630 return p;
2631}
2632
2633/*
2634 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
2635 * have the fastpath folded into their functions. So no function call
2636 * overhead for requests that can be satisfied on the fastpath.
2637 *
2638 * The fastpath works by first checking if the lockless freelist can be used.
2639 * If not then __slab_alloc is called for slow processing.
2640 *
2641 * Otherwise we can simply pick the next object from the lockless free list.
2642 */
2643static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2644 gfp_t gfpflags, int node, unsigned long addr)
2645{
2646 void *object;
2647 struct kmem_cache_cpu *c;
2648 struct page *page;
2649 unsigned long tid;
2650
2651 s = slab_pre_alloc_hook(s, gfpflags);
2652 if (!s)
2653 return NULL;
2654redo:
2655 /*
2656 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2657 * enabled. We may switch back and forth between cpus while
2658 * reading from one cpu area. That does not matter as long
2659 * as we end up on the original cpu again when doing the cmpxchg.
2660 *
2661 * We should guarantee that tid and kmem_cache are retrieved on
2662 * the same cpu. It could be different if CONFIG_PREEMPT so we need
2663 * to check if it is matched or not.
2664 */
2665 do {
2666 tid = this_cpu_read(s->cpu_slab->tid);
2667 c = raw_cpu_ptr(s->cpu_slab);
2668 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2669 unlikely(tid != READ_ONCE(c->tid)));
2670
2671 /*
2672 * The irqless object alloc/free algorithm used here depends on the
2673 * sequence of fetching cpu_slab's data. tid should be fetched before
2674 * anything on c to guarantee that object and page associated with the
2675 * previous tid won't be used with the current tid. If we fetched tid
2676 * first, object and page could be ones associated with the next tid and
2677 * our alloc/free request would fail. In that case we simply retry.
2678 */
2679 barrier();
2680
2681
2682 /*
2683 * The transaction ids are globally unique per cpu and per operation on
2684 * a per cpu queue. Thus they guarantee that the cmpxchg_double
2685 * occurs on the right processor and that there was no operation on the
2686 * linked list in between.
2687 */
2688 object = c->freelist;
2689 page = c->page;
2690 if (unlikely(!object || !node_match(page, node))) {
2691 object = __slab_alloc(s, gfpflags, node, addr, c);
2692 stat(s, ALLOC_SLOWPATH);
2693 } else {
2694 void *next_object = get_freepointer_safe(s, object);
2695
2696 /*
2697 * The cmpxchg will only match if there was no additional
2698 * operation and if we are on the right processor.
2699 *
2700 * The cmpxchg does the following atomically (without lock
2701 * semantics!)
2702 * 1. Relocate first pointer to the current per cpu area.
2703 * 2. Verify that tid and freelist have not been changed
2704 * 3. If they were not changed replace tid and freelist
2705 *
2706 * Since this is without lock semantics the protection is only
2707 * against code executing on this cpu *not* from access by
2708 * other cpus.
2709 */
2710 if (unlikely(!this_cpu_cmpxchg_double(
2711 s->cpu_slab->freelist, s->cpu_slab->tid,
2712 object, tid,
2713 next_object, next_tid(tid)))) {
2714
2715 note_cmpxchg_failure("slab_alloc", s, tid);
2716 goto redo;
2717 }
2718 prefetch_freepointer(s, next_object);
2719 stat(s, ALLOC_FASTPATH);
2720 }
2721
2722 if (unlikely(gfpflags & __GFP_ZERO) && object)
2723 memset(object, 0, s->object_size);
2724
2725 slab_post_alloc_hook(s, gfpflags, 1, &object);
2726
2727 return object;
2728}
2729
2730static __always_inline void *slab_alloc(struct kmem_cache *s,
2731 gfp_t gfpflags, unsigned long addr)
2732{
2733 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2734}
2735
2736void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2737{
2738 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2739
2740 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2741 s->size, gfpflags);
2742
2743 return ret;
2744}
2745EXPORT_SYMBOL(kmem_cache_alloc);
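
/*
 * Typical usage from a client of this API (a sketch with made-up names,
 * not part of slub itself):
 *
 *	struct kmem_cache *foo_cache;
 *	struct foo *obj;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, obj);
 *
 * The fastpath above then serves such allocations from the per cpu
 * freelist without taking any locks.
 */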
2746
2747#ifdef CONFIG_TRACING
2748void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2749{
2750 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2751 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2752 kasan_kmalloc(s, ret, size, gfpflags);
2753 return ret;
2754}
2755EXPORT_SYMBOL(kmem_cache_alloc_trace);
2756#endif
2757
2758#ifdef CONFIG_NUMA
2759void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2760{
2761 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2762
2763 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2764 s->object_size, s->size, gfpflags, node);
2765
2766 return ret;
2767}
2768EXPORT_SYMBOL(kmem_cache_alloc_node);
2769
2770#ifdef CONFIG_TRACING
2771void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2772 gfp_t gfpflags,
2773 int node, size_t size)
2774{
2775 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2776
2777 trace_kmalloc_node(_RET_IP_, ret,
2778 size, s->size, gfpflags, node);
2779
2780 kasan_kmalloc(s, ret, size, gfpflags);
2781 return ret;
2782}
2783EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2784#endif
2785#endif
2786
2787/*
2788 * Slow path handling. This may still be called frequently since objects
2789 * have a longer lifetime than the cpu slabs in most processing loads.
2790 *
2791 * So we still attempt to reduce cache line usage. Just take the slab
2792 * lock and free the item. If there is no additional partial page
2793 * handling required then we can return immediately.
2794 */
2795static void __slab_free(struct kmem_cache *s, struct page *page,
2796 void *head, void *tail, int cnt,
2797 unsigned long addr)
2798
2799{
2800 void *prior;
2801 int was_frozen;
2802 struct page new;
2803 unsigned long counters;
2804 struct kmem_cache_node *n = NULL;
2805 unsigned long uninitialized_var(flags);
2806
2807 stat(s, FREE_SLOWPATH);
2808
2809 if (kmem_cache_debug(s) &&
2810 !free_debug_processing(s, page, head, tail, cnt, addr))
2811 return;
2812
2813 do {
2814 if (unlikely(n)) {
2815 spin_unlock_irqrestore(&n->list_lock, flags);
2816 n = NULL;
2817 }
2818 prior = page->freelist;
2819 counters = page->counters;
2820 set_freepointer(s, tail, prior);
2821 new.counters = counters;
2822 was_frozen = new.frozen;
2823 new.inuse -= cnt;
2824 if ((!new.inuse || !prior) && !was_frozen) {
2825
2826 if (kmem_cache_has_cpu_partial(s) && !prior) {
2827
2828 /*
2829 * Slab was on no list before and will be
2830 * partially empty.
2831 * We can defer the list move and instead
2832 * freeze it.
2833 */
2834 new.frozen = 1;
2835
2836 } else {
2837
2838 n = get_node(s, page_to_nid(page));
2839 /*
2840 * Speculatively acquire the list_lock.
2841 * If the cmpxchg does not succeed then we may
2842 * drop the list_lock without any processing.
2843 *
2844 * Otherwise the list_lock will synchronize with
2845 * other processors updating the list of slabs.
2846 */
2847 spin_lock_irqsave(&n->list_lock, flags);
2848
2849 }
2850 }
2851
2852 } while (!cmpxchg_double_slab(s, page,
2853 prior, counters,
2854 head, new.counters,
2855 "__slab_free"));
2856
2857 if (likely(!n)) {
2858
2859 /*
2860 * If we just froze the page then put it onto the
2861 * per cpu partial list.
2862 */
2863 if (new.frozen && !was_frozen) {
2864 put_cpu_partial(s, page, 1);
2865 stat(s, CPU_PARTIAL_FREE);
2866 }
2867 /*
2868 * The list lock was not taken therefore no list
2869 * activity can be necessary.
2870 */
2871 if (was_frozen)
2872 stat(s, FREE_FROZEN);
2873 return;
2874 }
2875
2876 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2877 goto slab_empty;
2878
2879 /*
2880 * Objects left in the slab. If it was not on the partial list before
2881 * then add it.
2882 */
2883 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2884 if (kmem_cache_debug(s))
2885 remove_full(s, n, page);
2886 add_partial(n, page, DEACTIVATE_TO_TAIL);
2887 stat(s, FREE_ADD_PARTIAL);
2888 }
2889 spin_unlock_irqrestore(&n->list_lock, flags);
2890 return;
2891
2892slab_empty:
2893 if (prior) {
2894 /*
2895 * Slab on the partial list.
2896 */
2897 remove_partial(n, page);
2898 stat(s, FREE_REMOVE_PARTIAL);
2899 } else {
2900 /* Slab must be on the full list */
2901 remove_full(s, n, page);
2902 }
2903
2904 spin_unlock_irqrestore(&n->list_lock, flags);
2905 stat(s, FREE_SLAB);
2906 discard_slab(s, page);
2907}
2908
2909/*
2910 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
2911 * can perform fastpath freeing without additional function calls.
2912 *
2913 * The fastpath is only possible if we are freeing to the current cpu slab
2914 * of this processor. This is typically the case if we have just allocated
2915 * the item before.
2916 *
2917 * If fastpath is not possible then fall back to __slab_free where we deal
2918 * with all sorts of special processing.
2919 *
2920 * Bulk free of a freelist with several objects (all pointing to the
2921 * same page) is possible by specifying head and tail pointers, plus an
2922 * object count (cnt). Bulk free is indicated by the tail pointer being set.
2923 */
2924static __always_inline void do_slab_free(struct kmem_cache *s,
2925 struct page *page, void *head, void *tail,
2926 int cnt, unsigned long addr)
2927{
2928 void *tail_obj = tail ? : head;
2929 struct kmem_cache_cpu *c;
2930 unsigned long tid;
2931redo:
2932 /*
2933 * Determine the current cpu's per cpu slab.
2934 * The cpu may change afterward. However that does not matter since
2935 * data is retrieved via this pointer. If we are on the same cpu
2936 * during the cmpxchg then the free will succeed.
2937 */
2938 do {
2939 tid = this_cpu_read(s->cpu_slab->tid);
2940 c = raw_cpu_ptr(s->cpu_slab);
2941 } while (IS_ENABLED(CONFIG_PREEMPT) &&
2942 unlikely(tid != READ_ONCE(c->tid)));
2943
2944 /* Same with comment on barrier() in slab_alloc_node() */
2945 barrier();
2946
2947 if (likely(page == c->page)) {
2948 set_freepointer(s, tail_obj, c->freelist);
2949
2950 if (unlikely(!this_cpu_cmpxchg_double(
2951 s->cpu_slab->freelist, s->cpu_slab->tid,
2952 c->freelist, tid,
2953 head, next_tid(tid)))) {
2954
2955 note_cmpxchg_failure("slab_free", s, tid);
2956 goto redo;
2957 }
2958 stat(s, FREE_FASTPATH);
2959 } else
2960 __slab_free(s, page, head, tail_obj, cnt, addr);
2961
2962}
2963
2964static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
2965 void *head, void *tail, int cnt,
2966 unsigned long addr)
2967{
2968 slab_free_freelist_hook(s, head, tail);
2969 /*
2970 * slab_free_freelist_hook() could have put the items into quarantine.
2971 * If so, no need to free them.
2972 */
2973 if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
2974 return;
2975 do_slab_free(s, page, head, tail, cnt, addr);
2976}
2977
2978#ifdef CONFIG_KASAN
2979void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
2980{
2981 do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
2982}
2983#endif
2984
2985void kmem_cache_free(struct kmem_cache *s, void *x)
2986{
2987 s = cache_from_obj(s, x);
2988 if (!s)
2989 return;
2990 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
2991 trace_kmem_cache_free(_RET_IP_, x);
2992}
2993EXPORT_SYMBOL(kmem_cache_free);
2994
2995struct detached_freelist {
2996 struct page *page;
2997 void *tail;
2998 void *freelist;
2999 int cnt;
3000 struct kmem_cache *s;
3001};
3002
3003/*
3004 * This function progressively scans the array with free objects (with
3005 * a limited look ahead) and extracts objects belonging to the same
3006 * page.  It builds a detached freelist directly within the given
3007 * page/objects.  This can happen without any need for
3008 * synchronization, because the objects are owned by the running process.
3009 * The freelist is built up as a single linked list in the objects.
3010 * The idea is, that this detached freelist can then be bulk
3011 * transferred to the real freelist(s), but only requiring a single
3012 * synchronization primitive.  Look ahead in the array is limited due
3013 * to performance reasons.
3014 */
3015static inline
3016int build_detached_freelist(struct kmem_cache *s, size_t size,
3017 void **p, struct detached_freelist *df)
3018{
3019 size_t first_skipped_index = 0;
3020 int lookahead = 3;
3021 void *object;
3022 struct page *page;
3023
3024 /* Always re-init detached_freelist */
3025 df->page = NULL;
3026
3027 do {
3028 object = p[--size];
3029
3030 } while (!object && size);
3031
3032 if (!object)
3033 return 0;
3034
3035 page = virt_to_head_page(object);
3036 if (!s) {
3037 /* Handle kmalloc'ed objects */
3038 if (unlikely(!PageSlab(page))) {
3039 BUG_ON(!PageCompound(page));
3040 kfree_hook(object);
3041 __free_pages(page, compound_order(page));
3042 p[size] = NULL;
3043 return size;
3044 }
3045
3046 df->s = page->slab_cache;
3047 } else {
3048 df->s = cache_from_obj(s, object);
3049 }
3050
3051 /* Start new detached freelist */
3052 df->page = page;
3053 set_freepointer(df->s, object, NULL);
3054 df->tail = object;
3055 df->freelist = object;
3056 p[size] = NULL;
3057 df->cnt = 1;
3058
3059 while (size) {
3060 object = p[--size];
3061 if (!object)
3062 continue;
3063
3064
3065 if (df->page == virt_to_head_page(object)) {
3066
3067 set_freepointer(df->s, object, df->freelist);
3068 df->freelist = object;
3069 df->cnt++;
3070 p[size] = NULL;
3071
3072 continue;
3073 }
3074
3075 /* Limit look ahead search */
3076 if (!--lookahead)
3077 break;
3078
3079 if (!first_skipped_index)
3080 first_skipped_index = size + 1;
3081 }
3082
3083 return first_skipped_index;
3084}
3085
3086/* Note that interrupts must be enabled when calling this function. */
3087void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3088{
3089 if (WARN_ON(!size))
3090 return;
3091
3092 do {
3093 struct detached_freelist df;
3094
3095 size = build_detached_freelist(s, size, p, &df);
3096 if (!df.page)
3097 continue;
3098
3099 slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
3100 } while (likely(size));
3101}
3102EXPORT_SYMBOL(kmem_cache_free_bulk);
3103
3104/* Note that interrupts must be enabled when calling this function. */
3105int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3106 void **p)
3107{
3108 struct kmem_cache_cpu *c;
3109 int i;
3110
3111 /* memcg and kmem_cache debug support */
3112 s = slab_pre_alloc_hook(s, flags);
3113 if (unlikely(!s))
3114 return false;
3115 /*
3116 * Drain objects in the per cpu slab, while disabling local
3117 * IRQs, which protects against PREEMPT and interrupt
3118 * handlers invoking the normal fastpath.
3119 */
3120 local_irq_disable();
3121 c = this_cpu_ptr(s->cpu_slab);
3122
3123 for (i = 0; i < size; i++) {
3124 void *object = c->freelist;
3125
3126 if (unlikely(!object)) {
3127 /*
3128 * Invoking the slow path likely has the side effect
3129 * of re-populating the per CPU c->freelist
3130 */
3131 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3132 _RET_IP_, c);
3133 if (unlikely(!p[i]))
3134 goto error;
3135
3136 c = this_cpu_ptr(s->cpu_slab);
3137 continue;
3138 }
3139 c->freelist = get_freepointer(s, object);
3140 p[i] = object;
3141 }
3142 c->tid = next_tid(c->tid);
3143 local_irq_enable();
3144
3145 /* Clear memory outside IRQ disabled fastpath loop */
3146 if (unlikely(flags & __GFP_ZERO)) {
3147 int j;
3148
3149 for (j = 0; j < i; j++)
3150 memset(p[j], 0, s->object_size);
3151 }
3152
3153 /* memcg and kmem_cache debug support */
3154 slab_post_alloc_hook(s, flags, size, p);
3155 return i;
3156error:
3157 local_irq_enable();
3158 slab_post_alloc_hook(s, flags, i, p);
3159 __kmem_cache_free_bulk(s, i, p);
3160 return 0;
3161}
3162EXPORT_SYMBOL(kmem_cache_alloc_bulk);
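
/*
 * Sketch of a bulk user (hypothetical caller, not part of this file):
 *
 *	void *objs[16];
 *	int n = kmem_cache_alloc_bulk(foo_cache, GFP_KERNEL, 16, objs);
 *
 *	if (n)
 *		kmem_cache_free_bulk(foo_cache, n, objs);
 *
 * kmem_cache_alloc_bulk() either returns the full count or 0; a partial
 * allocation is rolled back in the error path above before returning.
 */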
3163
3164
3165/*
3166 * Object placement in a slab is made very easy because we always start at
3167 * offset 0. If we tune the size of the object to the alignment then we can
3168 * get the required alignment by putting one properly sized object after
3169 * another.
3170 *
3171 * Notice that the allocation order determines the sizes of the per cpu
3172 * caches. Each processor always has one slab available for allocations.
3173 * Increasing the allocation order reduces the number of times that slabs
3174 * must be moved on and off the partial lists and is therefore a factor in
3175 * locking overhead.
3176 */
3177
3178/*
3179 * Minimum / Maximum order of slab pages. This influences locking overhead
3180 * and slab fragmentation. A higher order reduces the number of partial slabs
3181 * and increases the number of allocations possible without having to
3182 * take the list_lock.
3183 */
3184static int slub_min_order;
3185static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3186static int slub_min_objects;
3187
3188/*
3189 * Calculate the order of allocation given a slab object size.
3190 *
3191 * The order of allocation has significant impact on performance and other
3192 * system components. Generally order 0 allocations should be preferred since
3193 * order 0 does not cause fragmentation in the page allocator. Larger objects
3194 * can be problematic to put into order 0 slabs because there may be too much
3195 * unused space left. We go to a higher order if more than 1/16th of the slab
3196 * would be wasted.
3197 *
3198 * In order to reach satisfactory performance we must ensure that a minimum
3199 * number of objects is in one slab. Otherwise we may generate too much
3200 * activity on the partial lists which requires taking the list_lock. This is
3201 * less a concern for large slabs though which are rarely used.
3202 *
3203 * slub_max_order specifies the order where we begin to stop considering the
3204 * number of objects in a slab as critical. If we reach slub_max_order then
3205 * we try to keep the page order as low as possible. So we accept more waste
3206 * of space in favor of a small page order.
3207 *
3208 * Higher order allocations also allow the placement of more objects in a
3209 * slab and thereby reduce object handling overhead. If the user has
3210 * requested a higher minimum order then we start with that one instead of
3211 * the smallest order which will fit the object.
3212 */
3213static inline int slab_order(int size, int min_objects,
3214 int max_order, int fract_leftover, int reserved)
3215{
3216 int order;
3217 int rem;
3218 int min_order = slub_min_order;
3219
3220 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
3221 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3222
3223 for (order = max(min_order, get_order(min_objects * size + reserved));
3224 order <= max_order; order++) {
3225
3226 unsigned long slab_size = PAGE_SIZE << order;
3227
3228 rem = (slab_size - reserved) % size;
3229
3230 if (rem <= slab_size / fract_leftover)
3231 break;
3232 }
3233
3234 return order;
3235}
3236
3237static inline int calculate_order(int size, int reserved)
3238{
3239 int order;
3240 int min_objects;
3241 int fraction;
3242 int max_objects;
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252 min_objects = slub_min_objects;
3253 if (!min_objects)
3254 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3255 max_objects = order_objects(slub_max_order, size, reserved);
3256 min_objects = min(min_objects, max_objects);
3257
3258 while (min_objects > 1) {
3259 fraction = 16;
3260 while (fraction >= 4) {
3261 order = slab_order(size, min_objects,
3262 slub_max_order, fraction, reserved);
3263 if (order <= slub_max_order)
3264 return order;
3265 fraction /= 2;
3266 }
3267 min_objects--;
3268 }
3269
3270
3271
3272
3273
3274 order = slab_order(size, 1, slub_max_order, 1, reserved);
3275 if (order <= slub_max_order)
3276 return order;
3277
3278
3279
3280
3281 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
3282 if (order < MAX_ORDER)
3283 return order;
3284 return -ENOSYS;
3285}
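
/*
 * Worked example (assuming 4K pages, the default slub_max_order of 3, no
 * reserved bytes and a machine with 4 possible CPUs, so min_objects starts
 * at 4 * (fls(4) + 1) = 16): for a 700 byte object, get_order(16 * 700) is 2,
 * and an order-2 slab of 16384 bytes holds 23 objects with 284 bytes left
 * over, well under 1/16th of the slab, so calculate_order() settles on
 * order 2.
 */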
3286
3287static void
3288init_kmem_cache_node(struct kmem_cache_node *n)
3289{
3290 n->nr_partial = 0;
3291 spin_lock_init(&n->list_lock);
3292 INIT_LIST_HEAD(&n->partial);
3293#ifdef CONFIG_SLUB_DEBUG
3294 atomic_long_set(&n->nr_slabs, 0);
3295 atomic_long_set(&n->total_objects, 0);
3296 INIT_LIST_HEAD(&n->full);
3297#endif
3298}
3299
3300static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3301{
3302 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3303 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3304
3305 /*
3306 * Must align to double word boundary for the double cmpxchg
3307 * instructions to work; see __pcpu_double_call_return_bool().
3308 */
3309 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3310 2 * sizeof(void *));
3311
3312 if (!s->cpu_slab)
3313 return 0;
3314
3315 init_kmem_cache_cpus(s);
3316
3317 return 1;
3318}
3319
3320static struct kmem_cache *kmem_cache_node;
3321
3322/*
3323 * No kmalloc_node yet so do it by hand. We know that this is the first
3324 * slab on the node for this slabcache. There are no concurrent accesses
3325 * possible.
3326 *
3327 * Note that this function only works on the kmem_cache_node
3328 * when allocating for the kmem_cache_node. This is used for bootstrapping
3329 * memory on a fresh node that has no slab structures yet.
3330 */
3331static void early_kmem_cache_node_alloc(int node)
3332{
3333 struct page *page;
3334 struct kmem_cache_node *n;
3335
3336 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3337
3338 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3339
3340 BUG_ON(!page);
3341 if (page_to_nid(page) != node) {
3342 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3343 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3344 }
3345
3346 n = page->freelist;
3347 BUG_ON(!n);
3348 page->freelist = get_freepointer(kmem_cache_node, n);
3349 page->inuse = 1;
3350 page->frozen = 0;
3351 kmem_cache_node->node[node] = n;
3352#ifdef CONFIG_SLUB_DEBUG
3353 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3354 init_tracking(kmem_cache_node, n);
3355#endif
3356 kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
3357 GFP_KERNEL);
3358 init_kmem_cache_node(n);
3359 inc_slabs_node(kmem_cache_node, node, page->objects);
3360
3361 /*
3362 * No locks need to be taken here as the slab has just been
3363 * initialized and there is no concurrent access.
3364 */
3365 __add_partial(n, page, DEACTIVATE_TO_HEAD);
3366}
3367
3368static void free_kmem_cache_nodes(struct kmem_cache *s)
3369{
3370 int node;
3371 struct kmem_cache_node *n;
3372
3373 for_each_kmem_cache_node(s, node, n) {
3374 s->node[node] = NULL;
3375 kmem_cache_free(kmem_cache_node, n);
3376 }
3377}
3378
3379void __kmem_cache_release(struct kmem_cache *s)
3380{
3381 cache_random_seq_destroy(s);
3382 free_percpu(s->cpu_slab);
3383 free_kmem_cache_nodes(s);
3384}
3385
3386static int init_kmem_cache_nodes(struct kmem_cache *s)
3387{
3388 int node;
3389
3390 for_each_node_state(node, N_NORMAL_MEMORY) {
3391 struct kmem_cache_node *n;
3392
3393 if (slab_state == DOWN) {
3394 early_kmem_cache_node_alloc(node);
3395 continue;
3396 }
3397 n = kmem_cache_alloc_node(kmem_cache_node,
3398 GFP_KERNEL, node);
3399
3400 if (!n) {
3401 free_kmem_cache_nodes(s);
3402 return 0;
3403 }
3404
3405 init_kmem_cache_node(n);
3406 s->node[node] = n;
3407 }
3408 return 1;
3409}
3410
3411static void set_min_partial(struct kmem_cache *s, unsigned long min)
3412{
3413 if (min < MIN_PARTIAL)
3414 min = MIN_PARTIAL;
3415 else if (min > MAX_PARTIAL)
3416 min = MAX_PARTIAL;
3417 s->min_partial = min;
3418}
3419
3420static void set_cpu_partial(struct kmem_cache *s)
3421{
3422#ifdef CONFIG_SLUB_CPU_PARTIAL
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440 if (!kmem_cache_has_cpu_partial(s))
3441 s->cpu_partial = 0;
3442 else if (s->size >= PAGE_SIZE)
3443 s->cpu_partial = 2;
3444 else if (s->size >= 1024)
3445 s->cpu_partial = 6;
3446 else if (s->size >= 256)
3447 s->cpu_partial = 13;
3448 else
3449 s->cpu_partial = 30;
3450#endif
3451}
3452
3453/*
3454 * calculate_sizes() determines the order and the distribution of data within
3455 * a slab object.
3456 */
3457static int calculate_sizes(struct kmem_cache *s, int forced_order)
3458{
3459 slab_flags_t flags = s->flags;
3460 size_t size = s->object_size;
3461 int order;
3462
3463
3464
3465
3466
3467
3468 size = ALIGN(size, sizeof(void *));
3469
3470#ifdef CONFIG_SLUB_DEBUG
3471 /*
3472 * Determine if we can poison the object itself. If the user of
3473 * the slab may touch the object after free or before allocation
3474 * then we should never poison the object itself.
3475 */
3476 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
3477 !s->ctor)
3478 s->flags |= __OBJECT_POISON;
3479 else
3480 s->flags &= ~__OBJECT_POISON;
3481
3482
3483
3484
3485
3486
3487
3488 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3489 size += sizeof(void *);
3490#endif
3491
3492
3493
3494
3495
3496 s->inuse = size;
3497
3498 if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
3499 s->ctor)) {
3500 /*
3501 * Relocate the free pointer after the object if it is not
3502 * permitted to overwrite the first word of the object on
3503 * kmem_cache_free.
3504 *
3505 * This is the case if we do RCU, have a constructor or
3506 * destructor or are poisoning the objects.
3507 */
3508 s->offset = size;
3509 size += sizeof(void *);
3510 }
3511
3512#ifdef CONFIG_SLUB_DEBUG
3513 if (flags & SLAB_STORE_USER)
3514
3515
3516
3517
3518 size += 2 * sizeof(struct track);
3519#endif
3520
3521 kasan_cache_create(s, &size, &s->flags);
3522#ifdef CONFIG_SLUB_DEBUG
3523 if (flags & SLAB_RED_ZONE) {
3524 /*
3525 * Add some empty padding so that we can catch
3526 * overwrites from earlier objects rather than let
3527 * tracking information or the free pointer be
3528 * corrupted if a user writes before the start
3529 * of the object.
3530 */
3531 size += sizeof(void *);
3532
3533 s->red_left_pad = sizeof(void *);
3534 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3535 size += s->red_left_pad;
3536 }
3537#endif
3538
3539 /*
3540 * SLUB stores one object immediately after another beginning from
3541 * offset 0. In order to align the objects we have to simply size
3542 * each object to conform to the alignment.
3543 */
3544 size = ALIGN(size, s->align);
3545 s->size = size;
3546 if (forced_order >= 0)
3547 order = forced_order;
3548 else
3549 order = calculate_order(size, s->reserved);
3550
3551 if (order < 0)
3552 return 0;
3553
3554 s->allocflags = 0;
3555 if (order)
3556 s->allocflags |= __GFP_COMP;
3557
3558 if (s->flags & SLAB_CACHE_DMA)
3559 s->allocflags |= GFP_DMA;
3560
3561 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3562 s->allocflags |= __GFP_RECLAIMABLE;
3563
3564
3565
3566
3567 s->oo = oo_make(order, size, s->reserved);
3568 s->min = oo_make(get_order(size), size, s->reserved);
3569 if (oo_objects(s->oo) > oo_objects(s->max))
3570 s->max = s->oo;
3571
3572 return !!oo_objects(s->oo);
3573}
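
/*
 * Layout sketch for a cache without any debug flags: a 24 byte object with
 * no constructor keeps s->offset at 0, so the freelist pointer simply reuses
 * the first word of a free object and s->size stays at the word-aligned 24.
 * Adding a constructor or SLAB_TYPESAFE_BY_RCU moves the free pointer behind
 * the object (s->offset = 24) and grows s->size by sizeof(void *), since the
 * object contents may then no longer be overwritten while the object sits on
 * a freelist.
 */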
3574
3575static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
3576{
3577 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3578 s->reserved = 0;
3579#ifdef CONFIG_SLAB_FREELIST_HARDENED
3580 s->random = get_random_long();
3581#endif
3582
3583 if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU))
3584 s->reserved = sizeof(struct rcu_head);
3585
3586 if (!calculate_sizes(s, -1))
3587 goto error;
3588 if (disable_higher_order_debug) {
3589
3590
3591
3592
3593 if (get_order(s->size) > get_order(s->object_size)) {
3594 s->flags &= ~DEBUG_METADATA_FLAGS;
3595 s->offset = 0;
3596 if (!calculate_sizes(s, -1))
3597 goto error;
3598 }
3599 }
3600
3601#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3602 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3603 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3604
3605 s->flags |= __CMPXCHG_DOUBLE;
3606#endif
3607
3608 /*
3609 * The larger the object size is, the more pages we want on the partial
3610 * list to avoid pounding the page allocator excessively.
3611 */
3612 set_min_partial(s, ilog2(s->size) / 2);
3613
3614 set_cpu_partial(s);
3615
3616#ifdef CONFIG_NUMA
3617 s->remote_node_defrag_ratio = 1000;
3618#endif
3619
3620
3621 if (slab_state >= UP) {
3622 if (init_cache_random_seq(s))
3623 goto error;
3624 }
3625
3626 if (!init_kmem_cache_nodes(s))
3627 goto error;
3628
3629 if (alloc_kmem_cache_cpus(s))
3630 return 0;
3631
3632 free_kmem_cache_nodes(s);
3633error:
3634 if (flags & SLAB_PANIC)
3635 panic("Cannot create slab %s size=%lu realsize=%u order=%u offset=%u flags=%lx\n",
3636 s->name, (unsigned long)s->size, s->size,
3637 oo_order(s->oo), s->offset, (unsigned long)flags);
3638 return -EINVAL;
3639}
3640
3641static void list_slab_objects(struct kmem_cache *s, struct page *page,
3642 const char *text)
3643{
3644#ifdef CONFIG_SLUB_DEBUG
3645 void *addr = page_address(page);
3646 void *p;
3647 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3648 sizeof(long), GFP_ATOMIC);
3649 if (!map)
3650 return;
3651 slab_err(s, page, text, s->name);
3652 slab_lock(page);
3653
3654 get_map(s, page, map);
3655 for_each_object(p, s, addr, page->objects) {
3656
3657 if (!test_bit(slab_index(p, s, addr), map)) {
3658 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3659 print_tracking(s, p);
3660 }
3661 }
3662 slab_unlock(page);
3663 kfree(map);
3664#endif
3665}
3666
3667/*
3668 * Attempt to free all partial slabs on a node.
3669 * This is called from __kmem_cache_shutdown(). We must take list_lock
3670 * because a sysfs file might still access the partial list after the
3671 * shutdown has started.
3672 */
3672static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3673{
3674 LIST_HEAD(discard);
3675 struct page *page, *h;
3676
3677 BUG_ON(irqs_disabled());
3678 spin_lock_irq(&n->list_lock);
3679 list_for_each_entry_safe(page, h, &n->partial, lru) {
3680 if (!page->inuse) {
3681 remove_partial(n, page);
3682 list_add(&page->lru, &discard);
3683 } else {
3684 list_slab_objects(s, page,
3685 "Objects remaining in %s on __kmem_cache_shutdown()");
3686 }
3687 }
3688 spin_unlock_irq(&n->list_lock);
3689
3690 list_for_each_entry_safe(page, h, &discard, lru)
3691 discard_slab(s, page);
3692}
3693
3694/*
3695 * Release all resources used by a slab cache.
3696 */
3697int __kmem_cache_shutdown(struct kmem_cache *s)
3698{
3699 int node;
3700 struct kmem_cache_node *n;
3701
3702 flush_all(s);
3703
3704 for_each_kmem_cache_node(s, node, n) {
3705 free_partial(s, n);
3706 if (n->nr_partial || slabs_node(s, node))
3707 return 1;
3708 }
3709 sysfs_slab_remove(s);
3710 return 0;
3711}
3712
3713/********************************************************************
3714 * Kmalloc subsystem
3715 *******************************************************************/
3716
3717static int __init setup_slub_min_order(char *str)
3718{
3719 get_option(&str, &slub_min_order);
3720
3721 return 1;
3722}
3723
3724__setup("slub_min_order=", setup_slub_min_order);
3725
3726static int __init setup_slub_max_order(char *str)
3727{
3728 get_option(&str, &slub_max_order);
3729 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3730
3731 return 1;
3732}
3733
3734__setup("slub_max_order=", setup_slub_max_order);
3735
3736static int __init setup_slub_min_objects(char *str)
3737{
3738 get_option(&str, &slub_min_objects);
3739
3740 return 1;
3741}
3742
3743__setup("slub_min_objects=", setup_slub_min_objects);
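
/*
 * Example boot parameters for the three knobs above (see the SLUB
 * documentation under Documentation/ for details):
 *
 *	slub_min_objects=20 slub_max_order=1
 *
 * asks the order calculation to aim for at least 20 objects per slab while
 * capping slab pages at order 1.
 */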
3744
3745void *__kmalloc(size_t size, gfp_t flags)
3746{
3747 struct kmem_cache *s;
3748 void *ret;
3749
3750 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3751 return kmalloc_large(size, flags);
3752
3753 s = kmalloc_slab(size, flags);
3754
3755 if (unlikely(ZERO_OR_NULL_PTR(s)))
3756 return s;
3757
3758 ret = slab_alloc(s, flags, _RET_IP_);
3759
3760 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3761
3762 kasan_kmalloc(s, ret, size, flags);
3763
3764 return ret;
3765}
3766EXPORT_SYMBOL(__kmalloc);
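
/*
 * Note that requests above KMALLOC_MAX_CACHE_SIZE never touch the slab
 * machinery at all: kmalloc_large() hands them straight to the page
 * allocator, which is also why kfree() below has a !PageSlab() path that
 * calls __free_pages() directly.
 */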
3767
3768#ifdef CONFIG_NUMA
3769static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3770{
3771 struct page *page;
3772 void *ptr = NULL;
3773
3774 flags |= __GFP_COMP;
3775 page = alloc_pages_node(node, flags, get_order(size));
3776 if (page)
3777 ptr = page_address(page);
3778
3779 kmalloc_large_node_hook(ptr, size, flags);
3780 return ptr;
3781}
3782
3783void *__kmalloc_node(size_t size, gfp_t flags, int node)
3784{
3785 struct kmem_cache *s;
3786 void *ret;
3787
3788 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3789 ret = kmalloc_large_node(size, flags, node);
3790
3791 trace_kmalloc_node(_RET_IP_, ret,
3792 size, PAGE_SIZE << get_order(size),
3793 flags, node);
3794
3795 return ret;
3796 }
3797
3798 s = kmalloc_slab(size, flags);
3799
3800 if (unlikely(ZERO_OR_NULL_PTR(s)))
3801 return s;
3802
3803 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3804
3805 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3806
3807 kasan_kmalloc(s, ret, size, flags);
3808
3809 return ret;
3810}
3811EXPORT_SYMBOL(__kmalloc_node);
3812#endif
3813
3814#ifdef CONFIG_HARDENED_USERCOPY
3815/*
3816 * Rejects incorrectly sized objects and objects that are to be copied
3817 * to/from userspace but do not fall entirely within the containing slab
3818 * cache's usercopy region.
3819 *
3820 * Aborts the copy (or only warns, when usercopy_fallback is enabled and the
3821 * copy still fits within the allocated object) if the check fails.
3822 */
3823void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
3824 bool to_user)
3825{
3826 struct kmem_cache *s;
3827 unsigned long offset;
3828 size_t object_size;
3829
3830
3831 s = page->slab_cache;
3832
3833
3834 if (ptr < page_address(page))
3835 usercopy_abort("SLUB object not in SLUB page?!", NULL,
3836 to_user, 0, n);
3837
3838
3839 offset = (ptr - page_address(page)) % s->size;
3840
3841
3842 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3843 if (offset < s->red_left_pad)
3844 usercopy_abort("SLUB object in left red zone",
3845 s->name, to_user, offset, n);
3846 offset -= s->red_left_pad;
3847 }
3848
3849
3850 if (offset >= s->useroffset &&
3851 offset - s->useroffset <= s->usersize &&
3852 n <= s->useroffset - offset + s->usersize)
3853 return;
3854
3855
3856
3857
3858
3859
3860
3861 object_size = slab_ksize(s);
3862 if (usercopy_fallback &&
3863 offset <= object_size && n <= object_size - offset) {
3864 usercopy_warn("SLUB object", s->name, to_user, offset, n);
3865 return;
3866 }
3867
3868 usercopy_abort("SLUB object", s->name, to_user, offset, n);
3869}
3870#endif
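
/*
 * Example of the check above, assuming a cache with s->size == 256,
 * s->useroffset == 0 and s->usersize == 256 (the whole object whitelisted):
 * copying 200 bytes starting 100 bytes into an object computes offset == 100,
 * fails the usersize test because 200 > 0 - 100 + 256, and also fails the
 * object-size fallback, so usercopy_abort() fires rather than allowing a
 * 44 byte overrun past the end of the object.
 */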
3871
3872static size_t __ksize(const void *object)
3873{
3874 struct page *page;
3875
3876 if (unlikely(object == ZERO_SIZE_PTR))
3877 return 0;
3878
3879 page = virt_to_head_page(object);
3880
3881 if (unlikely(!PageSlab(page))) {
3882 WARN_ON(!PageCompound(page));
3883 return PAGE_SIZE << compound_order(page);
3884 }
3885
3886 return slab_ksize(page->slab_cache);
3887}
3888
3889size_t ksize(const void *object)
3890{
3891 size_t size = __ksize(object);
3892 /* We assume that ksize callers could use the whole allocated area,
3893 * so we need to unpoison this area.
3894 */
3895 kasan_unpoison_shadow(object, size);
3896 return size;
3897}
3898EXPORT_SYMBOL(ksize);
3899
3900void kfree(const void *x)
3901{
3902 struct page *page;
3903 void *object = (void *)x;
3904
3905 trace_kfree(_RET_IP_, x);
3906
3907 if (unlikely(ZERO_OR_NULL_PTR(x)))
3908 return;
3909
3910 page = virt_to_head_page(x);
3911 if (unlikely(!PageSlab(page))) {
3912 BUG_ON(!PageCompound(page));
3913 kfree_hook(object);
3914 __free_pages(page, compound_order(page));
3915 return;
3916 }
3917 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3918}
3919EXPORT_SYMBOL(kfree);
3920
3921#define SHRINK_PROMOTE_MAX 32
3922
3923/*
3924 * kmem_cache_shrink discards empty slabs and promotes the slabs filled
3925 * up most to the head of the partial lists. New allocations will then
3926 * fill those up and thus they can be removed from the partial lists.
3927 *
3928 * The slabs with the least items are placed last. This results in them
3929 * being allocated from last increasing the chance that the last objects
3930 * are freed in them.
3931 */
3932int __kmem_cache_shrink(struct kmem_cache *s)
3933{
3934 int node;
3935 int i;
3936 struct kmem_cache_node *n;
3937 struct page *page;
3938 struct page *t;
3939 struct list_head discard;
3940 struct list_head promote[SHRINK_PROMOTE_MAX];
3941 unsigned long flags;
3942 int ret = 0;
3943
3944 flush_all(s);
3945 for_each_kmem_cache_node(s, node, n) {
3946 INIT_LIST_HEAD(&discard);
3947 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
3948 INIT_LIST_HEAD(promote + i);
3949
3950 spin_lock_irqsave(&n->list_lock, flags);
3951
3952 /*
3953 * Build lists of slabs to discard or promote.
3954 *
3955 * Note that concurrent frees may occur while we hold the
3956 * list_lock. page->inuse here is the upper limit.
3957 */
3958 list_for_each_entry_safe(page, t, &n->partial, lru) {
3959 int free = page->objects - page->inuse;
3960
3961 /* Do not reread page->inuse */
3962 barrier();
3963
3964 /* We do not keep full slabs on the list */
3965 BUG_ON(free <= 0);
3966
3967 if (free == page->objects) {
3968 list_move(&page->lru, &discard);
3969 n->nr_partial--;
3970 } else if (free <= SHRINK_PROMOTE_MAX)
3971 list_move(&page->lru, promote + free - 1);
3972 }
3973
3974
3975
3976
3977
3978 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
3979 list_splice(promote + i, &n->partial);
3980
3981 spin_unlock_irqrestore(&n->list_lock, flags);
3982
3983
3984 list_for_each_entry_safe(page, t, &discard, lru)
3985 discard_slab(s, page);
3986
3987 if (slabs_node(s, node))
3988 ret = 1;
3989 }
3990
3991 return ret;
3992}
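
/*
 * The promote array above buckets partial slabs by how many free objects
 * they have: a slab with a single free object lands in promote[0] and ends
 * up at the head of the partial list, while one with, say, three free
 * objects goes to promote[2] and is spliced in later, behind the fuller
 * slabs. Completely free slabs skip the buckets entirely and are discarded.
 */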
3993
3994#ifdef CONFIG_MEMCG
3995static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
3996{
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009 if (!__kmem_cache_shrink(s))
4010 sysfs_slab_remove(s);
4011}
4012
4013void __kmemcg_cache_deactivate(struct kmem_cache *s)
4014{
4015
4016
4017
4018
4019 slub_set_cpu_partial(s, 0);
4020 s->min_partial = 0;
4021
4022
4023
4024
4025
4026 slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
4027}
4028#endif
4029
4030static int slab_mem_going_offline_callback(void *arg)
4031{
4032 struct kmem_cache *s;
4033
4034 mutex_lock(&slab_mutex);
4035 list_for_each_entry(s, &slab_caches, list)
4036 __kmem_cache_shrink(s);
4037 mutex_unlock(&slab_mutex);
4038
4039 return 0;
4040}
4041
4042static void slab_mem_offline_callback(void *arg)
4043{
4044 struct kmem_cache_node *n;
4045 struct kmem_cache *s;
4046 struct memory_notify *marg = arg;
4047 int offline_node;
4048
4049 offline_node = marg->status_change_nid_normal;
4050
4051 /*
4052 * If the node still has available memory then we need to keep
4053 * the kmem_cache_node structure for it, so do nothing here.
4054 */
4055 if (offline_node < 0)
4056 return;
4057
4058 mutex_lock(&slab_mutex);
4059 list_for_each_entry(s, &slab_caches, list) {
4060 n = get_node(s, offline_node);
4061 if (n) {
4062 /*
4063 * if n->nr_slabs > 0, slabs still exist on the node
4064 * that is going down. We were unable to free them,
4065 * and offline_pages() function shouldn't call this
4066 * callback. So, we must fail.
4067 */
4068 BUG_ON(slabs_node(s, offline_node));
4069
4070 s->node[offline_node] = NULL;
4071 kmem_cache_free(kmem_cache_node, n);
4072 }
4073 }
4074 mutex_unlock(&slab_mutex);
4075}
4076
4077static int slab_mem_going_online_callback(void *arg)
4078{
4079 struct kmem_cache_node *n;
4080 struct kmem_cache *s;
4081 struct memory_notify *marg = arg;
4082 int nid = marg->status_change_nid_normal;
4083 int ret = 0;
4084
4085
4086
4087
4088
4089 if (nid < 0)
4090 return 0;
4091
4092
4093
4094
4095
4096
4097 mutex_lock(&slab_mutex);
4098 list_for_each_entry(s, &slab_caches, list) {
4099
4100
4101
4102
4103
4104 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4105 if (!n) {
4106 ret = -ENOMEM;
4107 goto out;
4108 }
4109 init_kmem_cache_node(n);
4110 s->node[nid] = n;
4111 }
4112out:
4113 mutex_unlock(&slab_mutex);
4114 return ret;
4115}
4116
4117static int slab_memory_callback(struct notifier_block *self,
4118 unsigned long action, void *arg)
4119{
4120 int ret = 0;
4121
4122 switch (action) {
4123 case MEM_GOING_ONLINE:
4124 ret = slab_mem_going_online_callback(arg);
4125 break;
4126 case MEM_GOING_OFFLINE:
4127 ret = slab_mem_going_offline_callback(arg);
4128 break;
4129 case MEM_OFFLINE:
4130 case MEM_CANCEL_ONLINE:
4131 slab_mem_offline_callback(arg);
4132 break;
4133 case MEM_ONLINE:
4134 case MEM_CANCEL_OFFLINE:
4135 break;
4136 }
4137 if (ret)
4138 ret = notifier_from_errno(ret);
4139 else
4140 ret = NOTIFY_OK;
4141 return ret;
4142}
4143
4144static struct notifier_block slab_memory_callback_nb = {
4145 .notifier_call = slab_memory_callback,
4146 .priority = SLAB_CALLBACK_PRI,
4147};
4148
4149/********************************************************************
4150 * Basic setup of slabs
4151 *******************************************************************/
4152
4153/*
4154 * Used for early kmem_cache structures that were allocated using
4155 * the page allocator. Allocate them properly then fix up the pointers
4156 * that may be pointing to the wrong kmem_cache structure.
4157 */
4158
4159static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4160{
4161 int node;
4162 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4163 struct kmem_cache_node *n;
4164
4165 memcpy(s, static_cache, kmem_cache->object_size);
4166
4167 /*
4168 * This runs very early, and only the boot processor is supposed to be
4169 * up.  Even if it weren't true, IRQs are not up so we couldn't fire
4170 * up interrupts.
4171 */
4172 __flush_cpu_slab(s, smp_processor_id());
4173 for_each_kmem_cache_node(s, node, n) {
4174 struct page *p;
4175
4176 list_for_each_entry(p, &n->partial, lru)
4177 p->slab_cache = s;
4178
4179#ifdef CONFIG_SLUB_DEBUG
4180 list_for_each_entry(p, &n->full, lru)
4181 p->slab_cache = s;
4182#endif
4183 }
4184 slab_init_memcg_params(s);
4185 list_add(&s->list, &slab_caches);
4186 memcg_link_cache(s);
4187 return s;
4188}
4189
4190void __init kmem_cache_init(void)
4191{
4192 static __initdata struct kmem_cache boot_kmem_cache,
4193 boot_kmem_cache_node;
4194
4195 if (debug_guardpage_minorder())
4196 slub_max_order = 0;
4197
4198 kmem_cache_node = &boot_kmem_cache_node;
4199 kmem_cache = &boot_kmem_cache;
4200
4201 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4202 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4203
4204 register_hotmemory_notifier(&slab_memory_callback_nb);
4205
4206 /* Able to allocate the per node structures */
4207 slab_state = PARTIAL;
4208
4209 create_boot_cache(kmem_cache, "kmem_cache",
4210 offsetof(struct kmem_cache, node) +
4211 nr_node_ids * sizeof(struct kmem_cache_node *),
4212 SLAB_HWCACHE_ALIGN, 0, 0);
4213
4214 kmem_cache = bootstrap(&boot_kmem_cache);
4215
4216 /*
4217 * Allocate kmem_cache_node properly from the kmem_cache slab.
4218 * kmem_cache_node is separately allocated so no need to
4219 * update any list pointers.
4220 */
4221 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4222
4223 /* Now we can use the kmem_cache to allocate kmalloc slabs */
4224 setup_kmalloc_cache_index_table();
4225 create_kmalloc_caches(0);
4226
4227 /* Setup random freelists for each cache */
4228 init_freelist_randomization();
4229
4230 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4231 slub_cpu_dead);
4232
4233 pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%u, Nodes=%d\n",
4234 cache_line_size(),
4235 slub_min_order, slub_max_order, slub_min_objects,
4236 nr_cpu_ids, nr_node_ids);
4237}
4238
4239void __init kmem_cache_init_late(void)
4240{
4241}
4242
4243struct kmem_cache *
4244__kmem_cache_alias(const char *name, size_t size, size_t align,
4245 slab_flags_t flags, void (*ctor)(void *))
4246{
4247 struct kmem_cache *s, *c;
4248
4249 s = find_mergeable(size, align, flags, name, ctor);
4250 if (s) {
4251 s->refcount++;
4252
4253 /*
4254 * Adjust the object sizes so that we clear
4255 * the complete object on kzalloc.
4256 */
4257 s->object_size = max(s->object_size, (int)size);
4258 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
4259
4260 for_each_memcg_cache(c, s) {
4261 c->object_size = s->object_size;
4262 c->inuse = max_t(int, c->inuse,
4263 ALIGN(size, sizeof(void *)));
4264 }
4265
4266 if (sysfs_slab_alias(s, name)) {
4267 s->refcount--;
4268 s = NULL;
4269 }
4270 }
4271
4272 return s;
4273}
4274
4275int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4276{
4277 int err;
4278
4279 err = kmem_cache_open(s, flags);
4280 if (err)
4281 return err;
4282
4283 /* Mutex is not taken during early boot */
4284 if (slab_state <= UP)
4285 return 0;
4286
4287 memcg_propagate_slab_attrs(s);
4288 err = sysfs_slab_add(s);
4289 if (err)
4290 __kmem_cache_release(s);
4291
4292 return err;
4293}
4294
4295void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4296{
4297 struct kmem_cache *s;
4298 void *ret;
4299
4300 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4301 return kmalloc_large(size, gfpflags);
4302
4303 s = kmalloc_slab(size, gfpflags);
4304
4305 if (unlikely(ZERO_OR_NULL_PTR(s)))
4306 return s;
4307
4308 ret = slab_alloc(s, gfpflags, caller);
4309
4310
4311 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4312
4313 return ret;
4314}
4315
4316#ifdef CONFIG_NUMA
4317void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4318 int node, unsigned long caller)
4319{
4320 struct kmem_cache *s;
4321 void *ret;
4322
4323 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4324 ret = kmalloc_large_node(size, gfpflags, node);
4325
4326 trace_kmalloc_node(caller, ret,
4327 size, PAGE_SIZE << get_order(size),
4328 gfpflags, node);
4329
4330 return ret;
4331 }
4332
4333 s = kmalloc_slab(size, gfpflags);
4334
4335 if (unlikely(ZERO_OR_NULL_PTR(s)))
4336 return s;
4337
4338 ret = slab_alloc_node(s, gfpflags, node, caller);
4339
4340
4341 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4342
4343 return ret;
4344}
4345#endif
4346
4347#ifdef CONFIG_SYSFS
4348static int count_inuse(struct page *page)
4349{
4350 return page->inuse;
4351}
4352
4353static int count_total(struct page *page)
4354{
4355 return page->objects;
4356}
4357#endif
4358
4359#ifdef CONFIG_SLUB_DEBUG
4360static int validate_slab(struct kmem_cache *s, struct page *page,
4361 unsigned long *map)
4362{
4363 void *p;
4364 void *addr = page_address(page);
4365
4366 if (!check_slab(s, page) ||
4367 !on_freelist(s, page, NULL))
4368 return 0;
4369
4370 /* Now we know that a valid freelist exists */
4371 bitmap_zero(map, page->objects);
4372
4373 get_map(s, page, map);
4374 for_each_object(p, s, addr, page->objects) {
4375 if (test_bit(slab_index(p, s, addr), map))
4376 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4377 return 0;
4378 }
4379
4380 for_each_object(p, s, addr, page->objects)
4381 if (!test_bit(slab_index(p, s, addr), map))
4382 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4383 return 0;
4384 return 1;
4385}
4386
4387static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4388 unsigned long *map)
4389{
4390 slab_lock(page);
4391 validate_slab(s, page, map);
4392 slab_unlock(page);
4393}
4394
4395static int validate_slab_node(struct kmem_cache *s,
4396 struct kmem_cache_node *n, unsigned long *map)
4397{
4398 unsigned long count = 0;
4399 struct page *page;
4400 unsigned long flags;
4401
4402 spin_lock_irqsave(&n->list_lock, flags);
4403
4404 list_for_each_entry(page, &n->partial, lru) {
4405 validate_slab_slab(s, page, map);
4406 count++;
4407 }
4408 if (count != n->nr_partial)
4409 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
4410 s->name, count, n->nr_partial);
4411
4412 if (!(s->flags & SLAB_STORE_USER))
4413 goto out;
4414
4415 list_for_each_entry(page, &n->full, lru) {
4416 validate_slab_slab(s, page, map);
4417 count++;
4418 }
4419 if (count != atomic_long_read(&n->nr_slabs))
4420 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
4421 s->name, count, atomic_long_read(&n->nr_slabs));
4422
4423out:
4424 spin_unlock_irqrestore(&n->list_lock, flags);
4425 return count;
4426}
4427
4428static long validate_slab_cache(struct kmem_cache *s)
4429{
4430 int node;
4431 unsigned long count = 0;
4432 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4433 sizeof(unsigned long), GFP_KERNEL);
4434 struct kmem_cache_node *n;
4435
4436 if (!map)
4437 return -ENOMEM;
4438
4439 flush_all(s);
4440 for_each_kmem_cache_node(s, node, n)
4441 count += validate_slab_node(s, n, map);
4442 kfree(map);
4443 return count;
4444}
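
/*
 * validate_slab_cache() is normally reached from sysfs via validate_store()
 * below, i.e. something like
 *
 *	echo 1 > /sys/kernel/slab/kmalloc-64/validate
 *
 * walks every partial (and, with SLAB_STORE_USER, every full) slab of the
 * cache and re-checks freelists, red zones and poison values.
 */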
4445
4446/*
4447 * Generate lists of code addresses where slabcache objects are allocated
4448 * and freed.
4449 */
4450struct location {
4451 unsigned long count;
4452 unsigned long addr;
4453 long long sum_time;
4454 long min_time;
4455 long max_time;
4456 long min_pid;
4457 long max_pid;
4458 DECLARE_BITMAP(cpus, NR_CPUS);
4459 nodemask_t nodes;
4460};
4461
4462struct loc_track {
4463 unsigned long max;
4464 unsigned long count;
4465 struct location *loc;
4466};
4467
4468static void free_loc_track(struct loc_track *t)
4469{
4470 if (t->max)
4471 free_pages((unsigned long)t->loc,
4472 get_order(sizeof(struct location) * t->max));
4473}
4474
4475static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4476{
4477 struct location *l;
4478 int order;
4479
4480 order = get_order(sizeof(struct location) * max);
4481
4482 l = (void *)__get_free_pages(flags, order);
4483 if (!l)
4484 return 0;
4485
4486 if (t->count) {
4487 memcpy(l, t->loc, sizeof(struct location) * t->count);
4488 free_loc_track(t);
4489 }
4490 t->max = max;
4491 t->loc = l;
4492 return 1;
4493}
4494
4495static int add_location(struct loc_track *t, struct kmem_cache *s,
4496 const struct track *track)
4497{
4498 long start, end, pos;
4499 struct location *l;
4500 unsigned long caddr;
4501 unsigned long age = jiffies - track->when;
4502
4503 start = -1;
4504 end = t->count;
4505
4506 for ( ; ; ) {
4507 pos = start + (end - start + 1) / 2;
4508
4509
4510
4511
4512
4513 if (pos == end)
4514 break;
4515
4516 caddr = t->loc[pos].addr;
4517 if (track->addr == caddr) {
4518
4519 l = &t->loc[pos];
4520 l->count++;
4521 if (track->when) {
4522 l->sum_time += age;
4523 if (age < l->min_time)
4524 l->min_time = age;
4525 if (age > l->max_time)
4526 l->max_time = age;
4527
4528 if (track->pid < l->min_pid)
4529 l->min_pid = track->pid;
4530 if (track->pid > l->max_pid)
4531 l->max_pid = track->pid;
4532
4533 cpumask_set_cpu(track->cpu,
4534 to_cpumask(l->cpus));
4535 }
4536 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4537 return 1;
4538 }
4539
4540 if (track->addr < caddr)
4541 end = pos;
4542 else
4543 start = pos;
4544 }
4545
4546
4547
4548
4549 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4550 return 0;
4551
4552 l = t->loc + pos;
4553 if (pos < t->count)
4554 memmove(l + 1, l,
4555 (t->count - pos) * sizeof(struct location));
4556 t->count++;
4557 l->count = 1;
4558 l->addr = track->addr;
4559 l->sum_time = age;
4560 l->min_time = age;
4561 l->max_time = age;
4562 l->min_pid = track->pid;
4563 l->max_pid = track->pid;
4564 cpumask_clear(to_cpumask(l->cpus));
4565 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4566 nodes_clear(l->nodes);
4567 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4568 return 1;
4569}
4570
4571static void process_slab(struct loc_track *t, struct kmem_cache *s,
4572 struct page *page, enum track_item alloc,
4573 unsigned long *map)
4574{
4575 void *addr = page_address(page);
4576 void *p;
4577
4578 bitmap_zero(map, page->objects);
4579 get_map(s, page, map);
4580
4581 for_each_object(p, s, addr, page->objects)
4582 if (!test_bit(slab_index(p, s, addr), map))
4583 add_location(t, s, get_track(s, p, alloc));
4584}
4585
4586static int list_locations(struct kmem_cache *s, char *buf,
4587 enum track_item alloc)
4588{
4589 int len = 0;
4590 unsigned long i;
4591 struct loc_track t = { 0, 0, NULL };
4592 int node;
4593 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4594 sizeof(unsigned long), GFP_KERNEL);
4595 struct kmem_cache_node *n;
4596
4597 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4598 GFP_KERNEL)) {
4599 kfree(map);
4600 return sprintf(buf, "Out of memory\n");
4601 }
4602
4603 flush_all(s);
4604
4605 for_each_kmem_cache_node(s, node, n) {
4606 unsigned long flags;
4607 struct page *page;
4608
4609 if (!atomic_long_read(&n->nr_slabs))
4610 continue;
4611
4612 spin_lock_irqsave(&n->list_lock, flags);
4613 list_for_each_entry(page, &n->partial, lru)
4614 process_slab(&t, s, page, alloc, map);
4615 list_for_each_entry(page, &n->full, lru)
4616 process_slab(&t, s, page, alloc, map);
4617 spin_unlock_irqrestore(&n->list_lock, flags);
4618 }
4619
4620 for (i = 0; i < t.count; i++) {
4621 struct location *l = &t.loc[i];
4622
4623 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4624 break;
4625 len += sprintf(buf + len, "%7ld ", l->count);
4626
4627 if (l->addr)
4628 len += sprintf(buf + len, "%pS", (void *)l->addr);
4629 else
4630 len += sprintf(buf + len, "<not-available>");
4631
4632 if (l->sum_time != l->min_time) {
4633 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4634 l->min_time,
4635 (long)div_u64(l->sum_time, l->count),
4636 l->max_time);
4637 } else
4638 len += sprintf(buf + len, " age=%ld",
4639 l->min_time);
4640
4641 if (l->min_pid != l->max_pid)
4642 len += sprintf(buf + len, " pid=%ld-%ld",
4643 l->min_pid, l->max_pid);
4644 else
4645 len += sprintf(buf + len, " pid=%ld",
4646 l->min_pid);
4647
4648 if (num_online_cpus() > 1 &&
4649 !cpumask_empty(to_cpumask(l->cpus)) &&
4650 len < PAGE_SIZE - 60)
4651 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4652 " cpus=%*pbl",
4653 cpumask_pr_args(to_cpumask(l->cpus)));
4654
4655 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4656 len < PAGE_SIZE - 60)
4657 len += scnprintf(buf + len, PAGE_SIZE - len - 50,
4658 " nodes=%*pbl",
4659 nodemask_pr_args(&l->nodes));
4660
4661 len += sprintf(buf + len, "\n");
4662 }
4663
4664 free_loc_track(&t);
4665 kfree(map);
4666 if (!t.count)
4667 len += sprintf(buf, "No data\n");
4668 return len;
4669}
4670#endif
4671
4672#ifdef SLUB_RESILIENCY_TEST
4673static void __init resiliency_test(void)
4674{
4675 u8 *p;
4676
4677 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4678
4679 pr_err("SLUB resiliency testing\n");
4680 pr_err("-----------------------\n");
4681 pr_err("A. Corruption after allocation\n");
4682
4683 p = kzalloc(16, GFP_KERNEL);
4684 p[16] = 0x12;
4685 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4686 p + 16);
4687
4688 validate_slab_cache(kmalloc_caches[4]);
4689
4690
4691 p = kzalloc(32, GFP_KERNEL);
4692 p[32 + sizeof(void *)] = 0x34;
4693 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> 0x%p\n",
4694 p);
4695 pr_err("If allocated object is overwritten then not detectable\n\n");
4696
4697 validate_slab_cache(kmalloc_caches[5]);
4698 p = kzalloc(64, GFP_KERNEL);
4699 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4700 *p = 0x56;
4701 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4702 p);
4703 pr_err("If allocated object is overwritten then not detectable\n\n");
4704 validate_slab_cache(kmalloc_caches[6]);
4705
4706 pr_err("\nB. Corruption after free\n");
4707 p = kzalloc(128, GFP_KERNEL);
4708 kfree(p);
4709 *p = 0x78;
4710 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4711 validate_slab_cache(kmalloc_caches[7]);
4712
4713 p = kzalloc(256, GFP_KERNEL);
4714 kfree(p);
4715 p[50] = 0x9a;
4716 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4717 validate_slab_cache(kmalloc_caches[8]);
4718
4719 p = kzalloc(512, GFP_KERNEL);
4720 kfree(p);
4721 p[512] = 0xab;
4722 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4723 validate_slab_cache(kmalloc_caches[9]);
4724}
4725#else
4726#ifdef CONFIG_SYSFS
4727static void resiliency_test(void) {};
4728#endif
4729#endif
4730
4731#ifdef CONFIG_SYSFS
4732enum slab_stat_type {
4733 SL_ALL,
4734 SL_PARTIAL,
4735 SL_CPU,
4736 SL_OBJECTS,
4737 SL_TOTAL
4738};
4739
4740#define SO_ALL (1 << SL_ALL)
4741#define SO_PARTIAL (1 << SL_PARTIAL)
4742#define SO_CPU (1 << SL_CPU)
4743#define SO_OBJECTS (1 << SL_OBJECTS)
4744#define SO_TOTAL (1 << SL_TOTAL)
4745
4746#ifdef CONFIG_MEMCG
4747static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
4748
4749static int __init setup_slub_memcg_sysfs(char *str)
4750{
4751 int v;
4752
4753 if (get_option(&str, &v) > 0)
4754 memcg_sysfs_enabled = v;
4755
4756 return 1;
4757}
4758
4759__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
4760#endif
4761
4762static ssize_t show_slab_objects(struct kmem_cache *s,
4763 char *buf, unsigned long flags)
4764{
4765 unsigned long total = 0;
4766 int node;
4767 int x;
4768 unsigned long *nodes;
4769
4770 nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4771 if (!nodes)
4772 return -ENOMEM;
4773
4774 if (flags & SO_CPU) {
4775 int cpu;
4776
4777 for_each_possible_cpu(cpu) {
4778 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4779 cpu);
4780 int node;
4781 struct page *page;
4782
4783 page = READ_ONCE(c->page);
4784 if (!page)
4785 continue;
4786
4787 node = page_to_nid(page);
4788 if (flags & SO_TOTAL)
4789 x = page->objects;
4790 else if (flags & SO_OBJECTS)
4791 x = page->inuse;
4792 else
4793 x = 1;
4794
4795 total += x;
4796 nodes[node] += x;
4797
4798 page = slub_percpu_partial_read_once(c);
4799 if (page) {
4800 node = page_to_nid(page);
4801 if (flags & SO_TOTAL)
4802 WARN_ON_ONCE(1);
4803 else if (flags & SO_OBJECTS)
4804 WARN_ON_ONCE(1);
4805 else
4806 x = page->pages;
4807 total += x;
4808 nodes[node] += x;
4809 }
4810 }
4811 }
4812
4813 get_online_mems();
4814#ifdef CONFIG_SLUB_DEBUG
4815 if (flags & SO_ALL) {
4816 struct kmem_cache_node *n;
4817
4818 for_each_kmem_cache_node(s, node, n) {
4819
4820 if (flags & SO_TOTAL)
4821 x = atomic_long_read(&n->total_objects);
4822 else if (flags & SO_OBJECTS)
4823 x = atomic_long_read(&n->total_objects) -
4824 count_partial(n, count_free);
4825 else
4826 x = atomic_long_read(&n->nr_slabs);
4827 total += x;
4828 nodes[node] += x;
4829 }
4830
4831 } else
4832#endif
4833 if (flags & SO_PARTIAL) {
4834 struct kmem_cache_node *n;
4835
4836 for_each_kmem_cache_node(s, node, n) {
4837 if (flags & SO_TOTAL)
4838 x = count_partial(n, count_total);
4839 else if (flags & SO_OBJECTS)
4840 x = count_partial(n, count_inuse);
4841 else
4842 x = n->nr_partial;
4843 total += x;
4844 nodes[node] += x;
4845 }
4846 }
4847 x = sprintf(buf, "%lu", total);
4848#ifdef CONFIG_NUMA
4849 for (node = 0; node < nr_node_ids; node++)
4850 if (nodes[node])
4851 x += sprintf(buf + x, " N%d=%lu",
4852 node, nodes[node]);
4853#endif
4854 put_online_mems();
4855 kfree(nodes);
4856 return x + sprintf(buf + x, "\n");
4857}
4858
4859#ifdef CONFIG_SLUB_DEBUG
4860static int any_slab_objects(struct kmem_cache *s)
4861{
4862 int node;
4863 struct kmem_cache_node *n;
4864
4865 for_each_kmem_cache_node(s, node, n)
4866 if (atomic_long_read(&n->total_objects))
4867 return 1;
4868
4869 return 0;
4870}
4871#endif
4872
4873#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4874#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4875
4876struct slab_attribute {
4877 struct attribute attr;
4878 ssize_t (*show)(struct kmem_cache *s, char *buf);
4879 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4880};
4881
4882#define SLAB_ATTR_RO(_name) \
4883 static struct slab_attribute _name##_attr = \
4884 __ATTR(_name, 0400, _name##_show, NULL)
4885
4886#define SLAB_ATTR(_name) \
4887 static struct slab_attribute _name##_attr = \
4888 __ATTR(_name, 0600, _name##_show, _name##_store)
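
/*
 * Each SLAB_ATTR()/SLAB_ATTR_RO() below becomes a file under
 * /sys/kernel/slab/<cache>/, so for example
 *
 *	cat /sys/kernel/slab/dentry/objs_per_slab
 *	echo 8 > /sys/kernel/slab/dentry/min_partial
 *
 * invoke the matching _show() and _store() handlers (assuming the dentry
 * cache on a SLUB kernel; any cache name works).
 */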
4889
4890static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4891{
4892 return sprintf(buf, "%d\n", s->size);
4893}
4894SLAB_ATTR_RO(slab_size);
4895
4896static ssize_t align_show(struct kmem_cache *s, char *buf)
4897{
4898 return sprintf(buf, "%d\n", s->align);
4899}
4900SLAB_ATTR_RO(align);
4901
4902static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4903{
4904 return sprintf(buf, "%d\n", s->object_size);
4905}
4906SLAB_ATTR_RO(object_size);
4907
4908static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4909{
4910 return sprintf(buf, "%d\n", oo_objects(s->oo));
4911}
4912SLAB_ATTR_RO(objs_per_slab);
4913
4914static ssize_t order_store(struct kmem_cache *s,
4915 const char *buf, size_t length)
4916{
4917 unsigned long order;
4918 int err;
4919
4920 err = kstrtoul(buf, 10, &order);
4921 if (err)
4922 return err;
4923
4924 if (order > slub_max_order || order < slub_min_order)
4925 return -EINVAL;
4926
4927 calculate_sizes(s, order);
4928 return length;
4929}
4930
4931static ssize_t order_show(struct kmem_cache *s, char *buf)
4932{
4933 return sprintf(buf, "%d\n", oo_order(s->oo));
4934}
4935SLAB_ATTR(order);
4936
4937static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4938{
4939 return sprintf(buf, "%lu\n", s->min_partial);
4940}
4941
4942static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4943 size_t length)
4944{
4945 unsigned long min;
4946 int err;
4947
4948 err = kstrtoul(buf, 10, &min);
4949 if (err)
4950 return err;
4951
4952 set_min_partial(s, min);
4953 return length;
4954}
4955SLAB_ATTR(min_partial);
4956
4957static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4958{
4959 return sprintf(buf, "%u\n", slub_cpu_partial(s));
4960}
4961
4962static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4963 size_t length)
4964{
4965 unsigned long objects;
4966 int err;
4967
4968 err = kstrtoul(buf, 10, &objects);
4969 if (err)
4970 return err;
4971 if (objects && !kmem_cache_has_cpu_partial(s))
4972 return -EINVAL;
4973
4974 slub_set_cpu_partial(s, objects);
4975 flush_all(s);
4976 return length;
4977}
4978SLAB_ATTR(cpu_partial);
4979
4980static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4981{
4982 if (!s->ctor)
4983 return 0;
4984 return sprintf(buf, "%pS\n", s->ctor);
4985}
4986SLAB_ATTR_RO(ctor);
4987
4988static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4989{
4990 return sprintf(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
4991}
4992SLAB_ATTR_RO(aliases);
4993
4994static ssize_t partial_show(struct kmem_cache *s, char *buf)
4995{
4996 return show_slab_objects(s, buf, SO_PARTIAL);
4997}
4998SLAB_ATTR_RO(partial);
4999
5000static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5001{
5002 return show_slab_objects(s, buf, SO_CPU);
5003}
5004SLAB_ATTR_RO(cpu_slabs);
5005
5006static ssize_t objects_show(struct kmem_cache *s, char *buf)
5007{
5008 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5009}
5010SLAB_ATTR_RO(objects);
5011
5012static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5013{
5014 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5015}
5016SLAB_ATTR_RO(objects_partial);
5017
5018static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5019{
5020 int objects = 0;
5021 int pages = 0;
5022 int cpu;
5023 int len;
5024
5025 for_each_online_cpu(cpu) {
5026 struct page *page;
5027
5028 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5029
5030 if (page) {
5031 pages += page->pages;
5032 objects += page->pobjects;
5033 }
5034 }
5035
5036 len = sprintf(buf, "%d(%d)", objects, pages);
5037
5038#ifdef CONFIG_SMP
5039 for_each_online_cpu(cpu) {
5040 struct page *page;
5041
5042 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5043
5044 if (page && len < PAGE_SIZE - 20)
5045 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
5046 page->pobjects, page->pages);
5047 }
5048#endif
5049 return len + sprintf(buf + len, "\n");
5050}
5051SLAB_ATTR_RO(slabs_cpu_partial);
5052
5053static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5054{
5055 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5056}
5057
5058static ssize_t reclaim_account_store(struct kmem_cache *s,
5059 const char *buf, size_t length)
5060{
5061 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
5062 if (buf[0] == '1')
5063 s->flags |= SLAB_RECLAIM_ACCOUNT;
5064 return length;
5065}
5066SLAB_ATTR(reclaim_account);
5067
5068static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5069{
5070 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5071}
5072SLAB_ATTR_RO(hwcache_align);
5073
5074#ifdef CONFIG_ZONE_DMA
5075static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5076{
5077 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5078}
5079SLAB_ATTR_RO(cache_dma);
5080#endif
5081
5082static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5083{
5084 return sprintf(buf, "%zu\n", s->usersize);
5085}
5086SLAB_ATTR_RO(usersize);
5087
5088static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5089{
5090 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5091}
5092SLAB_ATTR_RO(destroy_by_rcu);
5093
5094static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5095{
5096 return sprintf(buf, "%d\n", s->reserved);
5097}
5098SLAB_ATTR_RO(reserved);
5099
5100#ifdef CONFIG_SLUB_DEBUG
5101static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5102{
5103 return show_slab_objects(s, buf, SO_ALL);
5104}
5105SLAB_ATTR_RO(slabs);
5106
5107static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5108{
5109 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5110}
5111SLAB_ATTR_RO(total_objects);
5112
5113static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5114{
5115 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5116}
5117
5118static ssize_t sanity_checks_store(struct kmem_cache *s,
5119 const char *buf, size_t length)
5120{
5121 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5122 if (buf[0] == '1') {
5123 s->flags &= ~__CMPXCHG_DOUBLE;
5124 s->flags |= SLAB_CONSISTENCY_CHECKS;
5125 }
5126 return length;
5127}
5128SLAB_ATTR(sanity_checks);
5129
5130static ssize_t trace_show(struct kmem_cache *s, char *buf)
5131{
5132 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5133}
5134
5135static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5136 size_t length)
5137{
5138 /*
5139 * Tracing a merged cache is going to give confusing results
5140 * as well as cause other issues like converting a mergeable
5141 * cache into an unmergeable one.
5142 */
5143 if (s->refcount > 1)
5144 return -EINVAL;
5145
5146 s->flags &= ~SLAB_TRACE;
5147 if (buf[0] == '1') {
5148 s->flags &= ~__CMPXCHG_DOUBLE;
5149 s->flags |= SLAB_TRACE;
5150 }
5151 return length;
5152}
5153SLAB_ATTR(trace);
5154
5155static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5156{
5157 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5158}
5159
5160static ssize_t red_zone_store(struct kmem_cache *s,
5161 const char *buf, size_t length)
5162{
5163 if (any_slab_objects(s))
5164 return -EBUSY;
5165
5166 s->flags &= ~SLAB_RED_ZONE;
5167 if (buf[0] == '1') {
5168 s->flags |= SLAB_RED_ZONE;
5169 }
5170 calculate_sizes(s, -1);
5171 return length;
5172}
5173SLAB_ATTR(red_zone);
5174
5175static ssize_t poison_show(struct kmem_cache *s, char *buf)
5176{
5177 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5178}
5179
5180static ssize_t poison_store(struct kmem_cache *s,
5181 const char *buf, size_t length)
5182{
5183 if (any_slab_objects(s))
5184 return -EBUSY;
5185
5186 s->flags &= ~SLAB_POISON;
5187 if (buf[0] == '1') {
5188 s->flags |= SLAB_POISON;
5189 }
5190 calculate_sizes(s, -1);
5191 return length;
5192}
5193SLAB_ATTR(poison);
5194
5195static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5196{
5197 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5198}
5199
5200static ssize_t store_user_store(struct kmem_cache *s,
5201 const char *buf, size_t length)
5202{
5203 if (any_slab_objects(s))
5204 return -EBUSY;
5205
5206 s->flags &= ~SLAB_STORE_USER;
5207 if (buf[0] == '1') {
5208 s->flags &= ~__CMPXCHG_DOUBLE;
5209 s->flags |= SLAB_STORE_USER;
5210 }
5211 calculate_sizes(s, -1);
5212 return length;
5213}
5214SLAB_ATTR(store_user);
5215
5216static ssize_t validate_show(struct kmem_cache *s, char *buf)
5217{
5218 return 0;
5219}
5220
5221static ssize_t validate_store(struct kmem_cache *s,
5222 const char *buf, size_t length)
5223{
5224 int ret = -EINVAL;
5225
5226 if (buf[0] == '1') {
5227 ret = validate_slab_cache(s);
5228 if (ret >= 0)
5229 ret = length;
5230 }
5231 return ret;
5232}
5233SLAB_ATTR(validate);

static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_ALLOC);
}
SLAB_ATTR_RO(alloc_calls);

static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
{
	if (!(s->flags & SLAB_STORE_USER))
		return -ENOSYS;
	return list_locations(s, buf, TRACK_FREE);
}
SLAB_ATTR_RO(free_calls);
#endif /* CONFIG_SLUB_DEBUG */

#ifdef CONFIG_FAILSLAB
static ssize_t failslab_show(struct kmem_cache *s, char *buf)
{
	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
}

static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
							size_t length)
{
	if (s->refcount > 1)
		return -EINVAL;

	s->flags &= ~SLAB_FAILSLAB;
	if (buf[0] == '1')
		s->flags |= SLAB_FAILSLAB;
	return length;
}
SLAB_ATTR(failslab);
#endif

static ssize_t shrink_show(struct kmem_cache *s, char *buf)
{
	return 0;
}

static ssize_t shrink_store(struct kmem_cache *s,
			const char *buf, size_t length)
{
	if (buf[0] == '1')
		kmem_cache_shrink(s);
	else
		return -EINVAL;
	return length;
}
SLAB_ATTR(shrink);
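
/*
 * shrink: writing '1' calls kmem_cache_shrink(), which releases empty slabs
 * and reorders the partial lists; any other value is rejected with -EINVAL.
 * Sketch (cache name illustrative):
 *
 *	# echo 1 > /sys/kernel/slab/<cache>/shrink
 */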
5287
5288#ifdef CONFIG_NUMA
5289static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5290{
5291 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
5292}
5293
5294static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5295 const char *buf, size_t length)
5296{
5297 unsigned long ratio;
5298 int err;
5299
5300 err = kstrtoul(buf, 10, &ratio);
5301 if (err)
5302 return err;
5303
5304 if (ratio <= 100)
5305 s->remote_node_defrag_ratio = ratio * 10;
5306
5307 return length;
5308}
5309SLAB_ATTR(remote_node_defrag_ratio);
5310#endif
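
/*
 * The remote_node_defrag_ratio attribute above is presented to user space as
 * a percentage (0-100) but kept internally scaled by 10 (0-1000), hence the
 * "* 10" on store and "/ 10" on show.  For example, writing 20 stores 200
 * internally and reads back as 20; values above 100 are silently ignored.
 */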

#ifdef CONFIG_SLUB_STATS
static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
{
	unsigned long sum = 0;
	int cpu;
	int len;
	int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	for_each_online_cpu(cpu) {
		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];

		data[cpu] = x;
		sum += x;
	}

	len = sprintf(buf, "%lu", sum);

#ifdef CONFIG_SMP
	for_each_online_cpu(cpu) {
		if (data[cpu] && len < PAGE_SIZE - 20)
			len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
	}
#endif
	kfree(data);
	return len + sprintf(buf + len, "\n");
}
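
/*
 * show_stat() output format: the first number is the sum across all online
 * CPUs, followed (on SMP) by the non-zero per-CPU contributions.  An
 * illustrative line for alloc_fastpath might be:
 *
 *	4532642 C0=1203112 C1=1175530 C2=1098765 C3=1055235
 */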

static void clear_stat(struct kmem_cache *s, enum stat_item si)
{
	int cpu;

	for_each_online_cpu(cpu)
		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
}

#define STAT_ATTR(si, text)					\
static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
{								\
	return show_stat(s, buf, si);				\
}								\
static ssize_t text##_store(struct kmem_cache *s,		\
				const char *buf, size_t length)	\
{								\
	if (buf[0] != '0')					\
		return -EINVAL;					\
	clear_stat(s, si);					\
	return length;						\
}								\
SLAB_ATTR(text);						\

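/*
 * For reference, STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath) expands roughly
 * to the following (whitespace aside):
 *
 *	static ssize_t alloc_fastpath_show(struct kmem_cache *s, char *buf)
 *	{
 *		return show_stat(s, buf, ALLOC_FASTPATH);
 *	}
 *	static ssize_t alloc_fastpath_store(struct kmem_cache *s,
 *					const char *buf, size_t length)
 *	{
 *		if (buf[0] != '0')
 *			return -EINVAL;
 *		clear_stat(s, ALLOC_FASTPATH);
 *		return length;
 *	}
 *	SLAB_ATTR(alloc_fastpath);
 *
 * i.e. reading the file dumps the counter, writing '0' clears it.
 */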
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif	/* CONFIG_SLUB_STATS */

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif
	&usersize_attr.attr,

	NULL
};

static const struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
#ifdef CONFIG_MEMCG
	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
		struct kmem_cache *c;

		mutex_lock(&slab_mutex);
		if (s->max_attr_size < len)
			s->max_attr_size = len;

		/*
		 * Propagate the change to every memcg child cache of this
		 * root cache.  This is best effort: the return value is
		 * determined by the root cache alone and errors from the
		 * children are ignored.  max_attr_size remembers the largest
		 * value ever written so that memcg_propagate_slab_attrs()
		 * knows how big a buffer it needs when replaying these
		 * attributes onto child caches created later.
		 */
		for_each_memcg_cache(c, s)
			attribute->store(c, buf, len);
		mutex_unlock(&slab_mutex);
	}
#endif
	return err;
}

static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	int i;
	char *buffer = NULL;
	struct kmem_cache *root_cache;

	if (is_root_cache(s))
		return;

	root_cache = s->memcg_params.root_cache;

	/*
	 * No attribute of the root cache has ever been written through
	 * sysfs, so there is nothing to copy to this new child cache.
	 */
	if (!root_cache->max_attr_size)
		return;

	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
		char mbuf[64];
		char *buf;
		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
		ssize_t len;

		if (!attr || !attr->store || !attr->show)
			continue;

		/*
		 * Pick a buffer for the attribute value: reuse the page we
		 * may already have allocated, use the small on-stack buffer
		 * if the largest value ever written fits, and only otherwise
		 * allocate a zeroed page that is then kept for the remaining
		 * attributes.
		 */
		if (buffer)
			buf = buffer;
		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
			buf = mbuf;
		else {
			buffer = (char *) get_zeroed_page(GFP_KERNEL);
			if (WARN_ON(!buffer))
				continue;
			buf = buffer;
		}

		len = attr->show(root_cache, buf);
		if (len > 0)
			attr->store(s, buf, len);
	}

	if (buffer)
		free_page((unsigned long)buffer);
#endif
}
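
/*
 * memcg_propagate_slab_attrs() is the mirror image of the propagation loop
 * in slab_attr_store(): when a per-memcg child cache is created, it replays
 * onto the child every attribute currently visible on the root cache (but
 * only if something was ever written, see max_attr_size above).  Rough
 * sequence (cache names illustrative):
 *
 *	# echo 1 > /sys/kernel/slab/<root-cache>/failslab
 *	  ... a memcg child of <root-cache> is created later ...
 *	  -> the child starts with failslab enabled as well
 */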

static void kmem_cache_release(struct kobject *k)
{
	slab_kmem_cache_release(to_slab(k));
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
	.release = kmem_cache_release,
};

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;

static inline struct kset *cache_kset(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG
	if (!is_root_cache(s))
		return s->memcg_params.root_cache->memcg_kset;
#endif
	return slab_kset;
}

#define ID_STR_LENGTH 64

/*
 * Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';
	/*
	 * First flags affecting slabcache operations.  We will only
	 * get here for aliasable slabs so we do not need to support
	 * too many flags.  The flags here must cover all flags that
	 * are matched during merging to guarantee that the id is
	 * unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07d", s->size);

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}
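
/*
 * Example of the generated id (values illustrative): a DMA cache with
 * reclaim accounting and a 192 byte object size would get ":da-0000192",
 * while a cache with none of the flags above would simply be ":0000192".
 */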

static void sysfs_slab_remove_workfn(struct work_struct *work)
{
	struct kmem_cache *s =
		container_of(work, struct kmem_cache, kobj_remove_work);

	if (!s->kobj.state_in_sysfs)
		/*
		 * For a memcg cache, this may be called during deactivation
		 * and again on shutdown.  Remove only once; the kobject is
		 * no longer in sysfs after the first pass.
		 */
		goto out;

#ifdef CONFIG_MEMCG
	kset_unregister(s->memcg_kset);
#endif
	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
out:
	kobject_put(&s->kobj);
}

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	struct kset *kset = cache_kset(s);
	int unmergeable = slab_unmergeable(s);

	INIT_WORK(&s->kobj_remove_work, sysfs_slab_remove_workfn);

	if (!kset) {
		kobject_init(&s->kobj, &slab_ktype);
		return 0;
	}

	if (!unmergeable && disable_higher_order_debug &&
			(slub_debug & DEBUG_METADATA_FLAGS))
		unmergeable = 1;

	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations.  In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err)
		goto out;

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err)
		goto out_del_kobj;

#ifdef CONFIG_MEMCG
	if (is_root_cache(s) && memcg_sysfs_enabled) {
		s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
		if (!s->memcg_kset) {
			err = -ENOMEM;
			goto out_del_kobj;
		}
	}
#endif

	kobject_uevent(&s->kobj, KOBJ_ADD);
	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
	}
out:
	if (!unmergeable)
		kfree(name);
	return err;
out_del_kobj:
	kobject_del(&s->kobj);
	goto out;
}
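
/*
 * Resulting sysfs layout (paths illustrative): an unmergeable cache appears
 * under its own name, e.g. /sys/kernel/slab/dentry, while mergeable caches
 * are registered under the ":<flags>-<size>" id from create_unique_id() and
 * their human-readable names are added as symlinks by sysfs_slab_alias(),
 * e.g.
 *
 *	/sys/kernel/slab/:0000192
 *	/sys/kernel/slab/kmalloc-192 -> :0000192
 */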

static void sysfs_slab_remove(struct kmem_cache *s)
{
	if (slab_state < FULL)
		/*
		 * Sysfs has not been setup yet so no need to remove the
		 * cache from sysfs.
		 */
		return;

	kobject_get(&s->kobj);
	schedule_work(&s->kobj_remove_work);
}

void sysfs_slab_release(struct kmem_cache *s)
{
	if (slab_state >= FULL)
		kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		pr_err("Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLUB_DEBUG
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;
	struct kmem_cache_node *n;

	for_each_kmem_cache_node(s, node, n) {
		nr_slabs += node_nr_slabs(n);
		nr_objs += node_nr_objs(n);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
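
/*
 * get_slabinfo() feeds the generic /proc/slabinfo code, which prints one
 * line per cache roughly of the form (numbers illustrative):
 *
 *	kmalloc-64  123456 131072   64   64    1 : tunables 0 0 0 : slabdata 2048 2048 0
 *
 * Since SLUB does not use the SLAB-style tunables, slabinfo_show_stats()
 * below prints nothing extra and slabinfo_write() rejects writes with -EIO.
 */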

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif /* CONFIG_SLUB_DEBUG */
