1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include "slab.h"
20#include <linux/proc_fs.h>
21#include <linux/notifier.h>
22#include <linux/seq_file.h>
23#include <linux/kmemcheck.h>
24#include <linux/cpu.h>
25#include <linux/cpuset.h>
26#include <linux/mempolicy.h>
27#include <linux/ctype.h>
28#include <linux/debugobjects.h>
29#include <linux/kallsyms.h>
30#include <linux/memory.h>
31#include <linux/math64.h>
32#include <linux/fault-inject.h>
33#include <linux/stacktrace.h>
34#include <linux/prefetch.h>
35#include <linux/memcontrol.h>
36
37#include <trace/events/kmem.h>
38
39#include "internal.h"
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117static inline int kmem_cache_debug(struct kmem_cache *s)
118{
119#ifdef CONFIG_SLUB_DEBUG
120 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
121#else
122 return 0;
123#endif
124}
125
126static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
127{
128#ifdef CONFIG_SLUB_CPU_PARTIAL
129 return !kmem_cache_debug(s);
130#else
131 return false;
132#endif
133}
134
135
136
137
138
139
140
141
142
143
144#undef SLUB_RESILIENCY_TEST
145
146
147#undef SLUB_DEBUG_CMPXCHG
148
149
150
151
152
153#define MIN_PARTIAL 5
154
155
156
157
158
159
160#define MAX_PARTIAL 10
161
162#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
163 SLAB_POISON | SLAB_STORE_USER)
164
165
166
167
168
169
170#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
171
172
173
174
175#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
176 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
177 SLAB_FAILSLAB)
178
179#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
180 SLAB_CACHE_DMA | SLAB_NOTRACK)
181
182#define OO_SHIFT 16
183#define OO_MASK ((1 << OO_SHIFT) - 1)
184#define MAX_OBJS_PER_PAGE 32767
185
186
187#define __OBJECT_POISON 0x80000000UL
188#define __CMPXCHG_DOUBLE 0x40000000UL
189
190#ifdef CONFIG_SMP
191static struct notifier_block slab_notifier;
192#endif
193
194
195
196
197#define TRACK_ADDRS_COUNT 16
198struct track {
199 unsigned long addr;
200#ifdef CONFIG_STACKTRACE
201 unsigned long addrs[TRACK_ADDRS_COUNT];
202#endif
203 int cpu;
204 int pid;
205 unsigned long when;
206};
207
208enum track_item { TRACK_ALLOC, TRACK_FREE };
209
210#ifdef CONFIG_SYSFS
211static int sysfs_slab_add(struct kmem_cache *);
212static int sysfs_slab_alias(struct kmem_cache *, const char *);
213static void memcg_propagate_slab_attrs(struct kmem_cache *s);
214#else
215static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
216static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
217 { return 0; }
218static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
219#endif
220
221static inline void stat(const struct kmem_cache *s, enum stat_item si)
222{
223#ifdef CONFIG_SLUB_STATS
224
225
226
227
228 raw_cpu_inc(s->cpu_slab->stat[si]);
229#endif
230}
231
232
233
234
235
236static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
237{
238 return s->node[node];
239}
240
241
242static inline int check_valid_pointer(struct kmem_cache *s,
243 struct page *page, const void *object)
244{
245 void *base;
246
247 if (!object)
248 return 1;
249
250 base = page_address(page);
251 if (object < base || object >= base + page->objects * s->size ||
252 (object - base) % s->size) {
253 return 0;
254 }
255
256 return 1;
257}
258
259static inline void *get_freepointer(struct kmem_cache *s, void *object)
260{
261 return *(void **)(object + s->offset);
262}
263
264static void prefetch_freepointer(const struct kmem_cache *s, void *object)
265{
266 prefetch(object + s->offset);
267}
268
269static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
270{
271 void *p;
272
273#ifdef CONFIG_DEBUG_PAGEALLOC
274 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
275#else
276 p = get_freepointer(s, object);
277#endif
278 return p;
279}
280
281static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
282{
283 *(void **)(object + s->offset) = fp;
284}
285
286
287#define for_each_object(__p, __s, __addr, __objects) \
288 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
289 __p += (__s)->size)
290
291
292static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
293{
294 return (p - addr) / s->size;
295}
296
297static inline size_t slab_ksize(const struct kmem_cache *s)
298{
299#ifdef CONFIG_SLUB_DEBUG
300
301
302
303
304 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
305 return s->object_size;
306
307#endif
308
309
310
311
312
313 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
314 return s->inuse;
315
316
317
318 return s->size;
319}
320
321static inline int order_objects(int order, unsigned long size, int reserved)
322{
323 return ((PAGE_SIZE << order) - reserved) / size;
324}
325
326static inline struct kmem_cache_order_objects oo_make(int order,
327 unsigned long size, int reserved)
328{
329 struct kmem_cache_order_objects x = {
330 (order << OO_SHIFT) + order_objects(order, size, reserved)
331 };
332
333 return x;
334}
335
336static inline int oo_order(struct kmem_cache_order_objects x)
337{
338 return x.x >> OO_SHIFT;
339}
340
341static inline int oo_objects(struct kmem_cache_order_objects x)
342{
343 return x.x & OO_MASK;
344}
345
346
347
348
349static __always_inline void slab_lock(struct page *page)
350{
351 bit_spin_lock(PG_locked, &page->flags);
352}
353
354static __always_inline void slab_unlock(struct page *page)
355{
356 __bit_spin_unlock(PG_locked, &page->flags);
357}
358
359static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
360{
361 struct page tmp;
362 tmp.counters = counters_new;
363
364
365
366
367
368
369 page->frozen = tmp.frozen;
370 page->inuse = tmp.inuse;
371 page->objects = tmp.objects;
372}
373
374
375static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
376 void *freelist_old, unsigned long counters_old,
377 void *freelist_new, unsigned long counters_new,
378 const char *n)
379{
380 VM_BUG_ON(!irqs_disabled());
381#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
382 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
383 if (s->flags & __CMPXCHG_DOUBLE) {
384 if (cmpxchg_double(&page->freelist, &page->counters,
385 freelist_old, counters_old,
386 freelist_new, counters_new))
387 return 1;
388 } else
389#endif
390 {
391 slab_lock(page);
392 if (page->freelist == freelist_old &&
393 page->counters == counters_old) {
394 page->freelist = freelist_new;
395 set_page_slub_counters(page, counters_new);
396 slab_unlock(page);
397 return 1;
398 }
399 slab_unlock(page);
400 }
401
402 cpu_relax();
403 stat(s, CMPXCHG_DOUBLE_FAIL);
404
405#ifdef SLUB_DEBUG_CMPXCHG
406 pr_info("%s %s: cmpxchg double redo ", n, s->name);
407#endif
408
409 return 0;
410}
411
412static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
413 void *freelist_old, unsigned long counters_old,
414 void *freelist_new, unsigned long counters_new,
415 const char *n)
416{
417#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
418 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
419 if (s->flags & __CMPXCHG_DOUBLE) {
420 if (cmpxchg_double(&page->freelist, &page->counters,
421 freelist_old, counters_old,
422 freelist_new, counters_new))
423 return 1;
424 } else
425#endif
426 {
427 unsigned long flags;
428
429 local_irq_save(flags);
430 slab_lock(page);
431 if (page->freelist == freelist_old &&
432 page->counters == counters_old) {
433 page->freelist = freelist_new;
434 set_page_slub_counters(page, counters_new);
435 slab_unlock(page);
436 local_irq_restore(flags);
437 return 1;
438 }
439 slab_unlock(page);
440 local_irq_restore(flags);
441 }
442
443 cpu_relax();
444 stat(s, CMPXCHG_DOUBLE_FAIL);
445
446#ifdef SLUB_DEBUG_CMPXCHG
447 pr_info("%s %s: cmpxchg double redo ", n, s->name);
448#endif
449
450 return 0;
451}
452
453#ifdef CONFIG_SLUB_DEBUG
454
455
456
457
458
459
460static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
461{
462 void *p;
463 void *addr = page_address(page);
464
465 for (p = page->freelist; p; p = get_freepointer(s, p))
466 set_bit(slab_index(p, s, addr), map);
467}
468
469
470
471
472#ifdef CONFIG_SLUB_DEBUG_ON
473static int slub_debug = DEBUG_DEFAULT_FLAGS;
474#else
475static int slub_debug;
476#endif
477
478static char *slub_debug_slabs;
479static int disable_higher_order_debug;
480
481
482
483
484static void print_section(char *text, u8 *addr, unsigned int length)
485{
486 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
487 length, 1);
488}
489
490static struct track *get_track(struct kmem_cache *s, void *object,
491 enum track_item alloc)
492{
493 struct track *p;
494
495 if (s->offset)
496 p = object + s->offset + sizeof(void *);
497 else
498 p = object + s->inuse;
499
500 return p + alloc;
501}
502
503static void set_track(struct kmem_cache *s, void *object,
504 enum track_item alloc, unsigned long addr)
505{
506 struct track *p = get_track(s, object, alloc);
507
508 if (addr) {
509#ifdef CONFIG_STACKTRACE
510 struct stack_trace trace;
511 int i;
512
513 trace.nr_entries = 0;
514 trace.max_entries = TRACK_ADDRS_COUNT;
515 trace.entries = p->addrs;
516 trace.skip = 3;
517 save_stack_trace(&trace);
518
519
520 if (trace.nr_entries != 0 &&
521 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
522 trace.nr_entries--;
523
524 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
525 p->addrs[i] = 0;
526#endif
527 p->addr = addr;
528 p->cpu = smp_processor_id();
529 p->pid = current->pid;
530 p->when = jiffies;
531 } else
532 memset(p, 0, sizeof(struct track));
533}
534
535static void init_tracking(struct kmem_cache *s, void *object)
536{
537 if (!(s->flags & SLAB_STORE_USER))
538 return;
539
540 set_track(s, object, TRACK_FREE, 0UL);
541 set_track(s, object, TRACK_ALLOC, 0UL);
542}
543
544static void print_track(const char *s, struct track *t)
545{
546 if (!t->addr)
547 return;
548
549 pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
550 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
551#ifdef CONFIG_STACKTRACE
552 {
553 int i;
554 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
555 if (t->addrs[i])
556 pr_err("\t%pS\n", (void *)t->addrs[i]);
557 else
558 break;
559 }
560#endif
561}
562
563static void print_tracking(struct kmem_cache *s, void *object)
564{
565 if (!(s->flags & SLAB_STORE_USER))
566 return;
567
568 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
569 print_track("Freed", get_track(s, object, TRACK_FREE));
570}
571
572static void print_page_info(struct page *page)
573{
574 pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
575 page, page->objects, page->inuse, page->freelist, page->flags);
576
577}
578
579static void slab_bug(struct kmem_cache *s, char *fmt, ...)
580{
581 struct va_format vaf;
582 va_list args;
583
584 va_start(args, fmt);
585 vaf.fmt = fmt;
586 vaf.va = &args;
587 pr_err("=============================================================================\n");
588 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
589 pr_err("-----------------------------------------------------------------------------\n\n");
590
591 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
592 va_end(args);
593}
594
595static void slab_fix(struct kmem_cache *s, char *fmt, ...)
596{
597 struct va_format vaf;
598 va_list args;
599
600 va_start(args, fmt);
601 vaf.fmt = fmt;
602 vaf.va = &args;
603 pr_err("FIX %s: %pV\n", s->name, &vaf);
604 va_end(args);
605}
606
607static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
608{
609 unsigned int off;
610 u8 *addr = page_address(page);
611
612 print_tracking(s, p);
613
614 print_page_info(page);
615
616 pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
617 p, p - addr, get_freepointer(s, p));
618
619 if (p > addr + 16)
620 print_section("Bytes b4 ", p - 16, 16);
621
622 print_section("Object ", p, min_t(unsigned long, s->object_size,
623 PAGE_SIZE));
624 if (s->flags & SLAB_RED_ZONE)
625 print_section("Redzone ", p + s->object_size,
626 s->inuse - s->object_size);
627
628 if (s->offset)
629 off = s->offset + sizeof(void *);
630 else
631 off = s->inuse;
632
633 if (s->flags & SLAB_STORE_USER)
634 off += 2 * sizeof(struct track);
635
636 if (off != s->size)
637
638 print_section("Padding ", p + off, s->size - off);
639
640 dump_stack();
641}
642
643static void object_err(struct kmem_cache *s, struct page *page,
644 u8 *object, char *reason)
645{
646 slab_bug(s, "%s", reason);
647 print_trailer(s, page, object);
648}
649
650static void slab_err(struct kmem_cache *s, struct page *page,
651 const char *fmt, ...)
652{
653 va_list args;
654 char buf[100];
655
656 va_start(args, fmt);
657 vsnprintf(buf, sizeof(buf), fmt, args);
658 va_end(args);
659 slab_bug(s, "%s", buf);
660 print_page_info(page);
661 dump_stack();
662}
663
664static void init_object(struct kmem_cache *s, void *object, u8 val)
665{
666 u8 *p = object;
667
668 if (s->flags & __OBJECT_POISON) {
669 memset(p, POISON_FREE, s->object_size - 1);
670 p[s->object_size - 1] = POISON_END;
671 }
672
673 if (s->flags & SLAB_RED_ZONE)
674 memset(p + s->object_size, val, s->inuse - s->object_size);
675}
676
677static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
678 void *from, void *to)
679{
680 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
681 memset(from, data, to - from);
682}
683
684static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
685 u8 *object, char *what,
686 u8 *start, unsigned int value, unsigned int bytes)
687{
688 u8 *fault;
689 u8 *end;
690
691 fault = memchr_inv(start, value, bytes);
692 if (!fault)
693 return 1;
694
695 end = start + bytes;
696 while (end > fault && end[-1] == value)
697 end--;
698
699 slab_bug(s, "%s overwritten", what);
700 pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
701 fault, end - 1, fault[0], value);
702 print_trailer(s, page, object);
703
704 restore_bytes(s, what, value, fault, end);
705 return 0;
706}
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
747{
748 unsigned long off = s->inuse;
749
750 if (s->offset)
751
752 off += sizeof(void *);
753
754 if (s->flags & SLAB_STORE_USER)
755
756 off += 2 * sizeof(struct track);
757
758 if (s->size == off)
759 return 1;
760
761 return check_bytes_and_report(s, page, p, "Object padding",
762 p + off, POISON_INUSE, s->size - off);
763}
764
765
766static int slab_pad_check(struct kmem_cache *s, struct page *page)
767{
768 u8 *start;
769 u8 *fault;
770 u8 *end;
771 int length;
772 int remainder;
773
774 if (!(s->flags & SLAB_POISON))
775 return 1;
776
777 start = page_address(page);
778 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
779 end = start + length;
780 remainder = length % s->size;
781 if (!remainder)
782 return 1;
783
784 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
785 if (!fault)
786 return 1;
787 while (end > fault && end[-1] == POISON_INUSE)
788 end--;
789
790 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
791 print_section("Padding ", end - remainder, remainder);
792
793 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
794 return 0;
795}
796
797static int check_object(struct kmem_cache *s, struct page *page,
798 void *object, u8 val)
799{
800 u8 *p = object;
801 u8 *endobject = object + s->object_size;
802
803 if (s->flags & SLAB_RED_ZONE) {
804 if (!check_bytes_and_report(s, page, object, "Redzone",
805 endobject, val, s->inuse - s->object_size))
806 return 0;
807 } else {
808 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
809 check_bytes_and_report(s, page, p, "Alignment padding",
810 endobject, POISON_INUSE,
811 s->inuse - s->object_size);
812 }
813 }
814
815 if (s->flags & SLAB_POISON) {
816 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
817 (!check_bytes_and_report(s, page, p, "Poison", p,
818 POISON_FREE, s->object_size - 1) ||
819 !check_bytes_and_report(s, page, p, "Poison",
820 p + s->object_size - 1, POISON_END, 1)))
821 return 0;
822
823
824
825 check_pad_bytes(s, page, p);
826 }
827
828 if (!s->offset && val == SLUB_RED_ACTIVE)
829
830
831
832
833 return 1;
834
835
836 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
837 object_err(s, page, p, "Freepointer corrupt");
838
839
840
841
842
843 set_freepointer(s, p, NULL);
844 return 0;
845 }
846 return 1;
847}
848
849static int check_slab(struct kmem_cache *s, struct page *page)
850{
851 int maxobj;
852
853 VM_BUG_ON(!irqs_disabled());
854
855 if (!PageSlab(page)) {
856 slab_err(s, page, "Not a valid slab page");
857 return 0;
858 }
859
860 maxobj = order_objects(compound_order(page), s->size, s->reserved);
861 if (page->objects > maxobj) {
862 slab_err(s, page, "objects %u > max %u",
863 s->name, page->objects, maxobj);
864 return 0;
865 }
866 if (page->inuse > page->objects) {
867 slab_err(s, page, "inuse %u > max %u",
868 s->name, page->inuse, page->objects);
869 return 0;
870 }
871
872 slab_pad_check(s, page);
873 return 1;
874}
875
876
877
878
879
880static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
881{
882 int nr = 0;
883 void *fp;
884 void *object = NULL;
885 unsigned long max_objects;
886
887 fp = page->freelist;
888 while (fp && nr <= page->objects) {
889 if (fp == search)
890 return 1;
891 if (!check_valid_pointer(s, page, fp)) {
892 if (object) {
893 object_err(s, page, object,
894 "Freechain corrupt");
895 set_freepointer(s, object, NULL);
896 } else {
897 slab_err(s, page, "Freepointer corrupt");
898 page->freelist = NULL;
899 page->inuse = page->objects;
900 slab_fix(s, "Freelist cleared");
901 return 0;
902 }
903 break;
904 }
905 object = fp;
906 fp = get_freepointer(s, object);
907 nr++;
908 }
909
910 max_objects = order_objects(compound_order(page), s->size, s->reserved);
911 if (max_objects > MAX_OBJS_PER_PAGE)
912 max_objects = MAX_OBJS_PER_PAGE;
913
914 if (page->objects != max_objects) {
915 slab_err(s, page, "Wrong number of objects. Found %d but "
916 "should be %d", page->objects, max_objects);
917 page->objects = max_objects;
918 slab_fix(s, "Number of objects adjusted.");
919 }
920 if (page->inuse != page->objects - nr) {
921 slab_err(s, page, "Wrong object count. Counter is %d but "
922 "counted were %d", page->inuse, page->objects - nr);
923 page->inuse = page->objects - nr;
924 slab_fix(s, "Object count adjusted.");
925 }
926 return search == NULL;
927}
928
929static void trace(struct kmem_cache *s, struct page *page, void *object,
930 int alloc)
931{
932 if (s->flags & SLAB_TRACE) {
933 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
934 s->name,
935 alloc ? "alloc" : "free",
936 object, page->inuse,
937 page->freelist);
938
939 if (!alloc)
940 print_section("Object ", (void *)object,
941 s->object_size);
942
943 dump_stack();
944 }
945}
946
947
948
949
950
951static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
952{
953 kmemleak_alloc(ptr, size, 1, flags);
954}
955
956static inline void kfree_hook(const void *x)
957{
958 kmemleak_free(x);
959}
960
961static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
962{
963 flags &= gfp_allowed_mask;
964 lockdep_trace_alloc(flags);
965 might_sleep_if(flags & __GFP_WAIT);
966
967 return should_failslab(s->object_size, flags, s->flags);
968}
969
970static inline void slab_post_alloc_hook(struct kmem_cache *s,
971 gfp_t flags, void *object)
972{
973 flags &= gfp_allowed_mask;
974 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
975 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
976}
977
978static inline void slab_free_hook(struct kmem_cache *s, void *x)
979{
980 kmemleak_free_recursive(x, s->flags);
981
982
983
984
985
986
987#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
988 {
989 unsigned long flags;
990
991 local_irq_save(flags);
992 kmemcheck_slab_free(s, x, s->object_size);
993 debug_check_no_locks_freed(x, s->object_size);
994 local_irq_restore(flags);
995 }
996#endif
997 if (!(s->flags & SLAB_DEBUG_OBJECTS))
998 debug_check_no_obj_freed(x, s->object_size);
999}
1000
1001
1002
1003
1004static void add_full(struct kmem_cache *s,
1005 struct kmem_cache_node *n, struct page *page)
1006{
1007 if (!(s->flags & SLAB_STORE_USER))
1008 return;
1009
1010 lockdep_assert_held(&n->list_lock);
1011 list_add(&page->lru, &n->full);
1012}
1013
1014static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1015{
1016 if (!(s->flags & SLAB_STORE_USER))
1017 return;
1018
1019 lockdep_assert_held(&n->list_lock);
1020 list_del(&page->lru);
1021}
1022
1023
1024static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1025{
1026 struct kmem_cache_node *n = get_node(s, node);
1027
1028 return atomic_long_read(&n->nr_slabs);
1029}
1030
1031static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1032{
1033 return atomic_long_read(&n->nr_slabs);
1034}
1035
1036static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1037{
1038 struct kmem_cache_node *n = get_node(s, node);
1039
1040
1041
1042
1043
1044
1045
1046 if (likely(n)) {
1047 atomic_long_inc(&n->nr_slabs);
1048 atomic_long_add(objects, &n->total_objects);
1049 }
1050}
1051static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1052{
1053 struct kmem_cache_node *n = get_node(s, node);
1054
1055 atomic_long_dec(&n->nr_slabs);
1056 atomic_long_sub(objects, &n->total_objects);
1057}
1058
1059
1060static void setup_object_debug(struct kmem_cache *s, struct page *page,
1061 void *object)
1062{
1063 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1064 return;
1065
1066 init_object(s, object, SLUB_RED_INACTIVE);
1067 init_tracking(s, object);
1068}
1069
1070static noinline int alloc_debug_processing(struct kmem_cache *s,
1071 struct page *page,
1072 void *object, unsigned long addr)
1073{
1074 if (!check_slab(s, page))
1075 goto bad;
1076
1077 if (!check_valid_pointer(s, page, object)) {
1078 object_err(s, page, object, "Freelist Pointer check fails");
1079 goto bad;
1080 }
1081
1082 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1083 goto bad;
1084
1085
1086 if (s->flags & SLAB_STORE_USER)
1087 set_track(s, object, TRACK_ALLOC, addr);
1088 trace(s, page, object, 1);
1089 init_object(s, object, SLUB_RED_ACTIVE);
1090 return 1;
1091
1092bad:
1093 if (PageSlab(page)) {
1094
1095
1096
1097
1098
1099 slab_fix(s, "Marking all objects used");
1100 page->inuse = page->objects;
1101 page->freelist = NULL;
1102 }
1103 return 0;
1104}
1105
1106static noinline struct kmem_cache_node *free_debug_processing(
1107 struct kmem_cache *s, struct page *page, void *object,
1108 unsigned long addr, unsigned long *flags)
1109{
1110 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1111
1112 spin_lock_irqsave(&n->list_lock, *flags);
1113 slab_lock(page);
1114
1115 if (!check_slab(s, page))
1116 goto fail;
1117
1118 if (!check_valid_pointer(s, page, object)) {
1119 slab_err(s, page, "Invalid object pointer 0x%p", object);
1120 goto fail;
1121 }
1122
1123 if (on_freelist(s, page, object)) {
1124 object_err(s, page, object, "Object already free");
1125 goto fail;
1126 }
1127
1128 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1129 goto out;
1130
1131 if (unlikely(s != page->slab_cache)) {
1132 if (!PageSlab(page)) {
1133 slab_err(s, page, "Attempt to free object(0x%p) "
1134 "outside of slab", object);
1135 } else if (!page->slab_cache) {
1136 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1137 object);
1138 dump_stack();
1139 } else
1140 object_err(s, page, object,
1141 "page slab pointer corrupt.");
1142 goto fail;
1143 }
1144
1145 if (s->flags & SLAB_STORE_USER)
1146 set_track(s, object, TRACK_FREE, addr);
1147 trace(s, page, object, 0);
1148 init_object(s, object, SLUB_RED_INACTIVE);
1149out:
1150 slab_unlock(page);
1151
1152
1153
1154
1155 return n;
1156
1157fail:
1158 slab_unlock(page);
1159 spin_unlock_irqrestore(&n->list_lock, *flags);
1160 slab_fix(s, "Object at 0x%p not freed", object);
1161 return NULL;
1162}
1163
1164static int __init setup_slub_debug(char *str)
1165{
1166 slub_debug = DEBUG_DEFAULT_FLAGS;
1167 if (*str++ != '=' || !*str)
1168
1169
1170
1171 goto out;
1172
1173 if (*str == ',')
1174
1175
1176
1177
1178 goto check_slabs;
1179
1180 if (tolower(*str) == 'o') {
1181
1182
1183
1184
1185 disable_higher_order_debug = 1;
1186 goto out;
1187 }
1188
1189 slub_debug = 0;
1190 if (*str == '-')
1191
1192
1193
1194 goto out;
1195
1196
1197
1198
1199 for (; *str && *str != ','; str++) {
1200 switch (tolower(*str)) {
1201 case 'f':
1202 slub_debug |= SLAB_DEBUG_FREE;
1203 break;
1204 case 'z':
1205 slub_debug |= SLAB_RED_ZONE;
1206 break;
1207 case 'p':
1208 slub_debug |= SLAB_POISON;
1209 break;
1210 case 'u':
1211 slub_debug |= SLAB_STORE_USER;
1212 break;
1213 case 't':
1214 slub_debug |= SLAB_TRACE;
1215 break;
1216 case 'a':
1217 slub_debug |= SLAB_FAILSLAB;
1218 break;
1219 default:
1220 pr_err("slub_debug option '%c' unknown. skipped\n",
1221 *str);
1222 }
1223 }
1224
1225check_slabs:
1226 if (*str == ',')
1227 slub_debug_slabs = str + 1;
1228out:
1229 return 1;
1230}
1231
1232__setup("slub_debug", setup_slub_debug);
1233
1234static unsigned long kmem_cache_flags(unsigned long object_size,
1235 unsigned long flags, const char *name,
1236 void (*ctor)(void *))
1237{
1238
1239
1240
1241 if (slub_debug && (!slub_debug_slabs || (name &&
1242 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1243 flags |= slub_debug;
1244
1245 return flags;
1246}
1247#else
1248static inline void setup_object_debug(struct kmem_cache *s,
1249 struct page *page, void *object) {}
1250
1251static inline int alloc_debug_processing(struct kmem_cache *s,
1252 struct page *page, void *object, unsigned long addr) { return 0; }
1253
1254static inline struct kmem_cache_node *free_debug_processing(
1255 struct kmem_cache *s, struct page *page, void *object,
1256 unsigned long addr, unsigned long *flags) { return NULL; }
1257
1258static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1259 { return 1; }
1260static inline int check_object(struct kmem_cache *s, struct page *page,
1261 void *object, u8 val) { return 1; }
1262static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1263 struct page *page) {}
1264static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1265 struct page *page) {}
1266static inline unsigned long kmem_cache_flags(unsigned long object_size,
1267 unsigned long flags, const char *name,
1268 void (*ctor)(void *))
1269{
1270 return flags;
1271}
1272#define slub_debug 0
1273
1274#define disable_higher_order_debug 0
1275
1276static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1277 { return 0; }
1278static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1279 { return 0; }
1280static inline void inc_slabs_node(struct kmem_cache *s, int node,
1281 int objects) {}
1282static inline void dec_slabs_node(struct kmem_cache *s, int node,
1283 int objects) {}
1284
1285static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1286{
1287 kmemleak_alloc(ptr, size, 1, flags);
1288}
1289
1290static inline void kfree_hook(const void *x)
1291{
1292 kmemleak_free(x);
1293}
1294
1295static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1296 { return 0; }
1297
1298static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1299 void *object)
1300{
1301 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
1302 flags & gfp_allowed_mask);
1303}
1304
1305static inline void slab_free_hook(struct kmem_cache *s, void *x)
1306{
1307 kmemleak_free_recursive(x, s->flags);
1308}
1309
1310#endif
1311
1312
1313
1314
1315static inline struct page *alloc_slab_page(struct kmem_cache *s,
1316 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1317{
1318 struct page *page;
1319 int order = oo_order(oo);
1320
1321 flags |= __GFP_NOTRACK;
1322
1323 if (memcg_charge_slab(s, flags, order))
1324 return NULL;
1325
1326 if (node == NUMA_NO_NODE)
1327 page = alloc_pages(flags, order);
1328 else
1329 page = alloc_pages_exact_node(node, flags, order);
1330
1331 if (!page)
1332 memcg_uncharge_slab(s, order);
1333
1334 return page;
1335}
1336
1337static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1338{
1339 struct page *page;
1340 struct kmem_cache_order_objects oo = s->oo;
1341 gfp_t alloc_gfp;
1342
1343 flags &= gfp_allowed_mask;
1344
1345 if (flags & __GFP_WAIT)
1346 local_irq_enable();
1347
1348 flags |= s->allocflags;
1349
1350
1351
1352
1353
1354 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1355
1356 page = alloc_slab_page(s, alloc_gfp, node, oo);
1357 if (unlikely(!page)) {
1358 oo = s->min;
1359 alloc_gfp = flags;
1360
1361
1362
1363
1364 page = alloc_slab_page(s, alloc_gfp, node, oo);
1365
1366 if (page)
1367 stat(s, ORDER_FALLBACK);
1368 }
1369
1370 if (kmemcheck_enabled && page
1371 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1372 int pages = 1 << oo_order(oo);
1373
1374 kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
1375
1376
1377
1378
1379
1380 if (s->ctor)
1381 kmemcheck_mark_uninitialized_pages(page, pages);
1382 else
1383 kmemcheck_mark_unallocated_pages(page, pages);
1384 }
1385
1386 if (flags & __GFP_WAIT)
1387 local_irq_disable();
1388 if (!page)
1389 return NULL;
1390
1391 page->objects = oo_objects(oo);
1392 mod_zone_page_state(page_zone(page),
1393 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1394 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1395 1 << oo_order(oo));
1396
1397 return page;
1398}
1399
1400static void setup_object(struct kmem_cache *s, struct page *page,
1401 void *object)
1402{
1403 setup_object_debug(s, page, object);
1404 if (unlikely(s->ctor))
1405 s->ctor(object);
1406}
1407
1408static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1409{
1410 struct page *page;
1411 void *start;
1412 void *last;
1413 void *p;
1414 int order;
1415
1416 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1417
1418 page = allocate_slab(s,
1419 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1420 if (!page)
1421 goto out;
1422
1423 order = compound_order(page);
1424 inc_slabs_node(s, page_to_nid(page), page->objects);
1425 page->slab_cache = s;
1426 __SetPageSlab(page);
1427 if (page->pfmemalloc)
1428 SetPageSlabPfmemalloc(page);
1429
1430 start = page_address(page);
1431
1432 if (unlikely(s->flags & SLAB_POISON))
1433 memset(start, POISON_INUSE, PAGE_SIZE << order);
1434
1435 last = start;
1436 for_each_object(p, s, start, page->objects) {
1437 setup_object(s, page, last);
1438 set_freepointer(s, last, p);
1439 last = p;
1440 }
1441 setup_object(s, page, last);
1442 set_freepointer(s, last, NULL);
1443
1444 page->freelist = start;
1445 page->inuse = page->objects;
1446 page->frozen = 1;
1447out:
1448 return page;
1449}
1450
1451static void __free_slab(struct kmem_cache *s, struct page *page)
1452{
1453 int order = compound_order(page);
1454 int pages = 1 << order;
1455
1456 if (kmem_cache_debug(s)) {
1457 void *p;
1458
1459 slab_pad_check(s, page);
1460 for_each_object(p, s, page_address(page),
1461 page->objects)
1462 check_object(s, page, p, SLUB_RED_INACTIVE);
1463 }
1464
1465 kmemcheck_free_shadow(page, compound_order(page));
1466
1467 mod_zone_page_state(page_zone(page),
1468 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1469 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1470 -pages);
1471
1472 __ClearPageSlabPfmemalloc(page);
1473 __ClearPageSlab(page);
1474
1475 page_mapcount_reset(page);
1476 if (current->reclaim_state)
1477 current->reclaim_state->reclaimed_slab += pages;
1478 __free_pages(page, order);
1479 memcg_uncharge_slab(s, order);
1480}
1481
1482#define need_reserve_slab_rcu \
1483 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1484
1485static void rcu_free_slab(struct rcu_head *h)
1486{
1487 struct page *page;
1488
1489 if (need_reserve_slab_rcu)
1490 page = virt_to_head_page(h);
1491 else
1492 page = container_of((struct list_head *)h, struct page, lru);
1493
1494 __free_slab(page->slab_cache, page);
1495}
1496
1497static void free_slab(struct kmem_cache *s, struct page *page)
1498{
1499 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1500 struct rcu_head *head;
1501
1502 if (need_reserve_slab_rcu) {
1503 int order = compound_order(page);
1504 int offset = (PAGE_SIZE << order) - s->reserved;
1505
1506 VM_BUG_ON(s->reserved != sizeof(*head));
1507 head = page_address(page) + offset;
1508 } else {
1509
1510
1511
1512 head = (void *)&page->lru;
1513 }
1514
1515 call_rcu(head, rcu_free_slab);
1516 } else
1517 __free_slab(s, page);
1518}
1519
1520static void discard_slab(struct kmem_cache *s, struct page *page)
1521{
1522 dec_slabs_node(s, page_to_nid(page), page->objects);
1523 free_slab(s, page);
1524}
1525
1526
1527
1528
1529static inline void
1530__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1531{
1532 n->nr_partial++;
1533 if (tail == DEACTIVATE_TO_TAIL)
1534 list_add_tail(&page->lru, &n->partial);
1535 else
1536 list_add(&page->lru, &n->partial);
1537}
1538
1539static inline void add_partial(struct kmem_cache_node *n,
1540 struct page *page, int tail)
1541{
1542 lockdep_assert_held(&n->list_lock);
1543 __add_partial(n, page, tail);
1544}
1545
1546static inline void
1547__remove_partial(struct kmem_cache_node *n, struct page *page)
1548{
1549 list_del(&page->lru);
1550 n->nr_partial--;
1551}
1552
1553static inline void remove_partial(struct kmem_cache_node *n,
1554 struct page *page)
1555{
1556 lockdep_assert_held(&n->list_lock);
1557 __remove_partial(n, page);
1558}
1559
1560
1561
1562
1563
1564
1565
1566static inline void *acquire_slab(struct kmem_cache *s,
1567 struct kmem_cache_node *n, struct page *page,
1568 int mode, int *objects)
1569{
1570 void *freelist;
1571 unsigned long counters;
1572 struct page new;
1573
1574 lockdep_assert_held(&n->list_lock);
1575
1576
1577
1578
1579
1580
1581 freelist = page->freelist;
1582 counters = page->counters;
1583 new.counters = counters;
1584 *objects = new.objects - new.inuse;
1585 if (mode) {
1586 new.inuse = page->objects;
1587 new.freelist = NULL;
1588 } else {
1589 new.freelist = freelist;
1590 }
1591
1592 VM_BUG_ON(new.frozen);
1593 new.frozen = 1;
1594
1595 if (!__cmpxchg_double_slab(s, page,
1596 freelist, counters,
1597 new.freelist, new.counters,
1598 "acquire_slab"))
1599 return NULL;
1600
1601 remove_partial(n, page);
1602 WARN_ON(!freelist);
1603 return freelist;
1604}
1605
1606static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1607static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1608
1609
1610
1611
1612static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1613 struct kmem_cache_cpu *c, gfp_t flags)
1614{
1615 struct page *page, *page2;
1616 void *object = NULL;
1617 int available = 0;
1618 int objects;
1619
1620
1621
1622
1623
1624
1625
1626 if (!n || !n->nr_partial)
1627 return NULL;
1628
1629 spin_lock(&n->list_lock);
1630 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1631 void *t;
1632
1633 if (!pfmemalloc_match(page, flags))
1634 continue;
1635
1636 t = acquire_slab(s, n, page, object == NULL, &objects);
1637 if (!t)
1638 break;
1639
1640 available += objects;
1641 if (!object) {
1642 c->page = page;
1643 stat(s, ALLOC_FROM_PARTIAL);
1644 object = t;
1645 } else {
1646 put_cpu_partial(s, page, 0);
1647 stat(s, CPU_PARTIAL_NODE);
1648 }
1649 if (!kmem_cache_has_cpu_partial(s)
1650 || available > s->cpu_partial / 2)
1651 break;
1652
1653 }
1654 spin_unlock(&n->list_lock);
1655 return object;
1656}
1657
1658
1659
1660
1661static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1662 struct kmem_cache_cpu *c)
1663{
1664#ifdef CONFIG_NUMA
1665 struct zonelist *zonelist;
1666 struct zoneref *z;
1667 struct zone *zone;
1668 enum zone_type high_zoneidx = gfp_zone(flags);
1669 void *object;
1670 unsigned int cpuset_mems_cookie;
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690 if (!s->remote_node_defrag_ratio ||
1691 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1692 return NULL;
1693
1694 do {
1695 cpuset_mems_cookie = read_mems_allowed_begin();
1696 zonelist = node_zonelist(mempolicy_slab_node(), flags);
1697 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1698 struct kmem_cache_node *n;
1699
1700 n = get_node(s, zone_to_nid(zone));
1701
1702 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1703 n->nr_partial > s->min_partial) {
1704 object = get_partial_node(s, n, c, flags);
1705 if (object) {
1706
1707
1708
1709
1710
1711
1712
1713 return object;
1714 }
1715 }
1716 }
1717 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1718#endif
1719 return NULL;
1720}
1721
1722
1723
1724
1725static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1726 struct kmem_cache_cpu *c)
1727{
1728 void *object;
1729 int searchnode = (node == NUMA_NO_NODE) ? numa_mem_id() : node;
1730
1731 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1732 if (object || node != NUMA_NO_NODE)
1733 return object;
1734
1735 return get_any_partial(s, flags, c);
1736}
1737
1738#ifdef CONFIG_PREEMPT
1739
1740
1741
1742
1743
1744#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1745#else
1746
1747
1748
1749
1750#define TID_STEP 1
1751#endif
1752
1753static inline unsigned long next_tid(unsigned long tid)
1754{
1755 return tid + TID_STEP;
1756}
1757
1758static inline unsigned int tid_to_cpu(unsigned long tid)
1759{
1760 return tid % TID_STEP;
1761}
1762
1763static inline unsigned long tid_to_event(unsigned long tid)
1764{
1765 return tid / TID_STEP;
1766}
1767
1768static inline unsigned int init_tid(int cpu)
1769{
1770 return cpu;
1771}
1772
1773static inline void note_cmpxchg_failure(const char *n,
1774 const struct kmem_cache *s, unsigned long tid)
1775{
1776#ifdef SLUB_DEBUG_CMPXCHG
1777 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1778
1779 pr_info("%s %s: cmpxchg redo ", n, s->name);
1780
1781#ifdef CONFIG_PREEMPT
1782 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1783 pr_warn("due to cpu change %d -> %d\n",
1784 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1785 else
1786#endif
1787 if (tid_to_event(tid) != tid_to_event(actual_tid))
1788 pr_warn("due to cpu running other code. Event %ld->%ld\n",
1789 tid_to_event(tid), tid_to_event(actual_tid));
1790 else
1791 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
1792 actual_tid, tid, next_tid(tid));
1793#endif
1794 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1795}
1796
1797static void init_kmem_cache_cpus(struct kmem_cache *s)
1798{
1799 int cpu;
1800
1801 for_each_possible_cpu(cpu)
1802 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1803}
1804
1805
1806
1807
1808static void deactivate_slab(struct kmem_cache *s, struct page *page,
1809 void *freelist)
1810{
1811 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1812 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1813 int lock = 0;
1814 enum slab_modes l = M_NONE, m = M_NONE;
1815 void *nextfree;
1816 int tail = DEACTIVATE_TO_HEAD;
1817 struct page new;
1818 struct page old;
1819
1820 if (page->freelist) {
1821 stat(s, DEACTIVATE_REMOTE_FREES);
1822 tail = DEACTIVATE_TO_TAIL;
1823 }
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1834 void *prior;
1835 unsigned long counters;
1836
1837 do {
1838 prior = page->freelist;
1839 counters = page->counters;
1840 set_freepointer(s, freelist, prior);
1841 new.counters = counters;
1842 new.inuse--;
1843 VM_BUG_ON(!new.frozen);
1844
1845 } while (!__cmpxchg_double_slab(s, page,
1846 prior, counters,
1847 freelist, new.counters,
1848 "drain percpu freelist"));
1849
1850 freelist = nextfree;
1851 }
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867redo:
1868
1869 old.freelist = page->freelist;
1870 old.counters = page->counters;
1871 VM_BUG_ON(!old.frozen);
1872
1873
1874 new.counters = old.counters;
1875 if (freelist) {
1876 new.inuse--;
1877 set_freepointer(s, freelist, old.freelist);
1878 new.freelist = freelist;
1879 } else
1880 new.freelist = old.freelist;
1881
1882 new.frozen = 0;
1883
1884 if (!new.inuse && n->nr_partial >= s->min_partial)
1885 m = M_FREE;
1886 else if (new.freelist) {
1887 m = M_PARTIAL;
1888 if (!lock) {
1889 lock = 1;
1890
1891
1892
1893
1894
1895 spin_lock(&n->list_lock);
1896 }
1897 } else {
1898 m = M_FULL;
1899 if (kmem_cache_debug(s) && !lock) {
1900 lock = 1;
1901
1902
1903
1904
1905
1906 spin_lock(&n->list_lock);
1907 }
1908 }
1909
1910 if (l != m) {
1911
1912 if (l == M_PARTIAL)
1913
1914 remove_partial(n, page);
1915
1916 else if (l == M_FULL)
1917
1918 remove_full(s, n, page);
1919
1920 if (m == M_PARTIAL) {
1921
1922 add_partial(n, page, tail);
1923 stat(s, tail);
1924
1925 } else if (m == M_FULL) {
1926
1927 stat(s, DEACTIVATE_FULL);
1928 add_full(s, n, page);
1929
1930 }
1931 }
1932
1933 l = m;
1934 if (!__cmpxchg_double_slab(s, page,
1935 old.freelist, old.counters,
1936 new.freelist, new.counters,
1937 "unfreezing slab"))
1938 goto redo;
1939
1940 if (lock)
1941 spin_unlock(&n->list_lock);
1942
1943 if (m == M_FREE) {
1944 stat(s, DEACTIVATE_EMPTY);
1945 discard_slab(s, page);
1946 stat(s, FREE_SLAB);
1947 }
1948}
1949
1950
1951
1952
1953
1954
1955
1956
1957static void unfreeze_partials(struct kmem_cache *s,
1958 struct kmem_cache_cpu *c)
1959{
1960#ifdef CONFIG_SLUB_CPU_PARTIAL
1961 struct kmem_cache_node *n = NULL, *n2 = NULL;
1962 struct page *page, *discard_page = NULL;
1963
1964 while ((page = c->partial)) {
1965 struct page new;
1966 struct page old;
1967
1968 c->partial = page->next;
1969
1970 n2 = get_node(s, page_to_nid(page));
1971 if (n != n2) {
1972 if (n)
1973 spin_unlock(&n->list_lock);
1974
1975 n = n2;
1976 spin_lock(&n->list_lock);
1977 }
1978
1979 do {
1980
1981 old.freelist = page->freelist;
1982 old.counters = page->counters;
1983 VM_BUG_ON(!old.frozen);
1984
1985 new.counters = old.counters;
1986 new.freelist = old.freelist;
1987
1988 new.frozen = 0;
1989
1990 } while (!__cmpxchg_double_slab(s, page,
1991 old.freelist, old.counters,
1992 new.freelist, new.counters,
1993 "unfreezing slab"));
1994
1995 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
1996 page->next = discard_page;
1997 discard_page = page;
1998 } else {
1999 add_partial(n, page, DEACTIVATE_TO_TAIL);
2000 stat(s, FREE_ADD_PARTIAL);
2001 }
2002 }
2003
2004 if (n)
2005 spin_unlock(&n->list_lock);
2006
2007 while (discard_page) {
2008 page = discard_page;
2009 discard_page = discard_page->next;
2010
2011 stat(s, DEACTIVATE_EMPTY);
2012 discard_slab(s, page);
2013 stat(s, FREE_SLAB);
2014 }
2015#endif
2016}
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2028{
2029#ifdef CONFIG_SLUB_CPU_PARTIAL
2030 struct page *oldpage;
2031 int pages;
2032 int pobjects;
2033
2034 do {
2035 pages = 0;
2036 pobjects = 0;
2037 oldpage = this_cpu_read(s->cpu_slab->partial);
2038
2039 if (oldpage) {
2040 pobjects = oldpage->pobjects;
2041 pages = oldpage->pages;
2042 if (drain && pobjects > s->cpu_partial) {
2043 unsigned long flags;
2044
2045
2046
2047
2048 local_irq_save(flags);
2049 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2050 local_irq_restore(flags);
2051 oldpage = NULL;
2052 pobjects = 0;
2053 pages = 0;
2054 stat(s, CPU_PARTIAL_DRAIN);
2055 }
2056 }
2057
2058 pages++;
2059 pobjects += page->objects - page->inuse;
2060
2061 page->pages = pages;
2062 page->pobjects = pobjects;
2063 page->next = oldpage;
2064
2065 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2066 != oldpage);
2067#endif
2068}
2069
2070static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2071{
2072 stat(s, CPUSLAB_FLUSH);
2073 deactivate_slab(s, c->page, c->freelist);
2074
2075 c->tid = next_tid(c->tid);
2076 c->page = NULL;
2077 c->freelist = NULL;
2078}
2079
2080
2081
2082
2083
2084
2085static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2086{
2087 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2088
2089 if (likely(c)) {
2090 if (c->page)
2091 flush_slab(s, c);
2092
2093 unfreeze_partials(s, c);
2094 }
2095}
2096
2097static void flush_cpu_slab(void *d)
2098{
2099 struct kmem_cache *s = d;
2100
2101 __flush_cpu_slab(s, smp_processor_id());
2102}
2103
2104static bool has_cpu_slab(int cpu, void *info)
2105{
2106 struct kmem_cache *s = info;
2107 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2108
2109 return c->page || c->partial;
2110}
2111
2112static void flush_all(struct kmem_cache *s)
2113{
2114 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2115}
2116
2117
2118
2119
2120
2121static inline int node_match(struct page *page, int node)
2122{
2123#ifdef CONFIG_NUMA
2124 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2125 return 0;
2126#endif
2127 return 1;
2128}
2129
2130#ifdef CONFIG_SLUB_DEBUG
2131static int count_free(struct page *page)
2132{
2133 return page->objects - page->inuse;
2134}
2135
2136static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2137{
2138 return atomic_long_read(&n->total_objects);
2139}
2140#endif
2141
2142#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2143static unsigned long count_partial(struct kmem_cache_node *n,
2144 int (*get_count)(struct page *))
2145{
2146 unsigned long flags;
2147 unsigned long x = 0;
2148 struct page *page;
2149
2150 spin_lock_irqsave(&n->list_lock, flags);
2151 list_for_each_entry(page, &n->partial, lru)
2152 x += get_count(page);
2153 spin_unlock_irqrestore(&n->list_lock, flags);
2154 return x;
2155}
2156#endif
2157
2158static noinline void
2159slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2160{
2161#ifdef CONFIG_SLUB_DEBUG
2162 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2163 DEFAULT_RATELIMIT_BURST);
2164 int node;
2165
2166 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2167 return;
2168
2169 pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2170 nid, gfpflags);
2171 pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n",
2172 s->name, s->object_size, s->size, oo_order(s->oo),
2173 oo_order(s->min));
2174
2175 if (oo_order(s->min) > get_order(s->object_size))
2176 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2177 s->name);
2178
2179 for_each_online_node(node) {
2180 struct kmem_cache_node *n = get_node(s, node);
2181 unsigned long nr_slabs;
2182 unsigned long nr_objs;
2183 unsigned long nr_free;
2184
2185 if (!n)
2186 continue;
2187
2188 nr_free = count_partial(n, count_free);
2189 nr_slabs = node_nr_slabs(n);
2190 nr_objs = node_nr_objs(n);
2191
2192 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2193 node, nr_slabs, nr_objs, nr_free);
2194 }
2195#endif
2196}
2197
2198static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2199 int node, struct kmem_cache_cpu **pc)
2200{
2201 void *freelist;
2202 struct kmem_cache_cpu *c = *pc;
2203 struct page *page;
2204
2205 freelist = get_partial(s, flags, node, c);
2206
2207 if (freelist)
2208 return freelist;
2209
2210 page = new_slab(s, flags, node);
2211 if (page) {
2212 c = raw_cpu_ptr(s->cpu_slab);
2213 if (c->page)
2214 flush_slab(s, c);
2215
2216
2217
2218
2219
2220 freelist = page->freelist;
2221 page->freelist = NULL;
2222
2223 stat(s, ALLOC_SLAB);
2224 c->page = page;
2225 *pc = c;
2226 } else
2227 freelist = NULL;
2228
2229 return freelist;
2230}
2231
2232static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2233{
2234 if (unlikely(PageSlabPfmemalloc(page)))
2235 return gfp_pfmemalloc_allowed(gfpflags);
2236
2237 return true;
2238}
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2251{
2252 struct page new;
2253 unsigned long counters;
2254 void *freelist;
2255
2256 do {
2257 freelist = page->freelist;
2258 counters = page->counters;
2259
2260 new.counters = counters;
2261 VM_BUG_ON(!new.frozen);
2262
2263 new.inuse = page->objects;
2264 new.frozen = freelist != NULL;
2265
2266 } while (!__cmpxchg_double_slab(s, page,
2267 freelist, counters,
2268 NULL, new.counters,
2269 "get_freelist"));
2270
2271 return freelist;
2272}
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2291 unsigned long addr, struct kmem_cache_cpu *c)
2292{
2293 void *freelist;
2294 struct page *page;
2295 unsigned long flags;
2296
2297 local_irq_save(flags);
2298#ifdef CONFIG_PREEMPT
2299
2300
2301
2302
2303
2304 c = this_cpu_ptr(s->cpu_slab);
2305#endif
2306
2307 page = c->page;
2308 if (!page)
2309 goto new_slab;
2310redo:
2311
2312 if (unlikely(!node_match(page, node))) {
2313 stat(s, ALLOC_NODE_MISMATCH);
2314 deactivate_slab(s, page, c->freelist);
2315 c->page = NULL;
2316 c->freelist = NULL;
2317 goto new_slab;
2318 }
2319
2320
2321
2322
2323
2324
2325 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2326 deactivate_slab(s, page, c->freelist);
2327 c->page = NULL;
2328 c->freelist = NULL;
2329 goto new_slab;
2330 }
2331
2332
2333 freelist = c->freelist;
2334 if (freelist)
2335 goto load_freelist;
2336
2337 freelist = get_freelist(s, page);
2338
2339 if (!freelist) {
2340 c->page = NULL;
2341 stat(s, DEACTIVATE_BYPASS);
2342 goto new_slab;
2343 }
2344
2345 stat(s, ALLOC_REFILL);
2346
2347load_freelist:
2348
2349
2350
2351
2352
2353 VM_BUG_ON(!c->page->frozen);
2354 c->freelist = get_freepointer(s, freelist);
2355 c->tid = next_tid(c->tid);
2356 local_irq_restore(flags);
2357 return freelist;
2358
2359new_slab:
2360
2361 if (c->partial) {
2362 page = c->page = c->partial;
2363 c->partial = page->next;
2364 stat(s, CPU_PARTIAL_ALLOC);
2365 c->freelist = NULL;
2366 goto redo;
2367 }
2368
2369 freelist = new_slab_objects(s, gfpflags, node, &c);
2370
2371 if (unlikely(!freelist)) {
2372 slab_out_of_memory(s, gfpflags, node);
2373 local_irq_restore(flags);
2374 return NULL;
2375 }
2376
2377 page = c->page;
2378 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2379 goto load_freelist;
2380
2381
2382 if (kmem_cache_debug(s) &&
2383 !alloc_debug_processing(s, page, freelist, addr))
2384 goto new_slab;
2385
2386 deactivate_slab(s, page, get_freepointer(s, freelist));
2387 c->page = NULL;
2388 c->freelist = NULL;
2389 local_irq_restore(flags);
2390 return freelist;
2391}
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2404 gfp_t gfpflags, int node, unsigned long addr)
2405{
2406 void **object;
2407 struct kmem_cache_cpu *c;
2408 struct page *page;
2409 unsigned long tid;
2410
2411 if (slab_pre_alloc_hook(s, gfpflags))
2412 return NULL;
2413
2414 s = memcg_kmem_get_cache(s, gfpflags);
2415redo:
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427 preempt_disable();
2428 c = this_cpu_ptr(s->cpu_slab);
2429
2430
2431
2432
2433
2434
2435
2436 tid = c->tid;
2437 preempt_enable();
2438
2439 object = c->freelist;
2440 page = c->page;
2441 if (unlikely(!object || !node_match(page, node))) {
2442 object = __slab_alloc(s, gfpflags, node, addr, c);
2443 stat(s, ALLOC_SLOWPATH);
2444 } else {
2445 void *next_object = get_freepointer_safe(s, object);
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461 if (unlikely(!this_cpu_cmpxchg_double(
2462 s->cpu_slab->freelist, s->cpu_slab->tid,
2463 object, tid,
2464 next_object, next_tid(tid)))) {
2465
2466 note_cmpxchg_failure("slab_alloc", s, tid);
2467 goto redo;
2468 }
2469 prefetch_freepointer(s, next_object);
2470 stat(s, ALLOC_FASTPATH);
2471 }
2472
2473 if (unlikely(gfpflags & __GFP_ZERO) && object)
2474 memset(object, 0, s->object_size);
2475
2476 slab_post_alloc_hook(s, gfpflags, object);
2477
2478 return object;
2479}
2480
2481static __always_inline void *slab_alloc(struct kmem_cache *s,
2482 gfp_t gfpflags, unsigned long addr)
2483{
2484 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2485}
2486
2487void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2488{
2489 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2490
2491 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
2492 s->size, gfpflags);
2493
2494 return ret;
2495}
2496EXPORT_SYMBOL(kmem_cache_alloc);
2497
2498#ifdef CONFIG_TRACING
2499void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2500{
2501 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2502 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2503 return ret;
2504}
2505EXPORT_SYMBOL(kmem_cache_alloc_trace);
2506#endif
2507
2508#ifdef CONFIG_NUMA
2509void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2510{
2511 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2512
2513 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2514 s->object_size, s->size, gfpflags, node);
2515
2516 return ret;
2517}
2518EXPORT_SYMBOL(kmem_cache_alloc_node);
2519
2520#ifdef CONFIG_TRACING
2521void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2522 gfp_t gfpflags,
2523 int node, size_t size)
2524{
2525 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2526
2527 trace_kmalloc_node(_RET_IP_, ret,
2528 size, s->size, gfpflags, node);
2529 return ret;
2530}
2531EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2532#endif
2533#endif
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543static void __slab_free(struct kmem_cache *s, struct page *page,
2544 void *x, unsigned long addr)
2545{
2546 void *prior;
2547 void **object = (void *)x;
2548 int was_frozen;
2549 struct page new;
2550 unsigned long counters;
2551 struct kmem_cache_node *n = NULL;
2552 unsigned long uninitialized_var(flags);
2553
2554 stat(s, FREE_SLOWPATH);
2555
2556 if (kmem_cache_debug(s) &&
2557 !(n = free_debug_processing(s, page, x, addr, &flags)))
2558 return;
2559
2560 do {
2561 if (unlikely(n)) {
2562 spin_unlock_irqrestore(&n->list_lock, flags);
2563 n = NULL;
2564 }
2565 prior = page->freelist;
2566 counters = page->counters;
2567 set_freepointer(s, object, prior);
2568 new.counters = counters;
2569 was_frozen = new.frozen;
2570 new.inuse--;
2571 if ((!new.inuse || !prior) && !was_frozen) {
2572
2573 if (kmem_cache_has_cpu_partial(s) && !prior) {
2574
2575
2576
2577
2578
2579
2580
2581 new.frozen = 1;
2582
2583 } else {
2584
2585 n = get_node(s, page_to_nid(page));
2586
2587
2588
2589
2590
2591
2592
2593
2594 spin_lock_irqsave(&n->list_lock, flags);
2595
2596 }
2597 }
2598
2599 } while (!cmpxchg_double_slab(s, page,
2600 prior, counters,
2601 object, new.counters,
2602 "__slab_free"));
2603
2604 if (likely(!n)) {
2605
2606
2607
2608
2609
2610 if (new.frozen && !was_frozen) {
2611 put_cpu_partial(s, page, 1);
2612 stat(s, CPU_PARTIAL_FREE);
2613 }
2614
2615
2616
2617
2618 if (was_frozen)
2619 stat(s, FREE_FROZEN);
2620 return;
2621 }
2622
2623 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
2624 goto slab_empty;
2625
2626
2627
2628
2629
2630 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2631 if (kmem_cache_debug(s))
2632 remove_full(s, n, page);
2633 add_partial(n, page, DEACTIVATE_TO_TAIL);
2634 stat(s, FREE_ADD_PARTIAL);
2635 }
2636 spin_unlock_irqrestore(&n->list_lock, flags);
2637 return;
2638
2639slab_empty:
2640 if (prior) {
2641
2642
2643
2644 remove_partial(n, page);
2645 stat(s, FREE_REMOVE_PARTIAL);
2646 } else {
2647
2648 remove_full(s, n, page);
2649 }
2650
2651 spin_unlock_irqrestore(&n->list_lock, flags);
2652 stat(s, FREE_SLAB);
2653 discard_slab(s, page);
2654}
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667static __always_inline void slab_free(struct kmem_cache *s,
2668 struct page *page, void *x, unsigned long addr)
2669{
2670 void **object = (void *)x;
2671 struct kmem_cache_cpu *c;
2672 unsigned long tid;
2673
2674 slab_free_hook(s, x);
2675
2676redo:
2677
2678
2679
2680
2681
2682
2683 preempt_disable();
2684 c = this_cpu_ptr(s->cpu_slab);
2685
2686 tid = c->tid;
2687 preempt_enable();
2688
2689 if (likely(page == c->page)) {
2690 set_freepointer(s, object, c->freelist);
2691
2692 if (unlikely(!this_cpu_cmpxchg_double(
2693 s->cpu_slab->freelist, s->cpu_slab->tid,
2694 c->freelist, tid,
2695 object, next_tid(tid)))) {
2696
2697 note_cmpxchg_failure("slab_free", s, tid);
2698 goto redo;
2699 }
2700 stat(s, FREE_FASTPATH);
2701 } else
2702 __slab_free(s, page, x, addr);
2703
2704}
2705
2706void kmem_cache_free(struct kmem_cache *s, void *x)
2707{
2708 s = cache_from_obj(s, x);
2709 if (!s)
2710 return;
2711 slab_free(s, virt_to_head_page(x), x, _RET_IP_);
2712 trace_kmem_cache_free(_RET_IP_, x);
2713}
2714EXPORT_SYMBOL(kmem_cache_free);
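/*
 * Illustrative usage sketch (not part of the original file): a typical
 * caller pairs kmem_cache_alloc() with kmem_cache_free() on the same cache,
 * e.g.
 *
 *	struct kmem_cache *foo_cache =
 *		kmem_cache_create("foo", sizeof(struct foo), 0,
 *				  SLAB_HWCACHE_ALIGN, NULL);
 *	struct foo *p = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, p);
 *
 * "foo" and struct foo are hypothetical names used only for the example.
 */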
2715
/*
 * Object placement in a slab is made very easy because we always start at
 * offset 0. If we tune the size of the object to the alignment then we can
 * get the required alignment by putting one properly sized object after
 * another.
 *
 * Notice that the allocation order determines the size of the per cpu
 * slabs. Each processor always has one slab available for allocations.
 * Increasing the allocation order reduces the number of times that slabs
 * must be moved on and off the partial lists and is therefore a factor in
 * locking overhead.
 */

/*
 * Minimum / maximum order of slab pages. This influences locking overhead
 * and slab fragmentation/waste. A higher order reduces the number of
 * partial slabs and the number of times the list_lock must be taken.
 */
2735static int slub_min_order;
2736static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2737static int slub_min_objects;
2738
2739
2740
2741
2742
2743static int slub_nomerge;
2744
/*
 * Calculate the order of allocation given a slab object size.
 *
 * The order of allocation has a significant impact on performance and other
 * system components. Generally order 0 allocations should be preferred
 * since order 0 does not cause fragmentation in the page allocator. Larger
 * objects can be problematic to put into order 0 slabs because there may be
 * too much unused space left. We go to a higher order if more than 1/16th
 * of the slab would be wasted.
 *
 * In order to reach satisfactory performance we must ensure that a minimum
 * number of objects is in one slab. Otherwise we may generate too much
 * activity on the partial lists which requires taking the list_lock. This
 * is less of a concern for large slabs though, which are rarely used.
 *
 * slub_max_order specifies the order at which we stop considering the
 * number of objects in a slab as critical. Once we reach it we try to keep
 * the page order as low as possible, accepting more wasted space in favor
 * of a smaller page order.
 *
 * Higher order allocations also allow the placement of more objects in a
 * slab and thereby reduce object handling overhead. If the user has
 * requested a higher minimum order then we start with that one instead of
 * the smallest order which will fit the object.
 */
2770static inline int slab_order(int size, int min_objects,
2771 int max_order, int fract_leftover, int reserved)
2772{
2773 int order;
2774 int rem;
2775 int min_order = slub_min_order;
2776
2777 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2778 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2779
2780 for (order = max(min_order,
2781 fls(min_objects * size - 1) - PAGE_SHIFT);
2782 order <= max_order; order++) {
2783
2784 unsigned long slab_size = PAGE_SIZE << order;
2785
2786 if (slab_size < min_objects * size + reserved)
2787 continue;
2788
2789 rem = (slab_size - reserved) % size;
2790
2791 if (rem <= slab_size / fract_leftover)
2792 break;
2793
2794 }
2795
2796 return order;
2797}
2798
2799static inline int calculate_order(int size, int reserved)
2800{
2801 int order;
2802 int min_objects;
2803 int fraction;
2804 int max_objects;
2805
	/*
	 * Attempt to find the best configuration for a slab. This works by
	 * first attempting to generate a layout with the best configuration
	 * and backing off gradually.
	 *
	 * First we increase the acceptable waste in a slab (the fraction
	 * goes from 1/16 down to 1/4 of the slab). Then we reduce the
	 * minimum number of objects required per slab.
	 */
2814 min_objects = slub_min_objects;
2815 if (!min_objects)
2816 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2817 max_objects = order_objects(slub_max_order, size, reserved);
2818 min_objects = min(min_objects, max_objects);
2819
2820 while (min_objects > 1) {
2821 fraction = 16;
2822 while (fraction >= 4) {
2823 order = slab_order(size, min_objects,
2824 slub_max_order, fraction, reserved);
2825 if (order <= slub_max_order)
2826 return order;
2827 fraction /= 2;
2828 }
2829 min_objects--;
2830 }
2831
	/*
	 * We were unable to place multiple objects in a slab. Now let's see
	 * if we can place a single object there.
	 */
2836 order = slab_order(size, 1, slub_max_order, 1, reserved);
2837 if (order <= slub_max_order)
2838 return order;
2839
	/*
	 * This slab cannot be placed within slub_max_order; try any order.
	 */
2843 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2844 if (order < MAX_ORDER)
2845 return order;
2846 return -ENOSYS;
2847}
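/*
 * Worked example (illustrative only): for a hypothetical 192 byte object
 * with reserved == 0 on a 4K page system with 8 possible cpus,
 * min_objects = 4 * (fls(8) + 1) = 20. slab_order() then starts at order
 * fls(20 * 192 - 1) - PAGE_SHIFT = 0; an order-0 slab holds 21 objects and
 * wastes 4096 % 192 = 64 bytes, which is below 4096 / 16 = 256, so order 0
 * is returned on the first pass.
 */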
2848
2849static void
2850init_kmem_cache_node(struct kmem_cache_node *n)
2851{
2852 n->nr_partial = 0;
2853 spin_lock_init(&n->list_lock);
2854 INIT_LIST_HEAD(&n->partial);
2855#ifdef CONFIG_SLUB_DEBUG
2856 atomic_long_set(&n->nr_slabs, 0);
2857 atomic_long_set(&n->total_objects, 0);
2858 INIT_LIST_HEAD(&n->full);
2859#endif
2860}
2861
2862static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2863{
2864 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2865 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
2866
	/*
	 * Must align to double word boundary for the double cmpxchg
	 * instructions to work on the freelist/tid pair.
	 */
2871 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2872 2 * sizeof(void *));
2873
2874 if (!s->cpu_slab)
2875 return 0;
2876
2877 init_kmem_cache_cpus(s);
2878
2879 return 1;
2880}
2881
2882static struct kmem_cache *kmem_cache_node;
2883
/*
 * No kmalloc_node yet so do it by hand. We know that this is the first
 * slab on the node for this slabcache. There are no concurrent accesses
 * possible.
 *
 * Note that this function only works on the kmem_cache_node cache when
 * allocating for the kmem_cache_node cache itself. This is used for
 * bootstrapping memory on a fresh node that has no slab structures yet.
 */
2893static void early_kmem_cache_node_alloc(int node)
2894{
2895 struct page *page;
2896 struct kmem_cache_node *n;
2897
2898 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2899
2900 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2901
2902 BUG_ON(!page);
2903 if (page_to_nid(page) != node) {
2904 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
2905 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
2906 }
2907
2908 n = page->freelist;
2909 BUG_ON(!n);
2910 page->freelist = get_freepointer(kmem_cache_node, n);
2911 page->inuse = 1;
2912 page->frozen = 0;
2913 kmem_cache_node->node[node] = n;
2914#ifdef CONFIG_SLUB_DEBUG
2915 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2916 init_tracking(kmem_cache_node, n);
2917#endif
2918 init_kmem_cache_node(n);
2919 inc_slabs_node(kmem_cache_node, node, page->objects);
2920
	/*
	 * No locks need to be taken here as the node has just been
	 * initialized and there is no concurrent access.
	 */
2925 __add_partial(n, page, DEACTIVATE_TO_HEAD);
2926}
2927
2928static void free_kmem_cache_nodes(struct kmem_cache *s)
2929{
2930 int node;
2931
2932 for_each_node_state(node, N_NORMAL_MEMORY) {
2933 struct kmem_cache_node *n = s->node[node];
2934
2935 if (n)
2936 kmem_cache_free(kmem_cache_node, n);
2937
2938 s->node[node] = NULL;
2939 }
2940}
2941
2942static int init_kmem_cache_nodes(struct kmem_cache *s)
2943{
2944 int node;
2945
2946 for_each_node_state(node, N_NORMAL_MEMORY) {
2947 struct kmem_cache_node *n;
2948
2949 if (slab_state == DOWN) {
2950 early_kmem_cache_node_alloc(node);
2951 continue;
2952 }
2953 n = kmem_cache_alloc_node(kmem_cache_node,
2954 GFP_KERNEL, node);
2955
2956 if (!n) {
2957 free_kmem_cache_nodes(s);
2958 return 0;
2959 }
2960
2961 s->node[node] = n;
2962 init_kmem_cache_node(n);
2963 }
2964 return 1;
2965}
2966
2967static void set_min_partial(struct kmem_cache *s, unsigned long min)
2968{
2969 if (min < MIN_PARTIAL)
2970 min = MIN_PARTIAL;
2971 else if (min > MAX_PARTIAL)
2972 min = MAX_PARTIAL;
2973 s->min_partial = min;
2974}
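/*
 * Illustrative example: kmem_cache_open() below passes ilog2(s->size) / 2,
 * so a 64 byte cache would request 3 and be clamped up to MIN_PARTIAL (5),
 * while any request above MAX_PARTIAL (10) is capped at 10.
 */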
2975
/*
 * calculate_sizes() determines the order and the distribution of data within
 * a slab object.
 */
2980static int calculate_sizes(struct kmem_cache *s, int forced_order)
2981{
2982 unsigned long flags = s->flags;
2983 unsigned long size = s->object_size;
2984 int order;
2985
	/*
	 * Round up object size to the next word boundary. We can only
	 * place the free pointer at word boundaries and this determines
	 * the possible location of the free pointer.
	 */
2991 size = ALIGN(size, sizeof(void *));
2992
2993#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Determine if we can poison the object itself. If the user of
	 * the slab may touch the object after free or before allocation
	 * then we should never poison the object itself.
	 */
2999 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
3000 !s->ctor)
3001 s->flags |= __OBJECT_POISON;
3002 else
3003 s->flags &= ~__OBJECT_POISON;
3004
	/*
	 * If we are Redzoning then check if there is some space between the
	 * end of the object and the free pointer. If not then add an
	 * additional word to have some bytes to store Redzone information.
	 */
3011 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3012 size += sizeof(void *);
3013#endif
3014
	/*
	 * With that we have determined the number of bytes in actual use
	 * by the object. This is the potential offset to the free pointer.
	 */
3019 s->inuse = size;
3020
3021 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
3022 s->ctor)) {
		/*
		 * Relocate the free pointer after the object if it is not
		 * permitted to overwrite the first word of the object on
		 * kmem_cache_free.
		 *
		 * This is the case if we do RCU, have a constructor or
		 * are poisoning the objects.
		 */
3031 s->offset = size;
3032 size += sizeof(void *);
3033 }
3034
3035#ifdef CONFIG_SLUB_DEBUG
3036 if (flags & SLAB_STORE_USER)
		/*
		 * Need to store information about allocs and frees after
		 * the object.
		 */
3041 size += 2 * sizeof(struct track);
3042
3043 if (flags & SLAB_RED_ZONE)
		/*
		 * Add some empty padding so that we can catch
		 * overwrites from earlier objects rather than let
		 * tracking information or the free pointer be
		 * corrupted if a user writes before the start
		 * of the object.
		 */
3051 size += sizeof(void *);
3052#endif
3053
	/*
	 * SLUB stores one object immediately after another beginning from
	 * offset 0. In order to align the objects we have to simply size
	 * each object to conform to the alignment.
	 */
3059 size = ALIGN(size, s->align);
3060 s->size = size;
3061 if (forced_order >= 0)
3062 order = forced_order;
3063 else
3064 order = calculate_order(size, s->reserved);
3065
3066 if (order < 0)
3067 return 0;
3068
3069 s->allocflags = 0;
3070 if (order)
3071 s->allocflags |= __GFP_COMP;
3072
3073 if (s->flags & SLAB_CACHE_DMA)
3074 s->allocflags |= GFP_DMA;
3075
3076 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3077 s->allocflags |= __GFP_RECLAIMABLE;
3078
	/*
	 * Determine the number of objects per slab.
	 */
3082 s->oo = oo_make(order, size, s->reserved);
3083 s->min = oo_make(get_order(size), size, s->reserved);
3084 if (oo_objects(s->oo) > oo_objects(s->max))
3085 s->max = s->oo;
3086
3087 return !!oo_objects(s->oo);
3088}
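/*
 * Resulting per object layout (illustrative sketch; which parts exist
 * depends on the flags handled above):
 *
 *	object_size bytes	the object itself (possibly poisoned)
 *	[red zone word]		only with SLAB_RED_ZONE
 *	[free pointer]		only if it cannot share space with the object
 *	[2 x struct track]	alloc/free tracking, only with SLAB_STORE_USER
 *	[red zone padding]	only with SLAB_RED_ZONE
 *	[alignment padding]	to reach s->align
 */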
3089
3090static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3091{
3092 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3093 s->reserved = 0;
3094
3095 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3096 s->reserved = sizeof(struct rcu_head);
3097
3098 if (!calculate_sizes(s, -1))
3099 goto error;
3100 if (disable_higher_order_debug) {
		/*
		 * Disable debugging flags that store metadata if the
		 * minimum slab order increased.
		 */
3105 if (get_order(s->size) > get_order(s->object_size)) {
3106 s->flags &= ~DEBUG_METADATA_FLAGS;
3107 s->offset = 0;
3108 if (!calculate_sizes(s, -1))
3109 goto error;
3110 }
3111 }
3112
3113#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3114 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3115 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
		/* Enable the lockless fastpath using cmpxchg_double */
3117 s->flags |= __CMPXCHG_DOUBLE;
3118#endif
3119
	/*
	 * The larger the object size is, the more pages we want on the
	 * partial list to avoid pounding the page allocator excessively.
	 */
3124 set_min_partial(s, ilog2(s->size) / 2);
3125
	/*
	 * cpu_partial determines the maximum number of objects kept on a
	 * processor's per cpu partial list.
	 *
	 * Per cpu partial lists mainly contain slabs that just had one
	 * object freed. If they are used for allocation then they can be
	 * filled up again with minimal effort. The slab will never hit the
	 * per node partial lists and therefore no locking will be required.
	 *
	 * Larger object sizes get a smaller limit so that the amount of
	 * memory held back per cpu stays bounded. Once the limit is
	 * exceeded, the accumulated per cpu partial slabs are moved back
	 * to the per node lists.
	 */
3143 if (!kmem_cache_has_cpu_partial(s))
3144 s->cpu_partial = 0;
3145 else if (s->size >= PAGE_SIZE)
3146 s->cpu_partial = 2;
3147 else if (s->size >= 1024)
3148 s->cpu_partial = 6;
3149 else if (s->size >= 256)
3150 s->cpu_partial = 13;
3151 else
3152 s->cpu_partial = 30;
3153
3154#ifdef CONFIG_NUMA
3155 s->remote_node_defrag_ratio = 1000;
3156#endif
3157 if (!init_kmem_cache_nodes(s))
3158 goto error;
3159
3160 if (alloc_kmem_cache_cpus(s))
3161 return 0;
3162
3163 free_kmem_cache_nodes(s);
3164error:
3165 if (flags & SLAB_PANIC)
3166 panic("Cannot create slab %s size=%lu realsize=%u "
3167 "order=%u offset=%u flags=%lx\n",
3168 s->name, (unsigned long)s->size, s->size,
3169 oo_order(s->oo), s->offset, flags);
3170 return -EINVAL;
3171}
3172
3173static void list_slab_objects(struct kmem_cache *s, struct page *page,
3174 const char *text)
3175{
3176#ifdef CONFIG_SLUB_DEBUG
3177 void *addr = page_address(page);
3178 void *p;
3179 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3180 sizeof(long), GFP_ATOMIC);
3181 if (!map)
3182 return;
3183 slab_err(s, page, text, s->name);
3184 slab_lock(page);
3185
3186 get_map(s, page, map);
3187 for_each_object(p, s, addr, page->objects) {
3188
3189 if (!test_bit(slab_index(p, s, addr), map)) {
3190 pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr);
3191 print_tracking(s, p);
3192 }
3193 }
3194 slab_unlock(page);
3195 kfree(map);
3196#endif
3197}
3198
/*
 * Attempt to free all partial slabs on a node.
 * This is called from kmem_cache_close(). We must be the last thread
 * using the cache and therefore we do not need to lock anymore.
 */
3204static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3205{
3206 struct page *page, *h;
3207
3208 list_for_each_entry_safe(page, h, &n->partial, lru) {
3209 if (!page->inuse) {
3210 __remove_partial(n, page);
3211 discard_slab(s, page);
3212 } else {
3213 list_slab_objects(s, page,
3214 "Objects remaining in %s on kmem_cache_close()");
3215 }
3216 }
3217}
3218
/*
 * Release all resources used by a slab cache.
 */
3222static inline int kmem_cache_close(struct kmem_cache *s)
3223{
3224 int node;
3225
3226 flush_all(s);
3227
3228 for_each_node_state(node, N_NORMAL_MEMORY) {
3229 struct kmem_cache_node *n = get_node(s, node);
3230
3231 free_partial(s, n);
3232 if (n->nr_partial || slabs_node(s, node))
3233 return 1;
3234 }
3235 free_percpu(s->cpu_slab);
3236 free_kmem_cache_nodes(s);
3237 return 0;
3238}
3239
3240int __kmem_cache_shutdown(struct kmem_cache *s)
3241{
3242 return kmem_cache_close(s);
3243}
3244
/*
 * Boot-time tuning of slab page order, minimum object count and cache
 * merging (slub_min_order=, slub_max_order=, slub_min_objects=,
 * slub_nomerge).
 */
3249static int __init setup_slub_min_order(char *str)
3250{
3251 get_option(&str, &slub_min_order);
3252
3253 return 1;
3254}
3255
3256__setup("slub_min_order=", setup_slub_min_order);
3257
3258static int __init setup_slub_max_order(char *str)
3259{
3260 get_option(&str, &slub_max_order);
3261 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3262
3263 return 1;
3264}
3265
3266__setup("slub_max_order=", setup_slub_max_order);
3267
3268static int __init setup_slub_min_objects(char *str)
3269{
3270 get_option(&str, &slub_min_objects);
3271
3272 return 1;
3273}
3274
3275__setup("slub_min_objects=", setup_slub_min_objects);
3276
3277static int __init setup_slub_nomerge(char *str)
3278{
3279 slub_nomerge = 1;
3280 return 1;
3281}
3282
3283__setup("slub_nomerge", setup_slub_nomerge);
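/*
 * Example (illustrative): booting with
 * "slub_max_order=1 slub_min_objects=16 slub_nomerge" limits slab pages to
 * order 1, asks for at least 16 objects per slab where possible and
 * disables cache merging.
 */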
3284
3285void *__kmalloc(size_t size, gfp_t flags)
3286{
3287 struct kmem_cache *s;
3288 void *ret;
3289
3290 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3291 return kmalloc_large(size, flags);
3292
3293 s = kmalloc_slab(size, flags);
3294
3295 if (unlikely(ZERO_OR_NULL_PTR(s)))
3296 return s;
3297
3298 ret = slab_alloc(s, flags, _RET_IP_);
3299
3300 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3301
3302 return ret;
3303}
3304EXPORT_SYMBOL(__kmalloc);
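/*
 * Illustrative sketch (not from the original file): __kmalloc() is normally
 * reached through the kmalloc() wrapper, e.g.
 *
 *	char *buf = kmalloc(256, GFP_KERNEL);
 *	if (buf) {
 *		...
 *		kfree(buf);
 *	}
 *
 * Requests above KMALLOC_MAX_CACHE_SIZE bypass the kmalloc caches and go
 * straight to the page allocator via kmalloc_large().
 */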
3305
3306#ifdef CONFIG_NUMA
3307static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3308{
3309 struct page *page;
3310 void *ptr = NULL;
3311
3312 flags |= __GFP_COMP | __GFP_NOTRACK;
3313 page = alloc_kmem_pages_node(node, flags, get_order(size));
3314 if (page)
3315 ptr = page_address(page);
3316
3317 kmalloc_large_node_hook(ptr, size, flags);
3318 return ptr;
3319}
3320
3321void *__kmalloc_node(size_t size, gfp_t flags, int node)
3322{
3323 struct kmem_cache *s;
3324 void *ret;
3325
3326 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3327 ret = kmalloc_large_node(size, flags, node);
3328
3329 trace_kmalloc_node(_RET_IP_, ret,
3330 size, PAGE_SIZE << get_order(size),
3331 flags, node);
3332
3333 return ret;
3334 }
3335
3336 s = kmalloc_slab(size, flags);
3337
3338 if (unlikely(ZERO_OR_NULL_PTR(s)))
3339 return s;
3340
3341 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3342
3343 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3344
3345 return ret;
3346}
3347EXPORT_SYMBOL(__kmalloc_node);
3348#endif
3349
3350size_t ksize(const void *object)
3351{
3352 struct page *page;
3353
3354 if (unlikely(object == ZERO_SIZE_PTR))
3355 return 0;
3356
3357 page = virt_to_head_page(object);
3358
3359 if (unlikely(!PageSlab(page))) {
3360 WARN_ON(!PageCompound(page));
3361 return PAGE_SIZE << compound_order(page);
3362 }
3363
3364 return slab_ksize(page->slab_cache);
3365}
3366EXPORT_SYMBOL(ksize);
3367
3368void kfree(const void *x)
3369{
3370 struct page *page;
3371 void *object = (void *)x;
3372
3373 trace_kfree(_RET_IP_, x);
3374
3375 if (unlikely(ZERO_OR_NULL_PTR(x)))
3376 return;
3377
3378 page = virt_to_head_page(x);
3379 if (unlikely(!PageSlab(page))) {
3380 BUG_ON(!PageCompound(page));
3381 kfree_hook(x);
3382 __free_kmem_pages(page, compound_order(page));
3383 return;
3384 }
3385 slab_free(page->slab_cache, page, object, _RET_IP_);
3386}
3387EXPORT_SYMBOL(kfree);
3388
/*
 * __kmem_cache_shrink() removes empty slabs from the partial lists and sorts
 * the remaining slabs by the number of items in use. The slabs with the
 * most items in use come first. New allocations will then fill those up
 * and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This results in them
 * being allocated from last, increasing the chance that the last objects
 * are freed in them.
 */
3399int __kmem_cache_shrink(struct kmem_cache *s)
3400{
3401 int node;
3402 int i;
3403 struct kmem_cache_node *n;
3404 struct page *page;
3405 struct page *t;
3406 int objects = oo_objects(s->max);
3407 struct list_head *slabs_by_inuse =
3408 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3409 unsigned long flags;
3410
3411 if (!slabs_by_inuse)
3412 return -ENOMEM;
3413
3414 flush_all(s);
3415 for_each_node_state(node, N_NORMAL_MEMORY) {
3416 n = get_node(s, node);
3417
3418 if (!n->nr_partial)
3419 continue;
3420
3421 for (i = 0; i < objects; i++)
3422 INIT_LIST_HEAD(slabs_by_inuse + i);
3423
3424 spin_lock_irqsave(&n->list_lock, flags);
		/*
		 * Build lists of slabs indexed by the number of objects in
		 * use.
		 *
		 * Note that concurrent frees may occur while we hold the
		 * list_lock. page->inuse here is the upper limit.
		 */
3432 list_for_each_entry_safe(page, t, &n->partial, lru) {
3433 list_move(&page->lru, slabs_by_inuse + page->inuse);
3434 if (!page->inuse)
3435 n->nr_partial--;
3436 }
3437
		/*
		 * Rebuild the partial list with the slabs filled up most
		 * first and the least used slabs at the end.
		 */
3442 for (i = objects - 1; i > 0; i--)
3443 list_splice(slabs_by_inuse + i, n->partial.prev);
3444
3445 spin_unlock_irqrestore(&n->list_lock, flags);
3446
		/* Release empty slabs (those left on slabs_by_inuse[0]) */
3448 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3449 discard_slab(s, page);
3450 }
3451
3452 kfree(slabs_by_inuse);
3453 return 0;
3454}
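/*
 * Illustrative example: with 32 objects per slab, a partial slab that still
 * has 3 objects in use is moved to slabs_by_inuse[3]; completely empty slabs
 * collect on slabs_by_inuse[0] and are discarded after the list_lock is
 * dropped.
 */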
3455
3456static int slab_mem_going_offline_callback(void *arg)
3457{
3458 struct kmem_cache *s;
3459
3460 mutex_lock(&slab_mutex);
3461 list_for_each_entry(s, &slab_caches, list)
3462 __kmem_cache_shrink(s);
3463 mutex_unlock(&slab_mutex);
3464
3465 return 0;
3466}
3467
3468static void slab_mem_offline_callback(void *arg)
3469{
3470 struct kmem_cache_node *n;
3471 struct kmem_cache *s;
3472 struct memory_notify *marg = arg;
3473 int offline_node;
3474
3475 offline_node = marg->status_change_nid_normal;
3476
	/*
	 * If the node still has available normal memory then its
	 * kmem_cache_node structures must be kept.
	 */
3481 if (offline_node < 0)
3482 return;
3483
3484 mutex_lock(&slab_mutex);
3485 list_for_each_entry(s, &slab_caches, list) {
3486 n = get_node(s, offline_node);
3487 if (n) {
			/*
			 * If n->nr_slabs > 0, slabs still exist on the node
			 * that is going down. We were unable to free them,
			 * and offline_pages() should not have called this
			 * callback in that case. So, we must fail.
			 */
3494 BUG_ON(slabs_node(s, offline_node));
3495
3496 s->node[offline_node] = NULL;
3497 kmem_cache_free(kmem_cache_node, n);
3498 }
3499 }
3500 mutex_unlock(&slab_mutex);
3501}
3502
3503static int slab_mem_going_online_callback(void *arg)
3504{
3505 struct kmem_cache_node *n;
3506 struct kmem_cache *s;
3507 struct memory_notify *marg = arg;
3508 int nid = marg->status_change_nid_normal;
3509 int ret = 0;
3510
	/*
	 * If the node's memory is already available then the kmem_cache_node
	 * structures exist already. Nothing to do.
	 */
3515 if (nid < 0)
3516 return 0;
3517
	/*
	 * We are bringing a node online. No memory is available yet. We must
	 * allocate a kmem_cache_node structure for each cache in order to
	 * bring the node online.
	 */
3523 mutex_lock(&slab_mutex);
3524 list_for_each_entry(s, &slab_caches, list) {
		/*
		 * kmem_cache_alloc() will fall back to other nodes
		 * since memory is not yet available from the node that
		 * is being brought up.
		 */
3530 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3531 if (!n) {
3532 ret = -ENOMEM;
3533 goto out;
3534 }
3535 init_kmem_cache_node(n);
3536 s->node[nid] = n;
3537 }
3538out:
3539 mutex_unlock(&slab_mutex);
3540 return ret;
3541}
3542
3543static int slab_memory_callback(struct notifier_block *self,
3544 unsigned long action, void *arg)
3545{
3546 int ret = 0;
3547
3548 switch (action) {
3549 case MEM_GOING_ONLINE:
3550 ret = slab_mem_going_online_callback(arg);
3551 break;
3552 case MEM_GOING_OFFLINE:
3553 ret = slab_mem_going_offline_callback(arg);
3554 break;
3555 case MEM_OFFLINE:
3556 case MEM_CANCEL_ONLINE:
3557 slab_mem_offline_callback(arg);
3558 break;
3559 case MEM_ONLINE:
3560 case MEM_CANCEL_OFFLINE:
3561 break;
3562 }
3563 if (ret)
3564 ret = notifier_from_errno(ret);
3565 else
3566 ret = NOTIFY_OK;
3567 return ret;
3568}
3569
3570static struct notifier_block slab_memory_callback_nb = {
3571 .notifier_call = slab_memory_callback,
3572 .priority = SLAB_CALLBACK_PRI,
3573};
3574
/********************************************************************
 *			Basic setup of slabs
 *******************************************************************/

/*
 * Used for early kmem_cache structures that were allocated using
 * the page allocator. Allocate them properly then fix up the pointers
 * that may be pointing to the wrong kmem_cache structure.
 */
3585static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3586{
3587 int node;
3588 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3589
3590 memcpy(s, static_cache, kmem_cache->object_size);
3591
	/*
	 * This runs very early, and only the boot processor is supposed to
	 * be up. Even if it weren't true, IRQs are not up so we couldn't
	 * fire any cpu_slab frees.
	 */
3597 __flush_cpu_slab(s, smp_processor_id());
3598 for_each_node_state(node, N_NORMAL_MEMORY) {
3599 struct kmem_cache_node *n = get_node(s, node);
3600 struct page *p;
3601
3602 if (n) {
3603 list_for_each_entry(p, &n->partial, lru)
3604 p->slab_cache = s;
3605
3606#ifdef CONFIG_SLUB_DEBUG
3607 list_for_each_entry(p, &n->full, lru)
3608 p->slab_cache = s;
3609#endif
3610 }
3611 }
3612 list_add(&s->list, &slab_caches);
3613 return s;
3614}
3615
3616void __init kmem_cache_init(void)
3617{
3618 static __initdata struct kmem_cache boot_kmem_cache,
3619 boot_kmem_cache_node;
3620
3621 if (debug_guardpage_minorder())
3622 slub_max_order = 0;
3623
3624 kmem_cache_node = &boot_kmem_cache_node;
3625 kmem_cache = &boot_kmem_cache;
3626
3627 create_boot_cache(kmem_cache_node, "kmem_cache_node",
3628 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
3629
3630 register_hotmemory_notifier(&slab_memory_callback_nb);
3631
	/* Able to allocate the per node structures */
3633 slab_state = PARTIAL;
3634
3635 create_boot_cache(kmem_cache, "kmem_cache",
3636 offsetof(struct kmem_cache, node) +
3637 nr_node_ids * sizeof(struct kmem_cache_node *),
3638 SLAB_HWCACHE_ALIGN);
3639
3640 kmem_cache = bootstrap(&boot_kmem_cache);
3641
	/*
	 * Allocate kmem_cache_node properly from the kmem_cache slab.
	 * kmem_cache_node is separately allocated so no need to
	 * update any list pointers.
	 */
3647 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
3648
	/* Now we can use the kmem_cache to allocate kmalloc slabs */
3650 create_kmalloc_caches(0);
3651
3652#ifdef CONFIG_SMP
3653 register_cpu_notifier(&slab_notifier);
3654#endif
3655
3656 pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
3657 cache_line_size(),
3658 slub_min_order, slub_max_order, slub_min_objects,
3659 nr_cpu_ids, nr_node_ids);
3660}
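/*
 * The banner above prints, for example (values are illustrative only):
 *
 *	SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
 */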
3661
3662void __init kmem_cache_init_late(void)
3663{
3664}
3665
/*
 * Cache merging: reuse an existing compatible cache instead of creating
 * a new one.
 */
3669static int slab_unmergeable(struct kmem_cache *s)
3670{
3671 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3672 return 1;
3673
3674 if (!is_root_cache(s))
3675 return 1;
3676
3677 if (s->ctor)
3678 return 1;
3679
3680
3681
3682
3683 if (s->refcount < 0)
3684 return 1;
3685
3686 return 0;
3687}
3688
3689static struct kmem_cache *find_mergeable(size_t size, size_t align,
3690 unsigned long flags, const char *name, void (*ctor)(void *))
3691{
3692 struct kmem_cache *s;
3693
3694 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3695 return NULL;
3696
3697 if (ctor)
3698 return NULL;
3699
3700 size = ALIGN(size, sizeof(void *));
3701 align = calculate_alignment(flags, align, size);
3702 size = ALIGN(size, align);
3703 flags = kmem_cache_flags(size, flags, name, NULL);
3704
3705 list_for_each_entry(s, &slab_caches, list) {
3706 if (slab_unmergeable(s))
3707 continue;
3708
3709 if (size > s->size)
3710 continue;
3711
3712 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3713 continue;
3714
3715
3716
3717
3718 if ((s->size & ~(align - 1)) != s->size)
3719 continue;
3720
3721 if (s->size - size >= sizeof(void *))
3722 continue;
3723
3724 return s;
3725 }
3726 return NULL;
3727}
3728
3729struct kmem_cache *
3730__kmem_cache_alias(const char *name, size_t size, size_t align,
3731 unsigned long flags, void (*ctor)(void *))
3732{
3733 struct kmem_cache *s;
3734
3735 s = find_mergeable(size, align, flags, name, ctor);
3736 if (s) {
3737 int i;
3738 struct kmem_cache *c;
3739
3740 s->refcount++;
3741
		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */
3746 s->object_size = max(s->object_size, (int)size);
3747 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3748
3749 for_each_memcg_cache_index(i) {
3750 c = cache_from_memcg_idx(s, i);
3751 if (!c)
3752 continue;
3753 c->object_size = s->object_size;
3754 c->inuse = max_t(int, c->inuse,
3755 ALIGN(size, sizeof(void *)));
3756 }
3757
3758 if (sysfs_slab_alias(s, name)) {
3759 s->refcount--;
3760 s = NULL;
3761 }
3762 }
3763
3764 return s;
3765}
3766
3767int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
3768{
3769 int err;
3770
3771 err = kmem_cache_open(s, flags);
3772 if (err)
3773 return err;
3774
	/* Mutex is not taken during early boot */
3776 if (slab_state <= UP)
3777 return 0;
3778
3779 memcg_propagate_slab_attrs(s);
3780 err = sysfs_slab_add(s);
3781 if (err)
3782 kmem_cache_close(s);
3783
3784 return err;
3785}
3786
3787#ifdef CONFIG_SMP
/*
 * Use the cpu notifier to ensure that the cpu slabs are flushed when
 * a processor goes away.
 */
3792static int slab_cpuup_callback(struct notifier_block *nfb,
3793 unsigned long action, void *hcpu)
3794{
3795 long cpu = (long)hcpu;
3796 struct kmem_cache *s;
3797 unsigned long flags;
3798
3799 switch (action) {
3800 case CPU_UP_CANCELED:
3801 case CPU_UP_CANCELED_FROZEN:
3802 case CPU_DEAD:
3803 case CPU_DEAD_FROZEN:
3804 mutex_lock(&slab_mutex);
3805 list_for_each_entry(s, &slab_caches, list) {
3806 local_irq_save(flags);
3807 __flush_cpu_slab(s, cpu);
3808 local_irq_restore(flags);
3809 }
3810 mutex_unlock(&slab_mutex);
3811 break;
3812 default:
3813 break;
3814 }
3815 return NOTIFY_OK;
3816}
3817
3818static struct notifier_block slab_notifier = {
3819 .notifier_call = slab_cpuup_callback
3820};
3821
3822#endif
3823
3824void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3825{
3826 struct kmem_cache *s;
3827 void *ret;
3828
3829 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3830 return kmalloc_large(size, gfpflags);
3831
3832 s = kmalloc_slab(size, gfpflags);
3833
3834 if (unlikely(ZERO_OR_NULL_PTR(s)))
3835 return s;
3836
3837 ret = slab_alloc(s, gfpflags, caller);
3838
3839
3840 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3841
3842 return ret;
3843}
3844
3845#ifdef CONFIG_NUMA
3846void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3847 int node, unsigned long caller)
3848{
3849 struct kmem_cache *s;
3850 void *ret;
3851
3852 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3853 ret = kmalloc_large_node(size, gfpflags, node);
3854
3855 trace_kmalloc_node(caller, ret,
3856 size, PAGE_SIZE << get_order(size),
3857 gfpflags, node);
3858
3859 return ret;
3860 }
3861
3862 s = kmalloc_slab(size, gfpflags);
3863
3864 if (unlikely(ZERO_OR_NULL_PTR(s)))
3865 return s;
3866
3867 ret = slab_alloc_node(s, gfpflags, node, caller);
3868
3869
3870 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3871
3872 return ret;
3873}
3874#endif
3875
3876#ifdef CONFIG_SYSFS
3877static int count_inuse(struct page *page)
3878{
3879 return page->inuse;
3880}
3881
3882static int count_total(struct page *page)
3883{
3884 return page->objects;
3885}
3886#endif
3887
3888#ifdef CONFIG_SLUB_DEBUG
3889static int validate_slab(struct kmem_cache *s, struct page *page,
3890 unsigned long *map)
3891{
3892 void *p;
3893 void *addr = page_address(page);
3894
3895 if (!check_slab(s, page) ||
3896 !on_freelist(s, page, NULL))
3897 return 0;
3898
3899
3900 bitmap_zero(map, page->objects);
3901
3902 get_map(s, page, map);
3903 for_each_object(p, s, addr, page->objects) {
3904 if (test_bit(slab_index(p, s, addr), map))
3905 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
3906 return 0;
3907 }
3908
3909 for_each_object(p, s, addr, page->objects)
3910 if (!test_bit(slab_index(p, s, addr), map))
3911 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
3912 return 0;
3913 return 1;
3914}
3915
3916static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3917 unsigned long *map)
3918{
3919 slab_lock(page);
3920 validate_slab(s, page, map);
3921 slab_unlock(page);
3922}
3923
3924static int validate_slab_node(struct kmem_cache *s,
3925 struct kmem_cache_node *n, unsigned long *map)
3926{
3927 unsigned long count = 0;
3928 struct page *page;
3929 unsigned long flags;
3930
3931 spin_lock_irqsave(&n->list_lock, flags);
3932
3933 list_for_each_entry(page, &n->partial, lru) {
3934 validate_slab_slab(s, page, map);
3935 count++;
3936 }
3937 if (count != n->nr_partial)
3938 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
3939 s->name, count, n->nr_partial);
3940
3941 if (!(s->flags & SLAB_STORE_USER))
3942 goto out;
3943
3944 list_for_each_entry(page, &n->full, lru) {
3945 validate_slab_slab(s, page, map);
3946 count++;
3947 }
3948 if (count != atomic_long_read(&n->nr_slabs))
3949 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
3950 s->name, count, atomic_long_read(&n->nr_slabs));
3951
3952out:
3953 spin_unlock_irqrestore(&n->list_lock, flags);
3954 return count;
3955}
3956
3957static long validate_slab_cache(struct kmem_cache *s)
3958{
3959 int node;
3960 unsigned long count = 0;
3961 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3962 sizeof(unsigned long), GFP_KERNEL);
3963
3964 if (!map)
3965 return -ENOMEM;
3966
3967 flush_all(s);
3968 for_each_node_state(node, N_NORMAL_MEMORY) {
3969 struct kmem_cache_node *n = get_node(s, node);
3970
3971 count += validate_slab_node(s, n, map);
3972 }
3973 kfree(map);
3974 return count;
3975}
3976
/*
 * Generate lists of code addresses where slabcache objects are allocated
 * and freed.
 */
3981struct location {
3982 unsigned long count;
3983 unsigned long addr;
3984 long long sum_time;
3985 long min_time;
3986 long max_time;
3987 long min_pid;
3988 long max_pid;
3989 DECLARE_BITMAP(cpus, NR_CPUS);
3990 nodemask_t nodes;
3991};
3992
3993struct loc_track {
3994 unsigned long max;
3995 unsigned long count;
3996 struct location *loc;
3997};
3998
3999static void free_loc_track(struct loc_track *t)
4000{
4001 if (t->max)
4002 free_pages((unsigned long)t->loc,
4003 get_order(sizeof(struct location) * t->max));
4004}
4005
4006static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4007{
4008 struct location *l;
4009 int order;
4010
4011 order = get_order(sizeof(struct location) * max);
4012
4013 l = (void *)__get_free_pages(flags, order);
4014 if (!l)
4015 return 0;
4016
4017 if (t->count) {
4018 memcpy(l, t->loc, sizeof(struct location) * t->count);
4019 free_loc_track(t);
4020 }
4021 t->max = max;
4022 t->loc = l;
4023 return 1;
4024}
4025
4026static int add_location(struct loc_track *t, struct kmem_cache *s,
4027 const struct track *track)
4028{
4029 long start, end, pos;
4030 struct location *l;
4031 unsigned long caddr;
4032 unsigned long age = jiffies - track->when;
4033
4034 start = -1;
4035 end = t->count;
4036
4037 for ( ; ; ) {
4038 pos = start + (end - start + 1) / 2;
4039
4040
4041
4042
4043
4044 if (pos == end)
4045 break;
4046
4047 caddr = t->loc[pos].addr;
4048 if (track->addr == caddr) {
4049
4050 l = &t->loc[pos];
4051 l->count++;
4052 if (track->when) {
4053 l->sum_time += age;
4054 if (age < l->min_time)
4055 l->min_time = age;
4056 if (age > l->max_time)
4057 l->max_time = age;
4058
4059 if (track->pid < l->min_pid)
4060 l->min_pid = track->pid;
4061 if (track->pid > l->max_pid)
4062 l->max_pid = track->pid;
4063
4064 cpumask_set_cpu(track->cpu,
4065 to_cpumask(l->cpus));
4066 }
4067 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4068 return 1;
4069 }
4070
4071 if (track->addr < caddr)
4072 end = pos;
4073 else
4074 start = pos;
4075 }
4076
	/*
	 * Not found. Insert a new tracking element.
	 */
4080 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4081 return 0;
4082
4083 l = t->loc + pos;
4084 if (pos < t->count)
4085 memmove(l + 1, l,
4086 (t->count - pos) * sizeof(struct location));
4087 t->count++;
4088 l->count = 1;
4089 l->addr = track->addr;
4090 l->sum_time = age;
4091 l->min_time = age;
4092 l->max_time = age;
4093 l->min_pid = track->pid;
4094 l->max_pid = track->pid;
4095 cpumask_clear(to_cpumask(l->cpus));
4096 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4097 nodes_clear(l->nodes);
4098 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4099 return 1;
4100}
4101
4102static void process_slab(struct loc_track *t, struct kmem_cache *s,
4103 struct page *page, enum track_item alloc,
4104 unsigned long *map)
4105{
4106 void *addr = page_address(page);
4107 void *p;
4108
4109 bitmap_zero(map, page->objects);
4110 get_map(s, page, map);
4111
4112 for_each_object(p, s, addr, page->objects)
4113 if (!test_bit(slab_index(p, s, addr), map))
4114 add_location(t, s, get_track(s, p, alloc));
4115}
4116
4117static int list_locations(struct kmem_cache *s, char *buf,
4118 enum track_item alloc)
4119{
4120 int len = 0;
4121 unsigned long i;
4122 struct loc_track t = { 0, 0, NULL };
4123 int node;
4124 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4125 sizeof(unsigned long), GFP_KERNEL);
4126
4127 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4128 GFP_TEMPORARY)) {
4129 kfree(map);
4130 return sprintf(buf, "Out of memory\n");
4131 }
4132
4133 flush_all(s);
4134
4135 for_each_node_state(node, N_NORMAL_MEMORY) {
4136 struct kmem_cache_node *n = get_node(s, node);
4137 unsigned long flags;
4138 struct page *page;
4139
4140 if (!atomic_long_read(&n->nr_slabs))
4141 continue;
4142
4143 spin_lock_irqsave(&n->list_lock, flags);
4144 list_for_each_entry(page, &n->partial, lru)
4145 process_slab(&t, s, page, alloc, map);
4146 list_for_each_entry(page, &n->full, lru)
4147 process_slab(&t, s, page, alloc, map);
4148 spin_unlock_irqrestore(&n->list_lock, flags);
4149 }
4150
4151 for (i = 0; i < t.count; i++) {
4152 struct location *l = &t.loc[i];
4153
4154 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4155 break;
4156 len += sprintf(buf + len, "%7ld ", l->count);
4157
4158 if (l->addr)
4159 len += sprintf(buf + len, "%pS", (void *)l->addr);
4160 else
4161 len += sprintf(buf + len, "<not-available>");
4162
4163 if (l->sum_time != l->min_time) {
4164 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4165 l->min_time,
4166 (long)div_u64(l->sum_time, l->count),
4167 l->max_time);
4168 } else
4169 len += sprintf(buf + len, " age=%ld",
4170 l->min_time);
4171
4172 if (l->min_pid != l->max_pid)
4173 len += sprintf(buf + len, " pid=%ld-%ld",
4174 l->min_pid, l->max_pid);
4175 else
4176 len += sprintf(buf + len, " pid=%ld",
4177 l->min_pid);
4178
4179 if (num_online_cpus() > 1 &&
4180 !cpumask_empty(to_cpumask(l->cpus)) &&
4181 len < PAGE_SIZE - 60) {
4182 len += sprintf(buf + len, " cpus=");
4183 len += cpulist_scnprintf(buf + len,
4184 PAGE_SIZE - len - 50,
4185 to_cpumask(l->cpus));
4186 }
4187
4188 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4189 len < PAGE_SIZE - 60) {
4190 len += sprintf(buf + len, " nodes=");
4191 len += nodelist_scnprintf(buf + len,
4192 PAGE_SIZE - len - 50,
4193 l->nodes);
4194 }
4195
4196 len += sprintf(buf + len, "\n");
4197 }
4198
4199 free_loc_track(&t);
4200 kfree(map);
4201 if (!t.count)
4202 len += sprintf(buf, "No data\n");
4203 return len;
4204}
4205#endif
4206
4207#ifdef SLUB_RESILIENCY_TEST
4208static void resiliency_test(void)
4209{
4210 u8 *p;
4211
4212 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4213
4214 pr_err("SLUB resiliency testing\n");
4215 pr_err("-----------------------\n");
4216 pr_err("A. Corruption after allocation\n");
4217
4218 p = kzalloc(16, GFP_KERNEL);
4219 p[16] = 0x12;
4220 pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
4221 p + 16);
4222
4223 validate_slab_cache(kmalloc_caches[4]);
4224
4225
4226 p = kzalloc(32, GFP_KERNEL);
4227 p[32 + sizeof(void *)] = 0x34;
4228 pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n",
4229 p);
4230 pr_err("If allocated object is overwritten then not detectable\n\n");
4231
4232 validate_slab_cache(kmalloc_caches[5]);
4233 p = kzalloc(64, GFP_KERNEL);
4234 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4235 *p = 0x56;
4236 pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4237 p);
4238 pr_err("If allocated object is overwritten then not detectable\n\n");
4239 validate_slab_cache(kmalloc_caches[6]);
4240
4241 pr_err("\nB. Corruption after free\n");
4242 p = kzalloc(128, GFP_KERNEL);
4243 kfree(p);
4244 *p = 0x78;
4245 pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4246 validate_slab_cache(kmalloc_caches[7]);
4247
4248 p = kzalloc(256, GFP_KERNEL);
4249 kfree(p);
4250 p[50] = 0x9a;
4251 pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
4252 validate_slab_cache(kmalloc_caches[8]);
4253
4254 p = kzalloc(512, GFP_KERNEL);
4255 kfree(p);
4256 p[512] = 0xab;
4257 pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4258 validate_slab_cache(kmalloc_caches[9]);
4259}
4260#else
4261#ifdef CONFIG_SYSFS
4262static void resiliency_test(void) {};
4263#endif
4264#endif
4265
4266#ifdef CONFIG_SYSFS
4267enum slab_stat_type {
4268 SL_ALL,
4269 SL_PARTIAL,
4270 SL_CPU,
4271 SL_OBJECTS,
4272 SL_TOTAL
4273};
4274
4275#define SO_ALL (1 << SL_ALL)
4276#define SO_PARTIAL (1 << SL_PARTIAL)
4277#define SO_CPU (1 << SL_CPU)
4278#define SO_OBJECTS (1 << SL_OBJECTS)
4279#define SO_TOTAL (1 << SL_TOTAL)
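/*
 * These flags are combined by the sysfs show functions below, e.g.
 * objects_show() passes SO_ALL|SO_OBJECTS to count objects in use on all
 * slabs, while partial_show() passes SO_PARTIAL to count partial slabs.
 */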
4280
4281static ssize_t show_slab_objects(struct kmem_cache *s,
4282 char *buf, unsigned long flags)
4283{
4284 unsigned long total = 0;
4285 int node;
4286 int x;
4287 unsigned long *nodes;
4288
4289 nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4290 if (!nodes)
4291 return -ENOMEM;
4292
4293 if (flags & SO_CPU) {
4294 int cpu;
4295
4296 for_each_possible_cpu(cpu) {
4297 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4298 cpu);
4299 int node;
4300 struct page *page;
4301
4302 page = ACCESS_ONCE(c->page);
4303 if (!page)
4304 continue;
4305
4306 node = page_to_nid(page);
4307 if (flags & SO_TOTAL)
4308 x = page->objects;
4309 else if (flags & SO_OBJECTS)
4310 x = page->inuse;
4311 else
4312 x = 1;
4313
4314 total += x;
4315 nodes[node] += x;
4316
4317 page = ACCESS_ONCE(c->partial);
4318 if (page) {
4319 node = page_to_nid(page);
4320 if (flags & SO_TOTAL)
4321 WARN_ON_ONCE(1);
4322 else if (flags & SO_OBJECTS)
4323 WARN_ON_ONCE(1);
4324 else
4325 x = page->pages;
4326 total += x;
4327 nodes[node] += x;
4328 }
4329 }
4330 }
4331
4332 get_online_mems();
4333#ifdef CONFIG_SLUB_DEBUG
4334 if (flags & SO_ALL) {
4335 for_each_node_state(node, N_NORMAL_MEMORY) {
4336 struct kmem_cache_node *n = get_node(s, node);
4337
4338 if (flags & SO_TOTAL)
4339 x = atomic_long_read(&n->total_objects);
4340 else if (flags & SO_OBJECTS)
4341 x = atomic_long_read(&n->total_objects) -
4342 count_partial(n, count_free);
4343 else
4344 x = atomic_long_read(&n->nr_slabs);
4345 total += x;
4346 nodes[node] += x;
4347 }
4348
4349 } else
4350#endif
4351 if (flags & SO_PARTIAL) {
4352 for_each_node_state(node, N_NORMAL_MEMORY) {
4353 struct kmem_cache_node *n = get_node(s, node);
4354
4355 if (flags & SO_TOTAL)
4356 x = count_partial(n, count_total);
4357 else if (flags & SO_OBJECTS)
4358 x = count_partial(n, count_inuse);
4359 else
4360 x = n->nr_partial;
4361 total += x;
4362 nodes[node] += x;
4363 }
4364 }
4365 x = sprintf(buf, "%lu", total);
4366#ifdef CONFIG_NUMA
4367 for_each_node_state(node, N_NORMAL_MEMORY)
4368 if (nodes[node])
4369 x += sprintf(buf + x, " N%d=%lu",
4370 node, nodes[node]);
4371#endif
4372 put_online_mems();
4373 kfree(nodes);
4374 return x + sprintf(buf + x, "\n");
4375}
4376
4377#ifdef CONFIG_SLUB_DEBUG
4378static int any_slab_objects(struct kmem_cache *s)
4379{
4380 int node;
4381
4382 for_each_online_node(node) {
4383 struct kmem_cache_node *n = get_node(s, node);
4384
4385 if (!n)
4386 continue;
4387
4388 if (atomic_long_read(&n->total_objects))
4389 return 1;
4390 }
4391 return 0;
4392}
4393#endif
4394
4395#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4396#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4397
4398struct slab_attribute {
4399 struct attribute attr;
4400 ssize_t (*show)(struct kmem_cache *s, char *buf);
4401 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4402};
4403
4404#define SLAB_ATTR_RO(_name) \
4405 static struct slab_attribute _name##_attr = \
4406 __ATTR(_name, 0400, _name##_show, NULL)
4407
4408#define SLAB_ATTR(_name) \
4409 static struct slab_attribute _name##_attr = \
4410 __ATTR(_name, 0600, _name##_show, _name##_store)
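/*
 * Example: SLAB_ATTR(order) below wires order_show()/order_store() to the
 * sysfs file /sys/kernel/slab/<cache>/order (mode 0600), while
 * SLAB_ATTR_RO(objs_per_slab) creates a read-only file backed by
 * objs_per_slab_show().
 */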
4411
4412static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4413{
4414 return sprintf(buf, "%d\n", s->size);
4415}
4416SLAB_ATTR_RO(slab_size);
4417
4418static ssize_t align_show(struct kmem_cache *s, char *buf)
4419{
4420 return sprintf(buf, "%d\n", s->align);
4421}
4422SLAB_ATTR_RO(align);
4423
4424static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4425{
4426 return sprintf(buf, "%d\n", s->object_size);
4427}
4428SLAB_ATTR_RO(object_size);
4429
4430static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4431{
4432 return sprintf(buf, "%d\n", oo_objects(s->oo));
4433}
4434SLAB_ATTR_RO(objs_per_slab);
4435
4436static ssize_t order_store(struct kmem_cache *s,
4437 const char *buf, size_t length)
4438{
4439 unsigned long order;
4440 int err;
4441
4442 err = kstrtoul(buf, 10, &order);
4443 if (err)
4444 return err;
4445
4446 if (order > slub_max_order || order < slub_min_order)
4447 return -EINVAL;
4448
4449 calculate_sizes(s, order);
4450 return length;
4451}
4452
4453static ssize_t order_show(struct kmem_cache *s, char *buf)
4454{
4455 return sprintf(buf, "%d\n", oo_order(s->oo));
4456}
4457SLAB_ATTR(order);
4458
4459static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4460{
4461 return sprintf(buf, "%lu\n", s->min_partial);
4462}
4463
4464static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4465 size_t length)
4466{
4467 unsigned long min;
4468 int err;
4469
4470 err = kstrtoul(buf, 10, &min);
4471 if (err)
4472 return err;
4473
4474 set_min_partial(s, min);
4475 return length;
4476}
4477SLAB_ATTR(min_partial);
4478
4479static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4480{
4481 return sprintf(buf, "%u\n", s->cpu_partial);
4482}
4483
4484static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4485 size_t length)
4486{
4487 unsigned long objects;
4488 int err;
4489
4490 err = kstrtoul(buf, 10, &objects);
4491 if (err)
4492 return err;
4493 if (objects && !kmem_cache_has_cpu_partial(s))
4494 return -EINVAL;
4495
4496 s->cpu_partial = objects;
4497 flush_all(s);
4498 return length;
4499}
4500SLAB_ATTR(cpu_partial);
4501
4502static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4503{
4504 if (!s->ctor)
4505 return 0;
4506 return sprintf(buf, "%pS\n", s->ctor);
4507}
4508SLAB_ATTR_RO(ctor);
4509
4510static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4511{
4512 return sprintf(buf, "%d\n", s->refcount - 1);
4513}
4514SLAB_ATTR_RO(aliases);
4515
4516static ssize_t partial_show(struct kmem_cache *s, char *buf)
4517{
4518 return show_slab_objects(s, buf, SO_PARTIAL);
4519}
4520SLAB_ATTR_RO(partial);
4521
4522static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4523{
4524 return show_slab_objects(s, buf, SO_CPU);
4525}
4526SLAB_ATTR_RO(cpu_slabs);
4527
4528static ssize_t objects_show(struct kmem_cache *s, char *buf)
4529{
4530 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4531}
4532SLAB_ATTR_RO(objects);
4533
4534static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4535{
4536 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4537}
4538SLAB_ATTR_RO(objects_partial);
4539
4540static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4541{
4542 int objects = 0;
4543 int pages = 0;
4544 int cpu;
4545 int len;
4546
4547 for_each_online_cpu(cpu) {
4548 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4549
4550 if (page) {
4551 pages += page->pages;
4552 objects += page->pobjects;
4553 }
4554 }
4555
4556 len = sprintf(buf, "%d(%d)", objects, pages);
4557
4558#ifdef CONFIG_SMP
4559 for_each_online_cpu(cpu) {
4560 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4561
4562 if (page && len < PAGE_SIZE - 20)
4563 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4564 page->pobjects, page->pages);
4565 }
4566#endif
4567 return len + sprintf(buf + len, "\n");
4568}
4569SLAB_ATTR_RO(slabs_cpu_partial);
4570
4571static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4572{
4573 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4574}
4575
4576static ssize_t reclaim_account_store(struct kmem_cache *s,
4577 const char *buf, size_t length)
4578{
4579 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4580 if (buf[0] == '1')
4581 s->flags |= SLAB_RECLAIM_ACCOUNT;
4582 return length;
4583}
4584SLAB_ATTR(reclaim_account);
4585
4586static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4587{
4588 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4589}
4590SLAB_ATTR_RO(hwcache_align);
4591
4592#ifdef CONFIG_ZONE_DMA
4593static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4594{
4595 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4596}
4597SLAB_ATTR_RO(cache_dma);
4598#endif
4599
4600static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4601{
4602 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4603}
4604SLAB_ATTR_RO(destroy_by_rcu);
4605
4606static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4607{
4608 return sprintf(buf, "%d\n", s->reserved);
4609}
4610SLAB_ATTR_RO(reserved);
4611
4612#ifdef CONFIG_SLUB_DEBUG
4613static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4614{
4615 return show_slab_objects(s, buf, SO_ALL);
4616}
4617SLAB_ATTR_RO(slabs);
4618
4619static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4620{
4621 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4622}
4623SLAB_ATTR_RO(total_objects);
4624
4625static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4626{
4627 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4628}
4629
4630static ssize_t sanity_checks_store(struct kmem_cache *s,
4631 const char *buf, size_t length)
4632{
4633 s->flags &= ~SLAB_DEBUG_FREE;
4634 if (buf[0] == '1') {
4635 s->flags &= ~__CMPXCHG_DOUBLE;
4636 s->flags |= SLAB_DEBUG_FREE;
4637 }
4638 return length;
4639}
4640SLAB_ATTR(sanity_checks);
4641
4642static ssize_t trace_show(struct kmem_cache *s, char *buf)
4643{
4644 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4645}
4646
4647static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4648 size_t length)
4649{
4650 s->flags &= ~SLAB_TRACE;
4651 if (buf[0] == '1') {
4652 s->flags &= ~__CMPXCHG_DOUBLE;
4653 s->flags |= SLAB_TRACE;
4654 }
4655 return length;
4656}
4657SLAB_ATTR(trace);
4658
4659static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4660{
4661 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4662}
4663
4664static ssize_t red_zone_store(struct kmem_cache *s,
4665 const char *buf, size_t length)
4666{
4667 if (any_slab_objects(s))
4668 return -EBUSY;
4669
4670 s->flags &= ~SLAB_RED_ZONE;
4671 if (buf[0] == '1') {
4672 s->flags &= ~__CMPXCHG_DOUBLE;
4673 s->flags |= SLAB_RED_ZONE;
4674 }
4675 calculate_sizes(s, -1);
4676 return length;
4677}
4678SLAB_ATTR(red_zone);
4679
4680static ssize_t poison_show(struct kmem_cache *s, char *buf)
4681{
4682 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4683}
4684
4685static ssize_t poison_store(struct kmem_cache *s,
4686 const char *buf, size_t length)
4687{
4688 if (any_slab_objects(s))
4689 return -EBUSY;
4690
4691 s->flags &= ~SLAB_POISON;
4692 if (buf[0] == '1') {
4693 s->flags &= ~__CMPXCHG_DOUBLE;
4694 s->flags |= SLAB_POISON;
4695 }
4696 calculate_sizes(s, -1);
4697 return length;
4698}
4699SLAB_ATTR(poison);
4700
4701static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4702{
4703 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4704}
4705
4706static ssize_t store_user_store(struct kmem_cache *s,
4707 const char *buf, size_t length)
4708{
4709 if (any_slab_objects(s))
4710 return -EBUSY;
4711
4712 s->flags &= ~SLAB_STORE_USER;
4713 if (buf[0] == '1') {
4714 s->flags &= ~__CMPXCHG_DOUBLE;
4715 s->flags |= SLAB_STORE_USER;
4716 }
4717 calculate_sizes(s, -1);
4718 return length;
4719}
4720SLAB_ATTR(store_user);
4721
4722static ssize_t validate_show(struct kmem_cache *s, char *buf)
4723{
4724 return 0;
4725}
4726
4727static ssize_t validate_store(struct kmem_cache *s,
4728 const char *buf, size_t length)
4729{
4730 int ret = -EINVAL;
4731
4732 if (buf[0] == '1') {
4733 ret = validate_slab_cache(s);
4734 if (ret >= 0)
4735 ret = length;
4736 }
4737 return ret;
4738}
4739SLAB_ATTR(validate);
4740
4741static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4742{
4743 if (!(s->flags & SLAB_STORE_USER))
4744 return -ENOSYS;
4745 return list_locations(s, buf, TRACK_ALLOC);
4746}
4747SLAB_ATTR_RO(alloc_calls);
4748
4749static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4750{
4751 if (!(s->flags & SLAB_STORE_USER))
4752 return -ENOSYS;
4753 return list_locations(s, buf, TRACK_FREE);
4754}
4755SLAB_ATTR_RO(free_calls);
4756#endif
4757
4758#ifdef CONFIG_FAILSLAB
4759static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4760{
4761 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4762}
4763
4764static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4765 size_t length)
4766{
4767 s->flags &= ~SLAB_FAILSLAB;
4768 if (buf[0] == '1')
4769 s->flags |= SLAB_FAILSLAB;
4770 return length;
4771}
4772SLAB_ATTR(failslab);
4773#endif
4774
4775static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4776{
4777 return 0;
4778}
4779
4780static ssize_t shrink_store(struct kmem_cache *s,
4781 const char *buf, size_t length)
4782{
4783 if (buf[0] == '1') {
4784 int rc = kmem_cache_shrink(s);
4785
4786 if (rc)
4787 return rc;
4788 } else
4789 return -EINVAL;
4790 return length;
4791}
4792SLAB_ATTR(shrink);
4793
4794#ifdef CONFIG_NUMA
4795static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4796{
4797 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4798}
4799
4800static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4801 const char *buf, size_t length)
4802{
4803 unsigned long ratio;
4804 int err;
4805
4806 err = kstrtoul(buf, 10, &ratio);
4807 if (err)
4808 return err;
4809
4810 if (ratio <= 100)
4811 s->remote_node_defrag_ratio = ratio * 10;
4812
4813 return length;
4814}
4815SLAB_ATTR(remote_node_defrag_ratio);
4816#endif
4817
4818#ifdef CONFIG_SLUB_STATS
4819static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4820{
4821 unsigned long sum = 0;
4822 int cpu;
4823 int len;
4824 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4825
4826 if (!data)
4827 return -ENOMEM;
4828
4829 for_each_online_cpu(cpu) {
4830 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4831
4832 data[cpu] = x;
4833 sum += x;
4834 }
4835
4836 len = sprintf(buf, "%lu", sum);
4837
4838#ifdef CONFIG_SMP
4839 for_each_online_cpu(cpu) {
4840 if (data[cpu] && len < PAGE_SIZE - 20)
4841 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4842 }
4843#endif
4844 kfree(data);
4845 return len + sprintf(buf + len, "\n");
4846}
4847
4848static void clear_stat(struct kmem_cache *s, enum stat_item si)
4849{
4850 int cpu;
4851
4852 for_each_online_cpu(cpu)
4853 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
4854}
4855
4856#define STAT_ATTR(si, text) \
4857static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4858{ \
4859 return show_stat(s, buf, si); \
4860} \
4861static ssize_t text##_store(struct kmem_cache *s, \
4862 const char *buf, size_t length) \
4863{ \
4864 if (buf[0] != '0') \
4865 return -EINVAL; \
4866 clear_stat(s, si); \
4867 return length; \
4868} \
4869SLAB_ATTR(text); \
4870
4871STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4872STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
4873STAT_ATTR(FREE_FASTPATH, free_fastpath);
4874STAT_ATTR(FREE_SLOWPATH, free_slowpath);
4875STAT_ATTR(FREE_FROZEN, free_frozen);
4876STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
4877STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4878STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4879STAT_ATTR(ALLOC_SLAB, alloc_slab);
4880STAT_ATTR(ALLOC_REFILL, alloc_refill);
4881STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
4882STAT_ATTR(FREE_SLAB, free_slab);
4883STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4884STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
4885STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4886STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4887STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4888STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4889STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
4890STAT_ATTR(ORDER_FALLBACK, order_fallback);
4891STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
4892STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
4893STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
4894STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
4895STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
4896STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
4897#endif
4898
4899static struct attribute *slab_attrs[] = {
4900 &slab_size_attr.attr,
4901 &object_size_attr.attr,
4902 &objs_per_slab_attr.attr,
4903 &order_attr.attr,
4904 &min_partial_attr.attr,
4905 &cpu_partial_attr.attr,
4906 &objects_attr.attr,
4907 &objects_partial_attr.attr,
4908 &partial_attr.attr,
4909 &cpu_slabs_attr.attr,
4910 &ctor_attr.attr,
4911 &aliases_attr.attr,
4912 &align_attr.attr,
4913 &hwcache_align_attr.attr,
4914 &reclaim_account_attr.attr,
4915 &destroy_by_rcu_attr.attr,
4916 &shrink_attr.attr,
4917 &reserved_attr.attr,
4918 &slabs_cpu_partial_attr.attr,
4919#ifdef CONFIG_SLUB_DEBUG
4920 &total_objects_attr.attr,
4921 &slabs_attr.attr,
4922 &sanity_checks_attr.attr,
4923 &trace_attr.attr,
4924 &red_zone_attr.attr,
4925 &poison_attr.attr,
4926 &store_user_attr.attr,
4927 &validate_attr.attr,
4928 &alloc_calls_attr.attr,
4929 &free_calls_attr.attr,
4930#endif
4931#ifdef CONFIG_ZONE_DMA
4932 &cache_dma_attr.attr,
4933#endif
4934#ifdef CONFIG_NUMA
4935 &remote_node_defrag_ratio_attr.attr,
4936#endif
4937#ifdef CONFIG_SLUB_STATS
4938 &alloc_fastpath_attr.attr,
4939 &alloc_slowpath_attr.attr,
4940 &free_fastpath_attr.attr,
4941 &free_slowpath_attr.attr,
4942 &free_frozen_attr.attr,
4943 &free_add_partial_attr.attr,
4944 &free_remove_partial_attr.attr,
4945 &alloc_from_partial_attr.attr,
4946 &alloc_slab_attr.attr,
4947 &alloc_refill_attr.attr,
4948 &alloc_node_mismatch_attr.attr,
4949 &free_slab_attr.attr,
4950 &cpuslab_flush_attr.attr,
4951 &deactivate_full_attr.attr,
4952 &deactivate_empty_attr.attr,
4953 &deactivate_to_head_attr.attr,
4954 &deactivate_to_tail_attr.attr,
4955 &deactivate_remote_frees_attr.attr,
4956 &deactivate_bypass_attr.attr,
4957 &order_fallback_attr.attr,
4958 &cmpxchg_double_fail_attr.attr,
4959 &cmpxchg_double_cpu_fail_attr.attr,
4960 &cpu_partial_alloc_attr.attr,
4961 &cpu_partial_free_attr.attr,
4962 &cpu_partial_node_attr.attr,
4963 &cpu_partial_drain_attr.attr,
4964#endif
4965#ifdef CONFIG_FAILSLAB
4966 &failslab_attr.attr,
4967#endif
4968
4969 NULL
4970};
4971
4972static struct attribute_group slab_attr_group = {
4973 .attrs = slab_attrs,
4974};
4975
4976static ssize_t slab_attr_show(struct kobject *kobj,
4977 struct attribute *attr,
4978 char *buf)
4979{
4980 struct slab_attribute *attribute;
4981 struct kmem_cache *s;
4982 int err;
4983
4984 attribute = to_slab_attr(attr);
4985 s = to_slab(kobj);
4986
4987 if (!attribute->show)
4988 return -EIO;
4989
4990 err = attribute->show(s, buf);
4991
4992 return err;
4993}
4994
4995static ssize_t slab_attr_store(struct kobject *kobj,
4996 struct attribute *attr,
4997 const char *buf, size_t len)
4998{
4999 struct slab_attribute *attribute;
5000 struct kmem_cache *s;
5001 int err;
5002
5003 attribute = to_slab_attr(attr);
5004 s = to_slab(kobj);
5005
5006 if (!attribute->store)
5007 return -EIO;
5008
5009 err = attribute->store(s, buf, len);
5010#ifdef CONFIG_MEMCG_KMEM
5011 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5012 int i;
5013
5014 mutex_lock(&slab_mutex);
5015 if (s->max_attr_size < len)
5016 s->max_attr_size = len;
5017
		/*
		 * This is a best effort propagation, so this function's
		 * return value will be determined by the parent cache only.
		 * This is basically because not all attributes will have a
		 * well defined semantics for rollbacks - most of the actions
		 * will have permanent effects.
		 *
		 * Returning the error value of any of the children that fail
		 * is not well defined, in the sense that users seeing the
		 * error code won't be able to know anything about the state
		 * of the cache.
		 *
		 * Only returning the error code for the parent cache at least
		 * has well defined semantics. The cache being written to
		 * directly won't actually be affected by this, since it's a
		 * child cache of the parent.
		 */
5035 for_each_memcg_cache_index(i) {
5036 struct kmem_cache *c = cache_from_memcg_idx(s, i);
5037 if (c)
5038 attribute->store(c, buf, len);
5039 }
5040 mutex_unlock(&slab_mutex);
5041 }
5042#endif
5043 return err;
5044}
5045
5046static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5047{
5048#ifdef CONFIG_MEMCG_KMEM
5049 int i;
5050 char *buffer = NULL;
5051 struct kmem_cache *root_cache;
5052
5053 if (is_root_cache(s))
5054 return;
5055
5056 root_cache = s->memcg_params->root_cache;
5057
	/*
	 * No attribute of the root cache was ever written, so there are no
	 * non-default values to copy to the new child cache.
	 */
5062 if (!root_cache->max_attr_size)
5063 return;
5064
5065 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5066 char mbuf[64];
5067 char *buf;
5068 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5069
5070 if (!attr || !attr->store || !attr->show)
5071 continue;
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082 if (buffer)
5083 buf = buffer;
5084 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
5085 buf = mbuf;
5086 else {
5087 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5088 if (WARN_ON(!buffer))
5089 continue;
5090 buf = buffer;
5091 }
5092
5093 attr->show(root_cache, buf);
5094 attr->store(s, buf, strlen(buf));
5095 }
5096
5097 if (buffer)
5098 free_page((unsigned long)buffer);
5099#endif
5100}
5101
5102static void kmem_cache_release(struct kobject *k)
5103{
5104 slab_kmem_cache_release(to_slab(k));
5105}
5106
5107static const struct sysfs_ops slab_sysfs_ops = {
5108 .show = slab_attr_show,
5109 .store = slab_attr_store,
5110};
5111
5112static struct kobj_type slab_ktype = {
5113 .sysfs_ops = &slab_sysfs_ops,
5114 .release = kmem_cache_release,
5115};
5116
5117static int uevent_filter(struct kset *kset, struct kobject *kobj)
5118{
5119 struct kobj_type *ktype = get_ktype(kobj);
5120
5121 if (ktype == &slab_ktype)
5122 return 1;
5123 return 0;
5124}
5125
5126static const struct kset_uevent_ops slab_uevent_ops = {
5127 .filter = uevent_filter,
5128};
5129
5130static struct kset *slab_kset;
5131
5132static inline struct kset *cache_kset(struct kmem_cache *s)
5133{
5134#ifdef CONFIG_MEMCG_KMEM
5135 if (!is_root_cache(s))
5136 return s->memcg_params->root_cache->memcg_kset;
5137#endif
5138 return slab_kset;
5139}
5140
5141#define ID_STR_LENGTH 64
5142
/*
 * Format of the generated sysfs name:	:[flags-]size
 */
5147static char *create_unique_id(struct kmem_cache *s)
5148{
5149 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5150 char *p = name;
5151
5152 BUG_ON(!name);
5153
5154 *p++ = ':';
5155
	/*
	 * First flags affecting slabcache operations. We will only
	 * get here for aliasable slabs so we do not need to support
	 * too many flags. The flags here must cover all flags that
	 * are matched during merging to guarantee that the id is
	 * unique.
	 */
5162 if (s->flags & SLAB_CACHE_DMA)
5163 *p++ = 'd';
5164 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5165 *p++ = 'a';
5166 if (s->flags & SLAB_DEBUG_FREE)
5167 *p++ = 'F';
5168 if (!(s->flags & SLAB_NOTRACK))
5169 *p++ = 't';
5170 if (p != name + 1)
5171 *p++ = '-';
5172 p += sprintf(p, "%07d", s->size);
5173
5174#ifdef CONFIG_MEMCG_KMEM
5175 if (!is_root_cache(s))
5176 p += sprintf(p, "-%08d",
5177 memcg_cache_id(s->memcg_params->memcg));
5178#endif
5179
5180 BUG_ON(p > name + ID_STR_LENGTH - 1);
5181 return name;
5182}
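/*
 * Example (illustrative): a mergeable 192 byte cache with
 * SLAB_RECLAIM_ACCOUNT set and SLAB_NOTRACK clear would get the sysfs
 * name ":at-0000192".
 */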
5183
5184static int sysfs_slab_add(struct kmem_cache *s)
5185{
5186 int err;
5187 const char *name;
5188 int unmergeable = slab_unmergeable(s);
5189
5190 if (unmergeable) {
		/*
		 * A slabcache that can never be merged can use its name
		 * directly. This is typically the case for debug
		 * configurations and lets us catch duplicate names easily.
		 */
5196 sysfs_remove_link(&slab_kset->kobj, s->name);
5197 name = s->name;
5198 } else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
5203 name = create_unique_id(s);
5204 }
5205
5206 s->kobj.kset = cache_kset(s);
5207 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5208 if (err)
5209 goto out_put_kobj;
5210
5211 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5212 if (err)
5213 goto out_del_kobj;
5214
5215#ifdef CONFIG_MEMCG_KMEM
5216 if (is_root_cache(s)) {
5217 s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj);
5218 if (!s->memcg_kset) {
5219 err = -ENOMEM;
5220 goto out_del_kobj;
5221 }
5222 }
5223#endif
5224
5225 kobject_uevent(&s->kobj, KOBJ_ADD);
5226 if (!unmergeable) {
		/* Setup first alias */
5228 sysfs_slab_alias(s, s->name);
5229 }
5230out:
5231 if (!unmergeable)
5232 kfree(name);
5233 return err;
5234out_del_kobj:
5235 kobject_del(&s->kobj);
5236out_put_kobj:
5237 kobject_put(&s->kobj);
5238 goto out;
5239}
5240
5241void sysfs_slab_remove(struct kmem_cache *s)
5242{
5243 if (slab_state < FULL)
		/*
		 * Sysfs has not been setup yet so no need to remove the
		 * cache from sysfs.
		 */
5248 return;
5249
5250#ifdef CONFIG_MEMCG_KMEM
5251 kset_unregister(s->memcg_kset);
5252#endif
5253 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5254 kobject_del(&s->kobj);
5255 kobject_put(&s->kobj);
5256}
5257
/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
5262struct saved_alias {
5263 struct kmem_cache *s;
5264 const char *name;
5265 struct saved_alias *next;
5266};
5267
5268static struct saved_alias *alias_list;
5269
5270static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5271{
5272 struct saved_alias *al;
5273
5274 if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
5278 sysfs_remove_link(&slab_kset->kobj, name);
5279 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5280 }
5281
5282 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5283 if (!al)
5284 return -ENOMEM;
5285
5286 al->s = s;
5287 al->name = name;
5288 al->next = alias_list;
5289 alias_list = al;
5290 return 0;
5291}
5292
5293static int __init slab_sysfs_init(void)
5294{
5295 struct kmem_cache *s;
5296 int err;
5297
5298 mutex_lock(&slab_mutex);
5299
5300 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5301 if (!slab_kset) {
5302 mutex_unlock(&slab_mutex);
5303 pr_err("Cannot register slab subsystem.\n");
5304 return -ENOSYS;
5305 }
5306
5307 slab_state = FULL;
5308
5309 list_for_each_entry(s, &slab_caches, list) {
5310 err = sysfs_slab_add(s);
5311 if (err)
5312 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
5313 s->name);
5314 }
5315
5316 while (alias_list) {
5317 struct saved_alias *al = alias_list;
5318
5319 alias_list = alias_list->next;
5320 err = sysfs_slab_alias(al->s, al->name);
5321 if (err)
5322 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
5323 al->name);
5324 kfree(al);
5325 }
5326
5327 mutex_unlock(&slab_mutex);
5328 resiliency_test();
5329 return 0;
5330}
5331
5332__initcall(slab_sysfs_init);
5333#endif
5334
/*
 * The /proc/slabinfo ABI
 */
5338#ifdef CONFIG_SLABINFO
5339void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5340{
5341 unsigned long nr_slabs = 0;
5342 unsigned long nr_objs = 0;
5343 unsigned long nr_free = 0;
5344 int node;
5345
5346 for_each_online_node(node) {
5347 struct kmem_cache_node *n = get_node(s, node);
5348
5349 if (!n)
5350 continue;
5351
5352 nr_slabs += node_nr_slabs(n);
5353 nr_objs += node_nr_objs(n);
5354 nr_free += count_partial(n, count_free);
5355 }
5356
5357 sinfo->active_objs = nr_objs - nr_free;
5358 sinfo->num_objs = nr_objs;
5359 sinfo->active_slabs = nr_slabs;
5360 sinfo->num_slabs = nr_slabs;
5361 sinfo->objects_per_slab = oo_objects(s->oo);
5362 sinfo->cache_order = oo_order(s->oo);
5363}
5364
5365void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5366{
5367}
5368
5369ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5370 size_t count, loff_t *ppos)
5371{
5372 return -EIO;
5373}
5374#endif
5375