/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks or atomic operations
 * and only uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 * (C) 2011 Linux Foundation, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 * The slab_mutex protects the list of all slab caches and synchronizes
 * major metadata changes to slab cache structures.
 *
 * The slab_lock is a bit spin lock on the first word of struct page. It is
 * only used for debugging and on architectures that cannot perform a
 * cmpxchg_double. It protects page->freelist, page->counters and the frozen
 * state during updates.
 *
 * The list_lock protects the partial and full lists on each node as well as
 * the partial slab counter. Interrupts must be disabled while it is taken
 * from the allocation/free paths.
 *
 * Slabs with free elements are kept on a partial list; during normal
 * operation no list of full slabs is maintained (full slabs are only
 * tracked when SLAB_STORE_USER debugging is on). If an object in a full
 * slab is freed, the slab reappears on the partial list.
 *
 * A slab that is "frozen" is exclusively owned by one cpu: only that cpu
 * may allocate from it, which keeps the fast paths free of list operations.
 * Frees from other cpus simply chain objects onto page->freelist; the
 * owning cpu picks them up when it deactivates the slab.
 */
static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

/*
 * Tracking user of a slab.
 */
197#define TRACK_ADDRS_COUNT 16
198struct track {
199 unsigned long addr;
200#ifdef CONFIG_STACKTRACE
201 unsigned long addrs[TRACK_ADDRS_COUNT];
202#endif
203 int cpu;
204 int pid;
205 unsigned long when;
206};
207
208enum track_item { TRACK_ALLOC, TRACK_FREE };
209
210#ifdef CONFIG_SYSFS
211static int sysfs_slab_add(struct kmem_cache *);
212static int sysfs_slab_alias(struct kmem_cache *, const char *);
213static void sysfs_slab_remove(struct kmem_cache *);
214static void memcg_propagate_slab_attrs(struct kmem_cache *s);
215#else
216static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
217static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
218 { return 0; }
219static inline void sysfs_slab_remove(struct kmem_cache *s) { }
220
221static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
222#endif
223
224static inline void stat(const struct kmem_cache *s, enum stat_item si)
225{
226#ifdef CONFIG_SLUB_STATS
227 __this_cpu_inc(s->cpu_slab->stat[si]);
228#endif
229}

/*
 * Core slab cache functions
 */

235static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
236{
237 return s->node[node];
238}

/* Verify that a pointer has an address that is valid within a slab page */
241static inline int check_valid_pointer(struct kmem_cache *s,
242 struct page *page, const void *object)
243{
244 void *base;
245
246 if (!object)
247 return 1;
248
249 base = page_address(page);
250 if (object < base || object >= base + page->objects * s->size ||
251 (object - base) % s->size) {
252 return 0;
253 }
254
255 return 1;
256}
257
258static inline void *get_freepointer(struct kmem_cache *s, void *object)
259{
260 return *(void **)(object + s->offset);
261}
262
263static void prefetch_freepointer(const struct kmem_cache *s, void *object)
264{
265 prefetch(object + s->offset);
266}
267
268static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
269{
270 void *p;
271
272#ifdef CONFIG_DEBUG_PAGEALLOC
273 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
274#else
275 p = get_freepointer(s, object);
276#endif
277 return p;
278}
279
280static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
281{
282 *(void **)(object + s->offset) = fp;
283}
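/*
 * Example (a sketch added for clarity, not part of the original source):
 * for a cache with s->size = 64 and s->offset = 0 the free pointer overlays
 * the first word of each free object, so a freelist of three objects at
 * a, b and c is simply
 *
 *	page->freelist == a, *(void **)a == b, *(void **)b == c,
 *	*(void **)c == NULL
 *
 * With poisoning, RCU freeing or a constructor, s->offset points past the
 * object so the stored contents are not overwritten by the link.
 */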
284
285
286#define for_each_object(__p, __s, __addr, __objects) \
287 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
288 __p += (__s)->size)
289
290
291static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
292{
293 return (p - addr) / s->size;
294}
295
static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;

#endif
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}
319
320static inline int order_objects(int order, unsigned long size, int reserved)
321{
322 return ((PAGE_SIZE << order) - reserved) / size;
323}
324
325static inline struct kmem_cache_order_objects oo_make(int order,
326 unsigned long size, int reserved)
327{
328 struct kmem_cache_order_objects x = {
329 (order << OO_SHIFT) + order_objects(order, size, reserved)
330 };
331
332 return x;
333}
334
335static inline int oo_order(struct kmem_cache_order_objects x)
336{
337 return x.x >> OO_SHIFT;
338}
339
340static inline int oo_objects(struct kmem_cache_order_objects x)
341{
342 return x.x & OO_MASK;
343}
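/*
 * Worked example (illustrative values added for clarity, assuming 4K
 * pages): order = 3, size = 256, reserved = 0 gives
 * order_objects() = (PAGE_SIZE << 3) / 256 = 32768 / 256 = 128 objects,
 * so oo_make() packs x = (3 << OO_SHIFT) + 128 = 0x30080.
 * oo_order() then recovers 3 and oo_objects() recovers 128.
 */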

/*
 * Per slab locking using the pagelock
 */
348static __always_inline void slab_lock(struct page *page)
349{
350 bit_spin_lock(PG_locked, &page->flags);
351}
352
353static __always_inline void slab_unlock(struct page *page)
354{
355 __bit_spin_unlock(PG_locked, &page->flags);
356}

/* Interrupts must be disabled (for the fallback code to work right) */
359static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
360 void *freelist_old, unsigned long counters_old,
361 void *freelist_new, unsigned long counters_new,
362 const char *n)
363{
364 VM_BUG_ON(!irqs_disabled());
365#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
366 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
367 if (s->flags & __CMPXCHG_DOUBLE) {
368 if (cmpxchg_double(&page->freelist, &page->counters,
369 freelist_old, counters_old,
370 freelist_new, counters_new))
371 return 1;
372 } else
373#endif
374 {
375 slab_lock(page);
376 if (page->freelist == freelist_old && page->counters == counters_old) {
377 page->freelist = freelist_new;
378 page->counters = counters_new;
379 slab_unlock(page);
380 return 1;
381 }
382 slab_unlock(page);
383 }
384
385 cpu_relax();
386 stat(s, CMPXCHG_DOUBLE_FAIL);
387
388#ifdef SLUB_DEBUG_CMPXCHG
389 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
390#endif
391
392 return 0;
393}
394
395static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
396 void *freelist_old, unsigned long counters_old,
397 void *freelist_new, unsigned long counters_new,
398 const char *n)
399{
400#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
401 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
402 if (s->flags & __CMPXCHG_DOUBLE) {
403 if (cmpxchg_double(&page->freelist, &page->counters,
404 freelist_old, counters_old,
405 freelist_new, counters_new))
406 return 1;
407 } else
408#endif
409 {
410 unsigned long flags;
411
412 local_irq_save(flags);
413 slab_lock(page);
414 if (page->freelist == freelist_old && page->counters == counters_old) {
415 page->freelist = freelist_new;
416 page->counters = counters_new;
417 slab_unlock(page);
418 local_irq_restore(flags);
419 return 1;
420 }
421 slab_unlock(page);
422 local_irq_restore(flags);
423 }
424
425 cpu_relax();
426 stat(s, CMPXCHG_DOUBLE_FAIL);
427
428#ifdef SLUB_DEBUG_CMPXCHG
429 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
430#endif
431
432 return 0;
433}
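/*
 * Illustrative use (a sketch added for clarity, not taken from the original
 * source): freezing a slab updates the freelist and counters as one unit,
 * roughly
 *
 *	do {
 *		old.freelist = page->freelist;
 *		old.counters = page->counters;
 *		new.counters = old.counters;
 *		new.freelist = old.freelist;
 *		new.frozen = 1;
 *	} while (!__cmpxchg_double_slab(s, page,
 *			old.freelist, old.counters,
 *			new.freelist, new.counters, "freeze"));
 *
 * On configurations without cmpxchg_double the same call transparently
 * falls back to slab_lock()/slab_unlock() plus a compare under the lock.
 */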
434
#ifdef CONFIG_SLUB_DEBUG
/*
 * Determine a map of objects in use on a page.
 *
 * Must hold the slab lock to guarantee that the page does
 * not vanish from under us.
 */
442static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
443{
444 void *p;
445 void *addr = page_address(page);
446
447 for (p = page->freelist; p; p = get_freepointer(s, p))
448 set_bit(slab_index(p, s, addr), map);
449}
450
451
452
453
454#ifdef CONFIG_SLUB_DEBUG_ON
455static int slub_debug = DEBUG_DEFAULT_FLAGS;
456#else
457static int slub_debug;
458#endif
459
460static char *slub_debug_slabs;
461static int disable_higher_order_debug;
462
463
464
465
466static void print_section(char *text, u8 *addr, unsigned int length)
467{
468 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
469 length, 1);
470}
471
472static struct track *get_track(struct kmem_cache *s, void *object,
473 enum track_item alloc)
474{
475 struct track *p;
476
477 if (s->offset)
478 p = object + s->offset + sizeof(void *);
479 else
480 p = object + s->inuse;
481
482 return p + alloc;
483}
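/*
 * Note (added for clarity): track records live in the metadata area after
 * the object, either directly at s->inuse or one word later when the free
 * pointer had to be moved out of the object (s->offset != 0). TRACK_ALLOC
 * is stored first and TRACK_FREE second, so get_track(s, object,
 * TRACK_FREE) is one struct track further along.
 */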
484
485static void set_track(struct kmem_cache *s, void *object,
486 enum track_item alloc, unsigned long addr)
487{
488 struct track *p = get_track(s, object, alloc);
489
490 if (addr) {
491#ifdef CONFIG_STACKTRACE
492 struct stack_trace trace;
493 int i;
494
495 trace.nr_entries = 0;
496 trace.max_entries = TRACK_ADDRS_COUNT;
497 trace.entries = p->addrs;
498 trace.skip = 3;
499 save_stack_trace(&trace);
500
501
502 if (trace.nr_entries != 0 &&
503 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
504 trace.nr_entries--;
505
506 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
507 p->addrs[i] = 0;
508#endif
509 p->addr = addr;
510 p->cpu = smp_processor_id();
511 p->pid = current->pid;
512 p->when = jiffies;
513 } else
514 memset(p, 0, sizeof(struct track));
515}
516
517static void init_tracking(struct kmem_cache *s, void *object)
518{
519 if (!(s->flags & SLAB_STORE_USER))
520 return;
521
522 set_track(s, object, TRACK_FREE, 0UL);
523 set_track(s, object, TRACK_ALLOC, 0UL);
524}
525
526static void print_track(const char *s, struct track *t)
527{
528 if (!t->addr)
529 return;
530
531 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
532 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
533#ifdef CONFIG_STACKTRACE
534 {
535 int i;
536 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
537 if (t->addrs[i])
538 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
539 else
540 break;
541 }
542#endif
543}
544
545static void print_tracking(struct kmem_cache *s, void *object)
546{
547 if (!(s->flags & SLAB_STORE_USER))
548 return;
549
550 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
551 print_track("Freed", get_track(s, object, TRACK_FREE));
552}
553
554static void print_page_info(struct page *page)
555{
556 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
557 page, page->objects, page->inuse, page->freelist, page->flags);
558
559}
560
561static void slab_bug(struct kmem_cache *s, char *fmt, ...)
562{
563 va_list args;
564 char buf[100];
565
566 va_start(args, fmt);
567 vsnprintf(buf, sizeof(buf), fmt, args);
568 va_end(args);
569 printk(KERN_ERR "========================================"
570 "=====================================\n");
571 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
572 printk(KERN_ERR "----------------------------------------"
573 "-------------------------------------\n\n");
574
575 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
576}
577
578static void slab_fix(struct kmem_cache *s, char *fmt, ...)
579{
580 va_list args;
581 char buf[100];
582
583 va_start(args, fmt);
584 vsnprintf(buf, sizeof(buf), fmt, args);
585 va_end(args);
586 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
587}
588
589static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
590{
591 unsigned int off;
592 u8 *addr = page_address(page);
593
594 print_tracking(s, p);
595
596 print_page_info(page);
597
598 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
599 p, p - addr, get_freepointer(s, p));
600
601 if (p > addr + 16)
602 print_section("Bytes b4 ", p - 16, 16);
603
604 print_section("Object ", p, min_t(unsigned long, s->object_size,
605 PAGE_SIZE));
606 if (s->flags & SLAB_RED_ZONE)
607 print_section("Redzone ", p + s->object_size,
608 s->inuse - s->object_size);
609
610 if (s->offset)
611 off = s->offset + sizeof(void *);
612 else
613 off = s->inuse;
614
615 if (s->flags & SLAB_STORE_USER)
616 off += 2 * sizeof(struct track);
617
618 if (off != s->size)
619
620 print_section("Padding ", p + off, s->size - off);
621
622 dump_stack();
623}
624
625static void object_err(struct kmem_cache *s, struct page *page,
626 u8 *object, char *reason)
627{
628 slab_bug(s, "%s", reason);
629 print_trailer(s, page, object);
630}
631
632static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
633{
634 va_list args;
635 char buf[100];
636
637 va_start(args, fmt);
638 vsnprintf(buf, sizeof(buf), fmt, args);
639 va_end(args);
640 slab_bug(s, "%s", buf);
641 print_page_info(page);
642 dump_stack();
643}
644
645static void init_object(struct kmem_cache *s, void *object, u8 val)
646{
647 u8 *p = object;
648
649 if (s->flags & __OBJECT_POISON) {
650 memset(p, POISON_FREE, s->object_size - 1);
651 p[s->object_size - 1] = POISON_END;
652 }
653
654 if (s->flags & SLAB_RED_ZONE)
655 memset(p + s->object_size, val, s->inuse - s->object_size);
656}
657
658static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
659 void *from, void *to)
660{
661 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
662 memset(from, data, to - from);
663}
664
665static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
666 u8 *object, char *what,
667 u8 *start, unsigned int value, unsigned int bytes)
668{
669 u8 *fault;
670 u8 *end;
671
672 fault = memchr_inv(start, value, bytes);
673 if (!fault)
674 return 1;
675
676 end = start + bytes;
677 while (end > fault && end[-1] == value)
678 end--;
679
680 slab_bug(s, "%s overwritten", what);
681 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
682 fault, end - 1, fault[0], value);
683 print_trailer(s, page, object);
684
685 restore_bytes(s, what, value, fault, end);
686 return 0;
687}

/*
 * Object layout:
 *
 * object address
 * 	Bytes of the object to be managed.
 * 	If the freepointer may overlay the object then the free
 * 	pointer is the first word of the object.
 *
 * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
 * 	0xa5 (POISON_END)
 *
 * object + s->object_size
 * 	Padding to reach word boundary. This is also used for Redzoning.
 *
 * 	We fill with 0xbb (RED_INACTIVE) for inactive objects and with
 * 	0xcc (RED_ACTIVE) for objects in use.
 *
 * object + s->inuse
 * 	Meta data starts here.
 *
 * 	A. Free pointer (if we cannot overwrite object on free)
 * 	B. Tracking data for SLAB_STORE_USER
 * 	C. Padding to reach required alignment boundary or at minimum
 * 		one word if debugging is on to be able to detect writes
 * 		before the word boundary.
 *
 *	Padding is done using 0x5a (POISON_INUSE)
 *
 * object + s->size
 * 	Nothing is used beyond s->size.
 *
 * If slabcaches are merged then the object_size and inuse boundaries are
 * mostly ignored. Therefore no slab options that rely on these boundaries
 * may be used with merged slabcaches.
 */
727static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
728{
729 unsigned long off = s->inuse;
730
731 if (s->offset)
732
733 off += sizeof(void *);
734
735 if (s->flags & SLAB_STORE_USER)
736
737 off += 2 * sizeof(struct track);
738
739 if (s->size == off)
740 return 1;
741
742 return check_bytes_and_report(s, page, p, "Object padding",
743 p + off, POISON_INUSE, s->size - off);
744}

/* Check the pad bytes at the end of a slab page */
747static int slab_pad_check(struct kmem_cache *s, struct page *page)
748{
749 u8 *start;
750 u8 *fault;
751 u8 *end;
752 int length;
753 int remainder;
754
755 if (!(s->flags & SLAB_POISON))
756 return 1;
757
758 start = page_address(page);
759 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
760 end = start + length;
761 remainder = length % s->size;
762 if (!remainder)
763 return 1;
764
765 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
766 if (!fault)
767 return 1;
768 while (end > fault && end[-1] == POISON_INUSE)
769 end--;
770
771 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
772 print_section("Padding ", end - remainder, remainder);
773
774 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
775 return 0;
776}
777
778static int check_object(struct kmem_cache *s, struct page *page,
779 void *object, u8 val)
780{
781 u8 *p = object;
782 u8 *endobject = object + s->object_size;
783
784 if (s->flags & SLAB_RED_ZONE) {
785 if (!check_bytes_and_report(s, page, object, "Redzone",
786 endobject, val, s->inuse - s->object_size))
787 return 0;
788 } else {
789 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
790 check_bytes_and_report(s, page, p, "Alignment padding",
791 endobject, POISON_INUSE, s->inuse - s->object_size);
792 }
793 }
794
795 if (s->flags & SLAB_POISON) {
796 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
797 (!check_bytes_and_report(s, page, p, "Poison", p,
798 POISON_FREE, s->object_size - 1) ||
799 !check_bytes_and_report(s, page, p, "Poison",
800 p + s->object_size - 1, POISON_END, 1)))
801 return 0;
802
803
804
805 check_pad_bytes(s, page, p);
806 }
807
808 if (!s->offset && val == SLUB_RED_ACTIVE)
809
810
811
812
813 return 1;
814
815
816 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
817 object_err(s, page, p, "Freepointer corrupt");
818
819
820
821
822
823 set_freepointer(s, p, NULL);
824 return 0;
825 }
826 return 1;
827}
828
static int check_slab(struct kmem_cache *s, struct page *page)
{
	int maxobj;

	VM_BUG_ON(!irqs_disabled());

	if (!PageSlab(page)) {
		slab_err(s, page, "Not a valid slab page");
		return 0;
	}

	maxobj = order_objects(compound_order(page), s->size, s->reserved);
	if (page->objects > maxobj) {
		slab_err(s, page, "objects %u > max %u",
			page->objects, maxobj);
		return 0;
	}
	if (page->inuse > page->objects) {
		slab_err(s, page, "inuse %u > max %u",
			page->inuse, page->objects);
		return 0;
	}

	/* Slab_pad_check fixes things up after itself */
	slab_pad_check(s, page);
	return 1;
}

/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to guarantee that the object does not vanish while we are
 * working on it.
 */
860static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
861{
862 int nr = 0;
863 void *fp;
864 void *object = NULL;
865 unsigned long max_objects;
866
867 fp = page->freelist;
868 while (fp && nr <= page->objects) {
869 if (fp == search)
870 return 1;
871 if (!check_valid_pointer(s, page, fp)) {
872 if (object) {
873 object_err(s, page, object,
874 "Freechain corrupt");
875 set_freepointer(s, object, NULL);
876 break;
877 } else {
878 slab_err(s, page, "Freepointer corrupt");
879 page->freelist = NULL;
880 page->inuse = page->objects;
881 slab_fix(s, "Freelist cleared");
882 return 0;
883 }
884 break;
885 }
886 object = fp;
887 fp = get_freepointer(s, object);
888 nr++;
889 }
890
891 max_objects = order_objects(compound_order(page), s->size, s->reserved);
892 if (max_objects > MAX_OBJS_PER_PAGE)
893 max_objects = MAX_OBJS_PER_PAGE;
894
895 if (page->objects != max_objects) {
896 slab_err(s, page, "Wrong number of objects. Found %d but "
897 "should be %d", page->objects, max_objects);
898 page->objects = max_objects;
899 slab_fix(s, "Number of objects adjusted.");
900 }
901 if (page->inuse != page->objects - nr) {
902 slab_err(s, page, "Wrong object count. Counter is %d but "
903 "counted were %d", page->inuse, page->objects - nr);
904 page->inuse = page->objects - nr;
905 slab_fix(s, "Object count adjusted.");
906 }
907 return search == NULL;
908}
909
910static void trace(struct kmem_cache *s, struct page *page, void *object,
911 int alloc)
912{
913 if (s->flags & SLAB_TRACE) {
914 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
915 s->name,
916 alloc ? "alloc" : "free",
917 object, page->inuse,
918 page->freelist);
919
920 if (!alloc)
921 print_section("Object ", (void *)object, s->object_size);
922
923 dump_stack();
924 }
925}

/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */
931static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
932{
933 flags &= gfp_allowed_mask;
934 lockdep_trace_alloc(flags);
935 might_sleep_if(flags & __GFP_WAIT);
936
937 return should_failslab(s->object_size, flags, s->flags);
938}
939
940static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
941{
942 flags &= gfp_allowed_mask;
943 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
944 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
945}
946
947static inline void slab_free_hook(struct kmem_cache *s, void *x)
948{
949 kmemleak_free_recursive(x, s->flags);
950
951
952
953
954
955
956#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
957 {
958 unsigned long flags;
959
960 local_irq_save(flags);
961 kmemcheck_slab_free(s, x, s->object_size);
962 debug_check_no_locks_freed(x, s->object_size);
963 local_irq_restore(flags);
964 }
965#endif
966 if (!(s->flags & SLAB_DEBUG_OBJECTS))
967 debug_check_no_obj_freed(x, s->object_size);
968}

/*
 * Tracking of fully allocated slabs for debugging purposes.
 *
 * list_lock must be held.
 */
975static void add_full(struct kmem_cache *s,
976 struct kmem_cache_node *n, struct page *page)
977{
978 if (!(s->flags & SLAB_STORE_USER))
979 return;
980
981 list_add(&page->lru, &n->full);
982}
983
984
985
986
987static void remove_full(struct kmem_cache *s, struct page *page)
988{
989 if (!(s->flags & SLAB_STORE_USER))
990 return;
991
992 list_del(&page->lru);
993}
994
995
996static inline unsigned long slabs_node(struct kmem_cache *s, int node)
997{
998 struct kmem_cache_node *n = get_node(s, node);
999
1000 return atomic_long_read(&n->nr_slabs);
1001}
1002
1003static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1004{
1005 return atomic_long_read(&n->nr_slabs);
1006}
1007
1008static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1009{
1010 struct kmem_cache_node *n = get_node(s, node);
1011
	/*
	 * May be called early in order to allocate a slab for the
	 * kmem_cache_node structure. Solve the chicken-egg
	 * dilemma by deferring the increment of the count during
	 * bootstrap (see early_kmem_cache_node_alloc).
	 */
1018 if (likely(n)) {
1019 atomic_long_inc(&n->nr_slabs);
1020 atomic_long_add(objects, &n->total_objects);
1021 }
1022}
1023static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1024{
1025 struct kmem_cache_node *n = get_node(s, node);
1026
1027 atomic_long_dec(&n->nr_slabs);
1028 atomic_long_sub(objects, &n->total_objects);
1029}

/* Object debug checks for alloc/free paths */
1032static void setup_object_debug(struct kmem_cache *s, struct page *page,
1033 void *object)
1034{
1035 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1036 return;
1037
1038 init_object(s, object, SLUB_RED_INACTIVE);
1039 init_tracking(s, object);
1040}
1041
1042static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1043 void *object, unsigned long addr)
1044{
1045 if (!check_slab(s, page))
1046 goto bad;
1047
1048 if (!check_valid_pointer(s, page, object)) {
1049 object_err(s, page, object, "Freelist Pointer check fails");
1050 goto bad;
1051 }
1052
1053 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1054 goto bad;
1055
1056
1057 if (s->flags & SLAB_STORE_USER)
1058 set_track(s, object, TRACK_ALLOC, addr);
1059 trace(s, page, object, 1);
1060 init_object(s, object, SLUB_RED_ACTIVE);
1061 return 1;
1062
1063bad:
1064 if (PageSlab(page)) {
1065
1066
1067
1068
1069
1070 slab_fix(s, "Marking all objects used");
1071 page->inuse = page->objects;
1072 page->freelist = NULL;
1073 }
1074 return 0;
1075}
1076
1077static noinline struct kmem_cache_node *free_debug_processing(
1078 struct kmem_cache *s, struct page *page, void *object,
1079 unsigned long addr, unsigned long *flags)
1080{
1081 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1082
1083 spin_lock_irqsave(&n->list_lock, *flags);
1084 slab_lock(page);
1085
1086 if (!check_slab(s, page))
1087 goto fail;
1088
1089 if (!check_valid_pointer(s, page, object)) {
1090 slab_err(s, page, "Invalid object pointer 0x%p", object);
1091 goto fail;
1092 }
1093
1094 if (on_freelist(s, page, object)) {
1095 object_err(s, page, object, "Object already free");
1096 goto fail;
1097 }
1098
1099 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1100 goto out;
1101
1102 if (unlikely(s != page->slab_cache)) {
1103 if (!PageSlab(page)) {
1104 slab_err(s, page, "Attempt to free object(0x%p) "
1105 "outside of slab", object);
1106 } else if (!page->slab_cache) {
1107 printk(KERN_ERR
1108 "SLUB <none>: no slab for object 0x%p.\n",
1109 object);
1110 dump_stack();
1111 } else
1112 object_err(s, page, object,
1113 "page slab pointer corrupt.");
1114 goto fail;
1115 }
1116
1117 if (s->flags & SLAB_STORE_USER)
1118 set_track(s, object, TRACK_FREE, addr);
1119 trace(s, page, object, 0);
1120 init_object(s, object, SLUB_RED_INACTIVE);
1121out:
1122 slab_unlock(page);
	/*
	 * Keep node_lock to preserve integrity
	 * until the object is actually freed
	 */
1127 return n;
1128
1129fail:
1130 slab_unlock(page);
1131 spin_unlock_irqrestore(&n->list_lock, *flags);
1132 slab_fix(s, "Object at 0x%p not freed", object);
1133 return NULL;
1134}
1135
1136static int __init setup_slub_debug(char *str)
1137{
1138 slub_debug = DEBUG_DEFAULT_FLAGS;
1139 if (*str++ != '=' || !*str)
1140
1141
1142
1143 goto out;
1144
1145 if (*str == ',')
1146
1147
1148
1149
1150 goto check_slabs;
1151
1152 if (tolower(*str) == 'o') {
1153
1154
1155
1156
1157 disable_higher_order_debug = 1;
1158 goto out;
1159 }
1160
1161 slub_debug = 0;
1162 if (*str == '-')
1163
1164
1165
1166 goto out;
1167
1168
1169
1170
1171 for (; *str && *str != ','; str++) {
1172 switch (tolower(*str)) {
1173 case 'f':
1174 slub_debug |= SLAB_DEBUG_FREE;
1175 break;
1176 case 'z':
1177 slub_debug |= SLAB_RED_ZONE;
1178 break;
1179 case 'p':
1180 slub_debug |= SLAB_POISON;
1181 break;
1182 case 'u':
1183 slub_debug |= SLAB_STORE_USER;
1184 break;
1185 case 't':
1186 slub_debug |= SLAB_TRACE;
1187 break;
1188 case 'a':
1189 slub_debug |= SLAB_FAILSLAB;
1190 break;
1191 default:
1192 printk(KERN_ERR "slub_debug option '%c' "
1193 "unknown. skipped\n", *str);
1194 }
1195 }
1196
1197check_slabs:
1198 if (*str == ',')
1199 slub_debug_slabs = str + 1;
1200out:
1201 return 1;
1202}
1203
1204__setup("slub_debug", setup_slub_debug);
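/*
 * Example boot parameters (derived from the option parsing above; cache
 * name "dentry" is only an illustration):
 *
 *	slub_debug		enable all default debug options
 *	slub_debug=-		switch all debugging off
 *	slub_debug=FZP		sanity checks, red zoning and poisoning
 *	slub_debug=O		default debugging, but do not raise the
 *				minimum slab order for metadata
 *	slub_debug=U,dentry	user tracking, only for the dentry cache
 */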
1205
1206static unsigned long kmem_cache_flags(unsigned long object_size,
1207 unsigned long flags, const char *name,
1208 void (*ctor)(void *))
1209{
1210
1211
1212
1213 if (slub_debug && (!slub_debug_slabs ||
1214 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1215 flags |= slub_debug;
1216
1217 return flags;
1218}
1219#else
1220static inline void setup_object_debug(struct kmem_cache *s,
1221 struct page *page, void *object) {}
1222
1223static inline int alloc_debug_processing(struct kmem_cache *s,
1224 struct page *page, void *object, unsigned long addr) { return 0; }
1225
1226static inline struct kmem_cache_node *free_debug_processing(
1227 struct kmem_cache *s, struct page *page, void *object,
1228 unsigned long addr, unsigned long *flags) { return NULL; }
1229
1230static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1231 { return 1; }
1232static inline int check_object(struct kmem_cache *s, struct page *page,
1233 void *object, u8 val) { return 1; }
1234static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1235 struct page *page) {}
1236static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1237static inline unsigned long kmem_cache_flags(unsigned long object_size,
1238 unsigned long flags, const char *name,
1239 void (*ctor)(void *))
1240{
1241 return flags;
1242}
1243#define slub_debug 0
1244
1245#define disable_higher_order_debug 0
1246
1247static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1248 { return 0; }
1249static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1250 { return 0; }
1251static inline void inc_slabs_node(struct kmem_cache *s, int node,
1252 int objects) {}
1253static inline void dec_slabs_node(struct kmem_cache *s, int node,
1254 int objects) {}
1255
1256static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1257 { return 0; }
1258
1259static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1260 void *object) {}
1261
1262static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1263
1264#endif
1265
1266
1267
1268
1269static inline struct page *alloc_slab_page(gfp_t flags, int node,
1270 struct kmem_cache_order_objects oo)
1271{
1272 int order = oo_order(oo);
1273
1274 flags |= __GFP_NOTRACK;
1275
1276 if (node == NUMA_NO_NODE)
1277 return alloc_pages(flags, order);
1278 else
1279 return alloc_pages_exact_node(node, flags, order);
1280}
1281
1282static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1283{
1284 struct page *page;
1285 struct kmem_cache_order_objects oo = s->oo;
1286 gfp_t alloc_gfp;
1287
1288 flags &= gfp_allowed_mask;
1289
1290 if (flags & __GFP_WAIT)
1291 local_irq_enable();
1292
1293 flags |= s->allocflags;
1294
1295
1296
1297
1298
1299 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1300
1301 page = alloc_slab_page(alloc_gfp, node, oo);
1302 if (unlikely(!page)) {
1303 oo = s->min;
1304
1305
1306
1307
1308 page = alloc_slab_page(flags, node, oo);
1309
1310 if (page)
1311 stat(s, ORDER_FALLBACK);
1312 }
1313
1314 if (kmemcheck_enabled && page
1315 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1316 int pages = 1 << oo_order(oo);
1317
1318 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1319
1320
1321
1322
1323
1324 if (s->ctor)
1325 kmemcheck_mark_uninitialized_pages(page, pages);
1326 else
1327 kmemcheck_mark_unallocated_pages(page, pages);
1328 }
1329
1330 if (flags & __GFP_WAIT)
1331 local_irq_disable();
1332 if (!page)
1333 return NULL;
1334
1335 page->objects = oo_objects(oo);
1336 mod_zone_page_state(page_zone(page),
1337 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1338 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1339 1 << oo_order(oo));
1340
1341 return page;
1342}
1343
1344static void setup_object(struct kmem_cache *s, struct page *page,
1345 void *object)
1346{
1347 setup_object_debug(s, page, object);
1348 if (unlikely(s->ctor))
1349 s->ctor(object);
1350}
1351
1352static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1353{
1354 struct page *page;
1355 void *start;
1356 void *last;
1357 void *p;
1358 int order;
1359
1360 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1361
1362 page = allocate_slab(s,
1363 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1364 if (!page)
1365 goto out;
1366
1367 order = compound_order(page);
1368 inc_slabs_node(s, page_to_nid(page), page->objects);
1369 memcg_bind_pages(s, order);
1370 page->slab_cache = s;
1371 __SetPageSlab(page);
1372 if (page->pfmemalloc)
1373 SetPageSlabPfmemalloc(page);
1374
1375 start = page_address(page);
1376
1377 if (unlikely(s->flags & SLAB_POISON))
1378 memset(start, POISON_INUSE, PAGE_SIZE << order);
1379
1380 last = start;
1381 for_each_object(p, s, start, page->objects) {
1382 setup_object(s, page, last);
1383 set_freepointer(s, last, p);
1384 last = p;
1385 }
1386 setup_object(s, page, last);
1387 set_freepointer(s, last, NULL);
1388
1389 page->freelist = start;
1390 page->inuse = page->objects;
1391 page->frozen = 1;
1392out:
1393 return page;
1394}
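/*
 * Illustrative result of the setup loop above (added for clarity): for a
 * new slab with three objects at a, b and c the chain built is
 * page->freelist = a -> b -> c -> NULL, all objects are counted as in use
 * and the page is marked frozen; the caller then hands the freelist over
 * to a cpu slab.
 */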
1395
1396static void __free_slab(struct kmem_cache *s, struct page *page)
1397{
1398 int order = compound_order(page);
1399 int pages = 1 << order;
1400
1401 if (kmem_cache_debug(s)) {
1402 void *p;
1403
1404 slab_pad_check(s, page);
1405 for_each_object(p, s, page_address(page),
1406 page->objects)
1407 check_object(s, page, p, SLUB_RED_INACTIVE);
1408 }
1409
1410 kmemcheck_free_shadow(page, compound_order(page));
1411
1412 mod_zone_page_state(page_zone(page),
1413 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1414 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1415 -pages);
1416
1417 __ClearPageSlabPfmemalloc(page);
1418 __ClearPageSlab(page);
1419
1420 memcg_release_pages(s, order);
1421 page_mapcount_reset(page);
1422 if (current->reclaim_state)
1423 current->reclaim_state->reclaimed_slab += pages;
1424 __free_memcg_kmem_pages(page, order);
1425}
1426
1427#define need_reserve_slab_rcu \
1428 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1429
1430static void rcu_free_slab(struct rcu_head *h)
1431{
1432 struct page *page;
1433
1434 if (need_reserve_slab_rcu)
1435 page = virt_to_head_page(h);
1436 else
1437 page = container_of((struct list_head *)h, struct page, lru);
1438
1439 __free_slab(page->slab_cache, page);
1440}
1441
1442static void free_slab(struct kmem_cache *s, struct page *page)
1443{
1444 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1445 struct rcu_head *head;
1446
1447 if (need_reserve_slab_rcu) {
1448 int order = compound_order(page);
1449 int offset = (PAGE_SIZE << order) - s->reserved;
1450
1451 VM_BUG_ON(s->reserved != sizeof(*head));
1452 head = page_address(page) + offset;
1453 } else {
1454
1455
1456
1457 head = (void *)&page->lru;
1458 }
1459
1460 call_rcu(head, rcu_free_slab);
1461 } else
1462 __free_slab(s, page);
1463}
1464
1465static void discard_slab(struct kmem_cache *s, struct page *page)
1466{
1467 dec_slabs_node(s, page_to_nid(page), page->objects);
1468 free_slab(s, page);
1469}
1470
1471
1472
1473
1474
1475
1476static inline void add_partial(struct kmem_cache_node *n,
1477 struct page *page, int tail)
1478{
1479 n->nr_partial++;
1480 if (tail == DEACTIVATE_TO_TAIL)
1481 list_add_tail(&page->lru, &n->partial);
1482 else
1483 list_add(&page->lru, &n->partial);
1484}
1485
1486
1487
1488
1489static inline void remove_partial(struct kmem_cache_node *n,
1490 struct page *page)
1491{
1492 list_del(&page->lru);
1493 n->nr_partial--;
1494}

/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it fails.
 *
 * Must hold list_lock since we modify the partial list.
 */
1504static inline void *acquire_slab(struct kmem_cache *s,
1505 struct kmem_cache_node *n, struct page *page,
1506 int mode, int *objects)
1507{
1508 void *freelist;
1509 unsigned long counters;
1510 struct page new;
1511
1512
1513
1514
1515
1516
1517 freelist = page->freelist;
1518 counters = page->counters;
1519 new.counters = counters;
1520 *objects = new.objects - new.inuse;
1521 if (mode) {
1522 new.inuse = page->objects;
1523 new.freelist = NULL;
1524 } else {
1525 new.freelist = freelist;
1526 }
1527
1528 VM_BUG_ON(new.frozen);
1529 new.frozen = 1;
1530
1531 if (!__cmpxchg_double_slab(s, page,
1532 freelist, counters,
1533 new.freelist, new.counters,
1534 "acquire_slab"))
1535 return NULL;
1536
1537 remove_partial(n, page);
1538 WARN_ON(!freelist);
1539 return freelist;
1540}
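/*
 * Note on the mode argument (description added for clarity): the first slab
 * taken in get_partial_node() is acquired with mode != 0, which clears
 * page->freelist and marks all objects in use so the returned chain can
 * become the cpu freelist. Further slabs are acquired with mode == 0 and
 * keep their freelist; they are only frozen and parked on the per cpu
 * partial list via put_cpu_partial().
 */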
1541
1542static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1543static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);

/*
 * Try to allocate a partial slab from a specific node.
 */
1548static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1549 struct kmem_cache_cpu *c, gfp_t flags)
1550{
1551 struct page *page, *page2;
1552 void *object = NULL;
1553 int available = 0;
1554 int objects;
1555
1556
1557
1558
1559
1560
1561
1562 if (!n || !n->nr_partial)
1563 return NULL;
1564
1565 spin_lock(&n->list_lock);
1566 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1567 void *t;
1568
1569 if (!pfmemalloc_match(page, flags))
1570 continue;
1571
1572 t = acquire_slab(s, n, page, object == NULL, &objects);
1573 if (!t)
1574 break;
1575
1576 available += objects;
1577 if (!object) {
1578 c->page = page;
1579 stat(s, ALLOC_FROM_PARTIAL);
1580 object = t;
1581 } else {
1582 put_cpu_partial(s, page, 0);
1583 stat(s, CPU_PARTIAL_NODE);
1584 }
1585 if (!kmem_cache_has_cpu_partial(s)
1586 || available > s->cpu_partial / 2)
1587 break;
1588
1589 }
1590 spin_unlock(&n->list_lock);
1591 return object;
1592}

/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */
1597static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1598 struct kmem_cache_cpu *c)
1599{
1600#ifdef CONFIG_NUMA
1601 struct zonelist *zonelist;
1602 struct zoneref *z;
1603 struct zone *zone;
1604 enum zone_type high_zoneidx = gfp_zone(flags);
1605 void *object;
1606 unsigned int cpuset_mems_cookie;
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626 if (!s->remote_node_defrag_ratio ||
1627 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1628 return NULL;
1629
1630 do {
1631 cpuset_mems_cookie = get_mems_allowed();
1632 zonelist = node_zonelist(slab_node(), flags);
1633 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1634 struct kmem_cache_node *n;
1635
1636 n = get_node(s, zone_to_nid(zone));
1637
1638 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1639 n->nr_partial > s->min_partial) {
1640 object = get_partial_node(s, n, c, flags);
1641 if (object) {
1642
1643
1644
1645
1646
1647
1648
1649
1650 put_mems_allowed(cpuset_mems_cookie);
1651 return object;
1652 }
1653 }
1654 }
1655 } while (!put_mems_allowed(cpuset_mems_cookie));
1656#endif
1657 return NULL;
1658}

/*
 * Get a partial page, lock it and return it.
 */
1663static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1664 struct kmem_cache_cpu *c)
1665{
1666 void *object;
1667 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1668
1669 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1670 if (object || node != NUMA_NO_NODE)
1671 return object;
1672
1673 return get_any_partial(s, flags, c);
1674}
1675
#ifdef CONFIG_PREEMPT
/*
 * Calculate the next globally unique transaction id for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
#define TID_STEP 1
#endif
1690
1691static inline unsigned long next_tid(unsigned long tid)
1692{
1693 return tid + TID_STEP;
1694}
1695
1696static inline unsigned int tid_to_cpu(unsigned long tid)
1697{
1698 return tid % TID_STEP;
1699}
1700
1701static inline unsigned long tid_to_event(unsigned long tid)
1702{
1703 return tid / TID_STEP;
1704}
1705
1706static inline unsigned int init_tid(int cpu)
1707{
1708 return cpu;
1709}
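/*
 * Worked example (illustrative, assuming CONFIG_PREEMPT and
 * CONFIG_NR_CPUS = 4): TID_STEP is 4, cpu 2 starts with tid 2 and moves
 * through 6, 10, 14, ... so tid_to_cpu(10) == 2 and tid_to_event(10) == 2.
 * A tid mismatch in the cmpxchg therefore reveals both migration to another
 * cpu and other allocations/frees that happened in between.
 */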
1710
1711static inline void note_cmpxchg_failure(const char *n,
1712 const struct kmem_cache *s, unsigned long tid)
1713{
1714#ifdef SLUB_DEBUG_CMPXCHG
1715 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1716
1717 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1718
1719#ifdef CONFIG_PREEMPT
1720 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1721 printk("due to cpu change %d -> %d\n",
1722 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1723 else
1724#endif
1725 if (tid_to_event(tid) != tid_to_event(actual_tid))
1726 printk("due to cpu running other code. Event %ld->%ld\n",
1727 tid_to_event(tid), tid_to_event(actual_tid));
1728 else
1729 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1730 actual_tid, tid, next_tid(tid));
1731#endif
1732 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1733}
1734
1735static void init_kmem_cache_cpus(struct kmem_cache *s)
1736{
1737 int cpu;
1738
1739 for_each_possible_cpu(cpu)
1740 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1741}

/*
 * Remove the cpu slab.
 */
1746static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1747{
1748 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1749 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1750 int lock = 0;
1751 enum slab_modes l = M_NONE, m = M_NONE;
1752 void *nextfree;
1753 int tail = DEACTIVATE_TO_HEAD;
1754 struct page new;
1755 struct page old;
1756
1757 if (page->freelist) {
1758 stat(s, DEACTIVATE_REMOTE_FREES);
1759 tail = DEACTIVATE_TO_TAIL;
1760 }
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1771 void *prior;
1772 unsigned long counters;
1773
1774 do {
1775 prior = page->freelist;
1776 counters = page->counters;
1777 set_freepointer(s, freelist, prior);
1778 new.counters = counters;
1779 new.inuse--;
1780 VM_BUG_ON(!new.frozen);
1781
1782 } while (!__cmpxchg_double_slab(s, page,
1783 prior, counters,
1784 freelist, new.counters,
1785 "drain percpu freelist"));
1786
1787 freelist = nextfree;
1788 }
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804redo:
1805
1806 old.freelist = page->freelist;
1807 old.counters = page->counters;
1808 VM_BUG_ON(!old.frozen);
1809
1810
1811 new.counters = old.counters;
1812 if (freelist) {
1813 new.inuse--;
1814 set_freepointer(s, freelist, old.freelist);
1815 new.freelist = freelist;
1816 } else
1817 new.freelist = old.freelist;
1818
1819 new.frozen = 0;
1820
1821 if (!new.inuse && n->nr_partial > s->min_partial)
1822 m = M_FREE;
1823 else if (new.freelist) {
1824 m = M_PARTIAL;
1825 if (!lock) {
1826 lock = 1;
1827
1828
1829
1830
1831
1832 spin_lock(&n->list_lock);
1833 }
1834 } else {
1835 m = M_FULL;
1836 if (kmem_cache_debug(s) && !lock) {
1837 lock = 1;
1838
1839
1840
1841
1842
1843 spin_lock(&n->list_lock);
1844 }
1845 }
1846
1847 if (l != m) {
1848
1849 if (l == M_PARTIAL)
1850
1851 remove_partial(n, page);
1852
1853 else if (l == M_FULL)
1854
1855 remove_full(s, page);
1856
1857 if (m == M_PARTIAL) {
1858
1859 add_partial(n, page, tail);
1860 stat(s, tail);
1861
1862 } else if (m == M_FULL) {
1863
1864 stat(s, DEACTIVATE_FULL);
1865 add_full(s, n, page);
1866
1867 }
1868 }
1869
1870 l = m;
1871 if (!__cmpxchg_double_slab(s, page,
1872 old.freelist, old.counters,
1873 new.freelist, new.counters,
1874 "unfreezing slab"))
1875 goto redo;
1876
1877 if (lock)
1878 spin_unlock(&n->list_lock);
1879
1880 if (m == M_FREE) {
1881 stat(s, DEACTIVATE_EMPTY);
1882 discard_slab(s, page);
1883 stat(s, FREE_SLAB);
1884 }
1885}

/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled
 * for the specified cpu.
 */
1894static void unfreeze_partials(struct kmem_cache *s,
1895 struct kmem_cache_cpu *c)
1896{
1897#ifdef CONFIG_SLUB_CPU_PARTIAL
1898 struct kmem_cache_node *n = NULL, *n2 = NULL;
1899 struct page *page, *discard_page = NULL;
1900
1901 while ((page = c->partial)) {
1902 struct page new;
1903 struct page old;
1904
1905 c->partial = page->next;
1906
1907 n2 = get_node(s, page_to_nid(page));
1908 if (n != n2) {
1909 if (n)
1910 spin_unlock(&n->list_lock);
1911
1912 n = n2;
1913 spin_lock(&n->list_lock);
1914 }
1915
1916 do {
1917
1918 old.freelist = page->freelist;
1919 old.counters = page->counters;
1920 VM_BUG_ON(!old.frozen);
1921
1922 new.counters = old.counters;
1923 new.freelist = old.freelist;
1924
1925 new.frozen = 0;
1926
1927 } while (!__cmpxchg_double_slab(s, page,
1928 old.freelist, old.counters,
1929 new.freelist, new.counters,
1930 "unfreezing slab"));
1931
1932 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1933 page->next = discard_page;
1934 discard_page = page;
1935 } else {
1936 add_partial(n, page, DEACTIVATE_TO_TAIL);
1937 stat(s, FREE_ADD_PARTIAL);
1938 }
1939 }
1940
1941 if (n)
1942 spin_unlock(&n->list_lock);
1943
1944 while (discard_page) {
1945 page = discard_page;
1946 discard_page = discard_page->next;
1947
1948 stat(s, DEACTIVATE_EMPTY);
1949 discard_slab(s, page);
1950 stat(s, FREE_SLAB);
1951 }
1952#endif
1953}

/*
 * Put a page that was just frozen (in __slab_free) into a partial page
 * slot if available. This is done without interrupts disabled and without
 * preemption disabled. The cmpxchg is racy and may put the partial page
 * onto the wrong list but the processor maintaining the partial list will
 * notice that and handle it.
 */
1964static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1965{
1966#ifdef CONFIG_SLUB_CPU_PARTIAL
1967 struct page *oldpage;
1968 int pages;
1969 int pobjects;
1970
1971 do {
1972 pages = 0;
1973 pobjects = 0;
1974 oldpage = this_cpu_read(s->cpu_slab->partial);
1975
1976 if (oldpage) {
1977 pobjects = oldpage->pobjects;
1978 pages = oldpage->pages;
1979 if (drain && pobjects > s->cpu_partial) {
1980 unsigned long flags;
1981
1982
1983
1984
1985 local_irq_save(flags);
1986 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
1987 local_irq_restore(flags);
1988 oldpage = NULL;
1989 pobjects = 0;
1990 pages = 0;
1991 stat(s, CPU_PARTIAL_DRAIN);
1992 }
1993 }
1994
1995 pages++;
1996 pobjects += page->objects - page->inuse;
1997
1998 page->pages = pages;
1999 page->pobjects = pobjects;
2000 page->next = oldpage;
2001
2002 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
2003#endif
2004}
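/*
 * Illustrative flow (description added for clarity): __slab_free() freezes
 * a previously full slab and hands it to put_cpu_partial(); once the number
 * of pooled free objects exceeds s->cpu_partial the whole chain is flushed
 * back to the node partial lists via unfreeze_partials().
 */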
2005
2006static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2007{
2008 stat(s, CPUSLAB_FLUSH);
2009 deactivate_slab(s, c->page, c->freelist);
2010
2011 c->tid = next_tid(c->tid);
2012 c->page = NULL;
2013 c->freelist = NULL;
2014}

/*
 * Flush cpu slab.
 *
 * Called from IPI handler with interrupts disabled.
 */
2021static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2022{
2023 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2024
2025 if (likely(c)) {
2026 if (c->page)
2027 flush_slab(s, c);
2028
2029 unfreeze_partials(s, c);
2030 }
2031}
2032
2033static void flush_cpu_slab(void *d)
2034{
2035 struct kmem_cache *s = d;
2036
2037 __flush_cpu_slab(s, smp_processor_id());
2038}
2039
2040static bool has_cpu_slab(int cpu, void *info)
2041{
2042 struct kmem_cache *s = info;
2043 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2044
2045 return c->page || c->partial;
2046}
2047
2048static void flush_all(struct kmem_cache *s)
2049{
2050 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2051}

/*
 * Check if the objects in a per cpu structure fit numa
 * locality expectations.
 */
2057static inline int node_match(struct page *page, int node)
2058{
2059#ifdef CONFIG_NUMA
2060 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2061 return 0;
2062#endif
2063 return 1;
2064}
2065
2066static int count_free(struct page *page)
2067{
2068 return page->objects - page->inuse;
2069}
2070
2071static unsigned long count_partial(struct kmem_cache_node *n,
2072 int (*get_count)(struct page *))
2073{
2074 unsigned long flags;
2075 unsigned long x = 0;
2076 struct page *page;
2077
2078 spin_lock_irqsave(&n->list_lock, flags);
2079 list_for_each_entry(page, &n->partial, lru)
2080 x += get_count(page);
2081 spin_unlock_irqrestore(&n->list_lock, flags);
2082 return x;
2083}
2084
2085static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2086{
2087#ifdef CONFIG_SLUB_DEBUG
2088 return atomic_long_read(&n->total_objects);
2089#else
2090 return 0;
2091#endif
2092}
2093
2094static noinline void
2095slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2096{
2097 int node;
2098
2099 printk(KERN_WARNING
2100 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2101 nid, gfpflags);
2102 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2103 "default order: %d, min order: %d\n", s->name, s->object_size,
2104 s->size, oo_order(s->oo), oo_order(s->min));
2105
2106 if (oo_order(s->min) > get_order(s->object_size))
2107 printk(KERN_WARNING " %s debugging increased min order, use "
2108 "slub_debug=O to disable.\n", s->name);
2109
2110 for_each_online_node(node) {
2111 struct kmem_cache_node *n = get_node(s, node);
2112 unsigned long nr_slabs;
2113 unsigned long nr_objs;
2114 unsigned long nr_free;
2115
2116 if (!n)
2117 continue;
2118
2119 nr_free = count_partial(n, count_free);
2120 nr_slabs = node_nr_slabs(n);
2121 nr_objs = node_nr_objs(n);
2122
2123 printk(KERN_WARNING
2124 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2125 node, nr_slabs, nr_objs, nr_free);
2126 }
2127}
2128
2129static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2130 int node, struct kmem_cache_cpu **pc)
2131{
2132 void *freelist;
2133 struct kmem_cache_cpu *c = *pc;
2134 struct page *page;
2135
2136 freelist = get_partial(s, flags, node, c);
2137
2138 if (freelist)
2139 return freelist;
2140
2141 page = new_slab(s, flags, node);
2142 if (page) {
2143 c = __this_cpu_ptr(s->cpu_slab);
2144 if (c->page)
2145 flush_slab(s, c);
2146
2147
2148
2149
2150
2151 freelist = page->freelist;
2152 page->freelist = NULL;
2153
2154 stat(s, ALLOC_SLAB);
2155 c->page = page;
2156 *pc = c;
2157 } else
2158 freelist = NULL;
2159
2160 return freelist;
2161}
2162
2163static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2164{
2165 if (unlikely(PageSlabPfmemalloc(page)))
2166 return gfp_pfmemalloc_allowed(gfpflags);
2167
2168 return true;
2169}

/*
 * Check the page->freelist of a page and either transfer the freelist to
 * the per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 *
 * This function must be called with interrupts disabled.
 */
2181static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2182{
2183 struct page new;
2184 unsigned long counters;
2185 void *freelist;
2186
2187 do {
2188 freelist = page->freelist;
2189 counters = page->counters;
2190
2191 new.counters = counters;
2192 VM_BUG_ON(!new.frozen);
2193
2194 new.inuse = page->objects;
2195 new.frozen = freelist != NULL;
2196
2197 } while (!__cmpxchg_double_slab(s, page,
2198 freelist, counters,
2199 NULL, new.counters,
2200 "get_freelist"));
2201
2202 return freelist;
2203}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take
 * the first element of the freelist as the object to allocate now and move
 * the rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it
 * involves a call to the page allocator and the setup of a new slab.
 */
2221static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2222 unsigned long addr, struct kmem_cache_cpu *c)
2223{
2224 void *freelist;
2225 struct page *page;
2226 unsigned long flags;
2227
2228 local_irq_save(flags);
2229#ifdef CONFIG_PREEMPT
2230
2231
2232
2233
2234
2235 c = this_cpu_ptr(s->cpu_slab);
2236#endif
2237
2238 page = c->page;
2239 if (!page)
2240 goto new_slab;
2241redo:
2242
2243 if (unlikely(!node_match(page, node))) {
2244 stat(s, ALLOC_NODE_MISMATCH);
2245 deactivate_slab(s, page, c->freelist);
2246 c->page = NULL;
2247 c->freelist = NULL;
2248 goto new_slab;
2249 }
2250
2251
2252
2253
2254
2255
2256 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2257 deactivate_slab(s, page, c->freelist);
2258 c->page = NULL;
2259 c->freelist = NULL;
2260 goto new_slab;
2261 }
2262
2263
2264 freelist = c->freelist;
2265 if (freelist)
2266 goto load_freelist;
2267
2268 stat(s, ALLOC_SLOWPATH);
2269
2270 freelist = get_freelist(s, page);
2271
2272 if (!freelist) {
2273 c->page = NULL;
2274 stat(s, DEACTIVATE_BYPASS);
2275 goto new_slab;
2276 }
2277
2278 stat(s, ALLOC_REFILL);
2279
2280load_freelist:
2281
2282
2283
2284
2285
2286 VM_BUG_ON(!c->page->frozen);
2287 c->freelist = get_freepointer(s, freelist);
2288 c->tid = next_tid(c->tid);
2289 local_irq_restore(flags);
2290 return freelist;
2291
2292new_slab:
2293
2294 if (c->partial) {
2295 page = c->page = c->partial;
2296 c->partial = page->next;
2297 stat(s, CPU_PARTIAL_ALLOC);
2298 c->freelist = NULL;
2299 goto redo;
2300 }
2301
2302 freelist = new_slab_objects(s, gfpflags, node, &c);
2303
2304 if (unlikely(!freelist)) {
2305 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2306 slab_out_of_memory(s, gfpflags, node);
2307
2308 local_irq_restore(flags);
2309 return NULL;
2310 }
2311
2312 page = c->page;
2313 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2314 goto load_freelist;
2315
2316
2317 if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
2318 goto new_slab;
2319
2320 deactivate_slab(s, page, get_freepointer(s, freelist));
2321 c->page = NULL;
2322 c->freelist = NULL;
2323 local_irq_restore(flags);
2324 return freelist;
2325}

/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * have the fastpath folded into their functions. So no function call
 * overhead for requests that can be satisfied on the fastpath.
 *
 * The fastpath works by first checking if the lockless freelist can be
 * used. If not then __slab_alloc is called for slow processing.
 *
 * Otherwise we can simply pick the next object from the lockless free
 * list.
 */
2337static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2338 gfp_t gfpflags, int node, unsigned long addr)
2339{
2340 void **object;
2341 struct kmem_cache_cpu *c;
2342 struct page *page;
2343 unsigned long tid;
2344
2345 if (slab_pre_alloc_hook(s, gfpflags))
2346 return NULL;
2347
2348 s = memcg_kmem_get_cache(s, gfpflags);
2349redo:
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361 preempt_disable();
2362 c = __this_cpu_ptr(s->cpu_slab);
2363
2364
2365
2366
2367
2368
2369
2370 tid = c->tid;
2371 preempt_enable();
2372
2373 object = c->freelist;
2374 page = c->page;
2375 if (unlikely(!object || !page || !node_match(page, node)))
2376 object = __slab_alloc(s, gfpflags, node, addr, c);
2377
2378 else {
2379 void *next_object = get_freepointer_safe(s, object);
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393 if (unlikely(!this_cpu_cmpxchg_double(
2394 s->cpu_slab->freelist, s->cpu_slab->tid,
2395 object, tid,
2396 next_object, next_tid(tid)))) {
2397
2398 note_cmpxchg_failure("slab_alloc", s, tid);
2399 goto redo;
2400 }
2401 prefetch_freepointer(s, next_object);
2402 stat(s, ALLOC_FASTPATH);
2403 }
2404
2405 if (unlikely(gfpflags & __GFP_ZERO) && object)
2406 memset(object, 0, s->object_size);
2407
2408 slab_post_alloc_hook(s, gfpflags, object);
2409
2410 return object;
2411}
2412
2413static __always_inline void *slab_alloc(struct kmem_cache *s,
2414 gfp_t gfpflags, unsigned long addr)
2415{
2416 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2417}
2418
2419void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2420{
2421 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2422
2423 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2424
2425 return ret;
2426}
2427EXPORT_SYMBOL(kmem_cache_alloc);
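/*
 * Typical use from a client of this API (a sketch added for illustration,
 * not part of slub.c; "my_cache" and "struct my_struct" are made-up names):
 *
 *	struct kmem_cache *my_cache;
 *	struct my_struct *obj;
 *
 *	my_cache = kmem_cache_create("my_cache", sizeof(struct my_struct),
 *				     0, SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(my_cache, obj);
 */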
2428
2429#ifdef CONFIG_TRACING
2430void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2431{
2432 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2433 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2434 return ret;
2435}
2436EXPORT_SYMBOL(kmem_cache_alloc_trace);
2437
2438void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2439{
2440 void *ret = kmalloc_order(size, flags, order);
2441 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2442 return ret;
2443}
2444EXPORT_SYMBOL(kmalloc_order_trace);
2445#endif
2446
2447#ifdef CONFIG_NUMA
2448void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2449{
2450 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2451
2452 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2453 s->object_size, s->size, gfpflags, node);
2454
2455 return ret;
2456}
2457EXPORT_SYMBOL(kmem_cache_alloc_node);
2458
2459#ifdef CONFIG_TRACING
2460void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2461 gfp_t gfpflags,
2462 int node, size_t size)
2463{
2464 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2465
2466 trace_kmalloc_node(_RET_IP_, ret,
2467 size, s->size, gfpflags, node);
2468 return ret;
2469}
2470EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2471#endif
2472#endif

/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
2482static void __slab_free(struct kmem_cache *s, struct page *page,
2483 void *x, unsigned long addr)
2484{
2485 void *prior;
2486 void **object = (void *)x;
2487 int was_frozen;
2488 struct page new;
2489 unsigned long counters;
2490 struct kmem_cache_node *n = NULL;
2491 unsigned long uninitialized_var(flags);
2492
2493 stat(s, FREE_SLOWPATH);
2494
2495 if (kmem_cache_debug(s) &&
2496 !(n = free_debug_processing(s, page, x, addr, &flags)))
2497 return;
2498
2499 do {
2500 if (unlikely(n)) {
2501 spin_unlock_irqrestore(&n->list_lock, flags);
2502 n = NULL;
2503 }
2504 prior = page->freelist;
2505 counters = page->counters;
2506 set_freepointer(s, object, prior);
2507 new.counters = counters;
2508 was_frozen = new.frozen;
2509 new.inuse--;
2510 if ((!new.inuse || !prior) && !was_frozen) {
2511
2512 if (kmem_cache_has_cpu_partial(s) && !prior)
2513
2514
2515
2516
2517
2518 new.frozen = 1;
2519
2520 else {
2521
2522 n = get_node(s, page_to_nid(page));
2523
2524
2525
2526
2527
2528
2529
2530
2531 spin_lock_irqsave(&n->list_lock, flags);
2532
2533 }
2534 }
2535
2536 } while (!cmpxchg_double_slab(s, page,
2537 prior, counters,
2538 object, new.counters,
2539 "__slab_free"));
2540
2541 if (likely(!n)) {
2542
2543
2544
2545
2546
2547 if (new.frozen && !was_frozen) {
2548 put_cpu_partial(s, page, 1);
2549 stat(s, CPU_PARTIAL_FREE);
2550 }
2551
2552
2553
2554
2555 if (was_frozen)
2556 stat(s, FREE_FROZEN);
2557 return;
2558 }
2559
2560 if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
2561 goto slab_empty;
2562
2563
2564
2565
2566
2567 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
2568 if (kmem_cache_debug(s))
2569 remove_full(s, page);
2570 add_partial(n, page, DEACTIVATE_TO_TAIL);
2571 stat(s, FREE_ADD_PARTIAL);
2572 }
2573 spin_unlock_irqrestore(&n->list_lock, flags);
2574 return;
2575
2576slab_empty:
2577 if (prior) {
2578
2579
2580
2581 remove_partial(n, page);
2582 stat(s, FREE_REMOVE_PARTIAL);
2583 } else
2584
2585 remove_full(s, page);
2586
2587 spin_unlock_irqrestore(&n->list_lock, flags);
2588 stat(s, FREE_SLAB);
2589 discard_slab(s, page);
2590}

/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically is the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
2603static __always_inline void slab_free(struct kmem_cache *s,
2604 struct page *page, void *x, unsigned long addr)
2605{
2606 void **object = (void *)x;
2607 struct kmem_cache_cpu *c;
2608 unsigned long tid;
2609
2610 slab_free_hook(s, x);
2611
2612redo:
2613
2614
2615
2616
2617
2618
2619 preempt_disable();
2620 c = __this_cpu_ptr(s->cpu_slab);
2621
2622 tid = c->tid;
2623 preempt_enable();
2624
2625 if (likely(page == c->page)) {
2626 set_freepointer(s, object, c->freelist);
2627
2628 if (unlikely(!this_cpu_cmpxchg_double(
2629 s->cpu_slab->freelist, s->cpu_slab->tid,
2630 c->freelist, tid,
2631 object, next_tid(tid)))) {
2632
2633 note_cmpxchg_failure("slab_free", s, tid);
2634 goto redo;
2635 }
2636 stat(s, FREE_FASTPATH);
2637 } else
2638 __slab_free(s, page, x, addr);
2639
2640}
2641
2642void kmem_cache_free(struct kmem_cache *s, void *x)
2643{
2644 s = cache_from_obj(s, x);
2645 if (!s)
2646 return;
2647 slab_free(s, virt_to_head_page(x), x, _RET_IP_);
2648 trace_kmem_cache_free(_RET_IP_, x);
2649}
2650EXPORT_SYMBOL(kmem_cache_free);

/*
 * Object placement in a slab is made very easy because we always start at
 * offset 0. If we tune the size of the object to the alignment then we can
 * get the required alignment by putting one properly sized object after
 * another.
 *
 * The allocation order also determines the sizes of the per cpu slabs:
 * each processor always has one slab available for allocations. A higher
 * order reduces how often slabs must be moved on and off the partial lists
 * and is therefore a factor in locking overhead; the larger the object,
 * the smaller the order of allocation will typically be.
 */

/*
 * Minimum order of slab pages. This influences locking overhead and slab
 * fragmentation/overallocation.
 */
2671static int slub_min_order;
2672static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2673static int slub_min_objects;

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 */
2679static int slub_nomerge;

/*
 * Calculate the order of allocation given a slab object size.
 *
 * Generally order 0 allocations should be preferred since order 0 does not
 * cause fragmentation in the page allocator. Larger objects can be
 * problematic in order 0 slabs because too much unused space may be left;
 * we go to a higher order if more than 1/fract_leftover of the slab would
 * be wasted.
 *
 * In order to reach satisfactory performance we must also ensure that a
 * minimum number of objects is in one slab. Otherwise we may generate too
 * much activity on the partial lists, which requires taking the list_lock.
 *
 * slub_max_order specifies the order at which the number of objects in a
 * slab stops being treated as critical; beyond it we try to keep the page
 * order as low as possible, accepting more wasted space. Higher order
 * allocations also allow the placement of more objects in a slab and
 * thereby reduce object handling overhead.
 */
2706static inline int slab_order(int size, int min_objects,
2707 int max_order, int fract_leftover, int reserved)
2708{
2709 int order;
2710 int rem;
2711 int min_order = slub_min_order;
2712
2713 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2714 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2715
2716 for (order = max(min_order,
2717 fls(min_objects * size - 1) - PAGE_SHIFT);
2718 order <= max_order; order++) {
2719
2720 unsigned long slab_size = PAGE_SIZE << order;
2721
2722 if (slab_size < min_objects * size + reserved)
2723 continue;
2724
2725 rem = (slab_size - reserved) % size;
2726
2727 if (rem <= slab_size / fract_leftover)
2728 break;
2729
2730 }
2731
2732 return order;
2733}
2734
2735static inline int calculate_order(int size, int reserved)
2736{
2737 int order;
2738 int min_objects;
2739 int fraction;
2740 int max_objects;
2741
2742
2743
2744
2745
2746
2747
2748
2749
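	/*
	 * Attempt to find the best slab layout: start from a generous number
	 * of objects per slab (scaled by the number of cpus unless
	 * slub_min_objects was given) and progressively relax the acceptable
	 * wastage and then the object count until an order within
	 * slub_max_order works out.
	 */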
2750 min_objects = slub_min_objects;
2751 if (!min_objects)
2752 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2753 max_objects = order_objects(slub_max_order, size, reserved);
2754 min_objects = min(min_objects, max_objects);
2755
2756 while (min_objects > 1) {
2757 fraction = 16;
2758 while (fraction >= 4) {
2759 order = slab_order(size, min_objects,
2760 slub_max_order, fraction, reserved);
2761 if (order <= slub_max_order)
2762 return order;
2763 fraction /= 2;
2764 }
2765 min_objects--;
2766 }
2767
2768
2769
2770
2771
2772 order = slab_order(size, 1, slub_max_order, 1, reserved);
2773 if (order <= slub_max_order)
2774 return order;
2775
2776
2777
2778
2779 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2780 if (order < MAX_ORDER)
2781 return order;
2782 return -ENOSYS;
2783}
2784
2785static void
2786init_kmem_cache_node(struct kmem_cache_node *n)
2787{
2788 n->nr_partial = 0;
2789 spin_lock_init(&n->list_lock);
2790 INIT_LIST_HEAD(&n->partial);
2791#ifdef CONFIG_SLUB_DEBUG
2792 atomic_long_set(&n->nr_slabs, 0);
2793 atomic_long_set(&n->total_objects, 0);
2794 INIT_LIST_HEAD(&n->full);
2795#endif
2796}
2797
2798static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2799{
2800 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2801 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
2802
2803
2804
2805
2806
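	/*
	 * The per-cpu structure must be aligned to a double-word boundary so
	 * that the cmpxchg_double on freelist/tid works.
	 */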
2807 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2808 2 * sizeof(void *));
2809
2810 if (!s->cpu_slab)
2811 return 0;
2812
2813 init_kmem_cache_cpus(s);
2814
2815 return 1;
2816}
2817
2818static struct kmem_cache *kmem_cache_node;
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
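/*
 * No kmalloc_node is available yet during early boot, so bootstrap the first
 * kmem_cache_node structures by hand: allocate a slab page directly and carve
 * the node structure out of it.
 */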
2829static void early_kmem_cache_node_alloc(int node)
2830{
2831 struct page *page;
2832 struct kmem_cache_node *n;
2833
2834 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2835
2836 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2837
2838 BUG_ON(!page);
2839 if (page_to_nid(page) != node) {
2840 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2841 "node %d\n", node);
2842 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2843 "in order to be able to continue\n");
2844 }
2845
2846 n = page->freelist;
2847 BUG_ON(!n);
2848 page->freelist = get_freepointer(kmem_cache_node, n);
2849 page->inuse = 1;
2850 page->frozen = 0;
2851 kmem_cache_node->node[node] = n;
2852#ifdef CONFIG_SLUB_DEBUG
2853 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2854 init_tracking(kmem_cache_node, n);
2855#endif
2856 init_kmem_cache_node(n);
2857 inc_slabs_node(kmem_cache_node, node, page->objects);
2858
2859 add_partial(n, page, DEACTIVATE_TO_HEAD);
2860}
2861
2862static void free_kmem_cache_nodes(struct kmem_cache *s)
2863{
2864 int node;
2865
2866 for_each_node_state(node, N_NORMAL_MEMORY) {
2867 struct kmem_cache_node *n = s->node[node];
2868
2869 if (n)
2870 kmem_cache_free(kmem_cache_node, n);
2871
2872 s->node[node] = NULL;
2873 }
2874}
2875
2876static int init_kmem_cache_nodes(struct kmem_cache *s)
2877{
2878 int node;
2879
2880 for_each_node_state(node, N_NORMAL_MEMORY) {
2881 struct kmem_cache_node *n;
2882
2883 if (slab_state == DOWN) {
2884 early_kmem_cache_node_alloc(node);
2885 continue;
2886 }
2887 n = kmem_cache_alloc_node(kmem_cache_node,
2888 GFP_KERNEL, node);
2889
2890 if (!n) {
2891 free_kmem_cache_nodes(s);
2892 return 0;
2893 }
2894
2895 s->node[node] = n;
2896 init_kmem_cache_node(n);
2897 }
2898 return 1;
2899}
2900
2901static void set_min_partial(struct kmem_cache *s, unsigned long min)
2902{
2903 if (min < MIN_PARTIAL)
2904 min = MIN_PARTIAL;
2905 else if (min > MAX_PARTIAL)
2906 min = MAX_PARTIAL;
2907 s->min_partial = min;
2908}
2909
2910
2911
2912
2913
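/*
 * calculate_sizes() determines the slab page order and the layout of an
 * object: where the free pointer, red zones and tracking information live
 * relative to the object itself.
 */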
2914static int calculate_sizes(struct kmem_cache *s, int forced_order)
2915{
2916 unsigned long flags = s->flags;
2917 unsigned long size = s->object_size;
2918 int order;
2919
2920
2921
2922
2923
2924
2925 size = ALIGN(size, sizeof(void *));
2926
2927#ifdef CONFIG_SLUB_DEBUG
2928
2929
2930
2931
2932
2933 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2934 !s->ctor)
2935 s->flags |= __OBJECT_POISON;
2936 else
2937 s->flags &= ~__OBJECT_POISON;
2938
2939
2940
2941
2942
2943
2944
2945 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
2946 size += sizeof(void *);
2947#endif
2948
2949
2950
2951
2952
2953 s->inuse = size;
2954
2955 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2956 s->ctor)) {
2957
2958
2959
2960
2961
2962
2963
2964
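		/*
		 * Relocate the free pointer to after the object when the
		 * object memory itself cannot hold it: the contents must
		 * survive a free for RCU caches, poisoning would overwrite
		 * it, and a constructed object's layout must be preserved.
		 */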
2965 s->offset = size;
2966 size += sizeof(void *);
2967 }
2968
2969#ifdef CONFIG_SLUB_DEBUG
2970 if (flags & SLAB_STORE_USER)
2971
2972
2973
2974
2975 size += 2 * sizeof(struct track);
2976
2977 if (flags & SLAB_RED_ZONE)
2978
2979
2980
2981
2982
2983
2984
2985 size += sizeof(void *);
2986#endif
2987
2988
2989
2990
2991
2992
2993 size = ALIGN(size, s->align);
2994 s->size = size;
2995 if (forced_order >= 0)
2996 order = forced_order;
2997 else
2998 order = calculate_order(size, s->reserved);
2999
3000 if (order < 0)
3001 return 0;
3002
3003 s->allocflags = 0;
3004 if (order)
3005 s->allocflags |= __GFP_COMP;
3006
3007 if (s->flags & SLAB_CACHE_DMA)
3008 s->allocflags |= GFP_DMA;
3009
3010 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3011 s->allocflags |= __GFP_RECLAIMABLE;
3012
3013
3014
3015
3016 s->oo = oo_make(order, size, s->reserved);
3017 s->min = oo_make(get_order(size), size, s->reserved);
3018 if (oo_objects(s->oo) > oo_objects(s->max))
3019 s->max = s->oo;
3020
3021 return !!oo_objects(s->oo);
3022}
3023
3024static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3025{
3026 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3027 s->reserved = 0;
3028
3029 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3030 s->reserved = sizeof(struct rcu_head);
3031
3032 if (!calculate_sizes(s, -1))
3033 goto error;
3034 if (disable_higher_order_debug) {
3035
3036
3037
3038
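		/*
		 * If the debug metadata pushed the cache to a higher page
		 * order than the plain object size would need, drop the
		 * metadata flags and recompute the layout.
		 */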
3039 if (get_order(s->size) > get_order(s->object_size)) {
3040 s->flags &= ~DEBUG_METADATA_FLAGS;
3041 s->offset = 0;
3042 if (!calculate_sizes(s, -1))
3043 goto error;
3044 }
3045 }
3046
3047#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3048 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3049 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3050
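		/*
		 * No debugging and hardware support available: enable the
		 * lockless cmpxchg_double fastpaths.
		 */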
3051 s->flags |= __CMPXCHG_DOUBLE;
3052#endif
3053
3054
3055
3056
3057
3058 set_min_partial(s, ilog2(s->size) / 2);
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
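	/*
	 * s->cpu_partial caps the number of objects that may be parked on a
	 * cpu's partial list. Larger objects get smaller caps so the memory
	 * held per cpu stays bounded; caches that cannot use per-cpu partial
	 * lists (debug caches) get none.
	 */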
3077 if (!kmem_cache_has_cpu_partial(s))
3078 s->cpu_partial = 0;
3079 else if (s->size >= PAGE_SIZE)
3080 s->cpu_partial = 2;
3081 else if (s->size >= 1024)
3082 s->cpu_partial = 6;
3083 else if (s->size >= 256)
3084 s->cpu_partial = 13;
3085 else
3086 s->cpu_partial = 30;
3087
3088#ifdef CONFIG_NUMA
3089 s->remote_node_defrag_ratio = 1000;
3090#endif
3091 if (!init_kmem_cache_nodes(s))
3092 goto error;
3093
3094 if (alloc_kmem_cache_cpus(s))
3095 return 0;
3096
3097 free_kmem_cache_nodes(s);
3098error:
3099 if (flags & SLAB_PANIC)
3100 panic("Cannot create slab %s size=%lu realsize=%u "
3101 "order=%u offset=%u flags=%lx\n",
3102 s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
3103 s->offset, flags);
3104 return -EINVAL;
3105}
3106
3107static void list_slab_objects(struct kmem_cache *s, struct page *page,
3108 const char *text)
3109{
3110#ifdef CONFIG_SLUB_DEBUG
3111 void *addr = page_address(page);
3112 void *p;
3113 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3114 sizeof(long), GFP_ATOMIC);
3115 if (!map)
3116 return;
3117 slab_err(s, page, text, s->name);
3118 slab_lock(page);
3119
3120 get_map(s, page, map);
3121 for_each_object(p, s, addr, page->objects) {
3122
3123 if (!test_bit(slab_index(p, s, addr), map)) {
3124 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3125 p, p - addr);
3126 print_tracking(s, p);
3127 }
3128 }
3129 slab_unlock(page);
3130 kfree(map);
3131#endif
3132}
3133
3134
3135
3136
3137
3138
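/*
 * Attempt to free all partial slabs on a node. Called during cache shutdown
 * when no other users remain, so the partial list is walked without taking
 * the list_lock.
 */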
3139static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3140{
3141 struct page *page, *h;
3142
3143 list_for_each_entry_safe(page, h, &n->partial, lru) {
3144 if (!page->inuse) {
3145 remove_partial(n, page);
3146 discard_slab(s, page);
3147 } else {
3148 list_slab_objects(s, page,
3149 "Objects remaining in %s on kmem_cache_close()");
3150 }
3151 }
3152}
3153
3154
3155
3156
3157static inline int kmem_cache_close(struct kmem_cache *s)
3158{
3159 int node;
3160
3161 flush_all(s);
3162
3163 for_each_node_state(node, N_NORMAL_MEMORY) {
3164 struct kmem_cache_node *n = get_node(s, node);
3165
3166 free_partial(s, n);
3167 if (n->nr_partial || slabs_node(s, node))
3168 return 1;
3169 }
3170 free_percpu(s->cpu_slab);
3171 free_kmem_cache_nodes(s);
3172 return 0;
3173}
3174
3175int __kmem_cache_shutdown(struct kmem_cache *s)
3176{
3177 int rc = kmem_cache_close(s);
3178
3179 if (!rc) {
3180
3181
3182
3183
3184
3185
3186
3187
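		/*
		 * The caller holds slab_mutex across __kmem_cache_shutdown();
		 * drop it around the sysfs removal and re-take it before
		 * returning to the common code.
		 */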
3188 mutex_unlock(&slab_mutex);
3189 sysfs_slab_remove(s);
3190 mutex_lock(&slab_mutex);
3191 }
3192
3193 return rc;
3194}
3195
3196
3197
3198
3199
3200static int __init setup_slub_min_order(char *str)
3201{
3202 get_option(&str, &slub_min_order);
3203
3204 return 1;
3205}
3206
3207__setup("slub_min_order=", setup_slub_min_order);
3208
3209static int __init setup_slub_max_order(char *str)
3210{
3211 get_option(&str, &slub_max_order);
3212 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3213
3214 return 1;
3215}
3216
3217__setup("slub_max_order=", setup_slub_max_order);
3218
3219static int __init setup_slub_min_objects(char *str)
3220{
3221 get_option(&str, &slub_min_objects);
3222
3223 return 1;
3224}
3225
3226__setup("slub_min_objects=", setup_slub_min_objects);
3227
3228static int __init setup_slub_nomerge(char *str)
3229{
3230 slub_nomerge = 1;
3231 return 1;
3232}
3233
3234__setup("slub_nomerge", setup_slub_nomerge);
3235
3236void *__kmalloc(size_t size, gfp_t flags)
3237{
3238 struct kmem_cache *s;
3239 void *ret;
3240
3241 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3242 return kmalloc_large(size, flags);
3243
3244 s = kmalloc_slab(size, flags);
3245
3246 if (unlikely(ZERO_OR_NULL_PTR(s)))
3247 return s;
3248
3249 ret = slab_alloc(s, flags, _RET_IP_);
3250
3251 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3252
3253 return ret;
3254}
3255EXPORT_SYMBOL(__kmalloc);
3256
3257#ifdef CONFIG_NUMA
3258static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3259{
3260 struct page *page;
3261 void *ptr = NULL;
3262
3263 flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
3264 page = alloc_pages_node(node, flags, get_order(size));
3265 if (page)
3266 ptr = page_address(page);
3267
3268 kmemleak_alloc(ptr, size, 1, flags);
3269 return ptr;
3270}
3271
3272void *__kmalloc_node(size_t size, gfp_t flags, int node)
3273{
3274 struct kmem_cache *s;
3275 void *ret;
3276
3277 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3278 ret = kmalloc_large_node(size, flags, node);
3279
3280 trace_kmalloc_node(_RET_IP_, ret,
3281 size, PAGE_SIZE << get_order(size),
3282 flags, node);
3283
3284 return ret;
3285 }
3286
3287 s = kmalloc_slab(size, flags);
3288
3289 if (unlikely(ZERO_OR_NULL_PTR(s)))
3290 return s;
3291
3292 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3293
3294 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3295
3296 return ret;
3297}
3298EXPORT_SYMBOL(__kmalloc_node);
3299#endif
3300
3301size_t ksize(const void *object)
3302{
3303 struct page *page;
3304
3305 if (unlikely(object == ZERO_SIZE_PTR))
3306 return 0;
3307
3308 page = virt_to_head_page(object);
3309
3310 if (unlikely(!PageSlab(page))) {
3311 WARN_ON(!PageCompound(page));
3312 return PAGE_SIZE << compound_order(page);
3313 }
3314
3315 return slab_ksize(page->slab_cache);
3316}
3317EXPORT_SYMBOL(ksize);
3318
3319#ifdef CONFIG_SLUB_DEBUG
3320bool verify_mem_not_deleted(const void *x)
3321{
3322 struct page *page;
3323 void *object = (void *)x;
3324 unsigned long flags;
3325 bool rv;
3326
3327 if (unlikely(ZERO_OR_NULL_PTR(x)))
3328 return false;
3329
3330 local_irq_save(flags);
3331
3332 page = virt_to_head_page(x);
3333 if (unlikely(!PageSlab(page))) {
3334
3335 rv = true;
3336 goto out_unlock;
3337 }
3338
3339 slab_lock(page);
3340 if (on_freelist(page->slab_cache, page, object)) {
3341 object_err(page->slab_cache, page, object, "Object is on free-list");
3342 rv = false;
3343 } else {
3344 rv = true;
3345 }
3346 slab_unlock(page);
3347
3348out_unlock:
3349 local_irq_restore(flags);
3350 return rv;
3351}
3352EXPORT_SYMBOL(verify_mem_not_deleted);
3353#endif
3354
3355void kfree(const void *x)
3356{
3357 struct page *page;
3358 void *object = (void *)x;
3359
3360 trace_kfree(_RET_IP_, x);
3361
3362 if (unlikely(ZERO_OR_NULL_PTR(x)))
3363 return;
3364
3365 page = virt_to_head_page(x);
3366 if (unlikely(!PageSlab(page))) {
3367 BUG_ON(!PageCompound(page));
3368 kmemleak_free(x);
3369 __free_memcg_kmem_pages(page, compound_order(page));
3370 return;
3371 }
3372 slab_free(page->slab_cache, page, object, _RET_IP_);
3373}
3374EXPORT_SYMBOL(kfree);
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
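/*
 * kmem_cache_shrink() discards completely empty slabs from the partial lists
 * and re-sorts the remaining partial slabs so that the most-used slabs come
 * first. Allocations then fill up the nearly full slabs, which lets sparsely
 * used slabs empty out and be freed later.
 */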
3386int kmem_cache_shrink(struct kmem_cache *s)
3387{
3388 int node;
3389 int i;
3390 struct kmem_cache_node *n;
3391 struct page *page;
3392 struct page *t;
3393 int objects = oo_objects(s->max);
3394 struct list_head *slabs_by_inuse =
3395 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3396 unsigned long flags;
3397
3398 if (!slabs_by_inuse)
3399 return -ENOMEM;
3400
3401 flush_all(s);
3402 for_each_node_state(node, N_NORMAL_MEMORY) {
3403 n = get_node(s, node);
3404
3405 if (!n->nr_partial)
3406 continue;
3407
3408 for (i = 0; i < objects; i++)
3409 INIT_LIST_HEAD(slabs_by_inuse + i);
3410
3411 spin_lock_irqsave(&n->list_lock, flags);
3412
3413
3414
3415
3416
3417
3418
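		/*
		 * Bucket every partial slab by the number of objects in use;
		 * slabs with no objects in use end up on slabs_by_inuse[0]
		 * and are discarded after the lock is dropped.
		 */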
3419 list_for_each_entry_safe(page, t, &n->partial, lru) {
3420 list_move(&page->lru, slabs_by_inuse + page->inuse);
3421 if (!page->inuse)
3422 n->nr_partial--;
3423 }
3424
3425
3426
3427
3428
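		/*
		 * Rebuild the partial list with the fullest slabs first.
		 */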
3429 for (i = objects - 1; i > 0; i--)
3430 list_splice(slabs_by_inuse + i, n->partial.prev);
3431
3432 spin_unlock_irqrestore(&n->list_lock, flags);
3433
3434
3435 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3436 discard_slab(s, page);
3437 }
3438
3439 kfree(slabs_by_inuse);
3440 return 0;
3441}
3442EXPORT_SYMBOL(kmem_cache_shrink);
3443
3444static int slab_mem_going_offline_callback(void *arg)
3445{
3446 struct kmem_cache *s;
3447
3448 mutex_lock(&slab_mutex);
3449 list_for_each_entry(s, &slab_caches, list)
3450 kmem_cache_shrink(s);
3451 mutex_unlock(&slab_mutex);
3452
3453 return 0;
3454}
3455
3456static void slab_mem_offline_callback(void *arg)
3457{
3458 struct kmem_cache_node *n;
3459 struct kmem_cache *s;
3460 struct memory_notify *marg = arg;
3461 int offline_node;
3462
3463 offline_node = marg->status_change_nid_normal;
3464
3465
3466
3467
3468
3469 if (offline_node < 0)
3470 return;
3471
3472 mutex_lock(&slab_mutex);
3473 list_for_each_entry(s, &slab_caches, list) {
3474 n = get_node(s, offline_node);
3475 if (n) {
3476
3477
3478
3479
3480
3481
3482 BUG_ON(slabs_node(s, offline_node));
3483
3484 s->node[offline_node] = NULL;
3485 kmem_cache_free(kmem_cache_node, n);
3486 }
3487 }
3488 mutex_unlock(&slab_mutex);
3489}
3490
3491static int slab_mem_going_online_callback(void *arg)
3492{
3493 struct kmem_cache_node *n;
3494 struct kmem_cache *s;
3495 struct memory_notify *marg = arg;
3496 int nid = marg->status_change_nid_normal;
3497 int ret = 0;
3498
3499
3500
3501
3502
3503 if (nid < 0)
3504 return 0;
3505
3506
3507
3508
3509
3510
3511 mutex_lock(&slab_mutex);
3512 list_for_each_entry(s, &slab_caches, list) {
3513
3514
3515
3516
3517
3518 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3519 if (!n) {
3520 ret = -ENOMEM;
3521 goto out;
3522 }
3523 init_kmem_cache_node(n);
3524 s->node[nid] = n;
3525 }
3526out:
3527 mutex_unlock(&slab_mutex);
3528 return ret;
3529}
3530
3531static int slab_memory_callback(struct notifier_block *self,
3532 unsigned long action, void *arg)
3533{
3534 int ret = 0;
3535
3536 switch (action) {
3537 case MEM_GOING_ONLINE:
3538 ret = slab_mem_going_online_callback(arg);
3539 break;
3540 case MEM_GOING_OFFLINE:
3541 ret = slab_mem_going_offline_callback(arg);
3542 break;
3543 case MEM_OFFLINE:
3544 case MEM_CANCEL_ONLINE:
3545 slab_mem_offline_callback(arg);
3546 break;
3547 case MEM_ONLINE:
3548 case MEM_CANCEL_OFFLINE:
3549 break;
3550 }
3551 if (ret)
3552 ret = notifier_from_errno(ret);
3553 else
3554 ret = NOTIFY_OK;
3555 return ret;
3556}
3557
3558static struct notifier_block slab_memory_callback_nb = {
3559 .notifier_call = slab_memory_callback,
3560 .priority = SLAB_CALLBACK_PRI,
3561};
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
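/*
 * Used for the early kmem_cache structures that were allocated statically at
 * boot: copy them into properly allocated kmem_cache objects and fix up the
 * slab_cache back-pointers of any pages they already own.
 */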
3573static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3574{
3575 int node;
3576 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3577
3578 memcpy(s, static_cache, kmem_cache->object_size);
3579
3580
3581
3582
3583
3584
3585 __flush_cpu_slab(s, smp_processor_id());
3586 for_each_node_state(node, N_NORMAL_MEMORY) {
3587 struct kmem_cache_node *n = get_node(s, node);
3588 struct page *p;
3589
3590 if (n) {
3591 list_for_each_entry(p, &n->partial, lru)
3592 p->slab_cache = s;
3593
3594#ifdef CONFIG_SLUB_DEBUG
3595 list_for_each_entry(p, &n->full, lru)
3596 p->slab_cache = s;
3597#endif
3598 }
3599 }
3600 list_add(&s->list, &slab_caches);
3601 return s;
3602}
3603
3604void __init kmem_cache_init(void)
3605{
3606 static __initdata struct kmem_cache boot_kmem_cache,
3607 boot_kmem_cache_node;
3608
3609 if (debug_guardpage_minorder())
3610 slub_max_order = 0;
3611
3612 kmem_cache_node = &boot_kmem_cache_node;
3613 kmem_cache = &boot_kmem_cache;
3614
3615 create_boot_cache(kmem_cache_node, "kmem_cache_node",
3616 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
3617
3618 register_hotmemory_notifier(&slab_memory_callback_nb);
3619
3620
3621 slab_state = PARTIAL;
3622
3623 create_boot_cache(kmem_cache, "kmem_cache",
3624 offsetof(struct kmem_cache, node) +
3625 nr_node_ids * sizeof(struct kmem_cache_node *),
3626 SLAB_HWCACHE_ALIGN);
3627
3628 kmem_cache = bootstrap(&boot_kmem_cache);
3629
3630
3631
3632
3633
3634
3635 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
3636
3637
3638 create_kmalloc_caches(0);
3639
3640#ifdef CONFIG_SMP
3641 register_cpu_notifier(&slab_notifier);
3642#endif
3643
3644 printk(KERN_INFO
3645 "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
3646 " CPUs=%d, Nodes=%d\n",
3647 cache_line_size(),
3648 slub_min_order, slub_max_order, slub_min_objects,
3649 nr_cpu_ids, nr_node_ids);
3650}
3651
3652void __init kmem_cache_init_late(void)
3653{
3654}
3655
3656
3657
3658
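/*
 * A cache may not be merged with others if merging is disabled, if it uses
 * debug or RCU-style flags, if it has a constructor, or if it was explicitly
 * marked unmergeable (negative refcount) during bootstrap.
 */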
3659static int slab_unmergeable(struct kmem_cache *s)
3660{
3661 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3662 return 1;
3663
3664 if (s->ctor)
3665 return 1;
3666
3667
3668
3669
3670 if (s->refcount < 0)
3671 return 1;
3672
3673 return 0;
3674}
3675
3676static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
3677 size_t align, unsigned long flags, const char *name,
3678 void (*ctor)(void *))
3679{
3680 struct kmem_cache *s;
3681
3682 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3683 return NULL;
3684
3685 if (ctor)
3686 return NULL;
3687
3688 size = ALIGN(size, sizeof(void *));
3689 align = calculate_alignment(flags, align, size);
3690 size = ALIGN(size, align);
3691 flags = kmem_cache_flags(size, flags, name, NULL);
3692
3693 list_for_each_entry(s, &slab_caches, list) {
3694 if (slab_unmergeable(s))
3695 continue;
3696
3697 if (size > s->size)
3698 continue;
3699
3700 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3701 continue;
3702
3703
3704
3705
3706 if ((s->size & ~(align - 1)) != s->size)
3707 continue;
3708
3709 if (s->size - size >= sizeof(void *))
3710 continue;
3711
3712 if (!cache_match_memcg(s, memcg))
3713 continue;
3714
3715 return s;
3716 }
3717 return NULL;
3718}
3719
3720struct kmem_cache *
3721__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
3722 size_t align, unsigned long flags, void (*ctor)(void *))
3723{
3724 struct kmem_cache *s;
3725
3726 s = find_mergeable(memcg, size, align, flags, name, ctor);
3727 if (s) {
3728 s->refcount++;
3729
3730
3731
3732
3733 s->object_size = max(s->object_size, (int)size);
3734 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3735
3736 if (sysfs_slab_alias(s, name)) {
3737 s->refcount--;
3738 s = NULL;
3739 }
3740 }
3741
3742 return s;
3743}
3744
3745int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
3746{
3747 int err;
3748
3749 err = kmem_cache_open(s, flags);
3750 if (err)
3751 return err;
3752
3753
3754 if (slab_state <= UP)
3755 return 0;
3756
3757 memcg_propagate_slab_attrs(s);
3758 mutex_unlock(&slab_mutex);
3759 err = sysfs_slab_add(s);
3760 mutex_lock(&slab_mutex);
3761
3762 if (err)
3763 kmem_cache_close(s);
3764
3765 return err;
3766}
3767
3768#ifdef CONFIG_SMP
3769
3770
3771
3772
3773static int slab_cpuup_callback(struct notifier_block *nfb,
3774 unsigned long action, void *hcpu)
3775{
3776 long cpu = (long)hcpu;
3777 struct kmem_cache *s;
3778 unsigned long flags;
3779
3780 switch (action) {
3781 case CPU_UP_CANCELED:
3782 case CPU_UP_CANCELED_FROZEN:
3783 case CPU_DEAD:
3784 case CPU_DEAD_FROZEN:
3785 mutex_lock(&slab_mutex);
3786 list_for_each_entry(s, &slab_caches, list) {
3787 local_irq_save(flags);
3788 __flush_cpu_slab(s, cpu);
3789 local_irq_restore(flags);
3790 }
3791 mutex_unlock(&slab_mutex);
3792 break;
3793 default:
3794 break;
3795 }
3796 return NOTIFY_OK;
3797}
3798
3799static struct notifier_block slab_notifier = {
3800 .notifier_call = slab_cpuup_callback
3801};
3802
3803#endif
3804
3805void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3806{
3807 struct kmem_cache *s;
3808 void *ret;
3809
3810 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3811 return kmalloc_large(size, gfpflags);
3812
3813 s = kmalloc_slab(size, gfpflags);
3814
3815 if (unlikely(ZERO_OR_NULL_PTR(s)))
3816 return s;
3817
3818 ret = slab_alloc(s, gfpflags, caller);
3819
3820
3821 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3822
3823 return ret;
3824}
3825
3826#ifdef CONFIG_NUMA
3827void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3828 int node, unsigned long caller)
3829{
3830 struct kmem_cache *s;
3831 void *ret;
3832
3833 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3834 ret = kmalloc_large_node(size, gfpflags, node);
3835
3836 trace_kmalloc_node(caller, ret,
3837 size, PAGE_SIZE << get_order(size),
3838 gfpflags, node);
3839
3840 return ret;
3841 }
3842
3843 s = kmalloc_slab(size, gfpflags);
3844
3845 if (unlikely(ZERO_OR_NULL_PTR(s)))
3846 return s;
3847
3848 ret = slab_alloc_node(s, gfpflags, node, caller);
3849
3850
3851 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3852
3853 return ret;
3854}
3855#endif
3856
3857#ifdef CONFIG_SYSFS
3858static int count_inuse(struct page *page)
3859{
3860 return page->inuse;
3861}
3862
3863static int count_total(struct page *page)
3864{
3865 return page->objects;
3866}
3867#endif
3868
3869#ifdef CONFIG_SLUB_DEBUG
3870static int validate_slab(struct kmem_cache *s, struct page *page,
3871 unsigned long *map)
3872{
3873 void *p;
3874 void *addr = page_address(page);
3875
3876 if (!check_slab(s, page) ||
3877 !on_freelist(s, page, NULL))
3878 return 0;
3879
3880
3881 bitmap_zero(map, page->objects);
3882
3883 get_map(s, page, map);
3884 for_each_object(p, s, addr, page->objects) {
3885 if (test_bit(slab_index(p, s, addr), map))
3886 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
3887 return 0;
3888 }
3889
3890 for_each_object(p, s, addr, page->objects)
3891 if (!test_bit(slab_index(p, s, addr), map))
3892 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
3893 return 0;
3894 return 1;
3895}
3896
3897static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3898 unsigned long *map)
3899{
3900 slab_lock(page);
3901 validate_slab(s, page, map);
3902 slab_unlock(page);
3903}
3904
3905static int validate_slab_node(struct kmem_cache *s,
3906 struct kmem_cache_node *n, unsigned long *map)
3907{
3908 unsigned long count = 0;
3909 struct page *page;
3910 unsigned long flags;
3911
3912 spin_lock_irqsave(&n->list_lock, flags);
3913
3914 list_for_each_entry(page, &n->partial, lru) {
3915 validate_slab_slab(s, page, map);
3916 count++;
3917 }
3918 if (count != n->nr_partial)
3919 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3920 "counter=%ld\n", s->name, count, n->nr_partial);
3921
3922 if (!(s->flags & SLAB_STORE_USER))
3923 goto out;
3924
3925 list_for_each_entry(page, &n->full, lru) {
3926 validate_slab_slab(s, page, map);
3927 count++;
3928 }
3929 if (count != atomic_long_read(&n->nr_slabs))
3930 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3931 "counter=%ld\n", s->name, count,
3932 atomic_long_read(&n->nr_slabs));
3933
3934out:
3935 spin_unlock_irqrestore(&n->list_lock, flags);
3936 return count;
3937}
3938
3939static long validate_slab_cache(struct kmem_cache *s)
3940{
3941 int node;
3942 unsigned long count = 0;
3943 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3944 sizeof(unsigned long), GFP_KERNEL);
3945
3946 if (!map)
3947 return -ENOMEM;
3948
3949 flush_all(s);
3950 for_each_node_state(node, N_NORMAL_MEMORY) {
3951 struct kmem_cache_node *n = get_node(s, node);
3952
3953 count += validate_slab_node(s, n, map);
3954 }
3955 kfree(map);
3956 return count;
3957}
3958
3959
3960
3961
3962
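/*
 * Location tracking: collect the call sites at which objects of a cache were
 * allocated or freed, for the alloc_calls and free_calls sysfs files below.
 */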
3963struct location {
3964 unsigned long count;
3965 unsigned long addr;
3966 long long sum_time;
3967 long min_time;
3968 long max_time;
3969 long min_pid;
3970 long max_pid;
3971 DECLARE_BITMAP(cpus, NR_CPUS);
3972 nodemask_t nodes;
3973};
3974
3975struct loc_track {
3976 unsigned long max;
3977 unsigned long count;
3978 struct location *loc;
3979};
3980
3981static void free_loc_track(struct loc_track *t)
3982{
3983 if (t->max)
3984 free_pages((unsigned long)t->loc,
3985 get_order(sizeof(struct location) * t->max));
3986}
3987
3988static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
3989{
3990 struct location *l;
3991 int order;
3992
3993 order = get_order(sizeof(struct location) * max);
3994
3995 l = (void *)__get_free_pages(flags, order);
3996 if (!l)
3997 return 0;
3998
3999 if (t->count) {
4000 memcpy(l, t->loc, sizeof(struct location) * t->count);
4001 free_loc_track(t);
4002 }
4003 t->max = max;
4004 t->loc = l;
4005 return 1;
4006}
4007
4008static int add_location(struct loc_track *t, struct kmem_cache *s,
4009 const struct track *track)
4010{
4011 long start, end, pos;
4012 struct location *l;
4013 unsigned long caddr;
4014 unsigned long age = jiffies - track->when;
4015
4016 start = -1;
4017 end = t->count;
4018
4019 for ( ; ; ) {
4020 pos = start + (end - start + 1) / 2;
4021
4022
4023
4024
4025
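		/*
		 * Binary search over the existing, address-sorted location
		 * table. Hitting pos == end means the caller address is not
		 * present yet and a new entry must be inserted at pos.
		 */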
4026 if (pos == end)
4027 break;
4028
4029 caddr = t->loc[pos].addr;
4030 if (track->addr == caddr) {
4031
4032 l = &t->loc[pos];
4033 l->count++;
4034 if (track->when) {
4035 l->sum_time += age;
4036 if (age < l->min_time)
4037 l->min_time = age;
4038 if (age > l->max_time)
4039 l->max_time = age;
4040
4041 if (track->pid < l->min_pid)
4042 l->min_pid = track->pid;
4043 if (track->pid > l->max_pid)
4044 l->max_pid = track->pid;
4045
4046 cpumask_set_cpu(track->cpu,
4047 to_cpumask(l->cpus));
4048 }
4049 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4050 return 1;
4051 }
4052
4053 if (track->addr < caddr)
4054 end = pos;
4055 else
4056 start = pos;
4057 }
4058
4059
4060
4061
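	/*
	 * Not found. Insert a new tracking element, growing the table first
	 * if it is full.
	 */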
4062 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4063 return 0;
4064
4065 l = t->loc + pos;
4066 if (pos < t->count)
4067 memmove(l + 1, l,
4068 (t->count - pos) * sizeof(struct location));
4069 t->count++;
4070 l->count = 1;
4071 l->addr = track->addr;
4072 l->sum_time = age;
4073 l->min_time = age;
4074 l->max_time = age;
4075 l->min_pid = track->pid;
4076 l->max_pid = track->pid;
4077 cpumask_clear(to_cpumask(l->cpus));
4078 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4079 nodes_clear(l->nodes);
4080 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4081 return 1;
4082}
4083
4084static void process_slab(struct loc_track *t, struct kmem_cache *s,
4085 struct page *page, enum track_item alloc,
4086 unsigned long *map)
4087{
4088 void *addr = page_address(page);
4089 void *p;
4090
4091 bitmap_zero(map, page->objects);
4092 get_map(s, page, map);
4093
4094 for_each_object(p, s, addr, page->objects)
4095 if (!test_bit(slab_index(p, s, addr), map))
4096 add_location(t, s, get_track(s, p, alloc));
4097}
4098
4099static int list_locations(struct kmem_cache *s, char *buf,
4100 enum track_item alloc)
4101{
4102 int len = 0;
4103 unsigned long i;
4104 struct loc_track t = { 0, 0, NULL };
4105 int node;
4106 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4107 sizeof(unsigned long), GFP_KERNEL);
4108
4109 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4110 GFP_TEMPORARY)) {
4111 kfree(map);
4112 return sprintf(buf, "Out of memory\n");
4113 }
4114
4115 flush_all(s);
4116
4117 for_each_node_state(node, N_NORMAL_MEMORY) {
4118 struct kmem_cache_node *n = get_node(s, node);
4119 unsigned long flags;
4120 struct page *page;
4121
4122 if (!atomic_long_read(&n->nr_slabs))
4123 continue;
4124
4125 spin_lock_irqsave(&n->list_lock, flags);
4126 list_for_each_entry(page, &n->partial, lru)
4127 process_slab(&t, s, page, alloc, map);
4128 list_for_each_entry(page, &n->full, lru)
4129 process_slab(&t, s, page, alloc, map);
4130 spin_unlock_irqrestore(&n->list_lock, flags);
4131 }
4132
4133 for (i = 0; i < t.count; i++) {
4134 struct location *l = &t.loc[i];
4135
4136 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4137 break;
4138 len += sprintf(buf + len, "%7ld ", l->count);
4139
4140 if (l->addr)
4141 len += sprintf(buf + len, "%pS", (void *)l->addr);
4142 else
4143 len += sprintf(buf + len, "<not-available>");
4144
4145 if (l->sum_time != l->min_time) {
4146 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4147 l->min_time,
4148 (long)div_u64(l->sum_time, l->count),
4149 l->max_time);
4150 } else
4151 len += sprintf(buf + len, " age=%ld",
4152 l->min_time);
4153
4154 if (l->min_pid != l->max_pid)
4155 len += sprintf(buf + len, " pid=%ld-%ld",
4156 l->min_pid, l->max_pid);
4157 else
4158 len += sprintf(buf + len, " pid=%ld",
4159 l->min_pid);
4160
4161 if (num_online_cpus() > 1 &&
4162 !cpumask_empty(to_cpumask(l->cpus)) &&
4163 len < PAGE_SIZE - 60) {
4164 len += sprintf(buf + len, " cpus=");
4165 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4166 to_cpumask(l->cpus));
4167 }
4168
4169 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4170 len < PAGE_SIZE - 60) {
4171 len += sprintf(buf + len, " nodes=");
4172 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4173 l->nodes);
4174 }
4175
4176 len += sprintf(buf + len, "\n");
4177 }
4178
4179 free_loc_track(&t);
4180 kfree(map);
4181 if (!t.count)
4182 len += sprintf(buf, "No data\n");
4183 return len;
4184}
4185#endif
4186
4187#ifdef SLUB_RESILIENCY_TEST
4188static void resiliency_test(void)
4189{
4190 u8 *p;
4191
4192 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4193
4194 printk(KERN_ERR "SLUB resiliency testing\n");
4195 printk(KERN_ERR "-----------------------\n");
4196 printk(KERN_ERR "A. Corruption after allocation\n");
4197
4198 p = kzalloc(16, GFP_KERNEL);
4199 p[16] = 0x12;
4200 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4201 " 0x12->0x%p\n\n", p + 16);
4202
4203 validate_slab_cache(kmalloc_caches[4]);
4204
4205
4206 p = kzalloc(32, GFP_KERNEL);
4207 p[32 + sizeof(void *)] = 0x34;
	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
			" 0x34->0x%p\n", p);
4210 printk(KERN_ERR
4211 "If allocated object is overwritten then not detectable\n\n");
4212
4213 validate_slab_cache(kmalloc_caches[5]);
4214 p = kzalloc(64, GFP_KERNEL);
4215 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4216 *p = 0x56;
4217 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4218 p);
4219 printk(KERN_ERR
4220 "If allocated object is overwritten then not detectable\n\n");
4221 validate_slab_cache(kmalloc_caches[6]);
4222
4223 printk(KERN_ERR "\nB. Corruption after free\n");
4224 p = kzalloc(128, GFP_KERNEL);
4225 kfree(p);
4226 *p = 0x78;
4227 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4228 validate_slab_cache(kmalloc_caches[7]);
4229
4230 p = kzalloc(256, GFP_KERNEL);
4231 kfree(p);
4232 p[50] = 0x9a;
4233 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4234 p);
4235 validate_slab_cache(kmalloc_caches[8]);
4236
4237 p = kzalloc(512, GFP_KERNEL);
4238 kfree(p);
4239 p[512] = 0xab;
4240 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4241 validate_slab_cache(kmalloc_caches[9]);
4242}
4243#else
4244#ifdef CONFIG_SYSFS
static void resiliency_test(void) {}
4246#endif
4247#endif
4248
4249#ifdef CONFIG_SYSFS
4250enum slab_stat_type {
4251 SL_ALL,
4252 SL_PARTIAL,
4253 SL_CPU,
4254 SL_OBJECTS,
4255 SL_TOTAL
4256};
4257
4258#define SO_ALL (1 << SL_ALL)
4259#define SO_PARTIAL (1 << SL_PARTIAL)
4260#define SO_CPU (1 << SL_CPU)
4261#define SO_OBJECTS (1 << SL_OBJECTS)
4262#define SO_TOTAL (1 << SL_TOTAL)
4263
4264static ssize_t show_slab_objects(struct kmem_cache *s,
4265 char *buf, unsigned long flags)
4266{
4267 unsigned long total = 0;
4268 int node;
4269 int x;
4270 unsigned long *nodes;
4271 unsigned long *per_cpu;
4272
4273 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4274 if (!nodes)
4275 return -ENOMEM;
4276 per_cpu = nodes + nr_node_ids;
4277
4278 if (flags & SO_CPU) {
4279 int cpu;
4280
4281 for_each_possible_cpu(cpu) {
4282 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4283 int node;
4284 struct page *page;
4285
4286 page = ACCESS_ONCE(c->page);
4287 if (!page)
4288 continue;
4289
4290 node = page_to_nid(page);
4291 if (flags & SO_TOTAL)
4292 x = page->objects;
4293 else if (flags & SO_OBJECTS)
4294 x = page->inuse;
4295 else
4296 x = 1;
4297
4298 total += x;
4299 nodes[node] += x;
4300
4301 page = ACCESS_ONCE(c->partial);
4302 if (page) {
4303 x = page->pobjects;
4304 total += x;
4305 nodes[node] += x;
4306 }
4307
4308 per_cpu[node]++;
4309 }
4310 }
4311
4312 lock_memory_hotplug();
4313#ifdef CONFIG_SLUB_DEBUG
4314 if (flags & SO_ALL) {
4315 for_each_node_state(node, N_NORMAL_MEMORY) {
4316 struct kmem_cache_node *n = get_node(s, node);
4317
4318 if (flags & SO_TOTAL)
4319 x = atomic_long_read(&n->total_objects);
4320 else if (flags & SO_OBJECTS)
4321 x = atomic_long_read(&n->total_objects) -
4322 count_partial(n, count_free);
4323
4324 else
4325 x = atomic_long_read(&n->nr_slabs);
4326 total += x;
4327 nodes[node] += x;
4328 }
4329
4330 } else
4331#endif
4332 if (flags & SO_PARTIAL) {
4333 for_each_node_state(node, N_NORMAL_MEMORY) {
4334 struct kmem_cache_node *n = get_node(s, node);
4335
4336 if (flags & SO_TOTAL)
4337 x = count_partial(n, count_total);
4338 else if (flags & SO_OBJECTS)
4339 x = count_partial(n, count_inuse);
4340 else
4341 x = n->nr_partial;
4342 total += x;
4343 nodes[node] += x;
4344 }
4345 }
4346 x = sprintf(buf, "%lu", total);
4347#ifdef CONFIG_NUMA
4348 for_each_node_state(node, N_NORMAL_MEMORY)
4349 if (nodes[node])
4350 x += sprintf(buf + x, " N%d=%lu",
4351 node, nodes[node]);
4352#endif
4353 unlock_memory_hotplug();
4354 kfree(nodes);
4355 return x + sprintf(buf + x, "\n");
4356}
4357
4358#ifdef CONFIG_SLUB_DEBUG
4359static int any_slab_objects(struct kmem_cache *s)
4360{
4361 int node;
4362
4363 for_each_online_node(node) {
4364 struct kmem_cache_node *n = get_node(s, node);
4365
4366 if (!n)
4367 continue;
4368
4369 if (atomic_long_read(&n->total_objects))
4370 return 1;
4371 }
4372 return 0;
4373}
4374#endif
4375
4376#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4377#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4378
4379struct slab_attribute {
4380 struct attribute attr;
4381 ssize_t (*show)(struct kmem_cache *s, char *buf);
4382 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4383};
4384
4385#define SLAB_ATTR_RO(_name) \
4386 static struct slab_attribute _name##_attr = \
4387 __ATTR(_name, 0400, _name##_show, NULL)
4388
4389#define SLAB_ATTR(_name) \
4390 static struct slab_attribute _name##_attr = \
4391 __ATTR(_name, 0600, _name##_show, _name##_store)
4392
4393static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4394{
4395 return sprintf(buf, "%d\n", s->size);
4396}
4397SLAB_ATTR_RO(slab_size);
4398
4399static ssize_t align_show(struct kmem_cache *s, char *buf)
4400{
4401 return sprintf(buf, "%d\n", s->align);
4402}
4403SLAB_ATTR_RO(align);
4404
4405static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4406{
4407 return sprintf(buf, "%d\n", s->object_size);
4408}
4409SLAB_ATTR_RO(object_size);
4410
4411static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4412{
4413 return sprintf(buf, "%d\n", oo_objects(s->oo));
4414}
4415SLAB_ATTR_RO(objs_per_slab);
4416
4417static ssize_t order_store(struct kmem_cache *s,
4418 const char *buf, size_t length)
4419{
4420 unsigned long order;
4421 int err;
4422
4423 err = strict_strtoul(buf, 10, &order);
4424 if (err)
4425 return err;
4426
4427 if (order > slub_max_order || order < slub_min_order)
4428 return -EINVAL;
4429
4430 calculate_sizes(s, order);
4431 return length;
4432}
4433
4434static ssize_t order_show(struct kmem_cache *s, char *buf)
4435{
4436 return sprintf(buf, "%d\n", oo_order(s->oo));
4437}
4438SLAB_ATTR(order);
4439
4440static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4441{
4442 return sprintf(buf, "%lu\n", s->min_partial);
4443}
4444
4445static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4446 size_t length)
4447{
4448 unsigned long min;
4449 int err;
4450
4451 err = strict_strtoul(buf, 10, &min);
4452 if (err)
4453 return err;
4454
4455 set_min_partial(s, min);
4456 return length;
4457}
4458SLAB_ATTR(min_partial);
4459
4460static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4461{
4462 return sprintf(buf, "%u\n", s->cpu_partial);
4463}
4464
4465static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4466 size_t length)
4467{
4468 unsigned long objects;
4469 int err;
4470
4471 err = strict_strtoul(buf, 10, &objects);
4472 if (err)
4473 return err;
4474 if (objects && !kmem_cache_has_cpu_partial(s))
4475 return -EINVAL;
4476
4477 s->cpu_partial = objects;
4478 flush_all(s);
4479 return length;
4480}
4481SLAB_ATTR(cpu_partial);
4482
4483static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4484{
4485 if (!s->ctor)
4486 return 0;
4487 return sprintf(buf, "%pS\n", s->ctor);
4488}
4489SLAB_ATTR_RO(ctor);
4490
4491static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4492{
4493 return sprintf(buf, "%d\n", s->refcount - 1);
4494}
4495SLAB_ATTR_RO(aliases);
4496
4497static ssize_t partial_show(struct kmem_cache *s, char *buf)
4498{
4499 return show_slab_objects(s, buf, SO_PARTIAL);
4500}
4501SLAB_ATTR_RO(partial);
4502
4503static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4504{
4505 return show_slab_objects(s, buf, SO_CPU);
4506}
4507SLAB_ATTR_RO(cpu_slabs);
4508
4509static ssize_t objects_show(struct kmem_cache *s, char *buf)
4510{
4511 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4512}
4513SLAB_ATTR_RO(objects);
4514
4515static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4516{
4517 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4518}
4519SLAB_ATTR_RO(objects_partial);
4520
4521static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4522{
4523 int objects = 0;
4524 int pages = 0;
4525 int cpu;
4526 int len;
4527
4528 for_each_online_cpu(cpu) {
4529 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4530
4531 if (page) {
4532 pages += page->pages;
4533 objects += page->pobjects;
4534 }
4535 }
4536
4537 len = sprintf(buf, "%d(%d)", objects, pages);
4538
4539#ifdef CONFIG_SMP
4540 for_each_online_cpu(cpu) {
		struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4542
4543 if (page && len < PAGE_SIZE - 20)
4544 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4545 page->pobjects, page->pages);
4546 }
4547#endif
4548 return len + sprintf(buf + len, "\n");
4549}
4550SLAB_ATTR_RO(slabs_cpu_partial);
4551
4552static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4553{
4554 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4555}
4556
4557static ssize_t reclaim_account_store(struct kmem_cache *s,
4558 const char *buf, size_t length)
4559{
4560 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4561 if (buf[0] == '1')
4562 s->flags |= SLAB_RECLAIM_ACCOUNT;
4563 return length;
4564}
4565SLAB_ATTR(reclaim_account);
4566
4567static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4568{
4569 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4570}
4571SLAB_ATTR_RO(hwcache_align);
4572
4573#ifdef CONFIG_ZONE_DMA
4574static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4575{
4576 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4577}
4578SLAB_ATTR_RO(cache_dma);
4579#endif
4580
4581static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4582{
4583 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4584}
4585SLAB_ATTR_RO(destroy_by_rcu);
4586
4587static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4588{
4589 return sprintf(buf, "%d\n", s->reserved);
4590}
4591SLAB_ATTR_RO(reserved);
4592
4593#ifdef CONFIG_SLUB_DEBUG
4594static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4595{
4596 return show_slab_objects(s, buf, SO_ALL);
4597}
4598SLAB_ATTR_RO(slabs);
4599
4600static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4601{
4602 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4603}
4604SLAB_ATTR_RO(total_objects);
4605
4606static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4607{
4608 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4609}
4610
4611static ssize_t sanity_checks_store(struct kmem_cache *s,
4612 const char *buf, size_t length)
4613{
4614 s->flags &= ~SLAB_DEBUG_FREE;
4615 if (buf[0] == '1') {
4616 s->flags &= ~__CMPXCHG_DOUBLE;
4617 s->flags |= SLAB_DEBUG_FREE;
4618 }
4619 return length;
4620}
4621SLAB_ATTR(sanity_checks);
4622
4623static ssize_t trace_show(struct kmem_cache *s, char *buf)
4624{
4625 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4626}
4627
4628static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4629 size_t length)
4630{
4631 s->flags &= ~SLAB_TRACE;
4632 if (buf[0] == '1') {
4633 s->flags &= ~__CMPXCHG_DOUBLE;
4634 s->flags |= SLAB_TRACE;
4635 }
4636 return length;
4637}
4638SLAB_ATTR(trace);
4639
4640static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4641{
4642 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4643}
4644
4645static ssize_t red_zone_store(struct kmem_cache *s,
4646 const char *buf, size_t length)
4647{
4648 if (any_slab_objects(s))
4649 return -EBUSY;
4650
4651 s->flags &= ~SLAB_RED_ZONE;
4652 if (buf[0] == '1') {
4653 s->flags &= ~__CMPXCHG_DOUBLE;
4654 s->flags |= SLAB_RED_ZONE;
4655 }
4656 calculate_sizes(s, -1);
4657 return length;
4658}
4659SLAB_ATTR(red_zone);
4660
4661static ssize_t poison_show(struct kmem_cache *s, char *buf)
4662{
4663 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4664}
4665
4666static ssize_t poison_store(struct kmem_cache *s,
4667 const char *buf, size_t length)
4668{
4669 if (any_slab_objects(s))
4670 return -EBUSY;
4671
4672 s->flags &= ~SLAB_POISON;
4673 if (buf[0] == '1') {
4674 s->flags &= ~__CMPXCHG_DOUBLE;
4675 s->flags |= SLAB_POISON;
4676 }
4677 calculate_sizes(s, -1);
4678 return length;
4679}
4680SLAB_ATTR(poison);
4681
4682static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4683{
4684 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4685}
4686
4687static ssize_t store_user_store(struct kmem_cache *s,
4688 const char *buf, size_t length)
4689{
4690 if (any_slab_objects(s))
4691 return -EBUSY;
4692
4693 s->flags &= ~SLAB_STORE_USER;
4694 if (buf[0] == '1') {
4695 s->flags &= ~__CMPXCHG_DOUBLE;
4696 s->flags |= SLAB_STORE_USER;
4697 }
4698 calculate_sizes(s, -1);
4699 return length;
4700}
4701SLAB_ATTR(store_user);
4702
4703static ssize_t validate_show(struct kmem_cache *s, char *buf)
4704{
4705 return 0;
4706}
4707
4708static ssize_t validate_store(struct kmem_cache *s,
4709 const char *buf, size_t length)
4710{
4711 int ret = -EINVAL;
4712
4713 if (buf[0] == '1') {
4714 ret = validate_slab_cache(s);
4715 if (ret >= 0)
4716 ret = length;
4717 }
4718 return ret;
4719}
4720SLAB_ATTR(validate);
4721
4722static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4723{
4724 if (!(s->flags & SLAB_STORE_USER))
4725 return -ENOSYS;
4726 return list_locations(s, buf, TRACK_ALLOC);
4727}
4728SLAB_ATTR_RO(alloc_calls);
4729
4730static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4731{
4732 if (!(s->flags & SLAB_STORE_USER))
4733 return -ENOSYS;
4734 return list_locations(s, buf, TRACK_FREE);
4735}
4736SLAB_ATTR_RO(free_calls);
4737#endif
4738
4739#ifdef CONFIG_FAILSLAB
4740static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4741{
4742 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4743}
4744
4745static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4746 size_t length)
4747{
4748 s->flags &= ~SLAB_FAILSLAB;
4749 if (buf[0] == '1')
4750 s->flags |= SLAB_FAILSLAB;
4751 return length;
4752}
4753SLAB_ATTR(failslab);
4754#endif
4755
4756static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4757{
4758 return 0;
4759}
4760
4761static ssize_t shrink_store(struct kmem_cache *s,
4762 const char *buf, size_t length)
4763{
4764 if (buf[0] == '1') {
4765 int rc = kmem_cache_shrink(s);
4766
4767 if (rc)
4768 return rc;
4769 } else
4770 return -EINVAL;
4771 return length;
4772}
4773SLAB_ATTR(shrink);
4774
4775#ifdef CONFIG_NUMA
4776static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4777{
4778 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4779}
4780
4781static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4782 const char *buf, size_t length)
4783{
4784 unsigned long ratio;
4785 int err;
4786
4787 err = strict_strtoul(buf, 10, &ratio);
4788 if (err)
4789 return err;
4790
4791 if (ratio <= 100)
4792 s->remote_node_defrag_ratio = ratio * 10;
4793
4794 return length;
4795}
4796SLAB_ATTR(remote_node_defrag_ratio);
4797#endif
4798
4799#ifdef CONFIG_SLUB_STATS
4800static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4801{
4802 unsigned long sum = 0;
4803 int cpu;
4804 int len;
4805 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4806
4807 if (!data)
4808 return -ENOMEM;
4809
4810 for_each_online_cpu(cpu) {
4811 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4812
4813 data[cpu] = x;
4814 sum += x;
4815 }
4816
4817 len = sprintf(buf, "%lu", sum);
4818
4819#ifdef CONFIG_SMP
4820 for_each_online_cpu(cpu) {
4821 if (data[cpu] && len < PAGE_SIZE - 20)
4822 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4823 }
4824#endif
4825 kfree(data);
4826 return len + sprintf(buf + len, "\n");
4827}
4828
4829static void clear_stat(struct kmem_cache *s, enum stat_item si)
4830{
4831 int cpu;
4832
4833 for_each_online_cpu(cpu)
4834 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
4835}
4836
4837#define STAT_ATTR(si, text) \
4838static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4839{ \
4840 return show_stat(s, buf, si); \
4841} \
4842static ssize_t text##_store(struct kmem_cache *s, \
4843 const char *buf, size_t length) \
4844{ \
4845 if (buf[0] != '0') \
4846 return -EINVAL; \
4847 clear_stat(s, si); \
4848 return length; \
4849} \
4850SLAB_ATTR(text); \
4851
4852STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4853STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
4854STAT_ATTR(FREE_FASTPATH, free_fastpath);
4855STAT_ATTR(FREE_SLOWPATH, free_slowpath);
4856STAT_ATTR(FREE_FROZEN, free_frozen);
4857STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
4858STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4859STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4860STAT_ATTR(ALLOC_SLAB, alloc_slab);
4861STAT_ATTR(ALLOC_REFILL, alloc_refill);
4862STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
4863STAT_ATTR(FREE_SLAB, free_slab);
4864STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4865STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
4866STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4867STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4868STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4869STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4870STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
4871STAT_ATTR(ORDER_FALLBACK, order_fallback);
4872STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
4873STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
4874STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
4875STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
4876STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
4877STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
4878#endif
4879
4880static struct attribute *slab_attrs[] = {
4881 &slab_size_attr.attr,
4882 &object_size_attr.attr,
4883 &objs_per_slab_attr.attr,
4884 &order_attr.attr,
4885 &min_partial_attr.attr,
4886 &cpu_partial_attr.attr,
4887 &objects_attr.attr,
4888 &objects_partial_attr.attr,
4889 &partial_attr.attr,
4890 &cpu_slabs_attr.attr,
4891 &ctor_attr.attr,
4892 &aliases_attr.attr,
4893 &align_attr.attr,
4894 &hwcache_align_attr.attr,
4895 &reclaim_account_attr.attr,
4896 &destroy_by_rcu_attr.attr,
4897 &shrink_attr.attr,
4898 &reserved_attr.attr,
4899 &slabs_cpu_partial_attr.attr,
4900#ifdef CONFIG_SLUB_DEBUG
4901 &total_objects_attr.attr,
4902 &slabs_attr.attr,
4903 &sanity_checks_attr.attr,
4904 &trace_attr.attr,
4905 &red_zone_attr.attr,
4906 &poison_attr.attr,
4907 &store_user_attr.attr,
4908 &validate_attr.attr,
4909 &alloc_calls_attr.attr,
4910 &free_calls_attr.attr,
4911#endif
4912#ifdef CONFIG_ZONE_DMA
4913 &cache_dma_attr.attr,
4914#endif
4915#ifdef CONFIG_NUMA
4916 &remote_node_defrag_ratio_attr.attr,
4917#endif
4918#ifdef CONFIG_SLUB_STATS
4919 &alloc_fastpath_attr.attr,
4920 &alloc_slowpath_attr.attr,
4921 &free_fastpath_attr.attr,
4922 &free_slowpath_attr.attr,
4923 &free_frozen_attr.attr,
4924 &free_add_partial_attr.attr,
4925 &free_remove_partial_attr.attr,
4926 &alloc_from_partial_attr.attr,
4927 &alloc_slab_attr.attr,
4928 &alloc_refill_attr.attr,
4929 &alloc_node_mismatch_attr.attr,
4930 &free_slab_attr.attr,
4931 &cpuslab_flush_attr.attr,
4932 &deactivate_full_attr.attr,
4933 &deactivate_empty_attr.attr,
4934 &deactivate_to_head_attr.attr,
4935 &deactivate_to_tail_attr.attr,
4936 &deactivate_remote_frees_attr.attr,
4937 &deactivate_bypass_attr.attr,
4938 &order_fallback_attr.attr,
4939 &cmpxchg_double_fail_attr.attr,
4940 &cmpxchg_double_cpu_fail_attr.attr,
4941 &cpu_partial_alloc_attr.attr,
4942 &cpu_partial_free_attr.attr,
4943 &cpu_partial_node_attr.attr,
4944 &cpu_partial_drain_attr.attr,
4945#endif
4946#ifdef CONFIG_FAILSLAB
4947 &failslab_attr.attr,
4948#endif
4949
4950 NULL
4951};
4952
4953static struct attribute_group slab_attr_group = {
4954 .attrs = slab_attrs,
4955};
4956
4957static ssize_t slab_attr_show(struct kobject *kobj,
4958 struct attribute *attr,
4959 char *buf)
4960{
4961 struct slab_attribute *attribute;
4962 struct kmem_cache *s;
4963 int err;
4964
4965 attribute = to_slab_attr(attr);
4966 s = to_slab(kobj);
4967
4968 if (!attribute->show)
4969 return -EIO;
4970
4971 err = attribute->show(s, buf);
4972
4973 return err;
4974}
4975
4976static ssize_t slab_attr_store(struct kobject *kobj,
4977 struct attribute *attr,
4978 const char *buf, size_t len)
4979{
4980 struct slab_attribute *attribute;
4981 struct kmem_cache *s;
4982 int err;
4983
4984 attribute = to_slab_attr(attr);
4985 s = to_slab(kobj);
4986
4987 if (!attribute->store)
4988 return -EIO;
4989
4990 err = attribute->store(s, buf, len);
4991#ifdef CONFIG_MEMCG_KMEM
4992 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
4993 int i;
4994
4995 mutex_lock(&slab_mutex);
4996 if (s->max_attr_size < len)
4997 s->max_attr_size = len;
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016 for_each_memcg_cache_index(i) {
5017 struct kmem_cache *c = cache_from_memcg(s, i);
5018 if (c)
5019 attribute->store(c, buf, len);
5020 }
5021 mutex_unlock(&slab_mutex);
5022 }
5023#endif
5024 return err;
5025}
5026
5027static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5028{
5029#ifdef CONFIG_MEMCG_KMEM
5030 int i;
5031 char *buffer = NULL;
5032
5033 if (!is_root_cache(s))
5034 return;
5035
5036
5037
5038
5039
5040 if (!s->max_attr_size)
5041 return;
5042
5043 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5044 char mbuf[64];
5045 char *buf;
5046 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5047
5048 if (!attr || !attr->store || !attr->show)
5049 continue;
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060 if (buffer)
5061 buf = buffer;
5062 else if (s->max_attr_size < ARRAY_SIZE(mbuf))
5063 buf = mbuf;
5064 else {
5065 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5066 if (WARN_ON(!buffer))
5067 continue;
5068 buf = buffer;
5069 }
5070
5071 attr->show(s->memcg_params->root_cache, buf);
5072 attr->store(s, buf, strlen(buf));
5073 }
5074
5075 if (buffer)
5076 free_page((unsigned long)buffer);
5077#endif
5078}
5079
5080static const struct sysfs_ops slab_sysfs_ops = {
5081 .show = slab_attr_show,
5082 .store = slab_attr_store,
5083};
5084
5085static struct kobj_type slab_ktype = {
5086 .sysfs_ops = &slab_sysfs_ops,
5087};
5088
5089static int uevent_filter(struct kset *kset, struct kobject *kobj)
5090{
5091 struct kobj_type *ktype = get_ktype(kobj);
5092
5093 if (ktype == &slab_ktype)
5094 return 1;
5095 return 0;
5096}
5097
5098static const struct kset_uevent_ops slab_uevent_ops = {
5099 .filter = uevent_filter,
5100};
5101
5102static struct kset *slab_kset;
5103
5104#define ID_STR_LENGTH 64
5105
5106
5107
5108
5109
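/*
 * Build a unique sysfs name for a mergeable cache from the flags that affect
 * its behaviour and its object size, so that caches with different semantics
 * never share a sysfs directory.
 */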
5110static char *create_unique_id(struct kmem_cache *s)
5111{
5112 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5113 char *p = name;
5114
5115 BUG_ON(!name);
5116
5117 *p++ = ':';
5118
5119
5120
5121
5122
5123
5124
5125 if (s->flags & SLAB_CACHE_DMA)
5126 *p++ = 'd';
5127 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5128 *p++ = 'a';
5129 if (s->flags & SLAB_DEBUG_FREE)
5130 *p++ = 'F';
5131 if (!(s->flags & SLAB_NOTRACK))
5132 *p++ = 't';
5133 if (p != name + 1)
5134 *p++ = '-';
5135 p += sprintf(p, "%07d", s->size);
5136
5137#ifdef CONFIG_MEMCG_KMEM
5138 if (!is_root_cache(s))
5139 p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
5140#endif
5141
5142 BUG_ON(p > name + ID_STR_LENGTH - 1);
5143 return name;
5144}
5145
5146static int sysfs_slab_add(struct kmem_cache *s)
5147{
5148 int err;
5149 const char *name;
5150 int unmergeable = slab_unmergeable(s);
5151
5152 if (unmergeable) {
5153
5154
5155
5156
5157
5158 sysfs_remove_link(&slab_kset->kobj, s->name);
5159 name = s->name;
5160 } else {
5161
5162
5163
5164
5165 name = create_unique_id(s);
5166 }
5167
5168 s->kobj.kset = slab_kset;
5169 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
5170 if (err) {
5171 kobject_put(&s->kobj);
5172 return err;
5173 }
5174
5175 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5176 if (err) {
5177 kobject_del(&s->kobj);
5178 kobject_put(&s->kobj);
5179 return err;
5180 }
5181 kobject_uevent(&s->kobj, KOBJ_ADD);
5182 if (!unmergeable) {
5183
5184 sysfs_slab_alias(s, s->name);
5185 kfree(name);
5186 }
5187 return 0;
5188}
5189
5190static void sysfs_slab_remove(struct kmem_cache *s)
5191{
5192 if (slab_state < FULL)
5193
5194
5195
5196
5197 return;
5198
5199 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5200 kobject_del(&s->kobj);
5201 kobject_put(&s->kobj);
5202}
5203
5204
5205
5206
5207
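/*
 * Aliases requested before sysfs is initialized are queued on alias_list and
 * replayed from slab_sysfs_init().
 */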
5208struct saved_alias {
5209 struct kmem_cache *s;
5210 const char *name;
5211 struct saved_alias *next;
5212};
5213
5214static struct saved_alias *alias_list;
5215
5216static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5217{
5218 struct saved_alias *al;
5219
5220 if (slab_state == FULL) {
5221
5222
5223
5224 sysfs_remove_link(&slab_kset->kobj, name);
5225 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5226 }
5227
5228 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5229 if (!al)
5230 return -ENOMEM;
5231
5232 al->s = s;
5233 al->name = name;
5234 al->next = alias_list;
5235 alias_list = al;
5236 return 0;
5237}
5238
5239static int __init slab_sysfs_init(void)
5240{
5241 struct kmem_cache *s;
5242 int err;
5243
5244 mutex_lock(&slab_mutex);
5245
5246 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5247 if (!slab_kset) {
5248 mutex_unlock(&slab_mutex);
5249 printk(KERN_ERR "Cannot register slab subsystem.\n");
5250 return -ENOSYS;
5251 }
5252
5253 slab_state = FULL;
5254
5255 list_for_each_entry(s, &slab_caches, list) {
5256 err = sysfs_slab_add(s);
5257 if (err)
5258 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5259 " to sysfs\n", s->name);
5260 }
5261
5262 while (alias_list) {
5263 struct saved_alias *al = alias_list;
5264
5265 alias_list = alias_list->next;
5266 err = sysfs_slab_alias(al->s, al->name);
5267 if (err)
5268 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5269 " %s to sysfs\n", al->name);
5270 kfree(al);
5271 }
5272
5273 mutex_unlock(&slab_mutex);
5274 resiliency_test();
5275 return 0;
5276}
5277
5278__initcall(slab_sysfs_init);
5279#endif
5280
5281
5282
5283
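/*
 * The /proc/slabinfo ABI.
 */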
5284#ifdef CONFIG_SLABINFO
5285void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5286{
5287 unsigned long nr_slabs = 0;
5288 unsigned long nr_objs = 0;
5289 unsigned long nr_free = 0;
5290 int node;
5291
5292 for_each_online_node(node) {
5293 struct kmem_cache_node *n = get_node(s, node);
5294
5295 if (!n)
5296 continue;
5297
5298 nr_slabs += node_nr_slabs(n);
5299 nr_objs += node_nr_objs(n);
5300 nr_free += count_partial(n, count_free);
5301 }
5302
5303 sinfo->active_objs = nr_objs - nr_free;
5304 sinfo->num_objs = nr_objs;
5305 sinfo->active_slabs = nr_slabs;
5306 sinfo->num_slabs = nr_slabs;
5307 sinfo->objects_per_slab = oo_objects(s->oo);
5308 sinfo->cache_order = oo_order(s->oo);
5309}
5310
5311void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5312{
5313}
5314
5315ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5316 size_t count, loff_t *ppos)
5317{
5318 return -EIO;
5319}
5320#endif
5321