1
2
3
4
5
6
7
8
9
10
11
12#include <linux/mm.h>
13#include <linux/swap.h>
14#include <linux/module.h>
15#include <linux/bit_spinlock.h>
16#include <linux/interrupt.h>
17#include <linux/bitops.h>
18#include <linux/slab.h>
19#include <linux/proc_fs.h>
20#include <linux/seq_file.h>
21#include <linux/kmemcheck.h>
22#include <linux/cpu.h>
23#include <linux/cpuset.h>
24#include <linux/mempolicy.h>
25#include <linux/ctype.h>
26#include <linux/debugobjects.h>
27#include <linux/kallsyms.h>
28#include <linux/memory.h>
29#include <linux/math64.h>
30#include <linux/fault-inject.h>
31#include <linux/stacktrace.h>
32
33#include <trace/events/kmem.h>
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
112 SLAB_TRACE | SLAB_DEBUG_FREE)
113
114static inline int kmem_cache_debug(struct kmem_cache *s)
115{
116#ifdef CONFIG_SLUB_DEBUG
117 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
118#else
119 return 0;
120#endif
121}
122
123
124
125
126
127
128
129
130
131
132#undef SLUB_RESILIENCY_TEST
133
134
135#undef SLUB_DEBUG_CMPXCHG
136
137
138
139
140
141#define MIN_PARTIAL 5
142
143
144
145
146
147
148#define MAX_PARTIAL 10
149
150#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
151 SLAB_POISON | SLAB_STORE_USER)
152
153
154
155
156
157
158#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
159
160
161
162
163#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
164 SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
165 SLAB_FAILSLAB)
166
167#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
168 SLAB_CACHE_DMA | SLAB_NOTRACK)
169
170#define OO_SHIFT 16
171#define OO_MASK ((1 << OO_SHIFT) - 1)
172#define MAX_OBJS_PER_PAGE 32767
173
174
175#define __OBJECT_POISON 0x80000000UL
176#define __CMPXCHG_DOUBLE 0x40000000UL
177
178static int kmem_size = sizeof(struct kmem_cache);
179
180#ifdef CONFIG_SMP
181static struct notifier_block slab_notifier;
182#endif
183
184static enum {
185 DOWN,
186 PARTIAL,
187 UP,
188 SYSFS
189} slab_state = DOWN;
190
191
192static DECLARE_RWSEM(slub_lock);
193static LIST_HEAD(slab_caches);
194
195
196
197
198#define TRACK_ADDRS_COUNT 16
199struct track {
200 unsigned long addr;
201#ifdef CONFIG_STACKTRACE
202 unsigned long addrs[TRACK_ADDRS_COUNT];
203#endif
204 int cpu;
205 int pid;
206 unsigned long when;
207};
208
209enum track_item { TRACK_ALLOC, TRACK_FREE };
210
211#ifdef CONFIG_SYSFS
212static int sysfs_slab_add(struct kmem_cache *);
213static int sysfs_slab_alias(struct kmem_cache *, const char *);
214static void sysfs_slab_remove(struct kmem_cache *);
215
216#else
217static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
218static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
219 { return 0; }
220static inline void sysfs_slab_remove(struct kmem_cache *s)
221{
222 kfree(s->name);
223 kfree(s);
224}
225
226#endif
227
228static inline void stat(const struct kmem_cache *s, enum stat_item si)
229{
230#ifdef CONFIG_SLUB_STATS
231 __this_cpu_inc(s->cpu_slab->stat[si]);
232#endif
233}
234
235
236
237
238
239int slab_is_available(void)
240{
241 return slab_state >= UP;
242}
243
244static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
245{
246 return s->node[node];
247}
248
249
250static inline int check_valid_pointer(struct kmem_cache *s,
251 struct page *page, const void *object)
252{
253 void *base;
254
255 if (!object)
256 return 1;
257
258 base = page_address(page);
259 if (object < base || object >= base + page->objects * s->size ||
260 (object - base) % s->size) {
261 return 0;
262 }
263
264 return 1;
265}
266
267static inline void *get_freepointer(struct kmem_cache *s, void *object)
268{
269 return *(void **)(object + s->offset);
270}
271
272static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
273{
274 void *p;
275
276#ifdef CONFIG_DEBUG_PAGEALLOC
277 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
278#else
279 p = get_freepointer(s, object);
280#endif
281 return p;
282}
283
284static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
285{
286 *(void **)(object + s->offset) = fp;
287}
288
289
290#define for_each_object(__p, __s, __addr, __objects) \
291 for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
292 __p += (__s)->size)
293
294
295static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
296{
297 return (p - addr) / s->size;
298}
299
300static inline size_t slab_ksize(const struct kmem_cache *s)
301{
302#ifdef CONFIG_SLUB_DEBUG
303
304
305
306
307 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
308 return s->objsize;
309
310#endif
311
312
313
314
315
316 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
317 return s->inuse;
318
319
320
321 return s->size;
322}
323
324static inline int order_objects(int order, unsigned long size, int reserved)
325{
326 return ((PAGE_SIZE << order) - reserved) / size;
327}
328
329static inline struct kmem_cache_order_objects oo_make(int order,
330 unsigned long size, int reserved)
331{
332 struct kmem_cache_order_objects x = {
333 (order << OO_SHIFT) + order_objects(order, size, reserved)
334 };
335
336 return x;
337}
338
339static inline int oo_order(struct kmem_cache_order_objects x)
340{
341 return x.x >> OO_SHIFT;
342}
343
344static inline int oo_objects(struct kmem_cache_order_objects x)
345{
346 return x.x & OO_MASK;
347}
348
349
350
351
352static __always_inline void slab_lock(struct page *page)
353{
354 bit_spin_lock(PG_locked, &page->flags);
355}
356
357static __always_inline void slab_unlock(struct page *page)
358{
359 __bit_spin_unlock(PG_locked, &page->flags);
360}
361
362
363static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
364 void *freelist_old, unsigned long counters_old,
365 void *freelist_new, unsigned long counters_new,
366 const char *n)
367{
368 VM_BUG_ON(!irqs_disabled());
369#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
370 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
371 if (s->flags & __CMPXCHG_DOUBLE) {
372 if (cmpxchg_double(&page->freelist, &page->counters,
373 freelist_old, counters_old,
374 freelist_new, counters_new))
375 return 1;
376 } else
377#endif
378 {
379 slab_lock(page);
380 if (page->freelist == freelist_old && page->counters == counters_old) {
381 page->freelist = freelist_new;
382 page->counters = counters_new;
383 slab_unlock(page);
384 return 1;
385 }
386 slab_unlock(page);
387 }
388
389 cpu_relax();
390 stat(s, CMPXCHG_DOUBLE_FAIL);
391
392#ifdef SLUB_DEBUG_CMPXCHG
393 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
394#endif
395
396 return 0;
397}
398
399static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
400 void *freelist_old, unsigned long counters_old,
401 void *freelist_new, unsigned long counters_new,
402 const char *n)
403{
404#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
405 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
406 if (s->flags & __CMPXCHG_DOUBLE) {
407 if (cmpxchg_double(&page->freelist, &page->counters,
408 freelist_old, counters_old,
409 freelist_new, counters_new))
410 return 1;
411 } else
412#endif
413 {
414 unsigned long flags;
415
416 local_irq_save(flags);
417 slab_lock(page);
418 if (page->freelist == freelist_old && page->counters == counters_old) {
419 page->freelist = freelist_new;
420 page->counters = counters_new;
421 slab_unlock(page);
422 local_irq_restore(flags);
423 return 1;
424 }
425 slab_unlock(page);
426 local_irq_restore(flags);
427 }
428
429 cpu_relax();
430 stat(s, CMPXCHG_DOUBLE_FAIL);
431
432#ifdef SLUB_DEBUG_CMPXCHG
433 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
434#endif
435
436 return 0;
437}
438
439#ifdef CONFIG_SLUB_DEBUG
440
441
442
443
444
445
446static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
447{
448 void *p;
449 void *addr = page_address(page);
450
451 for (p = page->freelist; p; p = get_freepointer(s, p))
452 set_bit(slab_index(p, s, addr), map);
453}
454
455
456
457
458#ifdef CONFIG_SLUB_DEBUG_ON
459static int slub_debug = DEBUG_DEFAULT_FLAGS;
460#else
461static int slub_debug;
462#endif
463
464static char *slub_debug_slabs;
465static int disable_higher_order_debug;
466
467
468
469
470static void print_section(char *text, u8 *addr, unsigned int length)
471{
472 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
473 length, 1);
474}
475
476static struct track *get_track(struct kmem_cache *s, void *object,
477 enum track_item alloc)
478{
479 struct track *p;
480
481 if (s->offset)
482 p = object + s->offset + sizeof(void *);
483 else
484 p = object + s->inuse;
485
486 return p + alloc;
487}
488
489static void set_track(struct kmem_cache *s, void *object,
490 enum track_item alloc, unsigned long addr)
491{
492 struct track *p = get_track(s, object, alloc);
493
494 if (addr) {
495#ifdef CONFIG_STACKTRACE
496 struct stack_trace trace;
497 int i;
498
499 trace.nr_entries = 0;
500 trace.max_entries = TRACK_ADDRS_COUNT;
501 trace.entries = p->addrs;
502 trace.skip = 3;
503 save_stack_trace(&trace);
504
505
506 if (trace.nr_entries != 0 &&
507 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
508 trace.nr_entries--;
509
510 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
511 p->addrs[i] = 0;
512#endif
513 p->addr = addr;
514 p->cpu = smp_processor_id();
515 p->pid = current->pid;
516 p->when = jiffies;
517 } else
518 memset(p, 0, sizeof(struct track));
519}
520
521static void init_tracking(struct kmem_cache *s, void *object)
522{
523 if (!(s->flags & SLAB_STORE_USER))
524 return;
525
526 set_track(s, object, TRACK_FREE, 0UL);
527 set_track(s, object, TRACK_ALLOC, 0UL);
528}
529
530static void print_track(const char *s, struct track *t)
531{
532 if (!t->addr)
533 return;
534
535 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
536 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
537#ifdef CONFIG_STACKTRACE
538 {
539 int i;
540 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
541 if (t->addrs[i])
542 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
543 else
544 break;
545 }
546#endif
547}
548
549static void print_tracking(struct kmem_cache *s, void *object)
550{
551 if (!(s->flags & SLAB_STORE_USER))
552 return;
553
554 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
555 print_track("Freed", get_track(s, object, TRACK_FREE));
556}
557
558static void print_page_info(struct page *page)
559{
560 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
561 page, page->objects, page->inuse, page->freelist, page->flags);
562
563}
564
565static void slab_bug(struct kmem_cache *s, char *fmt, ...)
566{
567 va_list args;
568 char buf[100];
569
570 va_start(args, fmt);
571 vsnprintf(buf, sizeof(buf), fmt, args);
572 va_end(args);
573 printk(KERN_ERR "========================================"
574 "=====================================\n");
575 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
576 printk(KERN_ERR "----------------------------------------"
577 "-------------------------------------\n\n");
578}
579
580static void slab_fix(struct kmem_cache *s, char *fmt, ...)
581{
582 va_list args;
583 char buf[100];
584
585 va_start(args, fmt);
586 vsnprintf(buf, sizeof(buf), fmt, args);
587 va_end(args);
588 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
589}
590
591static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
592{
593 unsigned int off;
594 u8 *addr = page_address(page);
595
596 print_tracking(s, p);
597
598 print_page_info(page);
599
600 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
601 p, p - addr, get_freepointer(s, p));
602
603 if (p > addr + 16)
604 print_section("Bytes b4 ", p - 16, 16);
605
606 print_section("Object ", p, min_t(unsigned long, s->objsize,
607 PAGE_SIZE));
608 if (s->flags & SLAB_RED_ZONE)
609 print_section("Redzone ", p + s->objsize,
610 s->inuse - s->objsize);
611
612 if (s->offset)
613 off = s->offset + sizeof(void *);
614 else
615 off = s->inuse;
616
617 if (s->flags & SLAB_STORE_USER)
618 off += 2 * sizeof(struct track);
619
620 if (off != s->size)
621
622 print_section("Padding ", p + off, s->size - off);
623
624 dump_stack();
625}
626
627static void object_err(struct kmem_cache *s, struct page *page,
628 u8 *object, char *reason)
629{
630 slab_bug(s, "%s", reason);
631 print_trailer(s, page, object);
632}
633
634static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
635{
636 va_list args;
637 char buf[100];
638
639 va_start(args, fmt);
640 vsnprintf(buf, sizeof(buf), fmt, args);
641 va_end(args);
642 slab_bug(s, "%s", buf);
643 print_page_info(page);
644 dump_stack();
645}
646
647static void init_object(struct kmem_cache *s, void *object, u8 val)
648{
649 u8 *p = object;
650
651 if (s->flags & __OBJECT_POISON) {
652 memset(p, POISON_FREE, s->objsize - 1);
653 p[s->objsize - 1] = POISON_END;
654 }
655
656 if (s->flags & SLAB_RED_ZONE)
657 memset(p + s->objsize, val, s->inuse - s->objsize);
658}
659
660static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
661 void *from, void *to)
662{
663 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
664 memset(from, data, to - from);
665}
666
667static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
668 u8 *object, char *what,
669 u8 *start, unsigned int value, unsigned int bytes)
670{
671 u8 *fault;
672 u8 *end;
673
674 fault = memchr_inv(start, value, bytes);
675 if (!fault)
676 return 1;
677
678 end = start + bytes;
679 while (end > fault && end[-1] == value)
680 end--;
681
682 slab_bug(s, "%s overwritten", what);
683 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
684 fault, end - 1, fault[0], value);
685 print_trailer(s, page, object);
686
687 restore_bytes(s, what, value, fault, end);
688 return 0;
689}
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
730{
731 unsigned long off = s->inuse;
732
733 if (s->offset)
734
735 off += sizeof(void *);
736
737 if (s->flags & SLAB_STORE_USER)
738
739 off += 2 * sizeof(struct track);
740
741 if (s->size == off)
742 return 1;
743
744 return check_bytes_and_report(s, page, p, "Object padding",
745 p + off, POISON_INUSE, s->size - off);
746}
747
748
749static int slab_pad_check(struct kmem_cache *s, struct page *page)
750{
751 u8 *start;
752 u8 *fault;
753 u8 *end;
754 int length;
755 int remainder;
756
757 if (!(s->flags & SLAB_POISON))
758 return 1;
759
760 start = page_address(page);
761 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
762 end = start + length;
763 remainder = length % s->size;
764 if (!remainder)
765 return 1;
766
767 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
768 if (!fault)
769 return 1;
770 while (end > fault && end[-1] == POISON_INUSE)
771 end--;
772
773 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
774 print_section("Padding ", end - remainder, remainder);
775
776 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
777 return 0;
778}
779
780static int check_object(struct kmem_cache *s, struct page *page,
781 void *object, u8 val)
782{
783 u8 *p = object;
784 u8 *endobject = object + s->objsize;
785
786 if (s->flags & SLAB_RED_ZONE) {
787 if (!check_bytes_and_report(s, page, object, "Redzone",
788 endobject, val, s->inuse - s->objsize))
789 return 0;
790 } else {
791 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
792 check_bytes_and_report(s, page, p, "Alignment padding",
793 endobject, POISON_INUSE, s->inuse - s->objsize);
794 }
795 }
796
797 if (s->flags & SLAB_POISON) {
798 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
799 (!check_bytes_and_report(s, page, p, "Poison", p,
800 POISON_FREE, s->objsize - 1) ||
801 !check_bytes_and_report(s, page, p, "Poison",
802 p + s->objsize - 1, POISON_END, 1)))
803 return 0;
804
805
806
807 check_pad_bytes(s, page, p);
808 }
809
810 if (!s->offset && val == SLUB_RED_ACTIVE)
811
812
813
814
815 return 1;
816
817
818 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
819 object_err(s, page, p, "Freepointer corrupt");
820
821
822
823
824
825 set_freepointer(s, p, NULL);
826 return 0;
827 }
828 return 1;
829}
830
831static int check_slab(struct kmem_cache *s, struct page *page)
832{
833 int maxobj;
834
835 VM_BUG_ON(!irqs_disabled());
836
837 if (!PageSlab(page)) {
838 slab_err(s, page, "Not a valid slab page");
839 return 0;
840 }
841
842 maxobj = order_objects(compound_order(page), s->size, s->reserved);
843 if (page->objects > maxobj) {
844 slab_err(s, page, "objects %u > max %u",
845 s->name, page->objects, maxobj);
846 return 0;
847 }
848 if (page->inuse > page->objects) {
849 slab_err(s, page, "inuse %u > max %u",
850 s->name, page->inuse, page->objects);
851 return 0;
852 }
853
854 slab_pad_check(s, page);
855 return 1;
856}
857
858
859
860
861
862static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
863{
864 int nr = 0;
865 void *fp;
866 void *object = NULL;
867 unsigned long max_objects;
868
869 fp = page->freelist;
870 while (fp && nr <= page->objects) {
871 if (fp == search)
872 return 1;
873 if (!check_valid_pointer(s, page, fp)) {
874 if (object) {
875 object_err(s, page, object,
876 "Freechain corrupt");
877 set_freepointer(s, object, NULL);
878 break;
879 } else {
880 slab_err(s, page, "Freepointer corrupt");
881 page->freelist = NULL;
882 page->inuse = page->objects;
883 slab_fix(s, "Freelist cleared");
884 return 0;
885 }
886 break;
887 }
888 object = fp;
889 fp = get_freepointer(s, object);
890 nr++;
891 }
892
893 max_objects = order_objects(compound_order(page), s->size, s->reserved);
894 if (max_objects > MAX_OBJS_PER_PAGE)
895 max_objects = MAX_OBJS_PER_PAGE;
896
897 if (page->objects != max_objects) {
898 slab_err(s, page, "Wrong number of objects. Found %d but "
899 "should be %d", page->objects, max_objects);
900 page->objects = max_objects;
901 slab_fix(s, "Number of objects adjusted.");
902 }
903 if (page->inuse != page->objects - nr) {
904 slab_err(s, page, "Wrong object count. Counter is %d but "
905 "counted were %d", page->inuse, page->objects - nr);
906 page->inuse = page->objects - nr;
907 slab_fix(s, "Object count adjusted.");
908 }
909 return search == NULL;
910}
911
912static void trace(struct kmem_cache *s, struct page *page, void *object,
913 int alloc)
914{
915 if (s->flags & SLAB_TRACE) {
916 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
917 s->name,
918 alloc ? "alloc" : "free",
919 object, page->inuse,
920 page->freelist);
921
922 if (!alloc)
923 print_section("Object ", (void *)object, s->objsize);
924
925 dump_stack();
926 }
927}
928
929
930
931
932
933static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
934{
935 flags &= gfp_allowed_mask;
936 lockdep_trace_alloc(flags);
937 might_sleep_if(flags & __GFP_WAIT);
938
939 return should_failslab(s->objsize, flags, s->flags);
940}
941
942static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
943{
944 flags &= gfp_allowed_mask;
945 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
946 kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
947}
948
949static inline void slab_free_hook(struct kmem_cache *s, void *x)
950{
951 kmemleak_free_recursive(x, s->flags);
952
953
954
955
956
957
958#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
959 {
960 unsigned long flags;
961
962 local_irq_save(flags);
963 kmemcheck_slab_free(s, x, s->objsize);
964 debug_check_no_locks_freed(x, s->objsize);
965 local_irq_restore(flags);
966 }
967#endif
968 if (!(s->flags & SLAB_DEBUG_OBJECTS))
969 debug_check_no_obj_freed(x, s->objsize);
970}
971
972
973
974
975
976
977static void add_full(struct kmem_cache *s,
978 struct kmem_cache_node *n, struct page *page)
979{
980 if (!(s->flags & SLAB_STORE_USER))
981 return;
982
983 list_add(&page->lru, &n->full);
984}
985
986
987
988
989static void remove_full(struct kmem_cache *s, struct page *page)
990{
991 if (!(s->flags & SLAB_STORE_USER))
992 return;
993
994 list_del(&page->lru);
995}
996
997
998static inline unsigned long slabs_node(struct kmem_cache *s, int node)
999{
1000 struct kmem_cache_node *n = get_node(s, node);
1001
1002 return atomic_long_read(&n->nr_slabs);
1003}
1004
1005static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1006{
1007 return atomic_long_read(&n->nr_slabs);
1008}
1009
1010static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1011{
1012 struct kmem_cache_node *n = get_node(s, node);
1013
1014
1015
1016
1017
1018
1019
1020 if (n) {
1021 atomic_long_inc(&n->nr_slabs);
1022 atomic_long_add(objects, &n->total_objects);
1023 }
1024}
1025static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1026{
1027 struct kmem_cache_node *n = get_node(s, node);
1028
1029 atomic_long_dec(&n->nr_slabs);
1030 atomic_long_sub(objects, &n->total_objects);
1031}
1032
1033
1034static void setup_object_debug(struct kmem_cache *s, struct page *page,
1035 void *object)
1036{
1037 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1038 return;
1039
1040 init_object(s, object, SLUB_RED_INACTIVE);
1041 init_tracking(s, object);
1042}
1043
1044static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
1045 void *object, unsigned long addr)
1046{
1047 if (!check_slab(s, page))
1048 goto bad;
1049
1050 if (!check_valid_pointer(s, page, object)) {
1051 object_err(s, page, object, "Freelist Pointer check fails");
1052 goto bad;
1053 }
1054
1055 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1056 goto bad;
1057
1058
1059 if (s->flags & SLAB_STORE_USER)
1060 set_track(s, object, TRACK_ALLOC, addr);
1061 trace(s, page, object, 1);
1062 init_object(s, object, SLUB_RED_ACTIVE);
1063 return 1;
1064
1065bad:
1066 if (PageSlab(page)) {
1067
1068
1069
1070
1071
1072 slab_fix(s, "Marking all objects used");
1073 page->inuse = page->objects;
1074 page->freelist = NULL;
1075 }
1076 return 0;
1077}
1078
1079static noinline int free_debug_processing(struct kmem_cache *s,
1080 struct page *page, void *object, unsigned long addr)
1081{
1082 unsigned long flags;
1083 int rc = 0;
1084
1085 local_irq_save(flags);
1086 slab_lock(page);
1087
1088 if (!check_slab(s, page))
1089 goto fail;
1090
1091 if (!check_valid_pointer(s, page, object)) {
1092 slab_err(s, page, "Invalid object pointer 0x%p", object);
1093 goto fail;
1094 }
1095
1096 if (on_freelist(s, page, object)) {
1097 object_err(s, page, object, "Object already free");
1098 goto fail;
1099 }
1100
1101 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1102 goto out;
1103
1104 if (unlikely(s != page->slab)) {
1105 if (!PageSlab(page)) {
1106 slab_err(s, page, "Attempt to free object(0x%p) "
1107 "outside of slab", object);
1108 } else if (!page->slab) {
1109 printk(KERN_ERR
1110 "SLUB <none>: no slab for object 0x%p.\n",
1111 object);
1112 dump_stack();
1113 } else
1114 object_err(s, page, object,
1115 "page slab pointer corrupt.");
1116 goto fail;
1117 }
1118
1119 if (s->flags & SLAB_STORE_USER)
1120 set_track(s, object, TRACK_FREE, addr);
1121 trace(s, page, object, 0);
1122 init_object(s, object, SLUB_RED_INACTIVE);
1123 rc = 1;
1124out:
1125 slab_unlock(page);
1126 local_irq_restore(flags);
1127 return rc;
1128
1129fail:
1130 slab_fix(s, "Object at 0x%p not freed", object);
1131 goto out;
1132}
1133
1134static int __init setup_slub_debug(char *str)
1135{
1136 slub_debug = DEBUG_DEFAULT_FLAGS;
1137 if (*str++ != '=' || !*str)
1138
1139
1140
1141 goto out;
1142
1143 if (*str == ',')
1144
1145
1146
1147
1148 goto check_slabs;
1149
1150 if (tolower(*str) == 'o') {
1151
1152
1153
1154
1155 disable_higher_order_debug = 1;
1156 goto out;
1157 }
1158
1159 slub_debug = 0;
1160 if (*str == '-')
1161
1162
1163
1164 goto out;
1165
1166
1167
1168
1169 for (; *str && *str != ','; str++) {
1170 switch (tolower(*str)) {
1171 case 'f':
1172 slub_debug |= SLAB_DEBUG_FREE;
1173 break;
1174 case 'z':
1175 slub_debug |= SLAB_RED_ZONE;
1176 break;
1177 case 'p':
1178 slub_debug |= SLAB_POISON;
1179 break;
1180 case 'u':
1181 slub_debug |= SLAB_STORE_USER;
1182 break;
1183 case 't':
1184 slub_debug |= SLAB_TRACE;
1185 break;
1186 case 'a':
1187 slub_debug |= SLAB_FAILSLAB;
1188 break;
1189 default:
1190 printk(KERN_ERR "slub_debug option '%c' "
1191 "unknown. skipped\n", *str);
1192 }
1193 }
1194
1195check_slabs:
1196 if (*str == ',')
1197 slub_debug_slabs = str + 1;
1198out:
1199 return 1;
1200}
1201
1202__setup("slub_debug", setup_slub_debug);
1203
1204static unsigned long kmem_cache_flags(unsigned long objsize,
1205 unsigned long flags, const char *name,
1206 void (*ctor)(void *))
1207{
1208
1209
1210
1211 if (slub_debug && (!slub_debug_slabs ||
1212 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1213 flags |= slub_debug;
1214
1215 return flags;
1216}
1217#else
1218static inline void setup_object_debug(struct kmem_cache *s,
1219 struct page *page, void *object) {}
1220
1221static inline int alloc_debug_processing(struct kmem_cache *s,
1222 struct page *page, void *object, unsigned long addr) { return 0; }
1223
1224static inline int free_debug_processing(struct kmem_cache *s,
1225 struct page *page, void *object, unsigned long addr) { return 0; }
1226
1227static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1228 { return 1; }
1229static inline int check_object(struct kmem_cache *s, struct page *page,
1230 void *object, u8 val) { return 1; }
1231static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1232 struct page *page) {}
1233static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1234static inline unsigned long kmem_cache_flags(unsigned long objsize,
1235 unsigned long flags, const char *name,
1236 void (*ctor)(void *))
1237{
1238 return flags;
1239}
1240#define slub_debug 0
1241
1242#define disable_higher_order_debug 0
1243
1244static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1245 { return 0; }
1246static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1247 { return 0; }
1248static inline void inc_slabs_node(struct kmem_cache *s, int node,
1249 int objects) {}
1250static inline void dec_slabs_node(struct kmem_cache *s, int node,
1251 int objects) {}
1252
1253static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1254 { return 0; }
1255
1256static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1257 void *object) {}
1258
1259static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1260
1261#endif
1262
1263
1264
1265
1266static inline struct page *alloc_slab_page(gfp_t flags, int node,
1267 struct kmem_cache_order_objects oo)
1268{
1269 int order = oo_order(oo);
1270
1271 flags |= __GFP_NOTRACK;
1272
1273 if (node == NUMA_NO_NODE)
1274 return alloc_pages(flags, order);
1275 else
1276 return alloc_pages_exact_node(node, flags, order);
1277}
1278
1279static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1280{
1281 struct page *page;
1282 struct kmem_cache_order_objects oo = s->oo;
1283 gfp_t alloc_gfp;
1284
1285 flags &= gfp_allowed_mask;
1286
1287 if (flags & __GFP_WAIT)
1288 local_irq_enable();
1289
1290 flags |= s->allocflags;
1291
1292
1293
1294
1295
1296 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1297
1298 page = alloc_slab_page(alloc_gfp, node, oo);
1299 if (unlikely(!page)) {
1300 oo = s->min;
1301
1302
1303
1304
1305 page = alloc_slab_page(flags, node, oo);
1306
1307 if (page)
1308 stat(s, ORDER_FALLBACK);
1309 }
1310
1311 if (flags & __GFP_WAIT)
1312 local_irq_disable();
1313
1314 if (!page)
1315 return NULL;
1316
1317 if (kmemcheck_enabled
1318 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1319 int pages = 1 << oo_order(oo);
1320
1321 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1322
1323
1324
1325
1326
1327 if (s->ctor)
1328 kmemcheck_mark_uninitialized_pages(page, pages);
1329 else
1330 kmemcheck_mark_unallocated_pages(page, pages);
1331 }
1332
1333 page->objects = oo_objects(oo);
1334 mod_zone_page_state(page_zone(page),
1335 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1336 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1337 1 << oo_order(oo));
1338
1339 return page;
1340}
1341
1342static void setup_object(struct kmem_cache *s, struct page *page,
1343 void *object)
1344{
1345 setup_object_debug(s, page, object);
1346 if (unlikely(s->ctor))
1347 s->ctor(object);
1348}
1349
1350static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1351{
1352 struct page *page;
1353 void *start;
1354 void *last;
1355 void *p;
1356
1357 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1358
1359 page = allocate_slab(s,
1360 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1361 if (!page)
1362 goto out;
1363
1364 inc_slabs_node(s, page_to_nid(page), page->objects);
1365 page->slab = s;
1366 page->flags |= 1 << PG_slab;
1367
1368 start = page_address(page);
1369
1370 if (unlikely(s->flags & SLAB_POISON))
1371 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1372
1373 last = start;
1374 for_each_object(p, s, start, page->objects) {
1375 setup_object(s, page, last);
1376 set_freepointer(s, last, p);
1377 last = p;
1378 }
1379 setup_object(s, page, last);
1380 set_freepointer(s, last, NULL);
1381
1382 page->freelist = start;
1383 page->inuse = page->objects;
1384 page->frozen = 1;
1385out:
1386 return page;
1387}
1388
1389static void __free_slab(struct kmem_cache *s, struct page *page)
1390{
1391 int order = compound_order(page);
1392 int pages = 1 << order;
1393
1394 if (kmem_cache_debug(s)) {
1395 void *p;
1396
1397 slab_pad_check(s, page);
1398 for_each_object(p, s, page_address(page),
1399 page->objects)
1400 check_object(s, page, p, SLUB_RED_INACTIVE);
1401 }
1402
1403 kmemcheck_free_shadow(page, compound_order(page));
1404
1405 mod_zone_page_state(page_zone(page),
1406 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1407 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1408 -pages);
1409
1410 __ClearPageSlab(page);
1411 reset_page_mapcount(page);
1412 if (current->reclaim_state)
1413 current->reclaim_state->reclaimed_slab += pages;
1414 __free_pages(page, order);
1415}
1416
1417#define need_reserve_slab_rcu \
1418 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1419
1420static void rcu_free_slab(struct rcu_head *h)
1421{
1422 struct page *page;
1423
1424 if (need_reserve_slab_rcu)
1425 page = virt_to_head_page(h);
1426 else
1427 page = container_of((struct list_head *)h, struct page, lru);
1428
1429 __free_slab(page->slab, page);
1430}
1431
1432static void free_slab(struct kmem_cache *s, struct page *page)
1433{
1434 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1435 struct rcu_head *head;
1436
1437 if (need_reserve_slab_rcu) {
1438 int order = compound_order(page);
1439 int offset = (PAGE_SIZE << order) - s->reserved;
1440
1441 VM_BUG_ON(s->reserved != sizeof(*head));
1442 head = page_address(page) + offset;
1443 } else {
1444
1445
1446
1447 head = (void *)&page->lru;
1448 }
1449
1450 call_rcu(head, rcu_free_slab);
1451 } else
1452 __free_slab(s, page);
1453}
1454
1455static void discard_slab(struct kmem_cache *s, struct page *page)
1456{
1457 dec_slabs_node(s, page_to_nid(page), page->objects);
1458 free_slab(s, page);
1459}
1460
1461
1462
1463
1464
1465
1466static inline void add_partial(struct kmem_cache_node *n,
1467 struct page *page, int tail)
1468{
1469 n->nr_partial++;
1470 if (tail == DEACTIVATE_TO_TAIL)
1471 list_add_tail(&page->lru, &n->partial);
1472 else
1473 list_add(&page->lru, &n->partial);
1474}
1475
1476
1477
1478
1479static inline void remove_partial(struct kmem_cache_node *n,
1480 struct page *page)
1481{
1482 list_del(&page->lru);
1483 n->nr_partial--;
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494static inline void *acquire_slab(struct kmem_cache *s,
1495 struct kmem_cache_node *n, struct page *page,
1496 int mode)
1497{
1498 void *freelist;
1499 unsigned long counters;
1500 struct page new;
1501
1502
1503
1504
1505
1506
1507 do {
1508 freelist = page->freelist;
1509 counters = page->counters;
1510 new.counters = counters;
1511 if (mode)
1512 new.inuse = page->objects;
1513
1514 VM_BUG_ON(new.frozen);
1515 new.frozen = 1;
1516
1517 } while (!__cmpxchg_double_slab(s, page,
1518 freelist, counters,
1519 NULL, new.counters,
1520 "lock and freeze"));
1521
1522 remove_partial(n, page);
1523 return freelist;
1524}
1525
1526static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1527
1528
1529
1530
1531static void *get_partial_node(struct kmem_cache *s,
1532 struct kmem_cache_node *n, struct kmem_cache_cpu *c)
1533{
1534 struct page *page, *page2;
1535 void *object = NULL;
1536
1537
1538
1539
1540
1541
1542
1543 if (!n || !n->nr_partial)
1544 return NULL;
1545
1546 spin_lock(&n->list_lock);
1547 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1548 void *t = acquire_slab(s, n, page, object == NULL);
1549 int available;
1550
1551 if (!t)
1552 break;
1553
1554 if (!object) {
1555 c->page = page;
1556 c->node = page_to_nid(page);
1557 stat(s, ALLOC_FROM_PARTIAL);
1558 object = t;
1559 available = page->objects - page->inuse;
1560 } else {
1561 page->freelist = t;
1562 available = put_cpu_partial(s, page, 0);
1563 }
1564 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1565 break;
1566
1567 }
1568 spin_unlock(&n->list_lock);
1569 return object;
1570}
1571
1572
1573
1574
1575static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags,
1576 struct kmem_cache_cpu *c)
1577{
1578#ifdef CONFIG_NUMA
1579 struct zonelist *zonelist;
1580 struct zoneref *z;
1581 struct zone *zone;
1582 enum zone_type high_zoneidx = gfp_zone(flags);
1583 void *object;
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603 if (!s->remote_node_defrag_ratio ||
1604 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1605 return NULL;
1606
1607 get_mems_allowed();
1608 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1609 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1610 struct kmem_cache_node *n;
1611
1612 n = get_node(s, zone_to_nid(zone));
1613
1614 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1615 n->nr_partial > s->min_partial) {
1616 object = get_partial_node(s, n, c);
1617 if (object) {
1618 put_mems_allowed();
1619 return object;
1620 }
1621 }
1622 }
1623 put_mems_allowed();
1624#endif
1625 return NULL;
1626}
1627
1628
1629
1630
1631static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1632 struct kmem_cache_cpu *c)
1633{
1634 void *object;
1635 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1636
1637 object = get_partial_node(s, get_node(s, searchnode), c);
1638 if (object || node != NUMA_NO_NODE)
1639 return object;
1640
1641 return get_any_partial(s, flags, c);
1642}
1643
1644#ifdef CONFIG_PREEMPT
1645
1646
1647
1648
1649
1650#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1651#else
1652
1653
1654
1655
1656#define TID_STEP 1
1657#endif
1658
1659static inline unsigned long next_tid(unsigned long tid)
1660{
1661 return tid + TID_STEP;
1662}
1663
1664static inline unsigned int tid_to_cpu(unsigned long tid)
1665{
1666 return tid % TID_STEP;
1667}
1668
1669static inline unsigned long tid_to_event(unsigned long tid)
1670{
1671 return tid / TID_STEP;
1672}
1673
1674static inline unsigned int init_tid(int cpu)
1675{
1676 return cpu;
1677}
1678
1679static inline void note_cmpxchg_failure(const char *n,
1680 const struct kmem_cache *s, unsigned long tid)
1681{
1682#ifdef SLUB_DEBUG_CMPXCHG
1683 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1684
1685 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1686
1687#ifdef CONFIG_PREEMPT
1688 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1689 printk("due to cpu change %d -> %d\n",
1690 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1691 else
1692#endif
1693 if (tid_to_event(tid) != tid_to_event(actual_tid))
1694 printk("due to cpu running other code. Event %ld->%ld\n",
1695 tid_to_event(tid), tid_to_event(actual_tid));
1696 else
1697 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1698 actual_tid, tid, next_tid(tid));
1699#endif
1700 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1701}
1702
1703void init_kmem_cache_cpus(struct kmem_cache *s)
1704{
1705 int cpu;
1706
1707 for_each_possible_cpu(cpu)
1708 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1709}
1710
1711
1712
1713
1714static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1715{
1716 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1717 struct page *page = c->page;
1718 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1719 int lock = 0;
1720 enum slab_modes l = M_NONE, m = M_NONE;
1721 void *freelist;
1722 void *nextfree;
1723 int tail = DEACTIVATE_TO_HEAD;
1724 struct page new;
1725 struct page old;
1726
1727 if (page->freelist) {
1728 stat(s, DEACTIVATE_REMOTE_FREES);
1729 tail = DEACTIVATE_TO_TAIL;
1730 }
1731
1732 c->tid = next_tid(c->tid);
1733 c->page = NULL;
1734 freelist = c->freelist;
1735 c->freelist = NULL;
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1746 void *prior;
1747 unsigned long counters;
1748
1749 do {
1750 prior = page->freelist;
1751 counters = page->counters;
1752 set_freepointer(s, freelist, prior);
1753 new.counters = counters;
1754 new.inuse--;
1755 VM_BUG_ON(!new.frozen);
1756
1757 } while (!__cmpxchg_double_slab(s, page,
1758 prior, counters,
1759 freelist, new.counters,
1760 "drain percpu freelist"));
1761
1762 freelist = nextfree;
1763 }
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779redo:
1780
1781 old.freelist = page->freelist;
1782 old.counters = page->counters;
1783 VM_BUG_ON(!old.frozen);
1784
1785
1786 new.counters = old.counters;
1787 if (freelist) {
1788 new.inuse--;
1789 set_freepointer(s, freelist, old.freelist);
1790 new.freelist = freelist;
1791 } else
1792 new.freelist = old.freelist;
1793
1794 new.frozen = 0;
1795
1796 if (!new.inuse && n->nr_partial > s->min_partial)
1797 m = M_FREE;
1798 else if (new.freelist) {
1799 m = M_PARTIAL;
1800 if (!lock) {
1801 lock = 1;
1802
1803
1804
1805
1806
1807 spin_lock(&n->list_lock);
1808 }
1809 } else {
1810 m = M_FULL;
1811 if (kmem_cache_debug(s) && !lock) {
1812 lock = 1;
1813
1814
1815
1816
1817
1818 spin_lock(&n->list_lock);
1819 }
1820 }
1821
1822 if (l != m) {
1823
1824 if (l == M_PARTIAL)
1825
1826 remove_partial(n, page);
1827
1828 else if (l == M_FULL)
1829
1830 remove_full(s, page);
1831
1832 if (m == M_PARTIAL) {
1833
1834 add_partial(n, page, tail);
1835 stat(s, tail);
1836
1837 } else if (m == M_FULL) {
1838
1839 stat(s, DEACTIVATE_FULL);
1840 add_full(s, n, page);
1841
1842 }
1843 }
1844
1845 l = m;
1846 if (!__cmpxchg_double_slab(s, page,
1847 old.freelist, old.counters,
1848 new.freelist, new.counters,
1849 "unfreezing slab"))
1850 goto redo;
1851
1852 if (lock)
1853 spin_unlock(&n->list_lock);
1854
1855 if (m == M_FREE) {
1856 stat(s, DEACTIVATE_EMPTY);
1857 discard_slab(s, page);
1858 stat(s, FREE_SLAB);
1859 }
1860}
1861
1862
1863static void unfreeze_partials(struct kmem_cache *s)
1864{
1865 struct kmem_cache_node *n = NULL;
1866 struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
1867 struct page *page, *discard_page = NULL;
1868
1869 while ((page = c->partial)) {
1870 enum slab_modes { M_PARTIAL, M_FREE };
1871 enum slab_modes l, m;
1872 struct page new;
1873 struct page old;
1874
1875 c->partial = page->next;
1876 l = M_FREE;
1877
1878 do {
1879
1880 old.freelist = page->freelist;
1881 old.counters = page->counters;
1882 VM_BUG_ON(!old.frozen);
1883
1884 new.counters = old.counters;
1885 new.freelist = old.freelist;
1886
1887 new.frozen = 0;
1888
1889 if (!new.inuse && (!n || n->nr_partial > s->min_partial))
1890 m = M_FREE;
1891 else {
1892 struct kmem_cache_node *n2 = get_node(s,
1893 page_to_nid(page));
1894
1895 m = M_PARTIAL;
1896 if (n != n2) {
1897 if (n)
1898 spin_unlock(&n->list_lock);
1899
1900 n = n2;
1901 spin_lock(&n->list_lock);
1902 }
1903 }
1904
1905 if (l != m) {
1906 if (l == M_PARTIAL) {
1907 remove_partial(n, page);
1908 stat(s, FREE_REMOVE_PARTIAL);
1909 } else {
1910 add_partial(n, page,
1911 DEACTIVATE_TO_TAIL);
1912 stat(s, FREE_ADD_PARTIAL);
1913 }
1914
1915 l = m;
1916 }
1917
1918 } while (!cmpxchg_double_slab(s, page,
1919 old.freelist, old.counters,
1920 new.freelist, new.counters,
1921 "unfreezing slab"));
1922
1923 if (m == M_FREE) {
1924 page->next = discard_page;
1925 discard_page = page;
1926 }
1927 }
1928
1929 if (n)
1930 spin_unlock(&n->list_lock);
1931
1932 while (discard_page) {
1933 page = discard_page;
1934 discard_page = discard_page->next;
1935
1936 stat(s, DEACTIVATE_EMPTY);
1937 discard_slab(s, page);
1938 stat(s, FREE_SLAB);
1939 }
1940}
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1952{
1953 struct page *oldpage;
1954 int pages;
1955 int pobjects;
1956
1957 do {
1958 pages = 0;
1959 pobjects = 0;
1960 oldpage = this_cpu_read(s->cpu_slab->partial);
1961
1962 if (oldpage) {
1963 pobjects = oldpage->pobjects;
1964 pages = oldpage->pages;
1965 if (drain && pobjects > s->cpu_partial) {
1966 unsigned long flags;
1967
1968
1969
1970
1971 local_irq_save(flags);
1972 unfreeze_partials(s);
1973 local_irq_restore(flags);
1974 pobjects = 0;
1975 pages = 0;
1976 }
1977 }
1978
1979 pages++;
1980 pobjects += page->objects - page->inuse;
1981
1982 page->pages = pages;
1983 page->pobjects = pobjects;
1984 page->next = oldpage;
1985
1986 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1987 stat(s, CPU_PARTIAL_FREE);
1988 return pobjects;
1989}
1990
1991static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1992{
1993 stat(s, CPUSLAB_FLUSH);
1994 deactivate_slab(s, c);
1995}
1996
1997
1998
1999
2000
2001
2002static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2003{
2004 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2005
2006 if (likely(c)) {
2007 if (c->page)
2008 flush_slab(s, c);
2009
2010 unfreeze_partials(s);
2011 }
2012}
2013
2014static void flush_cpu_slab(void *d)
2015{
2016 struct kmem_cache *s = d;
2017
2018 __flush_cpu_slab(s, smp_processor_id());
2019}
2020
2021static void flush_all(struct kmem_cache *s)
2022{
2023 on_each_cpu(flush_cpu_slab, s, 1);
2024}
2025
2026
2027
2028
2029
2030static inline int node_match(struct kmem_cache_cpu *c, int node)
2031{
2032#ifdef CONFIG_NUMA
2033 if (node != NUMA_NO_NODE && c->node != node)
2034 return 0;
2035#endif
2036 return 1;
2037}
2038
2039static int count_free(struct page *page)
2040{
2041 return page->objects - page->inuse;
2042}
2043
2044static unsigned long count_partial(struct kmem_cache_node *n,
2045 int (*get_count)(struct page *))
2046{
2047 unsigned long flags;
2048 unsigned long x = 0;
2049 struct page *page;
2050
2051 spin_lock_irqsave(&n->list_lock, flags);
2052 list_for_each_entry(page, &n->partial, lru)
2053 x += get_count(page);
2054 spin_unlock_irqrestore(&n->list_lock, flags);
2055 return x;
2056}
2057
2058static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2059{
2060#ifdef CONFIG_SLUB_DEBUG
2061 return atomic_long_read(&n->total_objects);
2062#else
2063 return 0;
2064#endif
2065}
2066
2067static noinline void
2068slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2069{
2070 int node;
2071
2072 printk(KERN_WARNING
2073 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2074 nid, gfpflags);
2075 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2076 "default order: %d, min order: %d\n", s->name, s->objsize,
2077 s->size, oo_order(s->oo), oo_order(s->min));
2078
2079 if (oo_order(s->min) > get_order(s->objsize))
2080 printk(KERN_WARNING " %s debugging increased min order, use "
2081 "slub_debug=O to disable.\n", s->name);
2082
2083 for_each_online_node(node) {
2084 struct kmem_cache_node *n = get_node(s, node);
2085 unsigned long nr_slabs;
2086 unsigned long nr_objs;
2087 unsigned long nr_free;
2088
2089 if (!n)
2090 continue;
2091
2092 nr_free = count_partial(n, count_free);
2093 nr_slabs = node_nr_slabs(n);
2094 nr_objs = node_nr_objs(n);
2095
2096 printk(KERN_WARNING
2097 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2098 node, nr_slabs, nr_objs, nr_free);
2099 }
2100}
2101
2102static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2103 int node, struct kmem_cache_cpu **pc)
2104{
2105 void *object;
2106 struct kmem_cache_cpu *c;
2107 struct page *page = new_slab(s, flags, node);
2108
2109 if (page) {
2110 c = __this_cpu_ptr(s->cpu_slab);
2111 if (c->page)
2112 flush_slab(s, c);
2113
2114
2115
2116
2117
2118 object = page->freelist;
2119 page->freelist = NULL;
2120
2121 stat(s, ALLOC_SLAB);
2122 c->node = page_to_nid(page);
2123 c->page = page;
2124 *pc = c;
2125 } else
2126 object = NULL;
2127
2128 return object;
2129}
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2140{
2141 struct page new;
2142 unsigned long counters;
2143 void *freelist;
2144
2145 do {
2146 freelist = page->freelist;
2147 counters = page->counters;
2148 new.counters = counters;
2149 VM_BUG_ON(!new.frozen);
2150
2151 new.inuse = page->objects;
2152 new.frozen = freelist != NULL;
2153
2154 } while (!cmpxchg_double_slab(s, page,
2155 freelist, counters,
2156 NULL, new.counters,
2157 "get_freelist"));
2158
2159 return freelist;
2160}
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2179 unsigned long addr, struct kmem_cache_cpu *c)
2180{
2181 void **object;
2182 unsigned long flags;
2183
2184 local_irq_save(flags);
2185#ifdef CONFIG_PREEMPT
2186
2187
2188
2189
2190
2191 c = this_cpu_ptr(s->cpu_slab);
2192#endif
2193
2194 if (!c->page)
2195 goto new_slab;
2196redo:
2197 if (unlikely(!node_match(c, node))) {
2198 stat(s, ALLOC_NODE_MISMATCH);
2199 deactivate_slab(s, c);
2200 goto new_slab;
2201 }
2202
2203
2204 object = c->freelist;
2205 if (object)
2206 goto load_freelist;
2207
2208 stat(s, ALLOC_SLOWPATH);
2209
2210 object = get_freelist(s, c->page);
2211
2212 if (!object) {
2213 c->page = NULL;
2214 stat(s, DEACTIVATE_BYPASS);
2215 goto new_slab;
2216 }
2217
2218 stat(s, ALLOC_REFILL);
2219
2220load_freelist:
2221 c->freelist = get_freepointer(s, object);
2222 c->tid = next_tid(c->tid);
2223 local_irq_restore(flags);
2224 return object;
2225
2226new_slab:
2227
2228 if (c->partial) {
2229 c->page = c->partial;
2230 c->partial = c->page->next;
2231 c->node = page_to_nid(c->page);
2232 stat(s, CPU_PARTIAL_ALLOC);
2233 c->freelist = NULL;
2234 goto redo;
2235 }
2236
2237
2238 object = get_partial(s, gfpflags, node, c);
2239
2240 if (unlikely(!object)) {
2241
2242 object = new_slab_objects(s, gfpflags, node, &c);
2243
2244 if (unlikely(!object)) {
2245 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2246 slab_out_of_memory(s, gfpflags, node);
2247
2248 local_irq_restore(flags);
2249 return NULL;
2250 }
2251 }
2252
2253 if (likely(!kmem_cache_debug(s)))
2254 goto load_freelist;
2255
2256
2257 if (!alloc_debug_processing(s, c->page, object, addr))
2258 goto new_slab;
2259
2260 c->freelist = get_freepointer(s, object);
2261 deactivate_slab(s, c);
2262 c->node = NUMA_NO_NODE;
2263 local_irq_restore(flags);
2264 return object;
2265}
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277static __always_inline void *slab_alloc(struct kmem_cache *s,
2278 gfp_t gfpflags, int node, unsigned long addr)
2279{
2280 void **object;
2281 struct kmem_cache_cpu *c;
2282 unsigned long tid;
2283
2284 if (slab_pre_alloc_hook(s, gfpflags))
2285 return NULL;
2286
2287redo:
2288
2289
2290
2291
2292
2293
2294
2295 c = __this_cpu_ptr(s->cpu_slab);
2296
2297
2298
2299
2300
2301
2302
2303 tid = c->tid;
2304 barrier();
2305
2306 object = c->freelist;
2307 if (unlikely(!object || !node_match(c, node)))
2308
2309 object = __slab_alloc(s, gfpflags, node, addr, c);
2310
2311 else {
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324 if (unlikely(!this_cpu_cmpxchg_double(
2325 s->cpu_slab->freelist, s->cpu_slab->tid,
2326 object, tid,
2327 get_freepointer_safe(s, object), next_tid(tid)))) {
2328
2329 note_cmpxchg_failure("slab_alloc", s, tid);
2330 goto redo;
2331 }
2332 stat(s, ALLOC_FASTPATH);
2333 }
2334
2335 if (unlikely(gfpflags & __GFP_ZERO) && object)
2336 memset(object, 0, s->objsize);
2337
2338 slab_post_alloc_hook(s, gfpflags, object);
2339
2340 return object;
2341}
2342
2343void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2344{
2345 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2346
2347 trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);
2348
2349 return ret;
2350}
2351EXPORT_SYMBOL(kmem_cache_alloc);
2352
2353#ifdef CONFIG_TRACING
2354void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2355{
2356 void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_);
2357 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2358 return ret;
2359}
2360EXPORT_SYMBOL(kmem_cache_alloc_trace);
2361
2362void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2363{
2364 void *ret = kmalloc_order(size, flags, order);
2365 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2366 return ret;
2367}
2368EXPORT_SYMBOL(kmalloc_order_trace);
2369#endif
2370
2371#ifdef CONFIG_NUMA
2372void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2373{
2374 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2375
2376 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2377 s->objsize, s->size, gfpflags, node);
2378
2379 return ret;
2380}
2381EXPORT_SYMBOL(kmem_cache_alloc_node);
2382
2383#ifdef CONFIG_TRACING
2384void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2385 gfp_t gfpflags,
2386 int node, size_t size)
2387{
2388 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
2389
2390 trace_kmalloc_node(_RET_IP_, ret,
2391 size, s->size, gfpflags, node);
2392 return ret;
2393}
2394EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2395#endif
2396#endif
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406static void __slab_free(struct kmem_cache *s, struct page *page,
2407 void *x, unsigned long addr)
2408{
2409 void *prior;
2410 void **object = (void *)x;
2411 int was_frozen;
2412 int inuse;
2413 struct page new;
2414 unsigned long counters;
2415 struct kmem_cache_node *n = NULL;
2416 unsigned long uninitialized_var(flags);
2417
2418 stat(s, FREE_SLOWPATH);
2419
2420 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2421 return;
2422
2423 do {
2424 prior = page->freelist;
2425 counters = page->counters;
2426 set_freepointer(s, object, prior);
2427 new.counters = counters;
2428 was_frozen = new.frozen;
2429 new.inuse--;
2430 if ((!new.inuse || !prior) && !was_frozen && !n) {
2431
2432 if (!kmem_cache_debug(s) && !prior)
2433
2434
2435
2436
2437
2438 new.frozen = 1;
2439
2440 else {
2441
2442 n = get_node(s, page_to_nid(page));
2443
2444
2445
2446
2447
2448
2449
2450
2451 spin_lock_irqsave(&n->list_lock, flags);
2452
2453 }
2454 }
2455 inuse = new.inuse;
2456
2457 } while (!cmpxchg_double_slab(s, page,
2458 prior, counters,
2459 object, new.counters,
2460 "__slab_free"));
2461
2462 if (likely(!n)) {
2463
2464
2465
2466
2467
2468 if (new.frozen && !was_frozen)
2469 put_cpu_partial(s, page, 1);
2470
2471
2472
2473
2474
2475 if (was_frozen)
2476 stat(s, FREE_FROZEN);
2477 return;
2478 }
2479
2480
2481
2482
2483
2484 if (was_frozen)
2485 stat(s, FREE_FROZEN);
2486 else {
2487 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2488 goto slab_empty;
2489
2490
2491
2492
2493
2494 if (unlikely(!prior)) {
2495 remove_full(s, page);
2496 add_partial(n, page, DEACTIVATE_TO_TAIL);
2497 stat(s, FREE_ADD_PARTIAL);
2498 }
2499 }
2500 spin_unlock_irqrestore(&n->list_lock, flags);
2501 return;
2502
2503slab_empty:
2504 if (prior) {
2505
2506
2507
2508 remove_partial(n, page);
2509 stat(s, FREE_REMOVE_PARTIAL);
2510 } else
2511
2512 remove_full(s, page);
2513
2514 spin_unlock_irqrestore(&n->list_lock, flags);
2515 stat(s, FREE_SLAB);
2516 discard_slab(s, page);
2517}
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530static __always_inline void slab_free(struct kmem_cache *s,
2531 struct page *page, void *x, unsigned long addr)
2532{
2533 void **object = (void *)x;
2534 struct kmem_cache_cpu *c;
2535 unsigned long tid;
2536
2537 slab_free_hook(s, x);
2538
2539redo:
2540
2541
2542
2543
2544
2545
2546 c = __this_cpu_ptr(s->cpu_slab);
2547
2548 tid = c->tid;
2549 barrier();
2550
2551 if (likely(page == c->page)) {
2552 set_freepointer(s, object, c->freelist);
2553
2554 if (unlikely(!this_cpu_cmpxchg_double(
2555 s->cpu_slab->freelist, s->cpu_slab->tid,
2556 c->freelist, tid,
2557 object, next_tid(tid)))) {
2558
2559 note_cmpxchg_failure("slab_free", s, tid);
2560 goto redo;
2561 }
2562 stat(s, FREE_FASTPATH);
2563 } else
2564 __slab_free(s, page, x, addr);
2565
2566}
2567
2568void kmem_cache_free(struct kmem_cache *s, void *x)
2569{
2570 struct page *page;
2571
2572 page = virt_to_head_page(x);
2573
2574 slab_free(s, page, x, _RET_IP_);
2575
2576 trace_kmem_cache_free(_RET_IP_, x);
2577}
2578EXPORT_SYMBOL(kmem_cache_free);
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599static int slub_min_order;
2600static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2601static int slub_min_objects;
2602
2603
2604
2605
2606
2607static int slub_nomerge;
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634static inline int slab_order(int size, int min_objects,
2635 int max_order, int fract_leftover, int reserved)
2636{
2637 int order;
2638 int rem;
2639 int min_order = slub_min_order;
2640
2641 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2642 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2643
2644 for (order = max(min_order,
2645 fls(min_objects * size - 1) - PAGE_SHIFT);
2646 order <= max_order; order++) {
2647
2648 unsigned long slab_size = PAGE_SIZE << order;
2649
2650 if (slab_size < min_objects * size + reserved)
2651 continue;
2652
2653 rem = (slab_size - reserved) % size;
2654
2655 if (rem <= slab_size / fract_leftover)
2656 break;
2657
2658 }
2659
2660 return order;
2661}
2662
2663static inline int calculate_order(int size, int reserved)
2664{
2665 int order;
2666 int min_objects;
2667 int fraction;
2668 int max_objects;
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678 min_objects = slub_min_objects;
2679 if (!min_objects)
2680 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2681 max_objects = order_objects(slub_max_order, size, reserved);
2682 min_objects = min(min_objects, max_objects);
2683
2684 while (min_objects > 1) {
2685 fraction = 16;
2686 while (fraction >= 4) {
2687 order = slab_order(size, min_objects,
2688 slub_max_order, fraction, reserved);
2689 if (order <= slub_max_order)
2690 return order;
2691 fraction /= 2;
2692 }
2693 min_objects--;
2694 }
2695
2696
2697
2698
2699
2700 order = slab_order(size, 1, slub_max_order, 1, reserved);
2701 if (order <= slub_max_order)
2702 return order;
2703
2704
2705
2706
2707 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2708 if (order < MAX_ORDER)
2709 return order;
2710 return -ENOSYS;
2711}
2712
2713
2714
2715
2716static unsigned long calculate_alignment(unsigned long flags,
2717 unsigned long align, unsigned long size)
2718{
2719
2720
2721
2722
2723
2724
2725
2726 if (flags & SLAB_HWCACHE_ALIGN) {
2727 unsigned long ralign = cache_line_size();
2728 while (size <= ralign / 2)
2729 ralign /= 2;
2730 align = max(align, ralign);
2731 }
2732
2733 if (align < ARCH_SLAB_MINALIGN)
2734 align = ARCH_SLAB_MINALIGN;
2735
2736 return ALIGN(align, sizeof(void *));
2737}
2738
2739static void
2740init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2741{
2742 n->nr_partial = 0;
2743 spin_lock_init(&n->list_lock);
2744 INIT_LIST_HEAD(&n->partial);
2745#ifdef CONFIG_SLUB_DEBUG
2746 atomic_long_set(&n->nr_slabs, 0);
2747 atomic_long_set(&n->total_objects, 0);
2748 INIT_LIST_HEAD(&n->full);
2749#endif
2750}
2751
2752static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2753{
2754 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2755 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
2756
2757
2758
2759
2760
2761 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2762 2 * sizeof(void *));
2763
2764 if (!s->cpu_slab)
2765 return 0;
2766
2767 init_kmem_cache_cpus(s);
2768
2769 return 1;
2770}
2771
2772static struct kmem_cache *kmem_cache_node;
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783static void early_kmem_cache_node_alloc(int node)
2784{
2785 struct page *page;
2786 struct kmem_cache_node *n;
2787
2788 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2789
2790 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2791
2792 BUG_ON(!page);
2793 if (page_to_nid(page) != node) {
2794 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2795 "node %d\n", node);
2796 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2797 "in order to be able to continue\n");
2798 }
2799
2800 n = page->freelist;
2801 BUG_ON(!n);
2802 page->freelist = get_freepointer(kmem_cache_node, n);
2803 page->inuse = 1;
2804 page->frozen = 0;
2805 kmem_cache_node->node[node] = n;
2806#ifdef CONFIG_SLUB_DEBUG
2807 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2808 init_tracking(kmem_cache_node, n);
2809#endif
2810 init_kmem_cache_node(n, kmem_cache_node);
2811 inc_slabs_node(kmem_cache_node, node, page->objects);
2812
2813 add_partial(n, page, DEACTIVATE_TO_HEAD);
2814}
2815
2816static void free_kmem_cache_nodes(struct kmem_cache *s)
2817{
2818 int node;
2819
2820 for_each_node_state(node, N_NORMAL_MEMORY) {
2821 struct kmem_cache_node *n = s->node[node];
2822
2823 if (n)
2824 kmem_cache_free(kmem_cache_node, n);
2825
2826 s->node[node] = NULL;
2827 }
2828}
2829
2830static int init_kmem_cache_nodes(struct kmem_cache *s)
2831{
2832 int node;
2833
2834 for_each_node_state(node, N_NORMAL_MEMORY) {
2835 struct kmem_cache_node *n;
2836
2837 if (slab_state == DOWN) {
2838 early_kmem_cache_node_alloc(node);
2839 continue;
2840 }
2841 n = kmem_cache_alloc_node(kmem_cache_node,
2842 GFP_KERNEL, node);
2843
2844 if (!n) {
2845 free_kmem_cache_nodes(s);
2846 return 0;
2847 }
2848
2849 s->node[node] = n;
2850 init_kmem_cache_node(n, s);
2851 }
2852 return 1;
2853}
2854
2855static void set_min_partial(struct kmem_cache *s, unsigned long min)
2856{
2857 if (min < MIN_PARTIAL)
2858 min = MIN_PARTIAL;
2859 else if (min > MAX_PARTIAL)
2860 min = MAX_PARTIAL;
2861 s->min_partial = min;
2862}
2863
2864
2865
2866
2867
2868static int calculate_sizes(struct kmem_cache *s, int forced_order)
2869{
2870 unsigned long flags = s->flags;
2871 unsigned long size = s->objsize;
2872 unsigned long align = s->align;
2873 int order;
2874
2875
2876
2877
2878
2879
2880 size = ALIGN(size, sizeof(void *));
2881
2882#ifdef CONFIG_SLUB_DEBUG
2883
2884
2885
2886
2887
2888 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2889 !s->ctor)
2890 s->flags |= __OBJECT_POISON;
2891 else
2892 s->flags &= ~__OBJECT_POISON;
2893
2894
2895
2896
2897
2898
2899
2900 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2901 size += sizeof(void *);
2902#endif
2903
2904
2905
2906
2907
2908 s->inuse = size;
2909
2910 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2911 s->ctor)) {
2912
2913
2914
2915
2916
2917
2918
2919
2920 s->offset = size;
2921 size += sizeof(void *);
2922 }
2923
2924#ifdef CONFIG_SLUB_DEBUG
2925 if (flags & SLAB_STORE_USER)
2926
2927
2928
2929
2930 size += 2 * sizeof(struct track);
2931
2932 if (flags & SLAB_RED_ZONE)
2933
2934
2935
2936
2937
2938
2939
2940 size += sizeof(void *);
2941#endif
2942
2943
2944
2945
2946
2947
2948 align = calculate_alignment(flags, align, s->objsize);
2949 s->align = align;
2950
2951
2952
2953
2954
2955
2956 size = ALIGN(size, align);
2957 s->size = size;
2958 if (forced_order >= 0)
2959 order = forced_order;
2960 else
2961 order = calculate_order(size, s->reserved);
2962
2963 if (order < 0)
2964 return 0;
2965
2966 s->allocflags = 0;
2967 if (order)
2968 s->allocflags |= __GFP_COMP;
2969
2970 if (s->flags & SLAB_CACHE_DMA)
2971 s->allocflags |= SLUB_DMA;
2972
2973 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2974 s->allocflags |= __GFP_RECLAIMABLE;
2975
2976
2977
2978
2979 s->oo = oo_make(order, size, s->reserved);
2980 s->min = oo_make(get_order(size), size, s->reserved);
2981 if (oo_objects(s->oo) > oo_objects(s->max))
2982 s->max = s->oo;
2983
2984 return !!oo_objects(s->oo);
2985
2986}
2987
2988static int kmem_cache_open(struct kmem_cache *s,
2989 const char *name, size_t size,
2990 size_t align, unsigned long flags,
2991 void (*ctor)(void *))
2992{
2993 memset(s, 0, kmem_size);
2994 s->name = name;
2995 s->ctor = ctor;
2996 s->objsize = size;
2997 s->align = align;
2998 s->flags = kmem_cache_flags(size, flags, name, ctor);
2999 s->reserved = 0;
3000
3001 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3002 s->reserved = sizeof(struct rcu_head);
3003
3004 if (!calculate_sizes(s, -1))
3005 goto error;
3006 if (disable_higher_order_debug) {
3007
3008
3009
3010
3011 if (get_order(s->size) > get_order(s->objsize)) {
3012 s->flags &= ~DEBUG_METADATA_FLAGS;
3013 s->offset = 0;
3014 if (!calculate_sizes(s, -1))
3015 goto error;
3016 }
3017 }
3018
3019#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3020 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3021 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3022
3023 s->flags |= __CMPXCHG_DOUBLE;
3024#endif
3025
3026
3027
3028
3029
3030 set_min_partial(s, ilog2(s->size) / 2);
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049 if (kmem_cache_debug(s))
3050 s->cpu_partial = 0;
3051 else if (s->size >= PAGE_SIZE)
3052 s->cpu_partial = 2;
3053 else if (s->size >= 1024)
3054 s->cpu_partial = 6;
3055 else if (s->size >= 256)
3056 s->cpu_partial = 13;
3057 else
3058 s->cpu_partial = 30;
3059
3060 s->refcount = 1;
3061#ifdef CONFIG_NUMA
3062 s->remote_node_defrag_ratio = 1000;
3063#endif
3064 if (!init_kmem_cache_nodes(s))
3065 goto error;
3066
3067 if (alloc_kmem_cache_cpus(s))
3068 return 1;
3069
3070 free_kmem_cache_nodes(s);
3071error:
3072 if (flags & SLAB_PANIC)
3073 panic("Cannot create slab %s size=%lu realsize=%u "
3074 "order=%u offset=%u flags=%lx\n",
3075 s->name, (unsigned long)size, s->size, oo_order(s->oo),
3076 s->offset, flags);
3077 return 0;
3078}
3079
3080
3081
3082
3083unsigned int kmem_cache_size(struct kmem_cache *s)
3084{
3085 return s->objsize;
3086}
3087EXPORT_SYMBOL(kmem_cache_size);
3088
3089static void list_slab_objects(struct kmem_cache *s, struct page *page,
3090 const char *text)
3091{
3092#ifdef CONFIG_SLUB_DEBUG
3093 void *addr = page_address(page);
3094 void *p;
3095 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3096 sizeof(long), GFP_ATOMIC);
3097 if (!map)
3098 return;
3099 slab_err(s, page, "%s", text);
3100 slab_lock(page);
3101
3102 get_map(s, page, map);
3103 for_each_object(p, s, addr, page->objects) {
3104
3105 if (!test_bit(slab_index(p, s, addr), map)) {
3106 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3107 p, p - addr);
3108 print_tracking(s, p);
3109 }
3110 }
3111 slab_unlock(page);
3112 kfree(map);
3113#endif
3114}
3115
3116
3117
3118
3119
3120
3121static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3122{
3123 struct page *page, *h;
3124
3125 list_for_each_entry_safe(page, h, &n->partial, lru) {
3126 if (!page->inuse) {
3127 remove_partial(n, page);
3128 discard_slab(s, page);
3129 } else {
3130 list_slab_objects(s, page,
3131 "Objects remaining on kmem_cache_close()");
3132 }
3133 }
3134}
3135
3136
3137
3138
3139static inline int kmem_cache_close(struct kmem_cache *s)
3140{
3141 int node;
3142
3143 flush_all(s);
3144 free_percpu(s->cpu_slab);
3145
3146 for_each_node_state(node, N_NORMAL_MEMORY) {
3147 struct kmem_cache_node *n = get_node(s, node);
3148
3149 free_partial(s, n);
3150 if (n->nr_partial || slabs_node(s, node))
3151 return 1;
3152 }
3153 free_kmem_cache_nodes(s);
3154 return 0;
3155}
3156
3157
3158
3159
3160
3161void kmem_cache_destroy(struct kmem_cache *s)
3162{
3163 down_write(&slub_lock);
3164 s->refcount--;
3165 if (!s->refcount) {
3166 list_del(&s->list);
3167 up_write(&slub_lock);
3168 if (kmem_cache_close(s)) {
3169 printk(KERN_ERR "SLUB %s: %s called for cache that "
3170 "still has objects.\n", s->name, __func__);
3171 dump_stack();
3172 }
3173 if (s->flags & SLAB_DESTROY_BY_RCU)
3174 rcu_barrier();
3175 sysfs_slab_remove(s);
3176 } else
3177 up_write(&slub_lock);
3178}
3179EXPORT_SYMBOL(kmem_cache_destroy);
3180
3181
3182
3183
3184
3185struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3186EXPORT_SYMBOL(kmalloc_caches);
3187
3188static struct kmem_cache *kmem_cache;
3189
3190#ifdef CONFIG_ZONE_DMA
3191static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3192#endif
3193
3194static int __init setup_slub_min_order(char *str)
3195{
3196 get_option(&str, &slub_min_order);
3197
3198 return 1;
3199}
3200
3201__setup("slub_min_order=", setup_slub_min_order);
3202
3203static int __init setup_slub_max_order(char *str)
3204{
3205 get_option(&str, &slub_max_order);
3206 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3207
3208 return 1;
3209}
3210
3211__setup("slub_max_order=", setup_slub_max_order);
3212
3213static int __init setup_slub_min_objects(char *str)
3214{
3215 get_option(&str, &slub_min_objects);
3216
3217 return 1;
3218}
3219
3220__setup("slub_min_objects=", setup_slub_min_objects);
3221
3222static int __init setup_slub_nomerge(char *str)
3223{
3224 slub_nomerge = 1;
3225 return 1;
3226}
3227
3228__setup("slub_nomerge", setup_slub_nomerge);
3229
3230static struct kmem_cache *__init create_kmalloc_cache(const char *name,
3231 int size, unsigned int flags)
3232{
3233 struct kmem_cache *s;
3234
3235 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3236
3237
3238
3239
3240
3241 if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
3242 flags, NULL))
3243 goto panic;
3244
3245 list_add(&s->list, &slab_caches);
3246 return s;
3247
3248panic:
3249 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
3250 return NULL;
3251}
3252
3253
3254
3255
3256
3257
3258
3259static s8 size_index[24] = {
3260 3,
3261 4,
3262 5,
3263 5,
3264 6,
3265 6,
3266 6,
3267 6,
3268 1,
3269 1,
3270 1,
3271 1,
3272 7,
3273 7,
3274 7,
3275 7,
3276 2,
3277 2,
3278 2,
3279 2,
3280 2,
3281 2,
3282 2,
3283 2
3284};
3285
3286static inline int size_index_elem(size_t bytes)
3287{
3288 return (bytes - 1) / 8;
3289}
3290
3291static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3292{
3293 int index;
3294
3295 if (size <= 192) {
3296 if (!size)
3297 return ZERO_SIZE_PTR;
3298
3299 index = size_index[size_index_elem(size)];
3300 } else
3301 index = fls(size - 1);
3302
3303#ifdef CONFIG_ZONE_DMA
3304 if (unlikely((flags & SLUB_DMA)))
3305 return kmalloc_dma_caches[index];
3306
3307#endif
3308 return kmalloc_caches[index];
3309}
3310
3311void *__kmalloc(size_t size, gfp_t flags)
3312{
3313 struct kmem_cache *s;
3314 void *ret;
3315
3316 if (unlikely(size > SLUB_MAX_SIZE))
3317 return kmalloc_large(size, flags);
3318
3319 s = get_slab(size, flags);
3320
3321 if (unlikely(ZERO_OR_NULL_PTR(s)))
3322 return s;
3323
3324 ret = slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_);
3325
3326 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3327
3328 return ret;
3329}
3330EXPORT_SYMBOL(__kmalloc);
3331
3332#ifdef CONFIG_NUMA
3333static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3334{
3335 struct page *page;
3336 void *ptr = NULL;
3337
3338 flags |= __GFP_COMP | __GFP_NOTRACK;
3339 page = alloc_pages_node(node, flags, get_order(size));
3340 if (page)
3341 ptr = page_address(page);
3342
3343 kmemleak_alloc(ptr, size, 1, flags);
3344 return ptr;
3345}
3346
3347void *__kmalloc_node(size_t size, gfp_t flags, int node)
3348{
3349 struct kmem_cache *s;
3350 void *ret;
3351
3352 if (unlikely(size > SLUB_MAX_SIZE)) {
3353 ret = kmalloc_large_node(size, flags, node);
3354
3355 trace_kmalloc_node(_RET_IP_, ret,
3356 size, PAGE_SIZE << get_order(size),
3357 flags, node);
3358
3359 return ret;
3360 }
3361
3362 s = get_slab(size, flags);
3363
3364 if (unlikely(ZERO_OR_NULL_PTR(s)))
3365 return s;
3366
3367 ret = slab_alloc(s, flags, node, _RET_IP_);
3368
3369 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3370
3371 return ret;
3372}
3373EXPORT_SYMBOL(__kmalloc_node);
3374#endif
3375
3376size_t ksize(const void *object)
3377{
3378 struct page *page;
3379
3380 if (unlikely(object == ZERO_SIZE_PTR))
3381 return 0;
3382
3383 page = virt_to_head_page(object);
3384
3385 if (unlikely(!PageSlab(page))) {
3386 WARN_ON(!PageCompound(page));
3387 return PAGE_SIZE << compound_order(page);
3388 }
3389
3390 return slab_ksize(page->slab);
3391}
3392EXPORT_SYMBOL(ksize);
3393
3394#ifdef CONFIG_SLUB_DEBUG
3395bool verify_mem_not_deleted(const void *x)
3396{
3397 struct page *page;
3398 void *object = (void *)x;
3399 unsigned long flags;
3400 bool rv;
3401
3402 if (unlikely(ZERO_OR_NULL_PTR(x)))
3403 return false;
3404
3405 local_irq_save(flags);
3406
3407 page = virt_to_head_page(x);
3408 if (unlikely(!PageSlab(page))) {
3409
3410 rv = true;
3411 goto out_unlock;
3412 }
3413
3414 slab_lock(page);
3415 if (on_freelist(page->slab, page, object)) {
3416 object_err(page->slab, page, object, "Object is on free-list");
3417 rv = false;
3418 } else {
3419 rv = true;
3420 }
3421 slab_unlock(page);
3422
3423out_unlock:
3424 local_irq_restore(flags);
3425 return rv;
3426}
3427EXPORT_SYMBOL(verify_mem_not_deleted);
3428#endif
3429
3430void kfree(const void *x)
3431{
3432 struct page *page;
3433 void *object = (void *)x;
3434
3435 trace_kfree(_RET_IP_, x);
3436
3437 if (unlikely(ZERO_OR_NULL_PTR(x)))
3438 return;
3439
3440 page = virt_to_head_page(x);
3441 if (unlikely(!PageSlab(page))) {
3442 BUG_ON(!PageCompound(page));
3443 kmemleak_free(x);
3444 put_page(page);
3445 return;
3446 }
3447 slab_free(page->slab, page, object, _RET_IP_);
3448}
3449EXPORT_SYMBOL(kfree);
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461int kmem_cache_shrink(struct kmem_cache *s)
3462{
3463 int node;
3464 int i;
3465 struct kmem_cache_node *n;
3466 struct page *page;
3467 struct page *t;
3468 int objects = oo_objects(s->max);
3469 struct list_head *slabs_by_inuse =
3470 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3471 unsigned long flags;
3472
3473 if (!slabs_by_inuse)
3474 return -ENOMEM;
3475
3476 flush_all(s);
3477 for_each_node_state(node, N_NORMAL_MEMORY) {
3478 n = get_node(s, node);
3479
3480 if (!n->nr_partial)
3481 continue;
3482
3483 for (i = 0; i < objects; i++)
3484 INIT_LIST_HEAD(slabs_by_inuse + i);
3485
3486 spin_lock_irqsave(&n->list_lock, flags);
3487
3488
3489
3490
3491
3492
3493
3494 list_for_each_entry_safe(page, t, &n->partial, lru) {
3495 list_move(&page->lru, slabs_by_inuse + page->inuse);
3496 if (!page->inuse)
3497 n->nr_partial--;
3498 }
3499
3500
3501
3502
3503
3504 for (i = objects - 1; i > 0; i--)
3505 list_splice(slabs_by_inuse + i, n->partial.prev);
3506
3507 spin_unlock_irqrestore(&n->list_lock, flags);
3508
3509
3510 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3511 discard_slab(s, page);
3512 }
3513
3514 kfree(slabs_by_inuse);
3515 return 0;
3516}
3517EXPORT_SYMBOL(kmem_cache_shrink);
3518
3519#if defined(CONFIG_MEMORY_HOTPLUG)
3520static int slab_mem_going_offline_callback(void *arg)
3521{
3522 struct kmem_cache *s;
3523
3524 down_read(&slub_lock);
3525 list_for_each_entry(s, &slab_caches, list)
3526 kmem_cache_shrink(s);
3527 up_read(&slub_lock);
3528
3529 return 0;
3530}
3531
3532static void slab_mem_offline_callback(void *arg)
3533{
3534 struct kmem_cache_node *n;
3535 struct kmem_cache *s;
3536 struct memory_notify *marg = arg;
3537 int offline_node;
3538
3539 offline_node = marg->status_change_nid;
3540
3541
3542
3543
3544
3545 if (offline_node < 0)
3546 return;
3547
3548 down_read(&slub_lock);
3549 list_for_each_entry(s, &slab_caches, list) {
3550 n = get_node(s, offline_node);
3551 if (n) {
3552
3553
3554
3555
3556
3557
3558 BUG_ON(slabs_node(s, offline_node));
3559
3560 s->node[offline_node] = NULL;
3561 kmem_cache_free(kmem_cache_node, n);
3562 }
3563 }
3564 up_read(&slub_lock);
3565}
3566
3567static int slab_mem_going_online_callback(void *arg)
3568{
3569 struct kmem_cache_node *n;
3570 struct kmem_cache *s;
3571 struct memory_notify *marg = arg;
3572 int nid = marg->status_change_nid;
3573 int ret = 0;
3574
3575
3576
3577
3578
3579 if (nid < 0)
3580 return 0;
3581
3582
3583
3584
3585
3586
3587 down_read(&slub_lock);
3588 list_for_each_entry(s, &slab_caches, list) {
3589
3590
3591
3592
3593
3594 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3595 if (!n) {
3596 ret = -ENOMEM;
3597 goto out;
3598 }
3599 init_kmem_cache_node(n, s);
3600 s->node[nid] = n;
3601 }
3602out:
3603 up_read(&slub_lock);
3604 return ret;
3605}
3606
3607static int slab_memory_callback(struct notifier_block *self,
3608 unsigned long action, void *arg)
3609{
3610 int ret = 0;
3611
3612 switch (action) {
3613 case MEM_GOING_ONLINE:
3614 ret = slab_mem_going_online_callback(arg);
3615 break;
3616 case MEM_GOING_OFFLINE:
3617 ret = slab_mem_going_offline_callback(arg);
3618 break;
3619 case MEM_OFFLINE:
3620 case MEM_CANCEL_ONLINE:
3621 slab_mem_offline_callback(arg);
3622 break;
3623 case MEM_ONLINE:
3624 case MEM_CANCEL_OFFLINE:
3625 break;
3626 }
3627 if (ret)
3628 ret = notifier_from_errno(ret);
3629 else
3630 ret = NOTIFY_OK;
3631 return ret;
3632}
3633
3634#endif
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
3646{
3647 int node;
3648
3649 list_add(&s->list, &slab_caches);
3650 s->refcount = -1;
3651
3652 for_each_node_state(node, N_NORMAL_MEMORY) {
3653 struct kmem_cache_node *n = get_node(s, node);
3654 struct page *p;
3655
3656 if (n) {
3657 list_for_each_entry(p, &n->partial, lru)
3658 p->slab = s;
3659
3660#ifdef CONFIG_SLUB_DEBUG
3661 list_for_each_entry(p, &n->full, lru)
3662 p->slab = s;
3663#endif
3664 }
3665 }
3666}
3667
3668void __init kmem_cache_init(void)
3669{
3670 int i;
3671 int caches = 0;
3672 struct kmem_cache *temp_kmem_cache;
3673 int order;
3674 struct kmem_cache *temp_kmem_cache_node;
3675 unsigned long kmalloc_size;
3676
3677 if (debug_guardpage_minorder())
3678 slub_max_order = 0;
3679
3680 kmem_size = offsetof(struct kmem_cache, node) +
3681 nr_node_ids * sizeof(struct kmem_cache_node *);
3682
3683
3684 kmalloc_size = ALIGN(kmem_size, cache_line_size());
3685 order = get_order(2 * kmalloc_size);
3686 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
3687
3688
3689
3690
3691
3692
3693 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
3694
3695 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
3696 sizeof(struct kmem_cache_node),
3697 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3698
3699 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3700
3701
3702 slab_state = PARTIAL;
3703
3704 temp_kmem_cache = kmem_cache;
3705 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
3706 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3707 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3708 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
3709
3710
3711
3712
3713
3714
3715 temp_kmem_cache_node = kmem_cache_node;
3716
3717 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3718 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3719
3720 kmem_cache_bootstrap_fixup(kmem_cache_node);
3721
3722 caches++;
3723 kmem_cache_bootstrap_fixup(kmem_cache);
3724 caches++;
3725
3726 free_pages((unsigned long)temp_kmem_cache, order);
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3742 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3743
3744 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3745 int elem = size_index_elem(i);
3746 if (elem >= ARRAY_SIZE(size_index))
3747 break;
3748 size_index[elem] = KMALLOC_SHIFT_LOW;
3749 }
3750
3751 if (KMALLOC_MIN_SIZE == 64) {
3752
3753
3754
3755
3756 for (i = 64 + 8; i <= 96; i += 8)
3757 size_index[size_index_elem(i)] = 7;
3758 } else if (KMALLOC_MIN_SIZE == 128) {
3759
3760
3761
3762
3763
3764 for (i = 128 + 8; i <= 192; i += 8)
3765 size_index[size_index_elem(i)] = 8;
3766 }
3767
3768
3769 if (KMALLOC_MIN_SIZE <= 32) {
3770 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3771 caches++;
3772 }
3773
3774 if (KMALLOC_MIN_SIZE <= 64) {
3775 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3776 caches++;
3777 }
3778
3779 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3780 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3781 caches++;
3782 }
3783
3784 slab_state = UP;
3785
3786
3787 if (KMALLOC_MIN_SIZE <= 32) {
3788 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3789 BUG_ON(!kmalloc_caches[1]->name);
3790 }
3791
3792 if (KMALLOC_MIN_SIZE <= 64) {
3793 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3794 BUG_ON(!kmalloc_caches[2]->name);
3795 }
3796
3797 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3798 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3799
3800 BUG_ON(!s);
3801 kmalloc_caches[i]->name = s;
3802 }
3803
3804#ifdef CONFIG_SMP
3805 register_cpu_notifier(&slab_notifier);
3806#endif
3807
3808#ifdef CONFIG_ZONE_DMA
3809 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3810 struct kmem_cache *s = kmalloc_caches[i];
3811
3812 if (s && s->size) {
3813 char *name = kasprintf(GFP_NOWAIT,
3814 "dma-kmalloc-%d", s->objsize);
3815
3816 BUG_ON(!name);
3817 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3818 s->objsize, SLAB_CACHE_DMA);
3819 }
3820 }
3821#endif
3822 printk(KERN_INFO
3823 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3824 " CPUs=%d, Nodes=%d\n",
3825 caches, cache_line_size(),
3826 slub_min_order, slub_max_order, slub_min_objects,
3827 nr_cpu_ids, nr_node_ids);
3828}
3829
3830void __init kmem_cache_init_late(void)
3831{
3832}
3833
3834
3835
3836
3837static int slab_unmergeable(struct kmem_cache *s)
3838{
3839 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3840 return 1;
3841
3842 if (s->ctor)
3843 return 1;
3844
3845
3846
3847
3848 if (s->refcount < 0)
3849 return 1;
3850
3851 return 0;
3852}
3853
3854static struct kmem_cache *find_mergeable(size_t size,
3855 size_t align, unsigned long flags, const char *name,
3856 void (*ctor)(void *))
3857{
3858 struct kmem_cache *s;
3859
3860 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3861 return NULL;
3862
3863 if (ctor)
3864 return NULL;
3865
3866 size = ALIGN(size, sizeof(void *));
3867 align = calculate_alignment(flags, align, size);
3868 size = ALIGN(size, align);
3869 flags = kmem_cache_flags(size, flags, name, NULL);
3870
3871 list_for_each_entry(s, &slab_caches, list) {
3872 if (slab_unmergeable(s))
3873 continue;
3874
3875 if (size > s->size)
3876 continue;
3877
3878 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3879 continue;
3880
3881
3882
3883
3884 if ((s->size & ~(align - 1)) != s->size)
3885 continue;
3886
3887 if (s->size - size >= sizeof(void *))
3888 continue;
3889
3890 return s;
3891 }
3892 return NULL;
3893}
3894
3895struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3896 size_t align, unsigned long flags, void (*ctor)(void *))
3897{
3898 struct kmem_cache *s;
3899 char *n;
3900
3901 if (WARN_ON(!name))
3902 return NULL;
3903
3904 down_write(&slub_lock);
3905 s = find_mergeable(size, align, flags, name, ctor);
3906 if (s) {
3907 s->refcount++;
3908
3909
3910
3911
3912 s->objsize = max(s->objsize, (int)size);
3913 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3914
3915 if (sysfs_slab_alias(s, name)) {
3916 s->refcount--;
3917 goto err;
3918 }
3919 up_write(&slub_lock);
3920 return s;
3921 }
3922
3923 n = kstrdup(name, GFP_KERNEL);
3924 if (!n)
3925 goto err;
3926
3927 s = kmalloc(kmem_size, GFP_KERNEL);
3928 if (s) {
3929 if (kmem_cache_open(s, n,
3930 size, align, flags, ctor)) {
3931 list_add(&s->list, &slab_caches);
3932 if (sysfs_slab_add(s)) {
3933 list_del(&s->list);
3934 kfree(n);
3935 kfree(s);
3936 goto err;
3937 }
3938 up_write(&slub_lock);
3939 return s;
3940 }
3941 kfree(n);
3942 kfree(s);
3943 }
3944err:
3945 up_write(&slub_lock);
3946
3947 if (flags & SLAB_PANIC)
3948 panic("Cannot create slabcache %s\n", name);
3949 else
3950 s = NULL;
3951 return s;
3952}
3953EXPORT_SYMBOL(kmem_cache_create);
3954
3955#ifdef CONFIG_SMP
3956
3957
3958
3959
3960static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3961 unsigned long action, void *hcpu)
3962{
3963 long cpu = (long)hcpu;
3964 struct kmem_cache *s;
3965 unsigned long flags;
3966
3967 switch (action) {
3968 case CPU_UP_CANCELED:
3969 case CPU_UP_CANCELED_FROZEN:
3970 case CPU_DEAD:
3971 case CPU_DEAD_FROZEN:
3972 down_read(&slub_lock);
3973 list_for_each_entry(s, &slab_caches, list) {
3974 local_irq_save(flags);
3975 __flush_cpu_slab(s, cpu);
3976 local_irq_restore(flags);
3977 }
3978 up_read(&slub_lock);
3979 break;
3980 default:
3981 break;
3982 }
3983 return NOTIFY_OK;
3984}
3985
3986static struct notifier_block __cpuinitdata slab_notifier = {
3987 .notifier_call = slab_cpuup_callback
3988};
3989
3990#endif
3991
3992void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3993{
3994 struct kmem_cache *s;
3995 void *ret;
3996
3997 if (unlikely(size > SLUB_MAX_SIZE))
3998 return kmalloc_large(size, gfpflags);
3999
4000 s = get_slab(size, gfpflags);
4001
4002 if (unlikely(ZERO_OR_NULL_PTR(s)))
4003 return s;
4004
4005 ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
4006
4007
4008 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4009
4010 return ret;
4011}
4012
4013#ifdef CONFIG_NUMA
4014void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4015 int node, unsigned long caller)
4016{
4017 struct kmem_cache *s;
4018 void *ret;
4019
4020 if (unlikely(size > SLUB_MAX_SIZE)) {
4021 ret = kmalloc_large_node(size, gfpflags, node);
4022
4023 trace_kmalloc_node(caller, ret,
4024 size, PAGE_SIZE << get_order(size),
4025 gfpflags, node);
4026
4027 return ret;
4028 }
4029
4030 s = get_slab(size, gfpflags);
4031
4032 if (unlikely(ZERO_OR_NULL_PTR(s)))
4033 return s;
4034
4035 ret = slab_alloc(s, gfpflags, node, caller);
4036
4037
4038 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4039
4040 return ret;
4041}
4042#endif
4043
4044#ifdef CONFIG_SYSFS
4045static int count_inuse(struct page *page)
4046{
4047 return page->inuse;
4048}
4049
4050static int count_total(struct page *page)
4051{
4052 return page->objects;
4053}
4054#endif
4055
4056#ifdef CONFIG_SLUB_DEBUG
4057static int validate_slab(struct kmem_cache *s, struct page *page,
4058 unsigned long *map)
4059{
4060 void *p;
4061 void *addr = page_address(page);
4062
4063 if (!check_slab(s, page) ||
4064 !on_freelist(s, page, NULL))
4065 return 0;
4066
4067
4068 bitmap_zero(map, page->objects);
4069
4070 get_map(s, page, map);
4071 for_each_object(p, s, addr, page->objects) {
4072 if (test_bit(slab_index(p, s, addr), map))
4073 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4074 return 0;
4075 }
4076
4077 for_each_object(p, s, addr, page->objects)
4078 if (!test_bit(slab_index(p, s, addr), map))
4079 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4080 return 0;
4081 return 1;
4082}
4083
4084static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4085 unsigned long *map)
4086{
4087 slab_lock(page);
4088 validate_slab(s, page, map);
4089 slab_unlock(page);
4090}
4091
4092static int validate_slab_node(struct kmem_cache *s,
4093 struct kmem_cache_node *n, unsigned long *map)
4094{
4095 unsigned long count = 0;
4096 struct page *page;
4097 unsigned long flags;
4098
4099 spin_lock_irqsave(&n->list_lock, flags);
4100
4101 list_for_each_entry(page, &n->partial, lru) {
4102 validate_slab_slab(s, page, map);
4103 count++;
4104 }
4105 if (count != n->nr_partial)
4106 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4107 "counter=%ld\n", s->name, count, n->nr_partial);
4108
4109 if (!(s->flags & SLAB_STORE_USER))
4110 goto out;
4111
4112 list_for_each_entry(page, &n->full, lru) {
4113 validate_slab_slab(s, page, map);
4114 count++;
4115 }
4116 if (count != atomic_long_read(&n->nr_slabs))
4117 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4118 "counter=%ld\n", s->name, count,
4119 atomic_long_read(&n->nr_slabs));
4120
4121out:
4122 spin_unlock_irqrestore(&n->list_lock, flags);
4123 return count;
4124}
4125
4126static long validate_slab_cache(struct kmem_cache *s)
4127{
4128 int node;
4129 unsigned long count = 0;
4130 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4131 sizeof(unsigned long), GFP_KERNEL);
4132
4133 if (!map)
4134 return -ENOMEM;
4135
4136 flush_all(s);
4137 for_each_node_state(node, N_NORMAL_MEMORY) {
4138 struct kmem_cache_node *n = get_node(s, node);
4139
4140 count += validate_slab_node(s, n, map);
4141 }
4142 kfree(map);
4143 return count;
4144}
4145
4146
4147
4148
4149
4150struct location {
4151 unsigned long count;
4152 unsigned long addr;
4153 long long sum_time;
4154 long min_time;
4155 long max_time;
4156 long min_pid;
4157 long max_pid;
4158 DECLARE_BITMAP(cpus, NR_CPUS);
4159 nodemask_t nodes;
4160};
4161
4162struct loc_track {
4163 unsigned long max;
4164 unsigned long count;
4165 struct location *loc;
4166};
4167
4168static void free_loc_track(struct loc_track *t)
4169{
4170 if (t->max)
4171 free_pages((unsigned long)t->loc,
4172 get_order(sizeof(struct location) * t->max));
4173}
4174
4175static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4176{
4177 struct location *l;
4178 int order;
4179
4180 order = get_order(sizeof(struct location) * max);
4181
4182 l = (void *)__get_free_pages(flags, order);
4183 if (!l)
4184 return 0;
4185
4186 if (t->count) {
4187 memcpy(l, t->loc, sizeof(struct location) * t->count);
4188 free_loc_track(t);
4189 }
4190 t->max = max;
4191 t->loc = l;
4192 return 1;
4193}
4194
4195static int add_location(struct loc_track *t, struct kmem_cache *s,
4196 const struct track *track)
4197{
4198 long start, end, pos;
4199 struct location *l;
4200 unsigned long caddr;
4201 unsigned long age = jiffies - track->when;
4202
4203 start = -1;
4204 end = t->count;
4205
4206 for ( ; ; ) {
4207 pos = start + (end - start + 1) / 2;
4208
4209
4210
4211
4212
4213 if (pos == end)
4214 break;
4215
4216 caddr = t->loc[pos].addr;
4217 if (track->addr == caddr) {
4218
4219 l = &t->loc[pos];
4220 l->count++;
4221 if (track->when) {
4222 l->sum_time += age;
4223 if (age < l->min_time)
4224 l->min_time = age;
4225 if (age > l->max_time)
4226 l->max_time = age;
4227
4228 if (track->pid < l->min_pid)
4229 l->min_pid = track->pid;
4230 if (track->pid > l->max_pid)
4231 l->max_pid = track->pid;
4232
4233 cpumask_set_cpu(track->cpu,
4234 to_cpumask(l->cpus));
4235 }
4236 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4237 return 1;
4238 }
4239
4240 if (track->addr < caddr)
4241 end = pos;
4242 else
4243 start = pos;
4244 }
4245
4246
4247
4248
4249 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4250 return 0;
4251
4252 l = t->loc + pos;
4253 if (pos < t->count)
4254 memmove(l + 1, l,
4255 (t->count - pos) * sizeof(struct location));
4256 t->count++;
4257 l->count = 1;
4258 l->addr = track->addr;
4259 l->sum_time = age;
4260 l->min_time = age;
4261 l->max_time = age;
4262 l->min_pid = track->pid;
4263 l->max_pid = track->pid;
4264 cpumask_clear(to_cpumask(l->cpus));
4265 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4266 nodes_clear(l->nodes);
4267 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4268 return 1;
4269}
4270
4271static void process_slab(struct loc_track *t, struct kmem_cache *s,
4272 struct page *page, enum track_item alloc,
4273 unsigned long *map)
4274{
4275 void *addr = page_address(page);
4276 void *p;
4277
4278 bitmap_zero(map, page->objects);
4279 get_map(s, page, map);
4280
4281 for_each_object(p, s, addr, page->objects)
4282 if (!test_bit(slab_index(p, s, addr), map))
4283 add_location(t, s, get_track(s, p, alloc));
4284}
4285
4286static int list_locations(struct kmem_cache *s, char *buf,
4287 enum track_item alloc)
4288{
4289 int len = 0;
4290 unsigned long i;
4291 struct loc_track t = { 0, 0, NULL };
4292 int node;
4293 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4294 sizeof(unsigned long), GFP_KERNEL);
4295
4296 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4297 GFP_TEMPORARY)) {
4298 kfree(map);
4299 return sprintf(buf, "Out of memory\n");
4300 }
4301
4302 flush_all(s);
4303
4304 for_each_node_state(node, N_NORMAL_MEMORY) {
4305 struct kmem_cache_node *n = get_node(s, node);
4306 unsigned long flags;
4307 struct page *page;
4308
4309 if (!atomic_long_read(&n->nr_slabs))
4310 continue;
4311
4312 spin_lock_irqsave(&n->list_lock, flags);
4313 list_for_each_entry(page, &n->partial, lru)
4314 process_slab(&t, s, page, alloc, map);
4315 list_for_each_entry(page, &n->full, lru)
4316 process_slab(&t, s, page, alloc, map);
4317 spin_unlock_irqrestore(&n->list_lock, flags);
4318 }
4319
4320 for (i = 0; i < t.count; i++) {
4321 struct location *l = &t.loc[i];
4322
4323 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4324 break;
4325 len += sprintf(buf + len, "%7ld ", l->count);
4326
4327 if (l->addr)
4328 len += sprintf(buf + len, "%pS", (void *)l->addr);
4329 else
4330 len += sprintf(buf + len, "<not-available>");
4331
4332 if (l->sum_time != l->min_time) {
4333 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4334 l->min_time,
4335 (long)div_u64(l->sum_time, l->count),
4336 l->max_time);
4337 } else
4338 len += sprintf(buf + len, " age=%ld",
4339 l->min_time);
4340
4341 if (l->min_pid != l->max_pid)
4342 len += sprintf(buf + len, " pid=%ld-%ld",
4343 l->min_pid, l->max_pid);
4344 else
4345 len += sprintf(buf + len, " pid=%ld",
4346 l->min_pid);
4347
4348 if (num_online_cpus() > 1 &&
4349 !cpumask_empty(to_cpumask(l->cpus)) &&
4350 len < PAGE_SIZE - 60) {
4351 len += sprintf(buf + len, " cpus=");
4352 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4353 to_cpumask(l->cpus));
4354 }
4355
4356 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4357 len < PAGE_SIZE - 60) {
4358 len += sprintf(buf + len, " nodes=");
4359 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4360 l->nodes);
4361 }
4362
4363 len += sprintf(buf + len, "\n");
4364 }
4365
4366 free_loc_track(&t);
4367 kfree(map);
4368 if (!t.count)
4369 len += sprintf(buf, "No data\n");
4370 return len;
4371}
4372#endif
4373
4374#ifdef SLUB_RESILIENCY_TEST
4375static void resiliency_test(void)
4376{
4377 u8 *p;
4378
4379 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10);
4380
4381 printk(KERN_ERR "SLUB resiliency testing\n");
4382 printk(KERN_ERR "-----------------------\n");
4383 printk(KERN_ERR "A. Corruption after allocation\n");
4384
4385 p = kzalloc(16, GFP_KERNEL);
4386 p[16] = 0x12;
4387 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4388 " 0x12->0x%p\n\n", p + 16);
4389
4390 validate_slab_cache(kmalloc_caches[4]);
4391
4392
4393 p = kzalloc(32, GFP_KERNEL);
4394 p[32 + sizeof(void *)] = 0x34;
4395 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4396 " 0x34 -> -0x%p\n", p);
4397 printk(KERN_ERR
4398 "If allocated object is overwritten then not detectable\n\n");
4399
4400 validate_slab_cache(kmalloc_caches[5]);
4401 p = kzalloc(64, GFP_KERNEL);
4402 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4403 *p = 0x56;
4404 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4405 p);
4406 printk(KERN_ERR
4407 "If allocated object is overwritten then not detectable\n\n");
4408 validate_slab_cache(kmalloc_caches[6]);
4409
4410 printk(KERN_ERR "\nB. Corruption after free\n");
4411 p = kzalloc(128, GFP_KERNEL);
4412 kfree(p);
4413 *p = 0x78;
4414 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4415 validate_slab_cache(kmalloc_caches[7]);
4416
4417 p = kzalloc(256, GFP_KERNEL);
4418 kfree(p);
4419 p[50] = 0x9a;
4420 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4421 p);
4422 validate_slab_cache(kmalloc_caches[8]);
4423
4424 p = kzalloc(512, GFP_KERNEL);
4425 kfree(p);
4426 p[512] = 0xab;
4427 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4428 validate_slab_cache(kmalloc_caches[9]);
4429}
4430#else
4431#ifdef CONFIG_SYSFS
4432static void resiliency_test(void) {};
4433#endif
4434#endif
4435
4436#ifdef CONFIG_SYSFS
4437enum slab_stat_type {
4438 SL_ALL,
4439 SL_PARTIAL,
4440 SL_CPU,
4441 SL_OBJECTS,
4442 SL_TOTAL
4443};
4444
4445#define SO_ALL (1 << SL_ALL)
4446#define SO_PARTIAL (1 << SL_PARTIAL)
4447#define SO_CPU (1 << SL_CPU)
4448#define SO_OBJECTS (1 << SL_OBJECTS)
4449#define SO_TOTAL (1 << SL_TOTAL)
4450
4451static ssize_t show_slab_objects(struct kmem_cache *s,
4452 char *buf, unsigned long flags)
4453{
4454 unsigned long total = 0;
4455 int node;
4456 int x;
4457 unsigned long *nodes;
4458 unsigned long *per_cpu;
4459
4460 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4461 if (!nodes)
4462 return -ENOMEM;
4463 per_cpu = nodes + nr_node_ids;
4464
4465 if (flags & SO_CPU) {
4466 int cpu;
4467
4468 for_each_possible_cpu(cpu) {
4469 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4470 int node = ACCESS_ONCE(c->node);
4471 struct page *page;
4472
4473 if (node < 0)
4474 continue;
4475 page = ACCESS_ONCE(c->page);
4476 if (page) {
4477 if (flags & SO_TOTAL)
4478 x = page->objects;
4479 else if (flags & SO_OBJECTS)
4480 x = page->inuse;
4481 else
4482 x = 1;
4483
4484 total += x;
4485 nodes[node] += x;
4486 }
4487 page = c->partial;
4488
4489 if (page) {
4490 x = page->pobjects;
4491 total += x;
4492 nodes[node] += x;
4493 }
4494 per_cpu[node]++;
4495 }
4496 }
4497
4498 lock_memory_hotplug();
4499#ifdef CONFIG_SLUB_DEBUG
4500 if (flags & SO_ALL) {
4501 for_each_node_state(node, N_NORMAL_MEMORY) {
4502 struct kmem_cache_node *n = get_node(s, node);
4503
4504 if (flags & SO_TOTAL)
4505 x = atomic_long_read(&n->total_objects);
4506 else if (flags & SO_OBJECTS)
4507 x = atomic_long_read(&n->total_objects) -
4508 count_partial(n, count_free);
4509
4510 else
4511 x = atomic_long_read(&n->nr_slabs);
4512 total += x;
4513 nodes[node] += x;
4514 }
4515
4516 } else
4517#endif
4518 if (flags & SO_PARTIAL) {
4519 for_each_node_state(node, N_NORMAL_MEMORY) {
4520 struct kmem_cache_node *n = get_node(s, node);
4521
4522 if (flags & SO_TOTAL)
4523 x = count_partial(n, count_total);
4524 else if (flags & SO_OBJECTS)
4525 x = count_partial(n, count_inuse);
4526 else
4527 x = n->nr_partial;
4528 total += x;
4529 nodes[node] += x;
4530 }
4531 }
4532 x = sprintf(buf, "%lu", total);
4533#ifdef CONFIG_NUMA
4534 for_each_node_state(node, N_NORMAL_MEMORY)
4535 if (nodes[node])
4536 x += sprintf(buf + x, " N%d=%lu",
4537 node, nodes[node]);
4538#endif
4539 unlock_memory_hotplug();
4540 kfree(nodes);
4541 return x + sprintf(buf + x, "\n");
4542}
4543
4544#ifdef CONFIG_SLUB_DEBUG
4545static int any_slab_objects(struct kmem_cache *s)
4546{
4547 int node;
4548
4549 for_each_online_node(node) {
4550 struct kmem_cache_node *n = get_node(s, node);
4551
4552 if (!n)
4553 continue;
4554
4555 if (atomic_long_read(&n->total_objects))
4556 return 1;
4557 }
4558 return 0;
4559}
4560#endif
4561
4562#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4563#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4564
4565struct slab_attribute {
4566 struct attribute attr;
4567 ssize_t (*show)(struct kmem_cache *s, char *buf);
4568 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4569};
4570
4571#define SLAB_ATTR_RO(_name) \
4572 static struct slab_attribute _name##_attr = \
4573 __ATTR(_name, 0400, _name##_show, NULL)
4574
4575#define SLAB_ATTR(_name) \
4576 static struct slab_attribute _name##_attr = \
4577 __ATTR(_name, 0600, _name##_show, _name##_store)
4578
4579static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4580{
4581 return sprintf(buf, "%d\n", s->size);
4582}
4583SLAB_ATTR_RO(slab_size);
4584
4585static ssize_t align_show(struct kmem_cache *s, char *buf)
4586{
4587 return sprintf(buf, "%d\n", s->align);
4588}
4589SLAB_ATTR_RO(align);
4590
4591static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4592{
4593 return sprintf(buf, "%d\n", s->objsize);
4594}
4595SLAB_ATTR_RO(object_size);
4596
4597static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4598{
4599 return sprintf(buf, "%d\n", oo_objects(s->oo));
4600}
4601SLAB_ATTR_RO(objs_per_slab);
4602
4603static ssize_t order_store(struct kmem_cache *s,
4604 const char *buf, size_t length)
4605{
4606 unsigned long order;
4607 int err;
4608
4609 err = strict_strtoul(buf, 10, &order);
4610 if (err)
4611 return err;
4612
4613 if (order > slub_max_order || order < slub_min_order)
4614 return -EINVAL;
4615
4616 calculate_sizes(s, order);
4617 return length;
4618}
4619
4620static ssize_t order_show(struct kmem_cache *s, char *buf)
4621{
4622 return sprintf(buf, "%d\n", oo_order(s->oo));
4623}
4624SLAB_ATTR(order);
4625
4626static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4627{
4628 return sprintf(buf, "%lu\n", s->min_partial);
4629}
4630
4631static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4632 size_t length)
4633{
4634 unsigned long min;
4635 int err;
4636
4637 err = strict_strtoul(buf, 10, &min);
4638 if (err)
4639 return err;
4640
4641 set_min_partial(s, min);
4642 return length;
4643}
4644SLAB_ATTR(min_partial);
4645
4646static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4647{
4648 return sprintf(buf, "%u\n", s->cpu_partial);
4649}
4650
4651static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4652 size_t length)
4653{
4654 unsigned long objects;
4655 int err;
4656
4657 err = strict_strtoul(buf, 10, &objects);
4658 if (err)
4659 return err;
4660 if (objects && kmem_cache_debug(s))
4661 return -EINVAL;
4662
4663 s->cpu_partial = objects;
4664 flush_all(s);
4665 return length;
4666}
4667SLAB_ATTR(cpu_partial);
4668
4669static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4670{
4671 if (!s->ctor)
4672 return 0;
4673 return sprintf(buf, "%pS\n", s->ctor);
4674}
4675SLAB_ATTR_RO(ctor);
4676
4677static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4678{
4679 return sprintf(buf, "%d\n", s->refcount - 1);
4680}
4681SLAB_ATTR_RO(aliases);
4682
4683static ssize_t partial_show(struct kmem_cache *s, char *buf)
4684{
4685 return show_slab_objects(s, buf, SO_PARTIAL);
4686}
4687SLAB_ATTR_RO(partial);
4688
4689static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4690{
4691 return show_slab_objects(s, buf, SO_CPU);
4692}
4693SLAB_ATTR_RO(cpu_slabs);
4694
4695static ssize_t objects_show(struct kmem_cache *s, char *buf)
4696{
4697 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4698}
4699SLAB_ATTR_RO(objects);
4700
4701static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4702{
4703 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4704}
4705SLAB_ATTR_RO(objects_partial);
4706
4707static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4708{
4709 int objects = 0;
4710 int pages = 0;
4711 int cpu;
4712 int len;
4713
4714 for_each_online_cpu(cpu) {
4715 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4716
4717 if (page) {
4718 pages += page->pages;
4719 objects += page->pobjects;
4720 }
4721 }
4722
4723 len = sprintf(buf, "%d(%d)", objects, pages);
4724
4725#ifdef CONFIG_SMP
4726 for_each_online_cpu(cpu) {
4727 struct page *page = per_cpu_ptr(s->cpu_slab, cpu) ->partial;
4728
4729 if (page && len < PAGE_SIZE - 20)
4730 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4731 page->pobjects, page->pages);
4732 }
4733#endif
4734 return len + sprintf(buf + len, "\n");
4735}
4736SLAB_ATTR_RO(slabs_cpu_partial);
4737
4738static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4739{
4740 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4741}
4742
4743static ssize_t reclaim_account_store(struct kmem_cache *s,
4744 const char *buf, size_t length)
4745{
4746 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4747 if (buf[0] == '1')
4748 s->flags |= SLAB_RECLAIM_ACCOUNT;
4749 return length;
4750}
4751SLAB_ATTR(reclaim_account);
4752
4753static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4754{
4755 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4756}
4757SLAB_ATTR_RO(hwcache_align);
4758
4759#ifdef CONFIG_ZONE_DMA
4760static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4761{
4762 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4763}
4764SLAB_ATTR_RO(cache_dma);
4765#endif
4766
4767static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4768{
4769 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4770}
4771SLAB_ATTR_RO(destroy_by_rcu);
4772
4773static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4774{
4775 return sprintf(buf, "%d\n", s->reserved);
4776}
4777SLAB_ATTR_RO(reserved);
4778
4779#ifdef CONFIG_SLUB_DEBUG
4780static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4781{
4782 return show_slab_objects(s, buf, SO_ALL);
4783}
4784SLAB_ATTR_RO(slabs);
4785
4786static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4787{
4788 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4789}
4790SLAB_ATTR_RO(total_objects);
4791
4792static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4793{
4794 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4795}
4796
4797static ssize_t sanity_checks_store(struct kmem_cache *s,
4798 const char *buf, size_t length)
4799{
4800 s->flags &= ~SLAB_DEBUG_FREE;
4801 if (buf[0] == '1') {
4802 s->flags &= ~__CMPXCHG_DOUBLE;
4803 s->flags |= SLAB_DEBUG_FREE;
4804 }
4805 return length;
4806}
4807SLAB_ATTR(sanity_checks);
4808
4809static ssize_t trace_show(struct kmem_cache *s, char *buf)
4810{
4811 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4812}
4813
4814static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4815 size_t length)
4816{
4817 s->flags &= ~SLAB_TRACE;
4818 if (buf[0] == '1') {
4819 s->flags &= ~__CMPXCHG_DOUBLE;
4820 s->flags |= SLAB_TRACE;
4821 }
4822 return length;
4823}
4824SLAB_ATTR(trace);
4825
4826static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4827{
4828 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4829}
4830
4831static ssize_t red_zone_store(struct kmem_cache *s,
4832 const char *buf, size_t length)
4833{
4834 if (any_slab_objects(s))
4835 return -EBUSY;
4836
4837 s->flags &= ~SLAB_RED_ZONE;
4838 if (buf[0] == '1') {
4839 s->flags &= ~__CMPXCHG_DOUBLE;
4840 s->flags |= SLAB_RED_ZONE;
4841 }
4842 calculate_sizes(s, -1);
4843 return length;
4844}
4845SLAB_ATTR(red_zone);
4846
4847static ssize_t poison_show(struct kmem_cache *s, char *buf)
4848{
4849 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4850}
4851
4852static ssize_t poison_store(struct kmem_cache *s,
4853 const char *buf, size_t length)
4854{
4855 if (any_slab_objects(s))
4856 return -EBUSY;
4857
4858 s->flags &= ~SLAB_POISON;
4859 if (buf[0] == '1') {
4860 s->flags &= ~__CMPXCHG_DOUBLE;
4861 s->flags |= SLAB_POISON;
4862 }
4863 calculate_sizes(s, -1);
4864 return length;
4865}
4866SLAB_ATTR(poison);
4867
4868static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4869{
4870 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4871}
4872
4873static ssize_t store_user_store(struct kmem_cache *s,
4874 const char *buf, size_t length)
4875{
4876 if (any_slab_objects(s))
4877 return -EBUSY;
4878
4879 s->flags &= ~SLAB_STORE_USER;
4880 if (buf[0] == '1') {
4881 s->flags &= ~__CMPXCHG_DOUBLE;
4882 s->flags |= SLAB_STORE_USER;
4883 }
4884 calculate_sizes(s, -1);
4885 return length;
4886}
4887SLAB_ATTR(store_user);
4888
4889static ssize_t validate_show(struct kmem_cache *s, char *buf)
4890{
4891 return 0;
4892}
4893
4894static ssize_t validate_store(struct kmem_cache *s,
4895 const char *buf, size_t length)
4896{
4897 int ret = -EINVAL;
4898
4899 if (buf[0] == '1') {
4900 ret = validate_slab_cache(s);
4901 if (ret >= 0)
4902 ret = length;
4903 }
4904 return ret;
4905}
4906SLAB_ATTR(validate);
4907
4908static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4909{
4910 if (!(s->flags & SLAB_STORE_USER))
4911 return -ENOSYS;
4912 return list_locations(s, buf, TRACK_ALLOC);
4913}
4914SLAB_ATTR_RO(alloc_calls);
4915
4916static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4917{
4918 if (!(s->flags & SLAB_STORE_USER))
4919 return -ENOSYS;
4920 return list_locations(s, buf, TRACK_FREE);
4921}
4922SLAB_ATTR_RO(free_calls);
4923#endif
4924
4925#ifdef CONFIG_FAILSLAB
4926static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4927{
4928 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4929}
4930
4931static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4932 size_t length)
4933{
4934 s->flags &= ~SLAB_FAILSLAB;
4935 if (buf[0] == '1')
4936 s->flags |= SLAB_FAILSLAB;
4937 return length;
4938}
4939SLAB_ATTR(failslab);
4940#endif
4941
4942static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4943{
4944 return 0;
4945}
4946
4947static ssize_t shrink_store(struct kmem_cache *s,
4948 const char *buf, size_t length)
4949{
4950 if (buf[0] == '1') {
4951 int rc = kmem_cache_shrink(s);
4952
4953 if (rc)
4954 return rc;
4955 } else
4956 return -EINVAL;
4957 return length;
4958}
4959SLAB_ATTR(shrink);
4960
4961#ifdef CONFIG_NUMA
4962static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4963{
4964 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4965}
4966
4967static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4968 const char *buf, size_t length)
4969{
4970 unsigned long ratio;
4971 int err;
4972
4973 err = strict_strtoul(buf, 10, &ratio);
4974 if (err)
4975 return err;
4976
4977 if (ratio <= 100)
4978 s->remote_node_defrag_ratio = ratio * 10;
4979
4980 return length;
4981}
4982SLAB_ATTR(remote_node_defrag_ratio);
4983#endif
4984
4985#ifdef CONFIG_SLUB_STATS
4986static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4987{
4988 unsigned long sum = 0;
4989 int cpu;
4990 int len;
4991 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4992
4993 if (!data)
4994 return -ENOMEM;
4995
4996 for_each_online_cpu(cpu) {
4997 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4998
4999 data[cpu] = x;
5000 sum += x;
5001 }
5002
5003 len = sprintf(buf, "%lu", sum);
5004
5005#ifdef CONFIG_SMP
5006 for_each_online_cpu(cpu) {
5007 if (data[cpu] && len < PAGE_SIZE - 20)
5008 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5009 }
5010#endif
5011 kfree(data);
5012 return len + sprintf(buf + len, "\n");
5013}
5014
5015static void clear_stat(struct kmem_cache *s, enum stat_item si)
5016{
5017 int cpu;
5018
5019 for_each_online_cpu(cpu)
5020 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5021}
5022
5023#define STAT_ATTR(si, text) \
5024static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5025{ \
5026 return show_stat(s, buf, si); \
5027} \
5028static ssize_t text##_store(struct kmem_cache *s, \
5029 const char *buf, size_t length) \
5030{ \
5031 if (buf[0] != '0') \
5032 return -EINVAL; \
5033 clear_stat(s, si); \
5034 return length; \
5035} \
5036SLAB_ATTR(text); \
5037
5038STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5039STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5040STAT_ATTR(FREE_FASTPATH, free_fastpath);
5041STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5042STAT_ATTR(FREE_FROZEN, free_frozen);
5043STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5044STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5045STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5046STAT_ATTR(ALLOC_SLAB, alloc_slab);
5047STAT_ATTR(ALLOC_REFILL, alloc_refill);
5048STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5049STAT_ATTR(FREE_SLAB, free_slab);
5050STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5051STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5052STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5053STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5054STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5055STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5056STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5057STAT_ATTR(ORDER_FALLBACK, order_fallback);
5058STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5059STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5060STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5061STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5062#endif
5063
5064static struct attribute *slab_attrs[] = {
5065 &slab_size_attr.attr,
5066 &object_size_attr.attr,
5067 &objs_per_slab_attr.attr,
5068 &order_attr.attr,
5069 &min_partial_attr.attr,
5070 &cpu_partial_attr.attr,
5071 &objects_attr.attr,
5072 &objects_partial_attr.attr,
5073 &partial_attr.attr,
5074 &cpu_slabs_attr.attr,
5075 &ctor_attr.attr,
5076 &aliases_attr.attr,
5077 &align_attr.attr,
5078 &hwcache_align_attr.attr,
5079 &reclaim_account_attr.attr,
5080 &destroy_by_rcu_attr.attr,
5081 &shrink_attr.attr,
5082 &reserved_attr.attr,
5083 &slabs_cpu_partial_attr.attr,
5084#ifdef CONFIG_SLUB_DEBUG
5085 &total_objects_attr.attr,
5086 &slabs_attr.attr,
5087 &sanity_checks_attr.attr,
5088 &trace_attr.attr,
5089 &red_zone_attr.attr,
5090 &poison_attr.attr,
5091 &store_user_attr.attr,
5092 &validate_attr.attr,
5093 &alloc_calls_attr.attr,
5094 &free_calls_attr.attr,
5095#endif
5096#ifdef CONFIG_ZONE_DMA
5097 &cache_dma_attr.attr,
5098#endif
5099#ifdef CONFIG_NUMA
5100 &remote_node_defrag_ratio_attr.attr,
5101#endif
5102#ifdef CONFIG_SLUB_STATS
5103 &alloc_fastpath_attr.attr,
5104 &alloc_slowpath_attr.attr,
5105 &free_fastpath_attr.attr,
5106 &free_slowpath_attr.attr,
5107 &free_frozen_attr.attr,
5108 &free_add_partial_attr.attr,
5109 &free_remove_partial_attr.attr,
5110 &alloc_from_partial_attr.attr,
5111 &alloc_slab_attr.attr,
5112 &alloc_refill_attr.attr,
5113 &alloc_node_mismatch_attr.attr,
5114 &free_slab_attr.attr,
5115 &cpuslab_flush_attr.attr,
5116 &deactivate_full_attr.attr,
5117 &deactivate_empty_attr.attr,
5118 &deactivate_to_head_attr.attr,
5119 &deactivate_to_tail_attr.attr,
5120 &deactivate_remote_frees_attr.attr,
5121 &deactivate_bypass_attr.attr,
5122 &order_fallback_attr.attr,
5123 &cmpxchg_double_fail_attr.attr,
5124 &cmpxchg_double_cpu_fail_attr.attr,
5125 &cpu_partial_alloc_attr.attr,
5126 &cpu_partial_free_attr.attr,
5127#endif
5128#ifdef CONFIG_FAILSLAB
5129 &failslab_attr.attr,
5130#endif
5131
5132 NULL
5133};
5134
5135static struct attribute_group slab_attr_group = {
5136 .attrs = slab_attrs,
5137};
5138
5139static ssize_t slab_attr_show(struct kobject *kobj,
5140 struct attribute *attr,
5141 char *buf)
5142{
5143 struct slab_attribute *attribute;
5144 struct kmem_cache *s;
5145 int err;
5146
5147 attribute = to_slab_attr(attr);
5148 s = to_slab(kobj);
5149
5150 if (!attribute->show)
5151 return -EIO;
5152
5153 err = attribute->show(s, buf);
5154
5155 return err;
5156}
5157
5158static ssize_t slab_attr_store(struct kobject *kobj,
5159 struct attribute *attr,
5160 const char *buf, size_t len)
5161{
5162 struct slab_attribute *attribute;
5163 struct kmem_cache *s;
5164 int err;
5165
5166 attribute = to_slab_attr(attr);
5167 s = to_slab(kobj);
5168
5169 if (!attribute->store)
5170 return -EIO;
5171
5172 err = attribute->store(s, buf, len);
5173
5174 return err;
5175}
5176
5177static void kmem_cache_release(struct kobject *kobj)
5178{
5179 struct kmem_cache *s = to_slab(kobj);
5180
5181 kfree(s->name);
5182 kfree(s);
5183}
5184
5185static const struct sysfs_ops slab_sysfs_ops = {
5186 .show = slab_attr_show,
5187 .store = slab_attr_store,
5188};
5189
5190static struct kobj_type slab_ktype = {
5191 .sysfs_ops = &slab_sysfs_ops,
5192 .release = kmem_cache_release
5193};
5194
5195static int uevent_filter(struct kset *kset, struct kobject *kobj)
5196{
5197 struct kobj_type *ktype = get_ktype(kobj);
5198
5199 if (ktype == &slab_ktype)
5200 return 1;
5201 return 0;
5202}
5203
5204static const struct kset_uevent_ops slab_uevent_ops = {
5205 .filter = uevent_filter,
5206};
5207
5208static struct kset *slab_kset;
5209
5210#define ID_STR_LENGTH 64
5211
5212
5213
5214
5215
5216static char *create_unique_id(struct kmem_cache *s)
5217{
5218 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5219 char *p = name;
5220
5221 BUG_ON(!name);
5222
5223 *p++ = ':';
5224
5225
5226
5227
5228
5229
5230
5231 if (s->flags & SLAB_CACHE_DMA)
5232 *p++ = 'd';
5233 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5234 *p++ = 'a';
5235 if (s->flags & SLAB_DEBUG_FREE)
5236 *p++ = 'F';
5237 if (!(s->flags & SLAB_NOTRACK))
5238 *p++ = 't';
5239 if (p != name + 1)
5240 *p++ = '-';
5241 p += sprintf(p, "%07d", s->size);
5242 BUG_ON(p > name + ID_STR_LENGTH - 1);
5243 return name;
5244}

static int sysfs_slab_add(struct kmem_cache *s)
{
        int err;
        const char *name;
        int unmergeable;

        if (slab_state < SYSFS)
                /* Defer until sysfs is set up (slab_sysfs_init()) */
                return 0;

        unmergeable = slab_unmergeable(s);
        if (unmergeable) {
                /*
                 * The cache can never be merged so we can use its name
                 * directly. Remove a possibly stale symlink with the same
                 * name first.
                 */
                sysfs_remove_link(&slab_kset->kobj, s->name);
                name = s->name;
        } else {
                /*
                 * Create a unique name for the slab as the target for the
                 * alias symlinks.
                 */
                name = create_unique_id(s);
        }

        s->kobj.kset = slab_kset;
        err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
        if (err) {
                kobject_put(&s->kobj);
                return err;
        }

        err = sysfs_create_group(&s->kobj, &slab_attr_group);
        if (err) {
                kobject_del(&s->kobj);
                kobject_put(&s->kobj);
                return err;
        }
        kobject_uevent(&s->kobj, KOBJ_ADD);
        if (!unmergeable) {
                /* Set up the first alias: the cache's own name */
                sysfs_slab_alias(s, s->name);
                kfree(name);
        }
        return 0;
}
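
/*
 * Resulting sysfs layout, for illustration only: an unmergeable cache
 * (for instance one created with debug flags or a constructor) appears
 * under its own name, e.g. /sys/kernel/slab/<name>, while a mergeable
 * cache is registered under its unique id (":t-0000192" style) and its
 * human-readable name is only added as a symlink to that directory via
 * sysfs_slab_alias() below.
 */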

static void sysfs_slab_remove(struct kmem_cache *s)
{
        if (slab_state < SYSFS)
                /*
                 * Sysfs has not been set up yet, so there is nothing to
                 * remove.
                 */
                return;

        kobject_uevent(&s->kobj, KOBJ_REMOVE);
        kobject_del(&s->kobj);
        kobject_put(&s->kobj);
}

/*
 * Aliases requested during bootup must be buffered until sysfs becomes
 * available, lest that information be lost.
 */
struct saved_alias {
        struct kmem_cache *s;
        const char *name;
        struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
        struct saved_alias *al;

        if (slab_state == SYSFS) {
                /*
                 * Remove a possibly leftover link before creating the new
                 * one.
                 */
                sysfs_remove_link(&slab_kset->kobj, name);
                return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
        }

        al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
        if (!al)
                return -ENOMEM;

        al->s = s;
        al->name = name;
        al->next = alias_list;
        alias_list = al;
        return 0;
}
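
/*
 * Illustration (not part of the original source): if a cache alias is
 * requested before slab_sysfs_init() has run, typically when an early
 * kmem_cache_create() call is merged into an existing cache, there is no
 * "slab" kset to link into yet, so the alias is only buffered in a
 * saved_alias entry above; slab_sysfs_init() below drains alias_list and
 * creates the real symlinks once sysfs is up.
 */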

static int __init slab_sysfs_init(void)
{
        struct kmem_cache *s;
        int err;

        down_write(&slub_lock);

        slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
        if (!slab_kset) {
                up_write(&slub_lock);
                printk(KERN_ERR "Cannot register slab subsystem.\n");
                return -ENOSYS;
        }

        slab_state = SYSFS;

        list_for_each_entry(s, &slab_caches, list) {
                err = sysfs_slab_add(s);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab %s"
                                " to sysfs\n", s->name);
        }

        while (alias_list) {
                struct saved_alias *al = alias_list;

                alias_list = alias_list->next;
                err = sysfs_slab_alias(al->s, al->name);
                if (err)
                        printk(KERN_ERR "SLUB: Unable to add boot slab alias"
                                " %s to sysfs\n", al->name);
                kfree(al);
        }

        up_write(&slub_lock);
        resiliency_test();
        return 0;
}

__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
static void print_slabinfo_header(struct seq_file *m)
{
        seq_puts(m, "slabinfo - version: 2.1\n");
        seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
                "<objperslab> <pagesperslab>");
        seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
        seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
        seq_putc(m, '\n');
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
        loff_t n = *pos;

        down_read(&slub_lock);
        if (!n)
                print_slabinfo_header(m);

        return seq_list_start(&slab_caches, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &slab_caches, pos);
}

static void s_stop(struct seq_file *m, void *p)
{
        up_read(&slub_lock);
}

static int s_show(struct seq_file *m, void *p)
{
        unsigned long nr_partials = 0;
        unsigned long nr_slabs = 0;
        unsigned long nr_inuse = 0;
        unsigned long nr_objs = 0;
        unsigned long nr_free = 0;
        struct kmem_cache *s;
        int node;

        s = list_entry(p, struct kmem_cache, list);

        for_each_online_node(node) {
                struct kmem_cache_node *n = get_node(s, node);

                if (!n)
                        continue;

                nr_partials += n->nr_partial;
                nr_slabs += atomic_long_read(&n->nr_slabs);
                nr_objs += atomic_long_read(&n->total_objects);
                nr_free += count_partial(n, count_free);
        }

        nr_inuse = nr_objs - nr_free;

        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
                   nr_objs, s->size, oo_objects(s->oo),
                   (1 << oo_order(s->oo)));
        seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
        seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
                   0UL);
        seq_putc(m, '\n');
        return 0;
}
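
/*
 * Example output (illustrative numbers only): with the format strings
 * above, a cache named "kmalloc-64" holding 2048 objects of size 64,
 * 64 objects per slab and one page per slab would be reported roughly as
 *
 *      kmalloc-64   2040   2048     64   64    1 : tunables 0 0 0 : slabdata 32 32 0
 *
 * The tunables are always printed as zero since SLUB has no SLAB-style
 * per-cache tunables, and <active_slabs> is reported equal to <num_slabs>.
 */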

static const struct seq_operations slabinfo_op = {
        .start = s_start,
        .next = s_next,
        .stop = s_stop,
        .show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
        .open = slabinfo_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init slab_proc_init(void)
{
        proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
        return 0;
}
module_init(slab_proc_init);
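
/*
 * Usage note (not in the original source): with CONFIG_SLABINFO enabled,
 * the proc_create() call above registers /proc/slabinfo, so the per-cache
 * statistics can be inspected with e.g. "cat /proc/slabinfo" or by tools
 * such as slabtop(1) that parse the "slabinfo - version: 2.1" format.
 */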
#endif /* CONFIG_SLABINFO */