/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmemtrace.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
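/*
 * Locking overview, as implemented below:
 *  - each slab page is locked with a bit spinlock in page->flags
 *    (slab_lock()/slab_unlock());
 *  - the per-node partial and full lists are protected by the node's
 *    list_lock;
 *  - per-cpu slabs are only manipulated with interrupts disabled;
 *  - the list of all slab caches is protected by the slub_lock rwsem.
 */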
#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG 1
#else
#define SLABDEBUG 0
#endif

#undef SLUB_RESILIENCY_TEST

#define MIN_PARTIAL 5

#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	65535

#define __OBJECT_POISON		0x80000000
#define __SYSFS_ADD_DEFERRED	0x40000000

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,
	PARTIAL,
	UP,
	SYSFS
} slab_state = DOWN;

static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

struct track {
	unsigned long addr;
	int cpu;
	int pid;
	unsigned long when;
};

enum track_item { TRACK_ALLOC, TRACK_FREE };
#ifdef CONFIG_SLUB_DEBUG
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);

#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s)
{
	kfree(s);
}

#endif

static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	c->stat[si]++;
#endif
}

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
	return s->node[node];
#else
	return &s->local_node;
#endif
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
	return s->cpu_slab[cpu];
#else
	return &s->cpu_slab;
#endif
}
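/*
 * Free objects are chained through a pointer stored inside the object
 * itself at offset s->offset (see get_freepointer()/set_freepointer()).
 * A slab's page order and object count are packed into a single word:
 * the order is kept above OO_SHIFT and the number of objects per slab
 * in the low OO_MASK bits (oo_make()/oo_order()/oo_objects()).
 */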
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))

static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
						unsigned long size)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + (PAGE_SIZE << order) / size
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}
324#ifdef CONFIG_SLUB_DEBUG
325
326
327
328#ifdef CONFIG_SLUB_DEBUG_ON
329static int slub_debug = DEBUG_DEFAULT_FLAGS;
330#else
331static int slub_debug;
332#endif
333
334static char *slub_debug_slabs;
335static int disable_higher_order_debug;
336
337
338
339
340static void print_section(char *text, u8 *addr, unsigned int length)
341{
342 int i, offset;
343 int newline = 1;
344 char ascii[17];
345
346 ascii[16] = 0;
347
348 for (i = 0; i < length; i++) {
349 if (newline) {
350 printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
351 newline = 0;
352 }
353 printk(KERN_CONT " %02x", addr[i]);
354 offset = i % 16;
355 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
356 if (offset == 15) {
357 printk(KERN_CONT " %s\n", ascii);
358 newline = 1;
359 }
360 }
361 if (!newline) {
362 i %= 16;
363 while (i < 16) {
364 printk(KERN_CONT " ");
365 ascii[i] = ' ';
366 i++;
367 }
368 printk(KERN_CONT " %s\n", ascii);
369 }
370}
371
372static struct track *get_track(struct kmem_cache *s, void *object,
373 enum track_item alloc)
374{
375 struct track *p;
376
377 if (s->offset)
378 p = object + s->offset + sizeof(void *);
379 else
380 p = object + s->inuse;
381
382 return p + alloc;
383}
384
385static void set_track(struct kmem_cache *s, void *object,
386 enum track_item alloc, unsigned long addr)
387{
388 struct track *p = get_track(s, object, alloc);
389
390 if (addr) {
391 p->addr = addr;
392 p->cpu = smp_processor_id();
393 p->pid = current->pid;
394 p->when = jiffies;
395 } else
396 memset(p, 0, sizeof(struct track));
397}
398
399static void init_tracking(struct kmem_cache *s, void *object)
400{
401 if (!(s->flags & SLAB_STORE_USER))
402 return;
403
404 set_track(s, object, TRACK_FREE, 0UL);
405 set_track(s, object, TRACK_ALLOC, 0UL);
406}
407
408static void print_track(const char *s, struct track *t)
409{
410 if (!t->addr)
411 return;
412
413 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
414 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
415}
416
417static void print_tracking(struct kmem_cache *s, void *object)
418{
419 if (!(s->flags & SLAB_STORE_USER))
420 return;
421
422 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
423 print_track("Freed", get_track(s, object, TRACK_FREE));
424}
425
426static void print_page_info(struct page *page)
427{
428 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
429 page, page->objects, page->inuse, page->freelist, page->flags);
430
431}
432
433static void slab_bug(struct kmem_cache *s, char *fmt, ...)
434{
435 va_list args;
436 char buf[100];
437
438 va_start(args, fmt);
439 vsnprintf(buf, sizeof(buf), fmt, args);
440 va_end(args);
441 printk(KERN_ERR "========================================"
442 "=====================================\n");
443 printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
444 printk(KERN_ERR "----------------------------------------"
445 "-------------------------------------\n\n");
446}
447
448static void slab_fix(struct kmem_cache *s, char *fmt, ...)
449{
450 va_list args;
451 char buf[100];
452
453 va_start(args, fmt);
454 vsnprintf(buf, sizeof(buf), fmt, args);
455 va_end(args);
456 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
457}
458
459static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
460{
461 unsigned int off;
462 u8 *addr = page_address(page);
463
464 print_tracking(s, p);
465
466 print_page_info(page);
467
468 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
469 p, p - addr, get_freepointer(s, p));
470
471 if (p > addr + 16)
472 print_section("Bytes b4", p - 16, 16);
473
474 print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
475
476 if (s->flags & SLAB_RED_ZONE)
477 print_section("Redzone", p + s->objsize,
478 s->inuse - s->objsize);
479
480 if (s->offset)
481 off = s->offset + sizeof(void *);
482 else
483 off = s->inuse;
484
485 if (s->flags & SLAB_STORE_USER)
486 off += 2 * sizeof(struct track);
487
488 if (off != s->size)
489
490 print_section("Padding", p + off, s->size - off);
491
492 dump_stack();
493}
494
495static void object_err(struct kmem_cache *s, struct page *page,
496 u8 *object, char *reason)
497{
498 slab_bug(s, "%s", reason);
499 print_trailer(s, page, object);
500}
501
502static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
503{
504 va_list args;
505 char buf[100];
506
507 va_start(args, fmt);
508 vsnprintf(buf, sizeof(buf), fmt, args);
509 va_end(args);
510 slab_bug(s, "%s", buf);
511 print_page_info(page);
512 dump_stack();
513}
514
515static void init_object(struct kmem_cache *s, void *object, int active)
516{
517 u8 *p = object;
518
519 if (s->flags & __OBJECT_POISON) {
520 memset(p, POISON_FREE, s->objsize - 1);
521 p[s->objsize - 1] = POISON_END;
522 }
523
524 if (s->flags & SLAB_RED_ZONE)
525 memset(p + s->objsize,
526 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
527 s->inuse - s->objsize);
528}
529
530static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
531{
532 while (bytes) {
533 if (*start != (u8)value)
534 return start;
535 start++;
536 bytes--;
537 }
538 return NULL;
539}
540
541static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
542 void *from, void *to)
543{
544 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
545 memset(from, data, to - from);
546}
547
548static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
549 u8 *object, char *what,
550 u8 *start, unsigned int value, unsigned int bytes)
551{
552 u8 *fault;
553 u8 *end;
554
555 fault = check_bytes(start, value, bytes);
556 if (!fault)
557 return 1;
558
559 end = start + bytes;
560 while (end > fault && end[-1] == value)
561 end--;
562
563 slab_bug(s, "%s overwritten", what);
564 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
565 fault, end - 1, fault[0], value);
566 print_trailer(s, page, object);
567
568 restore_bytes(s, what, value, fault, end);
569 return 0;
570}
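/*
 * Object layout within a slab, as established by calculate_sizes():
 *
 *	object address
 *		s->objsize bytes handed out to the user.  A poisoned free
 *		object is filled with POISON_FREE and ends with POISON_END.
 *	object + s->objsize
 *		Optional red zone word (SLAB_RED_ZONE), SLUB_RED_ACTIVE
 *		while the object is allocated, SLUB_RED_INACTIVE while free.
 *	object + s->inuse
 *		Free pointer, if it had to be placed outside the object
 *		proper (s->offset != 0), followed by two struct track
 *		records when SLAB_STORE_USER is set.
 *	Remaining space up to s->size is padding filled with POISON_INUSE.
 */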
610static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
611{
612 unsigned long off = s->inuse;
613
614 if (s->offset)
615
616 off += sizeof(void *);
617
618 if (s->flags & SLAB_STORE_USER)
619
620 off += 2 * sizeof(struct track);
621
622 if (s->size == off)
623 return 1;
624
625 return check_bytes_and_report(s, page, p, "Object padding",
626 p + off, POISON_INUSE, s->size - off);
627}
628
629
630static int slab_pad_check(struct kmem_cache *s, struct page *page)
631{
632 u8 *start;
633 u8 *fault;
634 u8 *end;
635 int length;
636 int remainder;
637
638 if (!(s->flags & SLAB_POISON))
639 return 1;
640
641 start = page_address(page);
642 length = (PAGE_SIZE << compound_order(page));
643 end = start + length;
644 remainder = length % s->size;
645 if (!remainder)
646 return 1;
647
648 fault = check_bytes(end - remainder, POISON_INUSE, remainder);
649 if (!fault)
650 return 1;
651 while (end > fault && end[-1] == POISON_INUSE)
652 end--;
653
654 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
655 print_section("Padding", end - remainder, remainder);
656
657 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
658 return 0;
659}
660
661static int check_object(struct kmem_cache *s, struct page *page,
662 void *object, int active)
663{
664 u8 *p = object;
665 u8 *endobject = object + s->objsize;
666
667 if (s->flags & SLAB_RED_ZONE) {
668 unsigned int red =
669 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
670
671 if (!check_bytes_and_report(s, page, object, "Redzone",
672 endobject, red, s->inuse - s->objsize))
673 return 0;
674 } else {
675 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
676 check_bytes_and_report(s, page, p, "Alignment padding",
677 endobject, POISON_INUSE, s->inuse - s->objsize);
678 }
679 }
680
681 if (s->flags & SLAB_POISON) {
682 if (!active && (s->flags & __OBJECT_POISON) &&
683 (!check_bytes_and_report(s, page, p, "Poison", p,
684 POISON_FREE, s->objsize - 1) ||
685 !check_bytes_and_report(s, page, p, "Poison",
686 p + s->objsize - 1, POISON_END, 1)))
687 return 0;
688
689
690
691 check_pad_bytes(s, page, p);
692 }
693
694 if (!s->offset && active)
695
696
697
698
699 return 1;
700
701
702 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
703 object_err(s, page, p, "Freepointer corrupt");
704
705
706
707
708
709 set_freepointer(s, p, NULL);
710 return 0;
711 }
712 return 1;
713}
714
715static int check_slab(struct kmem_cache *s, struct page *page)
716{
717 int maxobj;
718
719 VM_BUG_ON(!irqs_disabled());
720
721 if (!PageSlab(page)) {
722 slab_err(s, page, "Not a valid slab page");
723 return 0;
724 }
725
	maxobj = (PAGE_SIZE << compound_order(page)) / s->size;
	if (page->objects > maxobj) {
		slab_err(s, page, "objects %u > max %u",
			page->objects, maxobj);
		return 0;
	}
	if (page->inuse > page->objects) {
		slab_err(s, page, "inuse %u > max %u",
			page->inuse, page->objects);
		return 0;
	}
737
738 slab_pad_check(s, page);
739 return 1;
740}
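/*
 * Walk the freelist of a slab and determine whether the object "search"
 * is on it.  With search == NULL this just verifies the freelist and the
 * object counters, repairing them if they turn out to be corrupted.
 */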
746static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
747{
748 int nr = 0;
749 void *fp = page->freelist;
750 void *object = NULL;
751 unsigned long max_objects;
752
753 while (fp && nr <= page->objects) {
754 if (fp == search)
755 return 1;
756 if (!check_valid_pointer(s, page, fp)) {
757 if (object) {
758 object_err(s, page, object,
759 "Freechain corrupt");
760 set_freepointer(s, object, NULL);
761 break;
762 } else {
763 slab_err(s, page, "Freepointer corrupt");
764 page->freelist = NULL;
765 page->inuse = page->objects;
766 slab_fix(s, "Freelist cleared");
767 return 0;
768 }
769 break;
770 }
771 object = fp;
772 fp = get_freepointer(s, object);
773 nr++;
774 }
775
776 max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
777 if (max_objects > MAX_OBJS_PER_PAGE)
778 max_objects = MAX_OBJS_PER_PAGE;
779
780 if (page->objects != max_objects) {
781 slab_err(s, page, "Wrong number of objects. Found %d but "
782 "should be %d", page->objects, max_objects);
783 page->objects = max_objects;
784 slab_fix(s, "Number of objects adjusted.");
785 }
786 if (page->inuse != page->objects - nr) {
787 slab_err(s, page, "Wrong object count. Counter is %d but "
788 "counted were %d", page->inuse, page->objects - nr);
789 page->inuse = page->objects - nr;
790 slab_fix(s, "Object count adjusted.");
791 }
792 return search == NULL;
793}
794
795static void trace(struct kmem_cache *s, struct page *page, void *object,
796 int alloc)
797{
798 if (s->flags & SLAB_TRACE) {
799 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
800 s->name,
801 alloc ? "alloc" : "free",
802 object, page->inuse,
803 page->freelist);
804
805 if (!alloc)
806 print_section("Object", (void *)object, s->objsize);
807
808 dump_stack();
809 }
810}
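/*
 * Tracking of fully allocated slabs for debugging purposes.  The per-node
 * full list is only maintained for caches with SLAB_STORE_USER set.
 */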
815static void add_full(struct kmem_cache_node *n, struct page *page)
816{
817 spin_lock(&n->list_lock);
818 list_add(&page->lru, &n->full);
819 spin_unlock(&n->list_lock);
820}
821
822static void remove_full(struct kmem_cache *s, struct page *page)
823{
824 struct kmem_cache_node *n;
825
826 if (!(s->flags & SLAB_STORE_USER))
827 return;
828
829 n = get_node(s, page_to_nid(page));
830
831 spin_lock(&n->list_lock);
832 list_del(&page->lru);
833 spin_unlock(&n->list_lock);
834}
835
836
837static inline unsigned long slabs_node(struct kmem_cache *s, int node)
838{
839 struct kmem_cache_node *n = get_node(s, node);
840
841 return atomic_long_read(&n->nr_slabs);
842}
843
844static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
845{
846 return atomic_long_read(&n->nr_slabs);
847}
848
849static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
850{
851 struct kmem_cache_node *n = get_node(s, node);
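	/*
	 * n may still be NULL during early boot while the kmem_cache_node
	 * for this node is itself being set up (see
	 * early_kmem_cache_node_alloc()); skip the accounting in that case.
	 */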
859 if (!NUMA_BUILD || n) {
860 atomic_long_inc(&n->nr_slabs);
861 atomic_long_add(objects, &n->total_objects);
862 }
863}
864static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
865{
866 struct kmem_cache_node *n = get_node(s, node);
867
868 atomic_long_dec(&n->nr_slabs);
869 atomic_long_sub(objects, &n->total_objects);
870}
871
872
873static void setup_object_debug(struct kmem_cache *s, struct page *page,
874 void *object)
875{
876 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
877 return;
878
879 init_object(s, object, 0);
880 init_tracking(s, object);
881}
882
883static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
884 void *object, unsigned long addr)
885{
886 if (!check_slab(s, page))
887 goto bad;
888
889 if (!on_freelist(s, page, object)) {
890 object_err(s, page, object, "Object already allocated");
891 goto bad;
892 }
893
894 if (!check_valid_pointer(s, page, object)) {
895 object_err(s, page, object, "Freelist Pointer check fails");
896 goto bad;
897 }
898
899 if (!check_object(s, page, object, 0))
900 goto bad;
901
902
903 if (s->flags & SLAB_STORE_USER)
904 set_track(s, object, TRACK_ALLOC, addr);
905 trace(s, page, object, 1);
906 init_object(s, object, 1);
907 return 1;
908
909bad:
910 if (PageSlab(page)) {
911
912
913
914
915
916 slab_fix(s, "Marking all objects used");
917 page->inuse = page->objects;
918 page->freelist = NULL;
919 }
920 return 0;
921}
922
923static int free_debug_processing(struct kmem_cache *s, struct page *page,
924 void *object, unsigned long addr)
925{
926 if (!check_slab(s, page))
927 goto fail;
928
929 if (!check_valid_pointer(s, page, object)) {
930 slab_err(s, page, "Invalid object pointer 0x%p", object);
931 goto fail;
932 }
933
934 if (on_freelist(s, page, object)) {
935 object_err(s, page, object, "Object already free");
936 goto fail;
937 }
938
939 if (!check_object(s, page, object, 1))
940 return 0;
941
942 if (unlikely(s != page->slab)) {
943 if (!PageSlab(page)) {
944 slab_err(s, page, "Attempt to free object(0x%p) "
945 "outside of slab", object);
946 } else if (!page->slab) {
947 printk(KERN_ERR
948 "SLUB <none>: no slab for object 0x%p.\n",
949 object);
950 dump_stack();
951 } else
952 object_err(s, page, object,
953 "page slab pointer corrupt.");
954 goto fail;
955 }
956
957
958 if (!PageSlubFrozen(page) && !page->freelist)
959 remove_full(s, page);
960 if (s->flags & SLAB_STORE_USER)
961 set_track(s, object, TRACK_FREE, addr);
962 trace(s, page, object, 0);
963 init_object(s, object, 0);
964 return 1;
965
966fail:
967 slab_fix(s, "Object at 0x%p not freed", object);
968 return 0;
969}
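/*
 * Parse the slub_debug= boot option.  Format:
 *	slub_debug=<flags>[,<slab name>]
 * where <flags> is any combination of
 *	f - enable sanity checks on free (SLAB_DEBUG_FREE)
 *	z - red zoning
 *	p - object poisoning
 *	u - user (alloc/free) tracking
 *	t - tracing
 * An empty flag list selects the default debug flags, '-' selects none,
 * and 'o' keeps the defaults but drops debug metadata from caches where
 * it would force a higher page order.
 */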
971static int __init setup_slub_debug(char *str)
972{
973 slub_debug = DEBUG_DEFAULT_FLAGS;
974 if (*str++ != '=' || !*str)
975
976
977
978 goto out;
979
980 if (*str == ',')
981
982
983
984
985 goto check_slabs;
986
987 if (tolower(*str) == 'o') {
988
989
990
991
992 disable_higher_order_debug = 1;
993 goto out;
994 }
995
996 slub_debug = 0;
997 if (*str == '-')
998
999
1000
1001 goto out;
1002
1003
1004
1005
1006 for (; *str && *str != ','; str++) {
1007 switch (tolower(*str)) {
1008 case 'f':
1009 slub_debug |= SLAB_DEBUG_FREE;
1010 break;
1011 case 'z':
1012 slub_debug |= SLAB_RED_ZONE;
1013 break;
1014 case 'p':
1015 slub_debug |= SLAB_POISON;
1016 break;
1017 case 'u':
1018 slub_debug |= SLAB_STORE_USER;
1019 break;
1020 case 't':
1021 slub_debug |= SLAB_TRACE;
1022 break;
1023 default:
1024 printk(KERN_ERR "slub_debug option '%c' "
1025 "unknown. skipped\n", *str);
1026 }
1027 }
1028
1029check_slabs:
1030 if (*str == ',')
1031 slub_debug_slabs = str + 1;
1032out:
1033 return 1;
1034}
1035
1036__setup("slub_debug", setup_slub_debug);
1037
1038static unsigned long kmem_cache_flags(unsigned long objsize,
1039 unsigned long flags, const char *name,
1040 void (*ctor)(void *))
1041{
1042
1043
1044
1045 if (slub_debug && (!slub_debug_slabs ||
1046 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))
1047 flags |= slub_debug;
1048
1049 return flags;
1050}
1051#else
1052static inline void setup_object_debug(struct kmem_cache *s,
1053 struct page *page, void *object) {}
1054
1055static inline int alloc_debug_processing(struct kmem_cache *s,
1056 struct page *page, void *object, unsigned long addr) { return 0; }
1057
1058static inline int free_debug_processing(struct kmem_cache *s,
1059 struct page *page, void *object, unsigned long addr) { return 0; }
1060
1061static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1062 { return 1; }
1063static inline int check_object(struct kmem_cache *s, struct page *page,
1064 void *object, int active) { return 1; }
1065static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
1066static inline unsigned long kmem_cache_flags(unsigned long objsize,
1067 unsigned long flags, const char *name,
1068 void (*ctor)(void *))
1069{
1070 return flags;
1071}
1072#define slub_debug 0
1073
1074#define disable_higher_order_debug 0
1075
1076static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1077 { return 0; }
1078static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1079 { return 0; }
1080static inline void inc_slabs_node(struct kmem_cache *s, int node,
1081 int objects) {}
1082static inline void dec_slabs_node(struct kmem_cache *s, int node,
1083 int objects) {}
1084#endif
1089static inline struct page *alloc_slab_page(gfp_t flags, int node,
1090 struct kmem_cache_order_objects oo)
1091{
1092 int order = oo_order(oo);
1093
1094 flags |= __GFP_NOTRACK;
1095
1096 if (node == -1)
1097 return alloc_pages(flags, order);
1098 else
1099 return alloc_pages_node(node, flags, order);
1100}
1101
1102static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1103{
1104 struct page *page;
1105 struct kmem_cache_order_objects oo = s->oo;
1106 gfp_t alloc_gfp;
1107
1108 flags |= s->allocflags;
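	/*
	 * Try the preferred (higher) order first, without OOM warnings,
	 * retries or __GFP_NOFAIL; only if that fails fall back to the
	 * minimum order using the caller's original flags.
	 */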
1114 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1115
1116 page = alloc_slab_page(alloc_gfp, node, oo);
1117 if (unlikely(!page)) {
1118 oo = s->min;
1119
1120
1121
1122
1123 page = alloc_slab_page(flags, node, oo);
1124 if (!page)
1125 return NULL;
1126
1127 stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
1128 }
1129
1130 if (kmemcheck_enabled
1131 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1132 int pages = 1 << oo_order(oo);
1133
1134 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1135
1136
1137
1138
1139
1140 if (s->ctor)
1141 kmemcheck_mark_uninitialized_pages(page, pages);
1142 else
1143 kmemcheck_mark_unallocated_pages(page, pages);
1144 }
1145
1146 page->objects = oo_objects(oo);
1147 mod_zone_page_state(page_zone(page),
1148 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1149 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1150 1 << oo_order(oo));
1151
1152 return page;
1153}
1154
1155static void setup_object(struct kmem_cache *s, struct page *page,
1156 void *object)
1157{
1158 setup_object_debug(s, page, object);
1159 if (unlikely(s->ctor))
1160 s->ctor(object);
1161}
1162
1163static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1164{
1165 struct page *page;
1166 void *start;
1167 void *last;
1168 void *p;
1169
1170 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1171
1172 page = allocate_slab(s,
1173 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1174 if (!page)
1175 goto out;
1176
1177 inc_slabs_node(s, page_to_nid(page), page->objects);
1178 page->slab = s;
1179 page->flags |= 1 << PG_slab;
1180 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
1181 SLAB_STORE_USER | SLAB_TRACE))
1182 __SetPageSlubDebug(page);
1183
1184 start = page_address(page);
1185
1186 if (unlikely(s->flags & SLAB_POISON))
1187 memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
1188
1189 last = start;
1190 for_each_object(p, s, start, page->objects) {
1191 setup_object(s, page, last);
1192 set_freepointer(s, last, p);
1193 last = p;
1194 }
1195 setup_object(s, page, last);
1196 set_freepointer(s, last, NULL);
1197
1198 page->freelist = start;
1199 page->inuse = 0;
1200out:
1201 return page;
1202}
1203
1204static void __free_slab(struct kmem_cache *s, struct page *page)
1205{
1206 int order = compound_order(page);
1207 int pages = 1 << order;
1208
1209 if (unlikely(SLABDEBUG && PageSlubDebug(page))) {
1210 void *p;
1211
1212 slab_pad_check(s, page);
1213 for_each_object(p, s, page_address(page),
1214 page->objects)
1215 check_object(s, page, p, 0);
1216 __ClearPageSlubDebug(page);
1217 }
1218
1219 kmemcheck_free_shadow(page, compound_order(page));
1220
1221 mod_zone_page_state(page_zone(page),
1222 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1223 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1224 -pages);
1225
1226 __ClearPageSlab(page);
1227 reset_page_mapcount(page);
1228 if (current->reclaim_state)
1229 current->reclaim_state->reclaimed_slab += pages;
1230 __free_pages(page, order);
1231}
1232
1233static void rcu_free_slab(struct rcu_head *h)
1234{
1235 struct page *page;
1236
1237 page = container_of((struct list_head *)h, struct page, lru);
1238 __free_slab(page->slab, page);
1239}
1240
1241static void free_slab(struct kmem_cache *s, struct page *page)
1242{
1243 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1244
1245
1246
1247 struct rcu_head *head = (void *)&page->lru;
1248
1249 call_rcu(head, rcu_free_slab);
1250 } else
1251 __free_slab(s, page);
1252}
1253
1254static void discard_slab(struct kmem_cache *s, struct page *page)
1255{
1256 dec_slabs_node(s, page_to_nid(page), page->objects);
1257 free_slab(s, page);
1258}
1263static __always_inline void slab_lock(struct page *page)
1264{
1265 bit_spin_lock(PG_locked, &page->flags);
1266}
1267
1268static __always_inline void slab_unlock(struct page *page)
1269{
1270 __bit_spin_unlock(PG_locked, &page->flags);
1271}
1272
1273static __always_inline int slab_trylock(struct page *page)
1274{
1275 int rc = 1;
1276
1277 rc = bit_spin_trylock(PG_locked, &page->flags);
1278 return rc;
1279}
1284static void add_partial(struct kmem_cache_node *n,
1285 struct page *page, int tail)
1286{
1287 spin_lock(&n->list_lock);
1288 n->nr_partial++;
1289 if (tail)
1290 list_add_tail(&page->lru, &n->partial);
1291 else
1292 list_add(&page->lru, &n->partial);
1293 spin_unlock(&n->list_lock);
1294}
1295
1296static void remove_partial(struct kmem_cache *s, struct page *page)
1297{
1298 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1299
1300 spin_lock(&n->list_lock);
1301 list_del(&page->lru);
1302 n->nr_partial--;
1303 spin_unlock(&n->list_lock);
1304}
1311static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
1312 struct page *page)
1313{
1314 if (slab_trylock(page)) {
1315 list_del(&page->lru);
1316 n->nr_partial--;
1317 __SetPageSlubFrozen(page);
1318 return 1;
1319 }
1320 return 0;
1321}
1326static struct page *get_partial_node(struct kmem_cache_node *n)
1327{
1328 struct page *page;
1329
1330
1331
1332
1333
1334
1335
1336 if (!n || !n->nr_partial)
1337 return NULL;
1338
1339 spin_lock(&n->list_lock);
1340 list_for_each_entry(page, &n->partial, lru)
1341 if (lock_and_freeze_slab(n, page))
1342 goto out;
1343 page = NULL;
1344out:
1345 spin_unlock(&n->list_lock);
1346 return page;
1347}
1352static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1353{
1354#ifdef CONFIG_NUMA
1355 struct zonelist *zonelist;
1356 struct zoneref *z;
1357 struct zone *zone;
1358 enum zone_type high_zoneidx = gfp_zone(flags);
1359 struct page *page;
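	/*
	 * s->remote_node_defrag_ratio controls how often we refill from a
	 * remote node's partial list instead of allocating a new local
	 * slab; 0 disables taking partial slabs from remote nodes.
	 */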
1379 if (!s->remote_node_defrag_ratio ||
1380 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1381 return NULL;
1382
1383 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
1384 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1385 struct kmem_cache_node *n;
1386
1387 n = get_node(s, zone_to_nid(zone));
1388
1389 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1390 n->nr_partial > s->min_partial) {
1391 page = get_partial_node(n);
1392 if (page)
1393 return page;
1394 }
1395 }
1396#endif
1397 return NULL;
1398}
1403static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1404{
1405 struct page *page;
1406 int searchnode = (node == -1) ? numa_node_id() : node;
1407
1408 page = get_partial_node(get_node(s, searchnode));
1409 if (page || (flags & __GFP_THISNODE))
1410 return page;
1411
1412 return get_any_partial(s, flags);
1413}
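/*
 * Move a just-unfrozen cpu slab to the right place: back onto the node's
 * partial list if it still has free objects, onto the full list (debug
 * caches with SLAB_STORE_USER) if it is fully allocated, or discard it
 * if it is empty and the node already holds enough partial slabs.
 */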
1422static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1423{
1424 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1425 struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
1426
1427 __ClearPageSlubFrozen(page);
1428 if (page->inuse) {
1429
1430 if (page->freelist) {
1431 add_partial(n, page, tail);
1432 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1433 } else {
1434 stat(c, DEACTIVATE_FULL);
1435 if (SLABDEBUG && PageSlubDebug(page) &&
1436 (s->flags & SLAB_STORE_USER))
1437 add_full(n, page);
1438 }
1439 slab_unlock(page);
1440 } else {
1441 stat(c, DEACTIVATE_EMPTY);
1442 if (n->nr_partial < s->min_partial) {
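			/*
			 * Keep empty slabs on the partial list while the
			 * node is below min_partial, so the next allocation
			 * does not have to go back to the page allocator.
			 */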
1453 add_partial(n, page, 1);
1454 slab_unlock(page);
1455 } else {
1456 slab_unlock(page);
1457 stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
1458 discard_slab(s, page);
1459 }
1460 }
1461}
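/*
 * Merge whatever is left on the per-cpu lockless freelist back into the
 * slab's own freelist, then unfreeze the slab.
 */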
1466static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1467{
1468 struct page *page = c->page;
1469 int tail = 1;
1470
1471 if (page->freelist)
1472 stat(c, DEACTIVATE_REMOTE_FREES);
1478 while (unlikely(c->freelist)) {
1479 void **object;
1480
1481 tail = 0;
1482
1483
1484 object = c->freelist;
1485 c->freelist = c->freelist[c->offset];
1486
1487
1488 object[c->offset] = page->freelist;
1489 page->freelist = object;
1490 page->inuse--;
1491 }
1492 c->page = NULL;
1493 unfreeze_slab(s, page, tail);
1494}
1495
1496static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1497{
1498 stat(c, CPUSLAB_FLUSH);
1499 slab_lock(c->page);
1500 deactivate_slab(s, c);
1501}
1508static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1509{
1510 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1511
1512 if (likely(c && c->page))
1513 flush_slab(s, c);
1514}
1515
1516static void flush_cpu_slab(void *d)
1517{
1518 struct kmem_cache *s = d;
1519
1520 __flush_cpu_slab(s, smp_processor_id());
1521}
1522
1523static void flush_all(struct kmem_cache *s)
1524{
1525 on_each_cpu(flush_cpu_slab, s, 1);
1526}
1532static inline int node_match(struct kmem_cache_cpu *c, int node)
1533{
1534#ifdef CONFIG_NUMA
1535 if (node != -1 && c->node != node)
1536 return 0;
1537#endif
1538 return 1;
1539}
1540
1541static int count_free(struct page *page)
1542{
1543 return page->objects - page->inuse;
1544}
1545
1546static unsigned long count_partial(struct kmem_cache_node *n,
1547 int (*get_count)(struct page *))
1548{
1549 unsigned long flags;
1550 unsigned long x = 0;
1551 struct page *page;
1552
1553 spin_lock_irqsave(&n->list_lock, flags);
1554 list_for_each_entry(page, &n->partial, lru)
1555 x += get_count(page);
1556 spin_unlock_irqrestore(&n->list_lock, flags);
1557 return x;
1558}
1559
1560static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
1561{
1562#ifdef CONFIG_SLUB_DEBUG
1563 return atomic_long_read(&n->total_objects);
1564#else
1565 return 0;
1566#endif
1567}
1568
1569static noinline void
1570slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
1571{
1572 int node;
1573
1574 printk(KERN_WARNING
1575 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1576 nid, gfpflags);
1577 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
1578 "default order: %d, min order: %d\n", s->name, s->objsize,
1579 s->size, oo_order(s->oo), oo_order(s->min));
1580
1581 if (oo_order(s->min) > get_order(s->objsize))
1582 printk(KERN_WARNING " %s debugging increased min order, use "
1583 "slub_debug=O to disable.\n", s->name);
1584
1585 for_each_online_node(node) {
1586 struct kmem_cache_node *n = get_node(s, node);
1587 unsigned long nr_slabs;
1588 unsigned long nr_objs;
1589 unsigned long nr_free;
1590
1591 if (!n)
1592 continue;
1593
1594 nr_free = count_partial(n, count_free);
1595 nr_slabs = node_nr_slabs(n);
1596 nr_objs = node_nr_objs(n);
1597
1598 printk(KERN_WARNING
1599 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
1600 node, nr_slabs, nr_objs, nr_free);
1601 }
1602}
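/*
 * Slow path of the allocator, entered when the lockless per-cpu freelist
 * is empty or the cpu slab is on the wrong node: refill from the cpu
 * slab's own freelist, then from a node partial list, and finally get a
 * new slab from the page allocator.  Called with interrupts disabled;
 * they are re-enabled around the page allocator call if the allocation
 * may sleep (__GFP_WAIT).
 */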
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void **object;
	struct page *new;

	gfpflags &= ~__GFP_ZERO;

	if (!c->page)
		goto new_slab;

	slab_lock(c->page);
	if (unlikely(!node_match(c, node)))
		goto another_slab;

	stat(c, ALLOC_REFILL);

load_freelist:
	object = c->page->freelist;
	if (unlikely(!object))
		goto another_slab;
	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
		goto debug;

	c->freelist = object[c->offset];
	c->page->inuse = c->page->objects;
	c->page->freelist = NULL;
	c->node = page_to_nid(c->page);
unlock_out:
	slab_unlock(c->page);
	stat(c, ALLOC_SLOWPATH);
	return object;

another_slab:
	deactivate_slab(s, c);

new_slab:
	new = get_partial(s, gfpflags, node);
	if (new) {
		c->page = new;
		stat(c, ALLOC_FROM_PARTIAL);
		goto load_freelist;
	}

	if (gfpflags & __GFP_WAIT)
		local_irq_enable();

	new = new_slab(s, gfpflags, node);

	if (gfpflags & __GFP_WAIT)
		local_irq_disable();

	if (new) {
		c = get_cpu_slab(s, smp_processor_id());
		stat(c, ALLOC_SLAB);
		if (c->page)
			flush_slab(s, c);
		slab_lock(new);
		__SetPageSlubFrozen(new);
		c->page = new;
		goto load_freelist;
	}
	if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
		slab_out_of_memory(s, gfpflags, node);
	return NULL;
debug:
	if (!alloc_debug_processing(s, c->page, object, addr))
		goto another_slab;

	c->page->inuse++;
	c->page->freelist = object[c->offset];
	c->node = -1;
	goto unlock_out;
}
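/*
 * Fast path of the allocator: with interrupts disabled, pop the first
 * object off the per-cpu lockless freelist.  Falls back to __slab_alloc()
 * when the freelist is empty or the cpu slab is on the wrong node.
 * __GFP_ZERO is handled here once an object has been obtained.
 */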
static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	unsigned long flags;
	unsigned int objsize;

	gfpflags &= gfp_allowed_mask;

	lockdep_trace_alloc(gfpflags);
	might_sleep_if(gfpflags & __GFP_WAIT);

	if (should_failslab(s->objsize, gfpflags))
		return NULL;

	local_irq_save(flags);
	c = get_cpu_slab(s, smp_processor_id());
	objsize = c->objsize;
	if (unlikely(!c->freelist || !node_match(c, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);
	else {
		object = c->freelist;
		c->freelist = object[c->offset];
		stat(c, ALLOC_FASTPATH);
	}
	local_irq_restore(flags);

	if (unlikely((gfpflags & __GFP_ZERO) && object))
		memset(object, 0, objsize);

	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);

	return object;
}

void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
	void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);

	trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
1756
1757#ifdef CONFIG_KMEMTRACE
1758void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
1759{
1760 return slab_alloc(s, gfpflags, -1, _RET_IP_);
1761}
1762EXPORT_SYMBOL(kmem_cache_alloc_notrace);
1763#endif
1764
1765#ifdef CONFIG_NUMA
1766void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1767{
1768 void *ret = slab_alloc(s, gfpflags, node, _RET_IP_);
1769
1770 trace_kmem_cache_alloc_node(_RET_IP_, ret,
1771 s->objsize, s->size, gfpflags, node);
1772
1773 return ret;
1774}
1775EXPORT_SYMBOL(kmem_cache_alloc_node);
1776#endif
1777
1778#ifdef CONFIG_KMEMTRACE
1779void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
1780 gfp_t gfpflags,
1781 int node)
1782{
1783 return slab_alloc(s, gfpflags, node, _RET_IP_);
1784}
1785EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
1786#endif
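/*
 * Slow path of the free, usually because the object does not belong to
 * the current cpu slab: take the slab lock, chain the object back onto
 * the page's freelist and, depending on the resulting state, add the
 * slab to or remove it from the node partial list, or discard it if it
 * became empty.
 */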
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr, unsigned int offset)
{
	void *prior;
	void **object = (void *)x;
	struct kmem_cache_cpu *c;

	c = get_cpu_slab(s, raw_smp_processor_id());
	stat(c, FREE_SLOWPATH);
	slab_lock(page);

	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
		goto debug;

checks_ok:
	prior = object[offset] = page->freelist;
	page->freelist = object;
	page->inuse--;

	if (unlikely(PageSlubFrozen(page))) {
		stat(c, FREE_FROZEN);
		goto out_unlock;
	}

	if (unlikely(!page->inuse))
		goto slab_empty;

	if (unlikely(!prior)) {
		add_partial(get_node(s, page_to_nid(page)), page, 1);
		stat(c, FREE_ADD_PARTIAL);
	}

out_unlock:
	slab_unlock(page);
	return;

slab_empty:
	if (prior) {
		remove_partial(s, page);
		stat(c, FREE_REMOVE_PARTIAL);
	}
	slab_unlock(page);
	stat(c, FREE_SLAB);
	discard_slab(s, page);
	return;

debug:
	if (!free_debug_processing(s, page, x, addr))
		goto out_unlock;
	goto checks_ok;
}
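/*
 * Fast path of the free: if the object belongs to the slab currently
 * owned by this cpu, push it onto the lockless per-cpu freelist with
 * interrupts disabled; otherwise take the __slab_free() slow path.
 */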
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long flags;

	kmemleak_free_recursive(x, s->flags);
	local_irq_save(flags);
	c = get_cpu_slab(s, smp_processor_id());
	kmemcheck_slab_free(s, object, c->objsize);
	debug_check_no_locks_freed(object, c->objsize);
	if (!(s->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(object, c->objsize);
	if (likely(page == c->page && c->node >= 0)) {
		object[c->offset] = c->freelist;
		c->freelist = object;
		stat(c, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr, c->offset);

	local_irq_restore(flags);
}

void kmem_cache_free(struct kmem_cache *s, void *x)
{
	struct page *page;

	page = virt_to_head_page(x);

	slab_free(s, page, x, _RET_IP_);

	trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
1901
1902
1903static struct page *get_object_page(const void *x)
1904{
1905 struct page *page = virt_to_head_page(x);
1906
1907 if (!PageSlab(page))
1908 return NULL;
1909
1910 return page;
1911}
1932static int slub_min_order;
1933static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
1934static int slub_min_objects;
1940static int slub_nomerge;
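/*
 * slab_order() picks the lowest page order, at least slub_min_order, that
 * fits min_objects objects and wastes no more than 1/fract_leftover of
 * the slab on leftover space.  calculate_order() below tries successively
 * smaller object counts and larger acceptable waste fractions until a
 * configuration within slub_max_order is found, falling back to a single
 * object per slab as a last resort.
 */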
1967static inline int slab_order(int size, int min_objects,
1968 int max_order, int fract_leftover)
1969{
1970 int order;
1971 int rem;
1972 int min_order = slub_min_order;
1973
1974 if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
1975 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
1976
1977 for (order = max(min_order,
1978 fls(min_objects * size - 1) - PAGE_SHIFT);
1979 order <= max_order; order++) {
1980
1981 unsigned long slab_size = PAGE_SIZE << order;
1982
1983 if (slab_size < min_objects * size)
1984 continue;
1985
1986 rem = slab_size % size;
1987
1988 if (rem <= slab_size / fract_leftover)
1989 break;
1990
1991 }
1992
1993 return order;
1994}
1995
1996static inline int calculate_order(int size)
1997{
1998 int order;
1999 int min_objects;
2000 int fraction;
2001 int max_objects;
2011 min_objects = slub_min_objects;
2012 if (!min_objects)
2013 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2014 max_objects = (PAGE_SIZE << slub_max_order)/size;
2015 min_objects = min(min_objects, max_objects);
2016
2017 while (min_objects > 1) {
2018 fraction = 16;
2019 while (fraction >= 4) {
2020 order = slab_order(size, min_objects,
2021 slub_max_order, fraction);
2022 if (order <= slub_max_order)
2023 return order;
2024 fraction /= 2;
2025 }
2026 min_objects--;
2027 }
2033 order = slab_order(size, 1, slub_max_order, 1);
2034 if (order <= slub_max_order)
2035 return order;
2040 order = slab_order(size, 1, MAX_ORDER, 1);
2041 if (order < MAX_ORDER)
2042 return order;
2043 return -ENOSYS;
2044}
2045
2046
2047
2048
2049static unsigned long calculate_alignment(unsigned long flags,
2050 unsigned long align, unsigned long size)
2051{
2052
2053
2054
2055
2056
2057
2058
2059 if (flags & SLAB_HWCACHE_ALIGN) {
2060 unsigned long ralign = cache_line_size();
2061 while (size <= ralign / 2)
2062 ralign /= 2;
2063 align = max(align, ralign);
2064 }
2065
2066 if (align < ARCH_SLAB_MINALIGN)
2067 align = ARCH_SLAB_MINALIGN;
2068
2069 return ALIGN(align, sizeof(void *));
2070}
2071
2072static void init_kmem_cache_cpu(struct kmem_cache *s,
2073 struct kmem_cache_cpu *c)
2074{
2075 c->page = NULL;
2076 c->freelist = NULL;
2077 c->node = 0;
2078 c->offset = s->offset / sizeof(void *);
2079 c->objsize = s->objsize;
2080#ifdef CONFIG_SLUB_STATS
2081 memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
2082#endif
2083}
2084
2085static void
2086init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
2087{
2088 n->nr_partial = 0;
2089 spin_lock_init(&n->list_lock);
2090 INIT_LIST_HEAD(&n->partial);
2091#ifdef CONFIG_SLUB_DEBUG
2092 atomic_long_set(&n->nr_slabs, 0);
2093 atomic_long_set(&n->total_objects, 0);
2094 INIT_LIST_HEAD(&n->full);
2095#endif
2096}
2097
2098#ifdef CONFIG_SMP
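/*
 * Per-cpu kmem_cache_cpu structures are normally handed out from a small
 * statically allocated per-cpu array (NR_KMEM_CACHE_CPU entries) chained
 * through their freelist pointers; kmalloc_node() is only used once that
 * pool is exhausted.
 */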
2114#define NR_KMEM_CACHE_CPU 100
2115
2116static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
2117 kmem_cache_cpu);
2118
2119static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
2120static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
2121
2122static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
2123 int cpu, gfp_t flags)
2124{
2125 struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
2126
2127 if (c)
2128 per_cpu(kmem_cache_cpu_free, cpu) =
2129 (void *)c->freelist;
2130 else {
2131
2132 c = kmalloc_node(
2133 ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
2134 flags, cpu_to_node(cpu));
2135 if (!c)
2136 return NULL;
2137 }
2138
2139 init_kmem_cache_cpu(s, c);
2140 return c;
2141}
2142
2143static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
2144{
2145 if (c < per_cpu(kmem_cache_cpu, cpu) ||
2146 c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
2147 kfree(c);
2148 return;
2149 }
2150 c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
2151 per_cpu(kmem_cache_cpu_free, cpu) = c;
2152}
2153
2154static void free_kmem_cache_cpus(struct kmem_cache *s)
2155{
2156 int cpu;
2157
2158 for_each_online_cpu(cpu) {
2159 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
2160
2161 if (c) {
2162 s->cpu_slab[cpu] = NULL;
2163 free_kmem_cache_cpu(c, cpu);
2164 }
2165 }
2166}
2167
2168static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2169{
2170 int cpu;
2171
2172 for_each_online_cpu(cpu) {
2173 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
2174
2175 if (c)
2176 continue;
2177
2178 c = alloc_kmem_cache_cpu(s, cpu, flags);
2179 if (!c) {
2180 free_kmem_cache_cpus(s);
2181 return 0;
2182 }
2183 s->cpu_slab[cpu] = c;
2184 }
2185 return 1;
2186}
2187
2188
2189
2190
2191static void init_alloc_cpu_cpu(int cpu)
2192{
2193 int i;
2194
2195 if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
2196 return;
2197
2198 for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
2199 free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
2200
2201 cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
2202}
2203
2204static void __init init_alloc_cpu(void)
2205{
2206 int cpu;
2207
2208 for_each_online_cpu(cpu)
2209 init_alloc_cpu_cpu(cpu);
2210 }
2211
2212#else
2213static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
2214static inline void init_alloc_cpu(void) {}
2215
2216static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
2217{
2218 init_kmem_cache_cpu(s, &s->cpu_slab);
2219 return 1;
2220}
2221#endif
2222
2223#ifdef CONFIG_NUMA
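/*
 * Bootstrap helper: while the allocator is still coming up, the
 * kmem_cache_node structure for a node has to be carved out of a freshly
 * allocated slab by hand, because kmem_cache_alloc_node() cannot be used
 * yet at this point.
 */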
2233static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
2234{
2235 struct page *page;
2236 struct kmem_cache_node *n;
2237 unsigned long flags;
2238
2239 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
2240
2241 page = new_slab(kmalloc_caches, gfpflags, node);
2242
2243 BUG_ON(!page);
2244 if (page_to_nid(page) != node) {
2245 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2246 "node %d\n", node);
2247 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2248 "in order to be able to continue\n");
2249 }
2250
2251 n = page->freelist;
2252 BUG_ON(!n);
2253 page->freelist = get_freepointer(kmalloc_caches, n);
2254 page->inuse++;
2255 kmalloc_caches->node[node] = n;
2256#ifdef CONFIG_SLUB_DEBUG
2257 init_object(kmalloc_caches, n, 1);
2258 init_tracking(kmalloc_caches, n);
2259#endif
2260 init_kmem_cache_node(n, kmalloc_caches);
2261 inc_slabs_node(kmalloc_caches, node, page->objects);
2262
2263
2264
2265
2266
2267
2268 local_irq_save(flags);
2269 add_partial(n, page, 0);
2270 local_irq_restore(flags);
2271}
2272
2273static void free_kmem_cache_nodes(struct kmem_cache *s)
2274{
2275 int node;
2276
2277 for_each_node_state(node, N_NORMAL_MEMORY) {
2278 struct kmem_cache_node *n = s->node[node];
2279 if (n && n != &s->local_node)
2280 kmem_cache_free(kmalloc_caches, n);
2281 s->node[node] = NULL;
2282 }
2283}
2284
2285static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2286{
2287 int node;
2288 int local_node;
2289
2290 if (slab_state >= UP)
2291 local_node = page_to_nid(virt_to_page(s));
2292 else
2293 local_node = 0;
2294
2295 for_each_node_state(node, N_NORMAL_MEMORY) {
2296 struct kmem_cache_node *n;
2297
2298 if (local_node == node)
2299 n = &s->local_node;
2300 else {
2301 if (slab_state == DOWN) {
2302 early_kmem_cache_node_alloc(gfpflags, node);
2303 continue;
2304 }
2305 n = kmem_cache_alloc_node(kmalloc_caches,
2306 gfpflags, node);
2307
2308 if (!n) {
2309 free_kmem_cache_nodes(s);
2310 return 0;
2311 }
2312
2313 }
2314 s->node[node] = n;
2315 init_kmem_cache_node(n, s);
2316 }
2317 return 1;
2318}
2319#else
2320static void free_kmem_cache_nodes(struct kmem_cache *s)
2321{
2322}
2323
2324static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2325{
2326 init_kmem_cache_node(&s->local_node, s);
2327 return 1;
2328}
2329#endif
2330
2331static void set_min_partial(struct kmem_cache *s, unsigned long min)
2332{
2333 if (min < MIN_PARTIAL)
2334 min = MIN_PARTIAL;
2335 else if (min > MAX_PARTIAL)
2336 min = MAX_PARTIAL;
2337 s->min_partial = min;
2338}
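/*
 * calculate_sizes() determines the order and the layout of data within a
 * slab object: red zoning, placement of the free pointer, tracking data
 * and padding (see the object layout comment above check_pad_bytes()).
 */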
2344static int calculate_sizes(struct kmem_cache *s, int forced_order)
2345{
2346 unsigned long flags = s->flags;
2347 unsigned long size = s->objsize;
2348 unsigned long align = s->align;
2349 int order;
2350
2351
2352
2353
2354
2355
2356 size = ALIGN(size, sizeof(void *));
2357
2358#ifdef CONFIG_SLUB_DEBUG
2359
2360
2361
2362
2363
2364 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2365 !s->ctor)
2366 s->flags |= __OBJECT_POISON;
2367 else
2368 s->flags &= ~__OBJECT_POISON;
2369
2370
2371
2372
2373
2374
2375
2376 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2377 size += sizeof(void *);
2378#endif
2379
2380
2381
2382
2383
2384 s->inuse = size;
2385
2386 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2387 s->ctor)) {
2388
2389
2390
2391
2392
2393
2394
2395
2396 s->offset = size;
2397 size += sizeof(void *);
2398 }
2399
2400#ifdef CONFIG_SLUB_DEBUG
2401 if (flags & SLAB_STORE_USER)
2402
2403
2404
2405
2406 size += 2 * sizeof(struct track);
2407
2408 if (flags & SLAB_RED_ZONE)
2409
2410
2411
2412
2413
2414
2415
2416 size += sizeof(void *);
2417#endif
2418
2419
2420
2421
2422
2423
2424 align = calculate_alignment(flags, align, s->objsize);
2425 s->align = align;
2426
2427
2428
2429
2430
2431
2432 size = ALIGN(size, align);
2433 s->size = size;
2434 if (forced_order >= 0)
2435 order = forced_order;
2436 else
2437 order = calculate_order(size);
2438
2439 if (order < 0)
2440 return 0;
2441
2442 s->allocflags = 0;
2443 if (order)
2444 s->allocflags |= __GFP_COMP;
2445
2446 if (s->flags & SLAB_CACHE_DMA)
2447 s->allocflags |= SLUB_DMA;
2448
2449 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2450 s->allocflags |= __GFP_RECLAIMABLE;
2451
2452
2453
2454
2455 s->oo = oo_make(order, size);
2456 s->min = oo_make(get_order(size), size);
2457 if (oo_objects(s->oo) > oo_objects(s->max))
2458 s->max = s->oo;
2459
2460 return !!oo_objects(s->oo);
2461
2462}
2463
2464static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2465 const char *name, size_t size,
2466 size_t align, unsigned long flags,
2467 void (*ctor)(void *))
2468{
2469 memset(s, 0, kmem_size);
2470 s->name = name;
2471 s->ctor = ctor;
2472 s->objsize = size;
2473 s->align = align;
2474 s->flags = kmem_cache_flags(size, flags, name, ctor);
2475
2476 if (!calculate_sizes(s, -1))
2477 goto error;
2478 if (disable_higher_order_debug) {
2479
2480
2481
2482
2483 if (get_order(s->size) > get_order(s->objsize)) {
2484 s->flags &= ~DEBUG_METADATA_FLAGS;
2485 s->offset = 0;
2486 if (!calculate_sizes(s, -1))
2487 goto error;
2488 }
2489 }
2490
2491
2492
2493
2494
2495 set_min_partial(s, ilog2(s->size));
2496 s->refcount = 1;
2497#ifdef CONFIG_NUMA
2498 s->remote_node_defrag_ratio = 1000;
2499#endif
2500 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
2501 goto error;
2502
2503 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
2504 return 1;
2505 free_kmem_cache_nodes(s);
2506error:
2507 if (flags & SLAB_PANIC)
2508 panic("Cannot create slab %s size=%lu realsize=%u "
2509 "order=%u offset=%u flags=%lx\n",
2510 s->name, (unsigned long)size, s->size, oo_order(s->oo),
2511 s->offset, flags);
2512 return 0;
2513}
2514
2515
2516
2517
2518int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2519{
2520 struct page *page;
2521
2522 page = get_object_page(object);
2523
2524 if (!page || s != page->slab)
2525
2526 return 0;
2527
2528 if (!check_valid_pointer(s, page, object))
2529 return 0;
2530
2531
2532
2533
2534
2535
2536
2537 return 1;
2538}
2539EXPORT_SYMBOL(kmem_ptr_validate);
2540
2541
2542
2543
2544unsigned int kmem_cache_size(struct kmem_cache *s)
2545{
2546 return s->objsize;
2547}
2548EXPORT_SYMBOL(kmem_cache_size);
2549
2550const char *kmem_cache_name(struct kmem_cache *s)
2551{
2552 return s->name;
2553}
2554EXPORT_SYMBOL(kmem_cache_name);
2555
2556static void list_slab_objects(struct kmem_cache *s, struct page *page,
2557 const char *text)
2558{
2559#ifdef CONFIG_SLUB_DEBUG
2560 void *addr = page_address(page);
2561 void *p;
2562 DECLARE_BITMAP(map, page->objects);
2563
2564 bitmap_zero(map, page->objects);
2565 slab_err(s, page, "%s", text);
2566 slab_lock(page);
2567 for_each_free_object(p, s, page->freelist)
2568 set_bit(slab_index(p, s, addr), map);
2569
2570 for_each_object(p, s, addr, page->objects) {
2571
2572 if (!test_bit(slab_index(p, s, addr), map)) {
2573 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
2574 p, p - addr);
2575 print_tracking(s, p);
2576 }
2577 }
2578 slab_unlock(page);
2579#endif
2580}
2581
2582
2583
2584
2585static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2586{
2587 unsigned long flags;
2588 struct page *page, *h;
2589
2590 spin_lock_irqsave(&n->list_lock, flags);
2591 list_for_each_entry_safe(page, h, &n->partial, lru) {
2592 if (!page->inuse) {
2593 list_del(&page->lru);
2594 discard_slab(s, page);
2595 n->nr_partial--;
2596 } else {
2597 list_slab_objects(s, page,
2598 "Objects remaining on kmem_cache_close()");
2599 }
2600 }
2601 spin_unlock_irqrestore(&n->list_lock, flags);
2602}
2603
2604
2605
2606
2607static inline int kmem_cache_close(struct kmem_cache *s)
2608{
2609 int node;
2610
2611 flush_all(s);
2612
2613
2614 free_kmem_cache_cpus(s);
2615 for_each_node_state(node, N_NORMAL_MEMORY) {
2616 struct kmem_cache_node *n = get_node(s, node);
2617
2618 free_partial(s, n);
2619 if (n->nr_partial || slabs_node(s, node))
2620 return 1;
2621 }
2622 free_kmem_cache_nodes(s);
2623 return 0;
2624}
2625
2626
2627
2628
2629
2630void kmem_cache_destroy(struct kmem_cache *s)
2631{
2632 down_write(&slub_lock);
2633 s->refcount--;
2634 if (!s->refcount) {
2635 list_del(&s->list);
2636 up_write(&slub_lock);
2637 if (kmem_cache_close(s)) {
2638 printk(KERN_ERR "SLUB %s: %s called for cache that "
2639 "still has objects.\n", s->name, __func__);
2640 dump_stack();
2641 }
2642 if (s->flags & SLAB_DESTROY_BY_RCU)
2643 rcu_barrier();
2644 sysfs_slab_remove(s);
2645 } else
2646 up_write(&slub_lock);
2647}
2648EXPORT_SYMBOL(kmem_cache_destroy);
2649
2650
2651
2652
2653
2654struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
2655EXPORT_SYMBOL(kmalloc_caches);
2656
2657static int __init setup_slub_min_order(char *str)
2658{
2659 get_option(&str, &slub_min_order);
2660
2661 return 1;
2662}
2663
2664__setup("slub_min_order=", setup_slub_min_order);
2665
2666static int __init setup_slub_max_order(char *str)
2667{
2668 get_option(&str, &slub_max_order);
2669 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
2670
2671 return 1;
2672}
2673
2674__setup("slub_max_order=", setup_slub_max_order);
2675
2676static int __init setup_slub_min_objects(char *str)
2677{
2678 get_option(&str, &slub_min_objects);
2679
2680 return 1;
2681}
2682
2683__setup("slub_min_objects=", setup_slub_min_objects);
2684
2685static int __init setup_slub_nomerge(char *str)
2686{
2687 slub_nomerge = 1;
2688 return 1;
2689}
2690
2691__setup("slub_nomerge", setup_slub_nomerge);
2692
2693static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
2694 const char *name, int size, gfp_t gfp_flags)
2695{
2696 unsigned int flags = 0;
2697
2698 if (gfp_flags & SLUB_DMA)
2699 flags = SLAB_CACHE_DMA;
2700
2701
2702
2703
2704
2705 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
2706 flags, NULL))
2707 goto panic;
2708
2709 list_add(&s->list, &slab_caches);
2710
2711 if (sysfs_slab_add(s))
2712 goto panic;
2713 return s;
2714
2715panic:
2716 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2717}
2718
2719#ifdef CONFIG_ZONE_DMA
2720static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT];
2721
2722static void sysfs_add_func(struct work_struct *w)
2723{
2724 struct kmem_cache *s;
2725
2726 down_write(&slub_lock);
2727 list_for_each_entry(s, &slab_caches, list) {
2728 if (s->flags & __SYSFS_ADD_DEFERRED) {
2729 s->flags &= ~__SYSFS_ADD_DEFERRED;
2730 sysfs_slab_add(s);
2731 }
2732 }
2733 up_write(&slub_lock);
2734}
2735
2736static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
2737
2738static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2739{
2740 struct kmem_cache *s;
2741 char *text;
2742 size_t realsize;
2743 unsigned long slabflags;
2744
2745 s = kmalloc_caches_dma[index];
2746 if (s)
2747 return s;
2748
2749
2750 if (flags & __GFP_WAIT)
2751 down_write(&slub_lock);
2752 else {
2753 if (!down_write_trylock(&slub_lock))
2754 goto out;
2755 }
2756
2757 if (kmalloc_caches_dma[index])
2758 goto unlock_out;
2759
2760 realsize = kmalloc_caches[index].objsize;
2761 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
2762 (unsigned int)realsize);
2763 s = kmalloc(kmem_size, flags & ~SLUB_DMA);
2764
2765
2766
2767
2768
2769
2770
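	/*
	 * If sysfs is already up we cannot call sysfs_slab_add() here since
	 * we do not know from what context we were called. Mark the cache
	 * with __SYSFS_ADD_DEFERRED and let the workqueue add it later.
	 * Before sysfs comes up nothing needs to be done: the sysfs initcall
	 * registers all existing caches.
	 */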
2771 slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
2772 if (slab_state >= SYSFS)
2773 slabflags |= __SYSFS_ADD_DEFERRED;
2774
2775 if (!s || !text || !kmem_cache_open(s, flags, text,
2776 realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
2777 kfree(s);
2778 kfree(text);
2779 goto unlock_out;
2780 }
2781
2782 list_add(&s->list, &slab_caches);
2783 kmalloc_caches_dma[index] = s;
2784
2785 if (slab_state >= SYSFS)
2786 schedule_work(&sysfs_add_work);
2787
2788unlock_out:
2789 up_write(&slub_lock);
2790out:
2791 return kmalloc_caches_dma[index];
2792}
2793#endif
2794
2795
2796
2797
2798
2799
2800
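/*
 * Conversion table from small allocation sizes (divided by 8) to the index
 * in the kmalloc array. This is necessary for sizes < 192 since there are
 * non power of two cache sizes there. The size of larger slabs can be
 * determined using fls.
 */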
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};
2827
2828static inline int size_index_elem(size_t bytes)
2829{
2830 return (bytes - 1) / 8;
2831}
2832
2833static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2834{
2835 int index;
2836
2837 if (size <= 192) {
2838 if (!size)
2839 return ZERO_SIZE_PTR;
2840
2841 index = size_index[size_index_elem(size)];
2842 } else
2843 index = fls(size - 1);
2844
2845#ifdef CONFIG_ZONE_DMA
2846 if (unlikely((flags & SLUB_DMA)))
2847 return dma_kmalloc_cache(index, flags);
2848
2849#endif
2850 return &kmalloc_caches[index];
2851}
2852
2853void *__kmalloc(size_t size, gfp_t flags)
2854{
2855 struct kmem_cache *s;
2856 void *ret;
2857
2858 if (unlikely(size > SLUB_MAX_SIZE))
2859 return kmalloc_large(size, flags);
2860
2861 s = get_slab(size, flags);
2862
2863 if (unlikely(ZERO_OR_NULL_PTR(s)))
2864 return s;
2865
2866 ret = slab_alloc(s, flags, -1, _RET_IP_);
2867
2868 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
2869
2870 return ret;
2871}
2872EXPORT_SYMBOL(__kmalloc);
2873
2874static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2875{
2876 struct page *page;
2877 void *ptr = NULL;
2878
2879 flags |= __GFP_COMP | __GFP_NOTRACK;
2880 page = alloc_pages_node(node, flags, get_order(size));
2881 if (page)
2882 ptr = page_address(page);
2883
2884 kmemleak_alloc(ptr, size, 1, flags);
2885 return ptr;
2886}
2887
2888#ifdef CONFIG_NUMA
2889void *__kmalloc_node(size_t size, gfp_t flags, int node)
2890{
2891 struct kmem_cache *s;
2892 void *ret;
2893
2894 if (unlikely(size > SLUB_MAX_SIZE)) {
2895 ret = kmalloc_large_node(size, flags, node);
2896
2897 trace_kmalloc_node(_RET_IP_, ret,
2898 size, PAGE_SIZE << get_order(size),
2899 flags, node);
2900
2901 return ret;
2902 }
2903
2904 s = get_slab(size, flags);
2905
2906 if (unlikely(ZERO_OR_NULL_PTR(s)))
2907 return s;
2908
2909 ret = slab_alloc(s, flags, node, _RET_IP_);
2910
2911 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
2912
2913 return ret;
2914}
2915EXPORT_SYMBOL(__kmalloc_node);
2916#endif
2917
2918size_t ksize(const void *object)
2919{
2920 struct page *page;
2921 struct kmem_cache *s;
2922
2923 if (unlikely(object == ZERO_SIZE_PTR))
2924 return 0;
2925
2926 page = virt_to_head_page(object);
2927
2928 if (unlikely(!PageSlab(page))) {
2929 WARN_ON(!PageCompound(page));
2930 return PAGE_SIZE << compound_order(page);
2931 }
2932 s = page->slab;
2933
2934#ifdef CONFIG_SLUB_DEBUG
2935
2936
2937
2938
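	/*
	 * Debugging requires use of the padding between the object
	 * and whatever may come after it.
	 */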
2939 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2940 return s->objsize;
2941
2942#endif
2943
2944
2945
2946
2947
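	/*
	 * If we need to store the freelist pointer beyond the object or
	 * track user information then we can only use the space before
	 * that information.
	 */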
2948 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2949 return s->inuse;
2950
2951
2952
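	/*
	 * Otherwise all of the padding can be used for the allocation.
	 */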
2953 return s->size;
2954}
2955EXPORT_SYMBOL(ksize);
2956
2957void kfree(const void *x)
2958{
2959 struct page *page;
2960 void *object = (void *)x;
2961
2962 trace_kfree(_RET_IP_, x);
2963
2964 if (unlikely(ZERO_OR_NULL_PTR(x)))
2965 return;
2966
2967 page = virt_to_head_page(x);
2968 if (unlikely(!PageSlab(page))) {
2969 BUG_ON(!PageCompound(page));
2970 kmemleak_free(x);
2971 put_page(page);
2972 return;
2973 }
2974 slab_free(page->slab, page, object, _RET_IP_);
2975}
2976EXPORT_SYMBOL(kfree);
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
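/*
 * kmem_cache_shrink removes empty slabs from the partial lists and sorts
 * the remaining slabs by the number of items in use. The slabs with the
 * most items in use come first. New allocations will then fill those up
 * and thus they can be removed from the partial lists.
 *
 * The slabs with the least items are placed last. This increases the
 * chance that their remaining objects are freed and the slabs can
 * eventually be discarded.
 */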
2988int kmem_cache_shrink(struct kmem_cache *s)
2989{
2990 int node;
2991 int i;
2992 struct kmem_cache_node *n;
2993 struct page *page;
2994 struct page *t;
2995 int objects = oo_objects(s->max);
2996 struct list_head *slabs_by_inuse =
2997 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
2998 unsigned long flags;
2999
3000 if (!slabs_by_inuse)
3001 return -ENOMEM;
3002
3003 flush_all(s);
3004 for_each_node_state(node, N_NORMAL_MEMORY) {
3005 n = get_node(s, node);
3006
3007 if (!n->nr_partial)
3008 continue;
3009
3010 for (i = 0; i < objects; i++)
3011 INIT_LIST_HEAD(slabs_by_inuse + i);
3012
3013 spin_lock_irqsave(&n->list_lock, flags);
3014
3015
3016
3017
3018
3019
3020
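		/*
		 * Build lists indexed by the items in use in each slab.
		 *
		 * Note that concurrent frees may occur while we hold the
		 * list_lock. page->inuse here is the upper limit.
		 */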
3021 list_for_each_entry_safe(page, t, &n->partial, lru) {
3022 if (!page->inuse && slab_trylock(page)) {
3023
3024
3025
3026
3027
3028 list_del(&page->lru);
3029 n->nr_partial--;
3030 slab_unlock(page);
3031 discard_slab(s, page);
3032 } else {
3033 list_move(&page->lru,
3034 slabs_by_inuse + page->inuse);
3035 }
3036 }
3037
3038
3039
3040
3041
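		/*
		 * Rebuild the partial list with the slabs filled up most
		 * first and the least used slabs at the end.
		 */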
3042 for (i = objects - 1; i >= 0; i--)
3043 list_splice(slabs_by_inuse + i, n->partial.prev);
3044
3045 spin_unlock_irqrestore(&n->list_lock, flags);
3046 }
3047
3048 kfree(slabs_by_inuse);
3049 return 0;
3050}
3051EXPORT_SYMBOL(kmem_cache_shrink);
3052
3053#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
3054static int slab_mem_going_offline_callback(void *arg)
3055{
3056 struct kmem_cache *s;
3057
3058 down_read(&slub_lock);
3059 list_for_each_entry(s, &slab_caches, list)
3060 kmem_cache_shrink(s);
3061 up_read(&slub_lock);
3062
3063 return 0;
3064}
3065
3066static void slab_mem_offline_callback(void *arg)
3067{
3068 struct kmem_cache_node *n;
3069 struct kmem_cache *s;
3070 struct memory_notify *marg = arg;
3071 int offline_node;
3072
3073 offline_node = marg->status_change_nid;
3074
3075
3076
3077
3078
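	/*
	 * status_change_nid is only set when a node loses all of its memory.
	 * If it is negative the node still has memory, so its
	 * kmem_cache_node structures must be kept.
	 */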
3079 if (offline_node < 0)
3080 return;
3081
3082 down_read(&slub_lock);
3083 list_for_each_entry(s, &slab_caches, list) {
3084 n = get_node(s, offline_node);
3085 if (n) {
3086
3087
3088
3089
3090
3091
3092 BUG_ON(slabs_node(s, offline_node));
3093
3094 s->node[offline_node] = NULL;
3095 kmem_cache_free(kmalloc_caches, n);
3096 }
3097 }
3098 up_read(&slub_lock);
3099}
3100
3101static int slab_mem_going_online_callback(void *arg)
3102{
3103 struct kmem_cache_node *n;
3104 struct kmem_cache *s;
3105 struct memory_notify *marg = arg;
3106 int nid = marg->status_change_nid;
3107 int ret = 0;
3108
3109
3110
3111
3112
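	/*
	 * If the node already has memory then its kmem_cache_node
	 * structures exist already. Nothing to do.
	 */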
3113 if (nid < 0)
3114 return 0;
3115
3116
3117
3118
3119
3120
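	/*
	 * We are bringing a node online. No memory is available yet, so we
	 * must allocate a kmem_cache_node structure for every cache in order
	 * to be able to use the new node.
	 */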
3121 down_read(&slub_lock);
3122 list_for_each_entry(s, &slab_caches, list) {
3123
3124
3125
3126
3127
3128 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL);
3129 if (!n) {
3130 ret = -ENOMEM;
3131 goto out;
3132 }
3133 init_kmem_cache_node(n, s);
3134 s->node[nid] = n;
3135 }
3136out:
3137 up_read(&slub_lock);
3138 return ret;
3139}
3140
3141static int slab_memory_callback(struct notifier_block *self,
3142 unsigned long action, void *arg)
3143{
3144 int ret = 0;
3145
3146 switch (action) {
3147 case MEM_GOING_ONLINE:
3148 ret = slab_mem_going_online_callback(arg);
3149 break;
3150 case MEM_GOING_OFFLINE:
3151 ret = slab_mem_going_offline_callback(arg);
3152 break;
3153 case MEM_OFFLINE:
3154 case MEM_CANCEL_ONLINE:
3155 slab_mem_offline_callback(arg);
3156 break;
3157 case MEM_ONLINE:
3158 case MEM_CANCEL_OFFLINE:
3159 break;
3160 }
3161 if (ret)
3162 ret = notifier_from_errno(ret);
3163 else
3164 ret = NOTIFY_OK;
3165 return ret;
3166}
3167
3168#endif
3169
3170
3171
3172
3173
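/********************************************************************
 *			Basic setup of slabs
 *******************************************************************/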
3174void __init kmem_cache_init(void)
3175{
3176 int i;
3177 int caches = 0;
3178
3179 init_alloc_cpu();
3180
3181#ifdef CONFIG_NUMA
3182
3183
3184
3185
3186
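	/*
	 * Must first have a cache available for allocating the
	 * struct kmem_cache_node's of all other caches. Its refcount is
	 * set to -1 below so that it is never merged or destroyed.
	 */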
3187 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
3188 sizeof(struct kmem_cache_node), GFP_NOWAIT);
3189 kmalloc_caches[0].refcount = -1;
3190 caches++;
3191
3192 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3193#endif
3194
3195
3196 slab_state = PARTIAL;
3197
3198
3199 if (KMALLOC_MIN_SIZE <= 32) {
3200 create_kmalloc_cache(&kmalloc_caches[1],
3201 "kmalloc-96", 96, GFP_NOWAIT);
3202 caches++;
3203 }
3204 if (KMALLOC_MIN_SIZE <= 64) {
3205 create_kmalloc_cache(&kmalloc_caches[2],
3206 "kmalloc-192", 192, GFP_NOWAIT);
3207 caches++;
3208 }
3209
3210 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3211 create_kmalloc_cache(&kmalloc_caches[i],
3212 "kmalloc", 1 << i, GFP_NOWAIT);
3213 caches++;
3214 }
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
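	/*
	 * Patch up the size_index table for configurations where
	 * KMALLOC_MIN_SIZE is larger than 8: requests below the minimum size
	 * must be redirected to the smallest available kmalloc cache. The
	 * largest permitted alignment is 256 bytes because of the way the
	 * index for the smaller caches is determined.
	 */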
3228 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3229 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3230
3231 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3232 int elem = size_index_elem(i);
3233 if (elem >= ARRAY_SIZE(size_index))
3234 break;
3235 size_index[elem] = KMALLOC_SHIFT_LOW;
3236 }
3237
3238 if (KMALLOC_MIN_SIZE == 64) {
3239
3240
3241
3242
3243 for (i = 64 + 8; i <= 96; i += 8)
3244 size_index[size_index_elem(i)] = 7;
3245 } else if (KMALLOC_MIN_SIZE == 128) {
3246
3247
3248
3249
3250
3251 for (i = 128 + 8; i <= 192; i += 8)
3252 size_index[size_index_elem(i)] = 8;
3253 }
3254
3255 slab_state = UP;
3256
3257
3258 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
		kmalloc_caches[i].name =
			kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3261
3262#ifdef CONFIG_SMP
3263 register_cpu_notifier(&slab_notifier);
3264 kmem_size = offsetof(struct kmem_cache, cpu_slab) +
3265 nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
3266#else
3267 kmem_size = sizeof(struct kmem_cache);
3268#endif
3269
3270 printk(KERN_INFO
3271 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
3272 " CPUs=%d, Nodes=%d\n",
3273 caches, cache_line_size(),
3274 slub_min_order, slub_max_order, slub_min_objects,
3275 nr_cpu_ids, nr_node_ids);
3276}
3277
3278void __init kmem_cache_init_late(void)
3279{
3280}
3281
3282
3283
3284
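/*
 * Find a mergeable slab cache
 */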
3285static int slab_unmergeable(struct kmem_cache *s)
3286{
3287 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3288 return 1;
3289
3290 if (s->ctor)
3291 return 1;
3292
3293
3294
3295
3296 if (s->refcount < 0)
3297 return 1;
3298
3299 return 0;
3300}
3301
3302static struct kmem_cache *find_mergeable(size_t size,
3303 size_t align, unsigned long flags, const char *name,
3304 void (*ctor)(void *))
3305{
3306 struct kmem_cache *s;
3307
3308 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3309 return NULL;
3310
3311 if (ctor)
3312 return NULL;
3313
3314 size = ALIGN(size, sizeof(void *));
3315 align = calculate_alignment(flags, align, size);
3316 size = ALIGN(size, align);
3317 flags = kmem_cache_flags(size, flags, name, NULL);
3318
3319 list_for_each_entry(s, &slab_caches, list) {
3320 if (slab_unmergeable(s))
3321 continue;
3322
3323 if (size > s->size)
3324 continue;
3325
3326 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3327 continue;
3328
3329
3330
3331
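		/*
		 * Check if the alignment is compatible: the existing cache's
		 * size must already be a multiple of the requested alignment.
		 */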
3332 if ((s->size & ~(align - 1)) != s->size)
3333 continue;
3334
3335 if (s->size - size >= sizeof(void *))
3336 continue;
3337
3338 return s;
3339 }
3340 return NULL;
3341}
3342
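/*
 * kmem_cache_create - create (or reuse via merging) a slab cache.
 *
 * Typical usage (hypothetical "foo" cache, shown only as an example):
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *					SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, obj);
 *	kmem_cache_destroy(foo_cache);
 *
 * If a compatible cache already exists and merging is allowed, that cache
 * is returned with its refcount incremented instead of creating a new one.
 */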
3343struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3344 size_t align, unsigned long flags, void (*ctor)(void *))
3345{
3346 struct kmem_cache *s;
3347
3348 if (WARN_ON(!name))
3349 return NULL;
3350
3351 down_write(&slub_lock);
3352 s = find_mergeable(size, align, flags, name, ctor);
3353 if (s) {
3354 int cpu;
3355
3356 s->refcount++;
3357
3358
3359
3360
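		/*
		 * Adjust the object sizes so that we clear
		 * the complete object on kzalloc.
		 */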
3361 s->objsize = max(s->objsize, (int)size);
3362
3363
3364
3365
3366
3367 for_each_online_cpu(cpu)
3368 get_cpu_slab(s, cpu)->objsize = s->objsize;
3369
3370 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3371 up_write(&slub_lock);
3372
3373 if (sysfs_slab_alias(s, name)) {
3374 down_write(&slub_lock);
3375 s->refcount--;
3376 up_write(&slub_lock);
3377 goto err;
3378 }
3379 return s;
3380 }
3381
3382 s = kmalloc(kmem_size, GFP_KERNEL);
3383 if (s) {
3384 if (kmem_cache_open(s, GFP_KERNEL, name,
3385 size, align, flags, ctor)) {
3386 list_add(&s->list, &slab_caches);
3387 up_write(&slub_lock);
3388 if (sysfs_slab_add(s)) {
3389 down_write(&slub_lock);
3390 list_del(&s->list);
3391 up_write(&slub_lock);
3392 kfree(s);
3393 goto err;
3394 }
3395 return s;
3396 }
3397 kfree(s);
3398 }
3399 up_write(&slub_lock);
3400
3401err:
3402 if (flags & SLAB_PANIC)
3403 panic("Cannot create slabcache %s\n", name);
3404 else
3405 s = NULL;
3406 return s;
3407}
3408EXPORT_SYMBOL(kmem_cache_create);
3409
3410#ifdef CONFIG_SMP
3411
3412
3413
3414
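/*
 * Use the cpu notifier to ensure that the cpu slabs are flushed and the
 * per cpu structures are allocated or freed as cpus come and go.
 */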
3415static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3416 unsigned long action, void *hcpu)
3417{
3418 long cpu = (long)hcpu;
3419 struct kmem_cache *s;
3420 unsigned long flags;
3421
3422 switch (action) {
3423 case CPU_UP_PREPARE:
3424 case CPU_UP_PREPARE_FROZEN:
3425 init_alloc_cpu_cpu(cpu);
3426 down_read(&slub_lock);
3427 list_for_each_entry(s, &slab_caches, list)
3428 s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
3429 GFP_KERNEL);
3430 up_read(&slub_lock);
3431 break;
3432
3433 case CPU_UP_CANCELED:
3434 case CPU_UP_CANCELED_FROZEN:
3435 case CPU_DEAD:
3436 case CPU_DEAD_FROZEN:
3437 down_read(&slub_lock);
3438 list_for_each_entry(s, &slab_caches, list) {
3439 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3440
3441 local_irq_save(flags);
3442 __flush_cpu_slab(s, cpu);
3443 local_irq_restore(flags);
3444 free_kmem_cache_cpu(c, cpu);
3445 s->cpu_slab[cpu] = NULL;
3446 }
3447 up_read(&slub_lock);
3448 break;
3449 default:
3450 break;
3451 }
3452 return NOTIFY_OK;
3453}
3454
3455static struct notifier_block __cpuinitdata slab_notifier = {
3456 .notifier_call = slab_cpuup_callback
3457};
3458
3459#endif
3460
3461void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3462{
3463 struct kmem_cache *s;
3464 void *ret;
3465
3466 if (unlikely(size > SLUB_MAX_SIZE))
3467 return kmalloc_large(size, gfpflags);
3468
3469 s = get_slab(size, gfpflags);
3470
3471 if (unlikely(ZERO_OR_NULL_PTR(s)))
3472 return s;
3473
3474 ret = slab_alloc(s, gfpflags, -1, caller);
3475
3476
3477 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3478
3479 return ret;
3480}
3481
3482void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3483 int node, unsigned long caller)
3484{
3485 struct kmem_cache *s;
3486 void *ret;
3487
3488 if (unlikely(size > SLUB_MAX_SIZE))
3489 return kmalloc_large_node(size, gfpflags, node);
3490
3491 s = get_slab(size, gfpflags);
3492
3493 if (unlikely(ZERO_OR_NULL_PTR(s)))
3494 return s;
3495
3496 ret = slab_alloc(s, gfpflags, node, caller);
3497
3498
3499 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3500
3501 return ret;
3502}
3503
3504#ifdef CONFIG_SLUB_DEBUG
3505static int count_inuse(struct page *page)
3506{
3507 return page->inuse;
3508}
3509
3510static int count_total(struct page *page)
3511{
3512 return page->objects;
3513}
3514
3515static int validate_slab(struct kmem_cache *s, struct page *page,
3516 unsigned long *map)
3517{
3518 void *p;
3519 void *addr = page_address(page);
3520
3521 if (!check_slab(s, page) ||
3522 !on_freelist(s, page, NULL))
3523 return 0;
3524
3525
3526 bitmap_zero(map, page->objects);
3527
3528 for_each_free_object(p, s, page->freelist) {
3529 set_bit(slab_index(p, s, addr), map);
3530 if (!check_object(s, page, p, 0))
3531 return 0;
3532 }
3533
3534 for_each_object(p, s, addr, page->objects)
3535 if (!test_bit(slab_index(p, s, addr), map))
3536 if (!check_object(s, page, p, 1))
3537 return 0;
3538 return 1;
3539}
3540
3541static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3542 unsigned long *map)
3543{
3544 if (slab_trylock(page)) {
3545 validate_slab(s, page, map);
3546 slab_unlock(page);
3547 } else
3548 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3549 s->name, page);
3550
3551 if (s->flags & DEBUG_DEFAULT_FLAGS) {
3552 if (!PageSlubDebug(page))
3553 printk(KERN_ERR "SLUB %s: SlubDebug not set "
3554 "on slab 0x%p\n", s->name, page);
3555 } else {
3556 if (PageSlubDebug(page))
3557 printk(KERN_ERR "SLUB %s: SlubDebug set on "
3558 "slab 0x%p\n", s->name, page);
3559 }
3560}
3561
3562static int validate_slab_node(struct kmem_cache *s,
3563 struct kmem_cache_node *n, unsigned long *map)
3564{
3565 unsigned long count = 0;
3566 struct page *page;
3567 unsigned long flags;
3568
3569 spin_lock_irqsave(&n->list_lock, flags);
3570
3571 list_for_each_entry(page, &n->partial, lru) {
3572 validate_slab_slab(s, page, map);
3573 count++;
3574 }
3575 if (count != n->nr_partial)
3576 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3577 "counter=%ld\n", s->name, count, n->nr_partial);
3578
3579 if (!(s->flags & SLAB_STORE_USER))
3580 goto out;
3581
3582 list_for_each_entry(page, &n->full, lru) {
3583 validate_slab_slab(s, page, map);
3584 count++;
3585 }
3586 if (count != atomic_long_read(&n->nr_slabs))
3587 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3588 "counter=%ld\n", s->name, count,
3589 atomic_long_read(&n->nr_slabs));
3590
3591out:
3592 spin_unlock_irqrestore(&n->list_lock, flags);
3593 return count;
3594}
3595
3596static long validate_slab_cache(struct kmem_cache *s)
3597{
3598 int node;
3599 unsigned long count = 0;
3600 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3601 sizeof(unsigned long), GFP_KERNEL);
3602
3603 if (!map)
3604 return -ENOMEM;
3605
3606 flush_all(s);
3607 for_each_node_state(node, N_NORMAL_MEMORY) {
3608 struct kmem_cache_node *n = get_node(s, node);
3609
3610 count += validate_slab_node(s, n, map);
3611 }
3612 kfree(map);
3613 return count;
3614}
3615
3616#ifdef SLUB_RESILIENCY_TEST
3617static void resiliency_test(void)
3618{
3619 u8 *p;
3620
3621 printk(KERN_ERR "SLUB resiliency testing\n");
3622 printk(KERN_ERR "-----------------------\n");
3623 printk(KERN_ERR "A. Corruption after allocation\n");
3624
3625 p = kzalloc(16, GFP_KERNEL);
3626 p[16] = 0x12;
3627 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
3628 " 0x12->0x%p\n\n", p + 16);
3629
3630 validate_slab_cache(kmalloc_caches + 4);
3631
3632
3633 p = kzalloc(32, GFP_KERNEL);
3634 p[32 + sizeof(void *)] = 0x34;
3635 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
3636 " 0x34 -> -0x%p\n", p);
3637 printk(KERN_ERR
3638 "If allocated object is overwritten then not detectable\n\n");
3639
3640 validate_slab_cache(kmalloc_caches + 5);
3641 p = kzalloc(64, GFP_KERNEL);
3642 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
3643 *p = 0x56;
3644 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
3645 p);
3646 printk(KERN_ERR
3647 "If allocated object is overwritten then not detectable\n\n");
3648 validate_slab_cache(kmalloc_caches + 6);
3649
3650 printk(KERN_ERR "\nB. Corruption after free\n");
3651 p = kzalloc(128, GFP_KERNEL);
3652 kfree(p);
3653 *p = 0x78;
3654 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
3655 validate_slab_cache(kmalloc_caches + 7);
3656
3657 p = kzalloc(256, GFP_KERNEL);
3658 kfree(p);
3659 p[50] = 0x9a;
3660 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
3661 p);
3662 validate_slab_cache(kmalloc_caches + 8);
3663
3664 p = kzalloc(512, GFP_KERNEL);
3665 kfree(p);
3666 p[512] = 0xab;
3667 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
3668 validate_slab_cache(kmalloc_caches + 9);
3669}
3670#else
static void resiliency_test(void) {}
3672#endif
3673
3674
3675
3676
3677
3678
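/*
 * Generate lists of code addresses where slabcache objects are allocated
 * and freed.
 */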
3679struct location {
3680 unsigned long count;
3681 unsigned long addr;
3682 long long sum_time;
3683 long min_time;
3684 long max_time;
3685 long min_pid;
3686 long max_pid;
3687 DECLARE_BITMAP(cpus, NR_CPUS);
3688 nodemask_t nodes;
3689};
3690
3691struct loc_track {
3692 unsigned long max;
3693 unsigned long count;
3694 struct location *loc;
3695};
3696
3697static void free_loc_track(struct loc_track *t)
3698{
3699 if (t->max)
3700 free_pages((unsigned long)t->loc,
3701 get_order(sizeof(struct location) * t->max));
3702}
3703
3704static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
3705{
3706 struct location *l;
3707 int order;
3708
3709 order = get_order(sizeof(struct location) * max);
3710
3711 l = (void *)__get_free_pages(flags, order);
3712 if (!l)
3713 return 0;
3714
3715 if (t->count) {
3716 memcpy(l, t->loc, sizeof(struct location) * t->count);
3717 free_loc_track(t);
3718 }
3719 t->max = max;
3720 t->loc = l;
3721 return 1;
3722}
3723
3724static int add_location(struct loc_track *t, struct kmem_cache *s,
3725 const struct track *track)
3726{
3727 long start, end, pos;
3728 struct location *l;
3729 unsigned long caddr;
3730 unsigned long age = jiffies - track->when;
3731
3732 start = -1;
3733 end = t->count;
3734
3735 for ( ; ; ) {
3736 pos = start + (end - start + 1) / 2;
3737
3738
3739
3740
3741
3742 if (pos == end)
3743 break;
3744
3745 caddr = t->loc[pos].addr;
3746 if (track->addr == caddr) {
3747
3748 l = &t->loc[pos];
3749 l->count++;
3750 if (track->when) {
3751 l->sum_time += age;
3752 if (age < l->min_time)
3753 l->min_time = age;
3754 if (age > l->max_time)
3755 l->max_time = age;
3756
3757 if (track->pid < l->min_pid)
3758 l->min_pid = track->pid;
3759 if (track->pid > l->max_pid)
3760 l->max_pid = track->pid;
3761
3762 cpumask_set_cpu(track->cpu,
3763 to_cpumask(l->cpus));
3764 }
3765 node_set(page_to_nid(virt_to_page(track)), l->nodes);
3766 return 1;
3767 }
3768
3769 if (track->addr < caddr)
3770 end = pos;
3771 else
3772 start = pos;
3773 }
3774
3775
3776
3777
3778 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
3779 return 0;
3780
3781 l = t->loc + pos;
3782 if (pos < t->count)
3783 memmove(l + 1, l,
3784 (t->count - pos) * sizeof(struct location));
3785 t->count++;
3786 l->count = 1;
3787 l->addr = track->addr;
3788 l->sum_time = age;
3789 l->min_time = age;
3790 l->max_time = age;
3791 l->min_pid = track->pid;
3792 l->max_pid = track->pid;
3793 cpumask_clear(to_cpumask(l->cpus));
3794 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
3795 nodes_clear(l->nodes);
3796 node_set(page_to_nid(virt_to_page(track)), l->nodes);
3797 return 1;
3798}
3799
3800static void process_slab(struct loc_track *t, struct kmem_cache *s,
3801 struct page *page, enum track_item alloc)
3802{
3803 void *addr = page_address(page);
3804 DECLARE_BITMAP(map, page->objects);
3805 void *p;
3806
3807 bitmap_zero(map, page->objects);
3808 for_each_free_object(p, s, page->freelist)
3809 set_bit(slab_index(p, s, addr), map);
3810
3811 for_each_object(p, s, addr, page->objects)
3812 if (!test_bit(slab_index(p, s, addr), map))
3813 add_location(t, s, get_track(s, p, alloc));
3814}
3815
3816static int list_locations(struct kmem_cache *s, char *buf,
3817 enum track_item alloc)
3818{
3819 int len = 0;
3820 unsigned long i;
3821 struct loc_track t = { 0, 0, NULL };
3822 int node;
3823
3824 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
3825 GFP_TEMPORARY))
3826 return sprintf(buf, "Out of memory\n");
3827
3828
3829 flush_all(s);
3830
3831 for_each_node_state(node, N_NORMAL_MEMORY) {
3832 struct kmem_cache_node *n = get_node(s, node);
3833 unsigned long flags;
3834 struct page *page;
3835
3836 if (!atomic_long_read(&n->nr_slabs))
3837 continue;
3838
3839 spin_lock_irqsave(&n->list_lock, flags);
3840 list_for_each_entry(page, &n->partial, lru)
3841 process_slab(&t, s, page, alloc);
3842 list_for_each_entry(page, &n->full, lru)
3843 process_slab(&t, s, page, alloc);
3844 spin_unlock_irqrestore(&n->list_lock, flags);
3845 }
3846
3847 for (i = 0; i < t.count; i++) {
3848 struct location *l = &t.loc[i];
3849
3850 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
3851 break;
3852 len += sprintf(buf + len, "%7ld ", l->count);
3853
3854 if (l->addr)
3855 len += sprint_symbol(buf + len, (unsigned long)l->addr);
3856 else
3857 len += sprintf(buf + len, "<not-available>");
3858
3859 if (l->sum_time != l->min_time) {
3860 len += sprintf(buf + len, " age=%ld/%ld/%ld",
3861 l->min_time,
3862 (long)div_u64(l->sum_time, l->count),
3863 l->max_time);
3864 } else
3865 len += sprintf(buf + len, " age=%ld",
3866 l->min_time);
3867
3868 if (l->min_pid != l->max_pid)
3869 len += sprintf(buf + len, " pid=%ld-%ld",
3870 l->min_pid, l->max_pid);
3871 else
3872 len += sprintf(buf + len, " pid=%ld",
3873 l->min_pid);
3874
3875 if (num_online_cpus() > 1 &&
3876 !cpumask_empty(to_cpumask(l->cpus)) &&
3877 len < PAGE_SIZE - 60) {
3878 len += sprintf(buf + len, " cpus=");
3879 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
3880 to_cpumask(l->cpus));
3881 }
3882
3883 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
3884 len < PAGE_SIZE - 60) {
3885 len += sprintf(buf + len, " nodes=");
3886 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
3887 l->nodes);
3888 }
3889
3890 len += sprintf(buf + len, "\n");
3891 }
3892
3893 free_loc_track(&t);
3894 if (!t.count)
3895 len += sprintf(buf, "No data\n");
3896 return len;
3897}
3898
enum slab_stat_type {
	SL_ALL,			/* All slabs */
	SL_PARTIAL,		/* Only partially allocated slabs */
	SL_CPU,			/* Only slabs used for cpu caches */
	SL_OBJECTS,		/* Determine allocated objects not slabs */
	SL_TOTAL		/* Determine object capacity not slabs */
};
3906
3907#define SO_ALL (1 << SL_ALL)
3908#define SO_PARTIAL (1 << SL_PARTIAL)
3909#define SO_CPU (1 << SL_CPU)
3910#define SO_OBJECTS (1 << SL_OBJECTS)
3911#define SO_TOTAL (1 << SL_TOTAL)
3912
3913static ssize_t show_slab_objects(struct kmem_cache *s,
3914 char *buf, unsigned long flags)
3915{
3916 unsigned long total = 0;
3917 int node;
3918 int x;
3919 unsigned long *nodes;
3920 unsigned long *per_cpu;
3921
3922 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
3923 if (!nodes)
3924 return -ENOMEM;
3925 per_cpu = nodes + nr_node_ids;
3926
3927 if (flags & SO_CPU) {
3928 int cpu;
3929
3930 for_each_possible_cpu(cpu) {
3931 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3932
3933 if (!c || c->node < 0)
3934 continue;
3935
3936 if (c->page) {
3937 if (flags & SO_TOTAL)
3938 x = c->page->objects;
3939 else if (flags & SO_OBJECTS)
3940 x = c->page->inuse;
3941 else
3942 x = 1;
3943
3944 total += x;
3945 nodes[c->node] += x;
3946 }
3947 per_cpu[c->node]++;
3948 }
3949 }
3950
3951 if (flags & SO_ALL) {
3952 for_each_node_state(node, N_NORMAL_MEMORY) {
3953 struct kmem_cache_node *n = get_node(s, node);
3954
3955 if (flags & SO_TOTAL)
3956 x = atomic_long_read(&n->total_objects);
3957 else if (flags & SO_OBJECTS)
3958 x = atomic_long_read(&n->total_objects) -
3959 count_partial(n, count_free);
3960
3961 else
3962 x = atomic_long_read(&n->nr_slabs);
3963 total += x;
3964 nodes[node] += x;
3965 }
3966
3967 } else if (flags & SO_PARTIAL) {
3968 for_each_node_state(node, N_NORMAL_MEMORY) {
3969 struct kmem_cache_node *n = get_node(s, node);
3970
3971 if (flags & SO_TOTAL)
3972 x = count_partial(n, count_total);
3973 else if (flags & SO_OBJECTS)
3974 x = count_partial(n, count_inuse);
3975 else
3976 x = n->nr_partial;
3977 total += x;
3978 nodes[node] += x;
3979 }
3980 }
3981 x = sprintf(buf, "%lu", total);
3982#ifdef CONFIG_NUMA
3983 for_each_node_state(node, N_NORMAL_MEMORY)
3984 if (nodes[node])
3985 x += sprintf(buf + x, " N%d=%lu",
3986 node, nodes[node]);
3987#endif
3988 kfree(nodes);
3989 return x + sprintf(buf + x, "\n");
3990}
3991
3992static int any_slab_objects(struct kmem_cache *s)
3993{
3994 int node;
3995
3996 for_each_online_node(node) {
3997 struct kmem_cache_node *n = get_node(s, node);
3998
3999 if (!n)
4000 continue;
4001
4002 if (atomic_long_read(&n->total_objects))
4003 return 1;
4004 }
4005 return 0;
4006}
4007
4008#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4010
4011struct slab_attribute {
4012 struct attribute attr;
4013 ssize_t (*show)(struct kmem_cache *s, char *buf);
4014 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4015};
4016
4017#define SLAB_ATTR_RO(_name) \
4018 static struct slab_attribute _name##_attr = __ATTR_RO(_name)
4019
4020#define SLAB_ATTR(_name) \
4021 static struct slab_attribute _name##_attr = \
4022 __ATTR(_name, 0644, _name##_show, _name##_store)
4023
4024static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4025{
4026 return sprintf(buf, "%d\n", s->size);
4027}
4028SLAB_ATTR_RO(slab_size);
4029
4030static ssize_t align_show(struct kmem_cache *s, char *buf)
4031{
4032 return sprintf(buf, "%d\n", s->align);
4033}
4034SLAB_ATTR_RO(align);
4035
4036static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4037{
4038 return sprintf(buf, "%d\n", s->objsize);
4039}
4040SLAB_ATTR_RO(object_size);
4041
4042static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4043{
4044 return sprintf(buf, "%d\n", oo_objects(s->oo));
4045}
4046SLAB_ATTR_RO(objs_per_slab);
4047
4048static ssize_t order_store(struct kmem_cache *s,
4049 const char *buf, size_t length)
4050{
4051 unsigned long order;
4052 int err;
4053
4054 err = strict_strtoul(buf, 10, &order);
4055 if (err)
4056 return err;
4057
4058 if (order > slub_max_order || order < slub_min_order)
4059 return -EINVAL;
4060
4061 calculate_sizes(s, order);
4062 return length;
4063}
4064
4065static ssize_t order_show(struct kmem_cache *s, char *buf)
4066{
4067 return sprintf(buf, "%d\n", oo_order(s->oo));
4068}
4069SLAB_ATTR(order);
4070
4071static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4072{
4073 return sprintf(buf, "%lu\n", s->min_partial);
4074}
4075
4076static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4077 size_t length)
4078{
4079 unsigned long min;
4080 int err;
4081
4082 err = strict_strtoul(buf, 10, &min);
4083 if (err)
4084 return err;
4085
4086 set_min_partial(s, min);
4087 return length;
4088}
4089SLAB_ATTR(min_partial);
4090
4091static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4092{
4093 if (s->ctor) {
4094 int n = sprint_symbol(buf, (unsigned long)s->ctor);
4095
4096 return n + sprintf(buf + n, "\n");
4097 }
4098 return 0;
4099}
4100SLAB_ATTR_RO(ctor);
4101
4102static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4103{
4104 return sprintf(buf, "%d\n", s->refcount - 1);
4105}
4106SLAB_ATTR_RO(aliases);
4107
4108static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4109{
4110 return show_slab_objects(s, buf, SO_ALL);
4111}
4112SLAB_ATTR_RO(slabs);
4113
4114static ssize_t partial_show(struct kmem_cache *s, char *buf)
4115{
4116 return show_slab_objects(s, buf, SO_PARTIAL);
4117}
4118SLAB_ATTR_RO(partial);
4119
4120static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4121{
4122 return show_slab_objects(s, buf, SO_CPU);
4123}
4124SLAB_ATTR_RO(cpu_slabs);
4125
4126static ssize_t objects_show(struct kmem_cache *s, char *buf)
4127{
4128 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4129}
4130SLAB_ATTR_RO(objects);
4131
4132static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4133{
4134 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4135}
4136SLAB_ATTR_RO(objects_partial);
4137
4138static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4139{
4140 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4141}
4142SLAB_ATTR_RO(total_objects);
4143
4144static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4145{
4146 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4147}
4148
4149static ssize_t sanity_checks_store(struct kmem_cache *s,
4150 const char *buf, size_t length)
4151{
4152 s->flags &= ~SLAB_DEBUG_FREE;
4153 if (buf[0] == '1')
4154 s->flags |= SLAB_DEBUG_FREE;
4155 return length;
4156}
4157SLAB_ATTR(sanity_checks);
4158
4159static ssize_t trace_show(struct kmem_cache *s, char *buf)
4160{
4161 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4162}
4163
4164static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4165 size_t length)
4166{
4167 s->flags &= ~SLAB_TRACE;
4168 if (buf[0] == '1')
4169 s->flags |= SLAB_TRACE;
4170 return length;
4171}
4172SLAB_ATTR(trace);
4173
4174static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4175{
4176 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4177}
4178
4179static ssize_t reclaim_account_store(struct kmem_cache *s,
4180 const char *buf, size_t length)
4181{
4182 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4183 if (buf[0] == '1')
4184 s->flags |= SLAB_RECLAIM_ACCOUNT;
4185 return length;
4186}
4187SLAB_ATTR(reclaim_account);
4188
4189static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4190{
4191 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4192}
4193SLAB_ATTR_RO(hwcache_align);
4194
4195#ifdef CONFIG_ZONE_DMA
4196static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4197{
4198 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4199}
4200SLAB_ATTR_RO(cache_dma);
4201#endif
4202
4203static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4204{
4205 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4206}
4207SLAB_ATTR_RO(destroy_by_rcu);
4208
4209static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4210{
4211 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4212}
4213
4214static ssize_t red_zone_store(struct kmem_cache *s,
4215 const char *buf, size_t length)
4216{
4217 if (any_slab_objects(s))
4218 return -EBUSY;
4219
4220 s->flags &= ~SLAB_RED_ZONE;
4221 if (buf[0] == '1')
4222 s->flags |= SLAB_RED_ZONE;
4223 calculate_sizes(s, -1);
4224 return length;
4225}
4226SLAB_ATTR(red_zone);
4227
4228static ssize_t poison_show(struct kmem_cache *s, char *buf)
4229{
4230 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4231}
4232
4233static ssize_t poison_store(struct kmem_cache *s,
4234 const char *buf, size_t length)
4235{
4236 if (any_slab_objects(s))
4237 return -EBUSY;
4238
4239 s->flags &= ~SLAB_POISON;
4240 if (buf[0] == '1')
4241 s->flags |= SLAB_POISON;
4242 calculate_sizes(s, -1);
4243 return length;
4244}
4245SLAB_ATTR(poison);
4246
4247static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4248{
4249 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4250}
4251
4252static ssize_t store_user_store(struct kmem_cache *s,
4253 const char *buf, size_t length)
4254{
4255 if (any_slab_objects(s))
4256 return -EBUSY;
4257
4258 s->flags &= ~SLAB_STORE_USER;
4259 if (buf[0] == '1')
4260 s->flags |= SLAB_STORE_USER;
4261 calculate_sizes(s, -1);
4262 return length;
4263}
4264SLAB_ATTR(store_user);
4265
4266static ssize_t validate_show(struct kmem_cache *s, char *buf)
4267{
4268 return 0;
4269}
4270
4271static ssize_t validate_store(struct kmem_cache *s,
4272 const char *buf, size_t length)
4273{
4274 int ret = -EINVAL;
4275
4276 if (buf[0] == '1') {
4277 ret = validate_slab_cache(s);
4278 if (ret >= 0)
4279 ret = length;
4280 }
4281 return ret;
4282}
4283SLAB_ATTR(validate);
4284
4285static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4286{
4287 return 0;
4288}
4289
4290static ssize_t shrink_store(struct kmem_cache *s,
4291 const char *buf, size_t length)
4292{
4293 if (buf[0] == '1') {
4294 int rc = kmem_cache_shrink(s);
4295
4296 if (rc)
4297 return rc;
4298 } else
4299 return -EINVAL;
4300 return length;
4301}
4302SLAB_ATTR(shrink);
4303
4304static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4305{
4306 if (!(s->flags & SLAB_STORE_USER))
4307 return -ENOSYS;
4308 return list_locations(s, buf, TRACK_ALLOC);
4309}
4310SLAB_ATTR_RO(alloc_calls);
4311
4312static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4313{
4314 if (!(s->flags & SLAB_STORE_USER))
4315 return -ENOSYS;
4316 return list_locations(s, buf, TRACK_FREE);
4317}
4318SLAB_ATTR_RO(free_calls);
4319
4320#ifdef CONFIG_NUMA
4321static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4322{
4323 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4324}
4325
4326static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4327 const char *buf, size_t length)
4328{
4329 unsigned long ratio;
4330 int err;
4331
4332 err = strict_strtoul(buf, 10, &ratio);
4333 if (err)
4334 return err;
4335
4336 if (ratio <= 100)
4337 s->remote_node_defrag_ratio = ratio * 10;
4338
4339 return length;
4340}
4341SLAB_ATTR(remote_node_defrag_ratio);
4342#endif
4343
4344#ifdef CONFIG_SLUB_STATS
4345static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4346{
4347 unsigned long sum = 0;
4348 int cpu;
4349 int len;
4350 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4351
4352 if (!data)
4353 return -ENOMEM;
4354
4355 for_each_online_cpu(cpu) {
4356 unsigned x = get_cpu_slab(s, cpu)->stat[si];
4357
4358 data[cpu] = x;
4359 sum += x;
4360 }
4361
4362 len = sprintf(buf, "%lu", sum);
4363
4364#ifdef CONFIG_SMP
4365 for_each_online_cpu(cpu) {
4366 if (data[cpu] && len < PAGE_SIZE - 20)
4367 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4368 }
4369#endif
4370 kfree(data);
4371 return len + sprintf(buf + len, "\n");
4372}
4373
4374#define STAT_ATTR(si, text) \
4375static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4376{ \
4377 return show_stat(s, buf, si); \
4378} \
SLAB_ATTR_RO(text);
4380
4381STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4382STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
4383STAT_ATTR(FREE_FASTPATH, free_fastpath);
4384STAT_ATTR(FREE_SLOWPATH, free_slowpath);
4385STAT_ATTR(FREE_FROZEN, free_frozen);
4386STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
4387STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4388STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4389STAT_ATTR(ALLOC_SLAB, alloc_slab);
4390STAT_ATTR(ALLOC_REFILL, alloc_refill);
4391STAT_ATTR(FREE_SLAB, free_slab);
4392STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4393STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
4394STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4395STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4396STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4397STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4398STAT_ATTR(ORDER_FALLBACK, order_fallback);
4399#endif
4400
4401static struct attribute *slab_attrs[] = {
4402 &slab_size_attr.attr,
4403 &object_size_attr.attr,
4404 &objs_per_slab_attr.attr,
4405 &order_attr.attr,
4406 &min_partial_attr.attr,
4407 &objects_attr.attr,
4408 &objects_partial_attr.attr,
4409 &total_objects_attr.attr,
4410 &slabs_attr.attr,
4411 &partial_attr.attr,
4412 &cpu_slabs_attr.attr,
4413 &ctor_attr.attr,
4414 &aliases_attr.attr,
4415 &align_attr.attr,
4416 &sanity_checks_attr.attr,
4417 &trace_attr.attr,
4418 &hwcache_align_attr.attr,
4419 &reclaim_account_attr.attr,
4420 &destroy_by_rcu_attr.attr,
4421 &red_zone_attr.attr,
4422 &poison_attr.attr,
4423 &store_user_attr.attr,
4424 &validate_attr.attr,
4425 &shrink_attr.attr,
4426 &alloc_calls_attr.attr,
4427 &free_calls_attr.attr,
4428#ifdef CONFIG_ZONE_DMA
4429 &cache_dma_attr.attr,
4430#endif
4431#ifdef CONFIG_NUMA
4432 &remote_node_defrag_ratio_attr.attr,
4433#endif
4434#ifdef CONFIG_SLUB_STATS
4435 &alloc_fastpath_attr.attr,
4436 &alloc_slowpath_attr.attr,
4437 &free_fastpath_attr.attr,
4438 &free_slowpath_attr.attr,
4439 &free_frozen_attr.attr,
4440 &free_add_partial_attr.attr,
4441 &free_remove_partial_attr.attr,
4442 &alloc_from_partial_attr.attr,
4443 &alloc_slab_attr.attr,
4444 &alloc_refill_attr.attr,
4445 &free_slab_attr.attr,
4446 &cpuslab_flush_attr.attr,
4447 &deactivate_full_attr.attr,
4448 &deactivate_empty_attr.attr,
4449 &deactivate_to_head_attr.attr,
4450 &deactivate_to_tail_attr.attr,
4451 &deactivate_remote_frees_attr.attr,
4452 &order_fallback_attr.attr,
4453#endif
4454 NULL
4455};
4456
4457static struct attribute_group slab_attr_group = {
4458 .attrs = slab_attrs,
4459};
4460
4461static ssize_t slab_attr_show(struct kobject *kobj,
4462 struct attribute *attr,
4463 char *buf)
4464{
4465 struct slab_attribute *attribute;
4466 struct kmem_cache *s;
4467 int err;
4468
4469 attribute = to_slab_attr(attr);
4470 s = to_slab(kobj);
4471
4472 if (!attribute->show)
4473 return -EIO;
4474
4475 err = attribute->show(s, buf);
4476
4477 return err;
4478}
4479
4480static ssize_t slab_attr_store(struct kobject *kobj,
4481 struct attribute *attr,
4482 const char *buf, size_t len)
4483{
4484 struct slab_attribute *attribute;
4485 struct kmem_cache *s;
4486 int err;
4487
4488 attribute = to_slab_attr(attr);
4489 s = to_slab(kobj);
4490
4491 if (!attribute->store)
4492 return -EIO;
4493
4494 err = attribute->store(s, buf, len);
4495
4496 return err;
4497}
4498
4499static void kmem_cache_release(struct kobject *kobj)
4500{
4501 struct kmem_cache *s = to_slab(kobj);
4502
4503 kfree(s);
4504}
4505
4506static struct sysfs_ops slab_sysfs_ops = {
4507 .show = slab_attr_show,
4508 .store = slab_attr_store,
4509};
4510
4511static struct kobj_type slab_ktype = {
4512 .sysfs_ops = &slab_sysfs_ops,
4513 .release = kmem_cache_release
4514};
4515
4516static int uevent_filter(struct kset *kset, struct kobject *kobj)
4517{
4518 struct kobj_type *ktype = get_ktype(kobj);
4519
4520 if (ktype == &slab_ktype)
4521 return 1;
4522 return 0;
4523}
4524
4525static struct kset_uevent_ops slab_uevent_ops = {
4526 .filter = uevent_filter,
4527};
4528
4529static struct kset *slab_kset;
4530
4531#define ID_STR_LENGTH 64
4532
4533
4534
4535
4536
4537static char *create_unique_id(struct kmem_cache *s)
4538{
4539 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
4540 char *p = name;
4541
4542 BUG_ON(!name);
4543
4544 *p++ = ':';
4545
4546
4547
4548
4549
4550
4551
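	/*
	 * First come the flags affecting slabcache operations. We will only
	 * get here for aliasable slabs so we do not need to support too many
	 * flags. The flags here must cover all flags that are matched during
	 * merging to guarantee that the id is unique.
	 */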
4552 if (s->flags & SLAB_CACHE_DMA)
4553 *p++ = 'd';
4554 if (s->flags & SLAB_RECLAIM_ACCOUNT)
4555 *p++ = 'a';
4556 if (s->flags & SLAB_DEBUG_FREE)
4557 *p++ = 'F';
4558 if (!(s->flags & SLAB_NOTRACK))
4559 *p++ = 't';
4560 if (p != name + 1)
4561 *p++ = '-';
4562 p += sprintf(p, "%07d", s->size);
4563 BUG_ON(p > name + ID_STR_LENGTH - 1);
4564 return name;
4565}
4566
4567static int sysfs_slab_add(struct kmem_cache *s)
4568{
4569 int err;
4570 const char *name;
4571 int unmergeable;
4572
	if (slab_state < SYSFS)
		/* Defer until slab_sysfs_init() adds all existing slabs */
		return 0;
4576
4577 unmergeable = slab_unmergeable(s);
4578 if (unmergeable) {
4579
4580
4581
4582
4583
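		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */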
4584 sysfs_remove_link(&slab_kset->kobj, s->name);
4585 name = s->name;
4586 } else {
4587
4588
4589
4590
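		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */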
4591 name = create_unique_id(s);
4592 }
4593
4594 s->kobj.kset = slab_kset;
4595 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
4596 if (err) {
4597 kobject_put(&s->kobj);
4598 return err;
4599 }
4600
4601 err = sysfs_create_group(&s->kobj, &slab_attr_group);
4602 if (err) {
4603 kobject_del(&s->kobj);
4604 kobject_put(&s->kobj);
4605 return err;
4606 }
4607 kobject_uevent(&s->kobj, KOBJ_ADD);
4608 if (!unmergeable) {
4609
4610 sysfs_slab_alias(s, s->name);
4611 kfree(name);
4612 }
4613 return 0;
4614}
4615
4616static void sysfs_slab_remove(struct kmem_cache *s)
4617{
4618 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4619 kobject_del(&s->kobj);
4620 kobject_put(&s->kobj);
4621}
4622
4623
4624
4625
4626
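/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */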
4627struct saved_alias {
4628 struct kmem_cache *s;
4629 const char *name;
4630 struct saved_alias *next;
4631};
4632
4633static struct saved_alias *alias_list;
4634
4635static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
4636{
4637 struct saved_alias *al;
4638
4639 if (slab_state == SYSFS) {
4640
4641
4642
4643 sysfs_remove_link(&slab_kset->kobj, name);
4644 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
4645 }
4646
4647 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
4648 if (!al)
4649 return -ENOMEM;
4650
4651 al->s = s;
4652 al->name = name;
4653 al->next = alias_list;
4654 alias_list = al;
4655 return 0;
4656}
4657
4658static int __init slab_sysfs_init(void)
4659{
4660 struct kmem_cache *s;
4661 int err;
4662
4663 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
4664 if (!slab_kset) {
4665 printk(KERN_ERR "Cannot register slab subsystem.\n");
4666 return -ENOSYS;
4667 }
4668
4669 slab_state = SYSFS;
4670
4671 list_for_each_entry(s, &slab_caches, list) {
4672 err = sysfs_slab_add(s);
4673 if (err)
4674 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
4675 " to sysfs\n", s->name);
4676 }
4677
4678 while (alias_list) {
4679 struct saved_alias *al = alias_list;
4680
4681 alias_list = alias_list->next;
4682 err = sysfs_slab_alias(al->s, al->name);
		if (err)
			printk(KERN_ERR "SLUB: Unable to add boot slab alias"
					" %s to sysfs\n", al->name);
4686 kfree(al);
4687 }
4688
4689 resiliency_test();
4690 return 0;
4691}
4692
4693__initcall(slab_sysfs_init);
4694#endif
4695
4696
4697
4698
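/*
 * The /proc/slabinfo ABI
 */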
4699#ifdef CONFIG_SLABINFO
4700static void print_slabinfo_header(struct seq_file *m)
4701{
4702 seq_puts(m, "slabinfo - version: 2.1\n");
4703 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4704 "<objperslab> <pagesperslab>");
4705 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4706 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4707 seq_putc(m, '\n');
4708}
4709
4710static void *s_start(struct seq_file *m, loff_t *pos)
4711{
4712 loff_t n = *pos;
4713
4714 down_read(&slub_lock);
4715 if (!n)
4716 print_slabinfo_header(m);
4717
4718 return seq_list_start(&slab_caches, *pos);
4719}
4720
4721static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4722{
4723 return seq_list_next(p, &slab_caches, pos);
4724}
4725
4726static void s_stop(struct seq_file *m, void *p)
4727{
4728 up_read(&slub_lock);
4729}
4730
4731static int s_show(struct seq_file *m, void *p)
4732{
4733 unsigned long nr_partials = 0;
4734 unsigned long nr_slabs = 0;
4735 unsigned long nr_inuse = 0;
4736 unsigned long nr_objs = 0;
4737 unsigned long nr_free = 0;
4738 struct kmem_cache *s;
4739 int node;
4740
4741 s = list_entry(p, struct kmem_cache, list);
4742
4743 for_each_online_node(node) {
4744 struct kmem_cache_node *n = get_node(s, node);
4745
4746 if (!n)
4747 continue;
4748
4749 nr_partials += n->nr_partial;
4750 nr_slabs += atomic_long_read(&n->nr_slabs);
4751 nr_objs += atomic_long_read(&n->total_objects);
4752 nr_free += count_partial(n, count_free);
4753 }
4754
4755 nr_inuse = nr_objs - nr_free;
4756
4757 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
4758 nr_objs, s->size, oo_objects(s->oo),
4759 (1 << oo_order(s->oo)));
4760 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
4761 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
4762 0UL);
4763 seq_putc(m, '\n');
4764 return 0;
4765}
4766
4767static const struct seq_operations slabinfo_op = {
4768 .start = s_start,
4769 .next = s_next,
4770 .stop = s_stop,
4771 .show = s_show,
4772};
4773
4774static int slabinfo_open(struct inode *inode, struct file *file)
4775{
4776 return seq_open(file, &slabinfo_op);
4777}
4778
4779static const struct file_operations proc_slabinfo_operations = {
4780 .open = slabinfo_open,
4781 .read = seq_read,
4782 .llseek = seq_lseek,
4783 .release = seq_release,
4784};
4785
4786static int __init slab_proc_init(void)
4787{
4788 proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
4789 return 0;
4790}
4791module_init(slab_proc_init);
4792#endif
4793