/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
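
/*
 * Slab state is tracked by overloading page flags that are otherwise
 * unused for slab pages: PG_active marks a slab that is frozen to a cpu
 * (only that cpu may allocate from it) and PG_error marks a slab with
 * debugging active; see SlabFrozen()/SlabDebug() below.
 *
 * Individual slabs are serialized by a bit spinlock on PG_locked
 * (slab_lock()), while the per-node partial and full lists are protected
 * by kmem_cache_node->list_lock.
 */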
#define FROZEN (1 << PG_active)

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG (1 << PG_error)
#else
#define SLABDEBUG 0
#endif

static inline int SlabFrozen(struct page *page)
{
	return page->flags & FROZEN;
}

static inline void SetSlabFrozen(struct page *page)
{
	page->flags |= FROZEN;
}

static inline void ClearSlabFrozen(struct page *page)
{
	page->flags &= ~FROZEN;
}

static inline int SlabDebug(struct page *page)
{
	return page->flags & SLABDEBUG;
}

static inline void SetSlabDebug(struct page *page)
{
	page->flags |= SLABDEBUG;
}

static inline void ClearSlabDebug(struct page *page)
{
	page->flags &= ~SLABDEBUG;
}

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

#if PAGE_SHIFT <= 12

/*
 * Small page size. Make sure that we do not fragment memory.
 */
#define DEFAULT_MAX_ORDER 1
#define DEFAULT_MIN_OBJECTS 4

#else

/*
 * Large page machines are customarily able to handle larger
 * page orders.
 */
#define DEFAULT_MAX_ORDER 2
#define DEFAULT_MIN_OBJECTS 8

#endif

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging.
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA)

#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */

/* Not all arches define cache_line_size */
#ifndef cache_line_size
#define cache_line_size()	L1_CACHE_BYTES
#endif

static int kmem_size = sizeof(struct kmem_cache);

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

static enum {
	DOWN,		/* No slab functionality available */
	PARTIAL,	/* kmem_cache_node functional */
	UP,		/* Everything works but does not show up in sysfs */
	SYSFS		/* Sysfs up */
} slab_state = DOWN;

/* A list of all slab caches on the system */
static DECLARE_RWSEM(slub_lock);
static LIST_HEAD(slab_caches);

/*
 * Tracking of a slab's allocating and freeing users when
 * SLAB_STORE_USER is set.
 */
struct track {
	void *addr;		/* Called from address */
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) {}
#endif

int slab_is_available(void)
{
	return slab_state >= UP;
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
#ifdef CONFIG_NUMA
	return s->node[node];
#else
	return &s->local_node;
#endif
}

static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
{
#ifdef CONFIG_SMP
	return s->cpu_slab[cpu];
#else
	return &s->cpu_slab;
#endif
}

static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + s->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}
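
/*
 * The free pointer of an object lives at offset s->offset inside the
 * object.  s->offset is 0 unless poisoning, RCU freeing or a constructor
 * force it behind the object proper (see calculate_sizes()).
 */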
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}

/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr) \
	for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
			__p += (__s)->size)

/* Scan freelist */
#define for_each_free_object(__p, __s, __free) \
	for (__p = (__free); __p; __p = get_freepointer((__s), __p))

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

#ifdef CONFIG_SLUB_DEBUG
/*
 * Debug settings:
 */
#ifdef CONFIG_SLUB_DEBUG_ON
static int slub_debug = DEBUG_DEFAULT_FLAGS;
#else
static int slub_debug;
#endif

static char *slub_debug_slabs;

/*
 * Object debugging
 */
344static void print_section(char *text, u8 *addr, unsigned int length)
345{
346 int i, offset;
347 int newline = 1;
348 char ascii[17];
349
350 ascii[16] = 0;
351
352 for (i = 0; i < length; i++) {
353 if (newline) {
354 printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
355 newline = 0;
356 }
357 printk(" %02x", addr[i]);
358 offset = i % 16;
359 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
360 if (offset == 15) {
361 printk(" %s\n",ascii);
362 newline = 1;
363 }
364 }
365 if (!newline) {
366 i %= 16;
367 while (i < 16) {
368 printk(" ");
369 ascii[i] = ' ';
370 i++;
371 }
372 printk(" %s\n", ascii);
373 }
374}
375
376static struct track *get_track(struct kmem_cache *s, void *object,
377 enum track_item alloc)
378{
379 struct track *p;
380
381 if (s->offset)
382 p = object + s->offset + sizeof(void *);
383 else
384 p = object + s->inuse;
385
386 return p + alloc;
387}
388
389static void set_track(struct kmem_cache *s, void *object,
390 enum track_item alloc, void *addr)
391{
392 struct track *p;
393
394 if (s->offset)
395 p = object + s->offset + sizeof(void *);
396 else
397 p = object + s->inuse;
398
399 p += alloc;
400 if (addr) {
401 p->addr = addr;
402 p->cpu = smp_processor_id();
403 p->pid = current ? current->pid : -1;
404 p->when = jiffies;
405 } else
406 memset(p, 0, sizeof(struct track));
407}
408
409static void init_tracking(struct kmem_cache *s, void *object)
410{
411 if (!(s->flags & SLAB_STORE_USER))
412 return;
413
414 set_track(s, object, TRACK_FREE, NULL);
415 set_track(s, object, TRACK_ALLOC, NULL);
416}
417
418static void print_track(const char *s, struct track *t)
419{
420 if (!t->addr)
421 return;
422
423 printk(KERN_ERR "INFO: %s in ", s);
424 __print_symbol("%s", (unsigned long)t->addr);
425 printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid);
426}
427
428static void print_tracking(struct kmem_cache *s, void *object)
429{
430 if (!(s->flags & SLAB_STORE_USER))
431 return;
432
433 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
434 print_track("Freed", get_track(s, object, TRACK_FREE));
435}
436
437static void print_page_info(struct page *page)
438{
439 printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n",
440 page, page->inuse, page->freelist, page->flags);
441
442}
443
444static void slab_bug(struct kmem_cache *s, char *fmt, ...)
445{
446 va_list args;
447 char buf[100];
448
449 va_start(args, fmt);
450 vsnprintf(buf, sizeof(buf), fmt, args);
451 va_end(args);
452 printk(KERN_ERR "========================================"
453 "=====================================\n");
454 printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
455 printk(KERN_ERR "----------------------------------------"
456 "-------------------------------------\n\n");
457}
458
459static void slab_fix(struct kmem_cache *s, char *fmt, ...)
460{
461 va_list args;
462 char buf[100];
463
464 va_start(args, fmt);
465 vsnprintf(buf, sizeof(buf), fmt, args);
466 va_end(args);
467 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
468}
469
470static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
471{
472 unsigned int off;
473 u8 *addr = page_address(page);
474
475 print_tracking(s, p);
476
477 print_page_info(page);
478
479 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
480 p, p - addr, get_freepointer(s, p));
481
482 if (p > addr + 16)
483 print_section("Bytes b4", p - 16, 16);
484
485 print_section("Object", p, min(s->objsize, 128));
486
487 if (s->flags & SLAB_RED_ZONE)
488 print_section("Redzone", p + s->objsize,
489 s->inuse - s->objsize);
490
491 if (s->offset)
492 off = s->offset + sizeof(void *);
493 else
494 off = s->inuse;
495
496 if (s->flags & SLAB_STORE_USER)
497 off += 2 * sizeof(struct track);
498
499 if (off != s->size)
500
501 print_section("Padding", p + off, s->size - off);
502
503 dump_stack();
504}
505
506static void object_err(struct kmem_cache *s, struct page *page,
507 u8 *object, char *reason)
508{
509 slab_bug(s, reason);
510 print_trailer(s, page, object);
511}
512
static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	slab_bug(s, "%s", buf);
	print_page_info(page);
	dump_stack();
}
525
526static void init_object(struct kmem_cache *s, void *object, int active)
527{
528 u8 *p = object;
529
530 if (s->flags & __OBJECT_POISON) {
531 memset(p, POISON_FREE, s->objsize - 1);
532 p[s->objsize -1] = POISON_END;
533 }
534
535 if (s->flags & SLAB_RED_ZONE)
536 memset(p + s->objsize,
537 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE,
538 s->inuse - s->objsize);
539}
540
541static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
542{
543 while (bytes) {
544 if (*start != (u8)value)
545 return start;
546 start++;
547 bytes--;
548 }
549 return NULL;
550}
551
552static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
553 void *from, void *to)
554{
555 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
556 memset(from, data, to - from);
557}
558
559static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
560 u8 *object, char *what,
561 u8* start, unsigned int value, unsigned int bytes)
562{
563 u8 *fault;
564 u8 *end;
565
566 fault = check_bytes(start, value, bytes);
567 if (!fault)
568 return 1;
569
570 end = start + bytes;
571 while (end > fault && end[-1] == value)
572 end--;
573
574 slab_bug(s, "%s overwritten", what);
575 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
576 fault, end - 1, fault[0], value);
577 print_trailer(s, page, object);
578
579 restore_bytes(s, what, value, fault, end);
580 return 0;
581}
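
/*
 * Object layout as checked below (established by calculate_sizes()):
 *
 *	object address
 *		s->objsize bytes of object data; filled with POISON_FREE and
 *		a trailing POISON_END byte while the object is free if
 *		SLAB_POISON is active.
 *	object + s->objsize
 *		filler up to s->inuse; used as the red zone (SLUB_RED_ACTIVE /
 *		SLUB_RED_INACTIVE) when SLAB_RED_ZONE is set.
 *	object + s->inuse
 *		the free pointer, if it cannot be kept inside the object
 *		(s->offset != 0), followed by two struct track entries
 *		(alloc and free) when SLAB_STORE_USER is set.
 *	object + s->size
 *		start of the next object; any remaining bytes are padding
 *		filled with POISON_INUSE.
 */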
621static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
622{
623 unsigned long off = s->inuse;
624
625 if (s->offset)
626
627 off += sizeof(void *);
628
629 if (s->flags & SLAB_STORE_USER)
630
631 off += 2 * sizeof(struct track);
632
633 if (s->size == off)
634 return 1;
635
636 return check_bytes_and_report(s, page, p, "Object padding",
637 p + off, POISON_INUSE, s->size - off);
638}
639
640static int slab_pad_check(struct kmem_cache *s, struct page *page)
641{
642 u8 *start;
643 u8 *fault;
644 u8 *end;
645 int length;
646 int remainder;
647
648 if (!(s->flags & SLAB_POISON))
649 return 1;
650
651 start = page_address(page);
652 end = start + (PAGE_SIZE << s->order);
653 length = s->objects * s->size;
654 remainder = end - (start + length);
655 if (!remainder)
656 return 1;
657
658 fault = check_bytes(start + length, POISON_INUSE, remainder);
659 if (!fault)
660 return 1;
661 while (end > fault && end[-1] == POISON_INUSE)
662 end--;
663
664 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
665 print_section("Padding", start, length);
666
667 restore_bytes(s, "slab padding", POISON_INUSE, start, end);
668 return 0;
669}
670
671static int check_object(struct kmem_cache *s, struct page *page,
672 void *object, int active)
673{
674 u8 *p = object;
675 u8 *endobject = object + s->objsize;
676
677 if (s->flags & SLAB_RED_ZONE) {
678 unsigned int red =
679 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE;
680
681 if (!check_bytes_and_report(s, page, object, "Redzone",
682 endobject, red, s->inuse - s->objsize))
683 return 0;
684 } else {
685 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse)
686 check_bytes_and_report(s, page, p, "Alignment padding", endobject,
687 POISON_INUSE, s->inuse - s->objsize);
688 }
689
690 if (s->flags & SLAB_POISON) {
691 if (!active && (s->flags & __OBJECT_POISON) &&
692 (!check_bytes_and_report(s, page, p, "Poison", p,
693 POISON_FREE, s->objsize - 1) ||
694 !check_bytes_and_report(s, page, p, "Poison",
695 p + s->objsize -1, POISON_END, 1)))
696 return 0;
697
698
699
700 check_pad_bytes(s, page, p);
701 }
702
703 if (!s->offset && active)
704
705
706
707
708 return 1;
709
710
711 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
712 object_err(s, page, p, "Freepointer corrupt");
713
714
715
716
717
718 set_freepointer(s, p, NULL);
719 return 0;
720 }
721 return 1;
722}
723
724static int check_slab(struct kmem_cache *s, struct page *page)
725{
726 VM_BUG_ON(!irqs_disabled());
727
728 if (!PageSlab(page)) {
729 slab_err(s, page, "Not a valid slab page");
730 return 0;
731 }
	if (page->inuse > s->objects) {
		slab_err(s, page, "inuse %u > max %u",
			page->inuse, s->objects);
		return 0;
	}
737
738 slab_pad_check(s, page);
739 return 1;
740}
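
/*
 * Determine whether an object (or NULL for the whole freelist) is on a
 * slab's freelist.  Must be called with the slab lock held; repairs the
 * freelist and the inuse count if corruption is detected.
 */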
746static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
747{
748 int nr = 0;
749 void *fp = page->freelist;
750 void *object = NULL;
751
752 while (fp && nr <= s->objects) {
753 if (fp == search)
754 return 1;
755 if (!check_valid_pointer(s, page, fp)) {
756 if (object) {
757 object_err(s, page, object,
758 "Freechain corrupt");
759 set_freepointer(s, object, NULL);
760 break;
761 } else {
762 slab_err(s, page, "Freepointer corrupt");
763 page->freelist = NULL;
764 page->inuse = s->objects;
765 slab_fix(s, "Freelist cleared");
766 return 0;
767 }
768 break;
769 }
770 object = fp;
771 fp = get_freepointer(s, object);
772 nr++;
773 }
774
775 if (page->inuse != s->objects - nr) {
776 slab_err(s, page, "Wrong object count. Counter is %d but "
777 "counted were %d", page->inuse, s->objects - nr);
778 page->inuse = s->objects - nr;
779 slab_fix(s, "Object count adjusted.");
780 }
781 return search == NULL;
782}
783
784static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc)
785{
786 if (s->flags & SLAB_TRACE) {
787 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
788 s->name,
789 alloc ? "alloc" : "free",
790 object, page->inuse,
791 page->freelist);
792
793 if (!alloc)
794 print_section("Object", (void *)object, s->objsize);
795
796 dump_stack();
797 }
798}
799
800
801
802
803static void add_full(struct kmem_cache_node *n, struct page *page)
804{
805 spin_lock(&n->list_lock);
806 list_add(&page->lru, &n->full);
807 spin_unlock(&n->list_lock);
808}
809
810static void remove_full(struct kmem_cache *s, struct page *page)
811{
812 struct kmem_cache_node *n;
813
814 if (!(s->flags & SLAB_STORE_USER))
815 return;
816
817 n = get_node(s, page_to_nid(page));
818
819 spin_lock(&n->list_lock);
820 list_del(&page->lru);
821 spin_unlock(&n->list_lock);
822}
823
824static void setup_object_debug(struct kmem_cache *s, struct page *page,
825 void *object)
826{
827 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
828 return;
829
830 init_object(s, object, 0);
831 init_tracking(s, object);
832}
833
834static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
835 void *object, void *addr)
836{
837 if (!check_slab(s, page))
838 goto bad;
839
840 if (object && !on_freelist(s, page, object)) {
841 object_err(s, page, object, "Object already allocated");
842 goto bad;
843 }
844
845 if (!check_valid_pointer(s, page, object)) {
846 object_err(s, page, object, "Freelist Pointer check fails");
847 goto bad;
848 }
849
850 if (object && !check_object(s, page, object, 0))
851 goto bad;
852
853
854 if (s->flags & SLAB_STORE_USER)
855 set_track(s, object, TRACK_ALLOC, addr);
856 trace(s, page, object, 1);
857 init_object(s, object, 1);
858 return 1;
859
860bad:
861 if (PageSlab(page)) {
862
863
864
865
866
867 slab_fix(s, "Marking all objects used");
868 page->inuse = s->objects;
869 page->freelist = NULL;
870 }
871 return 0;
872}
873
874static int free_debug_processing(struct kmem_cache *s, struct page *page,
875 void *object, void *addr)
876{
877 if (!check_slab(s, page))
878 goto fail;
879
880 if (!check_valid_pointer(s, page, object)) {
881 slab_err(s, page, "Invalid object pointer 0x%p", object);
882 goto fail;
883 }
884
885 if (on_freelist(s, page, object)) {
886 object_err(s, page, object, "Object already free");
887 goto fail;
888 }
889
890 if (!check_object(s, page, object, 1))
891 return 0;
892
893 if (unlikely(s != page->slab)) {
894 if (!PageSlab(page))
895 slab_err(s, page, "Attempt to free object(0x%p) "
896 "outside of slab", object);
897 else
898 if (!page->slab) {
899 printk(KERN_ERR
900 "SLUB <none>: no slab for object 0x%p.\n",
901 object);
902 dump_stack();
903 }
904 else
905 object_err(s, page, object,
906 "page slab pointer corrupt.");
907 goto fail;
908 }
909
910
911 if (!SlabFrozen(page) && !page->freelist)
912 remove_full(s, page);
913 if (s->flags & SLAB_STORE_USER)
914 set_track(s, object, TRACK_FREE, addr);
915 trace(s, page, object, 0);
916 init_object(s, object, 0);
917 return 1;
918
919fail:
920 slab_fix(s, "Object at 0x%p not freed", object);
921 return 0;
922}
923
924static int __init setup_slub_debug(char *str)
925{
926 slub_debug = DEBUG_DEFAULT_FLAGS;
927 if (*str++ != '=' || !*str)
928
929
930
931 goto out;
932
933 if (*str == ',')
934
935
936
937
938 goto check_slabs;
939
940 slub_debug = 0;
941 if (*str == '-')
942
943
944
945 goto out;
946
947
948
949
950 for ( ;*str && *str != ','; str++) {
951 switch (tolower(*str)) {
952 case 'f':
953 slub_debug |= SLAB_DEBUG_FREE;
954 break;
955 case 'z':
956 slub_debug |= SLAB_RED_ZONE;
957 break;
958 case 'p':
959 slub_debug |= SLAB_POISON;
960 break;
961 case 'u':
962 slub_debug |= SLAB_STORE_USER;
963 break;
964 case 't':
965 slub_debug |= SLAB_TRACE;
966 break;
967 default:
968 printk(KERN_ERR "slub_debug option '%c' "
969 "unknown. skipped\n",*str);
970 }
971 }
972
973check_slabs:
974 if (*str == ',')
975 slub_debug_slabs = str + 1;
976out:
977 return 1;
978}
979
980__setup("slub_debug", setup_slub_debug);
981
982static unsigned long kmem_cache_flags(unsigned long objsize,
983 unsigned long flags, const char *name,
984 void (*ctor)(struct kmem_cache *, void *))
985{
998 if (objsize >= 65535 * sizeof(void *)) {
999 BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON |
1000 SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
1001 BUG_ON(ctor);
1002 } else {
1003
1004
1005
1006 if (slub_debug && (!slub_debug_slabs ||
1007 strncmp(slub_debug_slabs, name,
1008 strlen(slub_debug_slabs)) == 0))
1009 flags |= slub_debug;
1010 }
1011
1012 return flags;
1013}
1014#else
1015static inline void setup_object_debug(struct kmem_cache *s,
1016 struct page *page, void *object) {}
1017
1018static inline int alloc_debug_processing(struct kmem_cache *s,
1019 struct page *page, void *object, void *addr) { return 0; }
1020
1021static inline int free_debug_processing(struct kmem_cache *s,
1022 struct page *page, void *object, void *addr) { return 0; }
1023
1024static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1025 { return 1; }
1026static inline int check_object(struct kmem_cache *s, struct page *page,
1027 void *object, int active) { return 1; }
1028static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
1029static inline unsigned long kmem_cache_flags(unsigned long objsize,
1030 unsigned long flags, const char *name,
1031 void (*ctor)(struct kmem_cache *, void *))
1032{
1033 return flags;
1034}
1035#define slub_debug 0
1036#endif
1037
1038
1039
1040static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1041{
1042 struct page * page;
1043 int pages = 1 << s->order;
1044
1045 if (s->order)
1046 flags |= __GFP_COMP;
1047
1048 if (s->flags & SLAB_CACHE_DMA)
1049 flags |= SLUB_DMA;
1050
1051 if (s->flags & SLAB_RECLAIM_ACCOUNT)
1052 flags |= __GFP_RECLAIMABLE;
1053
1054 if (node == -1)
1055 page = alloc_pages(flags, s->order);
1056 else
1057 page = alloc_pages_node(node, flags, s->order);
1058
1059 if (!page)
1060 return NULL;
1061
1062 mod_zone_page_state(page_zone(page),
1063 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1064 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1065 pages);
1066
1067 return page;
1068}
1069
1070static void setup_object(struct kmem_cache *s, struct page *page,
1071 void *object)
1072{
1073 setup_object_debug(s, page, object);
1074 if (unlikely(s->ctor))
1075 s->ctor(s, object);
1076}
1077
1078static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1079{
1080 struct page *page;
1081 struct kmem_cache_node *n;
1082 void *start;
1083 void *last;
1084 void *p;
1085
1086 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1087
1088 page = allocate_slab(s,
1089 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1090 if (!page)
1091 goto out;
1092
1093 n = get_node(s, page_to_nid(page));
1094 if (n)
1095 atomic_long_inc(&n->nr_slabs);
1096 page->slab = s;
1097 page->flags |= 1 << PG_slab;
1098 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
1099 SLAB_STORE_USER | SLAB_TRACE))
1100 SetSlabDebug(page);
1101
1102 start = page_address(page);
1103
1104 if (unlikely(s->flags & SLAB_POISON))
1105 memset(start, POISON_INUSE, PAGE_SIZE << s->order);
1106
1107 last = start;
1108 for_each_object(p, s, start) {
1109 setup_object(s, page, last);
1110 set_freepointer(s, last, p);
1111 last = p;
1112 }
1113 setup_object(s, page, last);
1114 set_freepointer(s, last, NULL);
1115
1116 page->freelist = start;
1117 page->inuse = 0;
1118out:
1119 return page;
1120}
1121
1122static void __free_slab(struct kmem_cache *s, struct page *page)
1123{
1124 int pages = 1 << s->order;
1125
1126 if (unlikely(SlabDebug(page))) {
1127 void *p;
1128
1129 slab_pad_check(s, page);
1130 for_each_object(p, s, page_address(page))
1131 check_object(s, page, p, 0);
1132 ClearSlabDebug(page);
1133 }
1134
1135 mod_zone_page_state(page_zone(page),
1136 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1137 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1138 - pages);
1139
1140 __free_pages(page, s->order);
1141}
1142
1143static void rcu_free_slab(struct rcu_head *h)
1144{
1145 struct page *page;
1146
1147 page = container_of((struct list_head *)h, struct page, lru);
1148 __free_slab(page->slab, page);
1149}
1150
1151static void free_slab(struct kmem_cache *s, struct page *page)
1152{
1153 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1154
1155
1156
1157 struct rcu_head *head = (void *)&page->lru;
1158
1159 call_rcu(head, rcu_free_slab);
1160 } else
1161 __free_slab(s, page);
1162}
1163
1164static void discard_slab(struct kmem_cache *s, struct page *page)
1165{
1166 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1167
1168 atomic_long_dec(&n->nr_slabs);
1169 reset_page_mapcount(page);
1170 __ClearPageSlab(page);
1171 free_slab(s, page);
1172}
1173
1174
1175
1176
1177static __always_inline void slab_lock(struct page *page)
1178{
1179 bit_spin_lock(PG_locked, &page->flags);
1180}
1181
1182static __always_inline void slab_unlock(struct page *page)
1183{
1184 bit_spin_unlock(PG_locked, &page->flags);
1185}
1186
1187static __always_inline int slab_trylock(struct page *page)
1188{
1189 int rc = 1;
1190
1191 rc = bit_spin_trylock(PG_locked, &page->flags);
1192 return rc;
1193}
1194
1195
1196
1197
1198static void add_partial_tail(struct kmem_cache_node *n, struct page *page)
1199{
1200 spin_lock(&n->list_lock);
1201 n->nr_partial++;
1202 list_add_tail(&page->lru, &n->partial);
1203 spin_unlock(&n->list_lock);
1204}
1205
1206static void add_partial(struct kmem_cache_node *n, struct page *page)
1207{
1208 spin_lock(&n->list_lock);
1209 n->nr_partial++;
1210 list_add(&page->lru, &n->partial);
1211 spin_unlock(&n->list_lock);
1212}
1213
1214static void remove_partial(struct kmem_cache *s,
1215 struct page *page)
1216{
1217 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1218
1219 spin_lock(&n->list_lock);
1220 list_del(&page->lru);
1221 n->nr_partial--;
1222 spin_unlock(&n->list_lock);
1223}
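
/*
 * Lock the slab and remove it from the partial list.  The caller must
 * hold n->list_lock; freezing the slab reserves it for the cpu that
 * took it off the list.
 */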
1230static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
1231{
1232 if (slab_trylock(page)) {
1233 list_del(&page->lru);
1234 n->nr_partial--;
1235 SetSlabFrozen(page);
1236 return 1;
1237 }
1238 return 0;
1239}
1240
1241
1242
1243
1244static struct page *get_partial_node(struct kmem_cache_node *n)
1245{
1246 struct page *page;
1247
1248
1249
1250
1251
1252
1253
1254 if (!n || !n->nr_partial)
1255 return NULL;
1256
1257 spin_lock(&n->list_lock);
1258 list_for_each_entry(page, &n->partial, lru)
1259 if (lock_and_freeze_slab(n, page))
1260 goto out;
1261 page = NULL;
1262out:
1263 spin_unlock(&n->list_lock);
1264 return page;
1265}
1266
1267
1268
1269
1270static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1271{
1272#ifdef CONFIG_NUMA
1273 struct zonelist *zonelist;
1274 struct zone **z;
1275 struct page *page;
1276
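
	/*
	 * s->defrag_ratio throttles how often we fall back to other nodes:
	 * the search below only proceeds when get_cycles() % 1024 does not
	 * exceed the ratio, so a higher ratio means more frequent (and more
	 * expensive) cross-node defragmentation.
	 */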
1295 if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
1296 return NULL;
1297
1298 zonelist = &NODE_DATA(slab_node(current->mempolicy))
1299 ->node_zonelists[gfp_zone(flags)];
1300 for (z = zonelist->zones; *z; z++) {
1301 struct kmem_cache_node *n;
1302
1303 n = get_node(s, zone_to_nid(*z));
1304
1305 if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
1306 n->nr_partial > MIN_PARTIAL) {
1307 page = get_partial_node(n);
1308 if (page)
1309 return page;
1310 }
1311 }
1312#endif
1313 return NULL;
1314}
1315
1316
1317
1318
1319static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1320{
1321 struct page *page;
1322 int searchnode = (node == -1) ? numa_node_id() : node;
1323
1324 page = get_partial_node(get_node(s, searchnode));
1325 if (page || (flags & __GFP_THISNODE))
1326 return page;
1327
1328 return get_any_partial(s, flags);
1329}
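
/*
 * Move a slab off the per-cpu state: put it back on the node's partial
 * list (or the full list when user tracking and debugging are active),
 * or discard it if it is empty and the node already holds at least
 * MIN_PARTIAL partial slabs.  Called with the slab lock held; the lock
 * is released here.
 */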
1338static void unfreeze_slab(struct kmem_cache *s, struct page *page)
1339{
1340 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1341
1342 ClearSlabFrozen(page);
1343 if (page->inuse) {
1344
1345 if (page->freelist)
1346 add_partial(n, page);
1347 else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
1348 add_full(n, page);
1349 slab_unlock(page);
1350
1351 } else {
1352 if (n->nr_partial < MIN_PARTIAL) {
1361 add_partial_tail(n, page);
1362 slab_unlock(page);
1363 } else {
1364 slab_unlock(page);
1365 discard_slab(s, page);
1366 }
1367 }
1368}
1369
1370
1371
1372
1373static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1374{
1375 struct page *page = c->page;
1376
1377
1378
1379
1380
1381 while (unlikely(c->freelist)) {
1382 void **object;
1383
1384
1385 object = c->freelist;
1386 c->freelist = c->freelist[c->offset];
1387
1388
1389 object[c->offset] = page->freelist;
1390 page->freelist = object;
1391 page->inuse--;
1392 }
1393 c->page = NULL;
1394 unfreeze_slab(s, page);
1395}
1396
1397static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1398{
1399 slab_lock(c->page);
1400 deactivate_slab(s, c);
1401}
1402
1403
1404
1405
1406
1407static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
1408{
1409 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1410
1411 if (likely(c && c->page))
1412 flush_slab(s, c);
1413}
1414
1415static void flush_cpu_slab(void *d)
1416{
1417 struct kmem_cache *s = d;
1418
1419 __flush_cpu_slab(s, smp_processor_id());
1420}
1421
1422static void flush_all(struct kmem_cache *s)
1423{
1424#ifdef CONFIG_SMP
1425 on_each_cpu(flush_cpu_slab, s, 1, 1);
1426#else
1427 unsigned long flags;
1428
1429 local_irq_save(flags);
1430 flush_cpu_slab(s);
1431 local_irq_restore(flags);
1432#endif
1433}
1434
1435
1436
1437
1438
1439static inline int node_match(struct kmem_cache_cpu *c, int node)
1440{
1441#ifdef CONFIG_NUMA
1442 if (node != -1 && c->node != node)
1443 return 0;
1444#endif
1445 return 1;
1446}
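
/*
 * Slow path of the allocator, entered with interrupts disabled: the
 * per-cpu freelist is empty or the cpu slab is on the wrong NUMA node.
 * Reload from the cpu slab's own freelist, grab a partial slab from the
 * node lists, or allocate a fresh slab.
 */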
1465static void *__slab_alloc(struct kmem_cache *s,
1466 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
1467{
1468 void **object;
1469 struct page *new;
1470
1471 if (!c->page)
1472 goto new_slab;
1473
1474 slab_lock(c->page);
1475 if (unlikely(!node_match(c, node)))
1476 goto another_slab;
1477load_freelist:
1478 object = c->page->freelist;
1479 if (unlikely(!object))
1480 goto another_slab;
1481 if (unlikely(SlabDebug(c->page)))
1482 goto debug;
1483
1484 object = c->page->freelist;
1485 c->freelist = object[c->offset];
1486 c->page->inuse = s->objects;
1487 c->page->freelist = NULL;
1488 c->node = page_to_nid(c->page);
1489 slab_unlock(c->page);
1490 return object;
1491
1492another_slab:
1493 deactivate_slab(s, c);
1494
1495new_slab:
1496 new = get_partial(s, gfpflags, node);
1497 if (new) {
1498 c->page = new;
1499 goto load_freelist;
1500 }
1501
1502 if (gfpflags & __GFP_WAIT)
1503 local_irq_enable();
1504
1505 new = new_slab(s, gfpflags, node);
1506
1507 if (gfpflags & __GFP_WAIT)
1508 local_irq_disable();
1509
1510 if (new) {
1511 c = get_cpu_slab(s, smp_processor_id());
1512 if (c->page)
1513 flush_slab(s, c);
1514 slab_lock(new);
1515 SetSlabFrozen(new);
1516 c->page = new;
1517 goto load_freelist;
1518 }
1519 return NULL;
1520debug:
1521 object = c->page->freelist;
1522 if (!alloc_debug_processing(s, c->page, object, addr))
1523 goto another_slab;
1524
1525 c->page->inuse++;
1526 c->page->freelist = object[c->offset];
1527 c->node = -1;
1528 slab_unlock(c->page);
1529 return object;
1530}
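
/*
 * Fast path of the allocator: disable interrupts and pop the first
 * object off the per-cpu freelist.  Fall back to __slab_alloc() when the
 * freelist is empty or the NUMA node does not match.
 */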
1542static void __always_inline *slab_alloc(struct kmem_cache *s,
1543 gfp_t gfpflags, int node, void *addr)
1544{
1545 void **object;
1546 unsigned long flags;
1547 struct kmem_cache_cpu *c;
1548
1549 local_irq_save(flags);
1550 c = get_cpu_slab(s, smp_processor_id());
1551 if (unlikely(!c->freelist || !node_match(c, node)))
1552
1553 object = __slab_alloc(s, gfpflags, node, addr, c);
1554
1555 else {
1556 object = c->freelist;
1557 c->freelist = object[c->offset];
1558 }
1559 local_irq_restore(flags);
1560
1561 if (unlikely((gfpflags & __GFP_ZERO) && object))
1562 memset(object, 0, c->objsize);
1563
1564 return object;
1565}
1566
1567void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
1568{
1569 return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
1570}
1571EXPORT_SYMBOL(kmem_cache_alloc);
1572
1573#ifdef CONFIG_NUMA
1574void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
1575{
1576 return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
1577}
1578EXPORT_SYMBOL(kmem_cache_alloc_node);
1579#endif
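
/*
 * Slow path of freeing: the object does not belong to the current cpu
 * slab.  Take the slab lock, push the object back onto the slab's
 * freelist and update the partial lists, discarding the slab if it
 * became completely unused.
 */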
1589static void __slab_free(struct kmem_cache *s, struct page *page,
1590 void *x, void *addr, unsigned int offset)
1591{
1592 void *prior;
1593 void **object = (void *)x;
1594
1595 slab_lock(page);
1596
1597 if (unlikely(SlabDebug(page)))
1598 goto debug;
1599checks_ok:
1600 prior = object[offset] = page->freelist;
1601 page->freelist = object;
1602 page->inuse--;
1603
1604 if (unlikely(SlabFrozen(page)))
1605 goto out_unlock;
1606
1607 if (unlikely(!page->inuse))
1608 goto slab_empty;
1609
1610
1611
1612
1613
1614
1615 if (unlikely(!prior))
1616 add_partial_tail(get_node(s, page_to_nid(page)), page);
1617
1618out_unlock:
1619 slab_unlock(page);
1620 return;
1621
1622slab_empty:
1623 if (prior)
1624
1625
1626
1627 remove_partial(s, page);
1628
1629 slab_unlock(page);
1630 discard_slab(s, page);
1631 return;
1632
1633debug:
1634 if (!free_debug_processing(s, page, x, addr))
1635 goto out_unlock;
1636 goto checks_ok;
1637}
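
/*
 * Fast path of freeing: objects belonging to the currently active cpu
 * slab are pushed straight onto the per-cpu freelist (c->node < 0 marks
 * a debug slab and forces the slow path); everything else goes through
 * __slab_free().
 */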
1650static void __always_inline slab_free(struct kmem_cache *s,
1651 struct page *page, void *x, void *addr)
1652{
1653 void **object = (void *)x;
1654 unsigned long flags;
1655 struct kmem_cache_cpu *c;
1656
1657 local_irq_save(flags);
1658 debug_check_no_locks_freed(object, s->objsize);
1659 c = get_cpu_slab(s, smp_processor_id());
1660 if (likely(page == c->page && c->node >= 0)) {
1661 object[c->offset] = c->freelist;
1662 c->freelist = object;
1663 } else
1664 __slab_free(s, page, x, addr, c->offset);
1665
1666 local_irq_restore(flags);
1667}
1668
1669void kmem_cache_free(struct kmem_cache *s, void *x)
1670{
1671 struct page *page;
1672
1673 page = virt_to_head_page(x);
1674
1675 slab_free(s, page, x, __builtin_return_address(0));
1676}
1677EXPORT_SYMBOL(kmem_cache_free);
1678
1679
1680static struct page *get_object_page(const void *x)
1681{
1682 struct page *page = virt_to_head_page(x);
1683
1684 if (!PageSlab(page))
1685 return NULL;
1686
1687 return page;
1688}
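
/*
 * Tunables for slab order and object count selection, adjustable on the
 * kernel command line via slub_min_order=, slub_max_order= and
 * slub_min_objects= (see the __setup() handlers further down).
 */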
1709static int slub_min_order;
1710static int slub_max_order = DEFAULT_MAX_ORDER;
1711static int slub_min_objects = DEFAULT_MIN_OBJECTS;
1712
1713
1714
1715
1716
1717static int slub_nomerge;
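
/*
 * Calculate a suitable page order for a given object size: the smallest
 * order (at least slub_min_order) that holds min_objects objects and
 * wastes no more than 1/fract_leftover of the slab as leftover space.
 */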
1744static inline int slab_order(int size, int min_objects,
1745 int max_order, int fract_leftover)
1746{
1747 int order;
1748 int rem;
1749 int min_order = slub_min_order;
1750
1751 for (order = max(min_order,
1752 fls(min_objects * size - 1) - PAGE_SHIFT);
1753 order <= max_order; order++) {
1754
1755 unsigned long slab_size = PAGE_SIZE << order;
1756
1757 if (slab_size < min_objects * size)
1758 continue;
1759
1760 rem = slab_size % size;
1761
1762 if (rem <= slab_size / fract_leftover)
1763 break;
1764
1765 }
1766
1767 return order;
1768}
1769
1770static inline int calculate_order(int size)
1771{
1772 int order;
1773 int min_objects;
1774 int fraction;
1784 min_objects = slub_min_objects;
1785 while (min_objects > 1) {
1786 fraction = 8;
1787 while (fraction >= 4) {
1788 order = slab_order(size, min_objects,
1789 slub_max_order, fraction);
1790 if (order <= slub_max_order)
1791 return order;
1792 fraction /= 2;
1793 }
1794 min_objects /= 2;
1795 }
1796
1797
1798
1799
1800
1801 order = slab_order(size, 1, slub_max_order, 1);
1802 if (order <= slub_max_order)
1803 return order;
1804
1805
1806
1807
	order = slab_order(size, 1, MAX_ORDER, 1);
	if (order < MAX_ORDER)
		return order;
	return -ENOSYS;
1812}
1813
1814
1815
1816
1817static unsigned long calculate_alignment(unsigned long flags,
1818 unsigned long align, unsigned long size)
1819{
1829 if ((flags & SLAB_HWCACHE_ALIGN) &&
1830 size > cache_line_size() / 2)
1831 return max_t(unsigned long, align, cache_line_size());
1832
1833 if (align < ARCH_SLAB_MINALIGN)
1834 return ARCH_SLAB_MINALIGN;
1835
1836 return ALIGN(align, sizeof(void *));
1837}
1838
1839static void init_kmem_cache_cpu(struct kmem_cache *s,
1840 struct kmem_cache_cpu *c)
1841{
1842 c->page = NULL;
1843 c->freelist = NULL;
1844 c->node = 0;
1845 c->offset = s->offset / sizeof(void *);
1846 c->objsize = s->objsize;
1847}
1848
1849static void init_kmem_cache_node(struct kmem_cache_node *n)
1850{
1851 n->nr_partial = 0;
1852 atomic_long_set(&n->nr_slabs, 0);
1853 spin_lock_init(&n->list_lock);
1854 INIT_LIST_HEAD(&n->partial);
1855#ifdef CONFIG_SLUB_DEBUG
1856 INIT_LIST_HEAD(&n->full);
1857#endif
1858}
1859
1860#ifdef CONFIG_SMP
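
/*
 * Per-cpu kmem_cache_cpu structures are handed out from a statically
 * reserved per-cpu array of NR_KMEM_CACHE_CPU entries, chained through a
 * per-cpu free list; once the reserve is exhausted they are allocated
 * with kmalloc_node().
 */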
1876#define NR_KMEM_CACHE_CPU 100
1877
1878static DEFINE_PER_CPU(struct kmem_cache_cpu,
1879 kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
1880
1881static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
1882static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
1883
1884static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
1885 int cpu, gfp_t flags)
1886{
1887 struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
1888
1889 if (c)
1890 per_cpu(kmem_cache_cpu_free, cpu) =
1891 (void *)c->freelist;
1892 else {
1893
1894 c = kmalloc_node(
1895 ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
1896 flags, cpu_to_node(cpu));
1897 if (!c)
1898 return NULL;
1899 }
1900
1901 init_kmem_cache_cpu(s, c);
1902 return c;
1903}
1904
1905static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
1906{
1907 if (c < per_cpu(kmem_cache_cpu, cpu) ||
1908 c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
1909 kfree(c);
1910 return;
1911 }
1912 c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
1913 per_cpu(kmem_cache_cpu_free, cpu) = c;
1914}
1915
1916static void free_kmem_cache_cpus(struct kmem_cache *s)
1917{
1918 int cpu;
1919
1920 for_each_online_cpu(cpu) {
1921 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1922
1923 if (c) {
1924 s->cpu_slab[cpu] = NULL;
1925 free_kmem_cache_cpu(c, cpu);
1926 }
1927 }
1928}
1929
1930static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
1931{
1932 int cpu;
1933
1934 for_each_online_cpu(cpu) {
1935 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
1936
1937 if (c)
1938 continue;
1939
1940 c = alloc_kmem_cache_cpu(s, cpu, flags);
1941 if (!c) {
1942 free_kmem_cache_cpus(s);
1943 return 0;
1944 }
1945 s->cpu_slab[cpu] = c;
1946 }
1947 return 1;
1948}
1949
1950
1951
1952
1953static void init_alloc_cpu_cpu(int cpu)
1954{
1955 int i;
1956
1957 if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
1958 return;
1959
1960 for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
1961 free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
1962
1963 cpu_set(cpu, kmem_cach_cpu_free_init_once);
1964}
1965
1966static void __init init_alloc_cpu(void)
1967{
1968 int cpu;
1969
1970 for_each_online_cpu(cpu)
1971 init_alloc_cpu_cpu(cpu);
1972 }
1973
1974#else
1975static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
1976static inline void init_alloc_cpu(void) {}
1977
1978static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
1979{
1980 init_kmem_cache_cpu(s, &s->cpu_slab);
1981 return 1;
1982}
1983#endif
1984
1985#ifdef CONFIG_NUMA
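
/*
 * Bootstrap helper: when the kmem_cache_node cache itself is being set
 * up there is no kmalloc_node() yet, so allocate a slab on the target
 * node and carve the kmem_cache_node structure out of it by hand.
 */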
1995static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
1996 int node)
1997{
1998 struct page *page;
1999 struct kmem_cache_node *n;
2000
2001 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
2002
2003 page = new_slab(kmalloc_caches, gfpflags, node);
2004
2005 BUG_ON(!page);
2006 if (page_to_nid(page) != node) {
2007 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2008 "node %d\n", node);
2009 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2010 "in order to be able to continue\n");
2011 }
2012
2013 n = page->freelist;
2014 BUG_ON(!n);
2015 page->freelist = get_freepointer(kmalloc_caches, n);
2016 page->inuse++;
2017 kmalloc_caches->node[node] = n;
2018#ifdef CONFIG_SLUB_DEBUG
2019 init_object(kmalloc_caches, n, 1);
2020 init_tracking(kmalloc_caches, n);
2021#endif
2022 init_kmem_cache_node(n);
2023 atomic_long_inc(&n->nr_slabs);
2024 add_partial(n, page);
2025 return n;
2026}
2027
2028static void free_kmem_cache_nodes(struct kmem_cache *s)
2029{
2030 int node;
2031
2032 for_each_node_state(node, N_NORMAL_MEMORY) {
2033 struct kmem_cache_node *n = s->node[node];
2034 if (n && n != &s->local_node)
2035 kmem_cache_free(kmalloc_caches, n);
2036 s->node[node] = NULL;
2037 }
2038}
2039
2040static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2041{
2042 int node;
2043 int local_node;
2044
2045 if (slab_state >= UP)
2046 local_node = page_to_nid(virt_to_page(s));
2047 else
2048 local_node = 0;
2049
2050 for_each_node_state(node, N_NORMAL_MEMORY) {
2051 struct kmem_cache_node *n;
2052
2053 if (local_node == node)
2054 n = &s->local_node;
2055 else {
2056 if (slab_state == DOWN) {
2057 n = early_kmem_cache_node_alloc(gfpflags,
2058 node);
2059 continue;
2060 }
2061 n = kmem_cache_alloc_node(kmalloc_caches,
2062 gfpflags, node);
2063
2064 if (!n) {
2065 free_kmem_cache_nodes(s);
2066 return 0;
2067 }
2068
2069 }
2070 s->node[node] = n;
2071 init_kmem_cache_node(n);
2072 }
2073 return 1;
2074}
2075#else
2076static void free_kmem_cache_nodes(struct kmem_cache *s)
2077{
2078}
2079
2080static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
2081{
2082 init_kmem_cache_node(&s->local_node);
2083 return 1;
2084}
2085#endif
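
/*
 * calculate_sizes() determines the layout of an object: word-aligned
 * object data, optional red zoning, the free pointer (moved behind the
 * object for RCU, poisoning or constructors), optional alloc/free
 * tracking, and final alignment.  It also picks the page order and the
 * number of objects per slab.
 */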
2091static int calculate_sizes(struct kmem_cache *s)
2092{
2093 unsigned long flags = s->flags;
2094 unsigned long size = s->objsize;
2095 unsigned long align = s->align;
2096
2097
2098
2099
2100
2101
2102 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2103 !s->ctor)
2104 s->flags |= __OBJECT_POISON;
2105 else
2106 s->flags &= ~__OBJECT_POISON;
2107
2108
2109
2110
2111
2112
2113 size = ALIGN(size, sizeof(void *));
2114
2115#ifdef CONFIG_SLUB_DEBUG
2116
2117
2118
2119
2120
2121 if ((flags & SLAB_RED_ZONE) && size == s->objsize)
2122 size += sizeof(void *);
2123#endif
2124
2125
2126
2127
2128
2129 s->inuse = size;
2130
2131 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
2132 s->ctor)) {
2133
2134
2135
2136
2137
2138
2139
2140
2141 s->offset = size;
2142 size += sizeof(void *);
2143 }
2144
2145#ifdef CONFIG_SLUB_DEBUG
2146 if (flags & SLAB_STORE_USER)
2147
2148
2149
2150
2151 size += 2 * sizeof(struct track);
2152
2153 if (flags & SLAB_RED_ZONE)
2154
2155
2156
2157
2158
2159
2160
2161 size += sizeof(void *);
2162#endif
2163
2164
2165
2166
2167
2168
2169 align = calculate_alignment(flags, align, s->objsize);
2170
2171
2172
2173
2174
2175
2176 size = ALIGN(size, align);
2177 s->size = size;
2178
2179 s->order = calculate_order(size);
2180 if (s->order < 0)
2181 return 0;
2182
2183
2184
2185
2186 s->objects = (PAGE_SIZE << s->order) / size;
2187
2188 return !!s->objects;
2189
2190}
2191
2192static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
2193 const char *name, size_t size,
2194 size_t align, unsigned long flags,
2195 void (*ctor)(struct kmem_cache *, void *))
2196{
2197 memset(s, 0, kmem_size);
2198 s->name = name;
2199 s->ctor = ctor;
2200 s->objsize = size;
2201 s->align = align;
2202 s->flags = kmem_cache_flags(size, flags, name, ctor);
2203
2204 if (!calculate_sizes(s))
2205 goto error;
2206
2207 s->refcount = 1;
2208#ifdef CONFIG_NUMA
2209 s->defrag_ratio = 100;
2210#endif
2211 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
2212 goto error;
2213
2214 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
2215 return 1;
2216 free_kmem_cache_nodes(s);
2217error:
2218 if (flags & SLAB_PANIC)
2219 panic("Cannot create slab %s size=%lu realsize=%u "
2220 "order=%u offset=%u flags=%lx\n",
2221 s->name, (unsigned long)size, s->size, s->order,
2222 s->offset, flags);
2223 return 0;
2224}
2225
2226
2227
2228
2229int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2230{
2231 struct page * page;
2232
2233 page = get_object_page(object);
2234
2235 if (!page || s != page->slab)
2236
2237 return 0;
2238
2239 if (!check_valid_pointer(s, page, object))
2240 return 0;
2241
2242
2243
2244
2245
2246
2247
2248 return 1;
2249}
2250EXPORT_SYMBOL(kmem_ptr_validate);
2251
2252
2253
2254
2255unsigned int kmem_cache_size(struct kmem_cache *s)
2256{
2257 return s->objsize;
2258}
2259EXPORT_SYMBOL(kmem_cache_size);
2260
2261const char *kmem_cache_name(struct kmem_cache *s)
2262{
2263 return s->name;
2264}
2265EXPORT_SYMBOL(kmem_cache_name);
2266
2267
2268
2269
2270
2271static int free_list(struct kmem_cache *s, struct kmem_cache_node *n,
2272 struct list_head *list)
2273{
2274 int slabs_inuse = 0;
2275 unsigned long flags;
2276 struct page *page, *h;
2277
2278 spin_lock_irqsave(&n->list_lock, flags);
2279 list_for_each_entry_safe(page, h, list, lru)
2280 if (!page->inuse) {
2281 list_del(&page->lru);
2282 discard_slab(s, page);
2283 } else
2284 slabs_inuse++;
2285 spin_unlock_irqrestore(&n->list_lock, flags);
2286 return slabs_inuse;
2287}
2288
2289
2290
2291
2292static inline int kmem_cache_close(struct kmem_cache *s)
2293{
2294 int node;
2295
2296 flush_all(s);
2297
2298
2299 free_kmem_cache_cpus(s);
2300 for_each_node_state(node, N_NORMAL_MEMORY) {
2301 struct kmem_cache_node *n = get_node(s, node);
2302
2303 n->nr_partial -= free_list(s, n, &n->partial);
2304 if (atomic_long_read(&n->nr_slabs))
2305 return 1;
2306 }
2307 free_kmem_cache_nodes(s);
2308 return 0;
2309}
2310
2311
2312
2313
2314
2315void kmem_cache_destroy(struct kmem_cache *s)
2316{
2317 down_write(&slub_lock);
2318 s->refcount--;
2319 if (!s->refcount) {
2320 list_del(&s->list);
2321 up_write(&slub_lock);
2322 if (kmem_cache_close(s))
2323 WARN_ON(1);
2324 sysfs_slab_remove(s);
2325 kfree(s);
2326 } else
2327 up_write(&slub_lock);
2328}
2329EXPORT_SYMBOL(kmem_cache_destroy);
2330
2331
2332
2333
2334
2335struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
2336EXPORT_SYMBOL(kmalloc_caches);
2337
2338#ifdef CONFIG_ZONE_DMA
2339static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
2340#endif
2341
2342static int __init setup_slub_min_order(char *str)
2343{
2344 get_option (&str, &slub_min_order);
2345
2346 return 1;
2347}
2348
2349__setup("slub_min_order=", setup_slub_min_order);
2350
2351static int __init setup_slub_max_order(char *str)
2352{
2353 get_option (&str, &slub_max_order);
2354
2355 return 1;
2356}
2357
2358__setup("slub_max_order=", setup_slub_max_order);
2359
2360static int __init setup_slub_min_objects(char *str)
2361{
2362 get_option (&str, &slub_min_objects);
2363
2364 return 1;
2365}
2366
2367__setup("slub_min_objects=", setup_slub_min_objects);
2368
2369static int __init setup_slub_nomerge(char *str)
2370{
2371 slub_nomerge = 1;
2372 return 1;
2373}
2374
2375__setup("slub_nomerge", setup_slub_nomerge);
2376
2377static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
2378 const char *name, int size, gfp_t gfp_flags)
2379{
2380 unsigned int flags = 0;
2381
2382 if (gfp_flags & SLUB_DMA)
2383 flags = SLAB_CACHE_DMA;
2384
2385 down_write(&slub_lock);
2386 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
2387 flags, NULL))
2388 goto panic;
2389
2390 list_add(&s->list, &slab_caches);
2391 up_write(&slub_lock);
2392 if (sysfs_slab_add(s))
2393 goto panic;
2394 return s;
2395
2396panic:
2397 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2398}
2399
2400#ifdef CONFIG_ZONE_DMA
2401
2402static void sysfs_add_func(struct work_struct *w)
2403{
2404 struct kmem_cache *s;
2405
2406 down_write(&slub_lock);
2407 list_for_each_entry(s, &slab_caches, list) {
2408 if (s->flags & __SYSFS_ADD_DEFERRED) {
2409 s->flags &= ~__SYSFS_ADD_DEFERRED;
2410 sysfs_slab_add(s);
2411 }
2412 }
2413 up_write(&slub_lock);
2414}
2415
2416static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
2417
2418static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2419{
2420 struct kmem_cache *s;
2421 char *text;
2422 size_t realsize;
2423
2424 s = kmalloc_caches_dma[index];
2425 if (s)
2426 return s;
2427
2428
2429 if (flags & __GFP_WAIT)
2430 down_write(&slub_lock);
2431 else {
2432 if (!down_write_trylock(&slub_lock))
2433 goto out;
2434 }
2435
2436 if (kmalloc_caches_dma[index])
2437 goto unlock_out;
2438
2439 realsize = kmalloc_caches[index].objsize;
2440 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize),
2441 s = kmalloc(kmem_size, flags & ~SLUB_DMA);
2442
2443 if (!s || !text || !kmem_cache_open(s, flags, text,
2444 realsize, ARCH_KMALLOC_MINALIGN,
2445 SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
2446 kfree(s);
2447 kfree(text);
2448 goto unlock_out;
2449 }
2450
2451 list_add(&s->list, &slab_caches);
2452 kmalloc_caches_dma[index] = s;
2453
2454 schedule_work(&sysfs_add_work);
2455
2456unlock_out:
2457 up_write(&slub_lock);
2458out:
2459 return kmalloc_caches_dma[index];
2460}
2461#endif
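
/*
 * Map allocation sizes up to 192 bytes to a kmalloc cache index:
 * size_index[(size - 1) / 8] yields either a power-of-two cache (index
 * 3..7) or one of the odd-sized 96 and 192 byte caches (indices 1 and 2).
 * Example: kmalloc(100) -> size_index[12] == 7 -> the 128 byte cache.
 */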
2469static s8 size_index[24] = {
2470 3,
2471 4,
2472 5,
2473 5,
2474 6,
2475 6,
2476 6,
2477 6,
2478 1,
2479 1,
2480 1,
2481 1,
2482 7,
2483 7,
2484 7,
2485 7,
2486 2,
2487 2,
2488 2,
2489 2,
2490 2,
2491 2,
2492 2,
2493 2
2494};
2495
2496static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2497{
2498 int index;
2499
2500 if (size <= 192) {
2501 if (!size)
2502 return ZERO_SIZE_PTR;
2503
2504 index = size_index[(size - 1) / 8];
2505 } else
2506 index = fls(size - 1);
2507
2508#ifdef CONFIG_ZONE_DMA
2509 if (unlikely((flags & SLUB_DMA)))
2510 return dma_kmalloc_cache(index, flags);
2511
2512#endif
2513 return &kmalloc_caches[index];
2514}
2515
2516void *__kmalloc(size_t size, gfp_t flags)
2517{
2518 struct kmem_cache *s;
2519
2520 if (unlikely(size > PAGE_SIZE / 2))
2521 return (void *)__get_free_pages(flags | __GFP_COMP,
2522 get_order(size));
2523
2524 s = get_slab(size, flags);
2525
2526 if (unlikely(ZERO_OR_NULL_PTR(s)))
2527 return s;
2528
2529 return slab_alloc(s, flags, -1, __builtin_return_address(0));
2530}
2531EXPORT_SYMBOL(__kmalloc);
2532
2533#ifdef CONFIG_NUMA
2534void *__kmalloc_node(size_t size, gfp_t flags, int node)
2535{
2536 struct kmem_cache *s;
2537
2538 if (unlikely(size > PAGE_SIZE / 2))
2539 return (void *)__get_free_pages(flags | __GFP_COMP,
2540 get_order(size));
2541
2542 s = get_slab(size, flags);
2543
2544 if (unlikely(ZERO_OR_NULL_PTR(s)))
2545 return s;
2546
2547 return slab_alloc(s, flags, node, __builtin_return_address(0));
2548}
2549EXPORT_SYMBOL(__kmalloc_node);
2550#endif
2551
2552size_t ksize(const void *object)
2553{
2554 struct page *page;
2555 struct kmem_cache *s;
2556
2557 BUG_ON(!object);
2558 if (unlikely(object == ZERO_SIZE_PTR))
2559 return 0;
2560
2561 page = virt_to_head_page(object);
2562 BUG_ON(!page);
2563
2564 if (unlikely(!PageSlab(page)))
2565 return PAGE_SIZE << compound_order(page);
2566
2567 s = page->slab;
2568 BUG_ON(!s);
2569
2570
2571
2572
2573
2574 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2575 return s->objsize;
2576
2577
2578
2579
2580
2581
2582 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2583 return s->inuse;
2584
2585
2586
2587
2588 return s->size;
2589}
2590EXPORT_SYMBOL(ksize);
2591
2592void kfree(const void *x)
2593{
2594 struct page *page;
2595
2596 if (unlikely(ZERO_OR_NULL_PTR(x)))
2597 return;
2598
2599 page = virt_to_head_page(x);
2600 if (unlikely(!PageSlab(page))) {
2601 put_page(page);
2602 return;
2603 }
2604 slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
2605}
2606EXPORT_SYMBOL(kfree);
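
/*
 * kmem_cache_shrink() releases empty slabs on the partial lists and
 * sorts the remaining partial slabs by the number of objects in use, so
 * that allocations are served from the fullest slabs first and nearly
 * empty slabs get a chance to drain and be freed.
 */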
2618int kmem_cache_shrink(struct kmem_cache *s)
2619{
2620 int node;
2621 int i;
2622 struct kmem_cache_node *n;
2623 struct page *page;
2624 struct page *t;
2625 struct list_head *slabs_by_inuse =
2626 kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
2627 unsigned long flags;
2628
2629 if (!slabs_by_inuse)
2630 return -ENOMEM;
2631
2632 flush_all(s);
2633 for_each_node_state(node, N_NORMAL_MEMORY) {
2634 n = get_node(s, node);
2635
2636 if (!n->nr_partial)
2637 continue;
2638
2639 for (i = 0; i < s->objects; i++)
2640 INIT_LIST_HEAD(slabs_by_inuse + i);
2641
2642 spin_lock_irqsave(&n->list_lock, flags);
2643
2644
2645
2646
2647
2648
2649
2650 list_for_each_entry_safe(page, t, &n->partial, lru) {
2651 if (!page->inuse && slab_trylock(page)) {
2652
2653
2654
2655
2656
2657 list_del(&page->lru);
2658 n->nr_partial--;
2659 slab_unlock(page);
2660 discard_slab(s, page);
2661 } else {
2662 list_move(&page->lru,
2663 slabs_by_inuse + page->inuse);
2664 }
2665 }
2666
2667
2668
2669
2670
2671 for (i = s->objects - 1; i >= 0; i--)
2672 list_splice(slabs_by_inuse + i, n->partial.prev);
2673
2674 spin_unlock_irqrestore(&n->list_lock, flags);
2675 }
2676
2677 kfree(slabs_by_inuse);
2678 return 0;
2679}
2680EXPORT_SYMBOL(kmem_cache_shrink);
2681
2682#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2683static int slab_mem_going_offline_callback(void *arg)
2684{
2685 struct kmem_cache *s;
2686
2687 down_read(&slub_lock);
2688 list_for_each_entry(s, &slab_caches, list)
2689 kmem_cache_shrink(s);
2690 up_read(&slub_lock);
2691
2692 return 0;
2693}
2694
2695static void slab_mem_offline_callback(void *arg)
2696{
2697 struct kmem_cache_node *n;
2698 struct kmem_cache *s;
2699 struct memory_notify *marg = arg;
2700 int offline_node;
2701
2702 offline_node = marg->status_change_nid;
2703
2704
2705
2706
2707
2708 if (offline_node < 0)
2709 return;
2710
2711 down_read(&slub_lock);
2712 list_for_each_entry(s, &slab_caches, list) {
2713 n = get_node(s, offline_node);
2714 if (n) {
2715
2716
2717
2718
2719
2720
2721 BUG_ON(atomic_long_read(&n->nr_slabs));
2722
2723 s->node[offline_node] = NULL;
2724 kmem_cache_free(kmalloc_caches, n);
2725 }
2726 }
2727 up_read(&slub_lock);
2728}
2729
2730static int slab_mem_going_online_callback(void *arg)
2731{
2732 struct kmem_cache_node *n;
2733 struct kmem_cache *s;
2734 struct memory_notify *marg = arg;
2735 int nid = marg->status_change_nid;
2736 int ret = 0;
2737
2738
2739
2740
2741
2742 if (nid < 0)
2743 return 0;
2744
2745
2746
2747
2748
2749
2750 down_read(&slub_lock);
2751 list_for_each_entry(s, &slab_caches, list) {
2752
2753
2754
2755
2756
2757 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL);
2758 if (!n) {
2759 ret = -ENOMEM;
2760 goto out;
2761 }
2762 init_kmem_cache_node(n);
2763 s->node[nid] = n;
2764 }
2765out:
2766 up_read(&slub_lock);
2767 return ret;
2768}
2769
2770static int slab_memory_callback(struct notifier_block *self,
2771 unsigned long action, void *arg)
2772{
2773 int ret = 0;
2774
2775 switch (action) {
2776 case MEM_GOING_ONLINE:
2777 ret = slab_mem_going_online_callback(arg);
2778 break;
2779 case MEM_GOING_OFFLINE:
2780 ret = slab_mem_going_offline_callback(arg);
2781 break;
2782 case MEM_OFFLINE:
2783 case MEM_CANCEL_ONLINE:
2784 slab_mem_offline_callback(arg);
2785 break;
2786 case MEM_ONLINE:
2787 case MEM_CANCEL_OFFLINE:
2788 break;
2789 }
2790
2791 ret = notifier_from_errno(ret);
2792 return ret;
2793}
2794
2795#endif
2796
2797
2798
2799
2800
2801void __init kmem_cache_init(void)
2802{
2803 int i;
2804 int caches = 0;
2805
2806 init_alloc_cpu();
2807
2808#ifdef CONFIG_NUMA
2809
2810
2811
2812
2813
2814 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
2815 sizeof(struct kmem_cache_node), GFP_KERNEL);
2816 kmalloc_caches[0].refcount = -1;
2817 caches++;
2818
2819 hotplug_memory_notifier(slab_memory_callback, 1);
2820#endif
2821
2822
2823 slab_state = PARTIAL;
2824
2825
2826 if (KMALLOC_MIN_SIZE <= 64) {
2827 create_kmalloc_cache(&kmalloc_caches[1],
2828 "kmalloc-96", 96, GFP_KERNEL);
2829 caches++;
2830 }
2831 if (KMALLOC_MIN_SIZE <= 128) {
2832 create_kmalloc_cache(&kmalloc_caches[2],
2833 "kmalloc-192", 192, GFP_KERNEL);
2834 caches++;
2835 }
2836
2837 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
2838 create_kmalloc_cache(&kmalloc_caches[i],
2839 "kmalloc", 1 << i, GFP_KERNEL);
2840 caches++;
2841 }
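
	/*
	 * Patch the size_index table for architectures whose
	 * KMALLOC_MIN_SIZE is larger than 8 bytes: every size below the
	 * minimum must be redirected to the smallest cache that actually
	 * exists (index KMALLOC_SHIFT_LOW).
	 */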
2855 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
2856 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
2857
2858 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8)
2859 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;
2860
2861 slab_state = UP;
2862
2863
2864 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
2865 kmalloc_caches[i]. name =
2866 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
2867
2868#ifdef CONFIG_SMP
2869 register_cpu_notifier(&slab_notifier);
2870 kmem_size = offsetof(struct kmem_cache, cpu_slab) +
2871 nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
2872#else
2873 kmem_size = sizeof(struct kmem_cache);
2874#endif
2875
2876
2877 printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
2878 " CPUs=%d, Nodes=%d\n",
2879 caches, cache_line_size(),
2880 slub_min_order, slub_max_order, slub_min_objects,
2881 nr_cpu_ids, nr_node_ids);
2882}
2883
2884
2885
2886
2887static int slab_unmergeable(struct kmem_cache *s)
2888{
2889 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
2890 return 1;
2891
2892 if (s->ctor)
2893 return 1;
2894
2895
2896
2897
2898 if (s->refcount < 0)
2899 return 1;
2900
2901 return 0;
2902}
2903
2904static struct kmem_cache *find_mergeable(size_t size,
2905 size_t align, unsigned long flags, const char *name,
2906 void (*ctor)(struct kmem_cache *, void *))
2907{
2908 struct kmem_cache *s;
2909
2910 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
2911 return NULL;
2912
2913 if (ctor)
2914 return NULL;
2915
2916 size = ALIGN(size, sizeof(void *));
2917 align = calculate_alignment(flags, align, size);
2918 size = ALIGN(size, align);
2919 flags = kmem_cache_flags(size, flags, name, NULL);
2920
2921 list_for_each_entry(s, &slab_caches, list) {
2922 if (slab_unmergeable(s))
2923 continue;
2924
2925 if (size > s->size)
2926 continue;
2927
2928 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
2929 continue;
2930
2931 /*
2932 * Check if alignment is compatible.
2933 */
2934 if ((s->size & ~(align - 1)) != s->size)
2935 continue;
2936
2937 if (s->size - size >= sizeof(void *))
2938 continue;
2939
2940 return s;
2941 }
2942 return NULL;
2943}
2944
2945struct kmem_cache *kmem_cache_create(const char *name, size_t size,
2946 size_t align, unsigned long flags,
2947 void (*ctor)(struct kmem_cache *, void *))
2948{
2949 struct kmem_cache *s;
2950
2951 down_write(&slub_lock);
2952 s = find_mergeable(size, align, flags, name, ctor);
2953 if (s) {
2954 int cpu;
2955
2956 s->refcount++;
2957
2958 /*
2959 * Adjust objsize so that kzalloc clears the whole object.
2960 */
2961 s->objsize = max(s->objsize, (int)size);
2962
2963 /*
2964 * The per-cpu structures keep their own copy of the object
2965 * size, so update those as well.
2966 */
2967 for_each_online_cpu(cpu)
2968 get_cpu_slab(s, cpu)->objsize = s->objsize;
2969 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
2970 up_write(&slub_lock);
2971 if (sysfs_slab_alias(s, name))
2972 goto err;
2973 return s;
2974 }
2975 s = kmalloc(kmem_size, GFP_KERNEL);
2976 if (s) {
2977 if (kmem_cache_open(s, GFP_KERNEL, name,
2978 size, align, flags, ctor)) {
2979 list_add(&s->list, &slab_caches);
2980 up_write(&slub_lock);
2981 if (sysfs_slab_add(s))
2982 goto err;
2983 return s;
2984 }
2985 kfree(s);
2986 }
2987 up_write(&slub_lock);
2988
2989err:
2990 if (flags & SLAB_PANIC)
2991 panic("Cannot create slabcache %s\n", name);
2992 else
2993 s = NULL;
2994 return s;
2995}
2996EXPORT_SYMBOL(kmem_cache_create);
2997
2998#ifdef CONFIG_SMP
2999/*
3000 * Use the cpu notifier to ensure that the cpu slabs are flushed
3001 * when necessary.
3002 */
3003static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3004 unsigned long action, void *hcpu)
3005{
3006 long cpu = (long)hcpu;
3007 struct kmem_cache *s;
3008 unsigned long flags;
3009
3010 switch (action) {
3011 case CPU_UP_PREPARE:
3012 case CPU_UP_PREPARE_FROZEN:
3013 init_alloc_cpu_cpu(cpu);
3014 down_read(&slub_lock);
3015 list_for_each_entry(s, &slab_caches, list)
3016 s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
3017 GFP_KERNEL);
3018 up_read(&slub_lock);
3019 break;
3020
3021 case CPU_UP_CANCELED:
3022 case CPU_UP_CANCELED_FROZEN:
3023 case CPU_DEAD:
3024 case CPU_DEAD_FROZEN:
3025 down_read(&slub_lock);
3026 list_for_each_entry(s, &slab_caches, list) {
3027 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3028
3029 local_irq_save(flags);
3030 __flush_cpu_slab(s, cpu);
3031 local_irq_restore(flags);
3032 free_kmem_cache_cpu(c, cpu);
3033 s->cpu_slab[cpu] = NULL;
3034 }
3035 up_read(&slub_lock);
3036 break;
3037 default:
3038 break;
3039 }
3040 return NOTIFY_OK;
3041}
3042
3043static struct notifier_block __cpuinitdata slab_notifier =
3044 { &slab_cpuup_callback, NULL, 0 };
3045
3046#endif
3047
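/*
 * Variants of __kmalloc()/__kmalloc_node() that pass an explicit caller
 * address to slab_alloc() so allocation tracking attributes allocations
 * to the real caller rather than the kmalloc wrapper.
 */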
3048void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
3049{
3050 struct kmem_cache *s;
3051
3052 if (unlikely(size > PAGE_SIZE / 2))
3053 return (void *)__get_free_pages(gfpflags | __GFP_COMP,
3054 get_order(size));
3055 s = get_slab(size, gfpflags);
3056
3057 if (unlikely(ZERO_OR_NULL_PTR(s)))
3058 return s;
3059
3060 return slab_alloc(s, gfpflags, -1, caller);
3061}
3062
3063void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3064 int node, void *caller)
3065{
3066 struct kmem_cache *s;
3067
3068 if (unlikely(size > PAGE_SIZE / 2))
3069 return (void *)__get_free_pages(gfpflags | __GFP_COMP,
3070 get_order(size));
3071 s = get_slab(size, gfpflags);
3072
3073 if (unlikely(ZERO_OR_NULL_PTR(s)))
3074 return s;
3075
3076 return slab_alloc(s, gfpflags, node, caller);
3077}
3078
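/*
 * Count the objects currently in use on a node's partial slabs.
 */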
3079static unsigned long count_partial(struct kmem_cache_node *n)
3080{
3081 unsigned long flags;
3082 unsigned long x = 0;
3083 struct page *page;
3084
3085 spin_lock_irqsave(&n->list_lock, flags);
3086 list_for_each_entry(page, &n->partial, lru)
3087 x += page->inuse;
3088 spin_unlock_irqrestore(&n->list_lock, flags);
3089 return x;
3090}
3091
3092#if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
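/*
 * Slab consistency validation: walk a slab's objects and verify that
 * both free and allocated objects are well formed.
 */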
3093static int validate_slab(struct kmem_cache *s, struct page *page,
3094 unsigned long *map)
3095{
3096 void *p;
3097 void *addr = page_address(page);
3098
3099 if (!check_slab(s, page) ||
3100 !on_freelist(s, page, NULL))
3101 return 0;
3102
3103 /* Now we know that a valid freelist exists */
3104 bitmap_zero(map, s->objects);
3105
3106 for_each_free_object(p, s, page->freelist) {
3107 set_bit(slab_index(p, s, addr), map);
3108 if (!check_object(s, page, p, 0))
3109 return 0;
3110 }
3111
3112 for_each_object(p, s, addr)
3113 if (!test_bit(slab_index(p, s, addr), map))
3114 if (!check_object(s, page, p, 1))
3115 return 0;
3116 return 1;
3117}
3118
3119static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3120 unsigned long *map)
3121{
3122 if (slab_trylock(page)) {
3123 validate_slab(s, page, map);
3124 slab_unlock(page);
3125 } else
3126 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3127 s->name, page);
3128
3129 if (s->flags & DEBUG_DEFAULT_FLAGS) {
3130 if (!SlabDebug(page))
3131 printk(KERN_ERR "SLUB %s: SlabDebug not set "
3132 "on slab 0x%p\n", s->name, page);
3133 } else {
3134 if (SlabDebug(page))
3135 printk(KERN_ERR "SLUB %s: SlabDebug set on "
3136 "slab 0x%p\n", s->name, page);
3137 }
3138}
3139
3140static int validate_slab_node(struct kmem_cache *s,
3141 struct kmem_cache_node *n, unsigned long *map)
3142{
3143 unsigned long count = 0;
3144 struct page *page;
3145 unsigned long flags;
3146
3147 spin_lock_irqsave(&n->list_lock, flags);
3148
3149 list_for_each_entry(page, &n->partial, lru) {
3150 validate_slab_slab(s, page, map);
3151 count++;
3152 }
3153 if (count != n->nr_partial)
3154 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3155 "counter=%ld\n", s->name, count, n->nr_partial);
3156
3157 if (!(s->flags & SLAB_STORE_USER))
3158 goto out;
3159
3160 list_for_each_entry(page, &n->full, lru) {
3161 validate_slab_slab(s, page, map);
3162 count++;
3163 }
3164 if (count != atomic_long_read(&n->nr_slabs))
3165 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3166 "counter=%ld\n", s->name, count,
3167 atomic_long_read(&n->nr_slabs));
3168
3169out:
3170 spin_unlock_irqrestore(&n->list_lock, flags);
3171 return count;
3172}
3173
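/*
 * Validate all slabs of a cache. Returns the number of slabs checked,
 * or -ENOMEM if the temporary object bitmap could not be allocated.
 */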
3174static long validate_slab_cache(struct kmem_cache *s)
3175{
3176 int node;
3177 unsigned long count = 0;
3178 unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) *
3179 sizeof(unsigned long), GFP_KERNEL);
3180
3181 if (!map)
3182 return -ENOMEM;
3183
3184 flush_all(s);
3185 for_each_node_state(node, N_NORMAL_MEMORY) {
3186 struct kmem_cache_node *n = get_node(s, node);
3187
3188 count += validate_slab_node(s, n, map);
3189 }
3190 kfree(map);
3191 return count;
3192}
3193
3194#ifdef SLUB_RESILIENCY_TEST
3195static void resiliency_test(void)
3196{
3197 u8 *p;
3198
3199 printk(KERN_ERR "SLUB resiliency testing\n");
3200 printk(KERN_ERR "-----------------------\n");
3201 printk(KERN_ERR "A. Corruption after allocation\n");
3202
3203 p = kzalloc(16, GFP_KERNEL);
3204 p[16] = 0x12;
3205 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
3206 " 0x12->0x%p\n\n", p + 16);
3207
3208 validate_slab_cache(kmalloc_caches + 4);
3209
3210
3211 p = kzalloc(32, GFP_KERNEL);
3212 p[32 + sizeof(void *)] = 0x34;
3213 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
3214 " 0x34 -> 0x%p\n", p);
3215 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
3216
3217 validate_slab_cache(kmalloc_caches + 5);
3218 p = kzalloc(64, GFP_KERNEL);
3219 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
3220 *p = 0x56;
3221 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
3222 p);
3223 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
3224 validate_slab_cache(kmalloc_caches + 6);
3225
3226 printk(KERN_ERR "\nB. Corruption after free\n");
3227 p = kzalloc(128, GFP_KERNEL);
3228 kfree(p);
3229 *p = 0x78;
3230 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
3231 validate_slab_cache(kmalloc_caches + 7);
3232
3233 p = kzalloc(256, GFP_KERNEL);
3234 kfree(p);
3235 p[50] = 0x9a;
3236 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
3237 validate_slab_cache(kmalloc_caches + 8);
3238
3239 p = kzalloc(512, GFP_KERNEL);
3240 kfree(p);
3241 p[512] = 0xab;
3242 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
3243 validate_slab_cache(kmalloc_caches + 9);
3244}
3245#else
3246static void resiliency_test(void) {}
3247#endif
3248
3249/*
3250 * Generate lists of code addresses where slabcache objects are
3251 * allocated and freed.
3252 */
3253
3254struct location {
3255 unsigned long count;
3256 void *addr;
3257 long long sum_time;
3258 long min_time;
3259 long max_time;
3260 long min_pid;
3261 long max_pid;
3262 cpumask_t cpus;
3263 nodemask_t nodes;
3264};
3265
3266struct loc_track {
3267 unsigned long max;
3268 unsigned long count;
3269 struct location *loc;
3270};
3271
3272static void free_loc_track(struct loc_track *t)
3273{
3274 if (t->max)
3275 free_pages((unsigned long)t->loc,
3276 get_order(sizeof(struct location) * t->max));
3277}
3278
3279static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
3280{
3281 struct location *l;
3282 int order;
3283
3284 order = get_order(sizeof(struct location) * max);
3285
3286 l = (void *)__get_free_pages(flags, order);
3287 if (!l)
3288 return 0;
3289
3290 if (t->count) {
3291 memcpy(l, t->loc, sizeof(struct location) * t->count);
3292 free_loc_track(t);
3293 }
3294 t->max = max;
3295 t->loc = l;
3296 return 1;
3297}
3298
3299static int add_location(struct loc_track *t, struct kmem_cache *s,
3300 const struct track *track)
3301{
3302 long start, end, pos;
3303 struct location *l;
3304 void *caddr;
3305 unsigned long age = jiffies - track->when;
3306
3307 start = -1;
3308 end = t->count;
3309
3310 for ( ; ; ) {
3311 pos = start + (end - start + 1) / 2;
3312 /*
3313 * There is nothing at "end". If we end up there
3314 * we need to insert something before "end".
3315 */
3316
3317 if (pos == end)
3318 break;
3319
3320 caddr = t->loc[pos].addr;
3321 if (track->addr == caddr) {
3322
3323 l = &t->loc[pos];
3324 l->count++;
3325 if (track->when) {
3326 l->sum_time += age;
3327 if (age < l->min_time)
3328 l->min_time = age;
3329 if (age > l->max_time)
3330 l->max_time = age;
3331
3332 if (track->pid < l->min_pid)
3333 l->min_pid = track->pid;
3334 if (track->pid > l->max_pid)
3335 l->max_pid = track->pid;
3336
3337 cpu_set(track->cpu, l->cpus);
3338 }
3339 node_set(page_to_nid(virt_to_page(track)), l->nodes);
3340 return 1;
3341 }
3342
3343 if (track->addr < caddr)
3344 end = pos;
3345 else
3346 start = pos;
3347 }
3348
3349 /*
3350 * Not found. Insert a new tracking element at "pos".
3351 */
3352 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
3353 return 0;
3354
3355 l = t->loc + pos;
3356 if (pos < t->count)
3357 memmove(l + 1, l,
3358 (t->count - pos) * sizeof(struct location));
3359 t->count++;
3360 l->count = 1;
3361 l->addr = track->addr;
3362 l->sum_time = age;
3363 l->min_time = age;
3364 l->max_time = age;
3365 l->min_pid = track->pid;
3366 l->max_pid = track->pid;
3367 cpus_clear(l->cpus);
3368 cpu_set(track->cpu, l->cpus);
3369 nodes_clear(l->nodes);
3370 node_set(page_to_nid(virt_to_page(track)), l->nodes);
3371 return 1;
3372}
3373
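/*
 * Collect the allocation or free tracks of all objects in use on one
 * slab into the location list.
 */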
3374static void process_slab(struct loc_track *t, struct kmem_cache *s,
3375 struct page *page, enum track_item alloc)
3376{
3377 void *addr = page_address(page);
3378 DECLARE_BITMAP(map, s->objects);
3379 void *p;
3380
3381 bitmap_zero(map, s->objects);
3382 for_each_free_object(p, s, page->freelist)
3383 set_bit(slab_index(p, s, addr), map);
3384
3385 for_each_object(p, s, addr)
3386 if (!test_bit(slab_index(p, s, addr), map))
3387 add_location(t, s, get_track(s, p, alloc));
3388}
3389
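/*
 * Produce the alloc_calls/free_calls sysfs output: one line per unique
 * caller with counts, object ages, pid ranges and cpu/node masks.
 */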
3390static int list_locations(struct kmem_cache *s, char *buf,
3391 enum track_item alloc)
3392{
3393 int n = 0;
3394 unsigned long i;
3395 struct loc_track t = { 0, 0, NULL };
3396 int node;
3397
3398 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
3399 GFP_TEMPORARY))
3400 return sprintf(buf, "Out of memory\n");
3401
3402 /* Push back the cpu slabs so all objects are accounted for */
3403 flush_all(s);
3404
3405 for_each_node_state(node, N_NORMAL_MEMORY) {
3406 struct kmem_cache_node *n = get_node(s, node);
3407 unsigned long flags;
3408 struct page *page;
3409
3410 if (!atomic_long_read(&n->nr_slabs))
3411 continue;
3412
3413 spin_lock_irqsave(&n->list_lock, flags);
3414 list_for_each_entry(page, &n->partial, lru)
3415 process_slab(&t, s, page, alloc);
3416 list_for_each_entry(page, &n->full, lru)
3417 process_slab(&t, s, page, alloc);
3418 spin_unlock_irqrestore(&n->list_lock, flags);
3419 }
3420
3421 for (i = 0; i < t.count; i++) {
3422 struct location *l = &t.loc[i];
3423
3424 if (n > PAGE_SIZE - 100)
3425 break;
3426 n += sprintf(buf + n, "%7ld ", l->count);
3427
3428 if (l->addr)
3429 n += sprint_symbol(buf + n, (unsigned long)l->addr);
3430 else
3431 n += sprintf(buf + n, "<not-available>");
3432
3433 if (l->sum_time != l->min_time) {
3434 unsigned long remainder;
3435
3436 n += sprintf(buf + n, " age=%ld/%ld/%ld",
3437 l->min_time,
3438 div_long_long_rem(l->sum_time, l->count, &remainder),
3439 l->max_time);
3440 } else
3441 n += sprintf(buf + n, " age=%ld",
3442 l->min_time);
3443
3444 if (l->min_pid != l->max_pid)
3445 n += sprintf(buf + n, " pid=%ld-%ld",
3446 l->min_pid, l->max_pid);
3447 else
3448 n += sprintf(buf + n, " pid=%ld",
3449 l->min_pid);
3450
3451 if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
3452 n < PAGE_SIZE - 60) {
3453 n += sprintf(buf + n, " cpus=");
3454 n += cpulist_scnprintf(buf + n, PAGE_SIZE - n - 50,
3455 l->cpus);
3456 }
3457
3458 if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
3459 n < PAGE_SIZE - 60) {
3460 n += sprintf(buf + n, " nodes=");
3461 n += nodelist_scnprintf(buf + n, PAGE_SIZE - n - 50,
3462 l->nodes);
3463 }
3464
3465 n += sprintf(buf + n, "\n");
3466 }
3467
3468 free_loc_track(&t);
3469 if (!t.count)
3470 n += sprintf(buf, "No data\n");
3471 return n;
3472}
3473
3474enum slab_stat_type {
3475 SL_FULL,
3476 SL_PARTIAL,
3477 SL_CPU,
3478 SL_OBJECTS
3479};
3480
3481#define SO_FULL (1 << SL_FULL)
3482#define SO_PARTIAL (1 << SL_PARTIAL)
3483#define SO_CPU (1 << SL_CPU)
3484#define SO_OBJECTS (1 << SL_OBJECTS)
3485
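/*
 * Gather slab/object counts for the sysfs files. "flags" selects which
 * slabs (full, partial, per-cpu) are included and whether objects or
 * slabs are counted.
 */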
3486static unsigned long slab_objects(struct kmem_cache *s,
3487 char *buf, unsigned long flags)
3488{
3489 unsigned long total = 0;
3490 int cpu;
3491 int node;
3492 int x;
3493 unsigned long *nodes;
3494 unsigned long *per_cpu;
3495
3496 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
3497 per_cpu = nodes + nr_node_ids;
3498
3499 for_each_possible_cpu(cpu) {
3500 struct page *page;
3501 int node;
3502 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3503
3504 if (!c)
3505 continue;
3506
3507 page = c->page;
3508 node = c->node;
3509 if (node < 0)
3510 continue;
3511 if (page) {
3512 if (flags & SO_CPU) {
3513 int x = 0;
3514
3515 if (flags & SO_OBJECTS)
3516 x = page->inuse;
3517 else
3518 x = 1;
3519 total += x;
3520 nodes[node] += x;
3521 }
3522 per_cpu[node]++;
3523 }
3524 }
3525
3526 for_each_node_state(node, N_NORMAL_MEMORY) {
3527 struct kmem_cache_node *n = get_node(s, node);
3528
3529 if (flags & SO_PARTIAL) {
3530 if (flags & SO_OBJECTS)
3531 x = count_partial(n);
3532 else
3533 x = n->nr_partial;
3534 total += x;
3535 nodes[node] += x;
3536 }
3537
3538 if (flags & SO_FULL) {
3539 int full_slabs = atomic_long_read(&n->nr_slabs)
3540 - per_cpu[node]
3541 - n->nr_partial;
3542
3543 if (flags & SO_OBJECTS)
3544 x = full_slabs * s->objects;
3545 else
3546 x = full_slabs;
3547 total += x;
3548 nodes[node] += x;
3549 }
3550 }
3551
3552 x = sprintf(buf, "%lu", total);
3553#ifdef CONFIG_NUMA
3554 for_each_node_state(node, N_NORMAL_MEMORY)
3555 if (nodes[node])
3556 x += sprintf(buf + x, " N%d=%lu",
3557 node, nodes[node]);
3558#endif
3559 kfree(nodes);
3560 return x + sprintf(buf + x, "\n");
3561}
3562
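/*
 * Return 1 if the cache still has any slabs (and thus possibly live
 * objects); used to refuse debug option changes on busy caches.
 */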
3563static int any_slab_objects(struct kmem_cache *s)
3564{
3565 int node;
3566 int cpu;
3567
3568 for_each_possible_cpu(cpu) {
3569 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
3570
3571 if (c && c->page)
3572 return 1;
3573 }
3574
3575 for_each_online_node(node) {
3576 struct kmem_cache_node *n = get_node(s, node);
3577
3578 if (!n)
3579 continue;
3580
3581 if (n->nr_partial || atomic_long_read(&n->nr_slabs))
3582 return 1;
3583 }
3584 return 0;
3585}
3586
3587#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
3588#define to_slab(n) container_of(n, struct kmem_cache, kobj)
3589
3590struct slab_attribute {
3591 struct attribute attr;
3592 ssize_t (*show)(struct kmem_cache *s, char *buf);
3593 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
3594};
3595
3596#define SLAB_ATTR_RO(_name) \
3597 static struct slab_attribute _name##_attr = __ATTR_RO(_name)
3598
3599#define SLAB_ATTR(_name) \
3600 static struct slab_attribute _name##_attr = \
3601 __ATTR(_name, 0644, _name##_show, _name##_store)
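/*
 * For example, SLAB_ATTR(trace) defines trace_attr, a read/write (0644)
 * sysfs attribute wired to trace_show() and trace_store() below.
 */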
3602
3603static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
3604{
3605 return sprintf(buf, "%d\n", s->size);
3606}
3607SLAB_ATTR_RO(slab_size);
3608
3609static ssize_t align_show(struct kmem_cache *s, char *buf)
3610{
3611 return sprintf(buf, "%d\n", s->align);
3612}
3613SLAB_ATTR_RO(align);
3614
3615static ssize_t object_size_show(struct kmem_cache *s, char *buf)
3616{
3617 return sprintf(buf, "%d\n", s->objsize);
3618}
3619SLAB_ATTR_RO(object_size);
3620
3621static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
3622{
3623 return sprintf(buf, "%d\n", s->objects);
3624}
3625SLAB_ATTR_RO(objs_per_slab);
3626
3627static ssize_t order_show(struct kmem_cache *s, char *buf)
3628{
3629 return sprintf(buf, "%d\n", s->order);
3630}
3631SLAB_ATTR_RO(order);
3632
3633static ssize_t ctor_show(struct kmem_cache *s, char *buf)
3634{
3635 if (s->ctor) {
3636 int n = sprint_symbol(buf, (unsigned long)s->ctor);
3637
3638 return n + sprintf(buf + n, "\n");
3639 }
3640 return 0;
3641}
3642SLAB_ATTR_RO(ctor);
3643
3644static ssize_t aliases_show(struct kmem_cache *s, char *buf)
3645{
3646 return sprintf(buf, "%d\n", s->refcount - 1);
3647}
3648SLAB_ATTR_RO(aliases);
3649
3650static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3651{
3652 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
3653}
3654SLAB_ATTR_RO(slabs);
3655
3656static ssize_t partial_show(struct kmem_cache *s, char *buf)
3657{
3658 return slab_objects(s, buf, SO_PARTIAL);
3659}
3660SLAB_ATTR_RO(partial);
3661
3662static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
3663{
3664 return slab_objects(s, buf, SO_CPU);
3665}
3666SLAB_ATTR_RO(cpu_slabs);
3667
3668static ssize_t objects_show(struct kmem_cache *s, char *buf)
3669{
3670 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
3671}
3672SLAB_ATTR_RO(objects);
3673
3674static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
3675{
3676 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
3677}
3678
3679static ssize_t sanity_checks_store(struct kmem_cache *s,
3680 const char *buf, size_t length)
3681{
3682 s->flags &= ~SLAB_DEBUG_FREE;
3683 if (buf[0] == '1')
3684 s->flags |= SLAB_DEBUG_FREE;
3685 return length;
3686}
3687SLAB_ATTR(sanity_checks);
3688
3689static ssize_t trace_show(struct kmem_cache *s, char *buf)
3690{
3691 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
3692}
3693
3694static ssize_t trace_store(struct kmem_cache *s, const char *buf,
3695 size_t length)
3696{
3697 s->flags &= ~SLAB_TRACE;
3698 if (buf[0] == '1')
3699 s->flags |= SLAB_TRACE;
3700 return length;
3701}
3702SLAB_ATTR(trace);
3703
3704static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
3705{
3706 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
3707}
3708
3709static ssize_t reclaim_account_store(struct kmem_cache *s,
3710 const char *buf, size_t length)
3711{
3712 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
3713 if (buf[0] == '1')
3714 s->flags |= SLAB_RECLAIM_ACCOUNT;
3715 return length;
3716}
3717SLAB_ATTR(reclaim_account);
3718
3719static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
3720{
3721 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
3722}
3723SLAB_ATTR_RO(hwcache_align);
3724
3725#ifdef CONFIG_ZONE_DMA
3726static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
3727{
3728 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
3729}
3730SLAB_ATTR_RO(cache_dma);
3731#endif
3732
3733static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
3734{
3735 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
3736}
3737SLAB_ATTR_RO(destroy_by_rcu);
3738
3739static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
3740{
3741 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
3742}
3743
3744static ssize_t red_zone_store(struct kmem_cache *s,
3745 const char *buf, size_t length)
3746{
3747 if (any_slab_objects(s))
3748 return -EBUSY;
3749
3750 s->flags &= ~SLAB_RED_ZONE;
3751 if (buf[0] == '1')
3752 s->flags |= SLAB_RED_ZONE;
3753 calculate_sizes(s);
3754 return length;
3755}
3756SLAB_ATTR(red_zone);
3757
3758static ssize_t poison_show(struct kmem_cache *s, char *buf)
3759{
3760 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
3761}
3762
3763static ssize_t poison_store(struct kmem_cache *s,
3764 const char *buf, size_t length)
3765{
3766 if (any_slab_objects(s))
3767 return -EBUSY;
3768
3769 s->flags &= ~SLAB_POISON;
3770 if (buf[0] == '1')
3771 s->flags |= SLAB_POISON;
3772 calculate_sizes(s);
3773 return length;
3774}
3775SLAB_ATTR(poison);
3776
3777static ssize_t store_user_show(struct kmem_cache *s, char *buf)
3778{
3779 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
3780}
3781
3782static ssize_t store_user_store(struct kmem_cache *s,
3783 const char *buf, size_t length)
3784{
3785 if (any_slab_objects(s))
3786 return -EBUSY;
3787
3788 s->flags &= ~SLAB_STORE_USER;
3789 if (buf[0] == '1')
3790 s->flags |= SLAB_STORE_USER;
3791 calculate_sizes(s);
3792 return length;
3793}
3794SLAB_ATTR(store_user);
3795
3796static ssize_t validate_show(struct kmem_cache *s, char *buf)
3797{
3798 return 0;
3799}
3800
3801static ssize_t validate_store(struct kmem_cache *s,
3802 const char *buf, size_t length)
3803{
3804 int ret = -EINVAL;
3805
3806 if (buf[0] == '1') {
3807 ret = validate_slab_cache(s);
3808 if (ret >= 0)
3809 ret = length;
3810 }
3811 return ret;
3812}
3813SLAB_ATTR(validate);
3814
3815static ssize_t shrink_show(struct kmem_cache *s, char *buf)
3816{
3817 return 0;
3818}
3819
3820static ssize_t shrink_store(struct kmem_cache *s,
3821 const char *buf, size_t length)
3822{
3823 if (buf[0] == '1') {
3824 int rc = kmem_cache_shrink(s);
3825
3826 if (rc)
3827 return rc;
3828 } else
3829 return -EINVAL;
3830 return length;
3831}
3832SLAB_ATTR(shrink);
3833
3834static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
3835{
3836 if (!(s->flags & SLAB_STORE_USER))
3837 return -ENOSYS;
3838 return list_locations(s, buf, TRACK_ALLOC);
3839}
3840SLAB_ATTR_RO(alloc_calls);
3841
3842static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
3843{
3844 if (!(s->flags & SLAB_STORE_USER))
3845 return -ENOSYS;
3846 return list_locations(s, buf, TRACK_FREE);
3847}
3848SLAB_ATTR_RO(free_calls);
3849
3850#ifdef CONFIG_NUMA
3851static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
3852{
3853 return sprintf(buf, "%d\n", s->defrag_ratio / 10);
3854}
3855
3856static ssize_t defrag_ratio_store(struct kmem_cache *s,
3857 const char *buf, size_t length)
3858{
3859 int n = simple_strtoul(buf, NULL, 10);
3860
3861 if (n < 100)
3862 s->defrag_ratio = n * 10;
3863 return length;
3864}
3865SLAB_ATTR(defrag_ratio);
3866#endif
3867
3868static struct attribute *slab_attrs[] = {
3869 &slab_size_attr.attr,
3870 &object_size_attr.attr,
3871 &objs_per_slab_attr.attr,
3872 &order_attr.attr,
3873 &objects_attr.attr,
3874 &slabs_attr.attr,
3875 &partial_attr.attr,
3876 &cpu_slabs_attr.attr,
3877 &ctor_attr.attr,
3878 &aliases_attr.attr,
3879 &align_attr.attr,
3880 &sanity_checks_attr.attr,
3881 &trace_attr.attr,
3882 &hwcache_align_attr.attr,
3883 &reclaim_account_attr.attr,
3884 &destroy_by_rcu_attr.attr,
3885 &red_zone_attr.attr,
3886 &poison_attr.attr,
3887 &store_user_attr.attr,
3888 &validate_attr.attr,
3889 &shrink_attr.attr,
3890 &alloc_calls_attr.attr,
3891 &free_calls_attr.attr,
3892#ifdef CONFIG_ZONE_DMA
3893 &cache_dma_attr.attr,
3894#endif
3895#ifdef CONFIG_NUMA
3896 &defrag_ratio_attr.attr,
3897#endif
3898 NULL
3899};
3900
3901static struct attribute_group slab_attr_group = {
3902 .attrs = slab_attrs,
3903};
3904
3905static ssize_t slab_attr_show(struct kobject *kobj,
3906 struct attribute *attr,
3907 char *buf)
3908{
3909 struct slab_attribute *attribute;
3910 struct kmem_cache *s;
3911 int err;
3912
3913 attribute = to_slab_attr(attr);
3914 s = to_slab(kobj);
3915
3916 if (!attribute->show)
3917 return -EIO;
3918
3919 err = attribute->show(s, buf);
3920
3921 return err;
3922}
3923
3924static ssize_t slab_attr_store(struct kobject *kobj,
3925 struct attribute *attr,
3926 const char *buf, size_t len)
3927{
3928 struct slab_attribute *attribute;
3929 struct kmem_cache *s;
3930 int err;
3931
3932 attribute = to_slab_attr(attr);
3933 s = to_slab(kobj);
3934
3935 if (!attribute->store)
3936 return -EIO;
3937
3938 err = attribute->store(s, buf, len);
3939
3940 return err;
3941}
3942
3943static struct sysfs_ops slab_sysfs_ops = {
3944 .show = slab_attr_show,
3945 .store = slab_attr_store,
3946};
3947
3948static struct kobj_type slab_ktype = {
3949 .sysfs_ops = &slab_sysfs_ops,
3950};
3951
3952static int uevent_filter(struct kset *kset, struct kobject *kobj)
3953{
3954 struct kobj_type *ktype = get_ktype(kobj);
3955
3956 if (ktype == &slab_ktype)
3957 return 1;
3958 return 0;
3959}
3960
3961static struct kset_uevent_ops slab_uevent_ops = {
3962 .filter = uevent_filter,
3963};
3964
3965static decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
3966
3967#define ID_STR_LENGTH 64
3968
3969
3970/*
3971 * Create a unique string id for a slab cache: ":[flags-]size"
3972 */
3973static char *create_unique_id(struct kmem_cache *s)
3974{
3975 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
3976 char *p = name;
3977
3978 BUG_ON(!name);
3979
3980 *p++ = ':';
3981
3982 /*
3983 * First put flags affecting slabcache operations. We will only
3984 * get here for aliasable slabs so we do not need to support
3985 * too many flags. The flags here must cover all flags that are
3986 * matched during merging to guarantee that the id is unique.
3987 */
3988 if (s->flags & SLAB_CACHE_DMA)
3989 *p++ = 'd';
3990 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3991 *p++ = 'a';
3992 if (s->flags & SLAB_DEBUG_FREE)
3993 *p++ = 'F';
3994 if (p != name + 1)
3995 *p++ = '-';
3996 p += sprintf(p, "%07d", s->size);
3997 BUG_ON(p > name + ID_STR_LENGTH - 1);
3998 return name;
3999}
4000
4001static int sysfs_slab_add(struct kmem_cache *s)
4002{
4003 int err;
4004 const char *name;
4005 int unmergeable;
4006
4007 if (slab_state < SYSFS)
4008 /* Defer sysfs registration until slab_sysfs_init() runs */
4009 return 0;
4010
4011 unmergeable = slab_unmergeable(s);
4012 if (unmergeable) {
4013 /*
4014 * A slabcache that can never be merged can use its name
4015 * directly. This is typically the case for debug setups and
4016 * lets duplicate names be caught easily.
4017 */
4018 sysfs_remove_link(&slab_subsys.kobj, s->name);
4019 name = s->name;
4020 } else {
4021 /*
4022 * Create a unique name for the slab as a target
4023 * for the symlinks.
4024 */
4025 name = create_unique_id(s);
4026 }
4027
4028 kobj_set_kset_s(s, slab_subsys);
4029 kobject_set_name(&s->kobj, name);
4030 kobject_init(&s->kobj);
4031 err = kobject_add(&s->kobj);
4032 if (err)
4033 return err;
4034
4035 err = sysfs_create_group(&s->kobj, &slab_attr_group);
4036 if (err)
4037 return err;
4038 kobject_uevent(&s->kobj, KOBJ_ADD);
4039 if (!unmergeable) {
4040 /* Set up the first alias under the cache's own name */
4041 sysfs_slab_alias(s, s->name);
4042 kfree(name);
4043 }
4044 return 0;
4045}
4046
4047static void sysfs_slab_remove(struct kmem_cache *s)
4048{
4049 kobject_uevent(&s->kobj, KOBJ_REMOVE);
4050 kobject_del(&s->kobj);
4051}
4052
4053
4054/*
4055 * Buffer aliases during bootup until sysfs becomes available.
4056 */
4057struct saved_alias {
4058 struct kmem_cache *s;
4059 const char *name;
4060 struct saved_alias *next;
4061};
4062
4063static struct saved_alias *alias_list;
4064
4065static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
4066{
4067 struct saved_alias *al;
4068
4069 if (slab_state == SYSFS) {
4070 /*
4071 * If a leftover link exists, remove it before creating a new one.
4072 */
4073 sysfs_remove_link(&slab_subsys.kobj, name);
4074 return sysfs_create_link(&slab_subsys.kobj,
4075 &s->kobj, name);
4076 }
4077
4078 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
4079 if (!al)
4080 return -ENOMEM;
4081
4082 al->s = s;
4083 al->name = name;
4084 al->next = alias_list;
4085 alias_list = al;
4086 return 0;
4087}
4088
4089static int __init slab_sysfs_init(void)
4090{
4091 struct kmem_cache *s;
4092 int err;
4093
4094 err = subsystem_register(&slab_subsys);
4095 if (err) {
4096 printk(KERN_ERR "Cannot register slab subsystem.\n");
4097 return -ENOSYS;
4098 }
4099
4100 slab_state = SYSFS;
4101
4102 list_for_each_entry(s, &slab_caches, list) {
4103 err = sysfs_slab_add(s);
4104 if (err)
4105 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
4106 " to sysfs\n", s->name);
4107 }
4108
4109 while (alias_list) {
4110 struct saved_alias *al = alias_list;
4111
4112 alias_list = alias_list->next;
4113 err = sysfs_slab_alias(al->s, al->name);
4114 if (err)
4115 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
4116 " %s to sysfs\n", al->name);
4117 kfree(al);
4118 }
4119
4120 resiliency_test();
4121 return 0;
4122}
4123
4124__initcall(slab_sysfs_init);
4125#endif
4126
4127/*
4128 * The /proc/slabinfo ABI
4129 */
4130#ifdef CONFIG_SLABINFO
4131
4132ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4133 size_t count, loff_t *ppos)
4134{
4135 return -EINVAL;
4136}
4137
4138
4139static void print_slabinfo_header(struct seq_file *m)
4140{
4141 seq_puts(m, "slabinfo - version: 2.1\n");
4142 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4143 "<objperslab> <pagesperslab>");
4144 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4145 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4146 seq_putc(m, '\n');
4147}
4148
4149static void *s_start(struct seq_file *m, loff_t *pos)
4150{
4151 loff_t n = *pos;
4152
4153 down_read(&slub_lock);
4154 if (!n)
4155 print_slabinfo_header(m);
4156
4157 return seq_list_start(&slab_caches, *pos);
4158}
4159
4160static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4161{
4162 return seq_list_next(p, &slab_caches, pos);
4163}
4164
4165static void s_stop(struct seq_file *m, void *p)
4166{
4167 up_read(&slub_lock);
4168}
4169
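/*
 * One /proc/slabinfo row per cache. Objects on full slabs are all
 * assumed to be in use; only partial slabs are counted precisely.
 */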
4170static int s_show(struct seq_file *m, void *p)
4171{
4172 unsigned long nr_partials = 0;
4173 unsigned long nr_slabs = 0;
4174 unsigned long nr_inuse = 0;
4175 unsigned long nr_objs;
4176 struct kmem_cache *s;
4177 int node;
4178
4179 s = list_entry(p, struct kmem_cache, list);
4180
4181 for_each_online_node(node) {
4182 struct kmem_cache_node *n = get_node(s, node);
4183
4184 if (!n)
4185 continue;
4186
4187 nr_partials += n->nr_partial;
4188 nr_slabs += atomic_long_read(&n->nr_slabs);
4189 nr_inuse += count_partial(n);
4190 }
4191
4192 nr_objs = nr_slabs * s->objects;
4193 nr_inuse += (nr_slabs - nr_partials) * s->objects;
4194
4195 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
4196 nr_objs, s->size, s->objects, (1 << s->order));
4197 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
4198 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
4199 0UL);
4200 seq_putc(m, '\n');
4201 return 0;
4202}
4203
4204const struct seq_operations slabinfo_op = {
4205 .start = s_start,
4206 .next = s_next,
4207 .stop = s_stop,
4208 .show = s_show,
4209};
4210
4211#endif
4212