/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 */
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>

#include <trace/events/kmem.h>

#include "internal.h"

/*
 * Lock order:
 *   1. slab_mutex (Global Mutex)
 *   2. node->list_lock
 *   3. slab_lock(page) (Only on some arches and for debugging)
 *
 * The slab_lock is only used for debugging and on arches that do not
 * support cmpxchg_double. It protects the page->freelist, page->inuse and
 * page->frozen fields of a slab page. The list_lock protects each node's
 * partial (and, for debug caches, full) lists and the partial slab counter;
 * interrupts are disabled while it is taken.
 *
 * Slabs with free objects are kept on a per-node partial list. A slab that
 * is currently serving a cpu (or sitting on a cpu partial list) is "frozen":
 * it is exempt from list management, and objects freed to a frozen slab go
 * onto its freelist without touching the node lists. The slab is unfrozen
 * when it is deactivated (see deactivate_slab() and unfreeze_partials()).
 */

static inline int kmem_cache_debug(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	return unlikely(s->flags & SLAB_DEBUG_FLAGS);
#else
	return 0;
#endif
}

static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
	return !kmem_cache_debug(s);
#else
	return false;
#endif
}

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/* Enable to log cmpxchg failures */
#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * Debugging flags that require metadata to be stored in the slab. These get
 * disabled when slub_debug=O is used and a cache's min order increases with
 * metadata.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Set of flags that will prevent slab merging
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */

/* Internal SLUB flags */
#define __OBJECT_POISON		0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */

#ifdef CONFIG_SMP
static struct notifier_block slab_notifier;
#endif

/*
 * Tracking user of a slab.
 */
#define TRACK_ADDRS_COUNT 16
struct track {
	unsigned long addr;	/* Called from address */
#ifdef CONFIG_STACKTRACE
	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
#endif
	int cpu;		/* Was running on cpu */
	int pid;		/* Pid context */
	unsigned long when;	/* When did the operation occur */
};

enum track_item { TRACK_ALLOC, TRACK_FREE };

#ifdef CONFIG_SYSFS
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
static void memcg_propagate_slab_attrs(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
							{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }

static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
#endif

static inline void stat(const struct kmem_cache *s, enum stat_item si)
{
#ifdef CONFIG_SLUB_STATS
	__this_cpu_inc(s->cpu_slab->stat[si]);
#endif
}

static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}
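
/* Verify that a pointer has an address that is valid within a slab page */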
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size) {
		return 0;
	}

	return 1;
}

static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

static void prefetch_freepointer(const struct kmem_cache *s, void *object)
{
	prefetch(object + s->offset);
}

static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
	void *p;

#ifdef CONFIG_DEBUG_PAGEALLOC
	probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
#else
	p = get_freepointer(s, object);
#endif
	return p;
}

static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}
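
/* Loop over all objects in a slab */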
#define for_each_object(__p, __s, __addr, __objects) \
	for (__p = (__addr); __p < (__addr) + (__objects) * (__s)->size;\
			__p += (__s)->size)

/* Determine object index from a given position */
static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
{
	return (p - addr) / s->size;
}

static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;

#endif
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}

static inline int order_objects(int order, unsigned long size, int reserved)
{
	return ((PAGE_SIZE << order) - reserved) / size;
}

static inline struct kmem_cache_order_objects oo_make(int order,
		unsigned long size, int reserved)
{
	struct kmem_cache_order_objects x = {
		(order << OO_SHIFT) + order_objects(order, size, reserved)
	};

	return x;
}

static inline int oo_order(struct kmem_cache_order_objects x)
{
	return x.x >> OO_SHIFT;
}

static inline int oo_objects(struct kmem_cache_order_objects x)
{
	return x.x & OO_MASK;
}

/*
 * Per slab locking using the pagelock
 */
static __always_inline void slab_lock(struct page *page)
{
	bit_spin_lock(PG_locked, &page->flags);
}

static __always_inline void slab_unlock(struct page *page)
{
	__bit_spin_unlock(PG_locked, &page->flags);
}

static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
{
	struct page tmp;
	tmp.counters = counters_new;
	/*
	 * page->counters can cover frozen/inuse/objects as well
	 * as page->_count.  If we assign it directly we would
	 * corrupt page->_count without the benefit of atomic
	 * operations, so copy the individual fields instead.
	 */
	page->frozen  = tmp.frozen;
	page->inuse   = tmp.inuse;
	page->objects = tmp.objects;
}

/* Interrupts must be disabled (for the fallback code to work right) */
static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
	VM_BUG_ON(!irqs_disabled());
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
				   freelist_old, counters_old,
				   freelist_new, counters_new))
			return true;
	} else
#endif
	{
		slab_lock(page);
		if (page->freelist == freelist_old &&
					page->counters == counters_old) {
			page->freelist = freelist_new;
			set_page_slub_counters(page, counters_new);
			slab_unlock(page);
			return true;
		}
		slab_unlock(page);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return false;
}

static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new,
		const char *n)
{
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
	if (s->flags & __CMPXCHG_DOUBLE) {
		if (cmpxchg_double(&page->freelist, &page->counters,
				   freelist_old, counters_old,
				   freelist_new, counters_new))
			return true;
	} else
#endif
	{
		unsigned long flags;

		local_irq_save(flags);
		slab_lock(page);
		if (page->freelist == freelist_old &&
					page->counters == counters_old) {
			page->freelist = freelist_new;
			set_page_slub_counters(page, counters_new);
			slab_unlock(page);
			local_irq_restore(flags);
			return true;
		}
		slab_unlock(page);
		local_irq_restore(flags);
	}

	cpu_relax();
	stat(s, CMPXCHG_DOUBLE_FAIL);

#ifdef SLUB_DEBUG_CMPXCHG
	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
#endif

	return false;
}
451
452#ifdef CONFIG_SLUB_DEBUG
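
/*
 * Determine a map of objects in use on a page.
 *
 * Node list lock must be held to guarantee that the page does
 * not vanish from under us.
 */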
459static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
460{
461 void *p;
462 void *addr = page_address(page);
463
464 for (p = page->freelist; p; p = get_freepointer(s, p))
465 set_bit(slab_index(p, s, addr), map);
466}
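
/*
 * Debug settings:
 */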
471#ifdef CONFIG_SLUB_DEBUG_ON
472static int slub_debug = DEBUG_DEFAULT_FLAGS;
473#else
474static int slub_debug;
475#endif
476
477static char *slub_debug_slabs;
478static int disable_higher_order_debug;
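
/*
 * Object debugging
 */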
483static void print_section(char *text, u8 *addr, unsigned int length)
484{
485 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
486 length, 1);
487}
488
489static struct track *get_track(struct kmem_cache *s, void *object,
490 enum track_item alloc)
491{
492 struct track *p;
493
494 if (s->offset)
495 p = object + s->offset + sizeof(void *);
496 else
497 p = object + s->inuse;
498
499 return p + alloc;
500}
501
502static void set_track(struct kmem_cache *s, void *object,
503 enum track_item alloc, unsigned long addr)
504{
505 struct track *p = get_track(s, object, alloc);
506
507 if (addr) {
508#ifdef CONFIG_STACKTRACE
509 struct stack_trace trace;
510 int i;
511
512 trace.nr_entries = 0;
513 trace.max_entries = TRACK_ADDRS_COUNT;
514 trace.entries = p->addrs;
515 trace.skip = 3;
516 save_stack_trace(&trace);
517
518
519 if (trace.nr_entries != 0 &&
520 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
521 trace.nr_entries--;
522
523 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
524 p->addrs[i] = 0;
525#endif
526 p->addr = addr;
527 p->cpu = smp_processor_id();
528 p->pid = current->pid;
529 p->when = jiffies;
530 } else
531 memset(p, 0, sizeof(struct track));
532}
533
534static void init_tracking(struct kmem_cache *s, void *object)
535{
536 if (!(s->flags & SLAB_STORE_USER))
537 return;
538
539 set_track(s, object, TRACK_FREE, 0UL);
540 set_track(s, object, TRACK_ALLOC, 0UL);
541}
542
543static void print_track(const char *s, struct track *t)
544{
545 if (!t->addr)
546 return;
547
548 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
549 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
550#ifdef CONFIG_STACKTRACE
551 {
552 int i;
553 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
554 if (t->addrs[i])
555 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
556 else
557 break;
558 }
559#endif
560}
561
562static void print_tracking(struct kmem_cache *s, void *object)
563{
564 if (!(s->flags & SLAB_STORE_USER))
565 return;
566
567 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
568 print_track("Freed", get_track(s, object, TRACK_FREE));
569}
570
571static void print_page_info(struct page *page)
572{
573 printk(KERN_ERR
574 "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
575 page, page->objects, page->inuse, page->freelist, page->flags);
576
577}
578
579static void slab_bug(struct kmem_cache *s, char *fmt, ...)
580{
581 va_list args;
582 char buf[100];
583
584 va_start(args, fmt);
585 vsnprintf(buf, sizeof(buf), fmt, args);
586 va_end(args);
587 printk(KERN_ERR "========================================"
588 "=====================================\n");
589 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
590 printk(KERN_ERR "----------------------------------------"
591 "-------------------------------------\n\n");
592
593 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
594}
595
596static void slab_fix(struct kmem_cache *s, char *fmt, ...)
597{
598 va_list args;
599 char buf[100];
600
601 va_start(args, fmt);
602 vsnprintf(buf, sizeof(buf), fmt, args);
603 va_end(args);
604 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
605}
606
607static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
608{
609 unsigned int off;
610 u8 *addr = page_address(page);
611
612 print_tracking(s, p);
613
614 print_page_info(page);
615
616 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
617 p, p - addr, get_freepointer(s, p));
618
619 if (p > addr + 16)
620 print_section("Bytes b4 ", p - 16, 16);
621
622 print_section("Object ", p, min_t(unsigned long, s->object_size,
623 PAGE_SIZE));
624 if (s->flags & SLAB_RED_ZONE)
625 print_section("Redzone ", p + s->object_size,
626 s->inuse - s->object_size);
627
628 if (s->offset)
629 off = s->offset + sizeof(void *);
630 else
631 off = s->inuse;
632
633 if (s->flags & SLAB_STORE_USER)
634 off += 2 * sizeof(struct track);
635
636 if (off != s->size)
637
638 print_section("Padding ", p + off, s->size - off);
639
640 dump_stack();
641}
642
643static void object_err(struct kmem_cache *s, struct page *page,
644 u8 *object, char *reason)
645{
646 slab_bug(s, "%s", reason);
647 print_trailer(s, page, object);
648}
649
650static void slab_err(struct kmem_cache *s, struct page *page,
651 const char *fmt, ...)
652{
653 va_list args;
654 char buf[100];
655
656 va_start(args, fmt);
657 vsnprintf(buf, sizeof(buf), fmt, args);
658 va_end(args);
659 slab_bug(s, "%s", buf);
660 print_page_info(page);
661 dump_stack();
662}
663
664static void init_object(struct kmem_cache *s, void *object, u8 val)
665{
666 u8 *p = object;
667
668 if (s->flags & __OBJECT_POISON) {
669 memset(p, POISON_FREE, s->object_size - 1);
670 p[s->object_size - 1] = POISON_END;
671 }
672
673 if (s->flags & SLAB_RED_ZONE)
674 memset(p + s->object_size, val, s->inuse - s->object_size);
675}
676
677static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
678 void *from, void *to)
679{
680 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
681 memset(from, data, to - from);
682}
683
684static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
685 u8 *object, char *what,
686 u8 *start, unsigned int value, unsigned int bytes)
687{
688 u8 *fault;
689 u8 *end;
690
691 fault = memchr_inv(start, value, bytes);
692 if (!fault)
693 return 1;
694
695 end = start + bytes;
696 while (end > fault && end[-1] == value)
697 end--;
698
699 slab_bug(s, "%s overwritten", what);
700 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
701 fault, end - 1, fault[0], value);
702 print_trailer(s, page, object);
703
704 restore_bytes(s, what, value, fault, end);
705 return 0;
706}

/*
 * Object layout:
 *
 * object address
 *	Bytes of the object to be managed. If the free pointer may overlay
 *	the object then it is placed at the very beginning of the object.
 *
 * object + s->object_size
 *	Padding to the next word boundary. This is also used for red zoning.
 *
 * object + s->inuse
 *	Metadata: the out-of-object free pointer (when it cannot overlay the
 *	object) and the two struct track records when SLAB_STORE_USER is set.
 *
 * object + s->size
 *	Start of the next object (or the slab padding).
 */

746static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
747{
748 unsigned long off = s->inuse;
749
750 if (s->offset)
751
752 off += sizeof(void *);
753
754 if (s->flags & SLAB_STORE_USER)
755
756 off += 2 * sizeof(struct track);
757
758 if (s->size == off)
759 return 1;
760
761 return check_bytes_and_report(s, page, p, "Object padding",
762 p + off, POISON_INUSE, s->size - off);
763}
764
765
766static int slab_pad_check(struct kmem_cache *s, struct page *page)
767{
768 u8 *start;
769 u8 *fault;
770 u8 *end;
771 int length;
772 int remainder;
773
774 if (!(s->flags & SLAB_POISON))
775 return 1;
776
777 start = page_address(page);
778 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
779 end = start + length;
780 remainder = length % s->size;
781 if (!remainder)
782 return 1;
783
784 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
785 if (!fault)
786 return 1;
787 while (end > fault && end[-1] == POISON_INUSE)
788 end--;
789
790 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
791 print_section("Padding ", end - remainder, remainder);
792
793 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
794 return 0;
795}
796
797static int check_object(struct kmem_cache *s, struct page *page,
798 void *object, u8 val)
799{
800 u8 *p = object;
801 u8 *endobject = object + s->object_size;
802
803 if (s->flags & SLAB_RED_ZONE) {
804 if (!check_bytes_and_report(s, page, object, "Redzone",
805 endobject, val, s->inuse - s->object_size))
806 return 0;
807 } else {
808 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
809 check_bytes_and_report(s, page, p, "Alignment padding",
810 endobject, POISON_INUSE,
811 s->inuse - s->object_size);
812 }
813 }
814
815 if (s->flags & SLAB_POISON) {
816 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
817 (!check_bytes_and_report(s, page, p, "Poison", p,
818 POISON_FREE, s->object_size - 1) ||
819 !check_bytes_and_report(s, page, p, "Poison",
820 p + s->object_size - 1, POISON_END, 1)))
821 return 0;
822
823
824
825 check_pad_bytes(s, page, p);
826 }
827
828 if (!s->offset && val == SLUB_RED_ACTIVE)
829
830
831
832
833 return 1;
834
835
836 if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
837 object_err(s, page, p, "Freepointer corrupt");
838
839
840
841
842
843 set_freepointer(s, p, NULL);
844 return 0;
845 }
846 return 1;
847}
848
849static int check_slab(struct kmem_cache *s, struct page *page)
850{
851 int maxobj;
852
853 VM_BUG_ON(!irqs_disabled());
854
855 if (!PageSlab(page)) {
856 slab_err(s, page, "Not a valid slab page");
857 return 0;
858 }
859
860 maxobj = order_objects(compound_order(page), s->size, s->reserved);
861 if (page->objects > maxobj) {
862 slab_err(s, page, "objects %u > max %u",
863 s->name, page->objects, maxobj);
864 return 0;
865 }
866 if (page->inuse > page->objects) {
867 slab_err(s, page, "inuse %u > max %u",
868 s->name, page->inuse, page->objects);
869 return 0;
870 }
871
872 slab_pad_check(s, page);
873 return 1;
874}
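
/*
 * Determine if a certain object on a page is on the freelist. Must hold the
 * slab lock to make sure that the freelist is in a consistent state.
 */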
880static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
881{
882 int nr = 0;
883 void *fp;
884 void *object = NULL;
885 unsigned long max_objects;
886
887 fp = page->freelist;
888 while (fp && nr <= page->objects) {
889 if (fp == search)
890 return 1;
891 if (!check_valid_pointer(s, page, fp)) {
892 if (object) {
893 object_err(s, page, object,
894 "Freechain corrupt");
895 set_freepointer(s, object, NULL);
896 } else {
897 slab_err(s, page, "Freepointer corrupt");
898 page->freelist = NULL;
899 page->inuse = page->objects;
900 slab_fix(s, "Freelist cleared");
901 return 0;
902 }
903 break;
904 }
905 object = fp;
906 fp = get_freepointer(s, object);
907 nr++;
908 }
909
910 max_objects = order_objects(compound_order(page), s->size, s->reserved);
911 if (max_objects > MAX_OBJS_PER_PAGE)
912 max_objects = MAX_OBJS_PER_PAGE;
913
914 if (page->objects != max_objects) {
915 slab_err(s, page, "Wrong number of objects. Found %d but "
916 "should be %d", page->objects, max_objects);
917 page->objects = max_objects;
918 slab_fix(s, "Number of objects adjusted.");
919 }
920 if (page->inuse != page->objects - nr) {
921 slab_err(s, page, "Wrong object count. Counter is %d but "
922 "counted were %d", page->inuse, page->objects - nr);
923 page->inuse = page->objects - nr;
924 slab_fix(s, "Object count adjusted.");
925 }
926 return search == NULL;
927}
928
929static void trace(struct kmem_cache *s, struct page *page, void *object,
930 int alloc)
931{
932 if (s->flags & SLAB_TRACE) {
933 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
934 s->name,
935 alloc ? "alloc" : "free",
936 object, page->inuse,
937 page->freelist);
938
939 if (!alloc)
940 print_section("Object ", (void *)object,
941 s->object_size);
942
943 dump_stack();
944 }
945}
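
/*
 * Hooks for other subsystems that check memory allocations. In a typical
 * production configuration these hooks all should produce no code at all.
 */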
951static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
952{
953 kmemleak_alloc(ptr, size, 1, flags);
954}
955
956static inline void kfree_hook(const void *x)
957{
958 kmemleak_free(x);
959}
960
961static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
962{
963 flags &= gfp_allowed_mask;
964 lockdep_trace_alloc(flags);
965 might_sleep_if(flags & __GFP_WAIT);
966
967 return should_failslab(s->object_size, flags, s->flags);
968}
969
970static inline void slab_post_alloc_hook(struct kmem_cache *s,
971 gfp_t flags, void *object)
972{
973 flags &= gfp_allowed_mask;
974 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
975 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
976}
977
978static inline void slab_free_hook(struct kmem_cache *s, void *x)
979{
980 kmemleak_free_recursive(x, s->flags);
981
982
983
984
985
986
987#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
988 {
989 unsigned long flags;
990
991 local_irq_save(flags);
992 kmemcheck_slab_free(s, x, s->object_size);
993 debug_check_no_locks_freed(x, s->object_size);
994 local_irq_restore(flags);
995 }
996#endif
997 if (!(s->flags & SLAB_DEBUG_OBJECTS))
998 debug_check_no_obj_freed(x, s->object_size);
999}
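
/*
 * Tracking of fully allocated slabs for debugging purposes.
 *
 * list_lock must be held.
 */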
1004static void add_full(struct kmem_cache *s,
1005 struct kmem_cache_node *n, struct page *page)
1006{
1007 if (!(s->flags & SLAB_STORE_USER))
1008 return;
1009
1010 lockdep_assert_held(&n->list_lock);
1011 list_add(&page->lru, &n->full);
1012}
1013
1014static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
1015{
1016 if (!(s->flags & SLAB_STORE_USER))
1017 return;
1018
1019 lockdep_assert_held(&n->list_lock);
1020 list_del(&page->lru);
1021}
1022
1023
1024static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1025{
1026 struct kmem_cache_node *n = get_node(s, node);
1027
1028 return atomic_long_read(&n->nr_slabs);
1029}
1030
1031static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1032{
1033 return atomic_long_read(&n->nr_slabs);
1034}
1035
1036static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1037{
1038 struct kmem_cache_node *n = get_node(s, node);
1039
1040
1041
1042
1043
1044
1045
1046 if (likely(n)) {
1047 atomic_long_inc(&n->nr_slabs);
1048 atomic_long_add(objects, &n->total_objects);
1049 }
1050}
1051static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1052{
1053 struct kmem_cache_node *n = get_node(s, node);
1054
1055 atomic_long_dec(&n->nr_slabs);
1056 atomic_long_sub(objects, &n->total_objects);
1057}
1058
1059
1060static void setup_object_debug(struct kmem_cache *s, struct page *page,
1061 void *object)
1062{
1063 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1064 return;
1065
1066 init_object(s, object, SLUB_RED_INACTIVE);
1067 init_tracking(s, object);
1068}
1069
1070static noinline int alloc_debug_processing(struct kmem_cache *s,
1071 struct page *page,
1072 void *object, unsigned long addr)
1073{
1074 if (!check_slab(s, page))
1075 goto bad;
1076
1077 if (!check_valid_pointer(s, page, object)) {
1078 object_err(s, page, object, "Freelist Pointer check fails");
1079 goto bad;
1080 }
1081
1082 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1083 goto bad;
1084
1085
1086 if (s->flags & SLAB_STORE_USER)
1087 set_track(s, object, TRACK_ALLOC, addr);
1088 trace(s, page, object, 1);
1089 init_object(s, object, SLUB_RED_ACTIVE);
1090 return 1;
1091
1092bad:
1093 if (PageSlab(page)) {
1094
1095
1096
1097
1098
1099 slab_fix(s, "Marking all objects used");
1100 page->inuse = page->objects;
1101 page->freelist = NULL;
1102 }
1103 return 0;
1104}
1105
1106static noinline struct kmem_cache_node *free_debug_processing(
1107 struct kmem_cache *s, struct page *page, void *object,
1108 unsigned long addr, unsigned long *flags)
1109{
1110 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1111
1112 spin_lock_irqsave(&n->list_lock, *flags);
1113 slab_lock(page);
1114
1115 if (!check_slab(s, page))
1116 goto fail;
1117
1118 if (!check_valid_pointer(s, page, object)) {
1119 slab_err(s, page, "Invalid object pointer 0x%p", object);
1120 goto fail;
1121 }
1122
1123 if (on_freelist(s, page, object)) {
1124 object_err(s, page, object, "Object already free");
1125 goto fail;
1126 }
1127
1128 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1129 goto out;
1130
1131 if (unlikely(s != page->slab_cache)) {
1132 if (!PageSlab(page)) {
1133 slab_err(s, page, "Attempt to free object(0x%p) "
1134 "outside of slab", object);
1135 } else if (!page->slab_cache) {
1136 printk(KERN_ERR
1137 "SLUB <none>: no slab for object 0x%p.\n",
1138 object);
1139 dump_stack();
1140 } else
1141 object_err(s, page, object,
1142 "page slab pointer corrupt.");
1143 goto fail;
1144 }
1145
1146 if (s->flags & SLAB_STORE_USER)
1147 set_track(s, object, TRACK_FREE, addr);
1148 trace(s, page, object, 0);
1149 init_object(s, object, SLUB_RED_INACTIVE);
1150out:
1151 slab_unlock(page);
1152
1153
1154
1155
1156 return n;
1157
1158fail:
1159 slab_unlock(page);
1160 spin_unlock_irqrestore(&n->list_lock, *flags);
1161 slab_fix(s, "Object at 0x%p not freed", object);
1162 return NULL;
1163}
1164
1165static int __init setup_slub_debug(char *str)
1166{
1167 slub_debug = DEBUG_DEFAULT_FLAGS;
1168 if (*str++ != '=' || !*str)
1169
1170
1171
1172 goto out;
1173
1174 if (*str == ',')
1175
1176
1177
1178
1179 goto check_slabs;
1180
1181 if (tolower(*str) == 'o') {
1182
1183
1184
1185
1186 disable_higher_order_debug = 1;
1187 goto out;
1188 }
1189
1190 slub_debug = 0;
1191 if (*str == '-')
1192
1193
1194
1195 goto out;
1196
1197
1198
1199
1200 for (; *str && *str != ','; str++) {
1201 switch (tolower(*str)) {
1202 case 'f':
1203 slub_debug |= SLAB_DEBUG_FREE;
1204 break;
1205 case 'z':
1206 slub_debug |= SLAB_RED_ZONE;
1207 break;
1208 case 'p':
1209 slub_debug |= SLAB_POISON;
1210 break;
1211 case 'u':
1212 slub_debug |= SLAB_STORE_USER;
1213 break;
1214 case 't':
1215 slub_debug |= SLAB_TRACE;
1216 break;
1217 case 'a':
1218 slub_debug |= SLAB_FAILSLAB;
1219 break;
1220 default:
1221 printk(KERN_ERR "slub_debug option '%c' "
1222 "unknown. skipped\n", *str);
1223 }
1224 }
1225
1226check_slabs:
1227 if (*str == ',')
1228 slub_debug_slabs = str + 1;
1229out:
1230 return 1;
1231}
1232
1233__setup("slub_debug", setup_slub_debug);
1234
1235static unsigned long kmem_cache_flags(unsigned long object_size,
1236 unsigned long flags, const char *name,
1237 void (*ctor)(void *))
1238{
1239
1240
1241
1242 if (slub_debug && (!slub_debug_slabs || (name &&
1243 !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))))
1244 flags |= slub_debug;
1245
1246 return flags;
1247}
1248#else
1249static inline void setup_object_debug(struct kmem_cache *s,
1250 struct page *page, void *object) {}
1251
1252static inline int alloc_debug_processing(struct kmem_cache *s,
1253 struct page *page, void *object, unsigned long addr) { return 0; }
1254
1255static inline struct kmem_cache_node *free_debug_processing(
1256 struct kmem_cache *s, struct page *page, void *object,
1257 unsigned long addr, unsigned long *flags) { return NULL; }
1258
1259static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1260 { return 1; }
1261static inline int check_object(struct kmem_cache *s, struct page *page,
1262 void *object, u8 val) { return 1; }
1263static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1264 struct page *page) {}
1265static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1266 struct page *page) {}
1267static inline unsigned long kmem_cache_flags(unsigned long object_size,
1268 unsigned long flags, const char *name,
1269 void (*ctor)(void *))
1270{
1271 return flags;
1272}
1273#define slub_debug 0
1274
1275#define disable_higher_order_debug 0
1276
1277static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1278 { return 0; }
1279static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1280 { return 0; }
1281static inline void inc_slabs_node(struct kmem_cache *s, int node,
1282 int objects) {}
1283static inline void dec_slabs_node(struct kmem_cache *s, int node,
1284 int objects) {}
1285
1286static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1287{
1288 kmemleak_alloc(ptr, size, 1, flags);
1289}
1290
1291static inline void kfree_hook(const void *x)
1292{
1293 kmemleak_free(x);
1294}
1295
1296static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1297 { return 0; }
1298
1299static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
1300 void *object)
1301{
1302 kmemleak_alloc_recursive(object, s->object_size, 1, s->flags,
1303 flags & gfp_allowed_mask);
1304}
1305
1306static inline void slab_free_hook(struct kmem_cache *s, void *x)
1307{
1308 kmemleak_free_recursive(x, s->flags);
1309}
1310
1311#endif
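
/*
 * Slab allocation and freeing
 */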
1316static inline struct page *alloc_slab_page(gfp_t flags, int node,
1317 struct kmem_cache_order_objects oo)
1318{
1319 int order = oo_order(oo);
1320
1321 flags |= __GFP_NOTRACK;
1322
1323 if (node == NUMA_NO_NODE)
1324 return alloc_pages(flags, order);
1325 else
1326 return alloc_pages_exact_node(node, flags, order);
1327}
1328
1329static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1330{
1331 struct page *page;
1332 struct kmem_cache_order_objects oo = s->oo;
1333 gfp_t alloc_gfp;
1334
1335 flags &= gfp_allowed_mask;
1336
1337 if (flags & __GFP_WAIT)
1338 local_irq_enable();
1339
1340 flags |= s->allocflags;
1341
1342
1343
1344
1345
1346 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1347
1348 page = alloc_slab_page(alloc_gfp, node, oo);
1349 if (unlikely(!page)) {
1350 oo = s->min;
1351
1352
1353
1354
1355 page = alloc_slab_page(flags, node, oo);
1356
1357 if (page)
1358 stat(s, ORDER_FALLBACK);
1359 }
1360
1361 if (kmemcheck_enabled && page
1362 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1363 int pages = 1 << oo_order(oo);
1364
1365 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1366
1367
1368
1369
1370
1371 if (s->ctor)
1372 kmemcheck_mark_uninitialized_pages(page, pages);
1373 else
1374 kmemcheck_mark_unallocated_pages(page, pages);
1375 }
1376
1377 if (flags & __GFP_WAIT)
1378 local_irq_disable();
1379 if (!page)
1380 return NULL;
1381
1382 page->objects = oo_objects(oo);
1383 mod_zone_page_state(page_zone(page),
1384 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1385 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1386 1 << oo_order(oo));
1387
1388 return page;
1389}
1390
1391static void setup_object(struct kmem_cache *s, struct page *page,
1392 void *object)
1393{
1394 setup_object_debug(s, page, object);
1395 if (unlikely(s->ctor))
1396 s->ctor(object);
1397}
1398
1399static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1400{
1401 struct page *page;
1402 void *start;
1403 void *last;
1404 void *p;
1405 int order;
1406
1407 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1408
1409 page = allocate_slab(s,
1410 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1411 if (!page)
1412 goto out;
1413
1414 order = compound_order(page);
1415 inc_slabs_node(s, page_to_nid(page), page->objects);
1416 memcg_bind_pages(s, order);
1417 page->slab_cache = s;
1418 __SetPageSlab(page);
1419 if (page->pfmemalloc)
1420 SetPageSlabPfmemalloc(page);
1421
1422 start = page_address(page);
1423
1424 if (unlikely(s->flags & SLAB_POISON))
1425 memset(start, POISON_INUSE, PAGE_SIZE << order);
1426
1427 last = start;
1428 for_each_object(p, s, start, page->objects) {
1429 setup_object(s, page, last);
1430 set_freepointer(s, last, p);
1431 last = p;
1432 }
1433 setup_object(s, page, last);
1434 set_freepointer(s, last, NULL);
1435
1436 page->freelist = start;
1437 page->inuse = page->objects;
1438 page->frozen = 1;
1439out:
1440 return page;
1441}
1442
1443static void __free_slab(struct kmem_cache *s, struct page *page)
1444{
1445 int order = compound_order(page);
1446 int pages = 1 << order;
1447
1448 if (kmem_cache_debug(s)) {
1449 void *p;
1450
1451 slab_pad_check(s, page);
1452 for_each_object(p, s, page_address(page),
1453 page->objects)
1454 check_object(s, page, p, SLUB_RED_INACTIVE);
1455 }
1456
1457 kmemcheck_free_shadow(page, compound_order(page));
1458
1459 mod_zone_page_state(page_zone(page),
1460 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1461 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1462 -pages);
1463
1464 __ClearPageSlabPfmemalloc(page);
1465 __ClearPageSlab(page);
1466
1467 memcg_release_pages(s, order);
1468 page_mapcount_reset(page);
1469 if (current->reclaim_state)
1470 current->reclaim_state->reclaimed_slab += pages;
1471 __free_memcg_kmem_pages(page, order);
1472}
1473
1474#define need_reserve_slab_rcu \
1475 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1476
1477static void rcu_free_slab(struct rcu_head *h)
1478{
1479 struct page *page;
1480
1481 if (need_reserve_slab_rcu)
1482 page = virt_to_head_page(h);
1483 else
1484 page = container_of((struct list_head *)h, struct page, lru);
1485
1486 __free_slab(page->slab_cache, page);
1487}
1488
1489static void free_slab(struct kmem_cache *s, struct page *page)
1490{
1491 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1492 struct rcu_head *head;
1493
1494 if (need_reserve_slab_rcu) {
1495 int order = compound_order(page);
1496 int offset = (PAGE_SIZE << order) - s->reserved;
1497
1498 VM_BUG_ON(s->reserved != sizeof(*head));
1499 head = page_address(page) + offset;
1500 } else {
1501
1502
1503
1504 head = (void *)&page->lru;
1505 }
1506
1507 call_rcu(head, rcu_free_slab);
1508 } else
1509 __free_slab(s, page);
1510}
1511
1512static void discard_slab(struct kmem_cache *s, struct page *page)
1513{
1514 dec_slabs_node(s, page_to_nid(page), page->objects);
1515 free_slab(s, page);
1516}
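
/*
 * Management of partially allocated slabs.
 */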
1521static inline void
1522__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
1523{
1524 n->nr_partial++;
1525 if (tail == DEACTIVATE_TO_TAIL)
1526 list_add_tail(&page->lru, &n->partial);
1527 else
1528 list_add(&page->lru, &n->partial);
1529}
1530
1531static inline void add_partial(struct kmem_cache_node *n,
1532 struct page *page, int tail)
1533{
1534 lockdep_assert_held(&n->list_lock);
1535 __add_partial(n, page, tail);
1536}
1537
1538static inline void
1539__remove_partial(struct kmem_cache_node *n, struct page *page)
1540{
1541 list_del(&page->lru);
1542 n->nr_partial--;
1543}
1544
1545static inline void remove_partial(struct kmem_cache_node *n,
1546 struct page *page)
1547{
1548 lockdep_assert_held(&n->list_lock);
1549 __remove_partial(n, page);
1550}
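
/*
 * Remove slab from the partial list, freeze it and
 * return the pointer to the freelist.
 *
 * Returns a list of objects or NULL if it was not possible to allocate one.
 */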
1558static inline void *acquire_slab(struct kmem_cache *s,
1559 struct kmem_cache_node *n, struct page *page,
1560 int mode, int *objects)
1561{
1562 void *freelist;
1563 unsigned long counters;
1564 struct page new;
1565
1566 lockdep_assert_held(&n->list_lock);
1567
1568
1569
1570
1571
1572
1573 freelist = page->freelist;
1574 counters = page->counters;
1575 new.counters = counters;
1576 *objects = new.objects - new.inuse;
1577 if (mode) {
1578 new.inuse = page->objects;
1579 new.freelist = NULL;
1580 } else {
1581 new.freelist = freelist;
1582 }
1583
1584 VM_BUG_ON(new.frozen);
1585 new.frozen = 1;
1586
1587 if (!__cmpxchg_double_slab(s, page,
1588 freelist, counters,
1589 new.freelist, new.counters,
1590 "acquire_slab"))
1591 return NULL;
1592
1593 remove_partial(n, page);
1594 WARN_ON(!freelist);
1595 return freelist;
1596}
1597
1598static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1599static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
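
/*
 * Try to allocate a partial slab from a specific node.
 */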
1604static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1605 struct kmem_cache_cpu *c, gfp_t flags)
1606{
1607 struct page *page, *page2;
1608 void *object = NULL;
1609 int available = 0;
1610 int objects;
1611
1612
1613
1614
1615
1616
1617
1618 if (!n || !n->nr_partial)
1619 return NULL;
1620
1621 spin_lock(&n->list_lock);
1622 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1623 void *t;
1624
1625 if (!pfmemalloc_match(page, flags))
1626 continue;
1627
1628 t = acquire_slab(s, n, page, object == NULL, &objects);
1629 if (!t)
1630 break;
1631
1632 available += objects;
1633 if (!object) {
1634 c->page = page;
1635 stat(s, ALLOC_FROM_PARTIAL);
1636 object = t;
1637 } else {
1638 put_cpu_partial(s, page, 0);
1639 stat(s, CPU_PARTIAL_NODE);
1640 }
1641 if (!kmem_cache_has_cpu_partial(s)
1642 || available > s->cpu_partial / 2)
1643 break;
1644
1645 }
1646 spin_unlock(&n->list_lock);
1647 return object;
1648}
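
/*
 * Get a page from somewhere. Search in increasing NUMA distances.
 */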
1653static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1654 struct kmem_cache_cpu *c)
1655{
1656#ifdef CONFIG_NUMA
1657 struct zonelist *zonelist;
1658 struct zoneref *z;
1659 struct zone *zone;
1660 enum zone_type high_zoneidx = gfp_zone(flags);
1661 void *object;
1662 unsigned int cpuset_mems_cookie;
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682 if (!s->remote_node_defrag_ratio ||
1683 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1684 return NULL;
1685
1686 do {
1687 cpuset_mems_cookie = get_mems_allowed();
1688 zonelist = node_zonelist(slab_node(), flags);
1689 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1690 struct kmem_cache_node *n;
1691
1692 n = get_node(s, zone_to_nid(zone));
1693
1694 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1695 n->nr_partial > s->min_partial) {
1696 object = get_partial_node(s, n, c, flags);
1697 if (object) {
1698
1699
1700
1701
1702
1703
1704
1705
1706 put_mems_allowed(cpuset_mems_cookie);
1707 return object;
1708 }
1709 }
1710 }
1711 } while (!put_mems_allowed(cpuset_mems_cookie));
1712#endif
1713 return NULL;
1714}
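
/*
 * Get a partial page from the node indicated (or the local node), falling
 * back to other nodes if necessary.
 */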
1719static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1720 struct kmem_cache_cpu *c)
1721{
1722 void *object;
1723 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1724
1725 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1726 if (object || node != NUMA_NO_NODE)
1727 return object;
1728
1729 return get_any_partial(s, flags, c);
1730}
1731
1732#ifdef CONFIG_PREEMPT
1733
1734
1735
1736
1737
1738#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
1739#else
1740
1741
1742
1743
1744#define TID_STEP 1
1745#endif
1746
1747static inline unsigned long next_tid(unsigned long tid)
1748{
1749 return tid + TID_STEP;
1750}
1751
1752static inline unsigned int tid_to_cpu(unsigned long tid)
1753{
1754 return tid % TID_STEP;
1755}
1756
1757static inline unsigned long tid_to_event(unsigned long tid)
1758{
1759 return tid / TID_STEP;
1760}
1761
1762static inline unsigned int init_tid(int cpu)
1763{
1764 return cpu;
1765}
1766
1767static inline void note_cmpxchg_failure(const char *n,
1768 const struct kmem_cache *s, unsigned long tid)
1769{
1770#ifdef SLUB_DEBUG_CMPXCHG
1771 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1772
1773 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1774
1775#ifdef CONFIG_PREEMPT
1776 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1777 printk("due to cpu change %d -> %d\n",
1778 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1779 else
1780#endif
1781 if (tid_to_event(tid) != tid_to_event(actual_tid))
1782 printk("due to cpu running other code. Event %ld->%ld\n",
1783 tid_to_event(tid), tid_to_event(actual_tid));
1784 else
1785 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1786 actual_tid, tid, next_tid(tid));
1787#endif
1788 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1789}
1790
1791static void init_kmem_cache_cpus(struct kmem_cache *s)
1792{
1793 int cpu;
1794
1795 for_each_possible_cpu(cpu)
1796 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1797}
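
/*
 * Remove the cpu slab: drain the remaining objects on the per-cpu freelist
 * back onto the page, unfreeze the page and put it on the proper node list
 * (or discard it if it became empty).
 */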
1802static void deactivate_slab(struct kmem_cache *s, struct page *page,
1803 void *freelist)
1804{
1805 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1806 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1807 int lock = 0;
1808 enum slab_modes l = M_NONE, m = M_NONE;
1809 void *nextfree;
1810 int tail = DEACTIVATE_TO_HEAD;
1811 struct page new;
1812 struct page old;
1813
1814 if (page->freelist) {
1815 stat(s, DEACTIVATE_REMOTE_FREES);
1816 tail = DEACTIVATE_TO_TAIL;
1817 }
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1828 void *prior;
1829 unsigned long counters;
1830
1831 do {
1832 prior = page->freelist;
1833 counters = page->counters;
1834 set_freepointer(s, freelist, prior);
1835 new.counters = counters;
1836 new.inuse--;
1837 VM_BUG_ON(!new.frozen);
1838
1839 } while (!__cmpxchg_double_slab(s, page,
1840 prior, counters,
1841 freelist, new.counters,
1842 "drain percpu freelist"));
1843
1844 freelist = nextfree;
1845 }
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861redo:
1862
1863 old.freelist = page->freelist;
1864 old.counters = page->counters;
1865 VM_BUG_ON(!old.frozen);
1866
1867
1868 new.counters = old.counters;
1869 if (freelist) {
1870 new.inuse--;
1871 set_freepointer(s, freelist, old.freelist);
1872 new.freelist = freelist;
1873 } else
1874 new.freelist = old.freelist;
1875
1876 new.frozen = 0;
1877
1878 if (!new.inuse && n->nr_partial > s->min_partial)
1879 m = M_FREE;
1880 else if (new.freelist) {
1881 m = M_PARTIAL;
1882 if (!lock) {
1883 lock = 1;
1884
1885
1886
1887
1888
1889 spin_lock(&n->list_lock);
1890 }
1891 } else {
1892 m = M_FULL;
1893 if (kmem_cache_debug(s) && !lock) {
1894 lock = 1;
1895
1896
1897
1898
1899
1900 spin_lock(&n->list_lock);
1901 }
1902 }
1903
1904 if (l != m) {
1905
1906 if (l == M_PARTIAL)
1907
1908 remove_partial(n, page);
1909
1910 else if (l == M_FULL)
1911
1912 remove_full(s, n, page);
1913
1914 if (m == M_PARTIAL) {
1915
1916 add_partial(n, page, tail);
1917 stat(s, tail);
1918
1919 } else if (m == M_FULL) {
1920
1921 stat(s, DEACTIVATE_FULL);
1922 add_full(s, n, page);
1923
1924 }
1925 }
1926
1927 l = m;
1928 if (!__cmpxchg_double_slab(s, page,
1929 old.freelist, old.counters,
1930 new.freelist, new.counters,
1931 "unfreezing slab"))
1932 goto redo;
1933
1934 if (lock)
1935 spin_unlock(&n->list_lock);
1936
1937 if (m == M_FREE) {
1938 stat(s, DEACTIVATE_EMPTY);
1939 discard_slab(s, page);
1940 stat(s, FREE_SLAB);
1941 }
1942}
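
/*
 * Unfreeze all the cpu partial slabs.
 *
 * This function must be called with interrupts disabled for the cpu using c
 * (or some other guarantee must be there to rule out concurrent accesses).
 */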
1951static void unfreeze_partials(struct kmem_cache *s,
1952 struct kmem_cache_cpu *c)
1953{
1954#ifdef CONFIG_SLUB_CPU_PARTIAL
1955 struct kmem_cache_node *n = NULL, *n2 = NULL;
1956 struct page *page, *discard_page = NULL;
1957
1958 while ((page = c->partial)) {
1959 struct page new;
1960 struct page old;
1961
1962 c->partial = page->next;
1963
1964 n2 = get_node(s, page_to_nid(page));
1965 if (n != n2) {
1966 if (n)
1967 spin_unlock(&n->list_lock);
1968
1969 n = n2;
1970 spin_lock(&n->list_lock);
1971 }
1972
1973 do {
1974
1975 old.freelist = page->freelist;
1976 old.counters = page->counters;
1977 VM_BUG_ON(!old.frozen);
1978
1979 new.counters = old.counters;
1980 new.freelist = old.freelist;
1981
1982 new.frozen = 0;
1983
1984 } while (!__cmpxchg_double_slab(s, page,
1985 old.freelist, old.counters,
1986 new.freelist, new.counters,
1987 "unfreezing slab"));
1988
1989 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
1990 page->next = discard_page;
1991 discard_page = page;
1992 } else {
1993 add_partial(n, page, DEACTIVATE_TO_TAIL);
1994 stat(s, FREE_ADD_PARTIAL);
1995 }
1996 }
1997
1998 if (n)
1999 spin_unlock(&n->list_lock);
2000
2001 while (discard_page) {
2002 page = discard_page;
2003 discard_page = discard_page->next;
2004
2005 stat(s, DEACTIVATE_EMPTY);
2006 discard_slab(s, page);
2007 stat(s, FREE_SLAB);
2008 }
2009#endif
2010}
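
/*
 * Put a page that was just frozen (in __slab_free) into a partial page
 * slot if available. If the existing per-cpu partial list already holds
 * more than s->cpu_partial free objects, it is first drained back to the
 * node partial lists.
 */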
2021static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2022{
2023#ifdef CONFIG_SLUB_CPU_PARTIAL
2024 struct page *oldpage;
2025 int pages;
2026 int pobjects;
2027
2028 do {
2029 pages = 0;
2030 pobjects = 0;
2031 oldpage = this_cpu_read(s->cpu_slab->partial);
2032
2033 if (oldpage) {
2034 pobjects = oldpage->pobjects;
2035 pages = oldpage->pages;
2036 if (drain && pobjects > s->cpu_partial) {
2037 unsigned long flags;
2038
2039
2040
2041
2042 local_irq_save(flags);
2043 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2044 local_irq_restore(flags);
2045 oldpage = NULL;
2046 pobjects = 0;
2047 pages = 0;
2048 stat(s, CPU_PARTIAL_DRAIN);
2049 }
2050 }
2051
2052 pages++;
2053 pobjects += page->objects - page->inuse;
2054
2055 page->pages = pages;
2056 page->pobjects = pobjects;
2057 page->next = oldpage;
2058
2059 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
2060 != oldpage);
2061#endif
2062}
2063
2064static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2065{
2066 stat(s, CPUSLAB_FLUSH);
2067 deactivate_slab(s, c->page, c->freelist);
2068
2069 c->tid = next_tid(c->tid);
2070 c->page = NULL;
2071 c->freelist = NULL;
2072}
2073
2074
2075
2076
2077
2078
2079static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2080{
2081 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2082
2083 if (likely(c)) {
2084 if (c->page)
2085 flush_slab(s, c);
2086
2087 unfreeze_partials(s, c);
2088 }
2089}
2090
2091static void flush_cpu_slab(void *d)
2092{
2093 struct kmem_cache *s = d;
2094
2095 __flush_cpu_slab(s, smp_processor_id());
2096}
2097
2098static bool has_cpu_slab(int cpu, void *info)
2099{
2100 struct kmem_cache *s = info;
2101 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2102
2103 return c->page || c->partial;
2104}
2105
2106static void flush_all(struct kmem_cache *s)
2107{
2108 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2109}
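
/*
 * Check if the object's page matches the node we are allocating from.
 */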
2115static inline int node_match(struct page *page, int node)
2116{
2117#ifdef CONFIG_NUMA
2118 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2119 return 0;
2120#endif
2121 return 1;
2122}
2123
2124static int count_free(struct page *page)
2125{
2126 return page->objects - page->inuse;
2127}
2128
2129static unsigned long count_partial(struct kmem_cache_node *n,
2130 int (*get_count)(struct page *))
2131{
2132 unsigned long flags;
2133 unsigned long x = 0;
2134 struct page *page;
2135
2136 spin_lock_irqsave(&n->list_lock, flags);
2137 list_for_each_entry(page, &n->partial, lru)
2138 x += get_count(page);
2139 spin_unlock_irqrestore(&n->list_lock, flags);
2140 return x;
2141}
2142
2143static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2144{
2145#ifdef CONFIG_SLUB_DEBUG
2146 return atomic_long_read(&n->total_objects);
2147#else
2148 return 0;
2149#endif
2150}
2151
2152static noinline void
2153slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2154{
2155 int node;
2156
2157 printk(KERN_WARNING
2158 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2159 nid, gfpflags);
2160 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2161 "default order: %d, min order: %d\n", s->name, s->object_size,
2162 s->size, oo_order(s->oo), oo_order(s->min));
2163
2164 if (oo_order(s->min) > get_order(s->object_size))
2165 printk(KERN_WARNING " %s debugging increased min order, use "
2166 "slub_debug=O to disable.\n", s->name);
2167
2168 for_each_online_node(node) {
2169 struct kmem_cache_node *n = get_node(s, node);
2170 unsigned long nr_slabs;
2171 unsigned long nr_objs;
2172 unsigned long nr_free;
2173
2174 if (!n)
2175 continue;
2176
2177 nr_free = count_partial(n, count_free);
2178 nr_slabs = node_nr_slabs(n);
2179 nr_objs = node_nr_objs(n);
2180
2181 printk(KERN_WARNING
2182 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2183 node, nr_slabs, nr_objs, nr_free);
2184 }
2185}
2186
2187static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2188 int node, struct kmem_cache_cpu **pc)
2189{
2190 void *freelist;
2191 struct kmem_cache_cpu *c = *pc;
2192 struct page *page;
2193
2194 freelist = get_partial(s, flags, node, c);
2195
2196 if (freelist)
2197 return freelist;
2198
2199 page = new_slab(s, flags, node);
2200 if (page) {
2201 c = __this_cpu_ptr(s->cpu_slab);
2202 if (c->page)
2203 flush_slab(s, c);
2204
2205
2206
2207
2208
2209 freelist = page->freelist;
2210 page->freelist = NULL;
2211
2212 stat(s, ALLOC_SLAB);
2213 c->page = page;
2214 *pc = c;
2215 } else
2216 freelist = NULL;
2217
2218 return freelist;
2219}
2220
2221static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2222{
2223 if (unlikely(PageSlabPfmemalloc(page)))
2224 return gfp_pfmemalloc_allowed(gfpflags);
2225
2226 return true;
2227}
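
/*
 * Check the page->freelist of a page and either transfer the freelist to the
 * per cpu freelist or deactivate the page.
 *
 * The page is still frozen if the return value is not NULL.
 *
 * If this function returns NULL then the page has been unfrozen.
 */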
2239static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2240{
2241 struct page new;
2242 unsigned long counters;
2243 void *freelist;
2244
2245 do {
2246 freelist = page->freelist;
2247 counters = page->counters;
2248
2249 new.counters = counters;
2250 VM_BUG_ON(!new.frozen);
2251
2252 new.inuse = page->objects;
2253 new.frozen = freelist != NULL;
2254
2255 } while (!__cmpxchg_double_slab(s, page,
2256 freelist, counters,
2257 NULL, new.counters,
2258 "get_freelist"));
2259
2260 return freelist;
2261}

/*
 * Slow path. The lockless freelist is empty or we need to perform
 * debugging duties.
 *
 * Processing is still very fast if new objects have been freed to the
 * regular freelist. In that case we simply take over the regular freelist
 * as the lockless freelist and zap the regular freelist.
 *
 * If that is not working then we fall back to the partial lists. We take the
 * first element of the freelist as the object to allocate now and move the
 * rest of the freelist to the lockless freelist.
 *
 * And if we were unable to get a new slab from the partial slab lists then
 * we need to allocate a new slab. This is the slowest path since it involves
 * a call to the page allocator and the setup of a new slab.
 */
static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
			  unsigned long addr, struct kmem_cache_cpu *c)
{
	void *freelist;
	struct page *page;
	unsigned long flags;

	local_irq_save(flags);
#ifdef CONFIG_PREEMPT
	/*
	 * We may have been preempted and rescheduled on a different
	 * cpu before disabling interrupts. Need to reload the cpu area
	 * pointer.
	 */
	c = this_cpu_ptr(s->cpu_slab);
#endif

	page = c->page;
	if (!page)
		goto new_slab;
redo:

	if (unlikely(!node_match(page, node))) {
		stat(s, ALLOC_NODE_MISMATCH);
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/*
	 * By rights, we should be searching for a slab page that was
	 * PFMEMALLOC but right now, we are losing the pfmemalloc
	 * information when the page leaves the per-cpu allocator.
	 */
	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
		deactivate_slab(s, page, c->freelist);
		c->page = NULL;
		c->freelist = NULL;
		goto new_slab;
	}

	/* Must check again c->freelist in case of cpu migration or IRQ */
	freelist = c->freelist;
	if (freelist)
		goto load_freelist;

	stat(s, ALLOC_SLOWPATH);

	freelist = get_freelist(s, page);

	if (!freelist) {
		c->page = NULL;
		stat(s, DEACTIVATE_BYPASS);
		goto new_slab;
	}

	stat(s, ALLOC_REFILL);

load_freelist:
	/*
	 * freelist is pointing to the list of objects to be used.
	 * page is pointing to the page from which the objects are obtained.
	 * That page must be frozen for per cpu allocations to work.
	 */
	VM_BUG_ON(!c->page->frozen);
	c->freelist = get_freepointer(s, freelist);
	c->tid = next_tid(c->tid);
	local_irq_restore(flags);
	return freelist;

new_slab:

	if (c->partial) {
		page = c->page = c->partial;
		c->partial = page->next;
		stat(s, CPU_PARTIAL_ALLOC);
		c->freelist = NULL;
		goto redo;
	}

	freelist = new_slab_objects(s, gfpflags, node, &c);

	if (unlikely(!freelist)) {
		if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
			slab_out_of_memory(s, gfpflags, node);

		local_irq_restore(flags);
		return NULL;
	}

	page = c->page;
	if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
		goto load_freelist;

	/* Only entered in the debug case */
	if (kmem_cache_debug(s) &&
			!alloc_debug_processing(s, page, freelist, addr))
		goto new_slab;	/* Slab failed checks. Next slab needed */

	deactivate_slab(s, page, get_freepointer(s, freelist));
	c->page = NULL;
	c->freelist = NULL;
	local_irq_restore(flags);
	return freelist;
}

/*
 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
 * have the fastpath folded into their functions. So no function call
 * overhead for requests that can be satisfied on the fastpath.
 *
 * The fastpath works by first checking if the lockless freelist can be used.
 * If not then __slab_alloc is called for slow processing.
 *
 * Otherwise we can simply pick the next object from the lockless free list.
 */
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
		gfp_t gfpflags, int node, unsigned long addr)
{
	void **object;
	struct kmem_cache_cpu *c;
	struct page *page;
	unsigned long tid;

	if (slab_pre_alloc_hook(s, gfpflags))
		return NULL;

	s = memcg_kmem_get_cache(s, gfpflags);
redo:
	/*
	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
	 * enabled. We may switch back and forth between cpus while
	 * reading from one cpu area. That does not matter as long as we
	 * end up on the original cpu again when doing the cmpxchg.
	 *
	 * Preemption is disabled for the retrieval of the tid because that
	 * must occur on the current processor. We cannot have rescheduling
	 * happen between the retrieval of the tid and the per cpu area
	 * pointer; a mismatch is caught by the cmpxchg, which then retries.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);

	/*
	 * The transaction ids are globally unique per cpu and per operation
	 * on a per cpu queue. Thus they guarantee that the cmpxchg_double
	 * occurs on the right processor and that there was no operation on
	 * the linked list in between.
	 */
	tid = c->tid;
	preempt_enable();

	object = c->freelist;
	page = c->page;
	if (unlikely(!object || !node_match(page, node)))
		object = __slab_alloc(s, gfpflags, node, addr, c);
	else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * The cmpxchg will only match if there was no additional
		 * operation and if we are on the right processor.
		 *
		 * The cmpxchg does the following atomically (without lock
		 * semantics!)
		 * 1. Relocate first pointer to the current per cpu area.
		 * 2. Verify that tid and freelist have not been changed
		 * 3. If they were not changed replace tid and freelist
		 *
		 * Since this is without lock semantics the protection is only
		 * against code executing on this cpu *not* from access by
		 * other cpus.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->object_size);

	slab_post_alloc_hook(s, gfpflags, object);

	return object;
}

static __always_inline void *slab_alloc(struct kmem_cache *s,
		gfp_t gfpflags, unsigned long addr)
{
	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
}

void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
	void *ret = slab_alloc(s, gfpflags, _RET_IP_);

	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
				s->size, gfpflags);

	return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
2490
2491#ifdef CONFIG_TRACING
2492void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2493{
2494 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2495 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2496 return ret;
2497}
2498EXPORT_SYMBOL(kmem_cache_alloc_trace);
2499#endif
2500
2501#ifdef CONFIG_NUMA
2502void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2503{
2504 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2505
2506 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2507 s->object_size, s->size, gfpflags, node);
2508
2509 return ret;
2510}
2511EXPORT_SYMBOL(kmem_cache_alloc_node);
2512
2513#ifdef CONFIG_TRACING
2514void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2515 gfp_t gfpflags,
2516 int node, size_t size)
2517{
2518 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2519
2520 trace_kmalloc_node(_RET_IP_, ret,
2521 size, s->size, gfpflags, node);
2522 return ret;
2523}
2524EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2525#endif
2526#endif

/*
 * Slow path handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
		!(n = free_debug_processing(s, page, x, addr, &flags)))
		return;

	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen) {

			if (kmem_cache_has_cpu_partial(s) && !prior) {

				/*
				 * Slab was on no list before and will be
				 * partially empty.
				 * We can defer the list move and instead
				 * freeze it.
				 */
				new.frozen = 1;

			} else { /* Needs to be taken off a list */

				n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize
				 * with other processors updating the list
				 * of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}

	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
		if (kmem_cache_debug(s))
			remove_full(s, n, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else {
		/* Slab must be on the full list */
		remove_full(s, n, page);
	}

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}

/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Determine the per cpu slab of the current cpu. The cpu may change
	 * afterward, but that does not matter since data is retrieved via
	 * this pointer. If we are on the same cpu during the cmpxchg then
	 * the free will succeed.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	preempt_enable();

	if (likely(page == c->page)) {
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);
}

void kmem_cache_free(struct kmem_cache *s, void *x)
{
	s = cache_from_obj(s, x);
	if (!s)
		return;
	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
	trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
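/*
 * Slab allocation order and object count tuning. The defaults below can
 * be overridden at boot time through the slub_min_order=, slub_max_order=
 * and slub_min_objects= parameters (handled by the __setup() hooks further
 * down); for example, slub_max_order=1 caps slab pages at order 1.
 */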
2728static int slub_min_order;
2729static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
2730static int slub_min_objects;
2731
2732
2733
2734
2735
2736static int slub_nomerge;
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
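/*
 * Calculate the order of allocation for a slab of the given object size.
 * Starting from the smallest order that can hold min_objects (but no lower
 * than slub_min_order), pick the first order whose leftover space, after
 * subtracting the reserved bytes, is no more than 1/fract_leftover of the
 * slab size. If even the minimum order would exceed MAX_OBJS_PER_PAGE
 * objects, a correspondingly smaller order is returned right away.
 */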
2763static inline int slab_order(int size, int min_objects,
2764 int max_order, int fract_leftover, int reserved)
2765{
2766 int order;
2767 int rem;
2768 int min_order = slub_min_order;
2769
2770 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
2771 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
2772
2773 for (order = max(min_order,
2774 fls(min_objects * size - 1) - PAGE_SHIFT);
2775 order <= max_order; order++) {
2776
2777 unsigned long slab_size = PAGE_SIZE << order;
2778
2779 if (slab_size < min_objects * size + reserved)
2780 continue;
2781
2782 rem = (slab_size - reserved) % size;
2783
2784 if (rem <= slab_size / fract_leftover)
2785 break;
2786
2787 }
2788
2789 return order;
2790}
2791
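/*
 * Search for an acceptable order: start with a minimum object count
 * derived from the number of possible cpus (unless slub_min_objects was
 * set) and progressively relax the allowed waste, from 1/16 up to 1/4 of
 * the slab, and then the object count, until an order within
 * slub_max_order works. As a last resort accept a single object per slab
 * with any order below MAX_ORDER.
 */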
2792static inline int calculate_order(int size, int reserved)
2793{
2794 int order;
2795 int min_objects;
2796 int fraction;
2797 int max_objects;
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807 min_objects = slub_min_objects;
2808 if (!min_objects)
2809 min_objects = 4 * (fls(nr_cpu_ids) + 1);
2810 max_objects = order_objects(slub_max_order, size, reserved);
2811 min_objects = min(min_objects, max_objects);
2812
2813 while (min_objects > 1) {
2814 fraction = 16;
2815 while (fraction >= 4) {
2816 order = slab_order(size, min_objects,
2817 slub_max_order, fraction, reserved);
2818 if (order <= slub_max_order)
2819 return order;
2820 fraction /= 2;
2821 }
2822 min_objects--;
2823 }
2824
2825
2826
2827
2828
2829 order = slab_order(size, 1, slub_max_order, 1, reserved);
2830 if (order <= slub_max_order)
2831 return order;
2832
2833
2834
2835
2836 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
2837 if (order < MAX_ORDER)
2838 return order;
2839 return -ENOSYS;
2840}
2841
2842static void
2843init_kmem_cache_node(struct kmem_cache_node *n)
2844{
2845 n->nr_partial = 0;
2846 spin_lock_init(&n->list_lock);
2847 INIT_LIST_HEAD(&n->partial);
2848#ifdef CONFIG_SLUB_DEBUG
2849 atomic_long_set(&n->nr_slabs, 0);
2850 atomic_long_set(&n->total_objects, 0);
2851 INIT_LIST_HEAD(&n->full);
2852#endif
2853}
2854
2855static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2856{
2857 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2858 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
2859
2860
2861
2862
2863
2864 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
2865 2 * sizeof(void *));
2866
2867 if (!s->cpu_slab)
2868 return 0;
2869
2870 init_kmem_cache_cpus(s);
2871
2872 return 1;
2873}
2874
2875static struct kmem_cache *kmem_cache_node;
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
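/*
 * Bootstrap the per node structure for kmem_cache_node itself: carve the
 * kmem_cache_node object straight out of a freshly allocated slab. No
 * locking is needed because this only runs while slab_state is still DOWN
 * and nothing else can use the allocator yet.
 */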
2886static void early_kmem_cache_node_alloc(int node)
2887{
2888 struct page *page;
2889 struct kmem_cache_node *n;
2890
2891 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2892
2893 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2894
2895 BUG_ON(!page);
2896 if (page_to_nid(page) != node) {
2897 printk(KERN_ERR "SLUB: Unable to allocate memory from "
2898 "node %d\n", node);
2899 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
2900 "in order to be able to continue\n");
2901 }
2902
2903 n = page->freelist;
2904 BUG_ON(!n);
2905 page->freelist = get_freepointer(kmem_cache_node, n);
2906 page->inuse = 1;
2907 page->frozen = 0;
2908 kmem_cache_node->node[node] = n;
2909#ifdef CONFIG_SLUB_DEBUG
2910 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
2911 init_tracking(kmem_cache_node, n);
2912#endif
2913 init_kmem_cache_node(n);
2914 inc_slabs_node(kmem_cache_node, node, page->objects);
2915
2916
2917
2918
2919
2920 __add_partial(n, page, DEACTIVATE_TO_HEAD);
2921}
2922
2923static void free_kmem_cache_nodes(struct kmem_cache *s)
2924{
2925 int node;
2926
2927 for_each_node_state(node, N_NORMAL_MEMORY) {
2928 struct kmem_cache_node *n = s->node[node];
2929
2930 if (n)
2931 kmem_cache_free(kmem_cache_node, n);
2932
2933 s->node[node] = NULL;
2934 }
2935}
2936
2937static int init_kmem_cache_nodes(struct kmem_cache *s)
2938{
2939 int node;
2940
2941 for_each_node_state(node, N_NORMAL_MEMORY) {
2942 struct kmem_cache_node *n;
2943
2944 if (slab_state == DOWN) {
2945 early_kmem_cache_node_alloc(node);
2946 continue;
2947 }
2948 n = kmem_cache_alloc_node(kmem_cache_node,
2949 GFP_KERNEL, node);
2950
2951 if (!n) {
2952 free_kmem_cache_nodes(s);
2953 return 0;
2954 }
2955
2956 s->node[node] = n;
2957 init_kmem_cache_node(n);
2958 }
2959 return 1;
2960}
2961
2962static void set_min_partial(struct kmem_cache *s, unsigned long min)
2963{
2964 if (min < MIN_PARTIAL)
2965 min = MIN_PARTIAL;
2966 else if (min > MAX_PARTIAL)
2967 min = MAX_PARTIAL;
2968 s->min_partial = min;
2969}
2970
2971
2972
2973
2974
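/*
 * Work out the complete memory layout of an object in this cache:
 * word-align the payload, account for red zoning, place the free pointer
 * outside the payload when poisoning, RCU freeing or a constructor
 * require the object contents to stay intact, add tracking data, apply
 * the final alignment and then pick the slab order and the number of
 * objects per slab.
 */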
2975static int calculate_sizes(struct kmem_cache *s, int forced_order)
2976{
2977 unsigned long flags = s->flags;
2978 unsigned long size = s->object_size;
2979 int order;
2980
2981
2982
2983
2984
2985
2986 size = ALIGN(size, sizeof(void *));
2987
2988#ifdef CONFIG_SLUB_DEBUG
2989
2990
2991
2992
2993
2994 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
2995 !s->ctor)
2996 s->flags |= __OBJECT_POISON;
2997 else
2998 s->flags &= ~__OBJECT_POISON;
2999
3000
3001
3002
3003
3004
3005
3006 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3007 size += sizeof(void *);
3008#endif
3009
3010
3011
3012
3013
3014 s->inuse = size;
3015
3016 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
3017 s->ctor)) {
3018
3019
3020
3021
3022
3023
3024
3025
3026 s->offset = size;
3027 size += sizeof(void *);
3028 }
3029
3030#ifdef CONFIG_SLUB_DEBUG
3031 if (flags & SLAB_STORE_USER)
3032 /*
3033 * Need to store information about allocs and frees after
3034 * the object.
3035 */
3036 size += 2 * sizeof(struct track);
3037
3038 if (flags & SLAB_RED_ZONE)
3039 /*
3040 * Add some empty padding so that we can catch
3041 * overwrites from earlier objects rather than let
3042 * tracking information or the free pointer be
3043 * corrupted if a user writes before the start
3044 * of the object.
3045 */
3046 size += sizeof(void *);
3047#endif
3048
3049
3050
3051
3052
3053
3054 size = ALIGN(size, s->align);
3055 s->size = size;
3056 if (forced_order >= 0)
3057 order = forced_order;
3058 else
3059 order = calculate_order(size, s->reserved);
3060
3061 if (order < 0)
3062 return 0;
3063
3064 s->allocflags = 0;
3065 if (order)
3066 s->allocflags |= __GFP_COMP;
3067
3068 if (s->flags & SLAB_CACHE_DMA)
3069 s->allocflags |= GFP_DMA;
3070
3071 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3072 s->allocflags |= __GFP_RECLAIMABLE;
3073
3074
3075
3076
3077 s->oo = oo_make(order, size, s->reserved);
3078 s->min = oo_make(get_order(size), size, s->reserved);
3079 if (oo_objects(s->oo) > oo_objects(s->max))
3080 s->max = s->oo;
3081
3082 return !!oo_objects(s->oo);
3083}
3084
3085static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3086{
3087 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3088 s->reserved = 0;
3089
3090 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3091 s->reserved = sizeof(struct rcu_head);
3092
3093 if (!calculate_sizes(s, -1))
3094 goto error;
3095 if (disable_higher_order_debug) {
3096
3097
3098
3099
3100 if (get_order(s->size) > get_order(s->object_size)) {
3101 s->flags &= ~DEBUG_METADATA_FLAGS;
3102 s->offset = 0;
3103 if (!calculate_sizes(s, -1))
3104 goto error;
3105 }
3106 }
3107
3108#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3109 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3110 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
3111 /* Enable fast mode */
3112 s->flags |= __CMPXCHG_DOUBLE;
3113#endif
3114
3115
3116
3117
3118
3119 set_min_partial(s, ilog2(s->size) / 2);
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
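/*
 * cpu_partial limits the number of objects kept on the per cpu
 * partial lists: larger objects get a smaller limit, and caches
 * for which per cpu partial lists are unavailable get none.
 */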
3138 if (!kmem_cache_has_cpu_partial(s))
3139 s->cpu_partial = 0;
3140 else if (s->size >= PAGE_SIZE)
3141 s->cpu_partial = 2;
3142 else if (s->size >= 1024)
3143 s->cpu_partial = 6;
3144 else if (s->size >= 256)
3145 s->cpu_partial = 13;
3146 else
3147 s->cpu_partial = 30;
3148
3149#ifdef CONFIG_NUMA
3150 s->remote_node_defrag_ratio = 1000;
3151#endif
3152 if (!init_kmem_cache_nodes(s))
3153 goto error;
3154
3155 if (alloc_kmem_cache_cpus(s))
3156 return 0;
3157
3158 free_kmem_cache_nodes(s);
3159error:
3160 if (flags & SLAB_PANIC)
3161 panic("Cannot create slab %s size=%lu realsize=%u "
3162 "order=%u offset=%u flags=%lx\n",
3163 s->name, (unsigned long)s->size, s->size,
3164 oo_order(s->oo), s->offset, flags);
3165 return -EINVAL;
3166}
3167
3168static void list_slab_objects(struct kmem_cache *s, struct page *page,
3169 const char *text)
3170{
3171#ifdef CONFIG_SLUB_DEBUG
3172 void *addr = page_address(page);
3173 void *p;
3174 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3175 sizeof(long), GFP_ATOMIC);
3176 if (!map)
3177 return;
3178 slab_err(s, page, text, s->name);
3179 slab_lock(page);
3180
3181 get_map(s, page, map);
3182 for_each_object(p, s, addr, page->objects) {
3183
3184 if (!test_bit(slab_index(p, s, addr), map)) {
3185 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3186 p, p - addr);
3187 print_tracking(s, p);
3188 }
3189 }
3190 slab_unlock(page);
3191 kfree(map);
3192#endif
3193}
3194
3195
3196
3197
3198
3199
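/*
 * Release all empty slabs on a node's partial list during cache shutdown.
 * Slabs that still contain objects are left in place and their remaining
 * objects are reported through list_slab_objects().
 */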
3200static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3201{
3202 struct page *page, *h;
3203
3204 list_for_each_entry_safe(page, h, &n->partial, lru) {
3205 if (!page->inuse) {
3206 __remove_partial(n, page);
3207 discard_slab(s, page);
3208 } else {
3209 list_slab_objects(s, page,
3210 "Objects remaining in %s on kmem_cache_close()");
3211 }
3212 }
3213}
3214
3215
3216
3217
3218static inline int kmem_cache_close(struct kmem_cache *s)
3219{
3220 int node;
3221
3222 flush_all(s);
3223
3224 for_each_node_state(node, N_NORMAL_MEMORY) {
3225 struct kmem_cache_node *n = get_node(s, node);
3226
3227 free_partial(s, n);
3228 if (n->nr_partial || slabs_node(s, node))
3229 return 1;
3230 }
3231 free_percpu(s->cpu_slab);
3232 free_kmem_cache_nodes(s);
3233 return 0;
3234}
3235
3236int __kmem_cache_shutdown(struct kmem_cache *s)
3237{
3238 int rc = kmem_cache_close(s);
3239
3240 if (!rc) {
3241
3242
3243
3244
3245
3246
3247
3248
3249 mutex_unlock(&slab_mutex);
3250 sysfs_slab_remove(s);
3251 mutex_lock(&slab_mutex);
3252 }
3253
3254 return rc;
3255}
3256
3257
3258
3259
3260
3261static int __init setup_slub_min_order(char *str)
3262{
3263 get_option(&str, &slub_min_order);
3264
3265 return 1;
3266}
3267
3268__setup("slub_min_order=", setup_slub_min_order);
3269
3270static int __init setup_slub_max_order(char *str)
3271{
3272 get_option(&str, &slub_max_order);
3273 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3274
3275 return 1;
3276}
3277
3278__setup("slub_max_order=", setup_slub_max_order);
3279
3280static int __init setup_slub_min_objects(char *str)
3281{
3282 get_option(&str, &slub_min_objects);
3283
3284 return 1;
3285}
3286
3287__setup("slub_min_objects=", setup_slub_min_objects);
3288
3289static int __init setup_slub_nomerge(char *str)
3290{
3291 slub_nomerge = 1;
3292 return 1;
3293}
3294
3295__setup("slub_nomerge", setup_slub_nomerge);
3296
3297void *__kmalloc(size_t size, gfp_t flags)
3298{
3299 struct kmem_cache *s;
3300 void *ret;
3301
3302 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3303 return kmalloc_large(size, flags);
3304
3305 s = kmalloc_slab(size, flags);
3306
3307 if (unlikely(ZERO_OR_NULL_PTR(s)))
3308 return s;
3309
3310 ret = slab_alloc(s, flags, _RET_IP_);
3311
3312 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3313
3314 return ret;
3315}
3316EXPORT_SYMBOL(__kmalloc);
3317
3318#ifdef CONFIG_NUMA
3319static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3320{
3321 struct page *page;
3322 void *ptr = NULL;
3323
3324 flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
3325 page = alloc_pages_node(node, flags, get_order(size));
3326 if (page)
3327 ptr = page_address(page);
3328
3329 kmalloc_large_node_hook(ptr, size, flags);
3330 return ptr;
3331}
3332
3333void *__kmalloc_node(size_t size, gfp_t flags, int node)
3334{
3335 struct kmem_cache *s;
3336 void *ret;
3337
3338 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3339 ret = kmalloc_large_node(size, flags, node);
3340
3341 trace_kmalloc_node(_RET_IP_, ret,
3342 size, PAGE_SIZE << get_order(size),
3343 flags, node);
3344
3345 return ret;
3346 }
3347
3348 s = kmalloc_slab(size, flags);
3349
3350 if (unlikely(ZERO_OR_NULL_PTR(s)))
3351 return s;
3352
3353 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3354
3355 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3356
3357 return ret;
3358}
3359EXPORT_SYMBOL(__kmalloc_node);
3360#endif
3361
3362size_t ksize(const void *object)
3363{
3364 struct page *page;
3365
3366 if (unlikely(object == ZERO_SIZE_PTR))
3367 return 0;
3368
3369 page = virt_to_head_page(object);
3370
3371 if (unlikely(!PageSlab(page))) {
3372 WARN_ON(!PageCompound(page));
3373 return PAGE_SIZE << compound_order(page);
3374 }
3375
3376 return slab_ksize(page->slab_cache);
3377}
3378EXPORT_SYMBOL(ksize);
3379
3380void kfree(const void *x)
3381{
3382 struct page *page;
3383 void *object = (void *)x;
3384
3385 trace_kfree(_RET_IP_, x);
3386
3387 if (unlikely(ZERO_OR_NULL_PTR(x)))
3388 return;
3389
3390 page = virt_to_head_page(x);
3391 if (unlikely(!PageSlab(page))) {
3392 BUG_ON(!PageCompound(page));
3393 kfree_hook(x);
3394 __free_memcg_kmem_pages(page, compound_order(page));
3395 return;
3396 }
3397 slab_free(page->slab_cache, page, object, _RET_IP_);
3398}
3399EXPORT_SYMBOL(kfree);
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
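/*
 * Discard all completely empty slabs on the partial lists and sort the
 * remaining partial slabs by the number of objects in use, using a
 * kmalloc'ed array of list heads indexed by the in-use count as scratch
 * space. The fullest slabs end up at the head of the partial list and the
 * emptiest at the tail, giving the latter the best chance of becoming
 * free later on.
 */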
3411int kmem_cache_shrink(struct kmem_cache *s)
3412{
3413 int node;
3414 int i;
3415 struct kmem_cache_node *n;
3416 struct page *page;
3417 struct page *t;
3418 int objects = oo_objects(s->max);
3419 struct list_head *slabs_by_inuse =
3420 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3421 unsigned long flags;
3422
3423 if (!slabs_by_inuse)
3424 return -ENOMEM;
3425
3426 flush_all(s);
3427 for_each_node_state(node, N_NORMAL_MEMORY) {
3428 n = get_node(s, node);
3429
3430 if (!n->nr_partial)
3431 continue;
3432
3433 for (i = 0; i < objects; i++)
3434 INIT_LIST_HEAD(slabs_by_inuse + i);
3435
3436 spin_lock_irqsave(&n->list_lock, flags);
3437
3438
3439
3440
3441
3442
3443
3444 list_for_each_entry_safe(page, t, &n->partial, lru) {
3445 list_move(&page->lru, slabs_by_inuse + page->inuse);
3446 if (!page->inuse)
3447 n->nr_partial--;
3448 }
3449
3450
3451
3452
3453
3454 for (i = objects - 1; i > 0; i--)
3455 list_splice(slabs_by_inuse + i, n->partial.prev);
3456
3457 spin_unlock_irqrestore(&n->list_lock, flags);
3458
3459
3460 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3461 discard_slab(s, page);
3462 }
3463
3464 kfree(slabs_by_inuse);
3465 return 0;
3466}
3467EXPORT_SYMBOL(kmem_cache_shrink);
3468
3469static int slab_mem_going_offline_callback(void *arg)
3470{
3471 struct kmem_cache *s;
3472
3473 mutex_lock(&slab_mutex);
3474 list_for_each_entry(s, &slab_caches, list)
3475 kmem_cache_shrink(s);
3476 mutex_unlock(&slab_mutex);
3477
3478 return 0;
3479}
3480
3481static void slab_mem_offline_callback(void *arg)
3482{
3483 struct kmem_cache_node *n;
3484 struct kmem_cache *s;
3485 struct memory_notify *marg = arg;
3486 int offline_node;
3487
3488 offline_node = marg->status_change_nid_normal;
3489
3490
3491
3492
3493
3494 if (offline_node < 0)
3495 return;
3496
3497 mutex_lock(&slab_mutex);
3498 list_for_each_entry(s, &slab_caches, list) {
3499 n = get_node(s, offline_node);
3500 if (n) {
3501
3502
3503
3504
3505
3506
3507 BUG_ON(slabs_node(s, offline_node));
3508
3509 s->node[offline_node] = NULL;
3510 kmem_cache_free(kmem_cache_node, n);
3511 }
3512 }
3513 mutex_unlock(&slab_mutex);
3514}
3515
3516static int slab_mem_going_online_callback(void *arg)
3517{
3518 struct kmem_cache_node *n;
3519 struct kmem_cache *s;
3520 struct memory_notify *marg = arg;
3521 int nid = marg->status_change_nid_normal;
3522 int ret = 0;
3523
3524
3525
3526
3527
3528 if (nid < 0)
3529 return 0;
3530
3531
3532
3533
3534
3535
3536 mutex_lock(&slab_mutex);
3537 list_for_each_entry(s, &slab_caches, list) {
3538
3539
3540
3541
3542
3543 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3544 if (!n) {
3545 ret = -ENOMEM;
3546 goto out;
3547 }
3548 init_kmem_cache_node(n);
3549 s->node[nid] = n;
3550 }
3551out:
3552 mutex_unlock(&slab_mutex);
3553 return ret;
3554}
3555
3556static int slab_memory_callback(struct notifier_block *self,
3557 unsigned long action, void *arg)
3558{
3559 int ret = 0;
3560
3561 switch (action) {
3562 case MEM_GOING_ONLINE:
3563 ret = slab_mem_going_online_callback(arg);
3564 break;
3565 case MEM_GOING_OFFLINE:
3566 ret = slab_mem_going_offline_callback(arg);
3567 break;
3568 case MEM_OFFLINE:
3569 case MEM_CANCEL_ONLINE:
3570 slab_mem_offline_callback(arg);
3571 break;
3572 case MEM_ONLINE:
3573 case MEM_CANCEL_OFFLINE:
3574 break;
3575 }
3576 if (ret)
3577 ret = notifier_from_errno(ret);
3578 else
3579 ret = NOTIFY_OK;
3580 return ret;
3581}
3582
3583static struct notifier_block slab_memory_callback_nb = {
3584 .notifier_call = slab_memory_callback,
3585 .priority = SLAB_CALLBACK_PRI,
3586};
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
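/*
 * Used during early boot to move a statically allocated boot-time
 * kmem_cache into its final dynamically allocated home. The slab_cache
 * pointer of every existing slab page of the cache is redirected to the
 * new structure before it is added to the global cache list.
 */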
3598static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3599{
3600 int node;
3601 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
3602
3603 memcpy(s, static_cache, kmem_cache->object_size);
3604
3605
3606
3607
3608
3609
3610 __flush_cpu_slab(s, smp_processor_id());
3611 for_each_node_state(node, N_NORMAL_MEMORY) {
3612 struct kmem_cache_node *n = get_node(s, node);
3613 struct page *p;
3614
3615 if (n) {
3616 list_for_each_entry(p, &n->partial, lru)
3617 p->slab_cache = s;
3618
3619#ifdef CONFIG_SLUB_DEBUG
3620 list_for_each_entry(p, &n->full, lru)
3621 p->slab_cache = s;
3622#endif
3623 }
3624 }
3625 list_add(&s->list, &slab_caches);
3626 return s;
3627}
3628
3629void __init kmem_cache_init(void)
3630{
3631 static __initdata struct kmem_cache boot_kmem_cache,
3632 boot_kmem_cache_node;
3633
3634 if (debug_guardpage_minorder())
3635 slub_max_order = 0;
3636
3637 kmem_cache_node = &boot_kmem_cache_node;
3638 kmem_cache = &boot_kmem_cache;
3639
3640 create_boot_cache(kmem_cache_node, "kmem_cache_node",
3641 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
3642
3643 register_hotmemory_notifier(&slab_memory_callback_nb);
3644
3645
3646 slab_state = PARTIAL;
3647
3648 create_boot_cache(kmem_cache, "kmem_cache",
3649 offsetof(struct kmem_cache, node) +
3650 nr_node_ids * sizeof(struct kmem_cache_node *),
3651 SLAB_HWCACHE_ALIGN);
3652
3653 kmem_cache = bootstrap(&boot_kmem_cache);
3654
3655
3656
3657
3658
3659
3660 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
3661
3662
3663 create_kmalloc_caches(0);
3664
3665#ifdef CONFIG_SMP
3666 register_cpu_notifier(&slab_notifier);
3667#endif
3668
3669 printk(KERN_INFO
3670 "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
3671 " CPUs=%d, Nodes=%d\n",
3672 cache_line_size(),
3673 slub_min_order, slub_max_order, slub_min_objects,
3674 nr_cpu_ids, nr_node_ids);
3675}
3676
3677void __init kmem_cache_init_late(void)
3678{
3679}
3680
3681
3682
3683
3684static int slab_unmergeable(struct kmem_cache *s)
3685{
3686 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
3687 return 1;
3688
3689 if (s->ctor)
3690 return 1;
3691
3692
3693
3694
3695 if (s->refcount < 0)
3696 return 1;
3697
3698 return 0;
3699}
3700
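/*
 * Find an existing cache that a new cache creation request can be merged
 * into. A candidate must not be unmergeable, must be at least as large
 * and as aligned as requested, must agree on the SLUB_MERGE_SAME flags,
 * must not waste a full word of space and must belong to the same memcg.
 */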
3701static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
3702 size_t align, unsigned long flags, const char *name,
3703 void (*ctor)(void *))
3704{
3705 struct kmem_cache *s;
3706
3707 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
3708 return NULL;
3709
3710 if (ctor)
3711 return NULL;
3712
3713 size = ALIGN(size, sizeof(void *));
3714 align = calculate_alignment(flags, align, size);
3715 size = ALIGN(size, align);
3716 flags = kmem_cache_flags(size, flags, name, NULL);
3717
3718 list_for_each_entry(s, &slab_caches, list) {
3719 if (slab_unmergeable(s))
3720 continue;
3721
3722 if (size > s->size)
3723 continue;
3724
3725 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
3726 continue;
3727
3728
3729
3730
3731 if ((s->size & ~(align - 1)) != s->size)
3732 continue;
3733
3734 if (s->size - size >= sizeof(void *))
3735 continue;
3736
3737 if (!cache_match_memcg(s, memcg))
3738 continue;
3739
3740 return s;
3741 }
3742 return NULL;
3743}
3744
3745struct kmem_cache *
3746__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
3747 size_t align, unsigned long flags, void (*ctor)(void *))
3748{
3749 struct kmem_cache *s;
3750
3751 s = find_mergeable(memcg, size, align, flags, name, ctor);
3752 if (s) {
3753 s->refcount++;
3754
3755
3756
3757
3758 s->object_size = max(s->object_size, (int)size);
3759 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3760
3761 if (sysfs_slab_alias(s, name)) {
3762 s->refcount--;
3763 s = NULL;
3764 }
3765 }
3766
3767 return s;
3768}
3769
3770int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
3771{
3772 int err;
3773
3774 err = kmem_cache_open(s, flags);
3775 if (err)
3776 return err;
3777
3778
3779 if (slab_state <= UP)
3780 return 0;
3781
3782 memcg_propagate_slab_attrs(s);
3783 mutex_unlock(&slab_mutex);
3784 err = sysfs_slab_add(s);
3785 mutex_lock(&slab_mutex);
3786
3787 if (err)
3788 kmem_cache_close(s);
3789
3790 return err;
3791}
3792
3793#ifdef CONFIG_SMP
3794
3795
3796
3797
3798static int slab_cpuup_callback(struct notifier_block *nfb,
3799 unsigned long action, void *hcpu)
3800{
3801 long cpu = (long)hcpu;
3802 struct kmem_cache *s;
3803 unsigned long flags;
3804
3805 switch (action) {
3806 case CPU_UP_CANCELED:
3807 case CPU_UP_CANCELED_FROZEN:
3808 case CPU_DEAD:
3809 case CPU_DEAD_FROZEN:
3810 mutex_lock(&slab_mutex);
3811 list_for_each_entry(s, &slab_caches, list) {
3812 local_irq_save(flags);
3813 __flush_cpu_slab(s, cpu);
3814 local_irq_restore(flags);
3815 }
3816 mutex_unlock(&slab_mutex);
3817 break;
3818 default:
3819 break;
3820 }
3821 return NOTIFY_OK;
3822}
3823
3824static struct notifier_block slab_notifier = {
3825 .notifier_call = slab_cpuup_callback
3826};
3827
3828#endif
3829
3830void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3831{
3832 struct kmem_cache *s;
3833 void *ret;
3834
3835 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3836 return kmalloc_large(size, gfpflags);
3837
3838 s = kmalloc_slab(size, gfpflags);
3839
3840 if (unlikely(ZERO_OR_NULL_PTR(s)))
3841 return s;
3842
3843 ret = slab_alloc(s, gfpflags, caller);
3844
3845
3846 trace_kmalloc(caller, ret, size, s->size, gfpflags);
3847
3848 return ret;
3849}
3850
3851#ifdef CONFIG_NUMA
3852void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3853 int node, unsigned long caller)
3854{
3855 struct kmem_cache *s;
3856 void *ret;
3857
3858 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3859 ret = kmalloc_large_node(size, gfpflags, node);
3860
3861 trace_kmalloc_node(caller, ret,
3862 size, PAGE_SIZE << get_order(size),
3863 gfpflags, node);
3864
3865 return ret;
3866 }
3867
3868 s = kmalloc_slab(size, gfpflags);
3869
3870 if (unlikely(ZERO_OR_NULL_PTR(s)))
3871 return s;
3872
3873 ret = slab_alloc_node(s, gfpflags, node, caller);
3874
3875
3876 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
3877
3878 return ret;
3879}
3880#endif
3881
3882#ifdef CONFIG_SYSFS
3883static int count_inuse(struct page *page)
3884{
3885 return page->inuse;
3886}
3887
3888static int count_total(struct page *page)
3889{
3890 return page->objects;
3891}
3892#endif
3893
3894#ifdef CONFIG_SLUB_DEBUG
3895static int validate_slab(struct kmem_cache *s, struct page *page,
3896 unsigned long *map)
3897{
3898 void *p;
3899 void *addr = page_address(page);
3900
3901 if (!check_slab(s, page) ||
3902 !on_freelist(s, page, NULL))
3903 return 0;
3904
3905
3906 bitmap_zero(map, page->objects);
3907
3908 get_map(s, page, map);
3909 for_each_object(p, s, addr, page->objects) {
3910 if (test_bit(slab_index(p, s, addr), map))
3911 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
3912 return 0;
3913 }
3914
3915 for_each_object(p, s, addr, page->objects)
3916 if (!test_bit(slab_index(p, s, addr), map))
3917 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
3918 return 0;
3919 return 1;
3920}
3921
3922static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3923 unsigned long *map)
3924{
3925 slab_lock(page);
3926 validate_slab(s, page, map);
3927 slab_unlock(page);
3928}
3929
3930static int validate_slab_node(struct kmem_cache *s,
3931 struct kmem_cache_node *n, unsigned long *map)
3932{
3933 unsigned long count = 0;
3934 struct page *page;
3935 unsigned long flags;
3936
3937 spin_lock_irqsave(&n->list_lock, flags);
3938
3939 list_for_each_entry(page, &n->partial, lru) {
3940 validate_slab_slab(s, page, map);
3941 count++;
3942 }
3943 if (count != n->nr_partial)
3944 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
3945 "counter=%ld\n", s->name, count, n->nr_partial);
3946
3947 if (!(s->flags & SLAB_STORE_USER))
3948 goto out;
3949
3950 list_for_each_entry(page, &n->full, lru) {
3951 validate_slab_slab(s, page, map);
3952 count++;
3953 }
3954 if (count != atomic_long_read(&n->nr_slabs))
3955 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
3956 "counter=%ld\n", s->name, count,
3957 atomic_long_read(&n->nr_slabs));
3958
3959out:
3960 spin_unlock_irqrestore(&n->list_lock, flags);
3961 return count;
3962}
3963
3964static long validate_slab_cache(struct kmem_cache *s)
3965{
3966 int node;
3967 unsigned long count = 0;
3968 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
3969 sizeof(unsigned long), GFP_KERNEL);
3970
3971 if (!map)
3972 return -ENOMEM;
3973
3974 flush_all(s);
3975 for_each_node_state(node, N_NORMAL_MEMORY) {
3976 struct kmem_cache_node *n = get_node(s, node);
3977
3978 count += validate_slab_node(s, n, map);
3979 }
3980 kfree(map);
3981 return count;
3982}
3983
3984
3985
3986
3987
3988struct location {
3989 unsigned long count;
3990 unsigned long addr;
3991 long long sum_time;
3992 long min_time;
3993 long max_time;
3994 long min_pid;
3995 long max_pid;
3996 DECLARE_BITMAP(cpus, NR_CPUS);
3997 nodemask_t nodes;
3998};
3999
4000struct loc_track {
4001 unsigned long max;
4002 unsigned long count;
4003 struct location *loc;
4004};
4005
4006static void free_loc_track(struct loc_track *t)
4007{
4008 if (t->max)
4009 free_pages((unsigned long)t->loc,
4010 get_order(sizeof(struct location) * t->max));
4011}
4012
4013static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4014{
4015 struct location *l;
4016 int order;
4017
4018 order = get_order(sizeof(struct location) * max);
4019
4020 l = (void *)__get_free_pages(flags, order);
4021 if (!l)
4022 return 0;
4023
4024 if (t->count) {
4025 memcpy(l, t->loc, sizeof(struct location) * t->count);
4026 free_loc_track(t);
4027 }
4028 t->max = max;
4029 t->loc = l;
4030 return 1;
4031}
4032
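/*
 * Account one alloc/free track entry in the sorted location table: a
 * binary search by caller address either folds the sample into an
 * existing entry (count, age and pid ranges, cpu and node masks) or
 * inserts a new entry at the right position, doubling the table with
 * alloc_loc_track() when it runs out of space.
 */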
4033static int add_location(struct loc_track *t, struct kmem_cache *s,
4034 const struct track *track)
4035{
4036 long start, end, pos;
4037 struct location *l;
4038 unsigned long caddr;
4039 unsigned long age = jiffies - track->when;
4040
4041 start = -1;
4042 end = t->count;
4043
4044 for ( ; ; ) {
4045 pos = start + (end - start + 1) / 2;
4046
4047
4048
4049
4050
4051 if (pos == end)
4052 break;
4053
4054 caddr = t->loc[pos].addr;
4055 if (track->addr == caddr) {
4056
4057 l = &t->loc[pos];
4058 l->count++;
4059 if (track->when) {
4060 l->sum_time += age;
4061 if (age < l->min_time)
4062 l->min_time = age;
4063 if (age > l->max_time)
4064 l->max_time = age;
4065
4066 if (track->pid < l->min_pid)
4067 l->min_pid = track->pid;
4068 if (track->pid > l->max_pid)
4069 l->max_pid = track->pid;
4070
4071 cpumask_set_cpu(track->cpu,
4072 to_cpumask(l->cpus));
4073 }
4074 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4075 return 1;
4076 }
4077
4078 if (track->addr < caddr)
4079 end = pos;
4080 else
4081 start = pos;
4082 }
4083
4084
4085
4086
4087 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4088 return 0;
4089
4090 l = t->loc + pos;
4091 if (pos < t->count)
4092 memmove(l + 1, l,
4093 (t->count - pos) * sizeof(struct location));
4094 t->count++;
4095 l->count = 1;
4096 l->addr = track->addr;
4097 l->sum_time = age;
4098 l->min_time = age;
4099 l->max_time = age;
4100 l->min_pid = track->pid;
4101 l->max_pid = track->pid;
4102 cpumask_clear(to_cpumask(l->cpus));
4103 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4104 nodes_clear(l->nodes);
4105 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4106 return 1;
4107}
4108
4109static void process_slab(struct loc_track *t, struct kmem_cache *s,
4110 struct page *page, enum track_item alloc,
4111 unsigned long *map)
4112{
4113 void *addr = page_address(page);
4114 void *p;
4115
4116 bitmap_zero(map, page->objects);
4117 get_map(s, page, map);
4118
4119 for_each_object(p, s, addr, page->objects)
4120 if (!test_bit(slab_index(p, s, addr), map))
4121 add_location(t, s, get_track(s, p, alloc));
4122}
4123
4124static int list_locations(struct kmem_cache *s, char *buf,
4125 enum track_item alloc)
4126{
4127 int len = 0;
4128 unsigned long i;
4129 struct loc_track t = { 0, 0, NULL };
4130 int node;
4131 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4132 sizeof(unsigned long), GFP_KERNEL);
4133
4134 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4135 GFP_TEMPORARY)) {
4136 kfree(map);
4137 return sprintf(buf, "Out of memory\n");
4138 }
4139
4140 flush_all(s);
4141
4142 for_each_node_state(node, N_NORMAL_MEMORY) {
4143 struct kmem_cache_node *n = get_node(s, node);
4144 unsigned long flags;
4145 struct page *page;
4146
4147 if (!atomic_long_read(&n->nr_slabs))
4148 continue;
4149
4150 spin_lock_irqsave(&n->list_lock, flags);
4151 list_for_each_entry(page, &n->partial, lru)
4152 process_slab(&t, s, page, alloc, map);
4153 list_for_each_entry(page, &n->full, lru)
4154 process_slab(&t, s, page, alloc, map);
4155 spin_unlock_irqrestore(&n->list_lock, flags);
4156 }
4157
4158 for (i = 0; i < t.count; i++) {
4159 struct location *l = &t.loc[i];
4160
4161 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4162 break;
4163 len += sprintf(buf + len, "%7ld ", l->count);
4164
4165 if (l->addr)
4166 len += sprintf(buf + len, "%pS", (void *)l->addr);
4167 else
4168 len += sprintf(buf + len, "<not-available>");
4169
4170 if (l->sum_time != l->min_time) {
4171 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4172 l->min_time,
4173 (long)div_u64(l->sum_time, l->count),
4174 l->max_time);
4175 } else
4176 len += sprintf(buf + len, " age=%ld",
4177 l->min_time);
4178
4179 if (l->min_pid != l->max_pid)
4180 len += sprintf(buf + len, " pid=%ld-%ld",
4181 l->min_pid, l->max_pid);
4182 else
4183 len += sprintf(buf + len, " pid=%ld",
4184 l->min_pid);
4185
4186 if (num_online_cpus() > 1 &&
4187 !cpumask_empty(to_cpumask(l->cpus)) &&
4188 len < PAGE_SIZE - 60) {
4189 len += sprintf(buf + len, " cpus=");
4190 len += cpulist_scnprintf(buf + len,
4191 PAGE_SIZE - len - 50,
4192 to_cpumask(l->cpus));
4193 }
4194
4195 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4196 len < PAGE_SIZE - 60) {
4197 len += sprintf(buf + len, " nodes=");
4198 len += nodelist_scnprintf(buf + len,
4199 PAGE_SIZE - len - 50,
4200 l->nodes);
4201 }
4202
4203 len += sprintf(buf + len, "\n");
4204 }
4205
4206 free_loc_track(&t);
4207 kfree(map);
4208 if (!t.count)
4209 len += sprintf(buf, "No data\n");
4210 return len;
4211}
4212#endif
4213
4214#ifdef SLUB_RESILIENCY_TEST
4215static void resiliency_test(void)
4216{
4217 u8 *p;
4218
4219 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4220
4221 printk(KERN_ERR "SLUB resiliency testing\n");
4222 printk(KERN_ERR "-----------------------\n");
4223 printk(KERN_ERR "A. Corruption after allocation\n");
4224
4225 p = kzalloc(16, GFP_KERNEL);
4226 p[16] = 0x12;
4227 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4228 " 0x12->0x%p\n\n", p + 16);
4229
4230 validate_slab_cache(kmalloc_caches[4]);
4231
4232
4233 p = kzalloc(32, GFP_KERNEL);
4234 p[32 + sizeof(void *)] = 0x34;
4235 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4236 " 0x34->0x%p\n", p);
4237 printk(KERN_ERR
4238 "If allocated object is overwritten then not detectable\n\n");
4239
4240 validate_slab_cache(kmalloc_caches[5]);
4241 p = kzalloc(64, GFP_KERNEL);
4242 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4243 *p = 0x56;
4244 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4245 p);
4246 printk(KERN_ERR
4247 "If allocated object is overwritten then not detectable\n\n");
4248 validate_slab_cache(kmalloc_caches[6]);
4249
4250 printk(KERN_ERR "\nB. Corruption after free\n");
4251 p = kzalloc(128, GFP_KERNEL);
4252 kfree(p);
4253 *p = 0x78;
4254 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4255 validate_slab_cache(kmalloc_caches[7]);
4256
4257 p = kzalloc(256, GFP_KERNEL);
4258 kfree(p);
4259 p[50] = 0x9a;
4260 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4261 p);
4262 validate_slab_cache(kmalloc_caches[8]);
4263
4264 p = kzalloc(512, GFP_KERNEL);
4265 kfree(p);
4266 p[512] = 0xab;
4267 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4268 validate_slab_cache(kmalloc_caches[9]);
4269}
4270#else
4271#ifdef CONFIG_SYSFS
4272static void resiliency_test(void) {};
4273#endif
4274#endif
4275
4276#ifdef CONFIG_SYSFS
4277enum slab_stat_type {
4278 SL_ALL,
4279 SL_PARTIAL,
4280 SL_CPU,
4281 SL_OBJECTS,
4282 SL_TOTAL
4283};
4284
4285#define SO_ALL (1 << SL_ALL)
4286#define SO_PARTIAL (1 << SL_PARTIAL)
4287#define SO_CPU (1 << SL_CPU)
4288#define SO_OBJECTS (1 << SL_OBJECTS)
4289#define SO_TOTAL (1 << SL_TOTAL)
4290
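/*
 * Produce the per-node object and slab counts reported by several sysfs
 * files. The SO_* flags select whether cpu slabs, partial slabs or all
 * slabs are scanned and whether total objects, objects in use or slab
 * counts are reported, e.g. SO_ALL|SO_OBJECTS for objects_show().
 */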
4291static ssize_t show_slab_objects(struct kmem_cache *s,
4292 char *buf, unsigned long flags)
4293{
4294 unsigned long total = 0;
4295 int node;
4296 int x;
4297 unsigned long *nodes;
4298
4299 nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4300 if (!nodes)
4301 return -ENOMEM;
4302
4303 if (flags & SO_CPU) {
4304 int cpu;
4305
4306 for_each_possible_cpu(cpu) {
4307 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
4308 cpu);
4309 int node;
4310 struct page *page;
4311
4312 page = ACCESS_ONCE(c->page);
4313 if (!page)
4314 continue;
4315
4316 node = page_to_nid(page);
4317 if (flags & SO_TOTAL)
4318 x = page->objects;
4319 else if (flags & SO_OBJECTS)
4320 x = page->inuse;
4321 else
4322 x = 1;
4323
4324 total += x;
4325 nodes[node] += x;
4326
4327 page = ACCESS_ONCE(c->partial);
4328 if (page) {
4329 node = page_to_nid(page);
4330 if (flags & SO_TOTAL)
4331 WARN_ON_ONCE(1);
4332 else if (flags & SO_OBJECTS)
4333 WARN_ON_ONCE(1);
4334 else
4335 x = page->pages;
4336 total += x;
4337 nodes[node] += x;
4338 }
4339 }
4340 }
4341
4342 lock_memory_hotplug();
4343#ifdef CONFIG_SLUB_DEBUG
4344 if (flags & SO_ALL) {
4345 for_each_node_state(node, N_NORMAL_MEMORY) {
4346 struct kmem_cache_node *n = get_node(s, node);
4347
4348 if (flags & SO_TOTAL)
4349 x = atomic_long_read(&n->total_objects);
4350 else if (flags & SO_OBJECTS)
4351 x = atomic_long_read(&n->total_objects) -
4352 count_partial(n, count_free);
4353 else
4354 x = atomic_long_read(&n->nr_slabs);
4355 total += x;
4356 nodes[node] += x;
4357 }
4358
4359 } else
4360#endif
4361 if (flags & SO_PARTIAL) {
4362 for_each_node_state(node, N_NORMAL_MEMORY) {
4363 struct kmem_cache_node *n = get_node(s, node);
4364
4365 if (flags & SO_TOTAL)
4366 x = count_partial(n, count_total);
4367 else if (flags & SO_OBJECTS)
4368 x = count_partial(n, count_inuse);
4369 else
4370 x = n->nr_partial;
4371 total += x;
4372 nodes[node] += x;
4373 }
4374 }
4375 x = sprintf(buf, "%lu", total);
4376#ifdef CONFIG_NUMA
4377 for_each_node_state(node, N_NORMAL_MEMORY)
4378 if (nodes[node])
4379 x += sprintf(buf + x, " N%d=%lu",
4380 node, nodes[node]);
4381#endif
4382 unlock_memory_hotplug();
4383 kfree(nodes);
4384 return x + sprintf(buf + x, "\n");
4385}
4386
4387#ifdef CONFIG_SLUB_DEBUG
4388static int any_slab_objects(struct kmem_cache *s)
4389{
4390 int node;
4391
4392 for_each_online_node(node) {
4393 struct kmem_cache_node *n = get_node(s, node);
4394
4395 if (!n)
4396 continue;
4397
4398 if (atomic_long_read(&n->total_objects))
4399 return 1;
4400 }
4401 return 0;
4402}
4403#endif
4404
4405#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4406#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4407
4408struct slab_attribute {
4409 struct attribute attr;
4410 ssize_t (*show)(struct kmem_cache *s, char *buf);
4411 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4412};
4413
4414#define SLAB_ATTR_RO(_name) \
4415 static struct slab_attribute _name##_attr = \
4416 __ATTR(_name, 0400, _name##_show, NULL)
4417
4418#define SLAB_ATTR(_name) \
4419 static struct slab_attribute _name##_attr = \
4420 __ATTR(_name, 0600, _name##_show, _name##_store)
4421
4422static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4423{
4424 return sprintf(buf, "%d\n", s->size);
4425}
4426SLAB_ATTR_RO(slab_size);
4427
4428static ssize_t align_show(struct kmem_cache *s, char *buf)
4429{
4430 return sprintf(buf, "%d\n", s->align);
4431}
4432SLAB_ATTR_RO(align);
4433
4434static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4435{
4436 return sprintf(buf, "%d\n", s->object_size);
4437}
4438SLAB_ATTR_RO(object_size);
4439
4440static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4441{
4442 return sprintf(buf, "%d\n", oo_objects(s->oo));
4443}
4444SLAB_ATTR_RO(objs_per_slab);
4445
4446static ssize_t order_store(struct kmem_cache *s,
4447 const char *buf, size_t length)
4448{
4449 unsigned long order;
4450 int err;
4451
4452 err = kstrtoul(buf, 10, &order);
4453 if (err)
4454 return err;
4455
4456 if (order > slub_max_order || order < slub_min_order)
4457 return -EINVAL;
4458
4459 calculate_sizes(s, order);
4460 return length;
4461}
4462
4463static ssize_t order_show(struct kmem_cache *s, char *buf)
4464{
4465 return sprintf(buf, "%d\n", oo_order(s->oo));
4466}
4467SLAB_ATTR(order);
4468
4469static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4470{
4471 return sprintf(buf, "%lu\n", s->min_partial);
4472}
4473
4474static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4475 size_t length)
4476{
4477 unsigned long min;
4478 int err;
4479
4480 err = kstrtoul(buf, 10, &min);
4481 if (err)
4482 return err;
4483
4484 set_min_partial(s, min);
4485 return length;
4486}
4487SLAB_ATTR(min_partial);
4488
4489static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4490{
4491 return sprintf(buf, "%u\n", s->cpu_partial);
4492}
4493
4494static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4495 size_t length)
4496{
4497 unsigned long objects;
4498 int err;
4499
4500 err = kstrtoul(buf, 10, &objects);
4501 if (err)
4502 return err;
4503 if (objects && !kmem_cache_has_cpu_partial(s))
4504 return -EINVAL;
4505
4506 s->cpu_partial = objects;
4507 flush_all(s);
4508 return length;
4509}
4510SLAB_ATTR(cpu_partial);
4511
4512static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4513{
4514 if (!s->ctor)
4515 return 0;
4516 return sprintf(buf, "%pS\n", s->ctor);
4517}
4518SLAB_ATTR_RO(ctor);
4519
4520static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4521{
4522 return sprintf(buf, "%d\n", s->refcount - 1);
4523}
4524SLAB_ATTR_RO(aliases);
4525
4526static ssize_t partial_show(struct kmem_cache *s, char *buf)
4527{
4528 return show_slab_objects(s, buf, SO_PARTIAL);
4529}
4530SLAB_ATTR_RO(partial);
4531
4532static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4533{
4534 return show_slab_objects(s, buf, SO_CPU);
4535}
4536SLAB_ATTR_RO(cpu_slabs);
4537
4538static ssize_t objects_show(struct kmem_cache *s, char *buf)
4539{
4540 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4541}
4542SLAB_ATTR_RO(objects);
4543
4544static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4545{
4546 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4547}
4548SLAB_ATTR_RO(objects_partial);
4549
4550static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4551{
4552 int objects = 0;
4553 int pages = 0;
4554 int cpu;
4555 int len;
4556
4557 for_each_online_cpu(cpu) {
4558 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4559
4560 if (page) {
4561 pages += page->pages;
4562 objects += page->pobjects;
4563 }
4564 }
4565
4566 len = sprintf(buf, "%d(%d)", objects, pages);
4567
4568#ifdef CONFIG_SMP
4569 for_each_online_cpu(cpu) {
4570 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4571
4572 if (page && len < PAGE_SIZE - 20)
4573 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4574 page->pobjects, page->pages);
4575 }
4576#endif
4577 return len + sprintf(buf + len, "\n");
4578}
4579SLAB_ATTR_RO(slabs_cpu_partial);
4580
4581static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4582{
4583 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4584}
4585
4586static ssize_t reclaim_account_store(struct kmem_cache *s,
4587 const char *buf, size_t length)
4588{
4589 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4590 if (buf[0] == '1')
4591 s->flags |= SLAB_RECLAIM_ACCOUNT;
4592 return length;
4593}
4594SLAB_ATTR(reclaim_account);
4595
4596static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4597{
4598 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4599}
4600SLAB_ATTR_RO(hwcache_align);
4601
4602#ifdef CONFIG_ZONE_DMA
4603static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
4604{
4605 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
4606}
4607SLAB_ATTR_RO(cache_dma);
4608#endif
4609
4610static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
4611{
4612 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
4613}
4614SLAB_ATTR_RO(destroy_by_rcu);
4615
4616static ssize_t reserved_show(struct kmem_cache *s, char *buf)
4617{
4618 return sprintf(buf, "%d\n", s->reserved);
4619}
4620SLAB_ATTR_RO(reserved);
4621
4622#ifdef CONFIG_SLUB_DEBUG
4623static ssize_t slabs_show(struct kmem_cache *s, char *buf)
4624{
4625 return show_slab_objects(s, buf, SO_ALL);
4626}
4627SLAB_ATTR_RO(slabs);
4628
4629static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
4630{
4631 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
4632}
4633SLAB_ATTR_RO(total_objects);
4634
4635static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
4636{
4637 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
4638}
4639
4640static ssize_t sanity_checks_store(struct kmem_cache *s,
4641 const char *buf, size_t length)
4642{
4643 s->flags &= ~SLAB_DEBUG_FREE;
4644 if (buf[0] == '1') {
4645 s->flags &= ~__CMPXCHG_DOUBLE;
4646 s->flags |= SLAB_DEBUG_FREE;
4647 }
4648 return length;
4649}
4650SLAB_ATTR(sanity_checks);
4651
4652static ssize_t trace_show(struct kmem_cache *s, char *buf)
4653{
4654 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
4655}
4656
4657static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4658 size_t length)
4659{
4660 s->flags &= ~SLAB_TRACE;
4661 if (buf[0] == '1') {
4662 s->flags &= ~__CMPXCHG_DOUBLE;
4663 s->flags |= SLAB_TRACE;
4664 }
4665 return length;
4666}
4667SLAB_ATTR(trace);
4668
4669static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
4670{
4671 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
4672}
4673
4674static ssize_t red_zone_store(struct kmem_cache *s,
4675 const char *buf, size_t length)
4676{
4677 if (any_slab_objects(s))
4678 return -EBUSY;
4679
4680 s->flags &= ~SLAB_RED_ZONE;
4681 if (buf[0] == '1') {
4682 s->flags &= ~__CMPXCHG_DOUBLE;
4683 s->flags |= SLAB_RED_ZONE;
4684 }
4685 calculate_sizes(s, -1);
4686 return length;
4687}
4688SLAB_ATTR(red_zone);
4689
4690static ssize_t poison_show(struct kmem_cache *s, char *buf)
4691{
4692 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
4693}
4694
4695static ssize_t poison_store(struct kmem_cache *s,
4696 const char *buf, size_t length)
4697{
4698 if (any_slab_objects(s))
4699 return -EBUSY;
4700
4701 s->flags &= ~SLAB_POISON;
4702 if (buf[0] == '1') {
4703 s->flags &= ~__CMPXCHG_DOUBLE;
4704 s->flags |= SLAB_POISON;
4705 }
4706 calculate_sizes(s, -1);
4707 return length;
4708}
4709SLAB_ATTR(poison);
4710
4711static ssize_t store_user_show(struct kmem_cache *s, char *buf)
4712{
4713 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
4714}
4715
4716static ssize_t store_user_store(struct kmem_cache *s,
4717 const char *buf, size_t length)
4718{
4719 if (any_slab_objects(s))
4720 return -EBUSY;
4721
4722 s->flags &= ~SLAB_STORE_USER;
4723 if (buf[0] == '1') {
4724 s->flags &= ~__CMPXCHG_DOUBLE;
4725 s->flags |= SLAB_STORE_USER;
4726 }
4727 calculate_sizes(s, -1);
4728 return length;
4729}
4730SLAB_ATTR(store_user);
4731
4732static ssize_t validate_show(struct kmem_cache *s, char *buf)
4733{
4734 return 0;
4735}
4736
4737static ssize_t validate_store(struct kmem_cache *s,
4738 const char *buf, size_t length)
4739{
4740 int ret = -EINVAL;
4741
4742 if (buf[0] == '1') {
4743 ret = validate_slab_cache(s);
4744 if (ret >= 0)
4745 ret = length;
4746 }
4747 return ret;
4748}
4749SLAB_ATTR(validate);
4750
4751static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
4752{
4753 if (!(s->flags & SLAB_STORE_USER))
4754 return -ENOSYS;
4755 return list_locations(s, buf, TRACK_ALLOC);
4756}
4757SLAB_ATTR_RO(alloc_calls);
4758
4759static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
4760{
4761 if (!(s->flags & SLAB_STORE_USER))
4762 return -ENOSYS;
4763 return list_locations(s, buf, TRACK_FREE);
4764}
4765SLAB_ATTR_RO(free_calls);
4766#endif
4767
4768#ifdef CONFIG_FAILSLAB
4769static ssize_t failslab_show(struct kmem_cache *s, char *buf)
4770{
4771 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
4772}
4773
4774static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
4775 size_t length)
4776{
4777 s->flags &= ~SLAB_FAILSLAB;
4778 if (buf[0] == '1')
4779 s->flags |= SLAB_FAILSLAB;
4780 return length;
4781}
4782SLAB_ATTR(failslab);
4783#endif
4784
4785static ssize_t shrink_show(struct kmem_cache *s, char *buf)
4786{
4787 return 0;
4788}
4789
4790static ssize_t shrink_store(struct kmem_cache *s,
4791 const char *buf, size_t length)
4792{
4793 if (buf[0] == '1') {
4794 int rc = kmem_cache_shrink(s);
4795
4796 if (rc)
4797 return rc;
4798 } else
4799 return -EINVAL;
4800 return length;
4801}
4802SLAB_ATTR(shrink);
4803
4804#ifdef CONFIG_NUMA
4805static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
4806{
4807 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
4808}
4809
4810static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
4811 const char *buf, size_t length)
4812{
4813 unsigned long ratio;
4814 int err;
4815
4816 err = kstrtoul(buf, 10, &ratio);
4817 if (err)
4818 return err;
4819
4820 if (ratio <= 100)
4821 s->remote_node_defrag_ratio = ratio * 10;
4822
4823 return length;
4824}
4825SLAB_ATTR(remote_node_defrag_ratio);
4826#endif
4827
4828#ifdef CONFIG_SLUB_STATS
4829static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4830{
4831 unsigned long sum = 0;
4832 int cpu;
4833 int len;
4834 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4835
4836 if (!data)
4837 return -ENOMEM;
4838
4839 for_each_online_cpu(cpu) {
4840 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
4841
4842 data[cpu] = x;
4843 sum += x;
4844 }
4845
4846 len = sprintf(buf, "%lu", sum);
4847
4848#ifdef CONFIG_SMP
4849 for_each_online_cpu(cpu) {
4850 if (data[cpu] && len < PAGE_SIZE - 20)
4851 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
4852 }
4853#endif
4854 kfree(data);
4855 return len + sprintf(buf + len, "\n");
4856}
4857
4858static void clear_stat(struct kmem_cache *s, enum stat_item si)
4859{
4860 int cpu;
4861
4862 for_each_online_cpu(cpu)
4863 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
4864}
4865
4866#define STAT_ATTR(si, text) \
4867static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4868{ \
4869 return show_stat(s, buf, si); \
4870} \
4871static ssize_t text##_store(struct kmem_cache *s, \
4872 const char *buf, size_t length) \
4873{ \
4874 if (buf[0] != '0') \
4875 return -EINVAL; \
4876 clear_stat(s, si); \
4877 return length; \
4878} \
4879SLAB_ATTR(text); \
4880
4881STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4882STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
4883STAT_ATTR(FREE_FASTPATH, free_fastpath);
4884STAT_ATTR(FREE_SLOWPATH, free_slowpath);
4885STAT_ATTR(FREE_FROZEN, free_frozen);
4886STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
4887STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4888STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4889STAT_ATTR(ALLOC_SLAB, alloc_slab);
4890STAT_ATTR(ALLOC_REFILL, alloc_refill);
4891STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
4892STAT_ATTR(FREE_SLAB, free_slab);
4893STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4894STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
4895STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4896STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4897STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4898STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4899STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
4900STAT_ATTR(ORDER_FALLBACK, order_fallback);
4901STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
4902STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
4903STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
4904STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
4905STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
4906STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
4907#endif
4908
4909static struct attribute *slab_attrs[] = {
4910 &slab_size_attr.attr,
4911 &object_size_attr.attr,
4912 &objs_per_slab_attr.attr,
4913 &order_attr.attr,
4914 &min_partial_attr.attr,
4915 &cpu_partial_attr.attr,
4916 &objects_attr.attr,
4917 &objects_partial_attr.attr,
4918 &partial_attr.attr,
4919 &cpu_slabs_attr.attr,
4920 &ctor_attr.attr,
4921 &aliases_attr.attr,
4922 &align_attr.attr,
4923 &hwcache_align_attr.attr,
4924 &reclaim_account_attr.attr,
4925 &destroy_by_rcu_attr.attr,
4926 &shrink_attr.attr,
4927 &reserved_attr.attr,
4928 &slabs_cpu_partial_attr.attr,
4929#ifdef CONFIG_SLUB_DEBUG
4930 &total_objects_attr.attr,
4931 &slabs_attr.attr,
4932 &sanity_checks_attr.attr,
4933 &trace_attr.attr,
4934 &red_zone_attr.attr,
4935 &poison_attr.attr,
4936 &store_user_attr.attr,
4937 &validate_attr.attr,
4938 &alloc_calls_attr.attr,
4939 &free_calls_attr.attr,
4940#endif
4941#ifdef CONFIG_ZONE_DMA
4942 &cache_dma_attr.attr,
4943#endif
4944#ifdef CONFIG_NUMA
4945 &remote_node_defrag_ratio_attr.attr,
4946#endif
4947#ifdef CONFIG_SLUB_STATS
4948 &alloc_fastpath_attr.attr,
4949 &alloc_slowpath_attr.attr,
4950 &free_fastpath_attr.attr,
4951 &free_slowpath_attr.attr,
4952 &free_frozen_attr.attr,
4953 &free_add_partial_attr.attr,
4954 &free_remove_partial_attr.attr,
4955 &alloc_from_partial_attr.attr,
4956 &alloc_slab_attr.attr,
4957 &alloc_refill_attr.attr,
4958 &alloc_node_mismatch_attr.attr,
4959 &free_slab_attr.attr,
4960 &cpuslab_flush_attr.attr,
4961 &deactivate_full_attr.attr,
4962 &deactivate_empty_attr.attr,
4963 &deactivate_to_head_attr.attr,
4964 &deactivate_to_tail_attr.attr,
4965 &deactivate_remote_frees_attr.attr,
4966 &deactivate_bypass_attr.attr,
4967 &order_fallback_attr.attr,
4968 &cmpxchg_double_fail_attr.attr,
4969 &cmpxchg_double_cpu_fail_attr.attr,
4970 &cpu_partial_alloc_attr.attr,
4971 &cpu_partial_free_attr.attr,
4972 &cpu_partial_node_attr.attr,
4973 &cpu_partial_drain_attr.attr,
4974#endif
4975#ifdef CONFIG_FAILSLAB
4976 &failslab_attr.attr,
4977#endif
4978
4979 NULL
4980};
4981
4982static struct attribute_group slab_attr_group = {
4983 .attrs = slab_attrs,
4984};
4985
4986static ssize_t slab_attr_show(struct kobject *kobj,
4987 struct attribute *attr,
4988 char *buf)
4989{
4990 struct slab_attribute *attribute;
4991 struct kmem_cache *s;
4992 int err;
4993
4994 attribute = to_slab_attr(attr);
4995 s = to_slab(kobj);
4996
4997 if (!attribute->show)
4998 return -EIO;
4999
5000 err = attribute->show(s, buf);
5001
5002 return err;
5003}
5004
5005static ssize_t slab_attr_store(struct kobject *kobj,
5006 struct attribute *attr,
5007 const char *buf, size_t len)
5008{
5009 struct slab_attribute *attribute;
5010 struct kmem_cache *s;
5011 int err;
5012
5013 attribute = to_slab_attr(attr);
5014 s = to_slab(kobj);
5015
5016 if (!attribute->store)
5017 return -EIO;
5018
5019 err = attribute->store(s, buf, len);
5020#ifdef CONFIG_MEMCG_KMEM
5021 if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
5022 int i;
5023
5024 mutex_lock(&slab_mutex);
5025 if (s->max_attr_size < len)
5026 s->max_attr_size = len;
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045 for_each_memcg_cache_index(i) {
5046 struct kmem_cache *c = cache_from_memcg_idx(s, i);
5047 if (c)
5048 attribute->store(c, buf, len);
5049 }
5050 mutex_unlock(&slab_mutex);
5051 }
5052#endif
5053 return err;
5054}
5055
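/*
 * Replay sysfs attribute values for a memcg-enabled cache: for every
 * writable attribute, call show() on memcg_params->root_cache and feed
 * the result to store() on this cache. Caches whose attributes were
 * never modified (max_attr_size == 0) are skipped, and max_attr_size
 * also decides whether a small on-stack buffer suffices or a whole page
 * has to be allocated.
 */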
5056static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5057{
5058#ifdef CONFIG_MEMCG_KMEM
5059 int i;
5060 char *buffer = NULL;
5061
5062 if (!is_root_cache(s))
5063 return;
5064
5065
5066
5067
5068
5069 if (!s->max_attr_size)
5070 return;
5071
5072 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
5073 char mbuf[64];
5074 char *buf;
5075 struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
5076
5077 if (!attr || !attr->store || !attr->show)
5078 continue;
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089 if (buffer)
5090 buf = buffer;
5091 else if (s->max_attr_size < ARRAY_SIZE(mbuf))
5092 buf = mbuf;
5093 else {
5094 buffer = (char *) get_zeroed_page(GFP_KERNEL);
5095 if (WARN_ON(!buffer))
5096 continue;
5097 buf = buffer;
5098 }
5099
5100 attr->show(s->memcg_params->root_cache, buf);
5101 attr->store(s, buf, strlen(buf));
5102 }
5103
5104 if (buffer)
5105 free_page((unsigned long)buffer);
5106#endif
5107}
5108
5109static const struct sysfs_ops slab_sysfs_ops = {
5110 .show = slab_attr_show,
5111 .store = slab_attr_store,
5112};
5113
5114static struct kobj_type slab_ktype = {
5115 .sysfs_ops = &slab_sysfs_ops,
5116};
5117
5118static int uevent_filter(struct kset *kset, struct kobject *kobj)
5119{
5120 struct kobj_type *ktype = get_ktype(kobj);
5121
5122 if (ktype == &slab_ktype)
5123 return 1;
5124 return 0;
5125}
5126
5127static const struct kset_uevent_ops slab_uevent_ops = {
5128 .filter = uevent_filter,
5129};
5130
5131static struct kset *slab_kset;
5132
5133#define ID_STR_LENGTH 64
5134
5135
5136
5137
5138
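/*
 * Build the unique sysfs name used for mergeable caches: a ':' prefix,
 * single character codes for selected cache flags, and the object size,
 * plus the memcg cache id for memcg caches. A reclaimable, tracked cache
 * of size 192 would, for example, come out as ":at-0000192".
 */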
5139static char *create_unique_id(struct kmem_cache *s)
5140{
5141 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5142 char *p = name;
5143
5144 BUG_ON(!name);
5145
5146 *p++ = ':';
5147
5148
5149
5150
5151
5152
5153
5154 if (s->flags & SLAB_CACHE_DMA)
5155 *p++ = 'd';
5156 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5157 *p++ = 'a';
5158 if (s->flags & SLAB_DEBUG_FREE)
5159 *p++ = 'F';
5160 if (!(s->flags & SLAB_NOTRACK))
5161 *p++ = 't';
5162 if (p != name + 1)
5163 *p++ = '-';
5164 p += sprintf(p, "%07d", s->size);
5165
5166#ifdef CONFIG_MEMCG_KMEM
5167 if (!is_root_cache(s))
5168 p += sprintf(p, "-%08d",
5169 memcg_cache_id(s->memcg_params->memcg));
5170#endif
5171
5172 BUG_ON(p > name + ID_STR_LENGTH - 1);
5173 return name;
5174}
5175
5176static int sysfs_slab_add(struct kmem_cache *s)
5177{
5178 int err;
5179 const char *name;
5180 int unmergeable = slab_unmergeable(s);
5181
5182 if (unmergeable) {
5183
5184
5185
5186
5187
5188 sysfs_remove_link(&slab_kset->kobj, s->name);
5189 name = s->name;
5190 } else {
5191
5192
5193
5194
5195 name = create_unique_id(s);
5196 }
5197
5198 s->kobj.kset = slab_kset;
5199 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5200 if (err) {
5201 kobject_put(&s->kobj);
5202 return err;
5203 }
5204
5205 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5206 if (err) {
5207 kobject_del(&s->kobj);
5208 kobject_put(&s->kobj);
5209 return err;
5210 }
5211 kobject_uevent(&s->kobj, KOBJ_ADD);
5212 if (!unmergeable) {
5213
5214 sysfs_slab_alias(s, s->name);
5215 kfree(name);
5216 }
5217 return 0;
5218}
5219
5220static void sysfs_slab_remove(struct kmem_cache *s)
5221{
5222 if (slab_state < FULL)
5223
5224
5225
5226
5227 return;
5228
5229 kobject_uevent(&s->kobj, KOBJ_REMOVE);
5230 kobject_del(&s->kobj);
5231 kobject_put(&s->kobj);
5232}
5233
5234
5235
5236
5237
5238struct saved_alias {
5239 struct kmem_cache *s;
5240 const char *name;
5241 struct saved_alias *next;
5242};
5243
5244static struct saved_alias *alias_list;
5245
5246static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
5247{
5248 struct saved_alias *al;
5249
5250 if (slab_state == FULL) {
5251
5252
5253
5254 sysfs_remove_link(&slab_kset->kobj, name);
5255 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
5256 }
5257
5258 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
5259 if (!al)
5260 return -ENOMEM;
5261
5262 al->s = s;
5263 al->name = name;
5264 al->next = alias_list;
5265 alias_list = al;
5266 return 0;
5267}
5268
5269static int __init slab_sysfs_init(void)
5270{
5271 struct kmem_cache *s;
5272 int err;
5273
5274 mutex_lock(&slab_mutex);
5275
5276 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
5277 if (!slab_kset) {
5278 mutex_unlock(&slab_mutex);
5279 printk(KERN_ERR "Cannot register slab subsystem.\n");
5280 return -ENOSYS;
5281 }
5282
5283 slab_state = FULL;
5284
5285 list_for_each_entry(s, &slab_caches, list) {
5286 err = sysfs_slab_add(s);
5287 if (err)
5288 printk(KERN_ERR "SLUB: Unable to add boot slab %s"
5289 " to sysfs\n", s->name);
5290 }
5291
5292 while (alias_list) {
5293 struct saved_alias *al = alias_list;
5294
5295 alias_list = alias_list->next;
5296 err = sysfs_slab_alias(al->s, al->name);
5297 if (err)
5298 printk(KERN_ERR "SLUB: Unable to add boot slab alias"
5299 " %s to sysfs\n", al->name);
5300 kfree(al);
5301 }
5302
5303 mutex_unlock(&slab_mutex);
5304 resiliency_test();
5305 return 0;
5306}
5307
5308__initcall(slab_sysfs_init);
5309#endif
5310
5311
5312
5313
5314#ifdef CONFIG_SLABINFO
5315void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
5316{
5317 unsigned long nr_slabs = 0;
5318 unsigned long nr_objs = 0;
5319 unsigned long nr_free = 0;
5320 int node;
5321
5322 for_each_online_node(node) {
5323 struct kmem_cache_node *n = get_node(s, node);
5324
5325 if (!n)
5326 continue;
5327
5328 nr_slabs += node_nr_slabs(n);
5329 nr_objs += node_nr_objs(n);
5330 nr_free += count_partial(n, count_free);
5331 }
5332
5333 sinfo->active_objs = nr_objs - nr_free;
5334 sinfo->num_objs = nr_objs;
5335 sinfo->active_slabs = nr_slabs;
5336 sinfo->num_slabs = nr_slabs;
5337 sinfo->objects_per_slab = oo_objects(s->oo);
5338 sinfo->cache_order = oo_order(s->oo);
5339}
5340
5341void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
5342{
5343}
5344
5345ssize_t slabinfo_write(struct file *file, const char __user *buffer,
5346 size_t count, loff_t *ppos)
5347{
5348 return -EIO;
5349}
5350#endif
5351