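/*
 * SLUB: a slab allocator that uses the page allocator directly and keeps
 * lockless per-cpu freelists.  Partially filled slabs are tracked on
 * per-node partial lists under the node's list_lock; individual slab pages
 * are protected by a bit spinlock in the page flags (slab_lock) or, where
 * available, updated with cmpxchg_double.
 */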
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
#include <linux/memcontrol.h>

#include <trace/events/kmem.h>

#include "internal.h"
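/*
 * Lock order:
 *   1. slab_mutex (slab_common.c)
 *   2. node->list_lock
 *   3. slab_lock(page) (only for debugging and on arches without
 *	cmpxchg_double support)
 *
 * The list_lock protects a node's partial and full lists.  The slab_lock
 * (or a cmpxchg_double on freelist/counters) protects the fields of an
 * individual slab page.  Frozen slabs (c->page, c->partial) are exempt from
 * list management and belong exclusively to one cpu.
 */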
117static inline int kmem_cache_debug(struct kmem_cache *s)
118{
119#ifdef CONFIG_SLUB_DEBUG
120 return unlikely(s->flags & SLAB_DEBUG_FLAGS);
121#else
122 return 0;
123#endif
124}
125
126static inline void *fixup_red_left(struct kmem_cache *s, void *p)
127{
128 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
129 p += s->red_left_pad;
130
131 return p;
132}
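/*
 * SLUB_RESILIENCY_TEST is a compile-time self-test switch;
 * SLUB_DEBUG_CMPXCHG logs cmpxchg_double failures.  Both default to off.
 */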
#undef SLUB_RESILIENCY_TEST

#undef SLUB_DEBUG_CMPXCHG

/*
 * Minimum number of partial slabs kept per node.  These are left on the
 * partial lists even when they are empty, to avoid bouncing slabs back and
 * forth to the page allocator.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs; more than this makes
 * shrinking a cache sort the partial list by the number of objects in use.
 */
#define MAX_PARTIAL 10

#define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
				SLAB_POISON | SLAB_STORE_USER)

/*
 * These debug flags cannot use the cmpxchg_double freelist update because
 * they need a stable, locked slab while object metadata is inspected or
 * modified.
 */
#define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
				SLAB_TRACE)

/*
 * Debug options that change the layout of the objects in a slab.
 */
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)

/*
 * Caches with any of these flags set are never merged with other caches.
 */
#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

/*
 * Caches are only merged when these flags match as well.
 */
#define SLUB_MERGE_SAME (SLAB_CONSISTENCY_CHECKS | SLAB_RECLAIM_ACCOUNT | \
		SLAB_CACHE_DMA | SLAB_NOTRACK | SLAB_ACCOUNT)

#define OO_SHIFT	16
#define OO_MASK		((1 << OO_SHIFT) - 1)
#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is a 15 bit field */

/* Internal SLUB flags, kept in the upper bits of kmem_cache->flags */
#define __OBJECT_POISON		0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */
196
197#ifdef CONFIG_SMP
198static struct notifier_block slab_notifier;
199#endif
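/*
 * Tracking of the last user (allocation or free) of an object, kept past
 * the end of the object when SLAB_STORE_USER is set.
 */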
204#define TRACK_ADDRS_COUNT 16
205struct track {
206 unsigned long addr;
207#ifdef CONFIG_STACKTRACE
208 unsigned long addrs[TRACK_ADDRS_COUNT];
209#endif
210 int cpu;
211 int pid;
212 unsigned long when;
213};
214
215enum track_item { TRACK_ALLOC, TRACK_FREE };
216
217#ifdef CONFIG_SYSFS
218static int sysfs_slab_add(struct kmem_cache *);
219static int sysfs_slab_alias(struct kmem_cache *, const char *);
220static void sysfs_slab_remove(struct kmem_cache *);
221static void memcg_propagate_slab_attrs(struct kmem_cache *s);
222#else
223static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
224static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
225 { return 0; }
226static inline void sysfs_slab_remove(struct kmem_cache *s) { }
227
228static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
229#endif
230
231static inline void stat(const struct kmem_cache *s, enum stat_item si)
232{
233#ifdef CONFIG_SLUB_STATS
234 __this_cpu_inc(s->cpu_slab->stat[si]);
235#endif
236}
237
242static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
243{
244 return s->node[node];
245}
246
247static inline void *get_freepointer(struct kmem_cache *s, void *object)
248{
249 return *(void **)(object + s->offset);
250}
251
252static void prefetch_freepointer(const struct kmem_cache *s, void *object)
253{
254 prefetch(object + s->offset);
255}
256
257static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
258{
259 void *p;
260
261 if (!debug_pagealloc_enabled())
262 return get_freepointer(s, object);
263
264 probe_kernel_read(&p, (void **)(object + s->offset), sizeof(p));
265 return p;
266}
267
268static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
269{
270 *(void **)(object + s->offset) = fp;
271}
272
273
274#define for_each_object(__p, __s, __addr, __objects) \
275 for (__p = fixup_red_left(__s, __addr); \
276 __p < (__addr) + (__objects) * (__s)->size; \
277 __p += (__s)->size)
278
279#define for_each_object_idx(__p, __idx, __s, __addr, __objects) \
280 for (__p = fixup_red_left(__s, __addr), __idx = 1; \
281 __idx <= __objects; \
282 __p += (__s)->size, __idx++)
283
284
285static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
286{
287 return (p - addr) / s->size;
288}
289
static inline size_t slab_ksize(const struct kmem_cache *s)
{
#ifdef CONFIG_SLUB_DEBUG
	/*
	 * Debugging requires use of the padding between object
	 * and whatever may come after it.
	 */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		return s->object_size;
#endif
	/*
	 * If we have the need to store the freelist pointer
	 * back there or track user information then we can
	 * only use the space before that information.
	 */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		return s->inuse;
	/*
	 * Else we can use all the padding etc for the allocation
	 */
	return s->size;
}
313
314static inline int order_objects(int order, unsigned long size, int reserved)
315{
316 return ((PAGE_SIZE << order) - reserved) / size;
317}
318
319static inline struct kmem_cache_order_objects oo_make(int order,
320 unsigned long size, int reserved)
321{
322 struct kmem_cache_order_objects x = {
323 (order << OO_SHIFT) + order_objects(order, size, reserved)
324 };
325
326 return x;
327}
328
329static inline int oo_order(struct kmem_cache_order_objects x)
330{
331 return x.x >> OO_SHIFT;
332}
333
334static inline int oo_objects(struct kmem_cache_order_objects x)
335{
336 return x.x & OO_MASK;
337}
338
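/*
 * Per-slab locking using a bit spinlock on the page's PG_locked flag.
 */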
342static __always_inline void slab_lock(struct page *page)
343{
344 bit_spin_lock(PG_locked, &page->flags);
345}
346
347static __always_inline void slab_unlock(struct page *page)
348{
349 __bit_spin_unlock(PG_locked, &page->flags);
350}
351
352static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
353{
354 struct page tmp;
355 tmp.counters = counters_new;
362 page->frozen = tmp.frozen;
363 page->inuse = tmp.inuse;
364 page->objects = tmp.objects;
365}
366
367
368static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
369 void *freelist_old, unsigned long counters_old,
370 void *freelist_new, unsigned long counters_new,
371 const char *n)
372{
373 VM_BUG_ON(!irqs_disabled());
374#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
375 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
376 if (s->flags & __CMPXCHG_DOUBLE) {
377 if (cmpxchg_double(&page->freelist, &page->counters,
378 freelist_old, counters_old,
379 freelist_new, counters_new))
380 return 1;
381 } else
382#endif
383 {
384 slab_lock(page);
385 if (page->freelist == freelist_old && page->counters == counters_old) {
386 page->freelist = freelist_new;
387 set_page_slub_counters(page, counters_new);
388 slab_unlock(page);
389 return 1;
390 }
391 slab_unlock(page);
392 }
393
394 cpu_relax();
395 stat(s, CMPXCHG_DOUBLE_FAIL);
396
397#ifdef SLUB_DEBUG_CMPXCHG
398 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
399#endif
400
401 return 0;
402}
403
404static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
405 void *freelist_old, unsigned long counters_old,
406 void *freelist_new, unsigned long counters_new,
407 const char *n)
408{
409#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
410 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
411 if (s->flags & __CMPXCHG_DOUBLE) {
412 if (cmpxchg_double(&page->freelist, &page->counters,
413 freelist_old, counters_old,
414 freelist_new, counters_new))
415 return 1;
416 } else
417#endif
418 {
419 unsigned long flags;
420
421 local_irq_save(flags);
422 slab_lock(page);
423 if (page->freelist == freelist_old && page->counters == counters_old) {
424 page->freelist = freelist_new;
425 set_page_slub_counters(page, counters_new);
426 slab_unlock(page);
427 local_irq_restore(flags);
428 return 1;
429 }
430 slab_unlock(page);
431 local_irq_restore(flags);
432 }
433
434 cpu_relax();
435 stat(s, CMPXCHG_DOUBLE_FAIL);
436
437#ifdef SLUB_DEBUG_CMPXCHG
438 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
439#endif
440
441 return 0;
442}
443
444#ifdef CONFIG_SLUB_DEBUG
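/*
 * Build a bitmap of the objects that are currently on the page's freelist
 * (i.e. free).  The caller must hold a lock that keeps the freelist stable.
 */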
451static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
452{
453 void *p;
454 void *addr = page_address(page);
455
456 for (p = page->freelist; p; p = get_freepointer(s, p))
457 set_bit(slab_index(p, s, addr), map);
458}
459
460static inline int size_from_object(struct kmem_cache *s)
461{
462 if (s->flags & SLAB_RED_ZONE)
463 return s->size - s->red_left_pad;
464
465 return s->size;
466}
467
468static inline void *restore_red_left(struct kmem_cache *s, void *p)
469{
470 if (s->flags & SLAB_RED_ZONE)
471 p -= s->red_left_pad;
472
473 return p;
474}
475
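/*
 * Debug settings, adjustable via the slub_debug= boot parameter (see
 * setup_slub_debug() below).
 */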
479#ifdef CONFIG_SLUB_DEBUG_ON
480static int slub_debug = DEBUG_DEFAULT_FLAGS;
481#else
482static int slub_debug;
483#endif
484
485static char *slub_debug_slabs;
486static int disable_higher_order_debug;
487
493static inline int check_valid_pointer(struct kmem_cache *s,
494 struct page *page, void *object)
495{
496 void *base;
497
498 if (!object)
499 return 1;
500
501 base = page_address(page);
502 object = restore_red_left(s, object);
503 if (object < base || object >= base + page->objects * s->size ||
504 (object - base) % s->size) {
505 return 0;
506 }
507
508 return 1;
509}
510
511static void print_section(char *text, u8 *addr, unsigned int length)
512{
513 print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
514 length, 1);
515}
516
517static struct track *get_track(struct kmem_cache *s, void *object,
518 enum track_item alloc)
519{
520 struct track *p;
521
522 if (s->offset)
523 p = object + s->offset + sizeof(void *);
524 else
525 p = object + s->inuse;
526
527 return p + alloc;
528}
529
530static void set_track(struct kmem_cache *s, void *object,
531 enum track_item alloc, unsigned long addr)
532{
533 struct track *p = get_track(s, object, alloc);
534
535 if (addr) {
536#ifdef CONFIG_STACKTRACE
537 struct stack_trace trace;
538 int i;
539
540 trace.nr_entries = 0;
541 trace.max_entries = TRACK_ADDRS_COUNT;
542 trace.entries = p->addrs;
543 trace.skip = 3;
544 save_stack_trace(&trace);
545
546
547 if (trace.nr_entries != 0 &&
548 trace.entries[trace.nr_entries - 1] == ULONG_MAX)
549 trace.nr_entries--;
550
551 for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
552 p->addrs[i] = 0;
553#endif
554 p->addr = addr;
555 p->cpu = smp_processor_id();
556 p->pid = current->pid;
557 p->when = jiffies;
558 } else
559 memset(p, 0, sizeof(struct track));
560}
561
562static void init_tracking(struct kmem_cache *s, void *object)
563{
564 if (!(s->flags & SLAB_STORE_USER))
565 return;
566
567 set_track(s, object, TRACK_FREE, 0UL);
568 set_track(s, object, TRACK_ALLOC, 0UL);
569}
570
571static void print_track(const char *s, struct track *t)
572{
573 if (!t->addr)
574 return;
575
576 printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
577 s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
578#ifdef CONFIG_STACKTRACE
579 {
580 int i;
581 for (i = 0; i < TRACK_ADDRS_COUNT; i++)
582 if (t->addrs[i])
583 printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
584 else
585 break;
586 }
587#endif
588}
589
590static void print_tracking(struct kmem_cache *s, void *object)
591{
592 if (!(s->flags & SLAB_STORE_USER))
593 return;
594
595 print_track("Allocated", get_track(s, object, TRACK_ALLOC));
596 print_track("Freed", get_track(s, object, TRACK_FREE));
597}
598
599static void print_page_info(struct page *page)
600{
601 printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
602 page, page->objects, page->inuse, page->freelist, page->flags);
603
604}
605
606static void slab_bug(struct kmem_cache *s, char *fmt, ...)
607{
608 va_list args;
609 char buf[100];
610
611 va_start(args, fmt);
612 vsnprintf(buf, sizeof(buf), fmt, args);
613 va_end(args);
614 printk(KERN_ERR "========================================"
615 "=====================================\n");
616 printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
617 printk(KERN_ERR "----------------------------------------"
618 "-------------------------------------\n\n");
619
620 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
621}
622
623static void slab_fix(struct kmem_cache *s, char *fmt, ...)
624{
625 va_list args;
626 char buf[100];
627
628 va_start(args, fmt);
629 vsnprintf(buf, sizeof(buf), fmt, args);
630 va_end(args);
631 printk(KERN_ERR "FIX %s: %s\n", s->name, buf);
632}
633
634static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
635{
636 unsigned int off;
637 u8 *addr = page_address(page);
638
639 print_tracking(s, p);
640
641 print_page_info(page);
642
643 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n",
644 p, p - addr, get_freepointer(s, p));
645
646 if (s->flags & SLAB_RED_ZONE)
647 print_section("Redzone ", p - s->red_left_pad, s->red_left_pad);
648 else if (p > addr + 16)
649 print_section("Bytes b4 ", p - 16, 16);
650
651 print_section("Object ", p, min_t(unsigned long, s->object_size,
652 PAGE_SIZE));
653 if (s->flags & SLAB_RED_ZONE)
654 print_section("Redzone ", p + s->object_size,
655 s->inuse - s->object_size);
656
657 if (s->offset)
658 off = s->offset + sizeof(void *);
659 else
660 off = s->inuse;
661
662 if (s->flags & SLAB_STORE_USER)
663 off += 2 * sizeof(struct track);
664
665 if (off != size_from_object(s))
666
667 print_section("Padding ", p + off, size_from_object(s) - off);
668
669 dump_stack();
670}
671
672static void object_err(struct kmem_cache *s, struct page *page,
673 u8 *object, char *reason)
674{
675 slab_bug(s, "%s", reason);
676 print_trailer(s, page, object);
677}
678
679static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
680{
681 va_list args;
682 char buf[100];
683
684 va_start(args, fmt);
685 vsnprintf(buf, sizeof(buf), fmt, args);
686 va_end(args);
687 slab_bug(s, "%s", buf);
688 print_page_info(page);
689 dump_stack();
690}
691
692static void init_object(struct kmem_cache *s, void *object, u8 val)
693{
694 u8 *p = object;
695
696 if (s->flags & SLAB_RED_ZONE)
697 memset(p - s->red_left_pad, val, s->red_left_pad);
698
699 if (s->flags & __OBJECT_POISON) {
700 memset(p, POISON_FREE, s->object_size - 1);
701 p[s->object_size - 1] = POISON_END;
702 }
703
704 if (s->flags & SLAB_RED_ZONE)
705 memset(p + s->object_size, val, s->inuse - s->object_size);
706}
707
708static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
709 void *from, void *to)
710{
711 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data);
712 memset(from, data, to - from);
713}
714
715static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
716 u8 *object, char *what,
717 u8 *start, unsigned int value, unsigned int bytes)
718{
719 u8 *fault;
720 u8 *end;
721
722 fault = memchr_inv(start, value, bytes);
723 if (!fault)
724 return 1;
725
726 end = start + bytes;
727 while (end > fault && end[-1] == value)
728 end--;
729
730 slab_bug(s, "%s overwritten", what);
731 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n",
732 fault, end - 1, fault[0], value);
733 print_trailer(s, page, object);
734
735 restore_bytes(s, what, value, fault, end);
736 return 0;
737}
738
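/*
 * Object layout with debugging enabled:
 *
 * object address
 *	Bytes of the object to be managed; poisoning uses POISON_FREE
 *	(0x6b) with a POISON_END (0xa5) byte at the end of the object.
 *	With SLAB_RED_ZONE an additional red zone of s->red_left_pad bytes
 *	precedes the object.
 * object + s->object_size
 *	Padding to the next word boundary, also used for red zoning.
 * object + s->inuse
 *	A. free pointer (when it cannot be placed inside the object)
 *	B. struct track pair for SLAB_STORE_USER
 * object + s->size
 *	Nothing is used beyond s->size; the remainder of the slab page is
 *	filled with POISON_INUSE padding that slab_pad_check() verifies.
 */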
777static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
778{
779 unsigned long off = s->inuse;
780
781 if (s->offset)
782
783 off += sizeof(void *);
784
785 if (s->flags & SLAB_STORE_USER)
786
787 off += 2 * sizeof(struct track);
788
789 if (size_from_object(s) == off)
790 return 1;
791
792 return check_bytes_and_report(s, page, p, "Object padding",
793 p + off, POISON_INUSE, size_from_object(s) - off);
794}
795
796
797static int slab_pad_check(struct kmem_cache *s, struct page *page)
798{
799 u8 *start;
800 u8 *fault;
801 u8 *end;
802 int length;
803 int remainder;
804
805 if (!(s->flags & SLAB_POISON))
806 return 1;
807
808 start = page_address(page);
809 length = (PAGE_SIZE << compound_order(page)) - s->reserved;
810 end = start + length;
811 remainder = length % s->size;
812 if (!remainder)
813 return 1;
814
815 fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
816 if (!fault)
817 return 1;
818 while (end > fault && end[-1] == POISON_INUSE)
819 end--;
820
821 slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
822 print_section("Padding ", end - remainder, remainder);
823
824 restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
825 return 0;
826}
827
828static int check_object(struct kmem_cache *s, struct page *page,
829 void *object, u8 val)
830{
831 u8 *p = object;
832 u8 *endobject = object + s->object_size;
833
834 if (s->flags & SLAB_RED_ZONE) {
835 if (!check_bytes_and_report(s, page, object, "Redzone",
836 object - s->red_left_pad, val, s->red_left_pad))
837 return 0;
838
839 if (!check_bytes_and_report(s, page, object, "Redzone",
840 endobject, val, s->inuse - s->object_size))
841 return 0;
842 } else {
843 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
844 check_bytes_and_report(s, page, p, "Alignment padding",
845 endobject, POISON_INUSE, s->inuse - s->object_size);
846 }
847 }
848
849 if (s->flags & SLAB_POISON) {
850 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
851 (!check_bytes_and_report(s, page, p, "Poison", p,
852 POISON_FREE, s->object_size - 1) ||
853 !check_bytes_and_report(s, page, p, "Poison",
854 p + s->object_size - 1, POISON_END, 1)))
855 return 0;
856
857
858
859 check_pad_bytes(s, page, p);
860 }
861
	if (!s->offset && val == SLUB_RED_ACTIVE)
		/*
		 * Object and freepointer overlap. Cannot check
		 * freepointer while object is allocated.
		 */
		return 1;

	/* Check free pointer validity */
	if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
		object_err(s, page, p, "Freepointer corrupt");
		/*
		 * No choice but to zap it and thus lose the remainder
		 * of the free objects in this slab. May cause
		 * another error because the object count is now wrong.
		 */
		set_freepointer(s, p, NULL);
		return 0;
	}
	return 1;
}
882
883static int check_slab(struct kmem_cache *s, struct page *page)
884{
885 int maxobj;
886
887 VM_BUG_ON(!irqs_disabled());
888
889 if (!PageSlab(page)) {
890 slab_err(s, page, "Not a valid slab page");
891 return 0;
892 }
893
894 maxobj = order_objects(compound_order(page), s->size, s->reserved);
	if (page->objects > maxobj) {
		slab_err(s, page, "objects %u > max %u",
			page->objects, maxobj);
		return 0;
	}
	if (page->inuse > page->objects) {
		slab_err(s, page, "inuse %u > max %u",
			page->inuse, page->objects);
		return 0;
	}
905
906 slab_pad_check(s, page);
907 return 1;
908}
909
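/*
 * Determine whether a given object is on the page's freelist.  Must be
 * called with the slab locked so the freelist cannot change; also repairs
 * obviously corrupted counters and freelist pointers as a side effect.
 */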
914static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
915{
916 int nr = 0;
917 void *fp;
918 void *object = NULL;
919 unsigned long max_objects;
920
921 fp = page->freelist;
922 while (fp && nr <= page->objects) {
923 if (fp == search)
924 return 1;
925 if (!check_valid_pointer(s, page, fp)) {
926 if (object) {
927 object_err(s, page, object,
928 "Freechain corrupt");
929 set_freepointer(s, object, NULL);
930 break;
931 } else {
932 slab_err(s, page, "Freepointer corrupt");
933 page->freelist = NULL;
934 page->inuse = page->objects;
935 slab_fix(s, "Freelist cleared");
936 return 0;
937 }
938 break;
939 }
940 object = fp;
941 fp = get_freepointer(s, object);
942 nr++;
943 }
944
945 max_objects = order_objects(compound_order(page), s->size, s->reserved);
946 if (max_objects > MAX_OBJS_PER_PAGE)
947 max_objects = MAX_OBJS_PER_PAGE;
948
949 if (page->objects != max_objects) {
950 slab_err(s, page, "Wrong number of objects. Found %d but "
951 "should be %d", page->objects, max_objects);
952 page->objects = max_objects;
953 slab_fix(s, "Number of objects adjusted.");
954 }
955 if (page->inuse != page->objects - nr) {
956 slab_err(s, page, "Wrong object count. Counter is %d but "
957 "counted were %d", page->inuse, page->objects - nr);
958 page->inuse = page->objects - nr;
959 slab_fix(s, "Object count adjusted.");
960 }
961 return search == NULL;
962}
963
964static void trace(struct kmem_cache *s, struct page *page, void *object,
965 int alloc)
966{
967 if (s->flags & SLAB_TRACE) {
968 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
969 s->name,
970 alloc ? "alloc" : "free",
971 object, page->inuse,
972 page->freelist);
973
974 if (!alloc)
975 print_section("Object ", (void *)object, s->object_size);
976
977 dump_stack();
978 }
979}
980
985static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
986{
987 flags &= gfp_allowed_mask;
988 lockdep_trace_alloc(flags);
989 might_sleep_if(flags & __GFP_WAIT);
990
991 return should_failslab(s->object_size, flags, s->flags);
992}
993
994static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
995 size_t size, void **p)
996{
997 size_t i;
998
999 flags &= gfp_allowed_mask;
1000 for (i = 0; i < size; i++) {
1001 void *object = p[i];
1002
1003 kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
1004 kmemleak_alloc_recursive(object, s->object_size, 1,
1005 s->flags, flags);
1006 }
1007}
1008
1009static inline void slab_free_hook(struct kmem_cache *s, void *x)
1010{
1011 kmemleak_free_recursive(x, s->flags);
1012
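	/*
	 * The fastpath no longer disables interrupts, but some of the
	 * debug hooks below expect them off, so disable them here
	 * temporarily.
	 */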
1018#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
1019 {
1020 unsigned long flags;
1021
1022 local_irq_save(flags);
1023 kmemcheck_slab_free(s, x, s->object_size);
1024 debug_check_no_locks_freed(x, s->object_size);
1025 local_irq_restore(flags);
1026 }
1027#endif
1028 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1029 debug_check_no_obj_freed(x, s->object_size);
1030}
1031
1037static void add_full(struct kmem_cache *s,
1038 struct kmem_cache_node *n, struct page *page)
1039{
1040 if (!(s->flags & SLAB_STORE_USER))
1041 return;
1042
1043 list_add(&page->lru, &n->full);
1044}
1045
1049static void remove_full(struct kmem_cache *s, struct page *page)
1050{
1051 if (!(s->flags & SLAB_STORE_USER))
1052 return;
1053
1054 list_del(&page->lru);
1055}
1056
1057
1058static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1059{
1060 struct kmem_cache_node *n = get_node(s, node);
1061
1062 return atomic_long_read(&n->nr_slabs);
1063}
1064
1065static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1066{
1067 return atomic_long_read(&n->nr_slabs);
1068}
1069
1070static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1071{
1072 struct kmem_cache_node *n = get_node(s, node);
1073
1080 if (likely(n)) {
1081 atomic_long_inc(&n->nr_slabs);
1082 atomic_long_add(objects, &n->total_objects);
1083 }
1084}
1085static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1086{
1087 struct kmem_cache_node *n = get_node(s, node);
1088
1089 atomic_long_dec(&n->nr_slabs);
1090 atomic_long_sub(objects, &n->total_objects);
1091}
1092
1093
1094static void setup_object_debug(struct kmem_cache *s, struct page *page,
1095 void *object)
1096{
1097 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)))
1098 return;
1099
1100 init_object(s, object, SLUB_RED_INACTIVE);
1101 init_tracking(s, object);
1102}
1103
1104static inline int alloc_consistency_checks(struct kmem_cache *s, struct page *page,
1105 void *object)
1106{
1107 if (!check_slab(s, page))
1108 return 0;
1109
1110 if (!check_valid_pointer(s, page, object)) {
1111 object_err(s, page, object, "Freelist Pointer check fails");
1112 return 0;
1113 }
1114
1115 if (!check_object(s, page, object, SLUB_RED_INACTIVE))
1116 return 0;
1117
1118 return 1;
1119}
1120
1121static noinline int alloc_debug_processing(struct kmem_cache *s,
1122 struct page *page,
1123 void *object, unsigned long addr)
1124{
1125 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1126 if (!alloc_consistency_checks(s, page, object))
1127 goto bad;
1128 }
1129
1130
1131 if (s->flags & SLAB_STORE_USER)
1132 set_track(s, object, TRACK_ALLOC, addr);
1133 trace(s, page, object, 1);
1134 init_object(s, object, SLUB_RED_ACTIVE);
1135 return 1;
1136
1137bad:
1138 if (PageSlab(page)) {
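		/*
		 * If this is a slab page then do the best we can to avoid
		 * further trouble: marking all objects as used avoids
		 * touching the remaining, possibly corrupted, objects.
		 */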
1144 slab_fix(s, "Marking all objects used");
1145 page->inuse = page->objects;
1146 page->freelist = NULL;
1147 }
1148 return 0;
1149}
1150
1151static inline int free_consistency_checks(struct kmem_cache *s,
1152 struct page *page, void *object, unsigned long addr)
1153{
1154 if (!check_valid_pointer(s, page, object)) {
1155 slab_err(s, page, "Invalid object pointer 0x%p", object);
1156 return 0;
1157 }
1158
1159 if (on_freelist(s, page, object)) {
1160 object_err(s, page, object, "Object already free");
1161 return 0;
1162 }
1163
1164 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1165 return 0;
1166
1167 if (unlikely(s != page->slab_cache)) {
1168 if (!PageSlab(page)) {
1169 slab_err(s, page, "Attempt to free object(0x%p) "
1170 "outside of slab", object);
1171 } else if (!page->slab_cache) {
1172 printk(KERN_ERR
1173 "SLUB <none>: no slab for object 0x%p.\n",
1174 object);
1175 dump_stack();
1176 } else
1177 object_err(s, page, object,
1178 "page slab pointer corrupt.");
1179 return 0;
1180 }
1181 return 1;
1182}
1183
1184
1185static noinline int free_debug_processing(
1186 struct kmem_cache *s, struct page *page,
1187 void *head, void *tail, int bulk_cnt,
1188 unsigned long addr)
1189{
1190 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1191 void *object = head;
1192 int cnt = 0;
1193 unsigned long uninitialized_var(flags);
1194 int ret = 0;
1195
1196 spin_lock_irqsave(&n->list_lock, flags);
1197 slab_lock(page);
1198
1199 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1200 if (!check_slab(s, page))
1201 goto out;
1202 }
1203
1204next_object:
1205 cnt++;
1206
1207 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1208 if (!free_consistency_checks(s, page, object, addr))
1209 goto out;
1210 }
1211
1212 if (s->flags & SLAB_STORE_USER)
1213 set_track(s, object, TRACK_FREE, addr);
1214 trace(s, page, object, 0);
1215
1216 init_object(s, object, SLUB_RED_INACTIVE);
1217
1218
1219 if (object != tail) {
1220 object = get_freepointer(s, object);
1221 goto next_object;
1222 }
1223 ret = 1;
1224
1225out:
1226 if (cnt != bulk_cnt)
1227 slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
1228 bulk_cnt, cnt);
1229
1230 slab_unlock(page);
1231 spin_unlock_irqrestore(&n->list_lock, flags);
1232 if (!ret)
1233 slab_fix(s, "Object at 0x%p not freed", object);
1234 return ret;
1235}
1236
static int __init setup_slub_debug(char *str)
{
	slub_debug = DEBUG_DEFAULT_FLAGS;
	if (*str++ != '=' || !*str)
		/*
		 * No options specified. Switch on full debugging.
		 */
		goto out;

	if (*str == ',')
		/*
		 * No options but restriction on slabs. This means full
		 * debugging for slabs matching a pattern.
		 */
		goto check_slabs;

	if (tolower(*str) == 'o') {
		/*
		 * Avoid enabling debugging on caches if its minimum
		 * order would increase as a result.
		 */
		disable_higher_order_debug = 1;
		goto out;
	}

	slub_debug = 0;
	if (*str == '-')
		/*
		 * Switch off all debugging measures.
		 */
		goto out;

	/*
	 * Determine which debug features should be switched on
	 */
	for (; *str && *str != ','; str++) {
		switch (tolower(*str)) {
		case 'f':
			slub_debug |= SLAB_CONSISTENCY_CHECKS;
			break;
		case 'z':
			slub_debug |= SLAB_RED_ZONE;
			break;
		case 'p':
			slub_debug |= SLAB_POISON;
			break;
		case 'u':
			slub_debug |= SLAB_STORE_USER;
			break;
		case 't':
			slub_debug |= SLAB_TRACE;
			break;
		case 'a':
			slub_debug |= SLAB_FAILSLAB;
			break;
		default:
			printk(KERN_ERR "slub_debug option '%c' "
				"unknown. skipped\n", *str);
		}
	}

check_slabs:
	if (*str == ',')
		slub_debug_slabs = str + 1;
out:
	return 1;
}
1304
1305__setup("slub_debug", setup_slub_debug);
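/*
 * Examples (kernel command line):
 *
 *	slub_debug		enable the default debug flags on all caches
 *	slub_debug=FZ		consistency checks and red zoning on all caches
 *	slub_debug=,dentry	default debug flags, but only for the dentry cache
 *	slub_debug=O		do not let debugging raise a cache's minimum order
 *	slub_debug=-		switch all debugging off
 *
 * kmem_cache_flags() below applies slub_debug to a cache at creation time,
 * optionally restricted by the comma separated name list ('*' acts as a
 * simple glob).
 */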
1319static unsigned long kmem_cache_flags(unsigned long object_size,
1320 unsigned long flags, const char *name,
1321 void (*ctor)(void *))
1322{
1323 char *iter;
1324 size_t len;
1325
1326
1327 if (!slub_debug_slabs)
1328 return flags | slub_debug;
1329
1330 len = strlen(name);
1331 iter = slub_debug_slabs;
1332 while (*iter) {
1333 char *end, *glob;
1334 size_t cmplen;
1335
1336 end = strchr(iter, ',');
1337 if (!end)
1338 end = iter + strlen(iter);
1339
1340 glob = strnchr(iter, end - iter, '*');
1341 if (glob)
1342 cmplen = glob - iter;
1343 else
1344 cmplen = max_t(size_t, len, (end - iter));
1345
1346 if (!strncmp(name, iter, cmplen)) {
1347 flags |= slub_debug;
1348 break;
1349 }
1350
1351 if (!*end)
1352 break;
1353 iter = end + 1;
1354 }
1355
1356 return flags;
1357}
1358#else
1359static inline void setup_object_debug(struct kmem_cache *s,
1360 struct page *page, void *object) {}
1361
1362static inline int alloc_debug_processing(struct kmem_cache *s,
1363 struct page *page, void *object, unsigned long addr) { return 0; }
1364
1365static inline int free_debug_processing(
1366 struct kmem_cache *s, struct page *page,
1367 void *head, void *tail, int bulk_cnt,
1368 unsigned long addr) { return 0; }
1369
1370static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1371 { return 1; }
1372static inline int check_object(struct kmem_cache *s, struct page *page,
1373 void *object, u8 val) { return 1; }
1374static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1375 struct page *page) {}
1376static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1377static inline unsigned long kmem_cache_flags(unsigned long object_size,
1378 unsigned long flags, const char *name,
1379 void (*ctor)(void *))
1380{
1381 return flags;
1382}
1383#define slub_debug 0
1384
1385#define disable_higher_order_debug 0
1386
1387static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1388 { return 0; }
1389static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1390 { return 0; }
1391static inline void inc_slabs_node(struct kmem_cache *s, int node,
1392 int objects) {}
1393static inline void dec_slabs_node(struct kmem_cache *s, int node,
1394 int objects) {}
1395
1396static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
1397 { return 0; }
1398
static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
			size_t size, void **p) {}
1401
1402static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
1403
1404#endif
1405
1406static inline void slab_free_freelist_hook(struct kmem_cache *s,
1407 void *head, void *tail)
1408{
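/*
 * The compiler cannot tell that slab_free_hook() may compile to nothing,
 * so only walk the freelist when one of the debug options that hooks into
 * freeing is actually configured.
 */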
1413#if defined(CONFIG_KMEMCHECK) || \
1414 defined(CONFIG_LOCKDEP) || \
1415 defined(CONFIG_DEBUG_KMEMLEAK) || \
1416 defined(CONFIG_DEBUG_OBJECTS_FREE) || \
1417 defined(CONFIG_KASAN)
1418
1419 void *object = head;
1420 void *tail_obj = tail ? : head;
1421
1422 do {
1423 slab_free_hook(s, object);
1424 } while ((object != tail_obj) &&
1425 (object = get_freepointer(s, object)));
1426#endif
1427}
1428
1432static inline struct page *alloc_slab_page(struct kmem_cache *s,
1433 gfp_t flags, int node, struct kmem_cache_order_objects oo)
1434{
1435 struct page *page;
1436 int order = oo_order(oo);
1437
1438 flags |= __GFP_NOTRACK;
1439
1440 if (memcg_charge_slab(s, flags, order))
1441 return NULL;
1442
1443 if (node == NUMA_NO_NODE)
1444 page = alloc_pages(flags, order);
1445 else
1446 page = alloc_pages_exact_node(node, flags, order);
1447
1448 if (!page)
1449 memcg_uncharge_slab(s, order);
1450
1451 return page;
1452}
1453
1454static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1455{
1456 struct page *page;
1457 struct kmem_cache_order_objects oo = s->oo;
1458 gfp_t alloc_gfp;
1459
1460 flags &= gfp_allowed_mask;
1461
1462 if (flags & __GFP_WAIT)
1463 local_irq_enable();
1464
1465 flags |= s->allocflags;
1466
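	/*
	 * Let the initial higher-order allocation fail quietly and without
	 * retrying under memory pressure, so we fall back to the minimum
	 * order allocation below.
	 */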
1471 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1472
1473 page = alloc_slab_page(s, alloc_gfp, node, oo);
1474 if (unlikely(!page)) {
1475 oo = s->min;
1480 page = alloc_slab_page(s, flags, node, oo);
1481
1482 if (page)
1483 stat(s, ORDER_FALLBACK);
1484 }
1485
1486 if (kmemcheck_enabled && page
1487 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1488 int pages = 1 << oo_order(oo);
1489
1490 kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
1491
1496 if (s->ctor)
1497 kmemcheck_mark_uninitialized_pages(page, pages);
1498 else
1499 kmemcheck_mark_unallocated_pages(page, pages);
1500 }
1501
1502 if (flags & __GFP_WAIT)
1503 local_irq_disable();
1504 if (!page)
1505 return NULL;
1506
1507 page->objects = oo_objects(oo);
1508 mod_zone_page_state(page_zone(page),
1509 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1510 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1511 1 << oo_order(oo));
1512
1513 return page;
1514}
1515
1516static void setup_object(struct kmem_cache *s, struct page *page,
1517 void *object)
1518{
1519 setup_object_debug(s, page, object);
1520 if (unlikely(s->ctor))
1521 s->ctor(object);
1522}
1523
1524static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1525{
1526 struct page *page;
1527 void *start;
1528 void *p;
1529 int order;
1530 int idx;
1531
1532 BUG_ON(flags & GFP_SLAB_BUG_MASK);
1533
1534 page = allocate_slab(s,
1535 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
1536 if (!page)
1537 goto out;
1538
1539 order = compound_order(page);
1540 inc_slabs_node(s, page_to_nid(page), page->objects);
1541 memcg_bind_pages(s, order);
1542 page->slab_cache = s;
1543 __SetPageSlab(page);
1544 if (page_is_pfmemalloc(page))
1545 SetPageSlabPfmemalloc(page);
1546
1547 start = page_address(page);
1548
1549 if (unlikely(s->flags & SLAB_POISON))
1550 memset(start, POISON_INUSE, PAGE_SIZE << order);
1551
1552 for_each_object_idx(p, idx, s, start, page->objects) {
1553 setup_object(s, page, p);
1554 if (likely(idx < page->objects))
1555 set_freepointer(s, p, p + s->size);
1556 else
1557 set_freepointer(s, p, NULL);
1558 }
1559
1560 page->freelist = fixup_red_left(s, start);
1561 page->inuse = page->objects;
1562 page->frozen = 1;
1563out:
1564 return page;
1565}
1566
1567static void __free_slab(struct kmem_cache *s, struct page *page)
1568{
1569 int order = compound_order(page);
1570 int pages = 1 << order;
1571
1572 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1573 void *p;
1574
1575 slab_pad_check(s, page);
1576 for_each_object(p, s, page_address(page),
1577 page->objects)
1578 check_object(s, page, p, SLUB_RED_INACTIVE);
1579 }
1580
1581 kmemcheck_free_shadow(page, compound_order(page));
1582
1583 mod_zone_page_state(page_zone(page),
1584 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
1585 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1586 -pages);
1587
1588 __ClearPageSlabPfmemalloc(page);
1589 __ClearPageSlab(page);
1590
1591 memcg_release_pages(s, order);
1592 page_mapcount_reset(page);
1593 if (current->reclaim_state)
1594 current->reclaim_state->reclaimed_slab += pages;
1595 __free_pages(page, order);
1596 memcg_uncharge_slab(s, order);
1597}
1598
1599#define need_reserve_slab_rcu \
1600 (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
1601
1602static void rcu_free_slab(struct rcu_head *h)
1603{
1604 struct page *page;
1605
1606 if (need_reserve_slab_rcu)
1607 page = virt_to_head_page(h);
1608 else
1609 page = container_of((struct list_head *)h, struct page, lru);
1610
1611 __free_slab(page->slab_cache, page);
1612}
1613
1614static void free_slab(struct kmem_cache *s, struct page *page)
1615{
1616 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
1617 struct rcu_head *head;
1618
1619 if (need_reserve_slab_rcu) {
1620 int order = compound_order(page);
1621 int offset = (PAGE_SIZE << order) - s->reserved;
1622
1623 VM_BUG_ON(s->reserved != sizeof(*head));
1624 head = page_address(page) + offset;
1625 } else {
1629 head = (void *)&page->lru;
1630 }
1631
1632 call_rcu(head, rcu_free_slab);
1633 } else
1634 __free_slab(s, page);
1635}
1636
1637static void discard_slab(struct kmem_cache *s, struct page *page)
1638{
1639 dec_slabs_node(s, page_to_nid(page), page->objects);
1640 free_slab(s, page);
1641}
1642
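/*
 * Management of partially allocated slabs.  Callers of add_partial() and
 * remove_partial() must hold the node's list_lock.
 */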
1648static inline void add_partial(struct kmem_cache_node *n,
1649 struct page *page, int tail)
1650{
1651 n->nr_partial++;
1652 if (tail == DEACTIVATE_TO_TAIL)
1653 list_add_tail(&page->lru, &n->partial);
1654 else
1655 list_add(&page->lru, &n->partial);
1656}
1657
1661static inline void remove_partial(struct kmem_cache_node *n,
1662 struct page *page)
1663{
1664 list_del(&page->lru);
1665 n->nr_partial--;
1666}
1667
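/*
 * Remove the slab from the node's partial list, freeze it and return its
 * objects (the old freelist), or NULL if the cmpxchg fails.  Called with
 * the list_lock held.
 */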
1676static inline void *acquire_slab(struct kmem_cache *s,
1677 struct kmem_cache_node *n, struct page *page,
1678 int mode, int *objects)
1679{
1680 void *freelist;
1681 unsigned long counters;
1682 struct page new;
1683
1689 freelist = page->freelist;
1690 counters = page->counters;
1691 new.counters = counters;
1692 *objects = new.objects - new.inuse;
1693 if (mode) {
1694 new.inuse = page->objects;
1695 new.freelist = NULL;
1696 } else {
1697 new.freelist = freelist;
1698 }
1699
1700 VM_BUG_ON(new.frozen);
1701 new.frozen = 1;
1702
1703 if (!__cmpxchg_double_slab(s, page,
1704 freelist, counters,
1705 new.freelist, new.counters,
1706 "acquire_slab"))
1707 return NULL;
1708
1709 remove_partial(n, page);
1710 WARN_ON(!freelist);
1711 return freelist;
1712}
1713
1714static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1715static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1716
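/*
 * Try to allocate a partial slab from a specific node, refilling the cpu
 * partial list opportunistically while we hold the list_lock.
 */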
1720static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1721 struct kmem_cache_cpu *c, gfp_t flags)
1722{
1723 struct page *page, *page2;
1724 void *object = NULL;
1725 int available = 0;
1726 int objects;
1727
1734 if (!n || !n->nr_partial)
1735 return NULL;
1736
1737 spin_lock(&n->list_lock);
1738 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1739 void *t;
1740
1741 if (!pfmemalloc_match(page, flags))
1742 continue;
1743
1744 t = acquire_slab(s, n, page, object == NULL, &objects);
1745 if (!t)
1746 break;
1747
1748 available += objects;
1749 if (!object) {
1750 c->page = page;
1751 stat(s, ALLOC_FROM_PARTIAL);
1752 object = t;
1753 } else {
1754 put_cpu_partial(s, page, 0);
1755 stat(s, CPU_PARTIAL_NODE);
1756 }
1757 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
1758 break;
1759
1760 }
1761 spin_unlock(&n->list_lock);
1762 return object;
1763}
1764
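/*
 * Try to get a partial slab from any other node, searching in order of
 * increasing NUMA distance.
 */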
1768static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
1769 struct kmem_cache_cpu *c)
1770{
1771#ifdef CONFIG_NUMA
1772 struct zonelist *zonelist;
1773 struct zoneref *z;
1774 struct zone *zone;
1775 enum zone_type high_zoneidx = gfp_zone(flags);
1776 void *object;
1777 unsigned int cpuset_mems_cookie;
1778
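	/*
	 * The defrag ratio trades off filling up remote partial slabs
	 * (less fragmentation) against allocating fresh local slabs
	 * (better locality).  The get_cycles() test below turns
	 * remote_node_defrag_ratio into a cheap probabilistic throttle.
	 */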
1797 if (!s->remote_node_defrag_ratio ||
1798 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1799 return NULL;
1800
1801 do {
1802 cpuset_mems_cookie = read_mems_allowed_begin();
1803 zonelist = node_zonelist(slab_node(), flags);
1804 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
1805 struct kmem_cache_node *n;
1806
1807 n = get_node(s, zone_to_nid(zone));
1808
1809 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1810 n->nr_partial > s->min_partial) {
1811 object = get_partial_node(s, n, c, flags);
1812 if (object) {
1820 return object;
1821 }
1822 }
1823 }
1824 } while (read_mems_allowed_retry(cpuset_mems_cookie));
1825#endif
1826 return NULL;
1827}
1828
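/*
 * Get a partial slab, preferring the requested node and falling back to
 * any other node.
 */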
1832static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
1833 struct kmem_cache_cpu *c)
1834{
1835 void *object;
1836 int searchnode = node;
1837
1838 if (node == NUMA_NO_NODE)
1839 searchnode = numa_mem_id();
1840 else if (!node_present_pages(node))
1841 searchnode = node_to_mem_node(node);
1842
1843 object = get_partial_node(s, get_node(s, searchnode), c, flags);
1844 if (object || node != NUMA_NO_NODE)
1845 return object;
1846
1847 return get_any_partial(s, flags, c);
1848}
1849
#ifdef CONFIG_PREEMPT
/*
 * Calculate the next globally unique transaction for disambiguation
 * during cmpxchg. The transactions start with the cpu number and are then
 * incremented by CONFIG_NR_CPUS.
 */
#define TID_STEP  roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
 * No preemption supported therefore also no need to check for
 * different cpus.
 */
#define TID_STEP 1
#endif
1864
1865static inline unsigned long next_tid(unsigned long tid)
1866{
1867 return tid + TID_STEP;
1868}
1869
1870static inline unsigned int tid_to_cpu(unsigned long tid)
1871{
1872 return tid % TID_STEP;
1873}
1874
1875static inline unsigned long tid_to_event(unsigned long tid)
1876{
1877 return tid / TID_STEP;
1878}
1879
1880static inline unsigned int init_tid(int cpu)
1881{
1882 return cpu;
1883}
1884
1885static inline void note_cmpxchg_failure(const char *n,
1886 const struct kmem_cache *s, unsigned long tid)
1887{
1888#ifdef SLUB_DEBUG_CMPXCHG
1889 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
1890
1891 printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);
1892
1893#ifdef CONFIG_PREEMPT
1894 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
1895 printk("due to cpu change %d -> %d\n",
1896 tid_to_cpu(tid), tid_to_cpu(actual_tid));
1897 else
1898#endif
1899 if (tid_to_event(tid) != tid_to_event(actual_tid))
1900 printk("due to cpu running other code. Event %ld->%ld\n",
1901 tid_to_event(tid), tid_to_event(actual_tid));
1902 else
1903 printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
1904 actual_tid, tid, next_tid(tid));
1905#endif
1906 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
1907}
1908
1909static void init_kmem_cache_cpus(struct kmem_cache *s)
1910{
1911 int cpu;
1912
1913 for_each_possible_cpu(cpu)
1914 per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu);
1915}
1916
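/*
 * Remove (deactivate) the cpu slab: drain the remaining per-cpu freelist
 * back into the page and move the page to the appropriate node list.
 */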
1920static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
1921{
1922 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1923 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1924 int lock = 0;
1925 enum slab_modes l = M_NONE, m = M_NONE;
1926 void *nextfree;
1927 int tail = DEACTIVATE_TO_HEAD;
1928 struct page new;
1929 struct page old;
1930
1931 if (page->freelist) {
1932 stat(s, DEACTIVATE_REMOTE_FREES);
1933 tail = DEACTIVATE_TO_TAIL;
1934 }
1935
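	/*
	 * Stage one: free all available per-cpu objects back to the page's
	 * freelist while the page is still frozen, keeping the last object.
	 * No list_lock is needed because the page is frozen.
	 */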
1944 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1945 void *prior;
1946 unsigned long counters;
1947
1948 do {
1949 prior = page->freelist;
1950 counters = page->counters;
1951 set_freepointer(s, freelist, prior);
1952 new.counters = counters;
1953 new.inuse--;
1954 VM_BUG_ON(!new.frozen);
1955
1956 } while (!__cmpxchg_double_slab(s, page,
1957 prior, counters,
1958 freelist, new.counters,
1959 "drain percpu freelist"));
1960
1961 freelist = nextfree;
1962 }
1963
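	/*
	 * Stage two: determine the target list (partial, full or free),
	 * then unfreeze the page with a cmpxchg.  The list_lock is taken
	 * before the cmpxchg and the whole transaction is redone on
	 * failure so list membership always matches the frozen bit.
	 */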
1978redo:
1979
1980 old.freelist = page->freelist;
1981 old.counters = page->counters;
1982 VM_BUG_ON(!old.frozen);
1983
1984
1985 new.counters = old.counters;
1986 if (freelist) {
1987 new.inuse--;
1988 set_freepointer(s, freelist, old.freelist);
1989 new.freelist = freelist;
1990 } else
1991 new.freelist = old.freelist;
1992
1993 new.frozen = 0;
1994
1995 if (!new.inuse && n->nr_partial > s->min_partial)
1996 m = M_FREE;
1997 else if (new.freelist) {
1998 m = M_PARTIAL;
1999 if (!lock) {
2000 lock = 1;
2006 spin_lock(&n->list_lock);
2007 }
2008 } else {
2009 m = M_FULL;
2010 if (kmem_cache_debug(s) && !lock) {
2011 lock = 1;
2017 spin_lock(&n->list_lock);
2018 }
2019 }
2020
2021 if (l != m) {
2022
2023 if (l == M_PARTIAL)
2024
2025 remove_partial(n, page);
2026
2027 else if (l == M_FULL)
2028
2029 remove_full(s, page);
2030
2031 if (m == M_PARTIAL) {
2032
2033 add_partial(n, page, tail);
2034 stat(s, tail);
2035
2036 } else if (m == M_FULL) {
2037
2038 stat(s, DEACTIVATE_FULL);
2039 add_full(s, n, page);
2040
2041 }
2042 }
2043
2044 l = m;
2045 if (!__cmpxchg_double_slab(s, page,
2046 old.freelist, old.counters,
2047 new.freelist, new.counters,
2048 "unfreezing slab"))
2049 goto redo;
2050
2051 if (lock)
2052 spin_unlock(&n->list_lock);
2053
2054 if (m == M_FREE) {
2055 stat(s, DEACTIVATE_EMPTY);
2056 discard_slab(s, page);
2057 stat(s, FREE_SLAB);
2058 }
2059}
2060
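/*
 * Unfreeze all slabs on the cpu's partial list, moving them to the node
 * partial lists or freeing the empty ones.  Must be called with interrupts
 * disabled.
 */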
2068static void unfreeze_partials(struct kmem_cache *s,
2069 struct kmem_cache_cpu *c)
2070{
2071 struct kmem_cache_node *n = NULL, *n2 = NULL;
2072 struct page *page, *discard_page = NULL;
2073
2074 while ((page = c->partial)) {
2075 struct page new;
2076 struct page old;
2077
2078 c->partial = page->next;
2079
2080 n2 = get_node(s, page_to_nid(page));
2081 if (n != n2) {
2082 if (n)
2083 spin_unlock(&n->list_lock);
2084
2085 n = n2;
2086 spin_lock(&n->list_lock);
2087 }
2088
2089 do {
2090
2091 old.freelist = page->freelist;
2092 old.counters = page->counters;
2093 VM_BUG_ON(!old.frozen);
2094
2095 new.counters = old.counters;
2096 new.freelist = old.freelist;
2097
2098 new.frozen = 0;
2099
2100 } while (!__cmpxchg_double_slab(s, page,
2101 old.freelist, old.counters,
2102 new.freelist, new.counters,
2103 "unfreezing slab"));
2104
2105 if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
2106 page->next = discard_page;
2107 discard_page = page;
2108 } else {
2109 add_partial(n, page, DEACTIVATE_TO_TAIL);
2110 stat(s, FREE_ADD_PARTIAL);
2111 }
2112 }
2113
2114 if (n)
2115 spin_unlock(&n->list_lock);
2116
2117 while (discard_page) {
2118 page = discard_page;
2119 discard_page = discard_page->next;
2120
2121 stat(s, DEACTIVATE_EMPTY);
2122 discard_slab(s, page);
2123 stat(s, FREE_SLAB);
2124 }
2125}
2126
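/*
 * Put a page that was just frozen (in __slab_free() or get_partial_node())
 * onto the cpu partial list.  If that list already carries too many free
 * objects it is drained to the node partial lists first.
 */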
2136static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
2137{
2138 struct page *oldpage;
2139 int pages;
2140 int pobjects;
2141
2142 preempt_disable();
2143 do {
2144 pages = 0;
2145 pobjects = 0;
2146 oldpage = this_cpu_read(s->cpu_slab->partial);
2147
2148 if (oldpage) {
2149 pobjects = oldpage->pobjects;
2150 pages = oldpage->pages;
2151 if (drain && pobjects > s->cpu_partial) {
2152 unsigned long flags;
2157 local_irq_save(flags);
2158 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2159 local_irq_restore(flags);
2160 oldpage = NULL;
2161 pobjects = 0;
2162 pages = 0;
2163 stat(s, CPU_PARTIAL_DRAIN);
2164 }
2165 }
2166
2167 pages++;
2168 pobjects += page->objects - page->inuse;
2169
2170 page->pages = pages;
2171 page->pobjects = pobjects;
2172 page->next = oldpage;
2173
2174 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
2175
2176 if (unlikely(!s->cpu_partial)) {
2177 unsigned long flags;
2178
2179 local_irq_save(flags);
2180 unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
2181 local_irq_restore(flags);
2182 }
2183 preempt_enable();
2184}
2185
2186static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2187{
2188 stat(s, CPUSLAB_FLUSH);
2189 deactivate_slab(s, c->page, c->freelist);
2190
2191 c->tid = next_tid(c->tid);
2192 c->page = NULL;
2193 c->freelist = NULL;
2194}
2195
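/*
 * Flush the cpu slab and the cpu partial list of a cpu.  Called with
 * interrupts disabled (e.g. from the IPI handler).
 */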
2201static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2202{
2203 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2204
2205 if (likely(c)) {
2206 if (c->page)
2207 flush_slab(s, c);
2208
2209 unfreeze_partials(s, c);
2210 }
2211}
2212
2213static void flush_cpu_slab(void *d)
2214{
2215 struct kmem_cache *s = d;
2216
2217 __flush_cpu_slab(s, smp_processor_id());
2218}
2219
2220static bool has_cpu_slab(int cpu, void *info)
2221{
2222 struct kmem_cache *s = info;
2223 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2224
2225 return c->page || c->partial;
2226}
2227
2228static void flush_all(struct kmem_cache *s)
2229{
2230 on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
2231}
2232
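/*
 * Check whether the page belongs to the node we are supposed to allocate
 * from.
 */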
2237static inline int node_match(struct page *page, int node)
2238{
2239#ifdef CONFIG_NUMA
2240 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2241 return 0;
2242#endif
2243 return 1;
2244}
2245
2246static int count_free(struct page *page)
2247{
2248 return page->objects - page->inuse;
2249}
2250
2251static unsigned long count_partial(struct kmem_cache_node *n,
2252 int (*get_count)(struct page *))
2253{
2254 unsigned long flags;
2255 unsigned long x = 0;
2256 struct page *page;
2257
2258 spin_lock_irqsave(&n->list_lock, flags);
2259 list_for_each_entry(page, &n->partial, lru)
2260 x += get_count(page);
2261 spin_unlock_irqrestore(&n->list_lock, flags);
2262 return x;
2263}
2264
2265static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2266{
2267#ifdef CONFIG_SLUB_DEBUG
2268 return atomic_long_read(&n->total_objects);
2269#else
2270 return 0;
2271#endif
2272}
2273
2274static noinline void
2275slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2276{
2277 int node;
2278
2279 printk(KERN_WARNING
2280 "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
2281 nid, gfpflags);
2282 printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
2283 "default order: %d, min order: %d\n", s->name, s->object_size,
2284 s->size, oo_order(s->oo), oo_order(s->min));
2285
2286 if (oo_order(s->min) > get_order(s->object_size))
2287 printk(KERN_WARNING " %s debugging increased min order, use "
2288 "slub_debug=O to disable.\n", s->name);
2289
2290 for_each_online_node(node) {
2291 struct kmem_cache_node *n = get_node(s, node);
2292 unsigned long nr_slabs;
2293 unsigned long nr_objs;
2294 unsigned long nr_free;
2295
2296 if (!n)
2297 continue;
2298
2299 nr_free = count_partial(n, count_free);
2300 nr_slabs = node_nr_slabs(n);
2301 nr_objs = node_nr_objs(n);
2302
2303 printk(KERN_WARNING
2304 " node %d: slabs: %ld, objs: %ld, free: %ld\n",
2305 node, nr_slabs, nr_objs, nr_free);
2306 }
2307}
2308
2309static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
2310 int node, struct kmem_cache_cpu **pc)
2311{
2312 void *freelist;
2313 struct kmem_cache_cpu *c = *pc;
2314 struct page *page;
2315
2316 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2317
2318 freelist = get_partial(s, flags, node, c);
2319
2320 if (freelist)
2321 return freelist;
2322
2323 page = new_slab(s, flags, node);
2324 if (page) {
2325 c = this_cpu_ptr(s->cpu_slab);
2326 if (c->page)
2327 flush_slab(s, c);
2328
2333 freelist = page->freelist;
2334 page->freelist = NULL;
2335
2336 stat(s, ALLOC_SLAB);
2337 c->page = page;
2338 *pc = c;
2339 } else
2340 freelist = NULL;
2341
2342 return freelist;
2343}
2344
2345static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
2346{
2347 if (unlikely(PageSlabPfmemalloc(page)))
2348 return gfp_pfmemalloc_allowed(gfpflags);
2349
2350 return true;
2351}
2352
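/*
 * Take the remaining freelist of a frozen page for the per-cpu slab, or
 * unfreeze the page if the freelist is empty.  The page stays frozen iff
 * the return value is not NULL.  Must be called with interrupts disabled.
 */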
2363static inline void *get_freelist(struct kmem_cache *s, struct page *page)
2364{
2365 struct page new;
2366 unsigned long counters;
2367 void *freelist;
2368
2369 do {
2370 freelist = page->freelist;
2371 counters = page->counters;
2372
2373 new.counters = counters;
2374 VM_BUG_ON(!new.frozen);
2375
2376 new.inuse = page->objects;
2377 new.frozen = freelist != NULL;
2378
2379 } while (!__cmpxchg_double_slab(s, page,
2380 freelist, counters,
2381 NULL, new.counters,
2382 "get_freelist"));
2383
2384 return freelist;
2385}
2386
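/*
 * Slow path: the lockless per-cpu freelist is empty or does not satisfy
 * the node/pfmemalloc constraints.  Refill it from the page freelist, the
 * cpu partial list, a node partial list or a newly allocated slab, in that
 * order.  Called with interrupts disabled (see __slab_alloc() below).
 */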
2406static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2407 unsigned long addr, struct kmem_cache_cpu *c)
2408{
2409 void *freelist;
2410 struct page *page;
2411
2412 page = c->page;
2413 if (!page)
2414 goto new_slab;
2415redo:
2416
2417 if (unlikely(!node_match(page, node))) {
2418 int searchnode = node;
2419
2420 if (node != NUMA_NO_NODE && !node_present_pages(node))
2421 searchnode = node_to_mem_node(node);
2422
2423 if (unlikely(!node_match(page, searchnode))) {
2424 stat(s, ALLOC_NODE_MISMATCH);
2425 deactivate_slab(s, page, c->freelist);
2426 c->page = NULL;
2427 c->freelist = NULL;
2428 goto new_slab;
2429 }
2430 }
2431
2437 if (unlikely(!pfmemalloc_match(page, gfpflags))) {
2438 deactivate_slab(s, page, c->freelist);
2439 c->page = NULL;
2440 c->freelist = NULL;
2441 goto new_slab;
2442 }
2443
2444
2445 freelist = c->freelist;
2446 if (freelist)
2447 goto load_freelist;
2448
2449 stat(s, ALLOC_SLOWPATH);
2450
2451 freelist = get_freelist(s, page);
2452
2453 if (!freelist) {
2454 c->page = NULL;
2455 stat(s, DEACTIVATE_BYPASS);
2456 goto new_slab;
2457 }
2458
2459 stat(s, ALLOC_REFILL);
2460
2461load_freelist:
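	/*
	 * freelist points at the objects to hand out and c->page at the
	 * slab they come from; that page must be frozen for the per-cpu
	 * fastpath to own it.
	 */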
2467 VM_BUG_ON(!c->page->frozen);
2468 c->freelist = get_freepointer(s, freelist);
2469 c->tid = next_tid(c->tid);
2470 return freelist;
2471
2472new_slab:
2473
2474 if (c->partial) {
2475 page = c->page = c->partial;
2476 c->partial = page->next;
2477 stat(s, CPU_PARTIAL_ALLOC);
2478 c->freelist = NULL;
2479 goto redo;
2480 }
2481
2482 freelist = new_slab_objects(s, gfpflags, node, &c);
2483
2484 if (unlikely(!freelist)) {
2485 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
2486 slab_out_of_memory(s, gfpflags, node);
2487 return NULL;
2488 }
2489
2490 page = c->page;
2491 if (likely(!kmem_cache_debug(s) && pfmemalloc_match(page, gfpflags)))
2492 goto load_freelist;
2493
2494
2495 if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
2496 goto new_slab;
2497
2498 deactivate_slab(s, page, get_freepointer(s, freelist));
2499 c->page = NULL;
2500 c->freelist = NULL;
2501 return freelist;
2502}
2503
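/*
 * Wrapper for ___slab_alloc() that disables interrupts and, under
 * CONFIG_PREEMPT, refetches the per-cpu pointer in case we migrated to
 * another cpu in the meantime.
 */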
2508static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2509 unsigned long addr, struct kmem_cache_cpu *c)
2510{
2511 void *p;
2512 unsigned long flags;
2513
2514 local_irq_save(flags);
2515#ifdef CONFIG_PREEMPT
2521 c = this_cpu_ptr(s->cpu_slab);
2522#endif
2523
2524 p = ___slab_alloc(s, gfpflags, node, addr, c);
2525 local_irq_restore(flags);
2526 return p;
2527}
2528
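/*
 * Inlined fastpath so that kmalloc() and kmem_cache_alloc() have the
 * fastpath folded into their bodies.  The fastpath reads the per-cpu
 * freelist and transaction id (tid) and then replaces the freelist head
 * with a single this_cpu_cmpxchg_double(), without disabling interrupts
 * or preemption.
 */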
2539static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2540 gfp_t gfpflags, int node, unsigned long addr)
2541{
2542 void *object;
2543 struct kmem_cache_cpu *c;
2544 struct page *page;
2545 unsigned long tid;
2546
2547 if (slab_pre_alloc_hook(s, gfpflags))
2548 return NULL;
2549
2550 s = memcg_kmem_get_cache(s, gfpflags);
2551redo:
2563 preempt_disable();
2564 c = this_cpu_ptr(s->cpu_slab);
2565
2572 tid = c->tid;
2573 preempt_enable();
2574
2575 object = c->freelist;
2576 page = c->page;
2577 if (unlikely(!object || !node_match(page, node)))
2578 object = __slab_alloc(s, gfpflags, node, addr, c);
2579
2580 else {
2581 void *next_object = get_freepointer_safe(s, object);
2582
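		/*
		 * The cmpxchg only succeeds if freelist and tid are both
		 * unchanged, i.e. no other allocation or free happened on
		 * this cpu and we were not migrated in between.  On failure
		 * simply retry the whole fastpath.
		 */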
2595 if (unlikely(!this_cpu_cmpxchg_double(
2596 s->cpu_slab->freelist, s->cpu_slab->tid,
2597 object, tid,
2598 next_object, next_tid(tid)))) {
2599
2600 note_cmpxchg_failure("slab_alloc", s, tid);
2601 goto redo;
2602 }
2603 prefetch_freepointer(s, next_object);
2604 stat(s, ALLOC_FASTPATH);
2605 }
2606
2607 if (unlikely(gfpflags & __GFP_ZERO) && object)
2608 memset(object, 0, s->object_size);
2609
2610 slab_post_alloc_hook(s, gfpflags, 1, &object);
2611
2612 return object;
2613}
2614
2615static __always_inline void *slab_alloc(struct kmem_cache *s,
2616 gfp_t gfpflags, unsigned long addr)
2617{
2618 return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
2619}
2620
2621void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
2622{
2623 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2624
2625 trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
2626
2627 return ret;
2628}
2629EXPORT_SYMBOL(kmem_cache_alloc);
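/*
 * Typical usage (illustrative; "my_cache" and struct my_obj are
 * placeholders):
 *
 *	struct kmem_cache *s = kmem_cache_create("my_cache",
 *			sizeof(struct my_obj), 0, SLAB_HWCACHE_ALIGN, NULL);
 *	struct my_obj *p = kmem_cache_alloc(s, GFP_KERNEL);
 *	...
 *	kmem_cache_free(s, p);
 *	kmem_cache_destroy(s);
 */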
2630
2631#ifdef CONFIG_TRACING
2632void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
2633{
2634 void *ret = slab_alloc(s, gfpflags, _RET_IP_);
2635 trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
2636 return ret;
2637}
2638EXPORT_SYMBOL(kmem_cache_alloc_trace);
2639
2640void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
2641{
2642 void *ret = kmalloc_order(size, flags, order);
2643 trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
2644 return ret;
2645}
2646EXPORT_SYMBOL(kmalloc_order_trace);
2647#endif
2648
2649#ifdef CONFIG_NUMA
2650void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
2651{
2652 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2653
2654 trace_kmem_cache_alloc_node(_RET_IP_, ret,
2655 s->object_size, s->size, gfpflags, node);
2656
2657 return ret;
2658}
2659EXPORT_SYMBOL(kmem_cache_alloc_node);
2660
2661#ifdef CONFIG_TRACING
2662void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
2663 gfp_t gfpflags,
2664 int node, size_t size)
2665{
2666 void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
2667
2668 trace_kmalloc_node(_RET_IP_, ret,
2669 size, s->size, gfpflags, node);
2670 return ret;
2671}
2672EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2673#endif
2674#endif
2675
2676
2677
2678
2679
2680
2681
2682
2683
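/*
 * Slow path of kfree/kmem_cache_free: the objects do not belong to the
 * current cpu slab. Push them onto the page freelist with
 * cmpxchg_double_slab(), taking the node list_lock only when the slab has
 * to move between the full/partial lists or can be discarded entirely.
 */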
2684static void __slab_free(struct kmem_cache *s, struct page *page,
2685 void *head, void *tail, int cnt,
2686 unsigned long addr)
2688{
2689 void *prior;
2690 int was_frozen;
2691 struct page new;
2692 unsigned long counters;
2693 struct kmem_cache_node *n = NULL;
2694 unsigned long uninitialized_var(flags);
2695
2696 stat(s, FREE_SLOWPATH);
2697
2698 if (kmem_cache_debug(s) &&
2699 !free_debug_processing(s, page, head, tail, cnt, addr))
2700 return;
2701
2702 do {
2703 if (unlikely(n)) {
2704 spin_unlock_irqrestore(&n->list_lock, flags);
2705 n = NULL;
2706 }
2707 prior = page->freelist;
2708 counters = page->counters;
2709 set_freepointer(s, tail, prior);
2710 new.counters = counters;
2711 was_frozen = new.frozen;
2712 new.inuse -= cnt;
2713 if ((!new.inuse || !prior) && !was_frozen) {
2714
2715 if (!kmem_cache_debug(s) && !prior)
2716
2717
2718
2719
2720
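				/*
				 * The slab was full and is getting its first
				 * free object back: freeze it so it can go
				 * onto this cpu's partial list without taking
				 * the node list_lock.
				 */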
2721 new.frozen = 1;
2722
2723 else {
2724
2725 n = get_node(s, page_to_nid(page));
2726
2727
2728
2729
2730
2731
2732
2733
2734 spin_lock_irqsave(&n->list_lock, flags);
2735
2736 }
2737 }
2738
2739 } while (!cmpxchg_double_slab(s, page,
2740 prior, counters,
2741 head, new.counters,
2742 "__slab_free"));
2743
2744 if (likely(!n)) {
2745
2746
2747
2748
2749
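		/*
		 * If we just froze the slab then put it onto the per cpu
		 * partial list instead of the node partial list.
		 */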
2750 if (new.frozen && !was_frozen) {
2751 put_cpu_partial(s, page, 1);
2752 stat(s, CPU_PARTIAL_FREE);
2753 }
2754
2755
2756
2757
2758 if (was_frozen)
2759 stat(s, FREE_FROZEN);
2760 return;
2761 }
2762
2763 if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
2764 goto slab_empty;
2765
2766
2767
2768
2769
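	/*
	 * Objects remain in the slab. If it had no free objects before
	 * (only possible here for debug caches, which never freeze slabs),
	 * move it from the full list onto the node partial list.
	 */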
2770 if (kmem_cache_debug(s) && unlikely(!prior)) {
2771 remove_full(s, page);
2772 add_partial(n, page, DEACTIVATE_TO_TAIL);
2773 stat(s, FREE_ADD_PARTIAL);
2774 }
2775 spin_unlock_irqrestore(&n->list_lock, flags);
2776 return;
2777
2778slab_empty:
2779 if (prior) {
2780
2781
2782
2783 remove_partial(n, page);
2784 stat(s, FREE_REMOVE_PARTIAL);
2785 } else
2786
2787 remove_full(s, page);
2788
2789 spin_unlock_irqrestore(&n->list_lock, flags);
2790 stat(s, FREE_SLAB);
2791 discard_slab(s, page);
2792}
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
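/*
 * Fastpath for freeing, with forced inlining so that kfree() and
 * kmem_cache_free() avoid an extra call. head/tail delimit a detached
 * freelist of cnt objects that all belong to the same page (tail may be
 * NULL for a single object). If the page is the current cpu slab the
 * objects are pushed onto the lockless freelist with a cmpxchg_double;
 * otherwise __slab_free() handles the slow cases.
 */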
2809static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
2810 void *head, void *tail, int cnt,
2811 unsigned long addr)
2812{
2813 void *tail_obj = tail ? : head;
2814 struct kmem_cache_cpu *c;
2815 unsigned long tid;
2816
2817 slab_free_freelist_hook(s, head, tail);
2818
2819redo:
2820
2821
2822
2823
2824
2825
2826 preempt_disable();
2827 c = this_cpu_ptr(s->cpu_slab);
2828
2829 tid = c->tid;
2830 preempt_enable();
2831
2832 if (likely(page == c->page)) {
2833 set_freepointer(s, tail_obj, c->freelist);
2834
2835 if (unlikely(!this_cpu_cmpxchg_double(
2836 s->cpu_slab->freelist, s->cpu_slab->tid,
2837 c->freelist, tid,
2838 head, next_tid(tid)))) {
2839
2840 note_cmpxchg_failure("slab_free", s, tid);
2841 goto redo;
2842 }
2843 stat(s, FREE_FASTPATH);
2844 } else
2845 __slab_free(s, page, head, tail_obj, cnt, addr);
2846
2847}
2848
2849void kmem_cache_free(struct kmem_cache *s, void *x)
2850{
2851 s = cache_from_obj(s, x);
2852 if (!s)
2853 return;
2854 slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
2855 trace_kmem_cache_free(_RET_IP_, x);
2856}
2857EXPORT_SYMBOL(kmem_cache_free);
2858
2859struct detached_freelist {
2860 struct page *page;
2861 void *tail;
2862 void *freelist;
2863 int cnt;
2864 struct kmem_cache *s;
2865};
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
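/*
 * Scan the array of objects backwards (with a limited lookahead) and link
 * the ones that belong to the same page into a detached freelist, clearing
 * their slots in p[]. Returns the index just above the first object that
 * was skipped because it lives on a different page, or 0 once the whole
 * array has been consumed.
 */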
2879static inline
2880int build_detached_freelist(struct kmem_cache *s, size_t size,
2881 void **p, struct detached_freelist *df)
2882{
2883 size_t first_skipped_index = 0;
2884 int lookahead = 3;
2885 void *object;
2886
2887
2888 df->page = NULL;
2889
2890 do {
2891 object = p[--size];
2892 } while (!object && size);
2893
2894 if (!object)
2895 return 0;
2896
2897
2898 df->s = cache_from_obj(s, object);
2899
2900
2901 set_freepointer(df->s, object, NULL);
2902 df->page = virt_to_head_page(object);
2903 df->tail = object;
2904 df->freelist = object;
2905 p[size] = NULL;
2906 df->cnt = 1;
2907
2908 while (size) {
2909 object = p[--size];
2910 if (!object)
2911 continue;
2912
2913
2914 if (df->page == virt_to_head_page(object)) {
2915
2916 set_freepointer(df->s, object, df->freelist);
2917 df->freelist = object;
2918 df->cnt++;
2919 p[size] = NULL;
2920
2921 continue;
2922 }
2923
2924
2925 if (!--lookahead)
2926 break;
2927
2928 if (!first_skipped_index)
2929 first_skipped_index = size + 1;
2930 }
2931
2932 return first_skipped_index;
2933}
2934
2935
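/*
 * Bulk free: repeatedly build a detached freelist for one page and hand it
 * to slab_free(). Interrupts must be enabled when calling this function.
 */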
2936void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
2937{
2938 if (WARN_ON(!size))
2939 return;
2940
2941 do {
2942 struct detached_freelist df;
2943
2944 size = build_detached_freelist(s, size, p, &df);
2945 if (unlikely(!df.page))
2946 continue;
2947
2948		slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
2949 } while (likely(size));
2950}
2951EXPORT_SYMBOL(kmem_cache_free_bulk);
2952
2953
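/*
 * Bulk allocation: with interrupts disabled, pop objects straight off the
 * current cpu freelist and fall back to the slow path ___slab_alloc() when
 * it runs dry. Returns the number of objects allocated (== size) or 0 on
 * failure, in which case anything already allocated is freed again.
 * Interrupts must be enabled when calling this function.
 */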
2954int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
2955 void **p)
2956{
2957 struct kmem_cache_cpu *c;
2958 int i;
2959
2960
2961 if (unlikely(slab_pre_alloc_hook(s, flags)))
2962		return 0;
2963
2964
2965
2966
2967
2968 local_irq_disable();
2969 c = this_cpu_ptr(s->cpu_slab);
2970
2971 for (i = 0; i < size; i++) {
2972 void *object = c->freelist;
2973
2974 if (unlikely(!object)) {
2975
2976
2977
2978
2979 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
2980 _RET_IP_, c);
2981 if (unlikely(!p[i]))
2982 goto error;
2983
2984 c = this_cpu_ptr(s->cpu_slab);
2985 continue;
2986 }
2987 c->freelist = get_freepointer(s, object);
2988 p[i] = object;
2989 }
2990 c->tid = next_tid(c->tid);
2991 local_irq_enable();
2992
2993
2994 if (unlikely(flags & __GFP_ZERO)) {
2995 int j;
2996
2997 for (j = 0; j < i; j++)
2998 memset(p[j], 0, s->object_size);
2999 }
3000
3001
3002 slab_post_alloc_hook(s, flags, size, p);
3003 return i;
3004error:
3005 local_irq_enable();
3006 slab_post_alloc_hook(s, flags, i, p);
3007 __kmem_cache_free_bulk(s, i, p);
3008 return 0;
3009}
3010EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
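/*
 * Minimum and maximum page order for slabs, and the minimum number of
 * objects per slab. These defaults can be overridden at boot time with the
 * slub_min_order=, slub_max_order= and slub_min_objects= parameters (see
 * the __setup() handlers below).
 */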
3032static int slub_min_order;
3033static int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3034static int slub_min_objects;
3035
3036
3037
3038
3039
3040static int slub_nomerge;
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
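/*
 * Find the lowest page order, no smaller than slub_min_order, that can hold
 * at least min_objects of the given size while wasting no more than
 * 1/fract_leftover of the slab. 'reserved' bytes at the end of each slab are
 * kept free (used for the rcu_head of SLAB_DESTROY_BY_RCU caches).
 */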
3067static inline int slab_order(int size, int min_objects,
3068 int max_order, int fract_leftover, int reserved)
3069{
3070 int order;
3071 int rem;
3072 int min_order = slub_min_order;
3073
3074 if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
3075 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3076
3077 for (order = max(min_order,
3078 fls(min_objects * size - 1) - PAGE_SHIFT);
3079 order <= max_order; order++) {
3080
3081 unsigned long slab_size = PAGE_SIZE << order;
3082
3083 if (slab_size < min_objects * size + reserved)
3084 continue;
3085
3086 rem = (slab_size - reserved) % size;
3087
3088 if (rem <= slab_size / fract_leftover)
3089 break;
3090
3091 }
3092
3093 return order;
3094}
3095
3096static inline int calculate_order(int size, int reserved)
3097{
3098 int order;
3099 int min_objects;
3100 int fraction;
3101 int max_objects;
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111 min_objects = slub_min_objects;
3112 if (!min_objects)
3113 min_objects = 4 * (fls(nr_cpu_ids) + 1);
3114 max_objects = order_objects(slub_max_order, size, reserved);
3115 min_objects = min(min_objects, max_objects);
3116
3117 while (min_objects > 1) {
3118 fraction = 16;
3119 while (fraction >= 4) {
3120 order = slab_order(size, min_objects,
3121 slub_max_order, fraction, reserved);
3122 if (order <= slub_max_order)
3123 return order;
3124 fraction /= 2;
3125 }
3126 min_objects--;
3127 }
3128
3129
3130
3131
3132
3133 order = slab_order(size, 1, slub_max_order, 1, reserved);
3134 if (order <= slub_max_order)
3135 return order;
3136
3137
3138
3139
3140 order = slab_order(size, 1, MAX_ORDER, 1, reserved);
3141 if (order < MAX_ORDER)
3142 return order;
3143 return -ENOSYS;
3144}
3145
3146static void
3147init_kmem_cache_node(struct kmem_cache_node *n)
3148{
3149 n->nr_partial = 0;
3150 spin_lock_init(&n->list_lock);
3151 INIT_LIST_HEAD(&n->partial);
3152#ifdef CONFIG_SLUB_DEBUG
3153 atomic_long_set(&n->nr_slabs, 0);
3154 atomic_long_set(&n->total_objects, 0);
3155 INIT_LIST_HEAD(&n->full);
3156#endif
3157}
3158
3159static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3160{
3161 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3162 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3163
3164
3165
3166
3167
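	/*
	 * Must align to double word boundary for the double cmpxchg
	 * instructions to work on the freelist/tid pair.
	 */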
3168 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3169 2 * sizeof(void *));
3170
3171 if (!s->cpu_slab)
3172 return 0;
3173
3174 init_kmem_cache_cpus(s);
3175
3176 return 1;
3177}
3178
3179static struct kmem_cache *kmem_cache_node;
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
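/*
 * No kmalloc_node yet when this runs, so bootstrap by hand: allocate a raw
 * slab on the target node and carve that node's kmem_cache_node structure
 * out of its first object.
 */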
3190static void early_kmem_cache_node_alloc(int node)
3191{
3192 struct page *page;
3193 struct kmem_cache_node *n;
3194
3195 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3196
3197 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3198
3199 BUG_ON(!page);
3200 if (page_to_nid(page) != node) {
3201 printk(KERN_ERR "SLUB: Unable to allocate memory from "
3202 "node %d\n", node);
3203 printk(KERN_ERR "SLUB: Allocating a useless per node structure "
3204 "in order to be able to continue\n");
3205 }
3206
3207 n = page->freelist;
3208 BUG_ON(!n);
3209 page->freelist = get_freepointer(kmem_cache_node, n);
3210 page->inuse = 1;
3211 page->frozen = 0;
3212 kmem_cache_node->node[node] = n;
3213#ifdef CONFIG_SLUB_DEBUG
3214 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3215 init_tracking(kmem_cache_node, n);
3216#endif
3217 init_kmem_cache_node(n);
3218 inc_slabs_node(kmem_cache_node, node, page->objects);
3219
3220 add_partial(n, page, DEACTIVATE_TO_HEAD);
3221}
3222
3223static void free_kmem_cache_nodes(struct kmem_cache *s)
3224{
3225 int node;
3226
3227 for_each_node_state(node, N_NORMAL_MEMORY) {
3228 struct kmem_cache_node *n = s->node[node];
3229
3230 if (n)
3231 kmem_cache_free(kmem_cache_node, n);
3232
3233 s->node[node] = NULL;
3234 }
3235}
3236
3237static int init_kmem_cache_nodes(struct kmem_cache *s)
3238{
3239 int node;
3240
3241 for_each_node_state(node, N_NORMAL_MEMORY) {
3242 struct kmem_cache_node *n;
3243
3244 if (slab_state == DOWN) {
3245 early_kmem_cache_node_alloc(node);
3246 continue;
3247 }
3248 n = kmem_cache_alloc_node(kmem_cache_node,
3249 GFP_KERNEL, node);
3250
3251 if (!n) {
3252 free_kmem_cache_nodes(s);
3253 return 0;
3254 }
3255
3256 s->node[node] = n;
3257 init_kmem_cache_node(n);
3258 }
3259 return 1;
3260}
3261
3262static void set_min_partial(struct kmem_cache *s, unsigned long min)
3263{
3264 if (min < MIN_PARTIAL)
3265 min = MIN_PARTIAL;
3266 else if (min > MAX_PARTIAL)
3267 min = MAX_PARTIAL;
3268 s->min_partial = min;
3269}
3270
3271
3272
3273
3274
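/*
 * calculate_sizes() works out the complete layout of an object: alignment,
 * poison and red zone metadata, the location of the free pointer and of the
 * SLAB_STORE_USER tracking data, and from the resulting size the slab page
 * order and the number of objects per slab (s->oo, s->min).
 */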
3275static int calculate_sizes(struct kmem_cache *s, int forced_order)
3276{
3277 unsigned long flags = s->flags;
3278 unsigned long size = s->object_size;
3279 int order;
3280
3281
3282
3283
3284
3285
3286 size = ALIGN(size, sizeof(void *));
3287
3288#ifdef CONFIG_SLUB_DEBUG
3289
3290
3291
3292
3293
3294 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) &&
3295 !s->ctor)
3296 s->flags |= __OBJECT_POISON;
3297 else
3298 s->flags &= ~__OBJECT_POISON;
3299
3300
3301
3302
3303
3304
3305
3306 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
3307 size += sizeof(void *);
3308#endif
3309
3310
3311
3312
3313
3314 s->inuse = size;
3315
3316 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) ||
3317 s->ctor)) {
3318
3319
3320
3321
3322
3323
3324
3325
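		/*
		 * Relocate the free pointer to just past the object: with
		 * RCU freeing, poisoning or a constructor the first word of
		 * the object must not be overwritten on free.
		 */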
3326 s->offset = size;
3327 size += sizeof(void *);
3328 }
3329
3330#ifdef CONFIG_SLUB_DEBUG
3331 if (flags & SLAB_STORE_USER)
3332
3333
3334
3335
3336 size += 2 * sizeof(struct track);
3337
3338 if (flags & SLAB_RED_ZONE) {
3339
3340
3341
3342
3343
3344
3345
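		/*
		 * Add a red zone word after the object and an aligned left
		 * red zone pad before it, so that writes just past the end
		 * or just before the start of an object can be detected.
		 */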
3346 size += sizeof(void *);
3347
3348 s->red_left_pad = sizeof(void *);
3349 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
3350 size += s->red_left_pad;
3351 }
3352#endif
3353
3354
3355
3356
3357
3358
3359 size = ALIGN(size, s->align);
3360 s->size = size;
3361 if (forced_order >= 0)
3362 order = forced_order;
3363 else
3364 order = calculate_order(size, s->reserved);
3365
3366 if (order < 0)
3367 return 0;
3368
3369 s->allocflags = 0;
3370 if (order)
3371 s->allocflags |= __GFP_COMP;
3372
3373 if (s->flags & SLAB_CACHE_DMA)
3374 s->allocflags |= GFP_DMA;
3375
3376 if (s->flags & SLAB_RECLAIM_ACCOUNT)
3377 s->allocflags |= __GFP_RECLAIMABLE;
3378
3379
3380
3381
3382 s->oo = oo_make(order, size, s->reserved);
3383 s->min = oo_make(get_order(size), size, s->reserved);
3384 if (oo_objects(s->oo) > oo_objects(s->max))
3385 s->max = s->oo;
3386
3387 return !!oo_objects(s->oo);
3388}
3389
3390static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
3391{
3392 s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
3393 s->reserved = 0;
3394
3395 if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
3396 s->reserved = sizeof(struct rcu_head);
3397
3398 if (!calculate_sizes(s, -1))
3399 goto error;
3400 if (disable_higher_order_debug) {
3401
3402
3403
3404
3405 if (get_order(s->size) > get_order(s->object_size)) {
3406 s->flags &= ~DEBUG_METADATA_FLAGS;
3407 s->offset = 0;
3408 if (!calculate_sizes(s, -1))
3409 goto error;
3410 }
3411 }
3412
3413#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
3414 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
3415 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
3416
3417 s->flags |= __CMPXCHG_DOUBLE;
3418#endif
3419
3420
3421
3422
3423
3424 set_min_partial(s, ilog2(s->size) / 2);
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
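	/*
	 * cpu_partial caps how many objects may sit on a cpu's partial-slab
	 * list before it is flushed to the node lists. Larger objects get a
	 * smaller cap so the memory held per cpu stays bounded; debug caches
	 * get no per cpu partial list at all.
	 */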
3443 if (kmem_cache_debug(s))
3444 s->cpu_partial = 0;
3445 else if (s->size >= PAGE_SIZE)
3446 s->cpu_partial = 2;
3447 else if (s->size >= 1024)
3448 s->cpu_partial = 6;
3449 else if (s->size >= 256)
3450 s->cpu_partial = 13;
3451 else
3452 s->cpu_partial = 30;
3453
3454#ifdef CONFIG_NUMA
3455 s->remote_node_defrag_ratio = 1000;
3456#endif
3457 if (!init_kmem_cache_nodes(s))
3458 goto error;
3459
3460 if (alloc_kmem_cache_cpus(s))
3461 return 0;
3462
3463 free_kmem_cache_nodes(s);
3464error:
3465 if (flags & SLAB_PANIC)
3466 panic("Cannot create slab %s size=%lu realsize=%u "
3467 "order=%u offset=%u flags=%lx\n",
3468 s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
3469 s->offset, flags);
3470 return -EINVAL;
3471}
3472
3473static void list_slab_objects(struct kmem_cache *s, struct page *page,
3474 const char *text)
3475{
3476#ifdef CONFIG_SLUB_DEBUG
3477 void *addr = page_address(page);
3478 void *p;
3479 unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) *
3480 sizeof(long), GFP_ATOMIC);
3481 if (!map)
3482 return;
3483 slab_err(s, page, text, s->name);
3484 slab_lock(page);
3485
3486 get_map(s, page, map);
3487 for_each_object(p, s, addr, page->objects) {
3488
3489 if (!test_bit(slab_index(p, s, addr), map)) {
3490 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n",
3491 p, p - addr);
3492 print_tracking(s, p);
3493 }
3494 }
3495 slab_unlock(page);
3496 kfree(map);
3497#endif
3498}
3499
3500
3501
3502
3503
3504
3505static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
3506{
3507 struct page *page, *h;
3508
3509 list_for_each_entry_safe(page, h, &n->partial, lru) {
3510 if (!page->inuse) {
3511 remove_partial(n, page);
3512 discard_slab(s, page);
3513 } else {
3514 list_slab_objects(s, page,
3515 "Objects remaining in %s on kmem_cache_close()");
3516 }
3517 }
3518}
3519
3520
3521
3522
3523static inline int kmem_cache_close(struct kmem_cache *s)
3524{
3525 int node;
3526
3527 flush_all(s);
3528
3529 for_each_node_state(node, N_NORMAL_MEMORY) {
3530 struct kmem_cache_node *n = get_node(s, node);
3531
3532 free_partial(s, n);
3533 if (n->nr_partial || slabs_node(s, node))
3534 return 1;
3535 }
3536 free_percpu(s->cpu_slab);
3537 free_kmem_cache_nodes(s);
3538 return 0;
3539}
3540
3541int __kmem_cache_shutdown(struct kmem_cache *s)
3542{
3543 int rc = kmem_cache_close(s);
3544
3545 if (!rc) {
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555 mutex_unlock(&slab_mutex);
3556 sysfs_slab_remove(s);
3557 mutex_lock(&slab_mutex);
3558 }
3559
3560 return rc;
3561}
3562
3563
3564
3565
3566
3567static int __init setup_slub_min_order(char *str)
3568{
3569 get_option(&str, &slub_min_order);
3570
3571 return 1;
3572}
3573
3574__setup("slub_min_order=", setup_slub_min_order);
3575
3576static int __init setup_slub_max_order(char *str)
3577{
3578 get_option(&str, &slub_max_order);
3579 slub_max_order = min(slub_max_order, MAX_ORDER - 1);
3580
3581 return 1;
3582}
3583
3584__setup("slub_max_order=", setup_slub_max_order);
3585
3586static int __init setup_slub_min_objects(char *str)
3587{
3588 get_option(&str, &slub_min_objects);
3589
3590 return 1;
3591}
3592
3593__setup("slub_min_objects=", setup_slub_min_objects);
3594
3595static int __init setup_slub_nomerge(char *str)
3596{
3597 slub_nomerge = 1;
3598 return 1;
3599}
3600
3601__setup("slub_nomerge", setup_slub_nomerge);
3602
3603void *__kmalloc(size_t size, gfp_t flags)
3604{
3605 struct kmem_cache *s;
3606 void *ret;
3607
3608 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3609 return kmalloc_large(size, flags);
3610
3611 s = kmalloc_slab(size, flags);
3612
3613 if (unlikely(ZERO_OR_NULL_PTR(s)))
3614 return s;
3615
3616 ret = slab_alloc(s, flags, _RET_IP_);
3617
3618 trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
3619
3620 return ret;
3621}
3622EXPORT_SYMBOL(__kmalloc);
3623
3624#ifdef CONFIG_NUMA
3625static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
3626{
3627 struct page *page;
3628 void *ptr = NULL;
3629
3630 flags |= __GFP_COMP | __GFP_NOTRACK;
3631 page = alloc_pages_node(node, flags, get_order(size));
3632 if (page)
3633 ptr = page_address(page);
3634
3635 kmemleak_alloc(ptr, size, 1, flags);
3636 return ptr;
3637}
3638
3639void *__kmalloc_node(size_t size, gfp_t flags, int node)
3640{
3641 struct kmem_cache *s;
3642 void *ret;
3643
3644 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3645 ret = kmalloc_large_node(size, flags, node);
3646
3647 trace_kmalloc_node(_RET_IP_, ret,
3648 size, PAGE_SIZE << get_order(size),
3649 flags, node);
3650
3651 return ret;
3652 }
3653
3654 s = kmalloc_slab(size, flags);
3655
3656 if (unlikely(ZERO_OR_NULL_PTR(s)))
3657 return s;
3658
3659 ret = slab_alloc_node(s, flags, node, _RET_IP_);
3660
3661 trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
3662
3663 return ret;
3664}
3665EXPORT_SYMBOL(__kmalloc_node);
3666#endif
3667
3668#ifdef CONFIG_HARDENED_USERCOPY
3669
3670
3671
3672
3673
3674
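/*
 * Hardened usercopy check: the requested range must lie entirely within a
 * single object of the slab (after skipping the left red zone on debug
 * caches). Returns NULL if the copy is acceptable, otherwise the cache name
 * to report in the usercopy error.
 */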
3675const char *__check_heap_object(const void *ptr, unsigned long n,
3676 struct page *page)
3677{
3678 struct kmem_cache *s;
3679 unsigned long offset;
3680 size_t object_size;
3681
3682
3683 s = page->slab_cache;
3684 object_size = slab_ksize(s);
3685
3686
3687 if (ptr < page_address(page))
3688 return s->name;
3689
3690
3691 offset = (ptr - page_address(page)) % s->size;
3692
3693
3694 if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) {
3695 if (offset < s->red_left_pad)
3696 return s->name;
3697 offset -= s->red_left_pad;
3698 }
3699
3700
3701 if (offset <= object_size && n <= object_size - offset)
3702 return NULL;
3703
3704 return s->name;
3705}
3706#endif
3707
3708size_t ksize(const void *object)
3709{
3710 struct page *page;
3711
3712 if (unlikely(object == ZERO_SIZE_PTR))
3713 return 0;
3714
3715 page = virt_to_head_page(object);
3716
3717 if (unlikely(!PageSlab(page))) {
3718 WARN_ON(!PageCompound(page));
3719 return PAGE_SIZE << compound_order(page);
3720 }
3721
3722 return slab_ksize(page->slab_cache);
3723}
3724EXPORT_SYMBOL(ksize);
3725
3726#ifdef CONFIG_SLUB_DEBUG
3727bool verify_mem_not_deleted(const void *x)
3728{
3729 struct page *page;
3730 void *object = (void *)x;
3731 unsigned long flags;
3732 bool rv;
3733
3734 if (unlikely(ZERO_OR_NULL_PTR(x)))
3735 return false;
3736
3737 local_irq_save(flags);
3738
3739 page = virt_to_head_page(x);
3740 if (unlikely(!PageSlab(page))) {
3741
3742 rv = true;
3743 goto out_unlock;
3744 }
3745
3746 slab_lock(page);
3747 if (on_freelist(page->slab_cache, page, object)) {
3748 object_err(page->slab_cache, page, object, "Object is on free-list");
3749 rv = false;
3750 } else {
3751 rv = true;
3752 }
3753 slab_unlock(page);
3754
3755out_unlock:
3756 local_irq_restore(flags);
3757 return rv;
3758}
3759EXPORT_SYMBOL(verify_mem_not_deleted);
3760#endif
3761
3762void kfree(const void *x)
3763{
3764 struct page *page;
3765 void *object = (void *)x;
3766
3767 trace_kfree(_RET_IP_, x);
3768
3769 if (unlikely(ZERO_OR_NULL_PTR(x)))
3770 return;
3771
3772 page = virt_to_head_page(x);
3773 if (unlikely(!PageSlab(page))) {
3774 BUG_ON(!PageCompound(page));
3775 kmemleak_free(x);
3776 __free_pages(page, compound_order(page));
3777 return;
3778 }
3779 slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
3780}
3781EXPORT_SYMBOL(kfree);
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
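/*
 * kmem_cache_shrink() discards empty slabs on the partial lists and sorts
 * the remaining partial slabs by the number of objects in use, fullest
 * first, so that new allocations fill the nearly-full slabs and the
 * emptiest ones have the best chance of being freed later.
 */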
3793int kmem_cache_shrink(struct kmem_cache *s)
3794{
3795 int node;
3796 int i;
3797 struct kmem_cache_node *n;
3798 struct page *page;
3799 struct page *t;
3800 int objects = oo_objects(s->max);
3801 struct list_head *slabs_by_inuse =
3802 kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
3803 unsigned long flags;
3804
3805 if (!slabs_by_inuse)
3806 return -ENOMEM;
3807
3808 flush_all(s);
3809 for_each_node_state(node, N_NORMAL_MEMORY) {
3810 n = get_node(s, node);
3811
3812 for (i = 0; i < objects; i++)
3813 INIT_LIST_HEAD(slabs_by_inuse + i);
3814
3815 spin_lock_irqsave(&n->list_lock, flags);
3816
3817
3818
3819
3820
3821
3822
3823 list_for_each_entry_safe(page, t, &n->partial, lru) {
3824 list_move(&page->lru, slabs_by_inuse + page->inuse);
3825 if (!page->inuse)
3826 n->nr_partial--;
3827 }
3828
3829
3830
3831
3832
3833 for (i = objects - 1; i > 0; i--)
3834 list_splice(slabs_by_inuse + i, n->partial.prev);
3835
3836 spin_unlock_irqrestore(&n->list_lock, flags);
3837
3838
3839 list_for_each_entry_safe(page, t, slabs_by_inuse, lru)
3840 discard_slab(s, page);
3841 }
3842
3843 kfree(slabs_by_inuse);
3844 return 0;
3845}
3846EXPORT_SYMBOL(kmem_cache_shrink);
3847
3848#ifdef CONFIG_MEMCG
3849void __kmemcg_cache_deactivate(struct kmem_cache *s)
3850{
3851
3852
3853
3854
3855 s->cpu_partial = 0;
3856 s->min_partial = 0;
3857
3858
3859
3860
3861
3862 synchronize_sched();
3863
3864 kmem_cache_shrink(s);
3865}
3866#endif
3867
3868static int slab_mem_going_offline_callback(void *arg)
3869{
3870 struct kmem_cache *s;
3871
3872 mutex_lock(&slab_mutex);
3873 list_for_each_entry(s, &slab_caches, list)
3874 kmem_cache_shrink(s);
3875 mutex_unlock(&slab_mutex);
3876
3877 return 0;
3878}
3879
3880static void slab_mem_offline_callback(void *arg)
3881{
3882 struct kmem_cache_node *n;
3883 struct kmem_cache *s;
3884 struct memory_notify *marg = arg;
3885 int offline_node;
3886
3887 offline_node = marg->status_change_nid_normal;
3888
3889
3890
3891
3892
3893 if (offline_node < 0)
3894 return;
3895
3896 mutex_lock(&slab_mutex);
3897 list_for_each_entry(s, &slab_caches, list) {
3898 n = get_node(s, offline_node);
3899 if (n) {
3900
3901
3902
3903
3904
3905
3906 BUG_ON(slabs_node(s, offline_node));
3907
3908 s->node[offline_node] = NULL;
3909 kmem_cache_free(kmem_cache_node, n);
3910 }
3911 }
3912 mutex_unlock(&slab_mutex);
3913}
3914
3915static int slab_mem_going_online_callback(void *arg)
3916{
3917 struct kmem_cache_node *n;
3918 struct kmem_cache *s;
3919 struct memory_notify *marg = arg;
3920 int nid = marg->status_change_nid_normal;
3921 int ret = 0;
3922
3923
3924
3925
3926
3927 if (nid < 0)
3928 return 0;
3929
3930
3931
3932
3933
3934
3935 mutex_lock(&slab_mutex);
3936 list_for_each_entry(s, &slab_caches, list) {
3937
3938
3939
3940
3941
3942 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
3943 if (!n) {
3944 ret = -ENOMEM;
3945 goto out;
3946 }
3947 init_kmem_cache_node(n);
3948 s->node[nid] = n;
3949 }
3950out:
3951 mutex_unlock(&slab_mutex);
3952 return ret;
3953}
3954
3955static int slab_memory_callback(struct notifier_block *self,
3956 unsigned long action, void *arg)
3957{
3958 int ret = 0;
3959
3960 switch (action) {
3961 case MEM_GOING_ONLINE:
3962 ret = slab_mem_going_online_callback(arg);
3963 break;
3964 case MEM_GOING_OFFLINE:
3965 ret = slab_mem_going_offline_callback(arg);
3966 break;
3967 case MEM_OFFLINE:
3968 case MEM_CANCEL_ONLINE:
3969 slab_mem_offline_callback(arg);
3970 break;
3971 case MEM_ONLINE:
3972 case MEM_CANCEL_OFFLINE:
3973 break;
3974 }
3975 if (ret)
3976 ret = notifier_from_errno(ret);
3977 else
3978 ret = NOTIFY_OK;
3979 return ret;
3980}
3981
3982static struct notifier_block slab_memory_callback_nb = {
3983 .notifier_call = slab_memory_callback,
3984 .priority = SLAB_CALLBACK_PRI,
3985};
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
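/*
 * Early in boot, kmem_cache and kmem_cache_node live in static storage.
 * bootstrap() copies them into objects allocated from the now-working
 * allocator and repoints page->slab_cache on all of their existing slabs.
 */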
3997static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3998{
3999 int node;
4000 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4001
4002 memcpy(s, static_cache, kmem_cache->object_size);
4003
4004
4005
4006
4007
4008
4009 __flush_cpu_slab(s, smp_processor_id());
4010 for_each_node_state(node, N_NORMAL_MEMORY) {
4011 struct kmem_cache_node *n = get_node(s, node);
4012 struct page *p;
4013
4014 if (n) {
4015 list_for_each_entry(p, &n->partial, lru)
4016 p->slab_cache = s;
4017
4018#ifdef CONFIG_SLUB_DEBUG
4019 list_for_each_entry(p, &n->full, lru)
4020 p->slab_cache = s;
4021#endif
4022 }
4023 }
4024 list_add(&s->list, &slab_caches);
4025 return s;
4026}
4027
4028void __init kmem_cache_init(void)
4029{
4030 static __initdata struct kmem_cache boot_kmem_cache,
4031 boot_kmem_cache_node;
4032
4033 if (debug_guardpage_minorder())
4034 slub_max_order = 0;
4035
4036 kmem_cache_node = &boot_kmem_cache_node;
4037 kmem_cache = &boot_kmem_cache;
4038
4039 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4040 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
4041
4042 register_hotmemory_notifier(&slab_memory_callback_nb);
4043
4044
4045 slab_state = PARTIAL;
4046
4047 create_boot_cache(kmem_cache, "kmem_cache",
4048 offsetof(struct kmem_cache, node) +
4049 nr_node_ids * sizeof(struct kmem_cache_node *),
4050 SLAB_HWCACHE_ALIGN);
4051
4052 kmem_cache = bootstrap(&boot_kmem_cache);
4053
4054
4055
4056
4057
4058
4059 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4060
4061
4062 create_kmalloc_caches(0);
4063
4064#ifdef CONFIG_SMP
4065 register_cpu_notifier(&slab_notifier);
4066#endif
4067
4068 printk(KERN_INFO
4069 "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
4070 " CPUs=%d, Nodes=%d\n",
4071 cache_line_size(),
4072 slub_min_order, slub_max_order, slub_min_objects,
4073 nr_cpu_ids, nr_node_ids);
4074}
4075
4076void __init kmem_cache_init_late(void)
4077{
4078}
4079
4080
4081
4082
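/*
 * Cache merging: a new cache may alias an existing one when neither uses
 * debug flags, constructors or other SLUB_NEVER_MERGE features, their
 * SLUB_MERGE_SAME flags agree, and the existing cache is large enough and
 * suitably aligned. A negative refcount marks a cache that must never be
 * merged.
 */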
4083static int slab_unmergeable(struct kmem_cache *s)
4084{
4085 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
4086 return 1;
4087
4088 if (!is_root_cache(s))
4089 return 1;
4090
4091 if (s->ctor)
4092 return 1;
4093
4094
4095
4096
4097 if (s->refcount < 0)
4098 return 1;
4099
4100 return 0;
4101}
4102
4103static struct kmem_cache *find_mergeable(size_t size, size_t align,
4104 unsigned long flags, const char *name, void (*ctor)(void *))
4105{
4106 struct kmem_cache *s;
4107
4108 if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
4109 return NULL;
4110
4111 if (ctor)
4112 return NULL;
4113
4114 size = ALIGN(size, sizeof(void *));
4115 align = calculate_alignment(flags, align, size);
4116 size = ALIGN(size, align);
4117 flags = kmem_cache_flags(size, flags, name, NULL);
4118
4119 list_for_each_entry(s, &slab_caches, list) {
4120 if (slab_unmergeable(s))
4121 continue;
4122
4123 if (size > s->size)
4124 continue;
4125
4126 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
4127 continue;
4128
4129
4130
4131
4132 if ((s->size & ~(align - 1)) != s->size)
4133 continue;
4134
4135 if (s->size - size >= sizeof(void *))
4136 continue;
4137
4138 return s;
4139 }
4140 return NULL;
4141}
4142
4143struct kmem_cache *
4144__kmem_cache_alias(const char *name, size_t size, size_t align,
4145 unsigned long flags, void (*ctor)(void *))
4146{
4147 struct kmem_cache *s;
4148
4149 s = find_mergeable(size, align, flags, name, ctor);
4150 if (s) {
4151 s->refcount++;
4152
4153
4154
4155
4156 s->object_size = max(s->object_size, (int)size);
4157 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
4158
4159 if (sysfs_slab_alias(s, name)) {
4160 s->refcount--;
4161 s = NULL;
4162 }
4163 }
4164
4165 return s;
4166}
4167
4168int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
4169{
4170 int err;
4171
4172 err = kmem_cache_open(s, flags);
4173 if (err)
4174 return err;
4175
4176
4177 if (slab_state <= UP)
4178 return 0;
4179
4180 memcg_propagate_slab_attrs(s);
4181 err = sysfs_slab_add(s);
4182 if (err)
4183 kmem_cache_close(s);
4184
4185 return err;
4186}
4187
4188#ifdef CONFIG_SMP
4189
4190
4191
4192
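/*
 * Use the cpu notifier to ensure that the cpu slabs are flushed when a
 * processor is taken offline (or its bring-up is cancelled).
 */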
4193static int slab_cpuup_callback(struct notifier_block *nfb,
4194 unsigned long action, void *hcpu)
4195{
4196 long cpu = (long)hcpu;
4197 struct kmem_cache *s;
4198 unsigned long flags;
4199
4200 switch (action) {
4201 case CPU_UP_CANCELED:
4202 case CPU_UP_CANCELED_FROZEN:
4203 case CPU_DEAD:
4204 case CPU_DEAD_FROZEN:
4205 mutex_lock(&slab_mutex);
4206 list_for_each_entry(s, &slab_caches, list) {
4207 local_irq_save(flags);
4208 __flush_cpu_slab(s, cpu);
4209 local_irq_restore(flags);
4210 }
4211 mutex_unlock(&slab_mutex);
4212 break;
4213 default:
4214 break;
4215 }
4216 return NOTIFY_OK;
4217}
4218
4219static struct notifier_block slab_notifier = {
4220 .notifier_call = slab_cpuup_callback
4221};
4222
4223#endif
4224
4225void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4226{
4227 struct kmem_cache *s;
4228 void *ret;
4229
4230 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4231 return kmalloc_large(size, gfpflags);
4232
4233 s = kmalloc_slab(size, gfpflags);
4234
4235 if (unlikely(ZERO_OR_NULL_PTR(s)))
4236 return s;
4237
4238 ret = slab_alloc(s, gfpflags, caller);
4239
4240
4241 trace_kmalloc(caller, ret, size, s->size, gfpflags);
4242
4243 return ret;
4244}
4245
4246#ifdef CONFIG_NUMA
4247void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4248 int node, unsigned long caller)
4249{
4250 struct kmem_cache *s;
4251 void *ret;
4252
4253 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4254 ret = kmalloc_large_node(size, gfpflags, node);
4255
4256 trace_kmalloc_node(caller, ret,
4257 size, PAGE_SIZE << get_order(size),
4258 gfpflags, node);
4259
4260 return ret;
4261 }
4262
4263 s = kmalloc_slab(size, gfpflags);
4264
4265 if (unlikely(ZERO_OR_NULL_PTR(s)))
4266 return s;
4267
4268 ret = slab_alloc_node(s, gfpflags, node, caller);
4269
4270
4271 trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
4272
4273 return ret;
4274}
4275#endif
4276
4277#ifdef CONFIG_SYSFS
4278static int count_inuse(struct page *page)
4279{
4280 return page->inuse;
4281}
4282
4283static int count_total(struct page *page)
4284{
4285 return page->objects;
4286}
4287#endif
4288
4289#ifdef CONFIG_SLUB_DEBUG
4290static int validate_slab(struct kmem_cache *s, struct page *page,
4291 unsigned long *map)
4292{
4293 void *p;
4294 void *addr = page_address(page);
4295
4296 if (!check_slab(s, page) ||
4297 !on_freelist(s, page, NULL))
4298 return 0;
4299
4300
4301 bitmap_zero(map, page->objects);
4302
4303 get_map(s, page, map);
4304 for_each_object(p, s, addr, page->objects) {
4305 if (test_bit(slab_index(p, s, addr), map))
4306 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
4307 return 0;
4308 }
4309
4310 for_each_object(p, s, addr, page->objects)
4311 if (!test_bit(slab_index(p, s, addr), map))
4312 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
4313 return 0;
4314 return 1;
4315}
4316
4317static void validate_slab_slab(struct kmem_cache *s, struct page *page,
4318 unsigned long *map)
4319{
4320 slab_lock(page);
4321 validate_slab(s, page, map);
4322 slab_unlock(page);
4323}
4324
4325static int validate_slab_node(struct kmem_cache *s,
4326 struct kmem_cache_node *n, unsigned long *map)
4327{
4328 unsigned long count = 0;
4329 struct page *page;
4330 unsigned long flags;
4331
4332 spin_lock_irqsave(&n->list_lock, flags);
4333
4334 list_for_each_entry(page, &n->partial, lru) {
4335 validate_slab_slab(s, page, map);
4336 count++;
4337 }
4338 if (count != n->nr_partial)
4339 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but "
4340 "counter=%ld\n", s->name, count, n->nr_partial);
4341
4342 if (!(s->flags & SLAB_STORE_USER))
4343 goto out;
4344
4345 list_for_each_entry(page, &n->full, lru) {
4346 validate_slab_slab(s, page, map);
4347 count++;
4348 }
4349 if (count != atomic_long_read(&n->nr_slabs))
4350 printk(KERN_ERR "SLUB: %s %ld slabs counted but "
4351 "counter=%ld\n", s->name, count,
4352 atomic_long_read(&n->nr_slabs));
4353
4354out:
4355 spin_unlock_irqrestore(&n->list_lock, flags);
4356 return count;
4357}
4358
4359static long validate_slab_cache(struct kmem_cache *s)
4360{
4361 int node;
4362 unsigned long count = 0;
4363 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4364 sizeof(unsigned long), GFP_KERNEL);
4365
4366 if (!map)
4367 return -ENOMEM;
4368
4369 flush_all(s);
4370 for_each_node_state(node, N_NORMAL_MEMORY) {
4371 struct kmem_cache_node *n = get_node(s, node);
4372
4373 count += validate_slab_node(s, n, map);
4374 }
4375 kfree(map);
4376 return count;
4377}
4378
4379
4380
4381
4382
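/*
 * Generate lists of the code addresses where slab objects were allocated
 * and freed (backing the alloc_calls and free_calls sysfs files).
 */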
4383struct location {
4384 unsigned long count;
4385 unsigned long addr;
4386 long long sum_time;
4387 long min_time;
4388 long max_time;
4389 long min_pid;
4390 long max_pid;
4391 DECLARE_BITMAP(cpus, NR_CPUS);
4392 nodemask_t nodes;
4393};
4394
4395struct loc_track {
4396 unsigned long max;
4397 unsigned long count;
4398 struct location *loc;
4399};
4400
4401static void free_loc_track(struct loc_track *t)
4402{
4403 if (t->max)
4404 free_pages((unsigned long)t->loc,
4405 get_order(sizeof(struct location) * t->max));
4406}
4407
4408static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
4409{
4410 struct location *l;
4411 int order;
4412
4413 order = get_order(sizeof(struct location) * max);
4414
4415 l = (void *)__get_free_pages(flags, order);
4416 if (!l)
4417 return 0;
4418
4419 if (t->count) {
4420 memcpy(l, t->loc, sizeof(struct location) * t->count);
4421 free_loc_track(t);
4422 }
4423 t->max = max;
4424 t->loc = l;
4425 return 1;
4426}
4427
4428static int add_location(struct loc_track *t, struct kmem_cache *s,
4429 const struct track *track)
4430{
4431 long start, end, pos;
4432 struct location *l;
4433 unsigned long caddr;
4434 unsigned long age = jiffies - track->when;
4435
4436 start = -1;
4437 end = t->count;
4438
4439 for ( ; ; ) {
4440 pos = start + (end - start + 1) / 2;
4441
4442
4443
4444
4445
4446 if (pos == end)
4447 break;
4448
4449 caddr = t->loc[pos].addr;
4450 if (track->addr == caddr) {
4451
4452 l = &t->loc[pos];
4453 l->count++;
4454 if (track->when) {
4455 l->sum_time += age;
4456 if (age < l->min_time)
4457 l->min_time = age;
4458 if (age > l->max_time)
4459 l->max_time = age;
4460
4461 if (track->pid < l->min_pid)
4462 l->min_pid = track->pid;
4463 if (track->pid > l->max_pid)
4464 l->max_pid = track->pid;
4465
4466 cpumask_set_cpu(track->cpu,
4467 to_cpumask(l->cpus));
4468 }
4469 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4470 return 1;
4471 }
4472
4473 if (track->addr < caddr)
4474 end = pos;
4475 else
4476 start = pos;
4477 }
4478
4479
4480
4481
4482 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
4483 return 0;
4484
4485 l = t->loc + pos;
4486 if (pos < t->count)
4487 memmove(l + 1, l,
4488 (t->count - pos) * sizeof(struct location));
4489 t->count++;
4490 l->count = 1;
4491 l->addr = track->addr;
4492 l->sum_time = age;
4493 l->min_time = age;
4494 l->max_time = age;
4495 l->min_pid = track->pid;
4496 l->max_pid = track->pid;
4497 cpumask_clear(to_cpumask(l->cpus));
4498 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
4499 nodes_clear(l->nodes);
4500 node_set(page_to_nid(virt_to_page(track)), l->nodes);
4501 return 1;
4502}
4503
4504static void process_slab(struct loc_track *t, struct kmem_cache *s,
4505 struct page *page, enum track_item alloc,
4506 unsigned long *map)
4507{
4508 void *addr = page_address(page);
4509 void *p;
4510
4511 bitmap_zero(map, page->objects);
4512 get_map(s, page, map);
4513
4514 for_each_object(p, s, addr, page->objects)
4515 if (!test_bit(slab_index(p, s, addr), map))
4516 add_location(t, s, get_track(s, p, alloc));
4517}
4518
4519static int list_locations(struct kmem_cache *s, char *buf,
4520 enum track_item alloc)
4521{
4522 int len = 0;
4523 unsigned long i;
4524 struct loc_track t = { 0, 0, NULL };
4525 int node;
4526 unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
4527 sizeof(unsigned long), GFP_KERNEL);
4528
4529 if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
4530 GFP_TEMPORARY)) {
4531 kfree(map);
4532 return sprintf(buf, "Out of memory\n");
4533 }
4534
4535 flush_all(s);
4536
4537 for_each_node_state(node, N_NORMAL_MEMORY) {
4538 struct kmem_cache_node *n = get_node(s, node);
4539 unsigned long flags;
4540 struct page *page;
4541
4542 if (!atomic_long_read(&n->nr_slabs))
4543 continue;
4544
4545 spin_lock_irqsave(&n->list_lock, flags);
4546 list_for_each_entry(page, &n->partial, lru)
4547 process_slab(&t, s, page, alloc, map);
4548 list_for_each_entry(page, &n->full, lru)
4549 process_slab(&t, s, page, alloc, map);
4550 spin_unlock_irqrestore(&n->list_lock, flags);
4551 }
4552
4553 for (i = 0; i < t.count; i++) {
4554 struct location *l = &t.loc[i];
4555
4556 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
4557 break;
4558 len += sprintf(buf + len, "%7ld ", l->count);
4559
4560 if (l->addr)
4561 len += sprintf(buf + len, "%pS", (void *)l->addr);
4562 else
4563 len += sprintf(buf + len, "<not-available>");
4564
4565 if (l->sum_time != l->min_time) {
4566 len += sprintf(buf + len, " age=%ld/%ld/%ld",
4567 l->min_time,
4568 (long)div_u64(l->sum_time, l->count),
4569 l->max_time);
4570 } else
4571 len += sprintf(buf + len, " age=%ld",
4572 l->min_time);
4573
4574 if (l->min_pid != l->max_pid)
4575 len += sprintf(buf + len, " pid=%ld-%ld",
4576 l->min_pid, l->max_pid);
4577 else
4578 len += sprintf(buf + len, " pid=%ld",
4579 l->min_pid);
4580
4581 if (num_online_cpus() > 1 &&
4582 !cpumask_empty(to_cpumask(l->cpus)) &&
4583 len < PAGE_SIZE - 60) {
4584 len += sprintf(buf + len, " cpus=");
4585 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4586 to_cpumask(l->cpus));
4587 }
4588
4589 if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
4590 len < PAGE_SIZE - 60) {
4591 len += sprintf(buf + len, " nodes=");
4592 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
4593 l->nodes);
4594 }
4595
4596 len += sprintf(buf + len, "\n");
4597 }
4598
4599 free_loc_track(&t);
4600 kfree(map);
4601 if (!t.count)
4602 len += sprintf(buf, "No data\n");
4603 return len;
4604}
4605#endif
4606
4607#ifdef SLUB_RESILIENCY_TEST
4608static void resiliency_test(void)
4609{
4610 u8 *p;
4611
4612 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4613
4614 printk(KERN_ERR "SLUB resiliency testing\n");
4615 printk(KERN_ERR "-----------------------\n");
4616 printk(KERN_ERR "A. Corruption after allocation\n");
4617
4618 p = kzalloc(16, GFP_KERNEL);
4619 p[16] = 0x12;
4620 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer"
4621 " 0x12->0x%p\n\n", p + 16);
4622
4623 validate_slab_cache(kmalloc_caches[4]);
4624
4625
4626 p = kzalloc(32, GFP_KERNEL);
4627 p[32 + sizeof(void *)] = 0x34;
4628 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
4629 " 0x34 -> -0x%p\n", p);
4630 printk(KERN_ERR
4631 "If allocated object is overwritten then not detectable\n\n");
4632
4633 validate_slab_cache(kmalloc_caches[5]);
4634 p = kzalloc(64, GFP_KERNEL);
4635 p += 64 + (get_cycles() & 0xff) * sizeof(void *);
4636 *p = 0x56;
4637 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
4638 p);
4639 printk(KERN_ERR
4640 "If allocated object is overwritten then not detectable\n\n");
4641 validate_slab_cache(kmalloc_caches[6]);
4642
4643 printk(KERN_ERR "\nB. Corruption after free\n");
4644 p = kzalloc(128, GFP_KERNEL);
4645 kfree(p);
4646 *p = 0x78;
4647 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
4648 validate_slab_cache(kmalloc_caches[7]);
4649
4650 p = kzalloc(256, GFP_KERNEL);
4651 kfree(p);
4652 p[50] = 0x9a;
4653 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
4654 p);
4655 validate_slab_cache(kmalloc_caches[8]);
4656
4657 p = kzalloc(512, GFP_KERNEL);
4658 kfree(p);
4659 p[512] = 0xab;
4660 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
4661 validate_slab_cache(kmalloc_caches[9]);
4662}
4663#else
4664#ifdef CONFIG_SYSFS
4665static void resiliency_test(void) {}
4666#endif
4667#endif
4668
4669#ifdef CONFIG_SYSFS
4670enum slab_stat_type {
4671 SL_ALL,
4672 SL_PARTIAL,
4673 SL_CPU,
4674 SL_OBJECTS,
4675 SL_TOTAL
4676};
4677
4678#define SO_ALL (1 << SL_ALL)
4679#define SO_PARTIAL (1 << SL_PARTIAL)
4680#define SO_CPU (1 << SL_CPU)
4681#define SO_OBJECTS (1 << SL_OBJECTS)
4682#define SO_TOTAL (1 << SL_TOTAL)
4683
4684static ssize_t show_slab_objects(struct kmem_cache *s,
4685 char *buf, unsigned long flags)
4686{
4687 unsigned long total = 0;
4688 int node;
4689 int x;
4690 unsigned long *nodes;
4691 unsigned long *per_cpu;
4692
4693 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
4694 if (!nodes)
4695 return -ENOMEM;
4696 per_cpu = nodes + nr_node_ids;
4697
4698 if (flags & SO_CPU) {
4699 int cpu;
4700
4701 for_each_possible_cpu(cpu) {
4702 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
4703 int node;
4704 struct page *page;
4705
4706 page = ACCESS_ONCE(c->page);
4707 if (!page)
4708 continue;
4709
4710 node = page_to_nid(page);
4711 if (flags & SO_TOTAL)
4712 x = page->objects;
4713 else if (flags & SO_OBJECTS)
4714 x = page->inuse;
4715 else
4716 x = 1;
4717
4718 total += x;
4719 nodes[node] += x;
4720
4721 page = ACCESS_ONCE(c->partial);
4722 if (page) {
4723 node = page_to_nid(page);
4724 if (flags & SO_TOTAL)
4725 WARN_ON_ONCE(1);
4726 else if (flags & SO_OBJECTS)
4727 WARN_ON_ONCE(1);
4728 else
4729 x = page->pages;
4730 total += x;
4731 nodes[node] += x;
4732 }
4733
4734 per_cpu[node]++;
4735 }
4736 }
4737
4738 get_online_mems();
4739#ifdef CONFIG_SLUB_DEBUG
4740 if (flags & SO_ALL) {
4741 for_each_node_state(node, N_NORMAL_MEMORY) {
4742 struct kmem_cache_node *n = get_node(s, node);
4743
4744 if (flags & SO_TOTAL)
4745 x = atomic_long_read(&n->total_objects);
4746 else if (flags & SO_OBJECTS)
4747 x = atomic_long_read(&n->total_objects) -
4748 count_partial(n, count_free);
4749
4750 else
4751 x = atomic_long_read(&n->nr_slabs);
4752 total += x;
4753 nodes[node] += x;
4754 }
4755
4756 } else
4757#endif
4758 if (flags & SO_PARTIAL) {
4759 for_each_node_state(node, N_NORMAL_MEMORY) {
4760 struct kmem_cache_node *n = get_node(s, node);
4761
4762 if (flags & SO_TOTAL)
4763 x = count_partial(n, count_total);
4764 else if (flags & SO_OBJECTS)
4765 x = count_partial(n, count_inuse);
4766 else
4767 x = n->nr_partial;
4768 total += x;
4769 nodes[node] += x;
4770 }
4771 }
4772 x = sprintf(buf, "%lu", total);
4773#ifdef CONFIG_NUMA
4774 for_each_node_state(node, N_NORMAL_MEMORY)
4775 if (nodes[node])
4776 x += sprintf(buf + x, " N%d=%lu",
4777 node, nodes[node]);
4778#endif
4779 put_online_mems();
4780 kfree(nodes);
4781 return x + sprintf(buf + x, "\n");
4782}
4783
4784#ifdef CONFIG_SLUB_DEBUG
4785static int any_slab_objects(struct kmem_cache *s)
4786{
4787 int node;
4788
4789 for_each_online_node(node) {
4790 struct kmem_cache_node *n = get_node(s, node);
4791
4792 if (!n)
4793 continue;
4794
4795 if (atomic_long_read(&n->total_objects))
4796 return 1;
4797 }
4798 return 0;
4799}
4800#endif
4801
4802#define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
4803#define to_slab(n) container_of(n, struct kmem_cache, kobj)
4804
4805struct slab_attribute {
4806 struct attribute attr;
4807 ssize_t (*show)(struct kmem_cache *s, char *buf);
4808 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
4809};
4810
4811#define SLAB_ATTR_RO(_name) \
4812 static struct slab_attribute _name##_attr = \
4813 __ATTR(_name, 0400, _name##_show, NULL)
4814
4815#define SLAB_ATTR(_name) \
4816 static struct slab_attribute _name##_attr = \
4817 __ATTR(_name, 0600, _name##_show, _name##_store)
4818
4819static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
4820{
4821 return sprintf(buf, "%d\n", s->size);
4822}
4823SLAB_ATTR_RO(slab_size);
4824
4825static ssize_t align_show(struct kmem_cache *s, char *buf)
4826{
4827 return sprintf(buf, "%d\n", s->align);
4828}
4829SLAB_ATTR_RO(align);
4830
4831static ssize_t object_size_show(struct kmem_cache *s, char *buf)
4832{
4833 return sprintf(buf, "%d\n", s->object_size);
4834}
4835SLAB_ATTR_RO(object_size);
4836
4837static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
4838{
4839 return sprintf(buf, "%d\n", oo_objects(s->oo));
4840}
4841SLAB_ATTR_RO(objs_per_slab);
4842
4843static ssize_t order_store(struct kmem_cache *s,
4844 const char *buf, size_t length)
4845{
4846 unsigned long order;
4847 int err;
4848
4849 err = strict_strtoul(buf, 10, &order);
4850 if (err)
4851 return err;
4852
4853 if (order > slub_max_order || order < slub_min_order)
4854 return -EINVAL;
4855
4856 calculate_sizes(s, order);
4857 return length;
4858}
4859
4860static ssize_t order_show(struct kmem_cache *s, char *buf)
4861{
4862 return sprintf(buf, "%d\n", oo_order(s->oo));
4863}
4864SLAB_ATTR(order);
4865
4866static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
4867{
4868 return sprintf(buf, "%lu\n", s->min_partial);
4869}
4870
4871static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
4872 size_t length)
4873{
4874 unsigned long min;
4875 int err;
4876
4877 err = strict_strtoul(buf, 10, &min);
4878 if (err)
4879 return err;
4880
4881 set_min_partial(s, min);
4882 return length;
4883}
4884SLAB_ATTR(min_partial);
4885
4886static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
4887{
4888 return sprintf(buf, "%u\n", s->cpu_partial);
4889}
4890
4891static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
4892 size_t length)
4893{
4894 unsigned long objects;
4895 int err;
4896
4897 err = strict_strtoul(buf, 10, &objects);
4898 if (err)
4899 return err;
4900 if (objects && kmem_cache_debug(s))
4901 return -EINVAL;
4902
4903 s->cpu_partial = objects;
4904 flush_all(s);
4905 return length;
4906}
4907SLAB_ATTR(cpu_partial);
4908
4909static ssize_t ctor_show(struct kmem_cache *s, char *buf)
4910{
4911 if (!s->ctor)
4912 return 0;
4913 return sprintf(buf, "%pS\n", s->ctor);
4914}
4915SLAB_ATTR_RO(ctor);
4916
4917static ssize_t aliases_show(struct kmem_cache *s, char *buf)
4918{
4919 return sprintf(buf, "%d\n", s->refcount - 1);
4920}
4921SLAB_ATTR_RO(aliases);
4922
4923static ssize_t partial_show(struct kmem_cache *s, char *buf)
4924{
4925 return show_slab_objects(s, buf, SO_PARTIAL);
4926}
4927SLAB_ATTR_RO(partial);
4928
4929static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
4930{
4931 return show_slab_objects(s, buf, SO_CPU);
4932}
4933SLAB_ATTR_RO(cpu_slabs);
4934
4935static ssize_t objects_show(struct kmem_cache *s, char *buf)
4936{
4937 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
4938}
4939SLAB_ATTR_RO(objects);
4940
4941static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
4942{
4943 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
4944}
4945SLAB_ATTR_RO(objects_partial);
4946
4947static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
4948{
4949 int objects = 0;
4950 int pages = 0;
4951 int cpu;
4952 int len;
4953
4954 for_each_online_cpu(cpu) {
4955 struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4956
4957 if (page) {
4958 pages += page->pages;
4959 objects += page->pobjects;
4960 }
4961 }
4962
4963 len = sprintf(buf, "%d(%d)", objects, pages);
4964
4965#ifdef CONFIG_SMP
4966 for_each_online_cpu(cpu) {
4967		struct page *page = per_cpu_ptr(s->cpu_slab, cpu)->partial;
4968
4969 if (page && len < PAGE_SIZE - 20)
4970 len += sprintf(buf + len, " C%d=%d(%d)", cpu,
4971 page->pobjects, page->pages);
4972 }
4973#endif
4974 return len + sprintf(buf + len, "\n");
4975}
4976SLAB_ATTR_RO(slabs_cpu_partial);
4977
4978static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
4979{
4980 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
4981}
4982
4983static ssize_t reclaim_account_store(struct kmem_cache *s,
4984 const char *buf, size_t length)
4985{
4986 s->flags &= ~SLAB_RECLAIM_ACCOUNT;
4987 if (buf[0] == '1')
4988 s->flags |= SLAB_RECLAIM_ACCOUNT;
4989 return length;
4990}
4991SLAB_ATTR(reclaim_account);
4992
4993static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
4994{
4995 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
4996}
4997SLAB_ATTR_RO(hwcache_align);
4998
4999#ifdef CONFIG_ZONE_DMA
5000static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5001{
5002 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5003}
5004SLAB_ATTR_RO(cache_dma);
5005#endif
5006
5007static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5008{
5009 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU));
5010}
5011SLAB_ATTR_RO(destroy_by_rcu);
5012
5013static ssize_t reserved_show(struct kmem_cache *s, char *buf)
5014{
5015 return sprintf(buf, "%d\n", s->reserved);
5016}
5017SLAB_ATTR_RO(reserved);
5018
5019#ifdef CONFIG_SLUB_DEBUG
5020static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5021{
5022 return show_slab_objects(s, buf, SO_ALL);
5023}
5024SLAB_ATTR_RO(slabs);
5025
5026static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5027{
5028 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5029}
5030SLAB_ATTR_RO(total_objects);
5031
5032static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5033{
5034 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5035}
5036
5037static ssize_t sanity_checks_store(struct kmem_cache *s,
5038 const char *buf, size_t length)
5039{
5040 s->flags &= ~SLAB_CONSISTENCY_CHECKS;
5041 if (buf[0] == '1') {
5042 s->flags &= ~__CMPXCHG_DOUBLE;
5043 s->flags |= SLAB_CONSISTENCY_CHECKS;
5044 }
5045 return length;
5046}
5047SLAB_ATTR(sanity_checks);
5048
5049static ssize_t trace_show(struct kmem_cache *s, char *buf)
5050{
5051 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5052}
5053
5054static ssize_t trace_store(struct kmem_cache *s, const char *buf,
5055 size_t length)
5056{
5057 s->flags &= ~SLAB_TRACE;
5058 if (buf[0] == '1') {
5059 s->flags &= ~__CMPXCHG_DOUBLE;
5060 s->flags |= SLAB_TRACE;
5061 }
5062 return length;
5063}
5064SLAB_ATTR(trace);
5065
5066static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5067{
5068 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5069}
5070
5071static ssize_t red_zone_store(struct kmem_cache *s,
5072 const char *buf, size_t length)
5073{
5074 if (any_slab_objects(s))
5075 return -EBUSY;
5076
5077 s->flags &= ~SLAB_RED_ZONE;
5078 if (buf[0] == '1') {
5079 s->flags |= SLAB_RED_ZONE;
5080 }
5081 calculate_sizes(s, -1);
5082 return length;
5083}
5084SLAB_ATTR(red_zone);
5085
5086static ssize_t poison_show(struct kmem_cache *s, char *buf)
5087{
5088 return sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON));
5089}
5090
5091static ssize_t poison_store(struct kmem_cache *s,
5092 const char *buf, size_t length)
5093{
5094 if (any_slab_objects(s))
5095 return -EBUSY;
5096
5097 s->flags &= ~SLAB_POISON;
5098 if (buf[0] == '1') {
5099 s->flags |= SLAB_POISON;
5100 }
5101 calculate_sizes(s, -1);
5102 return length;
5103}
5104SLAB_ATTR(poison);
5105
5106static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5107{
5108 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5109}
5110
5111static ssize_t store_user_store(struct kmem_cache *s,
5112 const char *buf, size_t length)
5113{
5114 if (any_slab_objects(s))
5115 return -EBUSY;
5116
5117 s->flags &= ~SLAB_STORE_USER;
5118 if (buf[0] == '1') {
5119 s->flags &= ~__CMPXCHG_DOUBLE;
5120 s->flags |= SLAB_STORE_USER;
5121 }
5122 calculate_sizes(s, -1);
5123 return length;
5124}
5125SLAB_ATTR(store_user);
5126
5127static ssize_t validate_show(struct kmem_cache *s, char *buf)
5128{
5129 return 0;
5130}
5131
5132static ssize_t validate_store(struct kmem_cache *s,
5133 const char *buf, size_t length)
5134{
5135 int ret = -EINVAL;
5136
5137 if (buf[0] == '1') {
5138 ret = validate_slab_cache(s);
5139 if (ret >= 0)
5140 ret = length;
5141 }
5142 return ret;
5143}
5144SLAB_ATTR(validate);
5145
5146static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf)
5147{
5148 if (!(s->flags & SLAB_STORE_USER))
5149 return -ENOSYS;
5150 return list_locations(s, buf, TRACK_ALLOC);
5151}
5152SLAB_ATTR_RO(alloc_calls);
5153
5154static ssize_t free_calls_show(struct kmem_cache *s, char *buf)
5155{
5156 if (!(s->flags & SLAB_STORE_USER))
5157 return -ENOSYS;
5158 return list_locations(s, buf, TRACK_FREE);
5159}
5160SLAB_ATTR_RO(free_calls);
5161#endif
5162
5163#ifdef CONFIG_FAILSLAB
5164static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5165{
5166 return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5167}
5168
5169static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
5170 size_t length)
5171{
5172 s->flags &= ~SLAB_FAILSLAB;
5173 if (buf[0] == '1')
5174 s->flags |= SLAB_FAILSLAB;
5175 return length;
5176}
5177SLAB_ATTR(failslab);
5178#endif
5179
5180static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5181{
5182 return 0;
5183}
5184
5185static ssize_t shrink_store(struct kmem_cache *s,
5186 const char *buf, size_t length)
5187{
5188 if (buf[0] == '1') {
5189 int rc = kmem_cache_shrink(s);
5190
5191 if (rc)
5192 return rc;
5193 } else
5194 return -EINVAL;
5195 return length;
5196}
5197SLAB_ATTR(shrink);
5198
5199#ifdef CONFIG_NUMA
5200static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5201{
5202 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10);
5203}
5204
5205static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5206 const char *buf, size_t length)
5207{
5208 unsigned long ratio;
5209 int err;
5210
5211 err = strict_strtoul(buf, 10, &ratio);
5212 if (err)
5213 return err;
5214
5215 if (ratio <= 100)
5216 s->remote_node_defrag_ratio = ratio * 10;
5217
5218 return length;
5219}
5220SLAB_ATTR(remote_node_defrag_ratio);
5221#endif
5222
5223#ifdef CONFIG_SLUB_STATS
5224static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5225{
5226 unsigned long sum = 0;
5227 int cpu;
5228 int len;
5229 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
5230
5231 if (!data)
5232 return -ENOMEM;
5233
5234 for_each_online_cpu(cpu) {
5235 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5236
5237 data[cpu] = x;
5238 sum += x;
5239 }
5240
5241 len = sprintf(buf, "%lu", sum);
5242
5243#ifdef CONFIG_SMP
5244 for_each_online_cpu(cpu) {
5245 if (data[cpu] && len < PAGE_SIZE - 20)
5246 len += sprintf(buf + len, " C%d=%u", cpu, data[cpu]);
5247 }
5248#endif
5249 kfree(data);
5250 return len + sprintf(buf + len, "\n");
5251}
5252
5253static void clear_stat(struct kmem_cache *s, enum stat_item si)
5254{
5255 int cpu;
5256
5257 for_each_online_cpu(cpu)
5258 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5259}
5260
5261#define STAT_ATTR(si, text) \
5262static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5263{ \
5264 return show_stat(s, buf, si); \
5265} \
5266static ssize_t text##_store(struct kmem_cache *s, \
5267 const char *buf, size_t length) \
5268{ \
5269 if (buf[0] != '0') \
5270 return -EINVAL; \
5271 clear_stat(s, si); \
5272 return length; \
5273} \
5274SLAB_ATTR(text); \
5275
STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
STAT_ATTR(FREE_FASTPATH, free_fastpath);
STAT_ATTR(FREE_SLOWPATH, free_slowpath);
STAT_ATTR(FREE_FROZEN, free_frozen);
STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
STAT_ATTR(ALLOC_SLAB, alloc_slab);
STAT_ATTR(ALLOC_REFILL, alloc_refill);
STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
STAT_ATTR(FREE_SLAB, free_slab);
STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
STAT_ATTR(ORDER_FALLBACK, order_fallback);
STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
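
/*
 * Illustrative user space view of the statistics attributes (only present
 * when CONFIG_SLUB_STATS is enabled): a read prints the total followed by
 * the per-cpu breakdown produced by show_stat(), and writing '0' clears
 * the counter on every online cpu. The numbers below are made up.
 *
 *	# cat /sys/kernel/slab/<cache>/alloc_fastpath
 *	123456 C0=30000 C1=93456
 *	# echo 0 > /sys/kernel/slab/<cache>/alloc_fastpath
 */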
#endif

static struct attribute *slab_attrs[] = {
	&slab_size_attr.attr,
	&object_size_attr.attr,
	&objs_per_slab_attr.attr,
	&order_attr.attr,
	&min_partial_attr.attr,
	&cpu_partial_attr.attr,
	&objects_attr.attr,
	&objects_partial_attr.attr,
	&partial_attr.attr,
	&cpu_slabs_attr.attr,
	&ctor_attr.attr,
	&aliases_attr.attr,
	&align_attr.attr,
	&hwcache_align_attr.attr,
	&reclaim_account_attr.attr,
	&destroy_by_rcu_attr.attr,
	&shrink_attr.attr,
	&reserved_attr.attr,
	&slabs_cpu_partial_attr.attr,
#ifdef CONFIG_SLUB_DEBUG
	&total_objects_attr.attr,
	&slabs_attr.attr,
	&sanity_checks_attr.attr,
	&trace_attr.attr,
	&red_zone_attr.attr,
	&poison_attr.attr,
	&store_user_attr.attr,
	&validate_attr.attr,
	&alloc_calls_attr.attr,
	&free_calls_attr.attr,
#endif
#ifdef CONFIG_ZONE_DMA
	&cache_dma_attr.attr,
#endif
#ifdef CONFIG_NUMA
	&remote_node_defrag_ratio_attr.attr,
#endif
#ifdef CONFIG_SLUB_STATS
	&alloc_fastpath_attr.attr,
	&alloc_slowpath_attr.attr,
	&free_fastpath_attr.attr,
	&free_slowpath_attr.attr,
	&free_frozen_attr.attr,
	&free_add_partial_attr.attr,
	&free_remove_partial_attr.attr,
	&alloc_from_partial_attr.attr,
	&alloc_slab_attr.attr,
	&alloc_refill_attr.attr,
	&alloc_node_mismatch_attr.attr,
	&free_slab_attr.attr,
	&cpuslab_flush_attr.attr,
	&deactivate_full_attr.attr,
	&deactivate_empty_attr.attr,
	&deactivate_to_head_attr.attr,
	&deactivate_to_tail_attr.attr,
	&deactivate_remote_frees_attr.attr,
	&deactivate_bypass_attr.attr,
	&order_fallback_attr.attr,
	&cmpxchg_double_fail_attr.attr,
	&cmpxchg_double_cpu_fail_attr.attr,
	&cpu_partial_alloc_attr.attr,
	&cpu_partial_free_attr.attr,
	&cpu_partial_node_attr.attr,
	&cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
	&failslab_attr.attr,
#endif

	NULL
};

static struct attribute_group slab_attr_group = {
	.attrs = slab_attrs,
};

static ssize_t slab_attr_show(struct kobject *kobj,
				struct attribute *attr,
				char *buf)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->show)
		return -EIO;

	err = attribute->show(s, buf);

	return err;
}

static ssize_t slab_attr_store(struct kobject *kobj,
				struct attribute *attr,
				const char *buf, size_t len)
{
	struct slab_attribute *attribute;
	struct kmem_cache *s;
	int err;

	attribute = to_slab_attr(attr);
	s = to_slab(kobj);

	if (!attribute->store)
		return -EIO;

	err = attribute->store(s, buf, len);
#ifdef CONFIG_MEMCG_KMEM
	if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
		int i;

		mutex_lock(&slab_mutex);
		if (s->max_attr_size < len)
			s->max_attr_size = len;

		/*
		 * Attribute changes made on a root cache are mirrored into
		 * every memcg child cache so that the whole hierarchy stays
		 * consistent.  This is best effort only: the return value
		 * reflects the root cache update, and errors from the child
		 * caches are ignored, since not every attribute has well
		 * defined rollback semantics.  max_attr_size records the
		 * largest value ever written so that later propagation (see
		 * memcg_propagate_slab_attrs()) knows how big a buffer it
		 * needs.
		 */
		for_each_memcg_cache_index(i) {
			struct kmem_cache *c = cache_from_memcg_idx(s, i);
			if (c)
				attribute->store(c, buf, len);
		}
		mutex_unlock(&slab_mutex);
	}
#endif
	return err;
}

static void memcg_propagate_slab_attrs(struct kmem_cache *s)
{
#ifdef CONFIG_MEMCG_KMEM
	int i;
	char *buffer = NULL;
	struct kmem_cache *root_cache;

	if (is_root_cache(s))
		return;

	root_cache = s->memcg_params->root_cache;

	/*
	 * The root cache has never had an attribute written to it, so
	 * there are no non-default values to copy into the new child.
	 */
	if (!root_cache->max_attr_size)
		return;

	for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
		char mbuf[64];
		char *buf;
		struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);

		if (!attr || !attr->store || !attr->show)
			continue;

		/*
		 * It is really bad that we have to allocate here, so we will
		 * do it only as a fallback.  If we do allocate, the page is
		 * reused for all remaining attributes.
		 *
		 * Most slub attribute values are very small, but sysfs
		 * allows buffers of up to a page, so the fallback can
		 * legitimately be needed.
		 */
		if (buffer)
			buf = buffer;
		else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
			buf = mbuf;
		else {
			buffer = (char *) get_zeroed_page(GFP_KERNEL);
			if (WARN_ON(!buffer))
				continue;
			buf = buffer;
		}

		attr->show(root_cache, buf);
		attr->store(s, buf, strlen(buf));
	}

	if (buffer)
		free_page((unsigned long)buffer);
#endif
}

static const struct sysfs_ops slab_sysfs_ops = {
	.show = slab_attr_show,
	.store = slab_attr_store,
};

static struct kobj_type slab_ktype = {
	.sysfs_ops = &slab_sysfs_ops,
};

static int uevent_filter(struct kset *kset, struct kobject *kobj)
{
	struct kobj_type *ktype = get_ktype(kobj);

	if (ktype == &slab_ktype)
		return 1;
	return 0;
}

static const struct kset_uevent_ops slab_uevent_ops = {
	.filter = uevent_filter,
};

static struct kset *slab_kset;

#define ID_STR_LENGTH 64

/*
 * Create a unique string id for a slab cache:
 *
 * Format	:[flags-]size
 */
static char *create_unique_id(struct kmem_cache *s)
{
	char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
	char *p = name;

	BUG_ON(!name);

	*p++ = ':';
	/*
	 * First flags affecting slabcache operations.  We will only get here
	 * for aliasable slabs so we do not need to support too many flags.
	 * The flags here must cover all flags that are matched during
	 * merging to guarantee that the id is unique.
	 */
	if (s->flags & SLAB_CACHE_DMA)
		*p++ = 'd';
	if (s->flags & SLAB_RECLAIM_ACCOUNT)
		*p++ = 'a';
	if (s->flags & SLAB_CONSISTENCY_CHECKS)
		*p++ = 'F';
	if (!(s->flags & SLAB_NOTRACK))
		*p++ = 't';
	if (s->flags & SLAB_ACCOUNT)
		*p++ = 'A';
	if (p != name + 1)
		*p++ = '-';
	p += sprintf(p, "%07d", s->size);

#ifdef CONFIG_MEMCG_KMEM
	if (!is_root_cache(s))
		p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
#endif

	BUG_ON(p > name + ID_STR_LENGTH - 1);
	return name;
}
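
/*
 * Example (illustrative): a mergeable 64-byte cache with object tracking
 * enabled and no other special flags gets an id like ":t-0000064", while a
 * DMA cache of the same size becomes ":dt-0000064". These ids are the real
 * directory names under /sys/kernel/slab/, with the human-readable cache
 * names provided as symlink aliases by sysfs_slab_alias() below.
 */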

static int sysfs_slab_add(struct kmem_cache *s)
{
	int err;
	const char *name;
	int unmergeable = slab_unmergeable(s);

	if (unmergeable) {
		/*
		 * Slabcache can never be merged so we can use the name proper.
		 * This is typically the case for debug situations. In that
		 * case we can catch duplicate names easily.
		 */
		sysfs_remove_link(&slab_kset->kobj, s->name);
		name = s->name;
	} else {
		/*
		 * Create a unique name for the slab as a target
		 * for the symlinks.
		 */
		name = create_unique_id(s);
	}

	s->kobj.kset = slab_kset;
	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
	if (err) {
		kobject_put(&s->kobj);
		return err;
	}

	err = sysfs_create_group(&s->kobj, &slab_attr_group);
	if (err) {
		kobject_del(&s->kobj);
		kobject_put(&s->kobj);
		return err;
	}
	kobject_uevent(&s->kobj, KOBJ_ADD);
	if (!unmergeable) {
		/* Setup first alias */
		sysfs_slab_alias(s, s->name);
		kfree(name);
	}
	return 0;
}

static void sysfs_slab_remove(struct kmem_cache *s)
{
	if (slab_state < FULL)
		/*
		 * Sysfs has not been setup yet so no need to remove the
		 * kobject.
		 */
		return;

	kobject_uevent(&s->kobj, KOBJ_REMOVE);
	kobject_del(&s->kobj);
	kobject_put(&s->kobj);
}

/*
 * Need to buffer aliases during bootup until sysfs becomes
 * available lest we lose that information.
 */
struct saved_alias {
	struct kmem_cache *s;
	const char *name;
	struct saved_alias *next;
};

static struct saved_alias *alias_list;

static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
{
	struct saved_alias *al;

	if (slab_state == FULL) {
		/*
		 * If we have a leftover link then remove it.
		 */
		sysfs_remove_link(&slab_kset->kobj, name);
		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
	}

	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
	if (!al)
		return -ENOMEM;

	al->s = s;
	al->name = name;
	al->next = alias_list;
	alias_list = al;
	return 0;
}

static int __init slab_sysfs_init(void)
{
	struct kmem_cache *s;
	int err;

	mutex_lock(&slab_mutex);

	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
	if (!slab_kset) {
		mutex_unlock(&slab_mutex);
		printk(KERN_ERR "Cannot register slab subsystem.\n");
		return -ENOSYS;
	}

	slab_state = FULL;

	list_for_each_entry(s, &slab_caches, list) {
		err = sysfs_slab_add(s);
		if (err)
			printk(KERN_ERR
			       "SLUB: Unable to add boot slab %s to sysfs\n",
			       s->name);
	}

	while (alias_list) {
		struct saved_alias *al = alias_list;

		alias_list = alias_list->next;
		err = sysfs_slab_alias(al->s, al->name);
		if (err)
			printk(KERN_ERR
			       "SLUB: Unable to add boot slab alias %s to sysfs\n",
			       al->name);
		kfree(al);
	}

	mutex_unlock(&slab_mutex);
	resiliency_test();
	return 0;
}

__initcall(slab_sysfs_init);
#endif /* CONFIG_SYSFS */
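
/*
 * Resulting layout (illustrative): once slab_sysfs_init() has run, every
 * cache is visible under /sys/kernel/slab/, e.g.
 *
 *	# ls /sys/kernel/slab/kmalloc-64/
 *	aliases  align  object_size  objs_per_slab  order  shrink  validate ...
 *
 * Merged caches appear once under their unique ":..." id, with their public
 * names as symlinks pointing at that directory.
 */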

/*
 * The /proc/slabinfo ABI
 */
#ifdef CONFIG_SLABINFO
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
{
	unsigned long nr_partials = 0;
	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;
	unsigned long nr_free = 0;
	int node;

	for_each_online_node(node) {
		struct kmem_cache_node *n = get_node(s, node);

		if (!n)
			continue;

		nr_partials += n->nr_partial;
		nr_slabs += atomic_long_read(&n->nr_slabs);
		nr_objs += atomic_long_read(&n->total_objects);
		nr_free += count_partial(n, count_free);
	}

	sinfo->active_objs = nr_objs - nr_free;
	sinfo->num_objs = nr_objs;
	sinfo->active_slabs = nr_slabs;
	sinfo->num_slabs = nr_slabs;
	sinfo->objects_per_slab = oo_objects(s->oo);
	sinfo->cache_order = oo_order(s->oo);
}
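
/*
 * The fields filled in above are rendered by the common slabinfo code into
 * the usual /proc/slabinfo format, roughly (values are made up):
 *
 *	# name      <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> ...
 *	kmalloc-64     1210          1216        64         64            1        ...
 */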

void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
{
}

ssize_t slabinfo_write(struct file *file, const char __user *buffer,
		       size_t count, loff_t *ppos)
{
	return -EIO;
}
#endif /* CONFIG_SLABINFO */