1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89#include <linux/slab.h>
90#include <linux/mm.h>
91#include <linux/poison.h>
92#include <linux/swap.h>
93#include <linux/cache.h>
94#include <linux/interrupt.h>
95#include <linux/init.h>
96#include <linux/compiler.h>
97#include <linux/cpuset.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/notifier.h>
101#include <linux/kallsyms.h>
102#include <linux/cpu.h>
103#include <linux/sysctl.h>
104#include <linux/module.h>
105#include <linux/rcupdate.h>
106#include <linux/string.h>
107#include <linux/uaccess.h>
108#include <linux/nodemask.h>
109#include <linux/kmemleak.h>
110#include <linux/mempolicy.h>
111#include <linux/mutex.h>
112#include <linux/fault-inject.h>
113#include <linux/rtmutex.h>
114#include <linux/reciprocal_div.h>
115#include <linux/debugobjects.h>
116#include <linux/kmemcheck.h>
117#include <linux/memory.h>
118#include <linux/prefetch.h>
119
120#include <asm/cacheflush.h>
121#include <asm/tlbflush.h>
122#include <asm/page.h>
123
124
125
126
127
128
129
130
131
132
133
134#ifdef CONFIG_DEBUG_SLAB
135#define DEBUG 1
136#define STATS 1
137#define FORCED_DEBUG 1
138#else
139#define DEBUG 0
140#define STATS 0
141#define FORCED_DEBUG 0
142#endif
143
144
145#define BYTES_PER_WORD sizeof(void *)
146#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
147
148#ifndef ARCH_KMALLOC_FLAGS
149#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
150#endif
151
152
153#if DEBUG
154# define CREATE_MASK (SLAB_RED_ZONE | \
155 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
156 SLAB_CACHE_DMA | \
157 SLAB_STORE_USER | \
158 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
159 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
160 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
161#else
162# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
163 SLAB_CACHE_DMA | \
164 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
165 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
166 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
167#endif
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188typedef unsigned int kmem_bufctl_t;
189#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
190#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
191#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
192#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208struct slab_rcu {
209 struct rcu_head head;
210 struct kmem_cache *cachep;
211 void *addr;
212};
213
214
215
216
217
218
219
220
221struct slab {
222 union {
223 struct {
224 struct list_head list;
225 unsigned long colouroff;
226 void *s_mem;
227 unsigned int inuse;
228 kmem_bufctl_t free;
229 unsigned short nodeid;
230 };
231 struct slab_rcu __slab_cover_slab_rcu;
232 };
233};
234
235
236
237
238
239
240
241
242
243
244
245
246
247struct array_cache {
248 unsigned int avail;
249 unsigned int limit;
250 unsigned int batchcount;
251 unsigned int touched;
252 spinlock_t lock;
253 void *entry[];
254
255
256
257
258};
259
260
261
262
263
264#define BOOT_CPUCACHE_ENTRIES 1
265struct arraycache_init {
266 struct array_cache cache;
267 void *entries[BOOT_CPUCACHE_ENTRIES];
268};
269
270
271
272
273struct kmem_list3 {
274 struct list_head slabs_partial;
275 struct list_head slabs_full;
276 struct list_head slabs_free;
277 unsigned long free_objects;
278 unsigned int free_limit;
279 unsigned int colour_next;
280 spinlock_t list_lock;
281 struct array_cache *shared;
282 struct array_cache **alien;
283 unsigned long next_reap;
284 int free_touched;
285};
286
287
288
289
290#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
291static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
292#define CACHE_CACHE 0
293#define SIZE_AC MAX_NUMNODES
294#define SIZE_L3 (2 * MAX_NUMNODES)
295
296static int drain_freelist(struct kmem_cache *cache,
297 struct kmem_list3 *l3, int tofree);
298static void free_block(struct kmem_cache *cachep, void **objpp, int len,
299 int node);
300static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
301static void cache_reap(struct work_struct *unused);
302
303
304
305
306
307static __always_inline int index_of(const size_t size)
308{
309 extern void __bad_size(void);
310
311 if (__builtin_constant_p(size)) {
312 int i = 0;
313
314#define CACHE(x) \
315 if (size <=x) \
316 return i; \
317 else \
318 i++;
319#include <linux/kmalloc_sizes.h>
320#undef CACHE
321 __bad_size();
322 } else
323 __bad_size();
324 return 0;
325}
326
327static int slab_early_init = 1;
328
329#define INDEX_AC index_of(sizeof(struct arraycache_init))
330#define INDEX_L3 index_of(sizeof(struct kmem_list3))
331
332static void kmem_list3_init(struct kmem_list3 *parent)
333{
334 INIT_LIST_HEAD(&parent->slabs_full);
335 INIT_LIST_HEAD(&parent->slabs_partial);
336 INIT_LIST_HEAD(&parent->slabs_free);
337 parent->shared = NULL;
338 parent->alien = NULL;
339 parent->colour_next = 0;
340 spin_lock_init(&parent->list_lock);
341 parent->free_objects = 0;
342 parent->free_touched = 0;
343}
344
345#define MAKE_LIST(cachep, listp, slab, nodeid) \
346 do { \
347 INIT_LIST_HEAD(listp); \
348 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
349 } while (0)
350
351#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
352 do { \
353 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
354 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
355 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
356 } while (0)
357
358#define CFLGS_OFF_SLAB (0x80000000UL)
359#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
360
361#define BATCHREFILL_LIMIT 16
362
363
364
365
366
367
368
369#define REAPTIMEOUT_CPUC (2*HZ)
370#define REAPTIMEOUT_LIST3 (4*HZ)
371
372#if STATS
373#define STATS_INC_ACTIVE(x) ((x)->num_active++)
374#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
375#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
376#define STATS_INC_GROWN(x) ((x)->grown++)
377#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
378#define STATS_SET_HIGH(x) \
379 do { \
380 if ((x)->num_active > (x)->high_mark) \
381 (x)->high_mark = (x)->num_active; \
382 } while (0)
383#define STATS_INC_ERR(x) ((x)->errors++)
384#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
385#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
386#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
387#define STATS_SET_FREEABLE(x, i) \
388 do { \
389 if ((x)->max_freeable < i) \
390 (x)->max_freeable = i; \
391 } while (0)
392#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
393#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
394#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
395#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
396#else
397#define STATS_INC_ACTIVE(x) do { } while (0)
398#define STATS_DEC_ACTIVE(x) do { } while (0)
399#define STATS_INC_ALLOCED(x) do { } while (0)
400#define STATS_INC_GROWN(x) do { } while (0)
401#define STATS_ADD_REAPED(x,y) do { (void)(y); } while (0)
402#define STATS_SET_HIGH(x) do { } while (0)
403#define STATS_INC_ERR(x) do { } while (0)
404#define STATS_INC_NODEALLOCS(x) do { } while (0)
405#define STATS_INC_NODEFREES(x) do { } while (0)
406#define STATS_INC_ACOVERFLOW(x) do { } while (0)
407#define STATS_SET_FREEABLE(x, i) do { } while (0)
408#define STATS_INC_ALLOCHIT(x) do { } while (0)
409#define STATS_INC_ALLOCMISS(x) do { } while (0)
410#define STATS_INC_FREEHIT(x) do { } while (0)
411#define STATS_INC_FREEMISS(x) do { } while (0)
412#endif
413
414#if DEBUG
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429static int obj_offset(struct kmem_cache *cachep)
430{
431 return cachep->obj_offset;
432}
433
434static int obj_size(struct kmem_cache *cachep)
435{
436 return cachep->obj_size;
437}
438
439static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
440{
441 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
442 return (unsigned long long*) (objp + obj_offset(cachep) -
443 sizeof(unsigned long long));
444}
445
446static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
447{
448 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
449 if (cachep->flags & SLAB_STORE_USER)
450 return (unsigned long long *)(objp + cachep->buffer_size -
451 sizeof(unsigned long long) -
452 REDZONE_ALIGN);
453 return (unsigned long long *) (objp + cachep->buffer_size -
454 sizeof(unsigned long long));
455}
456
457static void **dbg_userword(struct kmem_cache *cachep, void *objp)
458{
459 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
460 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
461}
462
463#else
464
465#define obj_offset(x) 0
466#define obj_size(cachep) (cachep->buffer_size)
467#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
468#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
469#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
470
471#endif
472
473#ifdef CONFIG_TRACING
474size_t slab_buffer_size(struct kmem_cache *cachep)
475{
476 return cachep->buffer_size;
477}
478EXPORT_SYMBOL(slab_buffer_size);
479#endif
480
481
482
483
484#define BREAK_GFP_ORDER_HI 1
485#define BREAK_GFP_ORDER_LO 0
486static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
487
488
489
490
491
492
493static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
494{
495 page->lru.next = (struct list_head *)cache;
496}
497
498static inline struct kmem_cache *page_get_cache(struct page *page)
499{
500 page = compound_head(page);
501 BUG_ON(!PageSlab(page));
502 return (struct kmem_cache *)page->lru.next;
503}
504
505static inline void page_set_slab(struct page *page, struct slab *slab)
506{
507 page->lru.prev = (struct list_head *)slab;
508}
509
510static inline struct slab *page_get_slab(struct page *page)
511{
512 BUG_ON(!PageSlab(page));
513 return (struct slab *)page->lru.prev;
514}
515
516static inline struct kmem_cache *virt_to_cache(const void *obj)
517{
518 struct page *page = virt_to_head_page(obj);
519 return page_get_cache(page);
520}
521
522static inline struct slab *virt_to_slab(const void *obj)
523{
524 struct page *page = virt_to_head_page(obj);
525 return page_get_slab(page);
526}
527
528static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
529 unsigned int idx)
530{
531 return slab->s_mem + cache->buffer_size * idx;
532}
533
534
535
536
537
538
539
540static inline unsigned int obj_to_index(const struct kmem_cache *cache,
541 const struct slab *slab, void *obj)
542{
543 u32 offset = (obj - slab->s_mem);
544 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
545}
546
547
548
549
550struct cache_sizes malloc_sizes[] = {
551#define CACHE(x) { .cs_size = (x) },
552#include <linux/kmalloc_sizes.h>
553 CACHE(ULONG_MAX)
554#undef CACHE
555};
556EXPORT_SYMBOL(malloc_sizes);
557
558
559struct cache_names {
560 char *name;
561 char *name_dma;
562};
563
564static struct cache_names __initdata cache_names[] = {
565#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
566#include <linux/kmalloc_sizes.h>
567 {NULL,}
568#undef CACHE
569};
570
571static struct arraycache_init initarray_cache __initdata =
572 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
573static struct arraycache_init initarray_generic =
574 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
575
576
577static struct kmem_cache cache_cache = {
578 .batchcount = 1,
579 .limit = BOOT_CPUCACHE_ENTRIES,
580 .shared = 1,
581 .buffer_size = sizeof(struct kmem_cache),
582 .name = "kmem_cache",
583};
584
585#define BAD_ALIEN_MAGIC 0x01020304ul
586
587
588
589
590
591static enum {
592 NONE,
593 PARTIAL_AC,
594 PARTIAL_L3,
595 EARLY,
596 FULL
597} g_cpucache_up;
598
599
600
601
602int slab_is_available(void)
603{
604 return g_cpucache_up >= EARLY;
605}
606
607#ifdef CONFIG_LOCKDEP
608
609
610
611
612
613
614
615
616
617
618
619
620static struct lock_class_key on_slab_l3_key;
621static struct lock_class_key on_slab_alc_key;
622
623static void init_node_lock_keys(int q)
624{
625 struct cache_sizes *s = malloc_sizes;
626
627 if (g_cpucache_up != FULL)
628 return;
629
630 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
631 struct array_cache **alc;
632 struct kmem_list3 *l3;
633 int r;
634
635 l3 = s->cs_cachep->nodelists[q];
636 if (!l3 || OFF_SLAB(s->cs_cachep))
637 continue;
638 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
639 alc = l3->alien;
640
641
642
643
644
645
646
647 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
648 continue;
649 for_each_node(r) {
650 if (alc[r])
651 lockdep_set_class(&alc[r]->lock,
652 &on_slab_alc_key);
653 }
654 }
655}
656
657static inline void init_lock_keys(void)
658{
659 int node;
660
661 for_each_node(node)
662 init_node_lock_keys(node);
663}
664#else
665static void init_node_lock_keys(int q)
666{
667}
668
669static inline void init_lock_keys(void)
670{
671}
672#endif
673
674
675
676
677static DEFINE_MUTEX(cache_chain_mutex);
678static struct list_head cache_chain;
679
680static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
681
682static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
683{
684 return cachep->array[smp_processor_id()];
685}
686
687static inline struct kmem_cache *__find_general_cachep(size_t size,
688 gfp_t gfpflags)
689{
690 struct cache_sizes *csizep = malloc_sizes;
691
692#if DEBUG
693
694
695
696
697 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
698#endif
699 if (!size)
700 return ZERO_SIZE_PTR;
701
702 while (size > csizep->cs_size)
703 csizep++;
704
705
706
707
708
709
710#ifdef CONFIG_ZONE_DMA
711 if (unlikely(gfpflags & GFP_DMA))
712 return csizep->cs_dmacachep;
713#endif
714 return csizep->cs_cachep;
715}
716
717static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
718{
719 return __find_general_cachep(size, gfpflags);
720}
721
722static size_t slab_mgmt_size(size_t nr_objs, size_t align)
723{
724 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
725}
726
727
728
729
730static void cache_estimate(unsigned long gfporder, size_t buffer_size,
731 size_t align, int flags, size_t *left_over,
732 unsigned int *num)
733{
734 int nr_objs;
735 size_t mgmt_size;
736 size_t slab_size = PAGE_SIZE << gfporder;
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753 if (flags & CFLGS_OFF_SLAB) {
754 mgmt_size = 0;
755 nr_objs = slab_size / buffer_size;
756
757 if (nr_objs > SLAB_LIMIT)
758 nr_objs = SLAB_LIMIT;
759 } else {
760
761
762
763
764
765
766
767
768 nr_objs = (slab_size - sizeof(struct slab)) /
769 (buffer_size + sizeof(kmem_bufctl_t));
770
771
772
773
774
775 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
776 > slab_size)
777 nr_objs--;
778
779 if (nr_objs > SLAB_LIMIT)
780 nr_objs = SLAB_LIMIT;
781
782 mgmt_size = slab_mgmt_size(nr_objs, align);
783 }
784 *num = nr_objs;
785 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
786}
787
788#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
789
790static void __slab_error(const char *function, struct kmem_cache *cachep,
791 char *msg)
792{
793 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
794 function, cachep->name, msg);
795 dump_stack();
796}
797
798
799
800
801
802
803
804
805
806static int use_alien_caches __read_mostly = 1;
807static int __init noaliencache_setup(char *s)
808{
809 use_alien_caches = 0;
810 return 1;
811}
812__setup("noaliencache", noaliencache_setup);
813
814#ifdef CONFIG_NUMA
815
816
817
818
819
820
821static DEFINE_PER_CPU(unsigned long, slab_reap_node);
822
823static void init_reap_node(int cpu)
824{
825 int node;
826
827 node = next_node(cpu_to_mem(cpu), node_online_map);
828 if (node == MAX_NUMNODES)
829 node = first_node(node_online_map);
830
831 per_cpu(slab_reap_node, cpu) = node;
832}
833
834static void next_reap_node(void)
835{
836 int node = __this_cpu_read(slab_reap_node);
837
838 node = next_node(node, node_online_map);
839 if (unlikely(node >= MAX_NUMNODES))
840 node = first_node(node_online_map);
841 __this_cpu_write(slab_reap_node, node);
842}
843
844#else
845#define init_reap_node(cpu) do { } while (0)
846#define next_reap_node(void) do { } while (0)
847#endif
848
849
850
851
852
853
854
855
856static void __cpuinit start_cpu_timer(int cpu)
857{
858 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
859
860
861
862
863
864
865 if (keventd_up() && reap_work->work.func == NULL) {
866 init_reap_node(cpu);
867 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
868 schedule_delayed_work_on(cpu, reap_work,
869 __round_jiffies_relative(HZ, cpu));
870 }
871}
872
873static struct array_cache *alloc_arraycache(int node, int entries,
874 int batchcount, gfp_t gfp)
875{
876 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
877 struct array_cache *nc = NULL;
878
879 nc = kmalloc_node(memsize, gfp, node);
880
881
882
883
884
885
886
887 kmemleak_no_scan(nc);
888 if (nc) {
889 nc->avail = 0;
890 nc->limit = entries;
891 nc->batchcount = batchcount;
892 nc->touched = 0;
893 spin_lock_init(&nc->lock);
894 }
895 return nc;
896}
897
898
899
900
901
902
903
904static int transfer_objects(struct array_cache *to,
905 struct array_cache *from, unsigned int max)
906{
907
908 int nr = min3(from->avail, max, to->limit - to->avail);
909
910 if (!nr)
911 return 0;
912
913 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
914 sizeof(void *) *nr);
915
916 from->avail -= nr;
917 to->avail += nr;
918 return nr;
919}
920
921#ifndef CONFIG_NUMA
922
923#define drain_alien_cache(cachep, alien) do { } while (0)
924#define reap_alien(cachep, l3) do { } while (0)
925
926static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
927{
928 return (struct array_cache **)BAD_ALIEN_MAGIC;
929}
930
931static inline void free_alien_cache(struct array_cache **ac_ptr)
932{
933}
934
935static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
936{
937 return 0;
938}
939
940static inline void *alternate_node_alloc(struct kmem_cache *cachep,
941 gfp_t flags)
942{
943 return NULL;
944}
945
946static inline void *____cache_alloc_node(struct kmem_cache *cachep,
947 gfp_t flags, int nodeid)
948{
949 return NULL;
950}
951
952#else
953
954static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
955static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
956
957static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
958{
959 struct array_cache **ac_ptr;
960 int memsize = sizeof(void *) * nr_node_ids;
961 int i;
962
963 if (limit > 1)
964 limit = 12;
965 ac_ptr = kzalloc_node(memsize, gfp, node);
966 if (ac_ptr) {
967 for_each_node(i) {
968 if (i == node || !node_online(i))
969 continue;
970 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
971 if (!ac_ptr[i]) {
972 for (i--; i >= 0; i--)
973 kfree(ac_ptr[i]);
974 kfree(ac_ptr);
975 return NULL;
976 }
977 }
978 }
979 return ac_ptr;
980}
981
982static void free_alien_cache(struct array_cache **ac_ptr)
983{
984 int i;
985
986 if (!ac_ptr)
987 return;
988 for_each_node(i)
989 kfree(ac_ptr[i]);
990 kfree(ac_ptr);
991}
992
993static void __drain_alien_cache(struct kmem_cache *cachep,
994 struct array_cache *ac, int node)
995{
996 struct kmem_list3 *rl3 = cachep->nodelists[node];
997
998 if (ac->avail) {
999 spin_lock(&rl3->list_lock);
1000
1001
1002
1003
1004
1005 if (rl3->shared)
1006 transfer_objects(rl3->shared, ac, ac->limit);
1007
1008 free_block(cachep, ac->entry, ac->avail, node);
1009 ac->avail = 0;
1010 spin_unlock(&rl3->list_lock);
1011 }
1012}
1013
1014
1015
1016
1017static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1018{
1019 int node = __this_cpu_read(slab_reap_node);
1020
1021 if (l3->alien) {
1022 struct array_cache *ac = l3->alien[node];
1023
1024 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1025 __drain_alien_cache(cachep, ac, node);
1026 spin_unlock_irq(&ac->lock);
1027 }
1028 }
1029}
1030
1031static void drain_alien_cache(struct kmem_cache *cachep,
1032 struct array_cache **alien)
1033{
1034 int i = 0;
1035 struct array_cache *ac;
1036 unsigned long flags;
1037
1038 for_each_online_node(i) {
1039 ac = alien[i];
1040 if (ac) {
1041 spin_lock_irqsave(&ac->lock, flags);
1042 __drain_alien_cache(cachep, ac, i);
1043 spin_unlock_irqrestore(&ac->lock, flags);
1044 }
1045 }
1046}
1047
1048static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1049{
1050 struct slab *slabp = virt_to_slab(objp);
1051 int nodeid = slabp->nodeid;
1052 struct kmem_list3 *l3;
1053 struct array_cache *alien = NULL;
1054 int node;
1055
1056 node = numa_mem_id();
1057
1058
1059
1060
1061
1062 if (likely(slabp->nodeid == node))
1063 return 0;
1064
1065 l3 = cachep->nodelists[node];
1066 STATS_INC_NODEFREES(cachep);
1067 if (l3->alien && l3->alien[nodeid]) {
1068 alien = l3->alien[nodeid];
1069 spin_lock(&alien->lock);
1070 if (unlikely(alien->avail == alien->limit)) {
1071 STATS_INC_ACOVERFLOW(cachep);
1072 __drain_alien_cache(cachep, alien, nodeid);
1073 }
1074 alien->entry[alien->avail++] = objp;
1075 spin_unlock(&alien->lock);
1076 } else {
1077 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1078 free_block(cachep, &objp, 1, nodeid);
1079 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1080 }
1081 return 1;
1082}
1083#endif
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094static int init_cache_nodelists_node(int node)
1095{
1096 struct kmem_cache *cachep;
1097 struct kmem_list3 *l3;
1098 const int memsize = sizeof(struct kmem_list3);
1099
1100 list_for_each_entry(cachep, &cache_chain, next) {
1101
1102
1103
1104
1105
1106 if (!cachep->nodelists[node]) {
1107 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1108 if (!l3)
1109 return -ENOMEM;
1110 kmem_list3_init(l3);
1111 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1112 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1113
1114
1115
1116
1117
1118
1119 cachep->nodelists[node] = l3;
1120 }
1121
1122 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1123 cachep->nodelists[node]->free_limit =
1124 (1 + nr_cpus_node(node)) *
1125 cachep->batchcount + cachep->num;
1126 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1127 }
1128 return 0;
1129}
1130
1131static void __cpuinit cpuup_canceled(long cpu)
1132{
1133 struct kmem_cache *cachep;
1134 struct kmem_list3 *l3 = NULL;
1135 int node = cpu_to_mem(cpu);
1136 const struct cpumask *mask = cpumask_of_node(node);
1137
1138 list_for_each_entry(cachep, &cache_chain, next) {
1139 struct array_cache *nc;
1140 struct array_cache *shared;
1141 struct array_cache **alien;
1142
1143
1144 nc = cachep->array[cpu];
1145 cachep->array[cpu] = NULL;
1146 l3 = cachep->nodelists[node];
1147
1148 if (!l3)
1149 goto free_array_cache;
1150
1151 spin_lock_irq(&l3->list_lock);
1152
1153
1154 l3->free_limit -= cachep->batchcount;
1155 if (nc)
1156 free_block(cachep, nc->entry, nc->avail, node);
1157
1158 if (!cpumask_empty(mask)) {
1159 spin_unlock_irq(&l3->list_lock);
1160 goto free_array_cache;
1161 }
1162
1163 shared = l3->shared;
1164 if (shared) {
1165 free_block(cachep, shared->entry,
1166 shared->avail, node);
1167 l3->shared = NULL;
1168 }
1169
1170 alien = l3->alien;
1171 l3->alien = NULL;
1172
1173 spin_unlock_irq(&l3->list_lock);
1174
1175 kfree(shared);
1176 if (alien) {
1177 drain_alien_cache(cachep, alien);
1178 free_alien_cache(alien);
1179 }
1180free_array_cache:
1181 kfree(nc);
1182 }
1183
1184
1185
1186
1187
1188 list_for_each_entry(cachep, &cache_chain, next) {
1189 l3 = cachep->nodelists[node];
1190 if (!l3)
1191 continue;
1192 drain_freelist(cachep, l3, l3->free_objects);
1193 }
1194}
1195
1196static int __cpuinit cpuup_prepare(long cpu)
1197{
1198 struct kmem_cache *cachep;
1199 struct kmem_list3 *l3 = NULL;
1200 int node = cpu_to_mem(cpu);
1201 int err;
1202
1203
1204
1205
1206
1207
1208
1209 err = init_cache_nodelists_node(node);
1210 if (err < 0)
1211 goto bad;
1212
1213
1214
1215
1216
1217 list_for_each_entry(cachep, &cache_chain, next) {
1218 struct array_cache *nc;
1219 struct array_cache *shared = NULL;
1220 struct array_cache **alien = NULL;
1221
1222 nc = alloc_arraycache(node, cachep->limit,
1223 cachep->batchcount, GFP_KERNEL);
1224 if (!nc)
1225 goto bad;
1226 if (cachep->shared) {
1227 shared = alloc_arraycache(node,
1228 cachep->shared * cachep->batchcount,
1229 0xbaadf00d, GFP_KERNEL);
1230 if (!shared) {
1231 kfree(nc);
1232 goto bad;
1233 }
1234 }
1235 if (use_alien_caches) {
1236 alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
1237 if (!alien) {
1238 kfree(shared);
1239 kfree(nc);
1240 goto bad;
1241 }
1242 }
1243 cachep->array[cpu] = nc;
1244 l3 = cachep->nodelists[node];
1245 BUG_ON(!l3);
1246
1247 spin_lock_irq(&l3->list_lock);
1248 if (!l3->shared) {
1249
1250
1251
1252
1253 l3->shared = shared;
1254 shared = NULL;
1255 }
1256#ifdef CONFIG_NUMA
1257 if (!l3->alien) {
1258 l3->alien = alien;
1259 alien = NULL;
1260 }
1261#endif
1262 spin_unlock_irq(&l3->list_lock);
1263 kfree(shared);
1264 free_alien_cache(alien);
1265 }
1266 init_node_lock_keys(node);
1267
1268 return 0;
1269bad:
1270 cpuup_canceled(cpu);
1271 return -ENOMEM;
1272}
1273
1274static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1275 unsigned long action, void *hcpu)
1276{
1277 long cpu = (long)hcpu;
1278 int err = 0;
1279
1280 switch (action) {
1281 case CPU_UP_PREPARE:
1282 case CPU_UP_PREPARE_FROZEN:
1283 mutex_lock(&cache_chain_mutex);
1284 err = cpuup_prepare(cpu);
1285 mutex_unlock(&cache_chain_mutex);
1286 break;
1287 case CPU_ONLINE:
1288 case CPU_ONLINE_FROZEN:
1289 start_cpu_timer(cpu);
1290 break;
1291#ifdef CONFIG_HOTPLUG_CPU
1292 case CPU_DOWN_PREPARE:
1293 case CPU_DOWN_PREPARE_FROZEN:
1294
1295
1296
1297
1298
1299
1300 cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
1301
1302 per_cpu(slab_reap_work, cpu).work.func = NULL;
1303 break;
1304 case CPU_DOWN_FAILED:
1305 case CPU_DOWN_FAILED_FROZEN:
1306 start_cpu_timer(cpu);
1307 break;
1308 case CPU_DEAD:
1309 case CPU_DEAD_FROZEN:
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319#endif
1320 case CPU_UP_CANCELED:
1321 case CPU_UP_CANCELED_FROZEN:
1322 mutex_lock(&cache_chain_mutex);
1323 cpuup_canceled(cpu);
1324 mutex_unlock(&cache_chain_mutex);
1325 break;
1326 }
1327 return notifier_from_errno(err);
1328}
1329
1330static struct notifier_block __cpuinitdata cpucache_notifier = {
1331 &cpuup_callback, NULL, 0
1332};
1333
1334#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1335
1336
1337
1338
1339
1340
1341
1342static int __meminit drain_cache_nodelists_node(int node)
1343{
1344 struct kmem_cache *cachep;
1345 int ret = 0;
1346
1347 list_for_each_entry(cachep, &cache_chain, next) {
1348 struct kmem_list3 *l3;
1349
1350 l3 = cachep->nodelists[node];
1351 if (!l3)
1352 continue;
1353
1354 drain_freelist(cachep, l3, l3->free_objects);
1355
1356 if (!list_empty(&l3->slabs_full) ||
1357 !list_empty(&l3->slabs_partial)) {
1358 ret = -EBUSY;
1359 break;
1360 }
1361 }
1362 return ret;
1363}
1364
1365static int __meminit slab_memory_callback(struct notifier_block *self,
1366 unsigned long action, void *arg)
1367{
1368 struct memory_notify *mnb = arg;
1369 int ret = 0;
1370 int nid;
1371
1372 nid = mnb->status_change_nid;
1373 if (nid < 0)
1374 goto out;
1375
1376 switch (action) {
1377 case MEM_GOING_ONLINE:
1378 mutex_lock(&cache_chain_mutex);
1379 ret = init_cache_nodelists_node(nid);
1380 mutex_unlock(&cache_chain_mutex);
1381 break;
1382 case MEM_GOING_OFFLINE:
1383 mutex_lock(&cache_chain_mutex);
1384 ret = drain_cache_nodelists_node(nid);
1385 mutex_unlock(&cache_chain_mutex);
1386 break;
1387 case MEM_ONLINE:
1388 case MEM_OFFLINE:
1389 case MEM_CANCEL_ONLINE:
1390 case MEM_CANCEL_OFFLINE:
1391 break;
1392 }
1393out:
1394 return notifier_from_errno(ret);
1395}
1396#endif
1397
1398
1399
1400
1401static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1402 int nodeid)
1403{
1404 struct kmem_list3 *ptr;
1405
1406 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1407 BUG_ON(!ptr);
1408
1409 memcpy(ptr, list, sizeof(struct kmem_list3));
1410
1411
1412
1413 spin_lock_init(&ptr->list_lock);
1414
1415 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1416 cachep->nodelists[nodeid] = ptr;
1417}
1418
1419
1420
1421
1422
1423static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1424{
1425 int node;
1426
1427 for_each_online_node(node) {
1428 cachep->nodelists[node] = &initkmem_list3[index + node];
1429 cachep->nodelists[node]->next_reap = jiffies +
1430 REAPTIMEOUT_LIST3 +
1431 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1432 }
1433}
1434
1435
1436
1437
1438
1439void __init kmem_cache_init(void)
1440{
1441 size_t left_over;
1442 struct cache_sizes *sizes;
1443 struct cache_names *names;
1444 int i;
1445 int order;
1446 int node;
1447
1448 if (num_possible_nodes() == 1)
1449 use_alien_caches = 0;
1450
1451 for (i = 0; i < NUM_INIT_LISTS; i++) {
1452 kmem_list3_init(&initkmem_list3[i]);
1453 if (i < MAX_NUMNODES)
1454 cache_cache.nodelists[i] = NULL;
1455 }
1456 set_up_list3s(&cache_cache, CACHE_CACHE);
1457
1458
1459
1460
1461
1462 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1463 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485 node = numa_mem_id();
1486
1487
1488 INIT_LIST_HEAD(&cache_chain);
1489 list_add(&cache_cache.next, &cache_chain);
1490 cache_cache.colour_off = cache_line_size();
1491 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1492 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1493
1494
1495
1496
1497
1498 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1499 nr_node_ids * sizeof(struct kmem_list3 *);
1500#if DEBUG
1501 cache_cache.obj_size = cache_cache.buffer_size;
1502#endif
1503 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1504 cache_line_size());
1505 cache_cache.reciprocal_buffer_size =
1506 reciprocal_value(cache_cache.buffer_size);
1507
1508 for (order = 0; order < MAX_ORDER; order++) {
1509 cache_estimate(order, cache_cache.buffer_size,
1510 cache_line_size(), 0, &left_over, &cache_cache.num);
1511 if (cache_cache.num)
1512 break;
1513 }
1514 BUG_ON(!cache_cache.num);
1515 cache_cache.gfporder = order;
1516 cache_cache.colour = left_over / cache_cache.colour_off;
1517 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1518 sizeof(struct slab), cache_line_size());
1519
1520
1521 sizes = malloc_sizes;
1522 names = cache_names;
1523
1524
1525
1526
1527
1528
1529
1530 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1531 sizes[INDEX_AC].cs_size,
1532 ARCH_KMALLOC_MINALIGN,
1533 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1534 NULL);
1535
1536 if (INDEX_AC != INDEX_L3) {
1537 sizes[INDEX_L3].cs_cachep =
1538 kmem_cache_create(names[INDEX_L3].name,
1539 sizes[INDEX_L3].cs_size,
1540 ARCH_KMALLOC_MINALIGN,
1541 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1542 NULL);
1543 }
1544
1545 slab_early_init = 0;
1546
1547 while (sizes->cs_size != ULONG_MAX) {
1548
1549
1550
1551
1552
1553
1554
1555 if (!sizes->cs_cachep) {
1556 sizes->cs_cachep = kmem_cache_create(names->name,
1557 sizes->cs_size,
1558 ARCH_KMALLOC_MINALIGN,
1559 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1560 NULL);
1561 }
1562#ifdef CONFIG_ZONE_DMA
1563 sizes->cs_dmacachep = kmem_cache_create(
1564 names->name_dma,
1565 sizes->cs_size,
1566 ARCH_KMALLOC_MINALIGN,
1567 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1568 SLAB_PANIC,
1569 NULL);
1570#endif
1571 sizes++;
1572 names++;
1573 }
1574
1575 {
1576 struct array_cache *ptr;
1577
1578 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1579
1580 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1581 memcpy(ptr, cpu_cache_get(&cache_cache),
1582 sizeof(struct arraycache_init));
1583
1584
1585
1586 spin_lock_init(&ptr->lock);
1587
1588 cache_cache.array[smp_processor_id()] = ptr;
1589
1590 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1591
1592 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1593 != &initarray_generic.cache);
1594 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1595 sizeof(struct arraycache_init));
1596
1597
1598
1599 spin_lock_init(&ptr->lock);
1600
1601 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1602 ptr;
1603 }
1604
1605 {
1606 int nid;
1607
1608 for_each_online_node(nid) {
1609 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1610
1611 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1612 &initkmem_list3[SIZE_AC + nid], nid);
1613
1614 if (INDEX_AC != INDEX_L3) {
1615 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1616 &initkmem_list3[SIZE_L3 + nid], nid);
1617 }
1618 }
1619 }
1620
1621 g_cpucache_up = EARLY;
1622}
1623
1624void __init kmem_cache_init_late(void)
1625{
1626 struct kmem_cache *cachep;
1627
1628
1629 mutex_lock(&cache_chain_mutex);
1630 list_for_each_entry(cachep, &cache_chain, next)
1631 if (enable_cpucache(cachep, GFP_NOWAIT))
1632 BUG();
1633 mutex_unlock(&cache_chain_mutex);
1634
1635
1636 g_cpucache_up = FULL;
1637
1638
1639 init_lock_keys();
1640
1641
1642
1643
1644
1645 register_cpu_notifier(&cpucache_notifier);
1646
1647#ifdef CONFIG_NUMA
1648
1649
1650
1651
1652 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1653#endif
1654
1655
1656
1657
1658
1659}
1660
1661static int __init cpucache_init(void)
1662{
1663 int cpu;
1664
1665
1666
1667
1668 for_each_online_cpu(cpu)
1669 start_cpu_timer(cpu);
1670 return 0;
1671}
1672__initcall(cpucache_init);
1673
1674
1675
1676
1677
1678
1679
1680
1681static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1682{
1683 struct page *page;
1684 int nr_pages;
1685 int i;
1686
1687#ifndef CONFIG_MMU
1688
1689
1690
1691
1692 flags |= __GFP_COMP;
1693#endif
1694
1695 flags |= cachep->gfpflags;
1696 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1697 flags |= __GFP_RECLAIMABLE;
1698
1699 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1700 if (!page)
1701 return NULL;
1702
1703 nr_pages = (1 << cachep->gfporder);
1704 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1705 add_zone_page_state(page_zone(page),
1706 NR_SLAB_RECLAIMABLE, nr_pages);
1707 else
1708 add_zone_page_state(page_zone(page),
1709 NR_SLAB_UNRECLAIMABLE, nr_pages);
1710 for (i = 0; i < nr_pages; i++)
1711 __SetPageSlab(page + i);
1712
1713 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1714 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1715
1716 if (cachep->ctor)
1717 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1718 else
1719 kmemcheck_mark_unallocated_pages(page, nr_pages);
1720 }
1721
1722 return page_address(page);
1723}
1724
1725
1726
1727
1728static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1729{
1730 unsigned long i = (1 << cachep->gfporder);
1731 struct page *page = virt_to_page(addr);
1732 const unsigned long nr_freed = i;
1733
1734 kmemcheck_free_shadow(page, cachep->gfporder);
1735
1736 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1737 sub_zone_page_state(page_zone(page),
1738 NR_SLAB_RECLAIMABLE, nr_freed);
1739 else
1740 sub_zone_page_state(page_zone(page),
1741 NR_SLAB_UNRECLAIMABLE, nr_freed);
1742 while (i--) {
1743 BUG_ON(!PageSlab(page));
1744 __ClearPageSlab(page);
1745 page++;
1746 }
1747 if (current->reclaim_state)
1748 current->reclaim_state->reclaimed_slab += nr_freed;
1749 free_pages((unsigned long)addr, cachep->gfporder);
1750}
1751
1752static void kmem_rcu_free(struct rcu_head *head)
1753{
1754 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1755 struct kmem_cache *cachep = slab_rcu->cachep;
1756
1757 kmem_freepages(cachep, slab_rcu->addr);
1758 if (OFF_SLAB(cachep))
1759 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1760}
1761
1762#if DEBUG
1763
1764#ifdef CONFIG_DEBUG_PAGEALLOC
1765static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1766 unsigned long caller)
1767{
1768 int size = obj_size(cachep);
1769
1770 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1771
1772 if (size < 5 * sizeof(unsigned long))
1773 return;
1774
1775 *addr++ = 0x12345678;
1776 *addr++ = caller;
1777 *addr++ = smp_processor_id();
1778 size -= 3 * sizeof(unsigned long);
1779 {
1780 unsigned long *sptr = &caller;
1781 unsigned long svalue;
1782
1783 while (!kstack_end(sptr)) {
1784 svalue = *sptr++;
1785 if (kernel_text_address(svalue)) {
1786 *addr++ = svalue;
1787 size -= sizeof(unsigned long);
1788 if (size <= sizeof(unsigned long))
1789 break;
1790 }
1791 }
1792
1793 }
1794 *addr++ = 0x87654321;
1795}
1796#endif
1797
1798static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1799{
1800 int size = obj_size(cachep);
1801 addr = &((char *)addr)[obj_offset(cachep)];
1802
1803 memset(addr, val, size);
1804 *(unsigned char *)(addr + size - 1) = POISON_END;
1805}
1806
1807static void dump_line(char *data, int offset, int limit)
1808{
1809 int i;
1810 unsigned char error = 0;
1811 int bad_count = 0;
1812
1813 printk(KERN_ERR "%03x:", offset);
1814 for (i = 0; i < limit; i++) {
1815 if (data[offset + i] != POISON_FREE) {
1816 error = data[offset + i];
1817 bad_count++;
1818 }
1819 printk(" %02x", (unsigned char)data[offset + i]);
1820 }
1821 printk("\n");
1822
1823 if (bad_count == 1) {
1824 error ^= POISON_FREE;
1825 if (!(error & (error - 1))) {
1826 printk(KERN_ERR "Single bit error detected. Probably "
1827 "bad RAM.\n");
1828#ifdef CONFIG_X86
1829 printk(KERN_ERR "Run memtest86+ or a similar memory "
1830 "test tool.\n");
1831#else
1832 printk(KERN_ERR "Run a memory test tool.\n");
1833#endif
1834 }
1835 }
1836}
1837#endif
1838
1839#if DEBUG
1840
1841static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1842{
1843 int i, size;
1844 char *realobj;
1845
1846 if (cachep->flags & SLAB_RED_ZONE) {
1847 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1848 *dbg_redzone1(cachep, objp),
1849 *dbg_redzone2(cachep, objp));
1850 }
1851
1852 if (cachep->flags & SLAB_STORE_USER) {
1853 printk(KERN_ERR "Last user: [<%p>]",
1854 *dbg_userword(cachep, objp));
1855 print_symbol("(%s)",
1856 (unsigned long)*dbg_userword(cachep, objp));
1857 printk("\n");
1858 }
1859 realobj = (char *)objp + obj_offset(cachep);
1860 size = obj_size(cachep);
1861 for (i = 0; i < size && lines; i += 16, lines--) {
1862 int limit;
1863 limit = 16;
1864 if (i + limit > size)
1865 limit = size - i;
1866 dump_line(realobj, i, limit);
1867 }
1868}
1869
1870static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1871{
1872 char *realobj;
1873 int size, i;
1874 int lines = 0;
1875
1876 realobj = (char *)objp + obj_offset(cachep);
1877 size = obj_size(cachep);
1878
1879 for (i = 0; i < size; i++) {
1880 char exp = POISON_FREE;
1881 if (i == size - 1)
1882 exp = POISON_END;
1883 if (realobj[i] != exp) {
1884 int limit;
1885
1886
1887 if (lines == 0) {
1888 printk(KERN_ERR
1889 "Slab corruption: %s start=%p, len=%d\n",
1890 cachep->name, realobj, size);
1891 print_objinfo(cachep, objp, 0);
1892 }
1893
1894 i = (i / 16) * 16;
1895 limit = 16;
1896 if (i + limit > size)
1897 limit = size - i;
1898 dump_line(realobj, i, limit);
1899 i += 16;
1900 lines++;
1901
1902 if (lines > 5)
1903 break;
1904 }
1905 }
1906 if (lines != 0) {
1907
1908
1909
1910 struct slab *slabp = virt_to_slab(objp);
1911 unsigned int objnr;
1912
1913 objnr = obj_to_index(cachep, slabp, objp);
1914 if (objnr) {
1915 objp = index_to_obj(cachep, slabp, objnr - 1);
1916 realobj = (char *)objp + obj_offset(cachep);
1917 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1918 realobj, size);
1919 print_objinfo(cachep, objp, 2);
1920 }
1921 if (objnr + 1 < cachep->num) {
1922 objp = index_to_obj(cachep, slabp, objnr + 1);
1923 realobj = (char *)objp + obj_offset(cachep);
1924 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1925 realobj, size);
1926 print_objinfo(cachep, objp, 2);
1927 }
1928 }
1929}
1930#endif
1931
1932#if DEBUG
1933static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1934{
1935 int i;
1936 for (i = 0; i < cachep->num; i++) {
1937 void *objp = index_to_obj(cachep, slabp, i);
1938
1939 if (cachep->flags & SLAB_POISON) {
1940#ifdef CONFIG_DEBUG_PAGEALLOC
1941 if (cachep->buffer_size % PAGE_SIZE == 0 &&
1942 OFF_SLAB(cachep))
1943 kernel_map_pages(virt_to_page(objp),
1944 cachep->buffer_size / PAGE_SIZE, 1);
1945 else
1946 check_poison_obj(cachep, objp);
1947#else
1948 check_poison_obj(cachep, objp);
1949#endif
1950 }
1951 if (cachep->flags & SLAB_RED_ZONE) {
1952 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1953 slab_error(cachep, "start of a freed object "
1954 "was overwritten");
1955 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1956 slab_error(cachep, "end of a freed object "
1957 "was overwritten");
1958 }
1959 }
1960}
1961#else
1962static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1963{
1964}
1965#endif
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1977{
1978 void *addr = slabp->s_mem - slabp->colouroff;
1979
1980 slab_destroy_debugcheck(cachep, slabp);
1981 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1982 struct slab_rcu *slab_rcu;
1983
1984 slab_rcu = (struct slab_rcu *)slabp;
1985 slab_rcu->cachep = cachep;
1986 slab_rcu->addr = addr;
1987 call_rcu(&slab_rcu->head, kmem_rcu_free);
1988 } else {
1989 kmem_freepages(cachep, addr);
1990 if (OFF_SLAB(cachep))
1991 kmem_cache_free(cachep->slabp_cache, slabp);
1992 }
1993}
1994
1995static void __kmem_cache_destroy(struct kmem_cache *cachep)
1996{
1997 int i;
1998 struct kmem_list3 *l3;
1999
2000 for_each_online_cpu(i)
2001 kfree(cachep->array[i]);
2002
2003
2004 for_each_online_node(i) {
2005 l3 = cachep->nodelists[i];
2006 if (l3) {
2007 kfree(l3->shared);
2008 free_alien_cache(l3->alien);
2009 kfree(l3);
2010 }
2011 }
2012 kmem_cache_free(&cache_cache, cachep);
2013}
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029static size_t calculate_slab_order(struct kmem_cache *cachep,
2030 size_t size, size_t align, unsigned long flags)
2031{
2032 unsigned long offslab_limit;
2033 size_t left_over = 0;
2034 int gfporder;
2035
2036 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
2037 unsigned int num;
2038 size_t remainder;
2039
2040 cache_estimate(gfporder, size, align, flags, &remainder, &num);
2041 if (!num)
2042 continue;
2043
2044 if (flags & CFLGS_OFF_SLAB) {
2045
2046
2047
2048
2049
2050 offslab_limit = size - sizeof(struct slab);
2051 offslab_limit /= sizeof(kmem_bufctl_t);
2052
2053 if (num > offslab_limit)
2054 break;
2055 }
2056
2057
2058 cachep->num = num;
2059 cachep->gfporder = gfporder;
2060 left_over = remainder;
2061
2062
2063
2064
2065
2066
2067 if (flags & SLAB_RECLAIM_ACCOUNT)
2068 break;
2069
2070
2071
2072
2073
2074 if (gfporder >= slab_break_gfp_order)
2075 break;
2076
2077
2078
2079
2080 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2081 break;
2082 }
2083 return left_over;
2084}
2085
2086static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2087{
2088 if (g_cpucache_up == FULL)
2089 return enable_cpucache(cachep, gfp);
2090
2091 if (g_cpucache_up == NONE) {
2092
2093
2094
2095
2096
2097 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2098
2099
2100
2101
2102
2103
2104 set_up_list3s(cachep, SIZE_AC);
2105 if (INDEX_AC == INDEX_L3)
2106 g_cpucache_up = PARTIAL_L3;
2107 else
2108 g_cpucache_up = PARTIAL_AC;
2109 } else {
2110 cachep->array[smp_processor_id()] =
2111 kmalloc(sizeof(struct arraycache_init), gfp);
2112
2113 if (g_cpucache_up == PARTIAL_AC) {
2114 set_up_list3s(cachep, SIZE_L3);
2115 g_cpucache_up = PARTIAL_L3;
2116 } else {
2117 int node;
2118 for_each_online_node(node) {
2119 cachep->nodelists[node] =
2120 kmalloc_node(sizeof(struct kmem_list3),
2121 gfp, node);
2122 BUG_ON(!cachep->nodelists[node]);
2123 kmem_list3_init(cachep->nodelists[node]);
2124 }
2125 }
2126 }
2127 cachep->nodelists[numa_mem_id()]->next_reap =
2128 jiffies + REAPTIMEOUT_LIST3 +
2129 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2130
2131 cpu_cache_get(cachep)->avail = 0;
2132 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2133 cpu_cache_get(cachep)->batchcount = 1;
2134 cpu_cache_get(cachep)->touched = 0;
2135 cachep->batchcount = 1;
2136 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2137 return 0;
2138}
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167struct kmem_cache *
2168kmem_cache_create (const char *name, size_t size, size_t align,
2169 unsigned long flags, void (*ctor)(void *))
2170{
2171 size_t left_over, slab_size, ralign;
2172 struct kmem_cache *cachep = NULL, *pc;
2173 gfp_t gfp;
2174
2175
2176
2177
2178 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2179 size > KMALLOC_MAX_SIZE) {
2180 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2181 name);
2182 BUG();
2183 }
2184
2185
2186
2187
2188
2189 if (slab_is_available()) {
2190 get_online_cpus();
2191 mutex_lock(&cache_chain_mutex);
2192 }
2193
2194 list_for_each_entry(pc, &cache_chain, next) {
2195 char tmp;
2196 int res;
2197
2198
2199
2200
2201
2202
2203 res = probe_kernel_address(pc->name, tmp);
2204 if (res) {
2205 printk(KERN_ERR
2206 "SLAB: cache with size %d has lost its name\n",
2207 pc->buffer_size);
2208 continue;
2209 }
2210
2211 if (!strcmp(pc->name, name)) {
2212 printk(KERN_ERR
2213 "kmem_cache_create: duplicate cache %s\n", name);
2214 dump_stack();
2215 goto oops;
2216 }
2217 }
2218
2219#if DEBUG
2220 WARN_ON(strchr(name, ' '));
2221#if FORCED_DEBUG
2222
2223
2224
2225
2226
2227
2228 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2229 2 * sizeof(unsigned long long)))
2230 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2231 if (!(flags & SLAB_DESTROY_BY_RCU))
2232 flags |= SLAB_POISON;
2233#endif
2234 if (flags & SLAB_DESTROY_BY_RCU)
2235 BUG_ON(flags & SLAB_POISON);
2236#endif
2237
2238
2239
2240
2241 BUG_ON(flags & ~CREATE_MASK);
2242
2243
2244
2245
2246
2247
2248 if (size & (BYTES_PER_WORD - 1)) {
2249 size += (BYTES_PER_WORD - 1);
2250 size &= ~(BYTES_PER_WORD - 1);
2251 }
2252
2253
2254
2255
2256 if (flags & SLAB_HWCACHE_ALIGN) {
2257
2258
2259
2260
2261
2262 ralign = cache_line_size();
2263 while (size <= ralign / 2)
2264 ralign /= 2;
2265 } else {
2266 ralign = BYTES_PER_WORD;
2267 }
2268
2269
2270
2271
2272
2273
2274 if (flags & SLAB_STORE_USER)
2275 ralign = BYTES_PER_WORD;
2276
2277 if (flags & SLAB_RED_ZONE) {
2278 ralign = REDZONE_ALIGN;
2279
2280
2281 size += REDZONE_ALIGN - 1;
2282 size &= ~(REDZONE_ALIGN - 1);
2283 }
2284
2285
2286 if (ralign < ARCH_SLAB_MINALIGN) {
2287 ralign = ARCH_SLAB_MINALIGN;
2288 }
2289
2290 if (ralign < align) {
2291 ralign = align;
2292 }
2293
2294 if (ralign > __alignof__(unsigned long long))
2295 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2296
2297
2298
2299 align = ralign;
2300
2301 if (slab_is_available())
2302 gfp = GFP_KERNEL;
2303 else
2304 gfp = GFP_NOWAIT;
2305
2306
2307 cachep = kmem_cache_zalloc(&cache_cache, gfp);
2308 if (!cachep)
2309 goto oops;
2310
2311#if DEBUG
2312 cachep->obj_size = size;
2313
2314
2315
2316
2317
2318 if (flags & SLAB_RED_ZONE) {
2319
2320 cachep->obj_offset += sizeof(unsigned long long);
2321 size += 2 * sizeof(unsigned long long);
2322 }
2323 if (flags & SLAB_STORE_USER) {
2324
2325
2326
2327
2328 if (flags & SLAB_RED_ZONE)
2329 size += REDZONE_ALIGN;
2330 else
2331 size += BYTES_PER_WORD;
2332 }
2333#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2334 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2335 && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
2336 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
2337 size = PAGE_SIZE;
2338 }
2339#endif
2340#endif
2341
2342
2343
2344
2345
2346
2347
2348 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
2349 !(flags & SLAB_NOLEAKTRACE))
2350
2351
2352
2353
2354 flags |= CFLGS_OFF_SLAB;
2355
2356 size = ALIGN(size, align);
2357
2358 left_over = calculate_slab_order(cachep, size, align, flags);
2359
2360 if (!cachep->num) {
2361 printk(KERN_ERR
2362 "kmem_cache_create: couldn't create cache %s.\n", name);
2363 kmem_cache_free(&cache_cache, cachep);
2364 cachep = NULL;
2365 goto oops;
2366 }
2367 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2368 + sizeof(struct slab), align);
2369
2370
2371
2372
2373
2374 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2375 flags &= ~CFLGS_OFF_SLAB;
2376 left_over -= slab_size;
2377 }
2378
2379 if (flags & CFLGS_OFF_SLAB) {
2380
2381 slab_size =
2382 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2383
2384#ifdef CONFIG_PAGE_POISONING
2385
2386
2387
2388
2389 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2390 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2391#endif
2392 }
2393
2394 cachep->colour_off = cache_line_size();
2395
2396 if (cachep->colour_off < align)
2397 cachep->colour_off = align;
2398 cachep->colour = left_over / cachep->colour_off;
2399 cachep->slab_size = slab_size;
2400 cachep->flags = flags;
2401 cachep->gfpflags = 0;
2402 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2403 cachep->gfpflags |= GFP_DMA;
2404 cachep->buffer_size = size;
2405 cachep->reciprocal_buffer_size = reciprocal_value(size);
2406
2407 if (flags & CFLGS_OFF_SLAB) {
2408 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2409
2410
2411
2412
2413
2414
2415
2416 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2417 }
2418 cachep->ctor = ctor;
2419 cachep->name = name;
2420
2421 if (setup_cpu_cache(cachep, gfp)) {
2422 __kmem_cache_destroy(cachep);
2423 cachep = NULL;
2424 goto oops;
2425 }
2426
2427
2428 list_add(&cachep->next, &cache_chain);
2429oops:
2430 if (!cachep && (flags & SLAB_PANIC))
2431 panic("kmem_cache_create(): failed to create slab `%s'\n",
2432 name);
2433 if (slab_is_available()) {
2434 mutex_unlock(&cache_chain_mutex);
2435 put_online_cpus();
2436 }
2437 return cachep;
2438}
2439EXPORT_SYMBOL(kmem_cache_create);
2440
2441#if DEBUG
2442static void check_irq_off(void)
2443{
2444 BUG_ON(!irqs_disabled());
2445}
2446
2447static void check_irq_on(void)
2448{
2449 BUG_ON(irqs_disabled());
2450}
2451
2452static void check_spinlock_acquired(struct kmem_cache *cachep)
2453{
2454#ifdef CONFIG_SMP
2455 check_irq_off();
2456 assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
2457#endif
2458}
2459
2460static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2461{
2462#ifdef CONFIG_SMP
2463 check_irq_off();
2464 assert_spin_locked(&cachep->nodelists[node]->list_lock);
2465#endif
2466}
2467
2468#else
2469#define check_irq_off() do { } while(0)
2470#define check_irq_on() do { } while(0)
2471#define check_spinlock_acquired(x) do { } while(0)
2472#define check_spinlock_acquired_node(x, y) do { } while(0)
2473#endif
2474
2475static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2476 struct array_cache *ac,
2477 int force, int node);
2478
2479static void do_drain(void *arg)
2480{
2481 struct kmem_cache *cachep = arg;
2482 struct array_cache *ac;
2483 int node = numa_mem_id();
2484
2485 check_irq_off();
2486 ac = cpu_cache_get(cachep);
2487 spin_lock(&cachep->nodelists[node]->list_lock);
2488 free_block(cachep, ac->entry, ac->avail, node);
2489 spin_unlock(&cachep->nodelists[node]->list_lock);
2490 ac->avail = 0;
2491}
2492
2493static void drain_cpu_caches(struct kmem_cache *cachep)
2494{
2495 struct kmem_list3 *l3;
2496 int node;
2497
2498 on_each_cpu(do_drain, cachep, 1);
2499 check_irq_on();
2500 for_each_online_node(node) {
2501 l3 = cachep->nodelists[node];
2502 if (l3 && l3->alien)
2503 drain_alien_cache(cachep, l3->alien);
2504 }
2505
2506 for_each_online_node(node) {
2507 l3 = cachep->nodelists[node];
2508 if (l3)
2509 drain_array(cachep, l3, l3->shared, 1, node);
2510 }
2511}
2512
2513
2514
2515
2516
2517
2518
2519static int drain_freelist(struct kmem_cache *cache,
2520 struct kmem_list3 *l3, int tofree)
2521{
2522 struct list_head *p;
2523 int nr_freed;
2524 struct slab *slabp;
2525
2526 nr_freed = 0;
2527 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2528
2529 spin_lock_irq(&l3->list_lock);
2530 p = l3->slabs_free.prev;
2531 if (p == &l3->slabs_free) {
2532 spin_unlock_irq(&l3->list_lock);
2533 goto out;
2534 }
2535
2536 slabp = list_entry(p, struct slab, list);
2537#if DEBUG
2538 BUG_ON(slabp->inuse);
2539#endif
2540 list_del(&slabp->list);
2541
2542
2543
2544
2545 l3->free_objects -= cache->num;
2546 spin_unlock_irq(&l3->list_lock);
2547 slab_destroy(cache, slabp);
2548 nr_freed++;
2549 }
2550out:
2551 return nr_freed;
2552}
2553
2554
2555static int __cache_shrink(struct kmem_cache *cachep)
2556{
2557 int ret = 0, i = 0;
2558 struct kmem_list3 *l3;
2559
2560 drain_cpu_caches(cachep);
2561
2562 check_irq_on();
2563 for_each_online_node(i) {
2564 l3 = cachep->nodelists[i];
2565 if (!l3)
2566 continue;
2567
2568 drain_freelist(cachep, l3, l3->free_objects);
2569
2570 ret += !list_empty(&l3->slabs_full) ||
2571 !list_empty(&l3->slabs_partial);
2572 }
2573 return (ret ? 1 : 0);
2574}
2575
2576
2577
2578
2579
2580
2581
2582
2583int kmem_cache_shrink(struct kmem_cache *cachep)
2584{
2585 int ret;
2586 BUG_ON(!cachep || in_interrupt());
2587
2588 get_online_cpus();
2589 mutex_lock(&cache_chain_mutex);
2590 ret = __cache_shrink(cachep);
2591 mutex_unlock(&cache_chain_mutex);
2592 put_online_cpus();
2593 return ret;
2594}
2595EXPORT_SYMBOL(kmem_cache_shrink);
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613void kmem_cache_destroy(struct kmem_cache *cachep)
2614{
2615 BUG_ON(!cachep || in_interrupt());
2616
2617
2618 get_online_cpus();
2619 mutex_lock(&cache_chain_mutex);
2620
2621
2622
2623 list_del(&cachep->next);
2624 if (__cache_shrink(cachep)) {
2625 slab_error(cachep, "Can't free all objects");
2626 list_add(&cachep->next, &cache_chain);
2627 mutex_unlock(&cache_chain_mutex);
2628 put_online_cpus();
2629 return;
2630 }
2631
2632 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2633 rcu_barrier();
2634
2635 __kmem_cache_destroy(cachep);
2636 mutex_unlock(&cache_chain_mutex);
2637 put_online_cpus();
2638}
2639EXPORT_SYMBOL(kmem_cache_destroy);
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2653 int colour_off, gfp_t local_flags,
2654 int nodeid)
2655{
2656 struct slab *slabp;
2657
2658 if (OFF_SLAB(cachep)) {
2659
2660 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2661 local_flags, nodeid);
2662
2663
2664
2665
2666
2667
2668 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2669 local_flags);
2670 if (!slabp)
2671 return NULL;
2672 } else {
2673 slabp = objp + colour_off;
2674 colour_off += cachep->slab_size;
2675 }
2676 slabp->inuse = 0;
2677 slabp->colouroff = colour_off;
2678 slabp->s_mem = objp + colour_off;
2679 slabp->nodeid = nodeid;
2680 slabp->free = 0;
2681 return slabp;
2682}
2683
2684static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2685{
2686 return (kmem_bufctl_t *) (slabp + 1);
2687}
2688
2689static void cache_init_objs(struct kmem_cache *cachep,
2690 struct slab *slabp)
2691{
2692 int i;
2693
2694 for (i = 0; i < cachep->num; i++) {
2695 void *objp = index_to_obj(cachep, slabp, i);
2696#if DEBUG
2697
2698 if (cachep->flags & SLAB_POISON)
2699 poison_obj(cachep, objp, POISON_FREE);
2700 if (cachep->flags & SLAB_STORE_USER)
2701 *dbg_userword(cachep, objp) = NULL;
2702
2703 if (cachep->flags & SLAB_RED_ZONE) {
2704 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2705 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2706 }
2707
2708
2709
2710
2711
2712 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2713 cachep->ctor(objp + obj_offset(cachep));
2714
2715 if (cachep->flags & SLAB_RED_ZONE) {
2716 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2717 slab_error(cachep, "constructor overwrote the"
2718 " end of an object");
2719 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2720 slab_error(cachep, "constructor overwrote the"
2721 " start of an object");
2722 }
2723 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2724 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2725 kernel_map_pages(virt_to_page(objp),
2726 cachep->buffer_size / PAGE_SIZE, 0);
2727#else
2728 if (cachep->ctor)
2729 cachep->ctor(objp);
2730#endif
2731 slab_bufctl(slabp)[i] = i + 1;
2732 }
2733 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2734}
2735
2736static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2737{
2738 if (CONFIG_ZONE_DMA_FLAG) {
2739 if (flags & GFP_DMA)
2740 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2741 else
2742 BUG_ON(cachep->gfpflags & GFP_DMA);
2743 }
2744}
2745
2746static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2747 int nodeid)
2748{
2749 void *objp = index_to_obj(cachep, slabp, slabp->free);
2750 kmem_bufctl_t next;
2751
2752 slabp->inuse++;
2753 next = slab_bufctl(slabp)[slabp->free];
2754#if DEBUG
2755 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2756 WARN_ON(slabp->nodeid != nodeid);
2757#endif
2758 slabp->free = next;
2759
2760 return objp;
2761}
2762
2763static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2764 void *objp, int nodeid)
2765{
2766 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2767
2768#if DEBUG
2769
2770 WARN_ON(slabp->nodeid != nodeid);
2771
2772 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2773 printk(KERN_ERR "slab: double free detected in cache "
2774 "'%s', objp %p\n", cachep->name, objp);
2775 BUG();
2776 }
2777#endif
2778 slab_bufctl(slabp)[objnr] = slabp->free;
2779 slabp->free = objnr;
2780 slabp->inuse--;
2781}
2782
2783
2784
2785
2786
2787
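/*
 * Map pages beginning at addr to the given cache and slab.  The cache and
 * slab pointers are stored in each struct page so that kfree()/ksize() and
 * the debug code can find them again from a bare virtual address.
 */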
2788static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2789 void *addr)
2790{
2791 int nr_pages;
2792 struct page *page;
2793
2794 page = virt_to_page(addr);
2795
2796 nr_pages = 1;
2797 if (likely(!PageCompound(page)))
2798 nr_pages <<= cache->gfporder;
2799
2800 do {
2801 page_set_cache(page, cache);
2802 page_set_slab(page, slab);
2803 page++;
2804 } while (--nr_pages);
2805}
2806
2807
2808
2809
2810
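/*
 * Grow (by 1) the number of slabs within a cache.  Called from the
 * allocation slow paths when there are no free objects left.  If @objp is
 * NULL the backing pages are allocated here; otherwise the caller has
 * already obtained them (see fallback_alloc()).
 */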
2811static int cache_grow(struct kmem_cache *cachep,
2812 gfp_t flags, int nodeid, void *objp)
2813{
2814 struct slab *slabp;
2815 size_t offset;
2816 gfp_t local_flags;
2817 struct kmem_list3 *l3;
2818
2819
2820
2821
2822
2823 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2824 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2825
2826
2827 check_irq_off();
2828 l3 = cachep->nodelists[nodeid];
2829 spin_lock(&l3->list_lock);
2830
2831
2832 offset = l3->colour_next;
2833 l3->colour_next++;
2834 if (l3->colour_next >= cachep->colour)
2835 l3->colour_next = 0;
2836 spin_unlock(&l3->list_lock);
2837
2838 offset *= cachep->colour_off;
2839
2840 if (local_flags & __GFP_WAIT)
2841 local_irq_enable();
2842
2843
2844
2845
2846
2847
2848
2849 kmem_flagcheck(cachep, flags);
2850
2851
2852
2853
2854
2855 if (!objp)
2856 objp = kmem_getpages(cachep, local_flags, nodeid);
2857 if (!objp)
2858 goto failed;
2859
2860
2861 slabp = alloc_slabmgmt(cachep, objp, offset,
2862 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2863 if (!slabp)
2864 goto opps1;
2865
2866 slab_map_pages(cachep, slabp, objp);
2867
2868 cache_init_objs(cachep, slabp);
2869
2870 if (local_flags & __GFP_WAIT)
2871 local_irq_disable();
2872 check_irq_off();
2873 spin_lock(&l3->list_lock);
2874
2875
2876 list_add_tail(&slabp->list, &(l3->slabs_free));
2877 STATS_INC_GROWN(cachep);
2878 l3->free_objects += cachep->num;
2879 spin_unlock(&l3->list_lock);
2880 return 1;
2881opps1:
2882 kmem_freepages(cachep, objp);
2883failed:
2884 if (local_flags & __GFP_WAIT)
2885 local_irq_disable();
2886 return 0;
2887}
2888
2889#if DEBUG
2890
2891
2892
2893
2894
2895
2896static void kfree_debugcheck(const void *objp)
2897{
2898 if (!virt_addr_valid(objp)) {
2899 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2900 (unsigned long)objp);
2901 BUG();
2902 }
2903}
2904
2905static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2906{
2907 unsigned long long redzone1, redzone2;
2908
2909 redzone1 = *dbg_redzone1(cache, obj);
2910 redzone2 = *dbg_redzone2(cache, obj);
2911
2912
2913
2914
2915 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2916 return;
2917
2918 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2919 slab_error(cache, "double free detected");
2920 else
2921 slab_error(cache, "memory outside object was overwritten");
2922
2923 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2924 obj, redzone1, redzone2);
2925}
2926
2927static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2928 void *caller)
2929{
2930 struct page *page;
2931 unsigned int objnr;
2932 struct slab *slabp;
2933
2934 BUG_ON(virt_to_cache(objp) != cachep);
2935
2936 objp -= obj_offset(cachep);
2937 kfree_debugcheck(objp);
2938 page = virt_to_head_page(objp);
2939
2940 slabp = page_get_slab(page);
2941
2942 if (cachep->flags & SLAB_RED_ZONE) {
2943 verify_redzone_free(cachep, objp);
2944 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2945 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2946 }
2947 if (cachep->flags & SLAB_STORE_USER)
2948 *dbg_userword(cachep, objp) = caller;
2949
2950 objnr = obj_to_index(cachep, slabp, objp);
2951
2952 BUG_ON(objnr >= cachep->num);
2953 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2954
2955#ifdef CONFIG_DEBUG_SLAB_LEAK
2956 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2957#endif
2958 if (cachep->flags & SLAB_POISON) {
2959#ifdef CONFIG_DEBUG_PAGEALLOC
2960 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2961 store_stackinfo(cachep, objp, (unsigned long)caller);
2962 kernel_map_pages(virt_to_page(objp),
2963 cachep->buffer_size / PAGE_SIZE, 0);
2964 } else {
2965 poison_obj(cachep, objp, POISON_FREE);
2966 }
2967#else
2968 poison_obj(cachep, objp, POISON_FREE);
2969#endif
2970 }
2971 return objp;
2972}
2973
2974static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2975{
2976 kmem_bufctl_t i;
2977 int entries = 0;
2978
2979
2980 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2981 entries++;
2982 if (entries > cachep->num || i >= cachep->num)
2983 goto bad;
2984 }
2985 if (entries != cachep->num - slabp->inuse) {
2986bad:
2987 printk(KERN_ERR "slab: Internal list corruption detected in "
2988 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2989 cachep->name, cachep->num, slabp, slabp->inuse);
2990 for (i = 0;
2991 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2992 i++) {
2993 if (i % 16 == 0)
2994 printk("\n%03x:", i);
2995 printk(" %02x", ((unsigned char *)slabp)[i]);
2996 }
2997 printk("\n");
2998 BUG();
2999 }
3000}
3001#else
3002#define kfree_debugcheck(x) do { } while(0)
3003#define cache_free_debugcheck(x,objp,z) (objp)
3004#define check_slabp(x,y) do { } while(0)
3005#endif
3006
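/*
 * Refill the per-cpu array_cache from this node's shared array or from the
 * partial/free slab lists, growing the cache if everything is empty.
 * Returns one object (or NULL) and leaves the rest in ac->entry[].
 */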
3007static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
3008{
3009 int batchcount;
3010 struct kmem_list3 *l3;
3011 struct array_cache *ac;
3012 int node;
3013
3014retry:
3015 check_irq_off();
3016 node = numa_mem_id();
3017 ac = cpu_cache_get(cachep);
3018 batchcount = ac->batchcount;
3019 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
3020
3021
3022
3023
3024
3025 batchcount = BATCHREFILL_LIMIT;
3026 }
3027 l3 = cachep->nodelists[node];
3028
3029 BUG_ON(ac->avail > 0 || !l3);
3030 spin_lock(&l3->list_lock);
3031
3032
3033 if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
3034 l3->shared->touched = 1;
3035 goto alloc_done;
3036 }
3037
3038 while (batchcount > 0) {
3039 struct list_head *entry;
3040 struct slab *slabp;
3041
3042 entry = l3->slabs_partial.next;
3043 if (entry == &l3->slabs_partial) {
3044 l3->free_touched = 1;
3045 entry = l3->slabs_free.next;
3046 if (entry == &l3->slabs_free)
3047 goto must_grow;
3048 }
3049
3050 slabp = list_entry(entry, struct slab, list);
3051 check_slabp(cachep, slabp);
3052 check_spinlock_acquired(cachep);
3053
3054
3055
3056
3057
3058
3059 BUG_ON(slabp->inuse >= cachep->num);
3060
3061 while (slabp->inuse < cachep->num && batchcount--) {
3062 STATS_INC_ALLOCED(cachep);
3063 STATS_INC_ACTIVE(cachep);
3064 STATS_SET_HIGH(cachep);
3065
3066 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3067 node);
3068 }
3069 check_slabp(cachep, slabp);
3070
3071
3072 list_del(&slabp->list);
3073 if (slabp->free == BUFCTL_END)
3074 list_add(&slabp->list, &l3->slabs_full);
3075 else
3076 list_add(&slabp->list, &l3->slabs_partial);
3077 }
3078
3079must_grow:
3080 l3->free_objects -= ac->avail;
3081alloc_done:
3082 spin_unlock(&l3->list_lock);
3083
3084 if (unlikely(!ac->avail)) {
3085 int x;
3086 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3087
3088
3089 ac = cpu_cache_get(cachep);
3090 if (!x && ac->avail == 0)
3091 return NULL;
3092
3093 if (!ac->avail)
3094 goto retry;
3095 }
3096 ac->touched = 1;
3097 return ac->entry[--ac->avail];
3098}
3099
3100static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3101 gfp_t flags)
3102{
3103 might_sleep_if(flags & __GFP_WAIT);
3104#if DEBUG
3105 kmem_flagcheck(cachep, flags);
3106#endif
3107}
3108
3109#if DEBUG
3110static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3111 gfp_t flags, void *objp, void *caller)
3112{
3113 if (!objp)
3114 return objp;
3115 if (cachep->flags & SLAB_POISON) {
3116#ifdef CONFIG_DEBUG_PAGEALLOC
3117 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3118 kernel_map_pages(virt_to_page(objp),
3119 cachep->buffer_size / PAGE_SIZE, 1);
3120 else
3121 check_poison_obj(cachep, objp);
3122#else
3123 check_poison_obj(cachep, objp);
3124#endif
3125 poison_obj(cachep, objp, POISON_INUSE);
3126 }
3127 if (cachep->flags & SLAB_STORE_USER)
3128 *dbg_userword(cachep, objp) = caller;
3129
3130 if (cachep->flags & SLAB_RED_ZONE) {
3131 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3132 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3133 slab_error(cachep, "double free, or memory outside"
3134 " object was overwritten");
3135 printk(KERN_ERR
3136 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3137 objp, *dbg_redzone1(cachep, objp),
3138 *dbg_redzone2(cachep, objp));
3139 }
3140 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3141 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3142 }
3143#ifdef CONFIG_DEBUG_SLAB_LEAK
3144 {
3145 struct slab *slabp;
3146 unsigned objnr;
3147
3148 slabp = page_get_slab(virt_to_head_page(objp));
3149 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3150 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3151 }
3152#endif
3153 objp += obj_offset(cachep);
3154 if (cachep->ctor && cachep->flags & SLAB_POISON)
3155 cachep->ctor(objp);
3156#if ARCH_SLAB_MINALIGN
	if ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1)) {
3158 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3159 objp, ARCH_SLAB_MINALIGN);
3160 }
3161#endif
3162 return objp;
3163}
3164#else
3165#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3166#endif
3167
3168static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3169{
3170 if (cachep == &cache_cache)
3171 return false;
3172
3173 return should_failslab(obj_size(cachep), flags, cachep->flags);
3174}
3175
3176static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3177{
3178 void *objp;
3179 struct array_cache *ac;
3180
3181 check_irq_off();
3182
3183 ac = cpu_cache_get(cachep);
3184 if (likely(ac->avail)) {
3185 STATS_INC_ALLOCHIT(cachep);
3186 ac->touched = 1;
3187 objp = ac->entry[--ac->avail];
3188 } else {
3189 STATS_INC_ALLOCMISS(cachep);
3190 objp = cache_alloc_refill(cachep, flags);
3191
3192
3193
3194
3195 ac = cpu_cache_get(cachep);
3196 }
3197
3198
3199
3200
3201
3202 if (objp)
3203 kmemleak_erase(&ac->entry[ac->avail]);
3204 return objp;
3205}
3206
3207#ifdef CONFIG_NUMA
3208
3209
3210
3211
3212
3213
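/*
 * Try allocating on another node if the task is subject to memory spreading
 * or a mempolicy (PF_SPREAD_SLAB | PF_MEMPOLICY).  Returns NULL when the
 * preferred node is the local one, or when called in interrupt context or
 * with __GFP_THISNODE, so that the caller falls back to the normal local
 * allocation path.
 */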
3214static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3215{
3216 int nid_alloc, nid_here;
3217
3218 if (in_interrupt() || (flags & __GFP_THISNODE))
3219 return NULL;
3220 nid_alloc = nid_here = numa_mem_id();
3221 get_mems_allowed();
3222 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3223 nid_alloc = cpuset_slab_spread_node();
3224 else if (current->mempolicy)
3225 nid_alloc = slab_node(current->mempolicy);
3226 put_mems_allowed();
3227 if (nid_alloc != nid_here)
3228 return ____cache_alloc_node(cachep, flags, nid_alloc);
3229 return NULL;
3230}
3231
3232
3233
3234
3235
3236
3237
3238
3239
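/*
 * Fallback function if there was no memory available and no objects on a
 * particular node.  First scan the allowed zonelist for a node with free
 * objects; if that fails, allocate fresh pages without a node constraint
 * and grow the cache on whichever node the page allocator picked.  Not
 * used when __GFP_THISNODE is set.
 */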
3240static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3241{
3242 struct zonelist *zonelist;
3243 gfp_t local_flags;
3244 struct zoneref *z;
3245 struct zone *zone;
3246 enum zone_type high_zoneidx = gfp_zone(flags);
3247 void *obj = NULL;
3248 int nid;
3249
3250 if (flags & __GFP_THISNODE)
3251 return NULL;
3252
3253 get_mems_allowed();
3254 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
3255 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3256
3257retry:
3258
3259
3260
3261
3262 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3263 nid = zone_to_nid(zone);
3264
3265 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3266 cache->nodelists[nid] &&
3267 cache->nodelists[nid]->free_objects) {
3268 obj = ____cache_alloc_node(cache,
3269 flags | GFP_THISNODE, nid);
3270 if (obj)
3271 break;
3272 }
3273 }
3274
3275 if (!obj) {
3276
3277
3278
3279
3280
3281
3282 if (local_flags & __GFP_WAIT)
3283 local_irq_enable();
3284 kmem_flagcheck(cache, flags);
3285 obj = kmem_getpages(cache, local_flags, numa_mem_id());
3286 if (local_flags & __GFP_WAIT)
3287 local_irq_disable();
3288 if (obj) {
3289
3290
3291
3292 nid = page_to_nid(virt_to_page(obj));
3293 if (cache_grow(cache, flags, nid, obj)) {
3294 obj = ____cache_alloc_node(cache,
3295 flags | GFP_THISNODE, nid);
3296 if (!obj)
3297
3298
3299
3300
3301
3302 goto retry;
3303 } else {
3304
3305 obj = NULL;
3306 }
3307 }
3308 }
3309 put_mems_allowed();
3310 return obj;
3311}
3312
3313
3314
3315
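/*
 * Allocate one object from the slab lists of a specific node, growing the
 * cache on that node if necessary and resorting to fallback_alloc() when
 * even that fails.
 */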
3316static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3317 int nodeid)
3318{
3319 struct list_head *entry;
3320 struct slab *slabp;
3321 struct kmem_list3 *l3;
3322 void *obj;
3323 int x;
3324
3325 l3 = cachep->nodelists[nodeid];
3326 BUG_ON(!l3);
3327
3328retry:
3329 check_irq_off();
3330 spin_lock(&l3->list_lock);
3331 entry = l3->slabs_partial.next;
3332 if (entry == &l3->slabs_partial) {
3333 l3->free_touched = 1;
3334 entry = l3->slabs_free.next;
3335 if (entry == &l3->slabs_free)
3336 goto must_grow;
3337 }
3338
3339 slabp = list_entry(entry, struct slab, list);
3340 check_spinlock_acquired_node(cachep, nodeid);
3341 check_slabp(cachep, slabp);
3342
3343 STATS_INC_NODEALLOCS(cachep);
3344 STATS_INC_ACTIVE(cachep);
3345 STATS_SET_HIGH(cachep);
3346
3347 BUG_ON(slabp->inuse == cachep->num);
3348
3349 obj = slab_get_obj(cachep, slabp, nodeid);
3350 check_slabp(cachep, slabp);
3351 l3->free_objects--;
3352
3353 list_del(&slabp->list);
3354
3355 if (slabp->free == BUFCTL_END)
3356 list_add(&slabp->list, &l3->slabs_full);
3357 else
3358 list_add(&slabp->list, &l3->slabs_partial);
3359
3360 spin_unlock(&l3->list_lock);
3361 goto done;
3362
3363must_grow:
3364 spin_unlock(&l3->list_lock);
3365 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3366 if (x)
3367 goto retry;
3368
3369 return fallback_alloc(cachep, flags);
3370
3371done:
3372 return obj;
3373}
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
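/*
 * Allocate an object on the specified node.  Identical to __cache_alloc()
 * but tries to make the memory come from @nodeid, which can improve
 * performance for CPU-bound structures.  A nodeid of -1 means the local
 * memory node; fallback to other nodes is possible unless __GFP_THISNODE
 * is set.
 */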
3387static __always_inline void *
3388__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3389 void *caller)
3390{
3391 unsigned long save_flags;
3392 void *ptr;
3393 int slab_node = numa_mem_id();
3394
3395 flags &= gfp_allowed_mask;
3396
3397 lockdep_trace_alloc(flags);
3398
3399 if (slab_should_failslab(cachep, flags))
3400 return NULL;
3401
3402 cache_alloc_debugcheck_before(cachep, flags);
3403 local_irq_save(save_flags);
3404
3405 if (nodeid == -1)
3406 nodeid = slab_node;
3407
3408 if (unlikely(!cachep->nodelists[nodeid])) {
3409
3410 ptr = fallback_alloc(cachep, flags);
3411 goto out;
3412 }
3413
3414 if (nodeid == slab_node) {
3415
3416
3417
3418
3419
3420
3421 ptr = ____cache_alloc(cachep, flags);
3422 if (ptr)
3423 goto out;
3424 }
3425
3426 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3427 out:
3428 local_irq_restore(save_flags);
3429 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3430 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
3431 flags);
3432
3433 if (likely(ptr))
3434 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
3435
3436 if (unlikely((flags & __GFP_ZERO) && ptr))
3437 memset(ptr, 0, obj_size(cachep));
3438
3439 return ptr;
3440}
3441
3442static __always_inline void *
3443__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3444{
3445 void *objp;
3446
3447 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3448 objp = alternate_node_alloc(cache, flags);
3449 if (objp)
3450 goto out;
3451 }
3452 objp = ____cache_alloc(cache, flags);
3453
3454
3455
3456
3457
3458 if (!objp)
3459 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3460
3461 out:
3462 return objp;
3463}
3464#else
3465
3466static __always_inline void *
3467__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3468{
3469 return ____cache_alloc(cachep, flags);
3470}
3471
3472#endif
3473
3474static __always_inline void *
3475__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3476{
3477 unsigned long save_flags;
3478 void *objp;
3479
3480 flags &= gfp_allowed_mask;
3481
3482 lockdep_trace_alloc(flags);
3483
3484 if (slab_should_failslab(cachep, flags))
3485 return NULL;
3486
3487 cache_alloc_debugcheck_before(cachep, flags);
3488 local_irq_save(save_flags);
3489 objp = __do_cache_alloc(cachep, flags);
3490 local_irq_restore(save_flags);
3491 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3492 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
3493 flags);
3494 prefetchw(objp);
3495
3496 if (likely(objp))
3497 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
3498
3499 if (unlikely((flags & __GFP_ZERO) && objp))
3500 memset(objp, 0, obj_size(cachep));
3501
3502 return objp;
3503}
3504
3505
3506
3507
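/*
 * Release @nr_objects objects back to their slabs on @node.  The caller
 * must already hold the corresponding kmem_list3 list_lock.  Completely
 * free slabs are destroyed once the node is above its free_limit.
 */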
3508static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3509 int node)
3510{
3511 int i;
3512 struct kmem_list3 *l3;
3513
3514 for (i = 0; i < nr_objects; i++) {
3515 void *objp = objpp[i];
3516 struct slab *slabp;
3517
3518 slabp = virt_to_slab(objp);
3519 l3 = cachep->nodelists[node];
3520 list_del(&slabp->list);
3521 check_spinlock_acquired_node(cachep, node);
3522 check_slabp(cachep, slabp);
3523 slab_put_obj(cachep, slabp, objp, node);
3524 STATS_DEC_ACTIVE(cachep);
3525 l3->free_objects++;
3526 check_slabp(cachep, slabp);
3527
3528
3529 if (slabp->inuse == 0) {
3530 if (l3->free_objects > l3->free_limit) {
3531 l3->free_objects -= cachep->num;
3532
3533
3534
3535
3536
3537
3538 slab_destroy(cachep, slabp);
3539 } else {
3540 list_add(&slabp->list, &l3->slabs_free);
3541 }
3542 } else {
3543
3544
3545
3546
3547 list_add_tail(&slabp->list, &l3->slabs_partial);
3548 }
3549 }
3550}
3551
3552static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3553{
3554 int batchcount;
3555 struct kmem_list3 *l3;
3556 int node = numa_mem_id();
3557
3558 batchcount = ac->batchcount;
3559#if DEBUG
3560 BUG_ON(!batchcount || batchcount > ac->avail);
3561#endif
3562 check_irq_off();
3563 l3 = cachep->nodelists[node];
3564 spin_lock(&l3->list_lock);
3565 if (l3->shared) {
3566 struct array_cache *shared_array = l3->shared;
3567 int max = shared_array->limit - shared_array->avail;
3568 if (max) {
3569 if (batchcount > max)
3570 batchcount = max;
3571 memcpy(&(shared_array->entry[shared_array->avail]),
3572 ac->entry, sizeof(void *) * batchcount);
3573 shared_array->avail += batchcount;
3574 goto free_done;
3575 }
3576 }
3577
3578 free_block(cachep, ac->entry, batchcount, node);
3579free_done:
3580#if STATS
3581 {
3582 int i = 0;
3583 struct list_head *p;
3584
3585 p = l3->slabs_free.next;
3586 while (p != &(l3->slabs_free)) {
3587 struct slab *slabp;
3588
3589 slabp = list_entry(p, struct slab, list);
3590 BUG_ON(slabp->inuse);
3591
3592 i++;
3593 p = p->next;
3594 }
3595 STATS_SET_FREEABLE(cachep, i);
3596 }
3597#endif
3598 spin_unlock(&l3->list_lock);
3599 ac->avail -= batchcount;
3600 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3601}
3602
3603
3604
3605
3606
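/*
 * Release an obj back to its cache.  If the obj has a constructed state,
 * it must be in this state _before_ it is released.  Called with interrupts
 * disabled; the object either goes into the per-cpu array or, when that is
 * full, a batch is flushed back to the slab lists first.
 */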
3607static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3608 void *caller)
3609{
3610 struct array_cache *ac = cpu_cache_get(cachep);
3611
3612 check_irq_off();
3613 kmemleak_free_recursive(objp, cachep->flags);
3614 objp = cache_free_debugcheck(cachep, objp, caller);
3615
3616 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
3617
3618
3619
3620
3621
3622
3623
3624
3625 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3626 return;
3627
3628 if (likely(ac->avail < ac->limit)) {
3629 STATS_INC_FREEHIT(cachep);
3630 ac->entry[ac->avail++] = objp;
3631 return;
3632 } else {
3633 STATS_INC_FREEMISS(cachep);
3634 cache_flusharray(cachep, ac);
3635 ac->entry[ac->avail++] = objp;
3636 }
3637}
3638
3639
3640
3641
3642
3643
3644
3645
3646
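/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.  The flags are only relevant
 * if the cache has no available objects.
 *
 * Illustrative use ("foo_cache" and "struct foo" are placeholders):
 *	struct foo *p = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	if (!p)
 *		return -ENOMEM;
 */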
3647void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3648{
3649 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3650
3651 trace_kmem_cache_alloc(_RET_IP_, ret,
3652 obj_size(cachep), cachep->buffer_size, flags);
3653
3654 return ret;
3655}
3656EXPORT_SYMBOL(kmem_cache_alloc);
3657
3658#ifdef CONFIG_TRACING
3659void *
3660kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
3661{
3662 void *ret;
3663
3664 ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3665
3666 trace_kmalloc(_RET_IP_, ret,
3667 size, slab_buffer_size(cachep), flags);
3668 return ret;
3669}
3670EXPORT_SYMBOL(kmem_cache_alloc_trace);
3671#endif
3672
3673#ifdef CONFIG_NUMA
3674void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3675{
3676 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3677 __builtin_return_address(0));
3678
3679 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3680 obj_size(cachep), cachep->buffer_size,
3681 flags, nodeid);
3682
3683 return ret;
3684}
3685EXPORT_SYMBOL(kmem_cache_alloc_node);
3686
3687#ifdef CONFIG_TRACING
3688void *kmem_cache_alloc_node_trace(size_t size,
3689 struct kmem_cache *cachep,
3690 gfp_t flags,
3691 int nodeid)
3692{
3693 void *ret;
3694
3695 ret = __cache_alloc_node(cachep, flags, nodeid,
3696 __builtin_return_address(0));
3697 trace_kmalloc_node(_RET_IP_, ret,
3698 size, slab_buffer_size(cachep),
3699 flags, nodeid);
3700 return ret;
3701}
3702EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3703#endif
3704
3705static __always_inline void *
3706__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3707{
3708 struct kmem_cache *cachep;
3709
3710 cachep = kmem_find_general_cachep(size, flags);
3711 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3712 return cachep;
3713 return kmem_cache_alloc_node_trace(size, cachep, flags, node);
3714}
3715
3716#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3717void *__kmalloc_node(size_t size, gfp_t flags, int node)
3718{
3719 return __do_kmalloc_node(size, flags, node,
3720 __builtin_return_address(0));
3721}
3722EXPORT_SYMBOL(__kmalloc_node);
3723
3724void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3725 int node, unsigned long caller)
3726{
3727 return __do_kmalloc_node(size, flags, node, (void *)caller);
3728}
3729EXPORT_SYMBOL(__kmalloc_node_track_caller);
3730#else
3731void *__kmalloc_node(size_t size, gfp_t flags, int node)
3732{
3733 return __do_kmalloc_node(size, flags, node, NULL);
3734}
3735EXPORT_SYMBOL(__kmalloc_node);
3736#endif
3737#endif
3738
3739
3740
3741
3742
3743
3744
3745static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3746 void *caller)
3747{
3748 struct kmem_cache *cachep;
3749 void *ret;
3750
3751
3752
3753
3754
3755
3756 cachep = __find_general_cachep(size, flags);
3757 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3758 return cachep;
3759 ret = __cache_alloc(cachep, flags, caller);
3760
3761 trace_kmalloc((unsigned long) caller, ret,
3762 size, cachep->buffer_size, flags);
3763
3764 return ret;
3765}
3766
3767
3768#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3769void *__kmalloc(size_t size, gfp_t flags)
3770{
3771 return __do_kmalloc(size, flags, __builtin_return_address(0));
3772}
3773EXPORT_SYMBOL(__kmalloc);
3774
3775void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3776{
3777 return __do_kmalloc(size, flags, (void *)caller);
3778}
3779EXPORT_SYMBOL(__kmalloc_track_caller);
3780
3781#else
3782void *__kmalloc(size_t size, gfp_t flags)
3783{
3784 return __do_kmalloc(size, flags, NULL);
3785}
3786EXPORT_SYMBOL(__kmalloc);
3787#endif
3788
3789
3790
3791
3792
3793
3794
3795
3796
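/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */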
3797void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3798{
3799 unsigned long flags;
3800
3801 local_irq_save(flags);
3802 debug_check_no_locks_freed(objp, obj_size(cachep));
3803 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3804 debug_check_no_obj_freed(objp, obj_size(cachep));
3805 __cache_free(cachep, objp, __builtin_return_address(0));
3806 local_irq_restore(flags);
3807
3808 trace_kmem_cache_free(_RET_IP_, objp);
3809}
3810EXPORT_SYMBOL(kmem_cache_free);
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
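/**
 * kfree - free previously allocated memory
 * @objp: pointer returned by kmalloc.
 *
 * If @objp is NULL or ZERO_SIZE_PTR, no operation is performed.
 *
 * Don't free memory not originally allocated by kmalloc()
 * or you will run into trouble.
 */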
3821void kfree(const void *objp)
3822{
3823 struct kmem_cache *c;
3824 unsigned long flags;
3825
3826 trace_kfree(_RET_IP_, objp);
3827
3828 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3829 return;
3830 local_irq_save(flags);
3831 kfree_debugcheck(objp);
3832 c = virt_to_cache(objp);
3833 debug_check_no_locks_freed(objp, obj_size(c));
3834 debug_check_no_obj_freed(objp, obj_size(c));
3835 __cache_free(c, (void *)objp, __builtin_return_address(0));
3836 local_irq_restore(flags);
3837}
3838EXPORT_SYMBOL(kfree);
3839
3840unsigned int kmem_cache_size(struct kmem_cache *cachep)
3841{
3842 return obj_size(cachep);
3843}
3844EXPORT_SYMBOL(kmem_cache_size);
3845
3846
3847
3848
3849static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3850{
3851 int node;
3852 struct kmem_list3 *l3;
3853 struct array_cache *new_shared;
3854 struct array_cache **new_alien = NULL;
3855
3856 for_each_online_node(node) {
3857
3858 if (use_alien_caches) {
3859 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
3860 if (!new_alien)
3861 goto fail;
3862 }
3863
3864 new_shared = NULL;
3865 if (cachep->shared) {
3866 new_shared = alloc_arraycache(node,
3867 cachep->shared*cachep->batchcount,
3868 0xbaadf00d, gfp);
3869 if (!new_shared) {
3870 free_alien_cache(new_alien);
3871 goto fail;
3872 }
3873 }
3874
3875 l3 = cachep->nodelists[node];
3876 if (l3) {
3877 struct array_cache *shared = l3->shared;
3878
3879 spin_lock_irq(&l3->list_lock);
3880
3881 if (shared)
3882 free_block(cachep, shared->entry,
3883 shared->avail, node);
3884
3885 l3->shared = new_shared;
3886 if (!l3->alien) {
3887 l3->alien = new_alien;
3888 new_alien = NULL;
3889 }
3890 l3->free_limit = (1 + nr_cpus_node(node)) *
3891 cachep->batchcount + cachep->num;
3892 spin_unlock_irq(&l3->list_lock);
3893 kfree(shared);
3894 free_alien_cache(new_alien);
3895 continue;
3896 }
3897 l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
3898 if (!l3) {
3899 free_alien_cache(new_alien);
3900 kfree(new_shared);
3901 goto fail;
3902 }
3903
3904 kmem_list3_init(l3);
3905 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3906 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3907 l3->shared = new_shared;
3908 l3->alien = new_alien;
3909 l3->free_limit = (1 + nr_cpus_node(node)) *
3910 cachep->batchcount + cachep->num;
3911 cachep->nodelists[node] = l3;
3912 }
3913 return 0;
3914
3915fail:
3916 if (!cachep->next.next) {
3917
3918 node--;
3919 while (node >= 0) {
3920 if (cachep->nodelists[node]) {
3921 l3 = cachep->nodelists[node];
3922
3923 kfree(l3->shared);
3924 free_alien_cache(l3->alien);
3925 kfree(l3);
3926 cachep->nodelists[node] = NULL;
3927 }
3928 node--;
3929 }
3930 }
3931 return -ENOMEM;
3932}
3933
3934struct ccupdate_struct {
3935 struct kmem_cache *cachep;
3936 struct array_cache *new[NR_CPUS];
3937};
3938
3939static void do_ccupdate_local(void *info)
3940{
3941 struct ccupdate_struct *new = info;
3942 struct array_cache *old;
3943
3944 check_irq_off();
3945 old = cpu_cache_get(new->cachep);
3946
3947 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3948 new->new[smp_processor_id()] = old;
3949}
3950
3951
3952static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3953 int batchcount, int shared, gfp_t gfp)
3954{
3955 struct ccupdate_struct *new;
3956 int i;
3957
3958 new = kzalloc(sizeof(*new), gfp);
3959 if (!new)
3960 return -ENOMEM;
3961
3962 for_each_online_cpu(i) {
3963 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
3964 batchcount, gfp);
3965 if (!new->new[i]) {
3966 for (i--; i >= 0; i--)
3967 kfree(new->new[i]);
3968 kfree(new);
3969 return -ENOMEM;
3970 }
3971 }
3972 new->cachep = cachep;
3973
3974 on_each_cpu(do_ccupdate_local, (void *)new, 1);
3975
3976 check_irq_on();
3977 cachep->batchcount = batchcount;
3978 cachep->limit = limit;
3979 cachep->shared = shared;
3980
3981 for_each_online_cpu(i) {
3982 struct array_cache *ccold = new->new[i];
3983 if (!ccold)
3984 continue;
3985 spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
3986 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
3987 spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
3988 kfree(ccold);
3989 }
3990 kfree(new);
3991 return alloc_kmemlist(cachep, gfp);
3992}
3993
3994
3995static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3996{
3997 int err;
3998 int limit, shared;
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
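	/*
	 * The per-cpu head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations
	 * - reduce the number of linked list operations on the slab and
	 *   bulk free, which requires taking the list_lock
	 * The limits below are rough guesses: the larger the object, the
	 * smaller the per-cpu array.
	 */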
4009 if (cachep->buffer_size > 131072)
4010 limit = 1;
4011 else if (cachep->buffer_size > PAGE_SIZE)
4012 limit = 8;
4013 else if (cachep->buffer_size > 1024)
4014 limit = 24;
4015 else if (cachep->buffer_size > 256)
4016 limit = 54;
4017 else
4018 limit = 120;
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029 shared = 0;
4030 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4031 shared = 8;
4032
4033#if DEBUG
4034
4035
4036
4037
4038 if (limit > 32)
4039 limit = 32;
4040#endif
4041 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
4042 if (err)
4043 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4044 cachep->name, -err);
4045 return err;
4046}
4047
4048
4049
4050
4051
4052
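/*
 * Drain an array_cache if it contains any elements, taking the l3 lock only
 * when necessary.  With @force the whole array is flushed; otherwise only a
 * fraction is freed, and a recently touched array is left alone.
 */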
4053static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4054 struct array_cache *ac, int force, int node)
4055{
4056 int tofree;
4057
4058 if (!ac || !ac->avail)
4059 return;
4060 if (ac->touched && !force) {
4061 ac->touched = 0;
4062 } else {
4063 spin_lock_irq(&l3->list_lock);
4064 if (ac->avail) {
4065 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4066 if (tofree > ac->avail)
4067 tofree = (ac->avail + 1) / 2;
4068 free_block(cachep, ac->entry, tofree, node);
4069 ac->avail -= tofree;
4070 memmove(ac->entry, &(ac->entry[tofree]),
4071 sizeof(void *) * ac->avail);
4072 }
4073 spin_unlock_irq(&l3->list_lock);
4074 }
4075}
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
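/**
 * cache_reap - Reclaim memory from caches.
 * @w: work descriptor
 *
 * Called from a workqueue every few seconds to drain the per-cpu arrays,
 * trim the shared arrays and return freeable slabs to the page allocator.
 * If the cache_chain_mutex cannot be taken we simply give up and try again
 * on the next iteration.
 */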
4089static void cache_reap(struct work_struct *w)
4090{
4091 struct kmem_cache *searchp;
4092 struct kmem_list3 *l3;
4093 int node = numa_mem_id();
4094 struct delayed_work *work = to_delayed_work(w);
4095
4096 if (!mutex_trylock(&cache_chain_mutex))
4097
4098 goto out;
4099
4100 list_for_each_entry(searchp, &cache_chain, next) {
4101 check_irq_on();
4102
4103
4104
4105
4106
4107
4108 l3 = searchp->nodelists[node];
4109
4110 reap_alien(searchp, l3);
4111
4112 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4113
4114
4115
4116
4117
4118 if (time_after(l3->next_reap, jiffies))
4119 goto next;
4120
4121 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4122
4123 drain_array(searchp, l3, l3->shared, 0, node);
4124
4125 if (l3->free_touched)
4126 l3->free_touched = 0;
4127 else {
4128 int freed;
4129
4130 freed = drain_freelist(searchp, l3, (l3->free_limit +
4131 5 * searchp->num - 1) / (5 * searchp->num));
4132 STATS_ADD_REAPED(searchp, freed);
4133 }
4134next:
4135 cond_resched();
4136 }
4137 check_irq_on();
4138 mutex_unlock(&cache_chain_mutex);
4139 next_reap_node();
4140out:
4141
4142 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4143}
4144
4145#ifdef CONFIG_SLABINFO
4146
4147static void print_slabinfo_header(struct seq_file *m)
4148{
4149
4150
4151
4152
4153#if STATS
4154 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4155#else
4156 seq_puts(m, "slabinfo - version: 2.1\n");
4157#endif
4158 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4159 "<objperslab> <pagesperslab>");
4160 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4161 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4162#if STATS
4163 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4164 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4165 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4166#endif
4167 seq_putc(m, '\n');
4168}
4169
4170static void *s_start(struct seq_file *m, loff_t *pos)
4171{
4172 loff_t n = *pos;
4173
4174 mutex_lock(&cache_chain_mutex);
4175 if (!n)
4176 print_slabinfo_header(m);
4177
4178 return seq_list_start(&cache_chain, *pos);
4179}
4180
4181static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4182{
4183 return seq_list_next(p, &cache_chain, pos);
4184}
4185
4186static void s_stop(struct seq_file *m, void *p)
4187{
4188 mutex_unlock(&cache_chain_mutex);
4189}
4190
4191static int s_show(struct seq_file *m, void *p)
4192{
4193 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4194 struct slab *slabp;
4195 unsigned long active_objs;
4196 unsigned long num_objs;
4197 unsigned long active_slabs = 0;
4198 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4199 const char *name;
4200 char *error = NULL;
4201 int node;
4202 struct kmem_list3 *l3;
4203
4204 active_objs = 0;
4205 num_slabs = 0;
4206 for_each_online_node(node) {
4207 l3 = cachep->nodelists[node];
4208 if (!l3)
4209 continue;
4210
4211 check_irq_on();
4212 spin_lock_irq(&l3->list_lock);
4213
4214 list_for_each_entry(slabp, &l3->slabs_full, list) {
4215 if (slabp->inuse != cachep->num && !error)
4216 error = "slabs_full accounting error";
4217 active_objs += cachep->num;
4218 active_slabs++;
4219 }
4220 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4221 if (slabp->inuse == cachep->num && !error)
4222 error = "slabs_partial inuse accounting error";
4223 if (!slabp->inuse && !error)
4224 error = "slabs_partial/inuse accounting error";
4225 active_objs += slabp->inuse;
4226 active_slabs++;
4227 }
4228 list_for_each_entry(slabp, &l3->slabs_free, list) {
4229 if (slabp->inuse && !error)
4230 error = "slabs_free/inuse accounting error";
4231 num_slabs++;
4232 }
4233 free_objects += l3->free_objects;
4234 if (l3->shared)
4235 shared_avail += l3->shared->avail;
4236
4237 spin_unlock_irq(&l3->list_lock);
4238 }
4239 num_slabs += active_slabs;
4240 num_objs = num_slabs * cachep->num;
4241 if (num_objs - active_objs != free_objects && !error)
4242 error = "free_objects accounting error";
4243
4244 name = cachep->name;
4245 if (error)
4246 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4247
4248 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4249 name, active_objs, num_objs, cachep->buffer_size,
4250 cachep->num, (1 << cachep->gfporder));
4251 seq_printf(m, " : tunables %4u %4u %4u",
4252 cachep->limit, cachep->batchcount, cachep->shared);
4253 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4254 active_slabs, num_slabs, shared_avail);
4255#if STATS
4256 {
4257 unsigned long high = cachep->high_mark;
4258 unsigned long allocs = cachep->num_allocations;
4259 unsigned long grown = cachep->grown;
4260 unsigned long reaped = cachep->reaped;
4261 unsigned long errors = cachep->errors;
4262 unsigned long max_freeable = cachep->max_freeable;
4263 unsigned long node_allocs = cachep->node_allocs;
4264 unsigned long node_frees = cachep->node_frees;
4265 unsigned long overflows = cachep->node_overflow;
4266
4267 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4268 "%4lu %4lu %4lu %4lu %4lu",
4269 allocs, high, grown,
4270 reaped, errors, max_freeable, node_allocs,
4271 node_frees, overflows);
4272 }
4273
4274 {
4275 unsigned long allochit = atomic_read(&cachep->allochit);
4276 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4277 unsigned long freehit = atomic_read(&cachep->freehit);
4278 unsigned long freemiss = atomic_read(&cachep->freemiss);
4279
4280 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4281 allochit, allocmiss, freehit, freemiss);
4282 }
4283#endif
4284 seq_putc(m, '\n');
4285 return 0;
4286}
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302static const struct seq_operations slabinfo_op = {
4303 .start = s_start,
4304 .next = s_next,
4305 .stop = s_stop,
4306 .show = s_show,
4307};
4308
4309#define MAX_SLABINFO_WRITE 128
4310
4311
4312
4313
4314
4315
4316
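/**
 * slabinfo_write - Tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data length
 * @ppos: unused
 *
 * Expects a line of the form "<cache-name> <limit> <batchcount> <shared>",
 * e.g. (illustrative):
 *	echo "dentry 120 60 8" > /proc/slabinfo
 */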
4317static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4318 size_t count, loff_t *ppos)
4319{
4320 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4321 int limit, batchcount, shared, res;
4322 struct kmem_cache *cachep;
4323
4324 if (count > MAX_SLABINFO_WRITE)
4325 return -EINVAL;
4326 if (copy_from_user(&kbuf, buffer, count))
4327 return -EFAULT;
4328 kbuf[MAX_SLABINFO_WRITE] = '\0';
4329
4330 tmp = strchr(kbuf, ' ');
4331 if (!tmp)
4332 return -EINVAL;
4333 *tmp = '\0';
4334 tmp++;
4335 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4336 return -EINVAL;
4337
4338
4339 mutex_lock(&cache_chain_mutex);
4340 res = -EINVAL;
4341 list_for_each_entry(cachep, &cache_chain, next) {
4342 if (!strcmp(cachep->name, kbuf)) {
4343 if (limit < 1 || batchcount < 1 ||
4344 batchcount > limit || shared < 0) {
4345 res = 0;
4346 } else {
4347 res = do_tune_cpucache(cachep, limit,
4348 batchcount, shared,
4349 GFP_KERNEL);
4350 }
4351 break;
4352 }
4353 }
4354 mutex_unlock(&cache_chain_mutex);
4355 if (res >= 0)
4356 res = count;
4357 return res;
4358}
4359
4360static int slabinfo_open(struct inode *inode, struct file *file)
4361{
4362 return seq_open(file, &slabinfo_op);
4363}
4364
4365static const struct file_operations proc_slabinfo_operations = {
4366 .open = slabinfo_open,
4367 .read = seq_read,
4368 .write = slabinfo_write,
4369 .llseek = seq_lseek,
4370 .release = seq_release,
4371};
4372
4373#ifdef CONFIG_DEBUG_SLAB_LEAK
4374
4375static void *leaks_start(struct seq_file *m, loff_t *pos)
4376{
4377 mutex_lock(&cache_chain_mutex);
4378 return seq_list_start(&cache_chain, *pos);
4379}
4380
4381static inline int add_caller(unsigned long *n, unsigned long v)
4382{
4383 unsigned long *p;
4384 int l;
4385 if (!v)
4386 return 1;
4387 l = n[1];
4388 p = n + 2;
4389 while (l) {
4390 int i = l/2;
4391 unsigned long *q = p + 2 * i;
4392 if (*q == v) {
4393 q[1]++;
4394 return 1;
4395 }
4396 if (*q > v) {
4397 l = i;
4398 } else {
4399 p = q + 2;
4400 l -= i + 1;
4401 }
4402 }
4403 if (++n[1] == n[0])
4404 return 0;
4405 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4406 p[0] = v;
4407 p[1] = 1;
4408 return 1;
4409}
4410
4411static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4412{
4413 void *p;
4414 int i;
4415 if (n[0] == n[1])
4416 return;
4417 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4418 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4419 continue;
4420 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4421 return;
4422 }
4423}
4424
4425static void show_symbol(struct seq_file *m, unsigned long address)
4426{
4427#ifdef CONFIG_KALLSYMS
4428 unsigned long offset, size;
4429 char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4430
4431 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4432 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4433 if (modname[0])
4434 seq_printf(m, " [%s]", modname);
4435 return;
4436 }
4437#endif
4438 seq_printf(m, "%p", (void *)address);
4439}
4440
4441static int leaks_show(struct seq_file *m, void *p)
4442{
4443 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4444 struct slab *slabp;
4445 struct kmem_list3 *l3;
4446 const char *name;
4447 unsigned long *n = m->private;
4448 int node;
4449 int i;
4450
4451 if (!(cachep->flags & SLAB_STORE_USER))
4452 return 0;
4453 if (!(cachep->flags & SLAB_RED_ZONE))
4454 return 0;
4455
4456
4457
4458 n[1] = 0;
4459
4460 for_each_online_node(node) {
4461 l3 = cachep->nodelists[node];
4462 if (!l3)
4463 continue;
4464
4465 check_irq_on();
4466 spin_lock_irq(&l3->list_lock);
4467
4468 list_for_each_entry(slabp, &l3->slabs_full, list)
4469 handle_slab(n, cachep, slabp);
4470 list_for_each_entry(slabp, &l3->slabs_partial, list)
4471 handle_slab(n, cachep, slabp);
4472 spin_unlock_irq(&l3->list_lock);
4473 }
4474 name = cachep->name;
4475 if (n[0] == n[1]) {
4476
4477 mutex_unlock(&cache_chain_mutex);
4478 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4479 if (!m->private) {
4480
4481 m->private = n;
4482 mutex_lock(&cache_chain_mutex);
4483 return -ENOMEM;
4484 }
4485 *(unsigned long *)m->private = n[0] * 2;
4486 kfree(n);
4487 mutex_lock(&cache_chain_mutex);
4488
4489 m->count = m->size;
4490 return 0;
4491 }
4492 for (i = 0; i < n[1]; i++) {
4493 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4494 show_symbol(m, n[2*i+2]);
4495 seq_putc(m, '\n');
4496 }
4497
4498 return 0;
4499}
4500
4501static const struct seq_operations slabstats_op = {
4502 .start = leaks_start,
4503 .next = s_next,
4504 .stop = s_stop,
4505 .show = leaks_show,
4506};
4507
4508static int slabstats_open(struct inode *inode, struct file *file)
4509{
4510 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4511 int ret = -ENOMEM;
4512 if (n) {
4513 ret = seq_open(file, &slabstats_op);
4514 if (!ret) {
4515 struct seq_file *m = file->private_data;
4516 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4517 m->private = n;
4518 n = NULL;
4519 }
4520 kfree(n);
4521 }
4522 return ret;
4523}
4524
4525static const struct file_operations proc_slabstats_operations = {
4526 .open = slabstats_open,
4527 .read = seq_read,
4528 .llseek = seq_lseek,
4529 .release = seq_release_private,
4530};
4531#endif
4532
4533static int __init slab_proc_init(void)
4534{
4535 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4536#ifdef CONFIG_DEBUG_SLAB_LEAK
4537 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4538#endif
4539 return 0;
4540}
4541module_init(slab_proc_init);
4542#endif
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
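/**
 * ksize - get the actual amount of memory allocated for a given object
 * @objp: Pointer to the object
 *
 * kmalloc may internally round up allocations and return more memory
 * than requested.  ksize() can be used to determine the actual amount of
 * memory allocated.  The caller must guarantee that @objp points to a valid
 * object previously allocated with kmalloc() or kmem_cache_alloc() and that
 * it is not freed during the call.
 */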
4556size_t ksize(const void *objp)
4557{
4558 BUG_ON(!objp);
4559 if (unlikely(objp == ZERO_SIZE_PTR))
4560 return 0;
4561
4562 return obj_size(virt_to_cache(objp));
4563}
4564EXPORT_SYMBOL(ksize);
4565