/*
 * linux/mm/slab.c
 *
 * SLAB object-cache allocator: carves pages into caches of equal-sized
 * objects, fronted by per-CPU array caches and backed by per-node
 * (kmem_list3) lists of full, partial and free slabs.
 */
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>

/*
 * DEBUG	- 1 for kmem_cache_create() to honour red-zoning and poisoning;
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * STATS	- 1 to collect statistics for /proc/slabinfo;
 *		  0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG	- 1 enables SLAB_RED_ZONE and SLAB_POISON where possible.
 */

#ifdef CONFIG_DEBUG_SLAB
#define DEBUG 1
#define STATS 1
#define FORCED_DEBUG 1
#else
#define DEBUG 0
#define STATS 0
#define FORCED_DEBUG 0
#endif
142
143
144#define BYTES_PER_WORD sizeof(void *)
145#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))
146
147#ifndef ARCH_KMALLOC_FLAGS
148#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
149#endif
150
151
152#if DEBUG
153# define CREATE_MASK (SLAB_RED_ZONE | \
154 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
155 SLAB_CACHE_DMA | \
156 SLAB_STORE_USER | \
157 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
158 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
159 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
160#else
161# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
162 SLAB_CACHE_DMA | \
163 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
164 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
165 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
166#endif
167
/*
 * kmem_bufctl_t:
 *
 * Bufctls form the free-object list inside a slab: slab_bufctl(slabp)[i]
 * holds the index of the next free object after object i, with
 * BUFCTL_END terminating the list.  The top few values of the type are
 * reserved as markers, which is why SLAB_LIMIT bounds the number of
 * objects a single slab may hold.
 */
187typedef unsigned int kmem_bufctl_t;
188#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
189#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
190#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
191#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
/*
 * struct slab
 *
 * Manages the objects in one slab: where the first object starts
 * (s_mem, after the colour offset), how many objects are in use, and
 * the head of the free-object list.  Lives either at the start of the
 * slab's own memory or, for large objects, in a separate off-slab cache.
 */
200struct slab {
201 struct list_head list;
202 unsigned long colouroff;
203 void *s_mem;
204 unsigned int inuse;
205 kmem_bufctl_t free;
206 unsigned short nodeid;
207};
/*
 * struct slab_rcu
 *
 * Used for SLAB_DESTROY_BY_RCU caches: the slab descriptor is overlaid
 * with an rcu_head so the underlying pages can be freed from an RCU
 * callback (kmem_rcu_free) after a grace period, which lets lockless
 * readers keep dereferencing objects until then.
 */
225struct slab_rcu {
226 struct rcu_head head;
227 struct kmem_cache *cachep;
228 void *addr;
229};
/*
 * struct array_cache
 *
 * Per-CPU (and "shared"/"alien") cache of object pointers.  Allocation
 * and freeing normally touch only this array; batchcount objects at a
 * time are moved to or from the slab lists when it runs empty or full.
 * The entry[] array is sized at allocation time (see alloc_arraycache).
 */
243struct array_cache {
244 unsigned int avail;
245 unsigned int limit;
246 unsigned int batchcount;
247 unsigned int touched;
248 spinlock_t lock;
249 void *entry[];
254};
/*
 * Bootstrap arraycache: a statically sized array_cache with room for a
 * single entry, used by the boot-time caches before the real per-CPU
 * caches can be kmalloc'ed.
 */
260#define BOOT_CPUCACHE_ENTRIES 1
261struct arraycache_init {
262 struct array_cache cache;
263 void *entries[BOOT_CPUCACHE_ENTRIES];
264};
/*
 * struct kmem_list3
 *
 * Per-node bookkeeping: the three slab lists (full, partial, free),
 * the node's shared and alien array caches, and reaping state.
 */
269struct kmem_list3 {
270 struct list_head slabs_partial;
271 struct list_head slabs_full;
272 struct list_head slabs_free;
273 unsigned long free_objects;
274 unsigned int free_limit;
275 unsigned int colour_next;
276 spinlock_t list_lock;
277 struct array_cache *shared;
278 struct array_cache **alien;
279 unsigned long next_reap;
280 int free_touched;
281};
/*
 * Statically allocated kmem_list3 structures used while bootstrapping:
 * one set per node for cache_cache, one for the arraycache-sized
 * kmalloc cache and one for the kmem_list3-sized kmalloc cache.
 */
286#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
287static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
288#define CACHE_CACHE 0
289#define SIZE_AC MAX_NUMNODES
290#define SIZE_L3 (2 * MAX_NUMNODES)
291
292static int drain_freelist(struct kmem_cache *cache,
293 struct kmem_list3 *l3, int tofree);
294static void free_block(struct kmem_cache *cachep, void **objpp, int len,
295 int node);
296static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
297static void cache_reap(struct work_struct *unused);
/*
 * index_of() maps a compile-time constant size to the index of the
 * matching entry in malloc_sizes[]; __bad_size() is deliberately left
 * undefined so that misuse fails at link time.
 */
303static __always_inline int index_of(const size_t size)
304{
305 extern void __bad_size(void);
306
307 if (__builtin_constant_p(size)) {
308 int i = 0;
309
310#define CACHE(x) \
311 if (size <=x) \
312 return i; \
313 else \
314 i++;
315#include <linux/kmalloc_sizes.h>
316#undef CACHE
317 __bad_size();
318 } else
319 __bad_size();
320 return 0;
321}
322
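/*
 * Cleared in kmem_cache_init() once the first kmalloc caches exist;
 * until then no cache may place its slab descriptors off-slab.
 */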
323static int slab_early_init = 1;
324
325#define INDEX_AC index_of(sizeof(struct arraycache_init))
326#define INDEX_L3 index_of(sizeof(struct kmem_list3))
327
328static void kmem_list3_init(struct kmem_list3 *parent)
329{
330 INIT_LIST_HEAD(&parent->slabs_full);
331 INIT_LIST_HEAD(&parent->slabs_partial);
332 INIT_LIST_HEAD(&parent->slabs_free);
333 parent->shared = NULL;
334 parent->alien = NULL;
335 parent->colour_next = 0;
336 spin_lock_init(&parent->list_lock);
337 parent->free_objects = 0;
338 parent->free_touched = 0;
339}
340
341#define MAKE_LIST(cachep, listp, slab, nodeid) \
342 do { \
343 INIT_LIST_HEAD(listp); \
344 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
345 } while (0)
346
347#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
348 do { \
349 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
350 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
351 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
352 } while (0)
353
354#define CFLGS_OFF_SLAB (0x80000000UL)
355#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
356
357#define BATCHREFILL_LIMIT 16
/*
 * Reap timeouts: fewer reaps mean fewer needless cpucache drain/refill
 * cycles, but the arrays can then pin a lot of otherwise freeable
 * objects, so per-CPU caches are reaped more often than the node lists.
 */
365#define REAPTIMEOUT_CPUC (2*HZ)
366#define REAPTIMEOUT_LIST3 (4*HZ)
367
368#if STATS
369#define STATS_INC_ACTIVE(x) ((x)->num_active++)
370#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
371#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
372#define STATS_INC_GROWN(x) ((x)->grown++)
373#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
374#define STATS_SET_HIGH(x) \
375 do { \
376 if ((x)->num_active > (x)->high_mark) \
377 (x)->high_mark = (x)->num_active; \
378 } while (0)
379#define STATS_INC_ERR(x) ((x)->errors++)
380#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
381#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
382#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
383#define STATS_SET_FREEABLE(x, i) \
384 do { \
385 if ((x)->max_freeable < i) \
386 (x)->max_freeable = i; \
387 } while (0)
388#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
389#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
390#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
391#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
392#else
393#define STATS_INC_ACTIVE(x) do { } while (0)
394#define STATS_DEC_ACTIVE(x) do { } while (0)
395#define STATS_INC_ALLOCED(x) do { } while (0)
396#define STATS_INC_GROWN(x) do { } while (0)
397#define STATS_ADD_REAPED(x,y) do { (void)(y); } while (0)
398#define STATS_SET_HIGH(x) do { } while (0)
399#define STATS_INC_ERR(x) do { } while (0)
400#define STATS_INC_NODEALLOCS(x) do { } while (0)
401#define STATS_INC_NODEFREES(x) do { } while (0)
402#define STATS_INC_ACOVERFLOW(x) do { } while (0)
403#define STATS_SET_FREEABLE(x, i) do { } while (0)
404#define STATS_INC_ALLOCHIT(x) do { } while (0)
405#define STATS_INC_ALLOCMISS(x) do { } while (0)
406#define STATS_INC_FREEHIT(x) do { } while (0)
407#define STATS_INC_FREEMISS(x) do { } while (0)
408#endif
409
410#if DEBUG
/*
 * Memory layout of a debug object within its buffer:
 *
 *	objp + obj_offset()			start of the real object
 *	objp + obj_offset() - sizeof(u64)	first red zone word
 *	objp + buffer_size  - sizeof(u64)	second red zone word
 *						(moved down by REDZONE_ALIGN
 *						 when SLAB_STORE_USER is set)
 *	objp + buffer_size  - BYTES_PER_WORD	last-user address
 *
 * These helpers are only valid when the corresponding debug flag is set.
 */
425static int obj_offset(struct kmem_cache *cachep)
426{
427 return cachep->obj_offset;
428}
429
430static int obj_size(struct kmem_cache *cachep)
431{
432 return cachep->obj_size;
433}
434
435static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
436{
437 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
438 return (unsigned long long*) (objp + obj_offset(cachep) -
439 sizeof(unsigned long long));
440}
441
442static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
443{
444 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
445 if (cachep->flags & SLAB_STORE_USER)
446 return (unsigned long long *)(objp + cachep->buffer_size -
447 sizeof(unsigned long long) -
448 REDZONE_ALIGN);
449 return (unsigned long long *) (objp + cachep->buffer_size -
450 sizeof(unsigned long long));
451}
452
453static void **dbg_userword(struct kmem_cache *cachep, void *objp)
454{
455 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
456 return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
457}
458
459#else
460
461#define obj_offset(x) 0
462#define obj_size(cachep) (cachep->buffer_size)
463#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
464#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
465#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
466
467#endif
468
469#ifdef CONFIG_TRACING
470size_t slab_buffer_size(struct kmem_cache *cachep)
471{
472 return cachep->buffer_size;
473}
474EXPORT_SYMBOL(slab_buffer_size);
475#endif
/*
 * Do not go above this order unless zero objects would fit into the
 * slab; raised to BREAK_GFP_ORDER_HI in kmem_cache_init() on machines
 * with more than 32MB of RAM.
 */
480#define BREAK_GFP_ORDER_HI 1
481#define BREAK_GFP_ORDER_LO 0
482static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
/*
 * While a page belongs to the slab allocator its lru.next and lru.prev
 * fields are overloaded to point back at the owning kmem_cache and
 * struct slab; kfree() and kmem_cache_free() use these to find the
 * cache and slab for an arbitrary object address.
 */
489static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
490{
491 page->lru.next = (struct list_head *)cache;
492}
493
494static inline struct kmem_cache *page_get_cache(struct page *page)
495{
496 page = compound_head(page);
497 BUG_ON(!PageSlab(page));
498 return (struct kmem_cache *)page->lru.next;
499}
500
501static inline void page_set_slab(struct page *page, struct slab *slab)
502{
503 page->lru.prev = (struct list_head *)slab;
504}
505
506static inline struct slab *page_get_slab(struct page *page)
507{
508 BUG_ON(!PageSlab(page));
509 return (struct slab *)page->lru.prev;
510}
511
512static inline struct kmem_cache *virt_to_cache(const void *obj)
513{
514 struct page *page = virt_to_head_page(obj);
515 return page_get_cache(page);
516}
517
518static inline struct slab *virt_to_slab(const void *obj)
519{
520 struct page *page = virt_to_head_page(obj);
521 return page_get_slab(page);
522}
523
524static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
525 unsigned int idx)
526{
527 return slab->s_mem + cache->buffer_size * idx;
528}
529
/*
 * Dividing by buffer_size at run time would be expensive and the
 * compiler cannot optimise it away (the divisor is per-cache, not a
 * compile-time constant), so the division is replaced by a multiply
 * with the precomputed reciprocal_buffer_size via reciprocal_divide().
 */
536static inline unsigned int obj_to_index(const struct kmem_cache *cache,
537 const struct slab *slab, void *obj)
538{
539 u32 offset = (obj - slab->s_mem);
540 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
541}
542
543
544
545
546struct cache_sizes malloc_sizes[] = {
547#define CACHE(x) { .cs_size = (x) },
548#include <linux/kmalloc_sizes.h>
549 CACHE(ULONG_MAX)
550#undef CACHE
551};
552EXPORT_SYMBOL(malloc_sizes);
553
554
555struct cache_names {
556 char *name;
557 char *name_dma;
558};
559
560static struct cache_names __initdata cache_names[] = {
561#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
562#include <linux/kmalloc_sizes.h>
563 {NULL,}
564#undef CACHE
565};
566
567static struct arraycache_init initarray_cache __initdata =
568 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
569static struct arraycache_init initarray_generic =
570 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
571
572
573static struct kmem_cache cache_cache = {
574 .batchcount = 1,
575 .limit = BOOT_CPUCACHE_ENTRIES,
576 .shared = 1,
577 .buffer_size = sizeof(struct kmem_cache),
578 .name = "kmem_cache",
579};
580
581#define BAD_ALIEN_MAGIC 0x01020304ul
/*
 * Bootstrap state of the slab allocator, in the order the stages are
 * reached by kmem_cache_init() and kmem_cache_init_late().
 */
587static enum {
588 NONE,
589 PARTIAL_AC,
590 PARTIAL_L3,
591 EARLY,
592 FULL
593} g_cpucache_up;
594
595
596
597
598int slab_is_available(void)
599{
600 return g_cpucache_up >= EARLY;
601}
602
603#ifdef CONFIG_LOCKDEP
604
605
606
607
608
609
610
611
612
613
614
615
616static struct lock_class_key on_slab_l3_key;
617static struct lock_class_key on_slab_alc_key;
618
619static void init_node_lock_keys(int q)
620{
621 struct cache_sizes *s = malloc_sizes;
622
623 if (g_cpucache_up != FULL)
624 return;
625
626 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
627 struct array_cache **alc;
628 struct kmem_list3 *l3;
629 int r;
630
631 l3 = s->cs_cachep->nodelists[q];
632 if (!l3 || OFF_SLAB(s->cs_cachep))
633 continue;
634 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
635 alc = l3->alien;
636
637
638
639
640
641
642
643 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
644 continue;
645 for_each_node(r) {
646 if (alc[r])
647 lockdep_set_class(&alc[r]->lock,
648 &on_slab_alc_key);
649 }
650 }
651}
652
653static inline void init_lock_keys(void)
654{
655 int node;
656
657 for_each_node(node)
658 init_node_lock_keys(node);
659}
660#else
661static void init_node_lock_keys(int q)
662{
663}
664
665static inline void init_lock_keys(void)
666{
667}
668#endif
669
670
671
672
673static DEFINE_MUTEX(cache_chain_mutex);
674static struct list_head cache_chain;
675
676static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
677
678static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
679{
680 return cachep->array[smp_processor_id()];
681}
682
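/*
 * Map a kmalloc() size to the smallest general cache that fits it,
 * returning the DMA variant when GFP_DMA is set and ZERO_SIZE_PTR for
 * zero-sized requests.
 */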
683static inline struct kmem_cache *__find_general_cachep(size_t size,
684 gfp_t gfpflags)
685{
686 struct cache_sizes *csizep = malloc_sizes;
687
688#if DEBUG
689
690
691
692
693 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
694#endif
695 if (!size)
696 return ZERO_SIZE_PTR;
697
698 while (size > csizep->cs_size)
699 csizep++;
700
701
702
703
704
705
706#ifdef CONFIG_ZONE_DMA
707 if (unlikely(gfpflags & GFP_DMA))
708 return csizep->cs_dmacachep;
709#endif
710 return csizep->cs_cachep;
711}
712
713static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
714{
715 return __find_general_cachep(size, gfpflags);
716}
717
718static size_t slab_mgmt_size(size_t nr_objs, size_t align)
719{
720 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
721}
722
723
724
725
726static void cache_estimate(unsigned long gfporder, size_t buffer_size,
727 size_t align, int flags, size_t *left_over,
728 unsigned int *num)
729{
730 int nr_objs;
731 size_t mgmt_size;
732 size_t slab_size = PAGE_SIZE << gfporder;
	/*
	 * The slab management structure can be either off the slab or on
	 * it.  In the on-slab case the memory allocated for a slab holds
	 * the struct slab, one kmem_bufctl_t per object, padding to
	 * honour @align, and @buffer_size bytes per object.  If the
	 * management structure is off-slab, the whole slab is available
	 * for objects.  The on-slab estimate below first ignores the
	 * alignment padding and then corrects nr_objs if it overshot.
	 */
749 if (flags & CFLGS_OFF_SLAB) {
750 mgmt_size = 0;
751 nr_objs = slab_size / buffer_size;
752
753 if (nr_objs > SLAB_LIMIT)
754 nr_objs = SLAB_LIMIT;
755 } else {
756
757
758
759
760
761
762
763
764 nr_objs = (slab_size - sizeof(struct slab)) /
765 (buffer_size + sizeof(kmem_bufctl_t));
766
767
768
769
770
771 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
772 > slab_size)
773 nr_objs--;
774
775 if (nr_objs > SLAB_LIMIT)
776 nr_objs = SLAB_LIMIT;
777
778 mgmt_size = slab_mgmt_size(nr_objs, align);
779 }
780 *num = nr_objs;
781 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
782}
783
784#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
785
786static void __slab_error(const char *function, struct kmem_cache *cachep,
787 char *msg)
788{
789 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
790 function, cachep->name, msg);
791 dump_stack();
792}
793
794
795
796
797
798
799
800
801
802static int use_alien_caches __read_mostly = 1;
803static int __init noaliencache_setup(char *s)
804{
805 use_alien_caches = 0;
806 return 1;
807}
808__setup("noaliencache", noaliencache_setup);
809
810#ifdef CONFIG_NUMA
811
812
813
814
815
816
817static DEFINE_PER_CPU(unsigned long, slab_reap_node);
818
819static void init_reap_node(int cpu)
820{
821 int node;
822
823 node = next_node(cpu_to_mem(cpu), node_online_map);
824 if (node == MAX_NUMNODES)
825 node = first_node(node_online_map);
826
827 per_cpu(slab_reap_node, cpu) = node;
828}
829
830static void next_reap_node(void)
831{
832 int node = __this_cpu_read(slab_reap_node);
833
834 node = next_node(node, node_online_map);
835 if (unlikely(node >= MAX_NUMNODES))
836 node = first_node(node_online_map);
837 __this_cpu_write(slab_reap_node, node);
838}
839
840#else
841#define init_reap_node(cpu) do { } while (0)
842#define next_reap_node(void) do { } while (0)
843#endif
844
845
846
847
848
849
850
851
852static void __cpuinit start_cpu_timer(int cpu)
853{
854 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
855
856
857
858
859
860
861 if (keventd_up() && reap_work->work.func == NULL) {
862 init_reap_node(cpu);
863 INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
864 schedule_delayed_work_on(cpu, reap_work,
865 __round_jiffies_relative(HZ, cpu));
866 }
867}
868
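/*
 * Allocate an array_cache on the given node with room for 'entries'
 * object pointers.  The entries point at objects that are tracked
 * elsewhere, so the array is excluded from kmemleak scanning.
 */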
869static struct array_cache *alloc_arraycache(int node, int entries,
870 int batchcount, gfp_t gfp)
871{
872 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
873 struct array_cache *nc = NULL;
874
875 nc = kmalloc_node(memsize, gfp, node);
876
877
878
879
880
881
882
883 kmemleak_no_scan(nc);
884 if (nc) {
885 nc->avail = 0;
886 nc->limit = entries;
887 nc->batchcount = batchcount;
888 nc->touched = 0;
889 spin_lock_init(&nc->lock);
890 }
891 return nc;
892}
893
894
895
896
897
898
899
900static int transfer_objects(struct array_cache *to,
901 struct array_cache *from, unsigned int max)
902{
903
904 int nr = min3(from->avail, max, to->limit - to->avail);
905
906 if (!nr)
907 return 0;
908
909 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
910 sizeof(void *) *nr);
911
912 from->avail -= nr;
913 to->avail += nr;
914 return nr;
915}
916
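/*
 * On !NUMA builds the "alien" (foreign-node) cache machinery collapses
 * to no-ops; BAD_ALIEN_MAGIC is handed back instead of a real alien
 * array so that any accidental dereference stands out.
 */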
917#ifndef CONFIG_NUMA
918
919#define drain_alien_cache(cachep, alien) do { } while (0)
920#define reap_alien(cachep, l3) do { } while (0)
921
922static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
923{
924 return (struct array_cache **)BAD_ALIEN_MAGIC;
925}
926
927static inline void free_alien_cache(struct array_cache **ac_ptr)
928{
929}
930
931static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
932{
933 return 0;
934}
935
936static inline void *alternate_node_alloc(struct kmem_cache *cachep,
937 gfp_t flags)
938{
939 return NULL;
940}
941
942static inline void *____cache_alloc_node(struct kmem_cache *cachep,
943 gfp_t flags, int nodeid)
944{
945 return NULL;
946}
947
948#else
949
950static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
951static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
952
953static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
954{
955 struct array_cache **ac_ptr;
956 int memsize = sizeof(void *) * nr_node_ids;
957 int i;
958
959 if (limit > 1)
960 limit = 12;
961 ac_ptr = kzalloc_node(memsize, gfp, node);
962 if (ac_ptr) {
963 for_each_node(i) {
964 if (i == node || !node_online(i))
965 continue;
966 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
967 if (!ac_ptr[i]) {
968 for (i--; i >= 0; i--)
969 kfree(ac_ptr[i]);
970 kfree(ac_ptr);
971 return NULL;
972 }
973 }
974 }
975 return ac_ptr;
976}
977
978static void free_alien_cache(struct array_cache **ac_ptr)
979{
980 int i;
981
982 if (!ac_ptr)
983 return;
984 for_each_node(i)
985 kfree(ac_ptr[i]);
986 kfree(ac_ptr);
987}
988
989static void __drain_alien_cache(struct kmem_cache *cachep,
990 struct array_cache *ac, int node)
991{
992 struct kmem_list3 *rl3 = cachep->nodelists[node];
993
994 if (ac->avail) {
995 spin_lock(&rl3->list_lock);
996
997
998
999
1000
1001 if (rl3->shared)
1002 transfer_objects(rl3->shared, ac, ac->limit);
1003
1004 free_block(cachep, ac->entry, ac->avail, node);
1005 ac->avail = 0;
1006 spin_unlock(&rl3->list_lock);
1007 }
1008}
1009
1010
1011
1012
1013static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1014{
1015 int node = __this_cpu_read(slab_reap_node);
1016
1017 if (l3->alien) {
1018 struct array_cache *ac = l3->alien[node];
1019
1020 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1021 __drain_alien_cache(cachep, ac, node);
1022 spin_unlock_irq(&ac->lock);
1023 }
1024 }
1025}
1026
1027static void drain_alien_cache(struct kmem_cache *cachep,
1028 struct array_cache **alien)
1029{
1030 int i = 0;
1031 struct array_cache *ac;
1032 unsigned long flags;
1033
1034 for_each_online_node(i) {
1035 ac = alien[i];
1036 if (ac) {
1037 spin_lock_irqsave(&ac->lock, flags);
1038 __drain_alien_cache(cachep, ac, i);
1039 spin_unlock_irqrestore(&ac->lock, flags);
1040 }
1041 }
1042}
1043
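/*
 * Free an object that belongs to a slab on another node: stash it in
 * this node's alien cache for that node (draining the cache first if it
 * is full), or free it straight to the remote node's lists.  Returns 1
 * if the object was handled here, 0 if it turned out to be local.
 */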
1044static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1045{
1046 struct slab *slabp = virt_to_slab(objp);
1047 int nodeid = slabp->nodeid;
1048 struct kmem_list3 *l3;
1049 struct array_cache *alien = NULL;
1050 int node;
1051
1052 node = numa_mem_id();
1053
1054
1055
1056
1057
1058 if (likely(slabp->nodeid == node))
1059 return 0;
1060
1061 l3 = cachep->nodelists[node];
1062 STATS_INC_NODEFREES(cachep);
1063 if (l3->alien && l3->alien[nodeid]) {
1064 alien = l3->alien[nodeid];
1065 spin_lock(&alien->lock);
1066 if (unlikely(alien->avail == alien->limit)) {
1067 STATS_INC_ACOVERFLOW(cachep);
1068 __drain_alien_cache(cachep, alien, nodeid);
1069 }
1070 alien->entry[alien->avail++] = objp;
1071 spin_unlock(&alien->lock);
1072 } else {
1073 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1074 free_block(cachep, &objp, 1, nodeid);
1075 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1076 }
1077 return 1;
1078}
1079#endif
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090static int init_cache_nodelists_node(int node)
1091{
1092 struct kmem_cache *cachep;
1093 struct kmem_list3 *l3;
1094 const int memsize = sizeof(struct kmem_list3);
1095
1096 list_for_each_entry(cachep, &cache_chain, next) {
1097
1098
1099
1100
1101
1102 if (!cachep->nodelists[node]) {
1103 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1104 if (!l3)
1105 return -ENOMEM;
1106 kmem_list3_init(l3);
1107 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1108 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1109
1110
1111
1112
1113
1114
1115 cachep->nodelists[node] = l3;
1116 }
1117
1118 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1119 cachep->nodelists[node]->free_limit =
1120 (1 + nr_cpus_node(node)) *
1121 cachep->batchcount + cachep->num;
1122 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1123 }
1124 return 0;
1125}
1126
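/*
 * CPU-hotplug teardown: free the departing CPU's array_caches and, if
 * it was the last CPU on the node, the node's shared and alien caches,
 * then release the node's now-free slabs back to the page allocator.
 */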
1127static void __cpuinit cpuup_canceled(long cpu)
1128{
1129 struct kmem_cache *cachep;
1130 struct kmem_list3 *l3 = NULL;
1131 int node = cpu_to_mem(cpu);
1132 const struct cpumask *mask = cpumask_of_node(node);
1133
1134 list_for_each_entry(cachep, &cache_chain, next) {
1135 struct array_cache *nc;
1136 struct array_cache *shared;
1137 struct array_cache **alien;
1138
1139
1140 nc = cachep->array[cpu];
1141 cachep->array[cpu] = NULL;
1142 l3 = cachep->nodelists[node];
1143
1144 if (!l3)
1145 goto free_array_cache;
1146
1147 spin_lock_irq(&l3->list_lock);
1148
1149
1150 l3->free_limit -= cachep->batchcount;
1151 if (nc)
1152 free_block(cachep, nc->entry, nc->avail, node);
1153
1154 if (!cpumask_empty(mask)) {
1155 spin_unlock_irq(&l3->list_lock);
1156 goto free_array_cache;
1157 }
1158
1159 shared = l3->shared;
1160 if (shared) {
1161 free_block(cachep, shared->entry,
1162 shared->avail, node);
1163 l3->shared = NULL;
1164 }
1165
1166 alien = l3->alien;
1167 l3->alien = NULL;
1168
1169 spin_unlock_irq(&l3->list_lock);
1170
1171 kfree(shared);
1172 if (alien) {
1173 drain_alien_cache(cachep, alien);
1174 free_alien_cache(alien);
1175 }
1176free_array_cache:
1177 kfree(nc);
1178 }
1179
1180
1181
1182
1183
1184 list_for_each_entry(cachep, &cache_chain, next) {
1185 l3 = cachep->nodelists[node];
1186 if (!l3)
1187 continue;
1188 drain_freelist(cachep, l3, l3->free_objects);
1189 }
1190}
1191
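/*
 * CPU-hotplug prepare: make sure the CPU's memory node has kmem_list3
 * structures for every cache, then allocate the new CPU's array_caches
 * (and shared/alien caches where still missing) before it goes online.
 */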
1192static int __cpuinit cpuup_prepare(long cpu)
1193{
1194 struct kmem_cache *cachep;
1195 struct kmem_list3 *l3 = NULL;
1196 int node = cpu_to_mem(cpu);
1197 int err;
1198
1199
1200
1201
1202
1203
1204
1205 err = init_cache_nodelists_node(node);
1206 if (err < 0)
1207 goto bad;
1208
1209
1210
1211
1212
1213 list_for_each_entry(cachep, &cache_chain, next) {
1214 struct array_cache *nc;
1215 struct array_cache *shared = NULL;
1216 struct array_cache **alien = NULL;
1217
1218 nc = alloc_arraycache(node, cachep->limit,
1219 cachep->batchcount, GFP_KERNEL);
1220 if (!nc)
1221 goto bad;
1222 if (cachep->shared) {
1223 shared = alloc_arraycache(node,
1224 cachep->shared * cachep->batchcount,
1225 0xbaadf00d, GFP_KERNEL);
1226 if (!shared) {
1227 kfree(nc);
1228 goto bad;
1229 }
1230 }
1231 if (use_alien_caches) {
1232 alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
1233 if (!alien) {
1234 kfree(shared);
1235 kfree(nc);
1236 goto bad;
1237 }
1238 }
1239 cachep->array[cpu] = nc;
1240 l3 = cachep->nodelists[node];
1241 BUG_ON(!l3);
1242
1243 spin_lock_irq(&l3->list_lock);
1244 if (!l3->shared) {
1245
1246
1247
1248
1249 l3->shared = shared;
1250 shared = NULL;
1251 }
1252#ifdef CONFIG_NUMA
1253 if (!l3->alien) {
1254 l3->alien = alien;
1255 alien = NULL;
1256 }
1257#endif
1258 spin_unlock_irq(&l3->list_lock);
1259 kfree(shared);
1260 free_alien_cache(alien);
1261 }
1262 init_node_lock_keys(node);
1263
1264 return 0;
1265bad:
1266 cpuup_canceled(cpu);
1267 return -ENOMEM;
1268}
1269
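/*
 * CPU hotplug notifier.  Allocation happens at CPU_UP_PREPARE (process
 * context, may sleep); the reap timer is started at CPU_ONLINE and
 * cancelled at CPU_DOWN_PREPARE; CPU_DEAD and CPU_UP_CANCELED release
 * the per-CPU state via cpuup_canceled().
 */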
1270static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1271 unsigned long action, void *hcpu)
1272{
1273 long cpu = (long)hcpu;
1274 int err = 0;
1275
1276 switch (action) {
1277 case CPU_UP_PREPARE:
1278 case CPU_UP_PREPARE_FROZEN:
1279 mutex_lock(&cache_chain_mutex);
1280 err = cpuup_prepare(cpu);
1281 mutex_unlock(&cache_chain_mutex);
1282 break;
1283 case CPU_ONLINE:
1284 case CPU_ONLINE_FROZEN:
1285 start_cpu_timer(cpu);
1286 break;
1287#ifdef CONFIG_HOTPLUG_CPU
1288 case CPU_DOWN_PREPARE:
1289 case CPU_DOWN_PREPARE_FROZEN:
1290
1291
1292
1293
1294
1295
1296 cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
1297
1298 per_cpu(slab_reap_work, cpu).work.func = NULL;
1299 break;
1300 case CPU_DOWN_FAILED:
1301 case CPU_DOWN_FAILED_FROZEN:
1302 start_cpu_timer(cpu);
1303 break;
1304 case CPU_DEAD:
1305 case CPU_DEAD_FROZEN:
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315#endif
1316 case CPU_UP_CANCELED:
1317 case CPU_UP_CANCELED_FROZEN:
1318 mutex_lock(&cache_chain_mutex);
1319 cpuup_canceled(cpu);
1320 mutex_unlock(&cache_chain_mutex);
1321 break;
1322 }
1323 return notifier_from_errno(err);
1324}
1325
1326static struct notifier_block __cpuinitdata cpucache_notifier = {
1327 &cpuup_callback, NULL, 0
1328};
1329
1330#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1331
1332
1333
1334
1335
1336
1337
1338static int __meminit drain_cache_nodelists_node(int node)
1339{
1340 struct kmem_cache *cachep;
1341 int ret = 0;
1342
1343 list_for_each_entry(cachep, &cache_chain, next) {
1344 struct kmem_list3 *l3;
1345
1346 l3 = cachep->nodelists[node];
1347 if (!l3)
1348 continue;
1349
1350 drain_freelist(cachep, l3, l3->free_objects);
1351
1352 if (!list_empty(&l3->slabs_full) ||
1353 !list_empty(&l3->slabs_partial)) {
1354 ret = -EBUSY;
1355 break;
1356 }
1357 }
1358 return ret;
1359}
1360
1361static int __meminit slab_memory_callback(struct notifier_block *self,
1362 unsigned long action, void *arg)
1363{
1364 struct memory_notify *mnb = arg;
1365 int ret = 0;
1366 int nid;
1367
1368 nid = mnb->status_change_nid;
1369 if (nid < 0)
1370 goto out;
1371
1372 switch (action) {
1373 case MEM_GOING_ONLINE:
1374 mutex_lock(&cache_chain_mutex);
1375 ret = init_cache_nodelists_node(nid);
1376 mutex_unlock(&cache_chain_mutex);
1377 break;
1378 case MEM_GOING_OFFLINE:
1379 mutex_lock(&cache_chain_mutex);
1380 ret = drain_cache_nodelists_node(nid);
1381 mutex_unlock(&cache_chain_mutex);
1382 break;
1383 case MEM_ONLINE:
1384 case MEM_OFFLINE:
1385 case MEM_CANCEL_ONLINE:
1386 case MEM_CANCEL_OFFLINE:
1387 break;
1388 }
1389out:
1390 return ret ? notifier_from_errno(ret) : NOTIFY_OK;
1391}
1392#endif
1393
1394
1395
1396
1397static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1398 int nodeid)
1399{
1400 struct kmem_list3 *ptr;
1401
1402 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1403 BUG_ON(!ptr);
1404
1405 memcpy(ptr, list, sizeof(struct kmem_list3));
1406
1407
1408
1409 spin_lock_init(&ptr->list_lock);
1410
1411 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1412 cachep->nodelists[nodeid] = ptr;
1413}
1414
1415
1416
1417
1418
1419static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1420{
1421 int node;
1422
1423 for_each_online_node(node) {
1424 cachep->nodelists[node] = &initkmem_list3[index + node];
1425 cachep->nodelists[node]->next_reap = jiffies +
1426 REAPTIMEOUT_LIST3 +
1427 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1428 }
1429}
1430
1431
1432
1433
1434
1435void __init kmem_cache_init(void)
1436{
1437 size_t left_over;
1438 struct cache_sizes *sizes;
1439 struct cache_names *names;
1440 int i;
1441 int order;
1442 int node;
1443
1444 if (num_possible_nodes() == 1)
1445 use_alien_caches = 0;
1446
1447 for (i = 0; i < NUM_INIT_LISTS; i++) {
1448 kmem_list3_init(&initkmem_list3[i]);
1449 if (i < MAX_NUMNODES)
1450 cache_cache.nodelists[i] = NULL;
1451 }
1452 set_up_list3s(&cache_cache, CACHE_CACHE);
1453
	/*
	 * Fragmentation resistance on low memory - only use bigger page
	 * orders on machines with more than 32MB of memory.
	 */
1458 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1459 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
	/*
	 * Bootstrap is tricky, because several objects are allocated from
	 * caches that do not exist yet:
	 * 1) initialize cache_cache, the cache that holds every other
	 *    struct kmem_cache; its head array and kmem_list3 come from
	 *    the static initarray_cache / initkmem_list3 data.
	 * 2) create the kmalloc cache used for struct arraycache_init
	 *    (and, if different, the one for struct kmem_list3).
	 * 3) create the remaining kmalloc caches.
	 * 4) replace the bootstrap head arrays with kmalloc'ed ones.
	 * 5) replace the bootstrap kmem_list3 structures with kmalloc'ed
	 *    ones.
	 */
1481 node = numa_mem_id();
1482
1483
1484 INIT_LIST_HEAD(&cache_chain);
1485 list_add(&cache_cache.next, &cache_chain);
1486 cache_cache.colour_off = cache_line_size();
1487 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1488 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1489
1490
1491
1492
1493
1494 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1495 nr_node_ids * sizeof(struct kmem_list3 *);
1496#if DEBUG
1497 cache_cache.obj_size = cache_cache.buffer_size;
1498#endif
1499 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1500 cache_line_size());
1501 cache_cache.reciprocal_buffer_size =
1502 reciprocal_value(cache_cache.buffer_size);
1503
1504 for (order = 0; order < MAX_ORDER; order++) {
1505 cache_estimate(order, cache_cache.buffer_size,
1506 cache_line_size(), 0, &left_over, &cache_cache.num);
1507 if (cache_cache.num)
1508 break;
1509 }
1510 BUG_ON(!cache_cache.num);
1511 cache_cache.gfporder = order;
1512 cache_cache.colour = left_over / cache_cache.colour_off;
1513 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1514 sizeof(struct slab), cache_line_size());
1515
1516
1517 sizes = malloc_sizes;
1518 names = cache_names;
	/*
	 * Initialise the caches that provide memory for the array_cache
	 * and kmem_list3 structures first; without them nothing further
	 * can be allocated.
	 */
1526 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1527 sizes[INDEX_AC].cs_size,
1528 ARCH_KMALLOC_MINALIGN,
1529 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1530 NULL);
1531
1532 if (INDEX_AC != INDEX_L3) {
1533 sizes[INDEX_L3].cs_cachep =
1534 kmem_cache_create(names[INDEX_L3].name,
1535 sizes[INDEX_L3].cs_size,
1536 ARCH_KMALLOC_MINALIGN,
1537 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1538 NULL);
1539 }
1540
1541 slab_early_init = 0;
1542
1543 while (sizes->cs_size != ULONG_MAX) {
1544
1545
1546
1547
1548
1549
1550
1551 if (!sizes->cs_cachep) {
1552 sizes->cs_cachep = kmem_cache_create(names->name,
1553 sizes->cs_size,
1554 ARCH_KMALLOC_MINALIGN,
1555 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1556 NULL);
1557 }
1558#ifdef CONFIG_ZONE_DMA
1559 sizes->cs_dmacachep = kmem_cache_create(
1560 names->name_dma,
1561 sizes->cs_size,
1562 ARCH_KMALLOC_MINALIGN,
1563 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1564 SLAB_PANIC,
1565 NULL);
1566#endif
1567 sizes++;
1568 names++;
1569 }
1570
1571 {
1572 struct array_cache *ptr;
1573
1574 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1575
1576 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1577 memcpy(ptr, cpu_cache_get(&cache_cache),
1578 sizeof(struct arraycache_init));
1579
1580
1581
1582 spin_lock_init(&ptr->lock);
1583
1584 cache_cache.array[smp_processor_id()] = ptr;
1585
1586 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1587
1588 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1589 != &initarray_generic.cache);
1590 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1591 sizeof(struct arraycache_init));
1592
1593
1594
1595 spin_lock_init(&ptr->lock);
1596
1597 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1598 ptr;
1599 }
1600
1601 {
1602 int nid;
1603
1604 for_each_online_node(nid) {
1605 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1606
1607 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1608 &initkmem_list3[SIZE_AC + nid], nid);
1609
1610 if (INDEX_AC != INDEX_L3) {
1611 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1612 &initkmem_list3[SIZE_L3 + nid], nid);
1613 }
1614 }
1615 }
1616
1617 g_cpucache_up = EARLY;
1618}
1619
1620void __init kmem_cache_init_late(void)
1621{
1622 struct kmem_cache *cachep;
1623
1624
1625 mutex_lock(&cache_chain_mutex);
1626 list_for_each_entry(cachep, &cache_chain, next)
1627 if (enable_cpucache(cachep, GFP_NOWAIT))
1628 BUG();
1629 mutex_unlock(&cache_chain_mutex);
1630
1631
1632 g_cpucache_up = FULL;
1633
1634
1635 init_lock_keys();
1636
1637
1638
1639
1640
1641 register_cpu_notifier(&cpucache_notifier);
1642
1643#ifdef CONFIG_NUMA
1644
1645
1646
1647
1648 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1649#endif
1650
1651
1652
1653
1654
1655}
1656
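/*
 * Register the per-CPU cache reap timers once the scheduler and
 * workqueues are up.
 */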
1657static int __init cpucache_init(void)
1658{
1659 int cpu;
1660
1661
1662
1663
1664 for_each_online_cpu(cpu)
1665 start_cpu_timer(cpu);
1666 return 0;
1667}
1668__initcall(cpucache_init);
1669
/*
 * Interface to the page allocator.  kmem_getpages() allocates the
 * 2^gfporder pages backing one slab, updating the zone slab-page
 * statistics and marking each page PageSlab; kmem_freepages() undoes
 * all of that.
 */
1677static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1678{
1679 struct page *page;
1680 int nr_pages;
1681 int i;
1682
1683#ifndef CONFIG_MMU
1684
1685
1686
1687
1688 flags |= __GFP_COMP;
1689#endif
1690
1691 flags |= cachep->gfpflags;
1692 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1693 flags |= __GFP_RECLAIMABLE;
1694
1695 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1696 if (!page)
1697 return NULL;
1698
1699 nr_pages = (1 << cachep->gfporder);
1700 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1701 add_zone_page_state(page_zone(page),
1702 NR_SLAB_RECLAIMABLE, nr_pages);
1703 else
1704 add_zone_page_state(page_zone(page),
1705 NR_SLAB_UNRECLAIMABLE, nr_pages);
1706 for (i = 0; i < nr_pages; i++)
1707 __SetPageSlab(page + i);
1708
1709 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1710 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1711
1712 if (cachep->ctor)
1713 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1714 else
1715 kmemcheck_mark_unallocated_pages(page, nr_pages);
1716 }
1717
1718 return page_address(page);
1719}
1720
1721
1722
1723
1724static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1725{
1726 unsigned long i = (1 << cachep->gfporder);
1727 struct page *page = virt_to_page(addr);
1728 const unsigned long nr_freed = i;
1729
1730 kmemcheck_free_shadow(page, cachep->gfporder);
1731
1732 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1733 sub_zone_page_state(page_zone(page),
1734 NR_SLAB_RECLAIMABLE, nr_freed);
1735 else
1736 sub_zone_page_state(page_zone(page),
1737 NR_SLAB_UNRECLAIMABLE, nr_freed);
1738 while (i--) {
1739 BUG_ON(!PageSlab(page));
1740 __ClearPageSlab(page);
1741 page++;
1742 }
1743 if (current->reclaim_state)
1744 current->reclaim_state->reclaimed_slab += nr_freed;
1745 free_pages((unsigned long)addr, cachep->gfporder);
1746}
1747
1748static void kmem_rcu_free(struct rcu_head *head)
1749{
1750 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1751 struct kmem_cache *cachep = slab_rcu->cachep;
1752
1753 kmem_freepages(cachep, slab_rcu->addr);
1754 if (OFF_SLAB(cachep))
1755 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1756}
1757
1758#if DEBUG
1759
1760#ifdef CONFIG_DEBUG_PAGEALLOC
1761static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1762 unsigned long caller)
1763{
1764 int size = obj_size(cachep);
1765
1766 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1767
1768 if (size < 5 * sizeof(unsigned long))
1769 return;
1770
1771 *addr++ = 0x12345678;
1772 *addr++ = caller;
1773 *addr++ = smp_processor_id();
1774 size -= 3 * sizeof(unsigned long);
1775 {
1776 unsigned long *sptr = &caller;
1777 unsigned long svalue;
1778
1779 while (!kstack_end(sptr)) {
1780 svalue = *sptr++;
1781 if (kernel_text_address(svalue)) {
1782 *addr++ = svalue;
1783 size -= sizeof(unsigned long);
1784 if (size <= sizeof(unsigned long))
1785 break;
1786 }
1787 }
1788
1789 }
1790 *addr++ = 0x87654321;
1791}
1792#endif
1793
1794static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1795{
1796 int size = obj_size(cachep);
1797 addr = &((char *)addr)[obj_offset(cachep)];
1798
1799 memset(addr, val, size);
1800 *(unsigned char *)(addr + size - 1) = POISON_END;
1801}
1802
1803static void dump_line(char *data, int offset, int limit)
1804{
1805 int i;
1806 unsigned char error = 0;
1807 int bad_count = 0;
1808
1809 printk(KERN_ERR "%03x:", offset);
1810 for (i = 0; i < limit; i++) {
1811 if (data[offset + i] != POISON_FREE) {
1812 error = data[offset + i];
1813 bad_count++;
1814 }
1815 printk(" %02x", (unsigned char)data[offset + i]);
1816 }
1817 printk("\n");
1818
1819 if (bad_count == 1) {
1820 error ^= POISON_FREE;
1821 if (!(error & (error - 1))) {
1822 printk(KERN_ERR "Single bit error detected. Probably "
1823 "bad RAM.\n");
1824#ifdef CONFIG_X86
1825 printk(KERN_ERR "Run memtest86+ or a similar memory "
1826 "test tool.\n");
1827#else
1828 printk(KERN_ERR "Run a memory test tool.\n");
1829#endif
1830 }
1831 }
1832}
1833#endif
1834
1835#if DEBUG
1836
1837static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1838{
1839 int i, size;
1840 char *realobj;
1841
1842 if (cachep->flags & SLAB_RED_ZONE) {
1843 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1844 *dbg_redzone1(cachep, objp),
1845 *dbg_redzone2(cachep, objp));
1846 }
1847
1848 if (cachep->flags & SLAB_STORE_USER) {
1849 printk(KERN_ERR "Last user: [<%p>]",
1850 *dbg_userword(cachep, objp));
1851 print_symbol("(%s)",
1852 (unsigned long)*dbg_userword(cachep, objp));
1853 printk("\n");
1854 }
1855 realobj = (char *)objp + obj_offset(cachep);
1856 size = obj_size(cachep);
1857 for (i = 0; i < size && lines; i += 16, lines--) {
1858 int limit;
1859 limit = 16;
1860 if (i + limit > size)
1861 limit = size - i;
1862 dump_line(realobj, i, limit);
1863 }
1864}
1865
1866static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1867{
1868 char *realobj;
1869 int size, i;
1870 int lines = 0;
1871
1872 realobj = (char *)objp + obj_offset(cachep);
1873 size = obj_size(cachep);
1874
1875 for (i = 0; i < size; i++) {
1876 char exp = POISON_FREE;
1877 if (i == size - 1)
1878 exp = POISON_END;
1879 if (realobj[i] != exp) {
1880 int limit;
1881
1882
1883 if (lines == 0) {
1884 printk(KERN_ERR
1885 "Slab corruption: %s start=%p, len=%d\n",
1886 cachep->name, realobj, size);
1887 print_objinfo(cachep, objp, 0);
1888 }
1889
1890 i = (i / 16) * 16;
1891 limit = 16;
1892 if (i + limit > size)
1893 limit = size - i;
1894 dump_line(realobj, i, limit);
1895 i += 16;
1896 lines++;
1897
1898 if (lines > 5)
1899 break;
1900 }
1901 }
1902 if (lines != 0) {
1903
1904
1905
1906 struct slab *slabp = virt_to_slab(objp);
1907 unsigned int objnr;
1908
1909 objnr = obj_to_index(cachep, slabp, objp);
1910 if (objnr) {
1911 objp = index_to_obj(cachep, slabp, objnr - 1);
1912 realobj = (char *)objp + obj_offset(cachep);
1913 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1914 realobj, size);
1915 print_objinfo(cachep, objp, 2);
1916 }
1917 if (objnr + 1 < cachep->num) {
1918 objp = index_to_obj(cachep, slabp, objnr + 1);
1919 realobj = (char *)objp + obj_offset(cachep);
1920 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1921 realobj, size);
1922 print_objinfo(cachep, objp, 2);
1923 }
1924 }
1925}
1926#endif
1927
1928#if DEBUG
1929static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1930{
1931 int i;
1932 for (i = 0; i < cachep->num; i++) {
1933 void *objp = index_to_obj(cachep, slabp, i);
1934
1935 if (cachep->flags & SLAB_POISON) {
1936#ifdef CONFIG_DEBUG_PAGEALLOC
1937 if (cachep->buffer_size % PAGE_SIZE == 0 &&
1938 OFF_SLAB(cachep))
1939 kernel_map_pages(virt_to_page(objp),
1940 cachep->buffer_size / PAGE_SIZE, 1);
1941 else
1942 check_poison_obj(cachep, objp);
1943#else
1944 check_poison_obj(cachep, objp);
1945#endif
1946 }
1947 if (cachep->flags & SLAB_RED_ZONE) {
1948 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1949 slab_error(cachep, "start of a freed object "
1950 "was overwritten");
1951 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1952 slab_error(cachep, "end of a freed object "
1953 "was overwritten");
1954 }
1955 }
1956}
1957#else
1958static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1959{
1960}
1961#endif
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1973{
1974 void *addr = slabp->s_mem - slabp->colouroff;
1975
1976 slab_destroy_debugcheck(cachep, slabp);
1977 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1978 struct slab_rcu *slab_rcu;
1979
1980 slab_rcu = (struct slab_rcu *)slabp;
1981 slab_rcu->cachep = cachep;
1982 slab_rcu->addr = addr;
1983 call_rcu(&slab_rcu->head, kmem_rcu_free);
1984 } else {
1985 kmem_freepages(cachep, addr);
1986 if (OFF_SLAB(cachep))
1987 kmem_cache_free(cachep->slabp_cache, slabp);
1988 }
1989}
1990
1991static void __kmem_cache_destroy(struct kmem_cache *cachep)
1992{
1993 int i;
1994 struct kmem_list3 *l3;
1995
1996 for_each_online_cpu(i)
1997 kfree(cachep->array[i]);
1998
1999
2000 for_each_online_node(i) {
2001 l3 = cachep->nodelists[i];
2002 if (l3) {
2003 kfree(l3->shared);
2004 free_alien_cache(l3->alien);
2005 kfree(l3);
2006 }
2007 }
2008 kmem_cache_free(&cache_cache, cachep);
2009}
2010
/**
 * calculate_slab_order - calculate size (page order) of slabs
 * @cachep: pointer to the cache that is being created
 * @size: size of objects to be created in this cache
 * @align: required alignment for the objects
 * @flags: slab allocation flags
 *
 * Picks the smallest acceptable page order, stores the chosen order and
 * the objects-per-slab count in @cachep, and returns the unused space
 * that will be used for slab colouring.
 */
2025static size_t calculate_slab_order(struct kmem_cache *cachep,
2026 size_t size, size_t align, unsigned long flags)
2027{
2028 unsigned long offslab_limit;
2029 size_t left_over = 0;
2030 int gfporder;
2031
2032 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
2033 unsigned int num;
2034 size_t remainder;
2035
2036 cache_estimate(gfporder, size, align, flags, &remainder, &num);
2037 if (!num)
2038 continue;
2039
2040 if (flags & CFLGS_OFF_SLAB) {
2041
2042
2043
2044
2045
2046 offslab_limit = size - sizeof(struct slab);
2047 offslab_limit /= sizeof(kmem_bufctl_t);
2048
2049 if (num > offslab_limit)
2050 break;
2051 }
2052
2053
2054 cachep->num = num;
2055 cachep->gfporder = gfporder;
2056 left_over = remainder;
2057
2058
2059
2060
2061
2062
2063 if (flags & SLAB_RECLAIM_ACCOUNT)
2064 break;
2065
2066
2067
2068
2069
2070 if (gfporder >= slab_break_gfp_order)
2071 break;
2072
2073
2074
2075
2076 if (left_over * 8 <= (PAGE_SIZE << gfporder))
2077 break;
2078 }
2079 return left_over;
2080}
2081
2082static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2083{
2084 if (g_cpucache_up == FULL)
2085 return enable_cpucache(cachep, gfp);
2086
2087 if (g_cpucache_up == NONE) {
2088
2089
2090
2091
2092
2093 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2094
2095
2096
2097
2098
2099
2100 set_up_list3s(cachep, SIZE_AC);
2101 if (INDEX_AC == INDEX_L3)
2102 g_cpucache_up = PARTIAL_L3;
2103 else
2104 g_cpucache_up = PARTIAL_AC;
2105 } else {
2106 cachep->array[smp_processor_id()] =
2107 kmalloc(sizeof(struct arraycache_init), gfp);
2108
2109 if (g_cpucache_up == PARTIAL_AC) {
2110 set_up_list3s(cachep, SIZE_L3);
2111 g_cpucache_up = PARTIAL_L3;
2112 } else {
2113 int node;
2114 for_each_online_node(node) {
2115 cachep->nodelists[node] =
2116 kmalloc_node(sizeof(struct kmem_list3),
2117 gfp, node);
2118 BUG_ON(!cachep->nodelists[node]);
2119 kmem_list3_init(cachep->nodelists[node]);
2120 }
2121 }
2122 }
2123 cachep->nodelists[numa_mem_id()]->next_reap =
2124 jiffies + REAPTIMEOUT_LIST3 +
2125 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2126
2127 cpu_cache_get(cachep)->avail = 0;
2128 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2129 cpu_cache_get(cachep)->batchcount = 1;
2130 cpu_cache_get(cachep)->touched = 0;
2131 cachep->batchcount = 1;
2132 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2133 return 0;
2134}
2135
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure.
 * Cannot be called from interrupt context, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * @name must stay valid until the cache is destroyed.  Useful flags
 * include %SLAB_POISON (fill slabs with a known pattern to catch use of
 * uninitialised memory), %SLAB_RED_ZONE (guard words around each object
 * to catch overruns) and %SLAB_HWCACHE_ALIGN (align objects to a
 * hardware cacheline).
 */
2165struct kmem_cache *
2166kmem_cache_create (const char *name, size_t size, size_t align,
2167 unsigned long flags, void (*ctor)(void *))
2168{
2169 size_t left_over, slab_size, ralign;
2170 struct kmem_cache *cachep = NULL, *pc;
2171 gfp_t gfp;
2172
2173
2174
2175
2176 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2177 size > KMALLOC_MAX_SIZE) {
2178 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2179 name);
2180 BUG();
2181 }
2182
2183
2184
2185
2186
2187 if (slab_is_available()) {
2188 get_online_cpus();
2189 mutex_lock(&cache_chain_mutex);
2190 }
2191
2192 list_for_each_entry(pc, &cache_chain, next) {
2193 char tmp;
2194 int res;
2195
2196
2197
2198
2199
2200
2201 res = probe_kernel_address(pc->name, tmp);
2202 if (res) {
2203 printk(KERN_ERR
2204 "SLAB: cache with size %d has lost its name\n",
2205 pc->buffer_size);
2206 continue;
2207 }
2208
2209 if (!strcmp(pc->name, name)) {
2210 printk(KERN_ERR
2211 "kmem_cache_create: duplicate cache %s\n", name);
2212 dump_stack();
2213 goto oops;
2214 }
2215 }
2216
2217#if DEBUG
2218 WARN_ON(strchr(name, ' '));
2219#if FORCED_DEBUG
2220
2221
2222
2223
2224
2225
2226 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2227 2 * sizeof(unsigned long long)))
2228 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2229 if (!(flags & SLAB_DESTROY_BY_RCU))
2230 flags |= SLAB_POISON;
2231#endif
2232 if (flags & SLAB_DESTROY_BY_RCU)
2233 BUG_ON(flags & SLAB_POISON);
2234#endif
2235
2236
2237
2238
2239 BUG_ON(flags & ~CREATE_MASK);
2240
2241
2242
2243
2244
2245
2246 if (size & (BYTES_PER_WORD - 1)) {
2247 size += (BYTES_PER_WORD - 1);
2248 size &= ~(BYTES_PER_WORD - 1);
2249 }
2250
2251
2252
2253
2254 if (flags & SLAB_HWCACHE_ALIGN) {
2255
2256
2257
2258
2259
2260 ralign = cache_line_size();
2261 while (size <= ralign / 2)
2262 ralign /= 2;
2263 } else {
2264 ralign = BYTES_PER_WORD;
2265 }
2266
2267
2268
2269
2270
2271
2272 if (flags & SLAB_STORE_USER)
2273 ralign = BYTES_PER_WORD;
2274
2275 if (flags & SLAB_RED_ZONE) {
2276 ralign = REDZONE_ALIGN;
2277
2278
2279 size += REDZONE_ALIGN - 1;
2280 size &= ~(REDZONE_ALIGN - 1);
2281 }
2282
2283
2284 if (ralign < ARCH_SLAB_MINALIGN) {
2285 ralign = ARCH_SLAB_MINALIGN;
2286 }
2287
2288 if (ralign < align) {
2289 ralign = align;
2290 }
2291
2292 if (ralign & (__alignof__(unsigned long long) - 1))
2293 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2294
2295
2296
2297 align = ralign;
2298
2299 if (slab_is_available())
2300 gfp = GFP_KERNEL;
2301 else
2302 gfp = GFP_NOWAIT;
2303
2304
2305 cachep = kmem_cache_zalloc(&cache_cache, gfp);
2306 if (!cachep)
2307 goto oops;
2308
2309#if DEBUG
2310 cachep->obj_size = size;
2311
2312
2313
2314
2315
2316 if (flags & SLAB_RED_ZONE) {
2317
2318 cachep->obj_offset += align;
2319 size += align + sizeof(unsigned long long);
2320 }
2321 if (flags & SLAB_STORE_USER) {
2322
2323
2324
2325
2326 if (flags & SLAB_RED_ZONE)
2327 size += REDZONE_ALIGN;
2328 else
2329 size += BYTES_PER_WORD;
2330 }
2331#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2332 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2333 && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
2334 cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
2335 size = PAGE_SIZE;
2336 }
2337#endif
2338#endif
2339
2340
2341
2342
2343
2344
2345
2346 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
2347 !(flags & SLAB_NOLEAKTRACE))
2348
2349
2350
2351
2352 flags |= CFLGS_OFF_SLAB;
2353
2354 size = ALIGN(size, align);
2355
2356 left_over = calculate_slab_order(cachep, size, align, flags);
2357
2358 if (!cachep->num) {
2359 printk(KERN_ERR
2360 "kmem_cache_create: couldn't create cache %s.\n", name);
2361 kmem_cache_free(&cache_cache, cachep);
2362 cachep = NULL;
2363 goto oops;
2364 }
2365 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2366 + sizeof(struct slab), align);
2367
2368
2369
2370
2371
2372 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2373 flags &= ~CFLGS_OFF_SLAB;
2374 left_over -= slab_size;
2375 }
2376
2377 if (flags & CFLGS_OFF_SLAB) {
2378
2379 slab_size =
2380 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2381
2382#ifdef CONFIG_PAGE_POISONING
2383
2384
2385
2386
2387 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2388 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2389#endif
2390 }
2391
2392 cachep->colour_off = cache_line_size();
2393
2394 if (cachep->colour_off < align)
2395 cachep->colour_off = align;
2396 cachep->colour = left_over / cachep->colour_off;
2397 cachep->slab_size = slab_size;
2398 cachep->flags = flags;
2399 cachep->gfpflags = 0;
2400 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2401 cachep->gfpflags |= GFP_DMA;
2402 cachep->buffer_size = size;
2403 cachep->reciprocal_buffer_size = reciprocal_value(size);
2404
2405 if (flags & CFLGS_OFF_SLAB) {
2406 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2407
2408
2409
2410
2411
2412
2413
2414 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2415 }
2416 cachep->ctor = ctor;
2417 cachep->name = name;
2418
2419 if (setup_cpu_cache(cachep, gfp)) {
2420 __kmem_cache_destroy(cachep);
2421 cachep = NULL;
2422 goto oops;
2423 }
2424
2425
2426 list_add(&cachep->next, &cache_chain);
2427oops:
2428 if (!cachep && (flags & SLAB_PANIC))
2429 panic("kmem_cache_create(): failed to create slab `%s'\n",
2430 name);
2431 if (slab_is_available()) {
2432 mutex_unlock(&cache_chain_mutex);
2433 put_online_cpus();
2434 }
2435 return cachep;
2436}
2437EXPORT_SYMBOL(kmem_cache_create);
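/*
 * Typical usage by a client (illustrative only; "struct foo" and
 * foo_cache are placeholders, not part of this file):
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	p = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, p);
 *	kmem_cache_destroy(foo_cache);
 */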
2438
2439#if DEBUG
2440static void check_irq_off(void)
2441{
2442 BUG_ON(!irqs_disabled());
2443}
2444
2445static void check_irq_on(void)
2446{
2447 BUG_ON(irqs_disabled());
2448}
2449
2450static void check_spinlock_acquired(struct kmem_cache *cachep)
2451{
2452#ifdef CONFIG_SMP
2453 check_irq_off();
2454 assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
2455#endif
2456}
2457
2458static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2459{
2460#ifdef CONFIG_SMP
2461 check_irq_off();
2462 assert_spin_locked(&cachep->nodelists[node]->list_lock);
2463#endif
2464}
2465
2466#else
2467#define check_irq_off() do { } while(0)
2468#define check_irq_on() do { } while(0)
2469#define check_spinlock_acquired(x) do { } while(0)
2470#define check_spinlock_acquired_node(x, y) do { } while(0)
2471#endif
2472
2473static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2474 struct array_cache *ac,
2475 int force, int node);
2476
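/*
 * do_drain() runs on each CPU via on_each_cpu(): with interrupts off it
 * returns everything in that CPU's array_cache to the node lists.
 */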
2477static void do_drain(void *arg)
2478{
2479 struct kmem_cache *cachep = arg;
2480 struct array_cache *ac;
2481 int node = numa_mem_id();
2482
2483 check_irq_off();
2484 ac = cpu_cache_get(cachep);
2485 spin_lock(&cachep->nodelists[node]->list_lock);
2486 free_block(cachep, ac->entry, ac->avail, node);
2487 spin_unlock(&cachep->nodelists[node]->list_lock);
2488 ac->avail = 0;
2489}
2490
2491static void drain_cpu_caches(struct kmem_cache *cachep)
2492{
2493 struct kmem_list3 *l3;
2494 int node;
2495
2496 on_each_cpu(do_drain, cachep, 1);
2497 check_irq_on();
2498 for_each_online_node(node) {
2499 l3 = cachep->nodelists[node];
2500 if (l3 && l3->alien)
2501 drain_alien_cache(cachep, l3->alien);
2502 }
2503
2504 for_each_online_node(node) {
2505 l3 = cachep->nodelists[node];
2506 if (l3)
2507 drain_array(cachep, l3, l3->shared, 1, node);
2508 }
2509}
2510
/*
 * Remove slabs from the list of free slabs.
 * Specify the number of slabs to drain in tofree.
 *
 * Returns the actual number of slabs released.
 */
2517static int drain_freelist(struct kmem_cache *cache,
2518 struct kmem_list3 *l3, int tofree)
2519{
2520 struct list_head *p;
2521 int nr_freed;
2522 struct slab *slabp;
2523
2524 nr_freed = 0;
2525 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2526
2527 spin_lock_irq(&l3->list_lock);
2528 p = l3->slabs_free.prev;
2529 if (p == &l3->slabs_free) {
2530 spin_unlock_irq(&l3->list_lock);
2531 goto out;
2532 }
2533
2534 slabp = list_entry(p, struct slab, list);
2535#if DEBUG
2536 BUG_ON(slabp->inuse);
2537#endif
2538 list_del(&slabp->list);
2539
2540
2541
2542
2543 l3->free_objects -= cache->num;
2544 spin_unlock_irq(&l3->list_lock);
2545 slab_destroy(cache, slabp);
2546 nr_freed++;
2547 }
2548out:
2549 return nr_freed;
2550}
2551
2552
2553static int __cache_shrink(struct kmem_cache *cachep)
2554{
2555 int ret = 0, i = 0;
2556 struct kmem_list3 *l3;
2557
2558 drain_cpu_caches(cachep);
2559
2560 check_irq_on();
2561 for_each_online_node(i) {
2562 l3 = cachep->nodelists[i];
2563 if (!l3)
2564 continue;
2565
2566 drain_freelist(cachep, l3, l3->free_objects);
2567
2568 ret += !list_empty(&l3->slabs_full) ||
2569 !list_empty(&l3->slabs_partial);
2570 }
2571 return (ret ? 1 : 0);
2572}
2573
2574
2575
2576
2577
2578
2579
2580
2581int kmem_cache_shrink(struct kmem_cache *cachep)
2582{
2583 int ret;
2584 BUG_ON(!cachep || in_interrupt());
2585
2586 get_online_cpus();
2587 mutex_lock(&cache_chain_mutex);
2588 ret = __cache_shrink(cachep);
2589 mutex_unlock(&cache_chain_mutex);
2590 put_online_cpus();
2591 return ret;
2592}
2593EXPORT_SYMBOL(kmem_cache_shrink);
2594
/**
 * kmem_cache_destroy - delete a cache
 * @cachep: the cache to destroy
 *
 * Remove a &struct kmem_cache object from the slab cache.  The cache
 * must be empty before calling this, and the caller must guarantee that
 * nobody allocates from the cache while it is being destroyed.
 * Typically called by a module when it is unloaded.
 */
2611void kmem_cache_destroy(struct kmem_cache *cachep)
2612{
2613 BUG_ON(!cachep || in_interrupt());
2614
2615
2616 get_online_cpus();
2617 mutex_lock(&cache_chain_mutex);
2618
2619
2620
2621 list_del(&cachep->next);
2622 if (__cache_shrink(cachep)) {
2623 slab_error(cachep, "Can't free all objects");
2624 list_add(&cachep->next, &cache_chain);
2625 mutex_unlock(&cache_chain_mutex);
2626 put_online_cpus();
2627 return;
2628 }
2629
2630 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2631 rcu_barrier();
2632
2633 __kmem_cache_destroy(cachep);
2634 mutex_unlock(&cache_chain_mutex);
2635 put_online_cpus();
2636}
2637EXPORT_SYMBOL(kmem_cache_destroy);
2638
/*
 * Get the memory for a slab management object.  When the descriptor is
 * off-slab it is allocated from one of the general (malloc_sizes)
 * caches, never from the cache being grown; otherwise it is placed at
 * the start of the slab itself, after the colour offset.
 */
2650static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2651 int colour_off, gfp_t local_flags,
2652 int nodeid)
2653{
2654 struct slab *slabp;
2655
2656 if (OFF_SLAB(cachep)) {
2657
2658 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2659 local_flags, nodeid);
2660
2661
2662
2663
2664
2665
2666 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2667 local_flags);
2668 if (!slabp)
2669 return NULL;
2670 } else {
2671 slabp = objp + colour_off;
2672 colour_off += cachep->slab_size;
2673 }
2674 slabp->inuse = 0;
2675 slabp->colouroff = colour_off;
2676 slabp->s_mem = objp + colour_off;
2677 slabp->nodeid = nodeid;
2678 slabp->free = 0;
2679 return slabp;
2680}
2681
2682static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2683{
2684 return (kmem_bufctl_t *) (slabp + 1);
2685}
2686
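/*
 * Initialise every object in a freshly grown slab: apply poisoning,
 * red zones and the constructor as configured, and chain the objects
 * into the slab's bufctl free list.
 */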
2687static void cache_init_objs(struct kmem_cache *cachep,
2688 struct slab *slabp)
2689{
2690 int i;
2691
2692 for (i = 0; i < cachep->num; i++) {
2693 void *objp = index_to_obj(cachep, slabp, i);
2694#if DEBUG
2695
2696 if (cachep->flags & SLAB_POISON)
2697 poison_obj(cachep, objp, POISON_FREE);
2698 if (cachep->flags & SLAB_STORE_USER)
2699 *dbg_userword(cachep, objp) = NULL;
2700
2701 if (cachep->flags & SLAB_RED_ZONE) {
2702 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2703 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2704 }
2705
2706
2707
2708
2709
2710 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2711 cachep->ctor(objp + obj_offset(cachep));
2712
2713 if (cachep->flags & SLAB_RED_ZONE) {
2714 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2715 slab_error(cachep, "constructor overwrote the"
2716 " end of an object");
2717 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2718 slab_error(cachep, "constructor overwrote the"
2719 " start of an object");
2720 }
2721 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2722 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2723 kernel_map_pages(virt_to_page(objp),
2724 cachep->buffer_size / PAGE_SIZE, 0);
2725#else
2726 if (cachep->ctor)
2727 cachep->ctor(objp);
2728#endif
2729 slab_bufctl(slabp)[i] = i + 1;
2730 }
2731 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2732}
2733
2734static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2735{
2736 if (CONFIG_ZONE_DMA_FLAG) {
2737 if (flags & GFP_DMA)
2738 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2739 else
2740 BUG_ON(cachep->gfpflags & GFP_DMA);
2741 }
2742}
2743
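/*
 * slab_get_obj()/slab_put_obj() pop and push a single object on the
 * slab's bufctl free list, updating the in-use count.
 */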
2744static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2745 int nodeid)
2746{
2747 void *objp = index_to_obj(cachep, slabp, slabp->free);
2748 kmem_bufctl_t next;
2749
2750 slabp->inuse++;
2751 next = slab_bufctl(slabp)[slabp->free];
2752#if DEBUG
2753 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2754 WARN_ON(slabp->nodeid != nodeid);
2755#endif
2756 slabp->free = next;
2757
2758 return objp;
2759}
2760
2761static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2762 void *objp, int nodeid)
2763{
2764 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2765
2766#if DEBUG
2767
2768 WARN_ON(slabp->nodeid != nodeid);
2769
2770 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2771 printk(KERN_ERR "slab: double free detected in cache "
2772 "'%s', objp %p\n", cachep->name, objp);
2773 BUG();
2774 }
2775#endif
2776 slab_bufctl(slabp)[objnr] = slabp->free;
2777 slabp->free = objnr;
2778 slabp->inuse--;
2779}
2780
2781/*
2782 * Map pages beginning at addr to the given cache and slab. This is needed
2783 * so that kfree(), ksize() and the debug checks can find the cache and
2784 * slab an arbitrary object pointer belongs to.
2785 */
2786static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2787 void *addr)
2788{
2789 int nr_pages;
2790 struct page *page;
2791
2792 page = virt_to_page(addr);
2793
2794 nr_pages = 1;
2795 if (likely(!PageCompound(page)))
2796 nr_pages <<= cache->gfporder;
2797
2798 do {
2799 page_set_cache(page, cache);
2800 page_set_slab(page, slab);
2801 page++;
2802 } while (--nr_pages);
2803}
2804
2805/*
2806 * Grow (by 1) the number of slabs within a cache. This is called by
2807 * kmem_cache_alloc() when there are no active objects left in a cache.
2808 */
2809static int cache_grow(struct kmem_cache *cachep,
2810 gfp_t flags, int nodeid, void *objp)
2811{
2812 struct slab *slabp;
2813 size_t offset;
2814 gfp_t local_flags;
2815 struct kmem_list3 *l3;
2816
2817
2818
2819
2820
2821 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2822 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2823
2824
2825 check_irq_off();
2826 l3 = cachep->nodelists[nodeid];
2827 spin_lock(&l3->list_lock);
2828
2829
2830 offset = l3->colour_next;
2831 l3->colour_next++;
2832 if (l3->colour_next >= cachep->colour)
2833 l3->colour_next = 0;
2834 spin_unlock(&l3->list_lock);
2835
2836 offset *= cachep->colour_off;
2837
2838 if (local_flags & __GFP_WAIT)
2839 local_irq_enable();
2840
2841
2842 /*
2843 * The check for a missing atomic flag is performed here rather than in
2844 * the more obvious place, simply to keep it off the critical path of
2845 * kmem_cache_alloc(). A seriously misbehaving caller is caught here.
2846 */
2847 kmem_flagcheck(cachep, flags);
2848
2849
2850
2851
2852
2853 if (!objp)
2854 objp = kmem_getpages(cachep, local_flags, nodeid);
2855 if (!objp)
2856 goto failed;
2857
2858
2859 slabp = alloc_slabmgmt(cachep, objp, offset,
2860 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2861 if (!slabp)
2862 goto opps1;
2863
2864 slab_map_pages(cachep, slabp, objp);
2865
2866 cache_init_objs(cachep, slabp);
2867
2868 if (local_flags & __GFP_WAIT)
2869 local_irq_disable();
2870 check_irq_off();
2871 spin_lock(&l3->list_lock);
2872
2873
2874 list_add_tail(&slabp->list, &(l3->slabs_free));
2875 STATS_INC_GROWN(cachep);
2876 l3->free_objects += cachep->num;
2877 spin_unlock(&l3->list_lock);
2878 return 1;
2879opps1:
2880 kmem_freepages(cachep, objp);
2881failed:
2882 if (local_flags & __GFP_WAIT)
2883 local_irq_disable();
2884 return 0;
2885}
2886
2887#if DEBUG
2888
2889/*
2890 * Perform extra freeing checks:
2891 * - detect bad pointers.
2892 * - POISON/RED_ZONE checking
2893 */
2894static void kfree_debugcheck(const void *objp)
2895{
2896 if (!virt_addr_valid(objp)) {
2897 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2898 (unsigned long)objp);
2899 BUG();
2900 }
2901}
2902
2903static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2904{
2905 unsigned long long redzone1, redzone2;
2906
2907 redzone1 = *dbg_redzone1(cache, obj);
2908 redzone2 = *dbg_redzone2(cache, obj);
2909
2910
2911
2912
2913 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2914 return;
2915
2916 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2917 slab_error(cache, "double free detected");
2918 else
2919 slab_error(cache, "memory outside object was overwritten");
2920
2921 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2922 obj, redzone1, redzone2);
2923}
2924
2925static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2926 void *caller)
2927{
2928 struct page *page;
2929 unsigned int objnr;
2930 struct slab *slabp;
2931
2932 BUG_ON(virt_to_cache(objp) != cachep);
2933
2934 objp -= obj_offset(cachep);
2935 kfree_debugcheck(objp);
2936 page = virt_to_head_page(objp);
2937
2938 slabp = page_get_slab(page);
2939
2940 if (cachep->flags & SLAB_RED_ZONE) {
2941 verify_redzone_free(cachep, objp);
2942 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2943 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2944 }
2945 if (cachep->flags & SLAB_STORE_USER)
2946 *dbg_userword(cachep, objp) = caller;
2947
2948 objnr = obj_to_index(cachep, slabp, objp);
2949
2950 BUG_ON(objnr >= cachep->num);
2951 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2952
2953#ifdef CONFIG_DEBUG_SLAB_LEAK
2954 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2955#endif
2956 if (cachep->flags & SLAB_POISON) {
2957#ifdef CONFIG_DEBUG_PAGEALLOC
2958 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2959 store_stackinfo(cachep, objp, (unsigned long)caller);
2960 kernel_map_pages(virt_to_page(objp),
2961 cachep->buffer_size / PAGE_SIZE, 0);
2962 } else {
2963 poison_obj(cachep, objp, POISON_FREE);
2964 }
2965#else
2966 poison_obj(cachep, objp, POISON_FREE);
2967#endif
2968 }
2969 return objp;
2970}
2971
2972static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2973{
2974 kmem_bufctl_t i;
2975 int entries = 0;
2976
2977
2978 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2979 entries++;
2980 if (entries > cachep->num || i >= cachep->num)
2981 goto bad;
2982 }
2983 if (entries != cachep->num - slabp->inuse) {
2984bad:
2985 printk(KERN_ERR "slab: Internal list corruption detected in "
2986 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2987 cachep->name, cachep->num, slabp, slabp->inuse);
2988 for (i = 0;
2989 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2990 i++) {
2991 if (i % 16 == 0)
2992 printk("\n%03x:", i);
2993 printk(" %02x", ((unsigned char *)slabp)[i]);
2994 }
2995 printk("\n");
2996 BUG();
2997 }
2998}
2999#else
3000#define kfree_debugcheck(x) do { } while(0)
3001#define cache_free_debugcheck(x,objp,z) (objp)
3002#define check_slabp(x,y) do { } while(0)
3003#endif
3004
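/*
 * Refill an empty per-cpu array_cache: first try the per-node shared array,
 * then pull objects off partial/free slabs, and finally grow the cache by a
 * fresh slab. Returns one object for the caller, or NULL on failure.
 */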
3005static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
3006{
3007 int batchcount;
3008 struct kmem_list3 *l3;
3009 struct array_cache *ac;
3010 int node;
3011
3012retry:
3013 check_irq_off();
3014 node = numa_mem_id();
3015 ac = cpu_cache_get(cachep);
3016 batchcount = ac->batchcount;
3017 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
3018 /*
3019 * If there was little recent activity on this cache, then
3020 * perform only a partial refill. Otherwise we could generate
3021 * refill bouncing.
3022 */
3023 batchcount = BATCHREFILL_LIMIT;
3024 }
3025 l3 = cachep->nodelists[node];
3026
3027 BUG_ON(ac->avail > 0 || !l3);
3028 spin_lock(&l3->list_lock);
3029
3030
3031 if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
3032 l3->shared->touched = 1;
3033 goto alloc_done;
3034 }
3035
3036 while (batchcount > 0) {
3037 struct list_head *entry;
3038 struct slab *slabp;
3039
3040 entry = l3->slabs_partial.next;
3041 if (entry == &l3->slabs_partial) {
3042 l3->free_touched = 1;
3043 entry = l3->slabs_free.next;
3044 if (entry == &l3->slabs_free)
3045 goto must_grow;
3046 }
3047
3048 slabp = list_entry(entry, struct slab, list);
3049 check_slabp(cachep, slabp);
3050 check_spinlock_acquired(cachep);
3051
3052 /*
3053 * The slab was either on the partial or the free list, so
3054 * there must be at least one object available for
3055 * allocation.
3056 */
3057 BUG_ON(slabp->inuse >= cachep->num);
3058
3059 while (slabp->inuse < cachep->num && batchcount--) {
3060 STATS_INC_ALLOCED(cachep);
3061 STATS_INC_ACTIVE(cachep);
3062 STATS_SET_HIGH(cachep);
3063
3064 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3065 node);
3066 }
3067 check_slabp(cachep, slabp);
3068
3069
3070 list_del(&slabp->list);
3071 if (slabp->free == BUFCTL_END)
3072 list_add(&slabp->list, &l3->slabs_full);
3073 else
3074 list_add(&slabp->list, &l3->slabs_partial);
3075 }
3076
3077must_grow:
3078 l3->free_objects -= ac->avail;
3079alloc_done:
3080 spin_unlock(&l3->list_lock);
3081
3082 if (unlikely(!ac->avail)) {
3083 int x;
3084 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3085
3086
3087 ac = cpu_cache_get(cachep);
3088 if (!x && ac->avail == 0)
3089 return NULL;
3090
3091 if (!ac->avail)
3092 goto retry;
3093 }
3094 ac->touched = 1;
3095 return ac->entry[--ac->avail];
3096}
3097
3098static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3099 gfp_t flags)
3100{
3101 might_sleep_if(flags & __GFP_WAIT);
3102#if DEBUG
3103 kmem_flagcheck(cachep, flags);
3104#endif
3105}
3106
3107#if DEBUG
3108static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3109 gfp_t flags, void *objp, void *caller)
3110{
3111 if (!objp)
3112 return objp;
3113 if (cachep->flags & SLAB_POISON) {
3114#ifdef CONFIG_DEBUG_PAGEALLOC
3115 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3116 kernel_map_pages(virt_to_page(objp),
3117 cachep->buffer_size / PAGE_SIZE, 1);
3118 else
3119 check_poison_obj(cachep, objp);
3120#else
3121 check_poison_obj(cachep, objp);
3122#endif
3123 poison_obj(cachep, objp, POISON_INUSE);
3124 }
3125 if (cachep->flags & SLAB_STORE_USER)
3126 *dbg_userword(cachep, objp) = caller;
3127
3128 if (cachep->flags & SLAB_RED_ZONE) {
3129 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3130 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3131 slab_error(cachep, "double free, or memory outside"
3132 " object was overwritten");
3133 printk(KERN_ERR
3134 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3135 objp, *dbg_redzone1(cachep, objp),
3136 *dbg_redzone2(cachep, objp));
3137 }
3138 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3139 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3140 }
3141#ifdef CONFIG_DEBUG_SLAB_LEAK
3142 {
3143 struct slab *slabp;
3144 unsigned objnr;
3145
3146 slabp = page_get_slab(virt_to_head_page(objp));
3147 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3148 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3149 }
3150#endif
3151 objp += obj_offset(cachep);
3152 if (cachep->ctor && cachep->flags & SLAB_POISON)
3153 cachep->ctor(objp);
3154#if ARCH_SLAB_MINALIGN
3155 if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
3156 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3157 objp, ARCH_SLAB_MINALIGN);
3158 }
3159#endif
3160 return objp;
3161}
3162#else
3163#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3164#endif
3165
3166static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3167{
3168 if (cachep == &cache_cache)
3169 return false;
3170
3171 return should_failslab(obj_size(cachep), flags, cachep->flags);
3172}
3173
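/*
 * Fast path of allocation: hand out the most recently freed object from the
 * per-cpu array_cache (LIFO, so likely still cache-warm) and fall back to
 * cache_alloc_refill() when the array is empty.
 */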
3174static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3175{
3176 void *objp;
3177 struct array_cache *ac;
3178
3179 check_irq_off();
3180
3181 ac = cpu_cache_get(cachep);
3182 if (likely(ac->avail)) {
3183 STATS_INC_ALLOCHIT(cachep);
3184 ac->touched = 1;
3185 objp = ac->entry[--ac->avail];
3186 } else {
3187 STATS_INC_ALLOCMISS(cachep);
3188 objp = cache_alloc_refill(cachep, flags);
3189
3190 /*
3191 * cache_alloc_refill() may re-enable interrupts, so ac must be re-read.
3192 */
3193 ac = cpu_cache_get(cachep);
3194 }
3195
3196 /*
3197 * Avoid a false-negative leak report: erase the array slot that was just
3198 * handed out so kmemleak does not treat it as a reference to the object.
3199 */
3200 if (objp)
3201 kmemleak_erase(&ac->entry[ac->avail]);
3202 return objp;
3203}
3204
3205#ifdef CONFIG_NUMA
3206
3207/*
3208 * Try allocating on another node if PF_SPREAD_SLAB or PF_MEMPOLICY is set.
3209 * In interrupt context (and for __GFP_THISNODE) cpuset and mempolicy
3210 * constraints are not applied, so the normal local path is used instead.
3211 */
3212static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3213{
3214 int nid_alloc, nid_here;
3215
3216 if (in_interrupt() || (flags & __GFP_THISNODE))
3217 return NULL;
3218 nid_alloc = nid_here = numa_mem_id();
3219 get_mems_allowed();
3220 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3221 nid_alloc = cpuset_slab_spread_node();
3222 else if (current->mempolicy)
3223 nid_alloc = slab_node(current->mempolicy);
3224 put_mems_allowed();
3225 if (nid_alloc != nid_here)
3226 return ____cache_alloc_node(cachep, flags, nid_alloc);
3227 return NULL;
3228}
3229
3230/*
3231 * Fallback function if there was no memory available and no objects on a
3232 * certain node and fall-back is permitted. First scan all available
3233 * nodelists for free objects; if that fails, allocate a page without
3234 * specifying a node so the page allocator can do its reclaim / fallback
3235 * magic, then insert the new slab into the proper nodelist and allocate
3236 * from it.
3237 */
3238static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3239{
3240 struct zonelist *zonelist;
3241 gfp_t local_flags;
3242 struct zoneref *z;
3243 struct zone *zone;
3244 enum zone_type high_zoneidx = gfp_zone(flags);
3245 void *obj = NULL;
3246 int nid;
3247
3248 if (flags & __GFP_THISNODE)
3249 return NULL;
3250
3251 get_mems_allowed();
3252 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
3253 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3254
3255retry:
3256
3257
3258
3259
3260 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3261 nid = zone_to_nid(zone);
3262
3263 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3264 cache->nodelists[nid] &&
3265 cache->nodelists[nid]->free_objects) {
3266 obj = ____cache_alloc_node(cache,
3267 flags | GFP_THISNODE, nid);
3268 if (obj)
3269 break;
3270 }
3271 }
3272
3273 if (!obj) {
3274
3275
3276
3277
3278
3279
3280 if (local_flags & __GFP_WAIT)
3281 local_irq_enable();
3282 kmem_flagcheck(cache, flags);
3283 obj = kmem_getpages(cache, local_flags, numa_mem_id());
3284 if (local_flags & __GFP_WAIT)
3285 local_irq_disable();
3286 if (obj) {
3287
3288
3289
3290 nid = page_to_nid(virt_to_page(obj));
3291 if (cache_grow(cache, flags, nid, obj)) {
3292 obj = ____cache_alloc_node(cache,
3293 flags | GFP_THISNODE, nid);
3294 if (!obj)
3295
3296
3297
3298
3299
3300 goto retry;
3301 } else {
3302
3303 obj = NULL;
3304 }
3305 }
3306 }
3307 put_mems_allowed();
3308 return obj;
3309}
3310
3311
3312
3313
3314static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3315 int nodeid)
3316{
3317 struct list_head *entry;
3318 struct slab *slabp;
3319 struct kmem_list3 *l3;
3320 void *obj;
3321 int x;
3322
3323 l3 = cachep->nodelists[nodeid];
3324 BUG_ON(!l3);
3325
3326retry:
3327 check_irq_off();
3328 spin_lock(&l3->list_lock);
3329 entry = l3->slabs_partial.next;
3330 if (entry == &l3->slabs_partial) {
3331 l3->free_touched = 1;
3332 entry = l3->slabs_free.next;
3333 if (entry == &l3->slabs_free)
3334 goto must_grow;
3335 }
3336
3337 slabp = list_entry(entry, struct slab, list);
3338 check_spinlock_acquired_node(cachep, nodeid);
3339 check_slabp(cachep, slabp);
3340
3341 STATS_INC_NODEALLOCS(cachep);
3342 STATS_INC_ACTIVE(cachep);
3343 STATS_SET_HIGH(cachep);
3344
3345 BUG_ON(slabp->inuse == cachep->num);
3346
3347 obj = slab_get_obj(cachep, slabp, nodeid);
3348 check_slabp(cachep, slabp);
3349 l3->free_objects--;
3350
3351 list_del(&slabp->list);
3352
3353 if (slabp->free == BUFCTL_END)
3354 list_add(&slabp->list, &l3->slabs_full);
3355 else
3356 list_add(&slabp->list, &l3->slabs_partial);
3357
3358 spin_unlock(&l3->list_lock);
3359 goto done;
3360
3361must_grow:
3362 spin_unlock(&l3->list_lock);
3363 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3364 if (x)
3365 goto retry;
3366
3367 return fallback_alloc(cachep, flags);
3368
3369done:
3370 return obj;
3371}
3372
3373/*
3374 * __cache_alloc_node - allocate an object on the specified node
3375 * @cachep: the cache to allocate from.
3376 * @flags: see kmalloc().
3377 * @nodeid: node number of the target node, or -1 for the local node.
3378 * @caller: return address of the caller, used for debug tracing.
3379 *
3380 * Identical to __cache_alloc() but the memory is taken from the given
3381 * node, which can improve performance for CPU-bound structures.
3382 *
3383 * Falling back to other nodes is possible when __GFP_THISNODE is not set.
3384 */
3385static __always_inline void *
3386__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3387 void *caller)
3388{
3389 unsigned long save_flags;
3390 void *ptr;
3391 int slab_node = numa_mem_id();
3392
3393 flags &= gfp_allowed_mask;
3394
3395 lockdep_trace_alloc(flags);
3396
3397 if (slab_should_failslab(cachep, flags))
3398 return NULL;
3399
3400 cache_alloc_debugcheck_before(cachep, flags);
3401 local_irq_save(save_flags);
3402
3403 if (nodeid == -1)
3404 nodeid = slab_node;
3405
3406 if (unlikely(!cachep->nodelists[nodeid])) {
3407
3408 ptr = fallback_alloc(cachep, flags);
3409 goto out;
3410 }
3411
3412 if (nodeid == slab_node) {
3413 /*
3414 * Use the locally cached objects if possible.
3415 * However ____cache_alloc does not allow fallback
3416 * to other nodes. It may fail while we still have
3417 * objects on other nodes available.
3418 */
3419 ptr = ____cache_alloc(cachep, flags);
3420 if (ptr)
3421 goto out;
3422 }
3423
3424 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3425 out:
3426 local_irq_restore(save_flags);
3427 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3428 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
3429 flags);
3430
3431 if (likely(ptr))
3432 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
3433
3434 if (unlikely((flags & __GFP_ZERO) && ptr))
3435 memset(ptr, 0, obj_size(cachep));
3436
3437 return ptr;
3438}
3439
3440static __always_inline void *
3441__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3442{
3443 void *objp;
3444
3445 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3446 objp = alternate_node_alloc(cache, flags);
3447 if (objp)
3448 goto out;
3449 }
3450 objp = ____cache_alloc(cache, flags);
3451
3452
3453
3454
3455
3456 if (!objp)
3457 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3458
3459 out:
3460 return objp;
3461}
3462#else
3463
3464static __always_inline void *
3465__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3466{
3467 return ____cache_alloc(cachep, flags);
3468}
3469
3470#endif
3471
3472static __always_inline void *
3473__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3474{
3475 unsigned long save_flags;
3476 void *objp;
3477
3478 flags &= gfp_allowed_mask;
3479
3480 lockdep_trace_alloc(flags);
3481
3482 if (slab_should_failslab(cachep, flags))
3483 return NULL;
3484
3485 cache_alloc_debugcheck_before(cachep, flags);
3486 local_irq_save(save_flags);
3487 objp = __do_cache_alloc(cachep, flags);
3488 local_irq_restore(save_flags);
3489 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3490 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
3491 flags);
3492 prefetchw(objp);
3493
3494 if (likely(objp))
3495 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
3496
3497 if (unlikely((flags & __GFP_ZERO) && objp))
3498 memset(objp, 0, obj_size(cachep));
3499
3500 return objp;
3501}
3502
3503/*
3504 * Caller needs to acquire the correct kmem_list3's list_lock.
3505 */
3506static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3507 int node)
3508{
3509 int i;
3510 struct kmem_list3 *l3;
3511
3512 for (i = 0; i < nr_objects; i++) {
3513 void *objp = objpp[i];
3514 struct slab *slabp;
3515
3516 slabp = virt_to_slab(objp);
3517 l3 = cachep->nodelists[node];
3518 list_del(&slabp->list);
3519 check_spinlock_acquired_node(cachep, node);
3520 check_slabp(cachep, slabp);
3521 slab_put_obj(cachep, slabp, objp, node);
3522 STATS_DEC_ACTIVE(cachep);
3523 l3->free_objects++;
3524 check_slabp(cachep, slabp);
3525
3526
3527 if (slabp->inuse == 0) {
3528 if (l3->free_objects > l3->free_limit) {
3529 l3->free_objects -= cachep->num;
3530
3531
3532
3533
3534
3535
3536 slab_destroy(cachep, slabp);
3537 } else {
3538 list_add(&slabp->list, &l3->slabs_free);
3539 }
3540 } else {
3541
3542
3543
3544
3545 list_add_tail(&slabp->list, &l3->slabs_partial);
3546 }
3547 }
3548}
3549
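/*
 * Flush a batch of objects from an overfull per-cpu array_cache: move them
 * into the per-node shared array if there is room, otherwise give them back
 * to their slabs with free_block().
 */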
3550static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3551{
3552 int batchcount;
3553 struct kmem_list3 *l3;
3554 int node = numa_mem_id();
3555
3556 batchcount = ac->batchcount;
3557#if DEBUG
3558 BUG_ON(!batchcount || batchcount > ac->avail);
3559#endif
3560 check_irq_off();
3561 l3 = cachep->nodelists[node];
3562 spin_lock(&l3->list_lock);
3563 if (l3->shared) {
3564 struct array_cache *shared_array = l3->shared;
3565 int max = shared_array->limit - shared_array->avail;
3566 if (max) {
3567 if (batchcount > max)
3568 batchcount = max;
3569 memcpy(&(shared_array->entry[shared_array->avail]),
3570 ac->entry, sizeof(void *) * batchcount);
3571 shared_array->avail += batchcount;
3572 goto free_done;
3573 }
3574 }
3575
3576 free_block(cachep, ac->entry, batchcount, node);
3577free_done:
3578#if STATS
3579 {
3580 int i = 0;
3581 struct list_head *p;
3582
3583 p = l3->slabs_free.next;
3584 while (p != &(l3->slabs_free)) {
3585 struct slab *slabp;
3586
3587 slabp = list_entry(p, struct slab, list);
3588 BUG_ON(slabp->inuse);
3589
3590 i++;
3591 p = p->next;
3592 }
3593 STATS_SET_FREEABLE(cachep, i);
3594 }
3595#endif
3596 spin_unlock(&l3->list_lock);
3597 ac->avail -= batchcount;
3598 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3599}
3600
3601/*
3602 * Release an obj back to its cache. If the obj has a constructed state, it
3603 * must be in this state _before_ it is released. Called with disabled ints.
3604 */
3605static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3606{
3607 struct array_cache *ac = cpu_cache_get(cachep);
3608
3609 check_irq_off();
3610 kmemleak_free_recursive(objp, cachep->flags);
3611 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3612
3613 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
3614
3615 /*
3616 * Skip calling cache_free_alien() when the platform is not NUMA.
3617 * This avoids the cache misses taken while dereferencing the (per-page)
3618 * slab descriptor just to read the nodeid; the nr_online_nodes global is
3619 * far more likely to already be in the CPU cache.
3620 */
3621
3622 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3623 return;
3624
3625 if (likely(ac->avail < ac->limit)) {
3626 STATS_INC_FREEHIT(cachep);
3627 ac->entry[ac->avail++] = objp;
3628 return;
3629 } else {
3630 STATS_INC_FREEMISS(cachep);
3631 cache_flusharray(cachep, ac);
3632 ac->entry[ac->avail++] = objp;
3633 }
3634}
3635
3636/**
3637 * kmem_cache_alloc - Allocate an object
3638 * @cachep: The cache to allocate from.
3639 * @flags: See kmalloc().
3640 *
3641 * Allocate an object from this cache. The flags are only relevant
3642 * if the cache has no available objects.
3643 */
3644void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3645{
3646 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3647
3648 trace_kmem_cache_alloc(_RET_IP_, ret,
3649 obj_size(cachep), cachep->buffer_size, flags);
3650
3651 return ret;
3652}
3653EXPORT_SYMBOL(kmem_cache_alloc);
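/*
 * Illustrative only: allocating and releasing a single object from a cache
 * such as the made-up "foo_cachep" used in the kmem_cache_destroy() example
 * above.
 *
 *	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	if (!f)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free(foo_cachep, f);
 */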
3654
3655#ifdef CONFIG_TRACING
3656void *
3657kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
3658{
3659 void *ret;
3660
3661 ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3662
3663 trace_kmalloc(_RET_IP_, ret,
3664 size, slab_buffer_size(cachep), flags);
3665 return ret;
3666}
3667EXPORT_SYMBOL(kmem_cache_alloc_trace);
3668#endif
3669
3670#ifdef CONFIG_NUMA
3671void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3672{
3673 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3674 __builtin_return_address(0));
3675
3676 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3677 obj_size(cachep), cachep->buffer_size,
3678 flags, nodeid);
3679
3680 return ret;
3681}
3682EXPORT_SYMBOL(kmem_cache_alloc_node);
3683
3684#ifdef CONFIG_TRACING
3685void *kmem_cache_alloc_node_trace(size_t size,
3686 struct kmem_cache *cachep,
3687 gfp_t flags,
3688 int nodeid)
3689{
3690 void *ret;
3691
3692 ret = __cache_alloc_node(cachep, flags, nodeid,
3693 __builtin_return_address(0));
3694 trace_kmalloc_node(_RET_IP_, ret,
3695 size, slab_buffer_size(cachep),
3696 flags, nodeid);
3697 return ret;
3698}
3699EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3700#endif
3701
3702static __always_inline void *
3703__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3704{
3705 struct kmem_cache *cachep;
3706
3707 cachep = kmem_find_general_cachep(size, flags);
3708 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3709 return cachep;
3710 return kmem_cache_alloc_node_trace(size, cachep, flags, node);
3711}
3712
3713#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3714void *__kmalloc_node(size_t size, gfp_t flags, int node)
3715{
3716 return __do_kmalloc_node(size, flags, node,
3717 __builtin_return_address(0));
3718}
3719EXPORT_SYMBOL(__kmalloc_node);
3720
3721void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3722 int node, unsigned long caller)
3723{
3724 return __do_kmalloc_node(size, flags, node, (void *)caller);
3725}
3726EXPORT_SYMBOL(__kmalloc_node_track_caller);
3727#else
3728void *__kmalloc_node(size_t size, gfp_t flags, int node)
3729{
3730 return __do_kmalloc_node(size, flags, node, NULL);
3731}
3732EXPORT_SYMBOL(__kmalloc_node);
3733#endif
3734#endif
3735
3736
3737
3738
3739
3740
3741
3742static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3743 void *caller)
3744{
3745 struct kmem_cache *cachep;
3746 void *ret;
3747
3748
3749
3750
3751
3752
3753 cachep = __find_general_cachep(size, flags);
3754 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3755 return cachep;
3756 ret = __cache_alloc(cachep, flags, caller);
3757
3758 trace_kmalloc((unsigned long) caller, ret,
3759 size, cachep->buffer_size, flags);
3760
3761 return ret;
3762}
3763
3764
3765#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3766void *__kmalloc(size_t size, gfp_t flags)
3767{
3768 return __do_kmalloc(size, flags, __builtin_return_address(0));
3769}
3770EXPORT_SYMBOL(__kmalloc);
3771
3772void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3773{
3774 return __do_kmalloc(size, flags, (void *)caller);
3775}
3776EXPORT_SYMBOL(__kmalloc_track_caller);
3777
3778#else
3779void *__kmalloc(size_t size, gfp_t flags)
3780{
3781 return __do_kmalloc(size, flags, NULL);
3782}
3783EXPORT_SYMBOL(__kmalloc);
3784#endif
3785
3786/**
3787 * kmem_cache_free - Deallocate an object
3788 * @cachep: The cache the allocation was from.
3789 * @objp: The previously allocated object.
3790 *
3791 * Free an object which was previously allocated from this
3792 * cache.
3793 */
3794void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3795{
3796 unsigned long flags;
3797
3798 local_irq_save(flags);
3799 debug_check_no_locks_freed(objp, obj_size(cachep));
3800 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3801 debug_check_no_obj_freed(objp, obj_size(cachep));
3802 __cache_free(cachep, objp);
3803 local_irq_restore(flags);
3804
3805 trace_kmem_cache_free(_RET_IP_, objp);
3806}
3807EXPORT_SYMBOL(kmem_cache_free);
3808
3809/**
3810 * kfree - free previously allocated memory
3811 * @objp: pointer returned by kmalloc.
3812 *
3813 * If @objp is NULL, no operation is performed.
3814 *
3815 * Don't free memory not originally allocated by kmalloc()
3816 * or you will run into trouble.
3817 */
3818void kfree(const void *objp)
3819{
3820 struct kmem_cache *c;
3821 unsigned long flags;
3822
3823 trace_kfree(_RET_IP_, objp);
3824
3825 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3826 return;
3827 local_irq_save(flags);
3828 kfree_debugcheck(objp);
3829 c = virt_to_cache(objp);
3830 debug_check_no_locks_freed(objp, obj_size(c));
3831 debug_check_no_obj_freed(objp, obj_size(c));
3832 __cache_free(c, (void *)objp);
3833 local_irq_restore(flags);
3834}
3835EXPORT_SYMBOL(kfree);
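/*
 * Illustrative only: kmalloc()/kfree() are built on the sized general
 * caches handled above; the buffer name and the 256-byte size are arbitrary.
 *
 *	char *buf = kmalloc(256, GFP_KERNEL);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	kfree(buf);	(kfree() of NULL or ZERO_SIZE_PTR is a no-op)
 */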
3836
3837unsigned int kmem_cache_size(struct kmem_cache *cachep)
3838{
3839 return obj_size(cachep);
3840}
3841EXPORT_SYMBOL(kmem_cache_size);
3842
3843const char *kmem_cache_name(struct kmem_cache *cachep)
3844{
3845 return cachep->name;
3846}
3847EXPORT_SYMBOL_GPL(kmem_cache_name);
3848
3849
3850
3851
3852static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3853{
3854 int node;
3855 struct kmem_list3 *l3;
3856 struct array_cache *new_shared;
3857 struct array_cache **new_alien = NULL;
3858
3859 for_each_online_node(node) {
3860
3861 if (use_alien_caches) {
3862 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
3863 if (!new_alien)
3864 goto fail;
3865 }
3866
3867 new_shared = NULL;
3868 if (cachep->shared) {
3869 new_shared = alloc_arraycache(node,
3870 cachep->shared*cachep->batchcount,
3871 0xbaadf00d, gfp);
3872 if (!new_shared) {
3873 free_alien_cache(new_alien);
3874 goto fail;
3875 }
3876 }
3877
3878 l3 = cachep->nodelists[node];
3879 if (l3) {
3880 struct array_cache *shared = l3->shared;
3881
3882 spin_lock_irq(&l3->list_lock);
3883
3884 if (shared)
3885 free_block(cachep, shared->entry,
3886 shared->avail, node);
3887
3888 l3->shared = new_shared;
3889 if (!l3->alien) {
3890 l3->alien = new_alien;
3891 new_alien = NULL;
3892 }
3893 l3->free_limit = (1 + nr_cpus_node(node)) *
3894 cachep->batchcount + cachep->num;
3895 spin_unlock_irq(&l3->list_lock);
3896 kfree(shared);
3897 free_alien_cache(new_alien);
3898 continue;
3899 }
3900 l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
3901 if (!l3) {
3902 free_alien_cache(new_alien);
3903 kfree(new_shared);
3904 goto fail;
3905 }
3906
3907 kmem_list3_init(l3);
3908 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3909 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3910 l3->shared = new_shared;
3911 l3->alien = new_alien;
3912 l3->free_limit = (1 + nr_cpus_node(node)) *
3913 cachep->batchcount + cachep->num;
3914 cachep->nodelists[node] = l3;
3915 }
3916 return 0;
3917
3918fail:
3919 if (!cachep->next.next) {
3920
3921 node--;
3922 while (node >= 0) {
3923 if (cachep->nodelists[node]) {
3924 l3 = cachep->nodelists[node];
3925
3926 kfree(l3->shared);
3927 free_alien_cache(l3->alien);
3928 kfree(l3);
3929 cachep->nodelists[node] = NULL;
3930 }
3931 node--;
3932 }
3933 }
3934 return -ENOMEM;
3935}
3936
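/*
 * Retuning the per-cpu caches: a ccupdate_struct carries one new array_cache
 * per cpu; do_ccupdate_local() runs on every cpu (via on_each_cpu()) and
 * swaps the new array in, handing the old one back so do_tune_cpucache()
 * can drain and free it.
 */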
3937struct ccupdate_struct {
3938 struct kmem_cache *cachep;
3939 struct array_cache *new[NR_CPUS];
3940};
3941
3942static void do_ccupdate_local(void *info)
3943{
3944 struct ccupdate_struct *new = info;
3945 struct array_cache *old;
3946
3947 check_irq_off();
3948 old = cpu_cache_get(new->cachep);
3949
3950 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3951 new->new[smp_processor_id()] = old;
3952}
3953
3954
3955static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3956 int batchcount, int shared, gfp_t gfp)
3957{
3958 struct ccupdate_struct *new;
3959 int i;
3960
3961 new = kzalloc(sizeof(*new), gfp);
3962 if (!new)
3963 return -ENOMEM;
3964
3965 for_each_online_cpu(i) {
3966 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
3967 batchcount, gfp);
3968 if (!new->new[i]) {
3969 for (i--; i >= 0; i--)
3970 kfree(new->new[i]);
3971 kfree(new);
3972 return -ENOMEM;
3973 }
3974 }
3975 new->cachep = cachep;
3976
3977 on_each_cpu(do_ccupdate_local, (void *)new, 1);
3978
3979 check_irq_on();
3980 cachep->batchcount = batchcount;
3981 cachep->limit = limit;
3982 cachep->shared = shared;
3983
3984 for_each_online_cpu(i) {
3985 struct array_cache *ccold = new->new[i];
3986 if (!ccold)
3987 continue;
3988 spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
3989 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
3990 spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
3991 kfree(ccold);
3992 }
3993 kfree(new);
3994 return alloc_kmemlist(cachep, gfp);
3995}
3996
3997
3998static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3999{
4000 int err;
4001 int limit, shared;
4002
4003 /*
4004 * The head array serves three purposes:
4005 * - create a LIFO ordering, i.e. return objects that are cache-warm
4006 * - reduce the number of spinlock operations
4007 * - reduce the number of linked-list operations on the slab and
4008 *   bufctl chains: array operations are cheaper.
4009 * The numbers below are guessed; ideally they would be auto-tuned as
4010 * described by Bonwick.
4011 */
4012 if (cachep->buffer_size > 131072)
4013 limit = 1;
4014 else if (cachep->buffer_size > PAGE_SIZE)
4015 limit = 8;
4016 else if (cachep->buffer_size > 1024)
4017 limit = 24;
4018 else if (cachep->buffer_size > 256)
4019 limit = 54;
4020 else
4021 limit = 120;
4022
4023 /*
4024 * CPU-bound tasks (e.g. network routing) can exhibit cpu-bound allocation
4025 * behaviour: most allocations on one cpu, most frees on another. For these
4026 * cases an efficient object passing between cpus is necessary; this is
4027 * provided by a shared array, which replaces Bonwick's magazine layer.
4028 * On a uniprocessor it is functionally equivalent (but less efficient)
4029 * to a larger limit, so it is disabled by default.
4030 */
4031
4032 shared = 0;
4033 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4034 shared = 8;
4035
4036#if DEBUG
4037
4038
4039
4040
4041 if (limit > 32)
4042 limit = 32;
4043#endif
4044 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
4045 if (err)
4046 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4047 cachep->name, -err);
4048 return err;
4049}
4050
4051/*
4052 * Drain an array if it contains any elements, taking the l3 lock only if
4053 * necessary. Note that the l3 list_lock also protects the array_cache
4054 * when drain_array() is used on the shared array.
4055 */
4056static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4057 struct array_cache *ac, int force, int node)
4058{
4059 int tofree;
4060
4061 if (!ac || !ac->avail)
4062 return;
4063 if (ac->touched && !force) {
4064 ac->touched = 0;
4065 } else {
4066 spin_lock_irq(&l3->list_lock);
4067 if (ac->avail) {
4068 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4069 if (tofree > ac->avail)
4070 tofree = (ac->avail + 1) / 2;
4071 free_block(cachep, ac->entry, tofree, node);
4072 ac->avail -= tofree;
4073 memmove(ac->entry, &(ac->entry[tofree]),
4074 sizeof(void *) * ac->avail);
4075 }
4076 spin_unlock_irq(&l3->list_lock);
4077 }
4078}
4079
4080/**
4081 * cache_reap - Reclaim memory from caches.
4082 * @w: work descriptor
4083 *
4084 * Called from a workqueue every few seconds.
4085 * Purpose:
4086 * - clear the per-cpu caches for this CPU.
4087 * - return freeable pages to the main free memory pool.
4088 *
4089 * If the cache_chain_mutex cannot be acquired, just give up: the reap
4090 * is simply rescheduled and retried on the next timeout.
4091 */
4092static void cache_reap(struct work_struct *w)
4093{
4094 struct kmem_cache *searchp;
4095 struct kmem_list3 *l3;
4096 int node = numa_mem_id();
4097 struct delayed_work *work = to_delayed_work(w);
4098
4099 if (!mutex_trylock(&cache_chain_mutex))
4100
4101 goto out;
4102
4103 list_for_each_entry(searchp, &cache_chain, next) {
4104 check_irq_on();
4105
4106
4107
4108
4109
4110
4111 l3 = searchp->nodelists[node];
4112
4113 reap_alien(searchp, l3);
4114
4115 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4116
4117
4118
4119
4120
4121 if (time_after(l3->next_reap, jiffies))
4122 goto next;
4123
4124 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4125
4126 drain_array(searchp, l3, l3->shared, 0, node);
4127
4128 if (l3->free_touched)
4129 l3->free_touched = 0;
4130 else {
4131 int freed;
4132
4133 freed = drain_freelist(searchp, l3, (l3->free_limit +
4134 5 * searchp->num - 1) / (5 * searchp->num));
4135 STATS_ADD_REAPED(searchp, freed);
4136 }
4137next:
4138 cond_resched();
4139 }
4140 check_irq_on();
4141 mutex_unlock(&cache_chain_mutex);
4142 next_reap_node();
4143out:
4144
4145 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4146}
4147
4148#ifdef CONFIG_SLABINFO
4149
4150static void print_slabinfo_header(struct seq_file *m)
4151{
4152
4153
4154
4155
4156#if STATS
4157 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4158#else
4159 seq_puts(m, "slabinfo - version: 2.1\n");
4160#endif
4161 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4162 "<objperslab> <pagesperslab>");
4163 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4164 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4165#if STATS
4166 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4167 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4168 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4169#endif
4170 seq_putc(m, '\n');
4171}
4172
4173static void *s_start(struct seq_file *m, loff_t *pos)
4174{
4175 loff_t n = *pos;
4176
4177 mutex_lock(&cache_chain_mutex);
4178 if (!n)
4179 print_slabinfo_header(m);
4180
4181 return seq_list_start(&cache_chain, *pos);
4182}
4183
4184static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4185{
4186 return seq_list_next(p, &cache_chain, pos);
4187}
4188
4189static void s_stop(struct seq_file *m, void *p)
4190{
4191 mutex_unlock(&cache_chain_mutex);
4192}
4193
4194static int s_show(struct seq_file *m, void *p)
4195{
4196 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4197 struct slab *slabp;
4198 unsigned long active_objs;
4199 unsigned long num_objs;
4200 unsigned long active_slabs = 0;
4201 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4202 const char *name;
4203 char *error = NULL;
4204 int node;
4205 struct kmem_list3 *l3;
4206
4207 active_objs = 0;
4208 num_slabs = 0;
4209 for_each_online_node(node) {
4210 l3 = cachep->nodelists[node];
4211 if (!l3)
4212 continue;
4213
4214 check_irq_on();
4215 spin_lock_irq(&l3->list_lock);
4216
4217 list_for_each_entry(slabp, &l3->slabs_full, list) {
4218 if (slabp->inuse != cachep->num && !error)
4219 error = "slabs_full accounting error";
4220 active_objs += cachep->num;
4221 active_slabs++;
4222 }
4223 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4224 if (slabp->inuse == cachep->num && !error)
4225 error = "slabs_partial inuse accounting error";
4226 if (!slabp->inuse && !error)
4227 error = "slabs_partial/inuse accounting error";
4228 active_objs += slabp->inuse;
4229 active_slabs++;
4230 }
4231 list_for_each_entry(slabp, &l3->slabs_free, list) {
4232 if (slabp->inuse && !error)
4233 error = "slabs_free/inuse accounting error";
4234 num_slabs++;
4235 }
4236 free_objects += l3->free_objects;
4237 if (l3->shared)
4238 shared_avail += l3->shared->avail;
4239
4240 spin_unlock_irq(&l3->list_lock);
4241 }
4242 num_slabs += active_slabs;
4243 num_objs = num_slabs * cachep->num;
4244 if (num_objs - active_objs != free_objects && !error)
4245 error = "free_objects accounting error";
4246
4247 name = cachep->name;
4248 if (error)
4249 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4250
4251 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4252 name, active_objs, num_objs, cachep->buffer_size,
4253 cachep->num, (1 << cachep->gfporder));
4254 seq_printf(m, " : tunables %4u %4u %4u",
4255 cachep->limit, cachep->batchcount, cachep->shared);
4256 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4257 active_slabs, num_slabs, shared_avail);
4258#if STATS
4259 {
4260 unsigned long high = cachep->high_mark;
4261 unsigned long allocs = cachep->num_allocations;
4262 unsigned long grown = cachep->grown;
4263 unsigned long reaped = cachep->reaped;
4264 unsigned long errors = cachep->errors;
4265 unsigned long max_freeable = cachep->max_freeable;
4266 unsigned long node_allocs = cachep->node_allocs;
4267 unsigned long node_frees = cachep->node_frees;
4268 unsigned long overflows = cachep->node_overflow;
4269
4270 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4271 "%4lu %4lu %4lu %4lu %4lu",
4272 allocs, high, grown,
4273 reaped, errors, max_freeable, node_allocs,
4274 node_frees, overflows);
4275 }
4276
4277 {
4278 unsigned long allochit = atomic_read(&cachep->allochit);
4279 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4280 unsigned long freehit = atomic_read(&cachep->freehit);
4281 unsigned long freemiss = atomic_read(&cachep->freemiss);
4282
4283 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4284 allochit, allocmiss, freehit, freemiss);
4285 }
4286#endif
4287 seq_putc(m, '\n');
4288 return 0;
4289}
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305static const struct seq_operations slabinfo_op = {
4306 .start = s_start,
4307 .next = s_next,
4308 .stop = s_stop,
4309 .show = s_show,
4310};
4311
4312#define MAX_SLABINFO_WRITE 128
4313/**
4314 * slabinfo_write - Tuning for the slab allocator
4315 * @file: unused
4316 * @buffer: user buffer holding a "cache-name limit batchcount shared" line
4317 * @count: data length
4318 * @ppos: unused
4319 */
4320static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4321 size_t count, loff_t *ppos)
4322{
4323 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4324 int limit, batchcount, shared, res;
4325 struct kmem_cache *cachep;
4326
4327 if (count > MAX_SLABINFO_WRITE)
4328 return -EINVAL;
4329 if (copy_from_user(&kbuf, buffer, count))
4330 return -EFAULT;
4331 kbuf[MAX_SLABINFO_WRITE] = '\0';
4332
4333 tmp = strchr(kbuf, ' ');
4334 if (!tmp)
4335 return -EINVAL;
4336 *tmp = '\0';
4337 tmp++;
4338 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4339 return -EINVAL;
4340
4341
4342 mutex_lock(&cache_chain_mutex);
4343 res = -EINVAL;
4344 list_for_each_entry(cachep, &cache_chain, next) {
4345 if (!strcmp(cachep->name, kbuf)) {
4346 if (limit < 1 || batchcount < 1 ||
4347 batchcount > limit || shared < 0) {
4348 res = 0;
4349 } else {
4350 res = do_tune_cpucache(cachep, limit,
4351 batchcount, shared,
4352 GFP_KERNEL);
4353 }
4354 break;
4355 }
4356 }
4357 mutex_unlock(&cache_chain_mutex);
4358 if (res >= 0)
4359 res = count;
4360 return res;
4361}
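/*
 * Illustrative only: a line of the form
 *	"<cache-name> <limit> <batchcount> <shared>"
 * written to /proc/slabinfo retunes the named cache via do_tune_cpucache();
 * values failing the sanity checks above are silently accepted but ignored.
 */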
4362
4363static int slabinfo_open(struct inode *inode, struct file *file)
4364{
4365 return seq_open(file, &slabinfo_op);
4366}
4367
4368static const struct file_operations proc_slabinfo_operations = {
4369 .open = slabinfo_open,
4370 .read = seq_read,
4371 .write = slabinfo_write,
4372 .llseek = seq_lseek,
4373 .release = seq_release,
4374};
4375
4376#ifdef CONFIG_DEBUG_SLAB_LEAK
4377
4378static void *leaks_start(struct seq_file *m, loff_t *pos)
4379{
4380 mutex_lock(&cache_chain_mutex);
4381 return seq_list_start(&cache_chain, *pos);
4382}
4383
4384static inline int add_caller(unsigned long *n, unsigned long v)
4385{
4386 unsigned long *p;
4387 int l;
4388 if (!v)
4389 return 1;
4390 l = n[1];
4391 p = n + 2;
4392 while (l) {
4393 int i = l/2;
4394 unsigned long *q = p + 2 * i;
4395 if (*q == v) {
4396 q[1]++;
4397 return 1;
4398 }
4399 if (*q > v) {
4400 l = i;
4401 } else {
4402 p = q + 2;
4403 l -= i + 1;
4404 }
4405 }
4406 if (++n[1] == n[0])
4407 return 0;
4408 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4409 p[0] = v;
4410 p[1] = 1;
4411 return 1;
4412}
4413
4414static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4415{
4416 void *p;
4417 int i;
4418 if (n[0] == n[1])
4419 return;
4420 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4421 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4422 continue;
4423 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4424 return;
4425 }
4426}
4427
4428static void show_symbol(struct seq_file *m, unsigned long address)
4429{
4430#ifdef CONFIG_KALLSYMS
4431 unsigned long offset, size;
4432 char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4433
4434 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4435 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4436 if (modname[0])
4437 seq_printf(m, " [%s]", modname);
4438 return;
4439 }
4440#endif
4441 seq_printf(m, "%p", (void *)address);
4442}
4443
4444static int leaks_show(struct seq_file *m, void *p)
4445{
4446 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4447 struct slab *slabp;
4448 struct kmem_list3 *l3;
4449 const char *name;
4450 unsigned long *n = m->private;
4451 int node;
4452 int i;
4453
4454 if (!(cachep->flags & SLAB_STORE_USER))
4455 return 0;
4456 if (!(cachep->flags & SLAB_RED_ZONE))
4457 return 0;
4458
4459
4460
4461 n[1] = 0;
4462
4463 for_each_online_node(node) {
4464 l3 = cachep->nodelists[node];
4465 if (!l3)
4466 continue;
4467
4468 check_irq_on();
4469 spin_lock_irq(&l3->list_lock);
4470
4471 list_for_each_entry(slabp, &l3->slabs_full, list)
4472 handle_slab(n, cachep, slabp);
4473 list_for_each_entry(slabp, &l3->slabs_partial, list)
4474 handle_slab(n, cachep, slabp);
4475 spin_unlock_irq(&l3->list_lock);
4476 }
4477 name = cachep->name;
4478 if (n[0] == n[1]) {
4479
4480 mutex_unlock(&cache_chain_mutex);
4481 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4482 if (!m->private) {
4483
4484 m->private = n;
4485 mutex_lock(&cache_chain_mutex);
4486 return -ENOMEM;
4487 }
4488 *(unsigned long *)m->private = n[0] * 2;
4489 kfree(n);
4490 mutex_lock(&cache_chain_mutex);
4491
4492 m->count = m->size;
4493 return 0;
4494 }
4495 for (i = 0; i < n[1]; i++) {
4496 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4497 show_symbol(m, n[2*i+2]);
4498 seq_putc(m, '\n');
4499 }
4500
4501 return 0;
4502}
4503
4504static const struct seq_operations slabstats_op = {
4505 .start = leaks_start,
4506 .next = s_next,
4507 .stop = s_stop,
4508 .show = leaks_show,
4509};
4510
4511static int slabstats_open(struct inode *inode, struct file *file)
4512{
4513 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4514 int ret = -ENOMEM;
4515 if (n) {
4516 ret = seq_open(file, &slabstats_op);
4517 if (!ret) {
4518 struct seq_file *m = file->private_data;
4519 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4520 m->private = n;
4521 n = NULL;
4522 }
4523 kfree(n);
4524 }
4525 return ret;
4526}
4527
4528static const struct file_operations proc_slabstats_operations = {
4529 .open = slabstats_open,
4530 .read = seq_read,
4531 .llseek = seq_lseek,
4532 .release = seq_release_private,
4533};
4534#endif
4535
4536static int __init slab_proc_init(void)
4537{
4538 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4539#ifdef CONFIG_DEBUG_SLAB_LEAK
4540 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4541#endif
4542 return 0;
4543}
4544module_init(slab_proc_init);
4545#endif
4546
4547/**
4548 * ksize - get the actual amount of memory allocated for a given object
4549 * @objp: Pointer to the object
4550 *
4551 * kmalloc may internally round up allocations and return more memory
4552 * than requested. ksize() can be used to determine the actual amount of
4553 * memory allocated. The caller may use this additional memory, even though
4554 * a smaller amount of memory was initially specified with the kmalloc call.
4555 * The caller must guarantee that objp points to a valid object previously
4556 * allocated with either kmalloc() or kmem_cache_alloc(). The object
4557 * must not be freed during the duration of the call.
4558 */
4559size_t ksize(const void *objp)
4560{
4561 BUG_ON(!objp);
4562 if (unlikely(objp == ZERO_SIZE_PTR))
4563 return 0;
4564
4565 return obj_size(virt_to_cache(objp));
4566}
4567EXPORT_SYMBOL(ksize);
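/*
 * Illustrative only: because kmalloc() rounds a request up to the nearest
 * sized cache, ksize() may report more usable space than was asked for.
 * The exact figure depends on the configured kmalloc caches; 128 bytes for
 * a 100-byte request is a typical example, not a guarantee.
 *
 *	void *p = kmalloc(100, GFP_KERNEL);
 *	size_t usable = p ? ksize(p) : 0;
 */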
4568