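/*
 * linux/mm/slab.c
 *
 * An object-cache ("slab") allocator: caches of pre-sized objects are
 * carved out of pages obtained from the page allocator.  Per-CPU array
 * caches front per-node lists of full, partial and free slabs so that
 * the hot alloc/free paths avoid taking any list locks.
 */
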
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
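
/*
 * DEBUG        - 1 for kmem_cache_create() to honour SLAB_RED_ZONE and
 *                SLAB_POISON; 0 for faster, smaller code (especially in
 *                the critical paths).
 * STATS        - 1 to collect stats for /proc/slabinfo.
 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE + SLAB_POISON (if possible).
 */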
#ifdef CONFIG_DEBUG_SLAB
#define DEBUG           1
#define STATS           1
#define FORCED_DEBUG    1
#else
#define DEBUG           0
#define STATS           0
#define FORCED_DEBUG    0
#endif

#define BYTES_PER_WORD          sizeof(void *)
#define REDZONE_ALIGN           max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
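
/* Legal flag mask for kmem_cache_create(). */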
#if DEBUG
# define CREATE_MASK    (SLAB_RED_ZONE | \
                         SLAB_POISON | SLAB_HWCACHE_ALIGN | \
                         SLAB_CACHE_DMA | \
                         SLAB_STORE_USER | \
                         SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                         SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
                         SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK    (SLAB_HWCACHE_ALIGN | \
                         SLAB_CACHE_DMA | \
                         SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                         SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
                         SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif
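
/*
 * kmem_bufctl_t:
 *
 * Bufctls are the per-object free-list links kept in the slab management
 * structure: slab_bufctl(slabp)[i] holds the index of the free object that
 * follows object i.  The values at and above SLAB_LIMIT are reserved as
 * end-of-list and debug markers.
 */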
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END      (((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE     (((kmem_bufctl_t)(~0U))-1)
#define BUFCTL_ACTIVE   (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT      (((kmem_bufctl_t)(~0U))-3)
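
/*
 * struct slab_rcu
 *
 * Used for SLAB_DESTROY_BY_RCU caches: the rcu_head is overlaid on the slab
 * management structure so that the backing pages can be handed back to
 * kmem_freepages() from an RCU callback once a grace period has elapsed.
 */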
struct slab_rcu {
        struct rcu_head head;
        struct kmem_cache *cachep;
        void *addr;
};
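
/*
 * struct slab
 *
 * Manages the objects in a slab.  Placed either at the beginning of the
 * memory allocated for the slab, or taken from a general cache (see
 * CFLGS_OFF_SLAB).  s_mem points at the first object (it includes the
 * colour offset), inuse counts allocated objects, and free indexes the
 * first free object.  The union lets an RCU-destroyed slab reuse this
 * space for its struct slab_rcu.
 */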
struct slab {
        union {
                struct {
                        struct list_head list;
                        unsigned long colouroff;
                        void *s_mem;
                        unsigned int inuse;
                        kmem_bufctl_t free;
                        unsigned short nodeid;
                };
                struct slab_rcu __slab_cover_slab_rcu;
        };
};
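
/*
 * struct array_cache
 *
 * A stack of object pointers, used per-CPU and for the per-node "shared"
 * and "alien" caches.  'avail' of 'limit' slots are in use; 'batchcount'
 * objects are moved to or from the slab lists at a time, and 'touched'
 * records recent activity for the reaper.  The entry[] flexible array must
 * come last: it is sized when the array_cache is allocated.
 */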
struct array_cache {
        unsigned int avail;
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
        spinlock_t lock;
        void *entry[];
};
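
/* Bootstrap: statically sized array cache used before kmalloc is usable. */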
#define BOOT_CPUCACHE_ENTRIES   1
struct arraycache_init {
        struct array_cache cache;
        void *entries[BOOT_CPUCACHE_ENTRIES];
};
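
/*
 * The per-node slab lists: full, partial and free slabs, plus the node's
 * shared and alien array caches.  Protected by list_lock.
 */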
struct kmem_list3 {
        struct list_head slabs_partial;
        struct list_head slabs_full;
        struct list_head slabs_free;
        unsigned long free_objects;
        unsigned int free_limit;
        unsigned int colour_next;
        spinlock_t list_lock;
        struct array_cache *shared;
        struct array_cache **alien;
        unsigned long next_reap;
        int free_touched;
};
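
/*
 * Statically bootstrapped kmem_list3 structures: the cache of caches and
 * the two kmalloc caches the allocator itself needs draw their per-node
 * lists from this array until real ones can be allocated.
 */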
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES
#define SIZE_L3 (2 * MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
                        int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
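
/*
 * index_of() - map a constant size to its kmalloc cache index.
 *
 * This function must be completely optimized away if a constant is passed
 * to it; a call with a non-constant size references the undefined
 * __bad_size(), turning misuse into a link-time error.
 */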
static __always_inline int index_of(const size_t size)
{
        extern void __bad_size(void);

        if (__builtin_constant_p(size)) {
                int i = 0;

#define CACHE(x) \
        if (size <= x) \
                return i; \
        else \
                i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
                __bad_size();
        } else
                __bad_size();
        return 0;
}

static int slab_early_init = 1;

#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))

static void kmem_list3_init(struct kmem_list3 *parent)
{
        INIT_LIST_HEAD(&parent->slabs_full);
        INIT_LIST_HEAD(&parent->slabs_partial);
        INIT_LIST_HEAD(&parent->slabs_free);
        parent->shared = NULL;
        parent->alien = NULL;
        parent->colour_next = 0;
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid)                          \
        do {                                                            \
                INIT_LIST_HEAD(listp);                                  \
                list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
        } while (0)

#define MAKE_ALL_LISTS(cachep, ptr, nodeid)                             \
        do {                                                            \
        MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);  \
        MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
        MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);  \
        } while (0)

#define CFLGS_OFF_SLAB          (0x80000000UL)
#define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT       16
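
/*
 * Intervals at which cache_reap() checks the CPU caches and node lists for
 * stale objects.  Fewer reaps mean less chance of needless drain/refill
 * cycles; on the other hand the array caches can pin a lot of otherwise
 * freeable memory, so these are a trade-off.
 */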
#define REAPTIMEOUT_CPUC        (2*HZ)
#define REAPTIMEOUT_LIST3       (4*HZ)

#if STATS
#define STATS_INC_ACTIVE(x)     ((x)->num_active++)
#define STATS_DEC_ACTIVE(x)     ((x)->num_active--)
#define STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
#define STATS_INC_GROWN(x)      ((x)->grown++)
#define STATS_ADD_REAPED(x,y)   ((x)->reaped += (y))
#define STATS_SET_HIGH(x)                                               \
        do {                                                            \
                if ((x)->num_active > (x)->high_mark)                   \
                        (x)->high_mark = (x)->num_active;               \
        } while (0)
#define STATS_INC_ERR(x)        ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x)  ((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
#define STATS_SET_FREEABLE(x, i)                                        \
        do {                                                            \
                if ((x)->max_freeable < i)                              \
                        (x)->max_freeable = i;                          \
        } while (0)
#define STATS_INC_ALLOCHIT(x)   atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)  atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)    atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)   atomic_inc(&(x)->freemiss)
#else
#define STATS_INC_ACTIVE(x)     do { } while (0)
#define STATS_DEC_ACTIVE(x)     do { } while (0)
#define STATS_INC_ALLOCED(x)    do { } while (0)
#define STATS_INC_GROWN(x)      do { } while (0)
#define STATS_ADD_REAPED(x,y)   do { (void)(y); } while (0)
#define STATS_SET_HIGH(x)       do { } while (0)
#define STATS_INC_ERR(x)        do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x)  do { } while (0)
#define STATS_INC_ACOVERFLOW(x) do { } while (0)
#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)   do { } while (0)
#define STATS_INC_ALLOCMISS(x)  do { } while (0)
#define STATS_INC_FREEHIT(x)    do { } while (0)
#define STATS_INC_FREEMISS(x)   do { } while (0)
#endif

#if DEBUG
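
/*
 * Memory layout of debug objects:
 *
 * With SLAB_RED_ZONE each object is bracketed by two red-zone words placed
 * just outside the payload; obj_offset() gives the distance from the start
 * of the allocated buffer to the payload and obj_size() the payload size.
 * With SLAB_STORE_USER the last word of the buffer records the address of
 * the last caller, retrieved by dbg_userword().
 */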
static int obj_offset(struct kmem_cache *cachep)
{
        return cachep->obj_offset;
}

static int obj_size(struct kmem_cache *cachep)
{
        return cachep->obj_size;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        return (unsigned long long*) (objp + obj_offset(cachep) -
                                      sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        if (cachep->flags & SLAB_STORE_USER)
                return (unsigned long long *)(objp + cachep->buffer_size -
                                              sizeof(unsigned long long) -
                                              REDZONE_ALIGN);
        return (unsigned long long *) (objp + cachep->buffer_size -
                                       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
        BUG_ON(!(cachep->flags & SLAB_STORE_USER));
        return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)                   0
#define obj_size(cachep)                (cachep->buffer_size)
#define dbg_redzone1(cachep, objp)      ({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)      ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)      ({BUG(); (void **)NULL;})

#endif

#ifdef CONFIG_TRACING
size_t slab_buffer_size(struct kmem_cache *cachep)
{
        return cachep->buffer_size;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
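
/*
 * Do not go above this order unless 0 objects fit into the slab.
 * Raised at boot (see kmem_cache_init()) on machines with more than 32MB.
 */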
#define BREAK_GFP_ORDER_HI      1
#define BREAK_GFP_ORDER_LO      0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
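
/*
 * Functions for storing/retrieving the cachep and slab in the page
 * allocator's struct page: lru.next carries the kmem_cache and lru.prev
 * the slab.  These are used to find the cache and slab an object belongs
 * to, e.g. on kfree().
 */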
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
        page->lru.next = (struct list_head *)cache;
}

static inline struct kmem_cache *page_get_cache(struct page *page)
{
        page = compound_head(page);
        BUG_ON(!PageSlab(page));
        return (struct kmem_cache *)page->lru.next;
}

static inline void page_set_slab(struct page *page, struct slab *slab)
{
        page->lru.prev = (struct list_head *)slab;
}

static inline struct slab *page_get_slab(struct page *page)
{
        BUG_ON(!PageSlab(page));
        return (struct slab *)page->lru.prev;
}

static inline struct kmem_cache *virt_to_cache(const void *obj)
{
        struct page *page = virt_to_head_page(obj);
        return page_get_cache(page);
}

static inline struct slab *virt_to_slab(const void *obj)
{
        struct page *page = virt_to_head_page(obj);
        return page_get_slab(page);
}

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
                                 unsigned int idx)
{
        return slab->s_mem + cache->buffer_size * idx;
}
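
/*
 * We want to avoid an expensive divide: the offset within the slab is
 * divided by buffer_size using a precomputed reciprocal multiplication
 * (see reciprocal_divide() in linux/reciprocal_div.h).
 */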
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
                                        const struct slab *slab, void *obj)
{
        u32 offset = (obj - slab->s_mem);
        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
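
/* The default caches backing kmalloc().  Custom caches can have other sizes. */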
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
        CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);
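
/* Must parallel malloc_sizes above.  Out of line to save space. */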
struct cache_names {
        char *name;
        char *name_dma;
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
        {NULL,}
#undef CACHE
};

static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
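
/* The internal cache of kmem_cache descriptors: the cache of caches. */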
static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
static struct kmem_cache cache_cache = {
        .nodelists = cache_cache_nodelists,
        .batchcount = 1,
        .limit = BOOT_CPUCACHE_ENTRIES,
        .shared = 1,
        .buffer_size = sizeof(struct kmem_cache),
        .name = "kmem_cache",
};

#define BAD_ALIEN_MAGIC 0x01020304ul
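
/*
 * Bootstrap state: which parts of the allocator are usable.  Advanced in
 * lock-step by kmem_cache_init() and kmem_cache_init_late().
 */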
static enum {
        NONE,
        PARTIAL_AC,
        PARTIAL_L3,
        EARLY,
        LATE,
        FULL
} g_cpucache_up;
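
/* Used by boot code to determine if it can use the slab based allocator. */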
int slab_is_available(void)
{
        return g_cpucache_up >= EARLY;
}

#ifdef CONFIG_LOCKDEP
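
/*
 * Slab sometimes uses the kmalloc slabs to store the slab headers for
 * off-slab caches, so locks on two caches' lists can legitimately nest.
 * Give the kmalloc caches (and the debug-objects caches) their own lock
 * classes so lockdep can distinguish this from a real deadlock.
 */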
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static struct lock_class_key debugobj_l3_key;
static struct lock_class_key debugobj_alc_key;

static void slab_set_lock_classes(struct kmem_cache *cachep,
                struct lock_class_key *l3_key, struct lock_class_key *alc_key,
                int q)
{
        struct array_cache **alc;
        struct kmem_list3 *l3;
        int r;

        l3 = cachep->nodelists[q];
        if (!l3)
                return;

        lockdep_set_class(&l3->list_lock, l3_key);
        alc = l3->alien;
        if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
                return;
        for_each_node(r) {
                if (alc[r])
                        lockdep_set_class(&alc[r]->lock, alc_key);
        }
}

static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
        slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
}

static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
        int node;

        for_each_online_node(node)
                slab_set_debugobj_lock_classes_node(cachep, node);
}

static void init_node_lock_keys(int q)
{
        struct cache_sizes *s = malloc_sizes;

        if (g_cpucache_up < LATE)
                return;

        for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
                struct kmem_list3 *l3;

                l3 = s->cs_cachep->nodelists[q];
                if (!l3 || OFF_SLAB(s->cs_cachep))
                        continue;

                slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key,
                                &on_slab_alc_key, q);
        }
}

static inline void init_lock_keys(void)
{
        int node;

        for_each_node(node)
                init_node_lock_keys(node);
}
#else
static void init_node_lock_keys(int q)
{
}

static inline void init_lock_keys(void)
{
}

static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
{
}

static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
{
}
#endif
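
/* Guard access to the cache-chain. */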
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
        return cachep->array[smp_processor_id()];
}
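
/*
 * Map a kmalloc() size to its general cache.  The last malloc_sizes entry
 * has cs_size == ULONG_MAX, so the scan below always terminates.
 */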
static inline struct kmem_cache *__find_general_cachep(size_t size,
                                                        gfp_t gfpflags)
{
        struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
        BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
        if (!size)
                return ZERO_SIZE_PTR;

        while (size > csizep->cs_size)
                csizep++;

#ifdef CONFIG_ZONE_DMA
        if (unlikely(gfpflags & GFP_DMA))
                return csizep->cs_dmacachep;
#endif
        return csizep->cs_cachep;
}

static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
        return __find_general_cachep(size, gfpflags);
}

static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
        return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}
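
/*
 * Calculate the number of objects and left-over bytes for a given buffer
 * size.  For on-slab management the struct slab and one kmem_bufctl_t per
 * object share the page(s) with the objects; the first estimate below
 * ignores the alignment padding of the management structure and is then
 * corrected by at most one object.
 */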
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                           size_t align, int flags, size_t *left_over,
                           unsigned int *num)
{
        int nr_objs;
        size_t mgmt_size;
        size_t slab_size = PAGE_SIZE << gfporder;

        if (flags & CFLGS_OFF_SLAB) {
                mgmt_size = 0;
                nr_objs = slab_size / buffer_size;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;
        } else {
                nr_objs = (slab_size - sizeof(struct slab)) /
                          (buffer_size + sizeof(kmem_bufctl_t));

                if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
                       > slab_size)
                        nr_objs--;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;

                mgmt_size = slab_mgmt_size(nr_objs, align);
        }
        *num = nr_objs;
        *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}

#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)

static void __slab_error(const char *function, struct kmem_cache *cachep,
                        char *msg)
{
        printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
               function, cachep->name, msg);
        dump_stack();
}
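
/*
 * By default on NUMA we use alien caches to stage the freeing of objects
 * allocated from other nodes.  This causes massive memory consumption on
 * huge machines, so it can be disabled with "noaliencache" on the kernel
 * command line.
 */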
static int use_alien_caches __read_mostly = 1;
static int __init noaliencache_setup(char *s)
{
        use_alien_caches = 0;
        return 1;
}
__setup("noaliencache", noaliencache_setup);

#ifdef CONFIG_NUMA
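
/*
 * Special reaping functions for NUMA systems, called from cache_reap().
 * They take turns doing lazy draining of remote nodes' alien caches:
 * slab_reap_node rotates through the online nodes.
 */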
static DEFINE_PER_CPU(unsigned long, slab_reap_node);

static void init_reap_node(int cpu)
{
        int node;

        node = next_node(cpu_to_mem(cpu), node_online_map);
        if (node == MAX_NUMNODES)
                node = first_node(node_online_map);

        per_cpu(slab_reap_node, cpu) = node;
}

static void next_reap_node(void)
{
        int node = __this_cpu_read(slab_reap_node);

        node = next_node(node, node_online_map);
        if (unlikely(node >= MAX_NUMNODES))
                node = first_node(node_online_map);
        __this_cpu_write(slab_reap_node, node);
}

#else
#define init_reap_node(cpu) do { } while (0)
#define next_reap_node(void) do { } while (0)
#endif
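
/*
 * Initiate the reap timer running on the target CPU.  We run at around
 * 1-2Hz with a per-CPU jiffies offset, so that all CPU caches do not get
 * reaped at the same time.  The work func pointer doubles as the
 * "already initialized" flag: it is set up only once per CPU.
 */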
static void __cpuinit start_cpu_timer(int cpu)
{
        struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);

        if (keventd_up() && reap_work->work.func == NULL) {
                init_reap_node(cpu);
                INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
                schedule_delayed_work_on(cpu, reap_work,
                                        __round_jiffies_relative(HZ, cpu));
        }
}
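
/*
 * Allocate an array_cache on the given node.  The entry[] slots hold free
 * object pointers that kmemleak would otherwise count as live references,
 * so the structure is excluded from kmemleak scanning.
 */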
static struct array_cache *alloc_arraycache(int node, int entries,
                                            int batchcount, gfp_t gfp)
{
        int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
        struct array_cache *nc = NULL;

        nc = kmalloc_node(memsize, gfp, node);
        kmemleak_no_scan(nc);
        if (nc) {
                nc->avail = 0;
                nc->limit = entries;
                nc->batchcount = batchcount;
                nc->touched = 0;
                spin_lock_init(&nc->lock);
        }
        return nc;
}
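
/*
 * Transfer up to 'max' objects from one array cache to another.
 * Locking must be handled by the caller.
 */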
static int transfer_objects(struct array_cache *to,
                struct array_cache *from, unsigned int max)
{
        int nr = min3(from->avail, max, to->limit - to->avail);

        if (!nr)
                return 0;

        memcpy(to->entry + to->avail, from->entry + from->avail - nr,
                        sizeof(void *) * nr);

        from->avail -= nr;
        to->avail += nr;
        return nr;
}

#ifndef CONFIG_NUMA

#define drain_alien_cache(cachep, alien) do { } while (0)
#define reap_alien(cachep, l3) do { } while (0)

static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
        return (struct array_cache **)BAD_ALIEN_MAGIC;
}

static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}

static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        return 0;
}

static inline void *alternate_node_alloc(struct kmem_cache *cachep,
                gfp_t flags)
{
        return NULL;
}

static inline void *____cache_alloc_node(struct kmem_cache *cachep,
                 gfp_t flags, int nodeid)
{
        return NULL;
}

#else

static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
        struct array_cache **ac_ptr;
        int memsize = sizeof(void *) * nr_node_ids;
        int i;

        if (limit > 1)
                limit = 12;
        ac_ptr = kzalloc_node(memsize, gfp, node);
        if (ac_ptr) {
                for_each_node(i) {
                        if (i == node || !node_online(i))
                                continue;
                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
                        if (!ac_ptr[i]) {
                                for (i--; i >= 0; i--)
                                        kfree(ac_ptr[i]);
                                kfree(ac_ptr);
                                return NULL;
                        }
                }
        }
        return ac_ptr;
}

static void free_alien_cache(struct array_cache **ac_ptr)
{
        int i;

        if (!ac_ptr)
                return;
        for_each_node(i)
                kfree(ac_ptr[i]);
        kfree(ac_ptr);
}

static void __drain_alien_cache(struct kmem_cache *cachep,
                                struct array_cache *ac, int node)
{
        struct kmem_list3 *rl3 = cachep->nodelists[node];

        if (ac->avail) {
                spin_lock(&rl3->list_lock);
                if (rl3->shared)
                        transfer_objects(rl3->shared, ac, ac->limit);

                free_block(cachep, ac->entry, ac->avail, node);
                ac->avail = 0;
                spin_unlock(&rl3->list_lock);
        }
}
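
/*
 * Called from cache_reap() to regularly drain alien caches round robin.
 */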
static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
{
        int node = __this_cpu_read(slab_reap_node);

        if (l3->alien) {
                struct array_cache *ac = l3->alien[node];

                if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
                        __drain_alien_cache(cachep, ac, node);
                        spin_unlock_irq(&ac->lock);
                }
        }
}

static void drain_alien_cache(struct kmem_cache *cachep,
                                struct array_cache **alien)
{
        int i = 0;
        struct array_cache *ac;
        unsigned long flags;

        for_each_online_node(i) {
                ac = alien[i];
                if (ac) {
                        spin_lock_irqsave(&ac->lock, flags);
                        __drain_alien_cache(cachep, ac, i);
                        spin_unlock_irqrestore(&ac->lock, flags);
                }
        }
}
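
/*
 * Free an object that was allocated on a different node: stage it in that
 * node's alien cache if one exists, otherwise take the remote node's
 * list_lock and free it directly.  Returns 1 if the free was handled here,
 * 0 if the object belongs to the local node.
 */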
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
        struct slab *slabp = virt_to_slab(objp);
        int nodeid = slabp->nodeid;
        struct kmem_list3 *l3;
        struct array_cache *alien = NULL;
        int node;

        node = numa_mem_id();

        if (likely(slabp->nodeid == node))
                return 0;

        l3 = cachep->nodelists[node];
        STATS_INC_NODEFREES(cachep);
        if (l3->alien && l3->alien[nodeid]) {
                alien = l3->alien[nodeid];
                spin_lock(&alien->lock);
                if (unlikely(alien->avail == alien->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
                        __drain_alien_cache(cachep, alien, nodeid);
                }
                alien->entry[alien->avail++] = objp;
                spin_unlock(&alien->lock);
        } else {
                spin_lock(&(cachep->nodelists[nodeid])->list_lock);
                free_block(cachep, &objp, 1, nodeid);
                spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
        }
        return 1;
}
#endif
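
/*
 * Allocates and initializes nodelists for a node on each slab cache, used
 * for memory hotplug and CPU bring-up.  Must hold cache_chain_mutex.
 */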
static int init_cache_nodelists_node(int node)
{
        struct kmem_cache *cachep;
        struct kmem_list3 *l3;
        const int memsize = sizeof(struct kmem_list3);

        list_for_each_entry(cachep, &cache_chain, next) {
                if (!cachep->nodelists[node]) {
                        l3 = kmalloc_node(memsize, GFP_KERNEL, node);
                        if (!l3)
                                return -ENOMEM;
                        kmem_list3_init(l3);
                        l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
                            ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

                        cachep->nodelists[node] = l3;
                }

                spin_lock_irq(&cachep->nodelists[node]->list_lock);
                cachep->nodelists[node]->free_limit =
                        (1 + nr_cpus_node(node)) *
                        cachep->batchcount + cachep->num;
                spin_unlock_irq(&cachep->nodelists[node]->list_lock);
        }
        return 0;
}

static void __cpuinit cpuup_canceled(long cpu)
{
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_mem(cpu);
        const struct cpumask *mask = cpumask_of_node(node);

        list_for_each_entry(cachep, &cache_chain, next) {
                struct array_cache *nc;
                struct array_cache *shared;
                struct array_cache **alien;

                nc = cachep->array[cpu];
                cachep->array[cpu] = NULL;
                l3 = cachep->nodelists[node];

                if (!l3)
                        goto free_array_cache;

                spin_lock_irq(&l3->list_lock);

                l3->free_limit -= cachep->batchcount;
                if (nc)
                        free_block(cachep, nc->entry, nc->avail, node);

                if (!cpumask_empty(mask)) {
                        spin_unlock_irq(&l3->list_lock);
                        goto free_array_cache;
                }

                shared = l3->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
                                   shared->avail, node);
                        l3->shared = NULL;
                }

                alien = l3->alien;
                l3->alien = NULL;

                spin_unlock_irq(&l3->list_lock);

                kfree(shared);
                if (alien) {
                        drain_alien_cache(cachep, alien);
                        free_alien_cache(alien);
                }
free_array_cache:
                kfree(nc);
        }

        list_for_each_entry(cachep, &cache_chain, next) {
                l3 = cachep->nodelists[node];
                if (!l3)
                        continue;
                drain_freelist(cachep, l3, l3->free_objects);
        }
}

static int __cpuinit cpuup_prepare(long cpu)
{
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_mem(cpu);
        int err;

        err = init_cache_nodelists_node(node);
        if (err < 0)
                goto bad;

        list_for_each_entry(cachep, &cache_chain, next) {
                struct array_cache *nc;
                struct array_cache *shared = NULL;
                struct array_cache **alien = NULL;

                nc = alloc_arraycache(node, cachep->limit,
                                        cachep->batchcount, GFP_KERNEL);
                if (!nc)
                        goto bad;
                if (cachep->shared) {
                        shared = alloc_arraycache(node,
                                cachep->shared * cachep->batchcount,
                                0xbaadf00d, GFP_KERNEL);
                        if (!shared) {
                                kfree(nc);
                                goto bad;
                        }
                }
                if (use_alien_caches) {
                        alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
                        if (!alien) {
                                kfree(shared);
                                kfree(nc);
                                goto bad;
                        }
                }
                cachep->array[cpu] = nc;
                l3 = cachep->nodelists[node];
                BUG_ON(!l3);

                spin_lock_irq(&l3->list_lock);
                if (!l3->shared) {
                        l3->shared = shared;
                        shared = NULL;
                }
#ifdef CONFIG_NUMA
                if (!l3->alien) {
                        l3->alien = alien;
                        alien = NULL;
                }
#endif
                spin_unlock_irq(&l3->list_lock);
                kfree(shared);
                free_alien_cache(alien);
                if (cachep->flags & SLAB_DEBUG_OBJECTS)
                        slab_set_debugobj_lock_classes_node(cachep, node);
        }
        init_node_lock_keys(node);

        return 0;
bad:
        cpuup_canceled(cpu);
        return -ENOMEM;
}

static int __cpuinit cpuup_callback(struct notifier_block *nfb,
                                    unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;
        int err = 0;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                mutex_lock(&cache_chain_mutex);
                err = cpuup_prepare(cpu);
                mutex_unlock(&cache_chain_mutex);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                start_cpu_timer(cpu);
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
                cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
                per_cpu(slab_reap_work, cpu).work.func = NULL;
                break;
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
                start_cpu_timer(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
#endif
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                mutex_lock(&cache_chain_mutex);
                cpuup_canceled(cpu);
                mutex_unlock(&cache_chain_mutex);
                break;
        }
        return notifier_from_errno(err);
}

static struct notifier_block __cpuinitdata cpucache_notifier = {
        &cpuup_callback, NULL, 0
};

#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
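
/*
 * Drains freelist for a node on each slab cache, used for memory hotplug.
 * Returns -EBUSY if any cache still has slabs with objects on the node, so
 * the offline attempt can be aborted.
 */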
static int __meminit drain_cache_nodelists_node(int node)
{
        struct kmem_cache *cachep;
        int ret = 0;

        list_for_each_entry(cachep, &cache_chain, next) {
                struct kmem_list3 *l3;

                l3 = cachep->nodelists[node];
                if (!l3)
                        continue;

                drain_freelist(cachep, l3, l3->free_objects);

                if (!list_empty(&l3->slabs_full) ||
                    !list_empty(&l3->slabs_partial)) {
                        ret = -EBUSY;
                        break;
                }
        }
        return ret;
}

static int __meminit slab_memory_callback(struct notifier_block *self,
                                        unsigned long action, void *arg)
{
        struct memory_notify *mnb = arg;
        int ret = 0;
        int nid;

        nid = mnb->status_change_nid;
        if (nid < 0)
                goto out;

        switch (action) {
        case MEM_GOING_ONLINE:
                mutex_lock(&cache_chain_mutex);
                ret = init_cache_nodelists_node(nid);
                mutex_unlock(&cache_chain_mutex);
                break;
        case MEM_GOING_OFFLINE:
                mutex_lock(&cache_chain_mutex);
                ret = drain_cache_nodelists_node(nid);
                mutex_unlock(&cache_chain_mutex);
                break;
        case MEM_ONLINE:
        case MEM_OFFLINE:
        case MEM_CANCEL_ONLINE:
        case MEM_CANCEL_OFFLINE:
                break;
        }
out:
        return notifier_from_errno(ret);
}
#endif
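
/*
 * Swap a bootstrap, statically allocated kmem_list3 for a kmalloc'd one.
 * The spinlock is re-initialized rather than memcpy'd.
 */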
static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
                             int nodeid)
{
        struct kmem_list3 *ptr;

        ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
        BUG_ON(!ptr);

        memcpy(ptr, list, sizeof(struct kmem_list3));
        spin_lock_init(&ptr->list_lock);

        MAKE_ALL_LISTS(cachep, ptr, nodeid);
        cachep->nodelists[nodeid] = ptr;
}
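
/*
 * Set up all the kmem_list3s for a cache: point each online node at the
 * matching statically allocated bootstrap entry.
 */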
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
        int node;

        for_each_online_node(node) {
                cachep->nodelists[node] = &initkmem_list3[index + node];
                cachep->nodelists[node]->next_reap = jiffies +
                    REAPTIMEOUT_LIST3 +
                    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
        }
}
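
/*
 * Initialisation.  Called after the page allocator has been initialised
 * and before smp_init().  Bootstraps the allocator in stages: first the
 * cache of caches, then the kmalloc caches it needs, then it replaces the
 * static bootstrap array caches and node lists with properly allocated
 * ones.
 */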
void __init kmem_cache_init(void)
{
        size_t left_over;
        struct cache_sizes *sizes;
        struct cache_names *names;
        int i;
        int order;
        int node;

        if (num_possible_nodes() == 1)
                use_alien_caches = 0;

        for (i = 0; i < NUM_INIT_LISTS; i++) {
                kmem_list3_init(&initkmem_list3[i]);
                if (i < MAX_NUMNODES)
                        cache_cache.nodelists[i] = NULL;
        }
        set_up_list3s(&cache_cache, CACHE_CACHE);

        if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
                slab_break_gfp_order = BREAK_GFP_ORDER_HI;

        node = numa_mem_id();

        INIT_LIST_HEAD(&cache_chain);
        list_add(&cache_cache.next, &cache_chain);
        cache_cache.colour_off = cache_line_size();
        cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
        cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

        cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
                                  nr_node_ids * sizeof(struct kmem_list3 *);
#if DEBUG
        cache_cache.obj_size = cache_cache.buffer_size;
#endif
        cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
                                        cache_line_size());
        cache_cache.reciprocal_buffer_size =
                reciprocal_value(cache_cache.buffer_size);

        for (order = 0; order < MAX_ORDER; order++) {
                cache_estimate(order, cache_cache.buffer_size,
                        cache_line_size(), 0, &left_over, &cache_cache.num);
                if (cache_cache.num)
                        break;
        }
        BUG_ON(!cache_cache.num);
        cache_cache.gfporder = order;
        cache_cache.colour = left_over / cache_cache.colour_off;
        cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                                      sizeof(struct slab), cache_line_size());

        sizes = malloc_sizes;
        names = cache_names;

        sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
                                        sizes[INDEX_AC].cs_size,
                                        ARCH_KMALLOC_MINALIGN,
                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                        NULL);

        if (INDEX_AC != INDEX_L3) {
                sizes[INDEX_L3].cs_cachep =
                        kmem_cache_create(names[INDEX_L3].name,
                                sizes[INDEX_L3].cs_size,
                                ARCH_KMALLOC_MINALIGN,
                                ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                NULL);
        }

        slab_early_init = 0;

        while (sizes->cs_size != ULONG_MAX) {
                if (!sizes->cs_cachep) {
                        sizes->cs_cachep = kmem_cache_create(names->name,
                                        sizes->cs_size,
                                        ARCH_KMALLOC_MINALIGN,
                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,
                                        NULL);
                }
#ifdef CONFIG_ZONE_DMA
                sizes->cs_dmacachep = kmem_cache_create(
                                        names->name_dma,
                                        sizes->cs_size,
                                        ARCH_KMALLOC_MINALIGN,
                                        ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
                                                SLAB_PANIC,
                                        NULL);
#endif
                sizes++;
                names++;
        }

        {
                struct array_cache *ptr;

                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

                BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
                memcpy(ptr, cpu_cache_get(&cache_cache),
                       sizeof(struct arraycache_init));
                spin_lock_init(&ptr->lock);

                cache_cache.array[smp_processor_id()] = ptr;

                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

                BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
                       != &initarray_generic.cache);
                memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
                       sizeof(struct arraycache_init));
                spin_lock_init(&ptr->lock);

                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
                    ptr;
        }

        {
                int nid;

                for_each_online_node(nid) {
                        init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

                        init_list(malloc_sizes[INDEX_AC].cs_cachep,
                                  &initkmem_list3[SIZE_AC + nid], nid);

                        if (INDEX_AC != INDEX_L3) {
                                init_list(malloc_sizes[INDEX_L3].cs_cachep,
                                          &initkmem_list3[SIZE_L3 + nid], nid);
                        }
                }
        }

        g_cpucache_up = EARLY;
}

void __init kmem_cache_init_late(void)
{
        struct kmem_cache *cachep;

        g_cpucache_up = LATE;

        init_lock_keys();

        mutex_lock(&cache_chain_mutex);
        list_for_each_entry(cachep, &cache_chain, next)
                if (enable_cpucache(cachep, GFP_NOWAIT))
                        BUG();
        mutex_unlock(&cache_chain_mutex);

        g_cpucache_up = FULL;

        register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
        hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
}

static int __init cpucache_init(void)
{
        int cpu;

        for_each_online_cpu(cpu)
                start_cpu_timer(cpu);
        return 0;
}
__initcall(cpucache_init);
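
/*
 * Interface to the system's page allocator.  No need to hold the cache
 * lock here.  Allocates the backing pages for a slab, sets PageSlab on
 * each and accounts them as reclaimable or unreclaimable slab memory.
 */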
static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
        struct page *page;
        int nr_pages;
        int i;

#ifndef CONFIG_MMU
        flags |= __GFP_COMP;
#endif

        flags |= cachep->gfpflags;
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                flags |= __GFP_RECLAIMABLE;

        page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
        if (!page)
                return NULL;

        nr_pages = (1 << cachep->gfporder);
        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                add_zone_page_state(page_zone(page),
                        NR_SLAB_RECLAIMABLE, nr_pages);
        else
                add_zone_page_state(page_zone(page),
                        NR_SLAB_UNRECLAIMABLE, nr_pages);
        for (i = 0; i < nr_pages; i++)
                __SetPageSlab(page + i);

        if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);

                if (cachep->ctor)
                        kmemcheck_mark_uninitialized_pages(page, nr_pages);
                else
                        kmemcheck_mark_unallocated_pages(page, nr_pages);
        }

        return page_address(page);
}
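
/*
 * Interface to the system's page release.
 */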
static void kmem_freepages(struct kmem_cache *cachep, void *addr)
{
        unsigned long i = (1 << cachep->gfporder);
        struct page *page = virt_to_page(addr);
        const unsigned long nr_freed = i;

        kmemcheck_free_shadow(page, cachep->gfporder);

        if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                sub_zone_page_state(page_zone(page),
                        NR_SLAB_RECLAIMABLE, nr_freed);
        else
                sub_zone_page_state(page_zone(page),
                        NR_SLAB_UNRECLAIMABLE, nr_freed);
        while (i--) {
                BUG_ON(!PageSlab(page));
                __ClearPageSlab(page);
                page++;
        }
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += nr_freed;
        free_pages((unsigned long)addr, cachep->gfporder);
}

static void kmem_rcu_free(struct rcu_head *head)
{
        struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
        struct kmem_cache *cachep = slab_rcu->cachep;

        kmem_freepages(cachep, slab_rcu->addr);
        if (OFF_SLAB(cachep))
                kmem_cache_free(cachep->slabp_cache, slab_rcu);
}

#if DEBUG

#ifdef CONFIG_DEBUG_PAGEALLOC
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
                            unsigned long caller)
{
        int size = obj_size(cachep);

        addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];

        if (size < 5 * sizeof(unsigned long))
                return;

        *addr++ = 0x12345678;
        *addr++ = caller;
        *addr++ = smp_processor_id();
        size -= 3 * sizeof(unsigned long);
        {
                unsigned long *sptr = &caller;
                unsigned long svalue;

                while (!kstack_end(sptr)) {
                        svalue = *sptr++;
                        if (kernel_text_address(svalue)) {
                                *addr++ = svalue;
                                size -= sizeof(unsigned long);
                                if (size <= sizeof(unsigned long))
                                        break;
                        }
                }

        }
        *addr++ = 0x87654321;
}
#endif

static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
{
        int size = obj_size(cachep);
        addr = &((char *)addr)[obj_offset(cachep)];

        memset(addr, val, size);
        *(unsigned char *)(addr + size - 1) = POISON_END;
}

static void dump_line(char *data, int offset, int limit)
{
        int i;
        unsigned char error = 0;
        int bad_count = 0;

        printk(KERN_ERR "%03x: ", offset);
        for (i = 0; i < limit; i++) {
                if (data[offset + i] != POISON_FREE) {
                        error = data[offset + i];
                        bad_count++;
                }
        }
        print_hex_dump(KERN_CONT, "", 0, 16, 1,
                        &data[offset], limit, 1);

        if (bad_count == 1) {
                error ^= POISON_FREE;
                if (!(error & (error - 1))) {
                        printk(KERN_ERR "Single bit error detected. Probably "
                                        "bad RAM.\n");
#ifdef CONFIG_X86
                        printk(KERN_ERR "Run memtest86+ or a similar memory "
                                        "test tool.\n");
#else
                        printk(KERN_ERR "Run a memory test tool.\n");
#endif
                }
        }
}
#endif

#if DEBUG

static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
{
        int i, size;
        char *realobj;

        if (cachep->flags & SLAB_RED_ZONE) {
                printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
                        *dbg_redzone1(cachep, objp),
                        *dbg_redzone2(cachep, objp));
        }

        if (cachep->flags & SLAB_STORE_USER) {
                printk(KERN_ERR "Last user: [<%p>]",
                        *dbg_userword(cachep, objp));
                print_symbol("(%s)",
                                (unsigned long)*dbg_userword(cachep, objp));
                printk("\n");
        }
        realobj = (char *)objp + obj_offset(cachep);
        size = obj_size(cachep);
        for (i = 0; i < size && lines; i += 16, lines--) {
                int limit;
                limit = 16;
                if (i + limit > size)
                        limit = size - i;
                dump_line(realobj, i, limit);
        }
}

static void check_poison_obj(struct kmem_cache *cachep, void *objp)
{
        char *realobj;
        int size, i;
        int lines = 0;

        realobj = (char *)objp + obj_offset(cachep);
        size = obj_size(cachep);

        for (i = 0; i < size; i++) {
                char exp = POISON_FREE;
                if (i == size - 1)
                        exp = POISON_END;
                if (realobj[i] != exp) {
                        int limit;

                        if (lines == 0) {
                                printk(KERN_ERR
                                        "Slab corruption: %s start=%p, len=%d\n",
                                        cachep->name, realobj, size);
                                print_objinfo(cachep, objp, 0);
                        }

                        i = (i / 16) * 16;
                        limit = 16;
                        if (i + limit > size)
                                limit = size - i;
                        dump_line(realobj, i, limit);
                        i += 16;
                        lines++;

                        if (lines > 5)
                                break;
                }
        }
        if (lines != 0) {
                struct slab *slabp = virt_to_slab(objp);
                unsigned int objnr;

                objnr = obj_to_index(cachep, slabp, objp);
                if (objnr) {
                        objp = index_to_obj(cachep, slabp, objnr - 1);
                        realobj = (char *)objp + obj_offset(cachep);
                        printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
                               realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
                if (objnr + 1 < cachep->num) {
                        objp = index_to_obj(cachep, slabp, objnr + 1);
                        realobj = (char *)objp + obj_offset(cachep);
                        printk(KERN_ERR "Next obj: start=%p, len=%d\n",
                               realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
        }
}
#endif

#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
        int i;
        for (i = 0; i < cachep->num; i++) {
                void *objp = index_to_obj(cachep, slabp, i);

                if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
                        if (cachep->buffer_size % PAGE_SIZE == 0 &&
                                        OFF_SLAB(cachep))
                                kernel_map_pages(virt_to_page(objp),
                                        cachep->buffer_size / PAGE_SIZE, 1);
                        else
                                check_poison_obj(cachep, objp);
#else
                        check_poison_obj(cachep, objp);
#endif
                }
                if (cachep->flags & SLAB_RED_ZONE) {
                        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "start of a freed object "
                                           "was overwritten");
                        if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "end of a freed object "
                                           "was overwritten");
                }
        }
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
{
}
#endif
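
/*
 * Destroy all the objs in a slab and release the memory back to the
 * system.  Before calling, the slab must have been unlinked from the
 * cache; the node's list_lock is not needed.  For SLAB_DESTROY_BY_RCU
 * caches the actual release is deferred to an RCU callback.
 */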
static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
{
        void *addr = slabp->s_mem - slabp->colouroff;

        slab_destroy_debugcheck(cachep, slabp);
        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
                struct slab_rcu *slab_rcu;

                slab_rcu = (struct slab_rcu *)slabp;
                slab_rcu->cachep = cachep;
                slab_rcu->addr = addr;
                call_rcu(&slab_rcu->head, kmem_rcu_free);
        } else {
                kmem_freepages(cachep, addr);
                if (OFF_SLAB(cachep))
                        kmem_cache_free(cachep->slabp_cache, slabp);
        }
}

static void __kmem_cache_destroy(struct kmem_cache *cachep)
{
        int i;
        struct kmem_list3 *l3;

        for_each_online_cpu(i)
                kfree(cachep->array[i]);

        for_each_online_node(i) {
                l3 = cachep->nodelists[i];
                if (l3) {
                        kfree(l3->shared);
                        free_alien_cache(l3->alien);
                        kfree(l3);
                }
        }
        kmem_cache_free(&cache_cache, cachep);
}
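
/**
 * calculate_slab_order - calculate size (page order) of slabs
 * @cachep: pointer to the cache that is being created
 * @size: size of objects to be created in this cache.
 * @align: required alignment for the objects.
 * @flags: slab allocation flags
 *
 * Also calculates the number of objects per slab.  Returns the number
 * of left-over bytes in the slab.
 */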
static size_t calculate_slab_order(struct kmem_cache *cachep,
                        size_t size, size_t align, unsigned long flags)
{
        unsigned long offslab_limit;
        size_t left_over = 0;
        int gfporder;

        for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
                unsigned int num;
                size_t remainder;

                cache_estimate(gfporder, size, align, flags, &remainder, &num);
                if (!num)
                        continue;

                if (flags & CFLGS_OFF_SLAB) {
                        offslab_limit = size - sizeof(struct slab);
                        offslab_limit /= sizeof(kmem_bufctl_t);

                        if (num > offslab_limit)
                                break;
                }

                cachep->num = num;
                cachep->gfporder = gfporder;
                left_over = remainder;

                if (flags & SLAB_RECLAIM_ACCOUNT)
                        break;

                if (gfporder >= slab_break_gfp_order)
                        break;

                if (left_over * 8 <= (PAGE_SIZE << gfporder))
                        break;
        }
        return left_over;
}

static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        if (g_cpucache_up == FULL)
                return enable_cpucache(cachep, gfp);

        if (g_cpucache_up == NONE) {
                cachep->array[smp_processor_id()] = &initarray_generic.cache;

                set_up_list3s(cachep, SIZE_AC);
                if (INDEX_AC == INDEX_L3)
                        g_cpucache_up = PARTIAL_L3;
                else
                        g_cpucache_up = PARTIAL_AC;
        } else {
                cachep->array[smp_processor_id()] =
                        kmalloc(sizeof(struct arraycache_init), gfp);

                if (g_cpucache_up == PARTIAL_AC) {
                        set_up_list3s(cachep, SIZE_L3);
                        g_cpucache_up = PARTIAL_L3;
                } else {
                        int node;
                        for_each_online_node(node) {
                                cachep->nodelists[node] =
                                    kmalloc_node(sizeof(struct kmem_list3),
                                                gfp, node);
                                BUG_ON(!cachep->nodelists[node]);
                                kmem_list3_init(cachep->nodelists[node]);
                        }
                }
        }
        cachep->nodelists[numa_mem_id()]->next_reap =
                        jiffies + REAPTIMEOUT_LIST3 +
                        ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

        cpu_cache_get(cachep)->avail = 0;
        cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
        cpu_cache_get(cachep)->batchcount = 1;
        cpu_cache_get(cachep)->touched = 0;
        cachep->batchcount = 1;
        cachep->limit = BOOT_CPUCACHE_ENTRIES;
        return 0;
}
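
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are:
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */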
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
        unsigned long flags, void (*ctor)(void *))
{
        size_t left_over, slab_size, ralign;
        struct kmem_cache *cachep = NULL, *pc;
        gfp_t gfp;

        if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
            size > KMALLOC_MAX_SIZE) {
                printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
                                name);
                BUG();
        }

        if (slab_is_available()) {
                get_online_cpus();
                mutex_lock(&cache_chain_mutex);
        }

        list_for_each_entry(pc, &cache_chain, next) {
                char tmp;
                int res;

                res = probe_kernel_address(pc->name, tmp);
                if (res) {
                        printk(KERN_ERR
                               "SLAB: cache with size %d has lost its name\n",
                               pc->buffer_size);
                        continue;
                }

                if (!strcmp(pc->name, name)) {
                        printk(KERN_ERR
                               "kmem_cache_create: duplicate cache %s\n", name);
                        dump_stack();
                        goto oops;
                }
        }

#if DEBUG
        WARN_ON(strchr(name, ' '));
#if FORCED_DEBUG
        if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
                                                2 * sizeof(unsigned long long)))
                flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
        if (!(flags & SLAB_DESTROY_BY_RCU))
                flags |= SLAB_POISON;
#endif
        if (flags & SLAB_DESTROY_BY_RCU)
                BUG_ON(flags & SLAB_POISON);
#endif

        BUG_ON(flags & ~CREATE_MASK);

        if (size & (BYTES_PER_WORD - 1)) {
                size += (BYTES_PER_WORD - 1);
                size &= ~(BYTES_PER_WORD - 1);
        }

        if (flags & SLAB_HWCACHE_ALIGN) {
                ralign = cache_line_size();
                while (size <= ralign / 2)
                        ralign /= 2;
        } else {
                ralign = BYTES_PER_WORD;
        }

        if (flags & SLAB_STORE_USER)
                ralign = BYTES_PER_WORD;

        if (flags & SLAB_RED_ZONE) {
                ralign = REDZONE_ALIGN;
                size += REDZONE_ALIGN - 1;
                size &= ~(REDZONE_ALIGN - 1);
        }

        if (ralign < ARCH_SLAB_MINALIGN) {
                ralign = ARCH_SLAB_MINALIGN;
        }

        if (ralign < align) {
                ralign = align;
        }

        if (ralign > __alignof__(unsigned long long))
                flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);

        align = ralign;

        if (slab_is_available())
                gfp = GFP_KERNEL;
        else
                gfp = GFP_NOWAIT;

        cachep = kmem_cache_zalloc(&cache_cache, gfp);
        if (!cachep)
                goto oops;

        cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
#if DEBUG
        cachep->obj_size = size;

        if (flags & SLAB_RED_ZONE) {
                cachep->obj_offset += sizeof(unsigned long long);
                size += 2 * sizeof(unsigned long long);
        }
        if (flags & SLAB_STORE_USER) {
                if (flags & SLAB_RED_ZONE)
                        size += REDZONE_ALIGN;
                else
                        size += BYTES_PER_WORD;
        }
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
        if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
            && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
                cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
                size = PAGE_SIZE;
        }
#endif
#endif

        if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
            !(flags & SLAB_NOLEAKTRACE))
                flags |= CFLGS_OFF_SLAB;

        size = ALIGN(size, align);

        left_over = calculate_slab_order(cachep, size, align, flags);

        if (!cachep->num) {
                printk(KERN_ERR
                       "kmem_cache_create: couldn't create cache %s.\n", name);
                kmem_cache_free(&cache_cache, cachep);
                cachep = NULL;
                goto oops;
        }
        slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
                          + sizeof(struct slab), align);

        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
                flags &= ~CFLGS_OFF_SLAB;
                left_over -= slab_size;
        }

        if (flags & CFLGS_OFF_SLAB) {
                slab_size =
                    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);

#ifdef CONFIG_PAGE_POISONING
                if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
                        flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
#endif
        }

        cachep->colour_off = cache_line_size();
        if (cachep->colour_off < align)
                cachep->colour_off = align;
        cachep->colour = left_over / cachep->colour_off;
        cachep->slab_size = slab_size;
        cachep->flags = flags;
        cachep->gfpflags = 0;
        if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
                cachep->gfpflags |= GFP_DMA;
        cachep->buffer_size = size;
        cachep->reciprocal_buffer_size = reciprocal_value(size);

        if (flags & CFLGS_OFF_SLAB) {
                cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
                BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
        }
        cachep->ctor = ctor;
        cachep->name = name;

        if (setup_cpu_cache(cachep, gfp)) {
                __kmem_cache_destroy(cachep);
                cachep = NULL;
                goto oops;
        }

        if (flags & SLAB_DEBUG_OBJECTS) {
                WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);

                slab_set_debugobj_lock_classes(cachep);
        }

        list_add(&cachep->next, &cache_chain);
oops:
        if (!cachep && (flags & SLAB_PANIC))
                panic("kmem_cache_create(): failed to create slab `%s'\n",
                      name);
        if (slab_is_available()) {
                mutex_unlock(&cache_chain_mutex);
                put_online_cpus();
        }
        return cachep;
}
EXPORT_SYMBOL(kmem_cache_create);

#if DEBUG
static void check_irq_off(void)
{
        BUG_ON(!irqs_disabled());
}

static void check_irq_on(void)
{
        BUG_ON(irqs_disabled());
}

static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
        check_irq_off();
        assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
#endif
}

static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
        check_irq_off();
        assert_spin_locked(&cachep->nodelists[node]->list_lock);
#endif
}

#else
#define check_irq_off() do { } while(0)
#define check_irq_on()  do { } while(0)
#define check_spinlock_acquired(x) do { } while(0)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif

static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
                        struct array_cache *ac,
                        int force, int node);

static void do_drain(void *arg)
{
        struct kmem_cache *cachep = arg;
        struct array_cache *ac;
        int node = numa_mem_id();

        check_irq_off();
        ac = cpu_cache_get(cachep);
        spin_lock(&cachep->nodelists[node]->list_lock);
        free_block(cachep, ac->entry, ac->avail, node);
        spin_unlock(&cachep->nodelists[node]->list_lock);
        ac->avail = 0;
}

static void drain_cpu_caches(struct kmem_cache *cachep)
{
        struct kmem_list3 *l3;
        int node;

        on_each_cpu(do_drain, cachep, 1);
        check_irq_on();
        for_each_online_node(node) {
                l3 = cachep->nodelists[node];
                if (l3 && l3->alien)
                        drain_alien_cache(cachep, l3->alien);
        }

        for_each_online_node(node) {
                l3 = cachep->nodelists[node];
                if (l3)
                        drain_array(cachep, l3, l3->shared, 1, node);
        }
}
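
/*
 * Remove slabs from the free list.  @tofree specifies the maximum number
 * of slabs to drain.  Returns the actual number of slabs released.
 */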
static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_list3 *l3, int tofree)
{
        struct list_head *p;
        int nr_freed;
        struct slab *slabp;

        nr_freed = 0;
        while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {

                spin_lock_irq(&l3->list_lock);
                p = l3->slabs_free.prev;
                if (p == &l3->slabs_free) {
                        spin_unlock_irq(&l3->list_lock);
                        goto out;
                }

                slabp = list_entry(p, struct slab, list);
#if DEBUG
                BUG_ON(slabp->inuse);
#endif
                list_del(&slabp->list);

                l3->free_objects -= cache->num;
                spin_unlock_irq(&l3->list_lock);
                slab_destroy(cache, slabp);
                nr_freed++;
        }
out:
        return nr_freed;
}

static int __cache_shrink(struct kmem_cache *cachep)
{
        int ret = 0, i = 0;
        struct kmem_list3 *l3;

        drain_cpu_caches(cachep);

        check_irq_on();
        for_each_online_node(i) {
                l3 = cachep->nodelists[i];
                if (!l3)
                        continue;

                drain_freelist(cachep, l3, l3->free_objects);

                ret += !list_empty(&l3->slabs_full) ||
                        !list_empty(&l3->slabs_partial);
        }
        return (ret ? 1 : 0);
}
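
/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.  To help debugging, a
 * zero exit status indicates all slabs were released.
 */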
int kmem_cache_shrink(struct kmem_cache *cachep)
{
        int ret;
        BUG_ON(!cachep || in_interrupt());

        get_online_cpus();
        mutex_lock(&cache_chain_mutex);
        ret = __cache_shrink(cachep);
        mutex_unlock(&cache_chain_mutex);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
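
/**
 * kmem_cache_destroy - delete a cache
 * @cachep: the cache to destroy
 *
 * Remove a &struct kmem_cache object from the slab cache.  The cache must
 * be empty before calling, and the caller must guarantee that no one will
 * allocate from the cache during destruction.  For SLAB_DESTROY_BY_RCU
 * caches an rcu_barrier() waits for in-flight RCU frees to complete.
 */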
void kmem_cache_destroy(struct kmem_cache *cachep)
{
        BUG_ON(!cachep || in_interrupt());

        get_online_cpus();
        mutex_lock(&cache_chain_mutex);

        list_del(&cachep->next);
        if (__cache_shrink(cachep)) {
                slab_error(cachep, "Can't free all objects");
                list_add(&cachep->next, &cache_chain);
                mutex_unlock(&cache_chain_mutex);
                put_online_cpus();
                return;
        }

        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
                rcu_barrier();

        __kmem_cache_destroy(cachep);
        mutex_unlock(&cache_chain_mutex);
        put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
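
/*
 * Get the memory for a slab management object.  For an off-slab cache the
 * descriptor is allocated from the general cache chosen at create time;
 * otherwise it is carved out of the start of the slab itself, after the
 * colour offset.
 */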
static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
                                   int colour_off, gfp_t local_flags,
                                   int nodeid)
{
        struct slab *slabp;

        if (OFF_SLAB(cachep)) {
                slabp = kmem_cache_alloc_node(cachep->slabp_cache,
                                              local_flags, nodeid);
                kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
                                   local_flags);
                if (!slabp)
                        return NULL;
        } else {
                slabp = objp + colour_off;
                colour_off += cachep->slab_size;
        }
        slabp->inuse = 0;
        slabp->colouroff = colour_off;
        slabp->s_mem = objp + colour_off;
        slabp->nodeid = nodeid;
        slabp->free = 0;
        return slabp;
}

static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
{
        return (kmem_bufctl_t *) (slabp + 1);
}
2743
2744static void cache_init_objs(struct kmem_cache *cachep,
2745 struct slab *slabp)
2746{
2747 int i;
2748
2749 for (i = 0; i < cachep->num; i++) {
2750 void *objp = index_to_obj(cachep, slabp, i);
2751#if DEBUG
		/* need to poison the objs? */
2753 if (cachep->flags & SLAB_POISON)
2754 poison_obj(cachep, objp, POISON_FREE);
2755 if (cachep->flags & SLAB_STORE_USER)
2756 *dbg_userword(cachep, objp) = NULL;
2757
2758 if (cachep->flags & SLAB_RED_ZONE) {
2759 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2760 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2761 }
		/*
		 * Constructors are not allowed to allocate memory from the same
		 * cache which they are a constructor for.  Otherwise, deadlock.
		 * They must also be threaded.
		 */
2767 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2768 cachep->ctor(objp + obj_offset(cachep));
2769
2770 if (cachep->flags & SLAB_RED_ZONE) {
2771 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2772 slab_error(cachep, "constructor overwrote the"
2773 " end of an object");
2774 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2775 slab_error(cachep, "constructor overwrote the"
2776 " start of an object");
2777 }
2778 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2779 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2780 kernel_map_pages(virt_to_page(objp),
2781 cachep->buffer_size / PAGE_SIZE, 0);
2782#else
2783 if (cachep->ctor)
2784 cachep->ctor(objp);
2785#endif
2786 slab_bufctl(slabp)[i] = i + 1;
2787 }
2788 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2789}
2790
2791static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2792{
2793 if (CONFIG_ZONE_DMA_FLAG) {
2794 if (flags & GFP_DMA)
2795 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2796 else
2797 BUG_ON(cachep->gfpflags & GFP_DMA);
2798 }
2799}
2800
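/*
 * Pop the first free object off the slab's bufctl free list and bump the
 * in-use count.  The caller must hold the owning node's list_lock.
 */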
2801static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2802 int nodeid)
2803{
2804 void *objp = index_to_obj(cachep, slabp, slabp->free);
2805 kmem_bufctl_t next;
2806
2807 slabp->inuse++;
2808 next = slab_bufctl(slabp)[slabp->free];
2809#if DEBUG
2810 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2811 WARN_ON(slabp->nodeid != nodeid);
2812#endif
2813 slabp->free = next;
2814
2815 return objp;
2816}
2817
2818static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2819 void *objp, int nodeid)
2820{
2821 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2822
2823#if DEBUG
2824
2825 WARN_ON(slabp->nodeid != nodeid);
2826
2827 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2828 printk(KERN_ERR "slab: double free detected in cache "
2829 "'%s', objp %p\n", cachep->name, objp);
2830 BUG();
2831 }
2832#endif
2833 slab_bufctl(slabp)[objnr] = slabp->free;
2834 slabp->free = objnr;
2835 slabp->inuse--;
2836}
2837
/*
 * Map pages beginning at addr to the given cache and slab. This is required
 * for the slab allocator to be able to lookup the cache and slab of a
 * virtual address for kfree, ksize, and slab debugging.
 */
2843static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2844 void *addr)
2845{
2846 int nr_pages;
2847 struct page *page;
2848
2849 page = virt_to_page(addr);
2850
2851 nr_pages = 1;
2852 if (likely(!PageCompound(page)))
2853 nr_pages <<= cache->gfporder;
2854
2855 do {
2856 page_set_cache(page, cache);
2857 page_set_slab(page, slab);
2858 page++;
2859 } while (--nr_pages);
2860}
2861
/*
 * Grow (by 1) the number of slabs within a cache.  This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
2866static int cache_grow(struct kmem_cache *cachep,
2867 gfp_t flags, int nodeid, void *objp)
2868{
2869 struct slab *slabp;
2870 size_t offset;
2871 gfp_t local_flags;
2872 struct kmem_list3 *l3;
2873
	/*
	 * Be lazy and only check for valid flags here, keeping it out of the
	 * critical path in kmem_cache_alloc().
	 */
2878 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2879 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2880
	/* Take the l3 list lock to change the colour_next on this node */
2882 check_irq_off();
2883 l3 = cachep->nodelists[nodeid];
2884 spin_lock(&l3->list_lock);
2885
	/* Get colour for the slab, and calc the next value. */
2887 offset = l3->colour_next;
2888 l3->colour_next++;
2889 if (l3->colour_next >= cachep->colour)
2890 l3->colour_next = 0;
2891 spin_unlock(&l3->list_lock);
2892
2893 offset *= cachep->colour_off;
2894
2895 if (local_flags & __GFP_WAIT)
2896 local_irq_enable();
2897
	/*
	 * The test for missing atomic flag is performed here, rather than
	 * the more obvious place, simply to reduce the critical path length
	 * in kmem_cache_alloc(). If a caller is seriously mis-aligned then
	 * this should be barely visible - hopefully it keeps callers happy.
	 */
2904 kmem_flagcheck(cachep, flags);
2905
	/*
	 * Get mem for the objs.  Attempt to allocate a physical page from
	 * 'nodeid'.
	 */
2910 if (!objp)
2911 objp = kmem_getpages(cachep, local_flags, nodeid);
2912 if (!objp)
2913 goto failed;
2914
	/* Get slab management. */
2916 slabp = alloc_slabmgmt(cachep, objp, offset,
2917 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2918 if (!slabp)
2919 goto opps1;
2920
2921 slab_map_pages(cachep, slabp, objp);
2922
2923 cache_init_objs(cachep, slabp);
2924
2925 if (local_flags & __GFP_WAIT)
2926 local_irq_disable();
2927 check_irq_off();
2928 spin_lock(&l3->list_lock);
2929
	/* Make slab active. */
2931 list_add_tail(&slabp->list, &(l3->slabs_free));
2932 STATS_INC_GROWN(cachep);
2933 l3->free_objects += cachep->num;
2934 spin_unlock(&l3->list_lock);
2935 return 1;
2936opps1:
2937 kmem_freepages(cachep, objp);
2938failed:
2939 if (local_flags & __GFP_WAIT)
2940 local_irq_disable();
2941 return 0;
2942}
2943
2944#if DEBUG

/*
 * Perform extra freeing checks:
 * - detect bad pointers.
 * - POISON/RED_ZONE checking
 */
2951static void kfree_debugcheck(const void *objp)
2952{
2953 if (!virt_addr_valid(objp)) {
2954 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2955 (unsigned long)objp);
2956 BUG();
2957 }
2958}
2959
2960static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2961{
2962 unsigned long long redzone1, redzone2;
2963
2964 redzone1 = *dbg_redzone1(cache, obj);
2965 redzone2 = *dbg_redzone2(cache, obj);
2966
	/*
	 * Redzone is ok.
	 */
2970 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2971 return;
2972
2973 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2974 slab_error(cache, "double free detected");
2975 else
2976 slab_error(cache, "memory outside object was overwritten");
2977
2978 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2979 obj, redzone1, redzone2);
2980}
2981
2982static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2983 void *caller)
2984{
2985 struct page *page;
2986 unsigned int objnr;
2987 struct slab *slabp;
2988
2989 BUG_ON(virt_to_cache(objp) != cachep);
2990
2991 objp -= obj_offset(cachep);
2992 kfree_debugcheck(objp);
2993 page = virt_to_head_page(objp);
2994
2995 slabp = page_get_slab(page);
2996
2997 if (cachep->flags & SLAB_RED_ZONE) {
2998 verify_redzone_free(cachep, objp);
2999 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
3000 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
3001 }
3002 if (cachep->flags & SLAB_STORE_USER)
3003 *dbg_userword(cachep, objp) = caller;
3004
3005 objnr = obj_to_index(cachep, slabp, objp);
3006
3007 BUG_ON(objnr >= cachep->num);
3008 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
3009
3010#ifdef CONFIG_DEBUG_SLAB_LEAK
3011 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
3012#endif
3013 if (cachep->flags & SLAB_POISON) {
3014#ifdef CONFIG_DEBUG_PAGEALLOC
		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
3016 store_stackinfo(cachep, objp, (unsigned long)caller);
3017 kernel_map_pages(virt_to_page(objp),
3018 cachep->buffer_size / PAGE_SIZE, 0);
3019 } else {
3020 poison_obj(cachep, objp, POISON_FREE);
3021 }
3022#else
3023 poison_obj(cachep, objp, POISON_FREE);
3024#endif
3025 }
3026 return objp;
3027}
3028
3029static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
3030{
3031 kmem_bufctl_t i;
3032 int entries = 0;
3033
	/* Check slab's freelist to see if this obj is there. */
3035 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
3036 entries++;
3037 if (entries > cachep->num || i >= cachep->num)
3038 goto bad;
3039 }
3040 if (entries != cachep->num - slabp->inuse) {
3041bad:
3042 printk(KERN_ERR "slab: Internal list corruption detected in "
3043 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
3044 cachep->name, cachep->num, slabp, slabp->inuse);
3045 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
3046 sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
3047 1);
3048 BUG();
3049 }
3050}
3051#else
3052#define kfree_debugcheck(x) do { } while(0)
3053#define cache_free_debugcheck(x,objp,z) (objp)
3054#define check_slabp(x,y) do { } while(0)
3055#endif
3056
3057static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
3058{
3059 int batchcount;
3060 struct kmem_list3 *l3;
3061 struct array_cache *ac;
3062 int node;
3063
3064retry:
3065 check_irq_off();
3066 node = numa_mem_id();
3067 ac = cpu_cache_get(cachep);
3068 batchcount = ac->batchcount;
3069 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */
3075 batchcount = BATCHREFILL_LIMIT;
3076 }
3077 l3 = cachep->nodelists[node];
3078
3079 BUG_ON(ac->avail > 0 || !l3);
3080 spin_lock(&l3->list_lock);
3081
	/* See if we can refill from the shared array */
3083 if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
3084 l3->shared->touched = 1;
3085 goto alloc_done;
3086 }
3087
3088 while (batchcount > 0) {
3089 struct list_head *entry;
3090 struct slab *slabp;
3091
3092 entry = l3->slabs_partial.next;
3093 if (entry == &l3->slabs_partial) {
3094 l3->free_touched = 1;
3095 entry = l3->slabs_free.next;
3096 if (entry == &l3->slabs_free)
3097 goto must_grow;
3098 }
3099
3100 slabp = list_entry(entry, struct slab, list);
3101 check_slabp(cachep, slabp);
3102 check_spinlock_acquired(cachep);

		/*
		 * The slab was either on the partial or the free list, so
		 * there must be at least one object available for
		 * allocation.
		 */
3109 BUG_ON(slabp->inuse >= cachep->num);
3110
3111 while (slabp->inuse < cachep->num && batchcount--) {
3112 STATS_INC_ALLOCED(cachep);
3113 STATS_INC_ACTIVE(cachep);
3114 STATS_SET_HIGH(cachep);
3115
3116 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
3117 node);
3118 }
3119 check_slabp(cachep, slabp);
3120
		/* move slabp to correct slabp list: */
3122 list_del(&slabp->list);
3123 if (slabp->free == BUFCTL_END)
3124 list_add(&slabp->list, &l3->slabs_full);
3125 else
3126 list_add(&slabp->list, &l3->slabs_partial);
3127 }
3128
3129must_grow:
3130 l3->free_objects -= ac->avail;
3131alloc_done:
3132 spin_unlock(&l3->list_lock);
3133
3134 if (unlikely(!ac->avail)) {
3135 int x;
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		if (!x && ac->avail == 0)	/* no objects in sight? abort */
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */
			goto retry;
3145 }
3146 ac->touched = 1;
3147 return ac->entry[--ac->avail];
3148}
3149
3150static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3151 gfp_t flags)
3152{
3153 might_sleep_if(flags & __GFP_WAIT);
3154#if DEBUG
3155 kmem_flagcheck(cachep, flags);
3156#endif
3157}
3158
3159#if DEBUG
3160static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3161 gfp_t flags, void *objp, void *caller)
3162{
3163 if (!objp)
3164 return objp;
3165 if (cachep->flags & SLAB_POISON) {
3166#ifdef CONFIG_DEBUG_PAGEALLOC
3167 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3168 kernel_map_pages(virt_to_page(objp),
3169 cachep->buffer_size / PAGE_SIZE, 1);
3170 else
3171 check_poison_obj(cachep, objp);
3172#else
3173 check_poison_obj(cachep, objp);
3174#endif
3175 poison_obj(cachep, objp, POISON_INUSE);
3176 }
3177 if (cachep->flags & SLAB_STORE_USER)
3178 *dbg_userword(cachep, objp) = caller;
3179
3180 if (cachep->flags & SLAB_RED_ZONE) {
3181 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3182 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3183 slab_error(cachep, "double free, or memory outside"
3184 " object was overwritten");
3185 printk(KERN_ERR
3186 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3187 objp, *dbg_redzone1(cachep, objp),
3188 *dbg_redzone2(cachep, objp));
3189 }
3190 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3191 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3192 }
3193#ifdef CONFIG_DEBUG_SLAB_LEAK
3194 {
3195 struct slab *slabp;
3196 unsigned objnr;
3197
3198 slabp = page_get_slab(virt_to_head_page(objp));
3199 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3200 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3201 }
3202#endif
3203 objp += obj_offset(cachep);
3204 if (cachep->ctor && cachep->flags & SLAB_POISON)
3205 cachep->ctor(objp);
3206 if (ARCH_SLAB_MINALIGN &&
3207 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
3208 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3209 objp, (int)ARCH_SLAB_MINALIGN);
3210 }
3211 return objp;
3212}
3213#else
3214#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3215#endif
3216
3217static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3218{
3219 if (cachep == &cache_cache)
3220 return false;
3221
3222 return should_failslab(obj_size(cachep), flags, cachep->flags);
3223}
3224
3225static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3226{
3227 void *objp;
3228 struct array_cache *ac;
3229
3230 check_irq_off();
3231
3232 ac = cpu_cache_get(cachep);
3233 if (likely(ac->avail)) {
3234 STATS_INC_ALLOCHIT(cachep);
3235 ac->touched = 1;
3236 objp = ac->entry[--ac->avail];
3237 } else {
3238 STATS_INC_ALLOCMISS(cachep);
3239 objp = cache_alloc_refill(cachep, flags);
		/*
		 * the 'ac' may be updated by cache_alloc_refill(),
		 * and kmemleak_erase() requires its correct value.
		 */
3244 ac = cpu_cache_get(cachep);
3245 }

	/*
	 * To avoid a false negative, if an object that is in one of the
	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
	 * treat the array pointers as a reference to the object.
	 */
3251 if (objp)
3252 kmemleak_erase(&ac->entry[ac->avail]);
3253 return objp;
3254}
3255
3256#ifdef CONFIG_NUMA
/*
 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
 *
 * If we are in_interrupt, then process context, including cpusets and
 * mempolicy, may not apply and should not be used for allocation policy.
 */
3263static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3264{
3265 int nid_alloc, nid_here;
3266
3267 if (in_interrupt() || (flags & __GFP_THISNODE))
3268 return NULL;
3269 nid_alloc = nid_here = numa_mem_id();
3270 get_mems_allowed();
3271 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3272 nid_alloc = cpuset_slab_spread_node();
3273 else if (current->mempolicy)
3274 nid_alloc = slab_node(current->mempolicy);
3275 put_mems_allowed();
3276 if (nid_alloc != nid_here)
3277 return ____cache_alloc_node(cachep, flags, nid_alloc);
3278 return NULL;
3279}
3280
/*
 * Fallback function if there was no memory available and no objects on a
 * certain node and fall back is permitted. First we scan all the
 * available nodelists for available objects. If that fails then we
 * perform an allocation without specifying a node. This allows the page
 * allocator to do its reclaim / fallback magic. We then insert the
 * slab into the proper nodelist and then allocate from it.
 */
3289static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3290{
3291 struct zonelist *zonelist;
3292 gfp_t local_flags;
3293 struct zoneref *z;
3294 struct zone *zone;
3295 enum zone_type high_zoneidx = gfp_zone(flags);
3296 void *obj = NULL;
3297 int nid;
3298
3299 if (flags & __GFP_THISNODE)
3300 return NULL;
3301
3302 get_mems_allowed();
3303 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
3304 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3305
3306retry:
	/*
	 * Look through allowed nodes for objects available
	 * from existing per node queues.
	 */
3311 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3312 nid = zone_to_nid(zone);
3313
3314 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3315 cache->nodelists[nid] &&
3316 cache->nodelists[nid]->free_objects) {
3317 obj = ____cache_alloc_node(cache,
3318 flags | GFP_THISNODE, nid);
3319 if (obj)
3320 break;
3321 }
3322 }
3323
3324 if (!obj) {
		/*
		 * This allocation will be performed within the constraints
		 * of the current cpuset / memory policy requirements.
		 * We may trigger various forms of reclaim on the allowed
		 * set and go into memory reservoir if we do not fulfill
		 * the requirements...
		 */
3331 if (local_flags & __GFP_WAIT)
3332 local_irq_enable();
3333 kmem_flagcheck(cache, flags);
3334 obj = kmem_getpages(cache, local_flags, numa_mem_id());
3335 if (local_flags & __GFP_WAIT)
3336 local_irq_disable();
3337 if (obj) {
			/*
			 * Insert into the appropriate per node queues
			 */
3341 nid = page_to_nid(virt_to_page(obj));
3342 if (cache_grow(cache, flags, nid, obj)) {
3343 obj = ____cache_alloc_node(cache,
3344 flags | GFP_THISNODE, nid);
3345 if (!obj)
				/*
				 * Another processor may allocate the
				 * objects in the slab since we are
				 * not holding any locks.
				 */
3351 goto retry;
3352 } else {
			/* cache_grow already freed obj */
3354 obj = NULL;
3355 }
3356 }
3357 }
3358 put_mems_allowed();
3359 return obj;
3360}
3361
/*
 * An interface to enable slab creation on nodeid
 */
3365static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3366 int nodeid)
3367{
3368 struct list_head *entry;
3369 struct slab *slabp;
3370 struct kmem_list3 *l3;
3371 void *obj;
3372 int x;
3373
3374 l3 = cachep->nodelists[nodeid];
3375 BUG_ON(!l3);
3376
3377retry:
3378 check_irq_off();
3379 spin_lock(&l3->list_lock);
3380 entry = l3->slabs_partial.next;
3381 if (entry == &l3->slabs_partial) {
3382 l3->free_touched = 1;
3383 entry = l3->slabs_free.next;
3384 if (entry == &l3->slabs_free)
3385 goto must_grow;
3386 }
3387
3388 slabp = list_entry(entry, struct slab, list);
3389 check_spinlock_acquired_node(cachep, nodeid);
3390 check_slabp(cachep, slabp);
3391
3392 STATS_INC_NODEALLOCS(cachep);
3393 STATS_INC_ACTIVE(cachep);
3394 STATS_SET_HIGH(cachep);
3395
3396 BUG_ON(slabp->inuse == cachep->num);
3397
3398 obj = slab_get_obj(cachep, slabp, nodeid);
3399 check_slabp(cachep, slabp);
3400 l3->free_objects--;
3401
3402 list_del(&slabp->list);
	/* move slabp to correct slabp list: */
3404 if (slabp->free == BUFCTL_END)
3405 list_add(&slabp->list, &l3->slabs_full);
3406 else
3407 list_add(&slabp->list, &l3->slabs_partial);
3408
3409 spin_unlock(&l3->list_lock);
3410 goto done;
3411
3412must_grow:
3413 spin_unlock(&l3->list_lock);
3414 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3415 if (x)
3416 goto retry;
3417
3418 return fallback_alloc(cachep, flags);
3419
3420done:
3421 return obj;
3422}
3423
/**
 * kmem_cache_alloc_node - Allocate an object on the specified node
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 * @nodeid: node number of the target node.
 * @caller: return address of caller, used for debug information
 *
 * Identical to kmem_cache_alloc but it will allocate memory on the given
 * node, which can improve the performance for cpu bound structures.
 *
 * Fallback to other node is possible if __GFP_THISNODE is not set.
 */
3436static __always_inline void *
3437__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3438 void *caller)
3439{
3440 unsigned long save_flags;
3441 void *ptr;
3442 int slab_node = numa_mem_id();
3443
3444 flags &= gfp_allowed_mask;
3445
3446 lockdep_trace_alloc(flags);
3447
3448 if (slab_should_failslab(cachep, flags))
3449 return NULL;
3450
3451 cache_alloc_debugcheck_before(cachep, flags);
3452 local_irq_save(save_flags);
3453
3454 if (nodeid == NUMA_NO_NODE)
3455 nodeid = slab_node;
3456
3457 if (unlikely(!cachep->nodelists[nodeid])) {
		/* Node not bootstrapped yet */
3459 ptr = fallback_alloc(cachep, flags);
3460 goto out;
3461 }
3462
3463 if (nodeid == slab_node) {
		/*
		 * Use the locally cached objects if possible.
		 * However ____cache_alloc does not allow fallback
		 * to other nodes. It may fail while we still have
		 * objects on other nodes available.
		 */
3470 ptr = ____cache_alloc(cachep, flags);
3471 if (ptr)
3472 goto out;
3473 }
3474
3475 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3476 out:
3477 local_irq_restore(save_flags);
3478 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3479 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
3480 flags);
3481
3482 if (likely(ptr))
3483 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
3484
3485 if (unlikely((flags & __GFP_ZERO) && ptr))
3486 memset(ptr, 0, obj_size(cachep));
3487
3488 return ptr;
3489}
3490
3491static __always_inline void *
3492__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3493{
3494 void *objp;
3495
3496 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3497 objp = alternate_node_alloc(cache, flags);
3498 if (objp)
3499 goto out;
3500 }
3501 objp = ____cache_alloc(cache, flags);
3502
	/*
	 * We may just have run out of memory on the local node.
	 * ____cache_alloc_node() knows how to locate memory on other nodes
	 */
3507 if (!objp)
3508 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3509
3510 out:
3511 return objp;
3512}
3513#else
3514
3515static __always_inline void *
3516__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3517{
3518 return ____cache_alloc(cachep, flags);
3519}
3520
3521#endif
3522
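/*
 * Common entry point for kmem_cache_alloc() and kmalloc(): run the debug
 * hooks, disable interrupts around the per-cpu fast path, then apply
 * __GFP_ZERO and the kmemleak/kmemcheck hooks to the result.
 */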
3523static __always_inline void *
3524__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3525{
3526 unsigned long save_flags;
3527 void *objp;
3528
3529 flags &= gfp_allowed_mask;
3530
3531 lockdep_trace_alloc(flags);
3532
3533 if (slab_should_failslab(cachep, flags))
3534 return NULL;
3535
3536 cache_alloc_debugcheck_before(cachep, flags);
3537 local_irq_save(save_flags);
3538 objp = __do_cache_alloc(cachep, flags);
3539 local_irq_restore(save_flags);
3540 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3541 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
3542 flags);
3543 prefetchw(objp);
3544
3545 if (likely(objp))
3546 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
3547
3548 if (unlikely((flags & __GFP_ZERO) && objp))
3549 memset(objp, 0, obj_size(cachep));
3550
3551 return objp;
3552}
3553
/*
 * Caller needs to acquire correct kmem_list3's list_lock
 */
3557static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3558 int node)
3559{
3560 int i;
3561 struct kmem_list3 *l3;
3562
3563 for (i = 0; i < nr_objects; i++) {
3564 void *objp = objpp[i];
3565 struct slab *slabp;
3566
3567 slabp = virt_to_slab(objp);
3568 l3 = cachep->nodelists[node];
3569 list_del(&slabp->list);
3570 check_spinlock_acquired_node(cachep, node);
3571 check_slabp(cachep, slabp);
3572 slab_put_obj(cachep, slabp, objp, node);
3573 STATS_DEC_ACTIVE(cachep);
3574 l3->free_objects++;
3575 check_slabp(cachep, slabp);
3576
		/* fixup slab chains */
3578 if (slabp->inuse == 0) {
3579 if (l3->free_objects > l3->free_limit) {
3580 l3->free_objects -= cachep->num;
				/* No need to drop any previously held
				 * lock here, even if we have a off-slab slab
				 * descriptor it is guaranteed to come from
				 * a different cache, refer to comments before
				 * alloc_slabmgmt.
				 */
3587 slab_destroy(cachep, slabp);
3588 } else {
3589 list_add(&slabp->list, &l3->slabs_free);
3590 }
3591 } else {
			/* Unconditionally move a slab to the end of the
			 * partial list on free - maximum time for the
			 * other objects to be freed, too.
			 */
3596 list_add_tail(&slabp->list, &l3->slabs_partial);
3597 }
3598 }
3599}
3600
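/*
 * Flush 'batchcount' objects out of the per-cpu array: move them into the
 * node's shared array if there is room, otherwise hand them back to the
 * slab lists via free_block(), then shift the survivors down.
 */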
3601static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3602{
3603 int batchcount;
3604 struct kmem_list3 *l3;
3605 int node = numa_mem_id();
3606
3607 batchcount = ac->batchcount;
3608#if DEBUG
3609 BUG_ON(!batchcount || batchcount > ac->avail);
3610#endif
3611 check_irq_off();
3612 l3 = cachep->nodelists[node];
3613 spin_lock(&l3->list_lock);
3614 if (l3->shared) {
3615 struct array_cache *shared_array = l3->shared;
3616 int max = shared_array->limit - shared_array->avail;
3617 if (max) {
3618 if (batchcount > max)
3619 batchcount = max;
3620 memcpy(&(shared_array->entry[shared_array->avail]),
3621 ac->entry, sizeof(void *) * batchcount);
3622 shared_array->avail += batchcount;
3623 goto free_done;
3624 }
3625 }
3626
3627 free_block(cachep, ac->entry, batchcount, node);
3628free_done:
3629#if STATS
3630 {
3631 int i = 0;
3632 struct list_head *p;
3633
3634 p = l3->slabs_free.next;
3635 while (p != &(l3->slabs_free)) {
3636 struct slab *slabp;
3637
3638 slabp = list_entry(p, struct slab, list);
3639 BUG_ON(slabp->inuse);
3640
3641 i++;
3642 p = p->next;
3643 }
3644 STATS_SET_FREEABLE(cachep, i);
3645 }
3646#endif
3647 spin_unlock(&l3->list_lock);
3648 ac->avail -= batchcount;
3649 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3650}
3651
/*
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released.  Called with disabled ints.
 */
3656static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3657 void *caller)
3658{
3659 struct array_cache *ac = cpu_cache_get(cachep);
3660
3661 check_irq_off();
3662 kmemleak_free_recursive(objp, cachep->flags);
3663 objp = cache_free_debugcheck(cachep, objp, caller);
3664
3665 kmemcheck_slab_free(cachep, objp, obj_size(cachep));

	/*
	 * Skip calling cache_free_alien() when the platform is not numa.
	 * This will avoid cache misses that happen while accessing slabp
	 * (which is per page memory reference) to get nodeid. Instead use a
	 * global variable to skip the call, which is most likely to be
	 * present in the cache.
	 */
3674 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3675 return;
3676
3677 if (likely(ac->avail < ac->limit)) {
3678 STATS_INC_FREEHIT(cachep);
3679 ac->entry[ac->avail++] = objp;
3680 return;
3681 } else {
3682 STATS_INC_FREEMISS(cachep);
3683 cache_flusharray(cachep, ac);
3684 ac->entry[ac->avail++] = objp;
3685 }
3686}
3687
/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.  The flags are only relevant
 * if the cache has no available objects.
 */
3696void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3697{
3698 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3699
3700 trace_kmem_cache_alloc(_RET_IP_, ret,
3701 obj_size(cachep), cachep->buffer_size, flags);
3702
3703 return ret;
3704}
3705EXPORT_SYMBOL(kmem_cache_alloc);
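
/*
 * Example usage (a sketch; 'foo_cache' is a hypothetical cache created
 * elsewhere with kmem_cache_create()):
 *
 *	struct foo *f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	if (!f)
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free(foo_cache, f);
 */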
3706
3707#ifdef CONFIG_TRACING
3708void *
3709kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
3710{
3711 void *ret;
3712
3713 ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3714
3715 trace_kmalloc(_RET_IP_, ret,
3716 size, slab_buffer_size(cachep), flags);
3717 return ret;
3718}
3719EXPORT_SYMBOL(kmem_cache_alloc_trace);
3720#endif
3721
3722#ifdef CONFIG_NUMA
3723void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3724{
3725 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3726 __builtin_return_address(0));
3727
3728 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3729 obj_size(cachep), cachep->buffer_size,
3730 flags, nodeid);
3731
3732 return ret;
3733}
3734EXPORT_SYMBOL(kmem_cache_alloc_node);
3735
3736#ifdef CONFIG_TRACING
3737void *kmem_cache_alloc_node_trace(size_t size,
3738 struct kmem_cache *cachep,
3739 gfp_t flags,
3740 int nodeid)
3741{
3742 void *ret;
3743
3744 ret = __cache_alloc_node(cachep, flags, nodeid,
3745 __builtin_return_address(0));
3746 trace_kmalloc_node(_RET_IP_, ret,
3747 size, slab_buffer_size(cachep),
3748 flags, nodeid);
3749 return ret;
3750}
3751EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3752#endif
3753
3754static __always_inline void *
3755__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3756{
3757 struct kmem_cache *cachep;
3758
3759 cachep = kmem_find_general_cachep(size, flags);
3760 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3761 return cachep;
3762 return kmem_cache_alloc_node_trace(size, cachep, flags, node);
3763}
3764
3765#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3766void *__kmalloc_node(size_t size, gfp_t flags, int node)
3767{
3768 return __do_kmalloc_node(size, flags, node,
3769 __builtin_return_address(0));
3770}
3771EXPORT_SYMBOL(__kmalloc_node);
3772
3773void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3774 int node, unsigned long caller)
3775{
3776 return __do_kmalloc_node(size, flags, node, (void *)caller);
3777}
3778EXPORT_SYMBOL(__kmalloc_node_track_caller);
3779#else
3780void *__kmalloc_node(size_t size, gfp_t flags, int node)
3781{
3782 return __do_kmalloc_node(size, flags, node, NULL);
3783}
3784EXPORT_SYMBOL(__kmalloc_node);
3785#endif
3786#endif
3787
/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */
3794static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3795 void *caller)
3796{
3797 struct kmem_cache *cachep;
3798 void *ret;
3799
	/* If you want to save a few bytes .text space: replace
	 * __ with kmem_.
	 * Then kmalloc uses the uninlined functions instead of the inline
	 * functions.
	 */
3805 cachep = __find_general_cachep(size, flags);
3806 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3807 return cachep;
3808 ret = __cache_alloc(cachep, flags, caller);
3809
3810 trace_kmalloc((unsigned long) caller, ret,
3811 size, cachep->buffer_size, flags);
3812
3813 return ret;
3814}
3815
3816
3817#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3818void *__kmalloc(size_t size, gfp_t flags)
3819{
3820 return __do_kmalloc(size, flags, __builtin_return_address(0));
3821}
3822EXPORT_SYMBOL(__kmalloc);
3823
3824void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3825{
3826 return __do_kmalloc(size, flags, (void *)caller);
3827}
3828EXPORT_SYMBOL(__kmalloc_track_caller);
3829
3830#else
3831void *__kmalloc(size_t size, gfp_t flags)
3832{
3833 return __do_kmalloc(size, flags, NULL);
3834}
3835EXPORT_SYMBOL(__kmalloc);
3836#endif
3837
/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */
3846void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3847{
3848 unsigned long flags;
3849
3850 local_irq_save(flags);
3851 debug_check_no_locks_freed(objp, obj_size(cachep));
3852 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3853 debug_check_no_obj_freed(objp, obj_size(cachep));
3854 __cache_free(cachep, objp, __builtin_return_address(0));
3855 local_irq_restore(flags);
3856
3857 trace_kmem_cache_free(_RET_IP_, objp);
3858}
3859EXPORT_SYMBOL(kmem_cache_free);
3860
/**
 * kfree - free previously allocated memory
 * @objp: pointer returned by kmalloc.
 *
 * If @objp is NULL, no operation is performed.
 *
 * Don't free memory not originally allocated by kmalloc()
 * or you will run into trouble.
 */
3870void kfree(const void *objp)
3871{
3872 struct kmem_cache *c;
3873 unsigned long flags;
3874
3875 trace_kfree(_RET_IP_, objp);
3876
3877 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3878 return;
3879 local_irq_save(flags);
3880 kfree_debugcheck(objp);
3881 c = virt_to_cache(objp);
3882 debug_check_no_locks_freed(objp, obj_size(c));
3883 debug_check_no_obj_freed(objp, obj_size(c));
3884 __cache_free(c, (void *)objp, __builtin_return_address(0));
3885 local_irq_restore(flags);
3886}
3887EXPORT_SYMBOL(kfree);
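
/*
 * Example (sketch; use() stands in for arbitrary caller code): a kmalloc()
 * and kfree() pair for an anonymous buffer.  kfree(NULL) is a safe no-op,
 * so no NULL check is needed before the free itself.
 *
 *	char *buf = kmalloc(64, GFP_KERNEL);
 *	if (buf)
 *		use(buf);
 *	kfree(buf);
 */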
3888
3889unsigned int kmem_cache_size(struct kmem_cache *cachep)
3890{
3891 return obj_size(cachep);
3892}
3893EXPORT_SYMBOL(kmem_cache_size);
3894
/*
 * This initializes kmem_list3 or resizes various caches for all nodes.
 */
3898static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3899{
3900 int node;
3901 struct kmem_list3 *l3;
3902 struct array_cache *new_shared;
3903 struct array_cache **new_alien = NULL;
3904
3905 for_each_online_node(node) {
3906
3907 if (use_alien_caches) {
3908 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
3909 if (!new_alien)
3910 goto fail;
3911 }
3912
3913 new_shared = NULL;
3914 if (cachep->shared) {
3915 new_shared = alloc_arraycache(node,
3916 cachep->shared*cachep->batchcount,
3917 0xbaadf00d, gfp);
3918 if (!new_shared) {
3919 free_alien_cache(new_alien);
3920 goto fail;
3921 }
3922 }
3923
3924 l3 = cachep->nodelists[node];
3925 if (l3) {
3926 struct array_cache *shared = l3->shared;
3927
3928 spin_lock_irq(&l3->list_lock);
3929
3930 if (shared)
3931 free_block(cachep, shared->entry,
3932 shared->avail, node);
3933
3934 l3->shared = new_shared;
3935 if (!l3->alien) {
3936 l3->alien = new_alien;
3937 new_alien = NULL;
3938 }
3939 l3->free_limit = (1 + nr_cpus_node(node)) *
3940 cachep->batchcount + cachep->num;
3941 spin_unlock_irq(&l3->list_lock);
3942 kfree(shared);
3943 free_alien_cache(new_alien);
3944 continue;
3945 }
3946 l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
3947 if (!l3) {
3948 free_alien_cache(new_alien);
3949 kfree(new_shared);
3950 goto fail;
3951 }
3952
3953 kmem_list3_init(l3);
3954 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3955 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3956 l3->shared = new_shared;
3957 l3->alien = new_alien;
3958 l3->free_limit = (1 + nr_cpus_node(node)) *
3959 cachep->batchcount + cachep->num;
3960 cachep->nodelists[node] = l3;
3961 }
3962 return 0;
3963
3964fail:
3965 if (!cachep->next.next) {
		/* Cache is not active yet. Roll back what we did */
3967 node--;
3968 while (node >= 0) {
3969 if (cachep->nodelists[node]) {
3970 l3 = cachep->nodelists[node];
3971
3972 kfree(l3->shared);
3973 free_alien_cache(l3->alien);
3974 kfree(l3);
3975 cachep->nodelists[node] = NULL;
3976 }
3977 node--;
3978 }
3979 }
3980 return -ENOMEM;
3981}
3982
3983struct ccupdate_struct {
3984 struct kmem_cache *cachep;
3985 struct array_cache *new[0];
3986};
3987
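/*
 * Swap each CPU's array_cache pointer with its replacement.  Runs on every
 * CPU via on_each_cpu() with interrupts disabled, so no allocation or free
 * can race with the exchange; the old array is handed back through
 * new->new[cpu] for the caller to drain and kfree.
 */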
3988static void do_ccupdate_local(void *info)
3989{
3990 struct ccupdate_struct *new = info;
3991 struct array_cache *old;
3992
3993 check_irq_off();
3994 old = cpu_cache_get(new->cachep);
3995
3996 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3997 new->new[smp_processor_id()] = old;
3998}
3999
/* Always called with the cache_chain_mutex held */
4001static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
4002 int batchcount, int shared, gfp_t gfp)
4003{
4004 struct ccupdate_struct *new;
4005 int i;
4006
4007 new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
4008 gfp);
4009 if (!new)
4010 return -ENOMEM;
4011
4012 for_each_online_cpu(i) {
4013 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
4014 batchcount, gfp);
4015 if (!new->new[i]) {
4016 for (i--; i >= 0; i--)
4017 kfree(new->new[i]);
4018 kfree(new);
4019 return -ENOMEM;
4020 }
4021 }
4022 new->cachep = cachep;
4023
4024 on_each_cpu(do_ccupdate_local, (void *)new, 1);
4025
4026 check_irq_on();
4027 cachep->batchcount = batchcount;
4028 cachep->limit = limit;
4029 cachep->shared = shared;
4030
4031 for_each_online_cpu(i) {
4032 struct array_cache *ccold = new->new[i];
4033 if (!ccold)
4034 continue;
4035 spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
4036 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
4037 spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
4038 kfree(ccold);
4039 }
4040 kfree(new);
4041 return alloc_kmemlist(cachep, gfp);
4042}
4043
/* Called with cache_chain_mutex held always */
4045static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
4046{
4047 int err;
4048 int limit, shared;
4049
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
4059 if (cachep->buffer_size > 131072)
4060 limit = 1;
4061 else if (cachep->buffer_size > PAGE_SIZE)
4062 limit = 8;
4063 else if (cachep->buffer_size > 1024)
4064 limit = 24;
4065 else if (cachep->buffer_size > 256)
4066 limit = 54;
4067 else
4068 limit = 120;
4069
	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
4079 shared = 0;
4080 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
4081 shared = 8;
4082
4083#if DEBUG
	/*
	 * With debugging enabled, large batchcount lead to excessively long
	 * periods with disabled local interrupts. Limit the batchcount
	 */
4088 if (limit > 32)
4089 limit = 32;
4090#endif
4091 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
4092 if (err)
4093 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4094 cachep->name, -err);
4095 return err;
4096}
4097
/*
 * Drain an array if it contains any elements taking the l3 lock only if
 * necessary. Note that the l3 listlock also protects the array_cache
 * if drain_array() is used on the shared array.
 */
4103static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4104 struct array_cache *ac, int force, int node)
4105{
4106 int tofree;
4107
4108 if (!ac || !ac->avail)
4109 return;
4110 if (ac->touched && !force) {
4111 ac->touched = 0;
4112 } else {
4113 spin_lock_irq(&l3->list_lock);
4114 if (ac->avail) {
4115 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4116 if (tofree > ac->avail)
4117 tofree = (ac->avail + 1) / 2;
4118 free_block(cachep, ac->entry, tofree, node);
4119 ac->avail -= tofree;
4120 memmove(ac->entry, &(ac->entry[tofree]),
4121 sizeof(void *) * ac->avail);
4122 }
4123 spin_unlock_irq(&l3->list_lock);
4124 }
4125}
4126
/**
 * cache_reap - Reclaim memory from caches.
 * @w: work descriptor
 *
 * Called from workqueue/eventd every few seconds.
 * Purpose:
 * - clear the per-cpu caches for this CPU.
 * - return freeable pages to the main free memory pool.
 *
 * If we cannot acquire the cache chain mutex then just give up - we'll try
 * again on the next iteration.
 */
4139static void cache_reap(struct work_struct *w)
4140{
4141 struct kmem_cache *searchp;
4142 struct kmem_list3 *l3;
4143 int node = numa_mem_id();
4144 struct delayed_work *work = to_delayed_work(w);
4145
4146 if (!mutex_trylock(&cache_chain_mutex))
		/* Give up. Setup the next iteration. */
4148 goto out;
4149
4150 list_for_each_entry(searchp, &cache_chain, next) {
4151 check_irq_on();

		/*
		 * We only take the l3 lock if absolutely necessary and we
		 * have established with reasonable certainty that
		 * we can do some work if the lock was obtained.
		 */
4158 l3 = searchp->nodelists[node];
4159
4160 reap_alien(searchp, l3);
4161
4162 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4163
		/*
		 * These are racy checks but it does not matter
		 * if we skip one check or scan twice.
		 */
4168 if (time_after(l3->next_reap, jiffies))
4169 goto next;
4170
4171 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4172
4173 drain_array(searchp, l3, l3->shared, 0, node);
4174
4175 if (l3->free_touched)
4176 l3->free_touched = 0;
4177 else {
4178 int freed;
4179
4180 freed = drain_freelist(searchp, l3, (l3->free_limit +
4181 5 * searchp->num - 1) / (5 * searchp->num));
4182 STATS_ADD_REAPED(searchp, freed);
4183 }
4184next:
4185 cond_resched();
4186 }
4187 check_irq_on();
4188 mutex_unlock(&cache_chain_mutex);
4189 next_reap_node();
4190out:
	/* Set up the next iteration */
4192 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4193}
4194
4195#ifdef CONFIG_SLABINFO
4196
4197static void print_slabinfo_header(struct seq_file *m)
4198{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
4203#if STATS
4204 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4205#else
4206 seq_puts(m, "slabinfo - version: 2.1\n");
4207#endif
4208 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4209 "<objperslab> <pagesperslab>");
4210 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4211 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4212#if STATS
4213 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4214 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4215 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4216#endif
4217 seq_putc(m, '\n');
4218}
4219
4220static void *s_start(struct seq_file *m, loff_t *pos)
4221{
4222 loff_t n = *pos;
4223
4224 mutex_lock(&cache_chain_mutex);
4225 if (!n)
4226 print_slabinfo_header(m);
4227
4228 return seq_list_start(&cache_chain, *pos);
4229}
4230
4231static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4232{
4233 return seq_list_next(p, &cache_chain, pos);
4234}
4235
4236static void s_stop(struct seq_file *m, void *p)
4237{
4238 mutex_unlock(&cache_chain_mutex);
4239}
4240
4241static int s_show(struct seq_file *m, void *p)
4242{
4243 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4244 struct slab *slabp;
4245 unsigned long active_objs;
4246 unsigned long num_objs;
4247 unsigned long active_slabs = 0;
4248 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4249 const char *name;
4250 char *error = NULL;
4251 int node;
4252 struct kmem_list3 *l3;
4253
4254 active_objs = 0;
4255 num_slabs = 0;
4256 for_each_online_node(node) {
4257 l3 = cachep->nodelists[node];
4258 if (!l3)
4259 continue;
4260
4261 check_irq_on();
4262 spin_lock_irq(&l3->list_lock);
4263
4264 list_for_each_entry(slabp, &l3->slabs_full, list) {
4265 if (slabp->inuse != cachep->num && !error)
4266 error = "slabs_full accounting error";
4267 active_objs += cachep->num;
4268 active_slabs++;
4269 }
4270 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4271 if (slabp->inuse == cachep->num && !error)
4272 error = "slabs_partial inuse accounting error";
4273 if (!slabp->inuse && !error)
4274 error = "slabs_partial/inuse accounting error";
4275 active_objs += slabp->inuse;
4276 active_slabs++;
4277 }
4278 list_for_each_entry(slabp, &l3->slabs_free, list) {
4279 if (slabp->inuse && !error)
4280 error = "slabs_free/inuse accounting error";
4281 num_slabs++;
4282 }
4283 free_objects += l3->free_objects;
4284 if (l3->shared)
4285 shared_avail += l3->shared->avail;
4286
4287 spin_unlock_irq(&l3->list_lock);
4288 }
4289 num_slabs += active_slabs;
4290 num_objs = num_slabs * cachep->num;
4291 if (num_objs - active_objs != free_objects && !error)
4292 error = "free_objects accounting error";
4293
4294 name = cachep->name;
4295 if (error)
4296 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4297
4298 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4299 name, active_objs, num_objs, cachep->buffer_size,
4300 cachep->num, (1 << cachep->gfporder));
4301 seq_printf(m, " : tunables %4u %4u %4u",
4302 cachep->limit, cachep->batchcount, cachep->shared);
4303 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4304 active_slabs, num_slabs, shared_avail);
4305#if STATS
4306 {
4307 unsigned long high = cachep->high_mark;
4308 unsigned long allocs = cachep->num_allocations;
4309 unsigned long grown = cachep->grown;
4310 unsigned long reaped = cachep->reaped;
4311 unsigned long errors = cachep->errors;
4312 unsigned long max_freeable = cachep->max_freeable;
4313 unsigned long node_allocs = cachep->node_allocs;
4314 unsigned long node_frees = cachep->node_frees;
4315 unsigned long overflows = cachep->node_overflow;
4316
4317 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4318 "%4lu %4lu %4lu %4lu %4lu",
4319 allocs, high, grown,
4320 reaped, errors, max_freeable, node_allocs,
4321 node_frees, overflows);
4322 }
4323
4324 {
4325 unsigned long allochit = atomic_read(&cachep->allochit);
4326 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4327 unsigned long freehit = atomic_read(&cachep->freehit);
4328 unsigned long freemiss = atomic_read(&cachep->freemiss);
4329
4330 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4331 allochit, allocmiss, freehit, freemiss);
4332 }
4333#endif
4334 seq_putc(m, '\n');
4335 return 0;
4336}
4337
/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
4352static const struct seq_operations slabinfo_op = {
4353 .start = s_start,
4354 .next = s_next,
4355 .stop = s_stop,
4356 .show = s_show,
4357};
4358
4359#define MAX_SLABINFO_WRITE 128

/**
 * slabinfo_write - Tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data length
 * @ppos: unused
 */
4367static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4368 size_t count, loff_t *ppos)
4369{
4370 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4371 int limit, batchcount, shared, res;
4372 struct kmem_cache *cachep;
4373
4374 if (count > MAX_SLABINFO_WRITE)
4375 return -EINVAL;
4376 if (copy_from_user(&kbuf, buffer, count))
4377 return -EFAULT;
4378 kbuf[MAX_SLABINFO_WRITE] = '\0';
4379
4380 tmp = strchr(kbuf, ' ');
4381 if (!tmp)
4382 return -EINVAL;
4383 *tmp = '\0';
4384 tmp++;
4385 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4386 return -EINVAL;
4387
	/* Find the cache in the chain of caches. */
4389 mutex_lock(&cache_chain_mutex);
4390 res = -EINVAL;
4391 list_for_each_entry(cachep, &cache_chain, next) {
4392 if (!strcmp(cachep->name, kbuf)) {
4393 if (limit < 1 || batchcount < 1 ||
4394 batchcount > limit || shared < 0) {
4395 res = 0;
4396 } else {
4397 res = do_tune_cpucache(cachep, limit,
4398 batchcount, shared,
4399 GFP_KERNEL);
4400 }
4401 break;
4402 }
4403 }
4404 mutex_unlock(&cache_chain_mutex);
4405 if (res >= 0)
4406 res = count;
4407 return res;
4408}
4409
4410static int slabinfo_open(struct inode *inode, struct file *file)
4411{
4412 return seq_open(file, &slabinfo_op);
4413}
4414
4415static const struct file_operations proc_slabinfo_operations = {
4416 .open = slabinfo_open,
4417 .read = seq_read,
4418 .write = slabinfo_write,
4419 .llseek = seq_lseek,
4420 .release = seq_release,
4421};
4422
4423#ifdef CONFIG_DEBUG_SLAB_LEAK
4424
4425static void *leaks_start(struct seq_file *m, loff_t *pos)
4426{
4427 mutex_lock(&cache_chain_mutex);
4428 return seq_list_start(&cache_chain, *pos);
4429}
4430
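/*
 * Record one allocation caller in the table at n.  n[0] holds the table
 * capacity and n[1] the number of distinct entries; entries are
 * (address, count) pairs kept sorted by address so a binary search can
 * find or insert each caller.  Returns 0 once the table is full.
 */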
4431static inline int add_caller(unsigned long *n, unsigned long v)
4432{
4433 unsigned long *p;
4434 int l;
4435 if (!v)
4436 return 1;
4437 l = n[1];
4438 p = n + 2;
4439 while (l) {
4440 int i = l/2;
4441 unsigned long *q = p + 2 * i;
4442 if (*q == v) {
4443 q[1]++;
4444 return 1;
4445 }
4446 if (*q > v) {
4447 l = i;
4448 } else {
4449 p = q + 2;
4450 l -= i + 1;
4451 }
4452 }
4453 if (++n[1] == n[0])
4454 return 0;
4455 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4456 p[0] = v;
4457 p[1] = 1;
4458 return 1;
4459}
4460
4461static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4462{
4463 void *p;
4464 int i;
4465 if (n[0] == n[1])
4466 return;
4467 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4468 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4469 continue;
4470 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4471 return;
4472 }
4473}
4474
4475static void show_symbol(struct seq_file *m, unsigned long address)
4476{
4477#ifdef CONFIG_KALLSYMS
4478 unsigned long offset, size;
4479 char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4480
4481 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4482 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4483 if (modname[0])
4484 seq_printf(m, " [%s]", modname);
4485 return;
4486 }
4487#endif
4488 seq_printf(m, "%p", (void *)address);
4489}
4490
4491static int leaks_show(struct seq_file *m, void *p)
4492{
4493 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4494 struct slab *slabp;
4495 struct kmem_list3 *l3;
4496 const char *name;
4497 unsigned long *n = m->private;
4498 int node;
4499 int i;
4500
4501 if (!(cachep->flags & SLAB_STORE_USER))
4502 return 0;
4503 if (!(cachep->flags & SLAB_RED_ZONE))
4504 return 0;
4505
	/* OK, we can do it */
4507
4508 n[1] = 0;
4509
4510 for_each_online_node(node) {
4511 l3 = cachep->nodelists[node];
4512 if (!l3)
4513 continue;
4514
4515 check_irq_on();
4516 spin_lock_irq(&l3->list_lock);
4517
4518 list_for_each_entry(slabp, &l3->slabs_full, list)
4519 handle_slab(n, cachep, slabp);
4520 list_for_each_entry(slabp, &l3->slabs_partial, list)
4521 handle_slab(n, cachep, slabp);
4522 spin_unlock_irq(&l3->list_lock);
4523 }
4524 name = cachep->name;
4525 if (n[0] == n[1]) {
		/* Increase the buffer size */
4527 mutex_unlock(&cache_chain_mutex);
4528 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4529 if (!m->private) {
			/* Too bad, we are really out */
4531 m->private = n;
4532 mutex_lock(&cache_chain_mutex);
4533 return -ENOMEM;
4534 }
4535 *(unsigned long *)m->private = n[0] * 2;
4536 kfree(n);
4537 mutex_lock(&cache_chain_mutex);
		/* Now make sure this entry will be retried */
4539 m->count = m->size;
4540 return 0;
4541 }
4542 for (i = 0; i < n[1]; i++) {
4543 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4544 show_symbol(m, n[2*i+2]);
4545 seq_putc(m, '\n');
4546 }
4547
4548 return 0;
4549}
4550
4551static const struct seq_operations slabstats_op = {
4552 .start = leaks_start,
4553 .next = s_next,
4554 .stop = s_stop,
4555 .show = leaks_show,
4556};
4557
4558static int slabstats_open(struct inode *inode, struct file *file)
4559{
4560 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4561 int ret = -ENOMEM;
4562 if (n) {
4563 ret = seq_open(file, &slabstats_op);
4564 if (!ret) {
4565 struct seq_file *m = file->private_data;
4566 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4567 m->private = n;
4568 n = NULL;
4569 }
4570 kfree(n);
4571 }
4572 return ret;
4573}
4574
4575static const struct file_operations proc_slabstats_operations = {
4576 .open = slabstats_open,
4577 .read = seq_read,
4578 .llseek = seq_lseek,
4579 .release = seq_release_private,
4580};
4581#endif
4582
4583static int __init slab_proc_init(void)
4584{
4585 proc_create("slabinfo",S_IWUSR|S_IRUSR,NULL,&proc_slabinfo_operations);
4586#ifdef CONFIG_DEBUG_SLAB_LEAK
4587 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4588#endif
4589 return 0;
4590}
4591module_init(slab_proc_init);
4592#endif
4593
/**
 * ksize - get the actual amount of memory allocated for a given object
 * @objp: Pointer to the object
 *
 * kmalloc may internally round up allocations and return more memory
 * than requested. ksize() can be used to determine the actual amount of
 * memory allocated. The caller may use this additional memory, even though
 * a smaller amount of memory was initially specified with the kmalloc call.
 * The caller must guarantee that objp points to a valid object previously
 * allocated with either kmalloc() or kmem_cache_alloc(). The object
 * must not be freed during the duration of the call.
 */
4606size_t ksize(const void *objp)
4607{
4608 BUG_ON(!objp);
4609 if (unlikely(objp == ZERO_SIZE_PTR))
4610 return 0;
4611
4612 return obj_size(virt_to_cache(objp));
4613}
4614EXPORT_SYMBOL(ksize);
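
/*
 * Example (sketch): a caller may legitimately use the slack that ksize()
 * reports beyond the size it originally requested.
 *
 *	char *buf = kmalloc(17, GFP_KERNEL);
 *	size_t real = buf ? ksize(buf) : 0;
 *
 * Here 'real' is the rounded-up allocation size, e.g. 32 on most configs.
 */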
4615