/*
 * linux/mm/slab.c
 *
 * The SLAB allocator: manages caches of fixed-size objects built on top of
 * the page allocator, with per-CPU array caches, per-node slab lists and
 * optional debugging (red zones, poisoning, last-user tracking).
 */
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/kmemtrace.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
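
/*
 * DEBUG	- 1 for kmem_cache_create() to honour debug flags such as
 *		  SLAB_RED_ZONE and SLAB_POISON; 0 for smaller, faster code.
 * STATS	- 1 to collect statistics for /proc/slabinfo;
 *		  0 for smaller, faster code.
 * FORCED_DEBUG	- 1 enables extra checking on every cache and slows things down.
 */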
#ifdef CONFIG_DEBUG_SLAB
#define	DEBUG		1
#define	STATS		1
#define	FORCED_DEBUG	1
#else
#define	DEBUG		0
#define	STATS		0
#define	FORCED_DEBUG	0
#endif

#define BYTES_PER_WORD		sizeof(void *)
#define REDZONE_ALIGN		max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_MINALIGN
/*
 * Minimum guaranteed alignment of kmalloc()ed objects; architectures may
 * override this.  The default is the alignment of unsigned long long.
 */
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
/*
 * Minimum alignment enforced on all slab objects; 0 means no
 * architecture-imposed minimum.
 */
#define ARCH_SLAB_MINALIGN 0
#endif

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

/* Legal flag mask for kmem_cache_create(). */
#if DEBUG
# define CREATE_MASK	(SLAB_RED_ZONE | \
			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_STORE_USER | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#else
# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
			 SLAB_CACHE_DMA | \
			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
#endif
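
/*
 * kmem_bufctl_t is the index type used for the per-slab free list.
 * The top few values are reserved as markers: BUFCTL_END terminates the
 * list, BUFCTL_FREE and BUFCTL_ACTIVE mark free and allocated objects in
 * debug builds, and SLAB_LIMIT is the largest usable object index.
 */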
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END	(((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE	(((kmem_bufctl_t)(~0U))-1)
#define	BUFCTL_ACTIVE	(((kmem_bufctl_t)(~0U))-2)
#define	SLAB_LIMIT	(((kmem_bufctl_t)(~0U))-3)
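
/*
 * struct slab
 *
 * Management data for a single slab: its place on one of the per-node
 * full/partial/free lists, the colour offset into the underlying pages,
 * a pointer to the first object, the number of objects in use, the head
 * of the free-object list and the NUMA node the slab belongs to.
 */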
struct slab {
	struct list_head list;
	unsigned long colouroff;
	void *s_mem;
	unsigned int inuse;
	kmem_bufctl_t free;
	unsigned short nodeid;
};
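
/*
 * struct slab_rcu
 *
 * For caches created with SLAB_DESTROY_BY_RCU the pages of a slab must not
 * be returned to the page allocator until an RCU grace period has passed.
 * This structure overlays an unused struct slab and carries the rcu_head
 * plus the cache and address needed by kmem_rcu_free().
 */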
struct slab_rcu {
	struct rcu_head head;
	struct kmem_cache *cachep;
	void *addr;
};
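
/*
 * struct array_cache
 *
 * LIFO cache of free objects, used per CPU and for the per-node shared and
 * alien caches.  avail/limit track how full entry[] is, batchcount is the
 * number of objects transferred at a time, and touched is set when the
 * cache has been used recently.
 */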
struct array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int batchcount;
	unsigned int touched;
	spinlock_t lock;
	void *entry[];	/* object pointers are stored inline after the struct */
};

/*
 * Bootstrap array cache: used before kmalloc is available, so it can only
 * hold a single entry.
 */
#define BOOT_CPUCACHE_ENTRIES	1
struct arraycache_init {
	struct array_cache cache;
	void *entries[BOOT_CPUCACHE_ENTRIES];
};
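
/*
 * struct kmem_list3
 *
 * Per-node slab lists and state: the full/partial/free slab lists, free
 * object accounting, the next colour offset to use, the node's shared
 * array cache, the per-node "alien" caches for remote-node frees, and the
 * bookkeeping used by the periodic reaper.
 */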
struct kmem_list3 {
	struct list_head slabs_partial;
	struct list_head slabs_full;
	struct list_head slabs_free;
	unsigned long free_objects;
	unsigned int free_limit;
	unsigned int colour_next;
	spinlock_t list_lock;
	struct array_cache *shared;
	struct array_cache **alien;
	unsigned long next_reap;
	int free_touched;
};

/* Static per-node lists used while bootstrapping the allocator itself. */
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define	CACHE_CACHE 0
#define	SIZE_AC MAX_NUMNODES
#define	SIZE_L3 (2 * MAX_NUMNODES)

static int drain_freelist(struct kmem_cache *cache,
			struct kmem_list3 *l3, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
			int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
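
/*
 * index_of() maps a compile-time constant size to the index of the matching
 * general (kmalloc) cache in malloc_sizes[].  It must only be called with a
 * constant size; any other use triggers a link error via the undefined
 * __bad_size().
 */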
static __always_inline int index_of(const size_t size)
{
	extern void __bad_size(void);

	if (__builtin_constant_p(size)) {
		int i = 0;

#define CACHE(x) \
	if (size <= x) \
		return i; \
	else \
		i++;
#include <linux/kmalloc_sizes.h>
#undef CACHE
		__bad_size();
	} else
		__bad_size();
	return 0;
}

static int slab_early_init = 1;

#define INDEX_AC index_of(sizeof(struct arraycache_init))
#define INDEX_L3 index_of(sizeof(struct kmem_list3))

static void kmem_list3_init(struct kmem_list3 *parent)
{
	INIT_LIST_HEAD(&parent->slabs_full);
	INIT_LIST_HEAD(&parent->slabs_partial);
	INIT_LIST_HEAD(&parent->slabs_free);
	parent->shared = NULL;
	parent->alien = NULL;
	parent->colour_next = 0;
	spin_lock_init(&parent->list_lock);
	parent->free_objects = 0;
	parent->free_touched = 0;
}

#define MAKE_LIST(cachep, listp, slab, nodeid)				\
	do {								\
		INIT_LIST_HEAD(listp);					\
		list_splice(&(cachep->nodelists[nodeid]->slab), listp);	\
	} while (0)

#define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
	do {								\
	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
	} while (0)

#define CFLGS_OFF_SLAB		(0x80000000UL)
#define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)

#define BATCHREFILL_LIMIT	16
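
/*
 * How often the per-CPU arrays (REAPTIMEOUT_CPUC) and the per-node lists
 * (REAPTIMEOUT_LIST3) are revisited by the cache reaper.
 */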
#define REAPTIMEOUT_CPUC	(2*HZ)
#define REAPTIMEOUT_LIST3	(4*HZ)

#if STATS
#define	STATS_INC_ACTIVE(x)	((x)->num_active++)
#define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
#define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
#define	STATS_INC_GROWN(x)	((x)->grown++)
#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
#define	STATS_SET_HIGH(x)						\
	do {								\
		if ((x)->num_active > (x)->high_mark)			\
			(x)->high_mark = (x)->num_active;		\
	} while (0)
#define	STATS_INC_ERR(x)	((x)->errors++)
#define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
#define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
#define STATS_INC_ACOVERFLOW(x)	((x)->node_overflow++)
#define	STATS_SET_FREEABLE(x, i)					\
	do {								\
		if ((x)->max_freeable < i)				\
			(x)->max_freeable = i;				\
	} while (0)
#define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
#define STATS_INC_FREEMISS(x)	atomic_inc(&(x)->freemiss)
#else
#define	STATS_INC_ACTIVE(x)	do { } while (0)
#define	STATS_DEC_ACTIVE(x)	do { } while (0)
#define	STATS_INC_ALLOCED(x)	do { } while (0)
#define	STATS_INC_GROWN(x)	do { } while (0)
#define	STATS_ADD_REAPED(x,y)	do { } while (0)
#define	STATS_SET_HIGH(x)	do { } while (0)
#define	STATS_INC_ERR(x)	do { } while (0)
#define	STATS_INC_NODEALLOCS(x)	do { } while (0)
#define	STATS_INC_NODEFREES(x)	do { } while (0)
#define STATS_INC_ACOVERFLOW(x)	do { } while (0)
#define	STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x)	do { } while (0)
#define STATS_INC_ALLOCMISS(x)	do { } while (0)
#define STATS_INC_FREEHIT(x)	do { } while (0)
#define STATS_INC_FREEMISS(x)	do { } while (0)
#endif

#if DEBUG

/*
 * Debug builds prepend a red zone to each object and append a second red
 * zone plus an optional "last user" pointer.  cachep->obj_offset and
 * cachep->obj_size locate the real object inside the buffer; the helpers
 * below hide this layout from the rest of the code.
 */
static int obj_offset(struct kmem_cache *cachep)
{
	return cachep->obj_offset;
}

static int obj_size(struct kmem_cache *cachep)
{
	return cachep->obj_size;
}

static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	return (unsigned long long *) (objp + obj_offset(cachep) -
				       sizeof(unsigned long long));
}

static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
	if (cachep->flags & SLAB_STORE_USER)
		return (unsigned long long *)(objp + cachep->buffer_size -
					      sizeof(unsigned long long) -
					      REDZONE_ALIGN);
	return (unsigned long long *) (objp + cachep->buffer_size -
				       sizeof(unsigned long long));
}

static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
	BUG_ON(!(cachep->flags & SLAB_STORE_USER));
	return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
}

#else

#define obj_offset(x)			0
#define obj_size(cachep)		(cachep->buffer_size)
#define dbg_redzone1(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp)	({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp)	({BUG(); (void **)NULL;})

#endif

#ifdef CONFIG_KMEMTRACE
size_t slab_buffer_size(struct kmem_cache *cachep)
{
	return cachep->buffer_size;
}
EXPORT_SYMBOL(slab_buffer_size);
#endif
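
/*
 * slab_break_gfp_order limits how high calculate_slab_order() will push a
 * slab's page order when lower orders already hold at least one object.
 * kmem_cache_init() raises it from 0 to 1 on machines with more than 32MB
 * of memory.
 */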
#define	BREAK_GFP_ORDER_HI	1
#define	BREAK_GFP_ORDER_LO	0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
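
/*
 * While a page belongs to a slab, its struct page fields lru.next and
 * lru.prev are overloaded to point back at the kmem_cache and the struct
 * slab respectively, so virt_to_cache()/virt_to_slab() can recover both
 * from an object address.
 */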
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
	page->lru.next = (struct list_head *)cache;
}

static inline struct kmem_cache *page_get_cache(struct page *page)
{
	page = compound_head(page);
	BUG_ON(!PageSlab(page));
	return (struct kmem_cache *)page->lru.next;
}

static inline void page_set_slab(struct page *page, struct slab *slab)
{
	page->lru.prev = (struct list_head *)slab;
}

static inline struct slab *page_get_slab(struct page *page)
{
	BUG_ON(!PageSlab(page));
	return (struct slab *)page->lru.prev;
}

static inline struct kmem_cache *virt_to_cache(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_cache(page);
}

static inline struct slab *virt_to_slab(const void *obj)
{
	struct page *page = virt_to_head_page(obj);
	return page_get_slab(page);
}

static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
				 unsigned int idx)
{
	return slab->s_mem + cache->buffer_size * idx;
}

/*
 * We want to avoid an expensive divide: because buffer_size is constant
 * for a given cache, (offset / cache->buffer_size) can be replaced by
 * reciprocal_divide(offset, cache->reciprocal_buffer_size).
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
					const struct slab *slab, void *obj)
{
	u32 offset = (obj - slab->s_mem);
	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}

/*
 * These are the default caches for kmalloc.  Custom caches can have other
 * sizes.
 */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
	CACHE(ULONG_MAX)
#undef CACHE
};
EXPORT_SYMBOL(malloc_sizes);

/* Names for the kmalloc caches; must stay in sync with malloc_sizes above. */
struct cache_names {
	char *name;
	char *name_dma;
};

static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
	{NULL,}
#undef CACHE
};

/* Bootstrap per-cpu arrays, used until the real ones can be kmalloc()ed. */
static struct arraycache_init initarray_cache __initdata =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

/* internal cache of cache description objs */
static struct kmem_cache cache_cache = {
	.batchcount = 1,
	.limit = BOOT_CPUCACHE_ENTRIES,
	.shared = 1,
	.buffer_size = sizeof(struct kmem_cache),
	.name = "kmem_cache",
};

#define BAD_ALIEN_MAGIC 0x01020304ul

#ifdef CONFIG_LOCKDEP

/*
 * Slab sometimes uses the kmalloc caches to manage its own internal
 * structures, so the l3 list_lock and the alien cache locks of those
 * caches can nest inside each other.  Give the on-slab kmalloc caches
 * their own lock classes so lockdep does not report false positives.
 */
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;

static inline void init_lock_keys(void)
{
	int q;
	struct cache_sizes *s = malloc_sizes;

	while (s->cs_size != ULONG_MAX) {
		for_each_node(q) {
			struct array_cache **alc;
			int r;
			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
			if (!l3 || OFF_SLAB(s->cs_cachep))
				continue;
			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
			alc = l3->alien;
			/*
			 * The !NUMA version of alloc_alien_cache() returns
			 * BAD_ALIEN_MAGIC instead of a real pointer, so
			 * skip such entries here.
			 */
			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
				continue;
			for_each_node(r) {
				if (alc[r])
					lockdep_set_class(&alc[r]->lock,
					     &on_slab_alc_key);
			}
		}
		s++;
	}
}
#else
static inline void init_lock_keys(void)
{
}
#endif

/*
 * Guard access to the cache-chain.
 */
static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;

/*
 * The allocator is brought up in stages: the kmalloc caches themselves need
 * per-cpu arrays and per-node lists.  g_cpucache_up records how far the
 * bootstrap has progressed.
 */
static enum {
	NONE,
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,
	FULL
} g_cpucache_up;

/*
 * Used by boot code to determine whether the slab allocator is ready.
 */
int slab_is_available(void)
{
	return g_cpucache_up >= EARLY;
}

static DEFINE_PER_CPU(struct delayed_work, reap_work);

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
	return cachep->array[smp_processor_id()];
}

static inline struct kmem_cache *__find_general_cachep(size_t size,
							gfp_t gfpflags)
{
	struct cache_sizes *csizep = malloc_sizes;

#if DEBUG
	/*
	 * This happens if someone tries to call kmem_cache_create() or
	 * __kmalloc() before the general caches are initialized.
	 */
	BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
#endif
	if (!size)
		return ZERO_SIZE_PTR;

	while (size > csizep->cs_size)
		csizep++;

	/*
	 * The terminating entry (cs_size == ULONG_MAX) has NULL cache
	 * pointers, so oversized requests fall through here without needing
	 * a special case.
	 */
#ifdef CONFIG_ZONE_DMA
	if (unlikely(gfpflags & GFP_DMA))
		return csizep->cs_dmacachep;
#endif
	return csizep->cs_cachep;
}

static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
	return __find_general_cachep(size, gfpflags);
}

static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}
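
/*
 * cache_estimate() computes how many objects of @buffer_size fit into a
 * slab of 2^@gfporder pages and how much space is left over, taking into
 * account whether the slab management structure lives on or off the slab
 * (CFLGS_OFF_SLAB).
 */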
738static void cache_estimate(unsigned long gfporder, size_t buffer_size,
739 size_t align, int flags, size_t *left_over,
740 unsigned int *num)
741{
742 int nr_objs;
743 size_t mgmt_size;
744 size_t slab_size = PAGE_SIZE << gfporder;
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761 if (flags & CFLGS_OFF_SLAB) {
762 mgmt_size = 0;
763 nr_objs = slab_size / buffer_size;
764
765 if (nr_objs > SLAB_LIMIT)
766 nr_objs = SLAB_LIMIT;
767 } else {
768
769
770
771
772
773
774
775
776 nr_objs = (slab_size - sizeof(struct slab)) /
777 (buffer_size + sizeof(kmem_bufctl_t));
778
779
780
781
782
783 if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
784 > slab_size)
785 nr_objs--;
786
787 if (nr_objs > SLAB_LIMIT)
788 nr_objs = SLAB_LIMIT;
789
790 mgmt_size = slab_mgmt_size(nr_objs, align);
791 }
792 *num = nr_objs;
793 *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
794}
795
796#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
797
798static void __slab_error(const char *function, struct kmem_cache *cachep,
799 char *msg)
800{
801 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
802 function, cachep->name, msg);
803 dump_stack();
804}
805
806
807
808
809
810
811
812
813
814static int use_alien_caches __read_mostly = 1;
815static int __init noaliencache_setup(char *s)
816{
817 use_alien_caches = 0;
818 return 1;
819}
820__setup("noaliencache", noaliencache_setup);
821
822#ifdef CONFIG_NUMA
823
824
825
826
827
828
829static DEFINE_PER_CPU(unsigned long, reap_node);
830
831static void init_reap_node(int cpu)
832{
833 int node;
834
835 node = next_node(cpu_to_node(cpu), node_online_map);
836 if (node == MAX_NUMNODES)
837 node = first_node(node_online_map);
838
839 per_cpu(reap_node, cpu) = node;
840}
841
842static void next_reap_node(void)
843{
844 int node = __get_cpu_var(reap_node);
845
846 node = next_node(node, node_online_map);
847 if (unlikely(node >= MAX_NUMNODES))
848 node = first_node(node_online_map);
849 __get_cpu_var(reap_node) = node;
850}
851
852#else
853#define init_reap_node(cpu) do { } while (0)
854#define next_reap_node(void) do { } while (0)
855#endif
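
/*
 * start_cpu_timer() arms the per-CPU delayed work that runs cache_reap()
 * periodically.  It is a no-op until keventd is running, and it never
 * re-initialises work that has already been set up for this CPU.
 */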
864static void __cpuinit start_cpu_timer(int cpu)
865{
866 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
867
868
869
870
871
872
873 if (keventd_up() && reap_work->work.func == NULL) {
874 init_reap_node(cpu);
875 INIT_DELAYED_WORK(reap_work, cache_reap);
876 schedule_delayed_work_on(cpu, reap_work,
877 __round_jiffies_relative(HZ, cpu));
878 }
879}
880
881static struct array_cache *alloc_arraycache(int node, int entries,
882 int batchcount, gfp_t gfp)
883{
884 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
885 struct array_cache *nc = NULL;
886
887 nc = kmalloc_node(memsize, gfp, node);
888
889
890
891
892
893
894
895 kmemleak_no_scan(nc);
896 if (nc) {
897 nc->avail = 0;
898 nc->limit = entries;
899 nc->batchcount = batchcount;
900 nc->touched = 0;
901 spin_lock_init(&nc->lock);
902 }
903 return nc;
904}
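
/*
 * transfer_objects() moves up to @max free object pointers from one
 * array_cache to another (most recently freed entries first) and returns
 * the number actually transferred, bounded by what the destination can
 * still hold.
 */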
912static int transfer_objects(struct array_cache *to,
913 struct array_cache *from, unsigned int max)
914{
915
916 int nr = min(min(from->avail, max), to->limit - to->avail);
917
918 if (!nr)
919 return 0;
920
921 memcpy(to->entry + to->avail, from->entry + from->avail -nr,
922 sizeof(void *) *nr);
923
924 from->avail -= nr;
925 to->avail += nr;
926 to->touched = 1;
927 return nr;
928}
929
930#ifndef CONFIG_NUMA
931
932#define drain_alien_cache(cachep, alien) do { } while (0)
933#define reap_alien(cachep, l3) do { } while (0)
934
935static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
936{
937 return (struct array_cache **)BAD_ALIEN_MAGIC;
938}
939
940static inline void free_alien_cache(struct array_cache **ac_ptr)
941{
942}
943
944static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
945{
946 return 0;
947}
948
949static inline void *alternate_node_alloc(struct kmem_cache *cachep,
950 gfp_t flags)
951{
952 return NULL;
953}
954
955static inline void *____cache_alloc_node(struct kmem_cache *cachep,
956 gfp_t flags, int nodeid)
957{
958 return NULL;
959}
960
961#else
962
963static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
964static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
965
966static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
967{
968 struct array_cache **ac_ptr;
969 int memsize = sizeof(void *) * nr_node_ids;
970 int i;
971
972 if (limit > 1)
973 limit = 12;
974 ac_ptr = kmalloc_node(memsize, gfp, node);
975 if (ac_ptr) {
976 for_each_node(i) {
977 if (i == node || !node_online(i)) {
978 ac_ptr[i] = NULL;
979 continue;
980 }
981 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
982 if (!ac_ptr[i]) {
983 for (i--; i >= 0; i--)
984 kfree(ac_ptr[i]);
985 kfree(ac_ptr);
986 return NULL;
987 }
988 }
989 }
990 return ac_ptr;
991}
992
993static void free_alien_cache(struct array_cache **ac_ptr)
994{
995 int i;
996
997 if (!ac_ptr)
998 return;
999 for_each_node(i)
1000 kfree(ac_ptr[i]);
1001 kfree(ac_ptr);
1002}
1003
1004static void __drain_alien_cache(struct kmem_cache *cachep,
1005 struct array_cache *ac, int node)
1006{
1007 struct kmem_list3 *rl3 = cachep->nodelists[node];
1008
1009 if (ac->avail) {
1010 spin_lock(&rl3->list_lock);
1011
1012
1013
1014
1015
1016 if (rl3->shared)
1017 transfer_objects(rl3->shared, ac, ac->limit);
1018
1019 free_block(cachep, ac->entry, ac->avail, node);
1020 ac->avail = 0;
1021 spin_unlock(&rl3->list_lock);
1022 }
1023}
1024
1025
1026
1027
1028static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
1029{
1030 int node = __get_cpu_var(reap_node);
1031
1032 if (l3->alien) {
1033 struct array_cache *ac = l3->alien[node];
1034
1035 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1036 __drain_alien_cache(cachep, ac, node);
1037 spin_unlock_irq(&ac->lock);
1038 }
1039 }
1040}
1041
1042static void drain_alien_cache(struct kmem_cache *cachep,
1043 struct array_cache **alien)
1044{
1045 int i = 0;
1046 struct array_cache *ac;
1047 unsigned long flags;
1048
1049 for_each_online_node(i) {
1050 ac = alien[i];
1051 if (ac) {
1052 spin_lock_irqsave(&ac->lock, flags);
1053 __drain_alien_cache(cachep, ac, i);
1054 spin_unlock_irqrestore(&ac->lock, flags);
1055 }
1056 }
1057}
1058
1059static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1060{
1061 struct slab *slabp = virt_to_slab(objp);
1062 int nodeid = slabp->nodeid;
1063 struct kmem_list3 *l3;
1064 struct array_cache *alien = NULL;
1065 int node;
1066
1067 node = numa_node_id();
1068
1069
1070
1071
1072
1073 if (likely(slabp->nodeid == node))
1074 return 0;
1075
1076 l3 = cachep->nodelists[node];
1077 STATS_INC_NODEFREES(cachep);
1078 if (l3->alien && l3->alien[nodeid]) {
1079 alien = l3->alien[nodeid];
1080 spin_lock(&alien->lock);
1081 if (unlikely(alien->avail == alien->limit)) {
1082 STATS_INC_ACOVERFLOW(cachep);
1083 __drain_alien_cache(cachep, alien, nodeid);
1084 }
1085 alien->entry[alien->avail++] = objp;
1086 spin_unlock(&alien->lock);
1087 } else {
1088 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
1089 free_block(cachep, &objp, 1, nodeid);
1090 spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
1091 }
1092 return 1;
1093}
1094#endif
1095
1096static void __cpuinit cpuup_canceled(long cpu)
1097{
1098 struct kmem_cache *cachep;
1099 struct kmem_list3 *l3 = NULL;
1100 int node = cpu_to_node(cpu);
1101 const struct cpumask *mask = cpumask_of_node(node);
1102
1103 list_for_each_entry(cachep, &cache_chain, next) {
1104 struct array_cache *nc;
1105 struct array_cache *shared;
1106 struct array_cache **alien;
1107
1108
1109 nc = cachep->array[cpu];
1110 cachep->array[cpu] = NULL;
1111 l3 = cachep->nodelists[node];
1112
1113 if (!l3)
1114 goto free_array_cache;
1115
1116 spin_lock_irq(&l3->list_lock);
1117
1118
1119 l3->free_limit -= cachep->batchcount;
1120 if (nc)
1121 free_block(cachep, nc->entry, nc->avail, node);
1122
1123 if (!cpus_empty(*mask)) {
1124 spin_unlock_irq(&l3->list_lock);
1125 goto free_array_cache;
1126 }
1127
1128 shared = l3->shared;
1129 if (shared) {
1130 free_block(cachep, shared->entry,
1131 shared->avail, node);
1132 l3->shared = NULL;
1133 }
1134
1135 alien = l3->alien;
1136 l3->alien = NULL;
1137
1138 spin_unlock_irq(&l3->list_lock);
1139
1140 kfree(shared);
1141 if (alien) {
1142 drain_alien_cache(cachep, alien);
1143 free_alien_cache(alien);
1144 }
1145free_array_cache:
1146 kfree(nc);
1147 }
1148
1149
1150
1151
1152
1153 list_for_each_entry(cachep, &cache_chain, next) {
1154 l3 = cachep->nodelists[node];
1155 if (!l3)
1156 continue;
1157 drain_freelist(cachep, l3, l3->free_objects);
1158 }
1159}
1160
1161static int __cpuinit cpuup_prepare(long cpu)
1162{
1163 struct kmem_cache *cachep;
1164 struct kmem_list3 *l3 = NULL;
1165 int node = cpu_to_node(cpu);
1166 const int memsize = sizeof(struct kmem_list3);
1167
1168
1169
1170
1171
1172
1173
1174
1175 list_for_each_entry(cachep, &cache_chain, next) {
1176
1177
1178
1179
1180
1181 if (!cachep->nodelists[node]) {
1182 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1183 if (!l3)
1184 goto bad;
1185 kmem_list3_init(l3);
1186 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1187 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1188
1189
1190
1191
1192
1193
1194 cachep->nodelists[node] = l3;
1195 }
1196
1197 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1198 cachep->nodelists[node]->free_limit =
1199 (1 + nr_cpus_node(node)) *
1200 cachep->batchcount + cachep->num;
1201 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1202 }
1203
1204
1205
1206
1207
1208 list_for_each_entry(cachep, &cache_chain, next) {
1209 struct array_cache *nc;
1210 struct array_cache *shared = NULL;
1211 struct array_cache **alien = NULL;
1212
1213 nc = alloc_arraycache(node, cachep->limit,
1214 cachep->batchcount, GFP_KERNEL);
1215 if (!nc)
1216 goto bad;
1217 if (cachep->shared) {
1218 shared = alloc_arraycache(node,
1219 cachep->shared * cachep->batchcount,
1220 0xbaadf00d, GFP_KERNEL);
1221 if (!shared) {
1222 kfree(nc);
1223 goto bad;
1224 }
1225 }
1226 if (use_alien_caches) {
1227 alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
1228 if (!alien) {
1229 kfree(shared);
1230 kfree(nc);
1231 goto bad;
1232 }
1233 }
1234 cachep->array[cpu] = nc;
1235 l3 = cachep->nodelists[node];
1236 BUG_ON(!l3);
1237
1238 spin_lock_irq(&l3->list_lock);
1239 if (!l3->shared) {
1240
1241
1242
1243
1244 l3->shared = shared;
1245 shared = NULL;
1246 }
1247#ifdef CONFIG_NUMA
1248 if (!l3->alien) {
1249 l3->alien = alien;
1250 alien = NULL;
1251 }
1252#endif
1253 spin_unlock_irq(&l3->list_lock);
1254 kfree(shared);
1255 free_alien_cache(alien);
1256 }
1257 return 0;
1258bad:
1259 cpuup_canceled(cpu);
1260 return -ENOMEM;
1261}
1262
1263static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1264 unsigned long action, void *hcpu)
1265{
1266 long cpu = (long)hcpu;
1267 int err = 0;
1268
1269 switch (action) {
1270 case CPU_UP_PREPARE:
1271 case CPU_UP_PREPARE_FROZEN:
1272 mutex_lock(&cache_chain_mutex);
1273 err = cpuup_prepare(cpu);
1274 mutex_unlock(&cache_chain_mutex);
1275 break;
1276 case CPU_ONLINE:
1277 case CPU_ONLINE_FROZEN:
1278 start_cpu_timer(cpu);
1279 break;
1280#ifdef CONFIG_HOTPLUG_CPU
1281 case CPU_DOWN_PREPARE:
1282 case CPU_DOWN_PREPARE_FROZEN:
1283
1284
1285
1286
1287
1288
1289 cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
1290
1291 per_cpu(reap_work, cpu).work.func = NULL;
1292 break;
1293 case CPU_DOWN_FAILED:
1294 case CPU_DOWN_FAILED_FROZEN:
1295 start_cpu_timer(cpu);
1296 break;
1297 case CPU_DEAD:
1298 case CPU_DEAD_FROZEN:
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308#endif
1309 case CPU_UP_CANCELED:
1310 case CPU_UP_CANCELED_FROZEN:
1311 mutex_lock(&cache_chain_mutex);
1312 cpuup_canceled(cpu);
1313 mutex_unlock(&cache_chain_mutex);
1314 break;
1315 }
1316 return err ? NOTIFY_BAD : NOTIFY_OK;
1317}
1318
1319static struct notifier_block __cpuinitdata cpucache_notifier = {
1320 &cpuup_callback, NULL, 0
1321};
1322
1323
1324
1325
1326static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1327 int nodeid)
1328{
1329 struct kmem_list3 *ptr;
1330
1331 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
1332 BUG_ON(!ptr);
1333
1334 memcpy(ptr, list, sizeof(struct kmem_list3));
1335
1336
1337
1338 spin_lock_init(&ptr->list_lock);
1339
1340 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1341 cachep->nodelists[nodeid] = ptr;
1342}
1343
1344
1345
1346
1347
1348static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1349{
1350 int node;
1351
1352 for_each_online_node(node) {
1353 cachep->nodelists[node] = &initkmem_list3[index + node];
1354 cachep->nodelists[node]->next_reap = jiffies +
1355 REAPTIMEOUT_LIST3 +
1356 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1357 }
1358}
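
/*
 * kmem_cache_init() bootstraps the allocator: it sets up cache_cache (the
 * cache of struct kmem_cache itself) using statically allocated per-CPU
 * arrays and node lists, creates the general kmalloc caches, and then
 * replaces the static bootstrap structures with properly allocated ones.
 * At the end g_cpucache_up is advanced to EARLY; the per-CPU caches are
 * finished later in kmem_cache_init_late().
 */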
1364void __init kmem_cache_init(void)
1365{
1366 size_t left_over;
1367 struct cache_sizes *sizes;
1368 struct cache_names *names;
1369 int i;
1370 int order;
1371 int node;
1372
1373 if (num_possible_nodes() == 1)
1374 use_alien_caches = 0;
1375
1376 for (i = 0; i < NUM_INIT_LISTS; i++) {
1377 kmem_list3_init(&initkmem_list3[i]);
1378 if (i < MAX_NUMNODES)
1379 cache_cache.nodelists[i] = NULL;
1380 }
1381 set_up_list3s(&cache_cache, CACHE_CACHE);
1382
1383
1384
1385
1386
1387 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1388 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410 node = numa_node_id();
1411
1412
1413 INIT_LIST_HEAD(&cache_chain);
1414 list_add(&cache_cache.next, &cache_chain);
1415 cache_cache.colour_off = cache_line_size();
1416 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1417 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1418
1419
1420
1421
1422
1423 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1424 nr_node_ids * sizeof(struct kmem_list3 *);
1425#if DEBUG
1426 cache_cache.obj_size = cache_cache.buffer_size;
1427#endif
1428 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1429 cache_line_size());
1430 cache_cache.reciprocal_buffer_size =
1431 reciprocal_value(cache_cache.buffer_size);
1432
1433 for (order = 0; order < MAX_ORDER; order++) {
1434 cache_estimate(order, cache_cache.buffer_size,
1435 cache_line_size(), 0, &left_over, &cache_cache.num);
1436 if (cache_cache.num)
1437 break;
1438 }
1439 BUG_ON(!cache_cache.num);
1440 cache_cache.gfporder = order;
1441 cache_cache.colour = left_over / cache_cache.colour_off;
1442 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1443 sizeof(struct slab), cache_line_size());
1444
1445
1446 sizes = malloc_sizes;
1447 names = cache_names;
1448
1449
1450
1451
1452
1453
1454
1455 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1456 sizes[INDEX_AC].cs_size,
1457 ARCH_KMALLOC_MINALIGN,
1458 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1459 NULL);
1460
1461 if (INDEX_AC != INDEX_L3) {
1462 sizes[INDEX_L3].cs_cachep =
1463 kmem_cache_create(names[INDEX_L3].name,
1464 sizes[INDEX_L3].cs_size,
1465 ARCH_KMALLOC_MINALIGN,
1466 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1467 NULL);
1468 }
1469
1470 slab_early_init = 0;
1471
1472 while (sizes->cs_size != ULONG_MAX) {
1473
1474
1475
1476
1477
1478
1479
1480 if (!sizes->cs_cachep) {
1481 sizes->cs_cachep = kmem_cache_create(names->name,
1482 sizes->cs_size,
1483 ARCH_KMALLOC_MINALIGN,
1484 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1485 NULL);
1486 }
1487#ifdef CONFIG_ZONE_DMA
1488 sizes->cs_dmacachep = kmem_cache_create(
1489 names->name_dma,
1490 sizes->cs_size,
1491 ARCH_KMALLOC_MINALIGN,
1492 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1493 SLAB_PANIC,
1494 NULL);
1495#endif
1496 sizes++;
1497 names++;
1498 }
1499
1500 {
1501 struct array_cache *ptr;
1502
1503 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1504
1505 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1506 memcpy(ptr, cpu_cache_get(&cache_cache),
1507 sizeof(struct arraycache_init));
1508
1509
1510
1511 spin_lock_init(&ptr->lock);
1512
1513 cache_cache.array[smp_processor_id()] = ptr;
1514
1515 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1516
1517 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1518 != &initarray_generic.cache);
1519 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1520 sizeof(struct arraycache_init));
1521
1522
1523
1524 spin_lock_init(&ptr->lock);
1525
1526 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1527 ptr;
1528 }
1529
1530 {
1531 int nid;
1532
1533 for_each_online_node(nid) {
1534 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1535
1536 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1537 &initkmem_list3[SIZE_AC + nid], nid);
1538
1539 if (INDEX_AC != INDEX_L3) {
1540 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1541 &initkmem_list3[SIZE_L3 + nid], nid);
1542 }
1543 }
1544 }
1545
1546 g_cpucache_up = EARLY;
1547}
1548
1549void __init kmem_cache_init_late(void)
1550{
1551 struct kmem_cache *cachep;
1552
1553
1554 mutex_lock(&cache_chain_mutex);
1555 list_for_each_entry(cachep, &cache_chain, next)
1556 if (enable_cpucache(cachep, GFP_NOWAIT))
1557 BUG();
1558 mutex_unlock(&cache_chain_mutex);
1559
1560
1561 g_cpucache_up = FULL;
1562
1563
1564 init_lock_keys();
1565
1566
1567
1568
1569
1570 register_cpu_notifier(&cpucache_notifier);
1571
1572
1573
1574
1575
1576}
1577
1578static int __init cpucache_init(void)
1579{
1580 int cpu;
1581
1582
1583
1584
1585 for_each_online_cpu(cpu)
1586 start_cpu_timer(cpu);
1587 return 0;
1588}
1589__initcall(cpucache_init);
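
/*
 * kmem_getpages() allocates the pages for a new slab from the page
 * allocator on the requested node, updates the per-zone slab page
 * statistics, marks each page with PageSlab and hands the pages to
 * kmemcheck when that is enabled.
 */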
1598static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1599{
1600 struct page *page;
1601 int nr_pages;
1602 int i;
1603
1604#ifndef CONFIG_MMU
1605
1606
1607
1608
1609 flags |= __GFP_COMP;
1610#endif
1611
1612 flags |= cachep->gfpflags;
1613 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1614 flags |= __GFP_RECLAIMABLE;
1615
1616 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1617 if (!page)
1618 return NULL;
1619
1620 nr_pages = (1 << cachep->gfporder);
1621 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1622 add_zone_page_state(page_zone(page),
1623 NR_SLAB_RECLAIMABLE, nr_pages);
1624 else
1625 add_zone_page_state(page_zone(page),
1626 NR_SLAB_UNRECLAIMABLE, nr_pages);
1627 for (i = 0; i < nr_pages; i++)
1628 __SetPageSlab(page + i);
1629
1630 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1631 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1632
1633 if (cachep->ctor)
1634 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1635 else
1636 kmemcheck_mark_unallocated_pages(page, nr_pages);
1637 }
1638
1639 return page_address(page);
1640}
1641
1642
1643
1644
1645static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1646{
1647 unsigned long i = (1 << cachep->gfporder);
1648 struct page *page = virt_to_page(addr);
1649 const unsigned long nr_freed = i;
1650
1651 kmemcheck_free_shadow(page, cachep->gfporder);
1652
1653 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1654 sub_zone_page_state(page_zone(page),
1655 NR_SLAB_RECLAIMABLE, nr_freed);
1656 else
1657 sub_zone_page_state(page_zone(page),
1658 NR_SLAB_UNRECLAIMABLE, nr_freed);
1659 while (i--) {
1660 BUG_ON(!PageSlab(page));
1661 __ClearPageSlab(page);
1662 page++;
1663 }
1664 if (current->reclaim_state)
1665 current->reclaim_state->reclaimed_slab += nr_freed;
1666 free_pages((unsigned long)addr, cachep->gfporder);
1667}
1668
1669static void kmem_rcu_free(struct rcu_head *head)
1670{
1671 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1672 struct kmem_cache *cachep = slab_rcu->cachep;
1673
1674 kmem_freepages(cachep, slab_rcu->addr);
1675 if (OFF_SLAB(cachep))
1676 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1677}
1678
1679#if DEBUG
1680
1681#ifdef CONFIG_DEBUG_PAGEALLOC
1682static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1683 unsigned long caller)
1684{
1685 int size = obj_size(cachep);
1686
1687 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1688
1689 if (size < 5 * sizeof(unsigned long))
1690 return;
1691
1692 *addr++ = 0x12345678;
1693 *addr++ = caller;
1694 *addr++ = smp_processor_id();
1695 size -= 3 * sizeof(unsigned long);
1696 {
1697 unsigned long *sptr = &caller;
1698 unsigned long svalue;
1699
1700 while (!kstack_end(sptr)) {
1701 svalue = *sptr++;
1702 if (kernel_text_address(svalue)) {
1703 *addr++ = svalue;
1704 size -= sizeof(unsigned long);
1705 if (size <= sizeof(unsigned long))
1706 break;
1707 }
1708 }
1709
1710 }
1711 *addr++ = 0x87654321;
1712}
1713#endif
1714
1715static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1716{
1717 int size = obj_size(cachep);
1718 addr = &((char *)addr)[obj_offset(cachep)];
1719
1720 memset(addr, val, size);
1721 *(unsigned char *)(addr + size - 1) = POISON_END;
1722}
1723
1724static void dump_line(char *data, int offset, int limit)
1725{
1726 int i;
1727 unsigned char error = 0;
1728 int bad_count = 0;
1729
1730 printk(KERN_ERR "%03x:", offset);
1731 for (i = 0; i < limit; i++) {
1732 if (data[offset + i] != POISON_FREE) {
1733 error = data[offset + i];
1734 bad_count++;
1735 }
1736 printk(" %02x", (unsigned char)data[offset + i]);
1737 }
1738 printk("\n");
1739
1740 if (bad_count == 1) {
1741 error ^= POISON_FREE;
1742 if (!(error & (error - 1))) {
1743 printk(KERN_ERR "Single bit error detected. Probably "
1744 "bad RAM.\n");
1745#ifdef CONFIG_X86
1746 printk(KERN_ERR "Run memtest86+ or a similar memory "
1747 "test tool.\n");
1748#else
1749 printk(KERN_ERR "Run a memory test tool.\n");
1750#endif
1751 }
1752 }
1753}
1754#endif
1755
1756#if DEBUG
1757
1758static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1759{
1760 int i, size;
1761 char *realobj;
1762
1763 if (cachep->flags & SLAB_RED_ZONE) {
1764 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1765 *dbg_redzone1(cachep, objp),
1766 *dbg_redzone2(cachep, objp));
1767 }
1768
1769 if (cachep->flags & SLAB_STORE_USER) {
1770 printk(KERN_ERR "Last user: [<%p>]",
1771 *dbg_userword(cachep, objp));
1772 print_symbol("(%s)",
1773 (unsigned long)*dbg_userword(cachep, objp));
1774 printk("\n");
1775 }
1776 realobj = (char *)objp + obj_offset(cachep);
1777 size = obj_size(cachep);
1778 for (i = 0; i < size && lines; i += 16, lines--) {
1779 int limit;
1780 limit = 16;
1781 if (i + limit > size)
1782 limit = size - i;
1783 dump_line(realobj, i, limit);
1784 }
1785}
1786
1787static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1788{
1789 char *realobj;
1790 int size, i;
1791 int lines = 0;
1792
1793 realobj = (char *)objp + obj_offset(cachep);
1794 size = obj_size(cachep);
1795
1796 for (i = 0; i < size; i++) {
1797 char exp = POISON_FREE;
1798 if (i == size - 1)
1799 exp = POISON_END;
1800 if (realobj[i] != exp) {
1801 int limit;
1802
1803
1804 if (lines == 0) {
1805 printk(KERN_ERR
1806 "Slab corruption: %s start=%p, len=%d\n",
1807 cachep->name, realobj, size);
1808 print_objinfo(cachep, objp, 0);
1809 }
1810
1811 i = (i / 16) * 16;
1812 limit = 16;
1813 if (i + limit > size)
1814 limit = size - i;
1815 dump_line(realobj, i, limit);
1816 i += 16;
1817 lines++;
1818
1819 if (lines > 5)
1820 break;
1821 }
1822 }
1823 if (lines != 0) {
1824
1825
1826
1827 struct slab *slabp = virt_to_slab(objp);
1828 unsigned int objnr;
1829
1830 objnr = obj_to_index(cachep, slabp, objp);
1831 if (objnr) {
1832 objp = index_to_obj(cachep, slabp, objnr - 1);
1833 realobj = (char *)objp + obj_offset(cachep);
1834 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1835 realobj, size);
1836 print_objinfo(cachep, objp, 2);
1837 }
1838 if (objnr + 1 < cachep->num) {
1839 objp = index_to_obj(cachep, slabp, objnr + 1);
1840 realobj = (char *)objp + obj_offset(cachep);
1841 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1842 realobj, size);
1843 print_objinfo(cachep, objp, 2);
1844 }
1845 }
1846}
1847#endif
1848
1849#if DEBUG
1850static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1851{
1852 int i;
1853 for (i = 0; i < cachep->num; i++) {
1854 void *objp = index_to_obj(cachep, slabp, i);
1855
1856 if (cachep->flags & SLAB_POISON) {
1857#ifdef CONFIG_DEBUG_PAGEALLOC
1858 if (cachep->buffer_size % PAGE_SIZE == 0 &&
1859 OFF_SLAB(cachep))
1860 kernel_map_pages(virt_to_page(objp),
1861 cachep->buffer_size / PAGE_SIZE, 1);
1862 else
1863 check_poison_obj(cachep, objp);
1864#else
1865 check_poison_obj(cachep, objp);
1866#endif
1867 }
1868 if (cachep->flags & SLAB_RED_ZONE) {
1869 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
1870 slab_error(cachep, "start of a freed object "
1871 "was overwritten");
1872 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
1873 slab_error(cachep, "end of a freed object "
1874 "was overwritten");
1875 }
1876 }
1877}
1878#else
1879static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
1880{
1881}
1882#endif
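
/*
 * slab_destroy() frees the pages backing a slab.  For SLAB_DESTROY_BY_RCU
 * caches the actual freeing is deferred to kmem_rcu_free() after a grace
 * period; otherwise the pages are returned immediately and an off-slab
 * management structure is released back to its cache.
 */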
1893static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1894{
1895 void *addr = slabp->s_mem - slabp->colouroff;
1896
1897 slab_destroy_debugcheck(cachep, slabp);
1898 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
1899 struct slab_rcu *slab_rcu;
1900
1901 slab_rcu = (struct slab_rcu *)slabp;
1902 slab_rcu->cachep = cachep;
1903 slab_rcu->addr = addr;
1904 call_rcu(&slab_rcu->head, kmem_rcu_free);
1905 } else {
1906 kmem_freepages(cachep, addr);
1907 if (OFF_SLAB(cachep))
1908 kmem_cache_free(cachep->slabp_cache, slabp);
1909 }
1910}
1911
1912static void __kmem_cache_destroy(struct kmem_cache *cachep)
1913{
1914 int i;
1915 struct kmem_list3 *l3;
1916
1917 for_each_online_cpu(i)
1918 kfree(cachep->array[i]);
1919
1920
1921 for_each_online_node(i) {
1922 l3 = cachep->nodelists[i];
1923 if (l3) {
1924 kfree(l3->shared);
1925 free_alien_cache(l3->alien);
1926 kfree(l3);
1927 }
1928 }
1929 kmem_cache_free(&cache_cache, cachep);
1930}
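
/*
 * calculate_slab_order() picks the page order for a new cache: it walks the
 * possible orders, estimates objects and wasted space for each via
 * cache_estimate(), and stops early for reclaimable caches, once
 * slab_break_gfp_order is reached, or when internal fragmentation drops
 * below roughly 1/8 of the slab.
 */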
1946static size_t calculate_slab_order(struct kmem_cache *cachep,
1947 size_t size, size_t align, unsigned long flags)
1948{
1949 unsigned long offslab_limit;
1950 size_t left_over = 0;
1951 int gfporder;
1952
1953 for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
1954 unsigned int num;
1955 size_t remainder;
1956
1957 cache_estimate(gfporder, size, align, flags, &remainder, &num);
1958 if (!num)
1959 continue;
1960
1961 if (flags & CFLGS_OFF_SLAB) {
1962
1963
1964
1965
1966
1967 offslab_limit = size - sizeof(struct slab);
1968 offslab_limit /= sizeof(kmem_bufctl_t);
1969
1970 if (num > offslab_limit)
1971 break;
1972 }
1973
1974
1975 cachep->num = num;
1976 cachep->gfporder = gfporder;
1977 left_over = remainder;
1978
1979
1980
1981
1982
1983
1984 if (flags & SLAB_RECLAIM_ACCOUNT)
1985 break;
1986
1987
1988
1989
1990
1991 if (gfporder >= slab_break_gfp_order)
1992 break;
1993
1994
1995
1996
1997 if (left_over * 8 <= (PAGE_SIZE << gfporder))
1998 break;
1999 }
2000 return left_over;
2001}
2002
2003static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2004{
2005 if (g_cpucache_up == FULL)
2006 return enable_cpucache(cachep, gfp);
2007
2008 if (g_cpucache_up == NONE) {
2009
2010
2011
2012
2013
2014 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2015
2016
2017
2018
2019
2020
2021 set_up_list3s(cachep, SIZE_AC);
2022 if (INDEX_AC == INDEX_L3)
2023 g_cpucache_up = PARTIAL_L3;
2024 else
2025 g_cpucache_up = PARTIAL_AC;
2026 } else {
2027 cachep->array[smp_processor_id()] =
2028 kmalloc(sizeof(struct arraycache_init), gfp);
2029
2030 if (g_cpucache_up == PARTIAL_AC) {
2031 set_up_list3s(cachep, SIZE_L3);
2032 g_cpucache_up = PARTIAL_L3;
2033 } else {
2034 int node;
2035 for_each_online_node(node) {
2036 cachep->nodelists[node] =
2037 kmalloc_node(sizeof(struct kmem_list3),
2038 gfp, node);
2039 BUG_ON(!cachep->nodelists[node]);
2040 kmem_list3_init(cachep->nodelists[node]);
2041 }
2042 }
2043 }
2044 cachep->nodelists[numa_node_id()]->next_reap =
2045 jiffies + REAPTIMEOUT_LIST3 +
2046 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2047
2048 cpu_cache_get(cachep)->avail = 0;
2049 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2050 cpu_cache_get(cachep)->batchcount = 1;
2051 cpu_cache_get(cachep)->touched = 0;
2052 cachep->batchcount = 1;
2053 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2054 return 0;
2055}
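
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags (must be within CREATE_MASK).
 * @ctor: A constructor for the objects.
 *
 * Returns a pointer to the cache on success, NULL on failure (or panics if
 * SLAB_PANIC was passed).  Cannot be called within an interrupt.  The @ctor
 * is run when new pages are allocated by the cache.
 */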
2086struct kmem_cache *
2087kmem_cache_create (const char *name, size_t size, size_t align,
2088 unsigned long flags, void (*ctor)(void *))
2089{
2090 size_t left_over, slab_size, ralign;
2091 struct kmem_cache *cachep = NULL, *pc;
2092 gfp_t gfp;
2093
2094
2095
2096
2097 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
2098 size > KMALLOC_MAX_SIZE) {
2099 printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
2100 name);
2101 BUG();
2102 }
2103
2104
2105
2106
2107
2108 if (slab_is_available()) {
2109 get_online_cpus();
2110 mutex_lock(&cache_chain_mutex);
2111 }
2112
2113 list_for_each_entry(pc, &cache_chain, next) {
2114 char tmp;
2115 int res;
2116
2117
2118
2119
2120
2121
2122 res = probe_kernel_address(pc->name, tmp);
2123 if (res) {
2124 printk(KERN_ERR
2125 "SLAB: cache with size %d has lost its name\n",
2126 pc->buffer_size);
2127 continue;
2128 }
2129
2130 if (!strcmp(pc->name, name)) {
2131 printk(KERN_ERR
2132 "kmem_cache_create: duplicate cache %s\n", name);
2133 dump_stack();
2134 goto oops;
2135 }
2136 }
2137
2138#if DEBUG
2139 WARN_ON(strchr(name, ' '));
2140#if FORCED_DEBUG
2141
2142
2143
2144
2145
2146
2147 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2148 2 * sizeof(unsigned long long)))
2149 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2150 if (!(flags & SLAB_DESTROY_BY_RCU))
2151 flags |= SLAB_POISON;
2152#endif
2153 if (flags & SLAB_DESTROY_BY_RCU)
2154 BUG_ON(flags & SLAB_POISON);
2155#endif
2156
2157
2158
2159
2160 BUG_ON(flags & ~CREATE_MASK);
2161
2162
2163
2164
2165
2166
2167 if (size & (BYTES_PER_WORD - 1)) {
2168 size += (BYTES_PER_WORD - 1);
2169 size &= ~(BYTES_PER_WORD - 1);
2170 }
2171
2172
2173
2174
2175 if (flags & SLAB_HWCACHE_ALIGN) {
2176
2177
2178
2179
2180
2181 ralign = cache_line_size();
2182 while (size <= ralign / 2)
2183 ralign /= 2;
2184 } else {
2185 ralign = BYTES_PER_WORD;
2186 }
2187
2188
2189
2190
2191
2192
2193 if (flags & SLAB_STORE_USER)
2194 ralign = BYTES_PER_WORD;
2195
2196 if (flags & SLAB_RED_ZONE) {
2197 ralign = REDZONE_ALIGN;
2198
2199
2200 size += REDZONE_ALIGN - 1;
2201 size &= ~(REDZONE_ALIGN - 1);
2202 }
2203
2204
2205 if (ralign < ARCH_SLAB_MINALIGN) {
2206 ralign = ARCH_SLAB_MINALIGN;
2207 }
2208
2209 if (ralign < align) {
2210 ralign = align;
2211 }
2212
2213 if (ralign > __alignof__(unsigned long long))
2214 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2215
2216
2217
2218 align = ralign;
2219
2220 if (slab_is_available())
2221 gfp = GFP_KERNEL;
2222 else
2223 gfp = GFP_NOWAIT;
2224
2225
2226 cachep = kmem_cache_zalloc(&cache_cache, gfp);
2227 if (!cachep)
2228 goto oops;
2229
2230#if DEBUG
2231 cachep->obj_size = size;
2232
2233
2234
2235
2236
2237 if (flags & SLAB_RED_ZONE) {
2238
2239 cachep->obj_offset += sizeof(unsigned long long);
2240 size += 2 * sizeof(unsigned long long);
2241 }
2242 if (flags & SLAB_STORE_USER) {
2243
2244
2245
2246
2247 if (flags & SLAB_RED_ZONE)
2248 size += REDZONE_ALIGN;
2249 else
2250 size += BYTES_PER_WORD;
2251 }
2252#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2253 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
2254 && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
2255 cachep->obj_offset += PAGE_SIZE - size;
2256 size = PAGE_SIZE;
2257 }
2258#endif
2259#endif
2260
2261
2262
2263
2264
2265
2266 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
2267
2268
2269
2270
2271 flags |= CFLGS_OFF_SLAB;
2272
2273 size = ALIGN(size, align);
2274
2275 left_over = calculate_slab_order(cachep, size, align, flags);
2276
2277 if (!cachep->num) {
2278 printk(KERN_ERR
2279 "kmem_cache_create: couldn't create cache %s.\n", name);
2280 kmem_cache_free(&cache_cache, cachep);
2281 cachep = NULL;
2282 goto oops;
2283 }
2284 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2285 + sizeof(struct slab), align);
2286
2287
2288
2289
2290
2291 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2292 flags &= ~CFLGS_OFF_SLAB;
2293 left_over -= slab_size;
2294 }
2295
2296 if (flags & CFLGS_OFF_SLAB) {
2297
2298 slab_size =
2299 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2300
2301#ifdef CONFIG_PAGE_POISONING
2302
2303
2304
2305
2306 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2307 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2308#endif
2309 }
2310
2311 cachep->colour_off = cache_line_size();
2312
2313 if (cachep->colour_off < align)
2314 cachep->colour_off = align;
2315 cachep->colour = left_over / cachep->colour_off;
2316 cachep->slab_size = slab_size;
2317 cachep->flags = flags;
2318 cachep->gfpflags = 0;
2319 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2320 cachep->gfpflags |= GFP_DMA;
2321 cachep->buffer_size = size;
2322 cachep->reciprocal_buffer_size = reciprocal_value(size);
2323
2324 if (flags & CFLGS_OFF_SLAB) {
2325 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
2326
2327
2328
2329
2330
2331
2332
2333 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2334 }
2335 cachep->ctor = ctor;
2336 cachep->name = name;
2337
2338 if (setup_cpu_cache(cachep, gfp)) {
2339 __kmem_cache_destroy(cachep);
2340 cachep = NULL;
2341 goto oops;
2342 }
2343
2344
2345 list_add(&cachep->next, &cache_chain);
2346oops:
2347 if (!cachep && (flags & SLAB_PANIC))
2348 panic("kmem_cache_create(): failed to create slab `%s'\n",
2349 name);
2350 if (slab_is_available()) {
2351 mutex_unlock(&cache_chain_mutex);
2352 put_online_cpus();
2353 }
2354 return cachep;
2355}
2356EXPORT_SYMBOL(kmem_cache_create);
2357
2358#if DEBUG
2359static void check_irq_off(void)
2360{
2361 BUG_ON(!irqs_disabled());
2362}
2363
2364static void check_irq_on(void)
2365{
2366 BUG_ON(irqs_disabled());
2367}
2368
2369static void check_spinlock_acquired(struct kmem_cache *cachep)
2370{
2371#ifdef CONFIG_SMP
2372 check_irq_off();
2373 assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock);
2374#endif
2375}
2376
2377static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2378{
2379#ifdef CONFIG_SMP
2380 check_irq_off();
2381 assert_spin_locked(&cachep->nodelists[node]->list_lock);
2382#endif
2383}
2384
2385#else
2386#define check_irq_off() do { } while(0)
2387#define check_irq_on() do { } while(0)
2388#define check_spinlock_acquired(x) do { } while(0)
2389#define check_spinlock_acquired_node(x, y) do { } while(0)
2390#endif
2391
2392static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2393 struct array_cache *ac,
2394 int force, int node);
2395
2396static void do_drain(void *arg)
2397{
2398 struct kmem_cache *cachep = arg;
2399 struct array_cache *ac;
2400 int node = numa_node_id();
2401
2402 check_irq_off();
2403 ac = cpu_cache_get(cachep);
2404 spin_lock(&cachep->nodelists[node]->list_lock);
2405 free_block(cachep, ac->entry, ac->avail, node);
2406 spin_unlock(&cachep->nodelists[node]->list_lock);
2407 ac->avail = 0;
2408}
2409
2410static void drain_cpu_caches(struct kmem_cache *cachep)
2411{
2412 struct kmem_list3 *l3;
2413 int node;
2414
2415 on_each_cpu(do_drain, cachep, 1);
2416 check_irq_on();
2417 for_each_online_node(node) {
2418 l3 = cachep->nodelists[node];
2419 if (l3 && l3->alien)
2420 drain_alien_cache(cachep, l3->alien);
2421 }
2422
2423 for_each_online_node(node) {
2424 l3 = cachep->nodelists[node];
2425 if (l3)
2426 drain_array(cachep, l3, l3->shared, 1, node);
2427 }
2428}
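
/*
 * drain_freelist() removes up to @tofree completely free slabs from the
 * node's slabs_free list and destroys them, returning how many were
 * actually freed.
 */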
2436static int drain_freelist(struct kmem_cache *cache,
2437 struct kmem_list3 *l3, int tofree)
2438{
2439 struct list_head *p;
2440 int nr_freed;
2441 struct slab *slabp;
2442
2443 nr_freed = 0;
2444 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
2445
2446 spin_lock_irq(&l3->list_lock);
2447 p = l3->slabs_free.prev;
2448 if (p == &l3->slabs_free) {
2449 spin_unlock_irq(&l3->list_lock);
2450 goto out;
2451 }
2452
2453 slabp = list_entry(p, struct slab, list);
2454#if DEBUG
2455 BUG_ON(slabp->inuse);
2456#endif
2457 list_del(&slabp->list);
2458
2459
2460
2461
2462 l3->free_objects -= cache->num;
2463 spin_unlock_irq(&l3->list_lock);
2464 slab_destroy(cache, slabp);
2465 nr_freed++;
2466 }
2467out:
2468 return nr_freed;
2469}
2470
2471
2472static int __cache_shrink(struct kmem_cache *cachep)
2473{
2474 int ret = 0, i = 0;
2475 struct kmem_list3 *l3;
2476
2477 drain_cpu_caches(cachep);
2478
2479 check_irq_on();
2480 for_each_online_node(i) {
2481 l3 = cachep->nodelists[i];
2482 if (!l3)
2483 continue;
2484
2485 drain_freelist(cachep, l3, l3->free_objects);
2486
2487 ret += !list_empty(&l3->slabs_full) ||
2488 !list_empty(&l3->slabs_partial);
2489 }
2490 return (ret ? 1 : 0);
2491}
2492
2493
2494
2495
2496
2497
2498
2499
2500int kmem_cache_shrink(struct kmem_cache *cachep)
2501{
2502 int ret;
2503 BUG_ON(!cachep || in_interrupt());
2504
2505 get_online_cpus();
2506 mutex_lock(&cache_chain_mutex);
2507 ret = __cache_shrink(cachep);
2508 mutex_unlock(&cache_chain_mutex);
2509 put_online_cpus();
2510 return ret;
2511}
2512EXPORT_SYMBOL(kmem_cache_shrink);
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530void kmem_cache_destroy(struct kmem_cache *cachep)
2531{
2532 BUG_ON(!cachep || in_interrupt());
2533
2534
2535 get_online_cpus();
2536 mutex_lock(&cache_chain_mutex);
2537
2538
2539
2540 list_del(&cachep->next);
2541 if (__cache_shrink(cachep)) {
2542 slab_error(cachep, "Can't free all objects");
2543 list_add(&cachep->next, &cache_chain);
2544 mutex_unlock(&cache_chain_mutex);
2545 put_online_cpus();
2546 return;
2547 }
2548
2549 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
2550 rcu_barrier();
2551
2552 __kmem_cache_destroy(cachep);
2553 mutex_unlock(&cache_chain_mutex);
2554 put_online_cpus();
2555}
2556EXPORT_SYMBOL(kmem_cache_destroy);
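
/*
 * alloc_slabmgmt() sets up the struct slab for a new slab: either inside
 * the slab itself (after the colour offset) or, for CFLGS_OFF_SLAB caches,
 * in a separate allocation from cachep->slabp_cache.
 */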
2569static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2570 int colour_off, gfp_t local_flags,
2571 int nodeid)
2572{
2573 struct slab *slabp;
2574
2575 if (OFF_SLAB(cachep)) {
2576
2577 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2578 local_flags, nodeid);
2579
2580
2581
2582
2583
2584
2585 kmemleak_scan_area(slabp, offsetof(struct slab, list),
2586 sizeof(struct list_head), local_flags);
2587 if (!slabp)
2588 return NULL;
2589 } else {
2590 slabp = objp + colour_off;
2591 colour_off += cachep->slab_size;
2592 }
2593 slabp->inuse = 0;
2594 slabp->colouroff = colour_off;
2595 slabp->s_mem = objp + colour_off;
2596 slabp->nodeid = nodeid;
2597 slabp->free = 0;
2598 return slabp;
2599}
2600
2601static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
2602{
2603 return (kmem_bufctl_t *) (slabp + 1);
2604}
2605
2606static void cache_init_objs(struct kmem_cache *cachep,
2607 struct slab *slabp)
2608{
2609 int i;
2610
2611 for (i = 0; i < cachep->num; i++) {
2612 void *objp = index_to_obj(cachep, slabp, i);
2613#if DEBUG
2614
2615 if (cachep->flags & SLAB_POISON)
2616 poison_obj(cachep, objp, POISON_FREE);
2617 if (cachep->flags & SLAB_STORE_USER)
2618 *dbg_userword(cachep, objp) = NULL;
2619
2620 if (cachep->flags & SLAB_RED_ZONE) {
2621 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2622 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2623 }
2624
2625
2626
2627
2628
2629 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2630 cachep->ctor(objp + obj_offset(cachep));
2631
2632 if (cachep->flags & SLAB_RED_ZONE) {
2633 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2634 slab_error(cachep, "constructor overwrote the"
2635 " end of an object");
2636 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2637 slab_error(cachep, "constructor overwrote the"
2638 " start of an object");
2639 }
2640 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2641 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2642 kernel_map_pages(virt_to_page(objp),
2643 cachep->buffer_size / PAGE_SIZE, 0);
2644#else
2645 if (cachep->ctor)
2646 cachep->ctor(objp);
2647#endif
2648 slab_bufctl(slabp)[i] = i + 1;
2649 }
2650 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2651}
2652
2653static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2654{
2655 if (CONFIG_ZONE_DMA_FLAG) {
2656 if (flags & GFP_DMA)
2657 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2658 else
2659 BUG_ON(cachep->gfpflags & GFP_DMA);
2660 }
2661}
2662
2663static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2664 int nodeid)
2665{
2666 void *objp = index_to_obj(cachep, slabp, slabp->free);
2667 kmem_bufctl_t next;
2668
2669 slabp->inuse++;
2670 next = slab_bufctl(slabp)[slabp->free];
2671#if DEBUG
2672 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
2673 WARN_ON(slabp->nodeid != nodeid);
2674#endif
2675 slabp->free = next;
2676
2677 return objp;
2678}
2679
2680static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2681 void *objp, int nodeid)
2682{
2683 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2684
2685#if DEBUG
2686
2687 WARN_ON(slabp->nodeid != nodeid);
2688
2689 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
2690 printk(KERN_ERR "slab: double free detected in cache "
2691 "'%s', objp %p\n", cachep->name, objp);
2692 BUG();
2693 }
2694#endif
2695 slab_bufctl(slabp)[objnr] = slabp->free;
2696 slabp->free = objnr;
2697 slabp->inuse--;
2698}
2699
2700
2701
2702
2703
2704
2705static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2706 void *addr)
2707{
2708 int nr_pages;
2709 struct page *page;
2710
2711 page = virt_to_page(addr);
2712
2713 nr_pages = 1;
2714 if (likely(!PageCompound(page)))
2715 nr_pages <<= cache->gfporder;
2716
2717 do {
2718 page_set_cache(page, cache);
2719 page_set_slab(page, slab);
2720 page++;
2721 } while (--nr_pages);
2722}
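
/*
 * cache_grow() adds a new, fully initialised slab to the cache on the given
 * node: it picks the next colour offset, allocates the pages (or uses @objp
 * if the caller already has them), builds the slab management data,
 * constructs the objects and puts the slab on the node's free list.
 */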
2728static int cache_grow(struct kmem_cache *cachep,
2729 gfp_t flags, int nodeid, void *objp)
2730{
2731 struct slab *slabp;
2732 size_t offset;
2733 gfp_t local_flags;
2734 struct kmem_list3 *l3;
2735
2736
2737
2738
2739
2740 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2741 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2742
2743
2744 check_irq_off();
2745 l3 = cachep->nodelists[nodeid];
2746 spin_lock(&l3->list_lock);
2747
2748
2749 offset = l3->colour_next;
2750 l3->colour_next++;
2751 if (l3->colour_next >= cachep->colour)
2752 l3->colour_next = 0;
2753 spin_unlock(&l3->list_lock);
2754
2755 offset *= cachep->colour_off;
2756
2757 if (local_flags & __GFP_WAIT)
2758 local_irq_enable();
2759
2760
2761
2762
2763
2764
2765
2766 kmem_flagcheck(cachep, flags);
2767
2768
2769
2770
2771
2772 if (!objp)
2773 objp = kmem_getpages(cachep, local_flags, nodeid);
2774 if (!objp)
2775 goto failed;
2776
2777
2778 slabp = alloc_slabmgmt(cachep, objp, offset,
2779 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2780 if (!slabp)
2781 goto opps1;
2782
2783 slab_map_pages(cachep, slabp, objp);
2784
2785 cache_init_objs(cachep, slabp);
2786
2787 if (local_flags & __GFP_WAIT)
2788 local_irq_disable();
2789 check_irq_off();
2790 spin_lock(&l3->list_lock);
2791
2792
2793 list_add_tail(&slabp->list, &(l3->slabs_free));
2794 STATS_INC_GROWN(cachep);
2795 l3->free_objects += cachep->num;
2796 spin_unlock(&l3->list_lock);
2797 return 1;
2798opps1:
2799 kmem_freepages(cachep, objp);
2800failed:
2801 if (local_flags & __GFP_WAIT)
2802 local_irq_disable();
2803 return 0;
2804}
2805
2806#if DEBUG
2807
2808
2809
2810
2811
2812
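/*
 * Perform extra freeing checks:
 * - detect bad pointers.
 * - POISON/RED_ZONE checking
 */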
2813static void kfree_debugcheck(const void *objp)
2814{
2815 if (!virt_addr_valid(objp)) {
2816 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2817 (unsigned long)objp);
2818 BUG();
2819 }
2820}
2821
2822static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2823{
2824 unsigned long long redzone1, redzone2;
2825
2826 redzone1 = *dbg_redzone1(cache, obj);
2827 redzone2 = *dbg_redzone2(cache, obj);
2828
2829
2830
2831
2832 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2833 return;
2834
2835 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2836 slab_error(cache, "double free detected");
2837 else
2838 slab_error(cache, "memory outside object was overwritten");
2839
2840 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2841 obj, redzone1, redzone2);
2842}
2843
2844static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2845 void *caller)
2846{
2847 struct page *page;
2848 unsigned int objnr;
2849 struct slab *slabp;
2850
2851 BUG_ON(virt_to_cache(objp) != cachep);
2852
2853 objp -= obj_offset(cachep);
2854 kfree_debugcheck(objp);
2855 page = virt_to_head_page(objp);
2856
2857 slabp = page_get_slab(page);
2858
2859 if (cachep->flags & SLAB_RED_ZONE) {
2860 verify_redzone_free(cachep, objp);
2861 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2862 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2863 }
2864 if (cachep->flags & SLAB_STORE_USER)
2865 *dbg_userword(cachep, objp) = caller;
2866
2867 objnr = obj_to_index(cachep, slabp, objp);
2868
2869 BUG_ON(objnr >= cachep->num);
2870 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2871
2872#ifdef CONFIG_DEBUG_SLAB_LEAK
2873 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2874#endif
2875 if (cachep->flags & SLAB_POISON) {
2876#ifdef CONFIG_DEBUG_PAGEALLOC
2877 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2878 store_stackinfo(cachep, objp, (unsigned long)caller);
2879 kernel_map_pages(virt_to_page(objp),
2880 cachep->buffer_size / PAGE_SIZE, 0);
2881 } else {
2882 poison_obj(cachep, objp, POISON_FREE);
2883 }
2884#else
2885 poison_obj(cachep, objp, POISON_FREE);
2886#endif
2887 }
2888 return objp;
2889}
2890
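/*
 * Walk the bufctl free list of a slab and verify that it is consistent
 * with the number of objects in use; BUG with a hexdump on corruption.
 */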
2891static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2892{
2893 kmem_bufctl_t i;
2894 int entries = 0;
2895
2896
2897 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2898 entries++;
2899 if (entries > cachep->num || i >= cachep->num)
2900 goto bad;
2901 }
2902 if (entries != cachep->num - slabp->inuse) {
2903bad:
2904 printk(KERN_ERR "slab: Internal list corruption detected in "
2905 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2906 cachep->name, cachep->num, slabp, slabp->inuse);
2907 for (i = 0;
2908 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2909 i++) {
2910 if (i % 16 == 0)
2911 printk("\n%03x:", i);
2912 printk(" %02x", ((unsigned char *)slabp)[i]);
2913 }
2914 printk("\n");
2915 BUG();
2916 }
2917}
2918#else
2919#define kfree_debugcheck(x) do { } while(0)
2920#define cache_free_debugcheck(x,objp,z) (objp)
2921#define check_slabp(x,y) do { } while(0)
2922#endif
2923
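/*
 * Refill the per-cpu array cache: first from the node's shared array,
 * then from partial/free slabs on the local node, growing the cache
 * if nothing is available.
 */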
2924static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2925{
2926 int batchcount;
2927 struct kmem_list3 *l3;
2928 struct array_cache *ac;
2929 int node;
2930
2931retry:
2932 check_irq_off();
2933 node = numa_node_id();
2934 ac = cpu_cache_get(cachep);
2935 batchcount = ac->batchcount;
2936 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2937
2938
2939
2940
2941
2942 batchcount = BATCHREFILL_LIMIT;
2943 }
2944 l3 = cachep->nodelists[node];
2945
2946 BUG_ON(ac->avail > 0 || !l3);
2947 spin_lock(&l3->list_lock);
2948
2949
2950 if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
2951 goto alloc_done;
2952
2953 while (batchcount > 0) {
2954 struct list_head *entry;
2955 struct slab *slabp;
2956
2957 entry = l3->slabs_partial.next;
2958 if (entry == &l3->slabs_partial) {
2959 l3->free_touched = 1;
2960 entry = l3->slabs_free.next;
2961 if (entry == &l3->slabs_free)
2962 goto must_grow;
2963 }
2964
2965 slabp = list_entry(entry, struct slab, list);
2966 check_slabp(cachep, slabp);
2967 check_spinlock_acquired(cachep);
2968
2969
2970
2971
2972
2973
2974 BUG_ON(slabp->inuse >= cachep->num);
2975
2976 while (slabp->inuse < cachep->num && batchcount--) {
2977 STATS_INC_ALLOCED(cachep);
2978 STATS_INC_ACTIVE(cachep);
2979 STATS_SET_HIGH(cachep);
2980
2981 ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
2982 node);
2983 }
2984 check_slabp(cachep, slabp);
2985
2986
2987 list_del(&slabp->list);
2988 if (slabp->free == BUFCTL_END)
2989 list_add(&slabp->list, &l3->slabs_full);
2990 else
2991 list_add(&slabp->list, &l3->slabs_partial);
2992 }
2993
2994must_grow:
2995 l3->free_objects -= ac->avail;
2996alloc_done:
2997 spin_unlock(&l3->list_lock);
2998
2999 if (unlikely(!ac->avail)) {
3000 int x;
3001 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3002
3003
3004 ac = cpu_cache_get(cachep);
3005 if (!x && ac->avail == 0)
3006 return NULL;
3007
3008 if (!ac->avail)
3009 goto retry;
3010 }
3011 ac->touched = 1;
3012 return ac->entry[--ac->avail];
3013}
3014
3015static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3016 gfp_t flags)
3017{
3018 might_sleep_if(flags & __GFP_WAIT);
3019#if DEBUG
3020 kmem_flagcheck(cachep, flags);
3021#endif
3022}
3023
3024#if DEBUG
3025static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3026 gfp_t flags, void *objp, void *caller)
3027{
3028 if (!objp)
3029 return objp;
3030 if (cachep->flags & SLAB_POISON) {
3031#ifdef CONFIG_DEBUG_PAGEALLOC
3032 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3033 kernel_map_pages(virt_to_page(objp),
3034 cachep->buffer_size / PAGE_SIZE, 1);
3035 else
3036 check_poison_obj(cachep, objp);
3037#else
3038 check_poison_obj(cachep, objp);
3039#endif
3040 poison_obj(cachep, objp, POISON_INUSE);
3041 }
3042 if (cachep->flags & SLAB_STORE_USER)
3043 *dbg_userword(cachep, objp) = caller;
3044
3045 if (cachep->flags & SLAB_RED_ZONE) {
3046 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3047 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3048 slab_error(cachep, "double free, or memory outside"
3049 " object was overwritten");
3050 printk(KERN_ERR
3051 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3052 objp, *dbg_redzone1(cachep, objp),
3053 *dbg_redzone2(cachep, objp));
3054 }
3055 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3056 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3057 }
3058#ifdef CONFIG_DEBUG_SLAB_LEAK
3059 {
3060 struct slab *slabp;
3061 unsigned objnr;
3062
3063 slabp = page_get_slab(virt_to_head_page(objp));
3064 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
3065 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3066 }
3067#endif
3068 objp += obj_offset(cachep);
3069 if (cachep->ctor && cachep->flags & SLAB_POISON)
3070 cachep->ctor(objp);
3071#if ARCH_SLAB_MINALIGN
3072 if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
3073 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3074 objp, ARCH_SLAB_MINALIGN);
3075 }
3076#endif
3077 return objp;
3078}
3079#else
3080#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3081#endif
3082
3083static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3084{
3085 if (cachep == &cache_cache)
3086 return false;
3087
3088 return should_failslab(obj_size(cachep), flags);
3089}
3090
3091static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3092{
3093 void *objp;
3094 struct array_cache *ac;
3095
3096 check_irq_off();
3097
3098 ac = cpu_cache_get(cachep);
3099 if (likely(ac->avail)) {
3100 STATS_INC_ALLOCHIT(cachep);
3101 ac->touched = 1;
3102 objp = ac->entry[--ac->avail];
3103 } else {
3104 STATS_INC_ALLOCMISS(cachep);
3105 objp = cache_alloc_refill(cachep, flags);
3106 }
3107
3108
3109
3110
3111
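 /*
  * To avoid a false negative, if an object that is in one of the
  * per-CPU caches is leaked, we need to make sure kmemleak doesn't
  * treat the array pointer as a reference to the object.
  */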
3112 kmemleak_erase(&ac->entry[ac->avail]);
3113 return objp;
3114}
3115
3116#ifdef CONFIG_NUMA
3117
3118
3119
3120
3121
3122
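/*
 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
 *
 * If we are in_interrupt, then process context, including cpusets and
 * mempolicy, may not apply and should not be used for allocation policy.
 */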
3123static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3124{
3125 int nid_alloc, nid_here;
3126
3127 if (in_interrupt() || (flags & __GFP_THISNODE))
3128 return NULL;
3129 nid_alloc = nid_here = numa_node_id();
3130 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3131 nid_alloc = cpuset_mem_spread_node();
3132 else if (current->mempolicy)
3133 nid_alloc = slab_node(current->mempolicy);
3134 if (nid_alloc != nid_here)
3135 return ____cache_alloc_node(cachep, flags, nid_alloc);
3136 return NULL;
3137}
3138
3139
3140
3141
3142
3143
3144
3145
3146
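/*
 * Fallback function if there was no memory available and no objects on a
 * certain node and fallback is permitted. First we scan all the
 * available nodelists for available objects. If that fails then we
 * perform an allocation without specifying a node. This allows the page
 * allocator to do its reclaim / fallback magic. We then insert the
 * slab into the proper nodelist and then allocate from it.
 */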
3147static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3148{
3149 struct zonelist *zonelist;
3150 gfp_t local_flags;
3151 struct zoneref *z;
3152 struct zone *zone;
3153 enum zone_type high_zoneidx = gfp_zone(flags);
3154 void *obj = NULL;
3155 int nid;
3156
3157 if (flags & __GFP_THISNODE)
3158 return NULL;
3159
3160 zonelist = node_zonelist(slab_node(current->mempolicy), flags);
3161 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3162
3163retry:
3164
3165
3166
3167
3168 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3169 nid = zone_to_nid(zone);
3170
3171 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3172 cache->nodelists[nid] &&
3173 cache->nodelists[nid]->free_objects) {
3174 obj = ____cache_alloc_node(cache,
3175 flags | GFP_THISNODE, nid);
3176 if (obj)
3177 break;
3178 }
3179 }
3180
3181 if (!obj) {
3182
3183
3184
3185
3186
3187
3188 if (local_flags & __GFP_WAIT)
3189 local_irq_enable();
3190 kmem_flagcheck(cache, flags);
3191 obj = kmem_getpages(cache, local_flags, numa_node_id());
3192 if (local_flags & __GFP_WAIT)
3193 local_irq_disable();
3194 if (obj) {
3195
3196
3197
3198 nid = page_to_nid(virt_to_page(obj));
3199 if (cache_grow(cache, flags, nid, obj)) {
3200 obj = ____cache_alloc_node(cache,
3201 flags | GFP_THISNODE, nid);
3202 if (!obj)
3203
3204
3205
3206
3207
3208 goto retry;
3209 } else {
3210
3211 obj = NULL;
3212 }
3213 }
3214 }
3215 return obj;
3216}
3217
3218
3219
3220
3221static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3222 int nodeid)
3223{
3224 struct list_head *entry;
3225 struct slab *slabp;
3226 struct kmem_list3 *l3;
3227 void *obj;
3228 int x;
3229
3230 l3 = cachep->nodelists[nodeid];
3231 BUG_ON(!l3);
3232
3233retry:
3234 check_irq_off();
3235 spin_lock(&l3->list_lock);
3236 entry = l3->slabs_partial.next;
3237 if (entry == &l3->slabs_partial) {
3238 l3->free_touched = 1;
3239 entry = l3->slabs_free.next;
3240 if (entry == &l3->slabs_free)
3241 goto must_grow;
3242 }
3243
3244 slabp = list_entry(entry, struct slab, list);
3245 check_spinlock_acquired_node(cachep, nodeid);
3246 check_slabp(cachep, slabp);
3247
3248 STATS_INC_NODEALLOCS(cachep);
3249 STATS_INC_ACTIVE(cachep);
3250 STATS_SET_HIGH(cachep);
3251
3252 BUG_ON(slabp->inuse == cachep->num);
3253
3254 obj = slab_get_obj(cachep, slabp, nodeid);
3255 check_slabp(cachep, slabp);
3256 l3->free_objects--;
3257
3258 list_del(&slabp->list);
3259
3260 if (slabp->free == BUFCTL_END)
3261 list_add(&slabp->list, &l3->slabs_full);
3262 else
3263 list_add(&slabp->list, &l3->slabs_partial);
3264
3265 spin_unlock(&l3->list_lock);
3266 goto done;
3267
3268must_grow:
3269 spin_unlock(&l3->list_lock);
3270 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3271 if (x)
3272 goto retry;
3273
3274 return fallback_alloc(cachep, flags);
3275
3276done:
3277 return obj;
3278}
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
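/*
 * Allocate an object from the slab lists of a specific node.  A nodeid of
 * -1 means the local node.  Falls back to fallback_alloc() when the node
 * is not bootstrapped, and may fall back to other nodes unless
 * __GFP_THISNODE is set.
 */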
3292static __always_inline void *
3293__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3294 void *caller)
3295{
3296 unsigned long save_flags;
3297 void *ptr;
3298
3299 flags &= gfp_allowed_mask;
3300
3301 lockdep_trace_alloc(flags);
3302
3303 if (slab_should_failslab(cachep, flags))
3304 return NULL;
3305
3306 cache_alloc_debugcheck_before(cachep, flags);
3307 local_irq_save(save_flags);
3308
3309 if (unlikely(nodeid == -1))
3310 nodeid = numa_node_id();
3311
3312 if (unlikely(!cachep->nodelists[nodeid])) {
3313
3314 ptr = fallback_alloc(cachep, flags);
3315 goto out;
3316 }
3317
3318 if (nodeid == numa_node_id()) {
3319
3320
3321
3322
3323
3324
3325 ptr = ____cache_alloc(cachep, flags);
3326 if (ptr)
3327 goto out;
3328 }
3329
3330 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3331 out:
3332 local_irq_restore(save_flags);
3333 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3334 kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
3335 flags);
3336
3337 if (likely(ptr))
3338 kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
3339
3340 if (unlikely((flags & __GFP_ZERO) && ptr))
3341 memset(ptr, 0, obj_size(cachep));
3342
3343 return ptr;
3344}
3345
3346static __always_inline void *
3347__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3348{
3349 void *objp;
3350
3351 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3352 objp = alternate_node_alloc(cache, flags);
3353 if (objp)
3354 goto out;
3355 }
3356 objp = ____cache_alloc(cache, flags);
3357
3358
3359
3360
3361
3362 if (!objp)
3363 objp = ____cache_alloc_node(cache, flags, numa_node_id());
3364
3365 out:
3366 return objp;
3367}
3368#else
3369
3370static __always_inline void *
3371__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3372{
3373 return ____cache_alloc(cachep, flags);
3374}
3375
3376#endif
3377
3378static __always_inline void *
3379__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
3380{
3381 unsigned long save_flags;
3382 void *objp;
3383
3384 flags &= gfp_allowed_mask;
3385
3386 lockdep_trace_alloc(flags);
3387
3388 if (slab_should_failslab(cachep, flags))
3389 return NULL;
3390
3391 cache_alloc_debugcheck_before(cachep, flags);
3392 local_irq_save(save_flags);
3393 objp = __do_cache_alloc(cachep, flags);
3394 local_irq_restore(save_flags);
3395 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3396 kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
3397 flags);
3398 prefetchw(objp);
3399
3400 if (likely(objp))
3401 kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
3402
3403 if (unlikely((flags & __GFP_ZERO) && objp))
3404 memset(objp, 0, obj_size(cachep));
3405
3406 return objp;
3407}
3408
3409
3410
3411
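/*
 * Caller needs to acquire the correct kmem_list3's list_lock.
 */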
3412static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3413 int node)
3414{
3415 int i;
3416 struct kmem_list3 *l3;
3417
3418 for (i = 0; i < nr_objects; i++) {
3419 void *objp = objpp[i];
3420 struct slab *slabp;
3421
3422 slabp = virt_to_slab(objp);
3423 l3 = cachep->nodelists[node];
3424 list_del(&slabp->list);
3425 check_spinlock_acquired_node(cachep, node);
3426 check_slabp(cachep, slabp);
3427 slab_put_obj(cachep, slabp, objp, node);
3428 STATS_DEC_ACTIVE(cachep);
3429 l3->free_objects++;
3430 check_slabp(cachep, slabp);
3431
3432
3433 if (slabp->inuse == 0) {
3434 if (l3->free_objects > l3->free_limit) {
3435 l3->free_objects -= cachep->num;
3436
3437
3438
3439
3440
3441
3442 slab_destroy(cachep, slabp);
3443 } else {
3444 list_add(&slabp->list, &l3->slabs_free);
3445 }
3446 } else {
3447
3448
3449
3450
3451 list_add_tail(&slabp->list, &l3->slabs_partial);
3452 }
3453 }
3454}
3455
3456static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3457{
3458 int batchcount;
3459 struct kmem_list3 *l3;
3460 int node = numa_node_id();
3461
3462 batchcount = ac->batchcount;
3463#if DEBUG
3464 BUG_ON(!batchcount || batchcount > ac->avail);
3465#endif
3466 check_irq_off();
3467 l3 = cachep->nodelists[node];
3468 spin_lock(&l3->list_lock);
3469 if (l3->shared) {
3470 struct array_cache *shared_array = l3->shared;
3471 int max = shared_array->limit - shared_array->avail;
3472 if (max) {
3473 if (batchcount > max)
3474 batchcount = max;
3475 memcpy(&(shared_array->entry[shared_array->avail]),
3476 ac->entry, sizeof(void *) * batchcount);
3477 shared_array->avail += batchcount;
3478 goto free_done;
3479 }
3480 }
3481
3482 free_block(cachep, ac->entry, batchcount, node);
3483free_done:
3484#if STATS
3485 {
3486 int i = 0;
3487 struct list_head *p;
3488
3489 p = l3->slabs_free.next;
3490 while (p != &(l3->slabs_free)) {
3491 struct slab *slabp;
3492
3493 slabp = list_entry(p, struct slab, list);
3494 BUG_ON(slabp->inuse);
3495
3496 i++;
3497 p = p->next;
3498 }
3499 STATS_SET_FREEABLE(cachep, i);
3500 }
3501#endif
3502 spin_unlock(&l3->list_lock);
3503 ac->avail -= batchcount;
3504 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3505}
3506
3507
3508
3509
3510
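/*
 * Release an obj back to its cache. If the obj has a constructed state,
 * it must be in this state _before_ it is released.  Called with
 * interrupts disabled.
 */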
3511static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3512{
3513 struct array_cache *ac = cpu_cache_get(cachep);
3514
3515 check_irq_off();
3516 kmemleak_free_recursive(objp, cachep->flags);
3517 objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
3518
3519 kmemcheck_slab_free(cachep, objp, obj_size(cachep));
3520
3521
3522
3523
3524
3525
3526
3527
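 /*
  * Skip calling cache_free_alien() when the platform is not NUMA
  * (nr_online_nodes == 1), so the common path does not have to touch
  * the slab descriptor just to read its nodeid.
  */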
3528 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3529 return;
3530
3531 if (likely(ac->avail < ac->limit)) {
3532 STATS_INC_FREEHIT(cachep);
3533 ac->entry[ac->avail++] = objp;
3534 return;
3535 } else {
3536 STATS_INC_FREEMISS(cachep);
3537 cache_flusharray(cachep, ac);
3538 ac->entry[ac->avail++] = objp;
3539 }
3540}
3541
3542
3543
3544
3545
3546
3547
3548
3549
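/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.  The flags are only relevant
 * if the cache has no available objects.
 */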
3550void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3551{
3552 void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
3553
3554 trace_kmem_cache_alloc(_RET_IP_, ret,
3555 obj_size(cachep), cachep->buffer_size, flags);
3556
3557 return ret;
3558}
3559EXPORT_SYMBOL(kmem_cache_alloc);
3560
3561#ifdef CONFIG_KMEMTRACE
3562void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
3563{
3564 return __cache_alloc(cachep, flags, __builtin_return_address(0));
3565}
3566EXPORT_SYMBOL(kmem_cache_alloc_notrace);
3567#endif
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
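/**
 * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
 * @cachep: the cache we're checking against
 * @ptr: pointer to validate
 *
 * This verifies that the untrusted pointer looks sane; it is _not_ a
 * guarantee that the pointer is actually part of the slab cache in
 * question, but it at least validates that the pointer can be
 * dereferenced and looks half-way sane.
 */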
3582int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
3583{
3584 unsigned long addr = (unsigned long)ptr;
3585 unsigned long min_addr = PAGE_OFFSET;
3586 unsigned long align_mask = BYTES_PER_WORD - 1;
3587 unsigned long size = cachep->buffer_size;
3588 struct page *page;
3589
3590 if (unlikely(addr < min_addr))
3591 goto out;
3592 if (unlikely(addr > (unsigned long)high_memory - size))
3593 goto out;
3594 if (unlikely(addr & align_mask))
3595 goto out;
3596 if (unlikely(!kern_addr_valid(addr)))
3597 goto out;
3598 if (unlikely(!kern_addr_valid(addr + size - 1)))
3599 goto out;
3600 page = virt_to_page(ptr);
3601 if (unlikely(!PageSlab(page)))
3602 goto out;
3603 if (unlikely(page_get_cache(page) != cachep))
3604 goto out;
3605 return 1;
3606out:
3607 return 0;
3608}
3609
3610#ifdef CONFIG_NUMA
3611void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3612{
3613 void *ret = __cache_alloc_node(cachep, flags, nodeid,
3614 __builtin_return_address(0));
3615
3616 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3617 obj_size(cachep), cachep->buffer_size,
3618 flags, nodeid);
3619
3620 return ret;
3621}
3622EXPORT_SYMBOL(kmem_cache_alloc_node);
3623
3624#ifdef CONFIG_KMEMTRACE
3625void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
3626 gfp_t flags,
3627 int nodeid)
3628{
3629 return __cache_alloc_node(cachep, flags, nodeid,
3630 __builtin_return_address(0));
3631}
3632EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
3633#endif
3634
3635static __always_inline void *
3636__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
3637{
3638 struct kmem_cache *cachep;
3639 void *ret;
3640
3641 cachep = kmem_find_general_cachep(size, flags);
3642 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3643 return cachep;
3644 ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
3645
3646 trace_kmalloc_node((unsigned long) caller, ret,
3647 size, cachep->buffer_size, flags, node);
3648
3649 return ret;
3650}
3651
3652#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3653void *__kmalloc_node(size_t size, gfp_t flags, int node)
3654{
3655 return __do_kmalloc_node(size, flags, node,
3656 __builtin_return_address(0));
3657}
3658EXPORT_SYMBOL(__kmalloc_node);
3659
3660void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3661 int node, unsigned long caller)
3662{
3663 return __do_kmalloc_node(size, flags, node, (void *)caller);
3664}
3665EXPORT_SYMBOL(__kmalloc_node_track_caller);
3666#else
3667void *__kmalloc_node(size_t size, gfp_t flags, int node)
3668{
3669 return __do_kmalloc_node(size, flags, node, NULL);
3670}
3671EXPORT_SYMBOL(__kmalloc_node);
3672#endif
3673#endif
3674
3675
3676
3677
3678
3679
3680
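/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */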
3681static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3682 void *caller)
3683{
3684 struct kmem_cache *cachep;
3685 void *ret;
3686
3687
3688
3689
3690
3691
3692 cachep = __find_general_cachep(size, flags);
3693 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3694 return cachep;
3695 ret = __cache_alloc(cachep, flags, caller);
3696
3697 trace_kmalloc((unsigned long) caller, ret,
3698 size, cachep->buffer_size, flags);
3699
3700 return ret;
3701}
3702
3703
3704#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
3705void *__kmalloc(size_t size, gfp_t flags)
3706{
3707 return __do_kmalloc(size, flags, __builtin_return_address(0));
3708}
3709EXPORT_SYMBOL(__kmalloc);
3710
3711void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3712{
3713 return __do_kmalloc(size, flags, (void *)caller);
3714}
3715EXPORT_SYMBOL(__kmalloc_track_caller);
3716
3717#else
3718void *__kmalloc(size_t size, gfp_t flags)
3719{
3720 return __do_kmalloc(size, flags, NULL);
3721}
3722EXPORT_SYMBOL(__kmalloc);
3723#endif
3724
3725
3726
3727
3728
3729
3730
3731
3732
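/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */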
3733void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3734{
3735 unsigned long flags;
3736
3737 local_irq_save(flags);
3738 debug_check_no_locks_freed(objp, obj_size(cachep));
3739 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3740 debug_check_no_obj_freed(objp, obj_size(cachep));
3741 __cache_free(cachep, objp);
3742 local_irq_restore(flags);
3743
3744 trace_kmem_cache_free(_RET_IP_, objp);
3745}
3746EXPORT_SYMBOL(kmem_cache_free);
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
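/**
 * kfree - free previously allocated memory
 * @objp: pointer returned by kmalloc.
 *
 * If @objp is NULL, no operation is performed.
 *
 * Don't free memory not originally allocated by kmalloc()
 * or you will run into trouble.
 */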
3757void kfree(const void *objp)
3758{
3759 struct kmem_cache *c;
3760 unsigned long flags;
3761
3762 trace_kfree(_RET_IP_, objp);
3763
3764 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3765 return;
3766 local_irq_save(flags);
3767 kfree_debugcheck(objp);
3768 c = virt_to_cache(objp);
3769 debug_check_no_locks_freed(objp, obj_size(c));
3770 debug_check_no_obj_freed(objp, obj_size(c));
3771 __cache_free(c, (void *)objp);
3772 local_irq_restore(flags);
3773}
3774EXPORT_SYMBOL(kfree);
3775
3776unsigned int kmem_cache_size(struct kmem_cache *cachep)
3777{
3778 return obj_size(cachep);
3779}
3780EXPORT_SYMBOL(kmem_cache_size);
3781
3782const char *kmem_cache_name(struct kmem_cache *cachep)
3783{
3784 return cachep->name;
3785}
3786EXPORT_SYMBOL_GPL(kmem_cache_name);
3787
3788
3789
3790
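/*
 * This initializes kmem_list3 or resizes various caches for all nodes.
 */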
3791static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3792{
3793 int node;
3794 struct kmem_list3 *l3;
3795 struct array_cache *new_shared;
3796 struct array_cache **new_alien = NULL;
3797
3798 for_each_online_node(node) {
3799
3800 if (use_alien_caches) {
3801 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
3802 if (!new_alien)
3803 goto fail;
3804 }
3805
3806 new_shared = NULL;
3807 if (cachep->shared) {
3808 new_shared = alloc_arraycache(node,
3809 cachep->shared*cachep->batchcount,
3810 0xbaadf00d, gfp);
3811 if (!new_shared) {
3812 free_alien_cache(new_alien);
3813 goto fail;
3814 }
3815 }
3816
3817 l3 = cachep->nodelists[node];
3818 if (l3) {
3819 struct array_cache *shared = l3->shared;
3820
3821 spin_lock_irq(&l3->list_lock);
3822
3823 if (shared)
3824 free_block(cachep, shared->entry,
3825 shared->avail, node);
3826
3827 l3->shared = new_shared;
3828 if (!l3->alien) {
3829 l3->alien = new_alien;
3830 new_alien = NULL;
3831 }
3832 l3->free_limit = (1 + nr_cpus_node(node)) *
3833 cachep->batchcount + cachep->num;
3834 spin_unlock_irq(&l3->list_lock);
3835 kfree(shared);
3836 free_alien_cache(new_alien);
3837 continue;
3838 }
3839 l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
3840 if (!l3) {
3841 free_alien_cache(new_alien);
3842 kfree(new_shared);
3843 goto fail;
3844 }
3845
3846 kmem_list3_init(l3);
3847 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3848 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3849 l3->shared = new_shared;
3850 l3->alien = new_alien;
3851 l3->free_limit = (1 + nr_cpus_node(node)) *
3852 cachep->batchcount + cachep->num;
3853 cachep->nodelists[node] = l3;
3854 }
3855 return 0;
3856
3857fail:
3858 if (!cachep->next.next) {
3859
3860 node--;
3861 while (node >= 0) {
3862 if (cachep->nodelists[node]) {
3863 l3 = cachep->nodelists[node];
3864
3865 kfree(l3->shared);
3866 free_alien_cache(l3->alien);
3867 kfree(l3);
3868 cachep->nodelists[node] = NULL;
3869 }
3870 node--;
3871 }
3872 }
3873 return -ENOMEM;
3874}
3875
3876struct ccupdate_struct {
3877 struct kmem_cache *cachep;
3878 struct array_cache *new[NR_CPUS];
3879};
3880
3881static void do_ccupdate_local(void *info)
3882{
3883 struct ccupdate_struct *new = info;
3884 struct array_cache *old;
3885
3886 check_irq_off();
3887 old = cpu_cache_get(new->cachep);
3888
3889 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3890 new->new[smp_processor_id()] = old;
3891}
3892
3893
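/* Always called with the cache_chain_mutex held */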
3894static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3895 int batchcount, int shared, gfp_t gfp)
3896{
3897 struct ccupdate_struct *new;
3898 int i;
3899
3900 new = kzalloc(sizeof(*new), gfp);
3901 if (!new)
3902 return -ENOMEM;
3903
3904 for_each_online_cpu(i) {
3905 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
3906 batchcount, gfp);
3907 if (!new->new[i]) {
3908 for (i--; i >= 0; i--)
3909 kfree(new->new[i]);
3910 kfree(new);
3911 return -ENOMEM;
3912 }
3913 }
3914 new->cachep = cachep;
3915
3916 on_each_cpu(do_ccupdate_local, (void *)new, 1);
3917
3918 check_irq_on();
3919 cachep->batchcount = batchcount;
3920 cachep->limit = limit;
3921 cachep->shared = shared;
3922
3923 for_each_online_cpu(i) {
3924 struct array_cache *ccold = new->new[i];
3925 if (!ccold)
3926 continue;
3927 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3928 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
3929 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
3930 kfree(ccold);
3931 }
3932 kfree(new);
3933 return alloc_kmemlist(cachep, gfp);
3934}
3935
3936
3937static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3938{
3939 int err;
3940 int limit, shared;
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
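 /*
  * The head array serves three purposes:
  * - create a LIFO ordering, i.e. return objects that are cache-warm
  * - reduce the number of spinlock operations
  * - reduce the number of linked list operations on the slab and
  *   bufctl chains: array operations are cheaper.
  * The limits below are heuristics keyed off the object size.
  */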
3951 if (cachep->buffer_size > 131072)
3952 limit = 1;
3953 else if (cachep->buffer_size > PAGE_SIZE)
3954 limit = 8;
3955 else if (cachep->buffer_size > 1024)
3956 limit = 24;
3957 else if (cachep->buffer_size > 256)
3958 limit = 54;
3959 else
3960 limit = 120;
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
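 /*
  * CPU bound tasks (e.g. network routing) can exhibit cpu bound
  * allocation behaviour: most allocs on one cpu, most frees on another.
  * The shared array provides efficient object passing between cpus.
  * On uniprocessor it is functionally equivalent (but less efficient)
  * to a larger limit, so it stays disabled there.
  */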
3971 shared = 0;
3972 if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
3973 shared = 8;
3974
3975#if DEBUG
3976
3977
3978
3979
3980 if (limit > 32)
3981 limit = 32;
3982#endif
3983 err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
3984 if (err)
3985 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
3986 cachep->name, -err);
3987 return err;
3988}
3989
3990
3991
3992
3993
3994
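/*
 * Drain an array if it contains any elements taking the l3 lock only if
 * necessary. Note that the l3 listlock also protects the array_cache
 * if drain_array() is used on the shared array.
 */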
3995void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
3996 struct array_cache *ac, int force, int node)
3997{
3998 int tofree;
3999
4000 if (!ac || !ac->avail)
4001 return;
4002 if (ac->touched && !force) {
4003 ac->touched = 0;
4004 } else {
4005 spin_lock_irq(&l3->list_lock);
4006 if (ac->avail) {
4007 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4008 if (tofree > ac->avail)
4009 tofree = (ac->avail + 1) / 2;
4010 free_block(cachep, ac->entry, tofree, node);
4011 ac->avail -= tofree;
4012 memmove(ac->entry, &(ac->entry[tofree]),
4013 sizeof(void *) * ac->avail);
4014 }
4015 spin_unlock_irq(&l3->list_lock);
4016 }
4017}
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
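/**
 * cache_reap - Reclaim memory from caches.
 * @w: work descriptor
 *
 * Called from workqueue/eventd every few seconds.
 * Purpose:
 * - clear the per-cpu caches for this CPU.
 * - return freeable pages to the main free memory pool.
 *
 * If we cannot acquire the cache chain mutex then just give up - we'll try
 * again on the next iteration.
 */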
4031static void cache_reap(struct work_struct *w)
4032{
4033 struct kmem_cache *searchp;
4034 struct kmem_list3 *l3;
4035 int node = numa_node_id();
4036 struct delayed_work *work = to_delayed_work(w);
4037
4038 if (!mutex_trylock(&cache_chain_mutex))
4039
4040 goto out;
4041
4042 list_for_each_entry(searchp, &cache_chain, next) {
4043 check_irq_on();
4044
4045
4046
4047
4048
4049
4050 l3 = searchp->nodelists[node];
4051
4052 reap_alien(searchp, l3);
4053
4054 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
4055
4056
4057
4058
4059
4060 if (time_after(l3->next_reap, jiffies))
4061 goto next;
4062
4063 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
4064
4065 drain_array(searchp, l3, l3->shared, 0, node);
4066
4067 if (l3->free_touched)
4068 l3->free_touched = 0;
4069 else {
4070 int freed;
4071
4072 freed = drain_freelist(searchp, l3, (l3->free_limit +
4073 5 * searchp->num - 1) / (5 * searchp->num));
4074 STATS_ADD_REAPED(searchp, freed);
4075 }
4076next:
4077 cond_resched();
4078 }
4079 check_irq_on();
4080 mutex_unlock(&cache_chain_mutex);
4081 next_reap_node();
4082out:
4083
4084 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4085}
4086
4087#ifdef CONFIG_SLABINFO
4088
4089static void print_slabinfo_header(struct seq_file *m)
4090{
4091
4092
4093
4094
4095#if STATS
4096 seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
4097#else
4098 seq_puts(m, "slabinfo - version: 2.1\n");
4099#endif
4100 seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
4101 "<objperslab> <pagesperslab>");
4102 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
4103 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
4104#if STATS
4105 seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
4106 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
4107 seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
4108#endif
4109 seq_putc(m, '\n');
4110}
4111
4112static void *s_start(struct seq_file *m, loff_t *pos)
4113{
4114 loff_t n = *pos;
4115
4116 mutex_lock(&cache_chain_mutex);
4117 if (!n)
4118 print_slabinfo_header(m);
4119
4120 return seq_list_start(&cache_chain, *pos);
4121}
4122
4123static void *s_next(struct seq_file *m, void *p, loff_t *pos)
4124{
4125 return seq_list_next(p, &cache_chain, pos);
4126}
4127
4128static void s_stop(struct seq_file *m, void *p)
4129{
4130 mutex_unlock(&cache_chain_mutex);
4131}
4132
4133static int s_show(struct seq_file *m, void *p)
4134{
4135 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4136 struct slab *slabp;
4137 unsigned long active_objs;
4138 unsigned long num_objs;
4139 unsigned long active_slabs = 0;
4140 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4141 const char *name;
4142 char *error = NULL;
4143 int node;
4144 struct kmem_list3 *l3;
4145
4146 active_objs = 0;
4147 num_slabs = 0;
4148 for_each_online_node(node) {
4149 l3 = cachep->nodelists[node];
4150 if (!l3)
4151 continue;
4152
4153 check_irq_on();
4154 spin_lock_irq(&l3->list_lock);
4155
4156 list_for_each_entry(slabp, &l3->slabs_full, list) {
4157 if (slabp->inuse != cachep->num && !error)
4158 error = "slabs_full accounting error";
4159 active_objs += cachep->num;
4160 active_slabs++;
4161 }
4162 list_for_each_entry(slabp, &l3->slabs_partial, list) {
4163 if (slabp->inuse == cachep->num && !error)
4164 error = "slabs_partial inuse accounting error";
4165 if (!slabp->inuse && !error)
4166 error = "slabs_partial/inuse accounting error";
4167 active_objs += slabp->inuse;
4168 active_slabs++;
4169 }
4170 list_for_each_entry(slabp, &l3->slabs_free, list) {
4171 if (slabp->inuse && !error)
4172 error = "slabs_free/inuse accounting error";
4173 num_slabs++;
4174 }
4175 free_objects += l3->free_objects;
4176 if (l3->shared)
4177 shared_avail += l3->shared->avail;
4178
4179 spin_unlock_irq(&l3->list_lock);
4180 }
4181 num_slabs += active_slabs;
4182 num_objs = num_slabs * cachep->num;
4183 if (num_objs - active_objs != free_objects && !error)
4184 error = "free_objects accounting error";
4185
4186 name = cachep->name;
4187 if (error)
4188 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4189
4190 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
4191 name, active_objs, num_objs, cachep->buffer_size,
4192 cachep->num, (1 << cachep->gfporder));
4193 seq_printf(m, " : tunables %4u %4u %4u",
4194 cachep->limit, cachep->batchcount, cachep->shared);
4195 seq_printf(m, " : slabdata %6lu %6lu %6lu",
4196 active_slabs, num_slabs, shared_avail);
4197#if STATS
4198 {
4199 unsigned long high = cachep->high_mark;
4200 unsigned long allocs = cachep->num_allocations;
4201 unsigned long grown = cachep->grown;
4202 unsigned long reaped = cachep->reaped;
4203 unsigned long errors = cachep->errors;
4204 unsigned long max_freeable = cachep->max_freeable;
4205 unsigned long node_allocs = cachep->node_allocs;
4206 unsigned long node_frees = cachep->node_frees;
4207 unsigned long overflows = cachep->node_overflow;
4208
4209 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
4210 %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
4211 reaped, errors, max_freeable, node_allocs,
4212 node_frees, overflows);
4213 }
4214
4215 {
4216 unsigned long allochit = atomic_read(&cachep->allochit);
4217 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4218 unsigned long freehit = atomic_read(&cachep->freehit);
4219 unsigned long freemiss = atomic_read(&cachep->freemiss);
4220
4221 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4222 allochit, allocmiss, freehit, freemiss);
4223 }
4224#endif
4225 seq_putc(m, '\n');
4226 return 0;
4227}
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243static const struct seq_operations slabinfo_op = {
4244 .start = s_start,
4245 .next = s_next,
4246 .stop = s_stop,
4247 .show = s_show,
4248};
4249
4250#define MAX_SLABINFO_WRITE 128
4251
4252
4253
4254
4255
4256
4257
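/**
 * slabinfo_write - Tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data length
 * @ppos: unused
 */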
4258ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4259 size_t count, loff_t *ppos)
4260{
4261 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4262 int limit, batchcount, shared, res;
4263 struct kmem_cache *cachep;
4264
4265 if (count > MAX_SLABINFO_WRITE)
4266 return -EINVAL;
4267 if (copy_from_user(&kbuf, buffer, count))
4268 return -EFAULT;
4269 kbuf[MAX_SLABINFO_WRITE] = '\0';
4270
4271 tmp = strchr(kbuf, ' ');
4272 if (!tmp)
4273 return -EINVAL;
4274 *tmp = '\0';
4275 tmp++;
4276 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4277 return -EINVAL;
4278
4279
4280 mutex_lock(&cache_chain_mutex);
4281 res = -EINVAL;
4282 list_for_each_entry(cachep, &cache_chain, next) {
4283 if (!strcmp(cachep->name, kbuf)) {
4284 if (limit < 1 || batchcount < 1 ||
4285 batchcount > limit || shared < 0) {
4286 res = 0;
4287 } else {
4288 res = do_tune_cpucache(cachep, limit,
4289 batchcount, shared,
4290 GFP_KERNEL);
4291 }
4292 break;
4293 }
4294 }
4295 mutex_unlock(&cache_chain_mutex);
4296 if (res >= 0)
4297 res = count;
4298 return res;
4299}
4300
4301static int slabinfo_open(struct inode *inode, struct file *file)
4302{
4303 return seq_open(file, &slabinfo_op);
4304}
4305
4306static const struct file_operations proc_slabinfo_operations = {
4307 .open = slabinfo_open,
4308 .read = seq_read,
4309 .write = slabinfo_write,
4310 .llseek = seq_lseek,
4311 .release = seq_release,
4312};
4313
4314#ifdef CONFIG_DEBUG_SLAB_LEAK
4315
4316static void *leaks_start(struct seq_file *m, loff_t *pos)
4317{
4318 mutex_lock(&cache_chain_mutex);
4319 return seq_list_start(&cache_chain, *pos);
4320}
4321
4322static inline int add_caller(unsigned long *n, unsigned long v)
4323{
4324 unsigned long *p;
4325 int l;
4326 if (!v)
4327 return 1;
4328 l = n[1];
4329 p = n + 2;
4330 while (l) {
4331 int i = l/2;
4332 unsigned long *q = p + 2 * i;
4333 if (*q == v) {
4334 q[1]++;
4335 return 1;
4336 }
4337 if (*q > v) {
4338 l = i;
4339 } else {
4340 p = q + 2;
4341 l -= i + 1;
4342 }
4343 }
4344 if (++n[1] == n[0])
4345 return 0;
4346 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4347 p[0] = v;
4348 p[1] = 1;
4349 return 1;
4350}
4351
4352static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4353{
4354 void *p;
4355 int i;
4356 if (n[0] == n[1])
4357 return;
4358 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
4359 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4360 continue;
4361 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4362 return;
4363 }
4364}
4365
4366static void show_symbol(struct seq_file *m, unsigned long address)
4367{
4368#ifdef CONFIG_KALLSYMS
4369 unsigned long offset, size;
4370 char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4371
4372 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4373 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4374 if (modname[0])
4375 seq_printf(m, " [%s]", modname);
4376 return;
4377 }
4378#endif
4379 seq_printf(m, "%p", (void *)address);
4380}
4381
4382static int leaks_show(struct seq_file *m, void *p)
4383{
4384 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
4385 struct slab *slabp;
4386 struct kmem_list3 *l3;
4387 const char *name;
4388 unsigned long *n = m->private;
4389 int node;
4390 int i;
4391
4392 if (!(cachep->flags & SLAB_STORE_USER))
4393 return 0;
4394 if (!(cachep->flags & SLAB_RED_ZONE))
4395 return 0;
4396
4397
4398
4399 n[1] = 0;
4400
4401 for_each_online_node(node) {
4402 l3 = cachep->nodelists[node];
4403 if (!l3)
4404 continue;
4405
4406 check_irq_on();
4407 spin_lock_irq(&l3->list_lock);
4408
4409 list_for_each_entry(slabp, &l3->slabs_full, list)
4410 handle_slab(n, cachep, slabp);
4411 list_for_each_entry(slabp, &l3->slabs_partial, list)
4412 handle_slab(n, cachep, slabp);
4413 spin_unlock_irq(&l3->list_lock);
4414 }
4415 name = cachep->name;
4416 if (n[0] == n[1]) {
4417
4418 mutex_unlock(&cache_chain_mutex);
4419 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4420 if (!m->private) {
4421
4422 m->private = n;
4423 mutex_lock(&cache_chain_mutex);
4424 return -ENOMEM;
4425 }
4426 *(unsigned long *)m->private = n[0] * 2;
4427 kfree(n);
4428 mutex_lock(&cache_chain_mutex);
4429
4430 m->count = m->size;
4431 return 0;
4432 }
4433 for (i = 0; i < n[1]; i++) {
4434 seq_printf(m, "%s: %lu ", name, n[2*i+3]);
4435 show_symbol(m, n[2*i+2]);
4436 seq_putc(m, '\n');
4437 }
4438
4439 return 0;
4440}
4441
4442static const struct seq_operations slabstats_op = {
4443 .start = leaks_start,
4444 .next = s_next,
4445 .stop = s_stop,
4446 .show = leaks_show,
4447};
4448
4449static int slabstats_open(struct inode *inode, struct file *file)
4450{
4451 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4452 int ret = -ENOMEM;
4453 if (n) {
4454 ret = seq_open(file, &slabstats_op);
4455 if (!ret) {
4456 struct seq_file *m = file->private_data;
4457 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4458 m->private = n;
4459 n = NULL;
4460 }
4461 kfree(n);
4462 }
4463 return ret;
4464}
4465
4466static const struct file_operations proc_slabstats_operations = {
4467 .open = slabstats_open,
4468 .read = seq_read,
4469 .llseek = seq_lseek,
4470 .release = seq_release_private,
4471};
4472#endif
4473
4474static int __init slab_proc_init(void)
4475{
4476 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4477#ifdef CONFIG_DEBUG_SLAB_LEAK
4478 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4479#endif
4480 return 0;
4481}
4482module_init(slab_proc_init);
4483#endif
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
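/**
 * ksize - get the actual amount of memory allocated for a given object
 * @objp: Pointer to the object
 *
 * kmalloc may internally round up allocations and return more memory
 * than requested. ksize() can be used to determine the actual amount of
 * memory allocated. The caller may use this additional memory, even though
 * a smaller amount of memory was initially specified with the kmalloc call.
 * The caller must guarantee that objp points to a valid object previously
 * allocated with either kmalloc() or kmem_cache_alloc(). The object
 * must not be freed during the duration of the call.
 */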
4497size_t ksize(const void *objp)
4498{
4499 BUG_ON(!objp);
4500 if (unlikely(objp == ZERO_SIZE_PTR))
4501 return 0;
4502
4503 return obj_size(virt_to_cache(objp));
4504}
4505EXPORT_SYMBOL(ksize);
4506