/*
 * linux/mm/slab.c
 */
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/cpuset.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/nodemask.h>
#include <linux/kmemleak.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
#include <linux/fault-inject.h>
#include <linux/rtmutex.h>
#include <linux/reciprocal_div.h>
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>

#include <net/sock.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>

#include <trace/events/kmem.h>

#include "internal.h"

#include "slab.h"

/*
 * DEBUG        - 1 for kmem_cache_create() to honour SLAB_RED_ZONE & SLAB_POISON.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * STATS        - 1 to collect stats for /proc/slabinfo.
 *                0 for faster, smaller code (especially in the critical paths).
 *
 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible).
 */
#ifdef CONFIG_DEBUG_SLAB
#define DEBUG 1
#define STATS 1
#define FORCED_DEBUG 1
#else
#define DEBUG 0
#define STATS 0
#define FORCED_DEBUG 0
#endif

#define BYTES_PER_WORD sizeof(void *)
#define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long))

#ifndef ARCH_KMALLOC_FLAGS
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif

/*
 * true if a page was allocated from pfmemalloc reserves for network-based
 * swap
 */
static bool pfmemalloc_active __read_mostly;

/*
 * kmem_bufctl_t:
 *
 * Bufctls link the free objects within a slab: slab_bufctl(slabp)[i] holds
 * the index of the free object that follows object i.  The values below are
 * sentinels and can never be valid object indices.
 */
typedef unsigned int kmem_bufctl_t;
#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)

/*
 * struct slab_rcu
 *
 * slab_destroy() on a SLAB_DESTROY_BY_RCU cache must defer returning the
 * underlying pages until an RCU grace period has elapsed; the information
 * needed by kmem_rcu_free() is parked here and handed to call_rcu().
 */
struct slab_rcu {
        struct rcu_head head;
        struct kmem_cache *cachep;
        void *addr;
};

/*
 * struct slab
 *
 * Manages the objects in a slab.  Placed either at the beginning of the
 * memory allocated for the slab, or allocated separately from a general
 * cache (OFF_SLAB).  Slabs are chained into three lists: fully used,
 * partial, and fully free.
 */
struct slab {
        union {
                struct {
                        struct list_head list;
                        unsigned long colouroff;
                        void *s_mem;            /* including colour offset */
                        unsigned int inuse;     /* num of objs active in slab */
                        kmem_bufctl_t free;
                        unsigned short nodeid;
                };
                struct slab_rcu __slab_cover_slab_rcu;
        };
};
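
/*
 * Illustrative layout sketch (an assumption-laden example, not taken from the
 * original comments): for an on-slab cache, the page(s) backing a slab look
 * roughly like
 *
 *   | colour padding | struct slab | kmem_bufctl_t[num] | pad | objects ... |
 *                                                              ^ s_mem
 *
 * colouroff records the offset of s_mem from the start of the allocation, so
 * slab_destroy() can recover the page address as s_mem - colouroff.  For an
 * OFF_SLAB cache the management structure lives in a separate kmalloc'd
 * buffer and only the objects occupy the page(s).
 */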

/*
 * struct array_cache
 *
 * Purpose:
 * - LIFO ordering, to hand out cache-warm objects from _alloc
 * - reduce the number of linked list operations
 * - reduce spinlock operations
 *
 * The limit is stored in the per-cpu structure to reduce the data cache
 * footprint.
 */
struct array_cache {
        unsigned int avail;
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
        spinlock_t lock;
        void *entry[];  /*
                         * Must have this definition in here for the proper
                         * alignment of array_cache. Also simplifies accessing
                         * the entries.
                         *
                         * Entries should not be directly dereferenced as
                         * entries belonging to slabs marked pfmemalloc will
                         * have the lower bits set SLAB_OBJ_PFMEMALLOC.
                         */
};

#define SLAB_OBJ_PFMEMALLOC 1
static inline bool is_obj_pfmemalloc(void *objp)
{
        return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
}

static inline void set_obj_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
        return;
}

static inline void clear_obj_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
}
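
/*
 * Example (illustrative only, not part of the original source): slab objects
 * are at least word aligned, so bit 0 of an object pointer is always zero and
 * can be borrowed as a flag.  With SLAB_OBJ_PFMEMALLOC == 1, an object at,
 * say, 0xffff880012345670 is stored in the array cache as 0xffff880012345671
 * once tagged; is_obj_pfmemalloc() tests that bit, and clear_obj_pfmemalloc()
 * masks it off before the pointer is handed back to callers.
 */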
277
278
279
280
281
282#define BOOT_CPUCACHE_ENTRIES 1
283struct arraycache_init {
284 struct array_cache cache;
285 void *entries[BOOT_CPUCACHE_ENTRIES];
286};
287
288
289
290
291#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
292static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
293#define CACHE_CACHE 0
294#define SIZE_AC MAX_NUMNODES
295#define SIZE_NODE (2 * MAX_NUMNODES)
296
297static int drain_freelist(struct kmem_cache *cache,
298 struct kmem_cache_node *n, int tofree);
299static void free_block(struct kmem_cache *cachep, void **objpp, int len,
300 int node);
301static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
302static void cache_reap(struct work_struct *unused);
303
304static int slab_early_init = 1;
305
306#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
307#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
308
309static void kmem_cache_node_init(struct kmem_cache_node *parent)
310{
311 INIT_LIST_HEAD(&parent->slabs_full);
312 INIT_LIST_HEAD(&parent->slabs_partial);
313 INIT_LIST_HEAD(&parent->slabs_free);
314 parent->shared = NULL;
315 parent->alien = NULL;
316 parent->colour_next = 0;
317 spin_lock_init(&parent->list_lock);
318 parent->free_objects = 0;
319 parent->free_touched = 0;
320}
321
322#define MAKE_LIST(cachep, listp, slab, nodeid) \
323 do { \
324 INIT_LIST_HEAD(listp); \
325 list_splice(&(cachep->node[nodeid]->slab), listp); \
326 } while (0)
327
328#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
329 do { \
330 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
331 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
332 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
333 } while (0)
334
335#define CFLGS_OFF_SLAB (0x80000000UL)
336#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
337
338#define BATCHREFILL_LIMIT 16
339
340
341
342
343
344
345
346#define REAPTIMEOUT_CPUC (2*HZ)
347#define REAPTIMEOUT_LIST3 (4*HZ)
348
349#if STATS
350#define STATS_INC_ACTIVE(x) ((x)->num_active++)
351#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
352#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
353#define STATS_INC_GROWN(x) ((x)->grown++)
354#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
355#define STATS_SET_HIGH(x) \
356 do { \
357 if ((x)->num_active > (x)->high_mark) \
358 (x)->high_mark = (x)->num_active; \
359 } while (0)
360#define STATS_INC_ERR(x) ((x)->errors++)
361#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
362#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
363#define STATS_INC_ACOVERFLOW(x) ((x)->node_overflow++)
364#define STATS_SET_FREEABLE(x, i) \
365 do { \
366 if ((x)->max_freeable < i) \
367 (x)->max_freeable = i; \
368 } while (0)
369#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
370#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
371#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
372#define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
373#else
374#define STATS_INC_ACTIVE(x) do { } while (0)
375#define STATS_DEC_ACTIVE(x) do { } while (0)
376#define STATS_INC_ALLOCED(x) do { } while (0)
377#define STATS_INC_GROWN(x) do { } while (0)
378#define STATS_ADD_REAPED(x,y) do { (void)(y); } while (0)
379#define STATS_SET_HIGH(x) do { } while (0)
380#define STATS_INC_ERR(x) do { } while (0)
381#define STATS_INC_NODEALLOCS(x) do { } while (0)
382#define STATS_INC_NODEFREES(x) do { } while (0)
383#define STATS_INC_ACOVERFLOW(x) do { } while (0)
384#define STATS_SET_FREEABLE(x, i) do { } while (0)
385#define STATS_INC_ALLOCHIT(x) do { } while (0)
386#define STATS_INC_ALLOCMISS(x) do { } while (0)
387#define STATS_INC_FREEHIT(x) do { } while (0)
388#define STATS_INC_FREEMISS(x) do { } while (0)
389#endif
390
391#if DEBUG
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406static int obj_offset(struct kmem_cache *cachep)
407{
408 return cachep->obj_offset;
409}
410
411static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
412{
413 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
414 return (unsigned long long*) (objp + obj_offset(cachep) -
415 sizeof(unsigned long long));
416}
417
418static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
419{
420 BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
421 if (cachep->flags & SLAB_STORE_USER)
422 return (unsigned long long *)(objp + cachep->size -
423 sizeof(unsigned long long) -
424 REDZONE_ALIGN);
425 return (unsigned long long *) (objp + cachep->size -
426 sizeof(unsigned long long));
427}
428
429static void **dbg_userword(struct kmem_cache *cachep, void *objp)
430{
431 BUG_ON(!(cachep->flags & SLAB_STORE_USER));
432 return (void **)(objp + cachep->size - BYTES_PER_WORD);
433}
434
435#else
436
437#define obj_offset(x) 0
438#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
439#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
440#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
441
442#endif
443
444
445
446
447
448#define SLAB_MAX_ORDER_HI 1
449#define SLAB_MAX_ORDER_LO 0
450static int slab_max_order = SLAB_MAX_ORDER_LO;
451static bool slab_max_order_set __initdata;
452
453static inline struct kmem_cache *virt_to_cache(const void *obj)
454{
455 struct page *page = virt_to_head_page(obj);
456 return page->slab_cache;
457}
458
459static inline struct slab *virt_to_slab(const void *obj)
460{
461 struct page *page = virt_to_head_page(obj);
462
463 VM_BUG_ON(!PageSlab(page));
464 return page->slab_page;
465}
466
467static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
468 unsigned int idx)
469{
470 return slab->s_mem + cache->size * idx;
471}
472

/*
 * We want to avoid an expensive divide : (offset / cache->size)
 *   Using the fact that size is a constant for a particular cache,
 *   we can replace (offset / cache->size) by
 *   reciprocal_divide(offset, cache->reciprocal_buffer_size)
 */
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
                                        const struct slab *slab, void *obj)
{
        u32 offset = (obj - slab->s_mem);
        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
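
/*
 * Worked example (hypothetical numbers, for illustration only): with
 * cache->size == 256, reciprocal_buffer_size == reciprocal_value(256), and an
 * object starting 1280 bytes past s_mem, reciprocal_divide(1280, ...) yields
 * 1280 / 256 == 5, i.e. the object is index 5 in the slab -- the same result
 * as an integer divide, but computed with a multiply and a shift.
 */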
485
486static struct arraycache_init initarray_generic =
487 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
488
489
490static struct kmem_cache kmem_cache_boot = {
491 .batchcount = 1,
492 .limit = BOOT_CPUCACHE_ENTRIES,
493 .shared = 1,
494 .size = sizeof(struct kmem_cache),
495 .name = "kmem_cache",
496};
497
498#define BAD_ALIEN_MAGIC 0x01020304ul
499
500#ifdef CONFIG_LOCKDEP
501
502
503
504
505
506
507
508
509
510
511
512
513static struct lock_class_key on_slab_l3_key;
514static struct lock_class_key on_slab_alc_key;
515
516static struct lock_class_key debugobj_l3_key;
517static struct lock_class_key debugobj_alc_key;
518
519static void slab_set_lock_classes(struct kmem_cache *cachep,
520 struct lock_class_key *l3_key, struct lock_class_key *alc_key,
521 int q)
522{
523 struct array_cache **alc;
524 struct kmem_cache_node *n;
525 int r;
526
527 n = cachep->node[q];
528 if (!n)
529 return;
530
531 lockdep_set_class(&n->list_lock, l3_key);
532 alc = n->alien;
533
534
535
536
537
538
539
540 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
541 return;
542 for_each_node(r) {
543 if (alc[r])
544 lockdep_set_class(&alc[r]->lock, alc_key);
545 }
546}
547
548static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
549{
550 slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
551}
552
553static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
554{
555 int node;
556
557 for_each_online_node(node)
558 slab_set_debugobj_lock_classes_node(cachep, node);
559}
560
561static void init_node_lock_keys(int q)
562{
563 int i;
564
565 if (slab_state < UP)
566 return;
567
568 for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
569 struct kmem_cache_node *n;
570 struct kmem_cache *cache = kmalloc_caches[i];
571
572 if (!cache)
573 continue;
574
575 n = cache->node[q];
576 if (!n || OFF_SLAB(cache))
577 continue;
578
579 slab_set_lock_classes(cache, &on_slab_l3_key,
580 &on_slab_alc_key, q);
581 }
582}
583
584static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
585{
586 if (!cachep->node[q])
587 return;
588
589 slab_set_lock_classes(cachep, &on_slab_l3_key,
590 &on_slab_alc_key, q);
591}
592
593static inline void on_slab_lock_classes(struct kmem_cache *cachep)
594{
595 int node;
596
597 VM_BUG_ON(OFF_SLAB(cachep));
598 for_each_node(node)
599 on_slab_lock_classes_node(cachep, node);
600}
601
602static inline void init_lock_keys(void)
603{
604 int node;
605
606 for_each_node(node)
607 init_node_lock_keys(node);
608}
609#else
610static void init_node_lock_keys(int q)
611{
612}
613
614static inline void init_lock_keys(void)
615{
616}
617
618static inline void on_slab_lock_classes(struct kmem_cache *cachep)
619{
620}
621
622static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
623{
624}
625
626static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
627{
628}
629
630static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
631{
632}
633#endif
634
635static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
636
637static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
638{
639 return cachep->array[smp_processor_id()];
640}

static size_t slab_mgmt_size(size_t nr_objs, size_t align)
{
        return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}

/*
 * Calculate the number of objects and left-over bytes for a given buffer size.
 */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
                           size_t align, int flags, size_t *left_over,
                           unsigned int *num)
{
        int nr_objs;
        size_t mgmt_size;
        size_t slab_size = PAGE_SIZE << gfporder;

        /*
         * The slab management structure can be either off the slab or
         * on it. For the latter case, the memory allocated for a
         * slab is used for:
         *
         * - The struct slab
         * - One kmem_bufctl_t for each object
         * - Padding to respect alignment of @align
         * - @buffer_size bytes for each object
         *
         * If the slab management structure is off the slab, then the
         * alignment will already be calculated into the size. Because
         * the slabs are all pages aligned, the objects will be at the
         * correct alignment when allocated.
         */
        if (flags & CFLGS_OFF_SLAB) {
                mgmt_size = 0;
                nr_objs = slab_size / buffer_size;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;
        } else {
                /*
                 * Ignore padding for the initial guess. The padding
                 * is at most @align-1 bytes, and @buffer_size is at
                 * least @align. In the worst case, this result will
                 * be higher than it could be - but the result will
                 * still be correct.
                 */
                nr_objs = (slab_size - sizeof(struct slab)) /
                          (buffer_size + sizeof(kmem_bufctl_t));

                /*
                 * This calculated number will be either the right
                 * amount, or one greater than what we want.
                 */
                if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
                       > slab_size)
                        nr_objs--;

                if (nr_objs > SLAB_LIMIT)
                        nr_objs = SLAB_LIMIT;

                mgmt_size = slab_mgmt_size(nr_objs, align);
        }
        *num = nr_objs;
        *left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
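
/*
 * Worked example (hedged: the sizes here are illustrative assumptions, e.g.
 * sizeof(struct slab) == 56 and sizeof(kmem_bufctl_t) == 4 on a 64-bit
 * build): for an on-slab cache with buffer_size == 256, align == 64 and
 * gfporder == 0 (4096-byte slab), the first guess is
 * (4096 - 56) / (256 + 4) == 15 objects; slab_mgmt_size(15, 64) ==
 * ALIGN(56 + 60, 64) == 128, and 128 + 15 * 256 == 3968 <= 4096, so the
 * guess stands.  The result is *num == 15 and *left_over == 128, which is
 * later spent on cache colouring.
 */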
707
708#if DEBUG
709#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
710
711static void __slab_error(const char *function, struct kmem_cache *cachep,
712 char *msg)
713{
714 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
715 function, cachep->name, msg);
716 dump_stack();
717 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
718}
719#endif
720
721
722
723
724
725
726
727
728
729static int use_alien_caches __read_mostly = 1;
730static int __init noaliencache_setup(char *s)
731{
732 use_alien_caches = 0;
733 return 1;
734}
735__setup("noaliencache", noaliencache_setup);
736
737static int __init slab_max_order_setup(char *str)
738{
739 get_option(&str, &slab_max_order);
740 slab_max_order = slab_max_order < 0 ? 0 :
741 min(slab_max_order, MAX_ORDER - 1);
742 slab_max_order_set = true;
743
744 return 1;
745}
746__setup("slab_max_order=", slab_max_order_setup);
747
748#ifdef CONFIG_NUMA
749
750
751
752
753
754
755static DEFINE_PER_CPU(unsigned long, slab_reap_node);
756
757static void init_reap_node(int cpu)
758{
759 int node;
760
761 node = next_node(cpu_to_mem(cpu), node_online_map);
762 if (node == MAX_NUMNODES)
763 node = first_node(node_online_map);
764
765 per_cpu(slab_reap_node, cpu) = node;
766}
767
768static void next_reap_node(void)
769{
770 int node = __this_cpu_read(slab_reap_node);
771
772 node = next_node(node, node_online_map);
773 if (unlikely(node >= MAX_NUMNODES))
774 node = first_node(node_online_map);
775 __this_cpu_write(slab_reap_node, node);
776}
777
778#else
779#define init_reap_node(cpu) do { } while (0)
780#define next_reap_node(void) do { } while (0)
781#endif
782
783
784
785
786
787
788
789
790static void start_cpu_timer(int cpu)
791{
792 struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
793
794
795
796
797
798
799 if (keventd_up() && reap_work->work.func == NULL) {
800 init_reap_node(cpu);
801 INIT_DEFERRABLE_WORK(reap_work, cache_reap);
802 schedule_delayed_work_on(cpu, reap_work,
803 __round_jiffies_relative(HZ, cpu));
804 }
805}
806
807static struct array_cache *alloc_arraycache(int node, int entries,
808 int batchcount, gfp_t gfp)
809{
810 int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
811 struct array_cache *nc = NULL;
812
813 nc = kmalloc_node(memsize, gfp, node);
814
815
816
817
818
819
820
821 kmemleak_no_scan(nc);
822 if (nc) {
823 nc->avail = 0;
824 nc->limit = entries;
825 nc->batchcount = batchcount;
826 nc->touched = 0;
827 spin_lock_init(&nc->lock);
828 }
829 return nc;
830}
831
832static inline bool is_slab_pfmemalloc(struct slab *slabp)
833{
834 struct page *page = virt_to_page(slabp->s_mem);
835
836 return PageSlabPfmemalloc(page);
837}
838
839
840static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
841 struct array_cache *ac)
842{
843 struct kmem_cache_node *n = cachep->node[numa_mem_id()];
844 struct slab *slabp;
845 unsigned long flags;
846
847 if (!pfmemalloc_active)
848 return;
849
850 spin_lock_irqsave(&n->list_lock, flags);
851 list_for_each_entry(slabp, &n->slabs_full, list)
852 if (is_slab_pfmemalloc(slabp))
853 goto out;
854
855 list_for_each_entry(slabp, &n->slabs_partial, list)
856 if (is_slab_pfmemalloc(slabp))
857 goto out;
858
859 list_for_each_entry(slabp, &n->slabs_free, list)
860 if (is_slab_pfmemalloc(slabp))
861 goto out;
862
863 pfmemalloc_active = false;
864out:
865 spin_unlock_irqrestore(&n->list_lock, flags);
866}
867
868static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
869 gfp_t flags, bool force_refill)
870{
871 int i;
872 void *objp = ac->entry[--ac->avail];
873
874
875 if (unlikely(is_obj_pfmemalloc(objp))) {
876 struct kmem_cache_node *n;
877
878 if (gfp_pfmemalloc_allowed(flags)) {
879 clear_obj_pfmemalloc(&objp);
880 return objp;
881 }
882
883
884 for (i = 0; i < ac->avail; i++) {
885
886 if (!is_obj_pfmemalloc(ac->entry[i])) {
887 objp = ac->entry[i];
888 ac->entry[i] = ac->entry[ac->avail];
889 ac->entry[ac->avail] = objp;
890 return objp;
891 }
892 }
893
894
895
896
897
898 n = cachep->node[numa_mem_id()];
899 if (!list_empty(&n->slabs_free) && force_refill) {
900 struct slab *slabp = virt_to_slab(objp);
901 ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
902 clear_obj_pfmemalloc(&objp);
903 recheck_pfmemalloc_active(cachep, ac);
904 return objp;
905 }
906
907
908 ac->avail++;
909 objp = NULL;
910 }
911
912 return objp;
913}
914
915static inline void *ac_get_obj(struct kmem_cache *cachep,
916 struct array_cache *ac, gfp_t flags, bool force_refill)
917{
918 void *objp;
919
920 if (unlikely(sk_memalloc_socks()))
921 objp = __ac_get_obj(cachep, ac, flags, force_refill);
922 else
923 objp = ac->entry[--ac->avail];
924
925 return objp;
926}
927
928static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
929 void *objp)
930{
931 if (unlikely(pfmemalloc_active)) {
932
933 struct page *page = virt_to_head_page(objp);
934 if (PageSlabPfmemalloc(page))
935 set_obj_pfmemalloc(&objp);
936 }
937
938 return objp;
939}
940
941static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
942 void *objp)
943{
944 if (unlikely(sk_memalloc_socks()))
945 objp = __ac_put_obj(cachep, ac, objp);
946
947 ac->entry[ac->avail++] = objp;
948}
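
/*
 * Sketch of the common (non-pfmemalloc) fast path, as an illustration only:
 * the array cache behaves as a per-cpu LIFO stack of object pointers, so
 *
 *     objp = ac->entry[--ac->avail];      allocation: pop the cache-warm top
 *     ac->entry[ac->avail++] = objp;      free: push it back on top
 *
 * Only when sk_memalloc_socks() is true do the __ac_get_obj()/__ac_put_obj()
 * slow paths run, to honour the pfmemalloc tagging above.
 */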

/*
 * Transfer objects in one arraycache to another.
 * Locking must be handled by the caller.
 *
 * Return the number of entries transferred.
 */
static int transfer_objects(struct array_cache *to,
                            struct array_cache *from, unsigned int max)
{
        /* Figure out how many entries to transfer */
        int nr = min3(from->avail, max, to->limit - to->avail);

        if (!nr)
                return 0;

        memcpy(to->entry + to->avail, from->entry + from->avail - nr,
               sizeof(void *) * nr);

        from->avail -= nr;
        to->avail += nr;
        return nr;
}
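
/*
 * Example (illustrative, hypothetical values): if "from" holds 30 cached
 * pointers, "to" has limit 120 with 100 already available, and max == 60,
 * then nr == min3(30, 60, 20) == 20: the last 20 entries of "from" are
 * memcpy'd onto the top of "to", leaving from->avail == 10 and
 * to->avail == 120.  No locking is taken here; the caller must already hold
 * the relevant list_lock.
 */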
972
973#ifndef CONFIG_NUMA
974
975#define drain_alien_cache(cachep, alien) do { } while (0)
976#define reap_alien(cachep, n) do { } while (0)
977
978static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
979{
980 return (struct array_cache **)BAD_ALIEN_MAGIC;
981}
982
983static inline void free_alien_cache(struct array_cache **ac_ptr)
984{
985}
986
987static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
988{
989 return 0;
990}
991
992static inline void *alternate_node_alloc(struct kmem_cache *cachep,
993 gfp_t flags)
994{
995 return NULL;
996}
997
998static inline void *____cache_alloc_node(struct kmem_cache *cachep,
999 gfp_t flags, int nodeid)
1000{
1001 return NULL;
1002}
1003
1004#else
1005
1006static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
1007static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
1008
1009static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
1010{
1011 struct array_cache **ac_ptr;
1012 int memsize = sizeof(void *) * nr_node_ids;
1013 int i;
1014
1015 if (limit > 1)
1016 limit = 12;
1017 ac_ptr = kzalloc_node(memsize, gfp, node);
1018 if (ac_ptr) {
1019 for_each_node(i) {
1020 if (i == node || !node_online(i))
1021 continue;
1022 ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
1023 if (!ac_ptr[i]) {
1024 for (i--; i >= 0; i--)
1025 kfree(ac_ptr[i]);
1026 kfree(ac_ptr);
1027 return NULL;
1028 }
1029 }
1030 }
1031 return ac_ptr;
1032}
1033
1034static void free_alien_cache(struct array_cache **ac_ptr)
1035{
1036 int i;
1037
1038 if (!ac_ptr)
1039 return;
1040 for_each_node(i)
1041 kfree(ac_ptr[i]);
1042 kfree(ac_ptr);
1043}
1044
1045static void __drain_alien_cache(struct kmem_cache *cachep,
1046 struct array_cache *ac, int node)
1047{
1048 struct kmem_cache_node *n = cachep->node[node];
1049
1050 if (ac->avail) {
1051 spin_lock(&n->list_lock);
1052
1053
1054
1055
1056
1057 if (n->shared)
1058 transfer_objects(n->shared, ac, ac->limit);
1059
1060 free_block(cachep, ac->entry, ac->avail, node);
1061 ac->avail = 0;
1062 spin_unlock(&n->list_lock);
1063 }
1064}
1065
1066
1067
1068
1069static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
1070{
1071 int node = __this_cpu_read(slab_reap_node);
1072
1073 if (n->alien) {
1074 struct array_cache *ac = n->alien[node];
1075
1076 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1077 __drain_alien_cache(cachep, ac, node);
1078 spin_unlock_irq(&ac->lock);
1079 }
1080 }
1081}
1082
1083static void drain_alien_cache(struct kmem_cache *cachep,
1084 struct array_cache **alien)
1085{
1086 int i = 0;
1087 struct array_cache *ac;
1088 unsigned long flags;
1089
1090 for_each_online_node(i) {
1091 ac = alien[i];
1092 if (ac) {
1093 spin_lock_irqsave(&ac->lock, flags);
1094 __drain_alien_cache(cachep, ac, i);
1095 spin_unlock_irqrestore(&ac->lock, flags);
1096 }
1097 }
1098}
1099
1100static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1101{
1102 struct slab *slabp = virt_to_slab(objp);
1103 int nodeid = slabp->nodeid;
1104 struct kmem_cache_node *n;
1105 struct array_cache *alien = NULL;
1106 int node;
1107
1108 node = numa_mem_id();
1109
1110
1111
1112
1113
1114 if (likely(slabp->nodeid == node))
1115 return 0;
1116
1117 n = cachep->node[node];
1118 STATS_INC_NODEFREES(cachep);
1119 if (n->alien && n->alien[nodeid]) {
1120 alien = n->alien[nodeid];
1121 spin_lock(&alien->lock);
1122 if (unlikely(alien->avail == alien->limit)) {
1123 STATS_INC_ACOVERFLOW(cachep);
1124 __drain_alien_cache(cachep, alien, nodeid);
1125 }
1126 ac_put_obj(cachep, alien, objp);
1127 spin_unlock(&alien->lock);
1128 } else {
1129 spin_lock(&(cachep->node[nodeid])->list_lock);
1130 free_block(cachep, &objp, 1, nodeid);
1131 spin_unlock(&(cachep->node[nodeid])->list_lock);
1132 }
1133 return 1;
1134}
1135#endif
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146static int init_cache_node_node(int node)
1147{
1148 struct kmem_cache *cachep;
1149 struct kmem_cache_node *n;
1150 const int memsize = sizeof(struct kmem_cache_node);
1151
1152 list_for_each_entry(cachep, &slab_caches, list) {
1153
1154
1155
1156
1157
1158 if (!cachep->node[node]) {
1159 n = kmalloc_node(memsize, GFP_KERNEL, node);
1160 if (!n)
1161 return -ENOMEM;
1162 kmem_cache_node_init(n);
1163 n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1164 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1165
1166
1167
1168
1169
1170
1171 cachep->node[node] = n;
1172 }
1173
1174 spin_lock_irq(&cachep->node[node]->list_lock);
1175 cachep->node[node]->free_limit =
1176 (1 + nr_cpus_node(node)) *
1177 cachep->batchcount + cachep->num;
1178 spin_unlock_irq(&cachep->node[node]->list_lock);
1179 }
1180 return 0;
1181}

static inline int slabs_tofree(struct kmem_cache *cachep,
                               struct kmem_cache_node *n)
{
        return (n->free_objects + cachep->num - 1) / cachep->num;
}
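
/*
 * This is a plain ceiling division: with, say, 10 free objects and
 * cachep->num == 4 objects per slab, (10 + 4 - 1) / 4 == 3, i.e. at most
 * three slabs could need freeing to drop all free objects on this node.
 */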
1188
1189static void cpuup_canceled(long cpu)
1190{
1191 struct kmem_cache *cachep;
1192 struct kmem_cache_node *n = NULL;
1193 int node = cpu_to_mem(cpu);
1194 const struct cpumask *mask = cpumask_of_node(node);
1195
1196 list_for_each_entry(cachep, &slab_caches, list) {
1197 struct array_cache *nc;
1198 struct array_cache *shared;
1199 struct array_cache **alien;
1200
1201
1202 nc = cachep->array[cpu];
1203 cachep->array[cpu] = NULL;
1204 n = cachep->node[node];
1205
1206 if (!n)
1207 goto free_array_cache;
1208
1209 spin_lock_irq(&n->list_lock);
1210
1211
1212 n->free_limit -= cachep->batchcount;
1213 if (nc)
1214 free_block(cachep, nc->entry, nc->avail, node);
1215
1216 if (!cpumask_empty(mask)) {
1217 spin_unlock_irq(&n->list_lock);
1218 goto free_array_cache;
1219 }
1220
1221 shared = n->shared;
1222 if (shared) {
1223 free_block(cachep, shared->entry,
1224 shared->avail, node);
1225 n->shared = NULL;
1226 }
1227
1228 alien = n->alien;
1229 n->alien = NULL;
1230
1231 spin_unlock_irq(&n->list_lock);
1232
1233 kfree(shared);
1234 if (alien) {
1235 drain_alien_cache(cachep, alien);
1236 free_alien_cache(alien);
1237 }
1238free_array_cache:
1239 kfree(nc);
1240 }
1241
1242
1243
1244
1245
1246 list_for_each_entry(cachep, &slab_caches, list) {
1247 n = cachep->node[node];
1248 if (!n)
1249 continue;
1250 drain_freelist(cachep, n, slabs_tofree(cachep, n));
1251 }
1252}
1253
1254static int cpuup_prepare(long cpu)
1255{
1256 struct kmem_cache *cachep;
1257 struct kmem_cache_node *n = NULL;
1258 int node = cpu_to_mem(cpu);
1259 int err;
1260
1261
1262
1263
1264
1265
1266
1267 err = init_cache_node_node(node);
1268 if (err < 0)
1269 goto bad;
1270
1271
1272
1273
1274
1275 list_for_each_entry(cachep, &slab_caches, list) {
1276 struct array_cache *nc;
1277 struct array_cache *shared = NULL;
1278 struct array_cache **alien = NULL;
1279
1280 nc = alloc_arraycache(node, cachep->limit,
1281 cachep->batchcount, GFP_KERNEL);
1282 if (!nc)
1283 goto bad;
1284 if (cachep->shared) {
1285 shared = alloc_arraycache(node,
1286 cachep->shared * cachep->batchcount,
1287 0xbaadf00d, GFP_KERNEL);
1288 if (!shared) {
1289 kfree(nc);
1290 goto bad;
1291 }
1292 }
1293 if (use_alien_caches) {
1294 alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
1295 if (!alien) {
1296 kfree(shared);
1297 kfree(nc);
1298 goto bad;
1299 }
1300 }
1301 cachep->array[cpu] = nc;
1302 n = cachep->node[node];
1303 BUG_ON(!n);
1304
1305 spin_lock_irq(&n->list_lock);
1306 if (!n->shared) {
1307
1308
1309
1310
1311 n->shared = shared;
1312 shared = NULL;
1313 }
1314#ifdef CONFIG_NUMA
1315 if (!n->alien) {
1316 n->alien = alien;
1317 alien = NULL;
1318 }
1319#endif
1320 spin_unlock_irq(&n->list_lock);
1321 kfree(shared);
1322 free_alien_cache(alien);
1323 if (cachep->flags & SLAB_DEBUG_OBJECTS)
1324 slab_set_debugobj_lock_classes_node(cachep, node);
1325 else if (!OFF_SLAB(cachep) &&
1326 !(cachep->flags & SLAB_DESTROY_BY_RCU))
1327 on_slab_lock_classes_node(cachep, node);
1328 }
1329 init_node_lock_keys(node);
1330
1331 return 0;
1332bad:
1333 cpuup_canceled(cpu);
1334 return -ENOMEM;
1335}
1336
1337static int cpuup_callback(struct notifier_block *nfb,
1338 unsigned long action, void *hcpu)
1339{
1340 long cpu = (long)hcpu;
1341 int err = 0;
1342
1343 switch (action) {
1344 case CPU_UP_PREPARE:
1345 case CPU_UP_PREPARE_FROZEN:
1346 mutex_lock(&slab_mutex);
1347 err = cpuup_prepare(cpu);
1348 mutex_unlock(&slab_mutex);
1349 break;
1350 case CPU_ONLINE:
1351 case CPU_ONLINE_FROZEN:
1352 start_cpu_timer(cpu);
1353 break;
1354#ifdef CONFIG_HOTPLUG_CPU
1355 case CPU_DOWN_PREPARE:
1356 case CPU_DOWN_PREPARE_FROZEN:
1357
1358
1359
1360
1361
1362
1363 cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
1364
1365 per_cpu(slab_reap_work, cpu).work.func = NULL;
1366 break;
1367 case CPU_DOWN_FAILED:
1368 case CPU_DOWN_FAILED_FROZEN:
1369 start_cpu_timer(cpu);
1370 break;
1371 case CPU_DEAD:
1372 case CPU_DEAD_FROZEN:
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382#endif
1383 case CPU_UP_CANCELED:
1384 case CPU_UP_CANCELED_FROZEN:
1385 mutex_lock(&slab_mutex);
1386 cpuup_canceled(cpu);
1387 mutex_unlock(&slab_mutex);
1388 break;
1389 }
1390 return notifier_from_errno(err);
1391}
1392
1393static struct notifier_block cpucache_notifier = {
1394 &cpuup_callback, NULL, 0
1395};
1396
1397#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
1398
1399
1400
1401
1402
1403
1404
1405static int __meminit drain_cache_node_node(int node)
1406{
1407 struct kmem_cache *cachep;
1408 int ret = 0;
1409
1410 list_for_each_entry(cachep, &slab_caches, list) {
1411 struct kmem_cache_node *n;
1412
1413 n = cachep->node[node];
1414 if (!n)
1415 continue;
1416
1417 drain_freelist(cachep, n, slabs_tofree(cachep, n));
1418
1419 if (!list_empty(&n->slabs_full) ||
1420 !list_empty(&n->slabs_partial)) {
1421 ret = -EBUSY;
1422 break;
1423 }
1424 }
1425 return ret;
1426}
1427
1428static int __meminit slab_memory_callback(struct notifier_block *self,
1429 unsigned long action, void *arg)
1430{
1431 struct memory_notify *mnb = arg;
1432 int ret = 0;
1433 int nid;
1434
1435 nid = mnb->status_change_nid;
1436 if (nid < 0)
1437 goto out;
1438
1439 switch (action) {
1440 case MEM_GOING_ONLINE:
1441 mutex_lock(&slab_mutex);
1442 ret = init_cache_node_node(nid);
1443 mutex_unlock(&slab_mutex);
1444 break;
1445 case MEM_GOING_OFFLINE:
1446 mutex_lock(&slab_mutex);
1447 ret = drain_cache_node_node(nid);
1448 mutex_unlock(&slab_mutex);
1449 break;
1450 case MEM_ONLINE:
1451 case MEM_OFFLINE:
1452 case MEM_CANCEL_ONLINE:
1453 case MEM_CANCEL_OFFLINE:
1454 break;
1455 }
1456out:
1457 return notifier_from_errno(ret);
1458}
1459#endif
1460
1461
1462
1463
1464static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
1465 int nodeid)
1466{
1467 struct kmem_cache_node *ptr;
1468
1469 ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
1470 BUG_ON(!ptr);
1471
1472 memcpy(ptr, list, sizeof(struct kmem_cache_node));
1473
1474
1475
1476 spin_lock_init(&ptr->list_lock);
1477
1478 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1479 cachep->node[nodeid] = ptr;
1480}
1481
1482
1483
1484
1485
1486static void __init set_up_node(struct kmem_cache *cachep, int index)
1487{
1488 int node;
1489
1490 for_each_online_node(node) {
1491 cachep->node[node] = &init_kmem_cache_node[index + node];
1492 cachep->node[node]->next_reap = jiffies +
1493 REAPTIMEOUT_LIST3 +
1494 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1495 }
1496}
1497
1498
1499
1500
1501
1502static void setup_node_pointer(struct kmem_cache *cachep)
1503{
1504 cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
1505}
1506
1507
1508
1509
1510
1511void __init kmem_cache_init(void)
1512{
1513 int i;
1514
1515 kmem_cache = &kmem_cache_boot;
1516 setup_node_pointer(kmem_cache);
1517
1518 if (num_possible_nodes() == 1)
1519 use_alien_caches = 0;
1520
1521 for (i = 0; i < NUM_INIT_LISTS; i++)
1522 kmem_cache_node_init(&init_kmem_cache_node[i]);
1523
1524 set_up_node(kmem_cache, CACHE_CACHE);
1525
1526
1527
1528
1529
1530
1531 if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
1532 slab_max_order = SLAB_MAX_ORDER_HI;
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559 create_boot_cache(kmem_cache, "kmem_cache",
1560 offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1561 nr_node_ids * sizeof(struct kmem_cache_node *),
1562 SLAB_HWCACHE_ALIGN);
1563 list_add(&kmem_cache->list, &slab_caches);
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573 kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
1574 kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
1575
1576 if (INDEX_AC != INDEX_NODE)
1577 kmalloc_caches[INDEX_NODE] =
1578 create_kmalloc_cache("kmalloc-node",
1579 kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
1580
1581 slab_early_init = 0;
1582
1583
1584 {
1585 struct array_cache *ptr;
1586
1587 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1588
1589 memcpy(ptr, cpu_cache_get(kmem_cache),
1590 sizeof(struct arraycache_init));
1591
1592
1593
1594 spin_lock_init(&ptr->lock);
1595
1596 kmem_cache->array[smp_processor_id()] = ptr;
1597
1598 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1599
1600 BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
1601 != &initarray_generic.cache);
1602 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
1603 sizeof(struct arraycache_init));
1604
1605
1606
1607 spin_lock_init(&ptr->lock);
1608
1609 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
1610 }
1611
1612 {
1613 int nid;
1614
1615 for_each_online_node(nid) {
1616 init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
1617
1618 init_list(kmalloc_caches[INDEX_AC],
1619 &init_kmem_cache_node[SIZE_AC + nid], nid);
1620
1621 if (INDEX_AC != INDEX_NODE) {
1622 init_list(kmalloc_caches[INDEX_NODE],
1623 &init_kmem_cache_node[SIZE_NODE + nid], nid);
1624 }
1625 }
1626 }
1627
1628 create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
1629}
1630
1631void __init kmem_cache_init_late(void)
1632{
1633 struct kmem_cache *cachep;
1634
1635 slab_state = UP;
1636
1637
1638 mutex_lock(&slab_mutex);
1639 list_for_each_entry(cachep, &slab_caches, list)
1640 if (enable_cpucache(cachep, GFP_NOWAIT))
1641 BUG();
1642 mutex_unlock(&slab_mutex);
1643
1644
1645 init_lock_keys();
1646
1647
1648 slab_state = FULL;
1649
1650
1651
1652
1653
1654 register_cpu_notifier(&cpucache_notifier);
1655
1656#ifdef CONFIG_NUMA
1657
1658
1659
1660
1661 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1662#endif
1663
1664
1665
1666
1667
1668}
1669
1670static int __init cpucache_init(void)
1671{
1672 int cpu;
1673
1674
1675
1676
1677 for_each_online_cpu(cpu)
1678 start_cpu_timer(cpu);
1679
1680
1681 slab_state = FULL;
1682 return 0;
1683}
1684__initcall(cpucache_init);
1685
1686static noinline void
1687slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1688{
1689 struct kmem_cache_node *n;
1690 struct slab *slabp;
1691 unsigned long flags;
1692 int node;
1693
1694 printk(KERN_WARNING
1695 "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
1696 nodeid, gfpflags);
1697 printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
1698 cachep->name, cachep->size, cachep->gfporder);
1699
1700 for_each_online_node(node) {
1701 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1702 unsigned long active_slabs = 0, num_slabs = 0;
1703
1704 n = cachep->node[node];
1705 if (!n)
1706 continue;
1707
1708 spin_lock_irqsave(&n->list_lock, flags);
1709 list_for_each_entry(slabp, &n->slabs_full, list) {
1710 active_objs += cachep->num;
1711 active_slabs++;
1712 }
1713 list_for_each_entry(slabp, &n->slabs_partial, list) {
1714 active_objs += slabp->inuse;
1715 active_slabs++;
1716 }
1717 list_for_each_entry(slabp, &n->slabs_free, list)
1718 num_slabs++;
1719
1720 free_objects += n->free_objects;
1721 spin_unlock_irqrestore(&n->list_lock, flags);
1722
1723 num_slabs += active_slabs;
1724 num_objs = num_slabs * cachep->num;
1725 printk(KERN_WARNING
1726 " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
1727 node, active_slabs, num_slabs, active_objs, num_objs,
1728 free_objects);
1729 }
1730}
1731
1732
1733
1734
1735
1736
1737
1738
1739static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1740{
1741 struct page *page;
1742 int nr_pages;
1743 int i;
1744
1745#ifndef CONFIG_MMU
1746
1747
1748
1749
1750 flags |= __GFP_COMP;
1751#endif
1752
1753 flags |= cachep->allocflags;
1754 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1755 flags |= __GFP_RECLAIMABLE;
1756
1757 page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
1758 if (!page) {
1759 if (!(flags & __GFP_NOWARN) && printk_ratelimit())
1760 slab_out_of_memory(cachep, flags, nodeid);
1761 return NULL;
1762 }
1763
1764
1765 if (unlikely(page->pfmemalloc))
1766 pfmemalloc_active = true;
1767
1768 nr_pages = (1 << cachep->gfporder);
1769 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1770 add_zone_page_state(page_zone(page),
1771 NR_SLAB_RECLAIMABLE, nr_pages);
1772 else
1773 add_zone_page_state(page_zone(page),
1774 NR_SLAB_UNRECLAIMABLE, nr_pages);
1775 for (i = 0; i < nr_pages; i++) {
1776 __SetPageSlab(page + i);
1777
1778 if (page->pfmemalloc)
1779 SetPageSlabPfmemalloc(page + i);
1780 }
1781 memcg_bind_pages(cachep, cachep->gfporder);
1782
1783 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
1784 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
1785
1786 if (cachep->ctor)
1787 kmemcheck_mark_uninitialized_pages(page, nr_pages);
1788 else
1789 kmemcheck_mark_unallocated_pages(page, nr_pages);
1790 }
1791
1792 return page_address(page);
1793}
1794
1795
1796
1797
1798static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1799{
1800 unsigned long i = (1 << cachep->gfporder);
1801 struct page *page = virt_to_page(addr);
1802 const unsigned long nr_freed = i;
1803
1804 kmemcheck_free_shadow(page, cachep->gfporder);
1805
1806 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
1807 sub_zone_page_state(page_zone(page),
1808 NR_SLAB_RECLAIMABLE, nr_freed);
1809 else
1810 sub_zone_page_state(page_zone(page),
1811 NR_SLAB_UNRECLAIMABLE, nr_freed);
1812 while (i--) {
1813 BUG_ON(!PageSlab(page));
1814 __ClearPageSlabPfmemalloc(page);
1815 __ClearPageSlab(page);
1816 page++;
1817 }
1818
1819 memcg_release_pages(cachep, cachep->gfporder);
1820 if (current->reclaim_state)
1821 current->reclaim_state->reclaimed_slab += nr_freed;
1822 free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
1823}
1824
1825static void kmem_rcu_free(struct rcu_head *head)
1826{
1827 struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
1828 struct kmem_cache *cachep = slab_rcu->cachep;
1829
1830 kmem_freepages(cachep, slab_rcu->addr);
1831 if (OFF_SLAB(cachep))
1832 kmem_cache_free(cachep->slabp_cache, slab_rcu);
1833}
1834
1835#if DEBUG
1836
1837#ifdef CONFIG_DEBUG_PAGEALLOC
1838static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
1839 unsigned long caller)
1840{
1841 int size = cachep->object_size;
1842
1843 addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
1844
1845 if (size < 5 * sizeof(unsigned long))
1846 return;
1847
1848 *addr++ = 0x12345678;
1849 *addr++ = caller;
1850 *addr++ = smp_processor_id();
1851 size -= 3 * sizeof(unsigned long);
1852 {
1853 unsigned long *sptr = &caller;
1854 unsigned long svalue;
1855
1856 while (!kstack_end(sptr)) {
1857 svalue = *sptr++;
1858 if (kernel_text_address(svalue)) {
1859 *addr++ = svalue;
1860 size -= sizeof(unsigned long);
1861 if (size <= sizeof(unsigned long))
1862 break;
1863 }
1864 }
1865
1866 }
1867 *addr++ = 0x87654321;
1868}
1869#endif
1870
1871static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1872{
1873 int size = cachep->object_size;
1874 addr = &((char *)addr)[obj_offset(cachep)];
1875
1876 memset(addr, val, size);
1877 *(unsigned char *)(addr + size - 1) = POISON_END;
1878}
1879
1880static void dump_line(char *data, int offset, int limit)
1881{
1882 int i;
1883 unsigned char error = 0;
1884 int bad_count = 0;
1885
1886 printk(KERN_ERR "%03x: ", offset);
1887 for (i = 0; i < limit; i++) {
1888 if (data[offset + i] != POISON_FREE) {
1889 error = data[offset + i];
1890 bad_count++;
1891 }
1892 }
1893 print_hex_dump(KERN_CONT, "", 0, 16, 1,
1894 &data[offset], limit, 1);
1895
1896 if (bad_count == 1) {
1897 error ^= POISON_FREE;
1898 if (!(error & (error - 1))) {
1899 printk(KERN_ERR "Single bit error detected. Probably "
1900 "bad RAM.\n");
1901#ifdef CONFIG_X86
1902 printk(KERN_ERR "Run memtest86+ or a similar memory "
1903 "test tool.\n");
1904#else
1905 printk(KERN_ERR "Run a memory test tool.\n");
1906#endif
1907 }
1908 }
1909}
1910#endif
1911
1912#if DEBUG
1913
1914static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1915{
1916 int i, size;
1917 char *realobj;
1918
1919 if (cachep->flags & SLAB_RED_ZONE) {
1920 printk(KERN_ERR "Redzone: 0x%llx/0x%llx.\n",
1921 *dbg_redzone1(cachep, objp),
1922 *dbg_redzone2(cachep, objp));
1923 }
1924
1925 if (cachep->flags & SLAB_STORE_USER) {
1926 printk(KERN_ERR "Last user: [<%p>](%pSR)\n",
1927 *dbg_userword(cachep, objp),
1928 *dbg_userword(cachep, objp));
1929 }
1930 realobj = (char *)objp + obj_offset(cachep);
1931 size = cachep->object_size;
1932 for (i = 0; i < size && lines; i += 16, lines--) {
1933 int limit;
1934 limit = 16;
1935 if (i + limit > size)
1936 limit = size - i;
1937 dump_line(realobj, i, limit);
1938 }
1939}
1940
1941static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1942{
1943 char *realobj;
1944 int size, i;
1945 int lines = 0;
1946
1947 realobj = (char *)objp + obj_offset(cachep);
1948 size = cachep->object_size;
1949
1950 for (i = 0; i < size; i++) {
1951 char exp = POISON_FREE;
1952 if (i == size - 1)
1953 exp = POISON_END;
1954 if (realobj[i] != exp) {
1955 int limit;
1956
1957
1958 if (lines == 0) {
1959 printk(KERN_ERR
1960 "Slab corruption (%s): %s start=%p, len=%d\n",
1961 print_tainted(), cachep->name, realobj, size);
1962 print_objinfo(cachep, objp, 0);
1963 }
1964
1965 i = (i / 16) * 16;
1966 limit = 16;
1967 if (i + limit > size)
1968 limit = size - i;
1969 dump_line(realobj, i, limit);
1970 i += 16;
1971 lines++;
1972
1973 if (lines > 5)
1974 break;
1975 }
1976 }
1977 if (lines != 0) {
1978
1979
1980
1981 struct slab *slabp = virt_to_slab(objp);
1982 unsigned int objnr;
1983
1984 objnr = obj_to_index(cachep, slabp, objp);
1985 if (objnr) {
1986 objp = index_to_obj(cachep, slabp, objnr - 1);
1987 realobj = (char *)objp + obj_offset(cachep);
1988 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1989 realobj, size);
1990 print_objinfo(cachep, objp, 2);
1991 }
1992 if (objnr + 1 < cachep->num) {
1993 objp = index_to_obj(cachep, slabp, objnr + 1);
1994 realobj = (char *)objp + obj_offset(cachep);
1995 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1996 realobj, size);
1997 print_objinfo(cachep, objp, 2);
1998 }
1999 }
2000}
2001#endif
2002
2003#if DEBUG
2004static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
2005{
2006 int i;
2007 for (i = 0; i < cachep->num; i++) {
2008 void *objp = index_to_obj(cachep, slabp, i);
2009
2010 if (cachep->flags & SLAB_POISON) {
2011#ifdef CONFIG_DEBUG_PAGEALLOC
2012 if (cachep->size % PAGE_SIZE == 0 &&
2013 OFF_SLAB(cachep))
2014 kernel_map_pages(virt_to_page(objp),
2015 cachep->size / PAGE_SIZE, 1);
2016 else
2017 check_poison_obj(cachep, objp);
2018#else
2019 check_poison_obj(cachep, objp);
2020#endif
2021 }
2022 if (cachep->flags & SLAB_RED_ZONE) {
2023 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2024 slab_error(cachep, "start of a freed object "
2025 "was overwritten");
2026 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2027 slab_error(cachep, "end of a freed object "
2028 "was overwritten");
2029 }
2030 }
2031}
2032#else
2033static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
2034{
2035}
2036#endif
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
2048{
2049 void *addr = slabp->s_mem - slabp->colouroff;
2050
2051 slab_destroy_debugcheck(cachep, slabp);
2052 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
2053 struct slab_rcu *slab_rcu;
2054
2055 slab_rcu = (struct slab_rcu *)slabp;
2056 slab_rcu->cachep = cachep;
2057 slab_rcu->addr = addr;
2058 call_rcu(&slab_rcu->head, kmem_rcu_free);
2059 } else {
2060 kmem_freepages(cachep, addr);
2061 if (OFF_SLAB(cachep))
2062 kmem_cache_free(cachep->slabp_cache, slabp);
2063 }
2064}

/**
 * calculate_slab_order - calculate size (page order) of slabs
 * @cachep: pointer to the cache that is being created
 * @size: size of objects to be created in this cache.
 * @align: required alignment for the objects.
 * @flags: slab allocation flags
 *
 * Also calculates the number of objects per slab.
 *
 * This could be made much more intelligent.  For now, try to avoid using
 * high order pages for slabs.  When the gfp() functions are more friendly
 * towards high-order requests, this should be changed.
 */
static size_t calculate_slab_order(struct kmem_cache *cachep,
                        size_t size, size_t align, unsigned long flags)
{
        unsigned long offslab_limit;
        size_t left_over = 0;
        int gfporder;

        for (gfporder = 0; gfporder <= KMALLOC_MAX_ORDER; gfporder++) {
                unsigned int num;
                size_t remainder;

                cache_estimate(gfporder, size, align, flags, &remainder, &num);
                if (!num)
                        continue;

                if (flags & CFLGS_OFF_SLAB) {
                        /*
                         * Max number of objs-per-slab for caches which
                         * use off-slab slabs. Needed to avoid a possible
                         * looping condition in cache_grow().
                         */
                        offslab_limit = size - sizeof(struct slab);
                        offslab_limit /= sizeof(kmem_bufctl_t);

                        if (num > offslab_limit)
                                break;
                }

                /* Found something acceptable - save it away */
                cachep->num = num;
                cachep->gfporder = gfporder;
                left_over = remainder;

                /*
                 * A VFS-reclaimable slab tends to have most allocations
                 * as GFP_NOFS and we really don't want to have to be
                 * allocating higher-order pages when we are unable to
                 * shrink dcache.
                 */
                if (flags & SLAB_RECLAIM_ACCOUNT)
                        break;

                /*
                 * Large number of objects is good, but very large slabs are
                 * currently bad for the gfp()s.
                 */
                if (gfporder >= slab_max_order)
                        break;

                /*
                 * Acceptable internal fragmentation?
                 */
                if (left_over * 8 <= (PAGE_SIZE << gfporder))
                        break;
        }
        return left_over;
}
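
/*
 * Rough example of the resulting trade-off (hedged: the numbers assume 4 KiB
 * pages, slab_max_order == 1, and the illustrative struct sizes used in the
 * cache_estimate() example): a 2048-byte on-slab object fits only once in an
 * order-0 page, and the ~1.9 KiB of waste fails the
 * "left_over * 8 <= slab size" test, so the loop tries order 1, where three
 * objects fit with much less relative waste and the
 * "gfporder >= slab_max_order" check stops the search there.
 */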
2135
2136static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2137{
2138 if (slab_state >= FULL)
2139 return enable_cpucache(cachep, gfp);
2140
2141 if (slab_state == DOWN) {
2142
2143
2144
2145
2146
2147 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2148 slab_state = PARTIAL;
2149 } else if (slab_state == PARTIAL) {
2150
2151
2152
2153
2154
2155 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2156
2157
2158
2159
2160
2161
2162 set_up_node(cachep, SIZE_AC);
2163 if (INDEX_AC == INDEX_NODE)
2164 slab_state = PARTIAL_NODE;
2165 else
2166 slab_state = PARTIAL_ARRAYCACHE;
2167 } else {
2168
2169 cachep->array[smp_processor_id()] =
2170 kmalloc(sizeof(struct arraycache_init), gfp);
2171
2172 if (slab_state == PARTIAL_ARRAYCACHE) {
2173 set_up_node(cachep, SIZE_NODE);
2174 slab_state = PARTIAL_NODE;
2175 } else {
2176 int node;
2177 for_each_online_node(node) {
2178 cachep->node[node] =
2179 kmalloc_node(sizeof(struct kmem_cache_node),
2180 gfp, node);
2181 BUG_ON(!cachep->node[node]);
2182 kmem_cache_node_init(cachep->node[node]);
2183 }
2184 }
2185 }
2186 cachep->node[numa_mem_id()]->next_reap =
2187 jiffies + REAPTIMEOUT_LIST3 +
2188 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2189
2190 cpu_cache_get(cachep)->avail = 0;
2191 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2192 cpu_cache_get(cachep)->batchcount = 1;
2193 cpu_cache_get(cachep)->touched = 0;
2194 cachep->batchcount = 1;
2195 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2196 return 0;
2197}
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220int
2221__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2222{
2223 size_t left_over, slab_size, ralign;
2224 gfp_t gfp;
2225 int err;
2226 size_t size = cachep->size;
2227
2228#if DEBUG
2229#if FORCED_DEBUG
2230
2231
2232
2233
2234
2235
2236 if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN +
2237 2 * sizeof(unsigned long long)))
2238 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
2239 if (!(flags & SLAB_DESTROY_BY_RCU))
2240 flags |= SLAB_POISON;
2241#endif
2242 if (flags & SLAB_DESTROY_BY_RCU)
2243 BUG_ON(flags & SLAB_POISON);
2244#endif
2245
2246
2247
2248
2249
2250
2251 if (size & (BYTES_PER_WORD - 1)) {
2252 size += (BYTES_PER_WORD - 1);
2253 size &= ~(BYTES_PER_WORD - 1);
2254 }
2255
2256
2257
2258
2259
2260
2261 if (flags & SLAB_STORE_USER)
2262 ralign = BYTES_PER_WORD;
2263
2264 if (flags & SLAB_RED_ZONE) {
2265 ralign = REDZONE_ALIGN;
2266
2267
2268 size += REDZONE_ALIGN - 1;
2269 size &= ~(REDZONE_ALIGN - 1);
2270 }
2271
2272
2273 if (ralign < cachep->align) {
2274 ralign = cachep->align;
2275 }
2276
2277 if (ralign > __alignof__(unsigned long long))
2278 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2279
2280
2281
2282 cachep->align = ralign;
2283
2284 if (slab_is_available())
2285 gfp = GFP_KERNEL;
2286 else
2287 gfp = GFP_NOWAIT;
2288
2289 setup_node_pointer(cachep);
2290#if DEBUG
2291
2292
2293
2294
2295
2296 if (flags & SLAB_RED_ZONE) {
2297
2298 cachep->obj_offset += sizeof(unsigned long long);
2299 size += 2 * sizeof(unsigned long long);
2300 }
2301 if (flags & SLAB_STORE_USER) {
2302
2303
2304
2305
2306 if (flags & SLAB_RED_ZONE)
2307 size += REDZONE_ALIGN;
2308 else
2309 size += BYTES_PER_WORD;
2310 }
2311#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2312 if (size >= kmalloc_size(INDEX_NODE + 1)
2313 && cachep->object_size > cache_line_size()
2314 && ALIGN(size, cachep->align) < PAGE_SIZE) {
2315 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
2316 size = PAGE_SIZE;
2317 }
2318#endif
2319#endif
2320
2321
2322
2323
2324
2325
2326
2327 if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
2328 !(flags & SLAB_NOLEAKTRACE))
2329
2330
2331
2332
2333 flags |= CFLGS_OFF_SLAB;
2334
2335 size = ALIGN(size, cachep->align);
2336
2337 left_over = calculate_slab_order(cachep, size, cachep->align, flags);
2338
2339 if (!cachep->num)
2340 return -E2BIG;
2341
2342 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
2343 + sizeof(struct slab), cachep->align);
2344
2345
2346
2347
2348
2349 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
2350 flags &= ~CFLGS_OFF_SLAB;
2351 left_over -= slab_size;
2352 }
2353
2354 if (flags & CFLGS_OFF_SLAB) {
2355
2356 slab_size =
2357 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2358
2359#ifdef CONFIG_PAGE_POISONING
2360
2361
2362
2363
2364 if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
2365 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
2366#endif
2367 }
2368
2369 cachep->colour_off = cache_line_size();
2370
2371 if (cachep->colour_off < cachep->align)
2372 cachep->colour_off = cachep->align;
2373 cachep->colour = left_over / cachep->colour_off;
2374 cachep->slab_size = slab_size;
2375 cachep->flags = flags;
2376 cachep->allocflags = 0;
2377 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2378 cachep->allocflags |= GFP_DMA;
2379 cachep->size = size;
2380 cachep->reciprocal_buffer_size = reciprocal_value(size);
2381
2382 if (flags & CFLGS_OFF_SLAB) {
2383 cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
2384
2385
2386
2387
2388
2389
2390
2391 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
2392 }
2393
2394 err = setup_cpu_cache(cachep, gfp);
2395 if (err) {
2396 __kmem_cache_shutdown(cachep);
2397 return err;
2398 }
2399
2400 if (flags & SLAB_DEBUG_OBJECTS) {
2401
2402
2403
2404
2405 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
2406
2407 slab_set_debugobj_lock_classes(cachep);
2408 } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
2409 on_slab_lock_classes(cachep);
2410
2411 return 0;
2412}
2413
2414#if DEBUG
2415static void check_irq_off(void)
2416{
2417 BUG_ON(!irqs_disabled());
2418}
2419
2420static void check_irq_on(void)
2421{
2422 BUG_ON(irqs_disabled());
2423}
2424
2425static void check_spinlock_acquired(struct kmem_cache *cachep)
2426{
2427#ifdef CONFIG_SMP
2428 check_irq_off();
2429 assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
2430#endif
2431}
2432
2433static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2434{
2435#ifdef CONFIG_SMP
2436 check_irq_off();
2437 assert_spin_locked(&cachep->node[node]->list_lock);
2438#endif
2439}
2440
2441#else
2442#define check_irq_off() do { } while(0)
2443#define check_irq_on() do { } while(0)
2444#define check_spinlock_acquired(x) do { } while(0)
2445#define check_spinlock_acquired_node(x, y) do { } while(0)
2446#endif
2447
2448static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
2449 struct array_cache *ac,
2450 int force, int node);
2451
2452static void do_drain(void *arg)
2453{
2454 struct kmem_cache *cachep = arg;
2455 struct array_cache *ac;
2456 int node = numa_mem_id();
2457
2458 check_irq_off();
2459 ac = cpu_cache_get(cachep);
2460 spin_lock(&cachep->node[node]->list_lock);
2461 free_block(cachep, ac->entry, ac->avail, node);
2462 spin_unlock(&cachep->node[node]->list_lock);
2463 ac->avail = 0;
2464}
2465
2466static void drain_cpu_caches(struct kmem_cache *cachep)
2467{
2468 struct kmem_cache_node *n;
2469 int node;
2470
2471 on_each_cpu(do_drain, cachep, 1);
2472 check_irq_on();
2473 for_each_online_node(node) {
2474 n = cachep->node[node];
2475 if (n && n->alien)
2476 drain_alien_cache(cachep, n->alien);
2477 }
2478
2479 for_each_online_node(node) {
2480 n = cachep->node[node];
2481 if (n)
2482 drain_array(cachep, n, n->shared, 1, node);
2483 }
2484}
2485
2486
2487
2488
2489
2490
2491
2492static int drain_freelist(struct kmem_cache *cache,
2493 struct kmem_cache_node *n, int tofree)
2494{
2495 struct list_head *p;
2496 int nr_freed;
2497 struct slab *slabp;
2498
2499 nr_freed = 0;
2500 while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
2501
2502 spin_lock_irq(&n->list_lock);
2503 p = n->slabs_free.prev;
2504 if (p == &n->slabs_free) {
2505 spin_unlock_irq(&n->list_lock);
2506 goto out;
2507 }
2508
2509 slabp = list_entry(p, struct slab, list);
2510#if DEBUG
2511 BUG_ON(slabp->inuse);
2512#endif
2513 list_del(&slabp->list);
2514
2515
2516
2517
2518 n->free_objects -= cache->num;
2519 spin_unlock_irq(&n->list_lock);
2520 slab_destroy(cache, slabp);
2521 nr_freed++;
2522 }
2523out:
2524 return nr_freed;
2525}
2526
2527
2528static int __cache_shrink(struct kmem_cache *cachep)
2529{
2530 int ret = 0, i = 0;
2531 struct kmem_cache_node *n;
2532
2533 drain_cpu_caches(cachep);
2534
2535 check_irq_on();
2536 for_each_online_node(i) {
2537 n = cachep->node[i];
2538 if (!n)
2539 continue;
2540
2541 drain_freelist(cachep, n, slabs_tofree(cachep, n));
2542
2543 ret += !list_empty(&n->slabs_full) ||
2544 !list_empty(&n->slabs_partial);
2545 }
2546 return (ret ? 1 : 0);
2547}
2548
2549
2550
2551
2552
2553
2554
2555
2556int kmem_cache_shrink(struct kmem_cache *cachep)
2557{
2558 int ret;
2559 BUG_ON(!cachep || in_interrupt());
2560
2561 get_online_cpus();
2562 mutex_lock(&slab_mutex);
2563 ret = __cache_shrink(cachep);
2564 mutex_unlock(&slab_mutex);
2565 put_online_cpus();
2566 return ret;
2567}
2568EXPORT_SYMBOL(kmem_cache_shrink);
2569
2570int __kmem_cache_shutdown(struct kmem_cache *cachep)
2571{
2572 int i;
2573 struct kmem_cache_node *n;
2574 int rc = __cache_shrink(cachep);
2575
2576 if (rc)
2577 return rc;
2578
2579 for_each_online_cpu(i)
2580 kfree(cachep->array[i]);
2581
2582
2583 for_each_online_node(i) {
2584 n = cachep->node[i];
2585 if (n) {
2586 kfree(n->shared);
2587 free_alien_cache(n->alien);
2588 kfree(n);
2589 }
2590 }
2591 return 0;
2592}
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2606 int colour_off, gfp_t local_flags,
2607 int nodeid)
2608{
2609 struct slab *slabp;
2610
2611 if (OFF_SLAB(cachep)) {
2612
2613 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
2614 local_flags, nodeid);
2615
2616
2617
2618
2619
2620
2621 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2622 local_flags);
2623 if (!slabp)
2624 return NULL;
2625 } else {
2626 slabp = objp + colour_off;
2627 colour_off += cachep->slab_size;
2628 }
2629 slabp->inuse = 0;
2630 slabp->colouroff = colour_off;
2631 slabp->s_mem = objp + colour_off;
2632 slabp->nodeid = nodeid;
2633 slabp->free = 0;
2634 return slabp;
2635}

static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
{
        return (kmem_bufctl_t *) (slabp + 1);
}
2641
2642static void cache_init_objs(struct kmem_cache *cachep,
2643 struct slab *slabp)
2644{
2645 int i;
2646
2647 for (i = 0; i < cachep->num; i++) {
2648 void *objp = index_to_obj(cachep, slabp, i);
2649#if DEBUG
2650
2651 if (cachep->flags & SLAB_POISON)
2652 poison_obj(cachep, objp, POISON_FREE);
2653 if (cachep->flags & SLAB_STORE_USER)
2654 *dbg_userword(cachep, objp) = NULL;
2655
2656 if (cachep->flags & SLAB_RED_ZONE) {
2657 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2658 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2659 }
2660
2661
2662
2663
2664
2665 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2666 cachep->ctor(objp + obj_offset(cachep));
2667
2668 if (cachep->flags & SLAB_RED_ZONE) {
2669 if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
2670 slab_error(cachep, "constructor overwrote the"
2671 " end of an object");
2672 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
2673 slab_error(cachep, "constructor overwrote the"
2674 " start of an object");
2675 }
2676 if ((cachep->size % PAGE_SIZE) == 0 &&
2677 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2678 kernel_map_pages(virt_to_page(objp),
2679 cachep->size / PAGE_SIZE, 0);
2680#else
2681 if (cachep->ctor)
2682 cachep->ctor(objp);
2683#endif
2684 slab_bufctl(slabp)[i] = i + 1;
2685 }
2686 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2687}
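
/*
 * Illustrative state after cache_init_objs() on a slab with, say,
 * cachep->num == 4 (a hypothetical figure): slab_bufctl(slabp) holds
 * { 1, 2, 3, BUFCTL_END }, and together with slabp->free == 0 (set up in
 * alloc_slabmgmt()) the free list is the chain 0 -> 1 -> 2 -> 3 -> end,
 * handing objects out in index order until frees start reordering it.
 */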
2688
2689static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2690{
2691 if (CONFIG_ZONE_DMA_FLAG) {
2692 if (flags & GFP_DMA)
2693 BUG_ON(!(cachep->allocflags & GFP_DMA));
2694 else
2695 BUG_ON(cachep->allocflags & GFP_DMA);
2696 }
2697}

static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
                          int nodeid)
{
        void *objp = index_to_obj(cachep, slabp, slabp->free);
        kmem_bufctl_t next;

        slabp->inuse++;
        next = slab_bufctl(slabp)[slabp->free];
#if DEBUG
        slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
        WARN_ON(slabp->nodeid != nodeid);
#endif
        slabp->free = next;

        return objp;
}

static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
                         void *objp, int nodeid)
{
        unsigned int objnr = obj_to_index(cachep, slabp, objp);

#if DEBUG
        /* Verify that the slab belongs to the intended node */
        WARN_ON(slabp->nodeid != nodeid);

        if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
                printk(KERN_ERR "slab: double free detected in cache "
                       "'%s', objp %p\n", cachep->name, objp);
                BUG();
        }
#endif
        slab_bufctl(slabp)[objnr] = slabp->free;
        slabp->free = objnr;
        slabp->inuse--;
}
2735
2736
2737
2738
2739
2740
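/*
 * Map pages beginning at addr to the given cache and slab. This is required
 * for the slab allocator to be able to look up the cache and slab of a
 * virtual address for kfree() and friends.
 */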
2741static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2742 void *addr)
2743{
2744 int nr_pages;
2745 struct page *page;
2746
2747 page = virt_to_page(addr);
2748
2749 nr_pages = 1;
2750 if (likely(!PageCompound(page)))
2751 nr_pages <<= cache->gfporder;
2752
2753 do {
2754 page->slab_cache = cache;
2755 page->slab_page = slab;
2756 page++;
2757 } while (--nr_pages);
2758}
2759
2760
2761
2762
2763
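/*
 * Grow (by 1) the number of slabs within a cache.  This is called by
 * kmem_cache_alloc() when there are no active objects left in a cache.
 */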
2764static int cache_grow(struct kmem_cache *cachep,
2765 gfp_t flags, int nodeid, void *objp)
2766{
2767 struct slab *slabp;
2768 size_t offset;
2769 gfp_t local_flags;
2770 struct kmem_cache_node *n;
2771
2772
2773
2774
2775
2776 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2777 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2778
2779
2780 check_irq_off();
2781 n = cachep->node[nodeid];
2782 spin_lock(&n->list_lock);
2783
2784
2785 offset = n->colour_next;
2786 n->colour_next++;
2787 if (n->colour_next >= cachep->colour)
2788 n->colour_next = 0;
2789 spin_unlock(&n->list_lock);
2790
2791 offset *= cachep->colour_off;
2792
2793 if (local_flags & __GFP_WAIT)
2794 local_irq_enable();
2795
2796
2797
2798
2799
2800
2801
2802 kmem_flagcheck(cachep, flags);
2803
2804
2805
2806
2807
2808 if (!objp)
2809 objp = kmem_getpages(cachep, local_flags, nodeid);
2810 if (!objp)
2811 goto failed;
2812
2813
2814 slabp = alloc_slabmgmt(cachep, objp, offset,
2815 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2816 if (!slabp)
2817 goto opps1;
2818
2819 slab_map_pages(cachep, slabp, objp);
2820
2821 cache_init_objs(cachep, slabp);
2822
2823 if (local_flags & __GFP_WAIT)
2824 local_irq_disable();
2825 check_irq_off();
2826 spin_lock(&n->list_lock);
2827
2828
2829 list_add_tail(&slabp->list, &(n->slabs_free));
2830 STATS_INC_GROWN(cachep);
2831 n->free_objects += cachep->num;
2832 spin_unlock(&n->list_lock);
2833 return 1;
2834opps1:
2835 kmem_freepages(cachep, objp);
2836failed:
2837 if (local_flags & __GFP_WAIT)
2838 local_irq_disable();
2839 return 0;
2840}
2841
2842#if DEBUG
2843
2844
2845
2846
2847
2848
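/*
 * Perform extra freeing checks:
 * - detect bad pointers.
 * - POISON/RED_ZONE checking
 */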
2849static void kfree_debugcheck(const void *objp)
2850{
2851 if (!virt_addr_valid(objp)) {
2852 printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
2853 (unsigned long)objp);
2854 BUG();
2855 }
2856}
2857
2858static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2859{
2860 unsigned long long redzone1, redzone2;
2861
2862 redzone1 = *dbg_redzone1(cache, obj);
2863 redzone2 = *dbg_redzone2(cache, obj);
2864
2865
2866
2867
2868 if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
2869 return;
2870
2871 if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
2872 slab_error(cache, "double free detected");
2873 else
2874 slab_error(cache, "memory outside object was overwritten");
2875
2876 printk(KERN_ERR "%p: redzone 1:0x%llx, redzone 2:0x%llx.\n",
2877 obj, redzone1, redzone2);
2878}
2879
2880static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2881 unsigned long caller)
2882{
2883 struct page *page;
2884 unsigned int objnr;
2885 struct slab *slabp;
2886
2887 BUG_ON(virt_to_cache(objp) != cachep);
2888
2889 objp -= obj_offset(cachep);
2890 kfree_debugcheck(objp);
2891 page = virt_to_head_page(objp);
2892
2893 slabp = page->slab_page;
2894
2895 if (cachep->flags & SLAB_RED_ZONE) {
2896 verify_redzone_free(cachep, objp);
2897 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
2898 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2899 }
2900 if (cachep->flags & SLAB_STORE_USER)
2901 *dbg_userword(cachep, objp) = (void *)caller;
2902
2903 objnr = obj_to_index(cachep, slabp, objp);
2904
2905 BUG_ON(objnr >= cachep->num);
2906 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2907
2908#ifdef CONFIG_DEBUG_SLAB_LEAK
2909 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2910#endif
2911 if (cachep->flags & SLAB_POISON) {
2912#ifdef CONFIG_DEBUG_PAGEALLOC
2913 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2914 store_stackinfo(cachep, objp, caller);
2915 kernel_map_pages(virt_to_page(objp),
2916 cachep->size / PAGE_SIZE, 0);
2917 } else {
2918 poison_obj(cachep, objp, POISON_FREE);
2919 }
2920#else
2921 poison_obj(cachep, objp, POISON_FREE);
2922#endif
2923 }
2924 return objp;
2925}
2926
2927static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2928{
2929 kmem_bufctl_t i;
2930 int entries = 0;
2931
2932
2933 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2934 entries++;
2935 if (entries > cachep->num || i >= cachep->num)
2936 goto bad;
2937 }
2938 if (entries != cachep->num - slabp->inuse) {
2939bad:
2940 printk(KERN_ERR "slab: Internal list corruption detected in "
2941 "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
2942 cachep->name, cachep->num, slabp, slabp->inuse,
2943 print_tainted());
2944 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
2945 sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
2946 1);
2947 BUG();
2948 }
2949}
2950#else
2951#define kfree_debugcheck(x) do { } while(0)
2952#define cache_free_debugcheck(x,objp,z) (objp)
2953#define check_slabp(x,y) do { } while(0)
2954#endif
2955
2956static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
2957 bool force_refill)
2958{
2959 int batchcount;
2960 struct kmem_cache_node *n;
2961 struct array_cache *ac;
2962 int node;
2963
2964 check_irq_off();
2965 node = numa_mem_id();
2966 if (unlikely(force_refill))
2967 goto force_grow;
2968retry:
2969 ac = cpu_cache_get(cachep);
2970 batchcount = ac->batchcount;
2971 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2972
2973
2974
2975
2976
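		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */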
2977 batchcount = BATCHREFILL_LIMIT;
2978 }
2979 n = cachep->node[node];
2980
2981 BUG_ON(ac->avail > 0 || !n);
2982 spin_lock(&n->list_lock);
2983
2984
2985 if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
2986 n->shared->touched = 1;
2987 goto alloc_done;
2988 }
2989
2990 while (batchcount > 0) {
2991 struct list_head *entry;
2992 struct slab *slabp;
2993
2994 entry = n->slabs_partial.next;
2995 if (entry == &n->slabs_partial) {
2996 n->free_touched = 1;
2997 entry = n->slabs_free.next;
2998 if (entry == &n->slabs_free)
2999 goto must_grow;
3000 }
3001
3002 slabp = list_entry(entry, struct slab, list);
3003 check_slabp(cachep, slabp);
3004 check_spinlock_acquired(cachep);
3005
3006
3007
3008
3009
3010
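		/*
		 * The slab was either on the partial or the free list, so
		 * there must be at least one object available for
		 * allocation.
		 */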
3011 BUG_ON(slabp->inuse >= cachep->num);
3012
3013 while (slabp->inuse < cachep->num && batchcount--) {
3014 STATS_INC_ALLOCED(cachep);
3015 STATS_INC_ACTIVE(cachep);
3016 STATS_SET_HIGH(cachep);
3017
3018 ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
3019 node));
3020 }
3021 check_slabp(cachep, slabp);
3022
3023
3024 list_del(&slabp->list);
3025 if (slabp->free == BUFCTL_END)
3026 list_add(&slabp->list, &n->slabs_full);
3027 else
3028 list_add(&slabp->list, &n->slabs_partial);
3029 }
3030
3031must_grow:
3032 n->free_objects -= ac->avail;
3033alloc_done:
3034 spin_unlock(&n->list_lock);
3035
3036 if (unlikely(!ac->avail)) {
3037 int x;
3038force_grow:
3039 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
3040
3041
3042 ac = cpu_cache_get(cachep);
3043 node = numa_mem_id();
3044
3045
3046 if (!x && (ac->avail == 0 || force_refill))
3047 return NULL;
3048
3049 if (!ac->avail)
3050 goto retry;
3051 }
3052 ac->touched = 1;
3053
3054 return ac_get_obj(cachep, ac, flags, force_refill);
3055}
3056
3057static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
3058 gfp_t flags)
3059{
3060 might_sleep_if(flags & __GFP_WAIT);
3061#if DEBUG
3062 kmem_flagcheck(cachep, flags);
3063#endif
3064}
3065
3066#if DEBUG
3067static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3068 gfp_t flags, void *objp, unsigned long caller)
3069{
3070 if (!objp)
3071 return objp;
3072 if (cachep->flags & SLAB_POISON) {
3073#ifdef CONFIG_DEBUG_PAGEALLOC
3074 if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
3075 kernel_map_pages(virt_to_page(objp),
3076 cachep->size / PAGE_SIZE, 1);
3077 else
3078 check_poison_obj(cachep, objp);
3079#else
3080 check_poison_obj(cachep, objp);
3081#endif
3082 poison_obj(cachep, objp, POISON_INUSE);
3083 }
3084 if (cachep->flags & SLAB_STORE_USER)
3085 *dbg_userword(cachep, objp) = (void *)caller;
3086
3087 if (cachep->flags & SLAB_RED_ZONE) {
3088 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
3089 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
3090 slab_error(cachep, "double free, or memory outside"
3091 " object was overwritten");
3092 printk(KERN_ERR
3093 "%p: redzone 1:0x%llx, redzone 2:0x%llx\n",
3094 objp, *dbg_redzone1(cachep, objp),
3095 *dbg_redzone2(cachep, objp));
3096 }
3097 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3098 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3099 }
3100#ifdef CONFIG_DEBUG_SLAB_LEAK
3101 {
3102 struct slab *slabp;
3103 unsigned objnr;
3104
3105 slabp = virt_to_head_page(objp)->slab_page;
3106 objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
3107 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3108 }
3109#endif
3110 objp += obj_offset(cachep);
3111 if (cachep->ctor && cachep->flags & SLAB_POISON)
3112 cachep->ctor(objp);
3113 if (ARCH_SLAB_MINALIGN &&
3114 ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
3115 printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
3116 objp, (int)ARCH_SLAB_MINALIGN);
3117 }
3118 return objp;
3119}
3120#else
3121#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
3122#endif
3123
3124static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
3125{
3126 if (cachep == kmem_cache)
3127 return false;
3128
3129 return should_failslab(cachep->object_size, flags, cachep->flags);
3130}
3131
3132static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3133{
3134 void *objp;
3135 struct array_cache *ac;
3136 bool force_refill = false;
3137
3138 check_irq_off();
3139
3140 ac = cpu_cache_get(cachep);
3141 if (likely(ac->avail)) {
3142 ac->touched = 1;
3143 objp = ac_get_obj(cachep, ac, flags, false);
3144
3145
3146
3147
3148
3149 if (objp) {
3150 STATS_INC_ALLOCHIT(cachep);
3151 goto out;
3152 }
3153 force_refill = true;
3154 }
3155
3156 STATS_INC_ALLOCMISS(cachep);
3157 objp = cache_alloc_refill(cachep, flags, force_refill);
3158
3159
3160
3161
3162 ac = cpu_cache_get(cachep);
3163
3164out:
3165
3166
3167
3168
3169
3170 if (objp)
3171 kmemleak_erase(&ac->entry[ac->avail]);
3172 return objp;
3173}
3174
3175#ifdef CONFIG_NUMA
3176
3177
3178
3179
3180
3181
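/*
 * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY is set.
 *
 * If we are in_interrupt, then process context, including cpusets and
 * mempolicy, may not apply and should not be used for allocation policy.
 */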
3182static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3183{
3184 int nid_alloc, nid_here;
3185
3186 if (in_interrupt() || (flags & __GFP_THISNODE))
3187 return NULL;
3188 nid_alloc = nid_here = numa_mem_id();
3189 if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
3190 nid_alloc = cpuset_slab_spread_node();
3191 else if (current->mempolicy)
3192 nid_alloc = slab_node();
3193 if (nid_alloc != nid_here)
3194 return ____cache_alloc_node(cachep, flags, nid_alloc);
3195 return NULL;
3196}
3197
3198
3199
3200
3201
3202
3203
3204
3205
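/*
 * Fallback function if there was no memory available and no objects on a
 * certain node and fallback is permitted.  First we scan all the allowed
 * nodes for available objects.  If that fails then we perform an allocation
 * without specifying a node.  This allows the page allocator to do its
 * reclaim / fallback magic.  We then insert the slab into the proper
 * nodelist and allocate from it.
 */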
3206static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
3207{
3208 struct zonelist *zonelist;
3209 gfp_t local_flags;
3210 struct zoneref *z;
3211 struct zone *zone;
3212 enum zone_type high_zoneidx = gfp_zone(flags);
3213 void *obj = NULL;
3214 int nid;
3215 unsigned int cpuset_mems_cookie;
3216
3217 if (flags & __GFP_THISNODE)
3218 return NULL;
3219
3220 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
3221
3222retry_cpuset:
3223 cpuset_mems_cookie = get_mems_allowed();
3224 zonelist = node_zonelist(slab_node(), flags);
3225
3226retry:
3227
3228
3229
3230
3231 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
3232 nid = zone_to_nid(zone);
3233
3234 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3235 cache->node[nid] &&
3236 cache->node[nid]->free_objects) {
3237 obj = ____cache_alloc_node(cache,
3238 flags | GFP_THISNODE, nid);
3239 if (obj)
3240 break;
3241 }
3242 }
3243
3244 if (!obj) {
3245
3246
3247
3248
3249
3250
3251 if (local_flags & __GFP_WAIT)
3252 local_irq_enable();
3253 kmem_flagcheck(cache, flags);
3254 obj = kmem_getpages(cache, local_flags, numa_mem_id());
3255 if (local_flags & __GFP_WAIT)
3256 local_irq_disable();
3257 if (obj) {
3258
3259
3260
3261 nid = page_to_nid(virt_to_page(obj));
3262 if (cache_grow(cache, flags, nid, obj)) {
3263 obj = ____cache_alloc_node(cache,
3264 flags | GFP_THISNODE, nid);
3265 if (!obj)
3266
3267
3268
3269
3270
3271 goto retry;
3272 } else {
3273
3274 obj = NULL;
3275 }
3276 }
3277 }
3278
3279 if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
3280 goto retry_cpuset;
3281 return obj;
3282}
3283
3284
3285
3286
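/*
 * Allocate an object from the slab lists of a specific node.
 */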
3287static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3288 int nodeid)
3289{
3290 struct list_head *entry;
3291 struct slab *slabp;
3292 struct kmem_cache_node *n;
3293 void *obj;
3294 int x;
3295
3296 VM_BUG_ON(nodeid > num_online_nodes());
3297 n = cachep->node[nodeid];
3298 BUG_ON(!n);
3299
3300retry:
3301 check_irq_off();
3302 spin_lock(&n->list_lock);
3303 entry = n->slabs_partial.next;
3304 if (entry == &n->slabs_partial) {
3305 n->free_touched = 1;
3306 entry = n->slabs_free.next;
3307 if (entry == &n->slabs_free)
3308 goto must_grow;
3309 }
3310
3311 slabp = list_entry(entry, struct slab, list);
3312 check_spinlock_acquired_node(cachep, nodeid);
3313 check_slabp(cachep, slabp);
3314
3315 STATS_INC_NODEALLOCS(cachep);
3316 STATS_INC_ACTIVE(cachep);
3317 STATS_SET_HIGH(cachep);
3318
3319 BUG_ON(slabp->inuse == cachep->num);
3320
3321 obj = slab_get_obj(cachep, slabp, nodeid);
3322 check_slabp(cachep, slabp);
3323 n->free_objects--;
3324
3325 list_del(&slabp->list);
3326
3327 if (slabp->free == BUFCTL_END)
3328 list_add(&slabp->list, &n->slabs_full);
3329 else
3330 list_add(&slabp->list, &n->slabs_partial);
3331
3332 spin_unlock(&n->list_lock);
3333 goto done;
3334
3335must_grow:
3336 spin_unlock(&n->list_lock);
3337 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3338 if (x)
3339 goto retry;
3340
3341 return fallback_alloc(cachep, flags);
3342
3343done:
3344 return obj;
3345}
3346
3347static __always_inline void *
3348slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3349 unsigned long caller)
3350{
3351 unsigned long save_flags;
3352 void *ptr;
3353 int slab_node = numa_mem_id();
3354
3355 flags &= gfp_allowed_mask;
3356
3357 lockdep_trace_alloc(flags);
3358
3359 if (slab_should_failslab(cachep, flags))
3360 return NULL;
3361
3362 cachep = memcg_kmem_get_cache(cachep, flags);
3363
3364 cache_alloc_debugcheck_before(cachep, flags);
3365 local_irq_save(save_flags);
3366
3367 if (nodeid == NUMA_NO_NODE)
3368 nodeid = slab_node;
3369
3370 if (unlikely(!cachep->node[nodeid])) {
3371
3372 ptr = fallback_alloc(cachep, flags);
3373 goto out;
3374 }
3375
3376 if (nodeid == slab_node) {
3377
3378
3379
3380
3381
3382
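		/*
		 * Use the locally cached objects if possible.  However,
		 * ____cache_alloc does not allow fallback to other nodes.
		 * It may fail while we still have objects on other nodes
		 * available, hence the explicit node allocation below.
		 */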
3383 ptr = ____cache_alloc(cachep, flags);
3384 if (ptr)
3385 goto out;
3386 }
3387
3388 ptr = ____cache_alloc_node(cachep, flags, nodeid);
3389 out:
3390 local_irq_restore(save_flags);
3391 ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
3392 kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
3393 flags);
3394
3395 if (likely(ptr))
3396 kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
3397
3398 if (unlikely((flags & __GFP_ZERO) && ptr))
3399 memset(ptr, 0, cachep->object_size);
3400
3401 return ptr;
3402}
3403
3404static __always_inline void *
3405__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3406{
3407 void *objp;
3408
3409 if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
3410 objp = alternate_node_alloc(cache, flags);
3411 if (objp)
3412 goto out;
3413 }
3414 objp = ____cache_alloc(cache, flags);
3415
3416
3417
3418
3419
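	/*
	 * We may just have run out of memory on the local node.
	 * ____cache_alloc_node() knows how to locate memory on other nodes.
	 */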
3420 if (!objp)
3421 objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3422
3423 out:
3424 return objp;
3425}
3426#else
3427
3428static __always_inline void *
3429__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3430{
3431 return ____cache_alloc(cachep, flags);
3432}
3433
3434#endif
3435
3436static __always_inline void *
3437slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
3438{
3439 unsigned long save_flags;
3440 void *objp;
3441
3442 flags &= gfp_allowed_mask;
3443
3444 lockdep_trace_alloc(flags);
3445
3446 if (slab_should_failslab(cachep, flags))
3447 return NULL;
3448
3449 cachep = memcg_kmem_get_cache(cachep, flags);
3450
3451 cache_alloc_debugcheck_before(cachep, flags);
3452 local_irq_save(save_flags);
3453 objp = __do_cache_alloc(cachep, flags);
3454 local_irq_restore(save_flags);
3455 objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
3456 kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
3457 flags);
3458 prefetchw(objp);
3459
3460 if (likely(objp))
3461 kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
3462
3463 if (unlikely((flags & __GFP_ZERO) && objp))
3464 memset(objp, 0, cachep->object_size);
3465
3466 return objp;
3467}
3468
3469
3470
3471
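/*
 * Release objects back to their slabs.
 * Caller needs to acquire the correct kmem_cache_node's list_lock.
 */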
3472static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3473 int node)
3474{
3475 int i;
3476 struct kmem_cache_node *n;
3477
3478 for (i = 0; i < nr_objects; i++) {
3479 void *objp;
3480 struct slab *slabp;
3481
3482 clear_obj_pfmemalloc(&objpp[i]);
3483 objp = objpp[i];
3484
3485 slabp = virt_to_slab(objp);
3486 n = cachep->node[node];
3487 list_del(&slabp->list);
3488 check_spinlock_acquired_node(cachep, node);
3489 check_slabp(cachep, slabp);
3490 slab_put_obj(cachep, slabp, objp, node);
3491 STATS_DEC_ACTIVE(cachep);
3492 n->free_objects++;
3493 check_slabp(cachep, slabp);
3494
3495
3496 if (slabp->inuse == 0) {
3497 if (n->free_objects > n->free_limit) {
3498 n->free_objects -= cachep->num;
3499
3500
3501
3502
3503
3504
3505 slab_destroy(cachep, slabp);
3506 } else {
3507 list_add(&slabp->list, &n->slabs_free);
3508 }
3509 } else {
3510
3511
3512
3513
3514 list_add_tail(&slabp->list, &n->slabs_partial);
3515 }
3516 }
3517}
3518
3519static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3520{
3521 int batchcount;
3522 struct kmem_cache_node *n;
3523 int node = numa_mem_id();
3524
3525 batchcount = ac->batchcount;
3526#if DEBUG
3527 BUG_ON(!batchcount || batchcount > ac->avail);
3528#endif
3529 check_irq_off();
3530 n = cachep->node[node];
3531 spin_lock(&n->list_lock);
3532 if (n->shared) {
3533 struct array_cache *shared_array = n->shared;
3534 int max = shared_array->limit - shared_array->avail;
3535 if (max) {
3536 if (batchcount > max)
3537 batchcount = max;
3538 memcpy(&(shared_array->entry[shared_array->avail]),
3539 ac->entry, sizeof(void *) * batchcount);
3540 shared_array->avail += batchcount;
3541 goto free_done;
3542 }
3543 }
3544
3545 free_block(cachep, ac->entry, batchcount, node);
3546free_done:
3547#if STATS
3548 {
3549 int i = 0;
3550 struct list_head *p;
3551
3552 p = n->slabs_free.next;
3553 while (p != &(n->slabs_free)) {
3554 struct slab *slabp;
3555
3556 slabp = list_entry(p, struct slab, list);
3557 BUG_ON(slabp->inuse);
3558
3559 i++;
3560 p = p->next;
3561 }
3562 STATS_SET_FREEABLE(cachep, i);
3563 }
3564#endif
3565 spin_unlock(&n->list_lock);
3566 ac->avail -= batchcount;
3567 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3568}
3569
3570
3571
3572
3573
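/*
 * Release an obj back to its cache. If the obj has a constructed state,
 * it must be in this state _before_ it is released.  Called with
 * interrupts disabled.
 */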
3574static inline void __cache_free(struct kmem_cache *cachep, void *objp,
3575 unsigned long caller)
3576{
3577 struct array_cache *ac = cpu_cache_get(cachep);
3578
3579 check_irq_off();
3580 kmemleak_free_recursive(objp, cachep->flags);
3581 objp = cache_free_debugcheck(cachep, objp, caller);
3582
3583 kmemcheck_slab_free(cachep, objp, cachep->object_size);
3584
3585
3586
3587
3588
3589
3590
3591
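	/*
	 * Skip calling cache_free_alien() when only one node is online:
	 * the object cannot be "alien" then, so the per-page nodeid lookup
	 * inside cache_free_alien() can be avoided entirely.
	 */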
3592 if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
3593 return;
3594
3595 if (likely(ac->avail < ac->limit)) {
3596 STATS_INC_FREEHIT(cachep);
3597 } else {
3598 STATS_INC_FREEMISS(cachep);
3599 cache_flusharray(cachep, ac);
3600 }
3601
3602 ac_put_obj(cachep, ac, objp);
3603}
3604
3605
3606
3607
3608
3609
3610
3611
3612
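/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache.  The flags are only relevant
 * if the cache has no available objects.
 */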
3613void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3614{
3615 void *ret = slab_alloc(cachep, flags, _RET_IP_);
3616
3617 trace_kmem_cache_alloc(_RET_IP_, ret,
3618 cachep->object_size, cachep->size, flags);
3619
3620 return ret;
3621}
3622EXPORT_SYMBOL(kmem_cache_alloc);
3623
3624#ifdef CONFIG_TRACING
3625void *
3626kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
3627{
3628 void *ret;
3629
3630 ret = slab_alloc(cachep, flags, _RET_IP_);
3631
3632 trace_kmalloc(_RET_IP_, ret,
3633 size, cachep->size, flags);
3634 return ret;
3635}
3636EXPORT_SYMBOL(kmem_cache_alloc_trace);
3637#endif
3638
3639#ifdef CONFIG_NUMA
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
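/**
 * kmem_cache_alloc_node - Allocate an object on the specified node
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 * @nodeid: node number of the target node.
 *
 * Identical to kmem_cache_alloc but it will allocate memory on the given
 * node, which can improve the performance for cpu bound structures.
 *
 * Fallback to other nodes is possible if __GFP_THISNODE is not set.
 */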
3651void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3652{
3653 void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
3654
3655 trace_kmem_cache_alloc_node(_RET_IP_, ret,
3656 cachep->object_size, cachep->size,
3657 flags, nodeid);
3658
3659 return ret;
3660}
3661EXPORT_SYMBOL(kmem_cache_alloc_node);
3662
3663#ifdef CONFIG_TRACING
3664void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
3665 gfp_t flags,
3666 int nodeid,
3667 size_t size)
3668{
3669 void *ret;
3670
3671 ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
3672
3673 trace_kmalloc_node(_RET_IP_, ret,
3674 size, cachep->size,
3675 flags, nodeid);
3676 return ret;
3677}
3678EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3679#endif
3680
3681static __always_inline void *
3682__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
3683{
3684 struct kmem_cache *cachep;
3685
3686 cachep = kmalloc_slab(size, flags);
3687 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3688 return cachep;
3689 return kmem_cache_alloc_node_trace(cachep, flags, node, size);
3690}
3691
3692#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3693void *__kmalloc_node(size_t size, gfp_t flags, int node)
3694{
3695 return __do_kmalloc_node(size, flags, node, _RET_IP_);
3696}
3697EXPORT_SYMBOL(__kmalloc_node);
3698
3699void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
3700 int node, unsigned long caller)
3701{
3702 return __do_kmalloc_node(size, flags, node, caller);
3703}
3704EXPORT_SYMBOL(__kmalloc_node_track_caller);
3705#else
3706void *__kmalloc_node(size_t size, gfp_t flags, int node)
3707{
3708 return __do_kmalloc_node(size, flags, node, 0);
3709}
3710EXPORT_SYMBOL(__kmalloc_node);
3711#endif
3712#endif
3713
3714
3715
3716
3717
3718
3719
3720static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3721 unsigned long caller)
3722{
3723 struct kmem_cache *cachep;
3724 void *ret;
3725
3726
3727
3728
3729
3730
3731 cachep = kmalloc_slab(size, flags);
3732 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3733 return cachep;
3734 ret = slab_alloc(cachep, flags, caller);
3735
3736 trace_kmalloc(caller, ret,
3737 size, cachep->size, flags);
3738
3739 return ret;
3740}
3741
3742
3743#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
3744void *__kmalloc(size_t size, gfp_t flags)
3745{
3746 return __do_kmalloc(size, flags, _RET_IP_);
3747}
3748EXPORT_SYMBOL(__kmalloc);
3749
3750void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
3751{
3752 return __do_kmalloc(size, flags, caller);
3753}
3754EXPORT_SYMBOL(__kmalloc_track_caller);
3755
3756#else
3757void *__kmalloc(size_t size, gfp_t flags)
3758{
3759 return __do_kmalloc(size, flags, 0);
3760}
3761EXPORT_SYMBOL(__kmalloc);
3762#endif
3763
3764
3765
3766
3767
3768
3769
3770
3771
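/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */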
3772void kmem_cache_free(struct kmem_cache *cachep, void *objp)
3773{
3774 unsigned long flags;
3775 cachep = cache_from_obj(cachep, objp);
3776 if (!cachep)
3777 return;
3778
3779 local_irq_save(flags);
3780 debug_check_no_locks_freed(objp, cachep->object_size);
3781 if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
3782 debug_check_no_obj_freed(objp, cachep->object_size);
3783 __cache_free(cachep, objp, _RET_IP_);
3784 local_irq_restore(flags);
3785
3786 trace_kmem_cache_free(_RET_IP_, objp);
3787}
3788EXPORT_SYMBOL(kmem_cache_free);
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
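/**
 * kfree - free previously allocated memory
 * @objp: pointer returned by kmalloc.
 *
 * If @objp is NULL, no operation is performed.
 *
 * Don't free memory not originally allocated by kmalloc()
 * or you will run into trouble.
 */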
3799void kfree(const void *objp)
3800{
3801 struct kmem_cache *c;
3802 unsigned long flags;
3803
3804 trace_kfree(_RET_IP_, objp);
3805
3806 if (unlikely(ZERO_OR_NULL_PTR(objp)))
3807 return;
3808 local_irq_save(flags);
3809 kfree_debugcheck(objp);
3810 c = virt_to_cache(objp);
3811 debug_check_no_locks_freed(objp, c->object_size);
3812
3813 debug_check_no_obj_freed(objp, c->object_size);
3814 __cache_free(c, (void *)objp, _RET_IP_);
3815 local_irq_restore(flags);
3816}
3817EXPORT_SYMBOL(kfree);
3818
3819
3820
3821
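/*
 * This initializes kmem_cache_node or resizes various caches for all nodes.
 */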
3822static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3823{
3824 int node;
3825 struct kmem_cache_node *n;
3826 struct array_cache *new_shared;
3827 struct array_cache **new_alien = NULL;
3828
3829 for_each_online_node(node) {
3830
3831 if (use_alien_caches) {
3832 new_alien = alloc_alien_cache(node, cachep->limit, gfp);
3833 if (!new_alien)
3834 goto fail;
3835 }
3836
3837 new_shared = NULL;
3838 if (cachep->shared) {
3839 new_shared = alloc_arraycache(node,
3840 cachep->shared*cachep->batchcount,
3841 0xbaadf00d, gfp);
3842 if (!new_shared) {
3843 free_alien_cache(new_alien);
3844 goto fail;
3845 }
3846 }
3847
3848 n = cachep->node[node];
3849 if (n) {
3850 struct array_cache *shared = n->shared;
3851
3852 spin_lock_irq(&n->list_lock);
3853
3854 if (shared)
3855 free_block(cachep, shared->entry,
3856 shared->avail, node);
3857
3858 n->shared = new_shared;
3859 if (!n->alien) {
3860 n->alien = new_alien;
3861 new_alien = NULL;
3862 }
3863 n->free_limit = (1 + nr_cpus_node(node)) *
3864 cachep->batchcount + cachep->num;
3865 spin_unlock_irq(&n->list_lock);
3866 kfree(shared);
3867 free_alien_cache(new_alien);
3868 continue;
3869 }
3870 n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
3871 if (!n) {
3872 free_alien_cache(new_alien);
3873 kfree(new_shared);
3874 goto fail;
3875 }
3876
3877 kmem_cache_node_init(n);
3878 n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3879 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3880 n->shared = new_shared;
3881 n->alien = new_alien;
3882 n->free_limit = (1 + nr_cpus_node(node)) *
3883 cachep->batchcount + cachep->num;
3884 cachep->node[node] = n;
3885 }
3886 return 0;
3887
3888fail:
3889 if (!cachep->list.next) {
3890
3891 node--;
3892 while (node >= 0) {
3893 if (cachep->node[node]) {
3894 n = cachep->node[node];
3895
3896 kfree(n->shared);
3897 free_alien_cache(n->alien);
3898 kfree(n);
3899 cachep->node[node] = NULL;
3900 }
3901 node--;
3902 }
3903 }
3904 return -ENOMEM;
3905}
3906
3907struct ccupdate_struct {
3908 struct kmem_cache *cachep;
3909 struct array_cache *new[0];
3910};
3911
3912static void do_ccupdate_local(void *info)
3913{
3914 struct ccupdate_struct *new = info;
3915 struct array_cache *old;
3916
3917 check_irq_off();
3918 old = cpu_cache_get(new->cachep);
3919
3920 new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
3921 new->new[smp_processor_id()] = old;
3922}
3923
3924
3925static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
3926 int batchcount, int shared, gfp_t gfp)
3927{
3928 struct ccupdate_struct *new;
3929 int i;
3930
3931 new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
3932 gfp);
3933 if (!new)
3934 return -ENOMEM;
3935
3936 for_each_online_cpu(i) {
3937 new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
3938 batchcount, gfp);
3939 if (!new->new[i]) {
3940 for (i--; i >= 0; i--)
3941 kfree(new->new[i]);
3942 kfree(new);
3943 return -ENOMEM;
3944 }
3945 }
3946 new->cachep = cachep;
3947
3948 on_each_cpu(do_ccupdate_local, (void *)new, 1);
3949
3950 check_irq_on();
3951 cachep->batchcount = batchcount;
3952 cachep->limit = limit;
3953 cachep->shared = shared;
3954
3955 for_each_online_cpu(i) {
3956 struct array_cache *ccold = new->new[i];
3957 if (!ccold)
3958 continue;
3959 spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
3960 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
3961 spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
3962 kfree(ccold);
3963 }
3964 kfree(new);
3965 return alloc_kmemlist(cachep, gfp);
3966}
3967
3968static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3969 int batchcount, int shared, gfp_t gfp)
3970{
3971 int ret;
3972 struct kmem_cache *c = NULL;
3973 int i = 0;
3974
3975 ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
3976
3977 if (slab_state < FULL)
3978 return ret;
3979
3980 if ((ret < 0) || !is_root_cache(cachep))
3981 return ret;
3982
3983 VM_BUG_ON(!mutex_is_locked(&slab_mutex));
3984 for_each_memcg_cache_index(i) {
3985 c = cache_from_memcg(cachep, i);
3986 if (c)
3987
3988 __do_tune_cpucache(c, limit, batchcount, shared, gfp);
3989 }
3990
3991 return ret;
3992}
3993
3994
3995static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
3996{
3997 int err;
3998 int limit = 0;
3999 int shared = 0;
4000 int batchcount = 0;
4001
4002 if (!is_root_cache(cachep)) {
4003 struct kmem_cache *root = memcg_root_cache(cachep);
4004 limit = root->limit;
4005 shared = root->shared;
4006 batchcount = root->batchcount;
4007 }
4008
4009 if (limit && shared && batchcount)
4010 goto skip_setup;
4011
4012
4013
4014
4015
4016
4017
4018
4019
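	/*
	 * The per-cpu head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations
	 * - reduce the number of linked list operations on the slab and
	 *   bulk free operations
	 *
	 * The limits below scale inversely with object size and are
	 * heuristics rather than tuned values.
	 */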
4020 if (cachep->size > 131072)
4021 limit = 1;
4022 else if (cachep->size > PAGE_SIZE)
4023 limit = 8;
4024 else if (cachep->size > 1024)
4025 limit = 24;
4026 else if (cachep->size > 256)
4027 limit = 54;
4028 else
4029 limit = 120;
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040 shared = 0;
4041 if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
4042 shared = 8;
4043
4044#if DEBUG
4045
4046
4047
4048
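	/*
	 * With debugging enabled, large batchcounts lead to excessively long
	 * periods with local interrupts disabled.  Limit the batchcount.
	 */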
4049 if (limit > 32)
4050 limit = 32;
4051#endif
4052 batchcount = (limit + 1) / 2;
4053skip_setup:
4054 err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
4055 if (err)
4056 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
4057 cachep->name, -err);
4058 return err;
4059}
4060
4061
4062
4063
4064
4065
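/*
 * Drain an array if it contains any elements, taking the node lock only if
 * necessary.  Note that the node list_lock also protects the array_cache
 * if drain_array() is used on the shared array.
 */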
4066static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
4067 struct array_cache *ac, int force, int node)
4068{
4069 int tofree;
4070
4071 if (!ac || !ac->avail)
4072 return;
4073 if (ac->touched && !force) {
4074 ac->touched = 0;
4075 } else {
4076 spin_lock_irq(&n->list_lock);
4077 if (ac->avail) {
4078 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4079 if (tofree > ac->avail)
4080 tofree = (ac->avail + 1) / 2;
4081 free_block(cachep, ac->entry, tofree, node);
4082 ac->avail -= tofree;
4083 memmove(ac->entry, &(ac->entry[tofree]),
4084 sizeof(void *) * ac->avail);
4085 }
4086 spin_unlock_irq(&n->list_lock);
4087 }
4088}
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
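/**
 * cache_reap - Reclaim memory from caches.
 * @w: work descriptor
 *
 * Called from workqueue/eventd every few seconds.
 * Purpose:
 * - clear the per-cpu caches for this CPU.
 * - return freeable pages to the main free memory pool.
 *
 * If we cannot acquire the cache chain mutex then just give up - we'll try
 * again on the next iteration.
 */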
4102static void cache_reap(struct work_struct *w)
4103{
4104 struct kmem_cache *searchp;
4105 struct kmem_cache_node *n;
4106 int node = numa_mem_id();
4107 struct delayed_work *work = to_delayed_work(w);
4108
4109 if (!mutex_trylock(&slab_mutex))
4110
4111 goto out;
4112
4113 list_for_each_entry(searchp, &slab_caches, list) {
4114 check_irq_on();
4115
4116
4117
4118
4119
4120
4121 n = searchp->node[node];
4122
4123 reap_alien(searchp, n);
4124
4125 drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
4126
4127
4128
4129
4130
4131 if (time_after(n->next_reap, jiffies))
4132 goto next;
4133
4134 n->next_reap = jiffies + REAPTIMEOUT_LIST3;
4135
4136 drain_array(searchp, n, n->shared, 0, node);
4137
4138 if (n->free_touched)
4139 n->free_touched = 0;
4140 else {
4141 int freed;
4142
4143 freed = drain_freelist(searchp, n, (n->free_limit +
4144 5 * searchp->num - 1) / (5 * searchp->num));
4145 STATS_ADD_REAPED(searchp, freed);
4146 }
4147next:
4148 cond_resched();
4149 }
4150 check_irq_on();
4151 mutex_unlock(&slab_mutex);
4152 next_reap_node();
4153out:
4154
4155 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
4156}
4157
4158#ifdef CONFIG_SLABINFO
4159void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4160{
4161 struct slab *slabp;
4162 unsigned long active_objs;
4163 unsigned long num_objs;
4164 unsigned long active_slabs = 0;
4165 unsigned long num_slabs, free_objects = 0, shared_avail = 0;
4166 const char *name;
4167 char *error = NULL;
4168 int node;
4169 struct kmem_cache_node *n;
4170
4171 active_objs = 0;
4172 num_slabs = 0;
4173 for_each_online_node(node) {
4174 n = cachep->node[node];
4175 if (!n)
4176 continue;
4177
4178 check_irq_on();
4179 spin_lock_irq(&n->list_lock);
4180
4181 list_for_each_entry(slabp, &n->slabs_full, list) {
4182 if (slabp->inuse != cachep->num && !error)
4183 error = "slabs_full accounting error";
4184 active_objs += cachep->num;
4185 active_slabs++;
4186 }
4187 list_for_each_entry(slabp, &n->slabs_partial, list) {
4188 if (slabp->inuse == cachep->num && !error)
4189 error = "slabs_partial inuse accounting error";
4190 if (!slabp->inuse && !error)
4191 error = "slabs_partial/inuse accounting error";
4192 active_objs += slabp->inuse;
4193 active_slabs++;
4194 }
4195 list_for_each_entry(slabp, &n->slabs_free, list) {
4196 if (slabp->inuse && !error)
4197 error = "slabs_free/inuse accounting error";
4198 num_slabs++;
4199 }
4200 free_objects += n->free_objects;
4201 if (n->shared)
4202 shared_avail += n->shared->avail;
4203
4204 spin_unlock_irq(&n->list_lock);
4205 }
4206 num_slabs += active_slabs;
4207 num_objs = num_slabs * cachep->num;
4208 if (num_objs - active_objs != free_objects && !error)
4209 error = "free_objects accounting error";
4210
4211 name = cachep->name;
4212 if (error)
4213 printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
4214
4215 sinfo->active_objs = active_objs;
4216 sinfo->num_objs = num_objs;
4217 sinfo->active_slabs = active_slabs;
4218 sinfo->num_slabs = num_slabs;
4219 sinfo->shared_avail = shared_avail;
4220 sinfo->limit = cachep->limit;
4221 sinfo->batchcount = cachep->batchcount;
4222 sinfo->shared = cachep->shared;
4223 sinfo->objects_per_slab = cachep->num;
4224 sinfo->cache_order = cachep->gfporder;
4225}
4226
4227void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
4228{
4229#if STATS
4230 {
4231 unsigned long high = cachep->high_mark;
4232 unsigned long allocs = cachep->num_allocations;
4233 unsigned long grown = cachep->grown;
4234 unsigned long reaped = cachep->reaped;
4235 unsigned long errors = cachep->errors;
4236 unsigned long max_freeable = cachep->max_freeable;
4237 unsigned long node_allocs = cachep->node_allocs;
4238 unsigned long node_frees = cachep->node_frees;
4239 unsigned long overflows = cachep->node_overflow;
4240
4241 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
4242 "%4lu %4lu %4lu %4lu %4lu",
4243 allocs, high, grown,
4244 reaped, errors, max_freeable, node_allocs,
4245 node_frees, overflows);
4246 }
4247
4248 {
4249 unsigned long allochit = atomic_read(&cachep->allochit);
4250 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
4251 unsigned long freehit = atomic_read(&cachep->freehit);
4252 unsigned long freemiss = atomic_read(&cachep->freemiss);
4253
4254 seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
4255 allochit, allocmiss, freehit, freemiss);
4256 }
4257#endif
4258}
4259
4260#define MAX_SLABINFO_WRITE 128
4261
4262
4263
4264
4265
4266
4267
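/**
 * slabinfo_write - Tuning for the slab allocator
 * @file: unused
 * @buffer: user buffer
 * @count: data length
 * @ppos: unused
 *
 * The expected input format is "cachename limit batchcount shared".
 */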
4268ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4269 size_t count, loff_t *ppos)
4270{
4271 char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
4272 int limit, batchcount, shared, res;
4273 struct kmem_cache *cachep;
4274
4275 if (count > MAX_SLABINFO_WRITE)
4276 return -EINVAL;
4277 if (copy_from_user(&kbuf, buffer, count))
4278 return -EFAULT;
4279 kbuf[MAX_SLABINFO_WRITE] = '\0';
4280
4281 tmp = strchr(kbuf, ' ');
4282 if (!tmp)
4283 return -EINVAL;
4284 *tmp = '\0';
4285 tmp++;
4286 if (sscanf(tmp, " %d %d %d", &limit, &batchcount, &shared) != 3)
4287 return -EINVAL;
4288
4289
4290 mutex_lock(&slab_mutex);
4291 res = -EINVAL;
4292 list_for_each_entry(cachep, &slab_caches, list) {
4293 if (!strcmp(cachep->name, kbuf)) {
4294 if (limit < 1 || batchcount < 1 ||
4295 batchcount > limit || shared < 0) {
4296 res = 0;
4297 } else {
4298 res = do_tune_cpucache(cachep, limit,
4299 batchcount, shared,
4300 GFP_KERNEL);
4301 }
4302 break;
4303 }
4304 }
4305 mutex_unlock(&slab_mutex);
4306 if (res >= 0)
4307 res = count;
4308 return res;
4309}
4310
4311#ifdef CONFIG_DEBUG_SLAB_LEAK
4312
4313static void *leaks_start(struct seq_file *m, loff_t *pos)
4314{
4315 mutex_lock(&slab_mutex);
4316 return seq_list_start(&slab_caches, *pos);
4317}
4318
4319static inline int add_caller(unsigned long *n, unsigned long v)
4320{
4321 unsigned long *p;
4322 int l;
4323 if (!v)
4324 return 1;
4325 l = n[1];
4326 p = n + 2;
4327 while (l) {
4328 int i = l/2;
4329 unsigned long *q = p + 2 * i;
4330 if (*q == v) {
4331 q[1]++;
4332 return 1;
4333 }
4334 if (*q > v) {
4335 l = i;
4336 } else {
4337 p = q + 2;
4338 l -= i + 1;
4339 }
4340 }
4341 if (++n[1] == n[0])
4342 return 0;
4343 memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n));
4344 p[0] = v;
4345 p[1] = 1;
4346 return 1;
4347}
4348
4349static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
4350{
4351 void *p;
4352 int i;
4353 if (n[0] == n[1])
4354 return;
4355 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) {
4356 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
4357 continue;
4358 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4359 return;
4360 }
4361}
4362
4363static void show_symbol(struct seq_file *m, unsigned long address)
4364{
4365#ifdef CONFIG_KALLSYMS
4366 unsigned long offset, size;
4367 char modname[MODULE_NAME_LEN], name[KSYM_NAME_LEN];
4368
4369 if (lookup_symbol_attrs(address, &size, &offset, modname, name) == 0) {
4370 seq_printf(m, "%s+%#lx/%#lx", name, offset, size);
4371 if (modname[0])
4372 seq_printf(m, " [%s]", modname);
4373 return;
4374 }
4375#endif
4376 seq_printf(m, "%p", (void *)address);
4377}
4378
4379static int leaks_show(struct seq_file *m, void *p)
4380{
4381 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4382 struct slab *slabp;
4383 struct kmem_cache_node *n;
4384 const char *name;
4385 unsigned long *x = m->private;
4386 int node;
4387 int i;
4388
4389 if (!(cachep->flags & SLAB_STORE_USER))
4390 return 0;
4391 if (!(cachep->flags & SLAB_RED_ZONE))
4392 return 0;
4393
4394
4395
4396 x[1] = 0;
4397
4398 for_each_online_node(node) {
4399 n = cachep->node[node];
4400 if (!n)
4401 continue;
4402
4403 check_irq_on();
4404 spin_lock_irq(&n->list_lock);
4405
4406 list_for_each_entry(slabp, &n->slabs_full, list)
4407 handle_slab(x, cachep, slabp);
4408 list_for_each_entry(slabp, &n->slabs_partial, list)
4409 handle_slab(x, cachep, slabp);
4410 spin_unlock_irq(&n->list_lock);
4411 }
4412 name = cachep->name;
4413 if (x[0] == x[1]) {
4414
4415 mutex_unlock(&slab_mutex);
4416 m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4417 if (!m->private) {
4418
4419 m->private = x;
4420 mutex_lock(&slab_mutex);
4421 return -ENOMEM;
4422 }
4423 *(unsigned long *)m->private = x[0] * 2;
4424 kfree(x);
4425 mutex_lock(&slab_mutex);
4426
4427 m->count = m->size;
4428 return 0;
4429 }
4430 for (i = 0; i < x[1]; i++) {
4431 seq_printf(m, "%s: %lu ", name, x[2*i+3]);
4432 show_symbol(m, x[2*i+2]);
4433 seq_putc(m, '\n');
4434 }
4435
4436 return 0;
4437}
4438
4439static const struct seq_operations slabstats_op = {
4440 .start = leaks_start,
4441 .next = slab_next,
4442 .stop = slab_stop,
4443 .show = leaks_show,
4444};
4445
4446static int slabstats_open(struct inode *inode, struct file *file)
4447{
4448 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4449 int ret = -ENOMEM;
4450 if (n) {
4451 ret = seq_open(file, &slabstats_op);
4452 if (!ret) {
4453 struct seq_file *m = file->private_data;
4454 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4455 m->private = n;
4456 n = NULL;
4457 }
4458 kfree(n);
4459 }
4460 return ret;
4461}
4462
4463static const struct file_operations proc_slabstats_operations = {
4464 .open = slabstats_open,
4465 .read = seq_read,
4466 .llseek = seq_lseek,
4467 .release = seq_release_private,
4468};
4469#endif
4470
4471static int __init slab_proc_init(void)
4472{
4473#ifdef CONFIG_DEBUG_SLAB_LEAK
4474 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4475#endif
4476 return 0;
4477}
4478module_init(slab_proc_init);
4479#endif
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
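/**
 * ksize - get the actual amount of memory allocated for a given object
 * @objp: Pointer to the object
 *
 * kmalloc may internally round up allocations and return more memory
 * than requested. ksize() can be used to determine the actual amount of
 * memory allocated. The caller may use this additional memory, even though
 * a smaller amount of memory was initially specified with the kmalloc call.
 * The caller must guarantee that objp points to a valid object previously
 * allocated with either kmalloc() or kmem_cache_alloc(). The object
 * must not be freed during the duration of the call.
 */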
4493size_t ksize(const void *objp)
4494{
4495 BUG_ON(!objp);
4496 if (unlikely(objp == ZERO_SIZE_PTR))
4497 return 0;
4498
4499 return virt_to_cache(objp)->object_size;
4500}
4501EXPORT_SYMBOL(ksize);
4502