1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60#include <linux/kernel.h>
61#include <linux/slab.h>
62
63#include <linux/mm.h>
64#include <linux/swap.h>
65#include <linux/cache.h>
66#include <linux/init.h>
67#include <linux/export.h>
68#include <linux/rcupdate.h>
69#include <linux/list.h>
70#include <linux/kmemleak.h>
71
72#include <trace/events/kmem.h>
73
74#include <linux/atomic.h>
75
76#include "slab.h"
77
78
79
80
81
82
83
84
85#if PAGE_SIZE <= (32767 * 2)
86typedef s16 slobidx_t;
87#else
88typedef s32 slobidx_t;
89#endif
90
91struct slob_block {
92 slobidx_t units;
93};
94typedef struct slob_block slob_t;
95
96
97
98
/*
 * Size thresholds (in bytes) that pick which free list a request uses:
 * sizes below SLOB_BREAK1 use free_slob_small, sizes below SLOB_BREAK2
 * use free_slob_medium, and everything else uses free_slob_large.
 */
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024

/* Lists of pages that currently have free space, one per size class. */
static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);
104
105
106
107
/*
 * slob_page_free - test whether @sp carries the SlobFree page flag.
 *
 * The flag is set by set_slob_page_free() and cleared by
 * clear_slob_page_free(), i.e. it mirrors membership of a free list.
 * Returns non-zero when the flag is set.
 */
static inline int slob_page_free(struct page *sp)
{
	return PageSlobFree(sp);
}
112
113static void set_slob_page_free(struct page *sp, struct list_head *list)
114{
115 list_add(&sp->slab_list, list);
116 __SetPageSlobFree(sp);
117}
118
119static inline void clear_slob_page_free(struct page *sp)
120{
121 list_del(&sp->slab_list);
122 __ClearPageSlobFree(sp);
123}
124
/* Size in bytes of one allocation unit. */
#define SLOB_UNIT sizeof(slob_t)
/* Number of units needed to hold @size bytes, rounded up. */
#define SLOB_UNITS(size) DIV_ROUND_UP((size), SLOB_UNIT)
127
128
129
130
131
132
133struct slob_rcu {
134 struct rcu_head head;
135 int size;
136};
137
138
139
140
/*
 * Held (with interrupts disabled) by slob_alloc() and slob_free() while
 * they walk the free lists and edit per-page free-block chains.
 */
static DEFINE_SPINLOCK(slob_lock);
142
143
144
145
146static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
147{
148 slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
149 slobidx_t offset = next - base;
150
151 if (size > 1) {
152 s[0].units = size;
153 s[1].units = offset;
154 } else
155 s[0].units = -offset;
156}
157
158
159
160
161static slobidx_t slob_units(slob_t *s)
162{
163 if (s->units > 0)
164 return s->units;
165 return 1;
166}
167
168
169
170
171static slob_t *slob_next(slob_t *s)
172{
173 slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
174 slobidx_t next;
175
176 if (s[0].units < 0)
177 next = -s[0].units;
178 else
179 next = s[1].units;
180 return base+next;
181}
182
183
184
185
186static int slob_last(slob_t *s)
187{
188 return !((unsigned long)slob_next(s) & ~PAGE_MASK);
189}
190
191static void *slob_new_pages(gfp_t gfp, int order, int node)
192{
193 struct page *page;
194
195#ifdef CONFIG_NUMA
196 if (node != NUMA_NO_NODE)
197 page = __alloc_pages_node(node, gfp, order);
198 else
199#endif
200 page = alloc_pages(gfp, order);
201
202 if (!page)
203 return NULL;
204
205 mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
206 PAGE_SIZE << order);
207 return page_address(page);
208}
209
210static void slob_free_pages(void *b, int order)
211{
212 struct page *sp = virt_to_page(b);
213
214 if (current->reclaim_state)
215 current->reclaim_state->reclaimed_slab += 1 << order;
216
217 mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
218 -(PAGE_SIZE << order));
219 __free_pages(sp, order);
220}
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237static void *slob_page_alloc(struct page *sp, size_t size, int align,
238 int align_offset, bool *page_removed_from_list)
239{
240 slob_t *prev, *cur, *aligned = NULL;
241 int delta = 0, units = SLOB_UNITS(size);
242
243 *page_removed_from_list = false;
244 for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
245 slobidx_t avail = slob_units(cur);
246
247
248
249
250
251
252
253
254 if (align) {
255 aligned = (slob_t *)
256 (ALIGN((unsigned long)cur + align_offset, align)
257 - align_offset);
258 delta = aligned - cur;
259 }
260 if (avail >= units + delta) {
261 slob_t *next;
262
263 if (delta) {
264 next = slob_next(cur);
265 set_slob(aligned, avail - delta, next);
266 set_slob(cur, delta, aligned);
267 prev = cur;
268 cur = aligned;
269 avail = slob_units(cur);
270 }
271
272 next = slob_next(cur);
273 if (avail == units) {
274 if (prev)
275 set_slob(prev, slob_units(prev), next);
276 else
277 sp->freelist = next;
278 } else {
279 if (prev)
280 set_slob(prev, slob_units(prev), cur + units);
281 else
282 sp->freelist = cur + units;
283 set_slob(cur + units, avail - units, next);
284 }
285
286 sp->units -= units;
287 if (!sp->units) {
288 clear_slob_page_free(sp);
289 *page_removed_from_list = true;
290 }
291 return cur;
292 }
293 if (slob_last(cur))
294 return NULL;
295 }
296}
297
298
299
300
301static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
302 int align_offset)
303{
304 struct page *sp;
305 struct list_head *slob_list;
306 slob_t *b = NULL;
307 unsigned long flags;
308 bool _unused;
309
310 if (size < SLOB_BREAK1)
311 slob_list = &free_slob_small;
312 else if (size < SLOB_BREAK2)
313 slob_list = &free_slob_medium;
314 else
315 slob_list = &free_slob_large;
316
317 spin_lock_irqsave(&slob_lock, flags);
318
319 list_for_each_entry(sp, slob_list, slab_list) {
320 bool page_removed_from_list = false;
321#ifdef CONFIG_NUMA
322
323
324
325
326 if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
327 continue;
328#endif
329
330 if (sp->units < SLOB_UNITS(size))
331 continue;
332
333 b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
334 if (!b)
335 continue;
336
337
338
339
340
341
342
343 if (!page_removed_from_list) {
344
345
346
347
348
349 if (!list_is_first(&sp->slab_list, slob_list))
350 list_rotate_to_front(&sp->slab_list, slob_list);
351 }
352 break;
353 }
354 spin_unlock_irqrestore(&slob_lock, flags);
355
356
357 if (!b) {
358 b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
359 if (!b)
360 return NULL;
361 sp = virt_to_page(b);
362 __SetPageSlab(sp);
363
364 spin_lock_irqsave(&slob_lock, flags);
365 sp->units = SLOB_UNITS(PAGE_SIZE);
366 sp->freelist = b;
367 INIT_LIST_HEAD(&sp->slab_list);
368 set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
369 set_slob_page_free(sp, slob_list);
370 b = slob_page_alloc(sp, size, align, align_offset, &_unused);
371 BUG_ON(!b);
372 spin_unlock_irqrestore(&slob_lock, flags);
373 }
374 if (unlikely(gfp & __GFP_ZERO))
375 memset(b, 0, size);
376 return b;
377}
378
379
380
381
382static void slob_free(void *block, int size)
383{
384 struct page *sp;
385 slob_t *prev, *next, *b = (slob_t *)block;
386 slobidx_t units;
387 unsigned long flags;
388 struct list_head *slob_list;
389
390 if (unlikely(ZERO_OR_NULL_PTR(block)))
391 return;
392 BUG_ON(!size);
393
394 sp = virt_to_page(block);
395 units = SLOB_UNITS(size);
396
397 spin_lock_irqsave(&slob_lock, flags);
398
399 if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
400
401 if (slob_page_free(sp))
402 clear_slob_page_free(sp);
403 spin_unlock_irqrestore(&slob_lock, flags);
404 __ClearPageSlab(sp);
405 page_mapcount_reset(sp);
406 slob_free_pages(b, 0);
407 return;
408 }
409
410 if (!slob_page_free(sp)) {
411
412 sp->units = units;
413 sp->freelist = b;
414 set_slob(b, units,
415 (void *)((unsigned long)(b +
416 SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
417 if (size < SLOB_BREAK1)
418 slob_list = &free_slob_small;
419 else if (size < SLOB_BREAK2)
420 slob_list = &free_slob_medium;
421 else
422 slob_list = &free_slob_large;
423 set_slob_page_free(sp, slob_list);
424 goto out;
425 }
426
427
428
429
430
431 sp->units += units;
432
433 if (b < (slob_t *)sp->freelist) {
434 if (b + units == sp->freelist) {
435 units += slob_units(sp->freelist);
436 sp->freelist = slob_next(sp->freelist);
437 }
438 set_slob(b, units, sp->freelist);
439 sp->freelist = b;
440 } else {
441 prev = sp->freelist;
442 next = slob_next(prev);
443 while (b > next) {
444 prev = next;
445 next = slob_next(prev);
446 }
447
448 if (!slob_last(prev) && b + units == next) {
449 units += slob_units(next);
450 set_slob(b, units, slob_next(next));
451 } else
452 set_slob(b, units, next);
453
454 if (prev + slob_units(prev) == b) {
455 units = slob_units(b) + slob_units(prev);
456 set_slob(prev, units, slob_next(b));
457 } else
458 set_slob(prev, slob_units(prev), b);
459 }
460out:
461 spin_unlock_irqrestore(&slob_lock, flags);
462}
463
464
465
466
467
468static __always_inline void *
469__do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
470{
471 unsigned int *m;
472 int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
473 void *ret;
474
475 gfp &= gfp_allowed_mask;
476
477 fs_reclaim_acquire(gfp);
478 fs_reclaim_release(gfp);
479
480 if (size < PAGE_SIZE - minalign) {
481 int align = minalign;
482
483
484
485
486
487 if (is_power_of_2(size))
488 align = max(minalign, (int) size);
489
490 if (!size)
491 return ZERO_SIZE_PTR;
492
493 m = slob_alloc(size + minalign, gfp, align, node, minalign);
494
495 if (!m)
496 return NULL;
497 *m = size;
498 ret = (void *)m + minalign;
499
500 trace_kmalloc_node(caller, ret,
501 size, size + minalign, gfp, node);
502 } else {
503 unsigned int order = get_order(size);
504
505 if (likely(order))
506 gfp |= __GFP_COMP;
507 ret = slob_new_pages(gfp, order, node);
508
509 trace_kmalloc_node(caller, ret,
510 size, PAGE_SIZE << order, gfp, node);
511 }
512
513 kmemleak_alloc(ret, size, 1, gfp);
514 return ret;
515}
516
517void *__kmalloc(size_t size, gfp_t gfp)
518{
519 return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_);
520}
521EXPORT_SYMBOL(__kmalloc);
522
523void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
524{
525 return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
526}
527EXPORT_SYMBOL(__kmalloc_track_caller);
528
529#ifdef CONFIG_NUMA
530void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
531 int node, unsigned long caller)
532{
533 return __do_kmalloc_node(size, gfp, node, caller);
534}
535EXPORT_SYMBOL(__kmalloc_node_track_caller);
536#endif
537
538void kfree(const void *block)
539{
540 struct page *sp;
541
542 trace_kfree(_RET_IP_, block);
543
544 if (unlikely(ZERO_OR_NULL_PTR(block)))
545 return;
546 kmemleak_free(block);
547
548 sp = virt_to_page(block);
549 if (PageSlab(sp)) {
550 int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
551 unsigned int *m = (unsigned int *)(block - align);
552 slob_free(m, *m + align);
553 } else {
554 unsigned int order = compound_order(sp);
555 mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
556 -(PAGE_SIZE << order));
557 __free_pages(sp, order);
558
559 }
560}
561EXPORT_SYMBOL(kfree);
562
563
564size_t __ksize(const void *block)
565{
566 struct page *sp;
567 int align;
568 unsigned int *m;
569
570 BUG_ON(!block);
571 if (unlikely(block == ZERO_SIZE_PTR))
572 return 0;
573
574 sp = virt_to_page(block);
575 if (unlikely(!PageSlab(sp)))
576 return page_size(sp);
577
578 align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
579 m = (unsigned int *)(block - align);
580 return SLOB_UNITS(*m) * SLOB_UNIT;
581}
582EXPORT_SYMBOL(__ksize);
583
584int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
585{
586 if (flags & SLAB_TYPESAFE_BY_RCU) {
587
588 c->size += sizeof(struct slob_rcu);
589 }
590 c->flags = flags;
591 return 0;
592}
593
594static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
595{
596 void *b;
597
598 flags &= gfp_allowed_mask;
599
600 fs_reclaim_acquire(flags);
601 fs_reclaim_release(flags);
602
603 if (c->size < PAGE_SIZE) {
604 b = slob_alloc(c->size, flags, c->align, node, 0);
605 trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
606 SLOB_UNITS(c->size) * SLOB_UNIT,
607 flags, node);
608 } else {
609 b = slob_new_pages(flags, get_order(c->size), node);
610 trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
611 PAGE_SIZE << get_order(c->size),
612 flags, node);
613 }
614
615 if (b && c->ctor) {
616 WARN_ON_ONCE(flags & __GFP_ZERO);
617 c->ctor(b);
618 }
619
620 kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
621 return b;
622}
623
624void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
625{
626 return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
627}
628EXPORT_SYMBOL(kmem_cache_alloc);
629
630#ifdef CONFIG_NUMA
631void *__kmalloc_node(size_t size, gfp_t gfp, int node)
632{
633 return __do_kmalloc_node(size, gfp, node, _RET_IP_);
634}
635EXPORT_SYMBOL(__kmalloc_node);
636
637void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
638{
639 return slob_alloc_node(cachep, gfp, node);
640}
641EXPORT_SYMBOL(kmem_cache_alloc_node);
642#endif
643
644static void __kmem_cache_free(void *b, int size)
645{
646 if (size < PAGE_SIZE)
647 slob_free(b, size);
648 else
649 slob_free_pages(b, get_order(size));
650}
651
652static void kmem_rcu_free(struct rcu_head *head)
653{
654 struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
655 void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));
656
657 __kmem_cache_free(b, slob_rcu->size);
658}
659
660void kmem_cache_free(struct kmem_cache *c, void *b)
661{
662 kmemleak_free_recursive(b, c->flags);
663 if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
664 struct slob_rcu *slob_rcu;
665 slob_rcu = b + (c->size - sizeof(struct slob_rcu));
666 slob_rcu->size = c->size;
667 call_rcu(&slob_rcu->head, kmem_rcu_free);
668 } else {
669 __kmem_cache_free(b, c->size);
670 }
671
672 trace_kmem_cache_free(_RET_IP_, b);
673}
674EXPORT_SYMBOL(kmem_cache_free);
675
/*
 * kmem_cache_free_bulk - free @size objects from array @p; delegates
 * entirely to __kmem_cache_free_bulk().
 */
void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
{
	__kmem_cache_free_bulk(s, size, p);
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
681
682int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
683 void **p)
684{
685 return __kmem_cache_alloc_bulk(s, flags, size, p);
686}
687EXPORT_SYMBOL(kmem_cache_alloc_bulk);
688
/*
 * __kmem_cache_shutdown - allocator hook for cache shutdown.
 *
 * Performs no work and no check on @c; unconditionally returns 0.
 */
int __kmem_cache_shutdown(struct kmem_cache *c)
{
	return 0;
}
694
/* __kmem_cache_release - allocator hook for cache release; a no-op here. */
void __kmem_cache_release(struct kmem_cache *c)
{
}
698
/*
 * __kmem_cache_shrink - allocator hook for cache shrinking.
 *
 * Does nothing with @d and unconditionally returns 0.
 */
int __kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}
703
704struct kmem_cache kmem_cache_boot = {
705 .name = "kmem_cache",
706 .size = sizeof(struct kmem_cache),
707 .flags = SLAB_PANIC,
708 .align = ARCH_KMALLOC_MINALIGN,
709};
710
711void __init kmem_cache_init(void)
712{
713 kmem_cache = &kmem_cache_boot;
714 slab_state = UP;
715}
716
717void __init kmem_cache_init_late(void)
718{
719 slab_state = FULL;
720}
721