/*
 * mm/percpu.c - percpu memory allocator
 *
 * Copyright (C) 2009		SUSE Linux Products GmbH
 * Copyright (C) 2009		Tejun Heo <tj@kernel.org>
 *
 * Copyright (C) 2017		Facebook Inc.
 * Copyright (C) 2017		Dennis Zhou <dennisszhou@gmail.com>
 *
 * This file is released under the GPLv2 license.
 *
 * The percpu allocator handles both static and dynamic areas.  Percpu
 * areas are allocated in chunks which are divided into units.  There is
 * one unit per possible CPU, and a CPU's copy of an object is reached by
 * adding that CPU's unit offset to the object's base address.
 *
 * Allocation state within each chunk is tracked by two bitmaps: the
 * allocation map records which PCPU_MIN_ALLOC_SIZE areas are in use, and
 * the boundary map marks where each allocation starts.  Per-block
 * metadata (first_free, left/right free counts, and a contig hint) is
 * kept so that most allocations do not need to scan the bitmaps.
 *
 * The first chunk covers the kernel static percpu area, an optional
 * reserved region for module percpu variables, and a dynamic region.
 * Further chunks are created on demand and populated lazily; a
 * background work item keeps a small reserve of empty populated pages
 * around for atomic allocations.
 *
 * To use this allocator, arch code should do the following:
 *
 * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
 *   regular address to percpu pointer and back if they need to be
 *   different from the default
 *
 * - use pcpu_setup_first_chunk() during percpu area initialization to
 *   setup the first chunk containing the kernel static percpu area
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/lcm.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
#include <linux/sched.h>

#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/percpu.h>

#include "percpu-internal.h"

/* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
#define PCPU_SLOT_BASE_SHIFT		5

/* bounds on the number of empty populated pages kept for atomic allocs */
#define PCPU_EMPTY_POP_PAGES_LOW	2
#define PCPU_EMPTY_POP_PAGES_HIGH	4

#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr +		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */

static int pcpu_unit_pages __ro_after_init;
static int pcpu_unit_size __ro_after_init;
static int pcpu_nr_units __ro_after_init;
static int pcpu_atom_size __ro_after_init;
int pcpu_nr_slots __ro_after_init;
static size_t pcpu_chunk_struct_size __ro_after_init;

/* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __ro_after_init;
static unsigned int pcpu_high_unit_cpu __ro_after_init;

/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __ro_after_init;
EXPORT_SYMBOL_GPL(pcpu_base_addr);

static const int *pcpu_unit_map __ro_after_init;	/* cpu -> unit */
const unsigned long *pcpu_unit_offsets __ro_after_init;	/* cpu -> unit offset */

/* group information, used for vm allocation */
static int pcpu_nr_groups __ro_after_init;
static const unsigned long *pcpu_group_offsets __ro_after_init;
static const size_t *pcpu_group_sizes __ro_after_init;

/*
 * The first chunk which always exists.  Note that unlike other
 * chunks, this one can be allocated and mapped in several different
 * ways and thus often doesn't live in the vmalloc area.
 */
struct pcpu_chunk *pcpu_first_chunk __ro_after_init;

/*
 * Optional reserved chunk.  This chunk reserves part of the first
 * chunk and serves it for reserved allocations.  When the reserved
 * region doesn't exist, the following variable is NULL.
 */
struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;

DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop */

struct list_head *pcpu_slot __ro_after_init;	/* chunk list slots */

/* chunks which need their map areas extended, protected by pcpu_lock */
static LIST_HEAD(pcpu_map_extend_chunks);

/*
 * The number of empty populated pages, protected by pcpu_lock.  The
 * reserved chunk doesn't contribute to the count.
 */
int pcpu_nr_empty_pop_pages;

/*
 * Balance work is used to populate or destroy chunks asynchronously.  We
 * try to keep the number of populated free pages between
 * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and schedule
 * empty chunk destruction.
 */
static void pcpu_balance_workfn(struct work_struct *work);
static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);
static bool pcpu_async_enabled __read_mostly;
static bool pcpu_atomic_alloc_failed;

static void pcpu_schedule_balance_work(void)
{
	if (pcpu_async_enabled)
		schedule_work(&pcpu_balance_work);
}

/**
 * pcpu_addr_in_chunk - check if the address is served from this chunk
 * @chunk: chunk of interest
 * @addr: percpu address
 *
 * RETURNS:
 * True if the address is served from this chunk.
 */
static bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr)
{
	void *start_addr, *end_addr;

	if (!chunk)
		return false;

	start_addr = chunk->base_addr + chunk->start_offset;
	end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE -
		   chunk->end_offset;

	return addr >= start_addr && addr < end_addr;
}

static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);	/* size is in bytes */
	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}
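
/*
 * Worked example for the slot math above (illustrative, with
 * PCPU_SLOT_BASE_SHIFT == 5): a chunk with 16 bytes free has fls(16) == 5
 * and sorts into slot max(5 - 5 + 2, 1) == 2; one with 1024 bytes free has
 * fls(1024) == 11 and sorts into slot 8.  Slot 0 is reserved by
 * pcpu_chunk_slot() below for chunks with no allocatable space.
 */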

static int pcpu_size_to_slot(int size)
{
	if (size == pcpu_unit_size)
		return pcpu_nr_slots - 1;
	return __pcpu_size_to_slot(size);
}

static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || chunk->contig_bits == 0)
		return 0;

	return pcpu_size_to_slot(chunk->free_bytes);
}

/* set the pointer to a chunk in a page struct */
static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
{
	page->index = (unsigned long)pcpu;
}

/* obtain pointer to a chunk from a page struct */
static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
{
	return (struct pcpu_chunk *)page->index;
}

static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
{
	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
}

static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
{
	return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
}

static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	return (unsigned long)chunk->base_addr +
	       pcpu_unit_page_offset(cpu, page_idx);
}

static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(bitmap, end, *rs);
	*re = find_next_bit(bitmap, end, *rs + 1);
}

static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_bit(bitmap, end, *rs);
	*re = find_next_zero_bit(bitmap, end, *rs + 1);
}

/*
 * (Un)populated page region iterators.  Iterate over (un)populated
 * page regions between @start and @end in @bitmap.  @rs and @re should
 * be integer variables and will be set to start and end page index of
 * the current region.
 */
#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end));   \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))

/*
 * The following are helper functions to help access bitmaps and convert
 * between bitmap offsets to address offsets.
 */
static unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index)
{
	return chunk->alloc_map +
	       (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG);
}

static unsigned long pcpu_off_to_block_index(int off)
{
	return off / PCPU_BITMAP_BLOCK_BITS;
}

static unsigned long pcpu_off_to_block_off(int off)
{
	return off & (PCPU_BITMAP_BLOCK_BITS - 1);
}

static unsigned long pcpu_block_off_to_off(int index, int off)
{
	return index * PCPU_BITMAP_BLOCK_BITS + off;
}
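
/*
 * Illustrative example of the conversions above: if PCPU_BITMAP_BLOCK_BITS
 * is 1024 (4K pages, 4-byte allocation units), chunk bit offset 2500 maps
 * to block index 2 (2500 / 1024) at block-relative offset 452 (2500 & 1023),
 * and pcpu_block_off_to_off(2, 452) recovers 2 * 1024 + 452 == 2500.
 */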

/**
 * pcpu_next_md_free_region - finds the next hint free area
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * This takes advantage of the metadata blocks.  A free area may span
 * block boundaries, in which case it is pieced together from the
 * right_free of one block and the left_free of the following blocks.
 */
static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
				     int *bits)
{
	int i = pcpu_off_to_block_index(*bit_off);
	int block_off = pcpu_off_to_block_off(*bit_off);
	struct pcpu_block_md *block;

	*bits = 0;
	for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
	     block++, i++) {
		/* handles contig area across blocks */
		if (*bits) {
			*bits += block->left_free;
			if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
				continue;
			return;
		}

		/*
		 * This checks three things.  First, is there a contig_hint
		 * to check.  Second, have we checked this hint before by
		 * comparing the block_off.  Third, is this the same as the
		 * right contig hint.  In the last case, it spills over into
		 * the next block and should be handled by the contig area
		 * across blocks code above.
		 */
		*bits = block->contig_hint;
		if (*bits && block->contig_hint_start >= block_off &&
		    *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) {
			*bit_off = pcpu_block_off_to_off(i,
					block->contig_hint_start);
			return;
		}
		/* reset to satisfy the second predicate above */
		block_off = 0;

		*bits = block->right_free;
		*bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
	}
}

/**
 * pcpu_next_fit_region - finds fit areas for a given allocation request
 * @chunk: chunk of interest
 * @alloc_bits: size of allocation
 * @align: alignment of area (max PAGE_SIZE)
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * Finds the next free region that is viable for use with a given size and
 * alignment.  This only returns if there is a valid area to be used for
 * this allocation.  block->first_free is returned if the allocation request
 * fits within the block to see if the request can be fulfilled prior to the
 * contig hint.
 */
static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
				 int align, int *bit_off, int *bits)
{
	int i = pcpu_off_to_block_index(*bit_off);
	int block_off = pcpu_off_to_block_off(*bit_off);
	struct pcpu_block_md *block;

	*bits = 0;
	for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
	     block++, i++) {
		/* handles contig area across blocks */
		if (*bits) {
			*bits += block->left_free;
			if (*bits >= alloc_bits)
				return;
			if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
				continue;
		}

		/* check block->contig_hint */
		*bits = ALIGN(block->contig_hint_start, align) -
			block->contig_hint_start;
		/*
		 * block->first_free is returned if the allocation also fits
		 * before the contig hint; the block offset is used to check
		 * whether this hint was already examined in a prior
		 * iteration.
		 */
		if (block->contig_hint &&
		    block->contig_hint_start >= block_off &&
		    block->contig_hint >= *bits + alloc_bits) {
			*bits += alloc_bits + block->contig_hint_start -
				 block->first_free;
			*bit_off = pcpu_block_off_to_off(i, block->first_free);
			return;
		}
		/* reset to satisfy the check above */
		block_off = 0;

		*bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,
				 align);
		*bits = PCPU_BITMAP_BLOCK_BITS - *bit_off;
		*bit_off = pcpu_block_off_to_off(i, *bit_off);
		if (*bits >= alloc_bits)
			return;
	}

	/* no valid offsets were found - fail condition */
	*bit_off = pcpu_chunk_map_bits(chunk);
}

/*
 * Metadata free area iterators.  These perform aggregation of free areas
 * based on the metadata blocks and return the offset @bit_off and size in
 * bits of the free area @bits.  pcpu_for_each_fit_region only returns when
 * a fit is found for the allocation request.
 */
#define pcpu_for_each_md_free_region(chunk, bit_off, bits)		\
	for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits));	\
	     (bit_off) < pcpu_chunk_map_bits((chunk));			\
	     (bit_off) += (bits) + 1,					\
	     pcpu_next_md_free_region((chunk), &(bit_off), &(bits)))

#define pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits)     \
	for (pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
				  &(bits));				      \
	     (bit_off) < pcpu_chunk_map_bits((chunk));			      \
	     (bit_off) += (bits),					      \
	     pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
				  &(bits)))

/**
 * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
 * @gfp: allocation flags
 *
 * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
 * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
 * The returned memory is always zeroed.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, gfp);
	else
		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
}

/**
 * pcpu_mem_free - free memory
 * @ptr: memory to free
 *
 * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
 */
static void pcpu_mem_free(void *ptr)
{
	kvfree(ptr);
}

/**
 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
 * @chunk: chunk of interest
 * @oslot: the previous slot it was on
 *
 * This function is called after an allocation or free changed @chunk.
 * New slot according to the changed state is determined and @chunk is
 * moved to the slot.  Note that the reserved chunk is never put on
 * chunk slots.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
	int nslot = pcpu_chunk_slot(chunk);

	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
		if (oslot < nslot)
			list_move(&chunk->list, &pcpu_slot[nslot]);
		else
			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
	}
}

/**
 * pcpu_cnt_pop_pages - counts populated backing pages in range
 * @chunk: chunk of interest
 * @bit_off: start offset
 * @bits: size of area to check
 *
 * Calculates the number of populated pages in the region
 * [page_start, page_end).  This keeps track of how many empty populated
 * pages are available and decides if async work should be scheduled.
 *
 * RETURNS:
 * The nr of populated pages.
 */
static inline int pcpu_cnt_pop_pages(struct pcpu_chunk *chunk, int bit_off,
				     int bits)
{
	int page_start = PFN_UP(bit_off * PCPU_MIN_ALLOC_SIZE);
	int page_end = PFN_DOWN((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);

	if (page_start >= page_end)
		return 0;

	/*
	 * bitmap_weight counts the number of bits set in a bitmap up to
	 * the specified number of bits.  This is counting the populated
	 * pages up to page_end and then subtracting the populated pages
	 * up to page_start to count the populated pages in
	 * [page_start, page_end).
	 */
	return bitmap_weight(chunk->populated, page_end) -
	       bitmap_weight(chunk->populated, page_start);
}
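
/*
 * Example of the rounding above (illustrative, assuming 4K pages and
 * PCPU_MIN_ALLOC_SIZE == 4): a free area at bit offset 100 spanning 3000
 * bits covers bytes [400, 12400).  PFN_UP(400) == 1 and PFN_DOWN(12400) == 3,
 * so only the two fully free pages 1 and 2 count as empty populated pages;
 * the partially covered pages 0 and 3 do not.
 */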

/**
 * pcpu_chunk_update - updates the chunk metadata given a free area
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * This updates the chunk's contig hint and starting offset given a free
 * area.  Chooses the best starting offset if the contig hints are equal.
 */
static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
{
	if (bits > chunk->contig_bits) {
		chunk->contig_bits_start = bit_off;
		chunk->contig_bits = bits;
	} else if (bits == chunk->contig_bits && chunk->contig_bits_start &&
		   (!bit_off ||
		    __ffs(bit_off) > __ffs(chunk->contig_bits_start))) {
		/* use the start with the best alignment */
		chunk->contig_bits_start = bit_off;
	}
}

/**
 * pcpu_chunk_refresh_hint - updates metadata about a chunk
 * @chunk: chunk of interest
 *
 * Iterates over the metadata blocks to find the largest contig area.
 * It also counts the populated pages and uses the delta to update the
 * global count.
 *
 * Updates:
 *	chunk->contig_bits
 *	chunk->contig_bits_start
 *	nr_empty_pop_pages (chunk and global)
 */
static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
{
	int bit_off, bits, nr_empty_pop_pages;

	/* clear metadata */
	chunk->contig_bits = 0;

	bit_off = chunk->first_bit;
	bits = nr_empty_pop_pages = 0;
	pcpu_for_each_md_free_region(chunk, bit_off, bits) {
		pcpu_chunk_update(chunk, bit_off, bits);

		nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
	}

	/*
	 * Keep track of nr_empty_pop_pages.
	 *
	 * The chunk maintains the previous number of free pages it held,
	 * so the delta is used to update the global counter.  The reserved
	 * chunk is not part of the free page count as it is populated at
	 * init and is special to serving reserved allocations.
	 */
	if (chunk != pcpu_reserved_chunk)
		pcpu_nr_empty_pop_pages +=
			(nr_empty_pop_pages - chunk->nr_empty_pop_pages);

	chunk->nr_empty_pop_pages = nr_empty_pop_pages;
}

/**
 * pcpu_block_update - updates a block given a free area
 * @block: block of interest
 * @start: start offset in block
 * @end: end offset in block
 *
 * Updates a block given a known free area.  The region [start, end) is
 * expected to be the entirety of the free area within a block.  Chooses
 * the best starting offset if the contig hints are equal.
 */
static void pcpu_block_update(struct pcpu_block_md *block, int start, int end)
{
	int contig = end - start;

	block->first_free = min(block->first_free, start);
	if (start == 0)
		block->left_free = contig;

	if (end == PCPU_BITMAP_BLOCK_BITS)
		block->right_free = contig;

	if (contig > block->contig_hint) {
		block->contig_hint_start = start;
		block->contig_hint = contig;
	} else if (block->contig_hint_start && contig == block->contig_hint &&
		   (!start || __ffs(start) > __ffs(block->contig_hint_start))) {
		/* use the start with the best alignment */
		block->contig_hint_start = start;
	}
}

/**
 * pcpu_block_refresh_hint - updates the block metadata by scanning
 * @chunk: chunk of interest
 * @index: index of the metadata block
 *
 * Scans over the block beginning at first_free and updates the block
 * metadata accordingly.
 */
static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
{
	struct pcpu_block_md *block = chunk->md_blocks + index;
	unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
	int rs, re;	/* region start, region end */

	/* clear hints */
	block->contig_hint = 0;
	block->left_free = block->right_free = 0;

	/* iterate over free areas and update the contig hints */
	pcpu_for_each_unpop_region(alloc_map, rs, re, block->first_free,
				   PCPU_BITMAP_BLOCK_BITS) {
		pcpu_block_update(block, rs, re);
	}
}

/**
 * pcpu_block_update_hint_alloc - update hint on allocation path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the allocation path.  The metadata only has to be
 * refreshed by a full scan iff the chunk's contig hint is broken.  Block
 * level scans are required if the block's contig hint is broken.
 */
static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
					 int bits)
{
	struct pcpu_block_md *s_block, *e_block, *block;
	int s_index, e_index;	/* block indexes of the allocated area */
	int s_off, e_off;	/* block offsets of the allocated area */

	/*
	 * Calculate per block offsets.
	 * The calculation uses an inclusive range, but the resulting offsets
	 * are [start, end).  e_index always points to the last block in the
	 * range.
	 */
	s_index = pcpu_off_to_block_index(bit_off);
	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
	s_off = pcpu_off_to_block_off(bit_off);
	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;

	s_block = chunk->md_blocks + s_index;
	e_block = chunk->md_blocks + e_index;

	/*
	 * Update s_block.
	 * block->first_free must be updated if the allocation takes its
	 * place.  If the allocation breaks the contig_hint, a scan is
	 * required to restore this hint.
	 */
	if (s_off == s_block->first_free)
		s_block->first_free = find_next_zero_bit(
					pcpu_index_alloc_map(chunk, s_index),
					PCPU_BITMAP_BLOCK_BITS,
					s_off + bits);

	if (s_off >= s_block->contig_hint_start &&
	    s_off < s_block->contig_hint_start + s_block->contig_hint) {
		/* block contig hint is broken - scan to fix it */
		pcpu_block_refresh_hint(chunk, s_index);
	} else {
		/* update left and right contig manually */
		s_block->left_free = min(s_block->left_free, s_off);
		if (s_index == e_index)
			s_block->right_free = min_t(int, s_block->right_free,
					PCPU_BITMAP_BLOCK_BITS - e_off);
		else
			s_block->right_free = 0;
	}

	/*
	 * Update e_block.
	 */
	if (s_index != e_index) {
		/*
		 * When the allocation is across blocks, the end is along
		 * the left part of the e_block.
		 */
		e_block->first_free = find_next_zero_bit(
				pcpu_index_alloc_map(chunk, e_index),
				PCPU_BITMAP_BLOCK_BITS, e_off);

		if (e_off == PCPU_BITMAP_BLOCK_BITS) {
			/* reset the block */
			e_block++;
		} else {
			if (e_off > e_block->contig_hint_start) {
				/* contig hint is broken - scan to fix it */
				pcpu_block_refresh_hint(chunk, e_index);
			} else {
				e_block->left_free = 0;
				e_block->right_free =
					min_t(int, e_block->right_free,
					      PCPU_BITMAP_BLOCK_BITS - e_off);
			}
		}

		/* update in-between md_blocks */
		for (block = s_block + 1; block < e_block; block++) {
			block->contig_hint = 0;
			block->left_free = 0;
			block->right_free = 0;
		}
	}

	/*
	 * The only time a full chunk scan is required is if the chunk
	 * contig hint is broken.  Otherwise, it means a smaller space
	 * was used and therefore the chunk contig hint is still correct.
	 */
	if (bit_off >= chunk->contig_bits_start &&
	    bit_off < chunk->contig_bits_start + chunk->contig_bits)
		pcpu_chunk_refresh_hint(chunk);
}

/**
 * pcpu_block_update_hint_free - updates the block hints on the free path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the free path.  This avoids a blind block refresh
 * by making use of the block contig hints.  If this fails, it scans
 * forward and backward to determine the extent of the free area.  This
 * is capped at the boundary of blocks.
 *
 * A chunk update is triggered if a page becomes free, a block becomes
 * free, or the free spans across blocks.  This tradeoff is to minimize
 * iterating over the block metadata to update chunk->contig_bits.
 * chunk->contig_bits may be off by up to a page, but it will never be
 * more than the available space.  If the contig hint is contained in one
 * block, it will be accurate.
 */
static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
					int bits)
{
	struct pcpu_block_md *s_block, *e_block, *block;
	int s_index, e_index;	/* block indexes of the freed allocation */
	int s_off, e_off;	/* block offsets of the freed allocation */
	int start, end;		/* start and end of the whole free area */

	/*
	 * Calculate per block offsets.
	 * The calculation uses an inclusive range, but the resulting offsets
	 * are [start, end).  e_index always points to the last block in the
	 * range.
	 */
	s_index = pcpu_off_to_block_index(bit_off);
	e_index = pcpu_off_to_block_index(bit_off + bits - 1);
	s_off = pcpu_off_to_block_off(bit_off);
	e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;

	s_block = chunk->md_blocks + s_index;
	e_block = chunk->md_blocks + e_index;

	/*
	 * Check if the freed area aligns with the block->contig_hint.
	 * If it does, then the scan to find the beginning/end of the
	 * larger free area can be avoided.
	 *
	 * start and end refer to beginning and end of the free area
	 * within each their respective blocks.  This is not necessarily
	 * the entire free area as it may span blocks past the beginning
	 * or end of the block.
	 */
	start = s_off;
	if (s_off == s_block->contig_hint + s_block->contig_hint_start) {
		start = s_block->contig_hint_start;
	} else {
		/*
		 * Scan backwards to find the extent of the free area.
		 * find_last_bit returns the starting bit, so if the start bit
		 * is returned, that means there was no last bit and the
		 * remainder of the chunk is free.
		 */
		int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index),
					  start);
		start = (start == l_bit) ? 0 : l_bit + 1;
	}

	end = e_off;
	if (e_off == e_block->contig_hint_start)
		end = e_block->contig_hint_start + e_block->contig_hint;
	else
		end = find_next_bit(pcpu_index_alloc_map(chunk, e_index),
				    PCPU_BITMAP_BLOCK_BITS, end);

	/* update s_block */
	e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
	pcpu_block_update(s_block, start, e_off);

	/* the free area may span multiple blocks */
	if (s_index != e_index) {
		/* update e_block */
		pcpu_block_update(e_block, 0, end);

		/* reset md_blocks in the middle */
		for (block = s_block + 1; block < e_block; block++) {
			block->first_free = 0;
			block->contig_hint_start = 0;
			block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
			block->left_free = PCPU_BITMAP_BLOCK_BITS;
			block->right_free = PCPU_BITMAP_BLOCK_BITS;
		}
	}

	/*
	 * Refresh chunk metadata when the free makes a page free, a block
	 * free, or spans across blocks.  The contig hint may be off by up
	 * to a page, but if the hint is contained in a block, it will be
	 * accurate with the else condition below.
	 */
	if ((ALIGN_DOWN(end, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS)) >
	     ALIGN(start, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS))) ||
	    s_index != e_index)
		pcpu_chunk_refresh_hint(chunk);
	else
		pcpu_chunk_update(chunk, pcpu_block_off_to_off(s_index, start),
				  s_block->contig_hint);
}

/**
 * pcpu_is_populated - determines if the region is populated
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of area
 * @next_off: return value for the next offset to start searching
 *
 * For atomic allocations, check if the backing pages are populated.
 *
 * RETURNS:
 * Bool if the backing pages are populated.
 * next_off is to skip over unpopulated blocks in pcpu_find_block_fit.
 */
static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
			      int *next_off)
{
	int page_start, page_end, rs, re;

	page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
	page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);

	rs = page_start;
	pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
	if (rs >= page_end)
		return true;

	*next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
	return false;
}
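
/*
 * Note the asymmetry with pcpu_cnt_pop_pages() above: here the page range
 * is widened (PFN_DOWN of the start, PFN_UP of the end) because every page
 * the area touches, even partially, must be backed for an atomic
 * allocation to use it, whereas only fully free pages count as empty
 * populated pages.
 */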

/**
 * pcpu_find_block_fit - finds the offset to start searching
 * @chunk: chunk of interest
 * @alloc_bits: size of request in allocation units
 * @align: alignment of area (max PAGE_SIZE bytes)
 * @pop_only: use populated regions only
 *
 * Given a chunk and an allocation spec, find the offset to begin
 * searching for a free region.  This iterates over the bitmap metadata
 * blocks to find an offset that will be guaranteed to fit the
 * requirements.  It is not quite first fit as if the allocation does not
 * fit in the contig hint of a block or chunk, it is skipped.  This errs
 * on the side of caution to prevent excess iteration.  Poor alignment
 * can cause the allocator to skip over blocks and chunks that have valid
 * free areas.
 *
 * RETURNS:
 * The offset in the bitmap to begin searching.
 * -1 if no offset is found.
 */
static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
			       size_t align, bool pop_only)
{
	int bit_off, bits, next_off;

	/*
	 * Check to see if the allocation can fit in the chunk's contig hint.
	 * This is an optimization to prevent scanning by assuming if it
	 * cannot fit in the overall hint, there is memory pressure and
	 * creating a new chunk would happen soon.
	 */
	bit_off = ALIGN(chunk->contig_bits_start, align) -
		  chunk->contig_bits_start;
	if (bit_off + alloc_bits > chunk->contig_bits)
		return -1;

	bit_off = chunk->first_bit;
	bits = 0;
	pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
		if (!pop_only || pcpu_is_populated(chunk, bit_off, bits,
						   &next_off))
			break;

		bit_off = next_off;
		bits = 0;
	}

	if (bit_off == pcpu_chunk_map_bits(chunk))
		return -1;

	return bit_off;
}

/**
 * pcpu_alloc_area - allocates an area from a pcpu_chunk
 * @chunk: chunk of interest
 * @alloc_bits: size of request in allocation units
 * @align: alignment of area (max PAGE_SIZE)
 * @start: bit_off to start searching
 *
 * This function takes in a @start offset to begin searching to fit an
 * allocation of @alloc_bits with alignment @align.  It needs to scan
 * the allocation map because if it fits within the block's contig hint,
 * @start will be block->first_free.  This is an attempt to fill the
 * allocation prior to breaking the contig hint.  The allocation and
 * boundary maps are updated accordingly if it confirms a valid
 * free area.
 *
 * RETURNS:
 * Allocated addr offset in @chunk on success.
 * -1 if no matching area is found.
 */
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
			   size_t align, int start)
{
	size_t align_mask = (align) ? (align - 1) : 0;
	int bit_off, end, oslot;

	lockdep_assert_held(&pcpu_lock);

	oslot = pcpu_chunk_slot(chunk);

	/*
	 * Search to find a fit.  The search is bounded to keep it local
	 * to the starting block.
	 */
	end = start + alloc_bits + PCPU_BITMAP_BLOCK_BITS;
	bit_off = bitmap_find_next_zero_area(chunk->alloc_map, end, start,
					     alloc_bits, align_mask);
	if (bit_off >= end)
		return -1;

	/* update alloc map */
	bitmap_set(chunk->alloc_map, bit_off, alloc_bits);

	/* update boundary map */
	set_bit(bit_off, chunk->bound_map);
	bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
	set_bit(bit_off + alloc_bits, chunk->bound_map);

	chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;

	/* update first free bit */
	if (bit_off == chunk->first_bit)
		chunk->first_bit = find_next_zero_bit(
					chunk->alloc_map,
					pcpu_chunk_map_bits(chunk),
					bit_off + alloc_bits);

	pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);

	pcpu_chunk_relocate(chunk, oslot);

	return bit_off * PCPU_MIN_ALLOC_SIZE;
}
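
/*
 * Sketch of the boundary map convention used above (illustrative): for an
 * allocation of 3 units at bit 8, alloc_map gets bits 8-10 set, while
 * bound_map gets bits 8 and 11 set and bits 9-10 cleared.  On free, the
 * next set bit in bound_map after the start offset marks the end of the
 * allocation, which is how pcpu_free_area() below recovers its size.
 */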

/**
 * pcpu_free_area - frees the corresponding offset
 * @chunk: chunk of interest
 * @off: addr offset into chunk
 *
 * This function determines the size of an allocation to free using
 * the boundary bitmap and clears the allocation map.
 */
static void pcpu_free_area(struct pcpu_chunk *chunk, int off)
{
	int bit_off, bits, end, oslot;

	lockdep_assert_held(&pcpu_lock);
	pcpu_stats_area_dealloc(chunk);

	oslot = pcpu_chunk_slot(chunk);

	bit_off = off / PCPU_MIN_ALLOC_SIZE;

	/* find end index */
	end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
			    bit_off + 1);
	bits = end - bit_off;
	bitmap_clear(chunk->alloc_map, bit_off, bits);

	/* update metadata */
	chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE;

	/* update first free bit */
	chunk->first_bit = min(chunk->first_bit, bit_off);

	pcpu_block_update_hint_free(chunk, bit_off, bits);

	pcpu_chunk_relocate(chunk, oslot);
}

static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
{
	struct pcpu_block_md *md_block;

	/* a fresh block is entirely free */
	for (md_block = chunk->md_blocks;
	     md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
	     md_block++) {
		md_block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
		md_block->left_free = PCPU_BITMAP_BLOCK_BITS;
		md_block->right_free = PCPU_BITMAP_BLOCK_BITS;
	}
}

/**
 * pcpu_alloc_first_chunk - creates chunks that serve the first chunk
 * @tmp_addr: the start of the region served
 * @map_size: size of the region served
 *
 * This is responsible for creating the chunks that serve the first chunk.
 * The base_addr is page aligned down of @tmp_addr while the region end is
 * page aligned up.  Offsets are kept track of to determine the region
 * served and mark the region as allocated.
 *
 * RETURNS:
 * Chunk serving the region at @tmp_addr of @map_size.
 */
static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
							 int map_size)
{
	struct pcpu_chunk *chunk;
	unsigned long aligned_addr, lcm_align;
	int start_offset, offset_bits, region_size, region_bits;

	/* region calculations */
	aligned_addr = tmp_addr & PAGE_MASK;

	start_offset = tmp_addr - aligned_addr;

	/*
	 * Align the end of the region with the LCM of PAGE_SIZE and
	 * PCPU_BITMAP_BLOCK_SIZE.  One of these constants is a multiple of
	 * the other, so we can just use the LCM.
	 */
	lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
	region_size = ALIGN(start_offset + map_size, lcm_align);

	/* allocate chunk */
	chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) +
				    BITS_TO_LONGS(region_size >> PAGE_SHIFT),
				    0);

	INIT_LIST_HEAD(&chunk->list);

	chunk->base_addr = (void *)aligned_addr;
	chunk->start_offset = start_offset;
	chunk->end_offset = region_size - chunk->start_offset - map_size;

	chunk->nr_pages = region_size >> PAGE_SHIFT;
	region_bits = pcpu_chunk_map_bits(chunk);

	chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) *
					       sizeof(chunk->alloc_map[0]), 0);
	chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) *
					       sizeof(chunk->bound_map[0]), 0);
	chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) *
					       sizeof(chunk->md_blocks[0]), 0);
	pcpu_init_md_blocks(chunk);

	/* manage populated page bitmap */
	chunk->immutable = true;
	bitmap_fill(chunk->populated, chunk->nr_pages);
	chunk->nr_populated = chunk->nr_pages;
	chunk->nr_empty_pop_pages =
		pcpu_cnt_pop_pages(chunk, start_offset / PCPU_MIN_ALLOC_SIZE,
				   map_size / PCPU_MIN_ALLOC_SIZE);

	chunk->contig_bits = map_size / PCPU_MIN_ALLOC_SIZE;
	chunk->free_bytes = map_size;

	if (chunk->start_offset) {
		/* hide the beginning of the bitmap */
		offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
		bitmap_set(chunk->alloc_map, 0, offset_bits);
		set_bit(0, chunk->bound_map);
		set_bit(offset_bits, chunk->bound_map);

		chunk->first_bit = offset_bits;

		pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
	}

	if (chunk->end_offset) {
		/* hide the end of the bitmap */
		offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE;
		bitmap_set(chunk->alloc_map,
			   pcpu_chunk_map_bits(chunk) - offset_bits,
			   offset_bits);
		set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE,
			chunk->bound_map);
		set_bit(region_bits, chunk->bound_map);

		pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk)
					     - offset_bits, offset_bits);
	}

	return chunk;
}
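
/*
 * Layout handled above (illustrative): the region backing a first-chunk
 * piece looks like
 *
 *	base_addr                                      base + region_size
 *	|<- start_offset ->|<----- map_size ----->|<- end_offset ->|
 *
 * where start_offset and end_offset are the page/block alignment slack
 * around the area actually served; both are marked allocated in the
 * bitmaps so they can never be handed out.
 */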

static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
{
	struct pcpu_chunk *chunk;
	int region_bits;

	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
	if (!chunk)
		return NULL;

	INIT_LIST_HEAD(&chunk->list);
	chunk->nr_pages = pcpu_unit_pages;
	region_bits = pcpu_chunk_map_bits(chunk);

	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
					   sizeof(chunk->alloc_map[0]), gfp);
	if (!chunk->alloc_map)
		goto alloc_map_fail;

	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
					   sizeof(chunk->bound_map[0]), gfp);
	if (!chunk->bound_map)
		goto bound_map_fail;

	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
					   sizeof(chunk->md_blocks[0]), gfp);
	if (!chunk->md_blocks)
		goto md_blocks_fail;

	pcpu_init_md_blocks(chunk);

	/* init metadata */
	chunk->contig_bits = region_bits;
	chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;

	return chunk;

md_blocks_fail:
	pcpu_mem_free(chunk->bound_map);
bound_map_fail:
	pcpu_mem_free(chunk->alloc_map);
alloc_map_fail:
	pcpu_mem_free(chunk);

	return NULL;
}

static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;
	/* free everything allocated by pcpu_alloc_chunk(), md_blocks too */
	pcpu_mem_free(chunk->md_blocks);
	pcpu_mem_free(chunk->bound_map);
	pcpu_mem_free(chunk->alloc_map);
	pcpu_mem_free(chunk);
}

/**
 * pcpu_chunk_populated - post-population bookkeeping
 * @chunk: pcpu_chunk which got populated
 * @page_start: the start page
 * @page_end: the end page
 * @for_alloc: if this is to populate for allocation
 *
 * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
 * the bookkeeping information accordingly.  Must be called after each
 * successful population.
 *
 * If this is @for_alloc, do not increment pcpu_nr_empty_pop_pages because
 * it is to serve an allocation in that area.
 */
static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
				 int page_end, bool for_alloc)
{
	int nr = page_end - page_start;

	lockdep_assert_held(&pcpu_lock);

	bitmap_set(chunk->populated, page_start, nr);
	chunk->nr_populated += nr;

	if (!for_alloc) {
		chunk->nr_empty_pop_pages += nr;
		pcpu_nr_empty_pop_pages += nr;
	}
}

/**
 * pcpu_chunk_depopulated - post-depopulation bookkeeping
 * @chunk: pcpu_chunk which got depopulated
 * @page_start: the start page
 * @page_end: the end page
 *
 * Pages in [@page_start,@page_end) have been depopulated from @chunk.
 * Update the bookkeeping information accordingly.  Must be called after
 * each successful depopulation.
 */
static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
				   int page_start, int page_end)
{
	int nr = page_end - page_start;

	lockdep_assert_held(&pcpu_lock);

	bitmap_clear(chunk->populated, page_start, nr);
	chunk->nr_populated -= nr;
	chunk->nr_empty_pop_pages -= nr;
	pcpu_nr_empty_pop_pages -= nr;
}

/*
 * Chunk management implementation.
 *
 * To allow different implementations, chunk alloc/free and
 * [de]population are implemented in a separate file which is pulled
 * into this file and compiled together.  The following functions
 * should be implemented.
 *
 * pcpu_populate_chunk		- populate the specified range of a chunk
 * pcpu_depopulate_chunk	- depopulate the specified range of a chunk
 * pcpu_create_chunk		- create a new chunk
 * pcpu_destroy_chunk		- destroy a chunk, always preceded by full depop
 * pcpu_addr_to_page		- translate address to physical address
 * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end, gfp_t gfp);
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end);
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);

#ifdef CONFIG_NEED_PER_CPU_KM
#include "percpu-km.c"
#else
#include "percpu-vm.c"
#endif

/**
 * pcpu_chunk_addr_search - determine chunk containing specified address
 * @addr: address for which the chunk needs to be determined.
 *
 * This is an internal function that handles all but static allocations.
 * Static percpu address values should never be passed into the allocator.
 *
 * RETURNS:
 * The address of the found chunk.
 */
static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
{
	/* is it in the dynamic region (first chunk)? */
	if (pcpu_addr_in_chunk(pcpu_first_chunk, addr))
		return pcpu_first_chunk;

	/* is it in the reserved region? */
	if (pcpu_addr_in_chunk(pcpu_reserved_chunk, addr))
		return pcpu_reserved_chunk;

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc
	 * space.  Note that any possible cpu id can be used here, so
	 * there's no need to worry about preemption or cpu hotplug.
	 */
	addr += pcpu_unit_offsets[raw_smp_processor_id()];
	return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}

/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 * @gfp: allocation flags
 *
 * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't
 * contain %GFP_KERNEL, the allocation is atomic.  If @gfp has __GFP_NOWARN
 * then no warning will be triggered on invalid or failed allocation
 * requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
				 gfp_t gfp)
{
	/* whitelisted flags that can be passed to the backing allocators */
	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
	bool do_warn = !(gfp & __GFP_NOWARN);
	static int warn_limit = 10;
	struct pcpu_chunk *chunk;
	const char *err;
	int slot, off, cpu, ret;
	unsigned long flags;
	void __percpu *ptr;
	size_t bits, bit_align;

	/*
	 * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
	 * therefore alignment must be a minimum of that many bytes.
	 * An allocation may have internal fragmentation from rounding up
	 * of up to PCPU_MIN_ALLOC_SIZE - 1 bytes.
	 */
	if (unlikely(align < PCPU_MIN_ALLOC_SIZE))
		align = PCPU_MIN_ALLOC_SIZE;

	size = ALIGN(size, PCPU_MIN_ALLOC_SIZE);
	bits = size >> PCPU_MIN_ALLOC_SHIFT;
	bit_align = align >> PCPU_MIN_ALLOC_SHIFT;

	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
		     !is_power_of_2(align))) {
		WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
		     size, align);
		return NULL;
	}

	if (!is_atomic) {
		/*
		 * pcpu_balance_workfn() allocates memory under this mutex,
		 * and it may wait for memory reclaim.  Allow the current
		 * task to become an OOM victim, in case of memory pressure.
		 */
		if (gfp & __GFP_NOFAIL)
			mutex_lock(&pcpu_alloc_mutex);
		else if (mutex_lock_killable(&pcpu_alloc_mutex))
			return NULL;
	}

	spin_lock_irqsave(&pcpu_lock, flags);

	/* serve reserved allocations from the reserved chunk if available */
	if (reserved && pcpu_reserved_chunk) {
		chunk = pcpu_reserved_chunk;

		off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic);
		if (off < 0) {
			err = "alloc from reserved chunk failed";
			goto fail_unlock;
		}

		off = pcpu_alloc_area(chunk, bits, bit_align, off);
		if (off >= 0)
			goto area_found;

		err = "alloc from reserved chunk failed";
		goto fail_unlock;
	}

restart:
	/* search through normal chunks */
	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			off = pcpu_find_block_fit(chunk, bits, bit_align,
						  is_atomic);
			if (off < 0)
				continue;

			off = pcpu_alloc_area(chunk, bits, bit_align, off);
			if (off >= 0)
				goto area_found;
		}
	}

	spin_unlock_irqrestore(&pcpu_lock, flags);

	/*
	 * No space left.  Create a new chunk.  We don't want multiple
	 * tasks to create chunks simultaneously.  Serialize and create iff
	 * there's still no empty chunk after grabbing the mutex.
	 */
	if (is_atomic) {
		err = "atomic alloc failed, no space left";
		goto fail;
	}

	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
		chunk = pcpu_create_chunk(pcpu_gfp);
		if (!chunk) {
			err = "failed to allocate new chunk";
			goto fail;
		}

		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_chunk_relocate(chunk, -1);
	} else {
		spin_lock_irqsave(&pcpu_lock, flags);
	}

	goto restart;

area_found:
	pcpu_stats_area_alloc(chunk, size);
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* populate if not all pages are already there */
	if (!is_atomic) {
		int page_start, page_end, rs, re;

		page_start = PFN_DOWN(off);
		page_end = PFN_UP(off + size);

		pcpu_for_each_unpop_region(chunk->populated, rs, re,
					   page_start, page_end) {
			WARN_ON(chunk->immutable);

			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);

			spin_lock_irqsave(&pcpu_lock, flags);
			if (ret) {
				pcpu_free_area(chunk, off);
				err = "failed to populate";
				goto fail_unlock;
			}
			pcpu_chunk_populated(chunk, rs, re, true);
			spin_unlock_irqrestore(&pcpu_lock, flags);
		}

		mutex_unlock(&pcpu_alloc_mutex);
	}

	if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
		pcpu_schedule_balance_work();

	/* clear the areas and return address relative to base address */
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);

	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
	kmemleak_alloc_percpu(ptr, size, gfp);

	trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
				  chunk->base_addr, off, ptr);

	return ptr;

fail_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);
fail:
	trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);

	if (!is_atomic && do_warn && warn_limit) {
		pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
			size, align, is_atomic, err);
		dump_stack();
		if (!--warn_limit)
			pr_info("limit reached, disable warning\n");
	}
	if (is_atomic) {
		/* see the flag handling in pcpu_balance_workfn() */
		pcpu_atomic_alloc_failed = true;
		pcpu_schedule_balance_work();
	} else {
		mutex_unlock(&pcpu_alloc_mutex);
	}
	return NULL;
}

/**
 * __alloc_percpu_gfp - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @gfp: allocation flags
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.  If
 * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
 * be called from any context but is a lot more likely to fail.  If @gfp
 * has __GFP_NOWARN then no warning will be triggered on invalid or failed
 * allocation requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
{
	return pcpu_alloc(size, align, false, gfp);
}
EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);

/**
 * __alloc_percpu - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL).
 */
void __percpu *__alloc_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, false, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);
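
/*
 * Typical usage (illustrative only; "struct counter" and its "val" member
 * are hypothetical): callers normally go through the alloc_percpu() and
 * alloc_percpu_gfp() wrappers from <linux/percpu.h>, e.g.
 *
 *	struct counter __percpu *cnt = alloc_percpu(struct counter);
 *	...
 *	this_cpu_inc(cnt->val);
 *	...
 *	free_percpu(cnt);
 *
 * which expand to __alloc_percpu(sizeof(type), __alignof__(type)).
 */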

/**
 * __alloc_reserved_percpu - allocate reserved percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align
 * from reserved percpu area if arch has set it up; otherwise,
 * allocation is served from the same dynamic area.  Might sleep.
 * Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, true, GFP_KERNEL);
}

/**
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
 * Reclaim all fully free chunks except for the first one.  This is also
 * responsible for maintaining the pool of empty populated pages.  However,
 * it is possible that this is called when physical memory is scarce causing
 * OOM killer to be triggered.  We should avoid doing so until an actual
 * allocation causes the failure as it is possible that requests can be
 * serviced from already backed regions.
 */
static void pcpu_balance_workfn(struct work_struct *work)
{
	/* gfp flags passed to underlying allocators */
	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
	LIST_HEAD(to_free);
	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
	struct pcpu_chunk *chunk, *next;
	int slot, nr_to_pop, ret;

	/*
	 * There's no reason to keep around multiple unused chunks and VM
	 * areas can be scarce.  Destroy all free chunks except for one.
	 */
	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, free_head, list) {
		WARN_ON(chunk->immutable);

		/* spare the first one */
		if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
			continue;

		list_move(&chunk->list, &to_free);
	}

	spin_unlock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, &to_free, list) {
		int rs, re;

		pcpu_for_each_pop_region(chunk->populated, rs, re, 0,
					 chunk->nr_pages) {
			pcpu_depopulate_chunk(chunk, rs, re);
			spin_lock_irq(&pcpu_lock);
			pcpu_chunk_depopulated(chunk, rs, re);
			spin_unlock_irq(&pcpu_lock);
		}
		pcpu_destroy_chunk(chunk);
		cond_resched();
	}

	/*
	 * Ensure there are certain number of free populated pages for
	 * atomic allocs.  Fill up from the most packed so that atomic
	 * allocs don't increase fragmentation.  If atomic allocation
	 * failed previously, always populate the maximum amount.  This
	 * should prevent atomic allocs larger than PAGE_SIZE from keeping
	 * failing indefinitely; however, large atomic allocs are not
	 * something we support properly and can be highly unreliable and
	 * inefficient.
	 */
retry_pop:
	if (pcpu_atomic_alloc_failed) {
		nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
		/* best effort anyway, don't worry about synchronization */
		pcpu_atomic_alloc_failed = false;
	} else {
		nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
				  pcpu_nr_empty_pop_pages,
				  0, PCPU_EMPTY_POP_PAGES_HIGH);
	}

	for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
		int nr_unpop = 0, rs, re;

		if (!nr_to_pop)
			break;

		spin_lock_irq(&pcpu_lock);
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			nr_unpop = chunk->nr_pages - chunk->nr_populated;
			if (nr_unpop)
				break;
		}
		spin_unlock_irq(&pcpu_lock);

		if (!nr_unpop)
			continue;

		/* @chunk can't go away while pcpu_alloc_mutex is held */
		pcpu_for_each_unpop_region(chunk->populated, rs, re, 0,
					   chunk->nr_pages) {
			int nr = min(re - rs, nr_to_pop);

			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
			if (!ret) {
				nr_to_pop -= nr;
				spin_lock_irq(&pcpu_lock);
				pcpu_chunk_populated(chunk, rs, rs + nr, false);
				spin_unlock_irq(&pcpu_lock);
			} else {
				nr_to_pop = 0;
			}

			if (!nr_to_pop)
				break;
		}
	}

	if (nr_to_pop) {
		/* ran out of chunks to populate, create a new one and retry */
		chunk = pcpu_create_chunk(gfp);
		if (chunk) {
			spin_lock_irq(&pcpu_lock);
			pcpu_chunk_relocate(chunk, -1);
			spin_unlock_irq(&pcpu_lock);
			goto retry_pop;
		}
	}

	mutex_unlock(&pcpu_alloc_mutex);
}

/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.
 *
 * CONTEXT:
 * Can be called from atomic context.
 */
void free_percpu(void __percpu *ptr)
{
	void *addr;
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int off;

	if (!ptr)
		return;

	kmemleak_free_percpu(ptr);

	addr = __pcpu_ptr_to_addr(ptr);

	spin_lock_irqsave(&pcpu_lock, flags);

	chunk = pcpu_chunk_addr_search(addr);
	off = addr - chunk->base_addr;

	pcpu_free_area(chunk, off);

	/* if there are more than one fully free chunks, wake up grim reaper */
	if (chunk->free_bytes == pcpu_unit_size) {
		struct pcpu_chunk *pos;

		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
			if (pos != chunk) {
				pcpu_schedule_balance_work();
				break;
			}
	}

	trace_percpu_free_percpu(chunk->base_addr, off, ptr);

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
EXPORT_SYMBOL_GPL(free_percpu);

bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
{
#ifdef CONFIG_SMP
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		void *start = per_cpu_ptr(base, cpu);
		void *va = (void *)addr;

		if (va >= start && va < start + static_size) {
			if (can_addr) {
				*can_addr = (unsigned long) (va - start);
				*can_addr += (unsigned long)
					per_cpu_ptr(base, get_boot_cpu_id());
			}
			return true;
		}
	}
#endif
	/* on UP, can't distinguish from other static vars, always false */
	return false;
}

/**
 * is_kernel_percpu_address - test whether address is from static percpu area
 * @addr: address to test
 *
 * Test whether @addr belongs to in-kernel static percpu area.  Module
 * static percpu areas are not considered.  For those, use
 * is_module_percpu_address().
 *
 * RETURNS:
 * %true if @addr is from in-kernel static percpu area, %false otherwise.
 */
bool is_kernel_percpu_address(unsigned long addr)
{
	return __is_kernel_percpu_address(addr, NULL);
}

/**
 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
 * @addr: the address to be converted to physical address
 *
 * Given @addr which is dereferenceable address obtained via one of
 * percpu access macros, this function translates it into its physical
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
 * percpu allocator has special setup for the first chunk, which currently
 * supports either embedding in linear address space or vmalloc mapping,
 * and, from the second one, the backing allocator (currently either vm or
 * km) provides translation.
 *
 * The addr can be translated simply without checking if it falls into the
 * first chunk.  But the current code reflects better how percpu allocator
 * actually works, and the verification can discover both bugs in percpu
 * allocator itself and per_cpu_ptr_to_phys() callers.  So we keep current
 * code.
 *
 * RETURNS:
 * The physical address for @addr.
 */
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
	unsigned long first_low, first_high;
	unsigned int cpu;

	/*
	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 *
	 * The address check is against full chunk sizes.  pcpu_base_addr
	 * points to the beginning of the first chunk including the
	 * static region.  Assumes good intent as the first chunk may
	 * not be full (ie. < pcpu_unit_pages in size).
	 */
	first_low = (unsigned long)pcpu_base_addr +
		    pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
	first_high = (unsigned long)pcpu_base_addr +
		     pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
	if ((unsigned long)addr >= first_low &&
	    (unsigned long)addr < first_high) {
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);

			if (addr >= start && addr < start + pcpu_unit_size) {
				in_first_chunk = true;
				break;
			}
		}
	}

	if (in_first_chunk) {
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
			return page_to_phys(vmalloc_to_page(addr)) +
			       offset_in_page(addr);
	} else
		return page_to_phys(pcpu_addr_to_page(addr)) +
		       offset_in_page(addr);
}

/**
 * pcpu_alloc_alloc_info - allocate percpu allocation info
 * @nr_groups: the number of groups
 * @nr_units: the number of units
 *
 * Allocate ai which is large enough for @nr_groups groups containing
 * @nr_units units.  The returned ai's groups[0].cpu_map points to the
 * cpu_map array which is long enough for @nr_units and filled with
 * NR_CPUS.  It's the caller's responsibility to initialize cpu_map
 * pointer of other groups.
 *
 * RETURNS:
 * Pointer to the allocated pcpu_alloc_info on success, NULL on failure.
 */
struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
						      int nr_units)
{
	struct pcpu_alloc_info *ai;
	size_t base_size, ai_size;
	void *ptr;
	int unit;

	base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
			  __alignof__(ai->groups[0].cpu_map[0]));
	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

	ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
	if (!ptr)
		return NULL;
	ai = ptr;
	ptr += base_size;

	ai->groups[0].cpu_map = ptr;

	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;

	ai->nr_groups = nr_groups;
	ai->__ai_size = PFN_ALIGN(ai_size);

	return ai;
}

/**
 * pcpu_free_alloc_info - free percpu allocation info
 * @ai: pcpu_alloc_info to free
 *
 * Free @ai which was allocated by pcpu_alloc_alloc_info().
 */
void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
{
	memblock_free_early(__pa(ai), ai->__ai_size);
}

/**
 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
 * @lvl: loglevel
 * @ai: allocation info to dump
 *
 * Print out information about @ai using loglevel @lvl.
 */
static void pcpu_dump_alloc_info(const char *lvl,
				 const struct pcpu_alloc_info *ai)
{
	int group_width = 1, cpu_width = 1, width;
	char empty_str[] = "--------";
	int alloc = 0, alloc_end = 0;
	int group, v;
	int upa, apl;	/* units per alloc, allocs per line */

	v = ai->nr_groups;
	while (v /= 10)
		group_width++;

	v = num_possible_cpus();
	while (v /= 10)
		cpu_width++;
	empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';

	upa = ai->alloc_size / ai->unit_size;
	width = upa * (cpu_width + 1) + group_width + 3;
	apl = rounddown_pow_of_two(max(60 / width, 1));

	printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
	       lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
	       ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);

	for (group = 0; group < ai->nr_groups; group++) {
		const struct pcpu_group_info *gi = &ai->groups[group];
		int unit = 0, unit_end = 0;

		BUG_ON(gi->nr_units % upa);
		for (alloc_end += gi->nr_units / upa;
		     alloc < alloc_end; alloc++) {
			if (!(alloc % apl)) {
				pr_cont("\n");
				printk("%spcpu-alloc: ", lvl);
			}
			pr_cont("[%0*d] ", group_width, group);

			for (unit_end += upa; unit < unit_end; unit++)
				if (gi->cpu_map[unit] != NR_CPUS)
					pr_cont("%0*d ",
						cpu_width, gi->cpu_map[unit]);
				else
					pr_cont("%s ", empty_str);
		}
	}
	pr_cont("\n");
}

/**
 * pcpu_setup_first_chunk - initialize the first percpu chunk
 * @ai: pcpu_alloc_info describing how to percpu area is shaped
 * @base_addr: mapped address
 *
 * Initialize the first percpu chunk which contains the kernel static
 * percpu area.  This function is to be called from arch percpu area
 * setup path.
 *
 * @ai contains all information necessary to initialize the first
 * chunk and prime the dynamic percpu allocator.
 *
 * @ai->static_size is the size of static percpu area.
 *
 * @ai->reserved_size, if non-zero, specifies the amount of bytes to
 * reserve after the static area in the first chunk.  This reserves
 * the first chunk such that it's available only through reserved
 * percpu allocation.  This is primarily used to serve module percpu
 * static areas on architectures where the addressing model has
 * limited offset range for symbol relocations to guarantee module
 * percpu symbols fall inside the relocatable range.
 *
 * @ai->dyn_size determines the number of bytes available for dynamic
 * allocation in the first chunk.  The area between @ai->static_size +
 * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused.
 *
 * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE
 * and equal to or larger than @ai->static_size + @ai->reserved_size +
 * @ai->dyn_size.
 *
 * @ai->atom_size is the allocation atom size and used as alignment
 * for vm areas.
 *
 * @ai->alloc_size is the allocation size and always multiple of
 * @ai->atom_size.
 *
 * @ai->nr_groups and @ai->groups describe virtual memory layout of
 * percpu areas.  Units which should be colocated are put into the
 * same group.  Dynamic VM areas will be allocated according to these
 * groupings.
 *
 * The caller should have mapped the first chunk at @base_addr and
 * copied static data to each unit.
 *
 * The first chunk will always contain a static and a dynamic region.
 * However, the static region is not managed by any chunk.  If the first
 * chunk also contains a reserved region, it is served by two chunks -
 * one for the reserved region and one for the dynamic region.  They
 * share the same vm, but use offset regions in the area allocation map.
 * The chunk serving the dynamic region is circulated in the chunk slots
 * and available for dynamic allocation like any other chunk.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
				  void *base_addr)
{
	size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	size_t static_size, dyn_size;
	struct pcpu_chunk *chunk;
	unsigned long *group_offsets;
	size_t *group_sizes;
	unsigned long *unit_off;
	unsigned int cpu;
	int *unit_map;
	int group, unit, i;
	int map_size;
	unsigned long tmp_addr;

#define PCPU_SETUP_BUG_ON(cond)	do {					\
	if (unlikely(cond)) {						\
		pr_emerg("failed to initialize, %s\n", #cond);		\
		pr_emerg("cpu_possible_mask=%*pb\n",			\
			 cpumask_pr_args(cpu_possible_mask));		\
		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
		BUG();							\
	}								\
} while (0)

	/* sanity checks */
	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
#ifdef CONFIG_SMP
	PCPU_SETUP_BUG_ON(!ai->static_size);
	PCPU_SETUP_BUG_ON(offset_in_page(__per_cpu_start));
#endif
	PCPU_SETUP_BUG_ON(!base_addr);
	PCPU_SETUP_BUG_ON(offset_in_page(base_addr));
	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
	PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size));
	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
	PCPU_SETUP_BUG_ON(!ai->dyn_size);
	PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
	PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
			    IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);

	/* process group information and build config tables accordingly */
	group_offsets = memblock_virt_alloc(ai->nr_groups *
					     sizeof(group_offsets[0]), 0);
	group_sizes = memblock_virt_alloc(ai->nr_groups *
					   sizeof(group_sizes[0]), 0);
	unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
	unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;

	pcpu_low_unit_cpu = NR_CPUS;
	pcpu_high_unit_cpu = NR_CPUS;

	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];

		group_offsets[group] = gi->base_offset;
		group_sizes[group] = gi->nr_units * ai->unit_size;

		for (i = 0; i < gi->nr_units; i++) {
			cpu = gi->cpu_map[i];
			if (cpu == NR_CPUS)
				continue;

			PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids);
			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

			/* determine low/high unit_cpu */
			if (pcpu_low_unit_cpu == NR_CPUS ||
			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
				pcpu_low_unit_cpu = cpu;
			if (pcpu_high_unit_cpu == NR_CPUS ||
			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;

	for_each_possible_cpu(cpu)
		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);

	/* we're done parsing the input, undefine BUG macro and dump config */
#undef PCPU_SETUP_BUG_ON
	pcpu_dump_alloc_info(KERN_DEBUG, ai);

	pcpu_nr_groups = ai->nr_groups;
	pcpu_group_offsets = group_offsets;
	pcpu_group_sizes = group_sizes;
	pcpu_unit_map = unit_map;
	pcpu_unit_offsets = unit_off;

	/* determine basic parameters */
	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
	pcpu_atom_size = ai->atom_size;
	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);

	pcpu_stats_save_ai(ai);

	/*
	 * Allocate chunk slots.  The additional last slot is for
	 * empty chunks.
	 */
	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
	pcpu_slot = memblock_virt_alloc(
			pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
	for (i = 0; i < pcpu_nr_slots; i++)
		INIT_LIST_HEAD(&pcpu_slot[i]);

	/*
	 * The end of the static region needs to be aligned with the
	 * minimum allocation size as this offsets the reserved and
	 * dynamic region.  The first chunk ends page aligned by
	 * expanding the dynamic region, therefore the dynamic region
	 * can be shrunk to compensate while still staying above the
	 * configured sizes.
	 */
	static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE);
	dyn_size = ai->dyn_size - (static_size - ai->static_size);

	/*
	 * Initialize first chunk.
	 * If the reserved_size is non-zero, this initializes the reserved
	 * chunk.  If the reserved_size is zero, the reserved chunk is NULL
	 * and the dynamic region is initialized here.  The first chunk,
	 * pcpu_first_chunk, will always point to the chunk that serves
	 * the dynamic region.
	 */
	tmp_addr = (unsigned long)base_addr + static_size;
	map_size = ai->reserved_size ?: dyn_size;
	chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);

	/* init dynamic chunk if necessary */
	if (ai->reserved_size) {
		pcpu_reserved_chunk = chunk;

		tmp_addr = (unsigned long)base_addr + static_size +
			   ai->reserved_size;
		map_size = dyn_size;
		chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
	}

	/* link the first chunk in */
	pcpu_first_chunk = chunk;
	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
	pcpu_chunk_relocate(pcpu_first_chunk, -1);

	pcpu_stats_chunk_alloc();
	trace_percpu_create_chunk(base_addr);

	/* we're done */
	pcpu_base_addr = base_addr;
	return 0;
}

#ifdef CONFIG_SMP

const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
	[PCPU_FC_AUTO]	= "auto",
	[PCPU_FC_EMBED]	= "embed",
	[PCPU_FC_PAGE]	= "page",
};

enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;

static int __init percpu_alloc_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (0)
		/* nada */;
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
	else if (!strcmp(str, "embed"))
		pcpu_chosen_fc = PCPU_FC_EMBED;
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	else if (!strcmp(str, "page"))
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	else
		pr_warn("unknown allocator %s specified\n", str);

	return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);

/*
 * pcpu_embed_first_chunk() is used by the generic percpu setup.
 * Build it if needed by the arch config or the generic setup is going
 * to be used.
 */
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

/* build pcpu_page_first_chunk() iff needed by the arch config */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif

/* pcpu_build_alloc_info() is used by both embed and page first chunk */
#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
/**
 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
 * This function determines grouping of units, their mappings to cpus
 * and other parameters considering needed percpu size, allocation
 * atom size and distances between CPUs.
 *
 * Groups are always multiples of atom size and CPUs which are of
 * LOCAL_DISTANCE both ways are grouped together and share space for
 * units in the same group.  The returned configuration is guaranteed
 * to have CPUs on different nodes on different groups and >=75% usage
 * of allocated virtual address space.
 *
 * RETURNS:
 * On success, pointer to the new allocation_info is returned.  On
 * failure, ERR_PTR value is returned.
 */
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
				size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
	static int group_map[NR_CPUS] __initdata;
	static int group_cnt[NR_CPUS] __initdata;
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	int nr_groups = 1, nr_units = 0;
	size_t size_sum, min_unit_size, alloc_size;
	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
	int last_allocs, group, unit;
	unsigned int cpu, tcpu;
	struct pcpu_alloc_info *ai;
	unsigned int *cpu_map;

	/* this function may be called multiple times */
	memset(group_map, 0, sizeof(group_map));
	memset(group_cnt, 0, sizeof(group_cnt));

	/* calculate size_sum and ensure dyn_size is enough for early alloc */
	size_sum = PFN_ALIGN(static_size + reserved_size +
			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
	dyn_size = size_sum - static_size - reserved_size;

	/*
	 * Determine min_unit_size, alloc_size and max_upa such that
	 * alloc_size is multiple of atom_size and is the smallest
	 * which can accommodate 4k aligned segments which are equal to
	 * or larger than min_unit_size.
	 */
	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	/* determine the maximum # of units that can fit in an allocation */
	alloc_size = roundup(min_unit_size, atom_size);
	upa = alloc_size / min_unit_size;
	while (alloc_size % upa || (offset_in_page(alloc_size / upa)))
		upa--;
	max_upa = upa;
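
	/*
	 * Worked example of the upa search above (illustrative): with a
	 * 2MB atom_size and a 320KB min_unit_size, alloc_size rounds up to
	 * 2MB and the initial upa guess is 6.  2MB is divisible by neither
	 * 6 nor 5, so upa decrements to 4, giving four 512KB page-aligned
	 * units per 2MB allocation.
	 */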

	/* group cpus according to their proximity */
	for_each_possible_cpu(cpu) {
		group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (cpu == tcpu)
				break;
			if (group_map[tcpu] == group && cpu_distance_fn &&
			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				nr_groups = max(nr_groups, group + 1);
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
	}

	/*
	 * Expand unit size until address space usage goes over 75%
	 * and then as much as possible without using more address
	 * space.
	 */
	last_allocs = INT_MAX;
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || (offset_in_page(alloc_size / upa)))
			continue;

		for (group = 0; group < nr_groups; group++) {
			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[group];
		}

		/*
		 * Don't accept if wastage is over 1/3.  The
		 * greater-than comparison ensures upa==1 always
		 * passes the following check.
		 */
		if (wasted > num_possible_cpus() / 3)
			continue;

		/* and then don't consume more memory */
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}
	upa = best_upa;

	/* allocate and fill alloc_info */
	for (group = 0; group < nr_groups; group++)
		nr_units += roundup(group_cnt[group], upa);

	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
	if (!ai)
		return ERR_PTR(-ENOMEM);
	cpu_map = ai->groups[0].cpu_map;

	for (group = 0; group < nr_groups; group++) {
		ai->groups[group].cpu_map = cpu_map;
		cpu_map += roundup(group_cnt[group], upa);
	}

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = alloc_size / upa;
	ai->atom_size = atom_size;
	ai->alloc_size = alloc_size;

	for (group = 0, unit = 0; group_cnt[group]; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];

		/*
		 * Initialize base_offset as if all groups are located
		 * back-to-back.  The caller should update this to
		 * reflect actual allocation.
		 */
		gi->base_offset = unit * ai->unit_size;

		for_each_possible_cpu(cpu)
			if (group_map[cpu] == group)
				gi->cpu_map[gi->nr_units++] = cpu;
		gi->nr_units = roundup(gi->nr_units, upa);
		unit += gi->nr_units;
	}
	BUG_ON(unit != nr_units);

	return ai;
}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */

#if defined(BUILD_EMBED_FIRST_CHUNK)
/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @alloc_fn: function to allocate percpu page
 * @free_fn: function to free percpu page
 *
 * This is a helper to ease setting up embedded first percpu chunk and
 * can be called where pcpu_setup_first_chunk() is expected.
 *
 * If this function is used to setup the first chunk, it is allocated
 * by calling @alloc_fn and used as-is without being mapped into
 * vmalloc area.  Allocations are always whole multiples of @atom_size
 * aligned to @atom_size.
 *
 * This enables the first chunk to piggy back on the linear physical
 * mapping which often uses larger page size.  Please note that this
 * can result in very sparse cpu->unit mapping on NUMA machines thus
 * requiring large vmalloc address space.  Don't use this allocator if
 * vmalloc space is not orders of magnitude larger than distances
 * between node memory addresses (ie. 32bit NUMA machines).
 *
 * @dyn_size specifies the minimum dynamic area size.
 *
 * If the needed size is smaller than the minimum or specified unit
 * size, the leftover is returned using @free_fn.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				  size_t atom_size,
				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				  pcpu_fc_alloc_fn_t alloc_fn,
				  pcpu_fc_free_fn_t free_fn)
{
	void *base = (void *)ULONG_MAX;
	void **areas = NULL;
	struct pcpu_alloc_info *ai;
	size_t size_sum, areas_size;
	unsigned long max_distance;
	int group, i, highest_group, rc;

	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
				   cpu_distance_fn);
	if (IS_ERR(ai))
		return PTR_ERR(ai);

	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

	areas = memblock_virt_alloc_nopanic(areas_size, 0);
	if (!areas) {
		rc = -ENOMEM;
		goto out_free;
	}

	/* allocate, copy and determine base address & max_distance */
	highest_group = 0;
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		unsigned int cpu = NR_CPUS;
		void *ptr;

		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
			cpu = gi->cpu_map[i];
		BUG_ON(cpu == NR_CPUS);

		/* allocate space for the whole group */
		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
		if (!ptr) {
			rc = -ENOMEM;
			goto out_free_areas;
		}
		/* kmemleak tracks the percpu allocations separately */
		kmemleak_free(ptr);
		areas[group] = ptr;

		base = min(ptr, base);
		if (ptr > areas[highest_group])
			highest_group = group;
	}
	max_distance = areas[highest_group] - base;
	max_distance += ai->unit_size * ai->groups[highest_group].nr_units;

	/* warn if maximum distance is further than 75% of vmalloc space */
	if (max_distance > VMALLOC_TOTAL * 3 / 4) {
		pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
			max_distance, VMALLOC_TOTAL);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		/* and fail if we have fallback */
		rc = -EINVAL;
		goto out_free_areas;
#endif
	}

	/*
	 * Copy data and free unused parts.  This should happen after all
	 * allocations are complete; otherwise, we may end up with
	 * overlapping groups.
	 */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		void *ptr = areas[group];

		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
			if (gi->cpu_map[i] == NR_CPUS) {
				/* unused unit, free whole */
				free_fn(ptr, ai->unit_size);
				continue;
			}
			/* copy and return the unused part */
			memcpy(ptr, __per_cpu_load, ai->static_size);
			free_fn(ptr + size_sum, ai->unit_size - size_sum);
		}
	}

	/* base address is now known, determine group base offsets */
	for (group = 0; group < ai->nr_groups; group++) {
		ai->groups[group].base_offset = areas[group] - base;
	}

	pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
		ai->dyn_size, ai->unit_size);

	rc = pcpu_setup_first_chunk(ai, base);
	goto out_free;

out_free_areas:
	for (group = 0; group < ai->nr_groups; group++)
		if (areas[group])
			free_fn(areas[group],
				ai->groups[group].nr_units * ai->unit_size);
out_free:
	pcpu_free_alloc_info(ai);
	if (areas)
		memblock_free_early(__pa(areas), areas_size);
	return rc;
}
#endif /* BUILD_EMBED_FIRST_CHUNK */

#ifdef BUILD_PAGE_FIRST_CHUNK
/**
 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
 * @reserved_size: the size of reserved percpu area in bytes
 * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
 * @free_fn: function to free percpu page, always called with PAGE_SIZE
 * @populate_pte_fn: function to populate pte
 *
 * This is a helper to ease setting up page-remapped first percpu
 * chunk and can be called where pcpu_setup_first_chunk() is expected.
 *
 * This is the basic allocator.  Static percpu area is allocated
 * page-by-page into vmalloc area.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_page_first_chunk(size_t reserved_size,
				 pcpu_fc_alloc_fn_t alloc_fn,
				 pcpu_fc_free_fn_t free_fn,
				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
	static struct vm_struct vm;
	struct pcpu_alloc_info *ai;
	char psize_str[16];
	int unit_pages;
	size_t pages_size;
	struct page **pages;
	int unit, i, j, rc;
	int upa;
	int nr_g0_units;

	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);

	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
	if (IS_ERR(ai))
		return PTR_ERR(ai);
	BUG_ON(ai->nr_groups != 1);
	upa = ai->alloc_size / ai->unit_size;
	nr_g0_units = roundup(num_possible_cpus(), upa);
	if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
		pcpu_free_alloc_info(ai);
		return -EINVAL;
	}

	unit_pages = ai->unit_size >> PAGE_SHIFT;

	/* unaligned allocations can't be freed, round up to page size */
	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
			       sizeof(pages[0]));
	pages = memblock_virt_alloc(pages_size, 0);

	/* allocate pages */
	j = 0;
	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned int cpu = ai->groups[0].cpu_map[unit];
		for (i = 0; i < unit_pages; i++) {
			void *ptr;

			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
			if (!ptr) {
				pr_warn("failed to allocate %s page for cpu%u\n",
					psize_str, cpu);
				goto enomem;
			}
			/* kmemleak tracks the percpu allocations separately */
			kmemleak_free(ptr);
			pages[j++] = virt_to_page(ptr);
		}
	}

	/* allocate vm area, map the pages and copy static data */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * ai->unit_size;
	vm_area_register_early(&vm, PAGE_SIZE);

	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned long unit_addr =
			(unsigned long)vm.addr + unit * ai->unit_size;

		for (i = 0; i < unit_pages; i++)
			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));

		/* pte already populated, the following shouldn't fail */
		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
				      unit_pages);
		if (rc < 0)
			panic("failed to map percpu area, err=%d\n", rc);

		/*
		 * FIXME: Archs with virtual cache should flush local
		 * cache for the linear mapping here - something
		 * equivalent to flush_cache_vmap() on the local cpu.
		 * flush_cache_vmap() can't be used as most supporting
		 * data structures are not set up yet.
		 */

		/* copy static data */
		memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
	}

	/* we're ready, commit */
	pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n",
		unit_pages, psize_str, vm.addr, ai->static_size,
		ai->reserved_size, ai->dyn_size);

	rc = pcpu_setup_first_chunk(ai, vm.addr);
	goto out_free_ar;

enomem:
	while (--j >= 0)
		free_fn(page_address(pages[j]), PAGE_SIZE);
	rc = -ENOMEM;
out_free_ar:
	memblock_free_early(__pa(pages), pages_size);
	pcpu_free_alloc_info(ai);
	return rc;
}
#endif /* BUILD_PAGE_FIRST_CHUNK */

#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
 * Generic SMP percpu area setup.
 *
 * The embedding helper is used because its behavior closely resembles
 * the original non-dynamic generic percpu area setup.  This is
 * important because many archs have addressing restrictions and might
 * fail if the percpu area is located far away from the previous
 * location.  As an added bonus, in non-NUMA cases, embedding is
 * generally a good idea TLB-wise because percpu area can piggy back
 * on the physical linear memory mapping which uses large page
 * mappings on applicable archs.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
				       size_t align)
{
	return memblock_virt_alloc_from_nopanic(
			size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
{
	memblock_free_early(__pa(ptr), size);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */

#else	/* CONFIG_SMP */

/*
 * UP percpu area setup.
 *
 * UP always uses km-based percpu allocator with identity mapping.
 * Static percpu variables are handled the same way as on SMP but
 * without any of the mapping complexity.
 */
void __init setup_per_cpu_areas(void)
{
	const size_t unit_size =
		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
					 PERCPU_DYNAMIC_RESERVE));
	struct pcpu_alloc_info *ai;
	void *fc;

	ai = pcpu_alloc_alloc_info(1, 1);
	fc = memblock_virt_alloc_from_nopanic(unit_size,
					      PAGE_SIZE,
					      __pa(MAX_DMA_ADDRESS));
	if (!ai || !fc)
		panic("Failed to allocate memory for percpu areas.");
	/* kmemleak tracks the percpu allocations separately */
	kmemleak_free(fc);

	ai->dyn_size = unit_size;
	ai->unit_size = unit_size;
	ai->atom_size = unit_size;
	ai->alloc_size = unit_size;
	ai->groups[0].nr_units = 1;
	ai->groups[0].cpu_map[0] = 0;

	if (pcpu_setup_first_chunk(ai, fc) < 0)
		panic("Failed to initialize percpu areas.");
	pcpu_free_alloc_info(ai);
}

#endif	/* CONFIG_SMP */

/*
 * Percpu allocator is initialized early during boot when neither slab
 * nor workqueue is available.  Plug async management until everything
 * is up and running.
 */
static int __init percpu_enable_async(void)
{
	pcpu_async_enabled = true;
	return 0;
}
subsys_initcall(percpu_enable_async);