/* percpu memory allocator */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/lcm.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
#include <linux/sched.h>

#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/percpu.h>

#include "percpu-internal.h"

/* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
#define PCPU_SLOT_BASE_SHIFT		5

/* low/high watermarks on the number of empty populated pages kept around */
#define PCPU_EMPTY_POP_PAGES_LOW	2
#define PCPU_EMPTY_POP_PAGES_HIGH	4

#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr +		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */

static int pcpu_unit_pages __ro_after_init;
static int pcpu_unit_size __ro_after_init;
static int pcpu_nr_units __ro_after_init;
static int pcpu_atom_size __ro_after_init;
int pcpu_nr_slots __ro_after_init;
static size_t pcpu_chunk_struct_size __ro_after_init;

/* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __ro_after_init;
static unsigned int pcpu_high_unit_cpu __ro_after_init;

/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __ro_after_init;
EXPORT_SYMBOL_GPL(pcpu_base_addr);

static const int *pcpu_unit_map __ro_after_init;		/* cpu -> unit */
const unsigned long *pcpu_unit_offsets __ro_after_init;	/* cpu -> unit offset */

/* group information, used for vm allocation */
static int pcpu_nr_groups __ro_after_init;
static const unsigned long *pcpu_group_offsets __ro_after_init;
static const size_t *pcpu_group_sizes __ro_after_init;

/*
 * The first chunk which always exists.  Note that unlike other
 * chunks, this one can be allocated and mapped in several different
 * ways and thus often doesn't live in the vmalloc area.
 */
struct pcpu_chunk *pcpu_first_chunk __ro_after_init;

/*
 * Optional reserved chunk.  This chunk reserves part of the first
 * chunk and serves it for reserved allocations.  When the reserved
 * region doesn't exist, the following variable is NULL.
 */
struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;

DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]population */

struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */

/* chunks which need their map areas extended, protected by pcpu_lock */
static LIST_HEAD(pcpu_map_extend_chunks);

/*
 * The number of empty populated pages, protected by pcpu_lock.  The
 * reserved chunk doesn't contribute to the count.
 */
int pcpu_nr_empty_pop_pages;

/*
 * The number of populated pages in use by the allocator, protected by
 * pcpu_lock.  This number is kept per unit per chunk, i.e. when a page
 * gets allocated/deallocated, it is allocated/deallocated in all units
 * of a chunk and counted as one.
 */
static unsigned long pcpu_nr_populated;

/*
 * Balance work is used to populate or destroy chunks asynchronously.  We
 * try to keep the number of populated free pages between
 * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and schedule
 * empty chunk destruction after free_percpu().
 */
static void pcpu_balance_workfn(struct work_struct *work);
static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);
static bool pcpu_async_enabled __read_mostly;
static bool pcpu_atomic_alloc_failed;

static void pcpu_schedule_balance_work(void)
{
	if (pcpu_async_enabled)
		schedule_work(&pcpu_balance_work);
}

/**
 * pcpu_addr_in_chunk - check if the address is served from this chunk
 * @chunk: chunk of interest
 * @addr: percpu address
 *
 * RETURNS:
 * True if the address is served from this chunk.
 */
static bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr)
{
	void *start_addr, *end_addr;

	if (!chunk)
		return false;

	start_addr = chunk->base_addr + chunk->start_offset;
	end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE -
		   chunk->end_offset;

	return addr >= start_addr && addr < end_addr;
}

static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);
	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}

static int pcpu_size_to_slot(int size)
{
	if (size == pcpu_unit_size)
		return pcpu_nr_slots - 1;
	return __pcpu_size_to_slot(size);
}

static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || chunk->contig_bits == 0)
		return 0;

	return pcpu_size_to_slot(chunk->free_bytes);
}

/* set the pointer to a chunk in a page struct */
static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
{
	page->index = (unsigned long)pcpu;
}

/* obtain pointer to a chunk from a page struct */
static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
{
	return (struct pcpu_chunk *)page->index;
}

static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
{
	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
}

static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
{
	return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
}

static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	return (unsigned long)chunk->base_addr +
	       pcpu_unit_page_offset(cpu, page_idx);
}

static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(bitmap, end, *rs);
	*re = find_next_bit(bitmap, end, *rs + 1);
}

static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_bit(bitmap, end, *rs);
	*re = find_next_zero_bit(bitmap, end, *rs + 1);
}

/*
 * Bitmap region iterators.  Iterate over the unset/set regions of
 * @bitmap between [@start, @end).  @rs and @re should be integer
 * variables and will be set to the start and end index of the
 * current region.
 */
#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end));   \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))

295
296
297
298
299
300static unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index)
301{
302 return chunk->alloc_map +
303 (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG);
304}
305
306static unsigned long pcpu_off_to_block_index(int off)
307{
308 return off / PCPU_BITMAP_BLOCK_BITS;
309}
310
311static unsigned long pcpu_off_to_block_off(int off)
312{
313 return off & (PCPU_BITMAP_BLOCK_BITS - 1);
314}
315
316static unsigned long pcpu_block_off_to_off(int index, int off)
317{
318 return index * PCPU_BITMAP_BLOCK_BITS + off;
319}
320
321
322
323
324
325
326
327
328
329
330
331
332static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
333 int *bits)
334{
335 int i = pcpu_off_to_block_index(*bit_off);
336 int block_off = pcpu_off_to_block_off(*bit_off);
337 struct pcpu_block_md *block;
338
339 *bits = 0;
340 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
341 block++, i++) {
342
343 if (*bits) {
344 *bits += block->left_free;
345 if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
346 continue;
347 return;
348 }
349
350
351
352
353
354
355
356
357
358 *bits = block->contig_hint;
359 if (*bits && block->contig_hint_start >= block_off &&
360 *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) {
361 *bit_off = pcpu_block_off_to_off(i,
362 block->contig_hint_start);
363 return;
364 }
365
366 block_off = 0;
367
368 *bits = block->right_free;
369 *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
370 }
371}
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
388 int align, int *bit_off, int *bits)
389{
390 int i = pcpu_off_to_block_index(*bit_off);
391 int block_off = pcpu_off_to_block_off(*bit_off);
392 struct pcpu_block_md *block;
393
394 *bits = 0;
395 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
396 block++, i++) {
397
398 if (*bits) {
399 *bits += block->left_free;
400 if (*bits >= alloc_bits)
401 return;
402 if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
403 continue;
404 }
405
406
407 *bits = ALIGN(block->contig_hint_start, align) -
408 block->contig_hint_start;
409
410
411
412
413 if (block->contig_hint &&
414 block->contig_hint_start >= block_off &&
415 block->contig_hint >= *bits + alloc_bits) {
416 *bits += alloc_bits + block->contig_hint_start -
417 block->first_free;
418 *bit_off = pcpu_block_off_to_off(i, block->first_free);
419 return;
420 }
421
422 block_off = 0;
423
424 *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,
425 align);
426 *bits = PCPU_BITMAP_BLOCK_BITS - *bit_off;
427 *bit_off = pcpu_block_off_to_off(i, *bit_off);
428 if (*bits >= alloc_bits)
429 return;
430 }
431
432
433 *bit_off = pcpu_chunk_map_bits(chunk);
434}
435
436
437
438
439
440
441
442#define pcpu_for_each_md_free_region(chunk, bit_off, bits) \
443 for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits)); \
444 (bit_off) < pcpu_chunk_map_bits((chunk)); \
445 (bit_off) += (bits) + 1, \
446 pcpu_next_md_free_region((chunk), &(bit_off), &(bits)))
447
448#define pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) \
449 for (pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
450 &(bits)); \
451 (bit_off) < pcpu_chunk_map_bits((chunk)); \
452 (bit_off) += (bits), \
453 pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
454 &(bits)))

/**
 * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
 * @gfp: allocation flags
 *
 * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
 * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
 * The returned memory is always zeroed.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, gfp);
	else
		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
}

/**
 * pcpu_mem_free - free memory
 * @ptr: memory to free
 *
 * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
 */
static void pcpu_mem_free(void *ptr)
{
	kvfree(ptr);
}

/**
 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
 * @chunk: chunk of interest
 * @oslot: the previous slot it was on
 *
 * This function is called after an allocation or free changed @chunk.
 * New slot according to the changed state is determined and @chunk is
 * moved to the slot.  Note that the reserved chunk is never put on
 * chunk slots.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
	int nslot = pcpu_chunk_slot(chunk);

	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
		if (oslot < nslot)
			list_move(&chunk->list, &pcpu_slot[nslot]);
		else
			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
	}
}
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529static inline int pcpu_cnt_pop_pages(struct pcpu_chunk *chunk, int bit_off,
530 int bits)
531{
532 int page_start = PFN_UP(bit_off * PCPU_MIN_ALLOC_SIZE);
533 int page_end = PFN_DOWN((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
534
535 if (page_start >= page_end)
536 return 0;
537
538
539
540
541
542
543
544
545 return bitmap_weight(chunk->populated, page_end) -
546 bitmap_weight(chunk->populated, page_start);
547}
548
549
550
551
552
553
554
555
556
557
558static void pcpu_chunk_update(struct pcpu_chunk *chunk, int bit_off, int bits)
559{
560 if (bits > chunk->contig_bits) {
561 chunk->contig_bits_start = bit_off;
562 chunk->contig_bits = bits;
563 } else if (bits == chunk->contig_bits && chunk->contig_bits_start &&
564 (!bit_off ||
565 __ffs(bit_off) > __ffs(chunk->contig_bits_start))) {
566
567 chunk->contig_bits_start = bit_off;
568 }
569}
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk)
585{
586 int bit_off, bits, nr_empty_pop_pages;
587
588
589 chunk->contig_bits = 0;
590
591 bit_off = chunk->first_bit;
592 bits = nr_empty_pop_pages = 0;
593 pcpu_for_each_md_free_region(chunk, bit_off, bits) {
594 pcpu_chunk_update(chunk, bit_off, bits);
595
596 nr_empty_pop_pages += pcpu_cnt_pop_pages(chunk, bit_off, bits);
597 }
598
599
600
601
602
603
604
605
606
607 if (chunk != pcpu_reserved_chunk)
608 pcpu_nr_empty_pop_pages +=
609 (nr_empty_pop_pages - chunk->nr_empty_pop_pages);
610
611 chunk->nr_empty_pop_pages = nr_empty_pop_pages;
612}
613
614
615
616
617
618
619
620
621
622
623
624static void pcpu_block_update(struct pcpu_block_md *block, int start, int end)
625{
626 int contig = end - start;
627
628 block->first_free = min(block->first_free, start);
629 if (start == 0)
630 block->left_free = contig;
631
632 if (end == PCPU_BITMAP_BLOCK_BITS)
633 block->right_free = contig;
634
635 if (contig > block->contig_hint) {
636 block->contig_hint_start = start;
637 block->contig_hint = contig;
638 } else if (block->contig_hint_start && contig == block->contig_hint &&
639 (!start || __ffs(start) > __ffs(block->contig_hint_start))) {
640
641 block->contig_hint_start = start;
642 }
643}
644
645
646
647
648
649
650
651
652
653static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
654{
655 struct pcpu_block_md *block = chunk->md_blocks + index;
656 unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
657 int rs, re;
658
659
660 block->contig_hint = 0;
661 block->left_free = block->right_free = 0;
662
663
664 pcpu_for_each_unpop_region(alloc_map, rs, re, block->first_free,
665 PCPU_BITMAP_BLOCK_BITS) {
666 pcpu_block_update(block, rs, re);
667 }
668}
669
670
671
672
673
674
675
676
677
678
679
680static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
681 int bits)
682{
683 struct pcpu_block_md *s_block, *e_block, *block;
684 int s_index, e_index;
685 int s_off, e_off;
686
687
688
689
690
691
692
693 s_index = pcpu_off_to_block_index(bit_off);
694 e_index = pcpu_off_to_block_index(bit_off + bits - 1);
695 s_off = pcpu_off_to_block_off(bit_off);
696 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
697
698 s_block = chunk->md_blocks + s_index;
699 e_block = chunk->md_blocks + e_index;
700
701
702
703
704
705
706
707 if (s_off == s_block->first_free)
708 s_block->first_free = find_next_zero_bit(
709 pcpu_index_alloc_map(chunk, s_index),
710 PCPU_BITMAP_BLOCK_BITS,
711 s_off + bits);
712
713 if (s_off >= s_block->contig_hint_start &&
714 s_off < s_block->contig_hint_start + s_block->contig_hint) {
715
716 pcpu_block_refresh_hint(chunk, s_index);
717 } else {
718
719 s_block->left_free = min(s_block->left_free, s_off);
720 if (s_index == e_index)
721 s_block->right_free = min_t(int, s_block->right_free,
722 PCPU_BITMAP_BLOCK_BITS - e_off);
723 else
724 s_block->right_free = 0;
725 }
726
727
728
729
730 if (s_index != e_index) {
731
732
733
734
735 e_block->first_free = find_next_zero_bit(
736 pcpu_index_alloc_map(chunk, e_index),
737 PCPU_BITMAP_BLOCK_BITS, e_off);
738
739 if (e_off == PCPU_BITMAP_BLOCK_BITS) {
740
741 e_block++;
742 } else {
743 if (e_off > e_block->contig_hint_start) {
744
745 pcpu_block_refresh_hint(chunk, e_index);
746 } else {
747 e_block->left_free = 0;
748 e_block->right_free =
749 min_t(int, e_block->right_free,
750 PCPU_BITMAP_BLOCK_BITS - e_off);
751 }
752 }
753
754
755 for (block = s_block + 1; block < e_block; block++) {
756 block->contig_hint = 0;
757 block->left_free = 0;
758 block->right_free = 0;
759 }
760 }
761
762
763
764
765
766
767 if (bit_off >= chunk->contig_bits_start &&
768 bit_off < chunk->contig_bits_start + chunk->contig_bits)
769 pcpu_chunk_refresh_hint(chunk);
770}
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
790 int bits)
791{
792 struct pcpu_block_md *s_block, *e_block, *block;
793 int s_index, e_index;
794 int s_off, e_off;
795 int start, end;
796
797
798
799
800
801
802
803 s_index = pcpu_off_to_block_index(bit_off);
804 e_index = pcpu_off_to_block_index(bit_off + bits - 1);
805 s_off = pcpu_off_to_block_off(bit_off);
806 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
807
808 s_block = chunk->md_blocks + s_index;
809 e_block = chunk->md_blocks + e_index;
810
811
812
813
814
815
816
817
818
819
820
821 start = s_off;
822 if (s_off == s_block->contig_hint + s_block->contig_hint_start) {
823 start = s_block->contig_hint_start;
824 } else {
825
826
827
828
829
830
831 int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index),
832 start);
833 start = (start == l_bit) ? 0 : l_bit + 1;
834 }
835
836 end = e_off;
837 if (e_off == e_block->contig_hint_start)
838 end = e_block->contig_hint_start + e_block->contig_hint;
839 else
840 end = find_next_bit(pcpu_index_alloc_map(chunk, e_index),
841 PCPU_BITMAP_BLOCK_BITS, end);
842
843
844 e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
845 pcpu_block_update(s_block, start, e_off);
846
847
848 if (s_index != e_index) {
849
850 pcpu_block_update(e_block, 0, end);
851
852
853 for (block = s_block + 1; block < e_block; block++) {
854 block->first_free = 0;
855 block->contig_hint_start = 0;
856 block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
857 block->left_free = PCPU_BITMAP_BLOCK_BITS;
858 block->right_free = PCPU_BITMAP_BLOCK_BITS;
859 }
860 }
861
862
863
864
865
866
867
868 if ((ALIGN_DOWN(end, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS)) >
869 ALIGN(start, min(PCPU_BITS_PER_PAGE, PCPU_BITMAP_BLOCK_BITS))) ||
870 s_index != e_index)
871 pcpu_chunk_refresh_hint(chunk);
872 else
873 pcpu_chunk_update(chunk, pcpu_block_off_to_off(s_index, start),
874 s_block->contig_hint);
875}
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
891 int *next_off)
892{
893 int page_start, page_end, rs, re;
894
895 page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
896 page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
897
898 rs = page_start;
899 pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
900 if (rs >= page_end)
901 return true;
902
903 *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
904 return false;
905}
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
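/*
 * pcpu_find_block_fit - find an offset in the chunk that can hold a request
 * @chunk: chunk of interest
 * @alloc_bits: size of the request in allocation units
 * @align: required alignment of the area
 * @pop_only: only consider already populated regions
 *
 * A quick check against the chunk-wide contig hint rejects chunks that
 * cannot possibly fit the request; otherwise the metadata blocks are
 * walked for the first region large enough (and populated, if
 * @pop_only).
 *
 * RETURNS:
 * The bit offset to start the bitmap search at, or -1 if nothing fits.
 */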
926static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
927 size_t align, bool pop_only)
928{
929 int bit_off, bits, next_off;
930
931
932
933
934
935
936
937 bit_off = ALIGN(chunk->contig_bits_start, align) -
938 chunk->contig_bits_start;
939 if (bit_off + alloc_bits > chunk->contig_bits)
940 return -1;
941
942 bit_off = chunk->first_bit;
943 bits = 0;
944 pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
945 if (!pop_only || pcpu_is_populated(chunk, bit_off, bits,
946 &next_off))
947 break;
948
949 bit_off = next_off;
950 bits = 0;
951 }
952
953 if (bit_off == pcpu_chunk_map_bits(chunk))
954 return -1;
955
956 return bit_off;
957}
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
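/*
 * pcpu_alloc_area - allocate an area from a pcpu_chunk
 * @chunk: chunk of interest
 * @alloc_bits: size of the request in allocation units
 * @align: required alignment of the area
 * @start: bit offset to begin searching from
 *
 * Marks the area in the allocation and boundary maps, adjusts the
 * chunk's free byte count, first free bit and block hints, and moves
 * the chunk to its new slot.  Must be called with pcpu_lock held.
 *
 * RETURNS:
 * Allocated byte offset in @chunk on success, -1 on failure.
 */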
978static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
979 size_t align, int start)
980{
981 size_t align_mask = (align) ? (align - 1) : 0;
982 int bit_off, end, oslot;
983
984 lockdep_assert_held(&pcpu_lock);
985
986 oslot = pcpu_chunk_slot(chunk);
987
988
989
990
991 end = start + alloc_bits + PCPU_BITMAP_BLOCK_BITS;
992 bit_off = bitmap_find_next_zero_area(chunk->alloc_map, end, start,
993 alloc_bits, align_mask);
994 if (bit_off >= end)
995 return -1;
996
997
998 bitmap_set(chunk->alloc_map, bit_off, alloc_bits);
999
1000
1001 set_bit(bit_off, chunk->bound_map);
1002 bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
1003 set_bit(bit_off + alloc_bits, chunk->bound_map);
1004
1005 chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
1006
1007
1008 if (bit_off == chunk->first_bit)
1009 chunk->first_bit = find_next_zero_bit(
1010 chunk->alloc_map,
1011 pcpu_chunk_map_bits(chunk),
1012 bit_off + alloc_bits);
1013
1014 pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);
1015
1016 pcpu_chunk_relocate(chunk, oslot);
1017
1018 return bit_off * PCPU_MIN_ALLOC_SIZE;
1019}
1020
1021
1022
1023
1024
1025
1026
1027
1028
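/*
 * pcpu_free_area - free the area previously returned by pcpu_alloc_area()
 * @chunk: chunk the area belongs to
 * @off: byte offset of the area within the chunk
 *
 * The boundary map is consulted to find the extent of the allocation,
 * the allocation map bits are cleared, and the chunk and block hints
 * are updated.  Must be called with pcpu_lock held.
 */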
1029static void pcpu_free_area(struct pcpu_chunk *chunk, int off)
1030{
1031 int bit_off, bits, end, oslot;
1032
1033 lockdep_assert_held(&pcpu_lock);
1034 pcpu_stats_area_dealloc(chunk);
1035
1036 oslot = pcpu_chunk_slot(chunk);
1037
1038 bit_off = off / PCPU_MIN_ALLOC_SIZE;
1039
1040
1041 end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
1042 bit_off + 1);
1043 bits = end - bit_off;
1044 bitmap_clear(chunk->alloc_map, bit_off, bits);
1045
1046
1047 chunk->free_bytes += bits * PCPU_MIN_ALLOC_SIZE;
1048
1049
1050 chunk->first_bit = min(chunk->first_bit, bit_off);
1051
1052 pcpu_block_update_hint_free(chunk, bit_off, bits);
1053
1054 pcpu_chunk_relocate(chunk, oslot);
1055}
1056
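/*
 * pcpu_init_md_blocks - initialize the metadata blocks of a chunk
 * @chunk: chunk whose md_blocks are initialized
 *
 * Each block starts out completely free: the contig hint and both the
 * left and right free counters span the whole block.
 */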
1057static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
1058{
1059 struct pcpu_block_md *md_block;
1060
1061 for (md_block = chunk->md_blocks;
1062 md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
1063 md_block++) {
1064 md_block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
1065 md_block->left_free = PCPU_BITMAP_BLOCK_BITS;
1066 md_block->right_free = PCPU_BITMAP_BLOCK_BITS;
1067 }
1068}
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
1084 int map_size)
1085{
1086 struct pcpu_chunk *chunk;
1087 unsigned long aligned_addr, lcm_align;
1088 int start_offset, offset_bits, region_size, region_bits;
1089
1090
1091 aligned_addr = tmp_addr & PAGE_MASK;
1092
1093 start_offset = tmp_addr - aligned_addr;
1094
1095
1096
1097
1098
1099
1100 lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
1101 region_size = ALIGN(start_offset + map_size, lcm_align);
1102
1103
1104 chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) +
1105 BITS_TO_LONGS(region_size >> PAGE_SHIFT),
1106 0);
1107
1108 INIT_LIST_HEAD(&chunk->list);
1109
1110 chunk->base_addr = (void *)aligned_addr;
1111 chunk->start_offset = start_offset;
1112 chunk->end_offset = region_size - chunk->start_offset - map_size;
1113
1114 chunk->nr_pages = region_size >> PAGE_SHIFT;
1115 region_bits = pcpu_chunk_map_bits(chunk);
1116
1117 chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) *
1118 sizeof(chunk->alloc_map[0]), 0);
1119 chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) *
1120 sizeof(chunk->bound_map[0]), 0);
1121 chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) *
1122 sizeof(chunk->md_blocks[0]), 0);
1123 pcpu_init_md_blocks(chunk);
1124
1125
1126 chunk->immutable = true;
1127 bitmap_fill(chunk->populated, chunk->nr_pages);
1128 chunk->nr_populated = chunk->nr_pages;
1129 chunk->nr_empty_pop_pages =
1130 pcpu_cnt_pop_pages(chunk, start_offset / PCPU_MIN_ALLOC_SIZE,
1131 map_size / PCPU_MIN_ALLOC_SIZE);
1132
1133 chunk->contig_bits = map_size / PCPU_MIN_ALLOC_SIZE;
1134 chunk->free_bytes = map_size;
1135
1136 if (chunk->start_offset) {
1137
1138 offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
1139 bitmap_set(chunk->alloc_map, 0, offset_bits);
1140 set_bit(0, chunk->bound_map);
1141 set_bit(offset_bits, chunk->bound_map);
1142
1143 chunk->first_bit = offset_bits;
1144
1145 pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
1146 }
1147
1148 if (chunk->end_offset) {
1149
1150 offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE;
1151 bitmap_set(chunk->alloc_map,
1152 pcpu_chunk_map_bits(chunk) - offset_bits,
1153 offset_bits);
1154 set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE,
1155 chunk->bound_map);
1156 set_bit(region_bits, chunk->bound_map);
1157
1158 pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk)
1159 - offset_bits, offset_bits);
1160 }
1161
1162 return chunk;
1163}
1164
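/*
 * pcpu_alloc_chunk - allocate and initialize a new, empty chunk
 * @gfp: allocation flags for the chunk struct and its bitmaps
 *
 * The returned chunk has zeroed allocation/boundary maps, initialized
 * metadata blocks, and all of its space accounted as free; no pages
 * are populated here.  Returns NULL if any allocation fails.
 */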
1165static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
1166{
1167 struct pcpu_chunk *chunk;
1168 int region_bits;
1169
1170 chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
1171 if (!chunk)
1172 return NULL;
1173
1174 INIT_LIST_HEAD(&chunk->list);
1175 chunk->nr_pages = pcpu_unit_pages;
1176 region_bits = pcpu_chunk_map_bits(chunk);
1177
1178 chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
1179 sizeof(chunk->alloc_map[0]), gfp);
1180 if (!chunk->alloc_map)
1181 goto alloc_map_fail;
1182
1183 chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
1184 sizeof(chunk->bound_map[0]), gfp);
1185 if (!chunk->bound_map)
1186 goto bound_map_fail;
1187
1188 chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
1189 sizeof(chunk->md_blocks[0]), gfp);
1190 if (!chunk->md_blocks)
1191 goto md_blocks_fail;
1192
1193 pcpu_init_md_blocks(chunk);
1194
1195
1196 chunk->contig_bits = region_bits;
1197 chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;
1198
1199 return chunk;
1200
1201md_blocks_fail:
1202 pcpu_mem_free(chunk->bound_map);
1203bound_map_fail:
1204 pcpu_mem_free(chunk->alloc_map);
1205alloc_map_fail:
1206 pcpu_mem_free(chunk);
1207
1208 return NULL;
1209}
1210
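/* pcpu_free_chunk - undo pcpu_alloc_chunk(): free the maps, md_blocks and the chunk itself */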
1211static void pcpu_free_chunk(struct pcpu_chunk *chunk)
1212{
1213 if (!chunk)
1214 return;
1215 pcpu_mem_free(chunk->md_blocks);
1216 pcpu_mem_free(chunk->bound_map);
1217 pcpu_mem_free(chunk->alloc_map);
1218 pcpu_mem_free(chunk);
1219}
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
1236 int page_end, bool for_alloc)
1237{
1238 int nr = page_end - page_start;
1239
1240 lockdep_assert_held(&pcpu_lock);
1241
1242 bitmap_set(chunk->populated, page_start, nr);
1243 chunk->nr_populated += nr;
1244 pcpu_nr_populated += nr;
1245
1246 if (!for_alloc) {
1247 chunk->nr_empty_pop_pages += nr;
1248 pcpu_nr_empty_pop_pages += nr;
1249 }
1250}
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
1263 int page_start, int page_end)
1264{
1265 int nr = page_end - page_start;
1266
1267 lockdep_assert_held(&pcpu_lock);
1268
1269 bitmap_clear(chunk->populated, page_start, nr);
1270 chunk->nr_populated -= nr;
1271 chunk->nr_empty_pop_pages -= nr;
1272 pcpu_nr_empty_pop_pages -= nr;
1273 pcpu_nr_populated -= nr;
1274}
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
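/*
 * Chunk management implementation.  The following functions are
 * declared here and provided by percpu-km.c or percpu-vm.c, depending
 * on whether percpu memory is mapped from kernel memory or vmalloc
 * space (see the #include below).
 */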
1291static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
1292 int page_start, int page_end, gfp_t gfp);
1293static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
1294 int page_start, int page_end);
1295static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
1296static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
1297static struct page *pcpu_addr_to_page(void *addr);
1298static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
1299
1300#ifdef CONFIG_NEED_PER_CPU_KM
1301#include "percpu-km.c"
1302#else
1303#include "percpu-vm.c"
1304#endif
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
1317{
1318
1319 if (pcpu_addr_in_chunk(pcpu_first_chunk, addr))
1320 return pcpu_first_chunk;
1321
1322
1323 if (pcpu_addr_in_chunk(pcpu_reserved_chunk, addr))
1324 return pcpu_reserved_chunk;
1325
1326
1327
1328
1329
1330
1331
1332
1333 addr += pcpu_unit_offsets[raw_smp_processor_id()];
1334 return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
1335}
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
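/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 * @gfp: allocation flags
 *
 * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't
 * contain %GFP_KERNEL, the allocation is atomic and only already
 * populated pages are used.  If @gfp has __GFP_NOWARN then no warning
 * is triggered on invalid or failed allocation requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */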
1352static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
1353 gfp_t gfp)
1354{
1355
1356 gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
1357 bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
1358 bool do_warn = !(gfp & __GFP_NOWARN);
1359 static int warn_limit = 10;
1360 struct pcpu_chunk *chunk;
1361 const char *err;
1362 int slot, off, cpu, ret;
1363 unsigned long flags;
1364 void __percpu *ptr;
1365 size_t bits, bit_align;
1366
1367
1368
1369
1370
1371
1372
1373 if (unlikely(align < PCPU_MIN_ALLOC_SIZE))
1374 align = PCPU_MIN_ALLOC_SIZE;
1375
1376 size = ALIGN(size, PCPU_MIN_ALLOC_SIZE);
1377 bits = size >> PCPU_MIN_ALLOC_SHIFT;
1378 bit_align = align >> PCPU_MIN_ALLOC_SHIFT;
1379
1380 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
1381 !is_power_of_2(align))) {
1382 WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
1383 size, align);
1384 return NULL;
1385 }
1386
1387 if (!is_atomic) {
1388
1389
1390
1391
1392
1393 if (gfp & __GFP_NOFAIL)
1394 mutex_lock(&pcpu_alloc_mutex);
1395 else if (mutex_lock_killable(&pcpu_alloc_mutex))
1396 return NULL;
1397 }
1398
1399 spin_lock_irqsave(&pcpu_lock, flags);
1400
1401
1402 if (reserved && pcpu_reserved_chunk) {
1403 chunk = pcpu_reserved_chunk;
1404
1405 off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic);
1406 if (off < 0) {
1407 err = "alloc from reserved chunk failed";
1408 goto fail_unlock;
1409 }
1410
1411 off = pcpu_alloc_area(chunk, bits, bit_align, off);
1412 if (off >= 0)
1413 goto area_found;
1414
1415 err = "alloc from reserved chunk failed";
1416 goto fail_unlock;
1417 }
1418
1419restart:
1420
1421 for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
1422 list_for_each_entry(chunk, &pcpu_slot[slot], list) {
1423 off = pcpu_find_block_fit(chunk, bits, bit_align,
1424 is_atomic);
1425 if (off < 0)
1426 continue;
1427
1428 off = pcpu_alloc_area(chunk, bits, bit_align, off);
1429 if (off >= 0)
1430 goto area_found;
1431
1432 }
1433 }
1434
1435 spin_unlock_irqrestore(&pcpu_lock, flags);
1436
1437
1438
1439
1440
1441
1442 if (is_atomic) {
1443 err = "atomic alloc failed, no space left";
1444 goto fail;
1445 }
1446
1447 if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
1448 chunk = pcpu_create_chunk(pcpu_gfp);
1449 if (!chunk) {
1450 err = "failed to allocate new chunk";
1451 goto fail;
1452 }
1453
1454 spin_lock_irqsave(&pcpu_lock, flags);
1455 pcpu_chunk_relocate(chunk, -1);
1456 } else {
1457 spin_lock_irqsave(&pcpu_lock, flags);
1458 }
1459
1460 goto restart;
1461
1462area_found:
1463 pcpu_stats_area_alloc(chunk, size);
1464 spin_unlock_irqrestore(&pcpu_lock, flags);
1465
1466
1467 if (!is_atomic) {
1468 int page_start, page_end, rs, re;
1469
1470 page_start = PFN_DOWN(off);
1471 page_end = PFN_UP(off + size);
1472
1473 pcpu_for_each_unpop_region(chunk->populated, rs, re,
1474 page_start, page_end) {
1475 WARN_ON(chunk->immutable);
1476
1477 ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
1478
1479 spin_lock_irqsave(&pcpu_lock, flags);
1480 if (ret) {
1481 pcpu_free_area(chunk, off);
1482 err = "failed to populate";
1483 goto fail_unlock;
1484 }
1485 pcpu_chunk_populated(chunk, rs, re, true);
1486 spin_unlock_irqrestore(&pcpu_lock, flags);
1487 }
1488
1489 mutex_unlock(&pcpu_alloc_mutex);
1490 }
1491
1492 if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
1493 pcpu_schedule_balance_work();
1494
1495
1496 for_each_possible_cpu(cpu)
1497 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
1498
1499 ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
1500 kmemleak_alloc_percpu(ptr, size, gfp);
1501
1502 trace_percpu_alloc_percpu(reserved, is_atomic, size, align,
1503 chunk->base_addr, off, ptr);
1504
1505 return ptr;
1506
1507fail_unlock:
1508 spin_unlock_irqrestore(&pcpu_lock, flags);
1509fail:
1510 trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
1511
1512 if (!is_atomic && do_warn && warn_limit) {
1513 pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
1514 size, align, is_atomic, err);
1515 dump_stack();
1516 if (!--warn_limit)
1517 pr_info("limit reached, disable warning\n");
1518 }
1519 if (is_atomic) {
1520
1521 pcpu_atomic_alloc_failed = true;
1522 pcpu_schedule_balance_work();
1523 } else {
1524 mutex_unlock(&pcpu_alloc_mutex);
1525 }
1526 return NULL;
1527}
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
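/**
 * __alloc_percpu_gfp - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @gfp: allocation flags
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.  If
 * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
 * fail under memory pressure.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */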
1544void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
1545{
1546 return pcpu_alloc(size, align, false, gfp);
1547}
1548EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);
1549
1550
1551
1552
1553
1554
1555
1556
1557void __percpu *__alloc_percpu(size_t size, size_t align)
1558{
1559 return pcpu_alloc(size, align, false, GFP_KERNEL);
1560}
1561EXPORT_SYMBOL_GPL(__alloc_percpu);
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
1580{
1581 return pcpu_alloc(size, align, true, GFP_KERNEL);
1582}
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
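/*
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
 * Reclaims fully free chunks (keeping one spare) and then populates
 * empty pages until PCPU_EMPTY_POP_PAGES_HIGH is reached, so atomic
 * allocations can be served without hitting the page allocator.
 */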
1595static void pcpu_balance_workfn(struct work_struct *work)
1596{
1597
1598 const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
1599 LIST_HEAD(to_free);
1600 struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
1601 struct pcpu_chunk *chunk, *next;
1602 int slot, nr_to_pop, ret;
1603
1604
1605
1606
1607
1608 mutex_lock(&pcpu_alloc_mutex);
1609 spin_lock_irq(&pcpu_lock);
1610
1611 list_for_each_entry_safe(chunk, next, free_head, list) {
1612 WARN_ON(chunk->immutable);
1613
1614
1615 if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
1616 continue;
1617
1618 list_move(&chunk->list, &to_free);
1619 }
1620
1621 spin_unlock_irq(&pcpu_lock);
1622
1623 list_for_each_entry_safe(chunk, next, &to_free, list) {
1624 int rs, re;
1625
1626 pcpu_for_each_pop_region(chunk->populated, rs, re, 0,
1627 chunk->nr_pages) {
1628 pcpu_depopulate_chunk(chunk, rs, re);
1629 spin_lock_irq(&pcpu_lock);
1630 pcpu_chunk_depopulated(chunk, rs, re);
1631 spin_unlock_irq(&pcpu_lock);
1632 }
1633 pcpu_destroy_chunk(chunk);
1634 cond_resched();
1635 }
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647retry_pop:
1648 if (pcpu_atomic_alloc_failed) {
1649 nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
1650
1651 pcpu_atomic_alloc_failed = false;
1652 } else {
1653 nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
1654 pcpu_nr_empty_pop_pages,
1655 0, PCPU_EMPTY_POP_PAGES_HIGH);
1656 }
1657
1658 for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
1659 int nr_unpop = 0, rs, re;
1660
1661 if (!nr_to_pop)
1662 break;
1663
1664 spin_lock_irq(&pcpu_lock);
1665 list_for_each_entry(chunk, &pcpu_slot[slot], list) {
1666 nr_unpop = chunk->nr_pages - chunk->nr_populated;
1667 if (nr_unpop)
1668 break;
1669 }
1670 spin_unlock_irq(&pcpu_lock);
1671
1672 if (!nr_unpop)
1673 continue;
1674
1675
1676 pcpu_for_each_unpop_region(chunk->populated, rs, re, 0,
1677 chunk->nr_pages) {
1678 int nr = min(re - rs, nr_to_pop);
1679
1680 ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
1681 if (!ret) {
1682 nr_to_pop -= nr;
1683 spin_lock_irq(&pcpu_lock);
1684 pcpu_chunk_populated(chunk, rs, rs + nr, false);
1685 spin_unlock_irq(&pcpu_lock);
1686 } else {
1687 nr_to_pop = 0;
1688 }
1689
1690 if (!nr_to_pop)
1691 break;
1692 }
1693 }
1694
1695 if (nr_to_pop) {
1696
1697 chunk = pcpu_create_chunk(gfp);
1698 if (chunk) {
1699 spin_lock_irq(&pcpu_lock);
1700 pcpu_chunk_relocate(chunk, -1);
1701 spin_unlock_irq(&pcpu_lock);
1702 goto retry_pop;
1703 }
1704 }
1705
1706 mutex_unlock(&pcpu_alloc_mutex);
1707}
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
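/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.  Can be called from atomic context.  If the
 * releasing chunk becomes entirely free and another fully free chunk
 * already exists, the balance work is scheduled to reclaim it.
 */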
1718void free_percpu(void __percpu *ptr)
1719{
1720 void *addr;
1721 struct pcpu_chunk *chunk;
1722 unsigned long flags;
1723 int off;
1724
1725 if (!ptr)
1726 return;
1727
1728 kmemleak_free_percpu(ptr);
1729
1730 addr = __pcpu_ptr_to_addr(ptr);
1731
1732 spin_lock_irqsave(&pcpu_lock, flags);
1733
1734 chunk = pcpu_chunk_addr_search(addr);
1735 off = addr - chunk->base_addr;
1736
1737 pcpu_free_area(chunk, off);
1738
1739
1740 if (chunk->free_bytes == pcpu_unit_size) {
1741 struct pcpu_chunk *pos;
1742
1743 list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
1744 if (pos != chunk) {
1745 pcpu_schedule_balance_work();
1746 break;
1747 }
1748 }
1749
1750 trace_percpu_free_percpu(chunk->base_addr, off, ptr);
1751
1752 spin_unlock_irqrestore(&pcpu_lock, flags);
1753}
1754EXPORT_SYMBOL_GPL(free_percpu);
1755
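/*
 * __is_kernel_percpu_address - test whether @addr falls within the kernel
 * static percpu area of any possible CPU.  When it does and @can_addr is
 * non-NULL, the matching canonical (boot-cpu based) percpu address is
 * stored in *can_addr.
 */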
1756bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
1757{
1758#ifdef CONFIG_SMP
1759 const size_t static_size = __per_cpu_end - __per_cpu_start;
1760 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
1761 unsigned int cpu;
1762
1763 for_each_possible_cpu(cpu) {
1764 void *start = per_cpu_ptr(base, cpu);
1765 void *va = (void *)addr;
1766
1767 if (va >= start && va < start + static_size) {
1768 if (can_addr) {
1769 *can_addr = (unsigned long) (va - start);
1770 *can_addr += (unsigned long)
1771 per_cpu_ptr(base, get_boot_cpu_id());
1772 }
1773 return true;
1774 }
1775 }
1776#endif
1777
1778 return false;
1779}
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792bool is_kernel_percpu_address(unsigned long addr)
1793{
1794 return __is_kernel_percpu_address(addr, NULL);
1795}
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820phys_addr_t per_cpu_ptr_to_phys(void *addr)
1821{
1822 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
1823 bool in_first_chunk = false;
1824 unsigned long first_low, first_high;
1825 unsigned int cpu;
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837 first_low = (unsigned long)pcpu_base_addr +
1838 pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
1839 first_high = (unsigned long)pcpu_base_addr +
1840 pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
1841 if ((unsigned long)addr >= first_low &&
1842 (unsigned long)addr < first_high) {
1843 for_each_possible_cpu(cpu) {
1844 void *start = per_cpu_ptr(base, cpu);
1845
1846 if (addr >= start && addr < start + pcpu_unit_size) {
1847 in_first_chunk = true;
1848 break;
1849 }
1850 }
1851 }
1852
1853 if (in_first_chunk) {
1854 if (!is_vmalloc_addr(addr))
1855 return __pa(addr);
1856 else
1857 return page_to_phys(vmalloc_to_page(addr)) +
1858 offset_in_page(addr);
1859 } else
1860 return page_to_phys(pcpu_addr_to_page(addr)) +
1861 offset_in_page(addr);
1862}
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
1880 int nr_units)
1881{
1882 struct pcpu_alloc_info *ai;
1883 size_t base_size, ai_size;
1884 void *ptr;
1885 int unit;
1886
1887 base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
1888 __alignof__(ai->groups[0].cpu_map[0]));
1889 ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
1890
1891 ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
1892 if (!ptr)
1893 return NULL;
1894 ai = ptr;
1895 ptr += base_size;
1896
1897 ai->groups[0].cpu_map = ptr;
1898
1899 for (unit = 0; unit < nr_units; unit++)
1900 ai->groups[0].cpu_map[unit] = NR_CPUS;
1901
1902 ai->nr_groups = nr_groups;
1903 ai->__ai_size = PFN_ALIGN(ai_size);
1904
1905 return ai;
1906}
1907
1908
1909
1910
1911
1912
1913
1914void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1915{
1916 memblock_free_early(__pa(ai), ai->__ai_size);
1917}
1918
1919
1920
1921
1922
1923
1924
1925
1926static void pcpu_dump_alloc_info(const char *lvl,
1927 const struct pcpu_alloc_info *ai)
1928{
1929 int group_width = 1, cpu_width = 1, width;
1930 char empty_str[] = "--------";
1931 int alloc = 0, alloc_end = 0;
1932 int group, v;
1933 int upa, apl;
1934
1935 v = ai->nr_groups;
1936 while (v /= 10)
1937 group_width++;
1938
1939 v = num_possible_cpus();
1940 while (v /= 10)
1941 cpu_width++;
1942 empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';
1943
1944 upa = ai->alloc_size / ai->unit_size;
1945 width = upa * (cpu_width + 1) + group_width + 3;
1946 apl = rounddown_pow_of_two(max(60 / width, 1));
1947
1948 printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
1949 lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
1950 ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);
1951
1952 for (group = 0; group < ai->nr_groups; group++) {
1953 const struct pcpu_group_info *gi = &ai->groups[group];
1954 int unit = 0, unit_end = 0;
1955
1956 BUG_ON(gi->nr_units % upa);
1957 for (alloc_end += gi->nr_units / upa;
1958 alloc < alloc_end; alloc++) {
1959 if (!(alloc % apl)) {
1960 pr_cont("\n");
1961 printk("%spcpu-alloc: ", lvl);
1962 }
1963 pr_cont("[%0*d] ", group_width, group);
1964
1965 for (unit_end += upa; unit < unit_end; unit++)
1966 if (gi->cpu_map[unit] != NR_CPUS)
1967 pr_cont("%0*d ",
1968 cpu_width, gi->cpu_map[unit]);
1969 else
1970 pr_cont("%s ", empty_str);
1971 }
1972 }
1973 pr_cont("\n");
1974}
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
2034 void *base_addr)
2035{
2036 size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
2037 size_t static_size, dyn_size;
2038 struct pcpu_chunk *chunk;
2039 unsigned long *group_offsets;
2040 size_t *group_sizes;
2041 unsigned long *unit_off;
2042 unsigned int cpu;
2043 int *unit_map;
2044 int group, unit, i;
2045 int map_size;
2046 unsigned long tmp_addr;
2047
2048#define PCPU_SETUP_BUG_ON(cond) do { \
2049 if (unlikely(cond)) { \
2050 pr_emerg("failed to initialize, %s\n", #cond); \
2051 pr_emerg("cpu_possible_mask=%*pb\n", \
2052 cpumask_pr_args(cpu_possible_mask)); \
2053 pcpu_dump_alloc_info(KERN_EMERG, ai); \
2054 BUG(); \
2055 } \
2056} while (0)
2057
2058
2059 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
2060#ifdef CONFIG_SMP
2061 PCPU_SETUP_BUG_ON(!ai->static_size);
2062 PCPU_SETUP_BUG_ON(offset_in_page(__per_cpu_start));
2063#endif
2064 PCPU_SETUP_BUG_ON(!base_addr);
2065 PCPU_SETUP_BUG_ON(offset_in_page(base_addr));
2066 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
2067 PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size));
2068 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
2069 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
2070 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
2071 PCPU_SETUP_BUG_ON(!ai->dyn_size);
2072 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
2073 PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
2074 IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
2075 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
2076
2077
2078 group_offsets = memblock_virt_alloc(ai->nr_groups *
2079 sizeof(group_offsets[0]), 0);
2080 group_sizes = memblock_virt_alloc(ai->nr_groups *
2081 sizeof(group_sizes[0]), 0);
2082 unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
2083 unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
2084
2085 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
2086 unit_map[cpu] = UINT_MAX;
2087
2088 pcpu_low_unit_cpu = NR_CPUS;
2089 pcpu_high_unit_cpu = NR_CPUS;
2090
2091 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
2092 const struct pcpu_group_info *gi = &ai->groups[group];
2093
2094 group_offsets[group] = gi->base_offset;
2095 group_sizes[group] = gi->nr_units * ai->unit_size;
2096
2097 for (i = 0; i < gi->nr_units; i++) {
2098 cpu = gi->cpu_map[i];
2099 if (cpu == NR_CPUS)
2100 continue;
2101
2102 PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids);
2103 PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
2104 PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);
2105
2106 unit_map[cpu] = unit + i;
2107 unit_off[cpu] = gi->base_offset + i * ai->unit_size;
2108
2109
2110 if (pcpu_low_unit_cpu == NR_CPUS ||
2111 unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
2112 pcpu_low_unit_cpu = cpu;
2113 if (pcpu_high_unit_cpu == NR_CPUS ||
2114 unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
2115 pcpu_high_unit_cpu = cpu;
2116 }
2117 }
2118 pcpu_nr_units = unit;
2119
2120 for_each_possible_cpu(cpu)
2121 PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
2122
2123
2124#undef PCPU_SETUP_BUG_ON
2125 pcpu_dump_alloc_info(KERN_DEBUG, ai);
2126
2127 pcpu_nr_groups = ai->nr_groups;
2128 pcpu_group_offsets = group_offsets;
2129 pcpu_group_sizes = group_sizes;
2130 pcpu_unit_map = unit_map;
2131 pcpu_unit_offsets = unit_off;
2132
2133
2134 pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
2135 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
2136 pcpu_atom_size = ai->atom_size;
2137 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
2138 BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);
2139
2140 pcpu_stats_save_ai(ai);
2141
2142
2143
2144
2145
2146 pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
2147 pcpu_slot = memblock_virt_alloc(
2148 pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
2149 for (i = 0; i < pcpu_nr_slots; i++)
2150 INIT_LIST_HEAD(&pcpu_slot[i]);
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160 static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE);
2161 dyn_size = ai->dyn_size - (static_size - ai->static_size);
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171 tmp_addr = (unsigned long)base_addr + static_size;
2172 map_size = ai->reserved_size ?: dyn_size;
2173 chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
2174
2175
2176 if (ai->reserved_size) {
2177 pcpu_reserved_chunk = chunk;
2178
2179 tmp_addr = (unsigned long)base_addr + static_size +
2180 ai->reserved_size;
2181 map_size = dyn_size;
2182 chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
2183 }
2184
2185
2186 pcpu_first_chunk = chunk;
2187 pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
2188 pcpu_chunk_relocate(pcpu_first_chunk, -1);
2189
2190
2191 pcpu_nr_populated += PFN_DOWN(size_sum);
2192
2193 pcpu_stats_chunk_alloc();
2194 trace_percpu_create_chunk(base_addr);
2195
2196
2197 pcpu_base_addr = base_addr;
2198 return 0;
2199}
2200
2201#ifdef CONFIG_SMP
2202
2203const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
2204 [PCPU_FC_AUTO] = "auto",
2205 [PCPU_FC_EMBED] = "embed",
2206 [PCPU_FC_PAGE] = "page",
2207};
2208
2209enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
2210
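/* handle the "percpu_alloc=" early parameter selecting the first chunk allocator */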
2211static int __init percpu_alloc_setup(char *str)
2212{
2213 if (!str)
2214 return -EINVAL;
2215
2216 if (0)
2217 ;
2218#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
2219 else if (!strcmp(str, "embed"))
2220 pcpu_chosen_fc = PCPU_FC_EMBED;
2221#endif
2222#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
2223 else if (!strcmp(str, "page"))
2224 pcpu_chosen_fc = PCPU_FC_PAGE;
2225#endif
2226 else
2227 pr_warn("unknown allocator %s specified\n", str);
2228
2229 return 0;
2230}
2231early_param("percpu_alloc", percpu_alloc_setup);
2232
2233
2234
2235
2236
2237
2238#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
2239 !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
2240#define BUILD_EMBED_FIRST_CHUNK
2241#endif
2242
2243
2244#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
2245#define BUILD_PAGE_FIRST_CHUNK
2246#endif
2247
2248
2249#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
2272 size_t reserved_size, size_t dyn_size,
2273 size_t atom_size,
2274 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
2275{
2276 static int group_map[NR_CPUS] __initdata;
2277 static int group_cnt[NR_CPUS] __initdata;
2278 const size_t static_size = __per_cpu_end - __per_cpu_start;
2279 int nr_groups = 1, nr_units = 0;
2280 size_t size_sum, min_unit_size, alloc_size;
2281 int upa, max_upa, uninitialized_var(best_upa);
2282 int last_allocs, group, unit;
2283 unsigned int cpu, tcpu;
2284 struct pcpu_alloc_info *ai;
2285 unsigned int *cpu_map;
2286
2287
2288 memset(group_map, 0, sizeof(group_map));
2289 memset(group_cnt, 0, sizeof(group_cnt));
2290
2291
2292 size_sum = PFN_ALIGN(static_size + reserved_size +
2293 max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
2294 dyn_size = size_sum - static_size - reserved_size;
2295
2296
2297
2298
2299
2300
2301
2302 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
2303
2304
2305 alloc_size = roundup(min_unit_size, atom_size);
2306 upa = alloc_size / min_unit_size;
2307 while (alloc_size % upa || (offset_in_page(alloc_size / upa)))
2308 upa--;
2309 max_upa = upa;
2310
2311
2312 for_each_possible_cpu(cpu) {
2313 group = 0;
2314 next_group:
2315 for_each_possible_cpu(tcpu) {
2316 if (cpu == tcpu)
2317 break;
2318 if (group_map[tcpu] == group && cpu_distance_fn &&
2319 (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
2320 cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
2321 group++;
2322 nr_groups = max(nr_groups, group + 1);
2323 goto next_group;
2324 }
2325 }
2326 group_map[cpu] = group;
2327 group_cnt[group]++;
2328 }
2329
2330
2331
2332
2333
2334
2335 last_allocs = INT_MAX;
2336 for (upa = max_upa; upa; upa--) {
2337 int allocs = 0, wasted = 0;
2338
2339 if (alloc_size % upa || (offset_in_page(alloc_size / upa)))
2340 continue;
2341
2342 for (group = 0; group < nr_groups; group++) {
2343 int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
2344 allocs += this_allocs;
2345 wasted += this_allocs * upa - group_cnt[group];
2346 }
2347
2348
2349
2350
2351
2352
2353 if (wasted > num_possible_cpus() / 3)
2354 continue;
2355
2356
2357 if (allocs > last_allocs)
2358 break;
2359 last_allocs = allocs;
2360 best_upa = upa;
2361 }
2362 upa = best_upa;
2363
2364
2365 for (group = 0; group < nr_groups; group++)
2366 nr_units += roundup(group_cnt[group], upa);
2367
2368 ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
2369 if (!ai)
2370 return ERR_PTR(-ENOMEM);
2371 cpu_map = ai->groups[0].cpu_map;
2372
2373 for (group = 0; group < nr_groups; group++) {
2374 ai->groups[group].cpu_map = cpu_map;
2375 cpu_map += roundup(group_cnt[group], upa);
2376 }
2377
2378 ai->static_size = static_size;
2379 ai->reserved_size = reserved_size;
2380 ai->dyn_size = dyn_size;
2381 ai->unit_size = alloc_size / upa;
2382 ai->atom_size = atom_size;
2383 ai->alloc_size = alloc_size;
2384
2385 for (group = 0, unit = 0; group_cnt[group]; group++) {
2386 struct pcpu_group_info *gi = &ai->groups[group];
2387
2388
2389
2390
2391
2392
2393 gi->base_offset = unit * ai->unit_size;
2394
2395 for_each_possible_cpu(cpu)
2396 if (group_map[cpu] == group)
2397 gi->cpu_map[gi->nr_units++] = cpu;
2398 gi->nr_units = roundup(gi->nr_units, upa);
2399 unit += gi->nr_units;
2400 }
2401 BUG_ON(unit != nr_units);
2402
2403 return ai;
2404}
2405#endif
2406
2407#if defined(BUILD_EMBED_FIRST_CHUNK)
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
2441 size_t atom_size,
2442 pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
2443 pcpu_fc_alloc_fn_t alloc_fn,
2444 pcpu_fc_free_fn_t free_fn)
2445{
2446 void *base = (void *)ULONG_MAX;
2447 void **areas = NULL;
2448 struct pcpu_alloc_info *ai;
2449 size_t size_sum, areas_size;
2450 unsigned long max_distance;
2451 int group, i, highest_group, rc;
2452
2453 ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
2454 cpu_distance_fn);
2455 if (IS_ERR(ai))
2456 return PTR_ERR(ai);
2457
2458 size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
2459 areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
2460
2461 areas = memblock_virt_alloc_nopanic(areas_size, 0);
2462 if (!areas) {
2463 rc = -ENOMEM;
2464 goto out_free;
2465 }
2466
2467
2468 highest_group = 0;
2469 for (group = 0; group < ai->nr_groups; group++) {
2470 struct pcpu_group_info *gi = &ai->groups[group];
2471 unsigned int cpu = NR_CPUS;
2472 void *ptr;
2473
2474 for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
2475 cpu = gi->cpu_map[i];
2476 BUG_ON(cpu == NR_CPUS);
2477
2478
2479 ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
2480 if (!ptr) {
2481 rc = -ENOMEM;
2482 goto out_free_areas;
2483 }
2484
2485 kmemleak_free(ptr);
2486 areas[group] = ptr;
2487
2488 base = min(ptr, base);
2489 if (ptr > areas[highest_group])
2490 highest_group = group;
2491 }
2492 max_distance = areas[highest_group] - base;
2493 max_distance += ai->unit_size * ai->groups[highest_group].nr_units;
2494
2495
2496 if (max_distance > VMALLOC_TOTAL * 3 / 4) {
2497 pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
2498 max_distance, VMALLOC_TOTAL);
2499#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
2500
2501 rc = -EINVAL;
2502 goto out_free_areas;
2503#endif
2504 }
2505
2506
2507
2508
2509
2510
2511 for (group = 0; group < ai->nr_groups; group++) {
2512 struct pcpu_group_info *gi = &ai->groups[group];
2513 void *ptr = areas[group];
2514
2515 for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
2516 if (gi->cpu_map[i] == NR_CPUS) {
2517
2518 free_fn(ptr, ai->unit_size);
2519 continue;
2520 }
2521
2522 memcpy(ptr, __per_cpu_load, ai->static_size);
2523 free_fn(ptr + size_sum, ai->unit_size - size_sum);
2524 }
2525 }
2526
2527
2528 for (group = 0; group < ai->nr_groups; group++) {
2529 ai->groups[group].base_offset = areas[group] - base;
2530 }
2531
2532 pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
2533 PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
2534 ai->dyn_size, ai->unit_size);
2535
2536 rc = pcpu_setup_first_chunk(ai, base);
2537 goto out_free;
2538
2539out_free_areas:
2540 for (group = 0; group < ai->nr_groups; group++)
2541 if (areas[group])
2542 free_fn(areas[group],
2543 ai->groups[group].nr_units * ai->unit_size);
2544out_free:
2545 pcpu_free_alloc_info(ai);
2546 if (areas)
2547 memblock_free_early(__pa(areas), areas_size);
2548 return rc;
2549}
2550#endif
2551
2552#ifdef BUILD_PAGE_FIRST_CHUNK
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569int __init pcpu_page_first_chunk(size_t reserved_size,
2570 pcpu_fc_alloc_fn_t alloc_fn,
2571 pcpu_fc_free_fn_t free_fn,
2572 pcpu_fc_populate_pte_fn_t populate_pte_fn)
2573{
2574 static struct vm_struct vm;
2575 struct pcpu_alloc_info *ai;
2576 char psize_str[16];
2577 int unit_pages;
2578 size_t pages_size;
2579 struct page **pages;
2580 int unit, i, j, rc;
2581 int upa;
2582 int nr_g0_units;
2583
2584 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
2585
2586 ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
2587 if (IS_ERR(ai))
2588 return PTR_ERR(ai);
2589 BUG_ON(ai->nr_groups != 1);
2590 upa = ai->alloc_size/ai->unit_size;
2591 nr_g0_units = roundup(num_possible_cpus(), upa);
2592 if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
2593 pcpu_free_alloc_info(ai);
2594 return -EINVAL;
2595 }
2596
2597 unit_pages = ai->unit_size >> PAGE_SHIFT;
2598
2599
2600 pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
2601 sizeof(pages[0]));
2602 pages = memblock_virt_alloc(pages_size, 0);
2603
2604
2605 j = 0;
2606 for (unit = 0; unit < num_possible_cpus(); unit++) {
2607 unsigned int cpu = ai->groups[0].cpu_map[unit];
2608 for (i = 0; i < unit_pages; i++) {
2609 void *ptr;
2610
2611 ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
2612 if (!ptr) {
2613 pr_warn("failed to allocate %s page for cpu%u\n",
2614 psize_str, cpu);
2615 goto enomem;
2616 }
2617
2618 kmemleak_free(ptr);
2619 pages[j++] = virt_to_page(ptr);
2620 }
2621 }
2622
2623
2624 vm.flags = VM_ALLOC;
2625 vm.size = num_possible_cpus() * ai->unit_size;
2626 vm_area_register_early(&vm, PAGE_SIZE);
2627
2628 for (unit = 0; unit < num_possible_cpus(); unit++) {
2629 unsigned long unit_addr =
2630 (unsigned long)vm.addr + unit * ai->unit_size;
2631
2632 for (i = 0; i < unit_pages; i++)
2633 populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
2634
2635
2636 rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
2637 unit_pages);
2638 if (rc < 0)
2639 panic("failed to map percpu area, err=%d\n", rc);
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650 memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
2651 }
2652
2653
2654 pr_info("%d %s pages/cpu @%p s%zu r%zu d%zu\n",
2655 unit_pages, psize_str, vm.addr, ai->static_size,
2656 ai->reserved_size, ai->dyn_size);
2657
2658 rc = pcpu_setup_first_chunk(ai, vm.addr);
2659 goto out_free_ar;
2660
2661enomem:
2662 while (--j >= 0)
2663 free_fn(page_address(pages[j]), PAGE_SIZE);
2664 rc = -ENOMEM;
2665out_free_ar:
2666 memblock_free_early(__pa(pages), pages_size);
2667 pcpu_free_alloc_info(ai);
2668 return rc;
2669}
2670#endif
2671
2672#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
2686EXPORT_SYMBOL(__per_cpu_offset);
2687
2688static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
2689 size_t align)
2690{
2691 return memblock_virt_alloc_from_nopanic(
2692 size, align, __pa(MAX_DMA_ADDRESS));
2693}
2694
2695static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
2696{
2697 memblock_free_early(__pa(ptr), size);
2698}
2699
2700void __init setup_per_cpu_areas(void)
2701{
2702 unsigned long delta;
2703 unsigned int cpu;
2704 int rc;
2705
2706
2707
2708
2709
2710 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
2711 PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
2712 pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
2713 if (rc < 0)
2714 panic("Failed to initialize percpu areas.");
2715
2716 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
2717 for_each_possible_cpu(cpu)
2718 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
2719}
2720#endif
2721
2722#else
2723
2724
2725
2726
2727
2728
2729
2730
2731void __init setup_per_cpu_areas(void)
2732{
2733 const size_t unit_size =
2734 roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
2735 PERCPU_DYNAMIC_RESERVE));
2736 struct pcpu_alloc_info *ai;
2737 void *fc;
2738
2739 ai = pcpu_alloc_alloc_info(1, 1);
2740 fc = memblock_virt_alloc_from_nopanic(unit_size,
2741 PAGE_SIZE,
2742 __pa(MAX_DMA_ADDRESS));
2743 if (!ai || !fc)
2744 panic("Failed to allocate memory for percpu areas.");
2745
2746 kmemleak_free(fc);
2747
2748 ai->dyn_size = unit_size;
2749 ai->unit_size = unit_size;
2750 ai->atom_size = unit_size;
2751 ai->alloc_size = unit_size;
2752 ai->groups[0].nr_units = 1;
2753 ai->groups[0].cpu_map[0] = 0;
2754
2755 if (pcpu_setup_first_chunk(ai, fc) < 0)
2756 panic("Failed to initialize percpu areas.");
2757 pcpu_free_alloc_info(ai);
2758}
2759
2760#endif
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773unsigned long pcpu_nr_pages(void)
2774{
2775 return pcpu_nr_populated * pcpu_nr_units;
2776}
2777
2778
2779
2780
2781
2782
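/*
 * The percpu allocator is set up early in boot, before work items can
 * run, so asynchronous chunk management starts out disabled.  Enable it
 * once initcalls reach subsys level.
 */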
2783static int __init percpu_enable_async(void)
2784{
2785 pcpu_async_enabled = true;
2786 return 0;
2787}
2788subsys_initcall(percpu_enable_async);
2789