1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56#include <linux/bitmap.h>
57#include <linux/bootmem.h>
58#include <linux/err.h>
59#include <linux/list.h>
60#include <linux/log2.h>
61#include <linux/mm.h>
62#include <linux/module.h>
63#include <linux/mutex.h>
64#include <linux/percpu.h>
65#include <linux/pfn.h>
66#include <linux/slab.h>
67#include <linux/spinlock.h>
68#include <linux/vmalloc.h>
69#include <linux/workqueue.h>
70#include <linux/kmemleak.h>
71
72#include <asm/cacheflush.h>
73#include <asm/sections.h>
74#include <asm/tlbflush.h>
75#include <asm/io.h>
76
/* Shift subtracted from fls(size) when a size is mapped to a slot index. */
#define PCPU_SLOT_BASE_SHIFT		5
/* Number of entries a freshly allocated chunk area map starts with. */
#define PCPU_DFL_MAP_ALLOC		16

#ifdef CONFIG_SMP

/*
 * Default translations between a chunk address and the percpu pointer
 * handed to callers.  Both are plain offsets relative to pcpu_base_addr
 * and __per_cpu_start.  An architecture may supply its own versions, in
 * which case the defaults below are not defined.
 */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr	+		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */

/* Without CONFIG_SMP the translation is a plain cast. */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */
99
100struct pcpu_chunk {
101 struct list_head list;
102 int free_size;
103 int contig_hint;
104 void *base_addr;
105 int map_used;
106 int map_alloc;
107 int *map;
108 void *data;
109 bool immutable;
110 unsigned long populated[];
111};
112
113static int pcpu_unit_pages __read_mostly;
114static int pcpu_unit_size __read_mostly;
115static int pcpu_nr_units __read_mostly;
116static int pcpu_atom_size __read_mostly;
117static int pcpu_nr_slots __read_mostly;
118static size_t pcpu_chunk_struct_size __read_mostly;
119
120
121static unsigned int pcpu_low_unit_cpu __read_mostly;
122static unsigned int pcpu_high_unit_cpu __read_mostly;
123
124
125void *pcpu_base_addr __read_mostly;
126EXPORT_SYMBOL_GPL(pcpu_base_addr);
127
128static const int *pcpu_unit_map __read_mostly;
129const unsigned long *pcpu_unit_offsets __read_mostly;
130
131
132static int pcpu_nr_groups __read_mostly;
133static const unsigned long *pcpu_group_offsets __read_mostly;
134static const size_t *pcpu_group_sizes __read_mostly;
135
136
137
138
139
140
141static struct pcpu_chunk *pcpu_first_chunk;
142
143
144
145
146
147
148
149
150static struct pcpu_chunk *pcpu_reserved_chunk;
151static int pcpu_reserved_chunk_limit;
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177static DEFINE_MUTEX(pcpu_alloc_mutex);
178static DEFINE_SPINLOCK(pcpu_lock);
179
180static struct list_head *pcpu_slot __read_mostly;
181
182
183static void pcpu_reclaim(struct work_struct *work);
184static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);
185
186static bool pcpu_addr_in_first_chunk(void *addr)
187{
188 void *first_start = pcpu_first_chunk->base_addr;
189
190 return addr >= first_start && addr < first_start + pcpu_unit_size;
191}
192
193static bool pcpu_addr_in_reserved_chunk(void *addr)
194{
195 void *first_start = pcpu_first_chunk->base_addr;
196
197 return addr >= first_start &&
198 addr < first_start + pcpu_reserved_chunk_limit;
199}
200
201static int __pcpu_size_to_slot(int size)
202{
203 int highbit = fls(size);
204 return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
205}
206
207static int pcpu_size_to_slot(int size)
208{
209 if (size == pcpu_unit_size)
210 return pcpu_nr_slots - 1;
211 return __pcpu_size_to_slot(size);
212}
213
214static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
215{
216 if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int))
217 return 0;
218
219 return pcpu_size_to_slot(chunk->free_size);
220}
221
222
223static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
224{
225 page->index = (unsigned long)pcpu;
226}
227
228
229static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
230{
231 return (struct pcpu_chunk *)page->index;
232}
233
234static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
235{
236 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
237}
238
239static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
240 unsigned int cpu, int page_idx)
241{
242 return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
243 (page_idx << PAGE_SHIFT);
244}
245
246static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk,
247 int *rs, int *re, int end)
248{
249 *rs = find_next_zero_bit(chunk->populated, end, *rs);
250 *re = find_next_bit(chunk->populated, end, *rs + 1);
251}
252
253static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
254 int *rs, int *re, int end)
255{
256 *rs = find_next_bit(chunk->populated, end, *rs);
257 *re = find_next_zero_bit(chunk->populated, end, *rs + 1);
258}
259
260
261
262
263
264
265
/*
 * Region iterators over chunk->populated between @start and @end.
 * Each iteration sets [@rs, @re) via pcpu_next_unpop()/pcpu_next_pop();
 * the loop ends once @rs is no longer below @re.  @rs and @re must be
 * int lvalues because their addresses are passed to the helpers.
 */
#define pcpu_for_each_unpop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end));   \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290static void *pcpu_mem_zalloc(size_t size)
291{
292 if (WARN_ON_ONCE(!slab_is_available()))
293 return NULL;
294
295 if (size <= PAGE_SIZE)
296 return kzalloc(size, GFP_KERNEL);
297 else
298 return vzalloc(size);
299}
300
301
302
303
304
305
306
307
308static void pcpu_mem_free(void *ptr, size_t size)
309{
310 if (size <= PAGE_SIZE)
311 kfree(ptr);
312 else
313 vfree(ptr);
314}
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
330{
331 int nslot = pcpu_chunk_slot(chunk);
332
333 if (chunk != pcpu_reserved_chunk && oslot != nslot) {
334 if (oslot < nslot)
335 list_move(&chunk->list, &pcpu_slot[nslot]);
336 else
337 list_move_tail(&chunk->list, &pcpu_slot[nslot]);
338 }
339}
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
356{
357 int new_alloc;
358
359 if (chunk->map_alloc >= chunk->map_used + 2)
360 return 0;
361
362 new_alloc = PCPU_DFL_MAP_ALLOC;
363 while (new_alloc < chunk->map_used + 2)
364 new_alloc *= 2;
365
366 return new_alloc;
367}
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
383{
384 int *old = NULL, *new = NULL;
385 size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
386 unsigned long flags;
387
388 new = pcpu_mem_zalloc(new_size);
389 if (!new)
390 return -ENOMEM;
391
392
393 spin_lock_irqsave(&pcpu_lock, flags);
394
395 if (new_alloc <= chunk->map_alloc)
396 goto out_unlock;
397
398 old_size = chunk->map_alloc * sizeof(chunk->map[0]);
399 old = chunk->map;
400
401 memcpy(new, old, old_size);
402
403 chunk->map_alloc = new_alloc;
404 chunk->map = new;
405 new = NULL;
406
407out_unlock:
408 spin_unlock_irqrestore(&pcpu_lock, flags);
409
410
411
412
413
414 pcpu_mem_free(old, old_size);
415 pcpu_mem_free(new, new_size);
416
417 return 0;
418}
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
441 int head, int tail)
442{
443 int nr_extra = !!head + !!tail;
444
445 BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);
446
447
448 memmove(&chunk->map[i + nr_extra], &chunk->map[i],
449 sizeof(chunk->map[0]) * (chunk->map_used - i));
450 chunk->map_used += nr_extra;
451
452 if (head) {
453 chunk->map[i + 1] = chunk->map[i] - head;
454 chunk->map[i++] = head;
455 }
456 if (tail) {
457 chunk->map[i++] -= tail;
458 chunk->map[i] = tail;
459 }
460}
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
482{
483 int oslot = pcpu_chunk_slot(chunk);
484 int max_contig = 0;
485 int i, off;
486
487 for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
488 bool is_last = i + 1 == chunk->map_used;
489 int head, tail;
490
491
492 head = ALIGN(off, align) - off;
493 BUG_ON(i == 0 && head != 0);
494
495 if (chunk->map[i] < 0)
496 continue;
497 if (chunk->map[i] < head + size) {
498 max_contig = max(chunk->map[i], max_contig);
499 continue;
500 }
501
502
503
504
505
506
507
508 if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
509 if (chunk->map[i - 1] > 0)
510 chunk->map[i - 1] += head;
511 else {
512 chunk->map[i - 1] -= head;
513 chunk->free_size -= head;
514 }
515 chunk->map[i] -= head;
516 off += head;
517 head = 0;
518 }
519
520
521 tail = chunk->map[i] - head - size;
522 if (tail < sizeof(int))
523 tail = 0;
524
525
526 if (head || tail) {
527 pcpu_split_block(chunk, i, head, tail);
528 if (head) {
529 i++;
530 off += head;
531 max_contig = max(chunk->map[i - 1], max_contig);
532 }
533 if (tail)
534 max_contig = max(chunk->map[i + 1], max_contig);
535 }
536
537
538 if (is_last)
539 chunk->contig_hint = max_contig;
540 else
541 chunk->contig_hint = max(chunk->contig_hint,
542 max_contig);
543
544 chunk->free_size -= chunk->map[i];
545 chunk->map[i] = -chunk->map[i];
546
547 pcpu_chunk_relocate(chunk, oslot);
548 return off;
549 }
550
551 chunk->contig_hint = max_contig;
552 pcpu_chunk_relocate(chunk, oslot);
553
554
555 return -1;
556}
557
558
559
560
561
562
563
564
565
566
567
568
569
570static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
571{
572 int oslot = pcpu_chunk_slot(chunk);
573 int i, off;
574
575 for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
576 if (off == freeme)
577 break;
578 BUG_ON(off != freeme);
579 BUG_ON(chunk->map[i] > 0);
580
581 chunk->map[i] = -chunk->map[i];
582 chunk->free_size += chunk->map[i];
583
584
585 if (i > 0 && chunk->map[i - 1] >= 0) {
586 chunk->map[i - 1] += chunk->map[i];
587 chunk->map_used--;
588 memmove(&chunk->map[i], &chunk->map[i + 1],
589 (chunk->map_used - i) * sizeof(chunk->map[0]));
590 i--;
591 }
592
593 if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
594 chunk->map[i] += chunk->map[i + 1];
595 chunk->map_used--;
596 memmove(&chunk->map[i + 1], &chunk->map[i + 2],
597 (chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
598 }
599
600 chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
601 pcpu_chunk_relocate(chunk, oslot);
602}
603
604static struct pcpu_chunk *pcpu_alloc_chunk(void)
605{
606 struct pcpu_chunk *chunk;
607
608 chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
609 if (!chunk)
610 return NULL;
611
612 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
613 sizeof(chunk->map[0]));
614 if (!chunk->map) {
615 kfree(chunk);
616 return NULL;
617 }
618
619 chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
620 chunk->map[chunk->map_used++] = pcpu_unit_size;
621
622 INIT_LIST_HEAD(&chunk->list);
623 chunk->free_size = pcpu_unit_size;
624 chunk->contig_hint = pcpu_unit_size;
625
626 return chunk;
627}
628
629static void pcpu_free_chunk(struct pcpu_chunk *chunk)
630{
631 if (!chunk)
632 return;
633 pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
634 kfree(chunk);
635}
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
653static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
654static struct pcpu_chunk *pcpu_create_chunk(void);
655static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
656static struct page *pcpu_addr_to_page(void *addr);
657static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
658
659#ifdef CONFIG_NEED_PER_CPU_KM
660#include "percpu-km.c"
661#else
662#include "percpu-vm.c"
663#endif
664
665
666
667
668
669
670
671
672static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
673{
674
675 if (pcpu_addr_in_first_chunk(addr)) {
676
677 if (pcpu_addr_in_reserved_chunk(addr))
678 return pcpu_reserved_chunk;
679 return pcpu_first_chunk;
680 }
681
682
683
684
685
686
687
688
689 addr += pcpu_unit_offsets[raw_smp_processor_id()];
690 return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
691}
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
708{
709 static int warn_limit = 10;
710 struct pcpu_chunk *chunk;
711 const char *err;
712 int slot, off, new_alloc;
713 unsigned long flags;
714 void __percpu *ptr;
715
716 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
717 WARN(true, "illegal size (%zu) or align (%zu) for "
718 "percpu allocation\n", size, align);
719 return NULL;
720 }
721
722 mutex_lock(&pcpu_alloc_mutex);
723 spin_lock_irqsave(&pcpu_lock, flags);
724
725
726 if (reserved && pcpu_reserved_chunk) {
727 chunk = pcpu_reserved_chunk;
728
729 if (size > chunk->contig_hint) {
730 err = "alloc from reserved chunk failed";
731 goto fail_unlock;
732 }
733
734 while ((new_alloc = pcpu_need_to_extend(chunk))) {
735 spin_unlock_irqrestore(&pcpu_lock, flags);
736 if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
737 err = "failed to extend area map of reserved chunk";
738 goto fail_unlock_mutex;
739 }
740 spin_lock_irqsave(&pcpu_lock, flags);
741 }
742
743 off = pcpu_alloc_area(chunk, size, align);
744 if (off >= 0)
745 goto area_found;
746
747 err = "alloc from reserved chunk failed";
748 goto fail_unlock;
749 }
750
751restart:
752
753 for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
754 list_for_each_entry(chunk, &pcpu_slot[slot], list) {
755 if (size > chunk->contig_hint)
756 continue;
757
758 new_alloc = pcpu_need_to_extend(chunk);
759 if (new_alloc) {
760 spin_unlock_irqrestore(&pcpu_lock, flags);
761 if (pcpu_extend_area_map(chunk,
762 new_alloc) < 0) {
763 err = "failed to extend area map";
764 goto fail_unlock_mutex;
765 }
766 spin_lock_irqsave(&pcpu_lock, flags);
767
768
769
770
771 goto restart;
772 }
773
774 off = pcpu_alloc_area(chunk, size, align);
775 if (off >= 0)
776 goto area_found;
777 }
778 }
779
780
781 spin_unlock_irqrestore(&pcpu_lock, flags);
782
783 chunk = pcpu_create_chunk();
784 if (!chunk) {
785 err = "failed to allocate new chunk";
786 goto fail_unlock_mutex;
787 }
788
789 spin_lock_irqsave(&pcpu_lock, flags);
790 pcpu_chunk_relocate(chunk, -1);
791 goto restart;
792
793area_found:
794 spin_unlock_irqrestore(&pcpu_lock, flags);
795
796
797 if (pcpu_populate_chunk(chunk, off, size)) {
798 spin_lock_irqsave(&pcpu_lock, flags);
799 pcpu_free_area(chunk, off);
800 err = "failed to populate";
801 goto fail_unlock;
802 }
803
804 mutex_unlock(&pcpu_alloc_mutex);
805
806
807 ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
808 kmemleak_alloc_percpu(ptr, size);
809 return ptr;
810
811fail_unlock:
812 spin_unlock_irqrestore(&pcpu_lock, flags);
813fail_unlock_mutex:
814 mutex_unlock(&pcpu_alloc_mutex);
815 if (warn_limit) {
816 pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
817 "%s\n", size, align, err);
818 dump_stack();
819 if (!--warn_limit)
820 pr_info("PERCPU: limit reached, disable warning\n");
821 }
822 return NULL;
823}
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839void __percpu *__alloc_percpu(size_t size, size_t align)
840{
841 return pcpu_alloc(size, align, false);
842}
843EXPORT_SYMBOL_GPL(__alloc_percpu);
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
862{
863 return pcpu_alloc(size, align, true);
864}
865
866
867
868
869
870
871
872
873
874
875static void pcpu_reclaim(struct work_struct *work)
876{
877 LIST_HEAD(todo);
878 struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1];
879 struct pcpu_chunk *chunk, *next;
880
881 mutex_lock(&pcpu_alloc_mutex);
882 spin_lock_irq(&pcpu_lock);
883
884 list_for_each_entry_safe(chunk, next, head, list) {
885 WARN_ON(chunk->immutable);
886
887
888 if (chunk == list_first_entry(head, struct pcpu_chunk, list))
889 continue;
890
891 list_move(&chunk->list, &todo);
892 }
893
894 spin_unlock_irq(&pcpu_lock);
895
896 list_for_each_entry_safe(chunk, next, &todo, list) {
897 pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size);
898 pcpu_destroy_chunk(chunk);
899 }
900
901 mutex_unlock(&pcpu_alloc_mutex);
902}
903
904
905
906
907
908
909
910
911
912
913void free_percpu(void __percpu *ptr)
914{
915 void *addr;
916 struct pcpu_chunk *chunk;
917 unsigned long flags;
918 int off;
919
920 if (!ptr)
921 return;
922
923 kmemleak_free_percpu(ptr);
924
925 addr = __pcpu_ptr_to_addr(ptr);
926
927 spin_lock_irqsave(&pcpu_lock, flags);
928
929 chunk = pcpu_chunk_addr_search(addr);
930 off = addr - chunk->base_addr;
931
932 pcpu_free_area(chunk, off);
933
934
935 if (chunk->free_size == pcpu_unit_size) {
936 struct pcpu_chunk *pos;
937
938 list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
939 if (pos != chunk) {
940 schedule_work(&pcpu_reclaim_work);
941 break;
942 }
943 }
944
945 spin_unlock_irqrestore(&pcpu_lock, flags);
946}
947EXPORT_SYMBOL_GPL(free_percpu);
948
949
950
951
952
953
954
955
956
957
958
959
960bool is_kernel_percpu_address(unsigned long addr)
961{
962#ifdef CONFIG_SMP
963 const size_t static_size = __per_cpu_end - __per_cpu_start;
964 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
965 unsigned int cpu;
966
967 for_each_possible_cpu(cpu) {
968 void *start = per_cpu_ptr(base, cpu);
969
970 if ((void *)addr >= start && (void *)addr < start + static_size)
971 return true;
972 }
973#endif
974
975 return false;
976}
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001phys_addr_t per_cpu_ptr_to_phys(void *addr)
1002{
1003 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
1004 bool in_first_chunk = false;
1005 unsigned long first_low, first_high;
1006 unsigned int cpu;
1007
1008
1009
1010
1011
1012
1013 first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
1014 first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
1015 pcpu_unit_pages);
1016 if ((unsigned long)addr >= first_low &&
1017 (unsigned long)addr < first_high) {
1018 for_each_possible_cpu(cpu) {
1019 void *start = per_cpu_ptr(base, cpu);
1020
1021 if (addr >= start && addr < start + pcpu_unit_size) {
1022 in_first_chunk = true;
1023 break;
1024 }
1025 }
1026 }
1027
1028 if (in_first_chunk) {
1029 if (!is_vmalloc_addr(addr))
1030 return __pa(addr);
1031 else
1032 return page_to_phys(vmalloc_to_page(addr)) +
1033 offset_in_page(addr);
1034 } else
1035 return page_to_phys(pcpu_addr_to_page(addr)) +
1036 offset_in_page(addr);
1037}
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
1055 int nr_units)
1056{
1057 struct pcpu_alloc_info *ai;
1058 size_t base_size, ai_size;
1059 void *ptr;
1060 int unit;
1061
1062 base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
1063 __alignof__(ai->groups[0].cpu_map[0]));
1064 ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
1065
1066 ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
1067 if (!ptr)
1068 return NULL;
1069 ai = ptr;
1070 ptr += base_size;
1071
1072 ai->groups[0].cpu_map = ptr;
1073
1074 for (unit = 0; unit < nr_units; unit++)
1075 ai->groups[0].cpu_map[unit] = NR_CPUS;
1076
1077 ai->nr_groups = nr_groups;
1078 ai->__ai_size = PFN_ALIGN(ai_size);
1079
1080 return ai;
1081}
1082
1083
1084
1085
1086
1087
1088
1089void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
1090{
1091 free_bootmem(__pa(ai), ai->__ai_size);
1092}
1093
1094
1095
1096
1097
1098
1099
1100
1101static void pcpu_dump_alloc_info(const char *lvl,
1102 const struct pcpu_alloc_info *ai)
1103{
1104 int group_width = 1, cpu_width = 1, width;
1105 char empty_str[] = "--------";
1106 int alloc = 0, alloc_end = 0;
1107 int group, v;
1108 int upa, apl;
1109
1110 v = ai->nr_groups;
1111 while (v /= 10)
1112 group_width++;
1113
1114 v = num_possible_cpus();
1115 while (v /= 10)
1116 cpu_width++;
1117 empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';
1118
1119 upa = ai->alloc_size / ai->unit_size;
1120 width = upa * (cpu_width + 1) + group_width + 3;
1121 apl = rounddown_pow_of_two(max(60 / width, 1));
1122
1123 printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
1124 lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
1125 ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);
1126
1127 for (group = 0; group < ai->nr_groups; group++) {
1128 const struct pcpu_group_info *gi = &ai->groups[group];
1129 int unit = 0, unit_end = 0;
1130
1131 BUG_ON(gi->nr_units % upa);
1132 for (alloc_end += gi->nr_units / upa;
1133 alloc < alloc_end; alloc++) {
1134 if (!(alloc % apl)) {
1135 printk(KERN_CONT "\n");
1136 printk("%spcpu-alloc: ", lvl);
1137 }
1138 printk(KERN_CONT "[%0*d] ", group_width, group);
1139
1140 for (unit_end += upa; unit < unit_end; unit++)
1141 if (gi->cpu_map[unit] != NR_CPUS)
1142 printk(KERN_CONT "%0*d ", cpu_width,
1143 gi->cpu_map[unit]);
1144 else
1145 printk(KERN_CONT "%s ", empty_str);
1146 }
1147 }
1148 printk(KERN_CONT "\n");
1149}
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
1208 void *base_addr)
1209{
1210 static char cpus_buf[4096] __initdata;
1211 static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1212 static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
1213 size_t dyn_size = ai->dyn_size;
1214 size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
1215 struct pcpu_chunk *schunk, *dchunk = NULL;
1216 unsigned long *group_offsets;
1217 size_t *group_sizes;
1218 unsigned long *unit_off;
1219 unsigned int cpu;
1220 int *unit_map;
1221 int group, unit, i;
1222
1223 cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);
1224
1225#define PCPU_SETUP_BUG_ON(cond) do { \
1226 if (unlikely(cond)) { \
1227 pr_emerg("PERCPU: failed to initialize, %s", #cond); \
1228 pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf); \
1229 pcpu_dump_alloc_info(KERN_EMERG, ai); \
1230 BUG(); \
1231 } \
1232} while (0)
1233
1234
1235 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
1236#ifdef CONFIG_SMP
1237 PCPU_SETUP_BUG_ON(!ai->static_size);
1238 PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK);
1239#endif
1240 PCPU_SETUP_BUG_ON(!base_addr);
1241 PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK);
1242 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
1243 PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
1244 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
1245 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
1246 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
1247
1248
1249 group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
1250 group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
1251 unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
1252 unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));
1253
1254 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
1255 unit_map[cpu] = UINT_MAX;
1256
1257 pcpu_low_unit_cpu = NR_CPUS;
1258 pcpu_high_unit_cpu = NR_CPUS;
1259
1260 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
1261 const struct pcpu_group_info *gi = &ai->groups[group];
1262
1263 group_offsets[group] = gi->base_offset;
1264 group_sizes[group] = gi->nr_units * ai->unit_size;
1265
1266 for (i = 0; i < gi->nr_units; i++) {
1267 cpu = gi->cpu_map[i];
1268 if (cpu == NR_CPUS)
1269 continue;
1270
1271 PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
1272 PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
1273 PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);
1274
1275 unit_map[cpu] = unit + i;
1276 unit_off[cpu] = gi->base_offset + i * ai->unit_size;
1277
1278
1279 if (pcpu_low_unit_cpu == NR_CPUS ||
1280 unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
1281 pcpu_low_unit_cpu = cpu;
1282 if (pcpu_high_unit_cpu == NR_CPUS ||
1283 unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
1284 pcpu_high_unit_cpu = cpu;
1285 }
1286 }
1287 pcpu_nr_units = unit;
1288
1289 for_each_possible_cpu(cpu)
1290 PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
1291
1292
1293#undef PCPU_SETUP_BUG_ON
1294 pcpu_dump_alloc_info(KERN_DEBUG, ai);
1295
1296 pcpu_nr_groups = ai->nr_groups;
1297 pcpu_group_offsets = group_offsets;
1298 pcpu_group_sizes = group_sizes;
1299 pcpu_unit_map = unit_map;
1300 pcpu_unit_offsets = unit_off;
1301
1302
1303 pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
1304 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
1305 pcpu_atom_size = ai->atom_size;
1306 pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
1307 BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);
1308
1309
1310
1311
1312
1313 pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
1314 pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
1315 for (i = 0; i < pcpu_nr_slots; i++)
1316 INIT_LIST_HEAD(&pcpu_slot[i]);
1317
1318
1319
1320
1321
1322
1323
1324
1325 schunk = alloc_bootmem(pcpu_chunk_struct_size);
1326 INIT_LIST_HEAD(&schunk->list);
1327 schunk->base_addr = base_addr;
1328 schunk->map = smap;
1329 schunk->map_alloc = ARRAY_SIZE(smap);
1330 schunk->immutable = true;
1331 bitmap_fill(schunk->populated, pcpu_unit_pages);
1332
1333 if (ai->reserved_size) {
1334 schunk->free_size = ai->reserved_size;
1335 pcpu_reserved_chunk = schunk;
1336 pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size;
1337 } else {
1338 schunk->free_size = dyn_size;
1339 dyn_size = 0;
1340 }
1341 schunk->contig_hint = schunk->free_size;
1342
1343 schunk->map[schunk->map_used++] = -ai->static_size;
1344 if (schunk->free_size)
1345 schunk->map[schunk->map_used++] = schunk->free_size;
1346
1347
1348 if (dyn_size) {
1349 dchunk = alloc_bootmem(pcpu_chunk_struct_size);
1350 INIT_LIST_HEAD(&dchunk->list);
1351 dchunk->base_addr = base_addr;
1352 dchunk->map = dmap;
1353 dchunk->map_alloc = ARRAY_SIZE(dmap);
1354 dchunk->immutable = true;
1355 bitmap_fill(dchunk->populated, pcpu_unit_pages);
1356
1357 dchunk->contig_hint = dchunk->free_size = dyn_size;
1358 dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
1359 dchunk->map[dchunk->map_used++] = dchunk->free_size;
1360 }
1361
1362
1363 pcpu_first_chunk = dchunk ?: schunk;
1364 pcpu_chunk_relocate(pcpu_first_chunk, -1);
1365
1366
1367 pcpu_base_addr = base_addr;
1368 return 0;
1369}
1370
1371#ifdef CONFIG_SMP
1372
1373const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
1374 [PCPU_FC_AUTO] = "auto",
1375 [PCPU_FC_EMBED] = "embed",
1376 [PCPU_FC_PAGE] = "page",
1377};
1378
1379enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
1380
1381static int __init percpu_alloc_setup(char *str)
1382{
1383 if (0)
1384 ;
1385#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
1386 else if (!strcmp(str, "embed"))
1387 pcpu_chosen_fc = PCPU_FC_EMBED;
1388#endif
1389#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
1390 else if (!strcmp(str, "page"))
1391 pcpu_chosen_fc = PCPU_FC_PAGE;
1392#endif
1393 else
1394 pr_warning("PERCPU: unknown allocator %s specified\n", str);
1395
1396 return 0;
1397}
1398early_param("percpu_alloc", percpu_alloc_setup);
1399
1400
1401
1402
1403
1404
/*
 * Build the embedding first-chunk allocator when it is requested
 * explicitly or when the architecture has no per-cpu area setup of its
 * own.
 */
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

/* Build the page first-chunk allocator only when it is requested. */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif
1414
1415
1416#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
1439 size_t reserved_size, size_t dyn_size,
1440 size_t atom_size,
1441 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
1442{
1443 static int group_map[NR_CPUS] __initdata;
1444 static int group_cnt[NR_CPUS] __initdata;
1445 const size_t static_size = __per_cpu_end - __per_cpu_start;
1446 int nr_groups = 1, nr_units = 0;
1447 size_t size_sum, min_unit_size, alloc_size;
1448 int upa, max_upa, uninitialized_var(best_upa);
1449 int last_allocs, group, unit;
1450 unsigned int cpu, tcpu;
1451 struct pcpu_alloc_info *ai;
1452 unsigned int *cpu_map;
1453
1454
1455 memset(group_map, 0, sizeof(group_map));
1456 memset(group_cnt, 0, sizeof(group_cnt));
1457
1458
1459 size_sum = PFN_ALIGN(static_size + reserved_size +
1460 max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
1461 dyn_size = size_sum - static_size - reserved_size;
1462
1463
1464
1465
1466
1467
1468
1469 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
1470
1471 alloc_size = roundup(min_unit_size, atom_size);
1472 upa = alloc_size / min_unit_size;
1473 while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1474 upa--;
1475 max_upa = upa;
1476
1477
1478 for_each_possible_cpu(cpu) {
1479 group = 0;
1480 next_group:
1481 for_each_possible_cpu(tcpu) {
1482 if (cpu == tcpu)
1483 break;
1484 if (group_map[tcpu] == group && cpu_distance_fn &&
1485 (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
1486 cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
1487 group++;
1488 nr_groups = max(nr_groups, group + 1);
1489 goto next_group;
1490 }
1491 }
1492 group_map[cpu] = group;
1493 group_cnt[group]++;
1494 }
1495
1496
1497
1498
1499
1500
1501 last_allocs = INT_MAX;
1502 for (upa = max_upa; upa; upa--) {
1503 int allocs = 0, wasted = 0;
1504
1505 if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
1506 continue;
1507
1508 for (group = 0; group < nr_groups; group++) {
1509 int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
1510 allocs += this_allocs;
1511 wasted += this_allocs * upa - group_cnt[group];
1512 }
1513
1514
1515
1516
1517
1518
1519 if (wasted > num_possible_cpus() / 3)
1520 continue;
1521
1522
1523 if (allocs > last_allocs)
1524 break;
1525 last_allocs = allocs;
1526 best_upa = upa;
1527 }
1528 upa = best_upa;
1529
1530
1531 for (group = 0; group < nr_groups; group++)
1532 nr_units += roundup(group_cnt[group], upa);
1533
1534 ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
1535 if (!ai)
1536 return ERR_PTR(-ENOMEM);
1537 cpu_map = ai->groups[0].cpu_map;
1538
1539 for (group = 0; group < nr_groups; group++) {
1540 ai->groups[group].cpu_map = cpu_map;
1541 cpu_map += roundup(group_cnt[group], upa);
1542 }
1543
1544 ai->static_size = static_size;
1545 ai->reserved_size = reserved_size;
1546 ai->dyn_size = dyn_size;
1547 ai->unit_size = alloc_size / upa;
1548 ai->atom_size = atom_size;
1549 ai->alloc_size = alloc_size;
1550
1551 for (group = 0, unit = 0; group_cnt[group]; group++) {
1552 struct pcpu_group_info *gi = &ai->groups[group];
1553
1554
1555
1556
1557
1558
1559 gi->base_offset = unit * ai->unit_size;
1560
1561 for_each_possible_cpu(cpu)
1562 if (group_map[cpu] == group)
1563 gi->cpu_map[gi->nr_units++] = cpu;
1564 gi->nr_units = roundup(gi->nr_units, upa);
1565 unit += gi->nr_units;
1566 }
1567 BUG_ON(unit != nr_units);
1568
1569 return ai;
1570}
1571#endif
1572
1573#if defined(BUILD_EMBED_FIRST_CHUNK)
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
1607 size_t atom_size,
1608 pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
1609 pcpu_fc_alloc_fn_t alloc_fn,
1610 pcpu_fc_free_fn_t free_fn)
1611{
1612 void *base = (void *)ULONG_MAX;
1613 void **areas = NULL;
1614 struct pcpu_alloc_info *ai;
1615 size_t size_sum, areas_size, max_distance;
1616 int group, i, rc;
1617
1618 ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
1619 cpu_distance_fn);
1620 if (IS_ERR(ai))
1621 return PTR_ERR(ai);
1622
1623 size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
1624 areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
1625
1626 areas = alloc_bootmem_nopanic(areas_size);
1627 if (!areas) {
1628 rc = -ENOMEM;
1629 goto out_free;
1630 }
1631
1632
1633 for (group = 0; group < ai->nr_groups; group++) {
1634 struct pcpu_group_info *gi = &ai->groups[group];
1635 unsigned int cpu = NR_CPUS;
1636 void *ptr;
1637
1638 for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
1639 cpu = gi->cpu_map[i];
1640 BUG_ON(cpu == NR_CPUS);
1641
1642
1643 ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
1644 if (!ptr) {
1645 rc = -ENOMEM;
1646 goto out_free_areas;
1647 }
1648
1649 kmemleak_free(ptr);
1650 areas[group] = ptr;
1651
1652 base = min(ptr, base);
1653 }
1654
1655
1656
1657
1658
1659
1660 for (group = 0; group < ai->nr_groups; group++) {
1661 struct pcpu_group_info *gi = &ai->groups[group];
1662 void *ptr = areas[group];
1663
1664 for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
1665 if (gi->cpu_map[i] == NR_CPUS) {
1666
1667 free_fn(ptr, ai->unit_size);
1668 continue;
1669 }
1670
1671 memcpy(ptr, __per_cpu_load, ai->static_size);
1672 free_fn(ptr + size_sum, ai->unit_size - size_sum);
1673 }
1674 }
1675
1676
1677 max_distance = 0;
1678 for (group = 0; group < ai->nr_groups; group++) {
1679 ai->groups[group].base_offset = areas[group] - base;
1680 max_distance = max_t(size_t, max_distance,
1681 ai->groups[group].base_offset);
1682 }
1683 max_distance += ai->unit_size;
1684
1685
1686 if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
1687 pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
1688 "space 0x%lx\n", max_distance,
1689 (unsigned long)(VMALLOC_END - VMALLOC_START));
1690#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
1691
1692 rc = -EINVAL;
1693 goto out_free;
1694#endif
1695 }
1696
1697 pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
1698 PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
1699 ai->dyn_size, ai->unit_size);
1700
1701 rc = pcpu_setup_first_chunk(ai, base);
1702 goto out_free;
1703
1704out_free_areas:
1705 for (group = 0; group < ai->nr_groups; group++)
1706 free_fn(areas[group],
1707 ai->groups[group].nr_units * ai->unit_size);
1708out_free:
1709 pcpu_free_alloc_info(ai);
1710 if (areas)
1711 free_bootmem(__pa(areas), areas_size);
1712 return rc;
1713}
1714#endif
1715
1716#ifdef BUILD_PAGE_FIRST_CHUNK
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733int __init pcpu_page_first_chunk(size_t reserved_size,
1734 pcpu_fc_alloc_fn_t alloc_fn,
1735 pcpu_fc_free_fn_t free_fn,
1736 pcpu_fc_populate_pte_fn_t populate_pte_fn)
1737{
1738 static struct vm_struct vm;
1739 struct pcpu_alloc_info *ai;
1740 char psize_str[16];
1741 int unit_pages;
1742 size_t pages_size;
1743 struct page **pages;
1744 int unit, i, j, rc;
1745
1746 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
1747
1748 ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
1749 if (IS_ERR(ai))
1750 return PTR_ERR(ai);
1751 BUG_ON(ai->nr_groups != 1);
1752 BUG_ON(ai->groups[0].nr_units != num_possible_cpus());
1753
1754 unit_pages = ai->unit_size >> PAGE_SHIFT;
1755
1756
1757 pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
1758 sizeof(pages[0]));
1759 pages = alloc_bootmem(pages_size);
1760
1761
1762 j = 0;
1763 for (unit = 0; unit < num_possible_cpus(); unit++)
1764 for (i = 0; i < unit_pages; i++) {
1765 unsigned int cpu = ai->groups[0].cpu_map[unit];
1766 void *ptr;
1767
1768 ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
1769 if (!ptr) {
1770 pr_warning("PERCPU: failed to allocate %s page "
1771 "for cpu%u\n", psize_str, cpu);
1772 goto enomem;
1773 }
1774
1775 kmemleak_free(ptr);
1776 pages[j++] = virt_to_page(ptr);
1777 }
1778
1779
1780 vm.flags = VM_ALLOC;
1781 vm.size = num_possible_cpus() * ai->unit_size;
1782 vm_area_register_early(&vm, PAGE_SIZE);
1783
1784 for (unit = 0; unit < num_possible_cpus(); unit++) {
1785 unsigned long unit_addr =
1786 (unsigned long)vm.addr + unit * ai->unit_size;
1787
1788 for (i = 0; i < unit_pages; i++)
1789 populate_pte_fn(unit_addr + (i << PAGE_SHIFT));
1790
1791
1792 rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
1793 unit_pages);
1794 if (rc < 0)
1795 panic("failed to map percpu area, err=%d\n", rc);
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806 memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
1807 }
1808
1809
1810 pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n",
1811 unit_pages, psize_str, vm.addr, ai->static_size,
1812 ai->reserved_size, ai->dyn_size);
1813
1814 rc = pcpu_setup_first_chunk(ai, vm.addr);
1815 goto out_free_ar;
1816
1817enomem:
1818 while (--j >= 0)
1819 free_fn(page_address(pages[j]), PAGE_SIZE);
1820 rc = -ENOMEM;
1821out_free_ar:
1822 free_bootmem(__pa(pages), pages_size);
1823 pcpu_free_alloc_info(ai);
1824 return rc;
1825}
1826#endif
1827
1828#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
/*
 * Per-CPU offset table, indexed by CPU number and sized NR_CPUS.
 * The setup_per_cpu_areas() defined in this same #ifndef section stores,
 * for every possible CPU, (pcpu_base_addr - __per_cpu_start) plus
 * pcpu_unit_offsets[cpu].
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
1843
1844static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
1845 size_t align)
1846{
1847 return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
1848}
1849
1850static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
1851{
1852 free_bootmem(__pa(ptr), size);
1853}
1854
1855void __init setup_per_cpu_areas(void)
1856{
1857 unsigned long delta;
1858 unsigned int cpu;
1859 int rc;
1860
1861
1862
1863
1864
1865 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
1866 PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
1867 pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
1868 if (rc < 0)
1869 panic("Failed to initialize percpu areas.");
1870
1871 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
1872 for_each_possible_cpu(cpu)
1873 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
1874}
1875#endif
1876
1877#else
1878
1879
1880
1881
1882
1883
1884
1885
1886void __init setup_per_cpu_areas(void)
1887{
1888 const size_t unit_size =
1889 roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
1890 PERCPU_DYNAMIC_RESERVE));
1891 struct pcpu_alloc_info *ai;
1892 void *fc;
1893
1894 ai = pcpu_alloc_alloc_info(1, 1);
1895 fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
1896 if (!ai || !fc)
1897 panic("Failed to allocate memory for percpu areas.");
1898
1899 kmemleak_free(fc);
1900
1901 ai->dyn_size = unit_size;
1902 ai->unit_size = unit_size;
1903 ai->atom_size = unit_size;
1904 ai->alloc_size = unit_size;
1905 ai->groups[0].nr_units = 1;
1906 ai->groups[0].cpu_map[0] = 0;
1907
1908 if (pcpu_setup_first_chunk(ai, fc) < 0)
1909 panic("Failed to initialize percpu areas.");
1910}
1911
1912#endif
1913
1914
1915
1916
1917
1918
1919
1920void __init percpu_init_late(void)
1921{
1922 struct pcpu_chunk *target_chunks[] =
1923 { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
1924 struct pcpu_chunk *chunk;
1925 unsigned long flags;
1926 int i;
1927
1928 for (i = 0; (chunk = target_chunks[i]); i++) {
1929 int *map;
1930 const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
1931
1932 BUILD_BUG_ON(size > PAGE_SIZE);
1933
1934 map = pcpu_mem_zalloc(size);
1935 BUG_ON(!map);
1936
1937 spin_lock_irqsave(&pcpu_lock, flags);
1938 memcpy(map, chunk->map, size);
1939 chunk->map = map;
1940 spin_unlock_irqrestore(&pcpu_lock, flags);
1941 }
1942}
1943