/*
 * mm/percpu.c - percpu memory allocator
 *
 * The percpu allocator handles both static and dynamic percpu areas.
 * Memory is served in chunks; each chunk consists of one unit per
 * possible CPU, all units sharing a single allocation state.  The
 * first chunk covers the kernel's static percpu area and, depending
 * on configuration, an optional reserved area (used for module static
 * percpu variables) and the initial dynamic area.
 *
 * Allocation state within a chunk is kept in chunk->map: an array of
 * byte sizes where a positive entry describes a free area and a
 * negative entry an allocated one.  Chunks are sorted into slots by
 * free size so the allocator can quickly find a chunk that fits.
 *
 * Arch code can override __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr()
 * and must call pcpu_setup_first_chunk() (usually via one of the
 * first chunk helpers below) during early boot.
 */

#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>

#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */

#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr +		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */

struct pcpu_chunk {
	struct list_head	list;		/* linked to pcpu_slot lists */
	int			free_size;	/* free bytes in the chunk */
	int			contig_hint;	/* max contiguous size hint */
	void			*base_addr;	/* base address of this chunk */
	int			map_used;	/* # of map entries used */
	int			map_alloc;	/* # of map entries allocated */
	int			*map;		/* allocation map */
	void			*data;		/* chunk data */
	bool			immutable;	/* no [de]population allowed */
	unsigned long		populated[];	/* populated bitmap */
};

static int pcpu_unit_pages __read_mostly;
static int pcpu_unit_size __read_mostly;
static int pcpu_nr_units __read_mostly;
static int pcpu_atom_size __read_mostly;
static int pcpu_nr_slots __read_mostly;
static size_t pcpu_chunk_struct_size __read_mostly;

/* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __read_mostly;
static unsigned int pcpu_high_unit_cpu __read_mostly;

/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __read_mostly;
EXPORT_SYMBOL_GPL(pcpu_base_addr);

static const int *pcpu_unit_map __read_mostly;		/* cpu -> unit */
const unsigned long *pcpu_unit_offsets __read_mostly;	/* cpu -> unit offset */

/* group information, used for vm allocation */
static int pcpu_nr_groups __read_mostly;
static const unsigned long *pcpu_group_offsets __read_mostly;
static const size_t *pcpu_group_sizes __read_mostly;

/*
 * The first chunk which always exists.  Note that instead of special
 * flags, the first and reserved chunks are recognized by comparing
 * against these pointers.
 */
static struct pcpu_chunk *pcpu_first_chunk;

/*
 * Optional reserved chunk.  This chunk reserves part of the first
 * chunk and serves it for reserved allocations.  The amount of
 * reserved offset is in pcpu_reserved_chunk_limit.  When the reserved
 * area doesn't exist, the following variables contain NULL and 0
 * respectively.
 */
static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;

/*
 * Synchronization rules.
 *
 * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
 * protects allocation/reclaim paths, chunks, populated bitmap and
 * vmalloc mapping and may sleep.  The latter is a spinlock protecting
 * the area maps and the chunk slot lists.
 *
 * The allocation paths grab both locks in order while the free path
 * grabs only pcpu_lock, which keeps freeing usable from atomic
 * contexts.  A chunk which becomes fully free therefore can't be
 * destroyed directly from the free path; instead, reclaim is punted
 * to a workqueue item which grabs both locks.
 */
static DEFINE_MUTEX(pcpu_alloc_mutex);	/* protects whole alloc and reclaim */
static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */

static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */

/* reclaim work to release fully free chunks, scheduled from free path */
static void pcpu_reclaim(struct work_struct *work);
static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);

static bool pcpu_addr_in_first_chunk(void *addr)
{
	void *first_start = pcpu_first_chunk->base_addr;

	return addr >= first_start && addr < first_start + pcpu_unit_size;
}

static bool pcpu_addr_in_reserved_chunk(void *addr)
{
	void *first_start = pcpu_first_chunk->base_addr;

	return addr >= first_start &&
	       addr < first_start + pcpu_reserved_chunk_limit;
}

static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);	/* size is in bytes */
	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}
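
/*
 * Worked example (illustrative only): a chunk with 3072 free bytes has
 * fls(3072) = 12 and therefore sorts into slot 12 - 5 + 2 = 9, while a
 * 12-byte free area maps to max(4 - 5 + 2, 1) = 1.  Larger free areas
 * thus live in higher-numbered slots, which pcpu_alloc() scans upward
 * starting from the slot matching the requested size.
 */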

static int pcpu_size_to_slot(int size)
{
	if (size == pcpu_unit_size)
		return pcpu_nr_slots - 1;
	return __pcpu_size_to_slot(size);
}

static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int))
		return 0;

	return pcpu_size_to_slot(chunk->free_size);
}

/* set the pointer to a chunk in a page struct */
static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
{
	page->index = (unsigned long)pcpu;
}

/* obtain pointer to a chunk from a page struct */
static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
{
	return (struct pcpu_chunk *)page->index;
}

static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
{
	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
}

static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
		(page_idx << PAGE_SHIFT);
}

static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk,
					   int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(chunk->populated, end, *rs);
	*re = find_next_bit(chunk->populated, end, *rs + 1);
}

static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
					 int *rs, int *re, int end)
{
	*rs = find_next_bit(chunk->populated, end, *rs);
	*re = find_next_zero_bit(chunk->populated, end, *rs + 1);
}

/*
 * (Un)populated page region iterators.  Iterate over (un)populated
 * page regions between @start and @end in @chunk.  @rs and @re should
 * be integer variables and will be set to start and end page index of
 * the current region.
 */
#define pcpu_for_each_unpop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end));   \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
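
/*
 * Example (illustrative only): a backing implementation can walk the
 * page regions of a chunk which still need backing pages like this:
 *
 *	int rs, re;
 *
 *	pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages)
 *		pr_debug("pages [%d, %d) of each unit are unpopulated\n",
 *			 rs, re);
 */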

/**
 * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
 *
 * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
 * kzalloc() is used; otherwise, vzalloc() is used.  The returned
 * memory is always zeroed.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
static void *pcpu_mem_zalloc(size_t size)
{
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;

	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);
	else
		return vzalloc(size);
}

/**
 * pcpu_mem_free - free memory
 * @ptr: memory to free
 * @size: size of the area
 *
 * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
 */
static void pcpu_mem_free(void *ptr, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(ptr);
	else
		vfree(ptr);
}

/**
 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
 * @chunk: chunk of interest
 * @oslot: the previous slot it was on
 *
 * This function is called after an allocation or free changed @chunk.
 * New slot according to the changed state is determined and @chunk is
 * moved to the slot.  Note that the reserved chunk is never put on
 * chunk slots.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
	int nslot = pcpu_chunk_slot(chunk);

	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
		if (oslot < nslot)
			list_move(&chunk->list, &pcpu_slot[nslot]);
		else
			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
	}
}

/**
 * pcpu_need_to_extend - determine whether chunk area map needs to be extended
 * @chunk: chunk of interest
 *
 * Determine whether area map of @chunk needs to be extended to
 * accommodate a new allocation.
 *
 * CONTEXT:
 * pcpu_lock.
 *
 * RETURNS:
 * New target map allocation length if extension is necessary, 0
 * otherwise.
 */
static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
{
	int new_alloc;

	if (chunk->map_alloc >= chunk->map_used + 2)
		return 0;

	new_alloc = PCPU_DFL_MAP_ALLOC;
	while (new_alloc < chunk->map_used + 2)
		new_alloc *= 2;

	return new_alloc;
}
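
/*
 * Worked example (illustrative only): an allocation can split one map
 * entry into up to three (head, area, tail), so two spare slots must
 * always be kept.  With map_used == 31 and map_alloc == 32, 31 + 2 > 32,
 * and the target doubles from PCPU_DFL_MAP_ALLOC (16) to 32 and then to
 * 64, so pcpu_need_to_extend() returns 64.
 */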

/**
 * pcpu_extend_area_map - extend area map of a chunk
 * @chunk: chunk of interest
 * @new_alloc: new target allocation length of the area map
 *
 * Extend area map of @chunk to have @new_alloc entries.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
{
	int *old = NULL, *new = NULL;
	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
	unsigned long flags;

	new = pcpu_mem_zalloc(new_size);
	if (!new)
		return -ENOMEM;

	/* acquire pcpu_lock and switch to new area map */
	spin_lock_irqsave(&pcpu_lock, flags);

	if (new_alloc <= chunk->map_alloc)
		goto out_unlock;

	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
	old = chunk->map;

	memcpy(new, old, old_size);

	chunk->map_alloc = new_alloc;
	chunk->map = new;
	new = NULL;

out_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/*
	 * pcpu_mem_free() might end up calling vfree() which uses
	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
	 */
	pcpu_mem_free(old, old_size);
	pcpu_mem_free(new, new_size);

	return 0;
}

/**
 * pcpu_split_block - split a map block
 * @chunk: chunk of interest
 * @i: index of map block to split
 * @head: head size in bytes (can be 0)
 * @tail: tail size in bytes (can be 0)
 *
 * Split the @i'th map block into two or three blocks.  If @head is
 * non-zero, @head bytes block is inserted before block @i moving it
 * to @i+1 and reducing its size by @head bytes.
 *
 * If @tail is non-zero, the target block, which can be @i or @i+1
 * depending on @head, is reduced by @tail bytes and @tail byte block
 * is inserted after the target block.
 *
 * @chunk->map must have enough free slots to accommodate the split.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
			     int head, int tail)
{
	int nr_extra = !!head + !!tail;

	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);

	/* insert new subblocks */
	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
		sizeof(chunk->map[0]) * (chunk->map_used - i));
	chunk->map_used += nr_extra;

	if (head) {
		chunk->map[i + 1] = chunk->map[i] - head;
		chunk->map[i++] = head;
	}
	if (tail) {
		chunk->map[i++] -= tail;
		chunk->map[i] = tail;
	}
}
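
/*
 * Worked example (illustrative only): take a map { -512, 768 } where
 * entry 1 is a 768-byte free block.  Splitting entry 1 with head = 256
 * and tail = 128 yields { -512, 256, 384, 128 }; pcpu_alloc_area()
 * then negates the 384-byte entry when it hands that area out.
 */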

/**
 * pcpu_alloc_area - allocate area from a pcpu_chunk
 * @chunk: chunk of interest
 * @size: wanted size in bytes
 * @align: wanted align
 *
 * Try to allocate @size bytes area aligned at @align from @chunk.
 * Note that this function only allocates the offset.  It doesn't
 * populate or map the area.
 *
 * @chunk->map must have at least two free slots.
 *
 * CONTEXT:
 * pcpu_lock.
 *
 * RETURNS:
 * Allocated offset in @chunk on success, -1 if no matching area is
 * found.
 */
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
{
	int oslot = pcpu_chunk_slot(chunk);
	int max_contig = 0;
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
		bool is_last = i + 1 == chunk->map_used;
		int head, tail;

		/* extra for alignment requirement */
		head = ALIGN(off, align) - off;
		BUG_ON(i == 0 && head != 0);

		if (chunk->map[i] < 0)
			continue;
		if (chunk->map[i] < head + size) {
			max_contig = max(chunk->map[i], max_contig);
			continue;
		}

		/*
		 * If head is small or the previous block is free,
		 * merge'em.  Note that 'small' is defined as smaller
		 * than sizeof(int), which is very small but isn't too
		 * uncommon for percpu allocations.
		 */
		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
			if (chunk->map[i - 1] > 0)
				chunk->map[i - 1] += head;
			else {
				chunk->map[i - 1] -= head;
				chunk->free_size -= head;
			}
			chunk->map[i] -= head;
			off += head;
			head = 0;
		}

		/* if tail is small, just keep it around */
		tail = chunk->map[i] - head - size;
		if (tail < sizeof(int))
			tail = 0;

		/* split if warranted */
		if (head || tail) {
			pcpu_split_block(chunk, i, head, tail);
			if (head) {
				i++;
				off += head;
				max_contig = max(chunk->map[i - 1], max_contig);
			}
			if (tail)
				max_contig = max(chunk->map[i + 1], max_contig);
		}

		/* update hint and mark allocated */
		if (is_last)
			chunk->contig_hint = max_contig; /* fully scanned */
		else
			chunk->contig_hint = max(chunk->contig_hint,
						 max_contig);

		chunk->free_size -= chunk->map[i];
		chunk->map[i] = -chunk->map[i];

		pcpu_chunk_relocate(chunk, oslot);
		return off;
	}

	chunk->contig_hint = max_contig;	/* fully scanned */
	pcpu_chunk_relocate(chunk, oslot);

	/* tell the upper layer that this chunk has no matching area */
	return -1;
}

/**
 * pcpu_free_area - free area to a pcpu_chunk
 * @chunk: chunk of interest
 * @freeme: offset of area to free
 *
 * Free area starting from @freeme to @chunk.  Note that this function
 * only modifies the allocation map.  It doesn't depopulate or unmap
 * the area.
 *
 * CONTEXT:
 * pcpu_lock.
 */
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
{
	int oslot = pcpu_chunk_slot(chunk);
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
		if (off == freeme)
			break;
	BUG_ON(off != freeme);
	BUG_ON(chunk->map[i] > 0);

	chunk->map[i] = -chunk->map[i];
	chunk->free_size += chunk->map[i];

	/* merge with previous? */
	if (i > 0 && chunk->map[i - 1] >= 0) {
		chunk->map[i - 1] += chunk->map[i];
		chunk->map_used--;
		memmove(&chunk->map[i], &chunk->map[i + 1],
			(chunk->map_used - i) * sizeof(chunk->map[0]));
		i--;
	}

	/* merge with next? */
	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
		chunk->map[i] += chunk->map[i + 1];
		chunk->map_used--;
		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
	}

	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
	pcpu_chunk_relocate(chunk, oslot);
}
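
/*
 * Worked example (illustrative only): freeing the 384-byte area from
 * the map { -512, 256, -384, 128 } first flips the entry positive,
 * then merges it with the free neighbors on both sides, leaving
 * { -512, 768 } with free_size increased by 384.
 */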

static struct pcpu_chunk *pcpu_alloc_chunk(void)
{
	struct pcpu_chunk *chunk;

	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
	if (!chunk)
		return NULL;

	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
						sizeof(chunk->map[0]));
	if (!chunk->map) {
		kfree(chunk);
		return NULL;
	}

	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
	chunk->map[chunk->map_used++] = pcpu_unit_size;

	INIT_LIST_HEAD(&chunk->list);
	chunk->free_size = pcpu_unit_size;
	chunk->contig_hint = pcpu_unit_size;

	return chunk;
}

static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;
	pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
	pcpu_mem_free(chunk, pcpu_chunk_struct_size);
}

/*
 * Chunk management implementation.
 *
 * To allow different implementations, chunk alloc/free and
 * [de]population are implemented in a separate file which is pulled
 * into this file and compiled together.  The following functions
 * should be implemented.
 *
 * pcpu_populate_chunk		- populate the specified range of a chunk
 * pcpu_depopulate_chunk	- depopulate the specified range of a chunk
 * pcpu_create_chunk		- create a new chunk
 * pcpu_destroy_chunk		- destroy a chunk, always preceded by full depop
 * pcpu_addr_to_page		- translate address to physical address
 * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
static struct pcpu_chunk *pcpu_create_chunk(void);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);

#ifdef CONFIG_NEED_PER_CPU_KM
#include "percpu-km.c"
#else
#include "percpu-vm.c"
#endif

/**
 * pcpu_chunk_addr_search - determine chunk containing specified address
 * @addr: address for which the chunk needs to be determined.
 *
 * RETURNS:
 * The address of the found chunk.
 */
static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
{
	/* is it in the first chunk? */
	if (pcpu_addr_in_first_chunk(addr)) {
		/* is it in the reserved area? */
		if (pcpu_addr_in_reserved_chunk(addr))
			return pcpu_reserved_chunk;
		return pcpu_first_chunk;
	}

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc
	 * space.  Note that any possible cpu id can be used here, so
	 * there's no need to worry about preemption or cpu hotplug.
	 */
	addr += pcpu_unit_offsets[raw_smp_processor_id()];
	return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}

/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 *
 * Allocate percpu area of @size bytes aligned at @align.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
{
	static int warn_limit = 10;
	struct pcpu_chunk *chunk;
	const char *err;
	int slot, off, new_alloc;
	unsigned long flags;
	void __percpu *ptr;

	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
		WARN(true, "illegal size (%zu) or align (%zu) for "
		     "percpu allocation\n", size, align);
		return NULL;
	}

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irqsave(&pcpu_lock, flags);

	/* serve reserved allocations from the reserved chunk if available */
	if (reserved && pcpu_reserved_chunk) {
		chunk = pcpu_reserved_chunk;

		if (size > chunk->contig_hint) {
			err = "alloc from reserved chunk failed";
			goto fail_unlock;
		}

		while ((new_alloc = pcpu_need_to_extend(chunk))) {
			spin_unlock_irqrestore(&pcpu_lock, flags);
			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
				err = "failed to extend area map of reserved chunk";
				goto fail_unlock_mutex;
			}
			spin_lock_irqsave(&pcpu_lock, flags);
		}

		off = pcpu_alloc_area(chunk, size, align);
		if (off >= 0)
			goto area_found;

		err = "alloc from reserved chunk failed";
		goto fail_unlock;
	}

restart:
	/* search through normal chunks */
	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			if (size > chunk->contig_hint)
				continue;

			new_alloc = pcpu_need_to_extend(chunk);
			if (new_alloc) {
				spin_unlock_irqrestore(&pcpu_lock, flags);
				if (pcpu_extend_area_map(chunk,
							 new_alloc) < 0) {
					err = "failed to extend area map";
					goto fail_unlock_mutex;
				}
				spin_lock_irqsave(&pcpu_lock, flags);
				/*
				 * pcpu_lock has been dropped, need to
				 * restart cpu_slot list walking.
				 */
				goto restart;
			}

			off = pcpu_alloc_area(chunk, size, align);
			if (off >= 0)
				goto area_found;
		}
	}

	/* hmmm... no space left, create a new chunk */
	spin_unlock_irqrestore(&pcpu_lock, flags);

	chunk = pcpu_create_chunk();
	if (!chunk) {
		err = "failed to allocate new chunk";
		goto fail_unlock_mutex;
	}

	spin_lock_irqsave(&pcpu_lock, flags);
	pcpu_chunk_relocate(chunk, -1);
	goto restart;

area_found:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* populate, map and clear the area */
	if (pcpu_populate_chunk(chunk, off, size)) {
		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_free_area(chunk, off);
		err = "failed to populate";
		goto fail_unlock;
	}

	mutex_unlock(&pcpu_alloc_mutex);

	/* return address relative to base address */
	ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
	kmemleak_alloc_percpu(ptr, size);
	return ptr;

fail_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);
fail_unlock_mutex:
	mutex_unlock(&pcpu_alloc_mutex);
	if (warn_limit) {
		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
			   "%s\n", size, align, err);
		dump_stack();
		if (!--warn_limit)
			pr_info("PERCPU: limit reached, disable warning\n");
	}
	return NULL;
}

/**
 * __alloc_percpu - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.
 * Might sleep.  Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, false);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);
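
/*
 * Example (illustrative only): typical dynamic percpu usage from a
 * module or driver, via the alloc_percpu() wrapper in linux/percpu.h:
 *
 *	unsigned long __percpu *counters;
 *	unsigned long sum = 0;
 *	unsigned int cpu;
 *
 *	counters = alloc_percpu(unsigned long);
 *	if (!counters)
 *		return -ENOMEM;
 *	this_cpu_inc(*counters);
 *	for_each_possible_cpu(cpu)
 *		sum += *per_cpu_ptr(counters, cpu);
 *	free_percpu(counters);
 */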

/**
 * __alloc_reserved_percpu - allocate reserved percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align
 * from reserved percpu area if arch has set it up; otherwise,
 * allocation is served from the same dynamic area.  Might sleep.
 * Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, true);
}

/**
 * pcpu_reclaim - reclaim fully free chunks, workqueue function
 * @work: unused
 *
 * Reclaim all fully free chunks except for the first one.
 *
 * CONTEXT:
 * workqueue context.
 */
static void pcpu_reclaim(struct work_struct *work)
{
	LIST_HEAD(todo);
	struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1];
	struct pcpu_chunk *chunk, *next;

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, head, list) {
		WARN_ON(chunk->immutable);

		/* spare the first one */
		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
			continue;

		list_move(&chunk->list, &todo);
	}

	spin_unlock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, &todo, list) {
		pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size);
		pcpu_destroy_chunk(chunk);
	}

	mutex_unlock(&pcpu_alloc_mutex);
}

/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.
 *
 * CONTEXT:
 * Can be called from atomic context.
 */
void free_percpu(void __percpu *ptr)
{
	void *addr;
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int off;

	if (!ptr)
		return;

	kmemleak_free_percpu(ptr);

	addr = __pcpu_ptr_to_addr(ptr);

	spin_lock_irqsave(&pcpu_lock, flags);

	chunk = pcpu_chunk_addr_search(addr);
	off = addr - chunk->base_addr;

	pcpu_free_area(chunk, off);

	/* if there are other fully free chunks, wake up grim reaper */
	if (chunk->free_size == pcpu_unit_size) {
		struct pcpu_chunk *pos;

		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
			if (pos != chunk) {
				schedule_work(&pcpu_reclaim_work);
				break;
			}
	}

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
EXPORT_SYMBOL_GPL(free_percpu);

/**
 * is_kernel_percpu_address - test whether address is from static percpu area
 * @addr: address to test
 *
 * Test whether @addr belongs to in-kernel static percpu area.  Module
 * static percpu areas are not considered.  For those, use
 * is_module_percpu_address().
 *
 * RETURNS:
 * %true if @addr is from in-kernel static percpu area, %false otherwise.
 */
bool is_kernel_percpu_address(unsigned long addr)
{
#ifdef CONFIG_SMP
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		void *start = per_cpu_ptr(base, cpu);

		if ((void *)addr >= start && (void *)addr < start + static_size)
			return true;
	}
#endif
	/* on UP, can't distinguish from other static vars, always false */
	return false;
}

/**
 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
 * @addr: the address to be converted to physical address
 *
 * Given @addr which is dereferenceable address obtained via one of
 * percpu access macros, this function translates it into its physical
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
 * percpu allocator has special setup for the first chunk, which currently
 * supports either embedding in linear address space or vmalloc mapping,
 * and, from the second one, the backing allocator (currently either vm or
 * km) provides translation.
 *
 * The addr can be translated simply without checking if it falls into the
 * first chunk.  But the current code reflects better how percpu allocator
 * actually works, and the verification can discover both bugs in percpu
 * allocator itself and per_cpu_ptr_to_phys() callers.  So we keep current
 * code.
 *
 * RETURNS:
 * The physical address for @addr.
 */
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
	unsigned long first_low, first_high;
	unsigned int cpu;

	/*
	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 */
	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
				     pcpu_unit_pages);
	if ((unsigned long)addr >= first_low &&
	    (unsigned long)addr < first_high) {
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);

			if (addr >= start && addr < start + pcpu_unit_size) {
				in_first_chunk = true;
				break;
			}
		}
	}

	if (in_first_chunk) {
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
			return page_to_phys(vmalloc_to_page(addr)) +
			       offset_in_page(addr);
	} else
		return page_to_phys(pcpu_addr_to_page(addr)) +
		       offset_in_page(addr);
}
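
/*
 * Example (illustrative only): obtaining the physical address of one
 * CPU's instance of a dynamically allocated percpu object (pcpu_var
 * being a percpu pointer returned by alloc_percpu()), e.g. to program
 * it into a hardware descriptor:
 *
 *	void *va = per_cpu_ptr(pcpu_var, cpu);
 *	phys_addr_t pa = per_cpu_ptr_to_phys(va);
 */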

/**
 * pcpu_alloc_alloc_info - allocate percpu allocation info
 * @nr_groups: the number of groups
 * @nr_units: the number of units
 *
 * Allocate ai which is large enough for @nr_groups groups containing
 * @nr_units units.  The returned ai's groups[0].cpu_map points to the
 * cpu_map array which is long enough for @nr_units and filled with
 * NR_CPUS.  It's the caller's responsibility to initialize cpu_map
 * pointer of other groups.
 *
 * RETURNS:
 * Pointer to the allocated pcpu_alloc_info on success, NULL on
 * failure.
 */
struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
						      int nr_units)
{
	struct pcpu_alloc_info *ai;
	size_t base_size, ai_size;
	void *ptr;
	int unit;

	base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
			  __alignof__(ai->groups[0].cpu_map[0]));
	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

	ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
	if (!ptr)
		return NULL;
	ai = ptr;
	ptr += base_size;

	ai->groups[0].cpu_map = ptr;

	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;

	ai->nr_groups = nr_groups;
	ai->__ai_size = PFN_ALIGN(ai_size);

	return ai;
}

/**
 * pcpu_free_alloc_info - free percpu allocation info
 * @ai: pcpu_alloc_info to free
 *
 * Free @ai which was allocated by pcpu_alloc_alloc_info().
 */
void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
{
	free_bootmem(__pa(ai), ai->__ai_size);
}

/**
 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
 * @lvl: loglevel
 * @ai: allocation info to dump
 *
 * Print out information about @ai using loglevel @lvl.
 */
static void pcpu_dump_alloc_info(const char *lvl,
				 const struct pcpu_alloc_info *ai)
{
	int group_width = 1, cpu_width = 1, width;
	char empty_str[] = "--------";
	int alloc = 0, alloc_end = 0;
	int group, v;
	int upa, apl;	/* units per alloc, allocs per line */

	v = ai->nr_groups;
	while (v /= 10)
		group_width++;

	v = num_possible_cpus();
	while (v /= 10)
		cpu_width++;
	empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';

	upa = ai->alloc_size / ai->unit_size;
	width = upa * (cpu_width + 1) + group_width + 3;
	apl = rounddown_pow_of_two(max(60 / width, 1));

	printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
	       lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
	       ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);

	for (group = 0; group < ai->nr_groups; group++) {
		const struct pcpu_group_info *gi = &ai->groups[group];
		int unit = 0, unit_end = 0;

		BUG_ON(gi->nr_units % upa);
		for (alloc_end += gi->nr_units / upa;
		     alloc < alloc_end; alloc++) {
			if (!(alloc % apl)) {
				printk(KERN_CONT "\n");
				printk("%spcpu-alloc: ", lvl);
			}
			printk(KERN_CONT "[%0*d] ", group_width, group);

			for (unit_end += upa; unit < unit_end; unit++)
				if (gi->cpu_map[unit] != NR_CPUS)
					printk(KERN_CONT "%0*d ", cpu_width,
					       gi->cpu_map[unit]);
				else
					printk(KERN_CONT "%s ", empty_str);
		}
	}
	printk(KERN_CONT "\n");
}

/**
 * pcpu_setup_first_chunk - initialize the first percpu chunk
 * @ai: pcpu_alloc_info describing how the percpu area is shaped
 * @base_addr: mapped address
 *
 * Initialize the first percpu chunk which contains the kernel static
 * percpu area.  This function is to be called from arch percpu area
 * setup path.
 *
 * @ai contains all information necessary to initialize the first
 * chunk and prime the dynamic percpu allocator.
 *
 * @ai->static_size is the size of static percpu area.
 *
 * @ai->reserved_size, if non-zero, specifies the amount of bytes to
 * reserve after the static area in the first chunk.  This reserves
 * the first chunk such that it's available only through reserved
 * percpu allocation.  This is primarily used to serve module percpu
 * static areas on architectures where the addressing model has
 * limited offset range for symbol relocations to guarantee module
 * percpu symbols fall inside the relocatable range.
 *
 * @ai->dyn_size determines the number of bytes available for dynamic
 * allocation in the first chunk.  The area between @ai->static_size +
 * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused.
 *
 * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE
 * and equal to or larger than @ai->static_size + @ai->reserved_size +
 * @ai->dyn_size.
 *
 * @ai->atom_size is the allocation atom size and used as alignment
 * for vm areas.
 *
 * @ai->alloc_size is the allocation size and always multiple of
 * @ai->atom_size.  This is larger than @ai->atom_size if
 * @ai->unit_size is larger than @ai->atom_size.
 *
 * @ai->nr_groups and @ai->groups describe virtual memory layout of
 * percpu areas.  Units which should be colocated are put into the
 * same group.  Dynamic VM areas will be allocated according to these
 * groupings.
 *
 * The caller should have mapped the first chunk at @base_addr and
 * copied static data to each unit.
 *
 * If the first chunk ends up with both reserved and dynamic areas, it
 * is served by two chunks - one to serve the core static and reserved
 * areas and the other for the dynamic area.  They share the same vm
 * and page map but use different area allocation maps to stagger
 * which areas are in use.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
				  void *base_addr)
{
	static char cpus_buf[4096] __initdata;
	static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
	static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
	size_t dyn_size = ai->dyn_size;
	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
	struct pcpu_chunk *schunk, *dchunk = NULL;
	unsigned long *group_offsets;
	size_t *group_sizes;
	unsigned long *unit_off;
	unsigned int cpu;
	int *unit_map;
	int group, unit, i;

	cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);

#define PCPU_SETUP_BUG_ON(cond)	do {					\
	if (unlikely(cond)) {						\
		pr_emerg("PERCPU: failed to initialize, %s\n", #cond);	\
		pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf);	\
		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
		BUG();							\
	}								\
} while (0)

	/* sanity checks */
	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
#ifdef CONFIG_SMP
	PCPU_SETUP_BUG_ON(!ai->static_size);
	PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK);
#endif
	PCPU_SETUP_BUG_ON(!base_addr);
	PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK);
	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);

	/* process group information and build config tables accordingly */
	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
	group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
	unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;

	pcpu_low_unit_cpu = NR_CPUS;
	pcpu_high_unit_cpu = NR_CPUS;

	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];

		group_offsets[group] = gi->base_offset;
		group_sizes[group] = gi->nr_units * ai->unit_size;

		for (i = 0; i < gi->nr_units; i++) {
			cpu = gi->cpu_map[i];
			if (cpu == NR_CPUS)
				continue;

			PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

			/* determine low/high unit_cpu */
			if (pcpu_low_unit_cpu == NR_CPUS ||
			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
				pcpu_low_unit_cpu = cpu;
			if (pcpu_high_unit_cpu == NR_CPUS ||
			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;

	for_each_possible_cpu(cpu)
		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);

	/* we're done parsing the input, undefine BUG macro and dump config */
#undef PCPU_SETUP_BUG_ON
	pcpu_dump_alloc_info(KERN_DEBUG, ai);

	pcpu_nr_groups = ai->nr_groups;
	pcpu_group_offsets = group_offsets;
	pcpu_group_sizes = group_sizes;
	pcpu_unit_map = unit_map;
	pcpu_unit_offsets = unit_off;

	/* determine basic parameters */
	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
	pcpu_atom_size = ai->atom_size;
	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);

	/*
	 * Allocate chunk slots.  The additional last slot is for
	 * empty chunks.
	 */
	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
	pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
	for (i = 0; i < pcpu_nr_slots; i++)
		INIT_LIST_HEAD(&pcpu_slot[i]);

	/*
	 * Initialize static chunk.  If reserved_size is zero, the
	 * static chunk covers static area + dynamic allocation area
	 * in the first chunk.  If reserved_size is not zero, it
	 * covers static area + reserved area (mostly used for module
	 * static percpu allocation).
	 */
	schunk = alloc_bootmem(pcpu_chunk_struct_size);
	INIT_LIST_HEAD(&schunk->list);
	schunk->base_addr = base_addr;
	schunk->map = smap;
	schunk->map_alloc = ARRAY_SIZE(smap);
	schunk->immutable = true;
	bitmap_fill(schunk->populated, pcpu_unit_pages);

	if (ai->reserved_size) {
		schunk->free_size = ai->reserved_size;
		pcpu_reserved_chunk = schunk;
		pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size;
	} else {
		schunk->free_size = dyn_size;
		dyn_size = 0;			/* dynamic area covered */
	}
	schunk->contig_hint = schunk->free_size;

	schunk->map[schunk->map_used++] = -ai->static_size;
	if (schunk->free_size)
		schunk->map[schunk->map_used++] = schunk->free_size;

	/* init dynamic chunk if necessary */
	if (dyn_size) {
		dchunk = alloc_bootmem(pcpu_chunk_struct_size);
		INIT_LIST_HEAD(&dchunk->list);
		dchunk->base_addr = base_addr;
		dchunk->map = dmap;
		dchunk->map_alloc = ARRAY_SIZE(dmap);
		dchunk->immutable = true;
		bitmap_fill(dchunk->populated, pcpu_unit_pages);

		dchunk->contig_hint = dchunk->free_size = dyn_size;
		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
		dchunk->map[dchunk->map_used++] = dchunk->free_size;
	}

	/* link the first chunk in */
	pcpu_first_chunk = dchunk ?: schunk;
	pcpu_chunk_relocate(pcpu_first_chunk, -1);

	/* we're done */
	pcpu_base_addr = base_addr;
	return 0;
}
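
/*
 * Illustrative first chunk layout with a reserved area (sizes are
 * examples only, every unit looks the same):
 *
 *	base_addr
 *	|  static  | reserved |     dynamic      |   unused   |
 *	|<-- schunk serves -->|<- dchunk serves ->|
 *
 * schunk's area map is { -static_size, reserved_size } and dchunk's is
 * { -(static_size + reserved_size), dyn_size }, so the two maps never
 * hand out overlapping offsets.
 */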

#ifdef CONFIG_SMP

const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
	[PCPU_FC_AUTO]	= "auto",
	[PCPU_FC_EMBED]	= "embed",
	[PCPU_FC_PAGE]	= "page",
};

enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;

static int __init percpu_alloc_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (0)
		/* nada */;
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
	else if (!strcmp(str, "embed"))
		pcpu_chosen_fc = PCPU_FC_EMBED;
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	else if (!strcmp(str, "page"))
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	else
		pr_warning("PERCPU: unknown allocator %s specified\n", str);

	return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);

/*
 * pcpu_embed_first_chunk() is used by the generic percpu setup.
 * Build it if needed by the arch config or the generic setup is going
 * to be used.
 */
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif

/* build pcpu_page_first_chunk() iff needed by the arch config */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif

/* pcpu_build_alloc_info() is used by both embed and page first chunk */
#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
/**
 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
 * This function determines grouping of units, their mappings to cpus
 * and other parameters considering needed percpu size, allocation
 * atom size and distances between CPUs.
 *
 * Groups are always multiples of atom size and CPUs which are of
 * LOCAL_DISTANCE both ways are grouped together and share space for
 * units in the same group.  The returned configuration is guaranteed
 * to have CPUs on different nodes on different groups and >=75% usage
 * of allocated virtual address space.
 *
 * RETURNS:
 * On success, pointer to the new allocation_info is returned.  On
 * failure, ERR_PTR value is returned.
 */
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
				size_t reserved_size, size_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
	static int group_map[NR_CPUS] __initdata;
	static int group_cnt[NR_CPUS] __initdata;
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	int nr_groups = 1, nr_units = 0;
	size_t size_sum, min_unit_size, alloc_size;
	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
	int last_allocs, group, unit;
	unsigned int cpu, tcpu;
	struct pcpu_alloc_info *ai;
	unsigned int *cpu_map;

	/* this function may be called multiple times */
	memset(group_map, 0, sizeof(group_map));
	memset(group_cnt, 0, sizeof(group_cnt));

	/* calculate size_sum and ensure dyn_size is enough for early alloc */
	size_sum = PFN_ALIGN(static_size + reserved_size +
			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
	dyn_size = size_sum - static_size - reserved_size;

	/*
	 * Determine min_unit_size, alloc_size and max_upa such that
	 * alloc_size is multiple of atom_size and is the smallest
	 * which can accommodate 4k aligned segments which are equal to
	 * or larger than min_unit_size.
	 */
	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	alloc_size = roundup(min_unit_size, atom_size);
	upa = alloc_size / min_unit_size;
	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
		upa--;
	max_upa = upa;

	/* group cpus according to their proximity */
	for_each_possible_cpu(cpu) {
		group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (cpu == tcpu)
				break;
			if (group_map[tcpu] == group && cpu_distance_fn &&
			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				nr_groups = max(nr_groups, group + 1);
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
	}

	/*
	 * Expand unit size until address space usage goes over 75%
	 * and then as much as possible without using more address
	 * space.
	 */
	last_allocs = INT_MAX;
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
			continue;

		for (group = 0; group < nr_groups; group++) {
			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[group];
		}

		/*
		 * Don't accept if wastage is over 1/3.  The
		 * greater-than comparison ensures upa==1 always
		 * passes the following check.
		 */
		if (wasted > num_possible_cpus() / 3)
			continue;

		/* and then don't consume more memory */
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}
	upa = best_upa;

	/* allocate and fill alloc_info */
	for (group = 0; group < nr_groups; group++)
		nr_units += roundup(group_cnt[group], upa);

	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
	if (!ai)
		return ERR_PTR(-ENOMEM);
	cpu_map = ai->groups[0].cpu_map;

	for (group = 0; group < nr_groups; group++) {
		ai->groups[group].cpu_map = cpu_map;
		cpu_map += roundup(group_cnt[group], upa);
	}

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = alloc_size / upa;
	ai->atom_size = atom_size;
	ai->alloc_size = alloc_size;

	for (group = 0, unit = 0; group_cnt[group]; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];

		/*
		 * Initialize base_offset as if all groups are located
		 * back-to-back.  The caller should update this to
		 * reflect actual allocation.
		 */
		gi->base_offset = unit * ai->unit_size;

		for_each_possible_cpu(cpu)
			if (group_map[cpu] == group)
				gi->cpu_map[gi->nr_units++] = cpu;
		gi->nr_units = roundup(gi->nr_units, upa);
		unit += gi->nr_units;
	}
	BUG_ON(unit != nr_units);

	return ai;
}
#endif	/* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
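
/*
 * Worked example (illustrative only): with 4KB pages, size_sum = 44KB
 * and atom_size = 2MB, min_unit_size is 44KB and alloc_size rounds up
 * to 2MB.  The initial upa of 2MB / 44KB = 46 is decremented until it
 * both divides 2MB evenly and yields a page-aligned unit; since 2MB is
 * a power of two, that first happens at upa = 32, i.e. 64KB units with
 * 32 units per 2MB allocation before the 75% usage pass runs.
 */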

#if defined(BUILD_EMBED_FIRST_CHUNK)
/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @alloc_fn: function to allocate percpu page
 * @free_fn: function to free percpu page
 *
 * This is a helper to ease setting up embedded first percpu chunk and
 * can be called where pcpu_setup_first_chunk() is expected.
 *
 * If this function is used to setup the first chunk, it is allocated
 * by calling @alloc_fn and used as-is without being mapped into
 * vmalloc area.  Allocations are always whole multiples of @atom_size
 * aligned to @atom_size.
 *
 * This enables the first chunk to piggy back on the linear physical
 * mapping which often uses larger page size.  Please note that this
 * can result in very sparse cpu->unit mapping on NUMA machines thus
 * requiring large vmalloc address space.  Don't use this allocator if
 * vmalloc space is not orders of magnitude larger than distances
 * between node memory addresses (ie. 32bit NUMA machines).
 *
 * @dyn_size specifies the minimum dynamic area size.
 *
 * If the needed size is smaller than the minimum or specified unit
 * size, the leftover is returned using @free_fn.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
				  size_t atom_size,
				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				  pcpu_fc_alloc_fn_t alloc_fn,
				  pcpu_fc_free_fn_t free_fn)
{
	void *base = (void *)ULONG_MAX;
	void **areas = NULL;
	struct pcpu_alloc_info *ai;
	size_t size_sum, areas_size, max_distance;
	int group, i, rc;

	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
				   cpu_distance_fn);
	if (IS_ERR(ai))
		return PTR_ERR(ai);

	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

	areas = alloc_bootmem_nopanic(areas_size);
	if (!areas) {
		rc = -ENOMEM;
		goto out_free;
	}

	/* allocate, copy and determine base address */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		unsigned int cpu = NR_CPUS;
		void *ptr;

		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
			cpu = gi->cpu_map[i];
		BUG_ON(cpu == NR_CPUS);

		/* allocate space for the whole group */
		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
		if (!ptr) {
			rc = -ENOMEM;
			goto out_free_areas;
		}
		/* kmemleak tracks the percpu allocations separately */
		kmemleak_free(ptr);
		areas[group] = ptr;

		base = min(ptr, base);
	}

	/*
	 * Copy data and free unused parts.  This should happen after all
	 * allocations are complete; otherwise, we may end up with
	 * overlapping groups.
	 */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		void *ptr = areas[group];

		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
			if (gi->cpu_map[i] == NR_CPUS) {
				/* unused unit, free whole */
				free_fn(ptr, ai->unit_size);
				continue;
			}
			/* copy and return the unused part */
			memcpy(ptr, __per_cpu_load, ai->static_size);
			free_fn(ptr + size_sum, ai->unit_size - size_sum);
		}
	}

	/* base address is now known, determine group base offsets */
	max_distance = 0;
	for (group = 0; group < ai->nr_groups; group++) {
		ai->groups[group].base_offset = areas[group] - base;
		max_distance = max_t(size_t, max_distance,
				     ai->groups[group].base_offset);
	}
	max_distance += ai->unit_size;

	/* warn if maximum distance is further than 75% of vmalloc space */
	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
		pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
			   "space 0x%lx\n", max_distance,
			   (unsigned long)(VMALLOC_END - VMALLOC_START));
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		/* and fail if we have fallback */
		rc = -EINVAL;
		goto out_free;
#endif
	}

	pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
		ai->dyn_size, ai->unit_size);

	rc = pcpu_setup_first_chunk(ai, base);
	goto out_free;

out_free_areas:
	for (group = 0; group < ai->nr_groups; group++)
		free_fn(areas[group],
			ai->groups[group].nr_units * ai->unit_size);
out_free:
	pcpu_free_alloc_info(ai);
	if (areas)
		free_bootmem(__pa(areas), areas_size);
	return rc;
}
#endif	/* BUILD_EMBED_FIRST_CHUNK */

#ifdef BUILD_PAGE_FIRST_CHUNK
/**
 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
 * @reserved_size: the size of reserved percpu area in bytes
 * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
 * @free_fn: function to free percpu page, always called with PAGE_SIZE
 * @populate_pte_fn: function to populate pte
 *
 * This is a helper to ease setting up page-remapped first percpu
 * chunk and can be called where pcpu_setup_first_chunk() is expected.
 *
 * This is the basic allocator.  Static percpu area is allocated
 * page-by-page into vmalloc area.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init pcpu_page_first_chunk(size_t reserved_size,
				 pcpu_fc_alloc_fn_t alloc_fn,
				 pcpu_fc_free_fn_t free_fn,
				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
	static struct vm_struct vm;
	struct pcpu_alloc_info *ai;
	char psize_str[16];
	int unit_pages;
	size_t pages_size;
	struct page **pages;
	int unit, i, j, rc;

	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);

	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
	if (IS_ERR(ai))
		return PTR_ERR(ai);
	BUG_ON(ai->nr_groups != 1);
	BUG_ON(ai->groups[0].nr_units != num_possible_cpus());

	unit_pages = ai->unit_size >> PAGE_SHIFT;

	/* unaligned allocations can't be freed, round up to page size */
	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
			       sizeof(pages[0]));
	pages = alloc_bootmem(pages_size);

	/* allocate pages */
	j = 0;
	for (unit = 0; unit < num_possible_cpus(); unit++)
		for (i = 0; i < unit_pages; i++) {
			unsigned int cpu = ai->groups[0].cpu_map[unit];
			void *ptr;

			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
			if (!ptr) {
				pr_warning("PERCPU: failed to allocate %s page "
					   "for cpu%u\n", psize_str, cpu);
				goto enomem;
			}
			/* kmemleak tracks the percpu allocations separately */
			kmemleak_free(ptr);
			pages[j++] = virt_to_page(ptr);
		}

	/* allocate vm area, map the pages and copy static data */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * ai->unit_size;
	vm_area_register_early(&vm, PAGE_SIZE);

	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned long unit_addr =
			(unsigned long)vm.addr + unit * ai->unit_size;

		for (i = 0; i < unit_pages; i++)
			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));

		/* pte already populated, the following shouldn't fail */
		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
				      unit_pages);
		if (rc < 0)
			panic("failed to map percpu area, err=%d\n", rc);

		/*
		 * FIXME: Archs with virtual cache should flush local
		 * cache for the linear mapping here - something
		 * equivalent to flush_cache_vmap() on the local cpu.
		 * flush_cache_vmap() can't be used as most supporting
		 * data structures are not set up yet.
		 */

		/* copy static data */
		memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
	}

	/* we're ready, commit */
	pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n",
		unit_pages, psize_str, vm.addr, ai->static_size,
		ai->reserved_size, ai->dyn_size);

	rc = pcpu_setup_first_chunk(ai, vm.addr);
	goto out_free_ar;

enomem:
	while (--j >= 0)
		free_fn(page_address(pages[j]), PAGE_SIZE);
	rc = -ENOMEM;
out_free_ar:
	free_bootmem(__pa(pages), pages_size);
	pcpu_free_alloc_info(ai);
	return rc;
}
#endif	/* BUILD_PAGE_FIRST_CHUNK */

#ifndef	CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
 * Generic SMP percpu area setup.
 *
 * The embedding helper is used because its behavior closely resembles
 * the original non-dynamic generic percpu area setup.  This is
 * important because many archs have addressing restrictions and might
 * fail if the percpu area is located far away from the previous
 * location.  As an added bonus, in non-NUMA cases, embedding is
 * better than page first chunk allocator.
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
				       size_t align)
{
	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */

#else	/* CONFIG_SMP */

/*
 * UP percpu area setup.
 *
 * UP always uses km-based percpu allocator with identity mapping.
 * Static percpu variables are indistinguishable from the usual static
 * variables and don't require any special preparation.
 */
void __init setup_per_cpu_areas(void)
{
	const size_t unit_size =
		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
					 PERCPU_DYNAMIC_RESERVE));
	struct pcpu_alloc_info *ai;
	void *fc;

	ai = pcpu_alloc_alloc_info(1, 1);
	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!ai || !fc)
		panic("Failed to allocate memory for percpu areas.");
	/* kmemleak tracks the percpu allocations separately */
	kmemleak_free(fc);

	ai->dyn_size = unit_size;
	ai->unit_size = unit_size;
	ai->atom_size = unit_size;
	ai->alloc_size = unit_size;
	ai->groups[0].nr_units = 1;
	ai->groups[0].cpu_map[0] = 0;

	if (pcpu_setup_first_chunk(ai, fc) < 0)
		panic("Failed to initialize percpu areas.");
}

#endif	/* CONFIG_SMP */

/*
 * First and reserved chunks are initialized with temporary allocation
 * map in initdata so that they can be used before slab is online.
 * This function is called after slab is brought up and replaces those
 * with properly allocated maps.
 */
void __init percpu_init_late(void)
{
	struct pcpu_chunk *target_chunks[] =
		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int i;

	for (i = 0; (chunk = target_chunks[i]); i++) {
		int *map;
		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);

		BUILD_BUG_ON(size > PAGE_SIZE);

		map = pcpu_mem_zalloc(size);
		BUG_ON(!map);

		spin_lock_irqsave(&pcpu_lock, flags);
		memcpy(map, chunk->map, size);
		chunk->map = map;
		spin_unlock_irqrestore(&pcpu_lock, flags);
	}
}