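/*
 * mm/percpu.c - percpu memory allocator
 *
 * The allocator organizes percpu memory in chunks.  Each chunk holds
 * one unit per possible cpu; the first chunk also hosts the kernel
 * static percpu area.  Free chunks are kept on size-indexed slot
 * lists and areas within a chunk are tracked by a simple area map.
 * See the comments above the individual functions below for details.
 */
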
#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>

#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#define PCPU_SLOT_BASE_SHIFT	5	/* slot indexing starts at 1 << 5 */
#define PCPU_DFL_MAP_ALLOC	16	/* initial size of a chunk's area map */

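/*
 * Default percpu pointer <-> linear address conversions.  The #ifndef
 * guards let an architecture override these when its unit space isn't
 * laid out linearly against __per_cpu_start.
 */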
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr	\
		 + (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr	\
		 - (unsigned long)__per_cpu_start)
#endif

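/*
 * Chunk descriptor.  The area map encodes allocations as a sequence
 * of sizes: a positive entry is a free area, a negative entry an
 * allocated one, and offsets are recovered by walking the map and
 * summing absolute values (see pcpu_alloc_area()/pcpu_free_area()).
 */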
struct pcpu_chunk {
	struct list_head	list;		/* linked to pcpu_slot lists */
	int			free_size;	/* free bytes in the chunk */
	int			contig_hint;	/* max contiguous size hint */
	void			*base_addr;	/* base address of this chunk */
	int			map_used;	/* # of map entries used */
	int			map_alloc;	/* # of map entries allocated */
	int			*map;		/* allocation map */
	struct vm_struct	**vms;		/* backing vm areas */
	bool			immutable;	/* no [de]population allowed */
	unsigned long		populated[];	/* populated bitmap */
};

static int pcpu_unit_pages __read_mostly;
static int pcpu_unit_size __read_mostly;
static int pcpu_nr_units __read_mostly;
static int pcpu_atom_size __read_mostly;
static int pcpu_nr_slots __read_mostly;
static size_t pcpu_chunk_struct_size __read_mostly;

/* cpus with the lowest and highest unit numbers */
static unsigned int pcpu_first_unit_cpu __read_mostly;
static unsigned int pcpu_last_unit_cpu __read_mostly;

/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __read_mostly;
EXPORT_SYMBOL_GPL(pcpu_base_addr);

static const int *pcpu_unit_map __read_mostly;		/* cpu -> unit */
const unsigned long *pcpu_unit_offsets __read_mostly;	/* cpu -> unit offset */

/* group information, used for vm area allocation */
static int pcpu_nr_groups __read_mostly;
static const unsigned long *pcpu_group_offsets __read_mostly;
static const size_t *pcpu_group_sizes __read_mostly;

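/*
 * The first chunk always exists; it carries the dynamic region that
 * shares the first unit with the kernel static percpu area.
 */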
static struct pcpu_chunk *pcpu_first_chunk;

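/*
 * Optional reserved chunk.  Only set up if the first chunk was
 * created with a reserved region; allocations made with the reserved
 * flag (e.g. for module static percpu variables) are served from it,
 * and pcpu_reserved_chunk_limit marks where the region ends within
 * the first unit.
 */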
static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;

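/*
 * Synchronization: two locks protect the allocator.  pcpu_alloc_mutex
 * serializes whole allocation, population and reclaim paths and must
 * be taken first.  pcpu_lock, an irq-safe spinlock, protects the
 * index state: the chunk slot lists and the per-chunk area maps.  The
 * free path may be called from irq-off contexts and therefore only
 * takes pcpu_lock.
 */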
static DEFINE_MUTEX(pcpu_alloc_mutex);	/* protects whole alloc and reclaim */
static DEFINE_SPINLOCK(pcpu_lock);	/* protects index data structures */

static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */

/* reclaim work to release fully free chunks, scheduled from free path */
static void pcpu_reclaim(struct work_struct *work);
static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim);

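/* map a free size in bytes to a chunk slot index; bigger areas, higher slots */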
static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);	/* size is in bytes */
	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}

static int pcpu_size_to_slot(int size)
{
	if (size == pcpu_unit_size)
		return pcpu_nr_slots - 1;
	return __pcpu_size_to_slot(size);
}

static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int))
		return 0;

	return pcpu_size_to_slot(chunk->free_size);
}

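/* translate (cpu, page_idx) within a chunk to page array indexes and addresses */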
static int pcpu_page_idx(unsigned int cpu, int page_idx)
{
	return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
}

static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
		(page_idx << PAGE_SHIFT);
}

static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}

/* set the pointer to a chunk in a page struct */
static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
{
	page->index = (unsigned long)pcpu;
}

/* obtain pointer to a chunk from a page struct */
static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
{
	return (struct pcpu_chunk *)page->index;
}

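/*
 * (Un)populated page region iterators.  Iterate over (un)populated
 * page regions between @start and @end in @chunk.  @rs and @re should
 * be integer variables and will be set to start and end page index of
 * the current region.
 */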
static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(chunk->populated, end, *rs);
	*re = find_next_bit(chunk->populated, end, *rs + 1);
}

static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end)
{
	*rs = find_next_bit(chunk->populated, end, *rs);
	*re = find_next_zero_bit(chunk->populated, end, *rs + 1);
}

#define pcpu_for_each_unpop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(chunk, rs, re, start, end)		    \
	for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end));   \
	     (rs) < (re);						    \
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))

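/**
 * pcpu_mem_alloc - allocate memory for allocator internals
 * @size: bytes to allocate
 *
 * Allocate zero-filled memory, from the slab for small requests and
 * from vmalloc for larger ones.  This is to facilitate passing
 * through whole allocation sizes and is only for allocator-internal
 * bookkeeping, not for percpu data.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 */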
static void *pcpu_mem_alloc(size_t size)
{
	if (size <= PAGE_SIZE)
		return kzalloc(size, GFP_KERNEL);
	else {
		void *ptr = vmalloc(size);
		if (ptr)
			memset(ptr, 0, size);
		return ptr;
	}
}

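/**
 * pcpu_mem_free - free memory allocated with pcpu_mem_alloc()
 * @ptr: memory to free
 * @size: size of the area, must match the size passed to pcpu_mem_alloc()
 */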
static void pcpu_mem_free(void *ptr, size_t size)
{
	if (size <= PAGE_SIZE)
		kfree(ptr);
	else
		vfree(ptr);
}

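/**
 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
 * @chunk: chunk of interest
 * @oslot: the previous slot it was on
 *
 * Move the chunk to the slot matching its current free size.  The
 * reserved chunk is never put on the regular slot lists.
 *
 * CONTEXT:
 * pcpu_lock.
 */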
static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
{
	int nslot = pcpu_chunk_slot(chunk);

	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
		if (oslot < nslot)
			list_move(&chunk->list, &pcpu_slot[nslot]);
		else
			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
	}
}

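/**
 * pcpu_chunk_addr_search - determine chunk containing specified address
 * @addr: address for which the chunk needs to be determined
 *
 * The first chunk spans the static and, if present, reserved regions,
 * so those are tested explicitly; any other address is resolved via
 * the backing page's ->index, which dynamic chunks set at map time.
 *
 * RETURNS:
 * The address of the found chunk.
 */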
static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
{
	void *first_start = pcpu_first_chunk->base_addr;

	/* is it in the first chunk? */
	if (addr >= first_start && addr < first_start + pcpu_unit_size) {
		/* is it in the reserved area? */
		if (addr < first_start + pcpu_reserved_chunk_limit)
			return pcpu_reserved_chunk;
		return pcpu_first_chunk;
	}

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc
	 * space.  Any possible cpu id works here, so there's no need
	 * to worry about preemption or cpu hotplug.
	 */
	addr += pcpu_unit_offsets[raw_smp_processor_id()];
	return pcpu_get_page_chunk(vmalloc_to_page(addr));
}

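/**
 * pcpu_need_to_extend - determine whether chunk area map needs to be extended
 * @chunk: chunk of interest
 *
 * Determine whether area map of @chunk needs to be extended to
 * accommodate a new allocation.
 *
 * CONTEXT:
 * pcpu_lock.
 *
 * RETURNS:
 * New target map allocation length if extension is necessary, 0
 * otherwise.
 */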
static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
{
	int new_alloc;

	if (chunk->map_alloc >= chunk->map_used + 2)
		return 0;

	new_alloc = PCPU_DFL_MAP_ALLOC;
	while (new_alloc < chunk->map_used + 2)
		new_alloc *= 2;

	return new_alloc;
}

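/**
 * pcpu_extend_area_map - extend area map of a chunk
 * @chunk: chunk of interest
 * @new_alloc: new target allocation length of the area map
 *
 * Extend area map of @chunk to have @new_alloc entries.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */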
static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
{
	int *old = NULL, *new = NULL;
	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
	unsigned long flags;

	new = pcpu_mem_alloc(new_size);
	if (!new)
		return -ENOMEM;

	/* acquire pcpu_lock and switch to new area map */
	spin_lock_irqsave(&pcpu_lock, flags);

	/* someone else may have extended it already */
	if (new_alloc <= chunk->map_alloc)
		goto out_unlock;

	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
	memcpy(new, chunk->map, old_size);

	/*
	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
	 * one of the first chunks and still using static map.
	 */
	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
		old = chunk->map;

	chunk->map_alloc = new_alloc;
	chunk->map = new;
	new = NULL;

out_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/*
	 * pcpu_mem_free() might end up calling vfree() which uses
	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
	 */
	pcpu_mem_free(old, old_size);
	pcpu_mem_free(new, new_size);

	return 0;
}

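/**
 * pcpu_split_block - split a map block
 * @chunk: chunk of interest
 * @i: index of map block to split
 * @head: head size in bytes (can be 0)
 * @tail: tail size in bytes (can be 0)
 *
 * Split the @i'th map block into two or three blocks.  If @head is
 * non-zero, a @head bytes block is inserted before block @i moving it
 * to @i+1 and reducing its size by @head bytes.
 *
 * If @tail is non-zero, the target block, which can be @i or @i+1
 * depending on @head, is reduced by @tail bytes and a @tail byte
 * block is inserted after the target block.
 *
 * @chunk->map must have enough free slots to accommodate the split.
 *
 * CONTEXT:
 * pcpu_lock.
 */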
static void pcpu_split_block(struct pcpu_chunk *chunk, int i,
			     int head, int tail)
{
	int nr_extra = !!head + !!tail;

	BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra);

	/* insert new subblocks */
	memmove(&chunk->map[i + nr_extra], &chunk->map[i],
		sizeof(chunk->map[0]) * (chunk->map_used - i));
	chunk->map_used += nr_extra;

	if (head) {
		chunk->map[i + 1] = chunk->map[i] - head;
		chunk->map[i++] = head;
	}
	if (tail) {
		chunk->map[i++] -= tail;
		chunk->map[i] = tail;
	}
}

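/**
 * pcpu_alloc_area - allocate area from a pcpu_chunk
 * @chunk: chunk of interest
 * @size: wanted size in bytes
 * @align: wanted align
 *
 * Try to allocate @size bytes area aligned at @align from @chunk.
 * Note that this function only allocates the offset.  It doesn't
 * populate or map the area.
 *
 * @chunk->map must have at least two free slots.
 *
 * CONTEXT:
 * pcpu_lock.
 *
 * RETURNS:
 * Allocated offset in @chunk on success, -1 if no matching area is
 * found.
 */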
static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align)
{
	int oslot = pcpu_chunk_slot(chunk);
	int max_contig = 0;
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) {
		bool is_last = i + 1 == chunk->map_used;
		int head, tail;

		/* extra for alignment requirement */
		head = ALIGN(off, align) - off;
		BUG_ON(i == 0 && head != 0);

		if (chunk->map[i] < 0)
			continue;
		if (chunk->map[i] < head + size) {
			max_contig = max(chunk->map[i], max_contig);
			continue;
		}

		/*
		 * If head is small or the previous block is free,
		 * merge'em.  Note that 'small' is defined as smaller
		 * than sizeof(int), which is very small but isn't too
		 * uncommon for percpu allocations.
		 */
		if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) {
			if (chunk->map[i - 1] > 0)
				chunk->map[i - 1] += head;
			else {
				chunk->map[i - 1] -= head;
				chunk->free_size -= head;
			}
			chunk->map[i] -= head;
			off += head;
			head = 0;
		}

		/* if tail is small, just keep it around */
		tail = chunk->map[i] - head - size;
		if (tail < sizeof(int))
			tail = 0;

		/* split if warranted */
		if (head || tail) {
			pcpu_split_block(chunk, i, head, tail);
			if (head) {
				i++;
				off += head;
				max_contig = max(chunk->map[i - 1], max_contig);
			}
			if (tail)
				max_contig = max(chunk->map[i + 1], max_contig);
		}

		/* update hint and mark allocated */
		if (is_last)
			chunk->contig_hint = max_contig; /* fully scanned */
		else
			chunk->contig_hint = max(chunk->contig_hint,
						 max_contig);

		chunk->free_size -= chunk->map[i];
		chunk->map[i] = -chunk->map[i];

		pcpu_chunk_relocate(chunk, oslot);
		return off;
	}

	chunk->contig_hint = max_contig;	/* fully scanned */
	pcpu_chunk_relocate(chunk, oslot);

	/* tell the upper layer that this chunk has no matching area */
	return -1;
}

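/**
 * pcpu_free_area - free area to a pcpu_chunk
 * @chunk: chunk of interest
 * @freeme: offset of area to free
 *
 * Free area starting from @freeme to @chunk.  Note that this function
 * only modifies the allocation map.  It doesn't depopulate or unmap
 * the area.
 *
 * CONTEXT:
 * pcpu_lock.
 */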
static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme)
{
	int oslot = pcpu_chunk_slot(chunk);
	int i, off;

	for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++]))
		if (off == freeme)
			break;
	BUG_ON(off != freeme);
	BUG_ON(chunk->map[i] > 0);

	chunk->map[i] = -chunk->map[i];
	chunk->free_size += chunk->map[i];

	/* merge with previous? */
	if (i > 0 && chunk->map[i - 1] >= 0) {
		chunk->map[i - 1] += chunk->map[i];
		chunk->map_used--;
		memmove(&chunk->map[i], &chunk->map[i + 1],
			(chunk->map_used - i) * sizeof(chunk->map[0]));
		i--;
	}
	/* merge with next? */
	if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) {
		chunk->map[i] += chunk->map[i + 1];
		chunk->map_used--;
		memmove(&chunk->map[i + 1], &chunk->map[i + 2],
			(chunk->map_used - (i + 1)) * sizeof(chunk->map[0]));
	}

	chunk->contig_hint = max(chunk->map[i], chunk->contig_hint);
	pcpu_chunk_relocate(chunk, oslot);
}

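/**
 * pcpu_get_pages_and_bitmap - get temp pages array and bitmap
 * @chunk: chunk of interest
 * @bitmapp: output parameter for bitmap
 * @may_alloc: may allocate the array
 *
 * Returns pointer to array of pointers to struct page and bitmap,
 * both of which can be indexed with pcpu_page_idx().  The returned
 * array is cleared to zero and *@bitmapp is copied from
 * @chunk->populated.  Note that there is only one array and bitmap
 * and access exclusion is the caller's responsibility.
 *
 * CONTEXT:
 * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc.
 *
 * RETURNS:
 * Pointer to temp pages array on success, NULL on failure.
 */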
static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
					       unsigned long **bitmapp,
					       bool may_alloc)
{
	static struct page **pages;
	static unsigned long *bitmap;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);
	size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) *
			     sizeof(unsigned long);

	if (!pages || !bitmap) {
		if (may_alloc && !pages)
			pages = pcpu_mem_alloc(pages_size);
		if (may_alloc && !bitmap)
			bitmap = pcpu_mem_alloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}

	memset(pages, 0, pages_size);
	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);

	*bitmapp = bitmap;
	return pages;
}

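/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start,@page_end) in @pages for all units.
 */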
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

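/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @populated: populated bitmap
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk.  Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */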
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, unsigned long *populated,
			    int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep) {
				pcpu_free_pages(chunk, pages, populated,
						page_start, page_end);
				return -ENOMEM;
			}
		}
	}
	return 0;
}

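/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belongs to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped.  Flush cache.  As each flushing trial can be very
 * expensive, issue flush on the whole region at once rather than
 * doing it for each cpu.  This could be an overkill but is more
 * scalable.
 */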
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
}

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

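/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @populated: populated bitmap
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * Corresponding elements in @pages are put in so that they can later
 * be passed to pcpu_free_pages().  The caller should call proper
 * pre/post flush functions.
 */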
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, unsigned long *populated,
			     int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}

	for (i = page_start; i < page_end; i++)
		__clear_bit(i, populated);
}

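/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped.  Flush
 * TLB for the regions.  As with pcpu_pre_unmap_flush(), TLB flushing
 * also is done at once for the whole region.
 */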
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}

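/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @populated: populated bitmap
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk.  The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting corresponding bits in
 * @chunk->populated bitmap and whatever is necessary for reverse
 * lookup (addr -> chunk).
 */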
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, unsigned long *populated,
			  int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;
	}

	/* mapping successful, link chunk and mark populated */
	for (i = page_start; i < page_end; i++) {
		for_each_possible_cpu(cpu)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
		__set_bit(i, populated);
	}

	return 0;

err:
	/* unmap whatever has been mapped for the cpus before @cpu */
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	return err;
}

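/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped.  Flush
 * cache, at once for the whole region.
 */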
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
}

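/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @off: offset to the area to depopulate
 * @size: size of the area to depopulate in bytes
 *
 * For each cpu, depopulate and unmap pages backing [@off,@off+size)
 * of @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */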
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	struct page **pages;
	unsigned long *populated;
	int rs, re;

	/* quick path, check whether it's empty already */
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		if (rs == page_start && re == page_end)
			return;
		break;
	}

	/* immutable chunks can't be depopulated */
	WARN_ON(chunk->immutable);

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages_and_bitmap(chunk, &populated, false);
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
}

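/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @off: offset to the area to populate
 * @size: size of the area to populate in bytes
 *
 * For each cpu, populate and map pages backing [@off,@off+size) of
 * @chunk.  The area is cleared on return.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */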
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
	int page_start = PFN_DOWN(off);
	int page_end = PFN_UP(off + size);
	int free_end = page_start, unmap_end = page_start;
	struct page **pages;
	unsigned long *populated;
	unsigned int cpu;
	int rs, re, rc;

	/* quick path, check whether all pages are already there */
	pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) {
		if (rs == page_start && re == page_end)
			goto clear;
		break;
	}

	/* need to allocate and map pages, this chunk can't be immutable */
	WARN_ON(chunk->immutable);

	pages = pcpu_get_pages_and_bitmap(chunk, &populated, true);
	if (!pages)
		return -ENOMEM;

	/* alloc and map */
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_alloc_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_free;
		free_end = re;
	}

	pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) {
		rc = pcpu_map_pages(chunk, pages, populated, rs, re);
		if (rc)
			goto err_unmap;
		unmap_end = re;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	/* commit new bitmap */
	bitmap_copy(chunk->populated, populated, pcpu_unit_pages);
clear:
	for_each_possible_cpu(cpu)
		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
	return 0;

err_unmap:
	pcpu_pre_unmap_flush(chunk, page_start, unmap_end);
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end)
		pcpu_unmap_pages(chunk, pages, populated, rs, re);
	pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end);
err_free:
	pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end)
		pcpu_free_pages(chunk, pages, populated, rs, re);
	return rc;
}

static void free_pcpu_chunk(struct pcpu_chunk *chunk)
{
	if (!chunk)
		return;
	if (chunk->vms)
		pcpu_free_vm_areas(chunk->vms, pcpu_nr_groups);
	pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
	kfree(chunk);
}

static struct pcpu_chunk *alloc_pcpu_chunk(void)
{
	struct pcpu_chunk *chunk;

	chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
	if (!chunk)
		return NULL;

	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
	if (!chunk->map) {
		/* bail out instead of dereferencing a NULL map below */
		kfree(chunk);
		return NULL;
	}
	chunk->map_alloc = PCPU_DFL_MAP_ALLOC;
	chunk->map[chunk->map_used++] = pcpu_unit_size;

	chunk->vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				       pcpu_nr_groups, pcpu_atom_size,
				       GFP_KERNEL);
	if (!chunk->vms) {
		free_pcpu_chunk(chunk);
		return NULL;
	}

	INIT_LIST_HEAD(&chunk->list);
	chunk->free_size = pcpu_unit_size;
	chunk->contig_hint = pcpu_unit_size;
	chunk->base_addr = chunk->vms[0]->addr - pcpu_group_offsets[0];

	return chunk;
}

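/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 *
 * Allocate percpu area of @size bytes aligned at @align.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */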
static void *pcpu_alloc(size_t size, size_t align, bool reserved)
{
	static int warn_limit = 10;
	struct pcpu_chunk *chunk;
	const char *err;
	int slot, off, new_alloc;
	unsigned long flags;

	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
		WARN(true, "illegal size (%zu) or align (%zu) for "
		     "percpu allocation\n", size, align);
		return NULL;
	}

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irqsave(&pcpu_lock, flags);

	/* serve reserved allocations from the reserved chunk if available */
	if (reserved && pcpu_reserved_chunk) {
		chunk = pcpu_reserved_chunk;

		if (size > chunk->contig_hint) {
			err = "alloc from reserved chunk failed";
			goto fail_unlock;
		}

		while ((new_alloc = pcpu_need_to_extend(chunk))) {
			spin_unlock_irqrestore(&pcpu_lock, flags);
			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
				err = "failed to extend area map of reserved chunk";
				goto fail_unlock_mutex;
			}
			spin_lock_irqsave(&pcpu_lock, flags);
		}

		off = pcpu_alloc_area(chunk, size, align);
		if (off >= 0)
			goto area_found;

		err = "alloc from reserved chunk failed";
		goto fail_unlock;
	}

restart:
	/* search through normal chunks */
	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
			if (size > chunk->contig_hint)
				continue;

			new_alloc = pcpu_need_to_extend(chunk);
			if (new_alloc) {
				spin_unlock_irqrestore(&pcpu_lock, flags);
				if (pcpu_extend_area_map(chunk,
							 new_alloc) < 0) {
					err = "failed to extend area map";
					goto fail_unlock_mutex;
				}
				spin_lock_irqsave(&pcpu_lock, flags);
				/*
				 * pcpu_lock has been dropped, need to
				 * restart cpu_slot list walking.
				 */
				goto restart;
			}

			off = pcpu_alloc_area(chunk, size, align);
			if (off >= 0)
				goto area_found;
		}
	}

	/* hmmm... no space left, create a new chunk */
	spin_unlock_irqrestore(&pcpu_lock, flags);

	chunk = alloc_pcpu_chunk();
	if (!chunk) {
		err = "failed to allocate new chunk";
		goto fail_unlock_mutex;
	}

	spin_lock_irqsave(&pcpu_lock, flags);
	pcpu_chunk_relocate(chunk, -1);
	goto restart;

area_found:
	spin_unlock_irqrestore(&pcpu_lock, flags);

	/* populate, map and clear the area */
	if (pcpu_populate_chunk(chunk, off, size)) {
		spin_lock_irqsave(&pcpu_lock, flags);
		pcpu_free_area(chunk, off);
		err = "failed to populate";
		goto fail_unlock;
	}

	mutex_unlock(&pcpu_alloc_mutex);

	/* return address relative to base address */
	return __addr_to_pcpu_ptr(chunk->base_addr + off);

fail_unlock:
	spin_unlock_irqrestore(&pcpu_lock, flags);
fail_unlock_mutex:
	mutex_unlock(&pcpu_alloc_mutex);
	if (warn_limit) {
		pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
			   "%s\n", size, align, err);
		dump_stack();
		if (!--warn_limit)
			pr_info("PERCPU: limit reached, disable warning\n");
	}
	return NULL;
}

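/**
 * __alloc_percpu - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.
 * Might sleep.  Might trigger writeouts.
 *
 * A minimal usage sketch (variable names are illustrative):
 *
 *	int *cnt = __alloc_percpu(sizeof(int), __alignof__(int));
 *	if (cnt)
 *		(*per_cpu_ptr(cnt, smp_processor_id()))++;
 *	free_percpu(cnt);
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */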
void *__alloc_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, false);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);

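/**
 * __alloc_reserved_percpu - allocate reserved percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align
 * from the reserved percpu area if the arch has set it up; otherwise,
 * allocation is served from the same dynamic area.  Might sleep.
 * Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */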
void *__alloc_reserved_percpu(size_t size, size_t align)
{
	return pcpu_alloc(size, align, true);
}

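/**
 * pcpu_reclaim - reclaim fully free chunks, workqueue function
 * @work: unused
 *
 * Reclaim all fully free chunks except for the first one.
 *
 * CONTEXT:
 * workqueue context.
 */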
static void pcpu_reclaim(struct work_struct *work)
{
	LIST_HEAD(todo);
	struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1];
	struct pcpu_chunk *chunk, *next;

	mutex_lock(&pcpu_alloc_mutex);
	spin_lock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, head, list) {
		WARN_ON(chunk->immutable);

		/* spare the first one */
		if (chunk == list_first_entry(head, struct pcpu_chunk, list))
			continue;

		list_move(&chunk->list, &todo);
	}

	spin_unlock_irq(&pcpu_lock);

	list_for_each_entry_safe(chunk, next, &todo, list) {
		pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size);
		free_pcpu_chunk(chunk);
	}

	mutex_unlock(&pcpu_alloc_mutex);
}

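/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.
 *
 * CONTEXT:
 * Can be called from atomic context.
 */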
void free_percpu(void *ptr)
{
	void *addr = __pcpu_ptr_to_addr(ptr);
	struct pcpu_chunk *chunk;
	unsigned long flags;
	int off;

	if (!ptr)
		return;

	spin_lock_irqsave(&pcpu_lock, flags);

	chunk = pcpu_chunk_addr_search(addr);
	off = addr - chunk->base_addr;

	pcpu_free_area(chunk, off);

	/* if there are other fully free chunks, wake up grim reaper */
	if (chunk->free_size == pcpu_unit_size) {
		struct pcpu_chunk *pos;

		list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
			if (pos != chunk) {
				schedule_work(&pcpu_reclaim_work);
				break;
			}
	}

	spin_unlock_irqrestore(&pcpu_lock, flags);
}
EXPORT_SYMBOL_GPL(free_percpu);

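/*
 * Page-align the combined static, reserved and dynamic sizes, growing
 * the dynamic size to absorb the rounding unless it was given as 0.
 * A negative *@dyn_sizep requests "whatever fits in the alignment".
 */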
static inline size_t pcpu_calc_fc_sizes(size_t static_size,
					size_t reserved_size,
					ssize_t *dyn_sizep)
{
	size_t size_sum;

	size_sum = PFN_ALIGN(static_size + reserved_size +
			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
	if (*dyn_sizep != 0)
		*dyn_sizep = size_sum - static_size - reserved_size;

	return size_sum;
}

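/**
 * pcpu_alloc_alloc_info - allocate percpu allocation info
 * @nr_groups: the number of groups
 * @nr_units: the number of units
 *
 * Allocate ai which is large enough for @nr_groups groups containing
 * @nr_units units.  The returned ai's groups[0].cpu_map points to the
 * cpu_map array which is long enough for @nr_units and filled with
 * NR_CPUS.  It's the caller's responsibility to initialize the
 * cpu_map pointers of the other groups.
 *
 * RETURNS:
 * Pointer to the allocated pcpu_alloc_info on success, NULL on
 * failure.
 */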
struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
						      int nr_units)
{
	struct pcpu_alloc_info *ai;
	size_t base_size, ai_size;
	void *ptr;
	int unit;

	base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]),
			  __alignof__(ai->groups[0].cpu_map[0]));
	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);

	ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size));
	if (!ptr)
		return NULL;
	ai = ptr;
	ptr += base_size;

	ai->groups[0].cpu_map = ptr;

	for (unit = 0; unit < nr_units; unit++)
		ai->groups[0].cpu_map[unit] = NR_CPUS;

	ai->nr_groups = nr_groups;
	ai->__ai_size = PFN_ALIGN(ai_size);

	return ai;
}

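/**
 * pcpu_free_alloc_info - free percpu allocation info
 * @ai: pcpu_alloc_info to free
 *
 * Free @ai which was allocated by pcpu_alloc_alloc_info().
 */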
void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
{
	free_bootmem(__pa(ai), ai->__ai_size);
}

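/**
 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
 * This function determines grouping of units, their mappings to cpus
 * and other parameters considering needed percpu size, allocation
 * atom size and distances between CPUs.
 *
 * Groups are always multiples of atom size and CPUs which are of
 * LOCAL_DISTANCE both ways are grouped together and share space for
 * units in the same group.  The returned configuration is guaranteed
 * to have CPUs on different nodes on different groups and >=75% usage
 * of the allocated virtual address space.
 *
 * RETURNS:
 * On success, pointer to the new allocation_info is returned.  On
 * failure, ERR_PTR value is returned.
 */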
struct pcpu_alloc_info * __init pcpu_build_alloc_info(
				size_t reserved_size, ssize_t dyn_size,
				size_t atom_size,
				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
	static int group_map[NR_CPUS] __initdata;
	static int group_cnt[NR_CPUS] __initdata;
	const size_t static_size = __per_cpu_end - __per_cpu_start;
	int group_cnt_max = 0, nr_groups = 1, nr_units = 0;
	size_t size_sum, min_unit_size, alloc_size;
	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
	int last_allocs, group, unit;
	unsigned int cpu, tcpu;
	struct pcpu_alloc_info *ai;
	unsigned int *cpu_map;

	/* this function may be called multiple times */
	memset(group_map, 0, sizeof(group_map));
	memset(group_cnt, 0, sizeof(group_cnt));

	/*
	 * Determine min_unit_size, alloc_size and max_upa such that
	 * alloc_size is multiple of atom_size and is the smallest
	 * which can accommodate 4k aligned segments which are equal to
	 * or larger than min_unit_size.
	 */
	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

	alloc_size = roundup(min_unit_size, atom_size);
	upa = alloc_size / min_unit_size;
	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
		upa--;
	max_upa = upa;

	/* group cpus according to their proximity */
	for_each_possible_cpu(cpu) {
		group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (cpu == tcpu)
				break;
			if (group_map[tcpu] == group && cpu_distance_fn &&
			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;
				nr_groups = max(nr_groups, group + 1);
				goto next_group;
			}
		}
		group_map[cpu] = group;
		group_cnt[group]++;
		group_cnt_max = max(group_cnt_max, group_cnt[group]);
	}

	/*
	 * Expand unit size until address space usage goes over 75%
	 * and then as much as possible without using more address
	 * space.
	 */
	last_allocs = INT_MAX;
	for (upa = max_upa; upa; upa--) {
		int allocs = 0, wasted = 0;

		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
			continue;

		for (group = 0; group < nr_groups; group++) {
			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
			allocs += this_allocs;
			wasted += this_allocs * upa - group_cnt[group];
		}

		/*
		 * Don't accept if wastage is over 25%.  The
		 * greater-than comparison ensures upa==1 always
		 * passes the following check.
		 */
		if (wasted > num_possible_cpus() / 3)
			continue;

		/* and then don't consume more memory */
		if (allocs > last_allocs)
			break;
		last_allocs = allocs;
		best_upa = upa;
	}
	upa = best_upa;

	/* allocate and fill alloc_info */
	for (group = 0; group < nr_groups; group++)
		nr_units += roundup(group_cnt[group], upa);

	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
	if (!ai)
		return ERR_PTR(-ENOMEM);
	cpu_map = ai->groups[0].cpu_map;

	for (group = 0; group < nr_groups; group++) {
		ai->groups[group].cpu_map = cpu_map;
		cpu_map += roundup(group_cnt[group], upa);
	}

	ai->static_size = static_size;
	ai->reserved_size = reserved_size;
	ai->dyn_size = dyn_size;
	ai->unit_size = alloc_size / upa;
	ai->atom_size = atom_size;
	ai->alloc_size = alloc_size;

	for (group = 0, unit = 0; group_cnt[group]; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];

		/*
		 * Initialize base_offset as if all groups are located
		 * back-to-back.  The caller should update this to
		 * reflect actual allocation.
		 */
		gi->base_offset = unit * ai->unit_size;

		for_each_possible_cpu(cpu)
			if (group_map[cpu] == group)
				gi->cpu_map[gi->nr_units++] = cpu;
		gi->nr_units = roundup(gi->nr_units, upa);
		unit += gi->nr_units;
	}
	BUG_ON(unit != nr_units);

	return ai;
}

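/**
 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
 * @lvl: loglevel
 * @ai: allocation info to dump
 *
 * Print out information about @ai using loglevel @lvl.
 */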
static void pcpu_dump_alloc_info(const char *lvl,
				 const struct pcpu_alloc_info *ai)
{
	int group_width = 1, cpu_width = 1, width;
	char empty_str[] = "--------";
	int alloc = 0, alloc_end = 0;
	int group, v;
	int upa, apl;	/* units per alloc, allocs per line */

	v = ai->nr_groups;
	while (v /= 10)
		group_width++;

	v = num_possible_cpus();
	while (v /= 10)
		cpu_width++;
	empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';

	upa = ai->alloc_size / ai->unit_size;
	width = upa * (cpu_width + 1) + group_width + 3;
	apl = rounddown_pow_of_two(max(60 / width, 1));

	printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
	       lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
	       ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);

	for (group = 0; group < ai->nr_groups; group++) {
		const struct pcpu_group_info *gi = &ai->groups[group];
		int unit = 0, unit_end = 0;

		BUG_ON(gi->nr_units % upa);
		for (alloc_end += gi->nr_units / upa;
		     alloc < alloc_end; alloc++) {
			if (!(alloc % apl)) {
				printk("\n");
				printk("%spcpu-alloc: ", lvl);
			}
			printk("[%0*d] ", group_width, group);

			for (unit_end += upa; unit < unit_end; unit++)
				if (gi->cpu_map[unit] != NR_CPUS)
					printk("%0*d ", cpu_width,
					       gi->cpu_map[unit]);
				else
					printk("%s ", empty_str);
		}
	}
	printk("\n");
}

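/**
 * pcpu_setup_first_chunk - initialize the first percpu chunk
 * @ai: pcpu_alloc_info describing how the percpu area is shaped
 * @base_addr: mapped address
 *
 * Initialize the first percpu chunk which contains the kernel static
 * percpu area.  This function is to be called from the arch percpu
 * area setup path.
 *
 * @ai contains everything needed to prime the allocator: the static,
 * reserved and dynamic region sizes, unit and atom sizes, and the
 * group/cpu_map tables describing which cpu maps to which unit.
 *
 * If the first chunk ends up with both reserved and dynamic areas, it
 * is served by two chunks - one to serve the core static and reserved
 * areas and the other for the dynamic area.  They share the same vm
 * and page map but use different area allocation maps to stay away
 * from each other.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */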
int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
				  void *base_addr)
{
	static char cpus_buf[4096] __initdata;
	static int smap[2], dmap[2];
	size_t dyn_size = ai->dyn_size;
	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
	struct pcpu_chunk *schunk, *dchunk = NULL;
	unsigned long *group_offsets;
	size_t *group_sizes;
	unsigned long *unit_off;
	unsigned int cpu;
	int *unit_map;
	int group, unit, i;

	cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);

#define PCPU_SETUP_BUG_ON(cond)	do {					\
	if (unlikely(cond)) {						\
		pr_emerg("PERCPU: failed to initialize, %s\n", #cond);	\
		pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf);	\
		pcpu_dump_alloc_info(KERN_EMERG, ai);			\
		BUG();							\
	}								\
} while (0)

	/* sanity checks */
	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
	PCPU_SETUP_BUG_ON(!ai->static_size);
	PCPU_SETUP_BUG_ON(!base_addr);
	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);

	/* process group information and build config tables accordingly */
	group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
	group_sizes = alloc_bootmem(ai->nr_groups * sizeof(group_sizes[0]));
	unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0]));
	unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;
	pcpu_first_unit_cpu = NR_CPUS;

	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];

		group_offsets[group] = gi->base_offset;
		group_sizes[group] = gi->nr_units * ai->unit_size;

		for (i = 0; i < gi->nr_units; i++) {
			cpu = gi->cpu_map[i];
			if (cpu == NR_CPUS)
				continue;

			/* cpu is used as an index below, catch overflows */
			PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids);
			PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
			PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;

			if (pcpu_first_unit_cpu == NR_CPUS)
				pcpu_first_unit_cpu = cpu;
		}
	}
	pcpu_last_unit_cpu = cpu;
	pcpu_nr_units = unit;

	for_each_possible_cpu(cpu)
		PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);

	/* we're done parsing the input, undefine BUG macro and dump config */
#undef PCPU_SETUP_BUG_ON
	pcpu_dump_alloc_info(KERN_INFO, ai);

	pcpu_nr_groups = ai->nr_groups;
	pcpu_group_offsets = group_offsets;
	pcpu_group_sizes = group_sizes;
	pcpu_unit_map = unit_map;
	pcpu_unit_offsets = unit_off;

	/* determine basic parameters */
	pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
	pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
	pcpu_atom_size = ai->atom_size;
	pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) +
		BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long);

	/*
	 * Allocate chunk slots.  The additional last slot is for
	 * empty chunks.
	 */
	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
	pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0]));
	for (i = 0; i < pcpu_nr_slots; i++)
		INIT_LIST_HEAD(&pcpu_slot[i]);

	/*
	 * Initialize static chunk.  If reserved_size is zero, the
	 * static chunk covers static area + dynamic allocation area
	 * in the first chunk.  If reserved_size is not zero, it
	 * covers static area + reserved area (mostly used for module
	 * static percpu allocation).
	 */
	schunk = alloc_bootmem(pcpu_chunk_struct_size);
	INIT_LIST_HEAD(&schunk->list);
	schunk->base_addr = base_addr;
	schunk->map = smap;
	schunk->map_alloc = ARRAY_SIZE(smap);
	schunk->immutable = true;
	bitmap_fill(schunk->populated, pcpu_unit_pages);

	if (ai->reserved_size) {
		schunk->free_size = ai->reserved_size;
		pcpu_reserved_chunk = schunk;
		pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size;
	} else {
		schunk->free_size = dyn_size;
		dyn_size = 0;			/* dynamic area covered */
	}
	schunk->contig_hint = schunk->free_size;

	schunk->map[schunk->map_used++] = -ai->static_size;
	if (schunk->free_size)
		schunk->map[schunk->map_used++] = schunk->free_size;

	/* init dynamic chunk if necessary */
	if (dyn_size) {
		dchunk = alloc_bootmem(pcpu_chunk_struct_size);
		INIT_LIST_HEAD(&dchunk->list);
		dchunk->base_addr = base_addr;
		dchunk->map = dmap;
		dchunk->map_alloc = ARRAY_SIZE(dmap);
		dchunk->immutable = true;
		bitmap_fill(dchunk->populated, pcpu_unit_pages);

		dchunk->contig_hint = dchunk->free_size = dyn_size;
		dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit;
		dchunk->map[dchunk->map_used++] = dchunk->free_size;
	}

	/* link the first chunk in */
	pcpu_first_chunk = dchunk ?: schunk;
	pcpu_chunk_relocate(pcpu_first_chunk, -1);

	/* we're done */
	pcpu_base_addr = base_addr;
	return 0;
}

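/*
 * First chunk allocator selection, settable via the "percpu_alloc="
 * kernel parameter handled below.
 */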
const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
	[PCPU_FC_AUTO]	= "auto",
	[PCPU_FC_EMBED]	= "embed",
	[PCPU_FC_PAGE]	= "page",
};

enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;

static int __init percpu_alloc_setup(char *str)
{
	if (0)
		/* nada */;
#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
	else if (!strcmp(str, "embed"))
		pcpu_chosen_fc = PCPU_FC_EMBED;
#endif
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	else if (!strcmp(str, "page"))
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	else
		pr_warning("PERCPU: unknown allocator %s specified\n", str);

	return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);

#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)

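/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @alloc_fn: function to allocate percpu area
 * @free_fn: function to free percpu area
 *
 * This is a helper to ease setting up an embedded first percpu chunk
 * and can be called where pcpu_setup_first_chunk() is expected.
 *
 * If this function is used to set up the first chunk, it is allocated
 * by calling @alloc_fn and used as-is without being mapped into the
 * vmalloc area.  Allocations are always whole multiples of @atom_size
 * aligned to @atom_size.
 *
 * This enables the first chunk to piggy back on the linear physical
 * mapping which often uses larger page size.  Note that this can
 * result in very sparse cpu->unit mapping on NUMA machines, thus
 * requiring large vmalloc address space.  Don't use this allocator if
 * vmalloc space is not orders of magnitude larger than distances
 * between node memory addresses (ie. 32bit NUMA machines).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */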
int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
				  size_t atom_size,
				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
				  pcpu_fc_alloc_fn_t alloc_fn,
				  pcpu_fc_free_fn_t free_fn)
{
	void *base = (void *)ULONG_MAX;
	void **areas = NULL;
	struct pcpu_alloc_info *ai;
	size_t size_sum, areas_size, max_distance;
	int group, i, rc;

	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
				   cpu_distance_fn);
	if (IS_ERR(ai))
		return PTR_ERR(ai);

	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));

	areas = alloc_bootmem_nopanic(areas_size);
	if (!areas) {
		rc = -ENOMEM;
		goto out_free;
	}

	/* allocate, copy and determine base address */
	for (group = 0; group < ai->nr_groups; group++) {
		struct pcpu_group_info *gi = &ai->groups[group];
		unsigned int cpu = NR_CPUS;
		void *ptr;

		for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
			cpu = gi->cpu_map[i];
		BUG_ON(cpu == NR_CPUS);

		/* allocate space for the whole group */
		ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
		if (!ptr) {
			rc = -ENOMEM;
			goto out_free_areas;
		}
		areas[group] = ptr;

		base = min(ptr, base);

		for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
			if (gi->cpu_map[i] == NR_CPUS) {
				/* unused unit, free whole */
				free_fn(ptr, ai->unit_size);
				continue;
			}
			/* copy and return the unused part */
			memcpy(ptr, __per_cpu_load, ai->static_size);
			free_fn(ptr + size_sum, ai->unit_size - size_sum);
		}
	}

	/* base address is now known, determine group base offsets */
	max_distance = 0;
	for (group = 0; group < ai->nr_groups; group++) {
		ai->groups[group].base_offset = areas[group] - base;
		max_distance = max_t(size_t, max_distance,
				     ai->groups[group].base_offset);
	}
	max_distance += ai->unit_size;

	/* warn if maximum distance is further than 75% of vmalloc space */
	if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
		pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
			   "space 0x%lx\n",
			   max_distance, VMALLOC_END - VMALLOC_START);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		/* and fail if we have fallback */
		rc = -EINVAL;
		goto out_free;
#endif
	}

	pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
		PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
		ai->dyn_size, ai->unit_size);

	rc = pcpu_setup_first_chunk(ai, base);
	goto out_free;

out_free_areas:
	for (group = 0; group < ai->nr_groups; group++)
		free_fn(areas[group],
			ai->groups[group].nr_units * ai->unit_size);
out_free:
	pcpu_free_alloc_info(ai);
	if (areas)
		free_bootmem(__pa(areas), areas_size);
	return rc;
}
#endif /* EMBED_FIRST_CHUNK || !HAVE_SETUP_PER_CPU_AREA */

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK

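/**
 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
 * @reserved_size: the size of reserved percpu area in bytes
 * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
 * @free_fn: function to free percpu page, always called with PAGE_SIZE
 * @populate_pte_fn: function to populate pte
 *
 * This is a helper to ease setting up a page-remapped first percpu
 * chunk and can be called where pcpu_setup_first_chunk() is expected.
 *
 * This is the basic allocator.  The static percpu area is allocated
 * page-by-page into the vmalloc area.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */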
int __init pcpu_page_first_chunk(size_t reserved_size,
				 pcpu_fc_alloc_fn_t alloc_fn,
				 pcpu_fc_free_fn_t free_fn,
				 pcpu_fc_populate_pte_fn_t populate_pte_fn)
{
	static struct vm_struct vm;
	struct pcpu_alloc_info *ai;
	char psize_str[16];
	int unit_pages;
	size_t pages_size;
	struct page **pages;
	int unit, i, j, rc;

	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);

	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
	if (IS_ERR(ai))
		return PTR_ERR(ai);
	BUG_ON(ai->nr_groups != 1);
	BUG_ON(ai->groups[0].nr_units != num_possible_cpus());

	unit_pages = ai->unit_size >> PAGE_SHIFT;

	/* unaligned allocations can't be freed, round up to page size */
	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
			       sizeof(pages[0]));
	pages = alloc_bootmem(pages_size);

	/* allocate pages */
	j = 0;
	for (unit = 0; unit < num_possible_cpus(); unit++)
		for (i = 0; i < unit_pages; i++) {
			unsigned int cpu = ai->groups[0].cpu_map[unit];
			void *ptr;

			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
			if (!ptr) {
				pr_warning("PERCPU: failed to allocate %s page "
					   "for cpu%u\n", psize_str, cpu);
				goto enomem;
			}
			pages[j++] = virt_to_page(ptr);
		}

	/* allocate vm area, map the pages and copy static data */
	vm.flags = VM_ALLOC;
	vm.size = num_possible_cpus() * ai->unit_size;
	vm_area_register_early(&vm, PAGE_SIZE);

	for (unit = 0; unit < num_possible_cpus(); unit++) {
		unsigned long unit_addr =
			(unsigned long)vm.addr + unit * ai->unit_size;

		for (i = 0; i < unit_pages; i++)
			populate_pte_fn(unit_addr + (i << PAGE_SHIFT));

		/* pte already populated, the following shouldn't fail */
		rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
				      unit_pages);
		if (rc < 0)
			panic("failed to map percpu area, err=%d\n", rc);

		/*
		 * FIXME: Archs with virtual cache should flush local
		 * cache for the linear mapping here - something
		 * equivalent to flush_cache_vmap() on the local cpu.
		 * flush_cache_vmap() can't be used as most supporting
		 * data structures are not set up yet.
		 */

		/* copy static data */
		memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
	}

	/* we're ready, commit */
	pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n",
		unit_pages, psize_str, vm.addr, ai->static_size,
		ai->reserved_size, ai->dyn_size);

	rc = pcpu_setup_first_chunk(ai, vm.addr);
	goto out_free_ar;

enomem:
	while (--j >= 0)
		free_fn(page_address(pages[j]), PAGE_SIZE);
	rc = -ENOMEM;
out_free_ar:
	free_bootmem(__pa(pages), pages_size);
	pcpu_free_alloc_info(ai);
	return rc;
}
#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */

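/*
 * Generic percpu area setup.
 *
 * The embedding helper is used because its behavior closely resembles
 * the original non-dynamic generic percpu area setup.  This is
 * important because many archs have addressing restrictions and might
 * fail if the percpu area is located far away from the previous
 * location.  As an added bonus, in non-NUMA cases, embedding is
 * generally simpler.
 */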
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
				       size_t align)
{
	return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */