// SPDX-License-Identifier: GPL-2.0-only
/*
 * Allocation and mapping of virtually contiguous memory (vmalloc), plus the
 * kernel virtual address (kva) allocator and the per-cpu vmap block
 * infrastructure that backs it.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/set_memory.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/rbtree.h>
#include <linux/xarray.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/llist.h>
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>

#include <linux/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

#include "internal.h"
#include "pgalloc-track.h"

/* Is this address inside the vmalloc (and ioremap) region? */
bool is_vmalloc_addr(const void *x)
{
	unsigned long addr = (unsigned long)x;

	return addr >= VMALLOC_START && addr < VMALLOC_END;
}
EXPORT_SYMBOL(is_vmalloc_addr);
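
/*
 * Usage sketch (illustrative only, not part of this file): callers can use
 * is_vmalloc_addr() to tell vmalloc memory from kmalloc memory and pick the
 * matching free routine, which is essentially what kvfree() does.  The
 * helper name below is made up for the example.
 */
#if 0	/* example only */
static void example_free_buffer(void *buf)
{
	if (is_vmalloc_addr(buf))
		vfree(buf);
	else
		kfree(buf);
}
#endif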

struct vfree_deferred {
	struct llist_head list;
	struct work_struct wq;
};
static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);

static void __vunmap(const void *, int);

/* Deferred vfree work: free everything queued on this CPU's llist. */
static void free_work(struct work_struct *w)
{
	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
	struct llist_node *t, *llnode;

	llist_for_each_safe(llnode, t, llist_del_all(&p->list))
		__vunmap((void *)llnode, 1);
}
69
70
71
72static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
73 pgtbl_mod_mask *mask)
74{
75 pte_t *pte;
76
77 pte = pte_offset_kernel(pmd, addr);
78 do {
79 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
80 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
81 } while (pte++, addr += PAGE_SIZE, addr != end);
82 *mask |= PGTBL_PTE_MODIFIED;
83}
84
85static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
86 pgtbl_mod_mask *mask)
87{
88 pmd_t *pmd;
89 unsigned long next;
90 int cleared;
91
92 pmd = pmd_offset(pud, addr);
93 do {
94 next = pmd_addr_end(addr, end);
95
96 cleared = pmd_clear_huge(pmd);
97 if (cleared || pmd_bad(*pmd))
98 *mask |= PGTBL_PMD_MODIFIED;
99
100 if (cleared)
101 continue;
102 if (pmd_none_or_clear_bad(pmd))
103 continue;
104 vunmap_pte_range(pmd, addr, next, mask);
105
106 cond_resched();
107 } while (pmd++, addr = next, addr != end);
108}
109
110static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
111 pgtbl_mod_mask *mask)
112{
113 pud_t *pud;
114 unsigned long next;
115 int cleared;
116
117 pud = pud_offset(p4d, addr);
118 do {
119 next = pud_addr_end(addr, end);
120
121 cleared = pud_clear_huge(pud);
122 if (cleared || pud_bad(*pud))
123 *mask |= PGTBL_PUD_MODIFIED;
124
125 if (cleared)
126 continue;
127 if (pud_none_or_clear_bad(pud))
128 continue;
129 vunmap_pmd_range(pud, addr, next, mask);
130 } while (pud++, addr = next, addr != end);
131}
132
133static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
134 pgtbl_mod_mask *mask)
135{
136 p4d_t *p4d;
137 unsigned long next;
138 int cleared;
139
140 p4d = p4d_offset(pgd, addr);
141 do {
142 next = p4d_addr_end(addr, end);
143
144 cleared = p4d_clear_huge(p4d);
145 if (cleared || p4d_bad(*p4d))
146 *mask |= PGTBL_P4D_MODIFIED;
147
148 if (cleared)
149 continue;
150 if (p4d_none_or_clear_bad(p4d))
151 continue;
152 vunmap_pud_range(p4d, addr, next, mask);
153 } while (p4d++, addr = next, addr != end);
154}
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
170{
171 unsigned long end = start + size;
172 unsigned long next;
173 pgd_t *pgd;
174 unsigned long addr = start;
175 pgtbl_mod_mask mask = 0;
176
177 BUG_ON(addr >= end);
178 pgd = pgd_offset_k(addr);
179 do {
180 next = pgd_addr_end(addr, end);
181 if (pgd_bad(*pgd))
182 mask |= PGTBL_PGD_MODIFIED;
183 if (pgd_none_or_clear_bad(pgd))
184 continue;
185 vunmap_p4d_range(pgd, addr, next, &mask);
186 } while (pgd++, addr = next, addr != end);
187
188 if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
189 arch_sync_kernel_mappings(start, end);
190}
191
192static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
193 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
194 pgtbl_mod_mask *mask)
195{
196 pte_t *pte;
197
198
199
200
201
202
203 pte = pte_alloc_kernel_track(pmd, addr, mask);
204 if (!pte)
205 return -ENOMEM;
206 do {
207 struct page *page = pages[*nr];
208
209 if (WARN_ON(!pte_none(*pte)))
210 return -EBUSY;
211 if (WARN_ON(!page))
212 return -ENOMEM;
213 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
214 (*nr)++;
215 } while (pte++, addr += PAGE_SIZE, addr != end);
216 *mask |= PGTBL_PTE_MODIFIED;
217 return 0;
218}
219
220static int vmap_pmd_range(pud_t *pud, unsigned long addr,
221 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
222 pgtbl_mod_mask *mask)
223{
224 pmd_t *pmd;
225 unsigned long next;
226
227 pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
228 if (!pmd)
229 return -ENOMEM;
230 do {
231 next = pmd_addr_end(addr, end);
232 if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
233 return -ENOMEM;
234 } while (pmd++, addr = next, addr != end);
235 return 0;
236}
237
238static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
239 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
240 pgtbl_mod_mask *mask)
241{
242 pud_t *pud;
243 unsigned long next;
244
245 pud = pud_alloc_track(&init_mm, p4d, addr, mask);
246 if (!pud)
247 return -ENOMEM;
248 do {
249 next = pud_addr_end(addr, end);
250 if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
251 return -ENOMEM;
252 } while (pud++, addr = next, addr != end);
253 return 0;
254}
255
256static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
257 unsigned long end, pgprot_t prot, struct page **pages, int *nr,
258 pgtbl_mod_mask *mask)
259{
260 p4d_t *p4d;
261 unsigned long next;
262
263 p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
264 if (!p4d)
265 return -ENOMEM;
266 do {
267 next = p4d_addr_end(addr, end);
268 if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
269 return -ENOMEM;
270 } while (p4d++, addr = next, addr != end);
271 return 0;
272}

/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map pages at @addr into the kernel page tables.
 *
 * NOTE: this function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vmap() on to-be-mapped areas
 * before calling this function.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
			     pgprot_t prot, struct page **pages)
{
	unsigned long start = addr;
	unsigned long end = addr + size;
	unsigned long next;
	pgd_t *pgd;
	int err = 0;
	int nr = 0;
	pgtbl_mod_mask mask = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_bad(*pgd))
			mask |= PGTBL_PGD_MODIFIED;
		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);

	return 0;
}

int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
		     struct page **pages)
{
	int ret;

	ret = map_kernel_range_noflush(start, size, prot, pages);
	flush_cache_vmap(start, start + size);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * Some architectures put modules in a special region outside of the
	 * vmalloc space proper (and may fall back on vmalloc() if that
	 * fails), so accept module addresses as well when such a region is
	 * defined.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Walk a vmap address to the struct page it maps.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

	/* Only vmalloc or module addresses may legitimately be walked here. */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);

	/*
	 * Don't dereference bad PUD or PMD (below) entries.  This also
	 * catches huge mappings, which may exist on architectures with
	 * CONFIG_HAVE_ARCH_HUGE_VMAP=y: they are vmalloc addresses, but
	 * are not unambiguously associated with a single struct page,
	 * so there is no correct value to return for them.
	 */
	WARN_ON_ONCE(pud_bad(*pud));
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	WARN_ON_ONCE(pmd_bad(*pmd));
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;

	ptep = pte_offset_map(pmd, addr);
	pte = *ptep;
	if (pte_present(pte))
		page = pte_page(pte);
	pte_unmap(ptep);
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
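
/*
 * Usage sketch (illustrative only): translating a vmalloc()'d buffer into
 * its backing struct pages, e.g. to build a scatterlist one page at a time.
 * The helper name is made up for the example and error handling is elided.
 */
#if 0	/* example only */
static void example_walk_pages(void *buf, unsigned long nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++) {
		struct page *page = vmalloc_to_page(buf + i * PAGE_SIZE);

		pr_info("page %lu -> pfn %lu\n", i, page_to_pfn(page));
	}
}
#endif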

/*** Global kva allocator ***/

#define DEBUG_AUGMENT_PROPAGATE_CHECK 0
#define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0

static DEFINE_SPINLOCK(vmap_area_lock);
static DEFINE_SPINLOCK(free_vmap_area_lock);
/* Export for kexec only */
LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;
static bool vmap_initialized __read_mostly;

static struct rb_root purge_vmap_area_root = RB_ROOT;
static LIST_HEAD(purge_vmap_area_list);
static DEFINE_SPINLOCK(purge_vmap_area_lock);

/*
 * This kmem cache is used for vmap_area objects.  Having a dedicated cache
 * lets us recycle objects quickly, in particular for the "no edge" split of
 * a free block.
 */
static struct kmem_cache *vmap_area_cachep;

/*
 * This linked list is used in pair with free_vmap_area_root.
 * It gives O(1) access to prev/next to perform fast coalescing.
 */
static LIST_HEAD(free_vmap_area_list);

/*
 * This augmented red-black tree represents the free vmap space.
 * All vmap_area objects in this tree are sorted by va->va_start
 * address.  It is used for allocation and for merging when a vmap
 * object is released.
 *
 * Each node also caches the maximum size of any free block in its
 * subtree, which makes a lowest-address match lookup possible.
 */
static struct rb_root free_vmap_area_root = RB_ROOT;

/*
 * Preload a CPU with one object for the "no edge" split case.  The
 * aim is to get rid of allocations from the atomic context, thus
 * to use more permissive allocation masks.
 */
static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
455
456static __always_inline unsigned long
457va_size(struct vmap_area *va)
458{
459 return (va->va_end - va->va_start);
460}
461
462static __always_inline unsigned long
463get_subtree_max_size(struct rb_node *node)
464{
465 struct vmap_area *va;
466
467 va = rb_entry_safe(node, struct vmap_area, rb_node);
468 return va ? va->subtree_max_size : 0;
469}
470
471
472
473
474static __always_inline unsigned long
475compute_subtree_max_size(struct vmap_area *va)
476{
477 return max3(va_size(va),
478 get_subtree_max_size(va->rb_node.rb_left),
479 get_subtree_max_size(va->rb_node.rb_right));
480}
481
482RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
483 struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
484
485static void purge_vmap_area_lazy(void);
486static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
487static unsigned long lazy_max_pages(void);
488
489static atomic_long_t nr_vmalloc_pages;
490
491unsigned long vmalloc_nr_pages(void)
492{
493 return atomic_long_read(&nr_vmalloc_pages);
494}
495
496static struct vmap_area *__find_vmap_area(unsigned long addr)
497{
498 struct rb_node *n = vmap_area_root.rb_node;
499
500 while (n) {
501 struct vmap_area *va;
502
503 va = rb_entry(n, struct vmap_area, rb_node);
504 if (addr < va->va_start)
505 n = n->rb_left;
506 else if (addr >= va->va_end)
507 n = n->rb_right;
508 else
509 return va;
510 }
511
512 return NULL;
513}
514
515
516
517
518
519
520
521
522
523static __always_inline struct rb_node **
524find_va_links(struct vmap_area *va,
525 struct rb_root *root, struct rb_node *from,
526 struct rb_node **parent)
527{
528 struct vmap_area *tmp_va;
529 struct rb_node **link;
530
531 if (root) {
532 link = &root->rb_node;
533 if (unlikely(!*link)) {
534 *parent = NULL;
535 return link;
536 }
537 } else {
538 link = &from;
539 }
540
541
542
543
544
545
546 do {
547 tmp_va = rb_entry(*link, struct vmap_area, rb_node);
548
549
550
551
552
553
554 if (va->va_start < tmp_va->va_end &&
555 va->va_end <= tmp_va->va_start)
556 link = &(*link)->rb_left;
557 else if (va->va_end > tmp_va->va_start &&
558 va->va_start >= tmp_va->va_end)
559 link = &(*link)->rb_right;
560 else {
561 WARN(1, "vmalloc bug: 0x%lx-0x%lx overlaps with 0x%lx-0x%lx\n",
562 va->va_start, va->va_end, tmp_va->va_start, tmp_va->va_end);
563
564 return NULL;
565 }
566 } while (*link);
567
568 *parent = &tmp_va->rb_node;
569 return link;
570}
571
572static __always_inline struct list_head *
573get_va_next_sibling(struct rb_node *parent, struct rb_node **link)
574{
575 struct list_head *list;
576
577 if (unlikely(!parent))
578
579
580
581
582
583
584 return NULL;
585
586 list = &rb_entry(parent, struct vmap_area, rb_node)->list;
587 return (&parent->rb_right == link ? list->next : list);
588}
589
590static __always_inline void
591link_va(struct vmap_area *va, struct rb_root *root,
592 struct rb_node *parent, struct rb_node **link, struct list_head *head)
593{
594
595
596
597
598 if (likely(parent)) {
599 head = &rb_entry(parent, struct vmap_area, rb_node)->list;
600 if (&parent->rb_right != link)
601 head = head->prev;
602 }
603
604
605 rb_link_node(&va->rb_node, parent, link);
606 if (root == &free_vmap_area_root) {
607
608
609
610
611
612
613
614
615
616
617
618 rb_insert_augmented(&va->rb_node,
619 root, &free_vmap_area_rb_augment_cb);
620 va->subtree_max_size = 0;
621 } else {
622 rb_insert_color(&va->rb_node, root);
623 }
624
625
626 list_add(&va->list, head);
627}
628
629static __always_inline void
630unlink_va(struct vmap_area *va, struct rb_root *root)
631{
632 if (WARN_ON(RB_EMPTY_NODE(&va->rb_node)))
633 return;
634
635 if (root == &free_vmap_area_root)
636 rb_erase_augmented(&va->rb_node,
637 root, &free_vmap_area_rb_augment_cb);
638 else
639 rb_erase(&va->rb_node, root);
640
641 list_del(&va->list);
642 RB_CLEAR_NODE(&va->rb_node);
643}
644
645#if DEBUG_AUGMENT_PROPAGATE_CHECK
646static void
647augment_tree_propagate_check(void)
648{
649 struct vmap_area *va;
650 unsigned long computed_size;
651
652 list_for_each_entry(va, &free_vmap_area_list, list) {
653 computed_size = compute_subtree_max_size(va);
654 if (computed_size != va->subtree_max_size)
655 pr_emerg("tree is corrupted: %lu, %lu\n",
656 va_size(va), va->subtree_max_size);
657 }
658}
659#endif
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688static __always_inline void
689augment_tree_propagate_from(struct vmap_area *va)
690{
691
692
693
694
695
696 free_vmap_area_rb_augment_cb_propagate(&va->rb_node, NULL);
697
698#if DEBUG_AUGMENT_PROPAGATE_CHECK
699 augment_tree_propagate_check();
700#endif
701}
702
703static void
704insert_vmap_area(struct vmap_area *va,
705 struct rb_root *root, struct list_head *head)
706{
707 struct rb_node **link;
708 struct rb_node *parent;
709
710 link = find_va_links(va, root, NULL, &parent);
711 if (link)
712 link_va(va, root, parent, link, head);
713}
714
715static void
716insert_vmap_area_augment(struct vmap_area *va,
717 struct rb_node *from, struct rb_root *root,
718 struct list_head *head)
719{
720 struct rb_node **link;
721 struct rb_node *parent;
722
723 if (from)
724 link = find_va_links(va, NULL, from, &parent);
725 else
726 link = find_va_links(va, root, NULL, &parent);
727
728 if (link) {
729 link_va(va, root, parent, link, head);
730 augment_tree_propagate_from(va);
731 }
732}
733
734
735
736
737
738
739
740
741
742
743
744
745static __always_inline struct vmap_area *
746merge_or_add_vmap_area(struct vmap_area *va,
747 struct rb_root *root, struct list_head *head)
748{
749 struct vmap_area *sibling;
750 struct list_head *next;
751 struct rb_node **link;
752 struct rb_node *parent;
753 bool merged = false;
754
755
756
757
758
759 link = find_va_links(va, root, NULL, &parent);
760 if (!link)
761 return NULL;
762
763
764
765
766 next = get_va_next_sibling(parent, link);
767 if (unlikely(next == NULL))
768 goto insert;
769
770
771
772
773
774
775
776
777 if (next != head) {
778 sibling = list_entry(next, struct vmap_area, list);
779 if (sibling->va_start == va->va_end) {
780 sibling->va_start = va->va_start;
781
782
783 kmem_cache_free(vmap_area_cachep, va);
784
785
786 va = sibling;
787 merged = true;
788 }
789 }
790
791
792
793
794
795
796
797
798 if (next->prev != head) {
799 sibling = list_entry(next->prev, struct vmap_area, list);
800 if (sibling->va_end == va->va_start) {
801
802
803
804
805
806
807
808 if (merged)
809 unlink_va(va, root);
810
811 sibling->va_end = va->va_end;
812
813
814 kmem_cache_free(vmap_area_cachep, va);
815
816
817 va = sibling;
818 merged = true;
819 }
820 }
821
822insert:
823 if (!merged)
824 link_va(va, root, parent, link, head);
825
826 return va;
827}
828
829static __always_inline struct vmap_area *
830merge_or_add_vmap_area_augment(struct vmap_area *va,
831 struct rb_root *root, struct list_head *head)
832{
833 va = merge_or_add_vmap_area(va, root, head);
834 if (va)
835 augment_tree_propagate_from(va);
836
837 return va;
838}
839
840static __always_inline bool
841is_within_this_va(struct vmap_area *va, unsigned long size,
842 unsigned long align, unsigned long vstart)
843{
844 unsigned long nva_start_addr;
845
846 if (va->va_start > vstart)
847 nva_start_addr = ALIGN(va->va_start, align);
848 else
849 nva_start_addr = ALIGN(vstart, align);
850
851
852 if (nva_start_addr + size < nva_start_addr ||
853 nva_start_addr < vstart)
854 return false;
855
856 return (nva_start_addr + size <= va->va_end);
857}
858
859
860
861
862
863
864static __always_inline struct vmap_area *
865find_vmap_lowest_match(unsigned long size,
866 unsigned long align, unsigned long vstart)
867{
868 struct vmap_area *va;
869 struct rb_node *node;
870 unsigned long length;
871
872
873 node = free_vmap_area_root.rb_node;
874
875
876 length = size + align - 1;
877
878 while (node) {
879 va = rb_entry(node, struct vmap_area, rb_node);
880
881 if (get_subtree_max_size(node->rb_left) >= length &&
882 vstart < va->va_start) {
883 node = node->rb_left;
884 } else {
885 if (is_within_this_va(va, size, align, vstart))
886 return va;
887
888
889
890
891
892
893 if (get_subtree_max_size(node->rb_right) >= length) {
894 node = node->rb_right;
895 continue;
896 }
897
898
899
900
901
902
903 while ((node = rb_parent(node))) {
904 va = rb_entry(node, struct vmap_area, rb_node);
905 if (is_within_this_va(va, size, align, vstart))
906 return va;
907
908 if (get_subtree_max_size(node->rb_right) >= length &&
909 vstart <= va->va_start) {
910 node = node->rb_right;
911 break;
912 }
913 }
914 }
915 }
916
917 return NULL;
918}
919
920#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
921#include <linux/random.h>
922
923static struct vmap_area *
924find_vmap_lowest_linear_match(unsigned long size,
925 unsigned long align, unsigned long vstart)
926{
927 struct vmap_area *va;
928
929 list_for_each_entry(va, &free_vmap_area_list, list) {
930 if (!is_within_this_va(va, size, align, vstart))
931 continue;
932
933 return va;
934 }
935
936 return NULL;
937}
938
939static void
940find_vmap_lowest_match_check(unsigned long size)
941{
942 struct vmap_area *va_1, *va_2;
943 unsigned long vstart;
944 unsigned int rnd;
945
946 get_random_bytes(&rnd, sizeof(rnd));
947 vstart = VMALLOC_START + rnd;
948
949 va_1 = find_vmap_lowest_match(size, 1, vstart);
950 va_2 = find_vmap_lowest_linear_match(size, 1, vstart);
951
952 if (va_1 != va_2)
953 pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
954 va_1, va_2, vstart);
955}
956#endif
957
enum fit_type {
	NOTHING_FIT = 0,
	FL_FIT_TYPE = 1,	/* full fit */
	LE_FIT_TYPE = 2,	/* left edge fit */
	RE_FIT_TYPE = 3,	/* right edge fit */
	NE_FIT_TYPE = 4		/* no edge fit */
};

static __always_inline enum fit_type
classify_va_fit_type(struct vmap_area *va,
	unsigned long nva_start_addr, unsigned long size)
{
	enum fit_type type;

	/* Check if it is within VA. */
	if (nva_start_addr < va->va_start ||
			nva_start_addr + size > va->va_end)
		return NOTHING_FIT;

	/* Now classify. */
	if (va->va_start == nva_start_addr) {
		if (va->va_end == nva_start_addr + size)
			type = FL_FIT_TYPE;
		else
			type = LE_FIT_TYPE;
	} else if (va->va_end == nva_start_addr + size) {
		type = RE_FIT_TYPE;
	} else {
		type = NE_FIT_TYPE;
	}

	return type;
}
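
/*
 * Worked example (illustrative only) of the fit types above.  Suppose the
 * free vmap_area spans [0x1000, 0x9000):
 *
 *   nva_start_addr == 0x1000, size == 0x8000  ->  FL_FIT_TYPE (exact fit)
 *   nva_start_addr == 0x1000, size == 0x2000  ->  LE_FIT_TYPE (trim the head)
 *   nva_start_addr == 0x7000, size == 0x2000  ->  RE_FIT_TYPE (trim the tail)
 *   nva_start_addr == 0x4000, size == 0x2000  ->  NE_FIT_TYPE (split in two,
 *                                                 needs one extra vmap_area)
 */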
991
992static __always_inline int
993adjust_va_to_fit_type(struct vmap_area *va,
994 unsigned long nva_start_addr, unsigned long size,
995 enum fit_type type)
996{
997 struct vmap_area *lva = NULL;
998
999 if (type == FL_FIT_TYPE) {
1000
1001
1002
1003
1004
1005
1006
1007 unlink_va(va, &free_vmap_area_root);
1008 kmem_cache_free(vmap_area_cachep, va);
1009 } else if (type == LE_FIT_TYPE) {
1010
1011
1012
1013
1014
1015
1016
1017 va->va_start += size;
1018 } else if (type == RE_FIT_TYPE) {
1019
1020
1021
1022
1023
1024
1025
1026 va->va_end = nva_start_addr;
1027 } else if (type == NE_FIT_TYPE) {
1028
1029
1030
1031
1032
1033
1034
1035 lva = __this_cpu_xchg(ne_fit_preload_node, NULL);
1036 if (unlikely(!lva)) {
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062 lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
1063 if (!lva)
1064 return -1;
1065 }
1066
1067
1068
1069
1070 lva->va_start = va->va_start;
1071 lva->va_end = nva_start_addr;
1072
1073
1074
1075
1076 va->va_start = nva_start_addr + size;
1077 } else {
1078 return -1;
1079 }
1080
1081 if (type != FL_FIT_TYPE) {
1082 augment_tree_propagate_from(va);
1083
1084 if (lva)
1085 insert_vmap_area_augment(lva, &va->rb_node,
1086 &free_vmap_area_root, &free_vmap_area_list);
1087 }
1088
1089 return 0;
1090}
1091
1092
1093
1094
1095
1096static __always_inline unsigned long
1097__alloc_vmap_area(unsigned long size, unsigned long align,
1098 unsigned long vstart, unsigned long vend)
1099{
1100 unsigned long nva_start_addr;
1101 struct vmap_area *va;
1102 enum fit_type type;
1103 int ret;
1104
1105 va = find_vmap_lowest_match(size, align, vstart);
1106 if (unlikely(!va))
1107 return vend;
1108
1109 if (va->va_start > vstart)
1110 nva_start_addr = ALIGN(va->va_start, align);
1111 else
1112 nva_start_addr = ALIGN(vstart, align);
1113
1114
1115 if (nva_start_addr + size > vend)
1116 return vend;
1117
1118
1119 type = classify_va_fit_type(va, nva_start_addr, size);
1120 if (WARN_ON_ONCE(type == NOTHING_FIT))
1121 return vend;
1122
1123
1124 ret = adjust_va_to_fit_type(va, nva_start_addr, size, type);
1125 if (ret)
1126 return vend;
1127
1128#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
1129 find_vmap_lowest_match_check(size);
1130#endif
1131
1132 return nva_start_addr;
1133}
1134
1135
1136
1137
1138static void free_vmap_area(struct vmap_area *va)
1139{
1140
1141
1142
1143 spin_lock(&vmap_area_lock);
1144 unlink_va(va, &vmap_area_root);
1145 spin_unlock(&vmap_area_lock);
1146
1147
1148
1149
1150 spin_lock(&free_vmap_area_lock);
1151 merge_or_add_vmap_area_augment(va, &free_vmap_area_root, &free_vmap_area_list);
1152 spin_unlock(&free_vmap_area_lock);
1153}
1154
1155
1156
1157
1158
1159static struct vmap_area *alloc_vmap_area(unsigned long size,
1160 unsigned long align,
1161 unsigned long vstart, unsigned long vend,
1162 int node, gfp_t gfp_mask)
1163{
1164 struct vmap_area *va, *pva;
1165 unsigned long addr;
1166 int purged = 0;
1167 int ret;
1168
1169 BUG_ON(!size);
1170 BUG_ON(offset_in_page(size));
1171 BUG_ON(!is_power_of_2(align));
1172
1173 if (unlikely(!vmap_initialized))
1174 return ERR_PTR(-EBUSY);
1175
1176 might_sleep();
1177 gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
1178
1179 va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
1180 if (unlikely(!va))
1181 return ERR_PTR(-ENOMEM);
1182
1183
1184
1185
1186
1187 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
1188
1189retry:
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205 pva = NULL;
1206
1207 if (!this_cpu_read(ne_fit_preload_node))
1208
1209
1210
1211
1212
1213 pva = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
1214
1215 spin_lock(&free_vmap_area_lock);
1216
1217 if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
1218 kmem_cache_free(vmap_area_cachep, pva);
1219
1220
1221
1222
1223
1224 addr = __alloc_vmap_area(size, align, vstart, vend);
1225 spin_unlock(&free_vmap_area_lock);
1226
1227 if (unlikely(addr == vend))
1228 goto overflow;
1229
1230 va->va_start = addr;
1231 va->va_end = addr + size;
1232 va->vm = NULL;
1233
1234
1235 spin_lock(&vmap_area_lock);
1236 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
1237 spin_unlock(&vmap_area_lock);
1238
1239 BUG_ON(!IS_ALIGNED(va->va_start, align));
1240 BUG_ON(va->va_start < vstart);
1241 BUG_ON(va->va_end > vend);
1242
1243 ret = kasan_populate_vmalloc(addr, size);
1244 if (ret) {
1245 free_vmap_area(va);
1246 return ERR_PTR(ret);
1247 }
1248
1249 return va;
1250
1251overflow:
1252 if (!purged) {
1253 purge_vmap_area_lazy();
1254 purged = 1;
1255 goto retry;
1256 }
1257
1258 if (gfpflags_allow_blocking(gfp_mask)) {
1259 unsigned long freed = 0;
1260 blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
1261 if (freed > 0) {
1262 purged = 0;
1263 goto retry;
1264 }
1265 }
1266
1267 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
1268 pr_warn("vmap allocation for size %lu failed: use vmalloc=<size> to increase size\n",
1269 size);
1270
1271 kmem_cache_free(vmap_area_cachep, va);
1272 return ERR_PTR(-EBUSY);
1273}
1274
int register_vmap_purge_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&vmap_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_vmap_purge_notifier);

int unregister_vmap_purge_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&vmap_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
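
/*
 * Usage sketch (illustrative only): a subsystem that caches vmalloc'd
 * buffers can register on this chain and drop its caches when the vmap
 * allocator runs out of space.  The callback's @data argument points to
 * the "freed" counter used by alloc_vmap_area(); bump it if anything was
 * released.  Names below are made up for the example.
 */
#if 0	/* example only */
static int example_vmap_purge(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	unsigned long *freed = data;

	*freed += example_shrink_my_cache();	/* hypothetical helper */
	return NOTIFY_OK;
}

static struct notifier_block example_vmap_nb = {
	.notifier_call = example_vmap_purge,
};

/* register_vmap_purge_notifier(&example_vmap_nb); */
#endif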

/*
 * lazy_max_pages is the maximum amount of virtual address space we gain by
 * batching vmap area purges instead of flushing the TLB on every vunmap.
 * A larger value amortises more TLB flushes but keeps more dead address
 * space (and its page tables) around in the meantime.
 */
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}

static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
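
/*
 * Worked example (illustrative only): on a 16-CPU machine with 4 KiB pages,
 * fls(16) == 5, so lazy_max_pages() == 5 * (32 MiB / 4 KiB) == 40960 pages,
 * i.e. up to ~160 MiB of unmapped-but-not-yet-purged address space may
 * accumulate before a purge is forced.
 */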
1313
1314
1315
1316
1317
1318
1319static DEFINE_MUTEX(vmap_purge_lock);
1320
1321
1322static void purge_fragmented_blocks_allcpus(void);
1323
1324
1325
1326
1327
1328void set_iounmap_nonlazy(void)
1329{
1330 atomic_long_set(&vmap_lazy_nr, lazy_max_pages()+1);
1331}
1332
1333
1334
1335
1336static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
1337{
1338 unsigned long resched_threshold;
1339 struct list_head local_pure_list;
1340 struct vmap_area *va, *n_va;
1341
1342 lockdep_assert_held(&vmap_purge_lock);
1343
1344 spin_lock(&purge_vmap_area_lock);
1345 purge_vmap_area_root = RB_ROOT;
1346 list_replace_init(&purge_vmap_area_list, &local_pure_list);
1347 spin_unlock(&purge_vmap_area_lock);
1348
1349 if (unlikely(list_empty(&local_pure_list)))
1350 return false;
1351
1352 start = min(start,
1353 list_first_entry(&local_pure_list,
1354 struct vmap_area, list)->va_start);
1355
1356 end = max(end,
1357 list_last_entry(&local_pure_list,
1358 struct vmap_area, list)->va_end);
1359
1360 flush_tlb_kernel_range(start, end);
1361 resched_threshold = lazy_max_pages() << 1;
1362
1363 spin_lock(&free_vmap_area_lock);
1364 list_for_each_entry_safe(va, n_va, &local_pure_list, list) {
1365 unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
1366 unsigned long orig_start = va->va_start;
1367 unsigned long orig_end = va->va_end;
1368
1369
1370
1371
1372
1373
1374 va = merge_or_add_vmap_area_augment(va, &free_vmap_area_root,
1375 &free_vmap_area_list);
1376
1377 if (!va)
1378 continue;
1379
1380 if (is_vmalloc_or_module_addr((void *)orig_start))
1381 kasan_release_vmalloc(orig_start, orig_end,
1382 va->va_start, va->va_end);
1383
1384 atomic_long_sub(nr, &vmap_lazy_nr);
1385
1386 if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
1387 cond_resched_lock(&free_vmap_area_lock);
1388 }
1389 spin_unlock(&free_vmap_area_lock);
1390 return true;
1391}
1392
1393
1394
1395
1396
1397static void try_purge_vmap_area_lazy(void)
1398{
1399 if (mutex_trylock(&vmap_purge_lock)) {
1400 __purge_vmap_area_lazy(ULONG_MAX, 0);
1401 mutex_unlock(&vmap_purge_lock);
1402 }
1403}
1404
1405
1406
1407
1408static void purge_vmap_area_lazy(void)
1409{
1410 mutex_lock(&vmap_purge_lock);
1411 purge_fragmented_blocks_allcpus();
1412 __purge_vmap_area_lazy(ULONG_MAX, 0);
1413 mutex_unlock(&vmap_purge_lock);
1414}
1415
1416
1417
1418
1419
1420
1421static void free_vmap_area_noflush(struct vmap_area *va)
1422{
1423 unsigned long nr_lazy;
1424
1425 spin_lock(&vmap_area_lock);
1426 unlink_va(va, &vmap_area_root);
1427 spin_unlock(&vmap_area_lock);
1428
1429 nr_lazy = atomic_long_add_return((va->va_end - va->va_start) >>
1430 PAGE_SHIFT, &vmap_lazy_nr);
1431
1432
1433
1434
1435 spin_lock(&purge_vmap_area_lock);
1436 merge_or_add_vmap_area(va,
1437 &purge_vmap_area_root, &purge_vmap_area_list);
1438 spin_unlock(&purge_vmap_area_lock);
1439
1440
1441 if (unlikely(nr_lazy > lazy_max_pages()))
1442 try_purge_vmap_area_lazy();
1443}
1444
1445
1446
1447
1448static void free_unmap_vmap_area(struct vmap_area *va)
1449{
1450 flush_cache_vunmap(va->va_start, va->va_end);
1451 unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start);
1452 if (debug_pagealloc_enabled_static())
1453 flush_tlb_kernel_range(va->va_start, va->va_end);
1454
1455 free_vmap_area_noflush(va);
1456}
1457
1458static struct vmap_area *find_vmap_area(unsigned long addr)
1459{
1460 struct vmap_area *va;
1461
1462 spin_lock(&vmap_area_lock);
1463 va = __find_vmap_area(addr);
1464 spin_unlock(&vmap_area_lock);
1465
1466 return va;
1467}
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480#if BITS_PER_LONG == 32
1481#define VMALLOC_SPACE (128UL*1024*1024)
1482#else
1483#define VMALLOC_SPACE (128UL*1024*1024*1024)
1484#endif
1485
1486#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
1487#define VMAP_MAX_ALLOC BITS_PER_LONG
1488#define VMAP_BBMAP_BITS_MAX 1024
1489#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
1490#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
1491#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
1492#define VMAP_BBMAP_BITS \
1493 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
1494 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
1495 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
1496
1497#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
1498
1499struct vmap_block_queue {
1500 spinlock_t lock;
1501 struct list_head free;
1502};
1503
1504struct vmap_block {
1505 spinlock_t lock;
1506 struct vmap_area *va;
1507 unsigned long free, dirty;
1508 unsigned long dirty_min, dirty_max;
1509 struct list_head free_list;
1510 struct rcu_head rcu_head;
1511 struct list_head purge;
1512};
1513
1514
1515static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
1516
1517
1518
1519
1520
1521
1522static DEFINE_XARRAY(vmap_blocks);
1523
1524
1525
1526
1527
1528
1529
1530
static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}
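
/*
 * Worked example (illustrative only): with 4 KiB pages and
 * VMAP_BBMAP_BITS == 1024, VMAP_BLOCK_SIZE is 4 MiB, so an address located
 * 9 MiB above the block-aligned base of the vmalloc region maps to block
 * index 9 MiB / 4 MiB = 2.
 */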
1537
1538static void *vmap_block_vaddr(unsigned long va_start, unsigned long pages_off)
1539{
1540 unsigned long addr;
1541
1542 addr = va_start + (pages_off << PAGE_SHIFT);
1543 BUG_ON(addr_to_vb_idx(addr) != addr_to_vb_idx(va_start));
1544 return (void *)addr;
1545}
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
1556{
1557 struct vmap_block_queue *vbq;
1558 struct vmap_block *vb;
1559 struct vmap_area *va;
1560 unsigned long vb_idx;
1561 int node, err;
1562 void *vaddr;
1563
1564 node = numa_node_id();
1565
1566 vb = kmalloc_node(sizeof(struct vmap_block),
1567 gfp_mask & GFP_RECLAIM_MASK, node);
1568 if (unlikely(!vb))
1569 return ERR_PTR(-ENOMEM);
1570
1571 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
1572 VMALLOC_START, VMALLOC_END,
1573 node, gfp_mask);
1574 if (IS_ERR(va)) {
1575 kfree(vb);
1576 return ERR_CAST(va);
1577 }
1578
1579 vaddr = vmap_block_vaddr(va->va_start, 0);
1580 spin_lock_init(&vb->lock);
1581 vb->va = va;
1582
1583 BUG_ON(VMAP_BBMAP_BITS <= (1UL << order));
1584 vb->free = VMAP_BBMAP_BITS - (1UL << order);
1585 vb->dirty = 0;
1586 vb->dirty_min = VMAP_BBMAP_BITS;
1587 vb->dirty_max = 0;
1588 INIT_LIST_HEAD(&vb->free_list);
1589
1590 vb_idx = addr_to_vb_idx(va->va_start);
1591 err = xa_insert(&vmap_blocks, vb_idx, vb, gfp_mask);
1592 if (err) {
1593 kfree(vb);
1594 free_vmap_area(va);
1595 return ERR_PTR(err);
1596 }
1597
1598 vbq = &get_cpu_var(vmap_block_queue);
1599 spin_lock(&vbq->lock);
1600 list_add_tail_rcu(&vb->free_list, &vbq->free);
1601 spin_unlock(&vbq->lock);
1602 put_cpu_var(vmap_block_queue);
1603
1604 return vaddr;
1605}
1606
1607static void free_vmap_block(struct vmap_block *vb)
1608{
1609 struct vmap_block *tmp;
1610
1611 tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start));
1612 BUG_ON(tmp != vb);
1613
1614 free_vmap_area_noflush(vb->va);
1615 kfree_rcu(vb, rcu_head);
1616}
1617
1618static void purge_fragmented_blocks(int cpu)
1619{
1620 LIST_HEAD(purge);
1621 struct vmap_block *vb;
1622 struct vmap_block *n_vb;
1623 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1624
1625 rcu_read_lock();
1626 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1627
1628 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
1629 continue;
1630
1631 spin_lock(&vb->lock);
1632 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
1633 vb->free = 0;
1634 vb->dirty = VMAP_BBMAP_BITS;
1635 vb->dirty_min = 0;
1636 vb->dirty_max = VMAP_BBMAP_BITS;
1637 spin_lock(&vbq->lock);
1638 list_del_rcu(&vb->free_list);
1639 spin_unlock(&vbq->lock);
1640 spin_unlock(&vb->lock);
1641 list_add_tail(&vb->purge, &purge);
1642 } else
1643 spin_unlock(&vb->lock);
1644 }
1645 rcu_read_unlock();
1646
1647 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
1648 list_del(&vb->purge);
1649 free_vmap_block(vb);
1650 }
1651}
1652
1653static void purge_fragmented_blocks_allcpus(void)
1654{
1655 int cpu;
1656
1657 for_each_possible_cpu(cpu)
1658 purge_fragmented_blocks(cpu);
1659}
1660
1661static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
1662{
1663 struct vmap_block_queue *vbq;
1664 struct vmap_block *vb;
1665 void *vaddr = NULL;
1666 unsigned int order;
1667
1668 BUG_ON(offset_in_page(size));
1669 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
1670 if (WARN_ON(size == 0)) {
1671
1672
1673
1674
1675
1676 return NULL;
1677 }
1678 order = get_order(size);
1679
1680 rcu_read_lock();
1681 vbq = &get_cpu_var(vmap_block_queue);
1682 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1683 unsigned long pages_off;
1684
1685 spin_lock(&vb->lock);
1686 if (vb->free < (1UL << order)) {
1687 spin_unlock(&vb->lock);
1688 continue;
1689 }
1690
1691 pages_off = VMAP_BBMAP_BITS - vb->free;
1692 vaddr = vmap_block_vaddr(vb->va->va_start, pages_off);
1693 vb->free -= 1UL << order;
1694 if (vb->free == 0) {
1695 spin_lock(&vbq->lock);
1696 list_del_rcu(&vb->free_list);
1697 spin_unlock(&vbq->lock);
1698 }
1699
1700 spin_unlock(&vb->lock);
1701 break;
1702 }
1703
1704 put_cpu_var(vmap_block_queue);
1705 rcu_read_unlock();
1706
1707
1708 if (!vaddr)
1709 vaddr = new_vmap_block(order, gfp_mask);
1710
1711 return vaddr;
1712}
1713
1714static void vb_free(unsigned long addr, unsigned long size)
1715{
1716 unsigned long offset;
1717 unsigned int order;
1718 struct vmap_block *vb;
1719
1720 BUG_ON(offset_in_page(size));
1721 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
1722
1723 flush_cache_vunmap(addr, addr + size);
1724
1725 order = get_order(size);
1726 offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
1727 vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr));
1728
1729 unmap_kernel_range_noflush(addr, size);
1730
1731 if (debug_pagealloc_enabled_static())
1732 flush_tlb_kernel_range(addr, addr + size);
1733
1734 spin_lock(&vb->lock);
1735
1736
1737 vb->dirty_min = min(vb->dirty_min, offset);
1738 vb->dirty_max = max(vb->dirty_max, offset + (1UL << order));
1739
1740 vb->dirty += 1UL << order;
1741 if (vb->dirty == VMAP_BBMAP_BITS) {
1742 BUG_ON(vb->free);
1743 spin_unlock(&vb->lock);
1744 free_vmap_block(vb);
1745 } else
1746 spin_unlock(&vb->lock);
1747}
1748
1749static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
1750{
1751 int cpu;
1752
1753 if (unlikely(!vmap_initialized))
1754 return;
1755
1756 might_sleep();
1757
1758 for_each_possible_cpu(cpu) {
1759 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1760 struct vmap_block *vb;
1761
1762 rcu_read_lock();
1763 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1764 spin_lock(&vb->lock);
1765 if (vb->dirty) {
1766 unsigned long va_start = vb->va->va_start;
1767 unsigned long s, e;
1768
1769 s = va_start + (vb->dirty_min << PAGE_SHIFT);
1770 e = va_start + (vb->dirty_max << PAGE_SHIFT);
1771
1772 start = min(s, start);
1773 end = max(e, end);
1774
1775 flush = 1;
1776 }
1777 spin_unlock(&vb->lock);
1778 }
1779 rcu_read_unlock();
1780 }
1781
1782 mutex_lock(&vmap_purge_lock);
1783 purge_fragmented_blocks_allcpus();
1784 if (!__purge_vmap_area_lazy(start, end) && flush)
1785 flush_tlb_kernel_range(start, end);
1786 mutex_unlock(&vmap_purge_lock);
1787}
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802void vm_unmap_aliases(void)
1803{
1804 unsigned long start = ULONG_MAX, end = 0;
1805 int flush = 0;
1806
1807 _vm_unmap_aliases(start, end, flush);
1808}
1809EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1810
1811
1812
1813
1814
1815
1816void vm_unmap_ram(const void *mem, unsigned int count)
1817{
1818 unsigned long size = (unsigned long)count << PAGE_SHIFT;
1819 unsigned long addr = (unsigned long)mem;
1820 struct vmap_area *va;
1821
1822 might_sleep();
1823 BUG_ON(!addr);
1824 BUG_ON(addr < VMALLOC_START);
1825 BUG_ON(addr > VMALLOC_END);
1826 BUG_ON(!PAGE_ALIGNED(addr));
1827
1828 kasan_poison_vmalloc(mem, size);
1829
1830 if (likely(count <= VMAP_MAX_ALLOC)) {
1831 debug_check_no_locks_freed(mem, size);
1832 vb_free(addr, size);
1833 return;
1834 }
1835
1836 va = find_vmap_area(addr);
1837 BUG_ON(!va);
1838 debug_check_no_locks_freed((void *)va->va_start,
1839 (va->va_end - va->va_start));
1840 free_unmap_vmap_area(va);
1841}
1842EXPORT_SYMBOL(vm_unmap_ram);
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858void *vm_map_ram(struct page **pages, unsigned int count, int node)
1859{
1860 unsigned long size = (unsigned long)count << PAGE_SHIFT;
1861 unsigned long addr;
1862 void *mem;
1863
1864 if (likely(count <= VMAP_MAX_ALLOC)) {
1865 mem = vb_alloc(size, GFP_KERNEL);
1866 if (IS_ERR(mem))
1867 return NULL;
1868 addr = (unsigned long)mem;
1869 } else {
1870 struct vmap_area *va;
1871 va = alloc_vmap_area(size, PAGE_SIZE,
1872 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1873 if (IS_ERR(va))
1874 return NULL;
1875
1876 addr = va->va_start;
1877 mem = (void *)addr;
1878 }
1879
1880 kasan_unpoison_vmalloc(mem, size);
1881
1882 if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) {
1883 vm_unmap_ram(mem, count);
1884 return NULL;
1885 }
1886 return mem;
1887}
1888EXPORT_SYMBOL(vm_map_ram);
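
/*
 * Usage sketch (illustrative only): vm_map_ram()/vm_unmap_ram() are meant
 * for short-lived, frequently recycled mappings of a small number of pages.
 * Error handling is minimal and the helper name is made up for the example.
 */
#if 0	/* example only */
static void example_peek_pages(struct page **pages, unsigned int count)
{
	void *vaddr;

	vaddr = vm_map_ram(pages, count, NUMA_NO_NODE);
	if (!vaddr)
		return;

	/* ... access the pages through the contiguous mapping at vaddr ... */

	vm_unmap_ram(vaddr, count);
}
#endif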
1889
1890static struct vm_struct *vmlist __initdata;
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902void __init vm_area_add_early(struct vm_struct *vm)
1903{
1904 struct vm_struct *tmp, **p;
1905
1906 BUG_ON(vmap_initialized);
1907 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1908 if (tmp->addr >= vm->addr) {
1909 BUG_ON(tmp->addr < vm->addr + vm->size);
1910 break;
1911 } else
1912 BUG_ON(tmp->addr + tmp->size > vm->addr);
1913 }
1914 vm->next = *p;
1915 *p = vm;
1916}
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1931{
1932 static size_t vm_init_off __initdata;
1933 unsigned long addr;
1934
1935 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1936 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1937
1938 vm->addr = (void *)addr;
1939
1940 vm_area_add_early(vm);
1941}
1942
1943static void vmap_init_free_space(void)
1944{
1945 unsigned long vmap_start = 1;
1946 const unsigned long vmap_end = ULONG_MAX;
1947 struct vmap_area *busy, *free;
1948
1949
1950
1951
1952
1953
1954
1955 list_for_each_entry(busy, &vmap_area_list, list) {
1956 if (busy->va_start - vmap_start > 0) {
1957 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1958 if (!WARN_ON_ONCE(!free)) {
1959 free->va_start = vmap_start;
1960 free->va_end = busy->va_start;
1961
1962 insert_vmap_area_augment(free, NULL,
1963 &free_vmap_area_root,
1964 &free_vmap_area_list);
1965 }
1966 }
1967
1968 vmap_start = busy->va_end;
1969 }
1970
1971 if (vmap_end - vmap_start > 0) {
1972 free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
1973 if (!WARN_ON_ONCE(!free)) {
1974 free->va_start = vmap_start;
1975 free->va_end = vmap_end;
1976
1977 insert_vmap_area_augment(free, NULL,
1978 &free_vmap_area_root,
1979 &free_vmap_area_list);
1980 }
1981 }
1982}
1983
1984void __init vmalloc_init(void)
1985{
1986 struct vmap_area *va;
1987 struct vm_struct *tmp;
1988 int i;
1989
1990
1991
1992
1993 vmap_area_cachep = KMEM_CACHE(vmap_area, SLAB_PANIC);
1994
1995 for_each_possible_cpu(i) {
1996 struct vmap_block_queue *vbq;
1997 struct vfree_deferred *p;
1998
1999 vbq = &per_cpu(vmap_block_queue, i);
2000 spin_lock_init(&vbq->lock);
2001 INIT_LIST_HEAD(&vbq->free);
2002 p = &per_cpu(vfree_deferred, i);
2003 init_llist_head(&p->list);
2004 INIT_WORK(&p->wq, free_work);
2005 }
2006
2007
2008 for (tmp = vmlist; tmp; tmp = tmp->next) {
2009 va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
2010 if (WARN_ON_ONCE(!va))
2011 continue;
2012
2013 va->va_start = (unsigned long)tmp->addr;
2014 va->va_end = va->va_start + tmp->size;
2015 va->vm = tmp;
2016 insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
2017 }
2018
2019
2020
2021
2022 vmap_init_free_space();
2023 vmap_initialized = true;
2024}
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034void unmap_kernel_range(unsigned long addr, unsigned long size)
2035{
2036 unsigned long end = addr + size;
2037
2038 flush_cache_vunmap(addr, end);
2039 unmap_kernel_range_noflush(addr, size);
2040 flush_tlb_kernel_range(addr, end);
2041}
2042
2043static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
2044 struct vmap_area *va, unsigned long flags, const void *caller)
2045{
2046 vm->flags = flags;
2047 vm->addr = (void *)va->va_start;
2048 vm->size = va->va_end - va->va_start;
2049 vm->caller = caller;
2050 va->vm = vm;
2051}
2052
2053static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
2054 unsigned long flags, const void *caller)
2055{
2056 spin_lock(&vmap_area_lock);
2057 setup_vmalloc_vm_locked(vm, va, flags, caller);
2058 spin_unlock(&vmap_area_lock);
2059}
2060
2061static void clear_vm_uninitialized_flag(struct vm_struct *vm)
2062{
2063
2064
2065
2066
2067
2068 smp_wmb();
2069 vm->flags &= ~VM_UNINITIALIZED;
2070}
2071
2072static struct vm_struct *__get_vm_area_node(unsigned long size,
2073 unsigned long align, unsigned long flags, unsigned long start,
2074 unsigned long end, int node, gfp_t gfp_mask, const void *caller)
2075{
2076 struct vmap_area *va;
2077 struct vm_struct *area;
2078 unsigned long requested_size = size;
2079
2080 BUG_ON(in_interrupt());
2081 size = PAGE_ALIGN(size);
2082 if (unlikely(!size))
2083 return NULL;
2084
2085 if (flags & VM_IOREMAP)
2086 align = 1ul << clamp_t(int, get_count_order_long(size),
2087 PAGE_SHIFT, IOREMAP_MAX_ORDER);
2088
2089 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
2090 if (unlikely(!area))
2091 return NULL;
2092
2093 if (!(flags & VM_NO_GUARD))
2094 size += PAGE_SIZE;
2095
2096 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
2097 if (IS_ERR(va)) {
2098 kfree(area);
2099 return NULL;
2100 }
2101
2102 kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
2103
2104 setup_vmalloc_vm(area, va, flags, caller);
2105
2106 return area;
2107}
2108
2109struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
2110 unsigned long start, unsigned long end,
2111 const void *caller)
2112{
2113 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
2114 GFP_KERNEL, caller);
2115}
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
2129{
2130 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
2131 NUMA_NO_NODE, GFP_KERNEL,
2132 __builtin_return_address(0));
2133}
2134
2135struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
2136 const void *caller)
2137{
2138 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
2139 NUMA_NO_NODE, GFP_KERNEL, caller);
2140}
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152struct vm_struct *find_vm_area(const void *addr)
2153{
2154 struct vmap_area *va;
2155
2156 va = find_vmap_area((unsigned long)addr);
2157 if (!va)
2158 return NULL;
2159
2160 return va->vm;
2161}
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173struct vm_struct *remove_vm_area(const void *addr)
2174{
2175 struct vmap_area *va;
2176
2177 might_sleep();
2178
2179 spin_lock(&vmap_area_lock);
2180 va = __find_vmap_area((unsigned long)addr);
2181 if (va && va->vm) {
2182 struct vm_struct *vm = va->vm;
2183
2184 va->vm = NULL;
2185 spin_unlock(&vmap_area_lock);
2186
2187 kasan_free_shadow(vm);
2188 free_unmap_vmap_area(va);
2189
2190 return vm;
2191 }
2192
2193 spin_unlock(&vmap_area_lock);
2194 return NULL;
2195}
2196
2197static inline void set_area_direct_map(const struct vm_struct *area,
2198 int (*set_direct_map)(struct page *page))
2199{
2200 int i;
2201
2202 for (i = 0; i < area->nr_pages; i++)
2203 if (page_address(area->pages[i]))
2204 set_direct_map(area->pages[i]);
2205}
2206
2207
2208static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
2209{
2210 unsigned long start = ULONG_MAX, end = 0;
2211 int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
2212 int flush_dmap = 0;
2213 int i;
2214
2215 remove_vm_area(area->addr);
2216
2217
2218 if (!flush_reset)
2219 return;
2220
2221
2222
2223
2224
2225 if (!deallocate_pages) {
2226 vm_unmap_aliases();
2227 return;
2228 }
2229
2230
2231
2232
2233
2234
2235 for (i = 0; i < area->nr_pages; i++) {
2236 unsigned long addr = (unsigned long)page_address(area->pages[i]);
2237 if (addr) {
2238 start = min(addr, start);
2239 end = max(addr + PAGE_SIZE, end);
2240 flush_dmap = 1;
2241 }
2242 }
2243
2244
2245
2246
2247
2248
2249 set_area_direct_map(area, set_direct_map_invalid_noflush);
2250 _vm_unmap_aliases(start, end, flush_dmap);
2251 set_area_direct_map(area, set_direct_map_default_noflush);
2252}
2253
2254static void __vunmap(const void *addr, int deallocate_pages)
2255{
2256 struct vm_struct *area;
2257
2258 if (!addr)
2259 return;
2260
2261 if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
2262 addr))
2263 return;
2264
2265 area = find_vm_area(addr);
2266 if (unlikely(!area)) {
2267 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
2268 addr);
2269 return;
2270 }
2271
2272 debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
2273 debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
2274
2275 kasan_poison_vmalloc(area->addr, get_vm_area_size(area));
2276
2277 vm_remove_mappings(area, deallocate_pages);
2278
2279 if (deallocate_pages) {
2280 int i;
2281
2282 for (i = 0; i < area->nr_pages; i++) {
2283 struct page *page = area->pages[i];
2284
2285 BUG_ON(!page);
2286 __free_pages(page, 0);
2287 }
2288 atomic_long_sub(area->nr_pages, &nr_vmalloc_pages);
2289
2290 kvfree(area->pages);
2291 }
2292
2293 kfree(area);
2294}
2295
2296static inline void __vfree_deferred(const void *addr)
2297{
2298
2299
2300
2301
2302
2303
2304 struct vfree_deferred *p = raw_cpu_ptr(&vfree_deferred);
2305
2306 if (llist_add((struct llist_node *)addr, &p->list))
2307 schedule_work(&p->wq);
2308}
2309
2310
2311
2312
2313
2314
2315
2316
2317void vfree_atomic(const void *addr)
2318{
2319 BUG_ON(in_nmi());
2320
2321 kmemleak_free(addr);
2322
2323 if (!addr)
2324 return;
2325 __vfree_deferred(addr);
2326}
2327
2328static void __vfree(const void *addr)
2329{
2330 if (unlikely(in_interrupt()))
2331 __vfree_deferred(addr);
2332 else
2333 __vunmap(addr, 1);
2334}

/**
 * vfree - Release memory allocated by vmalloc()
 * @addr: Memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as obtained
 * from one of the vmalloc() family of APIs.  This will usually also free the
 * physical memory underlying the virtual allocation, but that memory is
 * reference counted, so it will not be freed until the last user goes away.
 *
 * If @addr is NULL, no operation is performed.
 *
 * Context: May sleep if called *not* from interrupt context.
 * Must not be called in NMI context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_nmi());

	kmemleak_free(addr);

	might_sleep_if(!in_interrupt());

	if (!addr)
		return;

	__vfree(addr);
}
EXPORT_SYMBOL(vfree);

/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	if (addr)
		__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual space.
 * If @flags contains %VM_MAP_PUT_PAGES, ownership of the pages array
 * (which must be kmalloc or vmalloc memory) and of the pages themselves
 * is transferred to vmap(); both are released again by vfree().
 *
 * Return: the address of the area or %NULL on failure
 */
void *vmap(struct page **pages, unsigned int count,
	   unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;
	unsigned long size;

	might_sleep();

	if (count > totalram_pages())
		return NULL;

	size = (unsigned long)count << PAGE_SHIFT;
	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_kernel_range((unsigned long)area->addr, size, pgprot_nx(prot),
			pages) < 0) {
		vunmap(area->addr);
		return NULL;
	}

	if (flags & VM_MAP_PUT_PAGES) {
		area->pages = pages;
		area->nr_pages = count;
	}
	return area->addr;
}
EXPORT_SYMBOL(vmap);
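
/*
 * Usage sketch (illustrative only): building a contiguous kernel mapping
 * over individually allocated pages with vmap() and later tearing it down
 * with vunmap().  Partial-failure handling is shortened and the helper name
 * is made up for the example.
 */
#if 0	/* example only */
static void *example_vmap_two_pages(struct page *pages[2])
{
	void *vaddr;

	pages[0] = alloc_page(GFP_KERNEL);
	pages[1] = alloc_page(GFP_KERNEL);
	if (!pages[0] || !pages[1])
		return NULL;	/* leaks on partial failure; sketch only */

	vaddr = vmap(pages, 2, VM_MAP, PAGE_KERNEL);
	if (!vaddr) {
		__free_page(pages[0]);
		__free_page(pages[1]);
	}
	return vaddr;		/* later: vunmap(vaddr), then free the pages */
}
#endif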
2430
2431#ifdef CONFIG_VMAP_PFN
2432struct vmap_pfn_data {
2433 unsigned long *pfns;
2434 pgprot_t prot;
2435 unsigned int idx;
2436};
2437
2438static int vmap_pfn_apply(pte_t *pte, unsigned long addr, void *private)
2439{
2440 struct vmap_pfn_data *data = private;
2441
2442 if (WARN_ON_ONCE(pfn_valid(data->pfns[data->idx])))
2443 return -EINVAL;
2444 *pte = pte_mkspecial(pfn_pte(data->pfns[data->idx++], data->prot));
2445 return 0;
2446}
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
2458{
2459 struct vmap_pfn_data data = { .pfns = pfns, .prot = pgprot_nx(prot) };
2460 struct vm_struct *area;
2461
2462 area = get_vm_area_caller(count * PAGE_SIZE, VM_IOREMAP,
2463 __builtin_return_address(0));
2464 if (!area)
2465 return NULL;
2466 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2467 count * PAGE_SIZE, vmap_pfn_apply, &data)) {
2468 free_vm_area(area);
2469 return NULL;
2470 }
2471 return area->addr;
2472}
2473EXPORT_SYMBOL_GPL(vmap_pfn);
2474#endif
2475
2476static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
2477 pgprot_t prot, int node)
2478{
2479 const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
2480 unsigned int nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
2481 unsigned long array_size;
2482 unsigned int i;
2483 struct page **pages;
2484
2485 array_size = (unsigned long)nr_pages * sizeof(struct page *);
2486 gfp_mask |= __GFP_NOWARN;
2487 if (!(gfp_mask & (GFP_DMA | GFP_DMA32)))
2488 gfp_mask |= __GFP_HIGHMEM;
2489
2490
2491 if (array_size > PAGE_SIZE) {
2492 pages = __vmalloc_node(array_size, 1, nested_gfp, node,
2493 area->caller);
2494 } else {
2495 pages = kmalloc_node(array_size, nested_gfp, node);
2496 }
2497
2498 if (!pages) {
2499 free_vm_area(area);
2500 return NULL;
2501 }
2502
2503 area->pages = pages;
2504 area->nr_pages = nr_pages;
2505
2506 for (i = 0; i < area->nr_pages; i++) {
2507 struct page *page;
2508
2509 if (node == NUMA_NO_NODE)
2510 page = alloc_page(gfp_mask);
2511 else
2512 page = alloc_pages_node(node, gfp_mask, 0);
2513
2514 if (unlikely(!page)) {
2515
2516 area->nr_pages = i;
2517 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
2518 goto fail;
2519 }
2520 area->pages[i] = page;
2521 if (gfpflags_allow_blocking(gfp_mask))
2522 cond_resched();
2523 }
2524 atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
2525
2526 if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area),
2527 prot, pages) < 0)
2528 goto fail;
2529
2530 return area->addr;
2531
2532fail:
2533 warn_alloc(gfp_mask, NULL,
2534 "vmalloc: allocation failure, allocated %ld of %ld bytes",
2535 (area->nr_pages*PAGE_SIZE), area->size);
2536 __vfree(area->addr);
2537 return NULL;
2538}
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558void *__vmalloc_node_range(unsigned long size, unsigned long align,
2559 unsigned long start, unsigned long end, gfp_t gfp_mask,
2560 pgprot_t prot, unsigned long vm_flags, int node,
2561 const void *caller)
2562{
2563 struct vm_struct *area;
2564 void *addr;
2565 unsigned long real_size = size;
2566
2567 size = PAGE_ALIGN(size);
2568 if (!size || (size >> PAGE_SHIFT) > totalram_pages())
2569 goto fail;
2570
2571 area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
2572 vm_flags, start, end, node, gfp_mask, caller);
2573 if (!area)
2574 goto fail;
2575
2576 addr = __vmalloc_area_node(area, gfp_mask, prot, node);
2577 if (!addr)
2578 return NULL;
2579
2580
2581
2582
2583
2584
2585 clear_vm_uninitialized_flag(area);
2586
2587 kmemleak_vmalloc(area, size, gfp_mask);
2588
2589 return addr;
2590
2591fail:
2592 warn_alloc(gfp_mask, NULL,
2593 "vmalloc: allocation failure: %lu bytes", real_size);
2594 return NULL;
2595}
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616void *__vmalloc_node(unsigned long size, unsigned long align,
2617 gfp_t gfp_mask, int node, const void *caller)
2618{
2619 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
2620 gfp_mask, PAGE_KERNEL, 0, node, caller);
2621}
2622
2623
2624
2625
2626
2627#ifdef CONFIG_TEST_VMALLOC_MODULE
2628EXPORT_SYMBOL_GPL(__vmalloc_node);
2629#endif
2630
2631void *__vmalloc(unsigned long size, gfp_t gfp_mask)
2632{
2633 return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
2634 __builtin_return_address(0));
2635}
2636EXPORT_SYMBOL(__vmalloc);

/**
 * vmalloc - allocate virtually contiguous memory
 * @size:    allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
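
/*
 * Usage sketch (illustrative only): a large, virtually contiguous buffer
 * that does not need to be physically contiguous or DMA-able is a typical
 * vmalloc() use case.  The names below are made up for the example.
 */
#if 0	/* example only */
static int example_build_table(size_t nr_entries)
{
	u64 *table = vmalloc(array_size(nr_entries, sizeof(*table)));

	if (!table)
		return -ENOMEM;

	/* ... fill and use the table ... */

	vfree(table);
	return 0;
}
#endif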
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670void *vzalloc(unsigned long size)
2671{
2672 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
2673 __builtin_return_address(0));
2674}
2675EXPORT_SYMBOL(vzalloc);

/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 *
 * Return: pointer to the allocated memory or %NULL on error
 */
void *vmalloc_user(unsigned long size)
{
	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
				    VM_USERMAP, NUMA_NO_NODE,
				    __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_user);
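
/*
 * Usage sketch (illustrative only): a driver that wants to share a zeroed
 * vmalloc buffer with userspace typically pairs vmalloc_user() with
 * remap_vmalloc_range() in its ->mmap() handler.  The names below are made
 * up for the example and all size/offset validation is elided.
 */
#if 0	/* example only */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	void *buf = file->private_data;	/* allocated with vmalloc_user() */

	return remap_vmalloc_range(vma, buf, vma->vm_pgoff);
}
#endif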
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708void *vmalloc_node(unsigned long size, int node)
2709{
2710 return __vmalloc_node(size, 1, GFP_KERNEL, node,
2711 __builtin_return_address(0));
2712}
2713EXPORT_SYMBOL(vmalloc_node);
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726void *vzalloc_node(unsigned long size, int node)
2727{
2728 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
2729 __builtin_return_address(0));
2730}
2731EXPORT_SYMBOL(vzalloc_node);
2732
2733#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
2734#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
2735#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
2736#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
2737#else
2738
2739
2740
2741
2742#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
2743#endif
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754void *vmalloc_32(unsigned long size)
2755{
2756 return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
2757 __builtin_return_address(0));
2758}
2759EXPORT_SYMBOL(vmalloc_32);
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770void *vmalloc_32_user(unsigned long size)
2771{
2772 return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
2773 GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
2774 VM_USERMAP, NUMA_NO_NODE,
2775 __builtin_return_address(0));
2776}
2777EXPORT_SYMBOL(vmalloc_32_user);
2778
2779
2780
2781
2782
2783
2784static int aligned_vread(char *buf, char *addr, unsigned long count)
2785{
2786 struct page *p;
2787 int copied = 0;
2788
2789 while (count) {
2790 unsigned long offset, length;
2791
2792 offset = offset_in_page(addr);
2793 length = PAGE_SIZE - offset;
2794 if (length > count)
2795 length = count;
2796 p = vmalloc_to_page(addr);
2797
2798
2799
2800
2801
2802
2803
2804 if (p) {
2805
2806
2807
2808
2809 void *map = kmap_atomic(p);
2810 memcpy(buf, map + offset, length);
2811 kunmap_atomic(map);
2812 } else
2813 memset(buf, 0, length);
2814
2815 addr += length;
2816 buf += length;
2817 copied += length;
2818 count -= length;
2819 }
2820 return copied;
2821}
2822
static int aligned_vwrite(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = offset_in_page(addr);
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
		if (p) {
			void *map = kmap_atomic(p);
			memcpy(map + offset, buf, length);
			kunmap_atomic(map);
		}
		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}

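/**
 * vread() - read vmalloc area in a safe way
 * @buf:	buffer for reading data
 * @addr:	vm address
 * @count:	number of bytes to be read
 *
 * Copies data from the vmalloc area at @addr into @buf while holding
 * vmap_area_lock.  Gaps between vmap areas, IOREMAP areas and not-yet-mapped
 * pages are returned as zeroes; any tail of @buf that could not be served is
 * also zero filled.
 *
 * Return: 0 if @addr is not within any vmap area, otherwise the number of
 * bytes placed in @buf (@count, possibly clamped to avoid address overflow).
 */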
long vread(char *buf, char *addr, unsigned long count)
{
	struct vmap_area *va;
	struct vm_struct *vm;
	char *vaddr, *buf_start = buf;
	unsigned long buflen = count;
	unsigned long n;

	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	spin_lock(&vmap_area_lock);
	list_for_each_entry(va, &vmap_area_list, list) {
		if (!count)
			break;

		if (!va->vm)
			continue;

		vm = va->vm;
		vaddr = (char *) vm->addr;
		if (addr >= vaddr + get_vm_area_size(vm))
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + get_vm_area_size(vm) - addr;
		if (n > count)
			n = count;
		if (!(vm->flags & VM_IOREMAP))
			aligned_vread(buf, addr, n);
		else
			memset(buf, 0, n);
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	spin_unlock(&vmap_area_lock);

	if (buf == buf_start)
		return 0;

	if (buf != buf_start + buflen)
		memset(buf, 0, buflen - (buf - buf_start));

	return buflen;
}

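/**
 * vwrite() - write vmalloc area in a safe way
 * @buf:	buffer for source data
 * @addr:	vm address
 * @count:	number of bytes to be written
 *
 * Copies data from @buf into the vmalloc area at @addr while holding
 * vmap_area_lock.  Gaps between vmap areas, IOREMAP areas and not-yet-mapped
 * pages are skipped without being written.
 *
 * Return: 0 if @addr is not within any vmap area (nothing was copied),
 * otherwise the length of the covered range (@count, possibly clamped to
 * avoid address overflow).
 */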
long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vmap_area *va;
	struct vm_struct *vm;
	char *vaddr;
	unsigned long n, buflen;
	int copied = 0;

	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;
	buflen = count;

	spin_lock(&vmap_area_lock);
	list_for_each_entry(va, &vmap_area_list, list) {
		if (!count)
			break;

		if (!va->vm)
			continue;

		vm = va->vm;
		vaddr = (char *) vm->addr;
		if (addr >= vaddr + get_vm_area_size(vm))
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + get_vm_area_size(vm) - addr;
		if (n > count)
			n = count;
		if (!(vm->flags & VM_IOREMAP)) {
			aligned_vwrite(buf, addr, n);
			copied++;
		}
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	spin_unlock(&vmap_area_lock);
	if (!copied)
		return 0;
	return buflen;
}

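/**
 * remap_vmalloc_range_partial - map part of a vmalloc area to userspace
 * @vma:	vma to cover
 * @uaddr:	target user address to start at
 * @kaddr:	virtual address of vmalloc kernel memory
 * @pgoff:	offset (in pages) from @kaddr to start at
 * @size:	size of the map area
 *
 * Map the pages backing the vmalloc area at @kaddr + (@pgoff << PAGE_SHIFT)
 * into userspace starting at @uaddr.  The area must have been allocated with
 * VM_USERMAP (or be VM_DMA_COHERENT), and the requested range must fit
 * inside it.
 *
 * Return: 0 for success, -EINVAL on bad parameters, or the error returned
 * by vm_insert_page().
 */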
int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
				void *kaddr, unsigned long pgoff,
				unsigned long size)
{
	struct vm_struct *area;
	unsigned long off;
	unsigned long end_index;

	if (check_shl_overflow(pgoff, PAGE_SHIFT, &off))
		return -EINVAL;

	size = PAGE_ALIGN(size);

	if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
		return -EINVAL;

	area = find_vm_area(kaddr);
	if (!area)
		return -EINVAL;

	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
		return -EINVAL;

	if (check_add_overflow(size, off, &end_index) ||
	    end_index > get_vm_area_size(area))
		return -EINVAL;
	kaddr += off;

	do {
		struct page *page = vmalloc_to_page(kaddr);
		int ret;

		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		kaddr += PAGE_SIZE;
		size -= PAGE_SIZE;
	} while (size > 0);

	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range_partial);

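/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma:	vma to cover (the full range of the vma is mapped)
 * @addr:	vmalloc memory
 * @pgoff:	number of pages into @addr before the first page to map
 *
 * Map the whole of @vma to the vmalloc'ed memory at @addr + @pgoff pages.
 * The vmalloc area must have been allocated with VM_USERMAP (for example
 * by vmalloc_user()).  Intended to be called from an mmap handler.
 *
 * Return: 0 for success or a negative error code.
 */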
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
			unsigned long pgoff)
{
	return remap_vmalloc_range_partial(vma, vma->vm_start,
					   addr, pgoff,
					   vma->vm_end - vma->vm_start);
}
EXPORT_SYMBOL(remap_vmalloc_range);

void free_vm_area(struct vm_struct *area)
{
	struct vm_struct *ret;
	ret = remove_vm_area(area->addr);
	BUG_ON(ret != area);
	kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);

#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
	return rb_entry_safe(n, struct vmap_area, rb_node);
}

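/*
 * Walk the free vmap area tree and return the area that contains @addr,
 * or, if no area contains it, the free area with the highest address
 * below @addr.  Returns NULL if there is no such area.  The caller must
 * hold free_vmap_area_lock.
 */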
static struct vmap_area *
pvm_find_va_enclose_addr(unsigned long addr)
{
	struct vmap_area *va, *tmp;
	struct rb_node *n;

	n = free_vmap_area_root.rb_node;
	va = NULL;

	while (n) {
		tmp = rb_entry(n, struct vmap_area, rb_node);
		if (tmp->va_start <= addr) {
			va = tmp;
			if (tmp->va_end >= addr)
				break;

			n = n->rb_right;
		} else {
			n = n->rb_left;
		}
	}

	return va;
}

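/*
 * Starting at *va, walk the free list backwards and return the highest
 * @align-aligned end address (clamped to VMALLOC_END) that still falls
 * inside a free area; *va is updated to the area that provided it.
 * Returns 0 if no suitable free area is found.
 */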
static unsigned long
pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
{
	unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr;

	if (likely(*va)) {
		list_for_each_entry_from_reverse((*va),
				&free_vmap_area_list, list) {
			addr = min((*va)->va_end & ~(align - 1), vmalloc_end);
			if ((*va)->va_start < addr)
				return addr;
		}
	}

	return 0;
}

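/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets:	array containing offset of each area
 * @sizes:	array containing size of each area
 * @nr_vms:	the number of areas to allocate
 * @align:	alignment, all entries in @offsets and @sizes must be
 *		aligned to this
 *
 * Allocate @nr_vms vm areas with the given @offsets from a common base
 * address and the given @sizes.  The free vmap space is scanned from the
 * top (VMALLOC_END) downwards, right to left, until a base is found where
 * all areas fit without overlapping existing allocations.
 *
 * Return: kmalloc'ed vm_struct pointer array on success (to be freed with
 * pcpu_free_vm_areas()), %NULL on failure.
 */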
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *va;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, size, end, last_end, orig_start, orig_end;
	bool purged = false;
	enum fit_type type;

	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

		if (start > offsets[last_area])
			last_area = area;

		for (area2 = area + 1; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			BUG_ON(start2 < end && start < end2);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
	vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
	if (!vas || !vms)
		goto err_free2;

	for (area = 0; area < nr_vms; area++) {
		vas[area] = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&free_vmap_area_lock);

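	/* start scanning from the top, beginning with the last area */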
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	va = pvm_find_va_enclose_addr(vmalloc_end);
	base = pvm_determine_end_from_reverse(&va, align) - end;

	while (true) {
		if (base + last_end < vmalloc_start + last_end)
			goto overflow;

		if (va == NULL)
			goto overflow;

		if (base + end > va->va_end) {
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		if (base + start < va->va_start) {
			va = node_to_va(rb_prev(&va->rb_node));
			base = pvm_determine_end_from_reverse(&va, align) - end;
			term_area = area;
			continue;
		}

		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;

		start = offsets[area];
		end = start + sizes[area];
		va = pvm_find_va_enclose_addr(base + end);
	}

	for (area = 0; area < nr_vms; area++) {
		int ret;

		start = base + offsets[area];
		size = sizes[area];

		va = pvm_find_va_enclose_addr(start);
		if (WARN_ON_ONCE(va == NULL))
			goto recovery;

		type = classify_va_fit_type(va, start, size);
		if (WARN_ON_ONCE(type == NOTHING_FIT))
			goto recovery;

		ret = adjust_va_to_fit_type(va, start, size, type);
		if (unlikely(ret))
			goto recovery;

		va = vas[area];
		va->va_start = start;
		va->va_end = start + size;
	}

	spin_unlock(&free_vmap_area_lock);

	for (area = 0; area < nr_vms; area++) {
		if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
			goto err_free_shadow;

		kasan_unpoison_vmalloc((void *)vas[area]->va_start,
				       sizes[area]);
	}

	spin_lock(&vmap_area_lock);
	for (area = 0; area < nr_vms; area++) {
		insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);

		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
					pcpu_get_vm_areas);
	}
	spin_unlock(&vmap_area_lock);

	kfree(vas);
	return vms;

recovery:
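	/*
	 * Put the partially carved out areas back into the free space.
	 * They were never inserted into the busy tree or list, so only
	 * the free structures (and any KASAN shadow) need fixing up.
	 */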
	while (area--) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
						    &free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
					      va->va_start, va->va_end);
		vas[area] = NULL;
	}

overflow:
	spin_unlock(&free_vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = true;

		for (area = 0; area < nr_vms; area++) {
			if (vas[area])
				continue;

			vas[area] = kmem_cache_zalloc(
				vmap_area_cachep, GFP_KERNEL);
			if (!vas[area])
				goto err_free;
		}

		goto retry;
	}

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas[area])
			kmem_cache_free(vmap_area_cachep, vas[area]);

		kfree(vms[area]);
	}
err_free2:
	kfree(vas);
	kfree(vms);
	return NULL;

err_free_shadow:
	spin_lock(&free_vmap_area_lock);
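	/*
	 * The KASAN shadow has been (at least partially) populated by now,
	 * so release it for each area while returning the vmap areas to
	 * the free space.
	 */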
	for (area = 0; area < nr_vms; area++) {
		orig_start = vas[area]->va_start;
		orig_end = vas[area]->va_end;
		va = merge_or_add_vmap_area_augment(vas[area], &free_vmap_area_root,
						    &free_vmap_area_list);
		if (va)
			kasan_release_vmalloc(orig_start, orig_end,
					      va->va_start, va->va_end);
		vas[area] = NULL;
		kfree(vms[area]);
	}
	spin_unlock(&free_vmap_area_lock);
	kfree(vas);
	kfree(vms);
	return NULL;
}

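/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms:	vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms:	the number of allocated areas
 *
 * Free the vm_structs and the array allocated by pcpu_get_vm_areas().
 */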
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int i;

	for (i = 0; i < nr_vms; i++)
		free_vm_area(vms[i]);
	kfree(vms);
}
#endif

bool vmalloc_dump_obj(void *object)
{
	struct vm_struct *vm;
	void *objp = (void *)PAGE_ALIGN((unsigned long)object);

	vm = find_vm_area(objp);
	if (!vm)
		return false;
	pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
		vm->nr_pages, (unsigned long)vm->addr, vm->caller);
	return true;
}

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
	__acquires(&vmap_purge_lock)
	__acquires(&vmap_area_lock)
{
	mutex_lock(&vmap_purge_lock);
	spin_lock(&vmap_area_lock);

	return seq_list_start(&vmap_area_list, *pos);
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &vmap_area_list, pos);
}

static void s_stop(struct seq_file *m, void *p)
	__releases(&vmap_area_lock)
	__releases(&vmap_purge_lock)
{
	spin_unlock(&vmap_area_lock);
	mutex_unlock(&vmap_purge_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (IS_ENABLED(CONFIG_NUMA)) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		if (v->flags & VM_UNINITIALIZED)
			return;

		smp_rmb();

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}

static void show_purge_info(struct seq_file *m)
{
	struct vmap_area *va;

	spin_lock(&purge_vmap_area_lock);
	list_for_each_entry(va, &purge_vmap_area_list, list) {
		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);
	}
	spin_unlock(&purge_vmap_area_lock);
}

static int s_show(struct seq_file *m, void *p)
{
	struct vmap_area *va;
	struct vm_struct *v;

	va = list_entry(p, struct vmap_area, list);
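
	/*
	 * If va->vm is NULL this area is either a vm_map_ram allocation or
	 * its vm_struct is not (yet) attached; just report the address
	 * range.
	 */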
	if (!va->vm) {
		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
			(void *)va->va_start, (void *)va->va_end,
			va->va_end - va->va_start);

		return 0;
	}

	v = va->vm;

	seq_printf(m, "0x%pK-0x%pK %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller)
		seq_printf(m, " %pS", v->caller);

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%pa", &v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_puts(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_puts(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_puts(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_puts(m, " user");

	if (v->flags & VM_DMA_COHERENT)
		seq_puts(m, " dma-coherent");

	if (is_vmalloc_addr(v->pages))
		seq_puts(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');

	if (list_is_last(&va->list, &vmap_area_list))
		show_purge_info(m);

	return 0;
}

static const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int __init proc_vmalloc_init(void)
{
	if (IS_ENABLED(CONFIG_NUMA))
		proc_create_seq_private("vmallocinfo", 0400, NULL,
				&vmalloc_op,
				nr_node_ids * sizeof(unsigned int), NULL);
	else
		proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
	return 0;
}
module_init(proc_vmalloc_init);

#endif