/*
 *  mm/vmalloc.c
 *
 *  Virtually contiguous memory allocation and mapping for the kernel:
 *  vmalloc()/vfree(), vmap()/vunmap(), vm_map_ram() and friends.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/llist.h>

#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
36
/*
 * vfree() may be called from interrupt context (see vfree() below), where we
 * cannot tear down the mappings directly; such requests are queued on a
 * per-cpu llist and handled later from process context by free_work().
 */
struct vfree_deferred {
	struct llist_head list;
	struct work_struct wq;
};
static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);

static void __vunmap(const void *, int);

static void free_work(struct work_struct *w)
{
	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
	struct llist_node *llnode = llist_del_all(&p->list);

	while (llnode) {
		/* the llist_node lives at the start of the deferred area */
		void *addr = llnode;

		llnode = llist_next(llnode);
		__vunmap(addr, 1);
	}
}

/*** Page table manipulation functions ***/

58static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
59{
60 pte_t *pte;
61
62 pte = pte_offset_kernel(pmd, addr);
63 do {
64 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
65 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
66 } while (pte++, addr += PAGE_SIZE, addr != end);
67}
68
69static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
70{
71 pmd_t *pmd;
72 unsigned long next;
73
74 pmd = pmd_offset(pud, addr);
75 do {
76 next = pmd_addr_end(addr, end);
77 if (pmd_none_or_clear_bad(pmd))
78 continue;
79 vunmap_pte_range(pmd, addr, next);
80 } while (pmd++, addr = next, addr != end);
81}
82
83static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
84{
85 pud_t *pud;
86 unsigned long next;
87
88 pud = pud_offset(pgd, addr);
89 do {
90 next = pud_addr_end(addr, end);
91 if (pud_none_or_clear_bad(pud))
92 continue;
93 vunmap_pmd_range(pud, addr, next);
94 } while (pud++, addr = next, addr != end);
95}
96
97static void vunmap_page_range(unsigned long addr, unsigned long end)
98{
99 pgd_t *pgd;
100 unsigned long next;
101
102 BUG_ON(addr >= end);
103 pgd = pgd_offset_k(addr);
104 do {
105 next = pgd_addr_end(addr, end);
106 if (pgd_none_or_clear_bad(pgd))
107 continue;
108 vunmap_pud_range(pgd, addr, next);
109 } while (pgd++, addr = next, addr != end);
110}
111
112static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
113 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
114{
115 pte_t *pte;
116
	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */
122 pte = pte_alloc_kernel(pmd, addr);
123 if (!pte)
124 return -ENOMEM;
125 do {
126 struct page *page = pages[*nr];
127
128 if (WARN_ON(!pte_none(*pte)))
129 return -EBUSY;
130 if (WARN_ON(!page))
131 return -ENOMEM;
132 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
133 (*nr)++;
134 } while (pte++, addr += PAGE_SIZE, addr != end);
135 return 0;
136}
137
138static int vmap_pmd_range(pud_t *pud, unsigned long addr,
139 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
140{
141 pmd_t *pmd;
142 unsigned long next;
143
144 pmd = pmd_alloc(&init_mm, pud, addr);
145 if (!pmd)
146 return -ENOMEM;
147 do {
148 next = pmd_addr_end(addr, end);
149 if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
150 return -ENOMEM;
151 } while (pmd++, addr = next, addr != end);
152 return 0;
153}
154
155static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
156 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
157{
158 pud_t *pud;
159 unsigned long next;
160
161 pud = pud_alloc(&init_mm, pgd, addr);
162 if (!pud)
163 return -ENOMEM;
164 do {
165 next = pud_addr_end(addr, end);
166 if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
167 return -ENOMEM;
168 } while (pud++, addr = next, addr != end);
169 return 0;
170}
171
/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
 * will have corresponding entries in the page array @pages.
 *
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N].
 */
178static int vmap_page_range_noflush(unsigned long start, unsigned long end,
179 pgprot_t prot, struct page **pages)
180{
181 pgd_t *pgd;
182 unsigned long next;
183 unsigned long addr = start;
184 int err = 0;
185 int nr = 0;
186
187 BUG_ON(addr >= end);
188 pgd = pgd_offset_k(addr);
189 do {
190 next = pgd_addr_end(addr, end);
191 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
192 if (err)
193 return err;
194 } while (pgd++, addr = next, addr != end);
195
196 return nr;
197}
198
199static int vmap_page_range(unsigned long start, unsigned long end,
200 pgprot_t prot, struct page **pages)
201{
202 int ret;
203
204 ret = vmap_page_range_noflush(start, end, prot, pages);
205 flush_cache_vmap(start, end);
206 return ret;
207}
208
209int is_vmalloc_or_module_addr(const void *x)
210{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put it in the vmalloc space.
	 */
216#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
217 unsigned long addr = (unsigned long)x;
218 if (addr >= MODULES_VADDR && addr < MODULES_END)
219 return 1;
220#endif
221 return is_vmalloc_addr(x);
222}
223
/*
 * Walk a vmap address to the struct page it maps.
 */
227struct page *vmalloc_to_page(const void *vmalloc_addr)
228{
229 unsigned long addr = (unsigned long) vmalloc_addr;
230 struct page *page = NULL;
231 pgd_t *pgd = pgd_offset_k(addr);
232
233
234
235
236
237 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
238
239 if (!pgd_none(*pgd)) {
240 pud_t *pud = pud_offset(pgd, addr);
241 if (!pud_none(*pud)) {
242 pmd_t *pmd = pmd_offset(pud, addr);
243 if (!pmd_none(*pmd)) {
244 pte_t *ptep, pte;
245
246 ptep = pte_offset_map(pmd, addr);
247 pte = *ptep;
248 if (pte_present(pte))
249 page = pte_page(pte);
250 pte_unmap(ptep);
251 }
252 }
253 }
254 return page;
255}
256EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
261unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
262{
263 return page_to_pfn(vmalloc_to_page(vmalloc_addr));
264}
265EXPORT_SYMBOL(vmalloc_to_pfn);
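
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * use vmalloc_to_page() to collect the struct pages backing a vmalloc()'d
 * buffer, e.g. to hand them to an I/O path.  The helper name and layout
 * below are hypothetical.
 */
#if 0	/* example only, not built */
static struct page **example_collect_pages(void *vaddr, unsigned int nr_pages)
{
	struct page **pages;
	unsigned int i;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;
	/* vaddr must be a vmalloc()/vmap() address, one page per entry */
	for (i = 0; i < nr_pages; i++)
		pages[i] = vmalloc_to_page(vaddr + i * PAGE_SIZE);
	return pages;
}
#endif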
266
/*** Global kva allocator ***/

270#define VM_LAZY_FREE 0x01
271#define VM_LAZY_FREEING 0x02
272#define VM_VM_AREA 0x04
273
274static DEFINE_SPINLOCK(vmap_area_lock);
275
276LIST_HEAD(vmap_area_list);
277static struct rb_root vmap_area_root = RB_ROOT;
278
279
280static struct rb_node *free_vmap_cache;
281static unsigned long cached_hole_size;
282static unsigned long cached_vstart;
283static unsigned long cached_align;
284
285static unsigned long vmap_area_pcpu_hole;
286
287static struct vmap_area *__find_vmap_area(unsigned long addr)
288{
289 struct rb_node *n = vmap_area_root.rb_node;
290
291 while (n) {
292 struct vmap_area *va;
293
294 va = rb_entry(n, struct vmap_area, rb_node);
295 if (addr < va->va_start)
296 n = n->rb_left;
297 else if (addr >= va->va_end)
298 n = n->rb_right;
299 else
300 return va;
301 }
302
303 return NULL;
304}
305
306static void __insert_vmap_area(struct vmap_area *va)
307{
308 struct rb_node **p = &vmap_area_root.rb_node;
309 struct rb_node *parent = NULL;
310 struct rb_node *tmp;
311
312 while (*p) {
313 struct vmap_area *tmp_va;
314
315 parent = *p;
316 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
317 if (va->va_start < tmp_va->va_end)
318 p = &(*p)->rb_left;
319 else if (va->va_end > tmp_va->va_start)
320 p = &(*p)->rb_right;
321 else
322 BUG();
323 }
324
325 rb_link_node(&va->rb_node, parent, p);
326 rb_insert_color(&va->rb_node, &vmap_area_root);
327
328
329 tmp = rb_prev(&va->rb_node);
330 if (tmp) {
331 struct vmap_area *prev;
332 prev = rb_entry(tmp, struct vmap_area, rb_node);
333 list_add_rcu(&va->list, &prev->list);
334 } else
335 list_add_rcu(&va->list, &vmap_area_list);
336}
337
338static void purge_vmap_area_lazy(void);
339
/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
344static struct vmap_area *alloc_vmap_area(unsigned long size,
345 unsigned long align,
346 unsigned long vstart, unsigned long vend,
347 int node, gfp_t gfp_mask)
348{
349 struct vmap_area *va;
350 struct rb_node *n;
351 unsigned long addr;
352 int purged = 0;
353 struct vmap_area *first;
354
355 BUG_ON(!size);
356 BUG_ON(size & ~PAGE_MASK);
357 BUG_ON(!is_power_of_2(align));
358
359 va = kmalloc_node(sizeof(struct vmap_area),
360 gfp_mask & GFP_RECLAIM_MASK, node);
361 if (unlikely(!va))
362 return ERR_PTR(-ENOMEM);
363
364
365
366
367
368 kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK);
369
370retry:
371 spin_lock(&vmap_area_lock);
	/*
	 * Invalidate the cache if we have more permissive parameters.
	 * cached_hole_size notes the largest hole noticed _below_
	 * the vmap_area pointed out by free_vmap_cache: if size fits
	 * into that hole, we want to scan from vstart to reuse
	 * the hole instead of allocating above free_vmap_cache.
	 * Note that __free_vmap_area may update free_vmap_cache
	 * without updating cached_hole_size or cached_align.
	 */
381 if (!free_vmap_cache ||
382 size < cached_hole_size ||
383 vstart < cached_vstart ||
384 align < cached_align) {
385nocache:
386 cached_hole_size = 0;
387 free_vmap_cache = NULL;
388 }
389
390 cached_vstart = vstart;
391 cached_align = align;
392
393
394 if (free_vmap_cache) {
395 first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
396 addr = ALIGN(first->va_end, align);
397 if (addr < vstart)
398 goto nocache;
399 if (addr + size < addr)
400 goto overflow;
401
402 } else {
403 addr = ALIGN(vstart, align);
404 if (addr + size < addr)
405 goto overflow;
406
407 n = vmap_area_root.rb_node;
408 first = NULL;
409
410 while (n) {
411 struct vmap_area *tmp;
412 tmp = rb_entry(n, struct vmap_area, rb_node);
413 if (tmp->va_end >= addr) {
414 first = tmp;
415 if (tmp->va_start <= addr)
416 break;
417 n = n->rb_left;
418 } else
419 n = n->rb_right;
420 }
421
422 if (!first)
423 goto found;
424 }
425
426
427 while (addr + size > first->va_start && addr + size <= vend) {
428 if (addr + cached_hole_size < first->va_start)
429 cached_hole_size = first->va_start - addr;
430 addr = ALIGN(first->va_end, align);
431 if (addr + size < addr)
432 goto overflow;
433
434 if (list_is_last(&first->list, &vmap_area_list))
435 goto found;
436
437 first = list_entry(first->list.next,
438 struct vmap_area, list);
439 }
440
441found:
442 if (addr + size > vend)
443 goto overflow;
444
445 va->va_start = addr;
446 va->va_end = addr + size;
447 va->flags = 0;
448 __insert_vmap_area(va);
449 free_vmap_cache = &va->rb_node;
450 spin_unlock(&vmap_area_lock);
451
452 BUG_ON(va->va_start & (align-1));
453 BUG_ON(va->va_start < vstart);
454 BUG_ON(va->va_end > vend);
455
456 return va;
457
458overflow:
459 spin_unlock(&vmap_area_lock);
460 if (!purged) {
461 purge_vmap_area_lazy();
462 purged = 1;
463 goto retry;
464 }
465 if (printk_ratelimit())
466 printk(KERN_WARNING
467 "vmap allocation for size %lu failed: "
468 "use vmalloc=<size> to increase size.\n", size);
469 kfree(va);
470 return ERR_PTR(-EBUSY);
471}
472
473static void __free_vmap_area(struct vmap_area *va)
474{
475 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
476
477 if (free_vmap_cache) {
478 if (va->va_end < cached_vstart) {
479 free_vmap_cache = NULL;
480 } else {
481 struct vmap_area *cache;
482 cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
483 if (va->va_start <= cache->va_start) {
484 free_vmap_cache = rb_prev(&va->rb_node);
485
486
487
488
489 }
490 }
491 }
492 rb_erase(&va->rb_node, &vmap_area_root);
493 RB_CLEAR_NODE(&va->rb_node);
494 list_del_rcu(&va->list);
495
	/*
	 * Track the highest possible candidate for pcpu area allocation.
	 * Only end addresses which fall inside the vmalloc area proper are
	 * interesting here.
	 */
502 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
503 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
504
505 kfree_rcu(va, rcu_head);
506}
507
508
509
510
511static void free_vmap_area(struct vmap_area *va)
512{
513 spin_lock(&vmap_area_lock);
514 __free_vmap_area(va);
515 spin_unlock(&vmap_area_lock);
516}
517
518
519
520
521static void unmap_vmap_area(struct vmap_area *va)
522{
523 vunmap_page_range(va->va_start, va->va_end);
524}
525
526static void vmap_debug_free_range(unsigned long start, unsigned long end)
527{
	/*
	 * Unmap page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is set.  This catches use-after-free bugs
	 * similarly to those in the linear kernel virtual address space
	 * after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, in order to
	 * minimise intrusiveness of this debugging feature.
	 *
	 * This is going to be *slow* (linear kernel virtual address
	 * debugging doesn't do a broadcast TLB flush so it is a lot
	 * faster).
	 */
541#ifdef CONFIG_DEBUG_PAGEALLOC
542 vunmap_page_range(start, end);
543 flush_tlb_kernel_range(start, end);
544#endif
545}
546
/*
 * lazy_max_pages is the maximum amount of virtual address space we gain by
 * not immediately freeing a freed vmap area: batching the TLB flushes saves
 * time.
 *
 * There is a tradeoff here: a larger number covers more kernel page tables
 * and takes slightly longer to purge, but it linearly reduces the number of
 * global TLB flushes that must be performed.  Scaling with the logarithm of
 * the number of CPUs is a conservative compromise that avoids introducing a
 * big purge latency on huge systems.
 */
563static unsigned long lazy_max_pages(void)
564{
565 unsigned int log;
566
567 log = fls(num_online_cpus());
568
569 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
570}
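
/*
 * Worked example (illustrative): with 4 KiB pages and 4 online CPUs,
 * fls(4) == 3, so lazy_max_pages() == 3 * (32 MiB / 4 KiB) = 24576 pages,
 * i.e. up to 96 MiB worth of lazily freed mappings may accumulate before a
 * purge is forced in free_vmap_area_noflush().
 */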
571
572static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
573
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);

/*
 * called before a call to iounmap() if the caller wants vm_area_struct's
 * immediately freed.
 */
581void set_iounmap_nonlazy(void)
582{
583 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
584}
585
/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */
596static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
597 int sync, int force_flush)
598{
599 static DEFINE_SPINLOCK(purge_lock);
600 LIST_HEAD(valist);
601 struct vmap_area *va;
602 struct vmap_area *n_va;
603 int nr = 0;
604
605
606
607
608
609
610 if (!sync && !force_flush) {
611 if (!spin_trylock(&purge_lock))
612 return;
613 } else
614 spin_lock(&purge_lock);
615
616 if (sync)
617 purge_fragmented_blocks_allcpus();
618
619 rcu_read_lock();
620 list_for_each_entry_rcu(va, &vmap_area_list, list) {
621 if (va->flags & VM_LAZY_FREE) {
622 if (va->va_start < *start)
623 *start = va->va_start;
624 if (va->va_end > *end)
625 *end = va->va_end;
626 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
627 list_add_tail(&va->purge_list, &valist);
628 va->flags |= VM_LAZY_FREEING;
629 va->flags &= ~VM_LAZY_FREE;
630 }
631 }
632 rcu_read_unlock();
633
634 if (nr)
635 atomic_sub(nr, &vmap_lazy_nr);
636
637 if (nr || force_flush)
638 flush_tlb_kernel_range(*start, *end);
639
640 if (nr) {
641 spin_lock(&vmap_area_lock);
642 list_for_each_entry_safe(va, n_va, &valist, purge_list)
643 __free_vmap_area(va);
644 spin_unlock(&vmap_area_lock);
645 }
646 spin_unlock(&purge_lock);
647}
648
/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * else is already purging.
 */
653static void try_purge_vmap_area_lazy(void)
654{
655 unsigned long start = ULONG_MAX, end = 0;
656
657 __purge_vmap_area_lazy(&start, &end, 0, 0);
658}
659
660
661
662
663static void purge_vmap_area_lazy(void)
664{
665 unsigned long start = ULONG_MAX, end = 0;
666
667 __purge_vmap_area_lazy(&start, &end, 1, 0);
668}
669
/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap had been called for the correct range
 * previously.
 */
675static void free_vmap_area_noflush(struct vmap_area *va)
676{
677 va->flags |= VM_LAZY_FREE;
678 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
679 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
680 try_purge_vmap_area_lazy();
681}
682
/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 */
687static void free_unmap_vmap_area_noflush(struct vmap_area *va)
688{
689 unmap_vmap_area(va);
690 free_vmap_area_noflush(va);
691}
692
693
694
695
696static void free_unmap_vmap_area(struct vmap_area *va)
697{
698 flush_cache_vunmap(va->va_start, va->va_end);
699 free_unmap_vmap_area_noflush(va);
700}
701
702static struct vmap_area *find_vmap_area(unsigned long addr)
703{
704 struct vmap_area *va;
705
706 spin_lock(&vmap_area_lock);
707 va = __find_vmap_area(addr);
708 spin_unlock(&vmap_area_lock);
709
710 return va;
711}
712
713static void free_unmap_vmap_area_addr(unsigned long addr)
714{
715 struct vmap_area *va;
716
717 va = find_vmap_area(addr);
718 BUG_ON(!va);
719 free_unmap_vmap_area(va);
720}
721
/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure there is
 * room for at least 16 percpu vmap blocks per CPU.
 */

/*
 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
 * to #define VMALLOC_SPACE as (VMALLOC_END - VMALLOC_START).  Guess
 * instead (we just need a rough idea).
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX	1024
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
#define VMAP_BBMAP_BITS		\
		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
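
/*
 * Sizing example (illustrative): on a 64-bit machine with 4 KiB pages and
 * NR_CPUS = 16, VMALLOC_PAGES is 128 GiB / 4 KiB = 33554432, so
 * VMALLOC_PAGES / 16 / 16 = 131072, which is clamped to
 * VMAP_BBMAP_BITS_MAX (1024); each vmap block then covers
 * 1024 * 4 KiB = 4 MiB of kernel virtual address space.
 */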
752
753static bool vmap_initialized __read_mostly = false;
754
755struct vmap_block_queue {
756 spinlock_t lock;
757 struct list_head free;
758};
759
760struct vmap_block {
761 spinlock_t lock;
762 struct vmap_area *va;
763 unsigned long free, dirty;
764 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
765 struct list_head free_list;
766 struct rcu_head rcu_head;
767 struct list_head purge;
768};
769
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
 * in the free path. Could get rid of this if we change the API to return a
 * "cookie" from alloc, to be passed to free. But no big deal yet.
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);

/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * problem.
 */
788static unsigned long addr_to_vb_idx(unsigned long addr)
789{
790 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
791 addr /= VMAP_BLOCK_SIZE;
792 return addr;
793}
794
795static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
796{
797 struct vmap_block_queue *vbq;
798 struct vmap_block *vb;
799 struct vmap_area *va;
800 unsigned long vb_idx;
801 int node, err;
802
803 node = numa_node_id();
804
805 vb = kmalloc_node(sizeof(struct vmap_block),
806 gfp_mask & GFP_RECLAIM_MASK, node);
807 if (unlikely(!vb))
808 return ERR_PTR(-ENOMEM);
809
810 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
811 VMALLOC_START, VMALLOC_END,
812 node, gfp_mask);
813 if (IS_ERR(va)) {
814 kfree(vb);
815 return ERR_CAST(va);
816 }
817
818 err = radix_tree_preload(gfp_mask);
819 if (unlikely(err)) {
820 kfree(vb);
821 free_vmap_area(va);
822 return ERR_PTR(err);
823 }
824
825 spin_lock_init(&vb->lock);
826 vb->va = va;
827 vb->free = VMAP_BBMAP_BITS;
828 vb->dirty = 0;
829 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
830 INIT_LIST_HEAD(&vb->free_list);
831
832 vb_idx = addr_to_vb_idx(va->va_start);
833 spin_lock(&vmap_block_tree_lock);
834 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
835 spin_unlock(&vmap_block_tree_lock);
836 BUG_ON(err);
837 radix_tree_preload_end();
838
839 vbq = &get_cpu_var(vmap_block_queue);
840 spin_lock(&vbq->lock);
841 list_add_rcu(&vb->free_list, &vbq->free);
842 spin_unlock(&vbq->lock);
843 put_cpu_var(vmap_block_queue);
844
845 return vb;
846}
847
848static void free_vmap_block(struct vmap_block *vb)
849{
850 struct vmap_block *tmp;
851 unsigned long vb_idx;
852
853 vb_idx = addr_to_vb_idx(vb->va->va_start);
854 spin_lock(&vmap_block_tree_lock);
855 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
856 spin_unlock(&vmap_block_tree_lock);
857 BUG_ON(tmp != vb);
858
859 free_vmap_area_noflush(vb->va);
860 kfree_rcu(vb, rcu_head);
861}
862
863static void purge_fragmented_blocks(int cpu)
864{
865 LIST_HEAD(purge);
866 struct vmap_block *vb;
867 struct vmap_block *n_vb;
868 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
869
870 rcu_read_lock();
871 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
872
873 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
874 continue;
875
876 spin_lock(&vb->lock);
877 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
878 vb->free = 0;
879 vb->dirty = VMAP_BBMAP_BITS;
880 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
881 spin_lock(&vbq->lock);
882 list_del_rcu(&vb->free_list);
883 spin_unlock(&vbq->lock);
884 spin_unlock(&vb->lock);
885 list_add_tail(&vb->purge, &purge);
886 } else
887 spin_unlock(&vb->lock);
888 }
889 rcu_read_unlock();
890
891 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
892 list_del(&vb->purge);
893 free_vmap_block(vb);
894 }
895}
896
897static void purge_fragmented_blocks_allcpus(void)
898{
899 int cpu;
900
901 for_each_possible_cpu(cpu)
902 purge_fragmented_blocks(cpu);
903}
904
905static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
906{
907 struct vmap_block_queue *vbq;
908 struct vmap_block *vb;
909 unsigned long addr = 0;
910 unsigned int order;
911
912 BUG_ON(size & ~PAGE_MASK);
913 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
914 if (WARN_ON(size == 0)) {
915
916
917
918
919
920 return NULL;
921 }
922 order = get_order(size);
923
924again:
925 rcu_read_lock();
926 vbq = &get_cpu_var(vmap_block_queue);
927 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
928 int i;
929
930 spin_lock(&vb->lock);
931 if (vb->free < 1UL << order)
932 goto next;
933
934 i = VMAP_BBMAP_BITS - vb->free;
935 addr = vb->va->va_start + (i << PAGE_SHIFT);
936 BUG_ON(addr_to_vb_idx(addr) !=
937 addr_to_vb_idx(vb->va->va_start));
938 vb->free -= 1UL << order;
939 if (vb->free == 0) {
940 spin_lock(&vbq->lock);
941 list_del_rcu(&vb->free_list);
942 spin_unlock(&vbq->lock);
943 }
944 spin_unlock(&vb->lock);
945 break;
946next:
947 spin_unlock(&vb->lock);
948 }
949
950 put_cpu_var(vmap_block_queue);
951 rcu_read_unlock();
952
953 if (!addr) {
954 vb = new_vmap_block(gfp_mask);
955 if (IS_ERR(vb))
956 return vb;
957 goto again;
958 }
959
960 return (void *)addr;
961}
962
963static void vb_free(const void *addr, unsigned long size)
964{
965 unsigned long offset;
966 unsigned long vb_idx;
967 unsigned int order;
968 struct vmap_block *vb;
969
970 BUG_ON(size & ~PAGE_MASK);
971 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
972
973 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
974
975 order = get_order(size);
976
977 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
978
979 vb_idx = addr_to_vb_idx((unsigned long)addr);
980 rcu_read_lock();
981 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
982 rcu_read_unlock();
983 BUG_ON(!vb);
984
985 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
986
987 spin_lock(&vb->lock);
988 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
989
990 vb->dirty += 1UL << order;
991 if (vb->dirty == VMAP_BBMAP_BITS) {
992 BUG_ON(vb->free);
993 spin_unlock(&vb->lock);
994 free_vmap_block(vb);
995 } else
996 spin_unlock(&vb->lock);
997}
998
/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads.  What this means is that any page you
 * have now, may, in a former life, have been mapped into kernel virtual
 * address by the vmap layer and so there might be some CPUs with TLB entries
 * still referencing that page (in addition to the regular 1:1 kernel mapping).
 *
 * vm_unmap_aliases flushes all such lazy mappings.  After it returns, we can
 * be sure that none of the pages we have control over will have any aliases
 * from the vmap layer.
 */
1012void vm_unmap_aliases(void)
1013{
1014 unsigned long start = ULONG_MAX, end = 0;
1015 int cpu;
1016 int flush = 0;
1017
1018 if (unlikely(!vmap_initialized))
1019 return;
1020
1021 for_each_possible_cpu(cpu) {
1022 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1023 struct vmap_block *vb;
1024
1025 rcu_read_lock();
1026 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1027 int i, j;
1028
1029 spin_lock(&vb->lock);
1030 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1031 if (i < VMAP_BBMAP_BITS) {
1032 unsigned long s, e;
1033
1034 j = find_last_bit(vb->dirty_map,
1035 VMAP_BBMAP_BITS);
1036 j = j + 1;
1037
1038 s = vb->va->va_start + (i << PAGE_SHIFT);
1039 e = vb->va->va_start + (j << PAGE_SHIFT);
1040 flush = 1;
1041
1042 if (s < start)
1043 start = s;
1044 if (e > end)
1045 end = e;
1046 }
1047 spin_unlock(&vb->lock);
1048 }
1049 rcu_read_unlock();
1050 }
1051
1052 __purge_vmap_area_lazy(&start, &end, 1, flush);
1053}
1054EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1055
/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */
1061void vm_unmap_ram(const void *mem, unsigned int count)
1062{
1063 unsigned long size = count << PAGE_SHIFT;
1064 unsigned long addr = (unsigned long)mem;
1065
1066 BUG_ON(!addr);
1067 BUG_ON(addr < VMALLOC_START);
1068 BUG_ON(addr > VMALLOC_END);
1069 BUG_ON(addr & (PAGE_SIZE-1));
1070
1071 debug_check_no_locks_freed(mem, size);
1072 vmap_debug_free_range(addr, addr+size);
1073
1074 if (likely(count <= VMAP_MAX_ALLOC))
1075 vb_free(mem, size);
1076 else
1077 free_unmap_vmap_area_addr(addr);
1078}
1079EXPORT_SYMBOL(vm_unmap_ram);
1080
/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use.  PAGE_KERNEL for regular RAM
 *
 * If you use this function for less than VMAP_MAX_ALLOC pages, it could be
 * faster than vmap so it's good.  But if you mix long-life and short-life
 * objects with vm_map_ram(), it could consume lots of address space through
 * fragmentation (especially on a 32bit machine).  You could see failures in
 * the end.  Please use this function for short-lived objects.
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 */
1096void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1097{
1098 unsigned long size = count << PAGE_SHIFT;
1099 unsigned long addr;
1100 void *mem;
1101
1102 if (likely(count <= VMAP_MAX_ALLOC)) {
1103 mem = vb_alloc(size, GFP_KERNEL);
1104 if (IS_ERR(mem))
1105 return NULL;
1106 addr = (unsigned long)mem;
1107 } else {
1108 struct vmap_area *va;
1109 va = alloc_vmap_area(size, PAGE_SIZE,
1110 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1111 if (IS_ERR(va))
1112 return NULL;
1113
1114 addr = va->va_start;
1115 mem = (void *)addr;
1116 }
1117 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1118 vm_unmap_ram(mem, count);
1119 return NULL;
1120 }
1121 return mem;
1122}
1123EXPORT_SYMBOL(vm_map_ram);
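
/*
 * Illustrative sketch (not part of the original file): a typical short-lived
 * mapping made with vm_map_ram()/vm_unmap_ram().  The helper name is
 * hypothetical.
 */
#if 0	/* example only, not built */
static void example_transient_mapping(struct page **pages, unsigned int count)
{
	/*
	 * count <= VMAP_MAX_ALLOC hits the fast per-cpu block allocator;
	 * always pair vm_map_ram() with vm_unmap_ram() for the same count.
	 */
	void *va = vm_map_ram(pages, count, NUMA_NO_NODE, PAGE_KERNEL);

	if (!va)
		return;
	memset(va, 0, (size_t)count * PAGE_SIZE);	/* use the mapping */
	vm_unmap_ram(va, count);
}
#endif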
1124
1125static struct vm_struct *vmlist __initdata;
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136void __init vm_area_add_early(struct vm_struct *vm)
1137{
1138 struct vm_struct *tmp, **p;
1139
1140 BUG_ON(vmap_initialized);
1141 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1142 if (tmp->addr >= vm->addr) {
1143 BUG_ON(tmp->addr < vm->addr + vm->size);
1144 break;
1145 } else
1146 BUG_ON(tmp->addr + tmp->size > vm->addr);
1147 }
1148 vm->next = *p;
1149 *p = vm;
1150}
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1165{
1166 static size_t vm_init_off __initdata;
1167 unsigned long addr;
1168
1169 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1170 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1171
1172 vm->addr = (void *)addr;
1173
1174 vm_area_add_early(vm);
1175}
1176
1177void __init vmalloc_init(void)
1178{
1179 struct vmap_area *va;
1180 struct vm_struct *tmp;
1181 int i;
1182
1183 for_each_possible_cpu(i) {
1184 struct vmap_block_queue *vbq;
1185 struct vfree_deferred *p;
1186
1187 vbq = &per_cpu(vmap_block_queue, i);
1188 spin_lock_init(&vbq->lock);
1189 INIT_LIST_HEAD(&vbq->free);
1190 p = &per_cpu(vfree_deferred, i);
1191 init_llist_head(&p->list);
1192 INIT_WORK(&p->wq, free_work);
1193 }
1194
1195
1196 for (tmp = vmlist; tmp; tmp = tmp->next) {
1197 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1198 va->flags = VM_VM_AREA;
1199 va->va_start = (unsigned long)tmp->addr;
1200 va->va_end = va->va_start + tmp->size;
1201 va->vm = tmp;
1202 __insert_vmap_area(va);
1203 }
1204
1205 vmap_area_pcpu_hole = VMALLOC_END;
1206
1207 vmap_initialized = true;
1208}
1209
/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size
 * specify should have been allocated using get_vm_area() and its
 * friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vmap() on to-be-mapped areas
 * before calling this function.
 *
 * RETURNS:
 * The number of pages mapped on success, -errno on failure.
 */
1229int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1230 pgprot_t prot, struct page **pages)
1231{
1232 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1233}
1234
/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size
 * specify should have been allocated using get_vm_area() and its
 * friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vunmap() on to-be-unmapped areas
 * before calling this function and flush_tlb_kernel_range() after.
 */
1249void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1250{
1251 vunmap_page_range(addr, addr + size);
1252}
1253EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1254
/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes the cache before
 * and the TLB after unmapping.
 */
1263void unmap_kernel_range(unsigned long addr, unsigned long size)
1264{
1265 unsigned long end = addr + size;
1266
1267 flush_cache_vunmap(addr, end);
1268 vunmap_page_range(addr, end);
1269 flush_tlb_kernel_range(addr, end);
1270}
1271
1272int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1273{
1274 unsigned long addr = (unsigned long)area->addr;
1275 unsigned long end = addr + get_vm_area_size(area);
1276 int err;
1277
1278 err = vmap_page_range(addr, end, prot, *pages);
1279 if (err > 0) {
1280 *pages += err;
1281 err = 0;
1282 }
1283
1284 return err;
1285}
1286EXPORT_SYMBOL_GPL(map_vm_area);
1287
1288static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1289 unsigned long flags, const void *caller)
1290{
1291 spin_lock(&vmap_area_lock);
1292 vm->flags = flags;
1293 vm->addr = (void *)va->va_start;
1294 vm->size = va->va_end - va->va_start;
1295 vm->caller = caller;
1296 va->vm = vm;
1297 va->flags |= VM_VM_AREA;
1298 spin_unlock(&vmap_area_lock);
1299}
1300
1301static void clear_vm_uninitialized_flag(struct vm_struct *vm)
1302{
1303
1304
1305
1306
1307
1308 smp_wmb();
1309 vm->flags &= ~VM_UNINITIALIZED;
1310}
1311
1312static struct vm_struct *__get_vm_area_node(unsigned long size,
1313 unsigned long align, unsigned long flags, unsigned long start,
1314 unsigned long end, int node, gfp_t gfp_mask, const void *caller)
1315{
1316 struct vmap_area *va;
1317 struct vm_struct *area;
1318
1319 BUG_ON(in_interrupt());
1320 if (flags & VM_IOREMAP)
1321 align = 1ul << clamp(fls(size), PAGE_SHIFT, IOREMAP_MAX_ORDER);
1322
1323 size = PAGE_ALIGN(size);
1324 if (unlikely(!size))
1325 return NULL;
1326
1327 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1328 if (unlikely(!area))
1329 return NULL;
1330
	/*
	 * We always allocate a guard page.
	 */
1334 size += PAGE_SIZE;
1335
1336 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1337 if (IS_ERR(va)) {
1338 kfree(area);
1339 return NULL;
1340 }
1341
1342 setup_vmalloc_vm(area, va, flags, caller);
1343
1344 return area;
1345}
1346
1347struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1348 unsigned long start, unsigned long end)
1349{
1350 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1351 GFP_KERNEL, __builtin_return_address(0));
1352}
1353EXPORT_SYMBOL_GPL(__get_vm_area);
1354
1355struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1356 unsigned long start, unsigned long end,
1357 const void *caller)
1358{
1359 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1360 GFP_KERNEL, caller);
1361}
1362
/**
 *	get_vm_area  -  reserve a contiguous kernel virtual area
 *	@size:		size of the area
 *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 *	Search an area of @size in the kernel virtual mapping area,
 *	and reserve it for our purposes.  Returns the area descriptor
 *	on success or %NULL on failure.
 */
1372struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1373{
1374 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1375 NUMA_NO_NODE, GFP_KERNEL,
1376 __builtin_return_address(0));
1377}
1378
1379struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1380 const void *caller)
1381{
1382 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1383 NUMA_NO_NODE, GFP_KERNEL, caller);
1384}
1385
/**
 *	find_vm_area  -  find a continuous kernel virtual area
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, and return it.
 *	It is up to the caller to do all required locking to keep the returned
 *	pointer valid.
 */
1394struct vm_struct *find_vm_area(const void *addr)
1395{
1396 struct vmap_area *va;
1397
1398 va = find_vmap_area((unsigned long)addr);
1399 if (va && va->flags & VM_VM_AREA)
1400 return va->vm;
1401
1402 return NULL;
1403}
1404
/**
 *	remove_vm_area  -  find and remove a continuous kernel virtual area
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, and remove it.
 *	This function returns the found VM area, but using it is NOT safe
 *	on SMP machines, except for its size or flags.
 */
1413struct vm_struct *remove_vm_area(const void *addr)
1414{
1415 struct vmap_area *va;
1416
1417 va = find_vmap_area((unsigned long)addr);
1418 if (va && va->flags & VM_VM_AREA) {
1419 struct vm_struct *vm = va->vm;
1420
1421 spin_lock(&vmap_area_lock);
1422 va->vm = NULL;
1423 va->flags &= ~VM_VM_AREA;
1424 spin_unlock(&vmap_area_lock);
1425
1426 vmap_debug_free_range(va->va_start, va->va_end);
1427 free_unmap_vmap_area(va);
1428 vm->size -= PAGE_SIZE;
1429
1430 return vm;
1431 }
1432 return NULL;
1433}
1434
1435static void __vunmap(const void *addr, int deallocate_pages)
1436{
1437 struct vm_struct *area;
1438
1439 if (!addr)
1440 return;
1441
1442 if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
1443 addr))
1444 return;
1445
1446 area = remove_vm_area(addr);
1447 if (unlikely(!area)) {
1448 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1449 addr);
1450 return;
1451 }
1452
1453 debug_check_no_locks_freed(addr, area->size);
1454 debug_check_no_obj_freed(addr, area->size);
1455
1456 if (deallocate_pages) {
1457 int i;
1458
1459 for (i = 0; i < area->nr_pages; i++) {
1460 struct page *page = area->pages[i];
1461
1462 BUG_ON(!page);
1463 __free_page(page);
1464 }
1465
1466 if (area->flags & VM_VPAGES)
1467 vfree(area->pages);
1468 else
1469 kfree(area->pages);
1470 }
1471
1472 kfree(area);
1473 return;
1474}
1475
/**
 *	vfree  -  release memory allocated by vmalloc()
 *	@addr:		memory base address
 *
 *	Free the virtually continuous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc().  If @addr is
 *	NULL, no operation is performed.
 *
 *	Must not be called in NMI context (strictly speaking, only if we
 *	don't have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
 *	conventions for vfree() arch-dependent would be really bad wrt
 *	confusion with vmalloc() anyway).
 *
 *	NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
 */
1490void vfree(const void *addr)
1491{
1492 BUG_ON(in_nmi());
1493
1494 kmemleak_free(addr);
1495
1496 if (!addr)
1497 return;
1498 if (unlikely(in_interrupt())) {
1499 struct vfree_deferred *p = &__get_cpu_var(vfree_deferred);
1500 if (llist_add((struct llist_node *)addr, &p->list))
1501 schedule_work(&p->wq);
1502 } else
1503 __vunmap(addr, 1);
1504}
1505EXPORT_SYMBOL(vfree);
1506
/**
 *	vunmap  -  release virtual mapping obtained by vmap()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	Must not be called in interrupt context.
 */
1516void vunmap(const void *addr)
1517{
1518 BUG_ON(in_interrupt());
1519 might_sleep();
1520 if (addr)
1521 __vunmap(addr, 0);
1522}
1523EXPORT_SYMBOL(vunmap);
1524
/**
 *	vmap  -  map an array of pages into virtually contiguous space
 *	@pages:		array of page pointers
 *	@count:		number of pages to map
 *	@flags:		vm_area->flags
 *	@prot:		page protection for the mapping
 *
 *	Maps @count pages from @pages into contiguous kernel virtual
 *	space.
 */
1535void *vmap(struct page **pages, unsigned int count,
1536 unsigned long flags, pgprot_t prot)
1537{
1538 struct vm_struct *area;
1539
1540 might_sleep();
1541
1542 if (count > totalram_pages)
1543 return NULL;
1544
1545 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1546 __builtin_return_address(0));
1547 if (!area)
1548 return NULL;
1549
1550 if (map_vm_area(area, prot, &pages)) {
1551 vunmap(area->addr);
1552 return NULL;
1553 }
1554
1555 return area->addr;
1556}
1557EXPORT_SYMBOL(vmap);
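
/*
 * Illustrative sketch (not part of the original file): making an array of
 * (possibly non-contiguous) pages virtually contiguous with vmap().  The
 * helper name is hypothetical.
 */
#if 0	/* example only, not built */
static void *example_vmap_pages(struct page **pages, unsigned int count)
{
	/*
	 * The mapping persists until vunmap(); the pages themselves are not
	 * freed by vunmap() and remain owned by the caller.
	 */
	void *va = vmap(pages, count, VM_MAP, PAGE_KERNEL);

	/* ... use va, and later call vunmap(va) ... */
	return va;
}
#endif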
1558
1559static void *__vmalloc_node(unsigned long size, unsigned long align,
1560 gfp_t gfp_mask, pgprot_t prot,
1561 int node, const void *caller);
1562static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1563 pgprot_t prot, int node)
1564{
1565 const int order = 0;
1566 struct page **pages;
1567 unsigned int nr_pages, array_size, i;
1568 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1569
1570 nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
1571 array_size = (nr_pages * sizeof(struct page *));
1572
1573 area->nr_pages = nr_pages;
1574
1575 if (array_size > PAGE_SIZE) {
1576 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1577 PAGE_KERNEL, node, area->caller);
1578 area->flags |= VM_VPAGES;
1579 } else {
1580 pages = kmalloc_node(array_size, nested_gfp, node);
1581 }
1582 area->pages = pages;
1583 if (!area->pages) {
1584 remove_vm_area(area->addr);
1585 kfree(area);
1586 return NULL;
1587 }
1588
1589 for (i = 0; i < area->nr_pages; i++) {
1590 struct page *page;
1591 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1592
1593 if (node == NUMA_NO_NODE)
1594 page = alloc_page(tmp_mask);
1595 else
1596 page = alloc_pages_node(node, tmp_mask, order);
1597
1598 if (unlikely(!page)) {
1599
1600 area->nr_pages = i;
1601 goto fail;
1602 }
1603 area->pages[i] = page;
1604 }
1605
1606 if (map_vm_area(area, prot, &pages))
1607 goto fail;
1608 return area->addr;
1609
1610fail:
1611 warn_alloc_failed(gfp_mask, order,
1612 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1613 (area->nr_pages*PAGE_SIZE), area->size);
1614 vfree(area->addr);
1615 return NULL;
1616}
1617
/**
 *	__vmalloc_node_range  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *	@align:		desired alignment
 *	@start:		vm area range start
 *	@end:		vm area range end
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *	@node:		node to use for allocation or NUMA_NO_NODE
 *	@caller:	caller's return address
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
1633void *__vmalloc_node_range(unsigned long size, unsigned long align,
1634 unsigned long start, unsigned long end, gfp_t gfp_mask,
1635 pgprot_t prot, int node, const void *caller)
1636{
1637 struct vm_struct *area;
1638 void *addr;
1639 unsigned long real_size = size;
1640
1641 size = PAGE_ALIGN(size);
1642 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1643 goto fail;
1644
1645 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED,
1646 start, end, node, gfp_mask, caller);
1647 if (!area)
1648 goto fail;
1649
1650 addr = __vmalloc_area_node(area, gfp_mask, prot, node);
1651 if (!addr)
1652 return NULL;
1653
1654
1655
1656
1657
1658
1659 clear_vm_uninitialized_flag(area);
1660
	/*
	 * A ref_count = 2 is needed because the vm_struct allocated in
	 * __get_vm_area_node() contains a reference to the virtual address
	 * of the vmalloc'ed block.
	 */
1666 kmemleak_alloc(addr, real_size, 2, gfp_mask);
1667
1668 return addr;
1669
1670fail:
1671 warn_alloc_failed(gfp_mask, 0,
1672 "vmalloc: allocation failure: %lu bytes\n",
1673 real_size);
1674 return NULL;
1675}
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690static void *__vmalloc_node(unsigned long size, unsigned long align,
1691 gfp_t gfp_mask, pgprot_t prot,
1692 int node, const void *caller)
1693{
1694 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1695 gfp_mask, prot, node, caller);
1696}
1697
1698void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1699{
1700 return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
1701 __builtin_return_address(0));
1702}
1703EXPORT_SYMBOL(__vmalloc);
1704
1705static inline void *__vmalloc_node_flags(unsigned long size,
1706 int node, gfp_t flags)
1707{
1708 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1709 node, __builtin_return_address(0));
1710}
1711
/**
 *	vmalloc  -  allocate virtually contiguous memory
 *	@size:		allocation size
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
1721void *vmalloc(unsigned long size)
1722{
1723 return __vmalloc_node_flags(size, NUMA_NO_NODE,
1724 GFP_KERNEL | __GFP_HIGHMEM);
1725}
1726EXPORT_SYMBOL(vmalloc);
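
/*
 * Illustrative sketch (not part of the original file): the common
 * vmalloc()/vfree() pattern for a large, long-lived table that does not
 * need to be physically contiguous.  The helper name is hypothetical.
 */
#if 0	/* example only, not built */
static int example_big_table(void)
{
	size_t sz = 4 * 1024 * 1024;	/* 4 MiB: too large for kmalloc() to be reliable */
	u32 *table = vmalloc(sz);

	if (!table)
		return -ENOMEM;
	memset(table, 0, sz);		/* or simply use vzalloc(sz) */
	/* ... use the table ... */
	vfree(table);
	return 0;
}
#endif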
1727
/**
 *	vzalloc - allocate virtually contiguous memory with zero fill
 *	@size:	allocation size
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *	The memory allocated is set to zero.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
1738void *vzalloc(unsigned long size)
1739{
1740 return __vmalloc_node_flags(size, NUMA_NO_NODE,
1741 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1742}
1743EXPORT_SYMBOL(vzalloc);
1744
/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 */
1752void *vmalloc_user(unsigned long size)
1753{
1754 struct vm_struct *area;
1755 void *ret;
1756
1757 ret = __vmalloc_node(size, SHMLBA,
1758 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1759 PAGE_KERNEL, NUMA_NO_NODE,
1760 __builtin_return_address(0));
1761 if (ret) {
1762 area = find_vm_area(ret);
1763 area->flags |= VM_USERMAP;
1764 }
1765 return ret;
1766}
1767EXPORT_SYMBOL(vmalloc_user);
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780void *vmalloc_node(unsigned long size, int node)
1781{
1782 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1783 node, __builtin_return_address(0));
1784}
1785EXPORT_SYMBOL(vmalloc_node);
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799void *vzalloc_node(unsigned long size, int node)
1800{
1801 return __vmalloc_node_flags(size, node,
1802 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1803}
1804EXPORT_SYMBOL(vzalloc_node);
1805
1806#ifndef PAGE_KERNEL_EXEC
1807# define PAGE_KERNEL_EXEC PAGE_KERNEL
1808#endif
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822void *vmalloc_exec(unsigned long size)
1823{
1824 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1825 NUMA_NO_NODE, __builtin_return_address(0));
1826}
1827
1828#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1829#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
1830#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1831#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
1832#else
1833#define GFP_VMALLOC32 GFP_KERNEL
1834#endif
1835
1836
1837
1838
1839
1840
1841
1842
1843void *vmalloc_32(unsigned long size)
1844{
1845 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1846 NUMA_NO_NODE, __builtin_return_address(0));
1847}
1848EXPORT_SYMBOL(vmalloc_32);
1849
1850
1851
1852
1853
1854
1855
1856
1857void *vmalloc_32_user(unsigned long size)
1858{
1859 struct vm_struct *area;
1860 void *ret;
1861
1862 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1863 NUMA_NO_NODE, __builtin_return_address(0));
1864 if (ret) {
1865 area = find_vm_area(ret);
1866 area->flags |= VM_USERMAP;
1867 }
1868 return ret;
1869}
1870EXPORT_SYMBOL(vmalloc_32_user);
1871
1872
1873
1874
1875
1876
1877static int aligned_vread(char *buf, char *addr, unsigned long count)
1878{
1879 struct page *p;
1880 int copied = 0;
1881
1882 while (count) {
1883 unsigned long offset, length;
1884
1885 offset = (unsigned long)addr & ~PAGE_MASK;
1886 length = PAGE_SIZE - offset;
1887 if (length > count)
1888 length = count;
1889 p = vmalloc_to_page(addr);
1890
1891
1892
1893
1894
1895
1896
1897 if (p) {
1898
1899
1900
1901
1902 void *map = kmap_atomic(p);
1903 memcpy(buf, map + offset, length);
1904 kunmap_atomic(map);
1905 } else
1906 memset(buf, 0, length);
1907
1908 addr += length;
1909 buf += length;
1910 copied += length;
1911 count -= length;
1912 }
1913 return copied;
1914}
1915
1916static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1917{
1918 struct page *p;
1919 int copied = 0;
1920
1921 while (count) {
1922 unsigned long offset, length;
1923
1924 offset = (unsigned long)addr & ~PAGE_MASK;
1925 length = PAGE_SIZE - offset;
1926 if (length > count)
1927 length = count;
1928 p = vmalloc_to_page(addr);
1929
1930
1931
1932
1933
1934
1935
1936 if (p) {
1937
1938
1939
1940
1941 void *map = kmap_atomic(p);
1942 memcpy(map + offset, buf, length);
1943 kunmap_atomic(map);
1944 }
1945 addr += length;
1946 buf += length;
1947 copied += length;
1948 count -= length;
1949 }
1950 return copied;
1951}
1952
/**
 *	vread() - read vmalloc area in a safe way.
 *	@buf:		buffer for reading data
 *	@addr:		vm address.
 *	@count:		number of bytes to be read.
 *
 *	This function checks that @addr is a valid vmalloc'ed area, and
 *	copies data from that area to a given buffer.  If the given memory
 *	range of [addr...addr+count) includes some valid address, data is
 *	copied to the proper area of @buf.  If there are memory holes, they
 *	are zero-filled.  IOREMAP areas are treated as memory holes and no
 *	copy is done.
 *
 *	Returns the number of bytes by which @addr and @buf should be
 *	increased (same number as @count), or 0 if [addr...addr+count)
 *	doesn't intersect any live vmalloc area.  @buf should be a kernel
 *	buffer.
 *
 *	Note: in usual operation vread() is never necessary because the
 *	caller should know the vmalloc() area is valid and can use memcpy().
 *	This is for routines which have to access the vmalloc area without
 *	any information, such as /dev/kmem.
 */
1979long vread(char *buf, char *addr, unsigned long count)
1980{
1981 struct vmap_area *va;
1982 struct vm_struct *vm;
1983 char *vaddr, *buf_start = buf;
1984 unsigned long buflen = count;
1985 unsigned long n;
1986
1987
1988 if ((unsigned long) addr + count < count)
1989 count = -(unsigned long) addr;
1990
1991 spin_lock(&vmap_area_lock);
1992 list_for_each_entry(va, &vmap_area_list, list) {
1993 if (!count)
1994 break;
1995
1996 if (!(va->flags & VM_VM_AREA))
1997 continue;
1998
1999 vm = va->vm;
2000 vaddr = (char *) vm->addr;
2001 if (addr >= vaddr + get_vm_area_size(vm))
2002 continue;
2003 while (addr < vaddr) {
2004 if (count == 0)
2005 goto finished;
2006 *buf = '\0';
2007 buf++;
2008 addr++;
2009 count--;
2010 }
2011 n = vaddr + get_vm_area_size(vm) - addr;
2012 if (n > count)
2013 n = count;
2014 if (!(vm->flags & VM_IOREMAP))
2015 aligned_vread(buf, addr, n);
2016 else
2017 memset(buf, 0, n);
2018 buf += n;
2019 addr += n;
2020 count -= n;
2021 }
2022finished:
2023 spin_unlock(&vmap_area_lock);
2024
2025 if (buf == buf_start)
2026 return 0;
2027
2028 if (buf != buf_start + buflen)
2029 memset(buf, 0, buflen - (buf - buf_start));
2030
2031 return buflen;
2032}
2033
/**
 *	vwrite() - write vmalloc area in a safe way.
 *	@buf:		buffer for source data
 *	@addr:		vm address.
 *	@count:		number of bytes to be written.
 *
 *	This function checks that @addr is a valid vmalloc'ed area, and
 *	copies data from a buffer to the given address.  If the given memory
 *	range of [addr...addr+count) includes some valid address, data is
 *	copied from the proper area of @buf.  If there are memory holes,
 *	they are skipped (and the same number of bytes of @buf is skipped
 *	too).  IOREMAP areas are treated as memory holes and no copy is done.
 *
 *	Returns the number of bytes by which @addr and @buf should be
 *	increased (same number as @count), or 0 if [addr...addr+count)
 *	doesn't intersect any live vmalloc area.  @buf should be a kernel
 *	buffer.
 *
 *	Note: in usual operation vwrite() is never necessary because the
 *	caller should know the vmalloc() area is valid and can use memcpy().
 *	This is for routines which have to access the vmalloc area without
 *	any information, such as /dev/kmem.
 */
2060long vwrite(char *buf, char *addr, unsigned long count)
2061{
2062 struct vmap_area *va;
2063 struct vm_struct *vm;
2064 char *vaddr;
2065 unsigned long n, buflen;
2066 int copied = 0;
2067
2068
2069 if ((unsigned long) addr + count < count)
2070 count = -(unsigned long) addr;
2071 buflen = count;
2072
2073 spin_lock(&vmap_area_lock);
2074 list_for_each_entry(va, &vmap_area_list, list) {
2075 if (!count)
2076 break;
2077
2078 if (!(va->flags & VM_VM_AREA))
2079 continue;
2080
2081 vm = va->vm;
2082 vaddr = (char *) vm->addr;
2083 if (addr >= vaddr + get_vm_area_size(vm))
2084 continue;
2085 while (addr < vaddr) {
2086 if (count == 0)
2087 goto finished;
2088 buf++;
2089 addr++;
2090 count--;
2091 }
2092 n = vaddr + get_vm_area_size(vm) - addr;
2093 if (n > count)
2094 n = count;
2095 if (!(vm->flags & VM_IOREMAP)) {
2096 aligned_vwrite(buf, addr, n);
2097 copied++;
2098 }
2099 buf += n;
2100 addr += n;
2101 count -= n;
2102 }
2103finished:
2104 spin_unlock(&vmap_area_lock);
2105 if (!copied)
2106 return 0;
2107 return buflen;
2108}
2109
/**
 *	remap_vmalloc_range_partial  -  map vmalloc pages to userspace
 *	@vma:		vma to cover
 *	@uaddr:		target user address to start at
 *	@kaddr:		virtual address of vmalloc kernel memory
 *	@size:		size of map area
 *
 *	Returns:	0 for success, -Exxx on failure
 *
 *	This function checks that @kaddr is a valid vmalloc'ed area,
 *	and that it is big enough to cover the range starting at
 *	@uaddr in @vma.  Will return failure if that criteria isn't
 *	met.
 *
 *	Similar to remap_pfn_range() (see mm/memory.c)
 */
2126int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
2127 void *kaddr, unsigned long size)
2128{
2129 struct vm_struct *area;
2130
2131 size = PAGE_ALIGN(size);
2132
2133 if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
2134 return -EINVAL;
2135
2136 area = find_vm_area(kaddr);
2137 if (!area)
2138 return -EINVAL;
2139
2140 if (!(area->flags & VM_USERMAP))
2141 return -EINVAL;
2142
2143 if (kaddr + size > area->addr + area->size)
2144 return -EINVAL;
2145
2146 do {
2147 struct page *page = vmalloc_to_page(kaddr);
2148 int ret;
2149
2150 ret = vm_insert_page(vma, uaddr, page);
2151 if (ret)
2152 return ret;
2153
2154 uaddr += PAGE_SIZE;
2155 kaddr += PAGE_SIZE;
2156 size -= PAGE_SIZE;
2157 } while (size > 0);
2158
2159 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
2160
2161 return 0;
2162}
2163EXPORT_SYMBOL(remap_vmalloc_range_partial);
2164
/**
 *	remap_vmalloc_range  -  map vmalloc pages to userspace
 *	@vma:		vma to cover (map full range of vma)
 *	@addr:		vmalloc memory
 *	@pgoff:		number of pages into addr before first page to map
 *
 *	Returns:	0 for success, -Exxx on failure
 *
 *	This function checks that @addr is a valid vmalloc'ed area, and
 *	that it is big enough to cover the vma.  Will return failure if
 *	that criteria isn't met.
 *
 *	Similar to remap_pfn_range() (see mm/memory.c)
 */
2179int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2180 unsigned long pgoff)
2181{
2182 return remap_vmalloc_range_partial(vma, vma->vm_start,
2183 addr + (pgoff << PAGE_SHIFT),
2184 vma->vm_end - vma->vm_start);
2185}
2186EXPORT_SYMBOL(remap_vmalloc_range);
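
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * driver ->mmap() handler exposing a buffer that was allocated with
 * vmalloc_user() (which sets VM_USERMAP) to userspace.  "example_buf" and
 * the function name are made up for illustration.
 */
#if 0	/* example only, not built */
static void *example_buf;	/* assumed: example_buf = vmalloc_user(len); */

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, example_buf, vma->vm_pgoff);
}
#endif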
2187
/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
2192void __weak vmalloc_sync_all(void)
2193{
2194}
2195
2196
2197static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2198{
2199 pte_t ***p = data;
2200
2201 if (p) {
2202 *(*p) = pte;
2203 (*p)++;
2204 }
2205 return 0;
2206}
2207
/**
 *	alloc_vm_area - allocate a range of kernel address space
 *	@size:		size of the area
 *	@ptes:		returns the PTEs for the address space
 *
 *	Returns:	NULL on failure, vm_struct on success
 *
 *	This function reserves a range of kernel address space, and
 *	allocates pagetables to map that range.  No actual mappings
 *	are created.
 *
 *	If @ptes is non-NULL, pointers to the PTEs (in init_mm)
 *	allocated for the VM area are returned.
 */
2222struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2223{
2224 struct vm_struct *area;
2225
2226 area = get_vm_area_caller(size, VM_IOREMAP,
2227 __builtin_return_address(0));
2228 if (area == NULL)
2229 return NULL;
2230
2231
2232
2233
2234
2235 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2236 size, f, ptes ? &ptes : NULL)) {
2237 free_vm_area(area);
2238 return NULL;
2239 }
2240
2241 return area;
2242}
2243EXPORT_SYMBOL_GPL(alloc_vm_area);
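
/*
 * Illustrative sketch (not part of the original file): reserving kernel
 * address space plus page tables with alloc_vm_area(), as paravirtualized
 * guests do before installing translations themselves.  The helper name is
 * hypothetical.
 */
#if 0	/* example only, not built */
static struct vm_struct *example_reserve_kva(size_t size)
{
	/* Pass a pte_t ** array instead of NULL to get the pte locations back. */
	struct vm_struct *area = alloc_vm_area(size, NULL);

	if (!area)
		return NULL;
	/* ... install mappings by arch/hypervisor-specific means ... */
	/* ... and eventually release with free_vm_area(area); */
	return area;
}
#endif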
2244
2245void free_vm_area(struct vm_struct *area)
2246{
2247 struct vm_struct *ret;
2248 ret = remove_vm_area(area->addr);
2249 BUG_ON(ret != area);
2250 kfree(area);
2251}
2252EXPORT_SYMBOL_GPL(free_vm_area);
2253
2254#ifdef CONFIG_SMP
2255static struct vmap_area *node_to_va(struct rb_node *n)
2256{
2257 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2258}
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272static bool pvm_find_next_prev(unsigned long end,
2273 struct vmap_area **pnext,
2274 struct vmap_area **pprev)
2275{
2276 struct rb_node *n = vmap_area_root.rb_node;
2277 struct vmap_area *va = NULL;
2278
2279 while (n) {
2280 va = rb_entry(n, struct vmap_area, rb_node);
2281 if (end < va->va_end)
2282 n = n->rb_left;
2283 else if (end > va->va_end)
2284 n = n->rb_right;
2285 else
2286 break;
2287 }
2288
2289 if (!va)
2290 return false;
2291
2292 if (va->va_end > end) {
2293 *pnext = va;
2294 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2295 } else {
2296 *pprev = va;
2297 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2298 }
2299 return true;
2300}
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318static unsigned long pvm_determine_end(struct vmap_area **pnext,
2319 struct vmap_area **pprev,
2320 unsigned long align)
2321{
2322 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2323 unsigned long addr;
2324
2325 if (*pnext)
2326 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2327 else
2328 addr = vmalloc_end;
2329
2330 while (*pprev && (*pprev)->va_end > addr) {
2331 *pnext = *pprev;
2332 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2333 }
2334
2335 return addr;
2336}
2337
/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * Percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * congruent vmalloc areas for it with GFP_KERNEL.  These areas tend to
 * be scattered pretty far, distance between two areas easily going up
 * to gigabytes.  To avoid interacting with regular vmallocs, these
 * areas are allocated from top.
 *
 * Despite its complicated look, this allocator is rather simple.  It
 * does everything top-down and scans areas from the end looking for a
 * matching slot.  While scanning, if any of the areas overlaps with an
 * existing vmap_area, the base address is pulled down to fit the
 * area.  Scanning is repeated till all the areas fit and then all
 * necessary data structures are inserted and the result is returned.
 */
2362struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2363 const size_t *sizes, int nr_vms,
2364 size_t align)
2365{
2366 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2367 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2368 struct vmap_area **vas, *prev, *next;
2369 struct vm_struct **vms;
2370 int area, area2, last_area, term_area;
2371 unsigned long base, start, end, last_end;
2372 bool purged = false;
2373
2374
2375 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2376 for (last_area = 0, area = 0; area < nr_vms; area++) {
2377 start = offsets[area];
2378 end = start + sizes[area];
2379
2380
2381 BUG_ON(!IS_ALIGNED(offsets[area], align));
2382 BUG_ON(!IS_ALIGNED(sizes[area], align));
2383
2384
2385 if (start > offsets[last_area])
2386 last_area = area;
2387
2388 for (area2 = 0; area2 < nr_vms; area2++) {
2389 unsigned long start2 = offsets[area2];
2390 unsigned long end2 = start2 + sizes[area2];
2391
2392 if (area2 == area)
2393 continue;
2394
2395 BUG_ON(start2 >= start && start2 < end);
2396 BUG_ON(end2 <= end && end2 > start);
2397 }
2398 }
2399 last_end = offsets[last_area] + sizes[last_area];
2400
2401 if (vmalloc_end - vmalloc_start < last_end) {
2402 WARN_ON(true);
2403 return NULL;
2404 }
2405
2406 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2407 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2408 if (!vas || !vms)
2409 goto err_free2;
2410
2411 for (area = 0; area < nr_vms; area++) {
2412 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2413 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2414 if (!vas[area] || !vms[area])
2415 goto err_free;
2416 }
2417retry:
2418 spin_lock(&vmap_area_lock);
2419
2420
2421 area = term_area = last_area;
2422 start = offsets[area];
2423 end = start + sizes[area];
2424
2425 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2426 base = vmalloc_end - last_end;
2427 goto found;
2428 }
2429 base = pvm_determine_end(&next, &prev, align) - end;
2430
2431 while (true) {
2432 BUG_ON(next && next->va_end <= base + end);
2433 BUG_ON(prev && prev->va_end > base + end);
2434
2435
2436
2437
2438
2439 if (base + last_end < vmalloc_start + last_end) {
2440 spin_unlock(&vmap_area_lock);
2441 if (!purged) {
2442 purge_vmap_area_lazy();
2443 purged = true;
2444 goto retry;
2445 }
2446 goto err_free;
2447 }
2448
2449
2450
2451
2452
2453 if (next && next->va_start < base + end) {
2454 base = pvm_determine_end(&next, &prev, align) - end;
2455 term_area = area;
2456 continue;
2457 }
2458
2459
2460
2461
2462
2463
2464 if (prev && prev->va_end > base + start) {
2465 next = prev;
2466 prev = node_to_va(rb_prev(&next->rb_node));
2467 base = pvm_determine_end(&next, &prev, align) - end;
2468 term_area = area;
2469 continue;
2470 }
2471
2472
2473
2474
2475
2476 area = (area + nr_vms - 1) % nr_vms;
2477 if (area == term_area)
2478 break;
2479 start = offsets[area];
2480 end = start + sizes[area];
2481 pvm_find_next_prev(base + end, &next, &prev);
2482 }
2483found:
2484
2485 for (area = 0; area < nr_vms; area++) {
2486 struct vmap_area *va = vas[area];
2487
2488 va->va_start = base + offsets[area];
2489 va->va_end = va->va_start + sizes[area];
2490 __insert_vmap_area(va);
2491 }
2492
2493 vmap_area_pcpu_hole = base + offsets[last_area];
2494
2495 spin_unlock(&vmap_area_lock);
2496
2497
2498 for (area = 0; area < nr_vms; area++)
2499 setup_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2500 pcpu_get_vm_areas);
2501
2502 kfree(vas);
2503 return vms;
2504
2505err_free:
2506 for (area = 0; area < nr_vms; area++) {
2507 kfree(vas[area]);
2508 kfree(vms[area]);
2509 }
2510err_free2:
2511 kfree(vas);
2512 kfree(vms);
2513 return NULL;
2514}
2515
/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
 */
2523void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2524{
2525 int i;
2526
2527 for (i = 0; i < nr_vms; i++)
2528 free_vm_area(vms[i]);
2529 kfree(vms);
2530}
2531#endif
2532
2533#ifdef CONFIG_PROC_FS
2534static void *s_start(struct seq_file *m, loff_t *pos)
2535 __acquires(&vmap_area_lock)
2536{
2537 loff_t n = *pos;
2538 struct vmap_area *va;
2539
2540 spin_lock(&vmap_area_lock);
2541 va = list_entry((&vmap_area_list)->next, typeof(*va), list);
2542 while (n > 0 && &va->list != &vmap_area_list) {
2543 n--;
2544 va = list_entry(va->list.next, typeof(*va), list);
2545 }
2546 if (!n && &va->list != &vmap_area_list)
2547 return va;
2548
2549 return NULL;
2550
2551}
2552
2553static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2554{
2555 struct vmap_area *va = p, *next;
2556
2557 ++*pos;
2558 next = list_entry(va->list.next, typeof(*va), list);
2559 if (&next->list != &vmap_area_list)
2560 return next;
2561
2562 return NULL;
2563}
2564
2565static void s_stop(struct seq_file *m, void *p)
2566 __releases(&vmap_area_lock)
2567{
2568 spin_unlock(&vmap_area_lock);
2569}
2570
2571static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2572{
2573 if (IS_ENABLED(CONFIG_NUMA)) {
2574 unsigned int nr, *counters = m->private;
2575
2576 if (!counters)
2577 return;
2578
2579
2580 smp_rmb();
2581 if (v->flags & VM_UNINITIALIZED)
2582 return;
2583
2584 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2585
2586 for (nr = 0; nr < v->nr_pages; nr++)
2587 counters[page_to_nid(v->pages[nr])]++;
2588
2589 for_each_node_state(nr, N_HIGH_MEMORY)
2590 if (counters[nr])
2591 seq_printf(m, " N%u=%u", nr, counters[nr]);
2592 }
2593}
2594
2595static int s_show(struct seq_file *m, void *p)
2596{
2597 struct vmap_area *va = p;
2598 struct vm_struct *v;
2599
2600
2601
2602
2603
2604 if (!(va->flags & VM_VM_AREA))
2605 return 0;
2606
2607 v = va->vm;
2608
2609 seq_printf(m, "0x%pK-0x%pK %7ld",
2610 v->addr, v->addr + v->size, v->size);
2611
2612 if (v->caller)
2613 seq_printf(m, " %pS", v->caller);
2614
2615 if (v->nr_pages)
2616 seq_printf(m, " pages=%d", v->nr_pages);
2617
2618 if (v->phys_addr)
2619 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2620
2621 if (v->flags & VM_IOREMAP)
2622 seq_printf(m, " ioremap");
2623
2624 if (v->flags & VM_ALLOC)
2625 seq_printf(m, " vmalloc");
2626
2627 if (v->flags & VM_MAP)
2628 seq_printf(m, " vmap");
2629
2630 if (v->flags & VM_USERMAP)
2631 seq_printf(m, " user");
2632
2633 if (v->flags & VM_VPAGES)
2634 seq_printf(m, " vpages");
2635
2636 show_numa_info(m, v);
2637 seq_putc(m, '\n');
2638 return 0;
2639}
2640
2641static const struct seq_operations vmalloc_op = {
2642 .start = s_start,
2643 .next = s_next,
2644 .stop = s_stop,
2645 .show = s_show,
2646};
2647
2648static int vmalloc_open(struct inode *inode, struct file *file)
2649{
2650 unsigned int *ptr = NULL;
2651 int ret;
2652
2653 if (IS_ENABLED(CONFIG_NUMA)) {
2654 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2655 if (ptr == NULL)
2656 return -ENOMEM;
2657 }
2658 ret = seq_open(file, &vmalloc_op);
2659 if (!ret) {
2660 struct seq_file *m = file->private_data;
2661 m->private = ptr;
2662 } else
2663 kfree(ptr);
2664 return ret;
2665}
2666
2667static const struct file_operations proc_vmalloc_operations = {
2668 .open = vmalloc_open,
2669 .read = seq_read,
2670 .llseek = seq_lseek,
2671 .release = seq_release_private,
2672};
2673
2674static int __init proc_vmalloc_init(void)
2675{
2676 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2677 return 0;
2678}
2679module_init(proc_vmalloc_init);
2680
2681void get_vmalloc_info(struct vmalloc_info *vmi)
2682{
2683 struct vmap_area *va;
2684 unsigned long free_area_size;
2685 unsigned long prev_end;
2686
2687 vmi->used = 0;
2688 vmi->largest_chunk = 0;
2689
2690 prev_end = VMALLOC_START;
2691
2692 spin_lock(&vmap_area_lock);
2693
2694 if (list_empty(&vmap_area_list)) {
2695 vmi->largest_chunk = VMALLOC_TOTAL;
2696 goto out;
2697 }
2698
2699 list_for_each_entry(va, &vmap_area_list, list) {
2700 unsigned long addr = va->va_start;
2701
2702
2703
2704
2705 if (addr < VMALLOC_START)
2706 continue;
2707 if (addr >= VMALLOC_END)
2708 break;
2709
2710 if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
2711 continue;
2712
2713 vmi->used += (va->va_end - va->va_start);
2714
2715 free_area_size = addr - prev_end;
2716 if (vmi->largest_chunk < free_area_size)
2717 vmi->largest_chunk = free_area_size;
2718
2719 prev_end = va->va_end;
2720 }
2721
2722 if (VMALLOC_END - prev_end > vmi->largest_chunk)
2723 vmi->largest_chunk = VMALLOC_END - prev_end;
2724
2725out:
2726 spin_unlock(&vmap_area_lock);
2727}
2728#endif
2729
2730