/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, April 2005
 */

#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <linux/llist.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

struct vfree_deferred {
	struct llist_head list;
	struct work_struct wq;
};
static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);

static void __vunmap(const void *, int);

/* Process vfree() requests that were deferred from interrupt context. */
static void free_work(struct work_struct *w)
{
	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
	struct llist_node *llnode = llist_del_all(&p->list);
	while (llnode) {
		void *p = llnode;
		llnode = llist_next(llnode);
		__vunmap(p, 1);
	}
}

/*** Page table manipulation functions ***/
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_clear_huge(pmd))
			continue;
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_clear_huge(pud))
			continue;
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the pages array; it lets the higher
	 * level callers keep track of how many pages have been mapped.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Set up page tables in the kernel virtual range [start, end).  The ptes
 * get protection "prot" and point at the pages in the "pages" array:
 * the pte at start + N*PAGE_SIZE maps pages[N].
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}

static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	flush_cache_vmap(start, end);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails.  Others just
	 * put modules in the vmalloc space.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Walk a vmalloc address to the struct page it maps.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space.
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (!pgd_none(*pgd)) {
		pud_t *pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd_t *pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				pte_t *ptep, pte;

				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
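
/*
 * Illustrative sketch (not part of this file): a driver can use
 * vmalloc_to_page() to describe a vmalloc()ed buffer page by page,
 * e.g. when building a scatterlist for DMA to such a buffer:
 *
 *	for (i = 0; i < nr_pages; i++)
 *		sg_set_page(&sg[i], vmalloc_to_page(buf + i * PAGE_SIZE),
 *			    PAGE_SIZE, 0);
 *
 * "sg", "buf" and "nr_pages" are hypothetical names used only for this
 * example.
 */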

/*** Global kva allocator ***/

#define VM_LAZY_FREE	0x01
#define VM_LAZY_FREEING	0x02
#define VM_VM_AREA	0x04

static DEFINE_SPINLOCK(vmap_area_lock);
/* Export for kexec only */
LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;

/* The vmap cache globals are protected by vmap_area_lock */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr >= va->va_end)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **p = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *tmp;

	while (*p) {
		struct vmap_area *tmp_va;

		parent = *p;
		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
		if (va->va_start < tmp_va->va_end)
			p = &(*p)->rb_left;
		else if (va->va_end > tmp_va->va_start)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, p);
	rb_insert_color(&va->rb_node, &vmap_area_root);

	/* keep vmap_area_list address-sorted */
	tmp = rb_prev(&va->rb_node);
	if (tmp) {
		struct vmap_area *prev;
		prev = rb_entry(tmp, struct vmap_area, rb_node);
		list_add_rcu(&va->list, &prev->list);
	} else
		list_add_rcu(&va->list, &vmap_area_list);
}

static void purge_vmap_area_lazy(void);

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend range.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(!is_power_of_2(align));

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);
	/*
	 * Invalidate cache if we have more permissive parameters.
	 * cached_hole_size notes the largest hole noticed _below_
	 * the vmap_area cached in free_vmap_cache: if size fits
	 * into that hole, we want to scan from vstart to reuse
	 * the hole instead of allocating above free_vmap_cache.
	 * Note that __free_vmap_area may update free_vmap_cache
	 * without updating cached_hole_size or cached_align.
	 */
	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}
	/* record if we encounter less permissive parameters */
	cached_vstart = vstart;
	cached_align = align;

	/* find starting point for our search */
	if (free_vmap_cache) {
		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
		addr = ALIGN(first->va_end, align);
		if (addr < vstart)
			goto nocache;
		if (addr + size < addr)
			goto overflow;

	} else {
		addr = ALIGN(vstart, align);
		if (addr + size < addr)
			goto overflow;

		n = vmap_area_root.rb_node;
		first = NULL;

		while (n) {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				first = tmp;
				if (tmp->va_start <= addr)
					break;
				n = n->rb_left;
			} else
				n = n->rb_right;
		}

		if (!first)
			goto found;
	}

	/* from the starting point, walk areas until a suitable hole is found */
	while (addr + size > first->va_start && addr + size <= vend) {
		if (addr + cached_hole_size < first->va_start)
			cached_hole_size = first->va_start - addr;
		addr = ALIGN(first->va_end, align);
		if (addr + size < addr)
			goto overflow;

		if (list_is_last(&first->list, &vmap_area_list))
			goto found;

		first = list_entry(first->list.next,
				struct vmap_area, list);
	}

found:
	if (addr + size > vend)
		goto overflow;

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	BUG_ON(va->va_start & (align-1));
	BUG_ON(va->va_start < vstart);
	BUG_ON(va->va_end > vend);

	return va;

overflow:
	spin_unlock(&vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
	if (printk_ratelimit())
		printk(KERN_WARNING
			"vmap allocation for size %lu failed: "
			"use vmalloc=<size> to increase size.\n", size);
	kfree(va);
	return ERR_PTR(-EBUSY);
}

static void __free_vmap_area(struct vmap_area *va)
{
	BUG_ON(RB_EMPTY_NODE(&va->rb_node));

	if (free_vmap_cache) {
		if (va->va_end < cached_vstart) {
			free_vmap_cache = NULL;
		} else {
			struct vmap_area *cache;
			cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
			if (va->va_start <= cache->va_start) {
				free_vmap_cache = rb_prev(&va->rb_node);
				/*
				 * cached_hole_size and cached_align are left
				 * as they are; stale values can only make the
				 * next search less optimal, never incorrect.
				 */
			}
		}
	}
	rb_erase(&va->rb_node, &vmap_area_root);
	RB_CLEAR_NODE(&va->rb_node);
	list_del_rcu(&va->list);

	/*
	 * Track the highest possible candidate address for the per-cpu
	 * area allocator.  Only end addresses that fall inside the
	 * vmalloc area proper are considered.
	 */
	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

	kfree_rcu(va, rcu_head);
}

/*
 * Free a region of KVA allocated by alloc_vmap_area().
 */
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}

/*
 * Clear the pagetable entries of a given vmap_area.
 */
static void unmap_vmap_area(struct vmap_area *va)
{
	vunmap_page_range(va->va_start, va->va_end);
}

static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
	/*
	 * Unmap the page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is enabled.  This catches use-after-free
	 * bugs in vmap space much like the linear-mapping checks catch
	 * them after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, to minimise the
	 * intrusiveness of this debugging feature.
	 */
	if (debug_pagealloc_enabled()) {
		vunmap_page_range(start, end);
		flush_tlb_kernel_range(start, end);
	}
}

/*
 * lazy_max_pages is the maximum amount of virtual address space we gather up
 * before attempting to purge with a TLB flush.
 *
 * There is a tradeoff here: a larger number covers more kernel page tables
 * and takes slightly longer to purge, but it linearly reduces the number of
 * global TLB flushes that must be performed.  It might seem natural to scale
 * this linearly with the number of CPUs, but vmap activity rarely scales
 * that way in practice and a huge latency on very large systems should be
 * avoided, so a conservative log scale is used instead.
 */
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}

static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* for per-cpu blocks */
static void purge_fragmented_blocks_allcpus(void);

/*
 * Called before a call to iounmap() if the caller wants the vm_area_struct
 * freed immediately instead of lazily.
 */
void set_iounmap_nonlazy(void)
{
	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
}
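
/*
 * Worked example (illustrative): with 4 online CPUs, fls(4) == 3, so with
 * 4 KiB pages lazy_max_pages() == 3 * (32 MiB / 4 KiB) = 24576 pages of
 * lazily freed vmap space may accumulate before a purge is attempted.
 */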

/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
					int sync, int force_flush)
{
	static DEFINE_SPINLOCK(purge_lock);
	LIST_HEAD(valist);
	struct vmap_area *va;
	struct vmap_area *n_va;
	int nr = 0;

	/*
	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
	 * should not expect such behaviour.  This just simplifies locking for
	 * the case that isn't actually used at the moment anyway.
	 */
	if (!sync && !force_flush) {
		if (!spin_trylock(&purge_lock))
			return;
	} else
		spin_lock(&purge_lock);

	if (sync)
		purge_fragmented_blocks_allcpus();

	rcu_read_lock();
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	rcu_read_unlock();

	if (nr)
		atomic_sub(nr, &vmap_lazy_nr);

	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

	if (nr) {
		spin_lock(&vmap_area_lock);
		list_for_each_entry_safe(va, n_va, &valist, purge_list)
			__free_vmap_area(va);
		spin_unlock(&vmap_area_lock);
	}
	spin_unlock(&purge_lock);
}

/*
 * Kick off a purge of the outstanding lazy areas.  Don't bother if somebody
 * else is already purging.
 */
static void try_purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 0, 0);
}

/*
 * Kick off a purge of the outstanding lazy areas.
 */
static void purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 1, 0);
}

/*
 * Free a vmap area lazily, the caller ensuring that the area has been
 * unmapped and flush_cache_vunmap() has been called for the range.
 */
static void free_vmap_area_noflush(struct vmap_area *va)
{
	va->flags |= VM_LAZY_FREE;
	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
		try_purge_vmap_area_lazy();
}

/*
 * Free and unmap a vmap area, the caller ensuring flush_cache_vunmap()
 * has been called for the range.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);
	free_vmap_area_noflush(va);
}

/*
 * Free and unmap a vmap area.
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	free_unmap_vmap_area_noflush(va);
}

static struct vmap_area *find_vmap_area(unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(&vmap_area_lock);
	va = __find_vmap_area(addr);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va;

	va = find_vmap_area(addr);
	BUG_ON(!va);
	free_unmap_vmap_area(va);
}


/*** Per cpu kva allocator ***/

/*
 * vmap space is limited, especially on 32 bit architectures.  Ensure there
 * is room for at least 16 percpu vmap blocks per CPU.
 */
/*
 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
 * to #define VMALLOC_SPACE as (VMALLOC_END - VMALLOC_START).  Guess a value
 * instead; we only need a rough idea of the size.
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
#define VMAP_BBMAP_BITS_MAX	1024		/* 4MB with 4K pages */
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
#define VMAP_BBMAP_BITS		\
		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
		VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
			VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
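
/*
 * Worked example (illustrative, the result depends on the actual config):
 * on 64-bit with 4 KiB pages, VMALLOC_PAGES = 128 GiB / 4 KiB = 32M.
 * With NR_CPUS = 64 that gives 32M / 64 / 16 = 32768, which is clamped to
 * VMAP_BBMAP_BITS_MAX = 1024, so VMAP_BLOCK_SIZE = 1024 * 4 KiB = 4 MiB.
 */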

static bool vmap_initialized __read_mostly = false;

struct vmap_block_queue {
	spinlock_t lock;
	struct list_head free;
};

struct vmap_block {
	spinlock_t lock;
	struct vmap_area *va;
	struct vmap_block_queue *vbq;
	unsigned long free, dirty;
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
	struct list_head free_list;
	struct rcu_head rcu_head;
	struct list_head purge;
};

/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap
 * block in the free path.
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);

/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks.  However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * big problem.
 */
static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}
792
793static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
794{
795 struct vmap_block_queue *vbq;
796 struct vmap_block *vb;
797 struct vmap_area *va;
798 unsigned long vb_idx;
799 int node, err;
800
801 node = numa_node_id();
802
803 vb = kmalloc_node(sizeof(struct vmap_block),
804 gfp_mask & GFP_RECLAIM_MASK, node);
805 if (unlikely(!vb))
806 return ERR_PTR(-ENOMEM);
807
808 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
809 VMALLOC_START, VMALLOC_END,
810 node, gfp_mask);
811 if (IS_ERR(va)) {
812 kfree(vb);
813 return ERR_CAST(va);
814 }
815
816 err = radix_tree_preload(gfp_mask);
817 if (unlikely(err)) {
818 kfree(vb);
819 free_vmap_area(va);
820 return ERR_PTR(err);
821 }
822
823 spin_lock_init(&vb->lock);
824 vb->va = va;
825 vb->free = VMAP_BBMAP_BITS;
826 vb->dirty = 0;
827 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
828 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
829 INIT_LIST_HEAD(&vb->free_list);
830
831 vb_idx = addr_to_vb_idx(va->va_start);
832 spin_lock(&vmap_block_tree_lock);
833 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
834 spin_unlock(&vmap_block_tree_lock);
835 BUG_ON(err);
836 radix_tree_preload_end();
837
838 vbq = &get_cpu_var(vmap_block_queue);
839 vb->vbq = vbq;
840 spin_lock(&vbq->lock);
841 list_add_rcu(&vb->free_list, &vbq->free);
842 spin_unlock(&vbq->lock);
843 put_cpu_var(vmap_block_queue);
844
845 return vb;
846}
847
848static void free_vmap_block(struct vmap_block *vb)
849{
850 struct vmap_block *tmp;
851 unsigned long vb_idx;
852
853 vb_idx = addr_to_vb_idx(vb->va->va_start);
854 spin_lock(&vmap_block_tree_lock);
855 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
856 spin_unlock(&vmap_block_tree_lock);
857 BUG_ON(tmp != vb);
858
859 free_vmap_area_noflush(vb->va);
860 kfree_rcu(vb, rcu_head);
861}
862
863static void purge_fragmented_blocks(int cpu)
864{
865 LIST_HEAD(purge);
866 struct vmap_block *vb;
867 struct vmap_block *n_vb;
868 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
869
870 rcu_read_lock();
871 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
872
873 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
874 continue;
875
876 spin_lock(&vb->lock);
877 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
878 vb->free = 0;
879 vb->dirty = VMAP_BBMAP_BITS;
880 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
881 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
882 spin_lock(&vbq->lock);
883 list_del_rcu(&vb->free_list);
884 spin_unlock(&vbq->lock);
885 spin_unlock(&vb->lock);
886 list_add_tail(&vb->purge, &purge);
887 } else
888 spin_unlock(&vb->lock);
889 }
890 rcu_read_unlock();
891
892 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
893 list_del(&vb->purge);
894 free_vmap_block(vb);
895 }
896}
897
898static void purge_fragmented_blocks_thiscpu(void)
899{
900 purge_fragmented_blocks(smp_processor_id());
901}
902
903static void purge_fragmented_blocks_allcpus(void)
904{
905 int cpu;
906
907 for_each_possible_cpu(cpu)
908 purge_fragmented_blocks(cpu);
909}
910
911static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
912{
913 struct vmap_block_queue *vbq;
914 struct vmap_block *vb;
915 unsigned long addr = 0;
916 unsigned int order;
917 int purge = 0;
918
919 BUG_ON(size & ~PAGE_MASK);
920 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
921 if (WARN_ON(size == 0)) {
922
923
924
925
926
927 return NULL;
928 }
929 order = get_order(size);
930
931again:
932 rcu_read_lock();
933 vbq = &get_cpu_var(vmap_block_queue);
934 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
935 int i;
936
937 spin_lock(&vb->lock);
938 if (vb->free < 1UL << order)
939 goto next;
940
941 i = bitmap_find_free_region(vb->alloc_map,
942 VMAP_BBMAP_BITS, order);
943
944 if (i < 0) {
945 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
946
947 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
948 purge = 1;
949 }
950 goto next;
951 }
952 addr = vb->va->va_start + (i << PAGE_SHIFT);
953 BUG_ON(addr_to_vb_idx(addr) !=
954 addr_to_vb_idx(vb->va->va_start));
955 vb->free -= 1UL << order;
956 if (vb->free == 0) {
957 spin_lock(&vbq->lock);
958 list_del_rcu(&vb->free_list);
959 spin_unlock(&vbq->lock);
960 }
961 spin_unlock(&vb->lock);
962 break;
963next:
964 spin_unlock(&vb->lock);
965 }
966
967 if (purge)
968 purge_fragmented_blocks_thiscpu();
969
970 put_cpu_var(vmap_block_queue);
971 rcu_read_unlock();
972
973 if (!addr) {
974 vb = new_vmap_block(gfp_mask);
975 if (IS_ERR(vb))
976 return vb;
977 goto again;
978 }
979
980 return (void *)addr;
981}
982
983static void vb_free(const void *addr, unsigned long size)
984{
985 unsigned long offset;
986 unsigned long vb_idx;
987 unsigned int order;
988 struct vmap_block *vb;
989
990 BUG_ON(size & ~PAGE_MASK);
991 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
992
993 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
994
995 order = get_order(size);
996
997 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
998
999 vb_idx = addr_to_vb_idx((unsigned long)addr);
1000 rcu_read_lock();
1001 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
1002 rcu_read_unlock();
1003 BUG_ON(!vb);
1004
1005 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
1006
1007 spin_lock(&vb->lock);
1008 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
1009
1010 vb->dirty += 1UL << order;
1011 if (vb->dirty == VMAP_BBMAP_BITS) {
1012 BUG_ON(vb->free);
1013 spin_unlock(&vb->lock);
1014 free_vmap_block(vb);
1015 } else
1016 spin_unlock(&vb->lock);
1017}

/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads.  That means a page you have unmapped
 * may still be reachable through stale aliases and TLB entries left behind
 * by the vmap layer.  vm_unmap_aliases() flushes all such lazy mappings, so
 * that after it returns no aliases from the vmap layer remain.
 */
1032void vm_unmap_aliases(void)
1033{
1034 unsigned long start = ULONG_MAX, end = 0;
1035 int cpu;
1036 int flush = 0;
1037
1038 if (unlikely(!vmap_initialized))
1039 return;
1040
1041 for_each_possible_cpu(cpu) {
1042 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1043 struct vmap_block *vb;
1044
1045 rcu_read_lock();
1046 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1047 int i;
1048
1049 spin_lock(&vb->lock);
1050 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1051 while (i < VMAP_BBMAP_BITS) {
1052 unsigned long s, e;
1053 int j;
1054 j = find_next_zero_bit(vb->dirty_map,
1055 VMAP_BBMAP_BITS, i);
1056
1057 s = vb->va->va_start + (i << PAGE_SHIFT);
1058 e = vb->va->va_start + (j << PAGE_SHIFT);
1059 flush = 1;
1060
1061 if (s < start)
1062 start = s;
1063 if (e > end)
1064 end = e;
1065
1066 i = j;
1067 i = find_next_bit(vb->dirty_map,
1068 VMAP_BBMAP_BITS, i);
1069 }
1070 spin_unlock(&vb->lock);
1071 }
1072 rcu_read_unlock();
1073 }
1074
1075 __purge_vmap_area_lazy(&start, &end, 1, flush);
1076}
1077EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */
void vm_unmap_ram(const void *mem, unsigned int count)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr = (unsigned long)mem;

	BUG_ON(!addr);
	BUG_ON(addr < VMALLOC_START);
	BUG_ON(addr > VMALLOC_END);
	BUG_ON(addr & (PAGE_SIZE-1));

	debug_check_no_locks_freed(mem, size);
	vmap_debug_free_range(addr, addr+size);

	if (likely(count <= VMAP_MAX_ALLOC))
		vb_free(mem, size);
	else
		free_unmap_vmap_area_addr(addr);
}
EXPORT_SYMBOL(vm_unmap_ram);

/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use, PAGE_KERNEL for regular RAM
 *
 * Returns: a pointer to the mapped address, or %NULL on failure
 */
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr;
	void *mem;

	if (likely(count <= VMAP_MAX_ALLOC)) {
		mem = vb_alloc(size, GFP_KERNEL);
		if (IS_ERR(mem))
			return NULL;
		addr = (unsigned long)mem;
	} else {
		struct vmap_area *va;
		va = alloc_vmap_area(size, PAGE_SIZE,
				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
		if (IS_ERR(va))
			return NULL;

		addr = va->va_start;
		mem = (void *)addr;
	}
	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
		vm_unmap_ram(mem, count);
		return NULL;
	}
	return mem;
}
EXPORT_SYMBOL(vm_map_ram);
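
/*
 * Illustrative usage sketch (not part of this file) for the two helpers
 * above; "pages" and "nr" are hypothetical names:
 *
 *	void *va = vm_map_ram(pages, nr, NUMA_NO_NODE, PAGE_KERNEL);
 *	if (va) {
 *		... access the pages through the contiguous mapping at va ...
 *		vm_unmap_ram(va, nr);
 *	}
 *
 * The count passed to vm_unmap_ram() must be the same count that was
 * passed to vm_map_ram(); partial unmaps are not supported.
 */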
1141
1142static struct vm_struct *vmlist __initdata;

/**
 * vm_area_add_early - add vmap area early during boot
 * @vm: vm_struct to add
 *
 * This function is used to add fixed kernel vm areas to vmlist before
 * vmalloc_init() is called.  @vm->addr, @vm->size, and @vm->flags should
 * contain proper values on entry and the other fields should be zero.
 */
1153void __init vm_area_add_early(struct vm_struct *vm)
1154{
1155 struct vm_struct *tmp, **p;
1156
1157 BUG_ON(vmap_initialized);
1158 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1159 if (tmp->addr >= vm->addr) {
1160 BUG_ON(tmp->addr < vm->addr + vm->size);
1161 break;
1162 } else
1163 BUG_ON(tmp->addr + tmp->size > vm->addr);
1164 }
1165 vm->next = *p;
1166 *p = vm;
1167}

/**
 * vm_area_register_early - register vmap area early during boot
 * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register a kernel vm area before vmalloc_init()
 * is called.  @vm->size and @vm->flags should contain proper values on
 * entry and the other fields should be zero.  On return, vm->addr contains
 * the allocated address.
 */
1181void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1182{
1183 static size_t vm_init_off __initdata;
1184 unsigned long addr;
1185
1186 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1187 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1188
1189 vm->addr = (void *)addr;
1190
1191 vm_area_add_early(vm);
1192}
1193
1194void __init vmalloc_init(void)
1195{
1196 struct vmap_area *va;
1197 struct vm_struct *tmp;
1198 int i;
1199
1200 for_each_possible_cpu(i) {
1201 struct vmap_block_queue *vbq;
1202 struct vfree_deferred *p;
1203
1204 vbq = &per_cpu(vmap_block_queue, i);
1205 spin_lock_init(&vbq->lock);
1206 INIT_LIST_HEAD(&vbq->free);
1207 p = &per_cpu(vfree_deferred, i);
1208 init_llist_head(&p->list);
1209 INIT_WORK(&p->wq, free_work);
1210 }
1211
1212
1213 for (tmp = vmlist; tmp; tmp = tmp->next) {
1214 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1215 va->flags = VM_VM_AREA;
1216 va->va_start = (unsigned long)tmp->addr;
1217 va->va_end = va->va_start + tmp->size;
1218 va->vm = tmp;
1219 __insert_vmap_area(va);
1220 }
1221
1222 vmap_area_pcpu_hole = VMALLOC_END;
1223
1224 vmap_initialized = true;
1225}

/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr.  The VM area that @addr and @size
 * specify should have been allocated using get_vm_area() and its friends.
 *
 * NOTE: This function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vmap() on the to-be-mapped area
 * before calling this function.
 *
 * RETURNS: The number of pages mapped on success, -errno on failure.
 */
1246int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1247 pgprot_t prot, struct page **pages)
1248{
1249 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1250}

/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @addr.  The VM area that @addr and @size
 * specify should have been allocated using get_vm_area() and its friends.
 *
 * NOTE: This function does NOT do any cache or TLB flushing.  The caller
 * is responsible for calling flush_cache_vunmap() before and
 * flush_tlb_kernel_range() after.
 */
1266void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1267{
1268 vunmap_page_range(addr, addr + size);
1269}
1270EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);

/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes the vcache before
 * the unmapping and the TLB after.
 */
1280void unmap_kernel_range(unsigned long addr, unsigned long size)
1281{
1282 unsigned long end = addr + size;
1283
1284 flush_cache_vunmap(addr, end);
1285 vunmap_page_range(addr, end);
1286 flush_tlb_kernel_range(addr, end);
1287}
1288
1289int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1290{
1291 unsigned long addr = (unsigned long)area->addr;
1292 unsigned long end = addr + area->size - PAGE_SIZE;
1293 int err;
1294
1295 err = vmap_page_range(addr, end, prot, *pages);
1296 if (err > 0) {
1297 *pages += err;
1298 err = 0;
1299 }
1300
1301 return err;
1302}
1303EXPORT_SYMBOL_GPL(map_vm_area);
1304
1305static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1306 unsigned long flags, const void *caller)
1307{
1308 spin_lock(&vmap_area_lock);
1309 vm->flags = flags;
1310 vm->addr = (void *)va->va_start;
1311 vm->size = va->va_end - va->va_start;
1312 vm->caller = caller;
1313 va->vm = vm;
1314 va->flags |= VM_VM_AREA;
1315 spin_unlock(&vmap_area_lock);
1316}
1317
1318static void clear_vm_unlist(struct vm_struct *vm)
1319{
	/*
	 * Before removing VM_UNLIST, make sure the vm_struct is fully
	 * initialized.  Pairs with the smp_rmb() in show_numa_info().
	 */
1325 smp_wmb();
1326 vm->flags &= ~VM_UNLIST;
1327}
1328
1329static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1330 unsigned long flags, const void *caller)
1331{
1332 setup_vmalloc_vm(vm, va, flags, caller);
1333 clear_vm_unlist(vm);
1334}
1335
1336static struct vm_struct *__get_vm_area_node(unsigned long size,
1337 unsigned long align, unsigned long flags, unsigned long start,
1338 unsigned long end, int node, gfp_t gfp_mask, const void *caller)
1339{
1340 struct vmap_area *va;
1341 struct vm_struct *area;
1342
1343 BUG_ON(in_interrupt());
1344 if (flags & VM_IOREMAP) {
1345 unsigned int bit = fls_long(size);
1346
1347 if (bit > IOREMAP_MAX_ORDER)
1348 bit = IOREMAP_MAX_ORDER;
1349 else if (bit < PAGE_SHIFT)
1350 bit = PAGE_SHIFT;
1351
1352 align = 1ul << bit;
1353 }
1354
1355 size = PAGE_ALIGN(size);
1356 if (unlikely(!size))
1357 return NULL;
1358
1359 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1360 if (unlikely(!area))
1361 return NULL;

	/*
	 * We always allocate a guard page at the end of the area.
	 */
1366 size += PAGE_SIZE;
1367
1368 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1369 if (IS_ERR(va)) {
1370 kfree(area);
1371 return NULL;
1372 }

	/*
	 * When this function is called from __vmalloc_node_range, the area
	 * is created with VM_UNLIST set so that readers do not see partially
	 * initialized members such as pages and nr_pages; they are filled in
	 * later and the flag is then cleared via clear_vm_unlist().
	 */
1380 if (flags & VM_UNLIST)
1381 setup_vmalloc_vm(area, va, flags, caller);
1382 else
1383 insert_vmalloc_vm(area, va, flags, caller);
1384
1385 return area;
1386}
1387
1388struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1389 unsigned long start, unsigned long end)
1390{
1391 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1392 GFP_KERNEL, __builtin_return_address(0));
1393}
1394EXPORT_SYMBOL_GPL(__get_vm_area);
1395
1396struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1397 unsigned long start, unsigned long end,
1398 const void *caller)
1399{
1400 return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
1401 GFP_KERNEL, caller);
1402}

/**
 * get_vm_area - reserve a contiguous kernel virtual area
 * @size: size of the area
 * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 * Search an area of @size in the kernel virtual mapping area and reserve
 * it for our purposes.  Returns the area descriptor on success or %NULL
 * on failure.
 */
1413struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1414{
1415 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1416 NUMA_NO_NODE, GFP_KERNEL,
1417 __builtin_return_address(0));
1418}
1419
1420struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1421 const void *caller)
1422{
1423 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1424 NUMA_NO_NODE, GFP_KERNEL, caller);
1425}

/**
 * find_vm_area - find a continuous kernel virtual area
 * @addr: base address
 *
 * Search for the kernel VM area starting at @addr and return it.  It is
 * up to the caller to do all required locking to keep the returned
 * pointer valid.
 */
1435struct vm_struct *find_vm_area(const void *addr)
1436{
1437 struct vmap_area *va;
1438
1439 va = find_vmap_area((unsigned long)addr);
1440 if (va && va->flags & VM_VM_AREA)
1441 return va->vm;
1442
1443 return NULL;
1444}

/**
 * remove_vm_area - find and remove a continuous kernel virtual area
 * @addr: base address
 *
 * Search for the kernel VM area starting at @addr and remove it.  The
 * found VM area is returned, but using it is NOT safe on SMP machines,
 * except for its size or flags.
 */
1454struct vm_struct *remove_vm_area(const void *addr)
1455{
1456 struct vmap_area *va;
1457
1458 va = find_vmap_area((unsigned long)addr);
1459 if (va && va->flags & VM_VM_AREA) {
1460 struct vm_struct *vm = va->vm;
1461
1462 spin_lock(&vmap_area_lock);
1463 va->vm = NULL;
1464 va->flags &= ~VM_VM_AREA;
1465 spin_unlock(&vmap_area_lock);
1466
1467 vmap_debug_free_range(va->va_start, va->va_end);
1468 free_unmap_vmap_area(va);
1469 vm->size -= PAGE_SIZE;
1470
1471 return vm;
1472 }
1473 return NULL;
1474}
1475
1476static void __vunmap(const void *addr, int deallocate_pages)
1477{
1478 struct vm_struct *area;
1479
1480 if (!addr)
1481 return;
1482
1483 if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
1484 addr))
1485 return;
1486
1487 area = remove_vm_area(addr);
1488 if (unlikely(!area)) {
1489 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1490 addr);
1491 return;
1492 }
1493
1494 debug_check_no_locks_freed(addr, area->size);
1495 debug_check_no_obj_freed(addr, area->size);
1496
1497 if (deallocate_pages) {
1498 int i;
1499
1500 for (i = 0; i < area->nr_pages; i++) {
1501 struct page *page = area->pages[i];
1502
1503 BUG_ON(!page);
1504 __free_page(page);
1505 }
1506
1507 if (area->flags & VM_VPAGES)
1508 vfree(area->pages);
1509 else
1510 kfree(area->pages);
1511 }
1512
1513 kfree(area);
1514 return;
1515}

/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as obtained
 * from vmalloc(), vmalloc_32() or __vmalloc().  If @addr is NULL, no
 * operation is performed.
 *
 * Must not be called in NMI context.  May be called from interrupt
 * context, in which case the actual free is deferred to a workqueue.
 */
void vfree(const void *addr)
{
	BUG_ON(in_nmi());

	kmemleak_free(addr);

	if (!addr)
		return;
	if (unlikely(in_interrupt())) {
		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
		llist_add((struct llist_node *)addr, &p->list);
		schedule_work(&p->wq);
	} else
		__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr, which was
 * created from the page array passed to vmap().  Must not be called in
 * interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	if (addr)
		__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual space.
 */
void *vmap(struct page **pages, unsigned int count,
		unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	might_sleep();

	if (count > totalram_pages)
		return NULL;

	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
					__builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);
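
/*
 * Illustrative sketch (not part of this file): mapping already-allocated
 * pages into one contiguous kernel virtual range with vmap()/vunmap();
 * "pages" and "nr_pages" are hypothetical names:
 *
 *	void *va = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
 *	if (va) {
 *		... use the nr_pages * PAGE_SIZE bytes at va ...
 *		vunmap(va);
 *	}
 */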
1600
1601static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1602 pgprot_t prot, int node, const void *caller)
1603{
1604 const int order = 0;
1605 struct page **pages;
1606 unsigned int nr_pages, array_size, i;
1607 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1608
1609 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1610 array_size = (nr_pages * sizeof(struct page *));
1611
1612 area->nr_pages = nr_pages;
1613
1614 if (array_size > PAGE_SIZE) {
1615 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1616 PAGE_KERNEL, node, caller);
1617 area->flags |= VM_VPAGES;
1618 } else {
1619 pages = kmalloc_node(array_size, nested_gfp, node);
1620 }
1621 area->pages = pages;
1622 area->caller = caller;
1623 if (!area->pages) {
1624 remove_vm_area(area->addr);
1625 kfree(area);
1626 return NULL;
1627 }
1628
1629 for (i = 0; i < area->nr_pages; i++) {
1630 struct page *page;
1631 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1632
1633 if (node < 0)
1634 page = alloc_page(tmp_mask);
1635 else
1636 page = alloc_pages_node(node, tmp_mask, order);
1637
1638 if (unlikely(!page)) {
1639
1640 area->nr_pages = i;
1641 goto fail;
1642 }
1643 area->pages[i] = page;
1644 }
1645
1646 if (map_vm_area(area, prot, &pages))
1647 goto fail;
1648 return area->addr;
1649
1650fail:
1651 warn_alloc_failed(gfp_mask, order,
1652 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1653 (area->nr_pages*PAGE_SIZE), area->size);
1654 vfree(area->addr);
1655 return NULL;
1656}

/**
 * __vmalloc_node_range - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @start: vm area range start
 * @end: vm area range end
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or NUMA_NO_NODE
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level allocator with
 * @gfp_mask flags and map them into contiguous kernel virtual space, using
 * a pagetable protection of @prot.
 */
1673void *__vmalloc_node_range(unsigned long size, unsigned long align,
1674 unsigned long start, unsigned long end, gfp_t gfp_mask,
1675 pgprot_t prot, int node, const void *caller)
1676{
1677 struct vm_struct *area;
1678 void *addr;
1679 unsigned long real_size = size;
1680
1681 size = PAGE_ALIGN(size);
1682 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1683 goto fail;
1684
1685 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
1686 start, end, node, gfp_mask, caller);
1687 if (!area)
1688 goto fail;
1689
1690 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1691 if (!addr)
1692 return NULL;
1693
1694
1695
1696
1697
1698
1699 clear_vm_unlist(area);
1700
1701
1702
1703
1704
1705
1706 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1707
1708 return addr;
1709
1710fail:
1711 warn_alloc_failed(gfp_mask, 0,
1712 "vmalloc: allocation failure: %lu bytes\n",
1713 real_size);
1714 return NULL;
1715}

/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or NUMA_NO_NODE
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level allocator with
 * @gfp_mask flags and map them into contiguous kernel virtual space, using
 * a pagetable protection of @prot.
 */
1737void *__vmalloc_node(unsigned long size, unsigned long align,
1738 gfp_t gfp_mask, pgprot_t prot,
1739 int node, const void *caller)
1740{
1741 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1742 gfp_mask, prot, node, caller);
1743}
1744
1745void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1746{
1747 return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
1748 __builtin_return_address(0));
1749}
1750EXPORT_SYMBOL(__vmalloc);
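
/*
 * vmalloc(), vzalloc() and vzalloc_node() below call __vmalloc_node_flags(),
 * which is not visible in this listing.  A minimal definition consistent
 * with those callers (an assumption, not necessarily the exact original):
 */
static inline void *__vmalloc_node_flags(unsigned long size,
					int node, gfp_t flags)
{
	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
					node, __builtin_return_address(0));
}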

/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level allocator and
 * map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node_flags(size, NUMA_NO_NODE,
				    GFP_KERNEL | __GFP_HIGHMEM);
}
EXPORT_SYMBOL(vmalloc);
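
/*
 * Illustrative sketch (not part of this file): a large table that needs to
 * be virtually but not physically contiguous.  "struct foo" and "nr" are
 * hypothetical names used only for this example.
 *
 *	struct foo *tbl = vmalloc(nr * sizeof(*tbl));
 *	if (!tbl)
 *		return -ENOMEM;
 *	...
 *	vfree(tbl);
 */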

/**
 * vzalloc - allocate virtually contiguous memory with zero fill
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level allocator and
 * map them into contiguous kernel virtual space.  The memory allocated is
 * set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc_node_flags(size, NUMA_NO_NODE,
				GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);

/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data, and it is marked VM_USERMAP so that
 * remap_vmalloc_range() will accept it.
 */
1792void *vmalloc_user(unsigned long size)
1793{
1794 struct vm_struct *area;
1795 void *ret;
1796
1797 ret = __vmalloc_node(size, SHMLBA,
1798 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1799 PAGE_KERNEL, NUMA_NO_NODE,
1800 __builtin_return_address(0));
1801 if (ret) {
1802 area = find_vm_area(ret);
1803 area->flags |= VM_USERMAP;
1804 }
1805 return ret;
1806}
1807EXPORT_SYMBOL(vmalloc_user);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level allocator and
 * map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
1820void *vmalloc_node(unsigned long size, int node)
1821{
1822 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1823 node, __builtin_return_address(0));
1824}
1825EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level allocator and
 * map them into contiguous kernel virtual space.  The memory allocated is
 * set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc_node() instead.
 */
1839void *vzalloc_node(unsigned long size, int node)
1840{
1841 return __vmalloc_node_flags(size, node,
1842 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1843}
1844EXPORT_SYMBOL(vzalloc_node);
1845
1846#ifndef PAGE_KERNEL_EXEC
1847# define PAGE_KERNEL_EXEC PAGE_KERNEL
1848#endif

/**
 * vmalloc_exec - allocate virtually contiguous, executable memory
 * @size: allocation size
 *
 * Kernel-internal function to allocate enough pages to cover @size from
 * the page level allocator and map them into contiguous and executable
 * kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
1862void *vmalloc_exec(unsigned long size)
1863{
1864 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1865 NUMA_NO_NODE, __builtin_return_address(0));
1866}
1867
1868#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1869#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
1870#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1871#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
1872#else
1873#define GFP_VMALLOC32 GFP_KERNEL
1874#endif

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the page
 * level allocator and map them into contiguous kernel virtual space.
 */
1883void *vmalloc_32(unsigned long size)
1884{
1885 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1886 NUMA_NO_NODE, __builtin_return_address(0));
1887}
1888EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 */
1897void *vmalloc_32_user(unsigned long size)
1898{
1899 struct vm_struct *area;
1900 void *ret;
1901
1902 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1903 NUMA_NO_NODE, __builtin_return_address(0));
1904 if (ret) {
1905 area = find_vm_area(ret);
1906 area->flags |= VM_USERMAP;
1907 }
1908 return ret;
1909}
1910EXPORT_SYMBOL(vmalloc_32_user);

/*
 * Small helper: copy count bytes from the vmalloc address addr into buf.
 * Pages that are not present are read as zeroes.
 */
1917static int aligned_vread(char *buf, char *addr, unsigned long count)
1918{
1919 struct page *p;
1920 int copied = 0;
1921
1922 while (count) {
1923 unsigned long offset, length;
1924
1925 offset = (unsigned long)addr & ~PAGE_MASK;
1926 length = PAGE_SIZE - offset;
1927 if (length > count)
1928 length = count;
1929 p = vmalloc_to_page(addr);
1930
1931
1932
1933
1934
1935
1936
1937 if (p) {
1938
1939
1940
1941
1942 void *map = kmap_atomic(p);
1943 memcpy(buf, map + offset, length);
1944 kunmap_atomic(map);
1945 } else
1946 memset(buf, 0, length);
1947
1948 addr += length;
1949 buf += length;
1950 copied += length;
1951 count -= length;
1952 }
1953 return copied;
1954}
1955
1956static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1957{
1958 struct page *p;
1959 int copied = 0;
1960
1961 while (count) {
1962 unsigned long offset, length;
1963
1964 offset = (unsigned long)addr & ~PAGE_MASK;
1965 length = PAGE_SIZE - offset;
1966 if (length > count)
1967 length = count;
1968 p = vmalloc_to_page(addr);
1969
1970
1971
1972
1973
1974
1975
1976 if (p) {
1977
1978
1979
1980
1981 void *map = kmap_atomic(p);
1982 memcpy(map + offset, buf, length);
1983 kunmap_atomic(map);
1984 }
1985 addr += length;
1986 buf += length;
1987 copied += length;
1988 count -= length;
1989 }
1990 return copied;
1991}

/**
 * vread() - read vmalloc area in a safe way.
 * @buf: buffer for reading data
 * @addr: vm address
 * @count: number of bytes to be read
 *
 * Copies data from the vmalloc area at @addr into @buf.  Memory holes,
 * IOREMAP areas and addresses outside any vmalloc area are filled with
 * zeroes.  Returns @count if any part of [addr, addr + count) intersects
 * a valid vmalloc area, or 0 otherwise.
 *
 * This is intended for callers that must access vmalloc space without any
 * further knowledge of the mapping; callers that own the mapping can
 * simply use memcpy().
 */
2019long vread(char *buf, char *addr, unsigned long count)
2020{
2021 struct vmap_area *va;
2022 struct vm_struct *vm;
2023 char *vaddr, *buf_start = buf;
2024 unsigned long buflen = count;
2025 unsigned long n;
2026
2027
2028 if ((unsigned long) addr + count < count)
2029 count = -(unsigned long) addr;
2030
2031 spin_lock(&vmap_area_lock);
2032 list_for_each_entry(va, &vmap_area_list, list) {
2033 if (!count)
2034 break;
2035
2036 if (!(va->flags & VM_VM_AREA))
2037 continue;
2038
2039 vm = va->vm;
2040 vaddr = (char *) vm->addr;
2041 if (addr >= vaddr + vm->size - PAGE_SIZE)
2042 continue;
2043 while (addr < vaddr) {
2044 if (count == 0)
2045 goto finished;
2046 *buf = '\0';
2047 buf++;
2048 addr++;
2049 count--;
2050 }
2051 n = vaddr + vm->size - PAGE_SIZE - addr;
2052 if (n > count)
2053 n = count;
2054 if (!(vm->flags & VM_IOREMAP))
2055 aligned_vread(buf, addr, n);
2056 else
2057 memset(buf, 0, n);
2058 buf += n;
2059 addr += n;
2060 count -= n;
2061 }
2062finished:
2063 spin_unlock(&vmap_area_lock);
2064
2065 if (buf == buf_start)
2066 return 0;
2067
2068 if (buf != buf_start + buflen)
2069 memset(buf, 0, buflen - (buf - buf_start));
2070
2071 return buflen;
2072}

/**
 * vwrite() - write vmalloc area in a safe way.
 * @buf: buffer for source data
 * @addr: vm address
 * @count: number of bytes to be written
 *
 * Copies data from @buf into the vmalloc area at @addr.  Bytes that fall
 * into memory holes, IOREMAP areas or outside any vmalloc area are
 * silently skipped.  Returns @count if any part of [addr, addr + count)
 * hit a non-IOREMAP vmalloc area, or 0 otherwise.
 *
 * Like vread(), this exists for callers that have no other knowledge of
 * the target mapping.
 */
2100long vwrite(char *buf, char *addr, unsigned long count)
2101{
2102 struct vmap_area *va;
2103 struct vm_struct *vm;
2104 char *vaddr;
2105 unsigned long n, buflen;
2106 int copied = 0;
2107
2108
2109 if ((unsigned long) addr + count < count)
2110 count = -(unsigned long) addr;
2111 buflen = count;
2112
2113 spin_lock(&vmap_area_lock);
2114 list_for_each_entry(va, &vmap_area_list, list) {
2115 if (!count)
2116 break;
2117
2118 if (!(va->flags & VM_VM_AREA))
2119 continue;
2120
2121 vm = va->vm;
2122 vaddr = (char *) vm->addr;
2123 if (addr >= vaddr + vm->size - PAGE_SIZE)
2124 continue;
2125 while (addr < vaddr) {
2126 if (count == 0)
2127 goto finished;
2128 buf++;
2129 addr++;
2130 count--;
2131 }
2132 n = vaddr + vm->size - PAGE_SIZE - addr;
2133 if (n > count)
2134 n = count;
2135 if (!(vm->flags & VM_IOREMAP)) {
2136 aligned_vwrite(buf, addr, n);
2137 copied++;
2138 }
2139 buf += n;
2140 addr += n;
2141 count -= n;
2142 }
2143finished:
2144 spin_unlock(&vmap_area_lock);
2145 if (!copied)
2146 return 0;
2147 return buflen;
2148}
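
/*
 * Note: in this kernel the main users of vread()/vwrite() are the
 * /proc/kcore and /dev/kmem interfaces, which access vmalloc space on
 * behalf of userspace without owning the mappings themselves.
 */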

/**
 * remap_vmalloc_range_partial - map vmalloc pages to userspace
 * @vma: vma to cover
 * @uaddr: target user address to start at
 * @kaddr: virtual address of vmalloc kernel memory
 * @size: size of map area
 *
 * Returns: 0 for success, -Exxx on failure
 *
 * This function checks that @kaddr is a valid vmalloc'ed area and that it
 * is big enough to cover the range starting at @uaddr in @vma.  Failure is
 * returned if those criteria are not met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c).
 */
2166int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
2167 void *kaddr, unsigned long size)
2168{
2169 struct vm_struct *area;
2170
2171 size = PAGE_ALIGN(size);
2172
2173 if (!PAGE_ALIGNED(uaddr) || !PAGE_ALIGNED(kaddr))
2174 return -EINVAL;
2175
2176 area = find_vm_area(kaddr);
2177 if (!area)
2178 return -EINVAL;
2179
2180 if (!(area->flags & VM_USERMAP))
2181 return -EINVAL;
2182
2183 if (kaddr + size > area->addr + area->size)
2184 return -EINVAL;
2185
2186 do {
2187 struct page *page = vmalloc_to_page(kaddr);
2188 int ret;
2189
2190 ret = vm_insert_page(vma, uaddr, page);
2191 if (ret)
2192 return ret;
2193
2194 uaddr += PAGE_SIZE;
2195 kaddr += PAGE_SIZE;
2196 size -= PAGE_SIZE;
2197 } while (size > 0);
2198
2199 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
2200
2201 return 0;
2202}
2203EXPORT_SYMBOL(remap_vmalloc_range_partial);

/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma: vma to cover (map full range of vma)
 * @addr: vmalloc memory
 * @pgoff: number of pages into addr before first page to map
 *
 * Returns: 0 for success, -Exxx on failure
 *
 * This function checks that @addr is a valid vmalloc'ed area and that it
 * is big enough to cover the vma.  Failure is returned if those criteria
 * are not met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c).
 */
2219int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2220 unsigned long pgoff)
2221{
2222 return remap_vmalloc_range_partial(vma, vma->vm_start,
2223 addr + (pgoff << PAGE_SHIFT),
2224 vma->vm_end - vma->vm_start);
2225}
2226EXPORT_SYMBOL(remap_vmalloc_range);
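
/*
 * Illustrative sketch (not part of this file): a driver ->mmap() handler
 * exposing a vmalloc_user() buffer to userspace.  "foo_mmap" and the use
 * of file->private_data are hypothetical.
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		void *buf = file->private_data;	// obtained via vmalloc_user()
 *		return remap_vmalloc_range(vma, buf, 0);
 *	}
 *
 * vmalloc_user() sets VM_USERMAP, which remap_vmalloc_range_partial()
 * requires before it will insert the pages.
 */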

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
2232void __attribute__((weak)) vmalloc_sync_all(void)
2233{
2234}
2235
2236
2237static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2238{
2239 pte_t ***p = data;
2240
2241 if (p) {
2242 *(*p) = pte;
2243 (*p)++;
2244 }
2245 return 0;
2246}

/**
 * alloc_vm_area - allocate a range of kernel address space
 * @size: size of the area
 * @ptes: returns the PTEs for the address space
 *
 * Returns: NULL on failure, vm_struct on success
 *
 * This function reserves a range of kernel address space and allocates
 * pagetables to map that range.  No actual mappings are created.
 *
 * If @ptes is non-NULL, pointers to the PTEs (in init_mm) allocated for
 * the VM area are returned.
 */
2262struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2263{
2264 struct vm_struct *area;
2265
2266 area = get_vm_area_caller(size, VM_IOREMAP,
2267 __builtin_return_address(0));
2268 if (area == NULL)
2269 return NULL;

	/*
	 * This ensures that page tables are constructed for this region
	 * of kernel virtual address space and mapped into init_mm.
	 */
2275 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2276 size, f, ptes ? &ptes : NULL)) {
2277 free_vm_area(area);
2278 return NULL;
2279 }
2280
2281 return area;
2282}
2283EXPORT_SYMBOL_GPL(alloc_vm_area);
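
/*
 * Illustrative sketch (not part of this file): reserving kernel address
 * space whose ptes are filled in later, e.g. by a hypervisor mapping.
 * "NR" is a hypothetical constant used only for this example.
 *
 *	pte_t *ptes[NR];
 *	struct vm_struct *area = alloc_vm_area(NR * PAGE_SIZE, ptes);
 *	if (!area)
 *		return -ENOMEM;
 *	... point ptes[0..NR-1] at the real backing pages ...
 *	free_vm_area(area);
 */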
2284
2285void free_vm_area(struct vm_struct *area)
2286{
2287 struct vm_struct *ret;
2288 ret = remove_vm_area(area->addr);
2289 BUG_ON(ret != area);
2290 kfree(area);
2291}
2292EXPORT_SYMBOL_GPL(free_vm_area);
2293
2294#ifdef CONFIG_SMP
2295static struct vmap_area *node_to_va(struct rb_node *n)
2296{
2297 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2298}
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312static bool pvm_find_next_prev(unsigned long end,
2313 struct vmap_area **pnext,
2314 struct vmap_area **pprev)
2315{
2316 struct rb_node *n = vmap_area_root.rb_node;
2317 struct vmap_area *va = NULL;
2318
2319 while (n) {
2320 va = rb_entry(n, struct vmap_area, rb_node);
2321 if (end < va->va_end)
2322 n = n->rb_left;
2323 else if (end > va->va_end)
2324 n = n->rb_right;
2325 else
2326 break;
2327 }
2328
2329 if (!va)
2330 return false;
2331
2332 if (va->va_end > end) {
2333 *pnext = va;
2334 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2335 } else {
2336 *pprev = va;
2337 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2338 }
2339 return true;
2340}
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358static unsigned long pvm_determine_end(struct vmap_area **pnext,
2359 struct vmap_area **pprev,
2360 unsigned long align)
2361{
2362 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2363 unsigned long addr;
2364
2365 if (*pnext)
2366 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2367 else
2368 addr = vmalloc_end;
2369
2370 while (*pprev && (*pprev)->va_end > addr) {
2371 *pnext = *pprev;
2372 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2373 }
2374
2375 return addr;
2376}
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2403 const size_t *sizes, int nr_vms,
2404 size_t align)
2405{
2406 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2407 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2408 struct vmap_area **vas, *prev, *next;
2409 struct vm_struct **vms;
2410 int area, area2, last_area, term_area;
2411 unsigned long base, start, end, last_end;
2412 bool purged = false;
2413
2414
2415 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2416 for (last_area = 0, area = 0; area < nr_vms; area++) {
2417 start = offsets[area];
2418 end = start + sizes[area];
2419
2420
2421 BUG_ON(!IS_ALIGNED(offsets[area], align));
2422 BUG_ON(!IS_ALIGNED(sizes[area], align));
2423
2424
2425 if (start > offsets[last_area])
2426 last_area = area;
2427
2428 for (area2 = 0; area2 < nr_vms; area2++) {
2429 unsigned long start2 = offsets[area2];
2430 unsigned long end2 = start2 + sizes[area2];
2431
2432 if (area2 == area)
2433 continue;
2434
2435 BUG_ON(start2 >= start && start2 < end);
2436 BUG_ON(end2 <= end && end2 > start);
2437 }
2438 }
2439 last_end = offsets[last_area] + sizes[last_area];
2440
2441 if (vmalloc_end - vmalloc_start < last_end) {
2442 WARN_ON(true);
2443 return NULL;
2444 }
2445
2446 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2447 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2448 if (!vas || !vms)
2449 goto err_free2;
2450
2451 for (area = 0; area < nr_vms; area++) {
2452 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2453 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2454 if (!vas[area] || !vms[area])
2455 goto err_free;
2456 }
2457retry:
2458 spin_lock(&vmap_area_lock);
2459
2460
2461 area = term_area = last_area;
2462 start = offsets[area];
2463 end = start + sizes[area];
2464
2465 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2466 base = vmalloc_end - last_end;
2467 goto found;
2468 }
2469 base = pvm_determine_end(&next, &prev, align) - end;
2470
2471 while (true) {
2472 BUG_ON(next && next->va_end <= base + end);
2473 BUG_ON(prev && prev->va_end > base + end);
2474
2475
2476
2477
2478
2479 if (base + last_end < vmalloc_start + last_end) {
2480 spin_unlock(&vmap_area_lock);
2481 if (!purged) {
2482 purge_vmap_area_lazy();
2483 purged = true;
2484 goto retry;
2485 }
2486 goto err_free;
2487 }
2488
2489
2490
2491
2492
2493 if (next && next->va_start < base + end) {
2494 base = pvm_determine_end(&next, &prev, align) - end;
2495 term_area = area;
2496 continue;
2497 }
2498
2499
2500
2501
2502
2503
2504 if (prev && prev->va_end > base + start) {
2505 next = prev;
2506 prev = node_to_va(rb_prev(&next->rb_node));
2507 base = pvm_determine_end(&next, &prev, align) - end;
2508 term_area = area;
2509 continue;
2510 }
2511
2512
2513
2514
2515
2516 area = (area + nr_vms - 1) % nr_vms;
2517 if (area == term_area)
2518 break;
2519 start = offsets[area];
2520 end = start + sizes[area];
2521 pvm_find_next_prev(base + end, &next, &prev);
2522 }
2523found:
2524
2525 for (area = 0; area < nr_vms; area++) {
2526 struct vmap_area *va = vas[area];
2527
2528 va->va_start = base + offsets[area];
2529 va->va_end = va->va_start + sizes[area];
2530 __insert_vmap_area(va);
2531 }
2532
2533 vmap_area_pcpu_hole = base + offsets[last_area];
2534
2535 spin_unlock(&vmap_area_lock);
2536
2537
2538 for (area = 0; area < nr_vms; area++)
2539 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2540 pcpu_get_vm_areas);
2541
2542 kfree(vas);
2543 return vms;
2544
2545err_free:
2546 for (area = 0; area < nr_vms; area++) {
2547 kfree(vas[area]);
2548 kfree(vms[area]);
2549 }
2550err_free2:
2551 kfree(vas);
2552 kfree(vms);
2553 return NULL;
2554}
2555
2556
2557
2558
2559
2560
2561
2562
2563void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2564{
2565 int i;
2566
2567 for (i = 0; i < nr_vms; i++)
2568 free_vm_area(vms[i]);
2569 kfree(vms);
2570}
2571#endif
2572
2573#ifdef CONFIG_PROC_FS
2574static void *s_start(struct seq_file *m, loff_t *pos)
2575 __acquires(&vmap_area_lock)
2576{
2577 loff_t n = *pos;
2578 struct vmap_area *va;
2579
2580 spin_lock(&vmap_area_lock);
2581 va = list_entry((&vmap_area_list)->next, typeof(*va), list);
2582 while (n > 0 && &va->list != &vmap_area_list) {
2583 n--;
2584 va = list_entry(va->list.next, typeof(*va), list);
2585 }
2586 if (!n && &va->list != &vmap_area_list)
2587 return va;
2588
2589 return NULL;
2590
2591}
2592
2593static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2594{
2595 struct vmap_area *va = p, *next;
2596
2597 ++*pos;
2598 next = list_entry(va->list.next, typeof(*va), list);
2599 if (&next->list != &vmap_area_list)
2600 return next;
2601
2602 return NULL;
2603}
2604
2605static void s_stop(struct seq_file *m, void *p)
2606 __releases(&vmap_area_lock)
2607{
2608 spin_unlock(&vmap_area_lock);
2609}
2610
2611static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2612{
2613 if (IS_ENABLED(CONFIG_NUMA)) {
2614 unsigned int nr, *counters = m->private;
2615
2616 if (!counters)
2617 return;
2618
2619
2620 smp_rmb();
2621 if (v->flags & VM_UNLIST)
2622 return;
2623
2624 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2625
2626 for (nr = 0; nr < v->nr_pages; nr++)
2627 counters[page_to_nid(v->pages[nr])]++;
2628
2629 for_each_node_state(nr, N_HIGH_MEMORY)
2630 if (counters[nr])
2631 seq_printf(m, " N%u=%u", nr, counters[nr]);
2632 }
2633}
2634
2635static int s_show(struct seq_file *m, void *p)
2636{
2637 struct vmap_area *va = p;
2638 struct vm_struct *v;
2639
2640 if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
2641 return 0;
2642
2643 if (!(va->flags & VM_VM_AREA)) {
2644 seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
2645 (void *)va->va_start, (void *)va->va_end,
2646 va->va_end - va->va_start);
2647 return 0;
2648 }
2649
2650 v = va->vm;
2651
2652 seq_printf(m, "0x%pK-0x%pK %7ld",
2653 v->addr, v->addr + v->size, v->size);
2654
2655 if (v->caller)
2656 seq_printf(m, " %pS", v->caller);
2657
2658 if (v->nr_pages)
2659 seq_printf(m, " pages=%d", v->nr_pages);
2660
2661 if (v->phys_addr)
2662 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2663
2664 if (v->flags & VM_IOREMAP)
2665 seq_printf(m, " ioremap");
2666
2667 if (v->flags & VM_ALLOC)
2668 seq_printf(m, " vmalloc");
2669
2670 if (v->flags & VM_MAP)
2671 seq_printf(m, " vmap");
2672
2673 if (v->flags & VM_USERMAP)
2674 seq_printf(m, " user");
2675
2676 if (v->flags & VM_VPAGES)
2677 seq_printf(m, " vpages");
2678
2679 show_numa_info(m, v);
2680 seq_putc(m, '\n');
2681 return 0;
2682}
2683
2684static const struct seq_operations vmalloc_op = {
2685 .start = s_start,
2686 .next = s_next,
2687 .stop = s_stop,
2688 .show = s_show,
2689};
2690
2691static int vmalloc_open(struct inode *inode, struct file *file)
2692{
2693 unsigned int *ptr = NULL;
2694 int ret;
2695
2696 if (IS_ENABLED(CONFIG_NUMA)) {
2697 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2698 if (ptr == NULL)
2699 return -ENOMEM;
2700 }
2701 ret = seq_open(file, &vmalloc_op);
2702 if (!ret) {
2703 struct seq_file *m = file->private_data;
2704 m->private = ptr;
2705 } else
2706 kfree(ptr);
2707 return ret;
2708}
2709
2710static const struct file_operations proc_vmalloc_operations = {
2711 .open = vmalloc_open,
2712 .read = seq_read,
2713 .llseek = seq_lseek,
2714 .release = seq_release_private,
2715};
2716
2717static int __init proc_vmalloc_init(void)
2718{
2719 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2720 return 0;
2721}
2722module_init(proc_vmalloc_init);
2723
2724void get_vmalloc_info(struct vmalloc_info *vmi)
2725{
2726 struct vmap_area *va;
2727 unsigned long free_area_size;
2728 unsigned long prev_end;
2729
2730 vmi->used = 0;
2731 vmi->largest_chunk = 0;
2732
2733 prev_end = VMALLOC_START;
2734
2735 spin_lock(&vmap_area_lock);
2736
2737 if (list_empty(&vmap_area_list)) {
2738 vmi->largest_chunk = VMALLOC_TOTAL;
2739 goto out;
2740 }
2741
2742 list_for_each_entry(va, &vmap_area_list, list) {
2743 unsigned long addr = va->va_start;
2744
2745
2746
2747
2748 if (addr < VMALLOC_START)
2749 continue;
2750 if (addr >= VMALLOC_END)
2751 break;
2752
2753 if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
2754 continue;
2755
2756 vmi->used += (va->va_end - va->va_start);
2757
2758 free_area_size = addr - prev_end;
2759 if (vmi->largest_chunk < free_area_size)
2760 vmi->largest_chunk = free_area_size;
2761
2762 prev_end = va->va_end;
2763 }
2764
2765 if (VMALLOC_END - prev_end > vmi->largest_chunk)
2766 vmi->largest_chunk = VMALLOC_END - prev_end;
2767
2768out:
2769 spin_unlock(&vmap_area_lock);
2770}
2771#endif
2772
2773