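/*
 * Virtually contiguous memory allocation: vmalloc/vfree, vmap/vunmap and
 * the underlying kernel virtual address (kva) allocators.
 */
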
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

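/*** Page table manipulation functions ***/
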
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

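	/*
	 * nr is a running index into the array which helps higher level
	 * callers keep track of where we're up to.
	 */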
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

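/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
 * will have pfns corresponding to the "pages" array.
 *
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
 */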
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}

static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	flush_cache_vmap(start, end);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
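	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put it in the vmalloc space.
	 */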
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

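/*
 * Walk a vmap address to the struct page it maps.
 */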
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);

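	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */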
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (!pgd_none(*pgd)) {
		pud_t *pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd_t *pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				pte_t *ptep, pte;

				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

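/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */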
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

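/*** Global kva allocator ***/
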
#define VM_LAZY_FREE	0x01
#define VM_LAZY_FREEING	0x02
#define VM_VM_AREA	0x04

struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long flags;
	struct rb_node rb_node;
	struct list_head list;
	struct list_head purge_list;
	void *private;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(vmap_area_lock);
static struct rb_root vmap_area_root = RB_ROOT;
static LIST_HEAD(vmap_area_list);
static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr > va->va_start)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **p = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *tmp;

	while (*p) {
		struct vmap_area *tmp;

		parent = *p;
		tmp = rb_entry(parent, struct vmap_area, rb_node);
		if (va->va_start < tmp->va_end)
			p = &(*p)->rb_left;
		else if (va->va_end > tmp->va_start)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, p);
	rb_insert_color(&va->rb_node, &vmap_area_root);

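	/* address-sort this list so it is usable like the vmlist */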
	tmp = rb_prev(&va->rb_node);
	if (tmp) {
		struct vmap_area *prev;
		prev = rb_entry(tmp, struct vmap_area, rb_node);
		list_add_rcu(&va->list, &prev->list);
	} else
		list_add_rcu(&va->list, &vmap_area_list);
}

static void purge_vmap_area_lazy(void);

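/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */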
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	addr = ALIGN(vstart, align);

	spin_lock(&vmap_area_lock);
	if (addr + size - 1 < addr)
		goto overflow;

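	/* XXX: could have a last_hole cache */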
	n = vmap_area_root.rb_node;
	if (n) {
		struct vmap_area *first = NULL;

		do {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				if (!first && tmp->va_start < addr + size)
					first = tmp;
				n = n->rb_left;
			} else {
				first = tmp;
				n = n->rb_right;
			}
		} while (n);

		if (!first)
			goto found;

		if (first->va_end < addr) {
			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}

		while (addr + size > first->va_start && addr + size <= vend) {
			addr = ALIGN(first->va_end + PAGE_SIZE, align);
			if (addr + size - 1 < addr)
				goto overflow;

			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}
	}
found:
	if (addr + size > vend) {
overflow:
		spin_unlock(&vmap_area_lock);
		if (!purged) {
			purge_vmap_area_lazy();
			purged = 1;
			goto retry;
		}
		if (printk_ratelimit())
			printk(KERN_WARNING
				"vmap allocation for size %lu failed: "
				"use vmalloc=<size> to increase size.\n", size);
		kfree(va);
		return ERR_PTR(-EBUSY);
	}

	BUG_ON(addr & (align-1));

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void rcu_free_va(struct rcu_head *head)
{
	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);

	kfree(va);
}

static void __free_vmap_area(struct vmap_area *va)
{
	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
	rb_erase(&va->rb_node, &vmap_area_root);
	RB_CLEAR_NODE(&va->rb_node);
	list_del_rcu(&va->list);

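	/*
	 * Track the highest possible candidate for pcpu area
	 * allocation.  Areas outside of vmalloc area can be returned
	 * here too, consider only end addresses which fall inside
	 * vmalloc area proper.
	 */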
	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

	call_rcu(&va->rcu_head, rcu_free_va);
}

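/*
 * Free a region of KVA allocated by alloc_vmap_area
 */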
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}

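/*
 * Clear the pagetable entries of a given vmap_area
 */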
static void unmap_vmap_area(struct vmap_area *va)
{
	vunmap_page_range(va->va_start, va->va_end);
}

static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
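	/*
	 * Unmap page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
	 * bugs similarly to those in linear kernel virtual address
	 * space after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, in order to
	 * minimise intrusiveness of this debugging feature.
	 *
	 * This is going to be *slow* (linear kernel virtual address
	 * debugging doesn't do a broadcast TLB flush so it is a lot
	 * faster).
	 */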
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}

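/*
 * lazy_max_pages is the maximum amount of virtual address space we gather up
 * before attempting to purge with a TLB flush.
 *
 * There is a tradeoff here: a larger number will cover more kernel page table
 * updates and thus reduce the number of TLB flushes, however, it will result
 * in a greater amount of memory unusable for allocation, because the lazily
 * freed virtual space stays reserved until it is actually purged.
 */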
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}

static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

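/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */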
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
					int sync, int force_flush)
{
	static DEFINE_SPINLOCK(purge_lock);
	LIST_HEAD(valist);
	struct vmap_area *va;
	struct vmap_area *n_va;
	int nr = 0;

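	/*
	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
	 * should not expect such behaviour. This just simplifies locking for
	 * the case that isn't actually used at the moment anyway.
	 */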
	if (!sync && !force_flush) {
		if (!spin_trylock(&purge_lock))
			return;
	} else
		spin_lock(&purge_lock);

	rcu_read_lock();
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			unmap_vmap_area(va);
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	rcu_read_unlock();

	if (nr) {
		BUG_ON(nr > atomic_read(&vmap_lazy_nr));
		atomic_sub(nr, &vmap_lazy_nr);
	}

	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

	if (nr) {
		spin_lock(&vmap_area_lock);
		list_for_each_entry_safe(va, n_va, &valist, purge_list)
			__free_vmap_area(va);
		spin_unlock(&vmap_area_lock);
	}
	spin_unlock(&purge_lock);
}

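/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * is already purging.
 */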
static void try_purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 0, 0);
}

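/*
 * Kick off a purge of the outstanding lazy areas.
 */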
static void purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 1, 0);
}

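/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 */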
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	va->flags |= VM_LAZY_FREE;
	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
		try_purge_vmap_area_lazy();
}

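/*
 * Free and unmap a vmap area
 */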
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	free_unmap_vmap_area_noflush(va);
}

static struct vmap_area *find_vmap_area(unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(&vmap_area_lock);
	va = __find_vmap_area(addr);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va;

	va = find_vmap_area(addr);
	BUG_ON(!va);
	free_unmap_vmap_area(va);
}

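/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure there is
 * room for at least 16 percpu vmap blocks per CPU.
 */
/*
 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
 * to #define VMALLOC_SPACE	(VMALLOC_END-VMALLOC_START). Guess
 * instead (we just need a rough idea)
 */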
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX	1024
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,		\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)

static bool vmap_initialized __read_mostly = false;

struct vmap_block_queue {
	spinlock_t lock;
	struct list_head free;
	struct list_head dirty;
	unsigned int nr_dirty;
};

struct vmap_block {
	spinlock_t lock;
	struct vmap_area *va;
	struct vmap_block_queue *vbq;
	unsigned long free, dirty;
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
	union {
		struct list_head free_list;
		struct rcu_head rcu_head;
	};
};

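/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */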
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

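/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
 * in the free path. Could get rid of this if we change the API to return a
 * "cookie" from alloc, to be passed to free. But no big deal yet.
 */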
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);

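/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * big problem.
 */
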
static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}

static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	struct vmap_area *va;
	unsigned long vb_idx;
	int node, err;

	node = numa_node_id();

	vb = kmalloc_node(sizeof(struct vmap_block),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!vb))
		return ERR_PTR(-ENOMEM);

	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
					VMALLOC_START, VMALLOC_END,
					node, gfp_mask);
	if (unlikely(IS_ERR(va))) {
		kfree(vb);
		return ERR_PTR(PTR_ERR(va));
	}

	err = radix_tree_preload(gfp_mask);
	if (unlikely(err)) {
		kfree(vb);
		free_vmap_area(va);
		return ERR_PTR(err);
	}

	spin_lock_init(&vb->lock);
	vb->va = va;
	vb->free = VMAP_BBMAP_BITS;
	vb->dirty = 0;
	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
	INIT_LIST_HEAD(&vb->free_list);

	vb_idx = addr_to_vb_idx(va->va_start);
	spin_lock(&vmap_block_tree_lock);
	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
	spin_unlock(&vmap_block_tree_lock);
	BUG_ON(err);
	radix_tree_preload_end();

	vbq = &get_cpu_var(vmap_block_queue);
	vb->vbq = vbq;
	spin_lock(&vbq->lock);
	list_add(&vb->free_list, &vbq->free);
	spin_unlock(&vbq->lock);
	put_cpu_var(vmap_block_queue);

	return vb;
}

static void rcu_free_vb(struct rcu_head *head)
{
	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);

	kfree(vb);
}

static void free_vmap_block(struct vmap_block *vb)
{
	struct vmap_block *tmp;
	unsigned long vb_idx;

	BUG_ON(!list_empty(&vb->free_list));

	vb_idx = addr_to_vb_idx(vb->va->va_start);
	spin_lock(&vmap_block_tree_lock);
	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
	spin_unlock(&vmap_block_tree_lock);
	BUG_ON(tmp != vb);

	free_unmap_vmap_area_noflush(vb->va);
	call_rcu(&vb->rcu_head, rcu_free_vb);
}

static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	unsigned long addr = 0;
	unsigned int order;

	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
	order = get_order(size);

again:
	rcu_read_lock();
	vbq = &get_cpu_var(vmap_block_queue);
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
		int i;

		spin_lock(&vb->lock);
		i = bitmap_find_free_region(vb->alloc_map,
						VMAP_BBMAP_BITS, order);

		if (i >= 0) {
			addr = vb->va->va_start + (i << PAGE_SHIFT);
			BUG_ON(addr_to_vb_idx(addr) !=
					addr_to_vb_idx(vb->va->va_start));
			vb->free -= 1UL << order;
			if (vb->free == 0) {
				spin_lock(&vbq->lock);
				list_del_init(&vb->free_list);
				spin_unlock(&vbq->lock);
			}
			spin_unlock(&vb->lock);
			break;
		}
		spin_unlock(&vb->lock);
	}
	put_cpu_var(vmap_block_queue);
	rcu_read_unlock();

	if (!addr) {
		vb = new_vmap_block(gfp_mask);
		if (IS_ERR(vb))
			return vb;
		goto again;
	}

	return (void *)addr;
}

static void vb_free(const void *addr, unsigned long size)
{
	unsigned long offset;
	unsigned long vb_idx;
	unsigned int order;
	struct vmap_block *vb;

	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);

	flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);

	order = get_order(size);

	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);

	vb_idx = addr_to_vb_idx((unsigned long)addr);
	rcu_read_lock();
	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
	rcu_read_unlock();
	BUG_ON(!vb);

	spin_lock(&vb->lock);
	bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);

	vb->dirty += 1UL << order;
	if (vb->dirty == VMAP_BBMAP_BITS) {
		BUG_ON(vb->free || !list_empty(&vb->free_list));
		spin_unlock(&vb->lock);
		free_vmap_block(vb);
	} else
		spin_unlock(&vb->lock);
}

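/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads. What this means is that any page you
 * have now, may, in a former life, have been mapped into kernel virtual
 * address by the vmap layer and so there might be some CPUs with TLB entries
 * still referencing that page (additional to the regular 1:1 kernel mapping).
 *
 * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
 * be sure that none of the pages we have control over will have any aliases
 * from the vmap layer.
 */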
void vm_unmap_aliases(void)
{
	unsigned long start = ULONG_MAX, end = 0;
	int cpu;
	int flush = 0;

	if (unlikely(!vmap_initialized))
		return;

	for_each_possible_cpu(cpu) {
		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
		struct vmap_block *vb;

		rcu_read_lock();
		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
			int i;

			spin_lock(&vb->lock);
			i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
			while (i < VMAP_BBMAP_BITS) {
				unsigned long s, e;
				int j;
				j = find_next_zero_bit(vb->dirty_map,
					VMAP_BBMAP_BITS, i);

				s = vb->va->va_start + (i << PAGE_SHIFT);
				e = vb->va->va_start + (j << PAGE_SHIFT);
				vunmap_page_range(s, e);
				flush = 1;

				if (s < start)
					start = s;
				if (e > end)
					end = e;

				i = j;
				i = find_next_bit(vb->dirty_map,
					VMAP_BBMAP_BITS, i);
			}
			spin_unlock(&vb->lock);
		}
		rcu_read_unlock();
	}

	__purge_vmap_area_lazy(&start, &end, 1, flush);
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
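
/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */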
void vm_unmap_ram(const void *mem, unsigned int count)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr = (unsigned long)mem;

	BUG_ON(!addr);
	BUG_ON(addr < VMALLOC_START);
	BUG_ON(addr > VMALLOC_END);
	BUG_ON(addr & (PAGE_SIZE-1));

	debug_check_no_locks_freed(mem, size);
	vmap_debug_free_range(addr, addr+size);

	if (likely(count <= VMAP_MAX_ALLOC))
		vb_free(mem, size);
	else
		free_unmap_vmap_area_addr(addr);
}
EXPORT_SYMBOL(vm_unmap_ram);
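
/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 */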
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr;
	void *mem;

	if (likely(count <= VMAP_MAX_ALLOC)) {
		mem = vb_alloc(size, GFP_KERNEL);
		if (IS_ERR(mem))
			return NULL;
		addr = (unsigned long)mem;
	} else {
		struct vmap_area *va;
		va = alloc_vmap_area(size, PAGE_SIZE,
				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
		if (IS_ERR(va))
			return NULL;

		addr = va->va_start;
		mem = (void *)addr;
	}
	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
		vm_unmap_ram(mem, count);
		return NULL;
	}
	return mem;
}
EXPORT_SYMBOL(vm_map_ram);
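
/**
 * vm_area_register_early - register vmap area early during boot
 * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register kernel vm area before
 * vmalloc_init() is called.  @vm->size and @vm->flags should contain
 * proper values on entry and other fields should be zero.  On return,
 * vm->addr contains the allocated address.
 *
 * DO NOT USE THIS FUNCTION UNLESS YOU ARE NEVER GOING TO FREE IT.
 */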
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
	static size_t vm_init_off __initdata;
	unsigned long addr;

	addr = ALIGN(VMALLOC_START + vm_init_off, align);
	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;

	vm->addr = (void *)addr;

	vm->next = vmlist;
	vmlist = vm;
}

void __init vmalloc_init(void)
{
	struct vmap_area *va;
	struct vm_struct *tmp;
	int i;

	for_each_possible_cpu(i) {
		struct vmap_block_queue *vbq;

		vbq = &per_cpu(vmap_block_queue, i);
		spin_lock_init(&vbq->lock);
		INIT_LIST_HEAD(&vbq->free);
		INIT_LIST_HEAD(&vbq->dirty);
		vbq->nr_dirty = 0;
	}

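	/* Import existing vmlist entries. */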
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
		va->flags = tmp->flags | VM_VM_AREA;
		va->va_start = (unsigned long)tmp->addr;
		va->va_end = va->va_start + tmp->size;
		__insert_vmap_area(va);
	}

	vmap_area_pcpu_hole = VMALLOC_END;

	vmap_initialized = true;
}
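
/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size
 * specify should have been allocated using get_vm_area() and its
 * friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vmap() on to-be-mapped areas
 * before calling this function.
 *
 * RETURNS:
 * The number of pages mapped on success, -errno on failure.
 */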
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
			     pgprot_t prot, struct page **pages)
{
	return vmap_page_range_noflush(addr, addr + size, prot, pages);
}

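/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size
 * specify should have been allocated using get_vm_area() and its
 * friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is
 * responsible for calling flush_cache_vunmap() on to-be-mapped areas
 * before calling this function and flush_tlb_kernel_range() after.
 */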
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	vunmap_page_range(addr, addr + size);
}

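/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes vcache before
 * the unmapping and tlb after.
 */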
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size;

	flush_cache_vunmap(addr, end);
	vunmap_page_range(addr, end);
	flush_tlb_kernel_range(addr, end);
}

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	unsigned long addr = (unsigned long)area->addr;
	unsigned long end = addr + area->size - PAGE_SIZE;
	int err;

	err = vmap_page_range(addr, end, prot, *pages);
	if (err > 0) {
		*pages += err;
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL_GPL(map_vm_area);

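/*** Old vmalloc interfaces ***/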
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	struct vm_struct *tmp, **p;

	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;
	va->private = vm;
	va->flags |= VM_VM_AREA;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr >= vm->addr)
			break;
	}
	vm->next = *p;
	*p = vm;
	write_unlock(&vmlist_lock);
}

static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vmap_area *va;
	struct vm_struct *area;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

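	/*
	 * We always allocate a guard page.
	 */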
	size += PAGE_SIZE;

	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	insert_vmalloc_vm(area, va, flags, caller);
	return area;
}

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  __builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);

struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				       unsigned long start, unsigned long end,
				       void *caller)
{
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  caller);
}

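/**
 * get_vm_area - reserve a contiguous kernel virtual area
 * @size: size of the area
 * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 * Search an area of @size in the kernel virtual mapping area,
 * and reserve it for our purposes.  Returns the area descriptor
 * on success or %NULL on failure.
 */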
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  -1, GFP_KERNEL, __builtin_return_address(0));
}

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				     void *caller)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  -1, GFP_KERNEL, caller);
}

struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
				   int node, gfp_t gfp_mask)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  node, gfp_mask, __builtin_return_address(0));
}

static struct vm_struct *find_vm_area(const void *addr)
{
	struct vmap_area *va;

	va = find_vmap_area((unsigned long)addr);
	if (va && va->flags & VM_VM_AREA)
		return va->private;

	return NULL;
}

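/**
 * remove_vm_area - find and remove a continuous kernel virtual area
 * @addr: base address
 *
 * Search for the kernel VM area starting at @addr, and remove it.
 * This function returns the found VM area, but using it is NOT safe
 * on SMP machines, except for its size or flags.
 */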
struct vm_struct *remove_vm_area(const void *addr)
{
	struct vmap_area *va;

	va = find_vmap_area((unsigned long)addr);
	if (va && va->flags & VM_VM_AREA) {
		struct vm_struct *vm = va->private;
		struct vm_struct *tmp, **p;

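		/*
		 * remove from list and disallow access to this vm_struct
		 * before unmap. (address range confliction is maintained by
		 * vmap.)
		 */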
		write_lock(&vmlist_lock);
		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
			;
		*p = tmp->next;
		write_unlock(&vmlist_lock);

		vmap_debug_free_range(va->va_start, va->va_end);
		free_unmap_vmap_area(va);
		vm->size -= PAGE_SIZE;

		return vm;
	}
	return NULL;
}

static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}
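
/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 * NULL, no operation is performed.
 *
 * Must not be called in interrupt context.
 */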
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

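/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */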
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

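/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual
 * space.
 */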
void *vmap(struct page **pages, unsigned int count,
	   unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	might_sleep();

	if (count > totalram_pages)
		return NULL;

	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
				  __builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);

static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
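	/* Please note that the recursion is strictly bounded. */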
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size,
				(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
				node);
	}
	area->pages = pages;
	area->caller = caller;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node < 0)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
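			/* Successfully allocated i pages, free them in __vunmap() */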
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
{
	void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
					 __builtin_return_address(0));

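	/*
	 * A ref_count = 3 is needed because the vm_struct and vmap_area
	 * structures allocated in the __get_vm_area_node() function contain
	 * references to the virtual address of the vmalloc'ed block.
	 */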
	kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);

	return addr;
}
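
/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or -1
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 */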
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	struct vm_struct *area;
	void *addr;
	unsigned long real_size = size;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
				  VMALLOC_END, node, gfp_mask, caller);

	if (!area)
		return NULL;

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);

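	/*
	 * A ref_count = 3 is needed because the vm_struct and vmap_area
	 * structures allocated in the __get_vm_area_node() function contain
	 * references to the virtual address of the vmalloc'ed block.
	 */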
	kmemleak_alloc(addr, real_size, 3, gfp_mask);

	return addr;
}

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, 1, gfp_mask, prot, -1,
			      __builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
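
/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */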
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
			      -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
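
/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 */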
void *vmalloc_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, SHMLBA,
			     GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			     PAGE_KERNEL, -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
		area->flags |= VM_USERMAP;
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_user);
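
/**
 * vmalloc_node - allocate memory on a specific node
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */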
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
			      node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

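/**
 * vmalloc_exec - allocate virtually contiguous, executable memory
 * @size: allocation size
 *
 * Kernel-internal function to allocate enough pages to cover @size
 * from the page level allocator and map them into contiguous and
 * executable kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */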
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
			      -1, __builtin_return_address(0));
}

#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif

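/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the
 * page level allocator and map them into contiguous kernel virtual space.
 */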
void *vmalloc_32(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
			      -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);

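/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 */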
void *vmalloc_32_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
			     -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
		area->flags |= VM_USERMAP;
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);
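
/*
 * Small helper routine: copy contents to buf from addr.
 * If the page is not present, fill zero.
 */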
static int aligned_vread(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = (unsigned long)addr & ~PAGE_MASK;
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
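		/*
		 * To do safe access to this _mapped_ area, we need
		 * lock. But adding lock here means that we need to add
		 * overhead of vmalloc()/vfree() calls for this _debug_
		 * interface, rarely used. Instead of that, we'll use
		 * kmap() and get small overhead in this access function.
		 */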
		if (p) {
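			/*
			 * We can expect USER0 is not used (see vread/vwrite's
			 * function description).
			 */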
			void *map = kmap_atomic(p, KM_USER0);
			memcpy(buf, map + offset, length);
			kunmap_atomic(map, KM_USER0);
		} else
			memset(buf, 0, length);

		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}

static int aligned_vwrite(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = (unsigned long)addr & ~PAGE_MASK;
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
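		/*
		 * To do safe access to this _mapped_ area, we need
		 * lock. But adding lock here means that we need to add
		 * overhead of vmalloc()/vfree() calls for this _debug_
		 * interface, rarely used. Instead of that, we'll use
		 * kmap() and get small overhead in this access function.
		 */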
		if (p) {
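			/*
			 * We can expect USER0 is not used (see vread/vwrite's
			 * function description).
			 */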
			void *map = kmap_atomic(p, KM_USER0);
			memcpy(map + offset, buf, length);
			kunmap_atomic(map, KM_USER0);
		}
		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}
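
/**
 * vread() - read vmalloc area in a safe way.
 * @buf:   buffer for reading data
 * @addr:  vm address.
 * @count: number of bytes to be read.
 *
 * Returns # of bytes which addr and buf should be increased
 * (same number as @count). Returns 0 if [addr...addr+count) doesn't
 * include any intersection with alive vmalloc area.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from that area to a given buffer. If the given memory range
 * of [addr...addr+count) includes some valid address, data is copied to
 * proper area of @buf. If there are memory holes, they'll be zero-filled.
 * IOREMAP area is treated as memory hole and no copy is done.
 *
 * @buf should be kernel's buffer. Because this function uses KM_USER0,
 * the caller should guarantee KM_USER0 is not used.
 *
 * Note: In usual ops, vread() is never necessary because the caller
 * should know vmalloc() area is valid and can use memcpy().
 * This is for routines which have to access vmalloc area without
 * any information, as /dev/kmem.
 */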
long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long buflen = count;
	unsigned long n;

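	/* Don't allow overflow */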
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		if (n > count)
			n = count;
		if (!(tmp->flags & VM_IOREMAP))
			aligned_vread(buf, addr, n);
		else
			memset(buf, 0, n);
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	read_unlock(&vmlist_lock);

	if (buf == buf_start)
		return 0;

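	/* Zero-fill memory holes */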
	if (buf != buf_start + buflen)
		memset(buf, 0, buflen - (buf - buf_start));

	return buflen;
}
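
/**
 * vwrite() - write vmalloc area in a safe way.
 * @buf:   buffer for source data
 * @addr:  vm address.
 * @count: number of bytes to be written.
 *
 * Returns # of bytes which addr and buf should be increased
 * (same number as @count). Returns 0 if [addr...addr+count) doesn't
 * include any intersection with alive vmalloc area.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from a buffer to the given addr. If specified range of
 * [addr...addr+count) includes some valid address, data is copied from
 * proper area of @buf. If there are memory holes, no copy to hole.
 * IOREMAP area is treated as memory hole and no copy is done.
 *
 * @buf should be kernel's buffer. Because this function uses KM_USER0,
 * the caller should guarantee KM_USER0 is not used.
 *
 * Note: In usual ops, vwrite() is never necessary because the caller
 * should know vmalloc() area is valid and can use memcpy().
 * This is for routines which have to access vmalloc area without
 * any information, as /dev/kmem.
 */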
long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr;
	unsigned long n, buflen;
	int copied = 0;

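	/* Don't allow overflow */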
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;
	buflen = count;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		if (n > count)
			n = count;
		if (!(tmp->flags & VM_IOREMAP)) {
			aligned_vwrite(buf, addr, n);
			copied++;
		}
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	read_unlock(&vmlist_lock);
	if (!copied)
		return 0;
	return buflen;
}
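
/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma:   vma to cover (map full range of vma)
 * @addr:  vmalloc memory
 * @pgoff: number of pages into addr before first page to map
 *
 * Returns: 0 for success, -Exxx on failure
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * that it is big enough to cover the vma. Will return failure if
 * that criteria isn't met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */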
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
			unsigned long pgoff)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	unsigned long usize = vma->vm_end - vma->vm_start;

	if ((PAGE_SIZE-1) & (unsigned long)addr)
		return -EINVAL;

	area = find_vm_area(addr);
	if (!area)
		return -EINVAL;

	if (!(area->flags & VM_USERMAP))
		return -EINVAL;

	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
		return -EINVAL;

	addr += pgoff << PAGE_SHIFT;
	do {
		struct page *page = vmalloc_to_page(addr);
		int ret;

		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		addr += PAGE_SIZE;
		usize -= PAGE_SIZE;
	} while (usize > 0);

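	/* Prevent "things" like memory migration? VM_flags need a cleanup... */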
	vma->vm_flags |= VM_RESERVED;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range);
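
/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */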
void __attribute__((weak)) vmalloc_sync_all(void)
{
}


static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
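	/* apply_to_page_range() does all the hard work. */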
	return 0;
}
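
/**
 * alloc_vm_area - allocate a range of kernel address space
 * @size: size of the area
 *
 * Returns: NULL on failure, vm_struct on success
 *
 * This function reserves a range of kernel address space, and
 * allocates pagetables to map that range.  No actual mappings
 * are created.  If the kernel address space is not shared
 * between processes, it syncs the pagetable across all
 * processes.
 */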
struct vm_struct *alloc_vm_area(size_t size)
{
	struct vm_struct *area;

	area = get_vm_area_caller(size, VM_IOREMAP,
				  __builtin_return_address(0));
	if (area == NULL)
		return NULL;

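	/*
	 * This ensures that page tables are constructed for this region
	 * of kernel virtual address space and mapped into init_mm.
	 */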
	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				area->size, f, NULL)) {
		free_vm_area(area);
		return NULL;
	}

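	/* Make sure the pagetables are constructed in process kernel
	   mappings */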
	vmalloc_sync_all();

	return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

void free_vm_area(struct vm_struct *area)
{
	struct vm_struct *ret;
	ret = remove_vm_area(area->addr);
	BUG_ON(ret != area);
	kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);

static struct vmap_area *node_to_va(struct rb_node *n)
{
	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
}
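
/**
 * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
 * @end: target address
 * @pnext: out arg for the next vmap_area
 * @pprev: out arg for the previous vmap_area
 *
 * Returns: %true if either or both of next and prev are found,
 *	    %false if no vmap_area exists
 *
 * Find vmap_areas end addresses of which enclose @end.  ie. if not
 * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
 */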
static bool pvm_find_next_prev(unsigned long end,
			       struct vmap_area **pnext,
			       struct vmap_area **pprev)
{
	struct rb_node *n = vmap_area_root.rb_node;
	struct vmap_area *va = NULL;

	while (n) {
		va = rb_entry(n, struct vmap_area, rb_node);
		if (end < va->va_end)
			n = n->rb_left;
		else if (end > va->va_end)
			n = n->rb_right;
		else
			break;
	}

	if (!va)
		return false;

	if (va->va_end > end) {
		*pnext = va;
		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
	} else {
		*pprev = va;
		*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
	}
	return true;
}
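
/**
 * pvm_determine_end - find the highest aligned address between two vmap_areas
 * @pnext: in/out arg for the next vmap_area
 * @pprev: in/out arg for the previous vmap_area
 * @align: alignment
 *
 * Returns: determined end address
 *
 * Find the highest aligned address between *@pnext and *@pprev below
 * VMALLOC_END.  *@pnext and *@pprev are adjusted so that the aligned
 * down address is between the end addresses of the two vmap_areas.
 *
 * Please note that the address returned by this function may fall
 * inside *@pnext vmap_area.  The caller is responsible for checking
 * that.
 */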
static unsigned long pvm_determine_end(struct vmap_area **pnext,
				       struct vmap_area **pprev,
				       unsigned long align)
{
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr;

	if (*pnext)
		addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
	else
		addr = vmalloc_end;

	while (*pprev && (*pprev)->va_end > addr) {
		*pnext = *pprev;
		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
	}

	return addr;
}
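
/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 * @gfp_mask: allocation mask
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * Percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * congruent vmalloc areas for it.  These areas tend to be scattered
 * pretty far, distance between two areas easily going up to
 * gigabytes.  To avoid interacting with regular vmalloc areas, these
 * areas are allocated from top.
 *
 * Despite its complicated look, this allocator is rather simple.  It
 * does everything top-down and scans areas from the end looking for
 * matching slot.  While scanning, if any of the areas overlaps with
 * existing vmap_area, the base address is pulled down to fit the
 * area.  Scanning is repeated till all the areas fit and then all
 * necessary data structures are inserted and the result is returned.
 */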
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align, gfp_t gfp_mask)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *prev, *next;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, end, last_end;
	bool purged = false;

	gfp_mask &= GFP_RECLAIM_MASK;

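	/* verify parameters and allocate data structures */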
	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

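		/* is everything aligned properly? */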
		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

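		/* detect the area with the highest address */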
		if (start > offsets[last_area])
			last_area = area;

		for (area2 = 0; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			if (area2 == area)
				continue;

			BUG_ON(start2 >= start && start2 < end);
			BUG_ON(end2 <= end && end2 > start);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
	vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
	if (!vas || !vms)
		goto err_free;

	for (area = 0; area < nr_vms; area++) {
		vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
		vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&vmap_area_lock);

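	/* start scanning - we scan from the top, begin with the last area */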
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
		base = vmalloc_end - last_end;
		goto found;
	}
	base = pvm_determine_end(&next, &prev, align) - end;

	while (true) {
		BUG_ON(next && next->va_end <= base + end);
		BUG_ON(prev && prev->va_end > base + end);

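		/*
		 * base might have underflowed, add last_end before
		 * comparing.
		 */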
		if (base + last_end < vmalloc_start + last_end) {
			spin_unlock(&vmap_area_lock);
			if (!purged) {
				purge_vmap_area_lazy();
				purged = true;
				goto retry;
			}
			goto err_free;
		}

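		/*
		 * If next overlaps, move base downwards so that it's
		 * right below next and then recheck.
		 */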
		if (next && next->va_start < base + end) {
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

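		/*
		 * If prev overlaps, shift down next and prev and move
		 * base so that it's right below new next and then
		 * recheck.
		 */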
		if (prev && prev->va_end > base + start) {
			next = prev;
			prev = node_to_va(rb_prev(&next->rb_node));
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

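		/*
		 * This area fits, move on to the previous one.  If
		 * the previous one is the terminal one, we're done.
		 */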
		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;
		start = offsets[area];
		end = start + sizes[area];
		pvm_find_next_prev(base + end, &next, &prev);
	}
found:
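	/* we've found a fitting base, insert all va's */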
	for (area = 0; area < nr_vms; area++) {
		struct vmap_area *va = vas[area];

		va->va_start = base + offsets[area];
		va->va_end = va->va_start + sizes[area];
		__insert_vmap_area(va);
	}

	vmap_area_pcpu_hole = base + offsets[last_area];

	spin_unlock(&vmap_area_lock);

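	/* insert all vm's */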
	for (area = 0; area < nr_vms; area++)
		insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
				  pcpu_get_vm_areas);

	kfree(vas);
	return vms;

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas)
			kfree(vas[area]);
		if (vms)
			kfree(vms[area]);
	}
	kfree(vas);
	kfree(vms);
	return NULL;
}
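
/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
 */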
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int i;

	for (i = 0; i < nr_vms; i++)
		free_vm_area(vms[i]);
	kfree(vms);
}

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;
	struct vm_struct *v;

	read_lock(&vmlist_lock);
	v = vmlist;
	while (n > 0 && v) {
		n--;
		v = v->next;
	}
	if (!n)
		return v;

	return NULL;
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct vm_struct *v = p;

	++*pos;
	return v->next;
}

static void s_stop(struct seq_file *m, void *p)
{
	read_unlock(&vmlist_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (NUMA_BUILD) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}

static int s_show(struct seq_file *m, void *p)
{
	struct vm_struct *v = p;

	seq_printf(m, "0x%p-0x%p %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller) {
		char buff[KSYM_SYMBOL_LEN];

		seq_putc(m, ' ');
		sprint_symbol(buff, (unsigned long)v->caller);
		seq_puts(m, buff);
	}

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%lx", v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_printf(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_printf(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_printf(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_printf(m, " user");

	if (v->flags & VM_VPAGES)
		seq_printf(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');
	return 0;
}

static const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int vmalloc_open(struct inode *inode, struct file *file)
{
	unsigned int *ptr = NULL;
	int ret;

	if (NUMA_BUILD)
		ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
	ret = seq_open(file, &vmalloc_op);
	if (!ret) {
		struct seq_file *m = file->private_data;
		m->private = ptr;
	} else
		kfree(ptr);
	return ret;
}

static const struct file_operations proc_vmalloc_operations = {
	.open		= vmalloc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};

static int __init proc_vmalloc_init(void)
{
	proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
	return 0;
}
module_init(proc_vmalloc_init);
#endif