/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, April 2005
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

/*** Page table manipulation functions ***/

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the pages array which helps higher
	 * level callers keep track of where we're up to.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and
 * include pages. Also see vmap_page_range and vunmap_page_range.
 *
 * Returns the number of pages mapped on success, -errno on failure.
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}

static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	flush_cache_vmap(start, end);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put it in the vmalloc space.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Walk a vmap address to the struct page it maps.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (!pgd_none(*pgd)) {
		pud_t *pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd_t *pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				pte_t *ptep, pte;

				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

/*** Global kva allocator ***/

#define VM_LAZY_FREE	0x01
#define VM_LAZY_FREEING	0x02
#define VM_VM_AREA	0x04

struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long flags;
	struct rb_node rb_node;		/* address sorted rbtree */
	struct list_head list;		/* address sorted list */
	struct list_head purge_list;	/* "lazy purge" list */
	void *private;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(vmap_area_lock);
static struct rb_root vmap_area_root = RB_ROOT;
static LIST_HEAD(vmap_area_list);
static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr > va->va_start)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **p = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *tmp;

	while (*p) {
		struct vmap_area *tmp_va;

		parent = *p;
		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
		if (va->va_start < tmp_va->va_end)
			p = &(*p)->rb_left;
		else if (va->va_end > tmp_va->va_start)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, p);
	rb_insert_color(&va->rb_node, &vmap_area_root);

	/* address-sort this list so it is usable like the vmlist */
	tmp = rb_prev(&va->rb_node);
	if (tmp) {
		struct vmap_area *prev;
		prev = rb_entry(tmp, struct vmap_area, rb_node);
		list_add_rcu(&va->list, &prev->list);
	} else
		list_add_rcu(&va->list, &vmap_area_list);
}

static void purge_vmap_area_lazy(void);

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend range.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	addr = ALIGN(vstart, align);

	spin_lock(&vmap_area_lock);
	if (addr + size - 1 < addr)
		goto overflow;

	/* XXX: could have a last_hole cache */
	n = vmap_area_root.rb_node;
	if (n) {
		struct vmap_area *first = NULL;

		do {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				if (!first && tmp->va_start < addr + size)
					first = tmp;
				n = n->rb_left;
			} else {
				first = tmp;
				n = n->rb_right;
			}
		} while (n);

		if (!first)
			goto found;

		if (first->va_end < addr) {
			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}

		while (addr + size > first->va_start && addr + size <= vend) {
			addr = ALIGN(first->va_end + PAGE_SIZE, align);
			if (addr + size - 1 < addr)
				goto overflow;

			n = rb_next(&first->rb_node);
			if (n)
				first = rb_entry(n, struct vmap_area, rb_node);
			else
				goto found;
		}
	}
found:
	if (addr + size > vend) {
overflow:
		spin_unlock(&vmap_area_lock);
		if (!purged) {
			purge_vmap_area_lazy();
			purged = 1;
			goto retry;
		}
		if (printk_ratelimit())
			printk(KERN_WARNING
				"vmap allocation for size %lu failed: "
				"use vmalloc=<size> to increase size.\n", size);
		kfree(va);
		return ERR_PTR(-EBUSY);
	}

	BUG_ON(addr & (align-1));

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void rcu_free_va(struct rcu_head *head)
{
	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);

	kfree(va);
}

static void __free_vmap_area(struct vmap_area *va)
{
	BUG_ON(RB_EMPTY_NODE(&va->rb_node));
	rb_erase(&va->rb_node, &vmap_area_root);
	RB_CLEAR_NODE(&va->rb_node);
	list_del_rcu(&va->list);

	/*
	 * Track the highest possible candidate for pcpu area
	 * allocation.  Areas outside of vmalloc area can be returned
	 * while pcpu alloc looks only for areas aligned in vmalloc
	 * area.  Detecting va->va_end below is not necessary but will
	 * save us from unnecessary work.
	 */
	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

	call_rcu(&va->rcu_head, rcu_free_va);
}

/*
 * Free a region of KVA allocated by alloc_vmap_area
 */
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}

/*
 * Clear the pagetable entries of a given vmap_area
 */
static void unmap_vmap_area(struct vmap_area *va)
{
	vunmap_page_range(va->va_start, va->va_end);
}

static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
	/*
	 * Unmap page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
	 * bugs similarly to those in linear kernel virtual address
	 * space after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, in order to
	 * minimise intrusiveness of this debugging feature.
	 *
	 * This is going to be *slow* (linear kernel virtual address
	 * debugging doesn't do a broadcast TLB flush so it is a lot
	 * faster).
	 */
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}

/*
 * lazy_max_pages is the maximum amount of virtual address space we gather up
 * before attempting to purge with a TLB flush.
 *
 * There is a tradeoff here: a larger number will cover more kernel page
 * tables and take slightly longer to purge, but it will linearly reduce
 * the number of global TLB flushes that must be performed.  Rather than
 * scaling linearly with the number of CPUs, the limit grows with
 * fls(num_online_cpus()): a constant 32MB worth of address space per
 * doubling of the online CPU count.
 */
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}
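
/*
 * Worked example, assuming 4 KiB pages: with 16 online CPUs,
 * fls(16) == 5, so lazy_max_pages() returns 5 * (32MB / 4KB) = 40960
 * pages, i.e. up to 160 MiB of lazily-freed virtual address space may
 * accumulate before a purge (and its global TLB flush) is forced.
 */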

static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);

/*
 * called before a call to iounmap() if the caller wants vm_area_struct's
 * immediately freed.
 */
void set_iounmap_nonlazy(void)
{
	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
}

/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
					int sync, int force_flush)
{
	static DEFINE_SPINLOCK(purge_lock);
	LIST_HEAD(valist);
	struct vmap_area *va;
	struct vmap_area *n_va;
	int nr = 0;

	/*
	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
	 * should not expect such behaviour. This just simplifies locking for
	 * the case that isn't actually used at the moment anyway.
	 */
	if (!sync && !force_flush) {
		if (!spin_trylock(&purge_lock))
			return;
	} else
		spin_lock(&purge_lock);

	if (sync)
		purge_fragmented_blocks_allcpus();

	rcu_read_lock();
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	rcu_read_unlock();

	if (nr)
		atomic_sub(nr, &vmap_lazy_nr);

	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

	if (nr) {
		spin_lock(&vmap_area_lock);
		list_for_each_entry_safe(va, n_va, &valist, purge_list)
			__free_vmap_area(va);
		spin_unlock(&vmap_area_lock);
	}
	spin_unlock(&purge_lock);
}

/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * is already purging.
 */
static void try_purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 0, 0);
}

/*
 * Kick off a purge of the outstanding lazy areas.
 */
static void purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 1, 0);
}

/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap had been called for the correct range
 * previously.
 */
static void free_vmap_area_noflush(struct vmap_area *va)
{
	va->flags |= VM_LAZY_FREE;
	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
		try_purge_vmap_area_lazy();
}

/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);
	free_vmap_area_noflush(va);
}

/*
 * Free and unmap a vmap area
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	free_unmap_vmap_area_noflush(va);
}

static struct vmap_area *find_vmap_area(unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(&vmap_area_lock);
	va = __find_vmap_area(addr);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va;

	va = find_vmap_area(addr);
	BUG_ON(!va);
	free_unmap_vmap_area(va);
}

/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure there is
 * room for at least 16 percpu vmap blocks per CPU.
 */
/*
 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
 * to #define VMALLOC_SPACE	(VMALLOC_END-VMALLOC_START). Guess
 * instead (we just need a rough idea)
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG	/* 256K with 4K pages */
#define VMAP_BBMAP_BITS_MAX	1024	/* 4MB with 4K pages */
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y)) /* can't use min() */
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y)) /* can't use max() */
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
						VMALLOC_PAGES / NR_CPUS / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)

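/*
 * Sizing example, assuming 4 KiB pages on a 32-bit kernel with NR_CPUS=4:
 * VMALLOC_PAGES = 128MB / 4KB = 32768, so VMALLOC_PAGES / NR_CPUS / 16
 * = 512 bits.  Clamped to [VMAP_BBMAP_BITS_MIN=64, VMAP_BBMAP_BITS_MAX=1024]
 * this gives VMAP_BBMAP_BITS = 512, hence VMAP_BLOCK_SIZE = 512 * 4KB
 * = 2 MiB of kva per vmap block.
 */
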
static bool vmap_initialized __read_mostly = false;

struct vmap_block_queue {
	spinlock_t lock;
	struct list_head free;
};

struct vmap_block {
	spinlock_t lock;
	struct vmap_area *va;
	struct vmap_block_queue *vbq;
	unsigned long free, dirty;
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
	struct list_head free_list;
	struct rcu_head rcu_head;
	struct list_head purge;
};

/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
 * in the free path. Could get rid of this if we change the API to return a
 * "cookie" from alloc, to be passed to free. But no big deal yet.
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);

/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it shouldn't be a
 * big problem.
 */

static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}
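
/*
 * Example, assuming VMALLOC_START is VMAP_BLOCK_SIZE-aligned and
 * VMAP_BLOCK_SIZE is 2 MiB: an address at VMALLOC_START + 5 MiB lies in
 * the third block, and addr_to_vb_idx() returns 5MB / 2MB = 2, the radix
 * tree index under which that block was inserted by new_vmap_block().
 */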

static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	struct vmap_area *va;
	unsigned long vb_idx;
	int node, err;

	node = numa_node_id();

	vb = kmalloc_node(sizeof(struct vmap_block),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!vb))
		return ERR_PTR(-ENOMEM);

	va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
					VMALLOC_START, VMALLOC_END,
					node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(vb);
		return ERR_CAST(va);
	}

	err = radix_tree_preload(gfp_mask);
	if (unlikely(err)) {
		kfree(vb);
		free_vmap_area(va);
		return ERR_PTR(err);
	}

	spin_lock_init(&vb->lock);
	vb->va = va;
	vb->free = VMAP_BBMAP_BITS;
	vb->dirty = 0;
	bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
	bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
	INIT_LIST_HEAD(&vb->free_list);

	vb_idx = addr_to_vb_idx(va->va_start);
	spin_lock(&vmap_block_tree_lock);
	err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
	spin_unlock(&vmap_block_tree_lock);
	BUG_ON(err);
	radix_tree_preload_end();

	vbq = &get_cpu_var(vmap_block_queue);
	vb->vbq = vbq;
	spin_lock(&vbq->lock);
	list_add_rcu(&vb->free_list, &vbq->free);
	spin_unlock(&vbq->lock);
	put_cpu_var(vmap_block_queue);

	return vb;
}

static void rcu_free_vb(struct rcu_head *head)
{
	struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);

	kfree(vb);
}

static void free_vmap_block(struct vmap_block *vb)
{
	struct vmap_block *tmp;
	unsigned long vb_idx;

	vb_idx = addr_to_vb_idx(vb->va->va_start);
	spin_lock(&vmap_block_tree_lock);
	tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
	spin_unlock(&vmap_block_tree_lock);
	BUG_ON(tmp != vb);

	free_vmap_area_noflush(vb->va);
	call_rcu(&vb->rcu_head, rcu_free_vb);
}

static void purge_fragmented_blocks(int cpu)
{
	LIST_HEAD(purge);
	struct vmap_block *vb;
	struct vmap_block *n_vb;
	struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);

	rcu_read_lock();
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {

		/* fragmented blocks only: all space free or dirty, but not all dirty */
		if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
			continue;

		spin_lock(&vb->lock);
		if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
			vb->free = 0; /* prevent further allocs after releasing lock */
			vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
			bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
			bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
			spin_lock(&vbq->lock);
			list_del_rcu(&vb->free_list);
			spin_unlock(&vbq->lock);
			spin_unlock(&vb->lock);
			list_add_tail(&vb->purge, &purge);
		} else
			spin_unlock(&vb->lock);
	}
	rcu_read_unlock();

	list_for_each_entry_safe(vb, n_vb, &purge, purge) {
		list_del(&vb->purge);
		free_vmap_block(vb);
	}
}

static void purge_fragmented_blocks_thiscpu(void)
{
	purge_fragmented_blocks(smp_processor_id());
}

static void purge_fragmented_blocks_allcpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		purge_fragmented_blocks(cpu);
}

static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
{
	struct vmap_block_queue *vbq;
	struct vmap_block *vb;
	unsigned long addr = 0;
	unsigned int order;
	int purge = 0;

	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
	order = get_order(size);

again:
	rcu_read_lock();
	vbq = &get_cpu_var(vmap_block_queue);
	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
		int i;

		spin_lock(&vb->lock);
		if (vb->free < 1UL << order)
			goto next;

		i = bitmap_find_free_region(vb->alloc_map,
						VMAP_BBMAP_BITS, order);

		if (i < 0) {
			if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
				/* fragmented and no outstanding allocations */
				BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
				purge = 1;
			}
			goto next;
		}
		addr = vb->va->va_start + (i << PAGE_SHIFT);
		BUG_ON(addr_to_vb_idx(addr) !=
				addr_to_vb_idx(vb->va->va_start));
		vb->free -= 1UL << order;
		if (vb->free == 0) {
			spin_lock(&vbq->lock);
			list_del_rcu(&vb->free_list);
			spin_unlock(&vbq->lock);
		}
		spin_unlock(&vb->lock);
		break;
next:
		spin_unlock(&vb->lock);
	}

	if (purge)
		purge_fragmented_blocks_thiscpu();

	put_cpu_var(vmap_block_queue);
	rcu_read_unlock();

	if (!addr) {
		vb = new_vmap_block(gfp_mask);
		if (IS_ERR(vb))
			return vb;
		goto again;
	}

	return (void *)addr;
}

static void vb_free(const void *addr, unsigned long size)
{
	unsigned long offset;
	unsigned long vb_idx;
	unsigned int order;
	struct vmap_block *vb;

	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);

	flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);

	order = get_order(size);

	offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);

	vb_idx = addr_to_vb_idx((unsigned long)addr);
	rcu_read_lock();
	vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
	rcu_read_unlock();
	BUG_ON(!vb);

	vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);

	spin_lock(&vb->lock);
	BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));

	vb->dirty += 1UL << order;
	if (vb->dirty == VMAP_BBMAP_BITS) {
		BUG_ON(vb->free);
		spin_unlock(&vb->lock);
		free_vmap_block(vb);
	} else
		spin_unlock(&vb->lock);
}

/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings primarily
 * to amortize TLB flushing overheads. What this means is that any page you
 * have now, may, in a former life, have been mapped into kernel virtual
 * address by the vmap layer and so there might be some CPUs with TLB entries
 * still referencing that page (additional to the regular 1:1 kernel mapping).
 *
 * vm_unmap_aliases flushes all such lazy mappings. After it returns, we can
 * be sure that none of the pages we have control over will have any aliases
 * from the vmap layer.
 */
void vm_unmap_aliases(void)
{
	unsigned long start = ULONG_MAX, end = 0;
	int cpu;
	int flush = 0;

	if (unlikely(!vmap_initialized))
		return;

	for_each_possible_cpu(cpu) {
		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
		struct vmap_block *vb;

		rcu_read_lock();
		list_for_each_entry_rcu(vb, &vbq->free, free_list) {
			int i;

			spin_lock(&vb->lock);
			i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
			while (i < VMAP_BBMAP_BITS) {
				unsigned long s, e;
				int j;
				j = find_next_zero_bit(vb->dirty_map,
					VMAP_BBMAP_BITS, i);

				s = vb->va->va_start + (i << PAGE_SHIFT);
				e = vb->va->va_start + (j << PAGE_SHIFT);
				flush = 1;

				if (s < start)
					start = s;
				if (e > end)
					end = e;

				i = j;
				i = find_next_bit(vb->dirty_map,
							VMAP_BBMAP_BITS, i);
			}
			spin_unlock(&vb->lock);
		}
		rcu_read_unlock();
	}

	__purge_vmap_area_lazy(&start, &end, 1, flush);
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */
void vm_unmap_ram(const void *mem, unsigned int count)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr = (unsigned long)mem;

	BUG_ON(!addr);
	BUG_ON(addr < VMALLOC_START);
	BUG_ON(addr > VMALLOC_END);
	BUG_ON(addr & (PAGE_SIZE-1));

	debug_check_no_locks_freed(mem, size);
	vmap_debug_free_range(addr, addr+size);

	if (likely(count <= VMAP_MAX_ALLOC))
		vb_free(mem, size);
	else
		free_unmap_vmap_area_addr(addr);
}
EXPORT_SYMBOL(vm_unmap_ram);

/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
 *
 * For @count <= VMAP_MAX_ALLOC pages this goes through the per-cpu vmap
 * block allocator and can be faster than vmap(); please use it only for
 * short-lived objects, as mixing lifetimes fragments the address space.
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 */
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
	unsigned long size = count << PAGE_SHIFT;
	unsigned long addr;
	void *mem;

	if (likely(count <= VMAP_MAX_ALLOC)) {
		mem = vb_alloc(size, GFP_KERNEL);
		if (IS_ERR(mem))
			return NULL;
		addr = (unsigned long)mem;
	} else {
		struct vmap_area *va;
		va = alloc_vmap_area(size, PAGE_SIZE,
				VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
		if (IS_ERR(va))
			return NULL;

		addr = va->va_start;
		mem = (void *)addr;
	}
	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
		vm_unmap_ram(mem, count);
		return NULL;
	}
	return mem;
}
EXPORT_SYMBOL(vm_map_ram);
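
/*
 * Usage sketch (illustrative only; "pages", "nr" and the error label are
 * hypothetical caller-side names):
 *
 *	void *va = vm_map_ram(pages, nr, -1, PAGE_KERNEL);
 *	if (!va)
 *		goto err;
 *	... access the nr pages contiguously through va ...
 *	vm_unmap_ram(va, nr);	// must pass the same count as the map call
 */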

/**
 * vm_area_register_early - register vmap area early during boot
 * @vm: vm_struct to register
 * @align: requested alignment
 *
 * This function is used to register kernel vm area before
 * vmalloc_init() is called.  @vm->size and @vm->flags should contain
 * proper values on entry and other fields should be zero.  On return,
 * vm->addr contains the allocated address.
 */
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
	static size_t vm_init_off __initdata;
	unsigned long addr;

	addr = ALIGN(VMALLOC_START + vm_init_off, align);
	vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;

	vm->addr = (void *)addr;

	vm->next = vmlist;
	vmlist = vm;
}

void __init vmalloc_init(void)
{
	struct vmap_area *va;
	struct vm_struct *tmp;
	int i;

	for_each_possible_cpu(i) {
		struct vmap_block_queue *vbq;

		vbq = &per_cpu(vmap_block_queue, i);
		spin_lock_init(&vbq->lock);
		INIT_LIST_HEAD(&vbq->free);
	}

	/* Import existing vmlist entries. */
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
		va->flags = tmp->flags | VM_VM_AREA;
		va->va_start = (unsigned long)tmp->addr;
		va->va_end = va->va_start + tmp->size;
		__insert_vmap_area(va);
	}

	vmap_area_pcpu_hole = VMALLOC_END;

	vmap_initialized = true;
}

/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
 * should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vmap() on to-be-mapped areas before calling this
 * function.
 *
 * RETURNS:
 * The number of pages mapped on success, -errno on failure.
 */
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
			     pgprot_t prot, struct page **pages)
{
	return vmap_page_range_noflush(addr, addr + size, prot, pages);
}

/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
 * should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vunmap() on to-be-mapped areas before calling this
 * function and flush_tlb_kernel_range() after.
 */
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
	vunmap_page_range(addr, addr + size);
}
EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);

/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes vcache before
 * the unmapping and tlb after.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size;

	flush_cache_vunmap(addr, end);
	vunmap_page_range(addr, end);
	flush_tlb_kernel_range(addr, end);
}

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	unsigned long addr = (unsigned long)area->addr;
	unsigned long end = addr + area->size - PAGE_SIZE;	/* exclude guard page */
	int err;

	err = vmap_page_range(addr, end, prot, *pages);
	if (err > 0) {
		*pages += err;
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL_GPL(map_vm_area);

/*** Old vmalloc interfaces ***/
DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
			      unsigned long flags, void *caller)
{
	struct vm_struct *tmp, **p;

	vm->flags = flags;
	vm->addr = (void *)va->va_start;
	vm->size = va->va_end - va->va_start;
	vm->caller = caller;
	va->private = vm;
	va->flags |= VM_VM_AREA;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr >= vm->addr)
			break;
	}
	vm->next = *p;
	*p = vm;
	write_unlock(&vmlist_lock);
}

static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vmap_area *va;
	struct vm_struct *area;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	insert_vmalloc_vm(area, va, flags, caller);
	return area;
}

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  __builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);

struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
				       unsigned long start, unsigned long end,
				       void *caller)
{
	return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
				  caller);
}

/**
 * get_vm_area - reserve a contiguous kernel virtual area
 * @size: size of the area
 * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 * Search an area of @size in the kernel virtual mapping area,
 * and reserve it for our purposes.  Returns the area, or %NULL
 * on failure.
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  -1, GFP_KERNEL, __builtin_return_address(0));
}

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				     void *caller)
{
	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
				  -1, GFP_KERNEL, caller);
}

static struct vm_struct *find_vm_area(const void *addr)
{
	struct vmap_area *va;

	va = find_vmap_area((unsigned long)addr);
	if (va && va->flags & VM_VM_AREA)
		return va->private;

	return NULL;
}

/**
 * remove_vm_area - find and remove a continuous kernel virtual area
 * @addr: base address
 *
 * Search for the kernel VM area starting at @addr, and remove it.
 * This function returns the found VM area, but using it is NOT safe
 * on SMP machines, except for its size or flags.
 */
struct vm_struct *remove_vm_area(const void *addr)
{
	struct vmap_area *va;

	va = find_vmap_area((unsigned long)addr);
	if (va && va->flags & VM_VM_AREA) {
		struct vm_struct *vm = va->private;
		struct vm_struct *tmp, **p;

		/*
		 * remove from list and disallow access to this vm_struct
		 * before unmap. (address range confliction is maintained by
		 * vmap.)
		 */
		write_lock(&vmlist_lock);
		for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
			;
		*p = tmp->next;
		write_unlock(&vmlist_lock);

		vmap_debug_free_range(va->va_start, va->va_end);
		free_unmap_vmap_area(va);
		vm->size -= PAGE_SIZE;

		return vm;
	}
	return NULL;
}

static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 * NULL, no operation is performed.
 *
 * Must not be called in interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual
 * space.
 */
void *vmap(struct page **pages, unsigned int count,
	   unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	might_sleep();

	if (count > totalram_pages)
		return NULL;

	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
				  __builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);

static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;
	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}
	area->pages = pages;
	area->caller = caller;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node < 0)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

/**
 * __vmalloc_node_range - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @start: vm area range start
 * @end: vm area range end
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or -1
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 */
void *__vmalloc_node_range(unsigned long size, unsigned long align,
			unsigned long start, unsigned long end, gfp_t gfp_mask,
			pgprot_t prot, int node, void *caller)
{
	struct vm_struct *area;
	void *addr;
	unsigned long real_size = size;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node,
				  gfp_mask, caller);

	if (!area)
		return NULL;

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);

	/*
	 * A ref_count = 3 is needed because the vm_struct and vmap_area
	 * structures allocated in the __get_vm_area_node() function contain
	 * references to the virtual address of the vmalloc'ed block.
	 */
	kmemleak_alloc(addr, real_size, 3, gfp_mask);

	return addr;
}

/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or -1
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
				gfp_mask, prot, node, caller);
}

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, 1, gfp_mask, prot, -1,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);

static inline void *__vmalloc_node_flags(unsigned long size,
					int node, gfp_t flags)
{
	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
					node, __builtin_return_address(0));
}

/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
}
EXPORT_SYMBOL(vmalloc);

/**
 * vzalloc - allocate virtually contiguous memory with zero fill
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vzalloc(unsigned long size)
{
	return __vmalloc_node_flags(size, -1,
				GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);

/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 */
void *vmalloc_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, SHMLBA,
			     GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			     PAGE_KERNEL, -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
		area->flags |= VM_USERMAP;
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_user);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);

/**
 * vzalloc_node - allocate memory on a specific node with zero fill
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 * The memory allocated is set to zero.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc_node() instead.
 */
void *vzalloc_node(unsigned long size, int node)
{
	return __vmalloc_node_flags(size, node,
			 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc_node);

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

/**
 * vmalloc_exec - allocate virtually contiguous, executable memory
 * @size: allocation size
 *
 * Kernel-internal function to allocate enough pages to cover @size
 * from the page level allocator and map them into contiguous and
 * executable kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
			      -1, __builtin_return_address(0));
}

#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the
 * page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
			      -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 */
void *vmalloc_32_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
			     -1, __builtin_return_address(0));
	if (ret) {
		area = find_vm_area(ret);
		area->flags |= VM_USERMAP;
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);

/*
 * Small helper routine: copy contents to buf from addr.
 * If the page is not present, fill zero.
 */
static int aligned_vread(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = (unsigned long)addr & ~PAGE_MASK;
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
		/*
		 * To do safe access to this _mapped_ area, we need
		 * lock. But adding lock here means that we need to add
		 * overhead of vmalloc()/vfree() calls for this _debug_
		 * interface, rarely used. Instead of that, we'll use
		 * kmap() and get small overhead in this access function.
		 */
		if (p) {
			/*
			 * we can expect USER0 is not used (see vread/vwrite's
			 * function description)
			 */
			void *map = kmap_atomic(p, KM_USER0);
			memcpy(buf, map + offset, length);
			kunmap_atomic(map, KM_USER0);
		} else
			memset(buf, 0, length);

		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}

static int aligned_vwrite(char *buf, char *addr, unsigned long count)
{
	struct page *p;
	int copied = 0;

	while (count) {
		unsigned long offset, length;

		offset = (unsigned long)addr & ~PAGE_MASK;
		length = PAGE_SIZE - offset;
		if (length > count)
			length = count;
		p = vmalloc_to_page(addr);
		/*
		 * To do safe access to this _mapped_ area, we need
		 * lock. But adding lock here means that we need to add
		 * overhead of vmalloc()/vfree() calls for this _debug_
		 * interface, rarely used. Instead of that, we'll use
		 * kmap() and get small overhead in this access function.
		 */
		if (p) {
			/*
			 * we can expect USER0 is not used (see vread/vwrite's
			 * function description)
			 */
			void *map = kmap_atomic(p, KM_USER0);
			memcpy(map + offset, buf, length);
			kunmap_atomic(map, KM_USER0);
		}
		addr += length;
		buf += length;
		copied += length;
		count -= length;
	}
	return copied;
}

/**
 * vread() - read vmalloc area in a safe way.
 * @buf: buffer for reading data
 * @addr: vm address.
 * @count: number of bytes to be read.
 *
 * Returns # of bytes which addr and buf should be increased
 * (same number as @count).  Returns 0 if [addr...addr+count) doesn't
 * intersect any live vmalloc area.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from that area to a given buffer.  If the given memory
 * range of [addr...addr+count) includes some valid address, data is
 * copied to the proper area of @buf.  Memory holes are zero-filled,
 * and an IOREMAP area is treated as a memory hole: no copy is done.
 *
 * @buf should be a kernel buffer.  Because this function uses KM_USER0,
 * the caller must guarantee KM_USER0 is not in use.
 *
 * Note: in normal operation vread() is never necessary, because the
 * caller should know the vmalloc() area is valid and can use memcpy().
 * This is for routines which have to access the vmalloc area without
 * any information, such as /dev/kmem.
 */
long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long buflen = count;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		if (n > count)
			n = count;
		if (!(tmp->flags & VM_IOREMAP))
			aligned_vread(buf, addr, n);
		else /* IOREMAP area is treated as memory hole */
			memset(buf, 0, n);
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	read_unlock(&vmlist_lock);

	if (buf == buf_start)
		return 0;
	/* zero-fill memory holes */
	if (buf != buf_start + buflen)
		memset(buf, 0, buflen - (buf - buf_start));

	return buflen;
}

/**
 * vwrite() - write vmalloc area in a safe way.
 * @buf: buffer for source data
 * @addr: vm address.
 * @count: number of bytes to be written.
 *
 * Returns # of bytes which addr and buf should be increased
 * (same number as @count).  Returns 0 if [addr...addr+count) doesn't
 * intersect any live vmalloc area.
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * copies data from a buffer to that area.  If the given memory range
 * of [addr...addr+count) includes some valid address, data is copied
 * from the proper area of @buf.  Memory holes and IOREMAP areas are
 * skipped: no copy is done into them.
 *
 * @buf should be a kernel buffer.  Because this function uses KM_USER0,
 * the caller must guarantee KM_USER0 is not in use.
 */
long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr;
	unsigned long n, buflen;
	int copied = 0;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;
	buflen = count;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		if (n > count)
			n = count;
		if (!(tmp->flags & VM_IOREMAP)) {
			aligned_vwrite(buf, addr, n);
			copied++;
		}
		buf += n;
		addr += n;
		count -= n;
	}
finished:
	read_unlock(&vmlist_lock);
	if (!copied)
		return 0;
	return buflen;
}

/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma: vma to cover (map full range of vma)
 * @addr: vmalloc memory
 * @pgoff: number of pages into addr before first page to map
 *
 * Returns: 0 for success, -Exxx on failure
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * that it is big enough to cover the vma. Will return failure if
 * that criteria isn't met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
						unsigned long pgoff)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	unsigned long usize = vma->vm_end - vma->vm_start;

	if ((PAGE_SIZE-1) & (unsigned long)addr)
		return -EINVAL;

	area = find_vm_area(addr);
	if (!area)
		return -EINVAL;

	if (!(area->flags & VM_USERMAP))
		return -EINVAL;

	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
		return -EINVAL;

	addr += pgoff << PAGE_SHIFT;
	do {
		struct page *page = vmalloc_to_page(addr);
		int ret;

		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		addr += PAGE_SIZE;
		usize -= PAGE_SIZE;
	} while (usize > 0);

	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
	vma->vm_flags |= VM_RESERVED;

	return 0;
}
EXPORT_SYMBOL(remap_vmalloc_range);

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
}


static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
	/* apply_to_page_range() does all the hard work. */
	return 0;
}

/**
 * alloc_vm_area - allocate a range of kernel address space
 * @size: size of the area
 *
 * Returns: NULL on failure, vm_struct on success
 *
 * This function reserves a range of kernel address space, and
 * allocates pagetables to map that range.  No actual mappings
 * are created.  If the kernel address space is not shared
 * between processes, it syncs the pagetable across all processes.
 */
struct vm_struct *alloc_vm_area(size_t size)
{
	struct vm_struct *area;

	area = get_vm_area_caller(size, VM_IOREMAP,
				__builtin_return_address(0));
	if (area == NULL)
		return NULL;

	/*
	 * This ensures that page tables are constructed for this region
	 * of kernel virtual address space and mapped into init_mm.
	 */
	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				area->size, f, NULL)) {
		free_vm_area(area);
		return NULL;
	}

	/* Make sure the pagetables are constructed in process kernel
	   mappings */
	vmalloc_sync_all();

	return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

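/*
 * Usage sketch (illustrative only; "vm" is a hypothetical caller-side name):
 *
 *	struct vm_struct *vm = alloc_vm_area(PAGE_SIZE);
 *	if (!vm)
 *		return -ENOMEM;
 *	... point the PTEs covering vm->addr at the desired frames, e.g.
 *	via a hypervisor call, as Xen's grant-table code does ...
 *	free_vm_area(vm);
 */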
void free_vm_area(struct vm_struct *area)
{
	struct vm_struct *ret;
	ret = remove_vm_area(area->addr);
	BUG_ON(ret != area);
	kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);

#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
}

/**
 * pvm_find_next_prev - find the next and prev vmap_area surrounding @end
 * @end: target address
 * @pnext: out arg for the next vmap_area
 * @pprev: out arg for the previous vmap_area
 *
 * Returns: %true if either or both of next and prev are found,
 *	    %false if no vmap_area exists
 *
 * Find vmap_areas end addresses of which enclose @end.  ie. if not
 * NULL, *pnext->va_end > @end and *pprev->va_end <= @end.
 */
static bool pvm_find_next_prev(unsigned long end,
			       struct vmap_area **pnext,
			       struct vmap_area **pprev)
{
	struct rb_node *n = vmap_area_root.rb_node;
	struct vmap_area *va = NULL;

	while (n) {
		va = rb_entry(n, struct vmap_area, rb_node);
		if (end < va->va_end)
			n = n->rb_left;
		else if (end > va->va_end)
			n = n->rb_right;
		else
			break;
	}

	if (!va)
		return false;

	if (va->va_end > end) {
		*pnext = va;
		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
	} else {
		*pprev = va;
		*pnext = node_to_va(rb_next(&(*pprev)->rb_node));
	}
	return true;
}

/**
 * pvm_determine_end - find the highest aligned address between two vmap_areas
 * @pnext: in/out arg for the next vmap_area
 * @pprev: in/out arg for the previous vmap_area
 * @align: alignment
 *
 * Returns: determined end address
 *
 * Find the highest aligned address between *@pnext and *@pprev below
 * VMALLOC_END.  *@pnext and *@pprev are adjusted so that the aligned
 * down address is between the end addresses of the two vmap_areas.
 *
 * Please note that the address returned by this function may fall
 * inside *@pnext vmap_area.  In that case, this function returns the
 * address which the caller should decrease further.
 */
static unsigned long pvm_determine_end(struct vmap_area **pnext,
				       struct vmap_area **pprev,
				       unsigned long align)
{
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	unsigned long addr;

	if (*pnext)
		addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
	else
		addr = vmalloc_end;

	while (*pprev && (*pprev)->va_end > addr) {
		*pnext = *pprev;
		*pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
	}

	return addr;
}

/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    vm_structs on success, %NULL on failure
 *
 * Percpu allocator wants to use congruent vm areas so that it can
 * maintain the offsets among percpu areas.  This function allocates
 * and sets up vm areas congruent to each other but at different
 * offsets, as specified by @offsets and @sizes.  The areas are
 * allocated from the top of the vmalloc area downwards, below
 * vmap_area_pcpu_hole.
 *
 * Despite its complicated look, this allocator is rather simple.  It
 * does everything top-down and scans areas from the end looking for a
 * matching base.  While scanning, if any of the areas overlaps with an
 * existing vmap_area, the base address is pulled down to fit the area.
 * Scanning is repeated till all the areas fit and then all necessary
 * data structures are inserted.
 */
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
				     const size_t *sizes, int nr_vms,
				     size_t align)
{
	const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
	const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
	struct vmap_area **vas, *prev, *next;
	struct vm_struct **vms;
	int area, area2, last_area, term_area;
	unsigned long base, start, end, last_end;
	bool purged = false;

	/* verify parameters and allocate data structures */
	BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
	for (last_area = 0, area = 0; area < nr_vms; area++) {
		start = offsets[area];
		end = start + sizes[area];

		/* is everything aligned properly? */
		BUG_ON(!IS_ALIGNED(offsets[area], align));
		BUG_ON(!IS_ALIGNED(sizes[area], align));

		/* detect the area with the highest address */
		if (start > offsets[last_area])
			last_area = area;

		for (area2 = 0; area2 < nr_vms; area2++) {
			unsigned long start2 = offsets[area2];
			unsigned long end2 = start2 + sizes[area2];

			if (area2 == area)
				continue;

			BUG_ON(start2 >= start && start2 < end);
			BUG_ON(end2 <= end && end2 > start);
		}
	}
	last_end = offsets[last_area] + sizes[last_area];

	if (vmalloc_end - vmalloc_start < last_end) {
		WARN_ON(true);
		return NULL;
	}

	vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
	vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
	if (!vas || !vms)
		goto err_free;

	for (area = 0; area < nr_vms; area++) {
		vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
		vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
		if (!vas[area] || !vms[area])
			goto err_free;
	}
retry:
	spin_lock(&vmap_area_lock);

	/* start scanning - we scan from the top, begin with the last area */
	area = term_area = last_area;
	start = offsets[area];
	end = start + sizes[area];

	if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
		base = vmalloc_end - last_end;
		goto found;
	}
	base = pvm_determine_end(&next, &prev, align) - end;

	while (true) {
		BUG_ON(next && next->va_end <= base + end);
		BUG_ON(prev && prev->va_end > base + end);

		/*
		 * base might have underflowed, add last_end before
		 * comparing.
		 */
		if (base + last_end < vmalloc_start + last_end) {
			spin_unlock(&vmap_area_lock);
			if (!purged) {
				purge_vmap_area_lazy();
				purged = true;
				goto retry;
			}
			goto err_free;
		}

		/*
		 * If next overlaps, move base downwards so that it's
		 * right below next and then recheck.
		 */
		if (next && next->va_start < base + end) {
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * If prev overlaps, shift down next and prev and move
		 * base so that it's right below new next and then
		 * recheck.
		 */
		if (prev && prev->va_end > base + start) {
			next = prev;
			prev = node_to_va(rb_prev(&next->rb_node));
			base = pvm_determine_end(&next, &prev, align) - end;
			term_area = area;
			continue;
		}

		/*
		 * This area fits, move on to the previous one.  If
		 * the previous one is the terminal one, we're done.
		 */
		area = (area + nr_vms - 1) % nr_vms;
		if (area == term_area)
			break;
		start = offsets[area];
		end = start + sizes[area];
		pvm_find_next_prev(base + end, &next, &prev);
	}
found:
	/* we've found a fitting base, insert all va's */
	for (area = 0; area < nr_vms; area++) {
		struct vmap_area *va = vas[area];

		va->va_start = base + offsets[area];
		va->va_end = va->va_start + sizes[area];
		__insert_vmap_area(va);
	}

	vmap_area_pcpu_hole = base + offsets[last_area];

	spin_unlock(&vmap_area_lock);

	/* insert all vm's */
	for (area = 0; area < nr_vms; area++)
		insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
				  pcpu_get_vm_areas);

	kfree(vas);
	return vms;

err_free:
	for (area = 0; area < nr_vms; area++) {
		if (vas)
			kfree(vas[area]);
		if (vms)
			kfree(vms[area]);
	}
	kfree(vas);
	kfree(vms);
	return NULL;
}

/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
 */
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
{
	int i;

	for (i = 0; i < nr_vms; i++)
		free_vm_area(vms[i]);
	kfree(vms);
}
#endif

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
	__acquires(&vmlist_lock)
{
	loff_t n = *pos;
	struct vm_struct *v;

	read_lock(&vmlist_lock);
	v = vmlist;
	while (n > 0 && v) {
		n--;
		v = v->next;
	}
	if (!n)
		return v;

	return NULL;
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct vm_struct *v = p;

	++*pos;
	return v->next;
}

static void s_stop(struct seq_file *m, void *p)
	__releases(&vmlist_lock)
{
	read_unlock(&vmlist_lock);
}

static void show_numa_info(struct seq_file *m, struct vm_struct *v)
{
	if (NUMA_BUILD) {
		unsigned int nr, *counters = m->private;

		if (!counters)
			return;

		memset(counters, 0, nr_node_ids * sizeof(unsigned int));

		for (nr = 0; nr < v->nr_pages; nr++)
			counters[page_to_nid(v->pages[nr])]++;

		for_each_node_state(nr, N_HIGH_MEMORY)
			if (counters[nr])
				seq_printf(m, " N%u=%u", nr, counters[nr]);
	}
}

static int s_show(struct seq_file *m, void *p)
{
	struct vm_struct *v = p;

	seq_printf(m, "0x%p-0x%p %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller)
		seq_printf(m, " %pS", v->caller);

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_printf(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_printf(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_printf(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_printf(m, " user");

	if (v->flags & VM_VPAGES)
		seq_printf(m, " vpages");

	show_numa_info(m, v);
	seq_putc(m, '\n');
	return 0;
}

static const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};

static int vmalloc_open(struct inode *inode, struct file *file)
{
	unsigned int *ptr = NULL;
	int ret;

	if (NUMA_BUILD) {
		ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
		if (ptr == NULL)
			return -ENOMEM;
	}
	ret = seq_open(file, &vmalloc_op);
	if (!ret) {
		struct seq_file *m = file->private_data;
		m->private = ptr;
	} else
		kfree(ptr);
	return ret;
}

static const struct file_operations proc_vmalloc_operations = {
	.open		= vmalloc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};

static int __init proc_vmalloc_init(void)
{
	proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
	return 0;
}
module_init(proc_vmalloc_init);
#endif