/*
 *  linux/mm/vmalloc.c
 *
 *  Virtually contiguous memory allocation for the kernel: vmalloc()/vfree(),
 *  vmap()/vunmap(), the vmap_area allocator and the per-CPU vmap block layer.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

/*** Page table manipulation functions ***/

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the pages array which helps higher
	 * level callers keep track of where we're up to.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Set up page tables in kva (addr, end). The ptes shall have prot "prot",
 * and map the pages in the array "pages".
 *
 * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N].
 * Returns the number of pages mapped, or a negative errno.
 */
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
				   pgprot_t prot, struct page **pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = start;
	int err = 0;
	int nr = 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
		if (err)
			return err;
	} while (pgd++, addr = next, addr != end);

	return nr;
}

static int vmap_page_range(unsigned long start, unsigned long end,
			   pgprot_t prot, struct page **pages)
{
	int ret;

	ret = vmap_page_range_noflush(start, end, prot, pages);
	flush_cache_vmap(start, end);
	return ret;
}

int is_vmalloc_or_module_addr(const void *x)
{
	/*
	 * ARM, x86-64 and sparc64 put modules in a special place,
	 * and fall back on vmalloc() if that fails. Others
	 * just put it in the vmalloc space.
	 */
#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
	unsigned long addr = (unsigned long)x;
	if (addr >= MODULES_VADDR && addr < MODULES_END)
		return 1;
#endif
	return is_vmalloc_addr(x);
}

/*
 * Walk a vmalloc address to the struct page it maps.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long) vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);

	/*
	 * XXX we might need to change this if we add VIRTUAL_BUG_ON for
	 * architectures that do not vmalloc module space
	 */
	VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));

	if (!pgd_none(*pgd)) {
		pud_t *pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd_t *pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				pte_t *ptep, pte;

				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);

/*** Global kva allocator ***/

#define VM_LAZY_FREE	0x01
#define VM_LAZY_FREEING	0x02
#define VM_VM_AREA	0x04

struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long flags;
	struct rb_node rb_node;		/* address sorted rbtree */
	struct list_head list;		/* address sorted list */
	struct list_head purge_list;	/* "lazy purge" list */
	void *private;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(vmap_area_lock);
static LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;

/* The vmap cache globals are protected by vmap_area_lock */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

static unsigned long vmap_area_pcpu_hole;

static struct vmap_area *__find_vmap_area(unsigned long addr)
{
	struct rb_node *n = vmap_area_root.rb_node;

	while (n) {
		struct vmap_area *va;

		va = rb_entry(n, struct vmap_area, rb_node);
		if (addr < va->va_start)
			n = n->rb_left;
		else if (addr > va->va_start)
			n = n->rb_right;
		else
			return va;
	}

	return NULL;
}

static void __insert_vmap_area(struct vmap_area *va)
{
	struct rb_node **p = &vmap_area_root.rb_node;
	struct rb_node *parent = NULL;
	struct rb_node *tmp;

	while (*p) {
		struct vmap_area *tmp_va;

		parent = *p;
		tmp_va = rb_entry(parent, struct vmap_area, rb_node);
		if (va->va_start < tmp_va->va_end)
			p = &(*p)->rb_left;
		else if (va->va_end > tmp_va->va_start)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&va->rb_node, parent, p);
	rb_insert_color(&va->rb_node, &vmap_area_root);

	/* address-sort this list so it is usable like the vmlist */
	tmp = rb_prev(&va->rb_node);
	if (tmp) {
		struct vmap_area *prev;
		prev = rb_entry(tmp, struct vmap_area, rb_node);
		list_add_rcu(&va->list, &prev->list);
	} else
		list_add_rcu(&va->list, &vmap_area_list);
}

static void purge_vmap_area_lazy(void);

/*
 * Allocate a region of KVA of the specified size and alignment, within the
 * vstart and vend.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	BUG_ON(!size);
	BUG_ON(size & ~PAGE_MASK);
	BUG_ON(!is_power_of_2(align));

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	spin_lock(&vmap_area_lock);
	/*
	 * Invalidate cache if we have more permissive parameters.
	 * cached_hole_size notes the largest hole noticed _below_
	 * the vmap_area cached in free_vmap_cache: if size fits
	 * into that hole, we want to scan from vstart to reuse
	 * the hole instead of allocating above free_vmap_cache.
	 * Note that __free_vmap_area may update free_vmap_cache
	 * without updating cached_hole_size or cached_align.
	 */
	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}
	/* record if we encounter less permissive parameters */
	cached_vstart = vstart;
	cached_align = align;

	/* find starting point for our search */
	if (free_vmap_cache) {
		first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
		addr = ALIGN(first->va_end + PAGE_SIZE, align);
		if (addr < vstart)
			goto nocache;
		if (addr + size - 1 < addr)
			goto overflow;

	} else {
		addr = ALIGN(vstart, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = vmap_area_root.rb_node;
		first = NULL;

		while (n) {
			struct vmap_area *tmp;
			tmp = rb_entry(n, struct vmap_area, rb_node);
			if (tmp->va_end >= addr) {
				first = tmp;
				if (tmp->va_start <= addr)
					break;
				n = n->rb_left;
			} else
				n = n->rb_right;
		}

		if (!first)
			goto found;
	}

	/* from the starting point, walk areas until a suitable hole is found */
	while (addr + size >= first->va_start && addr + size <= vend) {
		if (addr + cached_hole_size < first->va_start)
			cached_hole_size = first->va_start - addr;
		addr = ALIGN(first->va_end + PAGE_SIZE, align);
		if (addr + size - 1 < addr)
			goto overflow;

		n = rb_next(&first->rb_node);
		if (n)
			first = rb_entry(n, struct vmap_area, rb_node);
		else
			goto found;
	}

found:
	if (addr + size > vend)
		goto overflow;

	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	__insert_vmap_area(va);
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	BUG_ON(va->va_start & (align-1));
	BUG_ON(va->va_start < vstart);
	BUG_ON(va->va_end > vend);

	return va;

overflow:
	spin_unlock(&vmap_area_lock);
	if (!purged) {
		purge_vmap_area_lazy();
		purged = 1;
		goto retry;
	}
	if (printk_ratelimit())
		printk(KERN_WARNING
			"vmap allocation for size %lu failed: "
			"use vmalloc=<size> to increase size.\n", size);
	kfree(va);
	return ERR_PTR(-EBUSY);
}

static void rcu_free_va(struct rcu_head *head)
{
	struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);

	kfree(va);
}

static void __free_vmap_area(struct vmap_area *va)
{
	BUG_ON(RB_EMPTY_NODE(&va->rb_node));

	if (free_vmap_cache) {
		if (va->va_end < cached_vstart) {
			free_vmap_cache = NULL;
		} else {
			struct vmap_area *cache;
			cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
			if (va->va_start <= cache->va_start) {
				free_vmap_cache = rb_prev(&va->rb_node);
				/*
				 * We don't try to update cached_hole_size
				 * or cached_align here; the cache just gets
				 * a little more conservative.
				 */
			}
		}
	}
	rb_erase(&va->rb_node, &vmap_area_root);
	RB_CLEAR_NODE(&va->rb_node);
	list_del_rcu(&va->list);

	/*
	 * Track the highest possible candidate for pcpu area
	 * allocation.  Areas outside of vmalloc area can be returned
	 * here too, consider only end addresses which fall inside
	 * vmalloc area proper.
	 */
	if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
		vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);

	call_rcu(&va->rcu_head, rcu_free_va);
}

/*
 * Free a region of KVA allocated by alloc_vmap_area
 */
static void free_vmap_area(struct vmap_area *va)
{
	spin_lock(&vmap_area_lock);
	__free_vmap_area(va);
	spin_unlock(&vmap_area_lock);
}

/*
 * Clear the pagetable entries of a given vmap_area
 */
static void unmap_vmap_area(struct vmap_area *va)
{
	vunmap_page_range(va->va_start, va->va_end);
}

static void vmap_debug_free_range(unsigned long start, unsigned long end)
{
	/*
	 * Unmap page tables and force a TLB flush immediately if
	 * CONFIG_DEBUG_PAGEALLOC is set. This catches use after free
	 * bugs similarly to those in linear kernel virtual address
	 * space after a page has been freed.
	 *
	 * All the lazy freeing logic is still retained, in order to
	 * minimise intrusiveness of this debugging feature.
	 *
	 * This is going to be *slow* (linear kernel virtual address
	 * debugging doesn't do a broadcast TLB flush so it is a lot
	 * faster).
	 */
#ifdef CONFIG_DEBUG_PAGEALLOC
	vunmap_page_range(start, end);
	flush_tlb_kernel_range(start, end);
#endif
}

/*
 * lazy_max_pages is the maximum number of pages' worth of lazily-freed
 * virtual address space to accumulate before forcing a purge.  Purging
 * lazily batches the expensive global TLB flushes, but holding on to too
 * much address space is wasteful (particularly on 32-bit) and makes each
 * flush cover more of the address space, so scale the threshold with the
 * log of the number of online CPUs.
 */
static unsigned long lazy_max_pages(void)
{
	unsigned int log;

	log = fls(num_online_cpus());

	return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}

static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);

/* for per-cpu blocks */
static void purge_fragmented_blocks_allcpus(void);

/*
 * called before a call to iounmap() if the caller wants vm_area_struct's
 * immediately freed.
 */
void set_iounmap_nonlazy(void)
{
	atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
}

/*
 * Purges all lazily-freed vmap areas.
 *
 * If sync is 0 then don't purge if there is already a purge in progress.
 * If force_flush is 1, then flush kernel TLBs between *start and *end even
 * if we found no lazy vmap areas to unmap (callers can use this to optimise
 * their own TLB flushing).
 * Returns with *start = min(*start, lowest purged address)
 *              *end = max(*end, highest purged address)
 */
static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
					int sync, int force_flush)
{
	static DEFINE_SPINLOCK(purge_lock);
	LIST_HEAD(valist);
	struct vmap_area *va;
	struct vmap_area *n_va;
	int nr = 0;

	/*
	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
	 * should not expect such behaviour. This just simplifies locking for
	 * the case that isn't actually used at the moment anyway.
	 */
	if (!sync && !force_flush) {
		if (!spin_trylock(&purge_lock))
			return;
	} else
		spin_lock(&purge_lock);

	if (sync)
		purge_fragmented_blocks_allcpus();

	rcu_read_lock();
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	rcu_read_unlock();

	if (nr)
		atomic_sub(nr, &vmap_lazy_nr);

	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

	if (nr) {
		spin_lock(&vmap_area_lock);
		list_for_each_entry_safe(va, n_va, &valist, purge_list)
			__free_vmap_area(va);
		spin_unlock(&vmap_area_lock);
	}
	spin_unlock(&purge_lock);
}

/*
 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
 * is already purging.
 */
static void try_purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 0, 0);
}

/*
 * Kick off a purge of the outstanding lazy areas.
 */
static void purge_vmap_area_lazy(void)
{
	unsigned long start = ULONG_MAX, end = 0;

	__purge_vmap_area_lazy(&start, &end, 1, 0);
}

/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap had been called for the correct range
 * previously.
 */
static void free_vmap_area_noflush(struct vmap_area *va)
{
	va->flags |= VM_LAZY_FREE;
	atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
	if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
		try_purge_vmap_area_lazy();
}

/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
	unmap_vmap_area(va);
	free_vmap_area_noflush(va);
}

/*
 * Free and unmap a vmap area
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
	flush_cache_vunmap(va->va_start, va->va_end);
	free_unmap_vmap_area_noflush(va);
}

static struct vmap_area *find_vmap_area(unsigned long addr)
{
	struct vmap_area *va;

	spin_lock(&vmap_area_lock);
	va = __find_vmap_area(addr);
	spin_unlock(&vmap_area_lock);

	return va;
}

static void free_unmap_vmap_area_addr(unsigned long addr)
{
	struct vmap_area *va;

	va = find_vmap_area(addr);
	BUG_ON(!va);
	free_unmap_vmap_area(va);
}

/*** Per cpu kva allocator ***/

/*
 * vmap space is limited especially on 32 bit architectures. Ensure there is
 * room for at least 16 percpu vmap blocks per CPU.
 */
/*
 * If we had a constant VMALLOC_START and VMALLOC_END, we'd like to be able
 * to #define VMALLOC_SPACE	(VMALLOC_END-VMALLOC_START). Guess
 * instead (we just need a rough idea)
 */
#if BITS_PER_LONG == 32
#define VMALLOC_SPACE		(128UL*1024*1024)
#else
#define VMALLOC_SPACE		(128UL*1024*1024*1024)
#endif

#define VMALLOC_PAGES		(VMALLOC_SPACE / PAGE_SIZE)
#define VMAP_MAX_ALLOC		BITS_PER_LONG
#define VMAP_BBMAP_BITS_MAX	1024
#define VMAP_BBMAP_BITS_MIN	(VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y)		((x) < (y) ? (x) : (y))
#define VMAP_MAX(x, y)		((x) > (y) ? (x) : (y))
#define VMAP_BBMAP_BITS		VMAP_MIN(VMAP_BBMAP_BITS_MAX,	\
					VMAP_MAX(VMAP_BBMAP_BITS_MIN,	\
					VMALLOC_PAGES / NR_CPUS / 16))

#define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)

static bool vmap_initialized __read_mostly = false;

struct vmap_block_queue {
	spinlock_t lock;
	struct list_head free;
};

struct vmap_block {
	spinlock_t lock;
	struct vmap_area *va;
	struct vmap_block_queue *vbq;
	unsigned long free, dirty;
	DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
	DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
	struct list_head free_list;
	struct rcu_head rcu_head;
	struct list_head purge;
};

/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);

/*
 * Radix tree of vmap blocks, indexed by address, to quickly find a vmap block
 * in the free path. Could get rid of this if we change the API to return a
 * "cookie" from alloc, to be passed to free. But no big deal yet.
 */
static DEFINE_SPINLOCK(vmap_block_tree_lock);
static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);

/*
 * We should probably have a fallback mechanism to allocate virtual memory
 * out of partially filled vmap blocks. However vmap block sizing should be
 * fairly reasonable according to the vmalloc size, so it probably isn't a
 * big problem.
 */

static unsigned long addr_to_vb_idx(unsigned long addr)
{
	addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
	addr /= VMAP_BLOCK_SIZE;
	return addr;
}

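/*
 * new_vmap_block - allocate a fresh vmap block and the vmap_area backing it,
 * insert the block into the vmap block radix tree and publish it on this
 * CPU's free list. Returns the block, or an ERR_PTR() on allocation failure.
 */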
785static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
786{
787 struct vmap_block_queue *vbq;
788 struct vmap_block *vb;
789 struct vmap_area *va;
790 unsigned long vb_idx;
791 int node, err;
792
793 node = numa_node_id();
794
795 vb = kmalloc_node(sizeof(struct vmap_block),
796 gfp_mask & GFP_RECLAIM_MASK, node);
797 if (unlikely(!vb))
798 return ERR_PTR(-ENOMEM);
799
800 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
801 VMALLOC_START, VMALLOC_END,
802 node, gfp_mask);
803 if (IS_ERR(va)) {
804 kfree(vb);
805 return ERR_CAST(va);
806 }
807
808 err = radix_tree_preload(gfp_mask);
809 if (unlikely(err)) {
810 kfree(vb);
811 free_vmap_area(va);
812 return ERR_PTR(err);
813 }
814
815 spin_lock_init(&vb->lock);
816 vb->va = va;
817 vb->free = VMAP_BBMAP_BITS;
818 vb->dirty = 0;
819 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
820 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
821 INIT_LIST_HEAD(&vb->free_list);
822
823 vb_idx = addr_to_vb_idx(va->va_start);
824 spin_lock(&vmap_block_tree_lock);
825 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
826 spin_unlock(&vmap_block_tree_lock);
827 BUG_ON(err);
828 radix_tree_preload_end();
829
830 vbq = &get_cpu_var(vmap_block_queue);
831 vb->vbq = vbq;
832 spin_lock(&vbq->lock);
833 list_add_rcu(&vb->free_list, &vbq->free);
834 spin_unlock(&vbq->lock);
835 put_cpu_var(vmap_block_queue);
836
837 return vb;
838}
839
840static void rcu_free_vb(struct rcu_head *head)
841{
842 struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
843
844 kfree(vb);
845}
846
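/*
 * free_vmap_block - remove the block from the radix tree, lazily free its
 * backing vmap_area and release the structure itself after an RCU grace
 * period.
 */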
847static void free_vmap_block(struct vmap_block *vb)
848{
849 struct vmap_block *tmp;
850 unsigned long vb_idx;
851
852 vb_idx = addr_to_vb_idx(vb->va->va_start);
853 spin_lock(&vmap_block_tree_lock);
854 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
855 spin_unlock(&vmap_block_tree_lock);
856 BUG_ON(tmp != vb);
857
858 free_vmap_area_noflush(vb->va);
859 call_rcu(&vb->rcu_head, rcu_free_vb);
860}
861
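/*
 * purge_fragmented_blocks - reclaim blocks on this CPU's free list that have
 * no outstanding allocations (every bit is either free or dirty) but are not
 * completely dirty, so the whole block can be returned to the vmap area
 * allocator.
 */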
862static void purge_fragmented_blocks(int cpu)
863{
864 LIST_HEAD(purge);
865 struct vmap_block *vb;
866 struct vmap_block *n_vb;
867 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
868
869 rcu_read_lock();
870 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
871
872 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
873 continue;
874
875 spin_lock(&vb->lock);
876 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
877 vb->free = 0;
878 vb->dirty = VMAP_BBMAP_BITS;
879 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
880 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
881 spin_lock(&vbq->lock);
882 list_del_rcu(&vb->free_list);
883 spin_unlock(&vbq->lock);
884 spin_unlock(&vb->lock);
885 list_add_tail(&vb->purge, &purge);
886 } else
887 spin_unlock(&vb->lock);
888 }
889 rcu_read_unlock();
890
891 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
892 list_del(&vb->purge);
893 free_vmap_block(vb);
894 }
895}
896
897static void purge_fragmented_blocks_thiscpu(void)
898{
899 purge_fragmented_blocks(smp_processor_id());
900}
901
902static void purge_fragmented_blocks_allcpus(void)
903{
904 int cpu;
905
906 for_each_possible_cpu(cpu)
907 purge_fragmented_blocks(cpu);
908}
909
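/*
 * vb_alloc - allocate a small chunk (at most VMAP_MAX_ALLOC pages) of kernel
 * virtual address space out of a per-CPU vmap block, creating a new block if
 * nothing on this CPU's free list can satisfy the request.
 */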
910static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
911{
912 struct vmap_block_queue *vbq;
913 struct vmap_block *vb;
914 unsigned long addr = 0;
915 unsigned int order;
916 int purge = 0;
917
918 BUG_ON(size & ~PAGE_MASK);
919 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
920 order = get_order(size);
921
922again:
923 rcu_read_lock();
924 vbq = &get_cpu_var(vmap_block_queue);
925 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
926 int i;
927
928 spin_lock(&vb->lock);
929 if (vb->free < 1UL << order)
930 goto next;
931
932 i = bitmap_find_free_region(vb->alloc_map,
933 VMAP_BBMAP_BITS, order);
934
935 if (i < 0) {
936 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
937
938 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
939 purge = 1;
940 }
941 goto next;
942 }
943 addr = vb->va->va_start + (i << PAGE_SHIFT);
944 BUG_ON(addr_to_vb_idx(addr) !=
945 addr_to_vb_idx(vb->va->va_start));
946 vb->free -= 1UL << order;
947 if (vb->free == 0) {
948 spin_lock(&vbq->lock);
949 list_del_rcu(&vb->free_list);
950 spin_unlock(&vbq->lock);
951 }
952 spin_unlock(&vb->lock);
953 break;
954next:
955 spin_unlock(&vb->lock);
956 }
957
958 if (purge)
959 purge_fragmented_blocks_thiscpu();
960
961 put_cpu_var(vmap_block_queue);
962 rcu_read_unlock();
963
964 if (!addr) {
965 vb = new_vmap_block(gfp_mask);
966 if (IS_ERR(vb))
967 return vb;
968 goto again;
969 }
970
971 return (void *)addr;
972}
973
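/*
 * vb_free - unmap a chunk previously returned by vb_alloc() and mark it
 * dirty in its vmap block; the block itself is freed once every bit in it
 * has become dirty.
 */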
974static void vb_free(const void *addr, unsigned long size)
975{
976 unsigned long offset;
977 unsigned long vb_idx;
978 unsigned int order;
979 struct vmap_block *vb;
980
981 BUG_ON(size & ~PAGE_MASK);
982 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
983
984 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
985
986 order = get_order(size);
987
988 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
989
990 vb_idx = addr_to_vb_idx((unsigned long)addr);
991 rcu_read_lock();
992 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
993 rcu_read_unlock();
994 BUG_ON(!vb);
995
996 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
997
998 spin_lock(&vb->lock);
999 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
1000
1001 vb->dirty += 1UL << order;
1002 if (vb->dirty == VMAP_BBMAP_BITS) {
1003 BUG_ON(vb->free);
1004 spin_unlock(&vb->lock);
1005 free_vmap_block(vb);
1006 } else
1007 spin_unlock(&vb->lock);
1008}
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023void vm_unmap_aliases(void)
1024{
1025 unsigned long start = ULONG_MAX, end = 0;
1026 int cpu;
1027 int flush = 0;
1028
1029 if (unlikely(!vmap_initialized))
1030 return;
1031
1032 for_each_possible_cpu(cpu) {
1033 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1034 struct vmap_block *vb;
1035
1036 rcu_read_lock();
1037 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1038 int i;
1039
1040 spin_lock(&vb->lock);
1041 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1042 while (i < VMAP_BBMAP_BITS) {
1043 unsigned long s, e;
1044 int j;
1045 j = find_next_zero_bit(vb->dirty_map,
1046 VMAP_BBMAP_BITS, i);
1047
1048 s = vb->va->va_start + (i << PAGE_SHIFT);
1049 e = vb->va->va_start + (j << PAGE_SHIFT);
1050 flush = 1;
1051
1052 if (s < start)
1053 start = s;
1054 if (e > end)
1055 end = e;
1056
1057 i = j;
1058 i = find_next_bit(vb->dirty_map,
1059 VMAP_BBMAP_BITS, i);
1060 }
1061 spin_unlock(&vb->lock);
1062 }
1063 rcu_read_unlock();
1064 }
1065
1066 __purge_vmap_area_lazy(&start, &end, 1, flush);
1067}
1068EXPORT_SYMBOL_GPL(vm_unmap_aliases);
1069
1070
1071
1072
1073
1074
1075void vm_unmap_ram(const void *mem, unsigned int count)
1076{
1077 unsigned long size = count << PAGE_SHIFT;
1078 unsigned long addr = (unsigned long)mem;
1079
1080 BUG_ON(!addr);
1081 BUG_ON(addr < VMALLOC_START);
1082 BUG_ON(addr > VMALLOC_END);
1083 BUG_ON(addr & (PAGE_SIZE-1));
1084
1085 debug_check_no_locks_freed(mem, size);
1086 vmap_debug_free_range(addr, addr+size);
1087
1088 if (likely(count <= VMAP_MAX_ALLOC))
1089 vb_free(mem, size);
1090 else
1091 free_unmap_vmap_area_addr(addr);
1092}
1093EXPORT_SYMBOL(vm_unmap_ram);
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1105{
1106 unsigned long size = count << PAGE_SHIFT;
1107 unsigned long addr;
1108 void *mem;
1109
1110 if (likely(count <= VMAP_MAX_ALLOC)) {
1111 mem = vb_alloc(size, GFP_KERNEL);
1112 if (IS_ERR(mem))
1113 return NULL;
1114 addr = (unsigned long)mem;
1115 } else {
1116 struct vmap_area *va;
1117 va = alloc_vmap_area(size, PAGE_SIZE,
1118 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1119 if (IS_ERR(va))
1120 return NULL;
1121
1122 addr = va->va_start;
1123 mem = (void *)addr;
1124 }
1125 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1126 vm_unmap_ram(mem, count);
1127 return NULL;
1128 }
1129 return mem;
1130}
1131EXPORT_SYMBOL(vm_map_ram);
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1146{
1147 static size_t vm_init_off __initdata;
1148 unsigned long addr;
1149
1150 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1151 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1152
1153 vm->addr = (void *)addr;
1154
1155 vm->next = vmlist;
1156 vmlist = vm;
1157}
1158
1159void __init vmalloc_init(void)
1160{
1161 struct vmap_area *va;
1162 struct vm_struct *tmp;
1163 int i;
1164
1165 for_each_possible_cpu(i) {
1166 struct vmap_block_queue *vbq;
1167
1168 vbq = &per_cpu(vmap_block_queue, i);
1169 spin_lock_init(&vbq->lock);
1170 INIT_LIST_HEAD(&vbq->free);
1171 }
1172
1173
1174 for (tmp = vmlist; tmp; tmp = tmp->next) {
1175 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1176 va->flags = tmp->flags | VM_VM_AREA;
1177 va->va_start = (unsigned long)tmp->addr;
1178 va->va_end = va->va_start + tmp->size;
1179 __insert_vmap_area(va);
1180 }
1181
1182 vmap_area_pcpu_hole = VMALLOC_END;
1183
1184 vmap_initialized = true;
1185}
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1207 pgprot_t prot, struct page **pages)
1208{
1209 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1210}
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1227{
1228 vunmap_page_range(addr, addr + size);
1229}
1230EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240void unmap_kernel_range(unsigned long addr, unsigned long size)
1241{
1242 unsigned long end = addr + size;
1243
1244 flush_cache_vunmap(addr, end);
1245 vunmap_page_range(addr, end);
1246 flush_tlb_kernel_range(addr, end);
1247}
1248
1249int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1250{
1251 unsigned long addr = (unsigned long)area->addr;
1252 unsigned long end = addr + area->size - PAGE_SIZE;
1253 int err;
1254
1255 err = vmap_page_range(addr, end, prot, *pages);
1256 if (err > 0) {
1257 *pages += err;
1258 err = 0;
1259 }
1260
1261 return err;
1262}
1263EXPORT_SYMBOL_GPL(map_vm_area);
1264
1265
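/*
 * The legacy vmlist, protected by vmlist_lock: an address-sorted singly
 * linked list of all vm_struct areas, still used by vread()/vwrite(),
 * /proc/vmallocinfo and early boot registration.
 */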
1266DEFINE_RWLOCK(vmlist_lock);
1267struct vm_struct *vmlist;
1268
1269static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1270 unsigned long flags, void *caller)
1271{
1272 struct vm_struct *tmp, **p;
1273
1274 vm->flags = flags;
1275 vm->addr = (void *)va->va_start;
1276 vm->size = va->va_end - va->va_start;
1277 vm->caller = caller;
1278 va->private = vm;
1279 va->flags |= VM_VM_AREA;
1280
1281 write_lock(&vmlist_lock);
1282 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1283 if (tmp->addr >= vm->addr)
1284 break;
1285 }
1286 vm->next = *p;
1287 *p = vm;
1288 write_unlock(&vmlist_lock);
1289}

static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vmap_area *va;
	struct vm_struct *area;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	insert_vmalloc_vm(area, va, flags, caller);
	return area;
}

1333struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1334 unsigned long start, unsigned long end)
1335{
1336 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1337 __builtin_return_address(0));
1338}
1339EXPORT_SYMBOL_GPL(__get_vm_area);
1340
1341struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1342 unsigned long start, unsigned long end,
1343 void *caller)
1344{
1345 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1346 caller);
1347}
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1359{
1360 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1361 -1, GFP_KERNEL, __builtin_return_address(0));
1362}
1363
1364struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1365 void *caller)
1366{
1367 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1368 -1, GFP_KERNEL, caller);
1369}
1370
1371static struct vm_struct *find_vm_area(const void *addr)
1372{
1373 struct vmap_area *va;
1374
1375 va = find_vmap_area((unsigned long)addr);
1376 if (va && va->flags & VM_VM_AREA)
1377 return va->private;
1378
1379 return NULL;
1380}
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390struct vm_struct *remove_vm_area(const void *addr)
1391{
1392 struct vmap_area *va;
1393
1394 va = find_vmap_area((unsigned long)addr);
1395 if (va && va->flags & VM_VM_AREA) {
1396 struct vm_struct *vm = va->private;
1397 struct vm_struct *tmp, **p;
1398
1399
1400
1401
1402
1403 write_lock(&vmlist_lock);
1404 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1405 ;
1406 *p = tmp->next;
1407 write_unlock(&vmlist_lock);
1408
1409 vmap_debug_free_range(va->va_start, va->va_end);
1410 free_unmap_vmap_area(va);
1411 vm->size -= PAGE_SIZE;
1412
1413 return vm;
1414 }
1415 return NULL;
1416}
1417
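/*
 * __vunmap - common implementation for vfree() and vunmap(): remove the area
 * from the vmalloc lists, unmap it, optionally free the backing pages, and
 * free the management structures.
 */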
1418static void __vunmap(const void *addr, int deallocate_pages)
1419{
1420 struct vm_struct *area;
1421
1422 if (!addr)
1423 return;
1424
1425 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1426 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1427 return;
1428 }
1429
1430 area = remove_vm_area(addr);
1431 if (unlikely(!area)) {
1432 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1433 addr);
1434 return;
1435 }
1436
1437 debug_check_no_locks_freed(addr, area->size);
1438 debug_check_no_obj_freed(addr, area->size);
1439
1440 if (deallocate_pages) {
1441 int i;
1442
1443 for (i = 0; i < area->nr_pages; i++) {
1444 struct page *page = area->pages[i];
1445
1446 BUG_ON(!page);
1447 __free_page(page);
1448 }
1449
1450 if (area->flags & VM_VPAGES)
1451 vfree(area->pages);
1452 else
1453 kfree(area->pages);
1454 }
1455
1456 kfree(area);
1457 return;
1458}

/**
 *	vfree  -  release memory allocated by vmalloc()
 *	@addr:		memory base address
 *
 *	Free the virtually continuous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc().  If @addr is
 *	NULL, no operation is performed.
 *
 *	Must not be called in interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);

/**
 *	vunmap  -  release virtual mapping obtained by vmap()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	might_sleep();
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507void *vmap(struct page **pages, unsigned int count,
1508 unsigned long flags, pgprot_t prot)
1509{
1510 struct vm_struct *area;
1511
1512 might_sleep();
1513
1514 if (count > totalram_pages)
1515 return NULL;
1516
1517 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1518 __builtin_return_address(0));
1519 if (!area)
1520 return NULL;
1521
1522 if (map_vm_area(area, prot, &pages)) {
1523 vunmap(area->addr);
1524 return NULL;
1525 }
1526
1527 return area->addr;
1528}
1529EXPORT_SYMBOL(vmap);
1530
1531static void *__vmalloc_node(unsigned long size, unsigned long align,
1532 gfp_t gfp_mask, pgprot_t prot,
1533 int node, void *caller);
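/*
 * __vmalloc_area_node - allocate the backing pages for @area one page at a
 * time (order-0, so no high-order allocations are needed) and map them into
 * the area. On failure the partially populated area is freed and NULL is
 * returned.
 */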
1534static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1535 pgprot_t prot, int node, void *caller)
1536{
1537 struct page **pages;
1538 unsigned int nr_pages, array_size, i;
1539 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1540
1541 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1542 array_size = (nr_pages * sizeof(struct page *));
1543
1544 area->nr_pages = nr_pages;
1545
1546 if (array_size > PAGE_SIZE) {
1547 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1548 PAGE_KERNEL, node, caller);
1549 area->flags |= VM_VPAGES;
1550 } else {
1551 pages = kmalloc_node(array_size, nested_gfp, node);
1552 }
1553 area->pages = pages;
1554 area->caller = caller;
1555 if (!area->pages) {
1556 remove_vm_area(area->addr);
1557 kfree(area);
1558 return NULL;
1559 }
1560
1561 for (i = 0; i < area->nr_pages; i++) {
1562 struct page *page;
1563
1564 if (node < 0)
1565 page = alloc_page(gfp_mask);
1566 else
1567 page = alloc_pages_node(node, gfp_mask, 0);
1568
1569 if (unlikely(!page)) {
1570
1571 area->nr_pages = i;
1572 goto fail;
1573 }
1574 area->pages[i] = page;
1575 }
1576
1577 if (map_vm_area(area, prot, &pages))
1578 goto fail;
1579 return area->addr;
1580
1581fail:
1582 vfree(area->addr);
1583 return NULL;
1584}
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601void *__vmalloc_node_range(unsigned long size, unsigned long align,
1602 unsigned long start, unsigned long end, gfp_t gfp_mask,
1603 pgprot_t prot, int node, void *caller)
1604{
1605 struct vm_struct *area;
1606 void *addr;
1607 unsigned long real_size = size;
1608
1609 size = PAGE_ALIGN(size);
1610 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1611 return NULL;
1612
1613 area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node,
1614 gfp_mask, caller);
1615
1616 if (!area)
1617 return NULL;
1618
1619 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1620
1621
1622
1623
1624
1625
1626 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1627
1628 return addr;
1629}
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644static void *__vmalloc_node(unsigned long size, unsigned long align,
1645 gfp_t gfp_mask, pgprot_t prot,
1646 int node, void *caller)
1647{
1648 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1649 gfp_mask, prot, node, caller);
1650}
1651
1652void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1653{
1654 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1655 __builtin_return_address(0));
1656}
1657EXPORT_SYMBOL(__vmalloc);
1658
1659static inline void *__vmalloc_node_flags(unsigned long size,
1660 int node, gfp_t flags)
1661{
1662 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1663 node, __builtin_return_address(0));
1664}
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675void *vmalloc(unsigned long size)
1676{
1677 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1678}
1679EXPORT_SYMBOL(vmalloc);
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691void *vzalloc(unsigned long size)
1692{
1693 return __vmalloc_node_flags(size, -1,
1694 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1695}
1696EXPORT_SYMBOL(vzalloc);
1697
1698
1699
1700
1701
1702
1703
1704
1705void *vmalloc_user(unsigned long size)
1706{
1707 struct vm_struct *area;
1708 void *ret;
1709
1710 ret = __vmalloc_node(size, SHMLBA,
1711 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1712 PAGE_KERNEL, -1, __builtin_return_address(0));
1713 if (ret) {
1714 area = find_vm_area(ret);
1715 area->flags |= VM_USERMAP;
1716 }
1717 return ret;
1718}
1719EXPORT_SYMBOL(vmalloc_user);
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732void *vmalloc_node(unsigned long size, int node)
1733{
1734 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1735 node, __builtin_return_address(0));
1736}
1737EXPORT_SYMBOL(vmalloc_node);
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751void *vzalloc_node(unsigned long size, int node)
1752{
1753 return __vmalloc_node_flags(size, node,
1754 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1755}
1756EXPORT_SYMBOL(vzalloc_node);
1757
1758#ifndef PAGE_KERNEL_EXEC
1759# define PAGE_KERNEL_EXEC PAGE_KERNEL
1760#endif
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774void *vmalloc_exec(unsigned long size)
1775{
1776 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1777 -1, __builtin_return_address(0));
1778}
1779
1780#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1781#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
1782#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1783#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
1784#else
1785#define GFP_VMALLOC32 GFP_KERNEL
1786#endif
1787
1788
1789
1790
1791
1792
1793
1794
1795void *vmalloc_32(unsigned long size)
1796{
1797 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1798 -1, __builtin_return_address(0));
1799}
1800EXPORT_SYMBOL(vmalloc_32);
1801
1802
1803
1804
1805
1806
1807
1808
1809void *vmalloc_32_user(unsigned long size)
1810{
1811 struct vm_struct *area;
1812 void *ret;
1813
1814 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1815 -1, __builtin_return_address(0));
1816 if (ret) {
1817 area = find_vm_area(ret);
1818 area->flags |= VM_USERMAP;
1819 }
1820 return ret;
1821}
1822EXPORT_SYMBOL(vmalloc_32_user);
1823
1824
1825
1826
1827
1828
1829static int aligned_vread(char *buf, char *addr, unsigned long count)
1830{
1831 struct page *p;
1832 int copied = 0;
1833
1834 while (count) {
1835 unsigned long offset, length;
1836
1837 offset = (unsigned long)addr & ~PAGE_MASK;
1838 length = PAGE_SIZE - offset;
1839 if (length > count)
1840 length = count;
1841 p = vmalloc_to_page(addr);
1842
1843
1844
1845
1846
1847
1848
1849 if (p) {
1850
1851
1852
1853
1854 void *map = kmap_atomic(p, KM_USER0);
1855 memcpy(buf, map + offset, length);
1856 kunmap_atomic(map, KM_USER0);
1857 } else
1858 memset(buf, 0, length);
1859
1860 addr += length;
1861 buf += length;
1862 copied += length;
1863 count -= length;
1864 }
1865 return copied;
1866}
1867
1868static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1869{
1870 struct page *p;
1871 int copied = 0;
1872
1873 while (count) {
1874 unsigned long offset, length;
1875
1876 offset = (unsigned long)addr & ~PAGE_MASK;
1877 length = PAGE_SIZE - offset;
1878 if (length > count)
1879 length = count;
1880 p = vmalloc_to_page(addr);
1881
1882
1883
1884
1885
1886
1887
1888 if (p) {
1889
1890
1891
1892
1893 void *map = kmap_atomic(p, KM_USER0);
1894 memcpy(map + offset, buf, length);
1895 kunmap_atomic(map, KM_USER0);
1896 }
1897 addr += length;
1898 buf += length;
1899 copied += length;
1900 count -= length;
1901 }
1902 return copied;
1903}
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933long vread(char *buf, char *addr, unsigned long count)
1934{
1935 struct vm_struct *tmp;
1936 char *vaddr, *buf_start = buf;
1937 unsigned long buflen = count;
1938 unsigned long n;
1939
1940
1941 if ((unsigned long) addr + count < count)
1942 count = -(unsigned long) addr;
1943
1944 read_lock(&vmlist_lock);
1945 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
1946 vaddr = (char *) tmp->addr;
1947 if (addr >= vaddr + tmp->size - PAGE_SIZE)
1948 continue;
1949 while (addr < vaddr) {
1950 if (count == 0)
1951 goto finished;
1952 *buf = '\0';
1953 buf++;
1954 addr++;
1955 count--;
1956 }
1957 n = vaddr + tmp->size - PAGE_SIZE - addr;
1958 if (n > count)
1959 n = count;
1960 if (!(tmp->flags & VM_IOREMAP))
1961 aligned_vread(buf, addr, n);
1962 else
1963 memset(buf, 0, n);
1964 buf += n;
1965 addr += n;
1966 count -= n;
1967 }
1968finished:
1969 read_unlock(&vmlist_lock);
1970
1971 if (buf == buf_start)
1972 return 0;
1973
1974 if (buf != buf_start + buflen)
1975 memset(buf, 0, buflen - (buf - buf_start));
1976
1977 return buflen;
1978}
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008long vwrite(char *buf, char *addr, unsigned long count)
2009{
2010 struct vm_struct *tmp;
2011 char *vaddr;
2012 unsigned long n, buflen;
2013 int copied = 0;
2014
2015
2016 if ((unsigned long) addr + count < count)
2017 count = -(unsigned long) addr;
2018 buflen = count;
2019
2020 read_lock(&vmlist_lock);
2021 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2022 vaddr = (char *) tmp->addr;
2023 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2024 continue;
2025 while (addr < vaddr) {
2026 if (count == 0)
2027 goto finished;
2028 buf++;
2029 addr++;
2030 count--;
2031 }
2032 n = vaddr + tmp->size - PAGE_SIZE - addr;
2033 if (n > count)
2034 n = count;
2035 if (!(tmp->flags & VM_IOREMAP)) {
2036 aligned_vwrite(buf, addr, n);
2037 copied++;
2038 }
2039 buf += n;
2040 addr += n;
2041 count -= n;
2042 }
2043finished:
2044 read_unlock(&vmlist_lock);
2045 if (!copied)
2046 return 0;
2047 return buflen;
2048}
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2065 unsigned long pgoff)
2066{
2067 struct vm_struct *area;
2068 unsigned long uaddr = vma->vm_start;
2069 unsigned long usize = vma->vm_end - vma->vm_start;
2070
2071 if ((PAGE_SIZE-1) & (unsigned long)addr)
2072 return -EINVAL;
2073
2074 area = find_vm_area(addr);
2075 if (!area)
2076 return -EINVAL;
2077
2078 if (!(area->flags & VM_USERMAP))
2079 return -EINVAL;
2080
2081 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2082 return -EINVAL;
2083
2084 addr += pgoff << PAGE_SHIFT;
2085 do {
2086 struct page *page = vmalloc_to_page(addr);
2087 int ret;
2088
2089 ret = vm_insert_page(vma, uaddr, page);
2090 if (ret)
2091 return ret;
2092
2093 uaddr += PAGE_SIZE;
2094 addr += PAGE_SIZE;
2095 usize -= PAGE_SIZE;
2096 } while (usize > 0);
2097
2098
2099 vma->vm_flags |= VM_RESERVED;
2100
2101 return 0;
2102}
2103EXPORT_SYMBOL(remap_vmalloc_range);
2104
2105
2106
2107
2108
2109void __attribute__((weak)) vmalloc_sync_all(void)
2110{
2111}
2112
2113
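/*
 * Callback for the apply_to_page_range() call in alloc_vm_area(): the page
 * table walk itself allocates the page tables, so there is nothing left to
 * do here.
 */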
2114static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2115{
2116
2117 return 0;
2118}
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132struct vm_struct *alloc_vm_area(size_t size)
2133{
2134 struct vm_struct *area;
2135
2136 area = get_vm_area_caller(size, VM_IOREMAP,
2137 __builtin_return_address(0));
2138 if (area == NULL)
2139 return NULL;
2140
2141
2142
2143
2144
2145 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2146 area->size, f, NULL)) {
2147 free_vm_area(area);
2148 return NULL;
2149 }
2150
2151
2152
2153 vmalloc_sync_all();
2154
2155 return area;
2156}
2157EXPORT_SYMBOL_GPL(alloc_vm_area);
2158
2159void free_vm_area(struct vm_struct *area)
2160{
2161 struct vm_struct *ret;
2162 ret = remove_vm_area(area->addr);
2163 BUG_ON(ret != area);
2164 kfree(area);
2165}
2166EXPORT_SYMBOL_GPL(free_vm_area);
2167
2168#ifdef CONFIG_SMP
2169static struct vmap_area *node_to_va(struct rb_node *n)
2170{
2171 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2172}
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186static bool pvm_find_next_prev(unsigned long end,
2187 struct vmap_area **pnext,
2188 struct vmap_area **pprev)
2189{
2190 struct rb_node *n = vmap_area_root.rb_node;
2191 struct vmap_area *va = NULL;
2192
2193 while (n) {
2194 va = rb_entry(n, struct vmap_area, rb_node);
2195 if (end < va->va_end)
2196 n = n->rb_left;
2197 else if (end > va->va_end)
2198 n = n->rb_right;
2199 else
2200 break;
2201 }
2202
2203 if (!va)
2204 return false;
2205
2206 if (va->va_end > end) {
2207 *pnext = va;
2208 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2209 } else {
2210 *pprev = va;
2211 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2212 }
2213 return true;
2214}
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232static unsigned long pvm_determine_end(struct vmap_area **pnext,
2233 struct vmap_area **pprev,
2234 unsigned long align)
2235{
2236 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2237 unsigned long addr;
2238
2239 if (*pnext)
2240 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2241 else
2242 addr = vmalloc_end;
2243
2244 while (*pprev && (*pprev)->va_end > addr) {
2245 *pnext = *pprev;
2246 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2247 }
2248
2249 return addr;
2250}
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2277 const size_t *sizes, int nr_vms,
2278 size_t align)
2279{
2280 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2281 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2282 struct vmap_area **vas, *prev, *next;
2283 struct vm_struct **vms;
2284 int area, area2, last_area, term_area;
2285 unsigned long base, start, end, last_end;
2286 bool purged = false;
2287
2288
2289 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2290 for (last_area = 0, area = 0; area < nr_vms; area++) {
2291 start = offsets[area];
2292 end = start + sizes[area];
2293
2294
2295 BUG_ON(!IS_ALIGNED(offsets[area], align));
2296 BUG_ON(!IS_ALIGNED(sizes[area], align));
2297
2298
2299 if (start > offsets[last_area])
2300 last_area = area;
2301
2302 for (area2 = 0; area2 < nr_vms; area2++) {
2303 unsigned long start2 = offsets[area2];
2304 unsigned long end2 = start2 + sizes[area2];
2305
2306 if (area2 == area)
2307 continue;
2308
2309 BUG_ON(start2 >= start && start2 < end);
2310 BUG_ON(end2 <= end && end2 > start);
2311 }
2312 }
2313 last_end = offsets[last_area] + sizes[last_area];
2314
2315 if (vmalloc_end - vmalloc_start < last_end) {
2316 WARN_ON(true);
2317 return NULL;
2318 }
2319
2320 vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
2321 vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
2322 if (!vas || !vms)
2323 goto err_free;
2324
2325 for (area = 0; area < nr_vms; area++) {
2326 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2327 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2328 if (!vas[area] || !vms[area])
2329 goto err_free;
2330 }
2331retry:
2332 spin_lock(&vmap_area_lock);
2333
2334
2335 area = term_area = last_area;
2336 start = offsets[area];
2337 end = start + sizes[area];
2338
2339 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2340 base = vmalloc_end - last_end;
2341 goto found;
2342 }
2343 base = pvm_determine_end(&next, &prev, align) - end;
2344
2345 while (true) {
2346 BUG_ON(next && next->va_end <= base + end);
2347 BUG_ON(prev && prev->va_end > base + end);
2348
2349
2350
2351
2352
2353 if (base + last_end < vmalloc_start + last_end) {
2354 spin_unlock(&vmap_area_lock);
2355 if (!purged) {
2356 purge_vmap_area_lazy();
2357 purged = true;
2358 goto retry;
2359 }
2360 goto err_free;
2361 }
2362
2363
2364
2365
2366
2367 if (next && next->va_start < base + end) {
2368 base = pvm_determine_end(&next, &prev, align) - end;
2369 term_area = area;
2370 continue;
2371 }
2372
2373
2374
2375
2376
2377
2378 if (prev && prev->va_end > base + start) {
2379 next = prev;
2380 prev = node_to_va(rb_prev(&next->rb_node));
2381 base = pvm_determine_end(&next, &prev, align) - end;
2382 term_area = area;
2383 continue;
2384 }
2385
2386
2387
2388
2389
2390 area = (area + nr_vms - 1) % nr_vms;
2391 if (area == term_area)
2392 break;
2393 start = offsets[area];
2394 end = start + sizes[area];
2395 pvm_find_next_prev(base + end, &next, &prev);
2396 }
2397found:
2398
2399 for (area = 0; area < nr_vms; area++) {
2400 struct vmap_area *va = vas[area];
2401
2402 va->va_start = base + offsets[area];
2403 va->va_end = va->va_start + sizes[area];
2404 __insert_vmap_area(va);
2405 }
2406
2407 vmap_area_pcpu_hole = base + offsets[last_area];
2408
2409 spin_unlock(&vmap_area_lock);
2410
2411
2412 for (area = 0; area < nr_vms; area++)
2413 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2414 pcpu_get_vm_areas);
2415
2416 kfree(vas);
2417 return vms;
2418
2419err_free:
2420 for (area = 0; area < nr_vms; area++) {
2421 if (vas)
2422 kfree(vas[area]);
2423 if (vms)
2424 kfree(vms[area]);
2425 }
2426 kfree(vas);
2427 kfree(vms);
2428 return NULL;
2429}
2430
2431
2432
2433
2434
2435
2436
2437
2438void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2439{
2440 int i;
2441
2442 for (i = 0; i < nr_vms; i++)
2443 free_vm_area(vms[i]);
2444 kfree(vms);
2445}
2446#endif
2447
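/*
 * /proc/vmallocinfo support: a seq_file walker over the vmlist that prints
 * one line per vm_struct (address range, size, caller, flags and, on NUMA
 * builds, per-node page counts).
 */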
2448#ifdef CONFIG_PROC_FS
2449static void *s_start(struct seq_file *m, loff_t *pos)
2450 __acquires(&vmlist_lock)
2451{
2452 loff_t n = *pos;
2453 struct vm_struct *v;
2454
2455 read_lock(&vmlist_lock);
2456 v = vmlist;
2457 while (n > 0 && v) {
2458 n--;
2459 v = v->next;
2460 }
2461 if (!n)
2462 return v;
2463
2464 return NULL;
2465
2466}
2467
2468static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2469{
2470 struct vm_struct *v = p;
2471
2472 ++*pos;
2473 return v->next;
2474}
2475
2476static void s_stop(struct seq_file *m, void *p)
2477 __releases(&vmlist_lock)
2478{
2479 read_unlock(&vmlist_lock);
2480}
2481
2482static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2483{
2484 if (NUMA_BUILD) {
2485 unsigned int nr, *counters = m->private;
2486
2487 if (!counters)
2488 return;
2489
2490 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2491
2492 for (nr = 0; nr < v->nr_pages; nr++)
2493 counters[page_to_nid(v->pages[nr])]++;
2494
2495 for_each_node_state(nr, N_HIGH_MEMORY)
2496 if (counters[nr])
2497 seq_printf(m, " N%u=%u", nr, counters[nr]);
2498 }
2499}
2500
2501static int s_show(struct seq_file *m, void *p)
2502{
2503 struct vm_struct *v = p;
2504
2505 seq_printf(m, "0x%p-0x%p %7ld",
2506 v->addr, v->addr + v->size, v->size);
2507
2508 if (v->caller)
2509 seq_printf(m, " %pS", v->caller);
2510
2511 if (v->nr_pages)
2512 seq_printf(m, " pages=%d", v->nr_pages);
2513
2514 if (v->phys_addr)
2515 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2516
2517 if (v->flags & VM_IOREMAP)
2518 seq_printf(m, " ioremap");
2519
2520 if (v->flags & VM_ALLOC)
2521 seq_printf(m, " vmalloc");
2522
2523 if (v->flags & VM_MAP)
2524 seq_printf(m, " vmap");
2525
2526 if (v->flags & VM_USERMAP)
2527 seq_printf(m, " user");
2528
2529 if (v->flags & VM_VPAGES)
2530 seq_printf(m, " vpages");
2531
2532 show_numa_info(m, v);
2533 seq_putc(m, '\n');
2534 return 0;
2535}
2536
2537static const struct seq_operations vmalloc_op = {
2538 .start = s_start,
2539 .next = s_next,
2540 .stop = s_stop,
2541 .show = s_show,
2542};
2543
2544static int vmalloc_open(struct inode *inode, struct file *file)
2545{
2546 unsigned int *ptr = NULL;
2547 int ret;
2548
2549 if (NUMA_BUILD) {
2550 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2551 if (ptr == NULL)
2552 return -ENOMEM;
2553 }
2554 ret = seq_open(file, &vmalloc_op);
2555 if (!ret) {
2556 struct seq_file *m = file->private_data;
2557 m->private = ptr;
2558 } else
2559 kfree(ptr);
2560 return ret;
2561}
2562
2563static const struct file_operations proc_vmalloc_operations = {
2564 .open = vmalloc_open,
2565 .read = seq_read,
2566 .llseek = seq_lseek,
2567 .release = seq_release_private,
2568};
2569
2570static int __init proc_vmalloc_init(void)
2571{
2572 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2573 return 0;
2574}
2575module_init(proc_vmalloc_init);
2576#endif