/*
 * mm/vmalloc.c
 *
 * Virtually contiguous memory allocation: vmalloc()/vfree(), vmap()/vunmap(),
 * the vm_map_ram() per-CPU block allocator and the lazily flushed kva
 * allocator underneath them.
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
#include <linux/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>

/*** Page table manipulation functions ***/

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static void vunmap_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

static void vunmap_page_range(unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pte_t *pte;

	/*
	 * nr is a running index into the pages array which helps higher
	 * level callers keep track of where we're up to.
	 */
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = pages[*nr];

		if (WARN_ON(!pte_none(*pte)))
			return -EBUSY;
		if (WARN_ON(!page))
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*nr)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

/*
 * Set up page tables for the kernel virtual range [start, end) so that it
 * maps the given pages with protection prot.  Returns the number of pages
 * mapped on success or a negative errno.  No cache flushing is done; the
 * caller is responsible for flush_cache_vmap().
 */
156static int vmap_page_range_noflush(unsigned long start, unsigned long end,
157 pgprot_t prot, struct page **pages)
158{
159 pgd_t *pgd;
160 unsigned long next;
161 unsigned long addr = start;
162 int err = 0;
163 int nr = 0;
164
165 BUG_ON(addr >= end);
166 pgd = pgd_offset_k(addr);
167 do {
168 next = pgd_addr_end(addr, end);
169 err = vmap_pud_range(pgd, addr, next, prot, pages, &nr);
170 if (err)
171 return err;
172 } while (pgd++, addr = next, addr != end);
173
174 return nr;
175}
176
177static int vmap_page_range(unsigned long start, unsigned long end,
178 pgprot_t prot, struct page **pages)
179{
180 int ret;
181
182 ret = vmap_page_range_noflush(start, end, prot, pages);
183 flush_cache_vmap(start, end);
184 return ret;
185}
186
187int is_vmalloc_or_module_addr(const void *x)
188{
	/*
	 * Some architectures (e.g. ARM and x86-64) map modules into a
	 * dedicated region outside the vmalloc area, so a plain
	 * is_vmalloc_addr() check is not enough for module text and data.
	 */
194#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
195 unsigned long addr = (unsigned long)x;
196 if (addr >= MODULES_VADDR && addr < MODULES_END)
197 return 1;
198#endif
199 return is_vmalloc_addr(x);
200}

/*
 * Walk a vmalloc()/vmap() kernel virtual address down the page tables and
 * return the struct page it maps, or NULL if it is not mapped.
 */
205struct page *vmalloc_to_page(const void *vmalloc_addr)
206{
207 unsigned long addr = (unsigned long) vmalloc_addr;
208 struct page *page = NULL;
209 pgd_t *pgd = pgd_offset_k(addr);
210
211
212
213
214
215 VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr));
216
217 if (!pgd_none(*pgd)) {
218 pud_t *pud = pud_offset(pgd, addr);
219 if (!pud_none(*pud)) {
220 pmd_t *pmd = pmd_offset(pud, addr);
221 if (!pmd_none(*pmd)) {
222 pte_t *ptep, pte;
223
224 ptep = pte_offset_map(pmd, addr);
225 pte = *ptep;
226 if (pte_present(pte))
227 page = pte_page(pte);
228 pte_unmap(ptep);
229 }
230 }
231 }
232 return page;
233}
234EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
239unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
240{
241 return page_to_pfn(vmalloc_to_page(vmalloc_addr));
242}
243EXPORT_SYMBOL(vmalloc_to_pfn);

/*** Global kva allocator ***/

#define VM_LAZY_FREE	0x01	/* area is waiting for a lazy TLB flush */
#define VM_LAZY_FREEING	0x02	/* area is being purged */
#define VM_VM_AREA	0x04	/* area is backed by a vm_struct */

struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long flags;
	struct rb_node rb_node;		/* address sorted rbtree */
	struct list_head list;		/* address sorted list */
	struct list_head purge_list;	/* "lazy purge" list */
	struct vm_struct *vm;
	struct rcu_head rcu_head;
};

static DEFINE_SPINLOCK(vmap_area_lock);
static LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;

/* The vmap cache globals are protected by vmap_area_lock */
static struct rb_node *free_vmap_cache;
static unsigned long cached_hole_size;
static unsigned long cached_vstart;
static unsigned long cached_align;

static unsigned long vmap_area_pcpu_hole;
274
275static struct vmap_area *__find_vmap_area(unsigned long addr)
276{
277 struct rb_node *n = vmap_area_root.rb_node;
278
279 while (n) {
280 struct vmap_area *va;
281
282 va = rb_entry(n, struct vmap_area, rb_node);
283 if (addr < va->va_start)
284 n = n->rb_left;
285 else if (addr > va->va_start)
286 n = n->rb_right;
287 else
288 return va;
289 }
290
291 return NULL;
292}
293
294static void __insert_vmap_area(struct vmap_area *va)
295{
296 struct rb_node **p = &vmap_area_root.rb_node;
297 struct rb_node *parent = NULL;
298 struct rb_node *tmp;
299
300 while (*p) {
301 struct vmap_area *tmp_va;
302
303 parent = *p;
304 tmp_va = rb_entry(parent, struct vmap_area, rb_node);
305 if (va->va_start < tmp_va->va_end)
306 p = &(*p)->rb_left;
307 else if (va->va_end > tmp_va->va_start)
308 p = &(*p)->rb_right;
309 else
310 BUG();
311 }
312
313 rb_link_node(&va->rb_node, parent, p);
314 rb_insert_color(&va->rb_node, &vmap_area_root);
315
316
317 tmp = rb_prev(&va->rb_node);
318 if (tmp) {
319 struct vmap_area *prev;
320 prev = rb_entry(tmp, struct vmap_area, rb_node);
321 list_add_rcu(&va->list, &prev->list);
322 } else
323 list_add_rcu(&va->list, &vmap_area_list);
324}
325
326static void purge_vmap_area_lazy(void);

/*
 * Allocate a region of kernel virtual address space of size @size and
 * alignment @align anywhere in the range [vstart, vend).  Returns the new
 * vmap_area or an ERR_PTR() value on failure.
 */
332static struct vmap_area *alloc_vmap_area(unsigned long size,
333 unsigned long align,
334 unsigned long vstart, unsigned long vend,
335 int node, gfp_t gfp_mask)
336{
337 struct vmap_area *va;
338 struct rb_node *n;
339 unsigned long addr;
340 int purged = 0;
341 struct vmap_area *first;
342
343 BUG_ON(!size);
344 BUG_ON(size & ~PAGE_MASK);
345 BUG_ON(!is_power_of_2(align));
346
347 va = kmalloc_node(sizeof(struct vmap_area),
348 gfp_mask & GFP_RECLAIM_MASK, node);
349 if (unlikely(!va))
350 return ERR_PTR(-ENOMEM);
351
352retry:
353 spin_lock(&vmap_area_lock);
354
355
356
357
358
359
360
361
362
363 if (!free_vmap_cache ||
364 size < cached_hole_size ||
365 vstart < cached_vstart ||
366 align < cached_align) {
367nocache:
368 cached_hole_size = 0;
369 free_vmap_cache = NULL;
370 }
371
372 cached_vstart = vstart;
373 cached_align = align;
374
375
376 if (free_vmap_cache) {
377 first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
378 addr = ALIGN(first->va_end, align);
379 if (addr < vstart)
380 goto nocache;
381 if (addr + size - 1 < addr)
382 goto overflow;
383
384 } else {
385 addr = ALIGN(vstart, align);
386 if (addr + size - 1 < addr)
387 goto overflow;
388
389 n = vmap_area_root.rb_node;
390 first = NULL;
391
392 while (n) {
393 struct vmap_area *tmp;
394 tmp = rb_entry(n, struct vmap_area, rb_node);
395 if (tmp->va_end >= addr) {
396 first = tmp;
397 if (tmp->va_start <= addr)
398 break;
399 n = n->rb_left;
400 } else
401 n = n->rb_right;
402 }
403
404 if (!first)
405 goto found;
406 }
407
408
409 while (addr + size > first->va_start && addr + size <= vend) {
410 if (addr + cached_hole_size < first->va_start)
411 cached_hole_size = first->va_start - addr;
412 addr = ALIGN(first->va_end, align);
413 if (addr + size - 1 < addr)
414 goto overflow;
415
416 if (list_is_last(&first->list, &vmap_area_list))
417 goto found;
418
419 first = list_entry(first->list.next,
420 struct vmap_area, list);
421 }
422
423found:
424 if (addr + size > vend)
425 goto overflow;
426
427 va->va_start = addr;
428 va->va_end = addr + size;
429 va->flags = 0;
430 __insert_vmap_area(va);
431 free_vmap_cache = &va->rb_node;
432 spin_unlock(&vmap_area_lock);
433
434 BUG_ON(va->va_start & (align-1));
435 BUG_ON(va->va_start < vstart);
436 BUG_ON(va->va_end > vend);
437
438 return va;
439
440overflow:
441 spin_unlock(&vmap_area_lock);
442 if (!purged) {
443 purge_vmap_area_lazy();
444 purged = 1;
445 goto retry;
446 }
447 if (printk_ratelimit())
448 printk(KERN_WARNING
449 "vmap allocation for size %lu failed: "
450 "use vmalloc=<size> to increase size.\n", size);
451 kfree(va);
452 return ERR_PTR(-EBUSY);
453}
454
455static void __free_vmap_area(struct vmap_area *va)
456{
457 BUG_ON(RB_EMPTY_NODE(&va->rb_node));
458
459 if (free_vmap_cache) {
460 if (va->va_end < cached_vstart) {
461 free_vmap_cache = NULL;
462 } else {
463 struct vmap_area *cache;
464 cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
465 if (va->va_start <= cache->va_start) {
466 free_vmap_cache = rb_prev(&va->rb_node);
467
468
469
470
471 }
472 }
473 }
474 rb_erase(&va->rb_node, &vmap_area_root);
475 RB_CLEAR_NODE(&va->rb_node);
476 list_del_rcu(&va->list);
477
478
479
480
481
482
483
484 if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
485 vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
486
487 kfree_rcu(va, rcu_head);
488}

/*
 * Free a region of kernel virtual memory that was allocated with
 * alloc_vmap_area().
 */
493static void free_vmap_area(struct vmap_area *va)
494{
495 spin_lock(&vmap_area_lock);
496 __free_vmap_area(va);
497 spin_unlock(&vmap_area_lock);
498}

/*
 * Clear the pagetable entries of a given vmap_area.
 */
503static void unmap_vmap_area(struct vmap_area *va)
504{
505 vunmap_page_range(va->va_start, va->va_end);
506}
507
508static void vmap_debug_free_range(unsigned long start, unsigned long end)
509{
510
511
512
513
514
515
516
517
518
519
520
521
522
523#ifdef CONFIG_DEBUG_PAGEALLOC
524 vunmap_page_range(start, end);
525 flush_tlb_kernel_range(start, end);
526#endif
527}

/*
 * lazy_max_pages() is the maximum amount of virtual address space we gather
 * up in lazily freed vmap areas before purging them with a global TLB flush.
 *
 * A larger value means fewer TLB flushes at the cost of keeping more dead
 * address space around; the value is scaled with the (log2 of the) number
 * of online CPUs, at 32MB worth of pages per step, since vmap activity
 * tends to grow with the CPU count.
 */
545static unsigned long lazy_max_pages(void)
546{
547 unsigned int log;
548
549 log = fls(num_online_cpus());
550
551 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
552}
553
554static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
555
556
557static void purge_fragmented_blocks_allcpus(void);
558
559
560
561
562
563void set_iounmap_nonlazy(void)
564{
565 atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
566}

/*
 * Purge all lazily freed vmap areas.
 *
 * If @sync is 0, do nothing if another purge is already in progress.
 * If @force_flush is 1, flush the kernel TLB between *start and *end even
 * if no lazy areas were found (callers can use this to batch their own
 * flushing).  On return, *start and *end are widened to cover the lowest
 * and highest purged addresses.
 */
578static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
579 int sync, int force_flush)
580{
581 static DEFINE_SPINLOCK(purge_lock);
582 LIST_HEAD(valist);
583 struct vmap_area *va;
584 struct vmap_area *n_va;
585 int nr = 0;
586
587
588
589
590
591
592 if (!sync && !force_flush) {
593 if (!spin_trylock(&purge_lock))
594 return;
595 } else
596 spin_lock(&purge_lock);
597
598 if (sync)
599 purge_fragmented_blocks_allcpus();
600
601 rcu_read_lock();
602 list_for_each_entry_rcu(va, &vmap_area_list, list) {
603 if (va->flags & VM_LAZY_FREE) {
604 if (va->va_start < *start)
605 *start = va->va_start;
606 if (va->va_end > *end)
607 *end = va->va_end;
608 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
609 list_add_tail(&va->purge_list, &valist);
610 va->flags |= VM_LAZY_FREEING;
611 va->flags &= ~VM_LAZY_FREE;
612 }
613 }
614 rcu_read_unlock();
615
616 if (nr)
617 atomic_sub(nr, &vmap_lazy_nr);
618
619 if (nr || force_flush)
620 flush_tlb_kernel_range(*start, *end);
621
622 if (nr) {
623 spin_lock(&vmap_area_lock);
624 list_for_each_entry_safe(va, n_va, &valist, purge_list)
625 __free_vmap_area(va);
626 spin_unlock(&vmap_area_lock);
627 }
628 spin_unlock(&purge_lock);
629}

/*
 * Kick off a purge of the outstanding lazy areas.  Don't bother if somebody
 * else is already purging.
 */
635static void try_purge_vmap_area_lazy(void)
636{
637 unsigned long start = ULONG_MAX, end = 0;
638
639 __purge_vmap_area_lazy(&start, &end, 0, 0);
640}
641
642
643
644
645static void purge_vmap_area_lazy(void)
646{
647 unsigned long start = ULONG_MAX, end = 0;
648
649 __purge_vmap_area_lazy(&start, &end, 1, 0);
650}

/*
 * Lazily free a vmap area.  The caller must ensure the area has already
 * been unmapped and that flush_cache_vunmap() has been called for the
 * range.
 */
657static void free_vmap_area_noflush(struct vmap_area *va)
658{
659 va->flags |= VM_LAZY_FREE;
660 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
661 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
662 try_purge_vmap_area_lazy();
663}

/*
 * Unmap and lazily free a vmap area.  The caller must have called
 * flush_cache_vunmap() on the range already.
 */
669static void free_unmap_vmap_area_noflush(struct vmap_area *va)
670{
671 unmap_vmap_area(va);
672 free_vmap_area_noflush(va);
673}

/*
 * Flush the cache for, unmap and lazily free a vmap area.
 */
678static void free_unmap_vmap_area(struct vmap_area *va)
679{
680 flush_cache_vunmap(va->va_start, va->va_end);
681 free_unmap_vmap_area_noflush(va);
682}
683
684static struct vmap_area *find_vmap_area(unsigned long addr)
685{
686 struct vmap_area *va;
687
688 spin_lock(&vmap_area_lock);
689 va = __find_vmap_area(addr);
690 spin_unlock(&vmap_area_lock);
691
692 return va;
693}
694
695static void free_unmap_vmap_area_addr(unsigned long addr)
696{
697 struct vmap_area *va;
698
699 va = find_vmap_area(addr);
700 BUG_ON(!va);
701 free_unmap_vmap_area(va);
702}

/*** Per-CPU kva allocator ***/

/*
 * vmap space is limited, especially on 32-bit architectures.  The constants
 * below size the per-CPU vmap blocks so that there is room for a reasonable
 * number of blocks per CPU; VMALLOC_SPACE is only a rough guess because
 * VMALLOC_START/VMALLOC_END are not compile-time constants everywhere.
 */
716#if BITS_PER_LONG == 32
717#define VMALLOC_SPACE (128UL*1024*1024)
718#else
719#define VMALLOC_SPACE (128UL*1024*1024*1024)
720#endif
721
722#define VMALLOC_PAGES (VMALLOC_SPACE / PAGE_SIZE)
723#define VMAP_MAX_ALLOC BITS_PER_LONG
724#define VMAP_BBMAP_BITS_MAX 1024
725#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
726#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y))
727#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y))
728#define VMAP_BBMAP_BITS \
729 VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
730 VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
731 VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
732
733#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
734
735static bool vmap_initialized __read_mostly = false;
736
737struct vmap_block_queue {
738 spinlock_t lock;
739 struct list_head free;
740};
741
742struct vmap_block {
743 spinlock_t lock;
744 struct vmap_area *va;
745 struct vmap_block_queue *vbq;
746 unsigned long free, dirty;
747 DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
748 DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
749 struct list_head free_list;
750 struct rcu_head rcu_head;
751 struct list_head purge;
752};
753
754
755static DEFINE_PER_CPU(struct vmap_block_queue, vmap_block_queue);
756
757
758
759
760
761
762static DEFINE_SPINLOCK(vmap_block_tree_lock);
763static RADIX_TREE(vmap_block_tree, GFP_ATOMIC);
764
765
766
767
768
769
770
771
772static unsigned long addr_to_vb_idx(unsigned long addr)
773{
774 addr -= VMALLOC_START & ~(VMAP_BLOCK_SIZE-1);
775 addr /= VMAP_BLOCK_SIZE;
776 return addr;
777}
778
779static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
780{
781 struct vmap_block_queue *vbq;
782 struct vmap_block *vb;
783 struct vmap_area *va;
784 unsigned long vb_idx;
785 int node, err;
786
787 node = numa_node_id();
788
789 vb = kmalloc_node(sizeof(struct vmap_block),
790 gfp_mask & GFP_RECLAIM_MASK, node);
791 if (unlikely(!vb))
792 return ERR_PTR(-ENOMEM);
793
794 va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
795 VMALLOC_START, VMALLOC_END,
796 node, gfp_mask);
797 if (IS_ERR(va)) {
798 kfree(vb);
799 return ERR_CAST(va);
800 }
801
802 err = radix_tree_preload(gfp_mask);
803 if (unlikely(err)) {
804 kfree(vb);
805 free_vmap_area(va);
806 return ERR_PTR(err);
807 }
808
809 spin_lock_init(&vb->lock);
810 vb->va = va;
811 vb->free = VMAP_BBMAP_BITS;
812 vb->dirty = 0;
813 bitmap_zero(vb->alloc_map, VMAP_BBMAP_BITS);
814 bitmap_zero(vb->dirty_map, VMAP_BBMAP_BITS);
815 INIT_LIST_HEAD(&vb->free_list);
816
817 vb_idx = addr_to_vb_idx(va->va_start);
818 spin_lock(&vmap_block_tree_lock);
819 err = radix_tree_insert(&vmap_block_tree, vb_idx, vb);
820 spin_unlock(&vmap_block_tree_lock);
821 BUG_ON(err);
822 radix_tree_preload_end();
823
824 vbq = &get_cpu_var(vmap_block_queue);
825 vb->vbq = vbq;
826 spin_lock(&vbq->lock);
827 list_add_rcu(&vb->free_list, &vbq->free);
828 spin_unlock(&vbq->lock);
829 put_cpu_var(vmap_block_queue);
830
831 return vb;
832}
833
834static void free_vmap_block(struct vmap_block *vb)
835{
836 struct vmap_block *tmp;
837 unsigned long vb_idx;
838
839 vb_idx = addr_to_vb_idx(vb->va->va_start);
840 spin_lock(&vmap_block_tree_lock);
841 tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
842 spin_unlock(&vmap_block_tree_lock);
843 BUG_ON(tmp != vb);
844
845 free_vmap_area_noflush(vb->va);
846 kfree_rcu(vb, rcu_head);
847}
848
849static void purge_fragmented_blocks(int cpu)
850{
851 LIST_HEAD(purge);
852 struct vmap_block *vb;
853 struct vmap_block *n_vb;
854 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
855
856 rcu_read_lock();
857 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
858
859 if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
860 continue;
861
862 spin_lock(&vb->lock);
863 if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
864 vb->free = 0;
865 vb->dirty = VMAP_BBMAP_BITS;
866 bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
867 bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
868 spin_lock(&vbq->lock);
869 list_del_rcu(&vb->free_list);
870 spin_unlock(&vbq->lock);
871 spin_unlock(&vb->lock);
872 list_add_tail(&vb->purge, &purge);
873 } else
874 spin_unlock(&vb->lock);
875 }
876 rcu_read_unlock();
877
878 list_for_each_entry_safe(vb, n_vb, &purge, purge) {
879 list_del(&vb->purge);
880 free_vmap_block(vb);
881 }
882}
883
884static void purge_fragmented_blocks_thiscpu(void)
885{
886 purge_fragmented_blocks(smp_processor_id());
887}
888
889static void purge_fragmented_blocks_allcpus(void)
890{
891 int cpu;
892
893 for_each_possible_cpu(cpu)
894 purge_fragmented_blocks(cpu);
895}
896
897static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
898{
899 struct vmap_block_queue *vbq;
900 struct vmap_block *vb;
901 unsigned long addr = 0;
902 unsigned int order;
903 int purge = 0;
904
905 BUG_ON(size & ~PAGE_MASK);
906 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
907 if (WARN_ON(size == 0)) {
908
909
910
911
912
913 return NULL;
914 }
915 order = get_order(size);
916
917again:
918 rcu_read_lock();
919 vbq = &get_cpu_var(vmap_block_queue);
920 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
921 int i;
922
923 spin_lock(&vb->lock);
924 if (vb->free < 1UL << order)
925 goto next;
926
927 i = bitmap_find_free_region(vb->alloc_map,
928 VMAP_BBMAP_BITS, order);
929
930 if (i < 0) {
931 if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
932
933 BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
934 purge = 1;
935 }
936 goto next;
937 }
938 addr = vb->va->va_start + (i << PAGE_SHIFT);
939 BUG_ON(addr_to_vb_idx(addr) !=
940 addr_to_vb_idx(vb->va->va_start));
941 vb->free -= 1UL << order;
942 if (vb->free == 0) {
943 spin_lock(&vbq->lock);
944 list_del_rcu(&vb->free_list);
945 spin_unlock(&vbq->lock);
946 }
947 spin_unlock(&vb->lock);
948 break;
949next:
950 spin_unlock(&vb->lock);
951 }
952
953 if (purge)
954 purge_fragmented_blocks_thiscpu();
955
956 put_cpu_var(vmap_block_queue);
957 rcu_read_unlock();
958
959 if (!addr) {
960 vb = new_vmap_block(gfp_mask);
961 if (IS_ERR(vb))
962 return vb;
963 goto again;
964 }
965
966 return (void *)addr;
967}
968
969static void vb_free(const void *addr, unsigned long size)
970{
971 unsigned long offset;
972 unsigned long vb_idx;
973 unsigned int order;
974 struct vmap_block *vb;
975
976 BUG_ON(size & ~PAGE_MASK);
977 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
978
979 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
980
981 order = get_order(size);
982
983 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
984
985 vb_idx = addr_to_vb_idx((unsigned long)addr);
986 rcu_read_lock();
987 vb = radix_tree_lookup(&vmap_block_tree, vb_idx);
988 rcu_read_unlock();
989 BUG_ON(!vb);
990
991 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
992
993 spin_lock(&vb->lock);
994 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
995
996 vb->dirty += 1UL << order;
997 if (vb->dirty == VMAP_BBMAP_BITS) {
998 BUG_ON(vb->free);
999 spin_unlock(&vb->lock);
1000 free_vmap_block(vb);
1001 } else
1002 spin_unlock(&vb->lock);
1003}

/**
 * vm_unmap_aliases - unmap outstanding lazy aliases in the vmap layer
 *
 * The vmap/vmalloc layer lazily flushes kernel virtual mappings in order to
 * amortize TLB flushing overheads, so a page that has been vunmapped may
 * still have stale TLB entries referring to its old kernel virtual address.
 * vm_unmap_aliases() flushes all such lazy mappings; after it returns there
 * are no aliases of previously vmapped pages left in the vmap layer.
 */
1018void vm_unmap_aliases(void)
1019{
1020 unsigned long start = ULONG_MAX, end = 0;
1021 int cpu;
1022 int flush = 0;
1023
1024 if (unlikely(!vmap_initialized))
1025 return;
1026
1027 for_each_possible_cpu(cpu) {
1028 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
1029 struct vmap_block *vb;
1030
1031 rcu_read_lock();
1032 list_for_each_entry_rcu(vb, &vbq->free, free_list) {
1033 int i;
1034
1035 spin_lock(&vb->lock);
1036 i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
1037 while (i < VMAP_BBMAP_BITS) {
1038 unsigned long s, e;
1039 int j;
1040 j = find_next_zero_bit(vb->dirty_map,
1041 VMAP_BBMAP_BITS, i);
1042
1043 s = vb->va->va_start + (i << PAGE_SHIFT);
1044 e = vb->va->va_start + (j << PAGE_SHIFT);
1045 flush = 1;
1046
1047 if (s < start)
1048 start = s;
1049 if (e > end)
1050 end = e;
1051
1052 i = j;
1053 i = find_next_bit(vb->dirty_map,
1054 VMAP_BBMAP_BITS, i);
1055 }
1056 spin_unlock(&vb->lock);
1057 }
1058 rcu_read_unlock();
1059 }
1060
1061 __purge_vmap_area_lazy(&start, &end, 1, flush);
1062}
1063EXPORT_SYMBOL_GPL(vm_unmap_aliases);

/**
 * vm_unmap_ram - unmap linear kernel address space set up by vm_map_ram
 * @mem: the pointer returned by vm_map_ram
 * @count: the count passed to that vm_map_ram call (cannot unmap partial)
 */
1070void vm_unmap_ram(const void *mem, unsigned int count)
1071{
1072 unsigned long size = count << PAGE_SHIFT;
1073 unsigned long addr = (unsigned long)mem;
1074
1075 BUG_ON(!addr);
1076 BUG_ON(addr < VMALLOC_START);
1077 BUG_ON(addr > VMALLOC_END);
1078 BUG_ON(addr & (PAGE_SIZE-1));
1079
1080 debug_check_no_locks_freed(mem, size);
1081 vmap_debug_free_range(addr, addr+size);
1082
1083 if (likely(count <= VMAP_MAX_ALLOC))
1084 vb_free(mem, size);
1085 else
1086 free_unmap_vmap_area_addr(addr);
1087}
1088EXPORT_SYMBOL(vm_unmap_ram);

/**
 * vm_map_ram - map pages linearly into kernel virtual address (vmalloc space)
 * @pages: an array of pointers to the pages to be mapped
 * @count: number of pages
 * @node: prefer to allocate data structures on this node
 * @prot: memory protection to use.  PAGE_KERNEL for regular RAM
 *
 * Returns: a pointer to the address that has been mapped, or %NULL on failure
 */
1099void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
1100{
1101 unsigned long size = count << PAGE_SHIFT;
1102 unsigned long addr;
1103 void *mem;
1104
1105 if (likely(count <= VMAP_MAX_ALLOC)) {
1106 mem = vb_alloc(size, GFP_KERNEL);
1107 if (IS_ERR(mem))
1108 return NULL;
1109 addr = (unsigned long)mem;
1110 } else {
1111 struct vmap_area *va;
1112 va = alloc_vmap_area(size, PAGE_SIZE,
1113 VMALLOC_START, VMALLOC_END, node, GFP_KERNEL);
1114 if (IS_ERR(va))
1115 return NULL;
1116
1117 addr = va->va_start;
1118 mem = (void *)addr;
1119 }
1120 if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
1121 vm_unmap_ram(mem, count);
1122 return NULL;
1123 }
1124 return mem;
1125}
1126EXPORT_SYMBOL(vm_map_ram);
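
/*
 * Minimal usage sketch for vm_map_ram()/vm_unmap_ram() (hypothetical caller;
 * page allocation and error handling trimmed):
 *
 *	struct page *pages[4];
 *	void *va;
 *
 *	... fill pages[] with allocated pages ...
 *	va = vm_map_ram(pages, 4, -1, PAGE_KERNEL);
 *	if (va) {
 *		... use the virtually contiguous mapping ...
 *		vm_unmap_ram(va, 4);
 *	}
 */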
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138void __init vm_area_add_early(struct vm_struct *vm)
1139{
1140 struct vm_struct *tmp, **p;
1141
1142 BUG_ON(vmap_initialized);
1143 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1144 if (tmp->addr >= vm->addr) {
1145 BUG_ON(tmp->addr < vm->addr + vm->size);
1146 break;
1147 } else
1148 BUG_ON(tmp->addr + tmp->size > vm->addr);
1149 }
1150 vm->next = *p;
1151 *p = vm;
1152}
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166void __init vm_area_register_early(struct vm_struct *vm, size_t align)
1167{
1168 static size_t vm_init_off __initdata;
1169 unsigned long addr;
1170
1171 addr = ALIGN(VMALLOC_START + vm_init_off, align);
1172 vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
1173
1174 vm->addr = (void *)addr;
1175
1176 vm_area_add_early(vm);
1177}
1178
1179void __init vmalloc_init(void)
1180{
1181 struct vmap_area *va;
1182 struct vm_struct *tmp;
1183 int i;
1184
1185 for_each_possible_cpu(i) {
1186 struct vmap_block_queue *vbq;
1187
1188 vbq = &per_cpu(vmap_block_queue, i);
1189 spin_lock_init(&vbq->lock);
1190 INIT_LIST_HEAD(&vbq->free);
1191 }
1192
1193
1194 for (tmp = vmlist; tmp; tmp = tmp->next) {
1195 va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT);
1196 va->flags = VM_VM_AREA;
1197 va->va_start = (unsigned long)tmp->addr;
1198 va->va_end = va->va_start + tmp->size;
1199 va->vm = tmp;
1200 __insert_vmap_area(va);
1201 }
1202
1203 vmap_area_pcpu_hole = VMALLOC_END;
1204
1205 vmap_initialized = true;
1206}

/**
 * map_kernel_range_noflush - map kernel VM area with the specified pages
 * @addr: start of the VM area to map
 * @size: size of the VM area to map
 * @prot: page protection flags to use
 * @pages: pages to map
 *
 * Map PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
 * should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vmap() on the to-be-mapped area before calling
 * this function.
 *
 * RETURNS:
 * The number of pages mapped on success, -errno on failure.
 */
1227int map_kernel_range_noflush(unsigned long addr, unsigned long size,
1228 pgprot_t prot, struct page **pages)
1229{
1230 return vmap_page_range_noflush(addr, addr + size, prot, pages);
1231}

/**
 * unmap_kernel_range_noflush - unmap kernel VM area
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
 * should have been allocated using get_vm_area() and its friends.
 *
 * NOTE:
 * This function does NOT do any cache flushing.  The caller is responsible
 * for calling flush_cache_vunmap() on the to-be-unmapped area before calling
 * this function and flush_tlb_kernel_range() after.
 */
1247void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
1248{
1249 vunmap_page_range(addr, addr + size);
1250}
1251EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);

/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes the cache before
 * and the TLB after unmapping.
 */
1261void unmap_kernel_range(unsigned long addr, unsigned long size)
1262{
1263 unsigned long end = addr + size;
1264
1265 flush_cache_vunmap(addr, end);
1266 vunmap_page_range(addr, end);
1267 flush_tlb_kernel_range(addr, end);
1268}
1269
1270int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
1271{
1272 unsigned long addr = (unsigned long)area->addr;
1273 unsigned long end = addr + area->size - PAGE_SIZE;
1274 int err;
1275
1276 err = vmap_page_range(addr, end, prot, *pages);
1277 if (err > 0) {
1278 *pages += err;
1279 err = 0;
1280 }
1281
1282 return err;
1283}
1284EXPORT_SYMBOL_GPL(map_vm_area);
1285
1286
1287DEFINE_RWLOCK(vmlist_lock);
1288struct vm_struct *vmlist;
1289
1290static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1291 unsigned long flags, const void *caller)
1292{
1293 vm->flags = flags;
1294 vm->addr = (void *)va->va_start;
1295 vm->size = va->va_end - va->va_start;
1296 vm->caller = caller;
1297 va->vm = vm;
1298 va->flags |= VM_VM_AREA;
1299}
1300
1301static void insert_vmalloc_vmlist(struct vm_struct *vm)
1302{
1303 struct vm_struct *tmp, **p;
1304
1305 vm->flags &= ~VM_UNLIST;
1306 write_lock(&vmlist_lock);
1307 for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
1308 if (tmp->addr >= vm->addr)
1309 break;
1310 }
1311 vm->next = *p;
1312 *p = vm;
1313 write_unlock(&vmlist_lock);
1314}
1315
1316static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
1317 unsigned long flags, const void *caller)
1318{
1319 setup_vmalloc_vm(vm, va, flags, caller);
1320 insert_vmalloc_vmlist(vm);
1321}
1322
1323static struct vm_struct *__get_vm_area_node(unsigned long size,
1324 unsigned long align, unsigned long flags, unsigned long start,
1325 unsigned long end, int node, gfp_t gfp_mask, const void *caller)
1326{
1327 struct vmap_area *va;
1328 struct vm_struct *area;
1329
1330 BUG_ON(in_interrupt());
1331 if (flags & VM_IOREMAP) {
1332 int bit = fls(size);
1333
1334 if (bit > IOREMAP_MAX_ORDER)
1335 bit = IOREMAP_MAX_ORDER;
1336 else if (bit < PAGE_SHIFT)
1337 bit = PAGE_SHIFT;
1338
1339 align = 1ul << bit;
1340 }
1341
1342 size = PAGE_ALIGN(size);
1343 if (unlikely(!size))
1344 return NULL;
1345
1346 area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
1347 if (unlikely(!area))
1348 return NULL;
1349
1350
1351
1352
1353 size += PAGE_SIZE;
1354
1355 va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
1356 if (IS_ERR(va)) {
1357 kfree(area);
1358 return NULL;
1359 }
	/*
	 * When this function is called from __vmalloc_node_range() the new
	 * vm_struct is not added to vmlist here, to avoid exposing not yet
	 * initialized members such as pages and nr_pages; VM_UNLIST marks
	 * such areas and they are added to vmlist later.
	 */
1368 if (flags & VM_UNLIST)
1369 setup_vmalloc_vm(area, va, flags, caller);
1370 else
1371 insert_vmalloc_vm(area, va, flags, caller);
1372
1373 return area;
1374}
1375
1376struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
1377 unsigned long start, unsigned long end)
1378{
1379 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1380 __builtin_return_address(0));
1381}
1382EXPORT_SYMBOL_GPL(__get_vm_area);
1383
1384struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
1385 unsigned long start, unsigned long end,
1386 const void *caller)
1387{
1388 return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
1389 caller);
1390}

/**
 *	get_vm_area - reserve a contiguous kernel virtual area
 *	@size:		size of the area
 *	@flags:		%VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 *	Search an area of @size in the kernel virtual mapping area,
 *	and reserve it for our purposes.  Returns the area descriptor
 *	on success or %NULL on failure.
 */
1401struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
1402{
1403 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1404 -1, GFP_KERNEL, __builtin_return_address(0));
1405}
1406
1407struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
1408 const void *caller)
1409{
1410 return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
1411 -1, GFP_KERNEL, caller);
1412}
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422struct vm_struct *find_vm_area(const void *addr)
1423{
1424 struct vmap_area *va;
1425
1426 va = find_vmap_area((unsigned long)addr);
1427 if (va && va->flags & VM_VM_AREA)
1428 return va->vm;
1429
1430 return NULL;
1431}

/**
 *	remove_vm_area - find and remove a continuous kernel virtual area
 *	@addr:		base address
 *
 *	Search for the kernel VM area starting at @addr, remove it from
 *	the vmlist and unmap it.  Returns the found vm_struct (which the
 *	caller is expected to free) or %NULL if none was found.
 */
1441struct vm_struct *remove_vm_area(const void *addr)
1442{
1443 struct vmap_area *va;
1444
1445 va = find_vmap_area((unsigned long)addr);
1446 if (va && va->flags & VM_VM_AREA) {
1447 struct vm_struct *vm = va->vm;
1448
1449 if (!(vm->flags & VM_UNLIST)) {
1450 struct vm_struct *tmp, **p;
1451
1452
1453
1454
1455
1456 write_lock(&vmlist_lock);
1457 for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
1458 ;
1459 *p = tmp->next;
1460 write_unlock(&vmlist_lock);
1461 }
1462
1463 vmap_debug_free_range(va->va_start, va->va_end);
1464 free_unmap_vmap_area(va);
1465 vm->size -= PAGE_SIZE;
1466
1467 return vm;
1468 }
1469 return NULL;
1470}
1471
1472static void __vunmap(const void *addr, int deallocate_pages)
1473{
1474 struct vm_struct *area;
1475
1476 if (!addr)
1477 return;
1478
1479 if ((PAGE_SIZE-1) & (unsigned long)addr) {
1480 WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
1481 return;
1482 }
1483
1484 area = remove_vm_area(addr);
1485 if (unlikely(!area)) {
1486 WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
1487 addr);
1488 return;
1489 }
1490
1491 debug_check_no_locks_freed(addr, area->size);
1492 debug_check_no_obj_freed(addr, area->size);
1493
1494 if (deallocate_pages) {
1495 int i;
1496
1497 for (i = 0; i < area->nr_pages; i++) {
1498 struct page *page = area->pages[i];
1499
1500 BUG_ON(!page);
1501 __free_page(page);
1502 }
1503
1504 if (area->flags & VM_VPAGES)
1505 vfree(area->pages);
1506 else
1507 kfree(area->pages);
1508 }
1509
1510 kfree(area);
1511 return;
1512}
1513

/**
 *	vfree - release memory allocated by vmalloc()
 *	@addr:		memory base address
 *
 *	Free the virtually continuous memory area starting at @addr, as
 *	obtained from vmalloc(), vmalloc_32() or __vmalloc().  If @addr is
 *	NULL, no operation is performed.
 *
 *	Must not be called in interrupt context.
 */
1524void vfree(const void *addr)
1525{
1526 BUG_ON(in_interrupt());
1527
1528 kmemleak_free(addr);
1529
1530 __vunmap(addr, 1);
1531}
1532EXPORT_SYMBOL(vfree);

/**
 *	vunmap - release virtual mapping obtained by vmap()
 *	@addr:		memory base address
 *
 *	Free the virtually contiguous memory area starting at @addr,
 *	which was created from the page array passed to vmap().
 *
 *	Must not be called in interrupt context.
 */
1543void vunmap(const void *addr)
1544{
1545 BUG_ON(in_interrupt());
1546 might_sleep();
1547 __vunmap(addr, 0);
1548}
1549EXPORT_SYMBOL(vunmap);

/**
 *	vmap - map an array of pages into virtually contiguous space
 *	@pages:		array of page pointers
 *	@count:		number of pages to map
 *	@flags:		vm_area->flags
 *	@prot:		page protection for the mapping
 *
 *	Maps @count pages from @pages into contiguous kernel virtual
 *	space.
 */
1561void *vmap(struct page **pages, unsigned int count,
1562 unsigned long flags, pgprot_t prot)
1563{
1564 struct vm_struct *area;
1565
1566 might_sleep();
1567
1568 if (count > totalram_pages)
1569 return NULL;
1570
1571 area = get_vm_area_caller((count << PAGE_SHIFT), flags,
1572 __builtin_return_address(0));
1573 if (!area)
1574 return NULL;
1575
1576 if (map_vm_area(area, prot, &pages)) {
1577 vunmap(area->addr);
1578 return NULL;
1579 }
1580
1581 return area->addr;
1582}
1583EXPORT_SYMBOL(vmap);
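
/*
 * Minimal usage sketch for vmap()/vunmap() (hypothetical caller):
 *
 *	void *va = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
 *
 *	if (va) {
 *		... access the pages through the linear mapping ...
 *		vunmap(va);
 *	}
 *
 * The pages themselves remain owned by the caller; vunmap() only removes
 * the mapping.
 */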
1584
1585static void *__vmalloc_node(unsigned long size, unsigned long align,
1586 gfp_t gfp_mask, pgprot_t prot,
1587 int node, const void *caller);
1588static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
1589 pgprot_t prot, int node, const void *caller)
1590{
1591 const int order = 0;
1592 struct page **pages;
1593 unsigned int nr_pages, array_size, i;
1594 gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
1595
1596 nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
1597 array_size = (nr_pages * sizeof(struct page *));
1598
1599 area->nr_pages = nr_pages;
1600
1601 if (array_size > PAGE_SIZE) {
1602 pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
1603 PAGE_KERNEL, node, caller);
1604 area->flags |= VM_VPAGES;
1605 } else {
1606 pages = kmalloc_node(array_size, nested_gfp, node);
1607 }
1608 area->pages = pages;
1609 area->caller = caller;
1610 if (!area->pages) {
1611 remove_vm_area(area->addr);
1612 kfree(area);
1613 return NULL;
1614 }
1615
1616 for (i = 0; i < area->nr_pages; i++) {
1617 struct page *page;
1618 gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
1619
1620 if (node < 0)
1621 page = alloc_page(tmp_mask);
1622 else
1623 page = alloc_pages_node(node, tmp_mask, order);
1624
1625 if (unlikely(!page)) {
1626
1627 area->nr_pages = i;
1628 goto fail;
1629 }
1630 area->pages[i] = page;
1631 }
1632
1633 if (map_vm_area(area, prot, &pages))
1634 goto fail;
1635 return area->addr;
1636
1637fail:
1638 warn_alloc_failed(gfp_mask, order,
1639 "vmalloc: allocation failure, allocated %ld of %ld bytes\n",
1640 (area->nr_pages*PAGE_SIZE), area->size);
1641 vfree(area->addr);
1642 return NULL;
1643}

/**
 *	__vmalloc_node_range - allocate virtually contiguous memory
 *	@size:		allocation size
 *	@align:		desired alignment
 *	@start:		vm area range start
 *	@end:		vm area range end
 *	@gfp_mask:	flags for the page level allocator
 *	@prot:		protection mask for the allocated pages
 *	@node:		node to use for allocation or -1
 *	@caller:	caller's return address
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator with @gfp_mask flags.  Map them into contiguous
 *	kernel virtual space, using a pagetable protection of @prot.
 */
1660void *__vmalloc_node_range(unsigned long size, unsigned long align,
1661 unsigned long start, unsigned long end, gfp_t gfp_mask,
1662 pgprot_t prot, int node, const void *caller)
1663{
1664 struct vm_struct *area;
1665 void *addr;
1666 unsigned long real_size = size;
1667
1668 size = PAGE_ALIGN(size);
1669 if (!size || (size >> PAGE_SHIFT) > totalram_pages)
1670 goto fail;
1671
1672 area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNLIST,
1673 start, end, node, gfp_mask, caller);
1674 if (!area)
1675 goto fail;
1676
1677 addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
1678 if (!addr)
1679 return NULL;
1680
1681
1682
1683
1684
1685 insert_vmalloc_vmlist(area);
1686
1687
1688
1689
1690
1691
1692 kmemleak_alloc(addr, real_size, 3, gfp_mask);
1693
1694 return addr;
1695
1696fail:
1697 warn_alloc_failed(gfp_mask, 0,
1698 "vmalloc: allocation failure: %lu bytes\n",
1699 real_size);
1700 return NULL;
1701}
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716static void *__vmalloc_node(unsigned long size, unsigned long align,
1717 gfp_t gfp_mask, pgprot_t prot,
1718 int node, const void *caller)
1719{
1720 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
1721 gfp_mask, prot, node, caller);
1722}
1723
1724void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
1725{
1726 return __vmalloc_node(size, 1, gfp_mask, prot, -1,
1727 __builtin_return_address(0));
1728}
1729EXPORT_SYMBOL(__vmalloc);
1730
1731static inline void *__vmalloc_node_flags(unsigned long size,
1732 int node, gfp_t flags)
1733{
1734 return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
1735 node, __builtin_return_address(0));
1736}

/**
 *	vmalloc - allocate virtually contiguous memory
 *	@size:		allocation size
 *
 *	Allocate enough pages to cover @size from the page level
 *	allocator and map them into contiguous kernel virtual space.
 *
 *	For tight control over page level allocator and protection flags
 *	use __vmalloc() instead.
 */
1747void *vmalloc(unsigned long size)
1748{
1749 return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
1750}
1751EXPORT_SYMBOL(vmalloc);
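
/*
 * Minimal usage sketch (hypothetical caller):
 *
 *	buf = vmalloc(len);
 *	if (!buf)
 *		return -ENOMEM;
 *	... use buf ...
 *	vfree(buf);
 */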
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763void *vzalloc(unsigned long size)
1764{
1765 return __vmalloc_node_flags(size, -1,
1766 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1767}
1768EXPORT_SYMBOL(vzalloc);

/**
 *	vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 *	@size:		allocation size
 *
 *	The resulting memory area is zeroed so it can be mapped to userspace
 *	without leaking data, and is marked VM_USERMAP so that
 *	remap_vmalloc_range() accepts it.
 */
1777void *vmalloc_user(unsigned long size)
1778{
1779 struct vm_struct *area;
1780 void *ret;
1781
1782 ret = __vmalloc_node(size, SHMLBA,
1783 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
1784 PAGE_KERNEL, -1, __builtin_return_address(0));
1785 if (ret) {
1786 area = find_vm_area(ret);
1787 area->flags |= VM_USERMAP;
1788 }
1789 return ret;
1790}
1791EXPORT_SYMBOL(vmalloc_user);
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804void *vmalloc_node(unsigned long size, int node)
1805{
1806 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
1807 node, __builtin_return_address(0));
1808}
1809EXPORT_SYMBOL(vmalloc_node);
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823void *vzalloc_node(unsigned long size, int node)
1824{
1825 return __vmalloc_node_flags(size, node,
1826 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
1827}
1828EXPORT_SYMBOL(vzalloc_node);
1829
1830#ifndef PAGE_KERNEL_EXEC
1831# define PAGE_KERNEL_EXEC PAGE_KERNEL
1832#endif
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846void *vmalloc_exec(unsigned long size)
1847{
1848 return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
1849 -1, __builtin_return_address(0));
1850}
1851
1852#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
1853#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
1854#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
1855#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
1856#else
1857#define GFP_VMALLOC32 GFP_KERNEL
1858#endif
1859
1860
1861
1862
1863
1864
1865
1866
1867void *vmalloc_32(unsigned long size)
1868{
1869 return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
1870 -1, __builtin_return_address(0));
1871}
1872EXPORT_SYMBOL(vmalloc_32);
1873
1874
1875
1876
1877
1878
1879
1880
1881void *vmalloc_32_user(unsigned long size)
1882{
1883 struct vm_struct *area;
1884 void *ret;
1885
1886 ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
1887 -1, __builtin_return_address(0));
1888 if (ret) {
1889 area = find_vm_area(ret);
1890 area->flags |= VM_USERMAP;
1891 }
1892 return ret;
1893}
1894EXPORT_SYMBOL(vmalloc_32_user);
1895
1896
1897
1898
1899
1900
1901static int aligned_vread(char *buf, char *addr, unsigned long count)
1902{
1903 struct page *p;
1904 int copied = 0;
1905
1906 while (count) {
1907 unsigned long offset, length;
1908
1909 offset = (unsigned long)addr & ~PAGE_MASK;
1910 length = PAGE_SIZE - offset;
1911 if (length > count)
1912 length = count;
1913 p = vmalloc_to_page(addr);
1914
1915
1916
1917
1918
1919
1920
1921 if (p) {
1922
1923
1924
1925
1926 void *map = kmap_atomic(p);
1927 memcpy(buf, map + offset, length);
1928 kunmap_atomic(map);
1929 } else
1930 memset(buf, 0, length);
1931
1932 addr += length;
1933 buf += length;
1934 copied += length;
1935 count -= length;
1936 }
1937 return copied;
1938}
1939
1940static int aligned_vwrite(char *buf, char *addr, unsigned long count)
1941{
1942 struct page *p;
1943 int copied = 0;
1944
1945 while (count) {
1946 unsigned long offset, length;
1947
1948 offset = (unsigned long)addr & ~PAGE_MASK;
1949 length = PAGE_SIZE - offset;
1950 if (length > count)
1951 length = count;
1952 p = vmalloc_to_page(addr);
1953
1954
1955
1956
1957
1958
1959
1960 if (p) {
1961
1962
1963
1964
1965 void *map = kmap_atomic(p);
1966 memcpy(map + offset, buf, length);
1967 kunmap_atomic(map);
1968 }
1969 addr += length;
1970 buf += length;
1971 copied += length;
1972 count -= length;
1973 }
1974 return copied;
1975}

/**
 *	vread() - read vmalloc area in a safe way.
 *	@buf:		buffer for reading data
 *	@addr:		vm address.
 *	@count:		number of bytes to be read.
 *
 *	Returns the number of bytes by which @addr and @buf should be
 *	advanced, or 0 if [addr...addr+count) does not intersect any live
 *	vm_struct area.  Data from valid vmalloc'ed ranges is copied into
 *	@buf; memory holes and VM_IOREMAP areas are zero-filled rather than
 *	read.  @buf must be a kernel buffer.
 */
2003long vread(char *buf, char *addr, unsigned long count)
2004{
2005 struct vm_struct *tmp;
2006 char *vaddr, *buf_start = buf;
2007 unsigned long buflen = count;
2008 unsigned long n;
2009
2010
2011 if ((unsigned long) addr + count < count)
2012 count = -(unsigned long) addr;
2013
2014 read_lock(&vmlist_lock);
2015 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2016 vaddr = (char *) tmp->addr;
2017 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2018 continue;
2019 while (addr < vaddr) {
2020 if (count == 0)
2021 goto finished;
2022 *buf = '\0';
2023 buf++;
2024 addr++;
2025 count--;
2026 }
2027 n = vaddr + tmp->size - PAGE_SIZE - addr;
2028 if (n > count)
2029 n = count;
2030 if (!(tmp->flags & VM_IOREMAP))
2031 aligned_vread(buf, addr, n);
2032 else
2033 memset(buf, 0, n);
2034 buf += n;
2035 addr += n;
2036 count -= n;
2037 }
2038finished:
2039 read_unlock(&vmlist_lock);
2040
2041 if (buf == buf_start)
2042 return 0;
2043
2044 if (buf != buf_start + buflen)
2045 memset(buf, 0, buflen - (buf - buf_start));
2046
2047 return buflen;
2048}

/**
 *	vwrite() - write vmalloc area in a safe way.
 *	@buf:		buffer for source data
 *	@addr:		vm address.
 *	@count:		number of bytes to be written.
 *
 *	Returns the number of bytes by which @buf and @addr should be
 *	advanced, or 0 if [addr...addr+count) does not intersect any live
 *	vm_struct area.  Data from @buf is copied into valid vmalloc'ed
 *	ranges; memory holes and VM_IOREMAP areas are skipped.  @buf must
 *	be a kernel buffer.
 */
2076long vwrite(char *buf, char *addr, unsigned long count)
2077{
2078 struct vm_struct *tmp;
2079 char *vaddr;
2080 unsigned long n, buflen;
2081 int copied = 0;
2082
2083
2084 if ((unsigned long) addr + count < count)
2085 count = -(unsigned long) addr;
2086 buflen = count;
2087
2088 read_lock(&vmlist_lock);
2089 for (tmp = vmlist; count && tmp; tmp = tmp->next) {
2090 vaddr = (char *) tmp->addr;
2091 if (addr >= vaddr + tmp->size - PAGE_SIZE)
2092 continue;
2093 while (addr < vaddr) {
2094 if (count == 0)
2095 goto finished;
2096 buf++;
2097 addr++;
2098 count--;
2099 }
2100 n = vaddr + tmp->size - PAGE_SIZE - addr;
2101 if (n > count)
2102 n = count;
2103 if (!(tmp->flags & VM_IOREMAP)) {
2104 aligned_vwrite(buf, addr, n);
2105 copied++;
2106 }
2107 buf += n;
2108 addr += n;
2109 count -= n;
2110 }
2111finished:
2112 read_unlock(&vmlist_lock);
2113 if (!copied)
2114 return 0;
2115 return buflen;
2116}

/**
 *	remap_vmalloc_range - map vmalloc pages to userspace
 *	@vma:		vma to cover (map full range of vma)
 *	@addr:		vmalloc memory
 *	@pgoff:		number of pages into addr before first page to map
 *
 *	Returns:	0 for success, -Exxx on failure
 *
 *	This function checks that @addr is a valid vmalloc'ed area, marked
 *	with VM_USERMAP, and that it is big enough to cover the vma.  It
 *	returns failure if those criteria aren't met.
 *
 *	Similar to remap_pfn_range() (see mm/memory.c)
 */
2132int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
2133 unsigned long pgoff)
2134{
2135 struct vm_struct *area;
2136 unsigned long uaddr = vma->vm_start;
2137 unsigned long usize = vma->vm_end - vma->vm_start;
2138
2139 if ((PAGE_SIZE-1) & (unsigned long)addr)
2140 return -EINVAL;
2141
2142 area = find_vm_area(addr);
2143 if (!area)
2144 return -EINVAL;
2145
2146 if (!(area->flags & VM_USERMAP))
2147 return -EINVAL;
2148
2149 if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
2150 return -EINVAL;
2151
2152 addr += pgoff << PAGE_SHIFT;
2153 do {
2154 struct page *page = vmalloc_to_page(addr);
2155 int ret;
2156
2157 ret = vm_insert_page(vma, uaddr, page);
2158 if (ret)
2159 return ret;
2160
2161 uaddr += PAGE_SIZE;
2162 addr += PAGE_SIZE;
2163 usize -= PAGE_SIZE;
2164 } while (usize > 0);

	/* Keep the core VM (e.g. memory migration) away from these pages. */
2167 vma->vm_flags |= VM_RESERVED;
2168
2169 return 0;
2170}
2171EXPORT_SYMBOL(remap_vmalloc_range);
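
/*
 * Sketch of typical use from a driver's ->mmap() handler (names are
 * hypothetical; the buffer must have been allocated with vmalloc_user()
 * or otherwise carry VM_USERMAP):
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		return remap_vmalloc_range(vma, foo->buf, 0);
 *	}
 */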

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
2177void __attribute__((weak)) vmalloc_sync_all(void)
2178{
2179}
2180
2181
2182static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
2183{
2184 pte_t ***p = data;
2185
2186 if (p) {
2187 *(*p) = pte;
2188 (*p)++;
2189 }
2190 return 0;
2191}

/**
 *	alloc_vm_area - allocate a range of kernel address space
 *	@size:		size of the area
 *	@ptes:		returns the PTEs for the address space
 *
 *	Returns:	NULL on failure, vm_struct on success
 *
 *	This function reserves a range of kernel address space, and
 *	allocates pagetables to map that range.  No actual mappings
 *	are created.
 *
 *	If @ptes is non-NULL, pointers to the PTEs (in init_mm)
 *	allocated for the VM area are returned.
 */
2207struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
2208{
2209 struct vm_struct *area;
2210
2211 area = get_vm_area_caller(size, VM_IOREMAP,
2212 __builtin_return_address(0));
2213 if (area == NULL)
2214 return NULL;

	/*
	 * This ensures that page tables are constructed for this region of
	 * kernel virtual address space and mapped into init_mm.
	 */
2220 if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
2221 size, f, ptes ? &ptes : NULL)) {
2222 free_vm_area(area);
2223 return NULL;
2224 }
2225
2226 return area;
2227}
2228EXPORT_SYMBOL_GPL(alloc_vm_area);
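
/*
 * Usage sketch (hypothetical caller): reserve a kernel virtual range with
 * pre-constructed page tables and retrieve the pte slots so that entries
 * can be installed later, as paravirtualized guests do:
 *
 *	pte_t *ptes[N];
 *	struct vm_struct *vm = alloc_vm_area(N * PAGE_SIZE, ptes);
 *
 *	if (!vm)
 *		return -ENOMEM;
 *	... install the desired entries through ptes[0..N-1] ...
 *	free_vm_area(vm);
 */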
2229
2230void free_vm_area(struct vm_struct *area)
2231{
2232 struct vm_struct *ret;
2233 ret = remove_vm_area(area->addr);
2234 BUG_ON(ret != area);
2235 kfree(area);
2236}
2237EXPORT_SYMBOL_GPL(free_vm_area);
2238
2239#ifdef CONFIG_SMP
2240static struct vmap_area *node_to_va(struct rb_node *n)
2241{
2242 return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
2243}
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257static bool pvm_find_next_prev(unsigned long end,
2258 struct vmap_area **pnext,
2259 struct vmap_area **pprev)
2260{
2261 struct rb_node *n = vmap_area_root.rb_node;
2262 struct vmap_area *va = NULL;
2263
2264 while (n) {
2265 va = rb_entry(n, struct vmap_area, rb_node);
2266 if (end < va->va_end)
2267 n = n->rb_left;
2268 else if (end > va->va_end)
2269 n = n->rb_right;
2270 else
2271 break;
2272 }
2273
2274 if (!va)
2275 return false;
2276
2277 if (va->va_end > end) {
2278 *pnext = va;
2279 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2280 } else {
2281 *pprev = va;
2282 *pnext = node_to_va(rb_next(&(*pprev)->rb_node));
2283 }
2284 return true;
2285}
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303static unsigned long pvm_determine_end(struct vmap_area **pnext,
2304 struct vmap_area **pprev,
2305 unsigned long align)
2306{
2307 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2308 unsigned long addr;
2309
2310 if (*pnext)
2311 addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end);
2312 else
2313 addr = vmalloc_end;
2314
2315 while (*pprev && (*pprev)->va_end > addr) {
2316 *pnext = *pprev;
2317 *pprev = node_to_va(rb_prev(&(*pnext)->rb_node));
2318 }
2319
2320 return addr;
2321}

/**
 * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator
 * @offsets: array containing offset of each area
 * @sizes: array containing size of each area
 * @nr_vms: the number of areas to allocate
 * @align: alignment, all entries in @offsets and @sizes must be aligned to this
 *
 * Returns: kmalloc'd vm_struct pointer array pointing to allocated
 *	    areas on success, %NULL on failure
 *
 * The percpu allocator wants congruent vm areas so that it can keep the
 * offsets among percpu chunks constant.  This function searches, starting
 * from vmap_area_pcpu_hole and moving downwards, for a base address at
 * which all requested areas (described by @offsets and @sizes relative to
 * that base) fit, purging lazily freed areas and retrying once if needed.
 */
2347struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
2348 const size_t *sizes, int nr_vms,
2349 size_t align)
2350{
2351 const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
2352 const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
2353 struct vmap_area **vas, *prev, *next;
2354 struct vm_struct **vms;
2355 int area, area2, last_area, term_area;
2356 unsigned long base, start, end, last_end;
2357 bool purged = false;
2358
2359
2360 BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
2361 for (last_area = 0, area = 0; area < nr_vms; area++) {
2362 start = offsets[area];
2363 end = start + sizes[area];
2364
2365
2366 BUG_ON(!IS_ALIGNED(offsets[area], align));
2367 BUG_ON(!IS_ALIGNED(sizes[area], align));
2368
2369
2370 if (start > offsets[last_area])
2371 last_area = area;
2372
2373 for (area2 = 0; area2 < nr_vms; area2++) {
2374 unsigned long start2 = offsets[area2];
2375 unsigned long end2 = start2 + sizes[area2];
2376
2377 if (area2 == area)
2378 continue;
2379
2380 BUG_ON(start2 >= start && start2 < end);
2381 BUG_ON(end2 <= end && end2 > start);
2382 }
2383 }
2384 last_end = offsets[last_area] + sizes[last_area];
2385
2386 if (vmalloc_end - vmalloc_start < last_end) {
2387 WARN_ON(true);
2388 return NULL;
2389 }
2390
2391 vms = kcalloc(nr_vms, sizeof(vms[0]), GFP_KERNEL);
2392 vas = kcalloc(nr_vms, sizeof(vas[0]), GFP_KERNEL);
2393 if (!vas || !vms)
2394 goto err_free2;
2395
2396 for (area = 0; area < nr_vms; area++) {
2397 vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
2398 vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
2399 if (!vas[area] || !vms[area])
2400 goto err_free;
2401 }
2402retry:
2403 spin_lock(&vmap_area_lock);
2404
2405
2406 area = term_area = last_area;
2407 start = offsets[area];
2408 end = start + sizes[area];
2409
2410 if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) {
2411 base = vmalloc_end - last_end;
2412 goto found;
2413 }
2414 base = pvm_determine_end(&next, &prev, align) - end;
2415
2416 while (true) {
2417 BUG_ON(next && next->va_end <= base + end);
2418 BUG_ON(prev && prev->va_end > base + end);
2419
2420
2421
2422
2423
2424 if (base + last_end < vmalloc_start + last_end) {
2425 spin_unlock(&vmap_area_lock);
2426 if (!purged) {
2427 purge_vmap_area_lazy();
2428 purged = true;
2429 goto retry;
2430 }
2431 goto err_free;
2432 }
2433
2434
2435
2436
2437
2438 if (next && next->va_start < base + end) {
2439 base = pvm_determine_end(&next, &prev, align) - end;
2440 term_area = area;
2441 continue;
2442 }
2443
2444
2445
2446
2447
2448
2449 if (prev && prev->va_end > base + start) {
2450 next = prev;
2451 prev = node_to_va(rb_prev(&next->rb_node));
2452 base = pvm_determine_end(&next, &prev, align) - end;
2453 term_area = area;
2454 continue;
2455 }
2456
2457
2458
2459
2460
2461 area = (area + nr_vms - 1) % nr_vms;
2462 if (area == term_area)
2463 break;
2464 start = offsets[area];
2465 end = start + sizes[area];
2466 pvm_find_next_prev(base + end, &next, &prev);
2467 }
2468found:
2469
2470 for (area = 0; area < nr_vms; area++) {
2471 struct vmap_area *va = vas[area];
2472
2473 va->va_start = base + offsets[area];
2474 va->va_end = va->va_start + sizes[area];
2475 __insert_vmap_area(va);
2476 }
2477
2478 vmap_area_pcpu_hole = base + offsets[last_area];
2479
2480 spin_unlock(&vmap_area_lock);
2481
2482
2483 for (area = 0; area < nr_vms; area++)
2484 insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC,
2485 pcpu_get_vm_areas);
2486
2487 kfree(vas);
2488 return vms;
2489
2490err_free:
2491 for (area = 0; area < nr_vms; area++) {
2492 kfree(vas[area]);
2493 kfree(vms[area]);
2494 }
2495err_free2:
2496 kfree(vas);
2497 kfree(vms);
2498 return NULL;
2499}

/**
 * pcpu_free_vm_areas - free vmalloc areas for percpu allocator
 * @vms: vm_struct pointer array returned by pcpu_get_vm_areas()
 * @nr_vms: the number of allocated areas
 *
 * Free vm_structs and the array allocated by pcpu_get_vm_areas().
 */
2508void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
2509{
2510 int i;
2511
2512 for (i = 0; i < nr_vms; i++)
2513 free_vm_area(vms[i]);
2514 kfree(vms);
2515}
2516#endif
2517
2518#ifdef CONFIG_PROC_FS
2519static void *s_start(struct seq_file *m, loff_t *pos)
2520 __acquires(&vmlist_lock)
2521{
2522 loff_t n = *pos;
2523 struct vm_struct *v;
2524
2525 read_lock(&vmlist_lock);
2526 v = vmlist;
2527 while (n > 0 && v) {
2528 n--;
2529 v = v->next;
2530 }
2531 if (!n)
2532 return v;
2533
2534 return NULL;
2535
2536}
2537
2538static void *s_next(struct seq_file *m, void *p, loff_t *pos)
2539{
2540 struct vm_struct *v = p;
2541
2542 ++*pos;
2543 return v->next;
2544}
2545
2546static void s_stop(struct seq_file *m, void *p)
2547 __releases(&vmlist_lock)
2548{
2549 read_unlock(&vmlist_lock);
2550}
2551
2552static void show_numa_info(struct seq_file *m, struct vm_struct *v)
2553{
2554 if (NUMA_BUILD) {
2555 unsigned int nr, *counters = m->private;
2556
2557 if (!counters)
2558 return;
2559
2560 memset(counters, 0, nr_node_ids * sizeof(unsigned int));
2561
2562 for (nr = 0; nr < v->nr_pages; nr++)
2563 counters[page_to_nid(v->pages[nr])]++;
2564
2565 for_each_node_state(nr, N_HIGH_MEMORY)
2566 if (counters[nr])
2567 seq_printf(m, " N%u=%u", nr, counters[nr]);
2568 }
2569}
2570
2571static int s_show(struct seq_file *m, void *p)
2572{
2573 struct vm_struct *v = p;
2574
2575 seq_printf(m, "0x%p-0x%p %7ld",
2576 v->addr, v->addr + v->size, v->size);
2577
2578 if (v->caller)
2579 seq_printf(m, " %pS", v->caller);
2580
2581 if (v->nr_pages)
2582 seq_printf(m, " pages=%d", v->nr_pages);
2583
2584 if (v->phys_addr)
2585 seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
2586
2587 if (v->flags & VM_IOREMAP)
2588 seq_printf(m, " ioremap");
2589
2590 if (v->flags & VM_ALLOC)
2591 seq_printf(m, " vmalloc");
2592
2593 if (v->flags & VM_MAP)
2594 seq_printf(m, " vmap");
2595
2596 if (v->flags & VM_USERMAP)
2597 seq_printf(m, " user");
2598
2599 if (v->flags & VM_VPAGES)
2600 seq_printf(m, " vpages");
2601
2602 show_numa_info(m, v);
2603 seq_putc(m, '\n');
2604 return 0;
2605}
2606
2607static const struct seq_operations vmalloc_op = {
2608 .start = s_start,
2609 .next = s_next,
2610 .stop = s_stop,
2611 .show = s_show,
2612};
2613
2614static int vmalloc_open(struct inode *inode, struct file *file)
2615{
2616 unsigned int *ptr = NULL;
2617 int ret;
2618
2619 if (NUMA_BUILD) {
2620 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
2621 if (ptr == NULL)
2622 return -ENOMEM;
2623 }
2624 ret = seq_open(file, &vmalloc_op);
2625 if (!ret) {
2626 struct seq_file *m = file->private_data;
2627 m->private = ptr;
2628 } else
2629 kfree(ptr);
2630 return ret;
2631}
2632
2633static const struct file_operations proc_vmalloc_operations = {
2634 .open = vmalloc_open,
2635 .read = seq_read,
2636 .llseek = seq_lseek,
2637 .release = seq_release_private,
2638};
2639
2640static int __init proc_vmalloc_init(void)
2641{
2642 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
2643 return 0;
2644}
2645module_init(proc_vmalloc_init);
2646#endif
2647
2648