/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@redhat.com>
 */

#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * Rb-tree consistency checking is expensive: keep DEBUG_MM_RB disabled
 * unless you are chasing vma list/tree corruption.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

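/* Map a vm_flags protection combination to its hardware page protection. */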
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
}
EXPORT_SYMBOL(vm_get_page_prot);

int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio = 50;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
atomic_t vm_committed_space = ATOMIC_INIT(0);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
 *
 * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
 * Additional code 2002 Jul 20 by Robert Love.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		unsigned long n;

		free = global_page_state(NR_FILE_PAGES);
		free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			free -= free / 32;

		if (free > pages)
			return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call if we're about to fail.
		 */
		n = nr_free_pages();

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (n <= totalreserve_pages)
			goto error;
		else
			n -= totalreserve_pages;

		/*
		 * Leave the last 3% for root
		 */
		if (!cap_sys_admin)
			n -= n / 32;
		free += n;

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;
	/*
	 * Leave the last 3% for root
	 */
	if (!cap_sys_admin)
		allowed -= allowed / 32;
	allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
	allowed -= mm->total_vm / 32;

	/*
	 * cast `allowed' as a signed long because vm_committed_space
	 * sometimes has a negative value
	 */
	if (atomic_read(&vm_committed_space) < (long)allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/*
 * Requires inode->i_mapping->i_mmap_lock
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping->i_mmap_writable--;

	flush_dcache_mmap_lock(mapping);
	if (unlikely(vma->vm_flags & VM_NONLINEAR))
		list_del_init(&vma->shared.vm_set.list);
	else
		vma_prio_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its prio_tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		spin_lock(&mapping->i_mmap_lock);
		__remove_shared_vm_struct(vma, file, mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_free(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

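/*
 * sys_brk: expand or shrink the data segment.  Shrinking goes through
 * do_munmap, growing through do_brk, all under RLIMIT_DATA control.
 */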
asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	down_write(&mm->mmap_sem);

	if (brk < mm->end_code)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the
	 * test of oldbrk with newbrk then it can escape the test and let the
	 * data segment grow beyond its set limit, in the case where the limit
	 * is lowered after the test is done.
	 */
	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;
set_brk:
	mm->brk = brk;
out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}

#ifdef DEBUG_MM_RB
static int browse_rb(struct rb_root *root)
{
	int i = 0, j;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev)
			printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
		if (vma->vm_start < pend)
			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
		if (vma->vm_start > vma->vm_end)
			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd)) {
		j++;
	}
	if (i != j)
		printk("backwards %d, forwards %d\n", j, i), i = 0;
	return i;
}

void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	struct vm_area_struct *tmp = mm->mmap;
	while (tmp) {
		tmp = tmp->vm_next;
		i++;
	}
	if (i != mm->map_count)
		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
	i = browse_rb(&mm->mm_rb);
	if (i != mm->map_count)
		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
	BUG_ON(bug);
}
#else
#define validate_mm(mm) do { } while (0)
#endif

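/*
 * Find the vma covering or following addr; also return the rb-tree link
 * and parent at which a new vma for addr would be inserted, and the vma
 * preceding that slot in *pprev.
 */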
static struct vm_area_struct *
find_vma_prepare(struct mm_struct *mm, unsigned long addr,
		struct vm_area_struct **pprev, struct rb_node ***rb_link,
		struct rb_node **rb_parent)
{
	struct vm_area_struct *vma;
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;
	vma = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			vma = vma_tmp;
			if (vma_tmp->vm_start <= addr)
				return vma;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return vma;
}

static inline void
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent)
{
	if (prev) {
		vma->vm_next = prev->vm_next;
		prev->vm_next = vma;
	} else {
		mm->mmap = vma;
		if (rb_parent)
			vma->vm_next = rb_entry(rb_parent,
					struct vm_area_struct, vm_rb);
		else
			vma->vm_next = NULL;
	}
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
}

static inline void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
		if (vma->vm_flags & VM_SHARED)
			mapping->i_mmap_writable++;

		flush_dcache_mmap_lock(mapping);
		if (unlikely(vma->vm_flags & VM_NONLINEAR))
			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		else
			vma_prio_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
	__anon_vma_link(vma);
}

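/*
 * Link a new vma into the mm: takes the i_mmap lock (for file-backed
 * vmas) and the anon_vma lock, so rmap always sees a consistent tree.
 */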
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file)
		mapping = vma->vm_file->f_mapping;

	if (mapping) {
		spin_lock(&mapping->i_mmap_lock);
		vma->vm_truncate_count = mapping->truncate_count;
	}
	anon_vma_lock(vma);

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	anon_vma_unlock(vma);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust in the split_vma insert case:
 * insert vm structure into list and rbtree and anon_vma,
 * but it has already been inserted into prio_tree earlier.
 */
static void
__insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *__vma, *prev;
	struct rb_node **rb_link, *rb_parent;

	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
	BUG_ON(__vma && __vma->vm_start < vma->vm_end);
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

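/*
 * Unlink vma from the list, rb-tree and mmap_cache; the caller must
 * already hold the necessary locks.
 */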
static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev)
{
	prev->vm_next = vma->vm_next;
	rb_erase(&vma->vm_rb, &mm->mm_rb);
	if (mm->mmap_cache == vma)
		mm->mmap_cache = prev;
}

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
void vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *importer = NULL;
	struct address_space *mapping = NULL;
	struct prio_tree_root *root = NULL;
	struct file *file = vma->vm_file;
	struct anon_vma *anon_vma = NULL;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mremap move or
			 * split_vma inserting its own).
			 */
again:			remove_next = 1 + (end > next->vm_end);
			end = next->vm_end;
			anon_vma = next->anon_vma;
			importer = vma;
		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
			 * mprotect case 5 shifting the boundary up.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			anon_vma = next->anon_vma;
			importer = vma;
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks, and !insert tells it's not
			 * split_vma inserting another: so it must be
			 * mprotect case 4 shifting the boundary down.
			 */
			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
			anon_vma = next->anon_vma;
			importer = next;
		}
	}

	if (file) {
		mapping = file->f_mapping;
		if (!(vma->vm_flags & VM_NONLINEAR))
			root = &mapping->i_mmap;
		spin_lock(&mapping->i_mmap_lock);
		if (importer &&
		    vma->vm_truncate_count != next->vm_truncate_count) {
			/*
			 * unmap_mapping_range might be in progress:
			 * ensure that the expanding vma is rescanned.
			 */
			importer->vm_truncate_count = 0;
		}
		if (insert) {
			insert->vm_truncate_count = vma->vm_truncate_count;
			/*
			 * Put into prio_tree now, so instantiated pages
			 * are visible to arm/parisc __flush_dcache_page
			 * through flush_dcache_mmap_lock via unmap_mapping_range.
			 */
			__vma_link_file(insert);
		}
	}

	/*
	 * When changing only vma->vm_end, we don't really need
	 * anon_vma lock: but is that case worth optimizing out?
	 */
	if (vma->anon_vma)
		anon_vma = vma->anon_vma;
	if (anon_vma) {
		spin_lock(&anon_vma->lock);
		/*
		 * Easily overlooked: when mprotect shifts the boundary,
		 * make sure the expanding vma has anon_vma set if the
		 * shrinking vma had, to cover any anon pages imported.
		 */
		if (importer && !importer->anon_vma) {
			importer->anon_vma = anon_vma;
			__anon_vma_link(importer);
		}
	}

	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_prio_tree_remove(vma, root);
		if (adjust_next)
			vma_prio_tree_remove(next, root);
	}

	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_prio_tree_insert(next, root);
		vma_prio_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma_merge has merged next into vma, and needs
		 * us to remove next before dropping the locks.
		 */
		__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
		if (next->anon_vma)
			__anon_vma_merge(vma, next);
	} else if (insert) {
		/*
		 * split_vma has split insert from vma, and needs
		 * us to insert it before dropping the locks
		 * (it may either follow vma or precede it).
		 */
		__insert_vm_struct(mm, insert);
	}

	if (anon_vma)
		spin_unlock(&anon_vma->lock);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);

	if (remove_next) {
		if (file)
			fput(file);
		mm->map_count--;
		mpol_free(vma_policy(next));
		kmem_cache_free(vm_area_cachep, next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
		 * we must remove another next too. It would clutter
		 * up the code too much to do both in one go.
		 */
		if (remove_next == 2) {
			next = vma->vm_next;
			goto again;
		}
	}

	validate_mm(mm);
}

/*
 * If the vma has a ->close operation then the driver probably needs to release
 * per-vma resources, so we don't attempt to merge those.
 */
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)

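/*
 * Two vmas can only be merged when their flags match exactly, they map
 * the same file (or none), and neither has a driver ->close method.
 */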
static inline int is_mergeable_vma(struct vm_area_struct *vma,
			struct file *file, unsigned long vm_flags)
{
	if (vma->vm_flags != vm_flags)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2)
{
	return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of pagecache
 * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
 * wrap, nor mmaps which cover the final page at index -1UL.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
{
	if (is_mergeable_vma(vma, file, vm_flags) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 */
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
{
	if (is_mergeable_vma(vma, file, vm_flags) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
		pgoff_t vm_pglen;
		vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                AAAA          AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
 *    cannot merge    might become    might become    might become
 *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPXXXX 7 or
 *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPNNNN 2 or
 *    mremap move:                                    PPPPNNNNNNNN 8
 *        AAAA
 *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
 *    might become    case 1 below    case 2 below    case 3 below
 *
 * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
 * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;

	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
						anon_vma, file, pgoff)) {
		/*
		 * OK, it can.  Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma)) {
							/* cases 1, 6 */
			vma_adjust(prev, prev->vm_start,
				next->vm_end, prev->vm_pgoff, NULL);
		} else					/* cases 2, 5, 7 */
			vma_adjust(prev, prev->vm_start,
				end, prev->vm_pgoff, NULL);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					anon_vma, file, pgoff+pglen)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			vma_adjust(prev, prev->vm_start,
				addr, prev->vm_pgoff, NULL);
		else					/* cases 3, 8 */
			vma_adjust(area, addr, next->vm_end,
				next->vm_pgoff - pglen, NULL);
		return area;
	}

	return NULL;
}

/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off
 * to allocate a new anon_vma.  It checks because a repetitive
 * sequence of mprotects and faults may otherwise lead to distinct
 * anon_vmas being allocated, preventing vma merge in subsequent
 * mprotect.
 */
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *near;
	unsigned long vm_flags;

	near = vma->vm_next;
	if (!near)
		goto try_prev;

	/*
	 * Only differing VM_READ|VM_WRITE|VM_EXEC should not block the
	 * merge: borrow those bits from the neighbour when checking
	 * mergeability, since mprotect may well align them later.
	 */
	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);

	if (near->anon_vma && vma->vm_end == near->vm_start &&
			mpol_equal(vma_policy(vma), vma_policy(near)) &&
			can_vma_merge_before(near, vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff +
				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
		return near->anon_vma;
try_prev:
	/*
	 * It is potentially slow to have to call find_vma_prev here.
	 * But it's only on the first write fault on the vma, not
	 * every time, and we could devise a way to avoid it later
	 * (e.g. stash info in next's anon_vma_node when assigning
	 * an anon_vma, or when trying vma_merge).  Another time.
	 */
	BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
	if (!near)
		goto none;

	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);

	if (near->anon_vma && near->vm_end == vma->vm_start &&
			mpol_equal(vma_policy(near), vma_policy(vma)) &&
			can_vma_merge_after(near, vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff))
		return near->anon_vma;
none:
	/*
	 * There's no absolute need to look only at touching neighbours:
	 * we could search further afield for "compatible" anon_vmas.
	 * But it would probably just be a waste of time searching,
	 * or lead to too many vmas hanging off the same anon_vma.
	 * We're trying to allow mprotect remerging later instead,
	 * rather than permitting mprotect to ruin our merging chances.
	 */
	return NULL;
}

#ifdef CONFIG_PROC_FS
void vm_stat_account(struct mm_struct *mm, unsigned long flags,
						struct file *file, long pages)
{
	const unsigned long stack_flags
		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);

	if (file) {
		mm->shared_vm += pages;
		if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
			mm->exec_vm += pages;
	} else if (flags & stack_flags)
		mm->stack_vm += pages;
	if (flags & (VM_RESERVED|VM_IO))
		mm->reserved_vm += pages;
}
#endif

/*
 * The caller must hold down_write(current->mm->mmap_sem).
 */

unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff)
{
	struct mm_struct *mm = current->mm;
	struct inode *inode;
	unsigned int vm_flags;
	int error;
	int accountable = 1;
	unsigned long reqprot = prot;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we dont add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
			prot |= PROT_EXEC;

	if (!len)
		return -EINVAL;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len || len > TASK_SIZE)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (addr & ~PAGE_MASK)
		return addr;

	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED) {
		if (!can_do_mlock())
			return -EPERM;
		vm_flags |= VM_LOCKED;
	}

	/* mlock MCL_FUTURE? */
	if (vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	inode = file ? file->f_path.dentry->d_inode : NULL;

	if (file) {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(inode))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}
			if (is_file_hugepages(file))
				accountable = 0;

			if (!file->f_op || !file->f_op->mmap)
				return -ENODEV;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
	if (error)
		return error;

	return mmap_region(file, addr, len, flags, vm_flags, pgoff,
			   accountable);
}
EXPORT_SYMBOL(do_mmap_pgoff);

/*
 * Some shared mappings will want the pages marked read-only
 * to track write events.  If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
int vma_wants_writenotify(struct vm_area_struct *vma)
{
	unsigned int vm_flags = vma->vm_flags;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
		return 1;

	/* The open routine did something to the protections already? */
	if (pgprot_val(vma->vm_page_prot) !=
	    pgprot_val(vm_get_page_prot(vm_flags)))
		return 0;

	/* Specialty mapping? */
	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_cap_account_dirty(vma->vm_file->f_mapping);
}

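/*
 * Do the real work of mmap: allocate and link a new vma, call the
 * driver's ->mmap method, and complete the accounting.  Expects
 * mmap_sem to be held for writing.
 */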
unsigned long mmap_region(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long flags,
			  unsigned int vm_flags, unsigned long pgoff,
			  int accountable)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int correct_wcount = 0;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;
	struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;

	/* Clear old maps */
	error = -ENOMEM;
munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check against address space limit. */
	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (accountable && (!(flags & MAP_NORESERVE) ||
			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
		if (vm_flags & VM_SHARED) {
			/* Check memory availability in shmem_file_setup? */
			vm_flags |= VM_ACCOUNT;
		} else if (vm_flags & VM_WRITE) {
			/*
			 * Private writable mapping: check memory availability
			 */
			charged = len >> PAGE_SHIFT;
			if (security_vm_enough_memory(charged))
				return -ENOMEM;
			vm_flags |= VM_ACCOUNT;
		}
	}

	/*
	 * Can we just expand an old private anonymous mapping?
	 * The VM_SHARED test is necessary because shmem_zero_setup
	 * will create the file object for a shared anonymous map below.
	 */
	if (!file && !(vm_flags & VM_SHARED) &&
	    vma_merge(mm, prev, addr, addr + len, vm_flags,
					NULL, NULL, pgoff, NULL))
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		error = -EINVAL;
		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
			goto free_vma;
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
			correct_wcount = 1;
		}
		vma->vm_file = file;
		get_file(file);
		error = file->f_op->mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	}

	/* We set VM_ACCOUNT in a shared mapping's vm_flags above, to tell
	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
	 * that the memory must be accounted.  The shmem object now holds
	 * that accounting, so clear VM_ACCOUNT on the vma itself to avoid
	 * accounting the same memory twice.
	 */
	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
		vma->vm_flags &= ~VM_ACCOUNT;

	/* Can addr have changed??
	 *
	 * Answer: Yes, several device drivers can do it in their
	 *         f_op->mmap method. -DaveM
	 */
	addr = vma->vm_start;
	pgoff = vma->vm_pgoff;
	vm_flags = vma->vm_flags;

	if (vma_wants_writenotify(vma))
		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);

	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
		file = vma->vm_file;
		vma_link(mm, vma, prev, rb_link, rb_parent);
		if (correct_wcount)
			atomic_inc(&inode->i_writecount);
	} else {
		if (file) {
			if (correct_wcount)
				atomic_inc(&inode->i_writecount);
			fput(file);
		}
		mpol_free(vma_policy(vma));
		kmem_cache_free(vm_area_cachep, vma);
	}
out:
	mm->total_vm += len >> PAGE_SHIFT;
	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
		make_pages_present(addr, addr + len);
	return addr;

unmap_and_free_vma:
	if (correct_wcount)
		atomic_inc(&inode->i_writecount);
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
free_vma:
	kmem_cache_free(vm_area_cachep, vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				addr = TASK_UNMAPPED_BASE;
				start_addr = addr;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
	}
}
#endif

void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
{
	/*
	 * Is this a new hole at the lowest possible address?
	 */
	if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
		mm->free_area_cache = addr;
		mm->cached_hole_size = ~0UL;
	}
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
				(!vma || addr + len <= vma->vm_start))
			return addr;
	}

	/* check if free_area_cache is useful for us */
	if (len <= mm->cached_hole_size) {
		mm->cached_hole_size = 0;
		mm->free_area_cache = mm->mmap_base;
	}

	/* either no address requested or can't fit in requested address hole */
	addr = mm->free_area_cache;

	/* make sure it can fit in the remaining address space */
	if (addr > len) {
		vma = find_vma(mm, addr-len);
		if (!vma || addr <= vma->vm_start)
			/* remember the address as a hint for next time */
			return (mm->free_area_cache = addr-len);
	}

	if (mm->mmap_base < len)
		goto bottomup;

	addr = mm->mmap_base-len;

	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
		 * return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma || addr+len <= vma->vm_start)
			/* remember the address as a hint for next time */
			return (mm->free_area_cache = addr);

		/* remember the largest hole we saw so far */
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start-len;
	} while (len < vma->vm_start);

bottomup:
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->cached_hole_size = ~0UL;
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = mm->mmap_base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
#endif

void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
{
	/*
	 * Is this a new hole at the highest possible address?
	 */
	if (addr > mm->free_area_cache)
		mm->free_area_cache = addr;

	/* dont allow allocations above current base */
	if (mm->free_area_cache > mm->mmap_base)
		mm->free_area_cache = mm->mmap_base;
}

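/*
 * Pick an unmapped area for a new mapping, preferring the file's
 * ->get_unmapped_area method (hugetlbfs etc.) over the mm default,
 * then sanity-check the result.
 */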
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
				  unsigned long, unsigned long, unsigned long);

	get_area = current->mm->get_unmapped_area;
	if (file && file->f_op && file->f_op->get_unmapped_area)
		get_area = file->f_op->get_unmapped_area;
	addr = get_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;

	if (addr > TASK_SIZE - len)
		return -ENOMEM;
	if (addr & ~PAGE_MASK)
		return -EINVAL;

	return addr;
}

EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = NULL;

	if (mm) {
		/* Check the cache first. */
		/* (Cache hit rate is typically around 35%.) */
		vma = mm->mmap_cache;
		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
			struct rb_node *rb_node;

			rb_node = mm->mm_rb.rb_node;
			vma = NULL;

			while (rb_node) {
				struct vm_area_struct *vma_tmp;

				vma_tmp = rb_entry(rb_node,
						struct vm_area_struct, vm_rb);

				if (vma_tmp->vm_end > addr) {
					vma = vma_tmp;
					if (vma_tmp->vm_start <= addr)
						break;
					rb_node = rb_node->rb_left;
				} else
					rb_node = rb_node->rb_right;
			}
			if (vma)
				mm->mmap_cache = vma;
		}
	}
	return vma;
}

EXPORT_SYMBOL(find_vma);

/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
			struct vm_area_struct **pprev)
{
	struct vm_area_struct *vma = NULL, *prev = NULL;
	struct rb_node *rb_node;
	if (!mm)
		goto out;

	/* Guard against addr being lower than the first VMA */
	vma = mm->mmap;

	/* Go through the RB tree quickly. */
	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *vma_tmp;
		vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (addr < vma_tmp->vm_end) {
			rb_node = rb_node->rb_left;
		} else {
			prev = vma_tmp;
			if (!prev->vm_next || (addr < prev->vm_next->vm_end))
				break;
			rb_node = rb_node->rb_right;
		}
	}

out:
	*pprev = prev;
	return prev ? prev->vm_next : vma;
}

/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */
static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
{
	struct mm_struct *mm = vma->vm_mm;
	struct rlimit *rlim = current->signal->rlim;
	unsigned long new_start;

	/* address space limit tests */
	if (!may_expand_vm(mm, grow))
		return -ENOMEM;

	/* Stack limit test */
	if (size > rlim[RLIMIT_STACK].rlim_cur)
		return -ENOMEM;

	/* mlock limit tests */
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked;
		unsigned long limit;
		locked = mm->locked_vm + grow;
		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
		if (locked > limit && !capable(CAP_IPC_LOCK))
			return -ENOMEM;
	}

	/* Check to ensure the stack will not grow into a hugetlb-only region */
	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
			vma->vm_end - size;
	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
		return -EFAULT;

	/*
	 * Overcommit..  This must be the final test, as it will
	 * update security statistics.
	 */
	if (security_vm_enough_memory(grow))
		return -ENOMEM;

	/* Ok, everything looks good - let it rip */
	mm->total_vm += grow;
	if (vma->vm_flags & VM_LOCKED)
		mm->locked_vm += grow;
	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
	return 0;
}

#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */
#ifndef CONFIG_IA64
static inline
#endif
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	int error;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/*
	 * We must make sure the anon_vma is allocated
	 * so that the anon_vma locking is not a noop.
	 */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;
	anon_vma_lock(vma);

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 * Also guard against wrapping around to address 0.
	 */
	if (address < PAGE_ALIGN(address+4))
		address = PAGE_ALIGN(address+4);
	else {
		anon_vma_unlock(vma);
		return -ENOMEM;
	}
	error = 0;

	/* Somebody else might have raced and expanded it already */
	if (address > vma->vm_end) {
		unsigned long size, grow;

		size = address - vma->vm_start;
		grow = (address - vma->vm_end) >> PAGE_SHIFT;

		error = acct_stack_growth(vma, size, grow);
		if (!error)
			vma->vm_end = address;
	}
	anon_vma_unlock(vma);
	return error;
}
#endif

/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
static inline int expand_downwards(struct vm_area_struct *vma,
				   unsigned long address)
{
	int error;

	/*
	 * We must make sure the anon_vma is allocated
	 * so that the anon_vma locking is not a noop.
	 */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	address &= PAGE_MASK;
	error = security_file_mmap(0, 0, 0, 0, address, 1);
	if (error)
		return error;

	anon_vma_lock(vma);

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */

	/* Somebody else might have raced and expanded it already */
	if (address < vma->vm_start) {
		unsigned long size, grow;

		size = vma->vm_end - address;
		grow = (vma->vm_start - address) >> PAGE_SHIFT;

		error = acct_stack_growth(vma, size, grow);
		if (!error) {
			vma->vm_start = address;
			vma->vm_pgoff -= grow;
		}
	}
	anon_vma_unlock(vma);
	return error;
}

int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}

#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_upwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma, *prev;

	addr &= PAGE_MASK;
	vma = find_vma_prev(mm, addr, &prev);
	if (vma && (vma->vm_start <= addr))
		return vma;
	if (!prev || expand_stack(prev, addr))
		return NULL;
	if (prev->vm_flags & VM_LOCKED)
		make_pages_present(addr, prev->vm_end);
	return prev;
}
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm, addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED)
		make_pages_present(addr, start);
	return vma;
}
#endif

/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
	/* Update high watermark before we lower total_vm */
	update_hiwater_vm(mm);
	do {
		long nrpages = vma_pages(vma);

		mm->total_vm -= nrpages;
		if (vma->vm_flags & VM_LOCKED)
			mm->locked_vm -= nrpages;
		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
		vma = remove_vma(vma);
	} while (vma);
	validate_mm(mm);
}

/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
	struct mmu_gather *tlb;
	unsigned long nr_accounted = 0;

	lru_add_drain();
	tlb = tlb_gather_mmu(mm, 0);
	update_hiwater_rss(mm);
	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
	vm_unacct_memory(nr_accounted);
	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
				 next? next->vm_start: 0);
	tlb_finish_mmu(tlb, start, end);
}

/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go..
 */
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, unsigned long end)
{
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;
	unsigned long addr;

	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	do {
		rb_erase(&vma->vm_rb, &mm->mm_rb);
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);
	*insertion_point = vma;
	tail_vma->vm_next = NULL;
	if (mm->unmap_area == arch_unmap_area)
		addr = prev ? prev->vm_end : mm->mmap_base;
	else
		addr = vma ? vma->vm_start : mm->mmap_base;
	mm->unmap_area(mm, addr);
	mm->mmap_cache = NULL;		/* Kill the cache. */
}

/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */
int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
	      unsigned long addr, int new_below)
{
	struct mempolicy *pol;
	struct vm_area_struct *new;

	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
		return -EINVAL;

	if (mm->map_count >= sysctl_max_map_count)
		return -ENOMEM;

	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	/* most fields are the same, copy all, and then fixup */
	*new = *vma;

	if (new_below)
		new->vm_end = addr;
	else {
		new->vm_start = addr;
		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
	}

	pol = mpol_copy(vma_policy(vma));
	if (IS_ERR(pol)) {
		kmem_cache_free(vm_area_cachep, new);
		return PTR_ERR(pol);
	}
	vma_set_policy(new, pol);

	if (new->vm_file)
		get_file(new->vm_file);

	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	if (new_below)
		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
			((addr - new->vm_start) >> PAGE_SHIFT), new);
	else
		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	return 0;
}

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	if ((len = PAGE_ALIGN(len)) == 0)
		return -EINVAL;

	/* Find the first overlapping VMA */
	vma = find_vma_prev(mm, start, &prev);
	if (!vma)
		return 0;
	/* we have  start < vma->vm_end  */

	/* if it doesn't overlap, we have nothing.. */
	end = start + len;
	if (vma->vm_start >= end)
		return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	if (start > vma->vm_start) {
		int error = split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does it split the last one? */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = prev? prev->vm_next: mm->mmap;

	/*
	 * Remove the vma's, and unmap the actual pages
	 */
	detach_vmas_to_be_unmapped(mm, vma, prev, end);
	unmap_region(mm, vma, prev, start, end);

	/* Fix up all other VM information */
	remove_vma_list(mm, vma);

	return 0;
}

EXPORT_SYMBOL(do_munmap);

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;

	profile_munmap(addr);

	down_write(&mm->mmap_sem);
	ret = do_munmap(mm, addr, len);
	up_write(&mm->mmap_sem);
	return ret;
}

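/*
 * Under CONFIG_DEBUG_VM, warn if mmap_sem is not held for writing:
 * if down_read_trylock succeeds here, no writer could have held it.
 */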
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (unlikely(down_read_trylock(&mm->mmap_sem))) {
		WARN_ON(1);
		up_read(&mm->mmap_sem);
	}
#endif
}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
unsigned long do_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	unsigned long flags;
	struct rb_node **rb_link, *rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;

	len = PAGE_ALIGN(len);
	if (!len)
		return addr;

	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
		return -EINVAL;

	if (is_hugepage_only_range(mm, addr, len))
		return -EINVAL;

	error = security_file_mmap(0, 0, 0, 0, addr, 1);
	if (error)
		return error;

	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/*
	 * mlock MCL_FUTURE?
	 */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}

	/*
	 * mm->mmap_sem is required to protect against another thread
	 * changing the mappings in case we sleep.
	 */
	verify_mm_writelocked(mm);

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
 munmap_back:
	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	if (vma && vma->vm_start < addr + len) {
		if (do_munmap(mm, addr, len))
			return -ENOMEM;
		goto munmap_back;
	}

	/* Check against address space limits *after* clearing old maps... */
	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory(len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Can we just expand an old private anonymous mapping? */
	if (vma_merge(mm, prev, addr, addr + len, flags,
					NULL, NULL, pgoff, NULL))
		goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma) {
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		mm->locked_vm += len >> PAGE_SHIFT;
		make_pages_present(addr, addr + len);
	}
	return addr;
}

EXPORT_SYMBOL(do_brk);

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather *tlb;
	struct vm_area_struct *vma = mm->mmap;
	unsigned long nr_accounted = 0;
	unsigned long end;

	/* mm's last user has gone, and its about to be pulled down */
	arch_exit_mmap(mm);

	lru_add_drain();
	flush_cache_mm(mm);
	tlb = tlb_gather_mmu(mm, 1);
	/* Don't update_hiwater_rss(mm) here, do_exit already did */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
	vm_unacct_memory(nr_accounted);
	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
	tlb_finish_mmu(tlb, 0, end);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma)
		vma = remove_vma(vma);

	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_lock is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *__vma, *prev;
	struct rb_node **rb_link, *rb_parent;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (!vma->vm_file) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}
	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
	if (__vma && __vma->vm_start < vma->vm_end)
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;
	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	struct mempolicy *pol;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (!vma->vm_file && !vma->anon_vma)
		pgoff = addr >> PAGE_SHIFT;

	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (vma_start >= new_vma->vm_start &&
		    vma_start < new_vma->vm_end)
			*vmap = new_vma;
	} else {
		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (new_vma) {
			*new_vma = *vma;
			pol = mpol_copy(vma_policy(vma));
			if (IS_ERR(pol)) {
				kmem_cache_free(vm_area_cachep, new_vma);
				return NULL;
			}
			vma_set_policy(new_vma, pol);
			new_vma->vm_start = addr;
			new_vma->vm_end = addr + len;
			new_vma->vm_pgoff = pgoff;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			vma_link(mm, new_vma, prev, rb_link, rb_parent);
		}
	}
	return new_vma;
}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
{
	unsigned long cur = mm->total_vm;	/* pages */
	unsigned long lim;

	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;

	if (cur + npages > lim)
		return 0;
	return 1;
}

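/*
 * ->nopage handler for "special" mappings: look the faulting address up
 * in the NULL-terminated page array hung off vm_private_data.
 */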
static struct page *special_mapping_nopage(struct vm_area_struct *vma,
					   unsigned long address, int *type)
{
	struct page **pages;

	BUG_ON(address < vma->vm_start || address >= vma->vm_end);

	address -= vma->vm_start;
	for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
		address -= PAGE_SIZE;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		return page;
	}

	return NOPAGE_SIGBUS;
}

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.nopage	= special_mapping_nopage,
};

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma;

	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (unlikely(vma == NULL))
		return -ENOMEM;

	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = &special_mapping_vmops;
	vma->vm_private_data = pages;

	if (unlikely(insert_vm_struct(mm, vma))) {
		kmem_cache_free(vm_area_cachep, vma);
		return -ENOMEM;
	}

	mm->total_vm += len >> PAGE_SHIFT;

	return 0;
}