/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/ima.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/perf_event.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)	(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
/*
 * Define DEBUG_MM_RB to enable the (expensive) consistency checks of the
 * vma list and rbtree in browse_rb()/validate_mm() below.
 */
#undef DEBUG_MM_RB

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);
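
/*
 * Illustrative example (not used by the code, hypothetical mapping): a
 * private PROT_READ|PROT_WRITE mapping has VM_READ|VM_WRITE set but not
 * VM_SHARED, so vm_get_page_prot() picks protection_map[VM_READ|VM_WRITE],
 * i.e. __P011 - on most architectures a read-only pte, so the first write
 * faults and is handled by copy-on-write.  The same flags plus VM_SHARED
 * select __S011, which really is writable.
 */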

int sysctl_overcommit_memory = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio = 50;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
struct percpu_counter vm_committed_as;

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
107int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
108{
109 unsigned long free, allowed;
110
111 vm_acct_memory(pages);
112
113
114
115
116 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
117 return 0;
118
119 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
120 unsigned long n;
121
122 free = global_page_state(NR_FILE_PAGES);
123 free += nr_swap_pages;

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this.
		 */
131 free += global_page_state(NR_SLAB_RECLAIMABLE);
132
133
134
135
136 if (!cap_sys_admin)
137 free -= free / 32;
138
139 if (free > pages)
140 return 0;

		/*
		 * nr_free_pages() is very expensive on large systems,
		 * only call it if we're about to fail.
		 */
146 n = nr_free_pages();
147
148
149
150
151 if (n <= totalreserve_pages)
152 goto error;
153 else
154 n -= totalreserve_pages;
155
156
157
158
159 if (!cap_sys_admin)
160 n -= n / 32;
161 free += n;
162
163 if (free > pages)
164 return 0;
165
166 goto error;
167 }
168
169 allowed = (totalram_pages - hugetlb_total_pages())
170 * sysctl_overcommit_ratio / 100;
171
172
173
174 if (!cap_sys_admin)
175 allowed -= allowed / 32;
176 allowed += total_swap_pages;

	/* Don't let a single process grow too big:
	   leave 3% of the size of this process for other processes */
180 if (mm)
181 allowed -= mm->total_vm / 32;
182
183 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
184 return 0;
185error:
186 vm_unacct_memory(pages);
187
188 return -ENOMEM;
189}
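
/*
 * Worked example for the strict (OVERCOMMIT_NEVER) path above, with
 * hypothetical numbers: 100000 non-hugetlb RAM pages, overcommit_ratio = 50
 * and 50000 swap pages give an unprivileged task 100000 * 50 / 100 = 50000
 * pages, minus 50000/32 ~= 1562 reserved for root, plus the 50000 swap
 * pages, minus mm->total_vm/32 of per-process headroom; the request only
 * succeeds while vm_committed_as stays below that figure.
 */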

/*
 * Requires inode->i_mapping->i_mmap_lock
 */
194static void __remove_shared_vm_struct(struct vm_area_struct *vma,
195 struct file *file, struct address_space *mapping)
196{
197 if (vma->vm_flags & VM_DENYWRITE)
198 atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
199 if (vma->vm_flags & VM_SHARED)
200 mapping->i_mmap_writable--;
201
202 flush_dcache_mmap_lock(mapping);
203 if (unlikely(vma->vm_flags & VM_NONLINEAR))
204 list_del_init(&vma->shared.vm_set.list);
205 else
206 vma_prio_tree_remove(vma, &mapping->i_mmap);
207 flush_dcache_mmap_unlock(mapping);
208}

/*
 * Unlink a file-based vm structure from its prio_tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
214void unlink_file_vma(struct vm_area_struct *vma)
215{
216 struct file *file = vma->vm_file;
217
218 if (file) {
219 struct address_space *mapping = file->f_mapping;
220 spin_lock(&mapping->i_mmap_lock);
221 __remove_shared_vm_struct(vma, file, mapping);
222 spin_unlock(&mapping->i_mmap_lock);
223 }
224}

/*
 * Close a vm structure and free it, returning the next.
 */
229static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
230{
231 struct vm_area_struct *next = vma->vm_next;
232
233 might_sleep();
234 if (vma->vm_ops && vma->vm_ops->close)
235 vma->vm_ops->close(vma);
236 if (vma->vm_file) {
237 fput(vma->vm_file);
238 if (vma->vm_flags & VM_EXECUTABLE)
239 removed_exe_file_vma(vma->vm_mm);
240 }
241 mpol_put(vma_policy(vma));
242 kmem_cache_free(vm_area_cachep, vma);
243 return next;
244}
245
246SYSCALL_DEFINE1(brk, unsigned long, brk)
247{
248 unsigned long rlim, retval;
249 unsigned long newbrk, oldbrk;
250 struct mm_struct *mm = current->mm;
251 unsigned long min_brk;
252
253 down_write(&mm->mmap_sem);
254
255#ifdef CONFIG_COMPAT_BRK
256 min_brk = mm->end_code;
257#else
258 min_brk = mm->start_brk;
259#endif
260 if (brk < min_brk)
261 goto out;

	/*
	 * Check against rlimit here. If this check is done later after the
	 * test of oldbrk with newbrk then it can escape the test and let the
	 * data segment grow beyond its set limit in the case where the limit
	 * is lowered after the test.
	 */
269 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
270 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
271 (mm->end_data - mm->start_data) > rlim)
272 goto out;
273
274 newbrk = PAGE_ALIGN(brk);
275 oldbrk = PAGE_ALIGN(mm->brk);
276 if (oldbrk == newbrk)
277 goto set_brk;
278
279
280 if (brk <= mm->brk) {
281 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
282 goto set_brk;
283 goto out;
284 }
285
286
287 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
288 goto out;
289
290
291 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
292 goto out;
293set_brk:
294 mm->brk = brk;
295out:
296 retval = mm->brk;
297 up_write(&mm->mmap_sem);
298 return retval;
299}
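
/*
 * Example (assumes 4K pages, made-up addresses): moving brk from 0x0804b123
 * to 0x0804b456 leaves PAGE_ALIGN(oldbrk) == PAGE_ALIGN(newbrk) ==
 * 0x0804c000, so only mm->brk is updated and nothing is mapped or unmapped;
 * moving it to 0x0804d010 maps the two whole pages in between via do_brk(),
 * and shrinking it back unmaps them again via do_munmap().
 */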
300
301#ifdef DEBUG_MM_RB
302static int browse_rb(struct rb_root *root)
303{
304 int i = 0, j;
305 struct rb_node *nd, *pn = NULL;
306 unsigned long prev = 0, pend = 0;
307
308 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
309 struct vm_area_struct *vma;
310 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
311 if (vma->vm_start < prev)
312 printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
313 if (vma->vm_start < pend)
314 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
315 if (vma->vm_start > vma->vm_end)
316 printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
317 i++;
318 pn = nd;
319 prev = vma->vm_start;
320 pend = vma->vm_end;
321 }
322 j = 0;
323 for (nd = pn; nd; nd = rb_prev(nd)) {
324 j++;
325 }
326 if (i != j)
327 printk("backwards %d, forwards %d\n", j, i), i = 0;
328 return i;
329}
330
331void validate_mm(struct mm_struct *mm)
332{
333 int bug = 0;
334 int i = 0;
335 struct vm_area_struct *tmp = mm->mmap;
336 while (tmp) {
337 tmp = tmp->vm_next;
338 i++;
339 }
340 if (i != mm->map_count)
341 printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
342 i = browse_rb(&mm->mm_rb);
343 if (i != mm->map_count)
344 printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
345 BUG_ON(bug);
346}
347#else
348#define validate_mm(mm) do { } while (0)
349#endif
350
351static struct vm_area_struct *
352find_vma_prepare(struct mm_struct *mm, unsigned long addr,
353 struct vm_area_struct **pprev, struct rb_node ***rb_link,
354 struct rb_node ** rb_parent)
355{
356 struct vm_area_struct * vma;
357 struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
358
359 __rb_link = &mm->mm_rb.rb_node;
360 rb_prev = __rb_parent = NULL;
361 vma = NULL;
362
363 while (*__rb_link) {
364 struct vm_area_struct *vma_tmp;
365
366 __rb_parent = *__rb_link;
367 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
368
369 if (vma_tmp->vm_end > addr) {
370 vma = vma_tmp;
371 if (vma_tmp->vm_start <= addr)
372 break;
373 __rb_link = &__rb_parent->rb_left;
374 } else {
375 rb_prev = __rb_parent;
376 __rb_link = &__rb_parent->rb_right;
377 }
378 }
379
380 *pprev = NULL;
381 if (rb_prev)
382 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
383 *rb_link = __rb_link;
384 *rb_parent = __rb_parent;
385 return vma;
386}
387
388static inline void
389__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
390 struct vm_area_struct *prev, struct rb_node *rb_parent)
391{
392 if (prev) {
393 vma->vm_next = prev->vm_next;
394 prev->vm_next = vma;
395 } else {
396 mm->mmap = vma;
397 if (rb_parent)
398 vma->vm_next = rb_entry(rb_parent,
399 struct vm_area_struct, vm_rb);
400 else
401 vma->vm_next = NULL;
402 }
403}
404
405void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
406 struct rb_node **rb_link, struct rb_node *rb_parent)
407{
408 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
409 rb_insert_color(&vma->vm_rb, &mm->mm_rb);
410}
411
412static void __vma_link_file(struct vm_area_struct *vma)
413{
414 struct file *file;
415
416 file = vma->vm_file;
417 if (file) {
418 struct address_space *mapping = file->f_mapping;
419
420 if (vma->vm_flags & VM_DENYWRITE)
421 atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
422 if (vma->vm_flags & VM_SHARED)
423 mapping->i_mmap_writable++;
424
425 flush_dcache_mmap_lock(mapping);
426 if (unlikely(vma->vm_flags & VM_NONLINEAR))
427 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
428 else
429 vma_prio_tree_insert(vma, &mapping->i_mmap);
430 flush_dcache_mmap_unlock(mapping);
431 }
432}
433
434static void
435__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
436 struct vm_area_struct *prev, struct rb_node **rb_link,
437 struct rb_node *rb_parent)
438{
439 __vma_link_list(mm, vma, prev, rb_parent);
440 __vma_link_rb(mm, vma, rb_link, rb_parent);
441 __anon_vma_link(vma);
442}
443
444static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
445 struct vm_area_struct *prev, struct rb_node **rb_link,
446 struct rb_node *rb_parent)
447{
448 struct address_space *mapping = NULL;
449
450 if (vma->vm_file)
451 mapping = vma->vm_file->f_mapping;
452
453 if (mapping) {
454 spin_lock(&mapping->i_mmap_lock);
455 vma->vm_truncate_count = mapping->truncate_count;
456 }
457 anon_vma_lock(vma);
458
459 __vma_link(mm, vma, prev, rb_link, rb_parent);
460 __vma_link_file(vma);
461
462 anon_vma_unlock(vma);
463 if (mapping)
464 spin_unlock(&mapping->i_mmap_lock);
465
466 mm->map_count++;
467 validate_mm(mm);
468}
469

/*
 * Helper for vma_adjust: insert vm structure into list and rbtree and
 * anon_vma, when it has already been inserted into the prio_tree earlier.
 */
475static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
476{
477 struct vm_area_struct *__vma, *prev;
478 struct rb_node **rb_link, *rb_parent;
479
480 __vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
481 BUG_ON(__vma && __vma->vm_start < vma->vm_end);
482 __vma_link(mm, vma, prev, rb_link, rb_parent);
483 mm->map_count++;
484}
485
486static inline void
487__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
488 struct vm_area_struct *prev)
489{
490 prev->vm_next = vma->vm_next;
491 rb_erase(&vma->vm_rb, &mm->mm_rb);
492 if (mm->mmap_cache == vma)
493 mm->mmap_cache = prev;
494}
495

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
503void vma_adjust(struct vm_area_struct *vma, unsigned long start,
504 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
505{
506 struct mm_struct *mm = vma->vm_mm;
507 struct vm_area_struct *next = vma->vm_next;
508 struct vm_area_struct *importer = NULL;
509 struct address_space *mapping = NULL;
510 struct prio_tree_root *root = NULL;
511 struct file *file = vma->vm_file;
512 struct anon_vma *anon_vma = NULL;
513 long adjust_next = 0;
514 int remove_next = 0;
515
516 if (next && !insert) {
517 if (end >= next->vm_end) {
518
519
520
521
522again: remove_next = 1 + (end > next->vm_end);
523 end = next->vm_end;
524 anon_vma = next->anon_vma;
525 importer = vma;
526 } else if (end > next->vm_start) {
527
528
529
530
531 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
532 anon_vma = next->anon_vma;
533 importer = vma;
534 } else if (end < vma->vm_end) {
535
536
537
538
539
540 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
541 anon_vma = next->anon_vma;
542 importer = next;
543 }
544 }
545
546 if (file) {
547 mapping = file->f_mapping;
548 if (!(vma->vm_flags & VM_NONLINEAR))
549 root = &mapping->i_mmap;
550 spin_lock(&mapping->i_mmap_lock);
551 if (importer &&
552 vma->vm_truncate_count != next->vm_truncate_count) {
553
554
555
556
557 importer->vm_truncate_count = 0;
558 }
559 if (insert) {
560 insert->vm_truncate_count = vma->vm_truncate_count;
561
562
563
564
565
566
567 __vma_link_file(insert);
568 }
569 }
570
571
572
573
574
575 if (vma->anon_vma && (insert || importer || start != vma->vm_start))
576 anon_vma = vma->anon_vma;
577 if (anon_vma) {
578 spin_lock(&anon_vma->lock);
579
580
581
582
583
584 if (importer && !importer->anon_vma) {
585 importer->anon_vma = anon_vma;
586 __anon_vma_link(importer);
587 }
588 }
589
590 if (root) {
591 flush_dcache_mmap_lock(mapping);
592 vma_prio_tree_remove(vma, root);
593 if (adjust_next)
594 vma_prio_tree_remove(next, root);
595 }
596
597 vma->vm_start = start;
598 vma->vm_end = end;
599 vma->vm_pgoff = pgoff;
600 if (adjust_next) {
601 next->vm_start += adjust_next << PAGE_SHIFT;
602 next->vm_pgoff += adjust_next;
603 }
604
605 if (root) {
606 if (adjust_next)
607 vma_prio_tree_insert(next, root);
608 vma_prio_tree_insert(vma, root);
609 flush_dcache_mmap_unlock(mapping);
610 }
611
612 if (remove_next) {
613
614
615
616
617 __vma_unlink(mm, next, vma);
618 if (file)
619 __remove_shared_vm_struct(next, file, mapping);
620 if (next->anon_vma)
621 __anon_vma_merge(vma, next);
622 } else if (insert) {
623
624
625
626
627
628 __insert_vm_struct(mm, insert);
629 }
630
631 if (anon_vma)
632 spin_unlock(&anon_vma->lock);
633 if (mapping)
634 spin_unlock(&mapping->i_mmap_lock);
635
636 if (remove_next) {
637 if (file) {
638 fput(file);
639 if (next->vm_flags & VM_EXECUTABLE)
640 removed_exe_file_vma(mm);
641 }
642 mm->map_count--;
643 mpol_put(vma_policy(next));
644 kmem_cache_free(vm_area_cachep, next);
645
646
647
648
649
650 if (remove_next == 2) {
651 next = vma->vm_next;
652 goto again;
653 }
654 }
655
656 validate_mm(mm);
657}
658
/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
663static inline int is_mergeable_vma(struct vm_area_struct *vma,
664 struct file *file, unsigned long vm_flags)
665{
666
667 if ((vma->vm_flags ^ vm_flags) & ~VM_CAN_NONLINEAR)
668 return 0;
669 if (vma->vm_file != file)
670 return 0;
671 if (vma->vm_ops && vma->vm_ops->close)
672 return 0;
673 return 1;
674}
675
676static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
677 struct anon_vma *anon_vma2)
678{
679 return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
680}
681

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of
 * pagecache indices because do_mmap_pgoff() does not permit mmaps which
 * wrap, nor mmaps which cover the final page at index -1UL.
 */
693static int
694can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
695 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
696{
697 if (is_mergeable_vma(vma, file, vm_flags) &&
698 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
699 if (vma->vm_pgoff == vm_pgoff)
700 return 1;
701 }
702 return 0;
703}
704

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but offsets incompatible.
 */
712static int
713can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
714 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
715{
716 if (is_mergeable_vma(vma, file, vm_flags) &&
717 is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
718 pgoff_t vm_pglen;
719 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
720 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
721 return 1;
722 }
723 return 0;
724}
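
/*
 * Example (hypothetical): a vma mapping pages 0-3 of a file (vm_pgoff == 0,
 * four pages long) can_vma_merge_after a request with vm_pgoff == 4, because
 * 0 + 4 == 4; a request with vm_pgoff == 8 would leave a hole in the file
 * offsets and is therefore not mergeable.
 */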
725

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The cases handled below are: extending prev forward over the new area
 * (possibly swallowing next as well, when the area exactly bridges the
 * gap and prev and next are mutually mergeable), and extending next
 * backward over the new area when only the successor side is mergeable.
 * On success the vma that now covers the range is returned; NULL means
 * no merge was possible and the caller must allocate a new vma.
 */
755struct vm_area_struct *vma_merge(struct mm_struct *mm,
756 struct vm_area_struct *prev, unsigned long addr,
757 unsigned long end, unsigned long vm_flags,
758 struct anon_vma *anon_vma, struct file *file,
759 pgoff_t pgoff, struct mempolicy *policy)
760{
761 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
762 struct vm_area_struct *area, *next;
763
764
765
766
767
768 if (vm_flags & VM_SPECIAL)
769 return NULL;
770
771 if (prev)
772 next = prev->vm_next;
773 else
774 next = mm->mmap;
775 area = next;
776 if (next && next->vm_end == end)
777 next = next->vm_next;
778
779
780
781
782 if (prev && prev->vm_end == addr &&
783 mpol_equal(vma_policy(prev), policy) &&
784 can_vma_merge_after(prev, vm_flags,
785 anon_vma, file, pgoff)) {
786
787
788
789 if (next && end == next->vm_start &&
790 mpol_equal(policy, vma_policy(next)) &&
791 can_vma_merge_before(next, vm_flags,
792 anon_vma, file, pgoff+pglen) &&
793 is_mergeable_anon_vma(prev->anon_vma,
794 next->anon_vma)) {
795
796 vma_adjust(prev, prev->vm_start,
797 next->vm_end, prev->vm_pgoff, NULL);
798 } else
799 vma_adjust(prev, prev->vm_start,
800 end, prev->vm_pgoff, NULL);
801 return prev;
802 }
803
804
805
806
807 if (next && end == next->vm_start &&
808 mpol_equal(policy, vma_policy(next)) &&
809 can_vma_merge_before(next, vm_flags,
810 anon_vma, file, pgoff+pglen)) {
811 if (prev && addr < prev->vm_end)
812 vma_adjust(prev, prev->vm_start,
813 addr, prev->vm_pgoff, NULL);
814 else
815 vma_adjust(area, addr, next->vm_end,
816 next->vm_pgoff - pglen, NULL);
817 return area;
818 }
819
820 return NULL;
821}

/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off
 * to allocate a new anon_vma.  It checks because a repetitive
 * sequence of mprotects and faults may otherwise lead to distinct
 * anon_vmas being allocated, preventing vma merge in subsequent
 * mprotect.
 */
831struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
832{
833 struct vm_area_struct *near;
834 unsigned long vm_flags;
835
836 near = vma->vm_next;
837 if (!near)
838 goto try_prev;
839
840
841
842
843
844
845
846 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
847 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
848
849 if (near->anon_vma && vma->vm_end == near->vm_start &&
850 mpol_equal(vma_policy(vma), vma_policy(near)) &&
851 can_vma_merge_before(near, vm_flags,
852 NULL, vma->vm_file, vma->vm_pgoff +
853 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
854 return near->anon_vma;
855try_prev:
856
857
858
859
860
861
862
863 BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
864 if (!near)
865 goto none;
866
867 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
868 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
869
870 if (near->anon_vma && near->vm_end == vma->vm_start &&
871 mpol_equal(vma_policy(near), vma_policy(vma)) &&
872 can_vma_merge_after(near, vm_flags,
873 NULL, vma->vm_file, vma->vm_pgoff))
874 return near->anon_vma;
875none:
876
877
878
879
880
881
882
883
884 return NULL;
885}
886
887#ifdef CONFIG_PROC_FS
888void vm_stat_account(struct mm_struct *mm, unsigned long flags,
889 struct file *file, long pages)
890{
891 const unsigned long stack_flags
892 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
893
894 if (file) {
895 mm->shared_vm += pages;
896 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
897 mm->exec_vm += pages;
898 } else if (flags & stack_flags)
899 mm->stack_vm += pages;
900 if (flags & (VM_RESERVED|VM_IO))
901 mm->reserved_vm += pages;
902}
903#endif

/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */

909unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
910 unsigned long len, unsigned long prot,
911 unsigned long flags, unsigned long pgoff)
912{
913 struct mm_struct * mm = current->mm;
914 struct inode *inode;
915 unsigned int vm_flags;
916 int error;
917 unsigned long reqprot = prot;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we don't add PROT_EXEC.)
	 */
925 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
926 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
927 prot |= PROT_EXEC;
928
929 if (!len)
930 return -EINVAL;
931
932 if (!(flags & MAP_FIXED))
933 addr = round_hint_to_min(addr);
934
935 error = arch_mmap_check(addr, len, flags);
936 if (error)
937 return error;
938
939
940 len = PAGE_ALIGN(len);
941 if (!len || len > TASK_SIZE)
942 return -ENOMEM;
943
944
945 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
946 return -EOVERFLOW;
947
948
949 if (mm->map_count > sysctl_max_map_count)
950 return -ENOMEM;
951
952 if (flags & MAP_HUGETLB) {
953 struct user_struct *user = NULL;
954 if (file)
955 return -EINVAL;
956
957
958
959
960
961
962
963 len = ALIGN(len, huge_page_size(&default_hstate));
964 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
965 &user, HUGETLB_ANONHUGE_INODE);
966 if (IS_ERR(file))
967 return PTR_ERR(file);
968 }
969
970
971
972
973 addr = get_unmapped_area(file, addr, len, pgoff, flags);
974 if (addr & ~PAGE_MASK)
975 return addr;
976
977
978
979
980
981 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
982 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
983
984 if (flags & MAP_LOCKED)
985 if (!can_do_mlock())
986 return -EPERM;
987
988
989 if (vm_flags & VM_LOCKED) {
990 unsigned long locked, lock_limit;
991 locked = len >> PAGE_SHIFT;
992 locked += mm->locked_vm;
993 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
994 lock_limit >>= PAGE_SHIFT;
995 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
996 return -EAGAIN;
997 }
998
999 inode = file ? file->f_path.dentry->d_inode : NULL;
1000
1001 if (file) {
1002 switch (flags & MAP_TYPE) {
1003 case MAP_SHARED:
1004 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1005 return -EACCES;

			/*
			 * Make sure we don't allow writing to an
			 * append-only file.
			 */
1011 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1012 return -EACCES;
1013
1014
1015
1016
1017 if (locks_verify_locked(inode))
1018 return -EAGAIN;
1019
1020 vm_flags |= VM_SHARED | VM_MAYSHARE;
1021 if (!(file->f_mode & FMODE_WRITE))
1022 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
1025 case MAP_PRIVATE:
1026 if (!(file->f_mode & FMODE_READ))
1027 return -EACCES;
1028 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1029 if (vm_flags & VM_EXEC)
1030 return -EPERM;
1031 vm_flags &= ~VM_MAYEXEC;
1032 }
1033
1034 if (!file->f_op || !file->f_op->mmap)
1035 return -ENODEV;
1036 break;
1037
1038 default:
1039 return -EINVAL;
1040 }
1041 } else {
1042 switch (flags & MAP_TYPE) {
1043 case MAP_SHARED:
1044
1045
1046
1047 pgoff = 0;
1048 vm_flags |= VM_SHARED | VM_MAYSHARE;
1049 break;
1050 case MAP_PRIVATE:
1051
1052
1053
1054 pgoff = addr >> PAGE_SHIFT;
1055 break;
1056 default:
1057 return -EINVAL;
1058 }
1059 }
1060
1061 error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
1062 if (error)
1063 return error;
1064 error = ima_file_mmap(file, prot);
1065 if (error)
1066 return error;
1067
1068 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1069}
1070EXPORT_SYMBOL(do_mmap_pgoff);
1071
/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
1078int vma_wants_writenotify(struct vm_area_struct *vma)
1079{
1080 unsigned int vm_flags = vma->vm_flags;
1081
1082
1083 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1084 return 0;
1085
1086
1087 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1088 return 1;
1089
1090
1091 if (pgprot_val(vma->vm_page_prot) !=
1092 pgprot_val(vm_get_page_prot(vm_flags)))
1093 return 0;
1094
1095
1096 if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
1097 return 0;
1098
1099
1100 return vma->vm_file && vma->vm_file->f_mapping &&
1101 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1102}
1103
/*
 * We account for memory if it's a private writable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
1108static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1109{
1110
1111
1112
1113
1114 if (file && is_file_hugepages(file))
1115 return 0;
1116
1117 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1118}
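
/*
 * Examples: a private writable anonymous mapping (or a MAP_PRIVATE file
 * mapping with PROT_WRITE) is charged against the commit limit here; shared
 * file-backed mappings, read-only private mappings, hugetlb mappings and
 * anything with VM_NORESERVE are not charged by this helper.
 */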
1119
1120unsigned long mmap_region(struct file *file, unsigned long addr,
1121 unsigned long len, unsigned long flags,
1122 unsigned int vm_flags, unsigned long pgoff)
1123{
1124 struct mm_struct *mm = current->mm;
1125 struct vm_area_struct *vma, *prev;
1126 int correct_wcount = 0;
1127 int error;
1128 struct rb_node **rb_link, *rb_parent;
1129 unsigned long charged = 0;
1130 struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
1131
1132
1133 error = -ENOMEM;
1134munmap_back:
1135 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
1136 if (vma && vma->vm_start < addr + len) {
1137 if (do_munmap(mm, addr, len))
1138 return -ENOMEM;
1139 goto munmap_back;
1140 }
1141
1142
1143 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
1144 return -ENOMEM;
1145
1146
1147
1148
1149
1150 if ((flags & MAP_NORESERVE)) {
1151
1152 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1153 vm_flags |= VM_NORESERVE;
1154
1155
1156 if (file && is_file_hugepages(file))
1157 vm_flags |= VM_NORESERVE;
1158 }
1159
1160
1161
1162
1163 if (accountable_mapping(file, vm_flags)) {
1164 charged = len >> PAGE_SHIFT;
1165 if (security_vm_enough_memory(charged))
1166 return -ENOMEM;
1167 vm_flags |= VM_ACCOUNT;
1168 }
1169
1170
1171
1172
1173 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1174 if (vma)
1175 goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. The address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
1182 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1183 if (!vma) {
1184 error = -ENOMEM;
1185 goto unacct_error;
1186 }
1187
1188 vma->vm_mm = mm;
1189 vma->vm_start = addr;
1190 vma->vm_end = addr + len;
1191 vma->vm_flags = vm_flags;
1192 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1193 vma->vm_pgoff = pgoff;
1194
1195 if (file) {
1196 error = -EINVAL;
1197 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1198 goto free_vma;
1199 if (vm_flags & VM_DENYWRITE) {
1200 error = deny_write_access(file);
1201 if (error)
1202 goto free_vma;
1203 correct_wcount = 1;
1204 }
1205 vma->vm_file = file;
1206 get_file(file);
1207 error = file->f_op->mmap(file, vma);
1208 if (error)
1209 goto unmap_and_free_vma;
1210 if (vm_flags & VM_EXECUTABLE)
1211 added_exe_file_vma(mm);

		/* Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method. -DaveM
		 */
1218 addr = vma->vm_start;
1219 pgoff = vma->vm_pgoff;
1220 vm_flags = vma->vm_flags;
1221 } else if (vm_flags & VM_SHARED) {
1222 error = shmem_zero_setup(vma);
1223 if (error)
1224 goto free_vma;
1225 }
1226
1227 if (vma_wants_writenotify(vma))
1228 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1229
1230 vma_link(mm, vma, prev, rb_link, rb_parent);
1231 file = vma->vm_file;
1232
1233
1234 if (correct_wcount)
1235 atomic_inc(&inode->i_writecount);
1236out:
1237 perf_event_mmap(vma);
1238
1239 mm->total_vm += len >> PAGE_SHIFT;
1240 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1241 if (vm_flags & VM_LOCKED) {
1242
1243
1244
1245 long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
1246 if (nr_pages < 0)
1247 return nr_pages;
1248 mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
1249 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1250 make_pages_present(addr, addr + len);
1251 return addr;
1252
1253unmap_and_free_vma:
1254 if (correct_wcount)
1255 atomic_inc(&inode->i_writecount);
1256 vma->vm_file = NULL;
1257 fput(file);
1258
1259
1260 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1261 charged = 0;
1262free_vma:
1263 kmem_cache_free(vm_area_cachep, vma);
1264unacct_error:
1265 if (charged)
1266 vm_unacct_memory(charged);
1267 return error;
1268}
1269

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
1281#ifndef HAVE_ARCH_UNMAPPED_AREA
1282unsigned long
1283arch_get_unmapped_area(struct file *filp, unsigned long addr,
1284 unsigned long len, unsigned long pgoff, unsigned long flags)
1285{
1286 struct mm_struct *mm = current->mm;
1287 struct vm_area_struct *vma;
1288 unsigned long start_addr;
1289
1290 if (len > TASK_SIZE)
1291 return -ENOMEM;
1292
1293 if (flags & MAP_FIXED)
1294 return addr;
1295
1296 if (addr) {
1297 addr = PAGE_ALIGN(addr);
1298 vma = find_vma(mm, addr);
1299 if (TASK_SIZE - len >= addr &&
1300 (!vma || addr + len <= vma->vm_start))
1301 return addr;
1302 }
1303 if (len > mm->cached_hole_size) {
1304 start_addr = addr = mm->free_area_cache;
1305 } else {
1306 start_addr = addr = TASK_UNMAPPED_BASE;
1307 mm->cached_hole_size = 0;
1308 }
1309
1310full_search:
1311 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
1312
1313 if (TASK_SIZE - len < addr) {
1314
1315
1316
1317
1318 if (start_addr != TASK_UNMAPPED_BASE) {
1319 addr = TASK_UNMAPPED_BASE;
1320 start_addr = addr;
1321 mm->cached_hole_size = 0;
1322 goto full_search;
1323 }
1324 return -ENOMEM;
1325 }
1326 if (!vma || addr + len <= vma->vm_start) {
1327
1328
1329
1330 mm->free_area_cache = addr + len;
1331 return addr;
1332 }
1333 if (addr + mm->cached_hole_size < vma->vm_start)
1334 mm->cached_hole_size = vma->vm_start - addr;
1335 addr = vma->vm_end;
1336 }
1337}
1338#endif
1339
1340void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1341{
1342
1343
1344
1345 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1346 mm->free_area_cache = addr;
1347 mm->cached_hole_size = ~0UL;
1348 }
1349}
1350
/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
1355#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1356unsigned long
1357arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1358 const unsigned long len, const unsigned long pgoff,
1359 const unsigned long flags)
1360{
1361 struct vm_area_struct *vma;
1362 struct mm_struct *mm = current->mm;
1363 unsigned long addr = addr0;
1364
1365
1366 if (len > TASK_SIZE)
1367 return -ENOMEM;
1368
1369 if (flags & MAP_FIXED)
1370 return addr;
1371
1372
1373 if (addr) {
1374 addr = PAGE_ALIGN(addr);
1375 vma = find_vma(mm, addr);
1376 if (TASK_SIZE - len >= addr &&
1377 (!vma || addr + len <= vma->vm_start))
1378 return addr;
1379 }
1380
1381
1382 if (len <= mm->cached_hole_size) {
1383 mm->cached_hole_size = 0;
1384 mm->free_area_cache = mm->mmap_base;
1385 }
1386
1387
1388 addr = mm->free_area_cache;
1389
1390
1391 if (addr > len) {
1392 vma = find_vma(mm, addr-len);
1393 if (!vma || addr <= vma->vm_start)
1394
1395 return (mm->free_area_cache = addr-len);
1396 }
1397
1398 if (mm->mmap_base < len)
1399 goto bottomup;
1400
1401 addr = mm->mmap_base-len;
1402
1403 do {
1404
1405
1406
1407
1408
1409 vma = find_vma(mm, addr);
1410 if (!vma || addr+len <= vma->vm_start)
1411
1412 return (mm->free_area_cache = addr);
1413
1414
1415 if (addr + mm->cached_hole_size < vma->vm_start)
1416 mm->cached_hole_size = vma->vm_start - addr;
1417
1418
1419 addr = vma->vm_start-len;
1420 } while (len < vma->vm_start);
1421
1422bottomup:
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
1429 mm->cached_hole_size = ~0UL;
1430 mm->free_area_cache = TASK_UNMAPPED_BASE;
1431 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1432
1433
1434
1435 mm->free_area_cache = mm->mmap_base;
1436 mm->cached_hole_size = ~0UL;
1437
1438 return addr;
1439}
1440#endif
1441
1442void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1443{
1444
1445
1446
1447 if (addr > mm->free_area_cache)
1448 mm->free_area_cache = addr;
1449
1450
1451 if (mm->free_area_cache > mm->mmap_base)
1452 mm->free_area_cache = mm->mmap_base;
1453}
1454
1455unsigned long
1456get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1457 unsigned long pgoff, unsigned long flags)
1458{
1459 unsigned long (*get_area)(struct file *, unsigned long,
1460 unsigned long, unsigned long, unsigned long);
1461
1462 get_area = current->mm->get_unmapped_area;
1463 if (file && file->f_op && file->f_op->get_unmapped_area)
1464 get_area = file->f_op->get_unmapped_area;
1465 addr = get_area(file, addr, len, pgoff, flags);
1466 if (IS_ERR_VALUE(addr))
1467 return addr;
1468
1469 if (addr > TASK_SIZE - len)
1470 return -ENOMEM;
1471 if (addr & ~PAGE_MASK)
1472 return -EINVAL;
1473
1474 return arch_rebalance_pgtables(addr, len);
1475}
1476
1477EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
1480struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1481{
1482 struct vm_area_struct *vma = NULL;
1483
1484 if (mm) {
1485
1486
1487 vma = mm->mmap_cache;
1488 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1489 struct rb_node * rb_node;
1490
1491 rb_node = mm->mm_rb.rb_node;
1492 vma = NULL;
1493
1494 while (rb_node) {
1495 struct vm_area_struct * vma_tmp;
1496
1497 vma_tmp = rb_entry(rb_node,
1498 struct vm_area_struct, vm_rb);
1499
1500 if (vma_tmp->vm_end > addr) {
1501 vma = vma_tmp;
1502 if (vma_tmp->vm_start <= addr)
1503 break;
1504 rb_node = rb_node->rb_left;
1505 } else
1506 rb_node = rb_node->rb_right;
1507 }
1508 if (vma)
1509 mm->mmap_cache = vma;
1510 }
1511 }
1512 return vma;
1513}
1514
1515EXPORT_SYMBOL(find_vma);
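
/*
 * Typical use (sketch of caller-side code, not part of this file): the
 * caller must hold mmap_sem at least for reading, and must check vm_start
 * itself, because find_vma() also returns the next vma above an unmapped
 * address:
 *
 *	down_read(&mm->mmap_sem);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma->vm_start <= addr)
 *		;	... addr lies inside vma ...
 *	up_read(&mm->mmap_sem);
 */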

/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
1518struct vm_area_struct *
1519find_vma_prev(struct mm_struct *mm, unsigned long addr,
1520 struct vm_area_struct **pprev)
1521{
1522 struct vm_area_struct *vma = NULL, *prev = NULL;
1523 struct rb_node *rb_node;
1524 if (!mm)
1525 goto out;
1526
1527
1528 vma = mm->mmap;
1529
1530
1531 rb_node = mm->mm_rb.rb_node;
1532
1533 while (rb_node) {
1534 struct vm_area_struct *vma_tmp;
1535 vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
1536
1537 if (addr < vma_tmp->vm_end) {
1538 rb_node = rb_node->rb_left;
1539 } else {
1540 prev = vma_tmp;
1541 if (!prev->vm_next || (addr < prev->vm_next->vm_end))
1542 break;
1543 rb_node = rb_node->rb_right;
1544 }
1545 }
1546
1547out:
1548 *pprev = prev;
1549 return prev ? prev->vm_next : vma;
1550}
1551
/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */
1557static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
1558{
1559 struct mm_struct *mm = vma->vm_mm;
1560 struct rlimit *rlim = current->signal->rlim;
1561 unsigned long new_start;
1562
1563
1564 if (!may_expand_vm(mm, grow))
1565 return -ENOMEM;
1566
1567
1568 if (size > rlim[RLIMIT_STACK].rlim_cur)
1569 return -ENOMEM;
1570
1571
1572 if (vma->vm_flags & VM_LOCKED) {
1573 unsigned long locked;
1574 unsigned long limit;
1575 locked = mm->locked_vm + grow;
1576 limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
1577 if (locked > limit && !capable(CAP_IPC_LOCK))
1578 return -ENOMEM;
1579 }
1580
1581
1582 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
1583 vma->vm_end - size;
1584 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
1585 return -EFAULT;
1586
1587
1588
1589
1590
1591 if (security_vm_enough_memory_mm(mm, grow))
1592 return -ENOMEM;
1593
1594
1595 mm->total_vm += grow;
1596 if (vma->vm_flags & VM_LOCKED)
1597 mm->locked_vm += grow;
1598 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
1599 return 0;
1600}
1601
1602#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */
1607#ifndef CONFIG_IA64
1608static
1609#endif
1610int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1611{
1612 int error;
1613
1614 if (!(vma->vm_flags & VM_GROWSUP))
1615 return -EFAULT;
1616
1617
1618
1619
1620
1621 if (unlikely(anon_vma_prepare(vma)))
1622 return -ENOMEM;
1623 anon_vma_lock(vma);
1624
	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 * Also guard against wrapping around to address 0.
	 */
1631 if (address < PAGE_ALIGN(address+4))
1632 address = PAGE_ALIGN(address+4);
1633 else {
1634 anon_vma_unlock(vma);
1635 return -ENOMEM;
1636 }
1637 error = 0;
1638
1639
1640 if (address > vma->vm_end) {
1641 unsigned long size, grow;
1642
1643 size = address - vma->vm_start;
1644 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1645
1646 error = acct_stack_growth(vma, size, grow);
1647 if (!error)
1648 vma->vm_end = address;
1649 }
1650 anon_vma_unlock(vma);
1651 return error;
1652}
1653#endif
1654
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
1658static int expand_downwards(struct vm_area_struct *vma,
1659 unsigned long address)
1660{
1661 int error;
1662
1663
1664
1665
1666
1667 if (unlikely(anon_vma_prepare(vma)))
1668 return -ENOMEM;
1669
1670 address &= PAGE_MASK;
1671 error = security_file_mmap(NULL, 0, 0, 0, address, 1);
1672 if (error)
1673 return error;
1674
1675 anon_vma_lock(vma);
1676
1677
1678
1679
1680
1681
1682
1683
1684 if (address < vma->vm_start) {
1685 unsigned long size, grow;
1686
1687 size = vma->vm_end - address;
1688 grow = (vma->vm_start - address) >> PAGE_SHIFT;
1689
1690 error = acct_stack_growth(vma, size, grow);
1691 if (!error) {
1692 vma->vm_start = address;
1693 vma->vm_pgoff -= grow;
1694 }
1695 }
1696 anon_vma_unlock(vma);
1697 return error;
1698}
1699
1700int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
1701{
1702 return expand_downwards(vma, address);
1703}
1704
1705#ifdef CONFIG_STACK_GROWSUP
1706int expand_stack(struct vm_area_struct *vma, unsigned long address)
1707{
1708 return expand_upwards(vma, address);
1709}
1710
1711struct vm_area_struct *
1712find_extend_vma(struct mm_struct *mm, unsigned long addr)
1713{
1714 struct vm_area_struct *vma, *prev;
1715
1716 addr &= PAGE_MASK;
1717 vma = find_vma_prev(mm, addr, &prev);
1718 if (vma && (vma->vm_start <= addr))
1719 return vma;
1720 if (!prev || expand_stack(prev, addr))
1721 return NULL;
1722 if (prev->vm_flags & VM_LOCKED) {
1723 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
1724 return NULL;
1725 }
1726 return prev;
1727}
1728#else
1729int expand_stack(struct vm_area_struct *vma, unsigned long address)
1730{
1731 return expand_downwards(vma, address);
1732}
1733
1734struct vm_area_struct *
1735find_extend_vma(struct mm_struct * mm, unsigned long addr)
1736{
1737 struct vm_area_struct * vma;
1738 unsigned long start;
1739
1740 addr &= PAGE_MASK;
1741 vma = find_vma(mm,addr);
1742 if (!vma)
1743 return NULL;
1744 if (vma->vm_start <= addr)
1745 return vma;
1746 if (!(vma->vm_flags & VM_GROWSDOWN))
1747 return NULL;
1748 start = vma->vm_start;
1749 if (expand_stack(vma, addr))
1750 return NULL;
1751 if (vma->vm_flags & VM_LOCKED) {
1752 if (mlock_vma_pages_range(vma, addr, start) < 0)
1753 return NULL;
1754 }
1755 return vma;
1756}
1757#endif
1758
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
1765static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
1766{
1767
1768 update_hiwater_vm(mm);
1769 do {
1770 long nrpages = vma_pages(vma);
1771
1772 mm->total_vm -= nrpages;
1773 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
1774 vma = remove_vma(vma);
1775 } while (vma);
1776 validate_mm(mm);
1777}
1778
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
1784static void unmap_region(struct mm_struct *mm,
1785 struct vm_area_struct *vma, struct vm_area_struct *prev,
1786 unsigned long start, unsigned long end)
1787{
1788 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
1789 struct mmu_gather *tlb;
1790 unsigned long nr_accounted = 0;
1791
1792 lru_add_drain();
1793 tlb = tlb_gather_mmu(mm, 0);
1794 update_hiwater_rss(mm);
1795 unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
1796 vm_unacct_memory(nr_accounted);
1797 free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
1798 next? next->vm_start: 0);
1799 tlb_finish_mmu(tlb, start, end);
1800}
1801
/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go.
 */
1806static void
1807detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1808 struct vm_area_struct *prev, unsigned long end)
1809{
1810 struct vm_area_struct **insertion_point;
1811 struct vm_area_struct *tail_vma = NULL;
1812 unsigned long addr;
1813
1814 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1815 do {
1816 rb_erase(&vma->vm_rb, &mm->mm_rb);
1817 mm->map_count--;
1818 tail_vma = vma;
1819 vma = vma->vm_next;
1820 } while (vma && vma->vm_start < end);
1821 *insertion_point = vma;
1822 tail_vma->vm_next = NULL;
1823 if (mm->unmap_area == arch_unmap_area)
1824 addr = prev ? prev->vm_end : mm->mmap_base;
1825 else
1826 addr = vma ? vma->vm_start : mm->mmap_base;
1827 mm->unmap_area(mm, addr);
1828 mm->mmap_cache = NULL;
1829}
1830
/*
 * Split a vma into two pieces at address 'addr'; a new vma is allocated
 * either for the first part or the tail.
 */
1835int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
1836 unsigned long addr, int new_below)
1837{
1838 struct mempolicy *pol;
1839 struct vm_area_struct *new;
1840
1841 if (is_vm_hugetlb_page(vma) && (addr &
1842 ~(huge_page_mask(hstate_vma(vma)))))
1843 return -EINVAL;
1844
1845 if (mm->map_count >= sysctl_max_map_count)
1846 return -ENOMEM;
1847
1848 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
1849 if (!new)
1850 return -ENOMEM;
1851
1852
1853 *new = *vma;
1854
1855 if (new_below)
1856 new->vm_end = addr;
1857 else {
1858 new->vm_start = addr;
1859 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
1860 }
1861
1862 pol = mpol_dup(vma_policy(vma));
1863 if (IS_ERR(pol)) {
1864 kmem_cache_free(vm_area_cachep, new);
1865 return PTR_ERR(pol);
1866 }
1867 vma_set_policy(new, pol);
1868
1869 if (new->vm_file) {
1870 get_file(new->vm_file);
1871 if (vma->vm_flags & VM_EXECUTABLE)
1872 added_exe_file_vma(mm);
1873 }
1874
1875 if (new->vm_ops && new->vm_ops->open)
1876 new->vm_ops->open(new);
1877
1878 if (new_below)
1879 vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
1880 ((addr - new->vm_start) >> PAGE_SHIFT), new);
1881 else
1882 vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
1883
1884 return 0;
1885}
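
/*
 * Example (hypothetical numbers): splitting a vma covering
 * [0x400000, 0x404000) at 0x402000 with new_below == 0 creates a new vma
 * for [0x402000, 0x404000) whose vm_pgoff is advanced by 2 pages, and
 * shrinks the old vma to [0x400000, 0x402000); with new_below == 1 the new
 * vma instead takes the lower half and keeps the original vm_pgoff.
 */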
1886
/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
1892int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
1893{
1894 unsigned long end;
1895 struct vm_area_struct *vma, *prev, *last;
1896
1897 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
1898 return -EINVAL;
1899
1900 if ((len = PAGE_ALIGN(len)) == 0)
1901 return -EINVAL;
1902
1903
1904 vma = find_vma_prev(mm, start, &prev);
1905 if (!vma)
1906 return 0;
1907
1908
1909
1910 end = start + len;
1911 if (vma->vm_start >= end)
1912 return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: the pieces created
	 * by split_vma() below stay on the vma list until the range is
	 * finally detached and unmapped.
	 */
1921 if (start > vma->vm_start) {
1922 int error = split_vma(mm, vma, start, 0);
1923 if (error)
1924 return error;
1925 prev = vma;
1926 }
1927
1928
1929 last = find_vma(mm, end);
1930 if (last && end > last->vm_start) {
1931 int error = split_vma(mm, last, end, 1);
1932 if (error)
1933 return error;
1934 }
1935 vma = prev? prev->vm_next: mm->mmap;
1936
1937
1938
1939
1940 if (mm->locked_vm) {
1941 struct vm_area_struct *tmp = vma;
1942 while (tmp && tmp->vm_start < end) {
1943 if (tmp->vm_flags & VM_LOCKED) {
1944 mm->locked_vm -= vma_pages(tmp);
1945 munlock_vma_pages_all(tmp);
1946 }
1947 tmp = tmp->vm_next;
1948 }
1949 }
1950
1951
1952
1953
1954 detach_vmas_to_be_unmapped(mm, vma, prev, end);
1955 unmap_region(mm, vma, prev, start, end);
1956
1957
1958 remove_vma_list(mm, vma);
1959
1960 return 0;
1961}
1962
1963EXPORT_SYMBOL(do_munmap);
1964
1965SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
1966{
1967 int ret;
1968 struct mm_struct *mm = current->mm;
1969
1970 profile_munmap(addr);
1971
1972 down_write(&mm->mmap_sem);
1973 ret = do_munmap(mm, addr, len);
1974 up_write(&mm->mmap_sem);
1975 return ret;
1976}
1977
1978static inline void verify_mm_writelocked(struct mm_struct *mm)
1979{
1980#ifdef CONFIG_DEBUG_VM
1981 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
1982 WARN_ON(1);
1983 up_read(&mm->mmap_sem);
1984 }
1985#endif
1986}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
1993unsigned long do_brk(unsigned long addr, unsigned long len)
1994{
1995 struct mm_struct * mm = current->mm;
1996 struct vm_area_struct * vma, * prev;
1997 unsigned long flags;
1998 struct rb_node ** rb_link, * rb_parent;
1999 pgoff_t pgoff = addr >> PAGE_SHIFT;
2000 int error;
2001
2002 len = PAGE_ALIGN(len);
2003 if (!len)
2004 return addr;
2005
2006 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
2007 return -EINVAL;
2008
2009 if (is_hugepage_only_range(mm, addr, len))
2010 return -EINVAL;
2011
2012 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
2013 if (error)
2014 return error;
2015
2016 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2017
2018 error = arch_mmap_check(addr, len, flags);
2019 if (error)
2020 return error;
2021
2022
2023
2024
2025 if (mm->def_flags & VM_LOCKED) {
2026 unsigned long locked, lock_limit;
2027 locked = len >> PAGE_SHIFT;
2028 locked += mm->locked_vm;
2029 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
2030 lock_limit >>= PAGE_SHIFT;
2031 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2032 return -EAGAIN;
2033 }
2034
	/*
	 * mm->mmap_sem is required to protect against another thread
	 * changing the mappings in case we sleep.
	 */
2039 verify_mm_writelocked(mm);
2040
2041
2042
2043
2044 munmap_back:
2045 vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2046 if (vma && vma->vm_start < addr + len) {
2047 if (do_munmap(mm, addr, len))
2048 return -ENOMEM;
2049 goto munmap_back;
2050 }
2051
2052
2053 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2054 return -ENOMEM;
2055
2056 if (mm->map_count > sysctl_max_map_count)
2057 return -ENOMEM;
2058
2059 if (security_vm_enough_memory(len >> PAGE_SHIFT))
2060 return -ENOMEM;
2061
2062
2063 vma = vma_merge(mm, prev, addr, addr + len, flags,
2064 NULL, NULL, pgoff, NULL);
2065 if (vma)
2066 goto out;
2067
2068
2069
2070
2071 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2072 if (!vma) {
2073 vm_unacct_memory(len >> PAGE_SHIFT);
2074 return -ENOMEM;
2075 }
2076
2077 vma->vm_mm = mm;
2078 vma->vm_start = addr;
2079 vma->vm_end = addr + len;
2080 vma->vm_pgoff = pgoff;
2081 vma->vm_flags = flags;
2082 vma->vm_page_prot = vm_get_page_prot(flags);
2083 vma_link(mm, vma, prev, rb_link, rb_parent);
2084out:
2085 mm->total_vm += len >> PAGE_SHIFT;
2086 if (flags & VM_LOCKED) {
2087 if (!mlock_vma_pages_range(vma, addr, addr + len))
2088 mm->locked_vm += (len >> PAGE_SHIFT);
2089 }
2090 return addr;
2091}
2092
2093EXPORT_SYMBOL(do_brk);

/* Release all mmaps. */
2096void exit_mmap(struct mm_struct *mm)
2097{
2098 struct mmu_gather *tlb;
2099 struct vm_area_struct *vma;
2100 unsigned long nr_accounted = 0;
2101 unsigned long end;
2102
2103
2104 mmu_notifier_release(mm);
2105
2106 if (mm->locked_vm) {
2107 vma = mm->mmap;
2108 while (vma) {
2109 if (vma->vm_flags & VM_LOCKED)
2110 munlock_vma_pages_all(vma);
2111 vma = vma->vm_next;
2112 }
2113 }
2114
2115 arch_exit_mmap(mm);
2116
2117 vma = mm->mmap;
2118 if (!vma)
2119 return;
2120
2121 lru_add_drain();
2122 flush_cache_mm(mm);
2123 tlb = tlb_gather_mmu(mm, 1);
2124
2125
2126 end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
2127 vm_unacct_memory(nr_accounted);
2128
2129 free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
2130 tlb_finish_mmu(tlb, 0, end);
2131
2132
2133
2134
2135
2136 while (vma)
2137 vma = remove_vma(vma);
2138
2139 BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2140}
2141
/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_lock is taken here.
 */
2146int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2147{
2148 struct vm_area_struct * __vma, * prev;
2149 struct rb_node ** rb_link, * rb_parent;
2150
	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when the page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first write fault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
2163 if (!vma->vm_file) {
2164 BUG_ON(vma->anon_vma);
2165 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2166 }
2167 __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
2168 if (__vma && __vma->vm_start < vma->vm_end)
2169 return -ENOMEM;
2170 if ((vma->vm_flags & VM_ACCOUNT) &&
2171 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2172 return -ENOMEM;
2173 vma_link(mm, vma, prev, rb_link, rb_parent);
2174 return 0;
2175}
2176
/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
2181struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2182 unsigned long addr, unsigned long len, pgoff_t pgoff)
2183{
2184 struct vm_area_struct *vma = *vmap;
2185 unsigned long vma_start = vma->vm_start;
2186 struct mm_struct *mm = vma->vm_mm;
2187 struct vm_area_struct *new_vma, *prev;
2188 struct rb_node **rb_link, *rb_parent;
2189 struct mempolicy *pol;
2190
2191
2192
2193
2194
2195 if (!vma->vm_file && !vma->anon_vma)
2196 pgoff = addr >> PAGE_SHIFT;
2197
2198 find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
2199 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2200 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2201 if (new_vma) {
2202
2203
2204
2205 if (vma_start >= new_vma->vm_start &&
2206 vma_start < new_vma->vm_end)
2207 *vmap = new_vma;
2208 } else {
2209 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2210 if (new_vma) {
2211 *new_vma = *vma;
2212 pol = mpol_dup(vma_policy(vma));
2213 if (IS_ERR(pol)) {
2214 kmem_cache_free(vm_area_cachep, new_vma);
2215 return NULL;
2216 }
2217 vma_set_policy(new_vma, pol);
2218 new_vma->vm_start = addr;
2219 new_vma->vm_end = addr + len;
2220 new_vma->vm_pgoff = pgoff;
2221 if (new_vma->vm_file) {
2222 get_file(new_vma->vm_file);
2223 if (vma->vm_flags & VM_EXECUTABLE)
2224 added_exe_file_vma(mm);
2225 }
2226 if (new_vma->vm_ops && new_vma->vm_ops->open)
2227 new_vma->vm_ops->open(new_vma);
2228 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2229 }
2230 }
2231 return new_vma;
2232}
2233
/*
 * Return 1 if the calling process may expand its vm space by the passed
 * number of pages, 0 otherwise.
 */
2238int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2239{
2240 unsigned long cur = mm->total_vm;
2241 unsigned long lim;
2242
2243 lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
2244
2245 if (cur + npages > lim)
2246 return 0;
2247 return 1;
2248}
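
/*
 * Example (hypothetical): with RLIMIT_AS set to 256MB and 4K pages the
 * limit is 65536 pages; a process whose total_vm is already 65000 pages
 * may still expand by 536 pages here, but a 1000-page request fails.
 */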
2249
2250
2251static int special_mapping_fault(struct vm_area_struct *vma,
2252 struct vm_fault *vmf)
2253{
2254 pgoff_t pgoff;
2255 struct page **pages;
2256
2257
2258
2259
2260
2261
2262
2263 pgoff = vmf->pgoff - vma->vm_pgoff;
2264
2265 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2266 pgoff--;
2267
2268 if (*pages) {
2269 struct page *page = *pages;
2270 get_page(page);
2271 vmf->page = page;
2272 return 0;
2273 }
2274
2275 return VM_FAULT_SIGBUS;
2276}

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
2281static void special_mapping_close(struct vm_area_struct *vma)
2282{
2283}
2284
2285static const struct vm_operations_struct special_mapping_vmops = {
2286 .close = special_mapping_close,
2287 .fault = special_mapping_fault,
2288};

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
2299int install_special_mapping(struct mm_struct *mm,
2300 unsigned long addr, unsigned long len,
2301 unsigned long vm_flags, struct page **pages)
2302{
2303 struct vm_area_struct *vma;
2304
2305 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2306 if (unlikely(vma == NULL))
2307 return -ENOMEM;
2308
2309 vma->vm_mm = mm;
2310 vma->vm_start = addr;
2311 vma->vm_end = addr + len;
2312
2313 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2314 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2315
2316 vma->vm_ops = &special_mapping_vmops;
2317 vma->vm_private_data = pages;
2318
2319 if (unlikely(insert_vm_struct(mm, vma))) {
2320 kmem_cache_free(vm_area_cachep, vma);
2321 return -ENOMEM;
2322 }
2323
2324 mm->total_vm += len >> PAGE_SHIFT;
2325
2326 perf_event_mmap(vma);
2327
2328 return 0;
2329}
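
/*
 * Usage sketch (illustrative only, not taken from any particular arch; the
 * identifiers vdso_addr, vdso_size and vdso_pages are hypothetical): this
 * is roughly how architecture code maps a vDSO-style page array into a new
 * process from its arch_setup_additional_pages(), with mmap_sem held for
 * writing:
 *
 *	ret = install_special_mapping(mm, vdso_addr, vdso_size,
 *				      VM_READ|VM_EXEC|
 *				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 *				      vdso_pages);
 */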
2330
2331static DEFINE_MUTEX(mm_all_locks_mutex);
2332
2333static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2334{
2335 if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2336
2337
2338
2339
2340 spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350 if (__test_and_set_bit(0, (unsigned long *)
2351 &anon_vma->head.next))
2352 BUG();
2353 }
2354}
2355
2356static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2357{
2358 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2369 BUG();
2370 spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
2371 }
2372}
2373

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without needing to
 * alter the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We can take all the locks in random order because the VM code
 * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
 * takes more than one of them in a row. Secondly we're protected
 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
2406int mm_take_all_locks(struct mm_struct *mm)
2407{
2408 struct vm_area_struct *vma;
2409 int ret = -EINTR;
2410
2411 BUG_ON(down_read_trylock(&mm->mmap_sem));
2412
2413 mutex_lock(&mm_all_locks_mutex);
2414
2415 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2416 if (signal_pending(current))
2417 goto out_unlock;
2418 if (vma->vm_file && vma->vm_file->f_mapping)
2419 vm_lock_mapping(mm, vma->vm_file->f_mapping);
2420 }
2421
2422 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2423 if (signal_pending(current))
2424 goto out_unlock;
2425 if (vma->anon_vma)
2426 vm_lock_anon_vma(mm, vma->anon_vma);
2427 }
2428
2429 ret = 0;
2430
2431out_unlock:
2432 if (ret)
2433 mm_drop_all_locks(mm);
2434
2435 return ret;
2436}
2437
2438static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
2439{
2440 if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453 if (!__test_and_clear_bit(0, (unsigned long *)
2454 &anon_vma->head.next))
2455 BUG();
2456 spin_unlock(&anon_vma->lock);
2457 }
2458}
2459
2460static void vm_unlock_mapping(struct address_space *mapping)
2461{
2462 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
2463
2464
2465
2466
2467 spin_unlock(&mapping->i_mmap_lock);
2468 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
2469 &mapping->flags))
2470 BUG();
2471 }
2472}
2473
/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
2478void mm_drop_all_locks(struct mm_struct *mm)
2479{
2480 struct vm_area_struct *vma;
2481
2482 BUG_ON(down_read_trylock(&mm->mmap_sem));
2483 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
2484
2485 for (vma = mm->mmap; vma; vma = vma->vm_next) {
2486 if (vma->anon_vma)
2487 vm_unlock_anon_vma(vma->anon_vma);
2488 if (vma->vm_file && vma->vm_file->f_mapping)
2489 vm_unlock_mapping(vma->vm_file->f_mapping);
2490 }
2491
2492 mutex_unlock(&mm_all_locks_mutex);
2493}
2494
/*
 * initialise the percpu counter for VM committed memory
 */
2498void __init mmap_init(void)
2499{
2500 int ret;
2501
2502 ret = percpu_counter_init(&vm_committed_as, 0);
2503 VM_BUG_ON(ret);
2504}
2505