/*
 * mm/mmap.c
 *
 * Memory mapping and VMA management: mmap, brk and munmap, plus the
 * rbtree, interval-tree and accounting bookkeeping that backs them.
 */
9#include <linux/kernel.h>
10#include <linux/slab.h>
11#include <linux/backing-dev.h>
12#include <linux/mm.h>
13#include <linux/shm.h>
14#include <linux/mman.h>
15#include <linux/pagemap.h>
16#include <linux/swap.h>
17#include <linux/syscalls.h>
18#include <linux/capability.h>
19#include <linux/init.h>
20#include <linux/file.h>
21#include <linux/fs.h>
22#include <linux/personality.h>
23#include <linux/security.h>
24#include <linux/hugetlb.h>
25#include <linux/profile.h>
26#include <linux/export.h>
27#include <linux/mount.h>
28#include <linux/mempolicy.h>
29#include <linux/rmap.h>
30#include <linux/mmu_notifier.h>
31#include <linux/perf_event.h>
32#include <linux/audit.h>
33#include <linux/khugepaged.h>
34#include <linux/uprobes.h>
35#include <linux/rbtree_augmented.h>
36#include <linux/sched/sysctl.h>
37#include <linux/notifier.h>
38#include <linux/memory.h>
39
40#include <asm/uaccess.h>
41#include <asm/cacheflush.h>
42#include <asm/tlb.h>
43#include <asm/mmu_context.h>
44
45#include "internal.h"
46
47#ifndef arch_mmap_check
48#define arch_mmap_check(addr, len, flags) (0)
49#endif
50
51#ifndef arch_rebalance_pgtables
52#define arch_rebalance_pgtables(addr, len) (addr)
53#endif
54
55static void unmap_region(struct mm_struct *mm,
56 struct vm_area_struct *vma, struct vm_area_struct *prev,
57 unsigned long start, unsigned long end);
58
/*
 * Description of the effects of mapping type and prot in the current
 * implementation: the first eight entries of protection_map[] describe
 * private (copy-on-write) mappings for each PROT_READ/WRITE/EXEC
 * combination, the last eight describe the corresponding shared
 * mappings.  vm_get_page_prot() below indexes this table with the
 * VM_READ|VM_WRITE|VM_EXEC|VM_SHARED bits of vm_flags.
 */
74pgprot_t protection_map[16] = {
75 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
76 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
77};
78
79pgprot_t vm_get_page_prot(unsigned long vm_flags)
80{
81 return __pgprot(pgprot_val(protection_map[vm_flags &
82 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
83 pgprot_val(arch_vm_get_page_prot(vm_flags)));
84}
85EXPORT_SYMBOL(vm_get_page_prot);
86
87int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
88int sysctl_overcommit_ratio __read_mostly = 50;
89int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
90unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17;
91unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13;
92
93
94
95
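/*
 * Per-CPU counter of the address space currently committed against the
 * overcommit limit.  Updated through vm_acct_memory()/vm_unacct_memory()
 * and read (approximately) by __vm_enough_memory() and
 * vm_memory_committed() below.  Kept in its own cacheline to avoid
 * false sharing on SMP.
 */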
96struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
97
98
99
100
101
102
103
104
105
106unsigned long vm_memory_committed(void)
107{
108 return percpu_counter_read_positive(&vm_committed_as);
109}
110EXPORT_SYMBOL_GPL(vm_memory_committed);
111
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping.  Returns 0 if there is enough memory for the allocation to
 * succeed and -ENOMEM if there is not.
 *
 * Behaviour depends on sysctl_overcommit_memory: OVERCOMMIT_ALWAYS never
 * refuses, OVERCOMMIT_GUESS uses a heuristic based on free, file-backed,
 * swap and reclaimable-slab pages, and the strict mode compares the
 * committed total against vm_commit_limit().
 *
 * cap_sys_admin is 1 if the process has admin privileges; only then may
 * the admin reserve (sysctl_admin_reserve_kbytes) be used.
 *
 * The pages are charged via vm_acct_memory() up front and uncharged
 * again on failure.
 */
128int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
129{
130 unsigned long free, allowed, reserve;
131
132 vm_acct_memory(pages);
133
134
135
136
137 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
138 return 0;
139
140 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
141 free = global_page_state(NR_FREE_PAGES);
142 free += global_page_state(NR_FILE_PAGES);
143
144
145
146
147
148
149
150 free -= global_page_state(NR_SHMEM);
151
152 free += get_nr_swap_pages();
153
154
155
156
157
158
159
160 free += global_page_state(NR_SLAB_RECLAIMABLE);
161
162
163
164
165 if (free <= totalreserve_pages)
166 goto error;
167 else
168 free -= totalreserve_pages;
169
170
171
172
173 if (!cap_sys_admin)
174 free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
175
176 if (free > pages)
177 return 0;
178
179 goto error;
180 }
181
182 allowed = vm_commit_limit();
183
184
185
186 if (!cap_sys_admin)
187 allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
188
189
190
191
192 if (mm) {
193 reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
194 allowed -= min(mm->total_vm / 32, reserve);
195 }
196
197 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
198 return 0;
199error:
200 vm_unacct_memory(pages);
201
202 return -ENOMEM;
203}
204
205
206
207
208static void __remove_shared_vm_struct(struct vm_area_struct *vma,
209 struct file *file, struct address_space *mapping)
210{
211 if (vma->vm_flags & VM_DENYWRITE)
212 atomic_inc(&file_inode(file)->i_writecount);
213 if (vma->vm_flags & VM_SHARED)
214 mapping->i_mmap_writable--;
215
216 flush_dcache_mmap_lock(mapping);
217 if (unlikely(vma->vm_flags & VM_NONLINEAR))
218 list_del_init(&vma->shared.nonlinear);
219 else
220 vma_interval_tree_remove(vma, &mapping->i_mmap);
221 flush_dcache_mmap_unlock(mapping);
222}
223
224
225
226
227
228void unlink_file_vma(struct vm_area_struct *vma)
229{
230 struct file *file = vma->vm_file;
231
232 if (file) {
233 struct address_space *mapping = file->f_mapping;
234 mutex_lock(&mapping->i_mmap_mutex);
235 __remove_shared_vm_struct(vma, file, mapping);
236 mutex_unlock(&mapping->i_mmap_mutex);
237 }
238}
239
240
241
242
243static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
244{
245 struct vm_area_struct *next = vma->vm_next;
246
247 might_sleep();
248 if (vma->vm_ops && vma->vm_ops->close)
249 vma->vm_ops->close(vma);
250 if (vma->vm_file)
251 fput(vma->vm_file);
252 mpol_put(vma_policy(vma));
253 kmem_cache_free(vm_area_cachep, vma);
254 return next;
255}
256
257static unsigned long do_brk(unsigned long addr, unsigned long len);
258
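/*
 * sys_brk() - move the end of the process data segment (the "program
 * break").  Growing the break maps new anonymous pages via do_brk(),
 * shrinking it unmaps them with do_munmap(), and RLIMIT_DATA limits the
 * total data size.  On any failure the current break is returned
 * unchanged.
 *
 * Userspace illustration (normally hidden behind libc's malloc/sbrk):
 *
 *	void *cur = sbrk(0);
 *	brk((char *)cur + 4096);	(extend the heap by one page)
 */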
259SYSCALL_DEFINE1(brk, unsigned long, brk)
260{
261 unsigned long rlim, retval;
262 unsigned long newbrk, oldbrk;
263 struct mm_struct *mm = current->mm;
264 unsigned long min_brk;
265 bool populate;
266
267 down_write(&mm->mmap_sem);
268
269#ifdef CONFIG_COMPAT_BRK
270
271
272
273
274
275 if (current->brk_randomized)
276 min_brk = mm->start_brk;
277 else
278 min_brk = mm->end_data;
279#else
280 min_brk = mm->start_brk;
281#endif
282 if (brk < min_brk)
283 goto out;
284
285
286
287
288
289
290
291 rlim = rlimit(RLIMIT_DATA);
292 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
293 (mm->end_data - mm->start_data) > rlim)
294 goto out;
295
296 newbrk = PAGE_ALIGN(brk);
297 oldbrk = PAGE_ALIGN(mm->brk);
298 if (oldbrk == newbrk)
299 goto set_brk;
300
301
302 if (brk <= mm->brk) {
303 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
304 goto set_brk;
305 goto out;
306 }
307
308
309 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
310 goto out;
311
312
313 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
314 goto out;
315
316set_brk:
317 mm->brk = brk;
318 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
319 up_write(&mm->mmap_sem);
320 if (populate)
321 mm_populate(oldbrk, newbrk - oldbrk);
322 return brk;
323
324out:
325 retval = mm->brk;
326 up_write(&mm->mmap_sem);
327 return retval;
328}
329
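/*
 * Largest free gap (in bytes) between this vma and its predecessor, or
 * anywhere in the rbtree subtree rooted at this vma.  The value is
 * cached in vma->rb_subtree_gap and maintained by the augmented-rbtree
 * callbacks below so that the get_unmapped_area() search can find a
 * large-enough hole in O(log n).
 */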
330static long vma_compute_subtree_gap(struct vm_area_struct *vma)
331{
332 unsigned long max, subtree_gap;
333 max = vma->vm_start;
334 if (vma->vm_prev)
335 max -= vma->vm_prev->vm_end;
336 if (vma->vm_rb.rb_left) {
337 subtree_gap = rb_entry(vma->vm_rb.rb_left,
338 struct vm_area_struct, vm_rb)->rb_subtree_gap;
339 if (subtree_gap > max)
340 max = subtree_gap;
341 }
342 if (vma->vm_rb.rb_right) {
343 subtree_gap = rb_entry(vma->vm_rb.rb_right,
344 struct vm_area_struct, vm_rb)->rb_subtree_gap;
345 if (subtree_gap > max)
346 max = subtree_gap;
347 }
348 return max;
349}
350
351#ifdef CONFIG_DEBUG_VM_RB
352static int browse_rb(struct rb_root *root)
353{
354 int i = 0, j, bug = 0;
355 struct rb_node *nd, *pn = NULL;
356 unsigned long prev = 0, pend = 0;
357
358 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
359 struct vm_area_struct *vma;
360 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
361 if (vma->vm_start < prev) {
362 printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
363 bug = 1;
364 }
365 if (vma->vm_start < pend) {
366 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
367 bug = 1;
368 }
369 if (vma->vm_start > vma->vm_end) {
370 printk("vm_end %lx < vm_start %lx\n",
371 vma->vm_end, vma->vm_start);
372 bug = 1;
373 }
374 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
375 printk("free gap %lx, correct %lx\n",
376 vma->rb_subtree_gap,
377 vma_compute_subtree_gap(vma));
378 bug = 1;
379 }
380 i++;
381 pn = nd;
382 prev = vma->vm_start;
383 pend = vma->vm_end;
384 }
385 j = 0;
386 for (nd = pn; nd; nd = rb_prev(nd))
387 j++;
388 if (i != j) {
389 printk("backwards %d, forwards %d\n", j, i);
390 bug = 1;
391 }
392 return bug ? -1 : i;
393}
394
395static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
396{
397 struct rb_node *nd;
398
399 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
400 struct vm_area_struct *vma;
401 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
402 BUG_ON(vma != ignore &&
403 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
404 }
405}
406
407void validate_mm(struct mm_struct *mm)
408{
409 int bug = 0;
410 int i = 0;
411 unsigned long highest_address = 0;
412 struct vm_area_struct *vma = mm->mmap;
413 while (vma) {
414 struct anon_vma_chain *avc;
415 vma_lock_anon_vma(vma);
416 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
417 anon_vma_interval_tree_verify(avc);
418 vma_unlock_anon_vma(vma);
419 highest_address = vma->vm_end;
420 vma = vma->vm_next;
421 i++;
422 }
423 if (i != mm->map_count) {
424 printk("map_count %d vm_next %d\n", mm->map_count, i);
425 bug = 1;
426 }
427 if (highest_address != mm->highest_vm_end) {
428 printk("mm->highest_vm_end %lx, found %lx\n",
429 mm->highest_vm_end, highest_address);
430 bug = 1;
431 }
432 i = browse_rb(&mm->mm_rb);
433 if (i != mm->map_count) {
434 printk("map_count %d rb %d\n", mm->map_count, i);
435 bug = 1;
436 }
437 BUG_ON(bug);
438}
439#else
440#define validate_mm_rb(root, ignore) do { } while (0)
441#define validate_mm(mm) do { } while (0)
442#endif
443
444RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
445 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
446
447
448
449
450
451
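/*
 * Propagate an updated rb_subtree_gap value up the rbtree after
 * vma->vm_start, vma->vm_end or a neighbour's boundaries have changed.
 */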
452static void vma_gap_update(struct vm_area_struct *vma)
453{
454
455
456
457
458 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
459}
460
461static inline void vma_rb_insert(struct vm_area_struct *vma,
462 struct rb_root *root)
463{
464
465 validate_mm_rb(root, NULL);
466
467 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
468}
469
470static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
471{
472
473
474
475
476 validate_mm_rb(root, vma);
477
478
479
480
481
482
483 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
484}
499
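/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.  Before updating the vma's vm_start,
 * vm_end or vm_pgoff, the interval-tree entries must be removed with
 * anon_vma_interval_tree_pre_update_vma() and re-inserted afterwards
 * with anon_vma_interval_tree_post_update_vma().  The caller must hold
 * the anon_vma lock across the update.
 */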
500static inline void
501anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
502{
503 struct anon_vma_chain *avc;
504
505 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
506 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
507}
508
509static inline void
510anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
511{
512 struct anon_vma_chain *avc;
513
514 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
515 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
516}
517
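/*
 * Find where a new vma covering [addr, end) would be linked: *pprev is
 * set to the vma immediately preceding the range, and *rb_link /
 * *rb_parent give the rbtree insertion point.  Returns -ENOMEM if the
 * range overlaps an existing vma.
 */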
518static int find_vma_links(struct mm_struct *mm, unsigned long addr,
519 unsigned long end, struct vm_area_struct **pprev,
520 struct rb_node ***rb_link, struct rb_node **rb_parent)
521{
522 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
523
524 __rb_link = &mm->mm_rb.rb_node;
525 rb_prev = __rb_parent = NULL;
526
527 while (*__rb_link) {
528 struct vm_area_struct *vma_tmp;
529
530 __rb_parent = *__rb_link;
531 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
532
533 if (vma_tmp->vm_end > addr) {
534
535 if (vma_tmp->vm_start < end)
536 return -ENOMEM;
537 __rb_link = &__rb_parent->rb_left;
538 } else {
539 rb_prev = __rb_parent;
540 __rb_link = &__rb_parent->rb_right;
541 }
542 }
543
544 *pprev = NULL;
545 if (rb_prev)
546 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
547 *rb_link = __rb_link;
548 *rb_parent = __rb_parent;
549 return 0;
550}
551
552static unsigned long count_vma_pages_range(struct mm_struct *mm,
553 unsigned long addr, unsigned long end)
554{
555 unsigned long nr_pages = 0;
556 struct vm_area_struct *vma;
557
558
559 vma = find_vma_intersection(mm, addr, end);
560 if (!vma)
561 return 0;
562
563 nr_pages = (min(end, vma->vm_end) -
564 max(addr, vma->vm_start)) >> PAGE_SHIFT;
565
566
567 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
568 unsigned long overlap_len;
569
570 if (vma->vm_start > end)
571 break;
572
573 overlap_len = min(end, vma->vm_end) - vma->vm_start;
574 nr_pages += overlap_len >> PAGE_SHIFT;
575 }
576
577 return nr_pages;
578}
579
580void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
581 struct rb_node **rb_link, struct rb_node *rb_parent)
582{
583
584 if (vma->vm_next)
585 vma_gap_update(vma->vm_next);
586 else
587 mm->highest_vm_end = vma->vm_end;
588
589
590
591
592
593
594
595
596
597
598 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
599 vma->rb_subtree_gap = 0;
600 vma_gap_update(vma);
601 vma_rb_insert(vma, &mm->mm_rb);
602}
603
604static void __vma_link_file(struct vm_area_struct *vma)
605{
606 struct file *file;
607
608 file = vma->vm_file;
609 if (file) {
610 struct address_space *mapping = file->f_mapping;
611
612 if (vma->vm_flags & VM_DENYWRITE)
613 atomic_dec(&file_inode(file)->i_writecount);
614 if (vma->vm_flags & VM_SHARED)
615 mapping->i_mmap_writable++;
616
617 flush_dcache_mmap_lock(mapping);
618 if (unlikely(vma->vm_flags & VM_NONLINEAR))
619 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
620 else
621 vma_interval_tree_insert(vma, &mapping->i_mmap);
622 flush_dcache_mmap_unlock(mapping);
623 }
624}
625
626static void
627__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
628 struct vm_area_struct *prev, struct rb_node **rb_link,
629 struct rb_node *rb_parent)
630{
631 __vma_link_list(mm, vma, prev, rb_parent);
632 __vma_link_rb(mm, vma, rb_link, rb_parent);
633}
634
635static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
636 struct vm_area_struct *prev, struct rb_node **rb_link,
637 struct rb_node *rb_parent)
638{
639 struct address_space *mapping = NULL;
640
641 if (vma->vm_file)
642 mapping = vma->vm_file->f_mapping;
643
644 if (mapping)
645 mutex_lock(&mapping->i_mmap_mutex);
646
647 __vma_link(mm, vma, prev, rb_link, rb_parent);
648 __vma_link_file(vma);
649
650 if (mapping)
651 mutex_unlock(&mapping->i_mmap_mutex);
652
653 mm->map_count++;
654 validate_mm(mm);
655}
656
657
658
659
660
661static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
662{
663 struct vm_area_struct *prev;
664 struct rb_node **rb_link, *rb_parent;
665
666 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
667 &prev, &rb_link, &rb_parent))
668 BUG();
669 __vma_link(mm, vma, prev, rb_link, rb_parent);
670 mm->map_count++;
671}
672
673static inline void
674__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
675 struct vm_area_struct *prev)
676{
677 struct vm_area_struct *next;
678
679 vma_rb_erase(vma, &mm->mm_rb);
680 prev->vm_next = next = vma->vm_next;
681 if (next)
682 next->vm_prev = prev;
683 if (mm->mmap_cache == vma)
684 mm->mmap_cache = prev;
685}
686
687
688
689
690
691
692
693
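/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */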
694int vma_adjust(struct vm_area_struct *vma, unsigned long start,
695 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
696{
697 struct mm_struct *mm = vma->vm_mm;
698 struct vm_area_struct *next = vma->vm_next;
699 struct vm_area_struct *importer = NULL;
700 struct address_space *mapping = NULL;
701 struct rb_root *root = NULL;
702 struct anon_vma *anon_vma = NULL;
703 struct file *file = vma->vm_file;
704 bool start_changed = false, end_changed = false;
705 long adjust_next = 0;
706 int remove_next = 0;
707
708 if (next && !insert) {
709 struct vm_area_struct *exporter = NULL;
710
711 if (end >= next->vm_end) {
712
713
714
715
716again: remove_next = 1 + (end > next->vm_end);
717 end = next->vm_end;
718 exporter = next;
719 importer = vma;
720 } else if (end > next->vm_start) {
721
722
723
724
725 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
726 exporter = next;
727 importer = vma;
728 } else if (end < vma->vm_end) {
729
730
731
732
733
734 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
735 exporter = vma;
736 importer = next;
737 }
738
739
740
741
742
743
744 if (exporter && exporter->anon_vma && !importer->anon_vma) {
745 if (anon_vma_clone(importer, exporter))
746 return -ENOMEM;
747 importer->anon_vma = exporter->anon_vma;
748 }
749 }
750
751 if (file) {
752 mapping = file->f_mapping;
753 if (!(vma->vm_flags & VM_NONLINEAR)) {
754 root = &mapping->i_mmap;
755 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
756
757 if (adjust_next)
758 uprobe_munmap(next, next->vm_start,
759 next->vm_end);
760 }
761
762 mutex_lock(&mapping->i_mmap_mutex);
763 if (insert) {
764
765
766
767
768
769
770 __vma_link_file(insert);
771 }
772 }
773
774 vma_adjust_trans_huge(vma, start, end, adjust_next);
775
776 anon_vma = vma->anon_vma;
777 if (!anon_vma && adjust_next)
778 anon_vma = next->anon_vma;
779 if (anon_vma) {
780 VM_BUG_ON(adjust_next && next->anon_vma &&
781 anon_vma != next->anon_vma);
782 anon_vma_lock_write(anon_vma);
783 anon_vma_interval_tree_pre_update_vma(vma);
784 if (adjust_next)
785 anon_vma_interval_tree_pre_update_vma(next);
786 }
787
788 if (root) {
789 flush_dcache_mmap_lock(mapping);
790 vma_interval_tree_remove(vma, root);
791 if (adjust_next)
792 vma_interval_tree_remove(next, root);
793 }
794
795 if (start != vma->vm_start) {
796 vma->vm_start = start;
797 start_changed = true;
798 }
799 if (end != vma->vm_end) {
800 vma->vm_end = end;
801 end_changed = true;
802 }
803 vma->vm_pgoff = pgoff;
804 if (adjust_next) {
805 next->vm_start += adjust_next << PAGE_SHIFT;
806 next->vm_pgoff += adjust_next;
807 }
808
809 if (root) {
810 if (adjust_next)
811 vma_interval_tree_insert(next, root);
812 vma_interval_tree_insert(vma, root);
813 flush_dcache_mmap_unlock(mapping);
814 }
815
816 if (remove_next) {
817
818
819
820
821 __vma_unlink(mm, next, vma);
822 if (file)
823 __remove_shared_vm_struct(next, file, mapping);
824 } else if (insert) {
825
826
827
828
829
830 __insert_vm_struct(mm, insert);
831 } else {
832 if (start_changed)
833 vma_gap_update(vma);
834 if (end_changed) {
835 if (!next)
836 mm->highest_vm_end = end;
837 else if (!adjust_next)
838 vma_gap_update(next);
839 }
840 }
841
842 if (anon_vma) {
843 anon_vma_interval_tree_post_update_vma(vma);
844 if (adjust_next)
845 anon_vma_interval_tree_post_update_vma(next);
846 anon_vma_unlock_write(anon_vma);
847 }
848 if (mapping)
849 mutex_unlock(&mapping->i_mmap_mutex);
850
851 if (root) {
852 uprobe_mmap(vma);
853
854 if (adjust_next)
855 uprobe_mmap(next);
856 }
857
858 if (remove_next) {
859 if (file) {
860 uprobe_munmap(next, next->vm_start, next->vm_end);
861 fput(file);
862 }
863 if (next->anon_vma)
864 anon_vma_merge(vma, next);
865 mm->map_count--;
866 mpol_put(vma_policy(next));
867 kmem_cache_free(vm_area_cachep, next);
868
869
870
871
872
873 next = vma->vm_next;
874 if (remove_next == 2)
875 goto again;
876 else if (next)
877 vma_gap_update(next);
878 else
879 mm->highest_vm_end = end;
880 }
881 if (insert && file)
882 uprobe_mmap(insert);
883
884 validate_mm(mm);
885
886 return 0;
887}
888
889
890
891
892
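/*
 * Two vmas are candidates for merging only if their flags and backing
 * file match.  If the vma has a ->close operation then the driver
 * probably needs to release per-vma resources, so we don't attempt to
 * merge those either.
 */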
893static inline int is_mergeable_vma(struct vm_area_struct *vma,
894 struct file *file, unsigned long vm_flags)
895{
896 if (vma->vm_flags ^ vm_flags)
897 return 0;
898 if (vma->vm_file != file)
899 return 0;
900 if (vma->vm_ops && vma->vm_ops->close)
901 return 0;
902 return 1;
903}
904
905static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
906 struct anon_vma *anon_vma2,
907 struct vm_area_struct *vma)
908{
909
910
911
912
913 if ((!anon_vma1 || !anon_vma2) && (!vma ||
914 list_is_singular(&vma->anon_vma_chain)))
915 return 1;
916 return anon_vma1 == anon_vma2;
917}
918
919
920
921
922
923
924
925
926
927
928
929
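/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned
 * (non-NULL) anon_vmas.
 */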
930static int
931can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
932 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
933{
934 if (is_mergeable_vma(vma, file, vm_flags) &&
935 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
936 if (vma->vm_pgoff == vm_pgoff)
937 return 1;
938 }
939 return 0;
940}
941
942
943
944
945
946
947
948
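/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 */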
949static int
950can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
951 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
952{
953 if (is_mergeable_vma(vma, file, vm_flags) &&
954 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
955 pgoff_t vm_pglen;
956 vm_pglen = vma_pages(vma);
957 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
958 return 1;
959 }
960 return 0;
961}
962
/*
 * Given a mapping request (addr, end, vm_flags, anon_vma, file, pgoff),
 * figure out whether it can be merged into the predecessor vma, the
 * successor vma, or both (neatly filling the hole between them).
 *
 * Mappings with VM_SPECIAL flags are never merged, and a merge requires
 * that flags, file, offsets and anon_vmas are all compatible (see
 * can_vma_merge_before/after() above).  On success the expanded vma is
 * returned; otherwise NULL is returned and the caller must allocate and
 * link a new vma itself.
 */
992struct vm_area_struct *vma_merge(struct mm_struct *mm,
993 struct vm_area_struct *prev, unsigned long addr,
994 unsigned long end, unsigned long vm_flags,
995 struct anon_vma *anon_vma, struct file *file,
996 pgoff_t pgoff, struct mempolicy *policy)
997{
998 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
999 struct vm_area_struct *area, *next;
1000 int err;
1001
1002
1003
1004
1005
1006 if (vm_flags & VM_SPECIAL)
1007 return NULL;
1008
1009 if (prev)
1010 next = prev->vm_next;
1011 else
1012 next = mm->mmap;
1013 area = next;
1014 if (next && next->vm_end == end)
1015 next = next->vm_next;
1016
1017
1018
1019
1020 if (prev && prev->vm_end == addr &&
1021 mpol_equal(vma_policy(prev), policy) &&
1022 can_vma_merge_after(prev, vm_flags,
1023 anon_vma, file, pgoff)) {
1024
1025
1026
1027 if (next && end == next->vm_start &&
1028 mpol_equal(policy, vma_policy(next)) &&
1029 can_vma_merge_before(next, vm_flags,
1030 anon_vma, file, pgoff+pglen) &&
1031 is_mergeable_anon_vma(prev->anon_vma,
1032 next->anon_vma, NULL)) {
1033
1034 err = vma_adjust(prev, prev->vm_start,
1035 next->vm_end, prev->vm_pgoff, NULL);
1036 } else
1037 err = vma_adjust(prev, prev->vm_start,
1038 end, prev->vm_pgoff, NULL);
1039 if (err)
1040 return NULL;
1041 khugepaged_enter_vma_merge(prev);
1042 return prev;
1043 }
1044
1045
1046
1047
1048 if (next && end == next->vm_start &&
1049 mpol_equal(policy, vma_policy(next)) &&
1050 can_vma_merge_before(next, vm_flags,
1051 anon_vma, file, pgoff+pglen)) {
1052 if (prev && addr < prev->vm_end)
1053 err = vma_adjust(prev, prev->vm_start,
1054 addr, prev->vm_pgoff, NULL);
1055 else
1056 err = vma_adjust(area, addr, next->vm_end,
1057 next->vm_pgoff - pglen, NULL);
1058 if (err)
1059 return NULL;
1060 khugepaged_enter_vma_merge(area);
1061 return area;
1062 }
1063
1064 return NULL;
1065}
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1081{
1082 return a->vm_end == b->vm_start &&
1083 mpol_equal(vma_policy(a), vma_policy(b)) &&
1084 a->vm_file == b->vm_file &&
1085 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
1086 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1087}
1088
/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'.  The 'a'/'b' vmas are in address order - one of them will
 * be the same as 'old', the other is the new one trying to share the
 * anon_vma.
 *
 * The anon_vma is only reused if 'old' has exactly one anon_vma on its
 * chain (list_is_singular()), i.e. it has not picked up additional
 * anon_vmas from forking or earlier merges.
 */
1111static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1112{
1113 if (anon_vma_compatible(a, b)) {
1114 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
1115
1116 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1117 return anon_vma;
1118 }
1119 return NULL;
1120}
1121
1122
1123
1124
1125
1126
1127
1128
1129
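/*
 * Used by anon_vma_prepare(): check whether an anon_vma of a
 * neighbouring vma can be reused for this one, so that related vmas
 * share an anon_vma and can later be merged.  Returns NULL if no
 * suitable neighbour exists.
 */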
1130struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1131{
1132 struct anon_vma *anon_vma;
1133 struct vm_area_struct *near;
1134
1135 near = vma->vm_next;
1136 if (!near)
1137 goto try_prev;
1138
1139 anon_vma = reusable_anon_vma(near, vma, near);
1140 if (anon_vma)
1141 return anon_vma;
1142try_prev:
1143 near = vma->vm_prev;
1144 if (!near)
1145 goto none;
1146
1147 anon_vma = reusable_anon_vma(near, near, vma);
1148 if (anon_vma)
1149 return anon_vma;
1150none:
1151
1152
1153
1154
1155
1156
1157
1158
1159 return NULL;
1160}
1161
1162#ifdef CONFIG_PROC_FS
1163void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1164 struct file *file, long pages)
1165{
1166 const unsigned long stack_flags
1167 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1168
1169 mm->total_vm += pages;
1170
1171 if (file) {
1172 mm->shared_vm += pages;
1173 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1174 mm->exec_vm += pages;
1175 } else if (flags & stack_flags)
1176 mm->stack_vm += pages;
1177}
1178#endif
1179
1180
1181
1182
1183
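/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr.
 */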
1184static inline unsigned long round_hint_to_min(unsigned long hint)
1185{
1186 hint &= PAGE_MASK;
1187 if (((void *)hint != NULL) &&
1188 (hint < mmap_min_addr))
1189 return PAGE_ALIGN(mmap_min_addr);
1190 return hint;
1191}
1192
1193
1194
1195
1196
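/*
 * The core of mmap: validate the request, pick an address with
 * get_unmapped_area(), translate the PROT_ and MAP_ bits into vm_flags
 * and hand off to mmap_region().  On success *populate is set to the
 * length the caller should pre-fault (MAP_POPULATE or mlock'd mappings).
 *
 * The caller must hold down_write(&current->mm->mmap_sem).
 */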
1197unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1198 unsigned long len, unsigned long prot,
1199 unsigned long flags, unsigned long pgoff,
1200 unsigned long *populate)
1201{
1202 struct mm_struct * mm = current->mm;
1203 vm_flags_t vm_flags;
1204
1205 *populate = 0;
1206
1207
1208
1209
1210
1211
1212
1213 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1214 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
1215 prot |= PROT_EXEC;
1216
1217 if (!len)
1218 return -EINVAL;
1219
1220 if (!(flags & MAP_FIXED))
1221 addr = round_hint_to_min(addr);
1222
1223
1224 len = PAGE_ALIGN(len);
1225 if (!len)
1226 return -ENOMEM;
1227
1228
1229 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1230 return -EOVERFLOW;
1231
1232
1233 if (mm->map_count > sysctl_max_map_count)
1234 return -ENOMEM;
1235
1236
1237
1238
1239 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1240 if (addr & ~PAGE_MASK)
1241 return addr;
1242
1243
1244
1245
1246
1247 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1248 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1249
1250 if (flags & MAP_LOCKED)
1251 if (!can_do_mlock())
1252 return -EPERM;
1253
1254
1255 if (vm_flags & VM_LOCKED) {
1256 unsigned long locked, lock_limit;
1257 locked = len >> PAGE_SHIFT;
1258 locked += mm->locked_vm;
1259 lock_limit = rlimit(RLIMIT_MEMLOCK);
1260 lock_limit >>= PAGE_SHIFT;
1261 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1262 return -EAGAIN;
1263 }
1264
1265 if (file) {
1266 struct inode *inode = file_inode(file);
1267
1268 switch (flags & MAP_TYPE) {
1269 case MAP_SHARED:
1270 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1271 return -EACCES;
1272
1273
1274
1275
1276
1277 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1278 return -EACCES;
1279
1280
1281
1282
1283 if (locks_verify_locked(inode))
1284 return -EAGAIN;
1285
1286 vm_flags |= VM_SHARED | VM_MAYSHARE;
1287 if (!(file->f_mode & FMODE_WRITE))
1288 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1289
1290
1291 case MAP_PRIVATE:
1292 if (!(file->f_mode & FMODE_READ))
1293 return -EACCES;
1294 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1295 if (vm_flags & VM_EXEC)
1296 return -EPERM;
1297 vm_flags &= ~VM_MAYEXEC;
1298 }
1299
1300 if (!file->f_op->mmap)
1301 return -ENODEV;
1302 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1303 return -EINVAL;
1304 break;
1305
1306 default:
1307 return -EINVAL;
1308 }
1309 } else {
1310 switch (flags & MAP_TYPE) {
1311 case MAP_SHARED:
1312 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1313 return -EINVAL;
1314
1315
1316
1317 pgoff = 0;
1318 vm_flags |= VM_SHARED | VM_MAYSHARE;
1319 break;
1320 case MAP_PRIVATE:
1321
1322
1323
1324 pgoff = addr >> PAGE_SHIFT;
1325 break;
1326 default:
1327 return -EINVAL;
1328 }
1329 }
1330
1331
1332
1333
1334
1335 if (flags & MAP_NORESERVE) {
1336
1337 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1338 vm_flags |= VM_NORESERVE;
1339
1340
1341 if (file && is_file_hugepages(file))
1342 vm_flags |= VM_NORESERVE;
1343 }
1344
1345 addr = mmap_region(file, addr, len, vm_flags, pgoff);
1346 if (!IS_ERR_VALUE(addr) &&
1347 ((vm_flags & VM_LOCKED) ||
1348 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1349 *populate = len;
1350 return addr;
1351}
1352
1353SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1354 unsigned long, prot, unsigned long, flags,
1355 unsigned long, fd, unsigned long, pgoff)
1356{
1357 struct file *file = NULL;
1358 unsigned long retval = -EBADF;
1359
1360 if (!(flags & MAP_ANONYMOUS)) {
1361 audit_mmap_fd(fd, flags);
1362 file = fget(fd);
1363 if (!file)
1364 goto out;
1365 if (is_file_hugepages(file))
1366 len = ALIGN(len, huge_page_size(hstate_file(file)));
1367 retval = -EINVAL;
1368 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1369 goto out_fput;
1370 } else if (flags & MAP_HUGETLB) {
1371 struct user_struct *user = NULL;
1372 struct hstate *hs;
1373
1374 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1375 if (!hs)
1376 return -EINVAL;
1377
1378 len = ALIGN(len, huge_page_size(hs));
1379
1380
1381
1382
1383
1384
1385 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1386 VM_NORESERVE,
1387 &user, HUGETLB_ANONHUGE_INODE,
1388 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1389 if (IS_ERR(file))
1390 return PTR_ERR(file);
1391 }
1392
1393 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1394
1395 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1396out_fput:
1397 if (file)
1398 fput(file);
1399out:
1400 return retval;
1401}
1402
1403#ifdef __ARCH_WANT_SYS_OLD_MMAP
1404struct mmap_arg_struct {
1405 unsigned long addr;
1406 unsigned long len;
1407 unsigned long prot;
1408 unsigned long flags;
1409 unsigned long fd;
1410 unsigned long offset;
1411};
1412
1413SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1414{
1415 struct mmap_arg_struct a;
1416
1417 if (copy_from_user(&a, arg, sizeof(a)))
1418 return -EFAULT;
1419 if (a.offset & ~PAGE_MASK)
1420 return -EINVAL;
1421
1422 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1423 a.offset >> PAGE_SHIFT);
1424}
1425#endif
1426
1427
1428
1429
1430
1431
1432
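/*
 * Some shared mappings will want the pages marked read-only
 * to track write events.  If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */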
1433int vma_wants_writenotify(struct vm_area_struct *vma)
1434{
1435 vm_flags_t vm_flags = vma->vm_flags;
1436
1437
1438 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1439 return 0;
1440
1441
1442 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1443 return 1;
1444
1445
1446 if (pgprot_val(vma->vm_page_prot) !=
1447 pgprot_val(vm_get_page_prot(vm_flags)))
1448 return 0;
1449
1450
1451 if (vm_flags & VM_PFNMAP)
1452 return 0;
1453
1454
1455 return vma->vm_file && vma->vm_file->f_mapping &&
1456 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1457}
1458
1459
1460
1461
1462
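/*
 * We account for memory if it's a private writable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */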
1463static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1464{
1465
1466
1467
1468
1469 if (file && is_file_hugepages(file))
1470 return 0;
1471
1472 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1473}
1474
1475unsigned long mmap_region(struct file *file, unsigned long addr,
1476 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
1477{
1478 struct mm_struct *mm = current->mm;
1479 struct vm_area_struct *vma, *prev;
1480 int error;
1481 struct rb_node **rb_link, *rb_parent;
1482 unsigned long charged = 0;
1483
1484
1485 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1486 unsigned long nr_pages;
1487
1488
1489
1490
1491
1492 if (!(vm_flags & MAP_FIXED))
1493 return -ENOMEM;
1494
1495 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1496
1497 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1498 return -ENOMEM;
1499 }
1500
1501
1502 error = -ENOMEM;
1503munmap_back:
1504 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
1505 if (do_munmap(mm, addr, len))
1506 return -ENOMEM;
1507 goto munmap_back;
1508 }
1509
1510
1511
1512
1513 if (accountable_mapping(file, vm_flags)) {
1514 charged = len >> PAGE_SHIFT;
1515 if (security_vm_enough_memory_mm(mm, charged))
1516 return -ENOMEM;
1517 vm_flags |= VM_ACCOUNT;
1518 }
1519
1520
1521
1522
1523 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1524 if (vma)
1525 goto out;
1526
1527
1528
1529
1530
1531
1532 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1533 if (!vma) {
1534 error = -ENOMEM;
1535 goto unacct_error;
1536 }
1537
1538 vma->vm_mm = mm;
1539 vma->vm_start = addr;
1540 vma->vm_end = addr + len;
1541 vma->vm_flags = vm_flags;
1542 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1543 vma->vm_pgoff = pgoff;
1544 INIT_LIST_HEAD(&vma->anon_vma_chain);
1545
1546 if (file) {
1547 if (vm_flags & VM_DENYWRITE) {
1548 error = deny_write_access(file);
1549 if (error)
1550 goto free_vma;
1551 }
1552 vma->vm_file = get_file(file);
1553 error = file->f_op->mmap(file, vma);
1554 if (error)
1555 goto unmap_and_free_vma;
1556
1557
1558
1559
1560
1561
1562
1563
1564 WARN_ON_ONCE(addr != vma->vm_start);
1565
1566 addr = vma->vm_start;
1567 vm_flags = vma->vm_flags;
1568 } else if (vm_flags & VM_SHARED) {
1569 error = shmem_zero_setup(vma);
1570 if (error)
1571 goto free_vma;
1572 }
1573
1574 if (vma_wants_writenotify(vma)) {
1575 pgprot_t pprot = vma->vm_page_prot;
1576
1577
1578
1579
1580
1581
1582
1583
1584 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1585 if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
1586 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1587 }
1588
1589 vma_link(mm, vma, prev, rb_link, rb_parent);
1590
1591 if (vm_flags & VM_DENYWRITE)
1592 allow_write_access(file);
1593 file = vma->vm_file;
1594out:
1595 perf_event_mmap(vma);
1596
1597 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1598 if (vm_flags & VM_LOCKED) {
1599 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1600 vma == get_gate_vma(current->mm)))
1601 mm->locked_vm += (len >> PAGE_SHIFT);
1602 else
1603 vma->vm_flags &= ~VM_LOCKED;
1604 }
1605
1606 if (file)
1607 uprobe_mmap(vma);
1608
1609
1610
1611
1612
1613
1614
1615
1616 vma->vm_flags |= VM_SOFTDIRTY;
1617
1618 return addr;
1619
1620unmap_and_free_vma:
1621 if (vm_flags & VM_DENYWRITE)
1622 allow_write_access(file);
1623 vma->vm_file = NULL;
1624 fput(file);
1625
1626
1627 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1628 charged = 0;
1629free_vma:
1630 kmem_cache_free(vm_area_cachep, vma);
1631unacct_error:
1632 if (charged)
1633 vm_unacct_memory(charged);
1634 return error;
1635}
1636
1637unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1638{
1639
1640
1641
1642
1643
1644
1645
1646
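	/*
	 * The search looks for an rbtree node that is immediately
	 * preceded by a suitable gap, i.e. roughly:
	 *
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */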
1647 struct mm_struct *mm = current->mm;
1648 struct vm_area_struct *vma;
1649 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1650
1651
1652 length = info->length + info->align_mask;
1653 if (length < info->length)
1654 return -ENOMEM;
1655
1656
1657 if (info->high_limit < length)
1658 return -ENOMEM;
1659 high_limit = info->high_limit - length;
1660
1661 if (info->low_limit > high_limit)
1662 return -ENOMEM;
1663 low_limit = info->low_limit + length;
1664
1665
1666 if (RB_EMPTY_ROOT(&mm->mm_rb))
1667 goto check_highest;
1668 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1669 if (vma->rb_subtree_gap < length)
1670 goto check_highest;
1671
1672 while (true) {
1673
1674 gap_end = vma->vm_start;
1675 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1676 struct vm_area_struct *left =
1677 rb_entry(vma->vm_rb.rb_left,
1678 struct vm_area_struct, vm_rb);
1679 if (left->rb_subtree_gap >= length) {
1680 vma = left;
1681 continue;
1682 }
1683 }
1684
1685 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1686check_current:
1687
1688 if (gap_start > high_limit)
1689 return -ENOMEM;
1690 if (gap_end >= low_limit && gap_end - gap_start >= length)
1691 goto found;
1692
1693
1694 if (vma->vm_rb.rb_right) {
1695 struct vm_area_struct *right =
1696 rb_entry(vma->vm_rb.rb_right,
1697 struct vm_area_struct, vm_rb);
1698 if (right->rb_subtree_gap >= length) {
1699 vma = right;
1700 continue;
1701 }
1702 }
1703
1704
1705 while (true) {
1706 struct rb_node *prev = &vma->vm_rb;
1707 if (!rb_parent(prev))
1708 goto check_highest;
1709 vma = rb_entry(rb_parent(prev),
1710 struct vm_area_struct, vm_rb);
1711 if (prev == vma->vm_rb.rb_left) {
1712 gap_start = vma->vm_prev->vm_end;
1713 gap_end = vma->vm_start;
1714 goto check_current;
1715 }
1716 }
1717 }
1718
1719check_highest:
1720
1721 gap_start = mm->highest_vm_end;
1722 gap_end = ULONG_MAX;
1723 if (gap_start > high_limit)
1724 return -ENOMEM;
1725
1726found:
1727
1728 if (gap_start < info->low_limit)
1729 gap_start = info->low_limit;
1730
1731
1732 gap_start += (info->align_offset - gap_start) & info->align_mask;
1733
1734 VM_BUG_ON(gap_start + info->length > info->high_limit);
1735 VM_BUG_ON(gap_start + info->length > gap_end);
1736 return gap_start;
1737}
1738
1739unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1740{
1741 struct mm_struct *mm = current->mm;
1742 struct vm_area_struct *vma;
1743 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1744
1745
1746 length = info->length + info->align_mask;
1747 if (length < info->length)
1748 return -ENOMEM;
1749
1750
1751
1752
1753
1754 gap_end = info->high_limit;
1755 if (gap_end < length)
1756 return -ENOMEM;
1757 high_limit = gap_end - length;
1758
1759 if (info->low_limit > high_limit)
1760 return -ENOMEM;
1761 low_limit = info->low_limit + length;
1762
1763
1764 gap_start = mm->highest_vm_end;
1765 if (gap_start <= high_limit)
1766 goto found_highest;
1767
1768
1769 if (RB_EMPTY_ROOT(&mm->mm_rb))
1770 return -ENOMEM;
1771 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1772 if (vma->rb_subtree_gap < length)
1773 return -ENOMEM;
1774
1775 while (true) {
1776
1777 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1778 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1779 struct vm_area_struct *right =
1780 rb_entry(vma->vm_rb.rb_right,
1781 struct vm_area_struct, vm_rb);
1782 if (right->rb_subtree_gap >= length) {
1783 vma = right;
1784 continue;
1785 }
1786 }
1787
1788check_current:
1789
1790 gap_end = vma->vm_start;
1791 if (gap_end < low_limit)
1792 return -ENOMEM;
1793 if (gap_start <= high_limit && gap_end - gap_start >= length)
1794 goto found;
1795
1796
1797 if (vma->vm_rb.rb_left) {
1798 struct vm_area_struct *left =
1799 rb_entry(vma->vm_rb.rb_left,
1800 struct vm_area_struct, vm_rb);
1801 if (left->rb_subtree_gap >= length) {
1802 vma = left;
1803 continue;
1804 }
1805 }
1806
1807
1808 while (true) {
1809 struct rb_node *prev = &vma->vm_rb;
1810 if (!rb_parent(prev))
1811 return -ENOMEM;
1812 vma = rb_entry(rb_parent(prev),
1813 struct vm_area_struct, vm_rb);
1814 if (prev == vma->vm_rb.rb_right) {
1815 gap_start = vma->vm_prev ?
1816 vma->vm_prev->vm_end : 0;
1817 goto check_current;
1818 }
1819 }
1820 }
1821
1822found:
1823
1824 if (gap_end > info->high_limit)
1825 gap_end = info->high_limit;
1826
1827found_highest:
1828
1829 gap_end -= info->length;
1830 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1831
1832 VM_BUG_ON(gap_end < info->low_limit);
1833 VM_BUG_ON(gap_end < gap_start);
1834 return gap_end;
1835}
1836
1847
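/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */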
1848#ifndef HAVE_ARCH_UNMAPPED_AREA
1849unsigned long
1850arch_get_unmapped_area(struct file *filp, unsigned long addr,
1851 unsigned long len, unsigned long pgoff, unsigned long flags)
1852{
1853 struct mm_struct *mm = current->mm;
1854 struct vm_area_struct *vma;
1855 struct vm_unmapped_area_info info;
1856
1857 if (len > TASK_SIZE - mmap_min_addr)
1858 return -ENOMEM;
1859
1860 if (flags & MAP_FIXED)
1861 return addr;
1862
1863 if (addr) {
1864 addr = PAGE_ALIGN(addr);
1865 vma = find_vma(mm, addr);
1866 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1867 (!vma || addr + len <= vma->vm_start))
1868 return addr;
1869 }
1870
1871 info.flags = 0;
1872 info.length = len;
1873 info.low_limit = mm->mmap_base;
1874 info.high_limit = TASK_SIZE;
1875 info.align_mask = 0;
1876 return vm_unmapped_area(&info);
1877}
1878#endif
1879
1880
1881
1882
1883
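/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base).
 */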
1884#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1885unsigned long
1886arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1887 const unsigned long len, const unsigned long pgoff,
1888 const unsigned long flags)
1889{
1890 struct vm_area_struct *vma;
1891 struct mm_struct *mm = current->mm;
1892 unsigned long addr = addr0;
1893 struct vm_unmapped_area_info info;
1894
1895
1896 if (len > TASK_SIZE - mmap_min_addr)
1897 return -ENOMEM;
1898
1899 if (flags & MAP_FIXED)
1900 return addr;
1901
1902
1903 if (addr) {
1904 addr = PAGE_ALIGN(addr);
1905 vma = find_vma(mm, addr);
1906 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1907 (!vma || addr + len <= vma->vm_start))
1908 return addr;
1909 }
1910
1911 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1912 info.length = len;
1913 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
1914 info.high_limit = mm->mmap_base;
1915 info.align_mask = 0;
1916 addr = vm_unmapped_area(&info);
1917
1918
1919
1920
1921
1922
1923
1924 if (addr & ~PAGE_MASK) {
1925 VM_BUG_ON(addr != -ENOMEM);
1926 info.flags = 0;
1927 info.low_limit = TASK_UNMAPPED_BASE;
1928 info.high_limit = TASK_SIZE;
1929 addr = vm_unmapped_area(&info);
1930 }
1931
1932 return addr;
1933}
1934#endif
1935
1936unsigned long
1937get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1938 unsigned long pgoff, unsigned long flags)
1939{
1940 unsigned long (*get_area)(struct file *, unsigned long,
1941 unsigned long, unsigned long, unsigned long);
1942
1943 unsigned long error = arch_mmap_check(addr, len, flags);
1944 if (error)
1945 return error;
1946
1947
1948 if (len > TASK_SIZE)
1949 return -ENOMEM;
1950
1951 get_area = current->mm->get_unmapped_area;
1952 if (file && file->f_op->get_unmapped_area)
1953 get_area = file->f_op->get_unmapped_area;
1954 addr = get_area(file, addr, len, pgoff, flags);
1955 if (IS_ERR_VALUE(addr))
1956 return addr;
1957
1958 if (addr > TASK_SIZE - len)
1959 return -ENOMEM;
1960 if (addr & ~PAGE_MASK)
1961 return -EINVAL;
1962
1963 addr = arch_rebalance_pgtables(addr, len);
1964 error = security_mmap_addr(addr);
1965 return error ? error : addr;
1966}
1967
1968EXPORT_SYMBOL(get_unmapped_area);
1969
1970
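/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */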
1971struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
1972{
1973 struct vm_area_struct *vma = NULL;
1974
1975
1976
1977 vma = ACCESS_ONCE(mm->mmap_cache);
1978 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
1979 struct rb_node *rb_node;
1980
1981 rb_node = mm->mm_rb.rb_node;
1982 vma = NULL;
1983
1984 while (rb_node) {
1985 struct vm_area_struct *vma_tmp;
1986
1987 vma_tmp = rb_entry(rb_node,
1988 struct vm_area_struct, vm_rb);
1989
1990 if (vma_tmp->vm_end > addr) {
1991 vma = vma_tmp;
1992 if (vma_tmp->vm_start <= addr)
1993 break;
1994 rb_node = rb_node->rb_left;
1995 } else
1996 rb_node = rb_node->rb_right;
1997 }
1998 if (vma)
1999 mm->mmap_cache = vma;
2000 }
2001 return vma;
2002}
2003
2004EXPORT_SYMBOL(find_vma);
2005
2006
2007
2008
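/*
 * Same as find_vma, but also return a pointer to the previous VMA
 * in *pprev.
 */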
2009struct vm_area_struct *
2010find_vma_prev(struct mm_struct *mm, unsigned long addr,
2011 struct vm_area_struct **pprev)
2012{
2013 struct vm_area_struct *vma;
2014
2015 vma = find_vma(mm, addr);
2016 if (vma) {
2017 *pprev = vma->vm_prev;
2018 } else {
2019 struct rb_node *rb_node = mm->mm_rb.rb_node;
2020 *pprev = NULL;
2021 while (rb_node) {
2022 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2023 rb_node = rb_node->rb_right;
2024 }
2025 }
2026 return vma;
2027}
2028
2029
2030
2031
2032
2033
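/*
 * Verify that the stack growth is acceptable and
 * update accounting.  This is shared with both the
 * grow-up and grow-down cases.
 */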
2034static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2035{
2036 struct mm_struct *mm = vma->vm_mm;
2037 struct rlimit *rlim = current->signal->rlim;
2038 unsigned long new_start;
2039
2040
2041 if (!may_expand_vm(mm, grow))
2042 return -ENOMEM;
2043
2044
2045 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2046 return -ENOMEM;
2047
2048
2049 if (vma->vm_flags & VM_LOCKED) {
2050 unsigned long locked;
2051 unsigned long limit;
2052 locked = mm->locked_vm + grow;
2053 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2054 limit >>= PAGE_SHIFT;
2055 if (locked > limit && !capable(CAP_IPC_LOCK))
2056 return -ENOMEM;
2057 }
2058
2059
2060 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2061 vma->vm_end - size;
2062 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2063 return -EFAULT;
2064
2065
2066
2067
2068
2069 if (security_vm_enough_memory_mm(mm, grow))
2070 return -ENOMEM;
2071
2072
2073 if (vma->vm_flags & VM_LOCKED)
2074 mm->locked_vm += grow;
2075 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2076 return 0;
2077}
2078
2079#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2080
2081
2082
2083
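/*
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */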
2084int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2085{
2086 int error;
2087
2088 if (!(vma->vm_flags & VM_GROWSUP))
2089 return -EFAULT;
2090
2091
2092
2093
2094
2095 if (unlikely(anon_vma_prepare(vma)))
2096 return -ENOMEM;
2097 vma_lock_anon_vma(vma);
2098
2099
2100
2101
2102
2103
2104
2105 if (address < PAGE_ALIGN(address+4))
2106 address = PAGE_ALIGN(address+4);
2107 else {
2108 vma_unlock_anon_vma(vma);
2109 return -ENOMEM;
2110 }
2111 error = 0;
2112
2113
2114 if (address > vma->vm_end) {
2115 unsigned long size, grow;
2116
2117 size = address - vma->vm_start;
2118 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2119
2120 error = -ENOMEM;
2121 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2122 error = acct_stack_growth(vma, size, grow);
2123 if (!error) {
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135 spin_lock(&vma->vm_mm->page_table_lock);
2136 anon_vma_interval_tree_pre_update_vma(vma);
2137 vma->vm_end = address;
2138 anon_vma_interval_tree_post_update_vma(vma);
2139 if (vma->vm_next)
2140 vma_gap_update(vma->vm_next);
2141 else
2142 vma->vm_mm->highest_vm_end = address;
2143 spin_unlock(&vma->vm_mm->page_table_lock);
2144
2145 perf_event_mmap(vma);
2146 }
2147 }
2148 }
2149 vma_unlock_anon_vma(vma);
2150 khugepaged_enter_vma_merge(vma);
2151 validate_mm(vma->vm_mm);
2152 return error;
2153}
2154#endif
2155
2156
2157
2158
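/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */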
2159int expand_downwards(struct vm_area_struct *vma,
2160 unsigned long address)
2161{
2162 int error;
2163
2164
2165
2166
2167
2168 if (unlikely(anon_vma_prepare(vma)))
2169 return -ENOMEM;
2170
2171 address &= PAGE_MASK;
2172 error = security_mmap_addr(address);
2173 if (error)
2174 return error;
2175
2176 vma_lock_anon_vma(vma);
2177
2178
2179
2180
2181
2182
2183
2184
2185 if (address < vma->vm_start) {
2186 unsigned long size, grow;
2187
2188 size = vma->vm_end - address;
2189 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2190
2191 error = -ENOMEM;
2192 if (grow <= vma->vm_pgoff) {
2193 error = acct_stack_growth(vma, size, grow);
2194 if (!error) {
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206 spin_lock(&vma->vm_mm->page_table_lock);
2207 anon_vma_interval_tree_pre_update_vma(vma);
2208 vma->vm_start = address;
2209 vma->vm_pgoff -= grow;
2210 anon_vma_interval_tree_post_update_vma(vma);
2211 vma_gap_update(vma);
2212 spin_unlock(&vma->vm_mm->page_table_lock);
2213
2214 perf_event_mmap(vma);
2215 }
2216 }
2217 }
2218 vma_unlock_anon_vma(vma);
2219 khugepaged_enter_vma_merge(vma);
2220 validate_mm(vma->vm_mm);
2221 return error;
2222}
2223
2234
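/*
 * Note how expand_stack() below refuses to grow a stack so that it
 * would directly abut an adjacent mapping, unless that neighbour is
 * itself a growable stack (see the VM_GROWSUP/VM_GROWSDOWN checks) -
 * this preserves an implicit gap between a stack and its neighbours.
 */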
2235#ifdef CONFIG_STACK_GROWSUP
2236int expand_stack(struct vm_area_struct *vma, unsigned long address)
2237{
2238 struct vm_area_struct *next;
2239
2240 address &= PAGE_MASK;
2241 next = vma->vm_next;
2242 if (next && next->vm_start == address + PAGE_SIZE) {
2243 if (!(next->vm_flags & VM_GROWSUP))
2244 return -ENOMEM;
2245 }
2246 return expand_upwards(vma, address);
2247}
2248
2249struct vm_area_struct *
2250find_extend_vma(struct mm_struct *mm, unsigned long addr)
2251{
2252 struct vm_area_struct *vma, *prev;
2253
2254 addr &= PAGE_MASK;
2255 vma = find_vma_prev(mm, addr, &prev);
2256 if (vma && (vma->vm_start <= addr))
2257 return vma;
2258 if (!prev || expand_stack(prev, addr))
2259 return NULL;
2260 if (prev->vm_flags & VM_LOCKED)
2261 __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
2262 return prev;
2263}
2264#else
2265int expand_stack(struct vm_area_struct *vma, unsigned long address)
2266{
2267 struct vm_area_struct *prev;
2268
2269 address &= PAGE_MASK;
2270 prev = vma->vm_prev;
2271 if (prev && prev->vm_end == address) {
2272 if (!(prev->vm_flags & VM_GROWSDOWN))
2273 return -ENOMEM;
2274 }
2275 return expand_downwards(vma, address);
2276}
2277
2278struct vm_area_struct *
2279find_extend_vma(struct mm_struct * mm, unsigned long addr)
2280{
2281 struct vm_area_struct * vma;
2282 unsigned long start;
2283
2284 addr &= PAGE_MASK;
2285 vma = find_vma(mm,addr);
2286 if (!vma)
2287 return NULL;
2288 if (vma->vm_start <= addr)
2289 return vma;
2290 if (!(vma->vm_flags & VM_GROWSDOWN))
2291 return NULL;
2292 start = vma->vm_start;
2293 if (expand_stack(vma, addr))
2294 return NULL;
2295 if (vma->vm_flags & VM_LOCKED)
2296 __mlock_vma_pages_range(vma, addr, start, NULL);
2297 return vma;
2298}
2299#endif
2300
2301
2302
2303
2304
2305
2306
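/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */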
2307static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2308{
2309 unsigned long nr_accounted = 0;
2310
2311
2312 update_hiwater_vm(mm);
2313 do {
2314 long nrpages = vma_pages(vma);
2315
2316 if (vma->vm_flags & VM_ACCOUNT)
2317 nr_accounted += nrpages;
2318 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2319 vma = remove_vma(vma);
2320 } while (vma);
2321 vm_unacct_memory(nr_accounted);
2322 validate_mm(mm);
2323}
2324
2325
2326
2327
2328
2329
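/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */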
2330static void unmap_region(struct mm_struct *mm,
2331 struct vm_area_struct *vma, struct vm_area_struct *prev,
2332 unsigned long start, unsigned long end)
2333{
2334 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2335 struct mmu_gather tlb;
2336
2337 lru_add_drain();
2338 tlb_gather_mmu(&tlb, mm, start, end);
2339 update_hiwater_rss(mm);
2340 unmap_vmas(&tlb, vma, start, end);
2341 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2342 next ? next->vm_start : USER_PGTABLES_CEILING);
2343 tlb_finish_mmu(&tlb, start, end);
2344}
2345
2346
2347
2348
2349
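/*
 * Create a list of vma's touched by the unmap, removing them from the
 * mm's vma list and rbtree as we go.
 */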
2350static void
2351detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2352 struct vm_area_struct *prev, unsigned long end)
2353{
2354 struct vm_area_struct **insertion_point;
2355 struct vm_area_struct *tail_vma = NULL;
2356
2357 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2358 vma->vm_prev = NULL;
2359 do {
2360 vma_rb_erase(vma, &mm->mm_rb);
2361 mm->map_count--;
2362 tail_vma = vma;
2363 vma = vma->vm_next;
2364 } while (vma && vma->vm_start < end);
2365 *insertion_point = vma;
2366 if (vma) {
2367 vma->vm_prev = prev;
2368 vma_gap_update(vma);
2369 } else
2370 mm->highest_vm_end = prev ? prev->vm_end : 0;
2371 tail_vma->vm_next = NULL;
2372 mm->mmap_cache = NULL;
2373}
2374
2375
2376
2377
2378
2379static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2380 unsigned long addr, int new_below)
2381{
2382 struct vm_area_struct *new;
2383 int err = -ENOMEM;
2384
2385 if (is_vm_hugetlb_page(vma) && (addr &
2386 ~(huge_page_mask(hstate_vma(vma)))))
2387 return -EINVAL;
2388
2389 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2390 if (!new)
2391 goto out_err;
2392
2393
2394 *new = *vma;
2395
2396 INIT_LIST_HEAD(&new->anon_vma_chain);
2397
2398 if (new_below)
2399 new->vm_end = addr;
2400 else {
2401 new->vm_start = addr;
2402 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2403 }
2404
2405 err = vma_dup_policy(vma, new);
2406 if (err)
2407 goto out_free_vma;
2408
2409 if (anon_vma_clone(new, vma))
2410 goto out_free_mpol;
2411
2412 if (new->vm_file)
2413 get_file(new->vm_file);
2414
2415 if (new->vm_ops && new->vm_ops->open)
2416 new->vm_ops->open(new);
2417
2418 if (new_below)
2419 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2420 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2421 else
2422 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2423
2424
2425 if (!err)
2426 return 0;
2427
2428
2429 if (new->vm_ops && new->vm_ops->close)
2430 new->vm_ops->close(new);
2431 if (new->vm_file)
2432 fput(new->vm_file);
2433 unlink_anon_vmas(new);
2434 out_free_mpol:
2435 mpol_put(vma_policy(new));
2436 out_free_vma:
2437 kmem_cache_free(vm_area_cachep, new);
2438 out_err:
2439 return err;
2440}
2441
2442
2443
2444
2445
2446int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2447 unsigned long addr, int new_below)
2448{
2449 if (mm->map_count >= sysctl_max_map_count)
2450 return -ENOMEM;
2451
2452 return __split_vma(mm, vma, addr, new_below);
2453}
2454
2455
2456
2457
2458
2459
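/*
 * Munmap is split into 2 main parts -- this part which finds what needs
 * doing, and the areas themselves, which do the work.  Partial
 * unmappings are handled by splitting the vmas that straddle the start
 * or end of the range before detaching, unmapping and freeing them.
 */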
2460int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2461{
2462 unsigned long end;
2463 struct vm_area_struct *vma, *prev, *last;
2464
2465 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2466 return -EINVAL;
2467
2468 if ((len = PAGE_ALIGN(len)) == 0)
2469 return -EINVAL;
2470
2471
2472 vma = find_vma(mm, start);
2473 if (!vma)
2474 return 0;
2475 prev = vma->vm_prev;
2476
2477
2478
2479 end = start + len;
2480 if (vma->vm_start >= end)
2481 return 0;
2482
2483
2484
2485
2486
2487
2488
2489
2490 if (start > vma->vm_start) {
2491 int error;
2492
2493
2494
2495
2496
2497
2498 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2499 return -ENOMEM;
2500
2501 error = __split_vma(mm, vma, start, 0);
2502 if (error)
2503 return error;
2504 prev = vma;
2505 }
2506
2507
2508 last = find_vma(mm, end);
2509 if (last && end > last->vm_start) {
2510 int error = __split_vma(mm, last, end, 1);
2511 if (error)
2512 return error;
2513 }
2514 vma = prev? prev->vm_next: mm->mmap;
2515
2516
2517
2518
2519 if (mm->locked_vm) {
2520 struct vm_area_struct *tmp = vma;
2521 while (tmp && tmp->vm_start < end) {
2522 if (tmp->vm_flags & VM_LOCKED) {
2523 mm->locked_vm -= vma_pages(tmp);
2524 munlock_vma_pages_all(tmp);
2525 }
2526 tmp = tmp->vm_next;
2527 }
2528 }
2529
2530
2531
2532
2533 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2534 unmap_region(mm, vma, prev, start, end);
2535
2536
2537 remove_vma_list(mm, vma);
2538
2539 return 0;
2540}
2541
2542int vm_munmap(unsigned long start, size_t len)
2543{
2544 int ret;
2545 struct mm_struct *mm = current->mm;
2546
2547 down_write(&mm->mmap_sem);
2548 ret = do_munmap(mm, start, len);
2549 up_write(&mm->mmap_sem);
2550 return ret;
2551}
2552EXPORT_SYMBOL(vm_munmap);
2553
2554SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2555{
2556 profile_munmap(addr);
2557 return vm_munmap(addr, len);
2558}
2559
2560static inline void verify_mm_writelocked(struct mm_struct *mm)
2561{
2562#ifdef CONFIG_DEBUG_VM
2563 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2564 WARN_ON(1);
2565 up_read(&mm->mmap_sem);
2566 }
2567#endif
2568}
2569
2570
2571
2572
2573
2574
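/*
 * This is really a simplified "do_mmap": it only handles anonymous,
 * not file-backed mappings on behalf of brk()/vm_brk().  The mm
 * semaphore must already be held for writing (see
 * verify_mm_writelocked()).
 */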
2575static unsigned long do_brk(unsigned long addr, unsigned long len)
2576{
2577 struct mm_struct * mm = current->mm;
2578 struct vm_area_struct * vma, * prev;
2579 unsigned long flags;
2580 struct rb_node ** rb_link, * rb_parent;
2581 pgoff_t pgoff = addr >> PAGE_SHIFT;
2582 int error;
2583
2584 len = PAGE_ALIGN(len);
2585 if (!len)
2586 return addr;
2587
2588 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2589
2590 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2591 if (error & ~PAGE_MASK)
2592 return error;
2593
2594
2595
2596
2597 if (mm->def_flags & VM_LOCKED) {
2598 unsigned long locked, lock_limit;
2599 locked = len >> PAGE_SHIFT;
2600 locked += mm->locked_vm;
2601 lock_limit = rlimit(RLIMIT_MEMLOCK);
2602 lock_limit >>= PAGE_SHIFT;
2603 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2604 return -EAGAIN;
2605 }
2606
2607
2608
2609
2610
2611 verify_mm_writelocked(mm);
2612
2613
2614
2615
2616 munmap_back:
2617 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2618 if (do_munmap(mm, addr, len))
2619 return -ENOMEM;
2620 goto munmap_back;
2621 }
2622
2623
2624 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2625 return -ENOMEM;
2626
2627 if (mm->map_count > sysctl_max_map_count)
2628 return -ENOMEM;
2629
2630 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2631 return -ENOMEM;
2632
2633
2634 vma = vma_merge(mm, prev, addr, addr + len, flags,
2635 NULL, NULL, pgoff, NULL);
2636 if (vma)
2637 goto out;
2638
2639
2640
2641
2642 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2643 if (!vma) {
2644 vm_unacct_memory(len >> PAGE_SHIFT);
2645 return -ENOMEM;
2646 }
2647
2648 INIT_LIST_HEAD(&vma->anon_vma_chain);
2649 vma->vm_mm = mm;
2650 vma->vm_start = addr;
2651 vma->vm_end = addr + len;
2652 vma->vm_pgoff = pgoff;
2653 vma->vm_flags = flags;
2654 vma->vm_page_prot = vm_get_page_prot(flags);
2655 vma_link(mm, vma, prev, rb_link, rb_parent);
2656out:
2657 perf_event_mmap(vma);
2658 mm->total_vm += len >> PAGE_SHIFT;
2659 if (flags & VM_LOCKED)
2660 mm->locked_vm += (len >> PAGE_SHIFT);
2661 vma->vm_flags |= VM_SOFTDIRTY;
2662 return addr;
2663}

unsigned long vm_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long ret;
	bool populate;

	down_write(&mm->mmap_sem);
	ret = do_brk(addr, len);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk);
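
/*
 * Usage sketch (illustrative, not part of the original file): in-kernel
 * callers such as the binfmt loaders use vm_brk() to set up anonymous,
 * zero-filled regions (e.g. the tail of a bss segment), roughly:
 *
 *	unsigned long ret = vm_brk(addr, len);
 *	if (IS_ERR_VALUE(ret))
 *		return ret;	// negative errno cast to unsigned long
 *
 * On success the requested address is returned; the length is page-aligned
 * internally, and the pages are populated immediately only when the mm has
 * VM_LOCKED in its def_flags.
 */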

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	/* mm's last user has gone, and it's about to be pulled down */
	mmu_notifier_release(mm);

	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	unmap_vmas(&tlb, vma, 0, -1);

	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, 0, -1);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
	}
	vm_unacct_memory(nr_accounted);

	WARN_ON(atomic_long_read(&mm->nr_ptes) >
			(FIRST_USER_ADDRESS + PMD_SIZE - 1) >> PMD_SHIFT);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must not be empty.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (!vma->vm_file) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}
	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
			VM_BUG_ON(faulted_in_anon_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (new_vma) {
			*new_vma = *vma;
			new_vma->vm_start = addr;
			new_vma->vm_end = addr + len;
			new_vma->vm_pgoff = pgoff;
			if (vma_dup_policy(vma, new_vma))
				goto out_free_vma;
			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
			if (anon_vma_clone(new_vma, vma))
				goto out_free_mempol;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			vma_link(mm, new_vma, prev, rb_link, rb_parent);
			*need_rmap_locks = false;
		}
	}
	return new_vma;

 out_free_mempol:
	mpol_put(vma_policy(new_vma));
 out_free_vma:
	kmem_cache_free(vm_area_cachep, new_vma);
	return NULL;
}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
{
	unsigned long cur = mm->total_vm;	/* pages */
	unsigned long lim;

	lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;

	if (cur + npages > lim)
		return 0;
	return 1;
}
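
/*
 * Worked example (illustrative, not from the original source): with
 * RLIMIT_AS set to 1 GiB and 4 KiB pages, lim = 1 GiB >> 12 = 262144 pages.
 * A process whose total_vm is 260000 pages may still expand by 2000 pages
 * (260000 + 2000 <= 262144) but not by 3000.
 */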


static int special_mapping_fault(struct vm_area_struct *vma,
				 struct vm_fault *vmf)
{
	pgoff_t pgoff;
	struct page **pages;

	/*
	 * special mappings have no vm_file, and in that case, the mm
	 * uses vm_pgoff internally. So we have to subtract it from here.
	 * We are allowed to do this because we are the mm; do not copy
	 * this code into drivers!
	 */
	pgoff = vmf->pgoff - vma->vm_pgoff;

	for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}
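
/*
 * Illustrative example (not part of the original file): with a
 * NULL-terminated array { pageA, pageB, NULL } installed as
 * vm_private_data, a fault on the vma's second page has pgoff 1,
 * the loop above steps past pageA and returns pageB; a fault on the
 * third page walks onto the NULL sentinel and gets VM_FAULT_SIGBUS.
 */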

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	int ret;
	struct vm_area_struct *vma;

	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (unlikely(vma == NULL))
		return -ENOMEM;

	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = &special_mapping_vmops;
	vma->vm_private_data = pages;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	mm->total_vm += len >> PAGE_SHIFT;

	perf_event_mmap(vma);

	return 0;

out:
	kmem_cache_free(vm_area_cachep, vma);
	return ret;
}
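
/*
 * Usage sketch (illustrative only; real callers live in arch code): a
 * special mapping such as a vDSO page is typically wired up like this,
 * assuming a NULL-terminated page array and mmap_sem held for writing:
 *
 *	static struct page *vdso_pages[2];	// { page, NULL }
 *
 *	ret = install_special_mapping(mm, vdso_addr, PAGE_SIZE,
 *				      VM_READ | VM_EXEC |
 *				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
 *				      vdso_pages);
 *
 * The resulting vma has no vm_file; faults against it are served from the
 * page array by special_mapping_fault().
 */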

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
	}
}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We can take all the locks in random order because the VM code
 * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
 * takes more than one of them in a row. Secondly we're protected
 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
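
/*
 * Usage sketch (illustrative, not part of the original file): callers pair
 * this with mm_drop_all_locks() while holding mmap_sem for writing, roughly:
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (ret)
 *		goto out;		// -EINTR: a signal arrived
 *	... publish state that page faults and rmap walks must not race ...
 *	mm_drop_all_locks(mm);
 * out:
 *	up_write(&mm->mmap_sem);
 */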

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		mutex_unlock(&mapping->i_mmap_mutex);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter used for overcommit accounting
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OOM_DISABLE mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
module_init(init_user_reserve)

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
 * only reserve 3% of free pages by default.
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
module_init(init_admin_reserve)
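
/*
 * Worked example (illustrative, not from the original source): on a machine
 * with 1 GiB free at boot, free_kbytes is 1048576, so the user reserve is
 * min(1048576 / 32, 1UL << 17) = min(32768, 131072) = 32768 kB (32 MiB) and
 * the admin reserve is min(32768, 8192) = 8192 kB (8 MiB). The 128 MiB and
 * 8 MiB caps only take effect once free memory exceeds 4 GiB and 256 MiB
 * respectively.
 */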

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		/* Default max is 8MB. Leave alone if modified by operator. */
		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
module_init(init_reserve_notifier)