/*
 * mm/mmap.c
 *
 * Memory mapping and VMA management: mmap/munmap/brk and the helpers
 * that create, merge, split and unmap virtual memory areas.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)	(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * Map the architecture-independent VM_READ/VM_WRITE/VM_EXEC/VM_SHARED
 * combinations to the architecture's page protection bits.
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;

	vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma)) {
		vm_flags &= ~VM_SHARED;
		vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
						     vm_flags);
	}
}

/* Overcommit policy and related tunables (vm.* sysctls). */
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;	/* default is 50% */
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

/*
 * Committed virtual memory is tracked in a per-CPU counter so that
 * mmap/brk/munmap do not bounce a single global cacheline.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * Return the approximate amount of committed memory.  Negative
 * intermediate values of the per-CPU counter are clamped to zero.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);

/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping.  Returns 0 if there is enough memory for the allocation to
 * succeed and -ENOMEM if there is not.
 *
 * cap_sys_admin is non-zero when the caller may dip into the admin
 * memory reserve.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long free, allowed, reserve;

	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
			-(s64)vm_committed_as_batch * num_online_cpus(),
			"memory commitment underflow");

	vm_acct_memory(pages);

	/* OVERCOMMIT_ALWAYS: never refuse an allocation. */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_page_state(NR_FREE_PAGES);
		free += global_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Reclaimable slab (e.g. dentry and inode caches) can
		 * be dropped under memory pressure, so count it as
		 * available as well.
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/* Leave the kernel's reserved pages alone. */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/* Reserve some for root */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = vm_commit_limit();

	/* Reserve some for root */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover.
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/*
 * The caller must hold the mapping's i_mmap lock.
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vma from its address_space's interval tree, so it
 * is no longer visible to rmap before its page tables are freed.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next vma.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

static unsigned long do_brk(unsigned long addr, unsigned long len);

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;
	bool populate;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK keeps the break right after the data segment
	 * unless brk randomization has been applied to this task.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/* Enforce RLIMIT_DATA on the requested break. */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}

/*
 * Compute the largest free gap (in bytes) that ends at this vma,
 * considering the gap to the previous vma and the cached gaps of the
 * rbtree subtrees below it.  Used to maintain rb_subtree_gap.
 */
static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, subtree_gap;
	max = vma->vm_start;
	if (vma->vm_prev)
		max -= vma->vm_prev->vm_end;
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}

#ifdef CONFIG_DEBUG_VM_RB
static int browse_rb(struct rb_root *root)
{
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma_chain *avc;

		vma_lock_anon_vma(vma);
		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
			anon_vma_interval_tree_verify(avc);
		vma_unlock_anon_vma(vma);
		highest_address = vma->vm_end;
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(&mm->mm_rb);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)

/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_prev->vm_end changed, without moving the vma in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * RB_DECLARE_CALLBACKS() above already generated a propagate
	 * callback that does exactly what we want here.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the vma being erased.
	 */
	validate_mm_rb(root, vma);

	/*
	 * rb_erase_augmented() is a fairly large inline function, so it
	 * is instantiated only here, with our augmented callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

/*
 * Before updating a vma's vm_start/vm_end/vm_pgoff fields, the vma must
 * be removed from every anon_vma interval tree it is linked into with
 * anon_vma_interval_tree_pre_update_vma(), and reinserted afterwards
 * with anon_vma_interval_tree_post_update_vma().  The whole update runs
 * under the anon_vma lock (see vma_adjust()).
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
548
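/*
 * Look up the place in mm's VMA rbtree where a mapping of [addr, end)
 * would be inserted.  On success, *pprev, *rb_link and *rb_parent are
 * set up for a later __vma_link_rb(); if the range overlaps an existing
 * vma, -ENOMEM is returned instead.
 */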
549static int find_vma_links(struct mm_struct *mm, unsigned long addr,
550 unsigned long end, struct vm_area_struct **pprev,
551 struct rb_node ***rb_link, struct rb_node **rb_parent)
552{
553 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
554
555 __rb_link = &mm->mm_rb.rb_node;
556 rb_prev = __rb_parent = NULL;
557
558 while (*__rb_link) {
559 struct vm_area_struct *vma_tmp;
560
561 __rb_parent = *__rb_link;
562 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
563
564 if (vma_tmp->vm_end > addr) {
565
566 if (vma_tmp->vm_start < end)
567 return -ENOMEM;
568 __rb_link = &__rb_parent->rb_left;
569 } else {
570 rb_prev = __rb_parent;
571 __rb_link = &__rb_parent->rb_right;
572 }
573 }
574
575 *pprev = NULL;
576 if (rb_prev)
577 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
578 *rb_link = __rb_link;
579 *rb_parent = __rb_parent;
580 return 0;
581}
582
583static unsigned long count_vma_pages_range(struct mm_struct *mm,
584 unsigned long addr, unsigned long end)
585{
586 unsigned long nr_pages = 0;
587 struct vm_area_struct *vma;
588
589
590 vma = find_vma_intersection(mm, addr, end);
591 if (!vma)
592 return 0;
593
594 nr_pages = (min(end, vma->vm_end) -
595 max(addr, vma->vm_start)) >> PAGE_SHIFT;
596
597
598 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
599 unsigned long overlap_len;
600
601 if (vma->vm_start > end)
602 break;
603
604 overlap_len = min(end, vma->vm_end) - vma->vm_start;
605 nr_pages += overlap_len >> PAGE_SHIFT;
606 }
607
608 return nr_pages;
609}
610
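/*
 * Insert vma into the mm's rbtree and refresh the cached gap data: the
 * next vma's gap (or mm->highest_vm_end) changes because vma now ends
 * in front of it, and vma's own rb_subtree_gap starts out stale and is
 * recomputed before the augmented insertion.
 */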
611void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
612 struct rb_node **rb_link, struct rb_node *rb_parent)
613{
614
615 if (vma->vm_next)
616 vma_gap_update(vma->vm_next);
617 else
618 mm->highest_vm_end = vma->vm_end;
619
620
621
622
623
624
625
626
627
628
629 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
630 vma->rb_subtree_gap = 0;
631 vma_gap_update(vma);
632 vma_rb_insert(vma, &mm->mm_rb);
633}
634
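/*
 * For file mappings, hook the vma into its address_space: adjust the
 * inode's i_writecount for VM_DENYWRITE, bump i_mmap_writable for
 * shared mappings, and insert the vma into the i_mmap interval tree.
 * The caller must hold the mapping's i_mmap lock (see vma_link()).
 */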
635static void __vma_link_file(struct vm_area_struct *vma)
636{
637 struct file *file;
638
639 file = vma->vm_file;
640 if (file) {
641 struct address_space *mapping = file->f_mapping;
642
643 if (vma->vm_flags & VM_DENYWRITE)
644 atomic_dec(&file_inode(file)->i_writecount);
645 if (vma->vm_flags & VM_SHARED)
646 atomic_inc(&mapping->i_mmap_writable);
647
648 flush_dcache_mmap_lock(mapping);
649 vma_interval_tree_insert(vma, &mapping->i_mmap);
650 flush_dcache_mmap_unlock(mapping);
651 }
652}
653
654static void
655__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
656 struct vm_area_struct *prev, struct rb_node **rb_link,
657 struct rb_node *rb_parent)
658{
659 __vma_link_list(mm, vma, prev, rb_parent);
660 __vma_link_rb(mm, vma, rb_link, rb_parent);
661}
662
663static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
664 struct vm_area_struct *prev, struct rb_node **rb_link,
665 struct rb_node *rb_parent)
666{
667 struct address_space *mapping = NULL;
668
669 if (vma->vm_file) {
670 mapping = vma->vm_file->f_mapping;
671 i_mmap_lock_write(mapping);
672 }
673
674 __vma_link(mm, vma, prev, rb_link, rb_parent);
675 __vma_link_file(vma);
676
677 if (mapping)
678 i_mmap_unlock_write(mapping);
679
680 mm->map_count++;
681 validate_mm(mm);
682}
683
684
685
686
687
688static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
689{
690 struct vm_area_struct *prev;
691 struct rb_node **rb_link, *rb_parent;
692
693 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
694 &prev, &rb_link, &rb_parent))
695 BUG();
696 __vma_link(mm, vma, prev, rb_link, rb_parent);
697 mm->map_count++;
698}
699
700static inline void
701__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
702 struct vm_area_struct *prev)
703{
704 struct vm_area_struct *next;
705
706 vma_rb_erase(vma, &mm->mm_rb);
707 prev->vm_next = next = vma->vm_next;
708 if (next)
709 next->vm_prev = prev;
710
711
712 vmacache_invalidate(mm);
713}
714
715
716
717
718
719
720
721
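/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */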
722int vma_adjust(struct vm_area_struct *vma, unsigned long start,
723 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
724{
725 struct mm_struct *mm = vma->vm_mm;
726 struct vm_area_struct *next = vma->vm_next;
727 struct vm_area_struct *importer = NULL;
728 struct address_space *mapping = NULL;
729 struct rb_root *root = NULL;
730 struct anon_vma *anon_vma = NULL;
731 struct file *file = vma->vm_file;
732 bool start_changed = false, end_changed = false;
733 long adjust_next = 0;
734 int remove_next = 0;
735
736 if (next && !insert) {
737 struct vm_area_struct *exporter = NULL;
738
739 if (end >= next->vm_end) {
740
741
742
743
744again: remove_next = 1 + (end > next->vm_end);
745 end = next->vm_end;
746 exporter = next;
747 importer = vma;
748 } else if (end > next->vm_start) {
749
750
751
752
753 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
754 exporter = next;
755 importer = vma;
756 } else if (end < vma->vm_end) {
757
758
759
760
761
762 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
763 exporter = vma;
764 importer = next;
765 }
766
767
768
769
770
771
772 if (exporter && exporter->anon_vma && !importer->anon_vma) {
773 int error;
774
775 importer->anon_vma = exporter->anon_vma;
776 error = anon_vma_clone(importer, exporter);
777 if (error)
778 return error;
779 }
780 }
781
782 if (file) {
783 mapping = file->f_mapping;
784 root = &mapping->i_mmap;
785 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
786
787 if (adjust_next)
788 uprobe_munmap(next, next->vm_start, next->vm_end);
789
790 i_mmap_lock_write(mapping);
791 if (insert) {
792
793
794
795
796
797
798 __vma_link_file(insert);
799 }
800 }
801
802 vma_adjust_trans_huge(vma, start, end, adjust_next);
803
804 anon_vma = vma->anon_vma;
805 if (!anon_vma && adjust_next)
806 anon_vma = next->anon_vma;
807 if (anon_vma) {
808 VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
809 anon_vma != next->anon_vma, next);
810 anon_vma_lock_write(anon_vma);
811 anon_vma_interval_tree_pre_update_vma(vma);
812 if (adjust_next)
813 anon_vma_interval_tree_pre_update_vma(next);
814 }
815
816 if (root) {
817 flush_dcache_mmap_lock(mapping);
818 vma_interval_tree_remove(vma, root);
819 if (adjust_next)
820 vma_interval_tree_remove(next, root);
821 }
822
823 if (start != vma->vm_start) {
824 vma->vm_start = start;
825 start_changed = true;
826 }
827 if (end != vma->vm_end) {
828 vma->vm_end = end;
829 end_changed = true;
830 }
831 vma->vm_pgoff = pgoff;
832 if (adjust_next) {
833 next->vm_start += adjust_next << PAGE_SHIFT;
834 next->vm_pgoff += adjust_next;
835 }
836
837 if (root) {
838 if (adjust_next)
839 vma_interval_tree_insert(next, root);
840 vma_interval_tree_insert(vma, root);
841 flush_dcache_mmap_unlock(mapping);
842 }
843
844 if (remove_next) {
845
846
847
848
849 __vma_unlink(mm, next, vma);
850 if (file)
851 __remove_shared_vm_struct(next, file, mapping);
852 } else if (insert) {
853
854
855
856
857
858 __insert_vm_struct(mm, insert);
859 } else {
860 if (start_changed)
861 vma_gap_update(vma);
862 if (end_changed) {
863 if (!next)
864 mm->highest_vm_end = end;
865 else if (!adjust_next)
866 vma_gap_update(next);
867 }
868 }
869
870 if (anon_vma) {
871 anon_vma_interval_tree_post_update_vma(vma);
872 if (adjust_next)
873 anon_vma_interval_tree_post_update_vma(next);
874 anon_vma_unlock_write(anon_vma);
875 }
876 if (mapping)
877 i_mmap_unlock_write(mapping);
878
879 if (root) {
880 uprobe_mmap(vma);
881
882 if (adjust_next)
883 uprobe_mmap(next);
884 }
885
886 if (remove_next) {
887 if (file) {
888 uprobe_munmap(next, next->vm_start, next->vm_end);
889 fput(file);
890 }
891 if (next->anon_vma)
892 anon_vma_merge(vma, next);
893 mm->map_count--;
894 mpol_put(vma_policy(next));
895 kmem_cache_free(vm_area_cachep, next);
896
897
898
899
900
901 next = vma->vm_next;
902 if (remove_next == 2)
903 goto again;
904 else if (next)
905 vma_gap_update(next);
906 else
907 mm->highest_vm_end = end;
908 }
909 if (insert && file)
910 uprobe_mmap(insert);
911
912 validate_mm(mm);
913
914 return 0;
915}
916
917
918
919
920
921static inline int is_mergeable_vma(struct vm_area_struct *vma,
922 struct file *file, unsigned long vm_flags)
923{
924
925
926
927
928
929
930
931
932 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
933 return 0;
934 if (vma->vm_file != file)
935 return 0;
936 if (vma->vm_ops && vma->vm_ops->close)
937 return 0;
938 return 1;
939}
940
941static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
942 struct anon_vma *anon_vma2,
943 struct vm_area_struct *vma)
944{
945
946
947
948
949 if ((!anon_vma1 || !anon_vma2) && (!vma ||
950 list_is_singular(&vma->anon_vma_chain)))
951 return 1;
952 return anon_vma1 == anon_vma2;
953}
954
955
956
957
958
959
960
961
962
963
964
965
966static int
967can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
968 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
969{
970 if (is_mergeable_vma(vma, file, vm_flags) &&
971 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
972 if (vma->vm_pgoff == vm_pgoff)
973 return 1;
974 }
975 return 0;
976}
977
978
979
980
981
982
983
984
985static int
986can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
987 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
988{
989 if (is_mergeable_vma(vma, file, vm_flags) &&
990 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
991 pgoff_t vm_pglen;
992 vm_pglen = vma_pages(vma);
993 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
994 return 1;
995 }
996 return 0;
997}
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
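/*
 * Given a mapping request (addr, end, vm_flags, anon_vma, file, pgoff),
 * figure out whether it can be merged with its predecessor and/or its
 * successor vma.  Returns the merged vma on success, or NULL when no
 * merge is possible and the caller must create a new vma.
 */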
1028struct vm_area_struct *vma_merge(struct mm_struct *mm,
1029 struct vm_area_struct *prev, unsigned long addr,
1030 unsigned long end, unsigned long vm_flags,
1031 struct anon_vma *anon_vma, struct file *file,
1032 pgoff_t pgoff, struct mempolicy *policy)
1033{
1034 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1035 struct vm_area_struct *area, *next;
1036 int err;
1037
1038
1039
1040
1041
1042 if (vm_flags & VM_SPECIAL)
1043 return NULL;
1044
1045 if (prev)
1046 next = prev->vm_next;
1047 else
1048 next = mm->mmap;
1049 area = next;
1050 if (next && next->vm_end == end)
1051 next = next->vm_next;
1052
1053
1054
1055
1056 if (prev && prev->vm_end == addr &&
1057 mpol_equal(vma_policy(prev), policy) &&
1058 can_vma_merge_after(prev, vm_flags,
1059 anon_vma, file, pgoff)) {
1060
1061
1062
1063 if (next && end == next->vm_start &&
1064 mpol_equal(policy, vma_policy(next)) &&
1065 can_vma_merge_before(next, vm_flags,
1066 anon_vma, file, pgoff+pglen) &&
1067 is_mergeable_anon_vma(prev->anon_vma,
1068 next->anon_vma, NULL)) {
1069
1070 err = vma_adjust(prev, prev->vm_start,
1071 next->vm_end, prev->vm_pgoff, NULL);
1072 } else
1073 err = vma_adjust(prev, prev->vm_start,
1074 end, prev->vm_pgoff, NULL);
1075 if (err)
1076 return NULL;
1077 khugepaged_enter_vma_merge(prev, vm_flags);
1078 return prev;
1079 }
1080
1081
1082
1083
1084 if (next && end == next->vm_start &&
1085 mpol_equal(policy, vma_policy(next)) &&
1086 can_vma_merge_before(next, vm_flags,
1087 anon_vma, file, pgoff+pglen)) {
1088 if (prev && addr < prev->vm_end)
1089 err = vma_adjust(prev, prev->vm_start,
1090 addr, prev->vm_pgoff, NULL);
1091 else
1092 err = vma_adjust(area, addr, next->vm_end,
1093 next->vm_pgoff - pglen, NULL);
1094 if (err)
1095 return NULL;
1096 khugepaged_enter_vma_merge(area, vm_flags);
1097 return area;
1098 }
1099
1100 return NULL;
1101}
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1117{
1118 return a->vm_end == b->vm_start &&
1119 mpol_equal(vma_policy(a), vma_policy(b)) &&
1120 a->vm_file == b->vm_file &&
1121 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1122 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1123}
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1148{
1149 if (anon_vma_compatible(a, b)) {
1150 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1151
1152 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1153 return anon_vma;
1154 }
1155 return NULL;
1156}
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1167{
1168 struct anon_vma *anon_vma;
1169 struct vm_area_struct *near;
1170
1171 near = vma->vm_next;
1172 if (!near)
1173 goto try_prev;
1174
1175 anon_vma = reusable_anon_vma(near, vma, near);
1176 if (anon_vma)
1177 return anon_vma;
1178try_prev:
1179 near = vma->vm_prev;
1180 if (!near)
1181 goto none;
1182
1183 anon_vma = reusable_anon_vma(near, near, vma);
1184 if (anon_vma)
1185 return anon_vma;
1186none:
1187
1188
1189
1190
1191
1192
1193
1194
1195 return NULL;
1196}
1197
1198#ifdef CONFIG_PROC_FS
1199void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1200 struct file *file, long pages)
1201{
1202 const unsigned long stack_flags
1203 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1204
1205 mm->total_vm += pages;
1206
1207 if (file) {
1208 mm->shared_vm += pages;
1209 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1210 mm->exec_vm += pages;
1211 } else if (flags & stack_flags)
1212 mm->stack_vm += pages;
1213}
1214#endif
1215
1216
1217
1218
1219
1220static inline unsigned long round_hint_to_min(unsigned long hint)
1221{
1222 hint &= PAGE_MASK;
1223 if (((void *)hint != NULL) &&
1224 (hint < mmap_min_addr))
1225 return PAGE_ALIGN(mmap_min_addr);
1226 return hint;
1227}
1228
1229static inline int mlock_future_check(struct mm_struct *mm,
1230 unsigned long flags,
1231 unsigned long len)
1232{
1233 unsigned long locked, lock_limit;
1234
1235
1236 if (flags & VM_LOCKED) {
1237 locked = len >> PAGE_SHIFT;
1238 locked += mm->locked_vm;
1239 lock_limit = rlimit(RLIMIT_MEMLOCK);
1240 lock_limit >>= PAGE_SHIFT;
1241 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1242 return -EAGAIN;
1243 }
1244 return 0;
1245}
1246
1247
1248
1249
1250
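/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 *
 * Validate the request, translate the PROT_*/MAP_* bits into vm_flags,
 * find a suitable address and hand off to mmap_region().  On success,
 * *populate is set to the length the caller should pre-fault (for
 * MAP_POPULATE / VM_LOCKED mappings).
 */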
1251unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1252 unsigned long len, unsigned long prot,
1253 unsigned long flags, unsigned long pgoff,
1254 unsigned long *populate)
1255{
1256 struct mm_struct *mm = current->mm;
1257 vm_flags_t vm_flags;
1258
1259 *populate = 0;
1260
1261 if (!len)
1262 return -EINVAL;
1263
1264
1265
1266
1267
1268
1269
1270 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1271 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
1272 prot |= PROT_EXEC;
1273
1274 if (!(flags & MAP_FIXED))
1275 addr = round_hint_to_min(addr);
1276
1277
1278 len = PAGE_ALIGN(len);
1279 if (!len)
1280 return -ENOMEM;
1281
1282
1283 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1284 return -EOVERFLOW;
1285
1286
1287 if (mm->map_count > sysctl_max_map_count)
1288 return -ENOMEM;
1289
1290
1291
1292
1293 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1294 if (addr & ~PAGE_MASK)
1295 return addr;
1296
1297
1298
1299
1300
1301 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1302 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1303
1304 if (flags & MAP_LOCKED)
1305 if (!can_do_mlock())
1306 return -EPERM;
1307
1308 if (mlock_future_check(mm, vm_flags, len))
1309 return -EAGAIN;
1310
1311 if (file) {
1312 struct inode *inode = file_inode(file);
1313
1314 switch (flags & MAP_TYPE) {
1315 case MAP_SHARED:
1316 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1317 return -EACCES;
1318
1319
1320
1321
1322
1323 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1324 return -EACCES;
1325
1326
1327
1328
1329 if (locks_verify_locked(file))
1330 return -EAGAIN;
1331
1332 vm_flags |= VM_SHARED | VM_MAYSHARE;
1333 if (!(file->f_mode & FMODE_WRITE))
1334 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1335
1336
1337 case MAP_PRIVATE:
1338 if (!(file->f_mode & FMODE_READ))
1339 return -EACCES;
1340 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1341 if (vm_flags & VM_EXEC)
1342 return -EPERM;
1343 vm_flags &= ~VM_MAYEXEC;
1344 }
1345
1346 if (!file->f_op->mmap)
1347 return -ENODEV;
1348 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1349 return -EINVAL;
1350 break;
1351
1352 default:
1353 return -EINVAL;
1354 }
1355 } else {
1356 switch (flags & MAP_TYPE) {
1357 case MAP_SHARED:
1358 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1359 return -EINVAL;
1360
1361
1362
1363 pgoff = 0;
1364 vm_flags |= VM_SHARED | VM_MAYSHARE;
1365 break;
1366 case MAP_PRIVATE:
1367
1368
1369
1370 pgoff = addr >> PAGE_SHIFT;
1371 break;
1372 default:
1373 return -EINVAL;
1374 }
1375 }
1376
1377
1378
1379
1380
1381 if (flags & MAP_NORESERVE) {
1382
1383 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1384 vm_flags |= VM_NORESERVE;
1385
1386
1387 if (file && is_file_hugepages(file))
1388 vm_flags |= VM_NORESERVE;
1389 }
1390
1391 addr = mmap_region(file, addr, len, vm_flags, pgoff);
1392 if (!IS_ERR_VALUE(addr) &&
1393 ((vm_flags & VM_LOCKED) ||
1394 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1395 *populate = len;
1396 return addr;
1397}
1398
1399SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1400 unsigned long, prot, unsigned long, flags,
1401 unsigned long, fd, unsigned long, pgoff)
1402{
1403 struct file *file = NULL;
1404 unsigned long retval = -EBADF;
1405
1406 if (!(flags & MAP_ANONYMOUS)) {
1407 audit_mmap_fd(fd, flags);
1408 file = fget(fd);
1409 if (!file)
1410 goto out;
1411 if (is_file_hugepages(file))
1412 len = ALIGN(len, huge_page_size(hstate_file(file)));
1413 retval = -EINVAL;
1414 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1415 goto out_fput;
1416 } else if (flags & MAP_HUGETLB) {
1417 struct user_struct *user = NULL;
1418 struct hstate *hs;
1419
1420 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1421 if (!hs)
1422 return -EINVAL;
1423
1424 len = ALIGN(len, huge_page_size(hs));
1425
1426
1427
1428
1429
1430
1431 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1432 VM_NORESERVE,
1433 &user, HUGETLB_ANONHUGE_INODE,
1434 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1435 if (IS_ERR(file))
1436 return PTR_ERR(file);
1437 }
1438
1439 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1440
1441 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1442out_fput:
1443 if (file)
1444 fput(file);
1445out:
1446 return retval;
1447}
1448
1449#ifdef __ARCH_WANT_SYS_OLD_MMAP
1450struct mmap_arg_struct {
1451 unsigned long addr;
1452 unsigned long len;
1453 unsigned long prot;
1454 unsigned long flags;
1455 unsigned long fd;
1456 unsigned long offset;
1457};
1458
1459SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1460{
1461 struct mmap_arg_struct a;
1462
1463 if (copy_from_user(&a, arg, sizeof(a)))
1464 return -EFAULT;
1465 if (a.offset & ~PAGE_MASK)
1466 return -EINVAL;
1467
1468 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1469 a.offset >> PAGE_SHIFT);
1470}
1471#endif
1472
1473
1474
1475
1476
1477
1478
1479int vma_wants_writenotify(struct vm_area_struct *vma)
1480{
1481 vm_flags_t vm_flags = vma->vm_flags;
1482
1483
1484 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1485 return 0;
1486
1487
1488 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1489 return 1;
1490
1491
1492
1493 if (pgprot_val(vma->vm_page_prot) !=
1494 pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
1495 return 0;
1496
1497
1498 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1499 return 1;
1500
1501
1502 if (vm_flags & VM_PFNMAP)
1503 return 0;
1504
1505
1506 return vma->vm_file && vma->vm_file->f_mapping &&
1507 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1508}
1509
1510
1511
1512
1513
1514static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1515{
1516
1517
1518
1519
1520 if (file && is_file_hugepages(file))
1521 return 0;
1522
1523 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1524}
1525
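/*
 * Establish the mapping proper: charge the memory if it is accountable,
 * clear any old mappings in the range, try to merge with an adjacent
 * vma, and otherwise allocate and link a new vma, letting the file's
 * ->mmap() or shmem_zero_setup() initialize it.
 */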
1526unsigned long mmap_region(struct file *file, unsigned long addr,
1527 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
1528{
1529 struct mm_struct *mm = current->mm;
1530 struct vm_area_struct *vma, *prev;
1531 int error;
1532 struct rb_node **rb_link, *rb_parent;
1533 unsigned long charged = 0;
1534
1535
1536 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1537 unsigned long nr_pages;
1538
1539
1540
1541
1542
1543 if (!(vm_flags & MAP_FIXED))
1544 return -ENOMEM;
1545
1546 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1547
1548 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1549 return -ENOMEM;
1550 }
1551
1552
1553 error = -ENOMEM;
1554 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1555 &rb_parent)) {
1556 if (do_munmap(mm, addr, len))
1557 return -ENOMEM;
1558 }
1559
1560
1561
1562
1563 if (accountable_mapping(file, vm_flags)) {
1564 charged = len >> PAGE_SHIFT;
1565 if (security_vm_enough_memory_mm(mm, charged))
1566 return -ENOMEM;
1567 vm_flags |= VM_ACCOUNT;
1568 }
1569
1570
1571
1572
1573 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff,
1574 NULL);
1575 if (vma)
1576 goto out;
1577
1578
1579
1580
1581
1582
1583 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1584 if (!vma) {
1585 error = -ENOMEM;
1586 goto unacct_error;
1587 }
1588
1589 vma->vm_mm = mm;
1590 vma->vm_start = addr;
1591 vma->vm_end = addr + len;
1592 vma->vm_flags = vm_flags;
1593 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1594 vma->vm_pgoff = pgoff;
1595 INIT_LIST_HEAD(&vma->anon_vma_chain);
1596
1597 if (file) {
1598 if (vm_flags & VM_DENYWRITE) {
1599 error = deny_write_access(file);
1600 if (error)
1601 goto free_vma;
1602 }
1603 if (vm_flags & VM_SHARED) {
1604 error = mapping_map_writable(file->f_mapping);
1605 if (error)
1606 goto allow_write_and_free_vma;
1607 }
1608
1609
1610
1611
1612
1613
1614 vma->vm_file = get_file(file);
1615 error = file->f_op->mmap(file, vma);
1616 if (error)
1617 goto unmap_and_free_vma;
1618
1619
1620
1621
1622
1623
1624
1625
1626 WARN_ON_ONCE(addr != vma->vm_start);
1627
1628 addr = vma->vm_start;
1629 vm_flags = vma->vm_flags;
1630 } else if (vm_flags & VM_SHARED) {
1631 error = shmem_zero_setup(vma);
1632 if (error)
1633 goto free_vma;
1634 }
1635
1636 vma_link(mm, vma, prev, rb_link, rb_parent);
1637
1638 if (file) {
1639 if (vm_flags & VM_SHARED)
1640 mapping_unmap_writable(file->f_mapping);
1641 if (vm_flags & VM_DENYWRITE)
1642 allow_write_access(file);
1643 }
1644 file = vma->vm_file;
1645out:
1646 perf_event_mmap(vma);
1647
1648 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1649 if (vm_flags & VM_LOCKED) {
1650 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1651 vma == get_gate_vma(current->mm)))
1652 mm->locked_vm += (len >> PAGE_SHIFT);
1653 else
1654 vma->vm_flags &= ~VM_LOCKED;
1655 }
1656
1657 if (file)
1658 uprobe_mmap(vma);
1659
1660
1661
1662
1663
1664
1665
1666
1667 vma->vm_flags |= VM_SOFTDIRTY;
1668
1669 vma_set_page_prot(vma);
1670
1671 return addr;
1672
1673unmap_and_free_vma:
1674 vma->vm_file = NULL;
1675 fput(file);
1676
1677
1678 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1679 charged = 0;
1680 if (vm_flags & VM_SHARED)
1681 mapping_unmap_writable(file->f_mapping);
1682allow_write_and_free_vma:
1683 if (vm_flags & VM_DENYWRITE)
1684 allow_write_access(file);
1685free_vma:
1686 kmem_cache_free(vm_area_cachep, vma);
1687unacct_error:
1688 if (charged)
1689 vm_unacct_memory(charged);
1690 return error;
1691}
1692
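/*
 * Bottom-up gap search: walk the augmented VMA rbtree, using the cached
 * rb_subtree_gap values to skip subtrees that cannot hold a gap of at
 * least info->length + info->align_mask bytes, and return the lowest
 * suitable address within [info->low_limit, info->high_limit).
 */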
1693unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1694{
1695
1696
1697
1698
1699
1700
1701
1702
1703 struct mm_struct *mm = current->mm;
1704 struct vm_area_struct *vma;
1705 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1706
1707
1708 length = info->length + info->align_mask;
1709 if (length < info->length)
1710 return -ENOMEM;
1711
1712
1713 if (info->high_limit < length)
1714 return -ENOMEM;
1715 high_limit = info->high_limit - length;
1716
1717 if (info->low_limit > high_limit)
1718 return -ENOMEM;
1719 low_limit = info->low_limit + length;
1720
1721
1722 if (RB_EMPTY_ROOT(&mm->mm_rb))
1723 goto check_highest;
1724 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1725 if (vma->rb_subtree_gap < length)
1726 goto check_highest;
1727
1728 while (true) {
1729
1730 gap_end = vma->vm_start;
1731 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1732 struct vm_area_struct *left =
1733 rb_entry(vma->vm_rb.rb_left,
1734 struct vm_area_struct, vm_rb);
1735 if (left->rb_subtree_gap >= length) {
1736 vma = left;
1737 continue;
1738 }
1739 }
1740
1741 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1742check_current:
1743
1744 if (gap_start > high_limit)
1745 return -ENOMEM;
1746 if (gap_end >= low_limit && gap_end - gap_start >= length)
1747 goto found;
1748
1749
1750 if (vma->vm_rb.rb_right) {
1751 struct vm_area_struct *right =
1752 rb_entry(vma->vm_rb.rb_right,
1753 struct vm_area_struct, vm_rb);
1754 if (right->rb_subtree_gap >= length) {
1755 vma = right;
1756 continue;
1757 }
1758 }
1759
1760
1761 while (true) {
1762 struct rb_node *prev = &vma->vm_rb;
1763 if (!rb_parent(prev))
1764 goto check_highest;
1765 vma = rb_entry(rb_parent(prev),
1766 struct vm_area_struct, vm_rb);
1767 if (prev == vma->vm_rb.rb_left) {
1768 gap_start = vma->vm_prev->vm_end;
1769 gap_end = vma->vm_start;
1770 goto check_current;
1771 }
1772 }
1773 }
1774
1775check_highest:
1776
1777 gap_start = mm->highest_vm_end;
1778 gap_end = ULONG_MAX;
1779 if (gap_start > high_limit)
1780 return -ENOMEM;
1781
1782found:
1783
1784 if (gap_start < info->low_limit)
1785 gap_start = info->low_limit;
1786
1787
1788 gap_start += (info->align_offset - gap_start) & info->align_mask;
1789
1790 VM_BUG_ON(gap_start + info->length > info->high_limit);
1791 VM_BUG_ON(gap_start + info->length > gap_end);
1792 return gap_start;
1793}
1794
1795unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1796{
1797 struct mm_struct *mm = current->mm;
1798 struct vm_area_struct *vma;
1799 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1800
1801
1802 length = info->length + info->align_mask;
1803 if (length < info->length)
1804 return -ENOMEM;
1805
1806
1807
1808
1809
1810 gap_end = info->high_limit;
1811 if (gap_end < length)
1812 return -ENOMEM;
1813 high_limit = gap_end - length;
1814
1815 if (info->low_limit > high_limit)
1816 return -ENOMEM;
1817 low_limit = info->low_limit + length;
1818
1819
1820 gap_start = mm->highest_vm_end;
1821 if (gap_start <= high_limit)
1822 goto found_highest;
1823
1824
1825 if (RB_EMPTY_ROOT(&mm->mm_rb))
1826 return -ENOMEM;
1827 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1828 if (vma->rb_subtree_gap < length)
1829 return -ENOMEM;
1830
1831 while (true) {
1832
1833 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1834 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1835 struct vm_area_struct *right =
1836 rb_entry(vma->vm_rb.rb_right,
1837 struct vm_area_struct, vm_rb);
1838 if (right->rb_subtree_gap >= length) {
1839 vma = right;
1840 continue;
1841 }
1842 }
1843
1844check_current:
1845
1846 gap_end = vma->vm_start;
1847 if (gap_end < low_limit)
1848 return -ENOMEM;
1849 if (gap_start <= high_limit && gap_end - gap_start >= length)
1850 goto found;
1851
1852
1853 if (vma->vm_rb.rb_left) {
1854 struct vm_area_struct *left =
1855 rb_entry(vma->vm_rb.rb_left,
1856 struct vm_area_struct, vm_rb);
1857 if (left->rb_subtree_gap >= length) {
1858 vma = left;
1859 continue;
1860 }
1861 }
1862
1863
1864 while (true) {
1865 struct rb_node *prev = &vma->vm_rb;
1866 if (!rb_parent(prev))
1867 return -ENOMEM;
1868 vma = rb_entry(rb_parent(prev),
1869 struct vm_area_struct, vm_rb);
1870 if (prev == vma->vm_rb.rb_right) {
1871 gap_start = vma->vm_prev ?
1872 vma->vm_prev->vm_end : 0;
1873 goto check_current;
1874 }
1875 }
1876 }
1877
1878found:
1879
1880 if (gap_end > info->high_limit)
1881 gap_end = info->high_limit;
1882
1883found_highest:
1884
1885 gap_end -= info->length;
1886 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1887
1888 VM_BUG_ON(gap_end < info->low_limit);
1889 VM_BUG_ON(gap_end < gap_start);
1890 return gap_end;
1891}
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904#ifndef HAVE_ARCH_UNMAPPED_AREA
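/*
 * Generic bottom-up policy: honour MAP_FIXED and a usable hint address,
 * otherwise search for a free range between mm->mmap_base and TASK_SIZE.
 */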
1905unsigned long
1906arch_get_unmapped_area(struct file *filp, unsigned long addr,
1907 unsigned long len, unsigned long pgoff, unsigned long flags)
1908{
1909 struct mm_struct *mm = current->mm;
1910 struct vm_area_struct *vma;
1911 struct vm_unmapped_area_info info;
1912
1913 if (len > TASK_SIZE - mmap_min_addr)
1914 return -ENOMEM;
1915
1916 if (flags & MAP_FIXED)
1917 return addr;
1918
1919 if (addr) {
1920 addr = PAGE_ALIGN(addr);
1921 vma = find_vma(mm, addr);
1922 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1923 (!vma || addr + len <= vma->vm_start))
1924 return addr;
1925 }
1926
1927 info.flags = 0;
1928 info.length = len;
1929 info.low_limit = mm->mmap_base;
1930 info.high_limit = TASK_SIZE;
1931 info.align_mask = 0;
1932 return vm_unmapped_area(&info);
1933}
1934#endif
1935
1936
1937
1938
1939
1940#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1941unsigned long
1942arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1943 const unsigned long len, const unsigned long pgoff,
1944 const unsigned long flags)
1945{
1946 struct vm_area_struct *vma;
1947 struct mm_struct *mm = current->mm;
1948 unsigned long addr = addr0;
1949 struct vm_unmapped_area_info info;
1950
1951
1952 if (len > TASK_SIZE - mmap_min_addr)
1953 return -ENOMEM;
1954
1955 if (flags & MAP_FIXED)
1956 return addr;
1957
1958
1959 if (addr) {
1960 addr = PAGE_ALIGN(addr);
1961 vma = find_vma(mm, addr);
1962 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1963 (!vma || addr + len <= vma->vm_start))
1964 return addr;
1965 }
1966
1967 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1968 info.length = len;
1969 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
1970 info.high_limit = mm->mmap_base;
1971 info.align_mask = 0;
1972 addr = vm_unmapped_area(&info);
1973
1974
1975
1976
1977
1978
1979
1980 if (addr & ~PAGE_MASK) {
1981 VM_BUG_ON(addr != -ENOMEM);
1982 info.flags = 0;
1983 info.low_limit = TASK_UNMAPPED_BASE;
1984 info.high_limit = TASK_SIZE;
1985 addr = vm_unmapped_area(&info);
1986 }
1987
1988 return addr;
1989}
1990#endif
1991
1992unsigned long
1993get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1994 unsigned long pgoff, unsigned long flags)
1995{
1996 unsigned long (*get_area)(struct file *, unsigned long,
1997 unsigned long, unsigned long, unsigned long);
1998
1999 unsigned long error = arch_mmap_check(addr, len, flags);
2000 if (error)
2001 return error;
2002
2003
2004 if (len > TASK_SIZE)
2005 return -ENOMEM;
2006
2007 get_area = current->mm->get_unmapped_area;
2008 if (file && file->f_op->get_unmapped_area)
2009 get_area = file->f_op->get_unmapped_area;
2010 addr = get_area(file, addr, len, pgoff, flags);
2011 if (IS_ERR_VALUE(addr))
2012 return addr;
2013
2014 if (addr > TASK_SIZE - len)
2015 return -ENOMEM;
2016 if (addr & ~PAGE_MASK)
2017 return -EINVAL;
2018
2019 addr = arch_rebalance_pgtables(addr, len);
2020 error = security_mmap_addr(addr);
2021 return error ? error : addr;
2022}
2023
2024EXPORT_SYMBOL(get_unmapped_area);
2025
2026
2027struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2028{
2029 struct rb_node *rb_node;
2030 struct vm_area_struct *vma;
2031
2032
2033 vma = vmacache_find(mm, addr);
2034 if (likely(vma))
2035 return vma;
2036
2037 rb_node = mm->mm_rb.rb_node;
2038 vma = NULL;
2039
2040 while (rb_node) {
2041 struct vm_area_struct *tmp;
2042
2043 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2044
2045 if (tmp->vm_end > addr) {
2046 vma = tmp;
2047 if (tmp->vm_start <= addr)
2048 break;
2049 rb_node = rb_node->rb_left;
2050 } else
2051 rb_node = rb_node->rb_right;
2052 }
2053
2054 if (vma)
2055 vmacache_update(addr, vma);
2056 return vma;
2057}
2058
2059EXPORT_SYMBOL(find_vma);
2060
2061
2062
2063
2064struct vm_area_struct *
2065find_vma_prev(struct mm_struct *mm, unsigned long addr,
2066 struct vm_area_struct **pprev)
2067{
2068 struct vm_area_struct *vma;
2069
2070 vma = find_vma(mm, addr);
2071 if (vma) {
2072 *pprev = vma->vm_prev;
2073 } else {
2074 struct rb_node *rb_node = mm->mm_rb.rb_node;
2075 *pprev = NULL;
2076 while (rb_node) {
2077 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2078 rb_node = rb_node->rb_right;
2079 }
2080 }
2081 return vma;
2082}
2083
2084
2085
2086
2087
2088
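/*
 * Verify that the stack growth is acceptable and update accounting:
 * check the address-space, stack and mlock rlimits, refuse growth into
 * hugepage-only ranges, and charge the new pages through
 * security_vm_enough_memory_mm() before adjusting locked_vm/total_vm.
 */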
2089static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2090{
2091 struct mm_struct *mm = vma->vm_mm;
2092 struct rlimit *rlim = current->signal->rlim;
2093 unsigned long new_start, actual_size;
2094
2095
2096 if (!may_expand_vm(mm, grow))
2097 return -ENOMEM;
2098
2099
2100 actual_size = size;
2101 if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
2102 actual_size -= PAGE_SIZE;
2103 if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2104 return -ENOMEM;
2105
2106
2107 if (vma->vm_flags & VM_LOCKED) {
2108 unsigned long locked;
2109 unsigned long limit;
2110 locked = mm->locked_vm + grow;
2111 limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2112 limit >>= PAGE_SHIFT;
2113 if (locked > limit && !capable(CAP_IPC_LOCK))
2114 return -ENOMEM;
2115 }
2116
2117
2118 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2119 vma->vm_end - size;
2120 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2121 return -EFAULT;
2122
2123
2124
2125
2126
2127 if (security_vm_enough_memory_mm(mm, grow))
2128 return -ENOMEM;
2129
2130
2131 if (vma->vm_flags & VM_LOCKED)
2132 mm->locked_vm += grow;
2133 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2134 return 0;
2135}
2136
2137#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2138
2139
2140
2141
2142int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2143{
2144 int error;
2145
2146 if (!(vma->vm_flags & VM_GROWSUP))
2147 return -EFAULT;
2148
2149
2150
2151
2152
2153 if (unlikely(anon_vma_prepare(vma)))
2154 return -ENOMEM;
2155 vma_lock_anon_vma(vma);
2156
2157
2158
2159
2160
2161
2162
2163 if (address < PAGE_ALIGN(address+4))
2164 address = PAGE_ALIGN(address+4);
2165 else {
2166 vma_unlock_anon_vma(vma);
2167 return -ENOMEM;
2168 }
2169 error = 0;
2170
2171
2172 if (address > vma->vm_end) {
2173 unsigned long size, grow;
2174
2175 size = address - vma->vm_start;
2176 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2177
2178 error = -ENOMEM;
2179 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2180 error = acct_stack_growth(vma, size, grow);
2181 if (!error) {
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193 spin_lock(&vma->vm_mm->page_table_lock);
2194 anon_vma_interval_tree_pre_update_vma(vma);
2195 vma->vm_end = address;
2196 anon_vma_interval_tree_post_update_vma(vma);
2197 if (vma->vm_next)
2198 vma_gap_update(vma->vm_next);
2199 else
2200 vma->vm_mm->highest_vm_end = address;
2201 spin_unlock(&vma->vm_mm->page_table_lock);
2202
2203 perf_event_mmap(vma);
2204 }
2205 }
2206 }
2207 vma_unlock_anon_vma(vma);
2208 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2209 validate_mm(vma->vm_mm);
2210 return error;
2211}
2212#endif
2213
2214
2215
2216
2217int expand_downwards(struct vm_area_struct *vma,
2218 unsigned long address)
2219{
2220 int error;
2221
2222
2223
2224
2225
2226 if (unlikely(anon_vma_prepare(vma)))
2227 return -ENOMEM;
2228
2229 address &= PAGE_MASK;
2230 error = security_mmap_addr(address);
2231 if (error)
2232 return error;
2233
2234 vma_lock_anon_vma(vma);
2235
2236
2237
2238
2239
2240
2241
2242
2243 if (address < vma->vm_start) {
2244 unsigned long size, grow;
2245
2246 size = vma->vm_end - address;
2247 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2248
2249 error = -ENOMEM;
2250 if (grow <= vma->vm_pgoff) {
2251 error = acct_stack_growth(vma, size, grow);
2252 if (!error) {
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264 spin_lock(&vma->vm_mm->page_table_lock);
2265 anon_vma_interval_tree_pre_update_vma(vma);
2266 vma->vm_start = address;
2267 vma->vm_pgoff -= grow;
2268 anon_vma_interval_tree_post_update_vma(vma);
2269 vma_gap_update(vma);
2270 spin_unlock(&vma->vm_mm->page_table_lock);
2271
2272 perf_event_mmap(vma);
2273 }
2274 }
2275 }
2276 vma_unlock_anon_vma(vma);
2277 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2278 validate_mm(vma->vm_mm);
2279 return error;
2280}
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293#ifdef CONFIG_STACK_GROWSUP
2294int expand_stack(struct vm_area_struct *vma, unsigned long address)
2295{
2296 struct vm_area_struct *next;
2297
2298 address &= PAGE_MASK;
2299 next = vma->vm_next;
2300 if (next && next->vm_start == address + PAGE_SIZE) {
2301 if (!(next->vm_flags & VM_GROWSUP))
2302 return -ENOMEM;
2303 }
2304 return expand_upwards(vma, address);
2305}
2306
2307struct vm_area_struct *
2308find_extend_vma(struct mm_struct *mm, unsigned long addr)
2309{
2310 struct vm_area_struct *vma, *prev;
2311
2312 addr &= PAGE_MASK;
2313 vma = find_vma_prev(mm, addr, &prev);
2314 if (vma && (vma->vm_start <= addr))
2315 return vma;
2316 if (!prev || expand_stack(prev, addr))
2317 return NULL;
2318 if (prev->vm_flags & VM_LOCKED)
2319 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2320 return prev;
2321}
2322#else
2323int expand_stack(struct vm_area_struct *vma, unsigned long address)
2324{
2325 struct vm_area_struct *prev;
2326
2327 address &= PAGE_MASK;
2328 prev = vma->vm_prev;
2329 if (prev && prev->vm_end == address) {
2330 if (!(prev->vm_flags & VM_GROWSDOWN))
2331 return -ENOMEM;
2332 }
2333 return expand_downwards(vma, address);
2334}
2335
2336struct vm_area_struct *
2337find_extend_vma(struct mm_struct *mm, unsigned long addr)
2338{
2339 struct vm_area_struct *vma;
2340 unsigned long start;
2341
2342 addr &= PAGE_MASK;
2343 vma = find_vma(mm, addr);
2344 if (!vma)
2345 return NULL;
2346 if (vma->vm_start <= addr)
2347 return vma;
2348 if (!(vma->vm_flags & VM_GROWSDOWN))
2349 return NULL;
2350 start = vma->vm_start;
2351 if (expand_stack(vma, addr))
2352 return NULL;
2353 if (vma->vm_flags & VM_LOCKED)
2354 populate_vma_page_range(vma, addr, start, NULL);
2355 return vma;
2356}
2357#endif
2358
2359EXPORT_SYMBOL_GPL(find_extend_vma);
2360
2361
2362
2363
2364
2365
2366
2367static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2368{
2369 unsigned long nr_accounted = 0;
2370
2371
2372 update_hiwater_vm(mm);
2373 do {
2374 long nrpages = vma_pages(vma);
2375
2376 if (vma->vm_flags & VM_ACCOUNT)
2377 nr_accounted += nrpages;
2378 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2379 vma = remove_vma(vma);
2380 } while (vma);
2381 vm_unacct_memory(nr_accounted);
2382 validate_mm(mm);
2383}
2384
2385
2386
2387
2388
2389
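/*
 * Get rid of the page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */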
2390static void unmap_region(struct mm_struct *mm,
2391 struct vm_area_struct *vma, struct vm_area_struct *prev,
2392 unsigned long start, unsigned long end)
2393{
2394 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2395 struct mmu_gather tlb;
2396
2397 lru_add_drain();
2398 tlb_gather_mmu(&tlb, mm, start, end);
2399 update_hiwater_rss(mm);
2400 unmap_vmas(&tlb, vma, start, end);
2401 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2402 next ? next->vm_start : USER_PGTABLES_CEILING);
2403 tlb_finish_mmu(&tlb, start, end);
2404}
2405
2406
2407
2408
2409
2410static void
2411detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2412 struct vm_area_struct *prev, unsigned long end)
2413{
2414 struct vm_area_struct **insertion_point;
2415 struct vm_area_struct *tail_vma = NULL;
2416
2417 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2418 vma->vm_prev = NULL;
2419 do {
2420 vma_rb_erase(vma, &mm->mm_rb);
2421 mm->map_count--;
2422 tail_vma = vma;
2423 vma = vma->vm_next;
2424 } while (vma && vma->vm_start < end);
2425 *insertion_point = vma;
2426 if (vma) {
2427 vma->vm_prev = prev;
2428 vma_gap_update(vma);
2429 } else
2430 mm->highest_vm_end = prev ? prev->vm_end : 0;
2431 tail_vma->vm_next = NULL;
2432
2433
2434 vmacache_invalidate(mm);
2435}
2436
2437
2438
2439
2440
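/*
 * Split vma into two pieces at address 'addr', allocating a new vma for
 * either the lower part (new_below != 0) or the upper part.  Unlike
 * split_vma() below, __split_vma() bypasses the sysctl_max_map_count
 * check, which is what the munmap path wants.
 */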
2441static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2442 unsigned long addr, int new_below)
2443{
2444 struct vm_area_struct *new;
2445 int err = -ENOMEM;
2446
2447 if (is_vm_hugetlb_page(vma) && (addr &
2448 ~(huge_page_mask(hstate_vma(vma)))))
2449 return -EINVAL;
2450
2451 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2452 if (!new)
2453 goto out_err;
2454
2455
2456 *new = *vma;
2457
2458 INIT_LIST_HEAD(&new->anon_vma_chain);
2459
2460 if (new_below)
2461 new->vm_end = addr;
2462 else {
2463 new->vm_start = addr;
2464 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2465 }
2466
2467 err = vma_dup_policy(vma, new);
2468 if (err)
2469 goto out_free_vma;
2470
2471 err = anon_vma_clone(new, vma);
2472 if (err)
2473 goto out_free_mpol;
2474
2475 if (new->vm_file)
2476 get_file(new->vm_file);
2477
2478 if (new->vm_ops && new->vm_ops->open)
2479 new->vm_ops->open(new);
2480
2481 if (new_below)
2482 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2483 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2484 else
2485 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2486
2487
2488 if (!err)
2489 return 0;
2490
2491
2492 if (new->vm_ops && new->vm_ops->close)
2493 new->vm_ops->close(new);
2494 if (new->vm_file)
2495 fput(new->vm_file);
2496 unlink_anon_vmas(new);
2497 out_free_mpol:
2498 mpol_put(vma_policy(new));
2499 out_free_vma:
2500 kmem_cache_free(vm_area_cachep, new);
2501 out_err:
2502 return err;
2503}
2504
2505
2506
2507
2508
2509int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2510 unsigned long addr, int new_below)
2511{
2512 if (mm->map_count >= sysctl_max_map_count)
2513 return -ENOMEM;
2514
2515 return __split_vma(mm, vma, addr, new_below);
2516}
2517
2518
2519
2520
2521
2522
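/*
 * Munmap is split into two main parts: this part, which finds what
 * needs doing (including partial unmaps that require splitting a vma),
 * and the helpers above, which do the actual unmapping work.  Must be
 * called with mmap_sem held for writing.
 */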
2523int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2524{
2525 unsigned long end;
2526 struct vm_area_struct *vma, *prev, *last;
2527
2528 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2529 return -EINVAL;
2530
2531 len = PAGE_ALIGN(len);
2532 if (len == 0)
2533 return -EINVAL;
2534
2535
2536 vma = find_vma(mm, start);
2537 if (!vma)
2538 return 0;
2539 prev = vma->vm_prev;
2540
2541
2542
2543 end = start + len;
2544 if (vma->vm_start >= end)
2545 return 0;
2546
2547
2548
2549
2550
2551
2552
2553
2554 if (start > vma->vm_start) {
2555 int error;
2556
2557
2558
2559
2560
2561
2562 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2563 return -ENOMEM;
2564
2565 error = __split_vma(mm, vma, start, 0);
2566 if (error)
2567 return error;
2568 prev = vma;
2569 }
2570
2571
2572 last = find_vma(mm, end);
2573 if (last && end > last->vm_start) {
2574 int error = __split_vma(mm, last, end, 1);
2575 if (error)
2576 return error;
2577 }
2578 vma = prev ? prev->vm_next : mm->mmap;
2579
2580
2581
2582
2583 if (mm->locked_vm) {
2584 struct vm_area_struct *tmp = vma;
2585 while (tmp && tmp->vm_start < end) {
2586 if (tmp->vm_flags & VM_LOCKED) {
2587 mm->locked_vm -= vma_pages(tmp);
2588 munlock_vma_pages_all(tmp);
2589 }
2590 tmp = tmp->vm_next;
2591 }
2592 }
2593
2594
2595
2596
2597 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2598 unmap_region(mm, vma, prev, start, end);
2599
2600 arch_unmap(mm, vma, start, end);
2601
2602
2603 remove_vma_list(mm, vma);
2604
2605 return 0;
2606}
2607
2608int vm_munmap(unsigned long start, size_t len)
2609{
2610 int ret;
2611 struct mm_struct *mm = current->mm;
2612
2613 down_write(&mm->mmap_sem);
2614 ret = do_munmap(mm, start, len);
2615 up_write(&mm->mmap_sem);
2616 return ret;
2617}
2618EXPORT_SYMBOL(vm_munmap);
2619
2620SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2621{
2622 profile_munmap(addr);
2623 return vm_munmap(addr, len);
2624}
2625
2626
2627
2628
2629
2630SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2631 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2632{
2633
2634 struct mm_struct *mm = current->mm;
2635 struct vm_area_struct *vma;
2636 unsigned long populate = 0;
2637 unsigned long ret = -EINVAL;
2638 struct file *file;
2639
2640 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
2641 "See Documentation/vm/remap_file_pages.txt.\n",
2642 current->comm, current->pid);
2643
2644 if (prot)
2645 return ret;
2646 start = start & PAGE_MASK;
2647 size = size & PAGE_MASK;
2648
2649 if (start + size <= start)
2650 return ret;
2651
2652
2653 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2654 return ret;
2655
2656 down_write(&mm->mmap_sem);
2657 vma = find_vma(mm, start);
2658
2659 if (!vma || !(vma->vm_flags & VM_SHARED))
2660 goto out;
2661
2662 if (start < vma->vm_start || start + size > vma->vm_end)
2663 goto out;
2664
2665 if (pgoff == linear_page_index(vma, start)) {
2666 ret = 0;
2667 goto out;
2668 }
2669
2670 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2671 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2672 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2673
2674 flags &= MAP_NONBLOCK;
2675 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2676 if (vma->vm_flags & VM_LOCKED) {
2677 flags |= MAP_LOCKED;
2678
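		/* drop PG_Mlocked flag for over-mapped range */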
2679 munlock_vma_pages_range(vma, start, start + size);
2680 }
2681
2682 file = get_file(vma->vm_file);
2683 ret = do_mmap_pgoff(vma->vm_file, start, size,
2684 prot, flags, pgoff, &populate);
2685 fput(file);
2686out:
2687 up_write(&mm->mmap_sem);
2688 if (populate)
2689 mm_populate(ret, populate);
2690 if (!IS_ERR_VALUE(ret))
2691 ret = 0;
2692 return ret;
2693}
2694
2695static inline void verify_mm_writelocked(struct mm_struct *mm)
2696{
2697#ifdef CONFIG_DEBUG_VM
2698 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2699 WARN_ON(1);
2700 up_read(&mm->mmap_sem);
2701 }
2702#endif
2703}
2704
2705
2706
2707
2708
2709
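/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */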
2710static unsigned long do_brk(unsigned long addr, unsigned long len)
2711{
2712 struct mm_struct *mm = current->mm;
2713 struct vm_area_struct *vma, *prev;
2714 unsigned long flags;
2715 struct rb_node **rb_link, *rb_parent;
2716 pgoff_t pgoff = addr >> PAGE_SHIFT;
2717 int error;
2718
2719 len = PAGE_ALIGN(len);
2720 if (!len)
2721 return addr;
2722
2723 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2724
2725 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2726 if (error & ~PAGE_MASK)
2727 return error;
2728
2729 error = mlock_future_check(mm, mm->def_flags, len);
2730 if (error)
2731 return error;
2732
2733
2734
2735
2736
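	/*
	 * mm->mmap_sem is required to protect against another thread
	 * changing the mappings in case we sleep.
	 */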
2737 verify_mm_writelocked(mm);
2738
2739
2740
2741
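	/*
	 * Clear old maps.  this also does some error checking for us
	 */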
2742 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
2743 &rb_parent)) {
2744 if (do_munmap(mm, addr, len))
2745 return -ENOMEM;
2746 }
2747
2748
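	/* Check against address space limits *after* clearing old maps... */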
2749 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2750 return -ENOMEM;
2751
2752 if (mm->map_count > sysctl_max_map_count)
2753 return -ENOMEM;
2754
2755 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2756 return -ENOMEM;
2757
2758
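	/* Can we just expand an old private anonymous mapping? */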
2759 vma = vma_merge(mm, prev, addr, addr + len, flags,
2760 NULL, NULL, pgoff, NULL);
2761 if (vma)
2762 goto out;
2763
2764
2765
2766
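	/*
	 * create a vma struct for an anonymous mapping
	 */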
2767 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2768 if (!vma) {
2769 vm_unacct_memory(len >> PAGE_SHIFT);
2770 return -ENOMEM;
2771 }
2772
2773 INIT_LIST_HEAD(&vma->anon_vma_chain);
2774 vma->vm_mm = mm;
2775 vma->vm_start = addr;
2776 vma->vm_end = addr + len;
2777 vma->vm_pgoff = pgoff;
2778 vma->vm_flags = flags;
2779 vma->vm_page_prot = vm_get_page_prot(flags);
2780 vma_link(mm, vma, prev, rb_link, rb_parent);
2781out:
2782 perf_event_mmap(vma);
2783 mm->total_vm += len >> PAGE_SHIFT;
2784 if (flags & VM_LOCKED)
2785 mm->locked_vm += (len >> PAGE_SHIFT);
2786 vma->vm_flags |= VM_SOFTDIRTY;
2787 return addr;
2788}
2789
2790unsigned long vm_brk(unsigned long addr, unsigned long len)
2791{
2792 struct mm_struct *mm = current->mm;
2793 unsigned long ret;
2794 bool populate;
2795
2796 down_write(&mm->mmap_sem);
2797 ret = do_brk(addr, len);
2798 populate = ((mm->def_flags & VM_LOCKED) != 0);
2799 up_write(&mm->mmap_sem);
2800 if (populate)
2801 mm_populate(addr, len);
2802 return ret;
2803}
2804EXPORT_SYMBOL(vm_brk);
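/*
 * Illustrative sketch (not part of this file): binary loaders use vm_brk()
 * to set up anonymous, zero-filled regions such as a bss segment.  With
 * hypothetical locals "bss_start" and "bss_end", a call might look roughly
 * like:
 *
 *	unsigned long map = vm_brk(bss_start, bss_end - bss_start);
 *
 *	if (IS_ERR_VALUE(map))
 *		return map;
 *
 * On success the start address is returned, so callers must check for an
 * error value rather than for non-zero.
 */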
2805
2806
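/* Release all mmaps. */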
2807void exit_mmap(struct mm_struct *mm)
2808{
2809 struct mmu_gather tlb;
2810 struct vm_area_struct *vma;
2811 unsigned long nr_accounted = 0;
2812
2813
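	/* mm's last user has gone, and it's about to be pulled down */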
2814 mmu_notifier_release(mm);
2815
2816 if (mm->locked_vm) {
2817 vma = mm->mmap;
2818 while (vma) {
2819 if (vma->vm_flags & VM_LOCKED)
2820 munlock_vma_pages_all(vma);
2821 vma = vma->vm_next;
2822 }
2823 }
2824
2825 arch_exit_mmap(mm);
2826
2827 vma = mm->mmap;
2828 if (!vma)
2829 return;
2830
2831 lru_add_drain();
2832 flush_cache_mm(mm);
2833 tlb_gather_mmu(&tlb, mm, 0, -1);
2834
2835
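	/* Use -1 here to ensure all VMAs in the mm are unmapped */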
2836 unmap_vmas(&tlb, vma, 0, -1);
2837
2838 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
2839 tlb_finish_mmu(&tlb, 0, -1);
2840
2841
2842
2843
2844
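	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */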
2845 while (vma) {
2846 if (vma->vm_flags & VM_ACCOUNT)
2847 nr_accounted += vma_pages(vma);
2848 vma = remove_vma(vma);
2849 }
2850 vm_unacct_memory(nr_accounted);
2851}
2852
2853
2854
2855
2856
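/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */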
2857int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
2858{
2859 struct vm_area_struct *prev;
2860 struct rb_node **rb_link, *rb_parent;
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
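	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first write fault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */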
2874 if (!vma->vm_file) {
2875 BUG_ON(vma->anon_vma);
2876 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2877 }
2878 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
2879 &prev, &rb_link, &rb_parent))
2880 return -ENOMEM;
2881 if ((vma->vm_flags & VM_ACCOUNT) &&
2882 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2883 return -ENOMEM;
2884
2885 vma_link(mm, vma, prev, rb_link, rb_parent);
2886 return 0;
2887}
2888
2889
2890
2891
2892
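/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */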
2893struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2894 unsigned long addr, unsigned long len, pgoff_t pgoff,
2895 bool *need_rmap_locks)
2896{
2897 struct vm_area_struct *vma = *vmap;
2898 unsigned long vma_start = vma->vm_start;
2899 struct mm_struct *mm = vma->vm_mm;
2900 struct vm_area_struct *new_vma, *prev;
2901 struct rb_node **rb_link, *rb_parent;
2902 bool faulted_in_anon_vma = true;
2903
2904
2905
2906
2907
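	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */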
2908 if (unlikely(!vma->vm_file && !vma->anon_vma)) {
2909 pgoff = addr >> PAGE_SHIFT;
2910 faulted_in_anon_vma = false;
2911 }
2912
2913 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
2914 return NULL;
2915 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2916 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2917 if (new_vma) {
2918
2919
2920
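		/*
		 * Source vma may have been merged into new_vma
		 */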
2921 if (unlikely(vma_start >= new_vma->vm_start &&
2922 vma_start < new_vma->vm_end)) {
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
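			/*
			 * The only way we can get here with the source vma
			 * covered by new_vma is if the vma was never faulted
			 * in and mremap was allowed to reset vm_pgoff to the
			 * destination address, which lets vma_merge merge the
			 * copy with the source.  Keeping vm_pgoff valid is
			 * only safe in that case, hence the check below that
			 * the vma had not been faulted in.
			 */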
2935 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
2936 *vmap = vma = new_vma;
2937 }
2938 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
2939 } else {
2940 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2941 if (new_vma) {
2942 *new_vma = *vma;
2943 new_vma->vm_start = addr;
2944 new_vma->vm_end = addr + len;
2945 new_vma->vm_pgoff = pgoff;
2946 if (vma_dup_policy(vma, new_vma))
2947 goto out_free_vma;
2948 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
2949 if (anon_vma_clone(new_vma, vma))
2950 goto out_free_mempol;
2951 if (new_vma->vm_file)
2952 get_file(new_vma->vm_file);
2953 if (new_vma->vm_ops && new_vma->vm_ops->open)
2954 new_vma->vm_ops->open(new_vma);
2955 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2956 *need_rmap_locks = false;
2957 }
2958 }
2959 return new_vma;
2960
2961 out_free_mempol:
2962 mpol_put(vma_policy(new_vma));
2963 out_free_vma:
2964 kmem_cache_free(vm_area_cachep, new_vma);
2965 return NULL;
2966}
2967
2968
2969
2970
2971
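/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */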
2972int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2973{
	unsigned long cur = mm->total_vm;	/* pages */
2975 unsigned long lim;
2976
2977 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2978
2979 if (cur + npages > lim)
2980 return 0;
2981 return 1;
2982}
2983
2984static int special_mapping_fault(struct vm_area_struct *vma,
2985 struct vm_fault *vmf);
2986
2987
2988
2989
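/*
 * Having a close hook prevents vma merging regardless of flags.
 */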
2990static void special_mapping_close(struct vm_area_struct *vma)
2991{
2992}
2993
2994static const char *special_mapping_name(struct vm_area_struct *vma)
2995{
2996 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
2997}
2998
2999static const struct vm_operations_struct special_mapping_vmops = {
3000 .close = special_mapping_close,
3001 .fault = special_mapping_fault,
3002 .name = special_mapping_name,
3003};
3004
3005static const struct vm_operations_struct legacy_special_mapping_vmops = {
3006 .close = special_mapping_close,
3007 .fault = special_mapping_fault,
3008};
3009
3010static int special_mapping_fault(struct vm_area_struct *vma,
3011 struct vm_fault *vmf)
3012{
3013 pgoff_t pgoff;
3014 struct page **pages;
3015
3016
3017
3018
3019
3020
3021
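	/*
	 * special mappings have no vm_file, and in that case, the mm
	 * uses vm_pgoff internally. So we have to subtract it from here.
	 * We are allowed to do this because we are the mm; do not copy
	 * this code into drivers!
	 */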
3022 pgoff = vmf->pgoff - vma->vm_pgoff;
3023
3024 if (vma->vm_ops == &legacy_special_mapping_vmops)
3025 pages = vma->vm_private_data;
3026 else
3027 pages = ((struct vm_special_mapping *)vma->vm_private_data)->
3028 pages;
3029
3030 for (; pgoff && *pages; ++pages)
3031 pgoff--;
3032
3033 if (*pages) {
3034 struct page *page = *pages;
3035 get_page(page);
3036 vmf->page = page;
3037 return 0;
3038 }
3039
3040 return VM_FAULT_SIGBUS;
3041}
3042
3043static struct vm_area_struct *__install_special_mapping(
3044 struct mm_struct *mm,
3045 unsigned long addr, unsigned long len,
3046 unsigned long vm_flags, const struct vm_operations_struct *ops,
3047 void *priv)
3048{
3049 int ret;
3050 struct vm_area_struct *vma;
3051
3052 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
3053 if (unlikely(vma == NULL))
3054 return ERR_PTR(-ENOMEM);
3055
3056 INIT_LIST_HEAD(&vma->anon_vma_chain);
3057 vma->vm_mm = mm;
3058 vma->vm_start = addr;
3059 vma->vm_end = addr + len;
3060
3061 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3062 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3063
3064 vma->vm_ops = ops;
3065 vma->vm_private_data = priv;
3066
3067 ret = insert_vm_struct(mm, vma);
3068 if (ret)
3069 goto out;
3070
3071 mm->total_vm += len >> PAGE_SHIFT;
3072
3073 perf_event_mmap(vma);
3074
3075 return vma;
3076
3077out:
3078 kmem_cache_free(vm_area_cachep, vma);
3079 return ERR_PTR(ret);
3080}
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
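/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pgoff is linear.  The array pointer and the pages it points to are
 * assumed to stay alive for as long as this mapping might exist.
 */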
3091struct vm_area_struct *_install_special_mapping(
3092 struct mm_struct *mm,
3093 unsigned long addr, unsigned long len,
3094 unsigned long vm_flags, const struct vm_special_mapping *spec)
3095{
3096 return __install_special_mapping(mm, addr, len, vm_flags,
3097 &special_mapping_vmops, (void *)spec);
3098}
3099
3100int install_special_mapping(struct mm_struct *mm,
3101 unsigned long addr, unsigned long len,
3102 unsigned long vm_flags, struct page **pages)
3103{
3104 struct vm_area_struct *vma = __install_special_mapping(
3105 mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
3106 (void *)pages);
3107
3108 return PTR_ERR_OR_ZERO(vma);
3109}
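/*
 * Illustrative sketch (not part of this file): architecture or driver code
 * that wants a single page visible at a known user address could install it
 * like this; "my_page" and "MY_BASE" are hypothetical names, and the pages
 * array must be NULL-terminated because special_mapping_fault() walks it
 * until it hits a NULL entry.
 *
 *	static struct page *my_pages[2];	(my_pages[1] stays NULL)
 *
 *	my_pages[0] = my_page;
 *	down_write(&mm->mmap_sem);
 *	ret = install_special_mapping(mm, MY_BASE, PAGE_SIZE,
 *				      VM_READ | VM_MAYREAD | VM_DONTCOPY,
 *				      my_pages);
 *	up_write(&mm->mmap_sem);
 */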
3110
3111static DEFINE_MUTEX(mm_all_locks_mutex);
3112
3113static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3114{
3115 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3116
3117
3118
3119
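		/*
		 * The LSB of the anon_vma root's rb_node can't change from
		 * under us because we hold the mm_all_locks_mutex; once the
		 * root lock is taken, setting that bit records that this
		 * anon_vma is already locked, so it is never taken twice.
		 */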
3120 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130 if (__test_and_set_bit(0, (unsigned long *)
3131 &anon_vma->root->rb_root.rb_node))
3132 BUG();
3133 }
3134}
3135
3136static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3137{
3138 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3139
3140
3141
3142
3143
3144
3145
3146
3147
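		/*
		 * AS_MM_ALL_LOCKS can't change from under us because we hold
		 * the mm_all_locks_mutex; the bit must still be set and
		 * cleared atomically because other bits in mapping->flags
		 * may change in parallel.
		 */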
3148 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3149 BUG();
3150 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
3151 }
3152}
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
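/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */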
3185int mm_take_all_locks(struct mm_struct *mm)
3186{
3187 struct vm_area_struct *vma;
3188 struct anon_vma_chain *avc;
3189
3190 BUG_ON(down_read_trylock(&mm->mmap_sem));
3191
3192 mutex_lock(&mm_all_locks_mutex);
3193
3194 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3195 if (signal_pending(current))
3196 goto out_unlock;
3197 if (vma->vm_file && vma->vm_file->f_mapping)
3198 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3199 }
3200
3201 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3202 if (signal_pending(current))
3203 goto out_unlock;
3204 if (vma->anon_vma)
3205 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3206 vm_lock_anon_vma(mm, avc->anon_vma);
3207 }
3208
3209 return 0;
3210
3211out_unlock:
3212 mm_drop_all_locks(mm);
3213 return -EINTR;
3214}
3215
3216static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3217{
3218 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
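		/*
		 * We must clear the LSB before unlocking the anon_vma, so
		 * that other users of anon_vma->rb_root never see our flag;
		 * the bit itself is stable because we still hold the
		 * mm_all_locks_mutex.
		 */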
3231 if (!__test_and_clear_bit(0, (unsigned long *)
3232 &anon_vma->root->rb_root.rb_node))
3233 BUG();
3234 anon_vma_unlock_write(anon_vma);
3235 }
3236}
3237
3238static void vm_unlock_mapping(struct address_space *mapping)
3239{
3240 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3241
3242
3243
3244
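		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */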
3245 i_mmap_unlock_write(mapping);
3246 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3247 &mapping->flags))
3248 BUG();
3249 }
3250}
3251
3252
3253
3254
3255
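/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */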
3256void mm_drop_all_locks(struct mm_struct *mm)
3257{
3258 struct vm_area_struct *vma;
3259 struct anon_vma_chain *avc;
3260
3261 BUG_ON(down_read_trylock(&mm->mmap_sem));
3262 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3263
3264 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3265 if (vma->anon_vma)
3266 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3267 vm_unlock_anon_vma(avc->anon_vma);
3268 if (vma->vm_file && vma->vm_file->f_mapping)
3269 vm_unlock_mapping(vma->vm_file->f_mapping);
3270 }
3271
3272 mutex_unlock(&mm_all_locks_mutex);
3273}
3274
3275
3276
3277
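/*
 * initialise the percpu counter used for tracking committed memory
 */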
3278void __init mmap_init(void)
3279{
3280 int ret;
3281
3282 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3283 VM_BUG_ON(ret);
3284}
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
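/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd/login/bash/top/kill.
 */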
3296static int init_user_reserve(void)
3297{
3298 unsigned long free_kbytes;
3299
3300 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3301
3302 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3303 return 0;
3304}
3305subsys_initcall(init_user_reserve);
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
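/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB).
 */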
3317static int init_admin_reserve(void)
3318{
3319 unsigned long free_kbytes;
3320
3321 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3322
3323 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3324 return 0;
3325}
3326subsys_initcall(init_admin_reserve);
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
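/*
 * Reinitialise the user and admin reserves if memory is added or removed.
 *
 * When memory comes online, reserves that were not disabled and are still
 * below their default ceilings (128MB for the user reserve, 8MB for the
 * admin reserve) are recomputed; values raised or disabled by the admin
 * are left alone.
 *
 * When memory goes offline and either reserve exceeds the remaining free
 * memory, that reserve is recomputed and the new value is logged.
 */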
3346static int reserve_mem_notifier(struct notifier_block *nb,
3347 unsigned long action, void *data)
3348{
3349 unsigned long tmp, free_kbytes;
3350
3351 switch (action) {
3352 case MEM_ONLINE:
3353
3354 tmp = sysctl_user_reserve_kbytes;
3355 if (0 < tmp && tmp < (1UL << 17))
3356 init_user_reserve();
3357
3358
3359 tmp = sysctl_admin_reserve_kbytes;
3360 if (0 < tmp && tmp < (1UL << 13))
3361 init_admin_reserve();
3362
3363 break;
3364 case MEM_OFFLINE:
3365 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3366
3367 if (sysctl_user_reserve_kbytes > free_kbytes) {
3368 init_user_reserve();
3369 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3370 sysctl_user_reserve_kbytes);
3371 }
3372
3373 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3374 init_admin_reserve();
3375 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3376 sysctl_admin_reserve_kbytes);
3377 }
3378 break;
3379 default:
3380 break;
3381 }
3382 return NOTIFY_OK;
3383}
3384
3385static struct notifier_block reserve_mem_nb = {
3386 .notifier_call = reserve_mem_notifier,
3387};
3388
3389static int __meminit init_reserve_notifier(void)
3390{
3391 if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for user/admin reserves\n");
3393
3394 return 0;
3395}
3396subsys_initcall(init_reserve_notifier);
3397