/*
 * mm/mmap.c
 *
 * Memory mapping support: mmap, munmap, brk, and the bookkeeping that
 * keeps the mm's vma list and augmented rbtree consistent.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * protection_map[] holds the 16 architecture page protections indexed
 * by the low four vm_flags bits (VM_READ, VM_WRITE, VM_EXEC,
 * VM_SHARED).  Private (copy-on-write) mappings use the __P entries,
 * shared mappings the __S entries.
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);
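
/*
 * Example (illustrative, not part of the original source): a private
 * read/write mapping has (vm_flags & 0xf) == VM_READ|VM_WRITE == 3, so
 *
 *	pgprot_t prot = vm_get_page_prot(VM_READ | VM_WRITE);
 *
 * selects protection_map[3] == __P011, the architecture's copy-on-write
 * protection; adding VM_SHARED selects __S011, which is truly writable.
 */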

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
	return next;
}

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later, after the
	 * test of oldbrk with newbrk, then it can escape the test and let
	 * the data segment grow beyond its set limit in the case where the
	 * limit is lowered after the test is done, which can happen with
	 * setrlimit().
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
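
/*
 * Usage sketch (illustrative, userspace view): the C library typically
 * builds sbrk() on top of this syscall, roughly:
 *
 *	void *cur = sbrk(0);		// current program break
 *	if (brk((char *)cur + 4096))	// ask for one more page
 *		perror("brk");
 *
 * Note the kernel page-aligns oldbrk/newbrk before mapping, so the byte
 * value stored in mm->brk can lie below the end of the last mapped page.
 */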

static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, prev_end, subtree_gap;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but it's really not a big deal.
	 */
	max = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (max > prev_end)
			max -= prev_end;
		else
			max = 0;
	}
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
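
/*
 * Worked example (illustrative): if vm_prev ends at 0x1000 and this vma
 * starts at 0x5000 (ignoring any guard gap), the local gap is 0x4000.
 * If the left subtree advertises rb_subtree_gap == 0x8000, that larger
 * value wins and is what this node reports upward, letting gap searches
 * skip whole subtrees whose rb_subtree_gap is smaller than the request.
 */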

#ifdef CONFIG_DEBUG_VM_RB
static int browse_rb(struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		spin_lock(&mm->page_table_lock);
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		spin_unlock(&mm->page_table_lock);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma_chain *avc;

		if (anon_vma) {
			anon_vma_lock_read(anon_vma);
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				anon_vma_interval_tree_verify(avc);
			anon_vma_unlock_read(anon_vma);
		}

		highest_address = vm_end_gap(vma);
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(mm);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)

/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * As it turns out, RB_DECLARE_CALLBACKS() already created a
	 * callback that does exactly what we want.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the "next" vma being erased if
	 * next->vm_start was reduced.
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the vma being erased.
	 */
	validate_mm_rb(root, vma);

	__vma_rb_erase(vma, root);
}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_sem and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}
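
/*
 * Example (illustrative): with vmas [0x1000,0x2000) and [0x5000,0x6000),
 * find_vma_links(mm, 0x3000, 0x4000, ...) succeeds and returns the
 * insertion point between them via *pprev/*rb_link/*rb_parent, while
 * find_vma_links(mm, 0x1800, 0x2800, ...) returns -ENOMEM because the
 * range overlaps an existing vma.
 */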

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree link,
	 * so the vma's own gap can't be computed yet.  Link the vma
	 * with a zero rb_subtree_gap first, then set the correct value
	 * (which includes the vma's gap) and let the augmented rbtree
	 * propagate it after insertion.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&file_inode(file)->i_writecount);
		if (vma->vm_flags & VM_SHARED)
			atomic_inc(&mapping->i_mmap_writable);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
 * mm's list and rbtree.  It has already been inserted into the interval tree.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink_common(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *prev,
						bool has_prev,
						struct vm_area_struct *ignore)
{
	struct vm_area_struct *next;

	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
	next = vma->vm_next;
	if (has_prev)
		prev->vm_next = next;
	else {
		prev = vma->vm_prev;
		if (prev)
			prev->vm_next = next;
		else
			mm->mmap = next;
	}
	if (next)
		next->vm_prev = prev;

	/* Kill the cache */
	vmacache_invalidate(mm);
}

static inline void __vma_unlink_prev(struct mm_struct *mm,
				     struct vm_area_struct *vma,
				     struct vm_area_struct *prev)
{
	__vma_unlink_common(mm, vma, prev, true, vma);
}

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
	struct address_space *mapping = NULL;
	struct rb_root_cached *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL, *importer = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mprotect case 6).
			 * The only other cases that get here are
			 * case 1, case 7 and case 8.
			 */
			if (next == expand) {
				/*
				 * The only case where we don't expand "vma"
				 * and we expand "next" instead is case 8.
				 */
				VM_WARN_ON(end != next->vm_end);
				/*
				 * remove_next == 3 means we're
				 * removing "vma" and that to do so we
				 * swapped "vma" and "next".
				 */
				remove_next = 3;
				VM_WARN_ON(file != next->vm_file);
				swap(vma, next);
			} else {
				VM_WARN_ON(expand != vma);
				/*
				 * case 1, 6, 7, remove_next == 2 is case 6,
				 * remove_next == 1 is case 1 or 7.
				 */
				remove_next = 1 + (end > next->vm_end);
				VM_WARN_ON(remove_next == 2 &&
					   end != next->vm_next->vm_end);
				VM_WARN_ON(remove_next == 1 &&
					   end != next->vm_end);
				/* trim end to next, for case 6 first pass */
				end = next->vm_end;
			}

			exporter = next;
			importer = vma;

			/*
			 * If next doesn't have anon_vma, import from vma after
			 * next, if the vma overlaps with it.
			 */
			if (remove_next == 2 && !next->anon_vma)
				exporter = next->vm_next;

		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
			 * mprotect case 5 shifting the boundary up.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			exporter = next;
			importer = vma;
			VM_WARN_ON(expand != importer);
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks, and !insert tells it's not
			 * split_vma inserting another: so it must be
			 * mprotect case 4 shifting the boundary down.
			 */
			adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
			exporter = vma;
			importer = next;
			VM_WARN_ON(expand != importer);
		}

		/*
		 * Easily overlooked: when mprotect shifts the boundary,
		 * make sure the expanding vma has anon_vma set if the
		 * shrinking vma had, to cover any anon pages imported.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			int error;

			importer->anon_vma = exporter->anon_vma;
			error = anon_vma_clone(importer, exporter);
			if (error)
				return error;
		}
	}
again:
	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);

	if (file) {
		mapping = file->f_mapping;
		root = &mapping->i_mmap;
		uprobe_munmap(vma, vma->vm_start, vma->vm_end);

		if (adjust_next)
			uprobe_munmap(next, next->vm_start, next->vm_end);

		i_mmap_lock_write(mapping);
		if (insert) {
			/*
			 * Put into interval tree now, so instantiated pages
			 * are visible to arm/parisc __flush_dcache_page
			 * throughout; but we cannot insert into address
			 * space until vma start or end is updated.
			 */
			__vma_link_file(insert);
		}
	}

	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_WARN_ON(adjust_next && next->anon_vma &&
			   anon_vma != next->anon_vma);
		anon_vma_lock_write(anon_vma);
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	if (start != vma->vm_start) {
		vma->vm_start = start;
		start_changed = true;
	}
	if (end != vma->vm_end) {
		vma->vm_end = end;
		end_changed = true;
	}
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma_merge has merged next into vma, and needs
		 * us to remove next before dropping the locks.
		 */
		if (remove_next != 3)
			__vma_unlink_prev(mm, next, vma);
		else
			/*
			 * vma is not before next if they've been
			 * swapped.
			 *
			 * pre-swap() next->vm_start was reduced so
			 * tell validate_mm_rb to ignore pre-swap()
			 * "next" (which is stored in post-swap()
			 * "vma").
			 */
			__vma_unlink_common(mm, next, NULL, false, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * split_vma has split insert from vma, and needs
		 * us to insert it before dropping the locks
		 * (it may either follow vma or precede it).
		 */
		__insert_vm_struct(mm, insert);
	} else {
		if (start_changed)
			vma_gap_update(vma);
		if (end_changed) {
			if (!next)
				mm->highest_vm_end = vm_end_gap(vma);
			else if (!adjust_next)
				vma_gap_update(next);
		}
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock_write(anon_vma);
	}
	if (mapping)
		i_mmap_unlock_write(mapping);

	if (root) {
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		vm_area_free(next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
		 * we must remove another next too. It would clutter
		 * up the code too much to do both in one go.
		 */
		if (remove_next != 3) {
			/*
			 * If "next" was removed and vma->vm_end was
			 * expanded (up) over it, in turn
			 * "next->vm_prev->vm_end" changed and the
			 * "vma->vm_next" gap must be updated.
			 */
			next = vma->vm_next;
		} else {
			/*
			 * For the scope of the comment "next" and
			 * "vma" considered pre-swap(): if "vma" was
			 * removed, next->vm_start was expanded (down)
			 * over it and the "next" gap must be updated.
			 * Because of the swap() the post-swap() "vma"
			 * actually points to pre-swap() "next", which
			 * is the vma whose gap needs updating.
			 */
			next = vma;
		}
		if (remove_next == 2) {
			remove_next = 1;
			end = next->vm_end;
			goto again;
		}
		else if (next)
			vma_gap_update(next);
		else {
			/*
			 * If remove_next == 2 we obviously can't
			 * reach this path.
			 *
			 * If remove_next == 3 we can't reach this
			 * path because pre-swap() next is always not
			 * NULL. pre-swap() "next" is not being
			 * removed and its next->vm_end is not altered
			 * (and furthermore "end" already matches
			 * next->vm_end in remove_next == 3).
			 *
			 * We reach this only in the remove_next == 1
			 * case if the "next" vma that was removed was
			 * the highest vma of the mm. However in such
			 * case next->vm_end == "end" and the extended
			 * "vma" has vma->vm_end == next->vm_end so
			 * mm->highest_vm_end doesn't need any update
			 * in remove_next == 1 case.
			 */
			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
		}
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}

/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
static inline int is_mergeable_vma(struct vm_area_struct *vma,
				struct file *file, unsigned long vm_flags,
				struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	/*
	 * VM_SOFTDIRTY should not prevent VMA merging if we match the
	 * flags except the dirty bit -- the caller should mark the
	 * merged VMA as dirty.  If the dirty bit were included in the
	 * comparison, we would increase pressure on the memory system
	 * by forcing the kernel to generate new VMAs when old ones
	 * could be extended instead.
	 */
	if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMA cloned from
	 * parents. This can improve scalability caused by anon_vma lock.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of
 * pagecache indices because do_mmap() does not permit mmap's which
 * wrap, nor mmaps which cover the final page at index -1UL.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
		     struct anon_vma *anon_vma, struct file *file,
		     pgoff_t vm_pgoff,
		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 */
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
		    struct anon_vma *anon_vma, struct file *file,
		    pgoff_t vm_pgoff,
		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;
		vm_pglen = vma_pages(vma);
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}
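
/*
 * Example (illustrative): a vma mapping file pages [0,16) at vm_pgoff 0
 * can merge with a following mapping of the same file only if that
 * mapping's vm_pgoff is exactly 16 (vma->vm_pgoff + vma_pages(vma)),
 * i.e. the file offsets are contiguous just as the virtual addresses are.
 */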

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                   AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN       PPPPPPNNNNNN
 *    cannot merge    might become       might become
 *                    PPNNNNNNNNNN       PPPPPPPPPPNN
 *    mmap, brk or    case 4 below       case 5 below
 *    mremap move:
 *                        AAAA               AAAA
 *                    PPPP    NNNN       PPPPNNNNXXXX
 *                    might become       might become
 *                    PPPPPPPPPPPP 1 or  PPPPPPPPPPPP 6 or
 *                    PPPPPPPPNNNN 2 or  PPPPPPPPXXXX 7 or
 *                    PPPPNNNNNNNN 3     PPPPXXXXXXXX 8
 *
 * It is important for case 8 that the vma NNNN overlapping the
 * region AAAA is never extended: instead NNNN is removed and the
 * following vma is extended over it, which is why __vma_adjust()
 * swap()s "vma" and "next" in that case.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy,
			struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;

	/* verify some invariants that must be enforced by the caller */
	VM_WARN_ON(prev && addr <= prev->vm_start);
	VM_WARN_ON(area && end > area->vm_end);
	VM_WARN_ON(addr >= end);

	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
					    anon_vma, file, pgoff,
					    vm_userfaultfd_ctx)) {
		/*
		 * OK, it can.  Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
						     anon_vma, file,
						     pgoff+pglen,
						     vm_userfaultfd_ctx) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {
							/* cases 1, 6 */
			err = __vma_adjust(prev, prev->vm_start,
					 next->vm_end, prev->vm_pgoff, NULL,
					 prev);
		} else					/* cases 2, 5, 7 */
			err = __vma_adjust(prev, prev->vm_start,
					 end, prev->vm_pgoff, NULL, prev);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev, vm_flags);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					     anon_vma, file, pgoff+pglen,
					     vm_userfaultfd_ctx)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			err = __vma_adjust(prev, prev->vm_start,
					 addr, prev->vm_pgoff, NULL, next);
		else {					/* cases 3, 8 */
			err = __vma_adjust(area, addr, next->vm_end,
					 next->vm_pgoff - pglen, NULL, next);
			/*
			 * In case 3 area is already equal to next and
			 * this is a noop, but in case 8 "area" has
			 * been removed and next was expanded over it.
			 */
			area = next;
		}
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area, vm_flags);
		return area;
	}

	return NULL;
}

/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 *
 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
 * we can merge the two vma's. For example, we refuse to merge a vma if
 * there is a vm_ops->close() function, because that indicates that the
 * driver is doing some kind of reference counting. But that doesn't
 * really matter for the anon_vma sharing case.
 */
static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
{
	return a->vm_end == b->vm_start &&
		mpol_equal(vma_policy(a), vma_policy(b)) &&
		a->vm_file == b->vm_file &&
		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}

/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
 * the same as 'old', the other will be the new one that is trying
 * to share the anon_vma.
 *
 * NOTE! This runs with mm_sem held for reading, so it is possible that
 * the anon_vma of 'old' is concurrently in the process of being set up
 * by another page fault trying to merge _that_. But that's ok: if it
 * is being set up, that automatically means that it will be a singleton
 * acceptable for merging, so we can do all of this optimistically. But
 * we do the READ_ONCE() to make sure that we never re-load the pointer.
 *
 * IOW: the "list_is_singular()" test on the anon_vma_chain only
 * matters for the 'stable anon_vma' case (ie the thing we want to avoid
 * is to return an anon_vma that is "complex" due to having gone through
 * a fork).
 *
 * We also make sure that the two vma's are compatible (adjacent,
 * and with the same memory policies). That's all stable, even with just
 * a read lock on the mm_sem.
 */
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
{
	if (anon_vma_compatible(a, b)) {
		struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);

		if (anon_vma && list_is_singular(&old->anon_vma_chain))
			return anon_vma;
	}
	return NULL;
}

/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off to
 * allocate a new one.  It checks because a repetitive sequence of
 * mprotects and faults may otherwise lead to distinct anon_vmas being
 * allocated, preventing vma merge in subsequent mprotect.
 */
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma;
	struct vm_area_struct *near;

	near = vma->vm_next;
	if (!near)
		goto try_prev;

	anon_vma = reusable_anon_vma(near, vma, near);
	if (anon_vma)
		return anon_vma;
try_prev:
	near = vma->vm_prev;
	if (!near)
		goto none;

	anon_vma = reusable_anon_vma(near, near, vma);
	if (anon_vma)
		return anon_vma;
none:
	/*
	 * There's no absolute need to look only at touching neighbours:
	 * we could search further afield for "compatible" anon_vmas.
	 * But it would probably just be a waste of time searching,
	 * or lead from one entangled vma to another worse one.
	 */
	return NULL;
}

/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr
 */
static inline unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	if (((void *)hint != NULL) &&
	    (hint < mmap_min_addr))
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}

static inline int mlock_future_check(struct mm_struct *mm,
				     unsigned long flags,
				     unsigned long len)
{
	unsigned long locked, lock_limit;

	/*  mlock MCL_FUTURE? */
	if (flags & VM_LOCKED) {
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}
	return 0;
}

static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		return MAX_LFS_FILESIZE;

	if (S_ISBLK(inode->i_mode))
		return MAX_LFS_FILESIZE;

	/* Special "we do even unsigned file positions" case */
	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
		return 0;

	/* Yes, random drivers might want more. But I'm tired of buggy drivers */
	return ULONG_MAX;
}

static inline bool file_mmap_ok(struct file *file, struct inode *inode,
				unsigned long pgoff, unsigned long len)
{
	u64 maxsize = file_mmap_size_max(file, inode);

	if (maxsize && len > maxsize)
		return false;
	maxsize -= len;
	if (pgoff > maxsize >> PAGE_SHIFT)
		return false;
	return true;
}
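
/*
 * Example (illustrative): for a regular file, mapping len bytes at page
 * offset pgoff is allowed only while pgoff <= (MAX_LFS_FILESIZE - len)
 * >> PAGE_SHIFT; subtracting len before the shift is what stops the
 * mapping from extending past the maximum file size even when pgoff by
 * itself is still in range.
 */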

/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, vm_flags_t vm_flags,
			unsigned long pgoff, unsigned long *populate,
			struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	int pkey = 0;

	*populate = 0;

	if (!len)
		return -EINVAL;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we dont add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && path_noexec(&file->f_path)))
			prot |= PROT_EXEC;

	/* force arch specific MAP_FIXED handling in get_unmapped_area */
	if (flags & MAP_FIXED_NOREPLACE)
		flags |= MAP_FIXED;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (offset_in_page(addr))
		return addr;

	if (flags & MAP_FIXED_NOREPLACE) {
		struct vm_area_struct *vma = find_vma(mm, addr);

		if (vma && vma->vm_start < addr + len)
			return -EEXIST;
	}

	if (prot == PROT_EXEC) {
		pkey = execute_only_pkey(mm);
		if (pkey < 0)
			pkey = 0;
	}

	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	if (mlock_future_check(mm, vm_flags, len))
		return -EAGAIN;

	if (file) {
		struct inode *inode = file_inode(file);
		unsigned long flags_mask;

		if (!file_mmap_ok(file, inode, pgoff, len))
			return -EOVERFLOW;

		flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;

		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/*
			 * Force use of MAP_SHARED_VALIDATE with non-legacy
			 * flags. E.g. MAP_SYNC is dangerous to use with
			 * MAP_SHARED as you don't know which consistency model
			 * to use.
			 */
			flags &= LEGACY_MAP_MASK;
			/* fall through */
		case MAP_SHARED_VALIDATE:
			if (flags & ~flags_mask)
				return -EOPNOTSUPP;
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (path_noexec(&file->f_path)) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op->mmap)
				return -ENODEV;
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			/*
			 * Ignore pgoff.
			 */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	/*
	 * Set 'VM_NORESERVE' if we should not account for the
	 * memory use of this mapping.
	 */
	if (flags & MAP_NORESERVE) {
		/* We honor MAP_NORESERVE if allowed to overcommit */
		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;

		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}

	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
	if (!IS_ERR_VALUE(addr) &&
	    ((vm_flags & VM_LOCKED) ||
	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
		*populate = len;
	return addr;
}

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
			      unsigned long prot, unsigned long flags,
			      unsigned long fd, unsigned long pgoff)
{
	struct file *file = NULL;
	unsigned long retval;

	if (!(flags & MAP_ANONYMOUS)) {
		audit_mmap_fd(fd, flags);
		file = fget(fd);
		if (!file)
			return -EBADF;
		if (is_file_hugepages(file))
			len = ALIGN(len, huge_page_size(hstate_file(file)));
		retval = -EINVAL;
		if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
			goto out_fput;
	} else if (flags & MAP_HUGETLB) {
		struct user_struct *user = NULL;
		struct hstate *hs;

		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (!hs)
			return -EINVAL;

		len = ALIGN(len, huge_page_size(hs));
		/*
		 * VM_NORESERVE is used because the reservations will be
		 * taken when vm_ops->mmap() is called.
		 * A dummy user value is used because we are not locking
		 * memory so no accounting is necessary.
		 */
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
				VM_NORESERVE,
				&user, HUGETLB_ANONHUGE_INODE,
				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:
	if (file)
		fput(file);
	return retval;
}

SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
}

#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	if (offset_in_page(a.offset))
		return -EINVAL;

	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
			       a.offset >> PAGE_SHIFT);
}
#endif

/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;

	/* The open routine did something to the protections that pgprot_modify
	 * won't preserve? */
	if (pgprot_val(vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
		return 0;

	/* Do we need to track softdirty? */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
		return 1;

	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_cap_account_dirty(vma->vm_file->f_mapping);
}

/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
	/*
	 * hugetlb has its own accounting separate from the core VM.
	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
	 */
	if (file && is_file_hugepages(file))
		return 0;

	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;

	/* Check against address space limit. */
	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
		unsigned long nr_pages;

		/*
		 * MAP_FIXED may remove pages of mappings that intersects with
		 * requested mapping. Account for the pages it would unmap.
		 */
		nr_pages = count_vma_pages_range(mm, addr, addr + len);

		if (!may_expand_vm(mm, vm_flags,
					(len >> PAGE_SHIFT) - nr_pages))
			return -ENOMEM;
	}

	/* Clear old maps */
	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
			      &rb_parent)) {
		if (do_munmap(mm, addr, len, uf))
			return -ENOMEM;
	}

	/*
	 * Private writable mapping: check memory availability
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/*
	 * Can we just expand an old mapping?
	 */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
		 * and map writably if VM_SHARED is set. This usually means the
		 * new file must not have been exposed to user-space, yet.
		 */
		vma->vm_file = get_file(file);
		error = call_mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/*
		 * Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method. -DaveM
		 * Bug: If addr is changed, prev, rb_link, rb_parent should
		 *      be updated for vma_link()
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		addr = vma->vm_start;
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	} else {
		vma_set_anonymous(vma);
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
	file = vma->vm_file;
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
					is_vm_hugetlb_page(vma) ||
					vma == get_gate_vma(current->mm))
			vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
		else
			mm->locked_vm += (len >> PAGE_SHIFT);
	}

	if (file)
		uprobe_mmap(vma);

	/*
	 * New (or expanded) vma always get soft dirty status.
	 * Otherwise user-space soft-dirty page tracker won't
	 * be able to distinguish situation when vma area unmapped,
	 * then new mapped in-place (which must be aimed as
	 * a completely new data area).
	 */
	vma->vm_flags |= VM_SOFTDIRTY;

	vma_set_page_prot(vma);

	return addr;

unmap_and_free_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
	vm_area_free(vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}

unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	/*
	 * We implement the search by looking for an rbtree node that
	 * immediately follows a suitable gap. That is,
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */

	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/* Adjust search limits by the desired length */
	if (info->high_limit < length)
		return -ENOMEM;
	high_limit = info->high_limit - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		goto check_highest;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		goto check_highest;

	while (true) {
		/* Visit left subtree if it looks promising */
		gap_end = vm_start_gap(vma);
		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
check_current:
		/* Check if current node has a suitable gap */
		if (gap_start > high_limit)
			return -ENOMEM;
		if (gap_end >= low_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length)
			goto found;

		/* Visit right subtree if it looks promising */
		if (vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				goto check_highest;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_left) {
				gap_start = vm_end_gap(vma->vm_prev);
				gap_end = vm_start_gap(vma);
				goto check_current;
			}
		}
	}

check_highest:
	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	gap_end = ULONG_MAX;
	if (gap_start > high_limit)
		return -ENOMEM;

found:
	/* We found a suitable gap. Clip it with the original low_limit. */
	if (gap_start < info->low_limit)
		gap_start = info->low_limit;

	/* Adjust gap address to the desired alignment */
	gap_start += (info->align_offset - gap_start) & info->align_mask;

	VM_BUG_ON(gap_start + info->length > info->high_limit);
	VM_BUG_ON(gap_start + info->length > gap_end);
	return gap_start;
}
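
/*
 * Usage sketch (illustrative): callers fill in vm_unmapped_area_info and
 * let the augmented rbtree do the search; the generic bottom-up allocator
 * further below does essentially:
 *
 *	struct vm_unmapped_area_info info = {
 *		.flags = 0, .length = len,
 *		.low_limit = mm->mmap_base, .high_limit = TASK_SIZE,
 *		.align_mask = 0, .align_offset = 0,
 *	};
 *	addr = vm_unmapped_area(&info);
 *
 * align_mask/align_offset request an address satisfying
 * (addr & align_mask) == (align_offset & align_mask).
 */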

unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit)
		goto found_highest;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vm_start_gap(vma);
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length)
			goto found;

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vm_end_gap(vma->vm_prev) : 0;
				goto check_current;
			}
		}
	}

found:
	/* We found a suitable gap. Clip it with the original high_limit. */
	if (gap_end > info->high_limit)
		gap_end = info->high_limit;

found_highest:
	/* Compute highest gap address at the desired alignment */
	gap_end -= info->length;
	gap_end -= (gap_end - info->align_offset) & info->align_mask;

	VM_BUG_ON(gap_end < info->low_limit);
	VM_BUG_ON(gap_end < gap_start);
	return gap_end;
}

/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct vm_unmapped_area_info info;

	if (len > TASK_SIZE - mmap_min_addr)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma_prev(mm, addr, &prev);
		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
		    (!vma || addr + len <= vm_start_gap(vma)) &&
		    (!prev || addr >= vm_end_gap(prev)))
			return addr;
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = mm->mmap_base;
	info.high_limit = TASK_SIZE;
	info.align_mask = 0;
	return vm_unmapped_area(&info);
}
#endif

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			  const unsigned long len, const unsigned long pgoff,
			  const unsigned long flags)
{
	struct vm_area_struct *vma, *prev;
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE - mmap_min_addr)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma_prev(mm, addr, &prev);
		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
				(!vma || addr + len <= vm_start_gap(vma)) &&
				(!prev || addr >= vm_end_gap(prev)))
			return addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
	info.high_limit = mm->mmap_base;
	info.align_mask = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (offset_in_page(addr)) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = TASK_SIZE;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}
#endif

unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
				  unsigned long, unsigned long, unsigned long);

	unsigned long error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* Careful about overflows.. */
	if (len > TASK_SIZE)
		return -ENOMEM;

	get_area = current->mm->get_unmapped_area;
	if (file) {
		if (file->f_op->get_unmapped_area)
			get_area = file->f_op->get_unmapped_area;
	} else if (flags & MAP_SHARED) {
		/*
		 * mmap_region() will call shmem_zero_setup() to create a file,
		 * so use shmem's get_unmapped_area in case it can be huge.
		 * do_mmap_pgoff() will clear pgoff, so match alignment.
		 */
		pgoff = 0;
		get_area = shmem_get_unmapped_area;
	}

	addr = get_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;

	if (addr > TASK_SIZE - len)
		return -ENOMEM;
	if (offset_in_page(addr))
		return -EINVAL;

	error = security_mmap_addr(addr);
	return error ? error : addr;
}

EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
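
/*
 * Example (illustrative): with a single vma covering [0x1000, 0x2000),
 * find_vma(mm, 0x1800) returns that vma, and so does find_vma(mm, 0x800):
 * the lookup returns the first vma with addr < vm_end.  Callers that need
 * containment rather than "nearest above" must also check vm_start <= addr.
 */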

/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */
struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
			struct vm_area_struct **pprev)
{
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (vma) {
		*pprev = vma->vm_prev;
	} else {
		struct rb_node *rb_node = mm->mm_rb.rb_node;
		*pprev = NULL;
		while (rb_node) {
			*pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
			rb_node = rb_node->rb_right;
		}
	}
	return vma;
}

/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */
static int acct_stack_growth(struct vm_area_struct *vma,
			     unsigned long size, unsigned long grow)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long new_start;

	/* address space limit tests */
	if (!may_expand_vm(mm, vma->vm_flags, grow))
		return -ENOMEM;

	/* Stack limit test */
	if (size > rlimit(RLIMIT_STACK))
		return -ENOMEM;

	/* mlock limit tests */
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked;
		unsigned long limit;
		locked = mm->locked_vm + grow;
		limit = rlimit(RLIMIT_MEMLOCK);
		limit >>= PAGE_SHIFT;
		if (locked > limit && !capable(CAP_IPC_LOCK))
			return -ENOMEM;
	}

	/* Check to ensure the stack will not grow into a hugetlb-only region */
	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
			vma->vm_end - size;
	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
		return -EFAULT;

	/*
	 * Overcommit..  This must be the final test, as it will
	 * update security statistics.
	 */
	if (security_vm_enough_memory_mm(mm, grow))
		return -ENOMEM;

	return 0;
}

#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next;
	unsigned long gap_addr;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* Guard against exceeding limits of the address space. */
	address &= PAGE_MASK;
	if (address >= (TASK_SIZE & PAGE_MASK))
		return -ENOMEM;
	address += PAGE_SIZE;

	/* Enforce stack_guard_gap */
	gap_addr = address + stack_guard_gap;

	/* Guard against overflow */
	if (gap_addr < address || gap_addr > TASK_SIZE)
		gap_addr = TASK_SIZE;

	next = vma->vm_next;
	if (next && next->vm_start < gap_addr &&
			(next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
		if (!(next->vm_flags & VM_GROWSUP))
			return -ENOMEM;
		/* Check that both stack segments have the same anon_vma? */
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address > vma->vm_end) {
		unsigned long size, grow;

		size = address - vma->vm_start;
		grow = (address - vma->vm_end) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_sem
				 * lock here, so we need to protect against
				 * concurrent vma expansions.
				 * anon_vma_lock_write() doesn't help here, as
				 * we don't guarantee that all growable vmas
				 * in a mm share the same root anon vma.
				 * So, we reuse mm->page_table_lock to guard
				 * against concurrent vma expansions.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_end = address;
				anon_vma_interval_tree_post_update_vma(vma);
				if (vma->vm_next)
					vma_gap_update(vma->vm_next);
				else
					mm->highest_vm_end = vm_end_gap(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */

/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
int expand_downwards(struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *prev;
	int error;

	address &= PAGE_MASK;
	error = security_mmap_addr(address);
	if (error)
		return error;

	/* Enforce stack_guard_gap */
	prev = vma->vm_prev;
	/* Check that both stack segments have the same anon_vma? */
	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
			(prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
		if (address - prev->vm_end < stack_guard_gap)
			return -ENOMEM;
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address < vma->vm_start) {
		unsigned long size, grow;

		size = vma->vm_end - address;
		grow = (vma->vm_start - address) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (grow <= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_sem
				 * lock here, so we need to protect against
				 * concurrent vma expansions.
				 * anon_vma_lock_write() doesn't help here, as
				 * we don't guarantee that all growable vmas
				 * in a mm share the same root anon vma.
				 * So, we reuse mm->page_table_lock to guard
				 * against concurrent vma expansions.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_start = address;
				vma->vm_pgoff -= grow;
				anon_vma_interval_tree_post_update_vma(vma);
				vma_gap_update(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}

/* enforced gap between the expanding stack and other mappings. */
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

static int __init cmdline_parse_stack_guard_gap(char *p)
{
	unsigned long val;
	char *endptr;

	val = simple_strtoul(p, &endptr, 10);
	if (!*endptr)
		stack_guard_gap = val << PAGE_SHIFT;

	return 0;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
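
/*
 * Example (illustrative): booting with "stack_guard_gap=512" widens the
 * gap to 512 pages (2MB with 4KB pages), while "stack_guard_gap=1"
 * restores the old single-page guard.  The value is in pages, not bytes;
 * the default above is 256 pages (1MB with 4KB pages).
 */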

#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_upwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma, *prev;

	addr &= PAGE_MASK;
	vma = find_vma_prev(mm, addr, &prev);
	if (vma && (vma->vm_start <= addr))
		return vma;
	if (!prev || expand_stack(prev, addr))
		return NULL;
	if (prev->vm_flags & VM_LOCKED)
		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
	return prev;
}
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm, addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED)
		populate_vma_page_range(vma, addr, start, NULL);
	return vma;
}
#endif

EXPORT_SYMBOL_GPL(find_extend_vma);

/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
	unsigned long nr_accounted = 0;

	/* Update high watermark before we lower total_vm */
	update_hiwater_vm(mm);
	do {
		long nrpages = vma_pages(vma);

		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += nrpages;
		vm_stat_account(mm, vma->vm_flags, -nrpages);
		vma = remove_vma(vma);
	} while (vma);
	vm_unacct_memory(nr_accounted);
	validate_mm(mm);
}

/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
	struct mmu_gather tlb;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start, end);
	update_hiwater_rss(mm);
	unmap_vmas(&tlb, vma, start, end);
	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
				 next ? next->vm_start : USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, start, end);
}

/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go..
 */
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, unsigned long end)
{
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;

	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	vma->vm_prev = NULL;
	do {
		vma_rb_erase(vma, &mm->mm_rb);
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);
	*insertion_point = vma;
	if (vma) {
		vma->vm_prev = prev;
		vma_gap_update(vma);
	} else
		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
	tail_vma->vm_next = NULL;

	/* Kill the cache */
	vmacache_invalidate(mm);
}

/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */
int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, int new_below)
{
	struct vm_area_struct *new;
	int err;

	if (vma->vm_ops && vma->vm_ops->split) {
		err = vma->vm_ops->split(vma, addr);
		if (err)
			return err;
	}

	new = vm_area_dup(vma);
	if (!new)
		return -ENOMEM;

	if (new_below)
		new->vm_end = addr;
	else {
		new->vm_start = addr;
		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
	}

	err = vma_dup_policy(vma, new);
	if (err)
		goto out_free_vma;

	err = anon_vma_clone(new, vma);
	if (err)
		goto out_free_mpol;

	if (new->vm_file)
		get_file(new->vm_file);

	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	if (new_below)
		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
			((addr - new->vm_start) >> PAGE_SHIFT), new);
	else
		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	/* Success. */
	if (!err)
		return 0;

	/* Clean everything up if vma_adjust failed. */
	if (new->vm_ops && new->vm_ops->close)
		new->vm_ops->close(new);
	if (new->vm_file)
		fput(new->vm_file);
	unlink_anon_vmas(new);
 out_free_mpol:
	mpol_put(vma_policy(new));
 out_free_vma:
	vm_area_free(new);
	return err;
}

/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */
int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
	      unsigned long addr, int new_below)
{
	if (mm->map_count >= sysctl_max_map_count)
		return -ENOMEM;

	return __split_vma(mm, vma, addr, new_below);
}
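
/*
 * Illustrative sketch (assumed caller, not part of this file): a range
 * operation that must not span partial VMAs first splits at both ends,
 * much like do_munmap() below does:
 *
 *	if (start > vma->vm_start)
 *		error = split_vma(mm, vma, start, 0);
 *	if (end < vma->vm_end)
 *		error = split_vma(mm, vma, end, 1);
 *
 * new_below selects which half the newly allocated vma becomes: non-zero
 * means the new vma covers the part below 'addr', zero means it covers
 * the part above.
 */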

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
	      struct list_head *uf)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	len = PAGE_ALIGN(len);
	if (len == 0)
		return -EINVAL;

	/* Find the first overlapping VMA */
	vma = find_vma(mm, start);
	if (!vma)
		return 0;
	prev = vma->vm_prev;
	/* we have  start < vma->vm_end  */

	/* if it doesn't overlap, we have nothing.. */
	end = start + len;
	if (vma->vm_start >= end)
		return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	if (start > vma->vm_start) {
		int error;

		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does it split the last one? */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = prev ? prev->vm_next : mm->mmap;

	if (unlikely(uf)) {
		/*
		 * If userfaultfd_unmap_prep returns an error the vmas
		 * will remain split, but userland will get a highly
		 * unexpected error anyway. This is no different from
		 * any other failure of a partially completed munmap,
		 * so just report the error without trying to undo the
		 * splits.
		 */
		int error = userfaultfd_unmap_prep(vma, start, end, uf);
		if (error)
			return error;
	}

	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */
	if (mm->locked_vm) {
		struct vm_area_struct *tmp = vma;
		while (tmp && tmp->vm_start < end) {
			if (tmp->vm_flags & VM_LOCKED) {
				mm->locked_vm -= vma_pages(tmp);
				munlock_vma_pages_all(tmp);
			}
			tmp = tmp->vm_next;
		}
	}

	/*
	 * Remove the vma's, and unmap the actual pages
	 */
	detach_vmas_to_be_unmapped(mm, vma, prev, end);
	unmap_region(mm, vma, prev, start, end);

	arch_unmap(mm, vma, start, end);

	/* Fix up all other VM information */
	remove_vma_list(mm, vma);

	return 0;
}

int vm_munmap(unsigned long start, size_t len)
{
	int ret;
	struct mm_struct *mm = current->mm;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	ret = do_munmap(mm, start, len, &uf);
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	profile_munmap(addr);
	return vm_munmap(addr, len);
}
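
/*
 * Userspace view (illustrative sketch): unmapping the middle of a
 * mapping is legal and simply splits it, e.g.:
 *
 *	p = mmap(NULL, 3 * pagesz, PROT_READ,
 *		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *	munmap(p + pagesz, pagesz);	leaves two single-page VMAs
 *
 * Kernel-internal callers should use vm_munmap(), or do_munmap()
 * directly if mmap_sem is already held for writing.
 */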

/*
 * Emulation of deprecated remap_file_pages() syscall.
 */
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{

	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long populate = 0;
	unsigned long ret = -EINVAL;
	struct file *file;

	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
		     current->comm, current->pid);

	if (prot)
		return ret;
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	if (start + size <= start)
		return ret;

	/* Does pgoff wrap? */
	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return ret;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	vma = find_vma(mm, start);

	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (start < vma->vm_start)
		goto out;

	if (start + size > vma->vm_end) {
		struct vm_area_struct *next;

		for (next = vma->vm_next; next; next = next->vm_next) {
			/* hole between vmas ? */
			if (next->vm_start != next->vm_prev->vm_end)
				goto out;

			if (next->vm_file != vma->vm_file)
				goto out;

			if (next->vm_flags != vma->vm_flags)
				goto out;

			if (start + size <= next->vm_end)
				break;
		}

		if (!next)
			goto out;
	}

	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
	prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
	prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;

	flags &= MAP_NONBLOCK;
	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
	if (vma->vm_flags & VM_LOCKED) {
		struct vm_area_struct *tmp;
		flags |= MAP_LOCKED;

		/* drop PG_Mlocked flag for over-mapped range */
		for (tmp = vma; tmp && tmp->vm_start < start + size;
				tmp = tmp->vm_next) {
			/*
			 * Split pmd and munlock page on the border
			 * of the range.
			 */
			vma_adjust_trans_huge(tmp, start, start + size, 0);

			munlock_vma_pages_range(tmp,
					max(tmp->vm_start, start),
					min(tmp->vm_end, start + size));
		}
	}

	file = get_file(vma->vm_file);
	ret = do_mmap_pgoff(vma->vm_file, start, size,
			prot, flags, pgoff, &populate, NULL);
	fput(file);
out:
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(ret, populate);
	if (!IS_ERR_VALUE(ret))
		ret = 0;
	return ret;
}
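
/*
 * Userspace view (illustrative sketch): the deprecated call rearranges
 * pages of a MAP_SHARED mapping by re-mmapping a subrange at another
 * file offset (pgoff is in pages):
 *
 *	addr = mmap(NULL, 2 * pagesz, PROT_READ, MAP_SHARED, fd, 0);
 *	remap_file_pages(addr, pagesz, 0, 1, 0);
 *
 * After the call the first page of the mapping shows page 1 of the
 * file. New code should instead mmap() with MAP_FIXED and the desired
 * pgoff, which is exactly what the emulation above boils down to.
 */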

static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
	if (unlikely(down_read_trylock(&mm->mmap_sem))) {
		WARN_ON(1);
		up_read(&mm->mmap_sem);
	}
#endif
}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;

	/* Until we need other flags, refuse anything except VM_EXEC. */
	if ((flags & (~VM_EXEC)) != 0)
		return -EINVAL;
	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
	if (offset_in_page(error))
		return error;

	error = mlock_future_check(mm, mm->def_flags, len);
	if (error)
		return error;

	/*
	 * mm->mmap_sem is required to protect against another thread
	 * changing the mappings in case we sleep.
	 */
	verify_mm_writelocked(mm);

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
			      &rb_parent)) {
		if (do_munmap(mm, addr, len, uf))
			return -ENOMEM;
	}

	/* Check against address space limits *after* clearing old maps... */
	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Can we just expand an old private anonymous mapping? */
	vma = vma_merge(mm, prev, addr, addr + len, flags,
			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	vma_set_anonymous(vma);
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	perf_event_mmap(vma);
	mm->total_vm += len >> PAGE_SHIFT;
	mm->data_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED)
		mm->locked_vm += (len >> PAGE_SHIFT);
	vma->vm_flags |= VM_SOFTDIRTY;
	return 0;
}

int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	unsigned long len;
	int ret;
	bool populate;
	LIST_HEAD(uf);

	len = PAGE_ALIGN(request);
	if (len < request)
		return -ENOMEM;
	if (!len)
		return 0;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	ret = do_brk_flags(addr, len, flags, &uf);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate && !ret)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk_flags);

int vm_brk(unsigned long addr, unsigned long len)
{
	return vm_brk_flags(addr, len, 0);
}
EXPORT_SYMBOL(vm_brk);
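
/*
 * Illustrative sketch (assumed caller; identifiers are placeholders):
 * binary loaders use vm_brk_flags() to set up anonymous segments such
 * as a bss-style region:
 *
 *	error = vm_brk_flags(bss_start, bss_end - bss_start,
 *			     executable ? VM_EXEC : 0);
 *	if (error)
 *		return error;
 *
 * Only VM_EXEC may be passed in flags; anything else is rejected by
 * do_brk_flags() above.
 */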

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	/* mm's last user has gone, and its about to be pulled down */
	mmu_notifier_release(mm);

	if (unlikely(mm_is_oom_victim(mm))) {
		/*
		 * Manually reap the mm to free as much memory as
		 * possible. Then, as the oom reaper does, set
		 * MMF_OOM_SKIP to disregard this mm from further
		 * consideration in the oom_kill path.
		 *
		 * The down_write()/up_write() pair below synchronizes
		 * with any oom reaper still operating on this mm: once
		 * we get the lock the reaper is done, so the teardown
		 * below cannot race with it.
		 */
		(void)__oom_reap_task_mm(mm);

		set_bit(MMF_OOM_SKIP, &mm->flags);
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}

	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	unmap_vmas(&tlb, vma, 0, -1);
	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, 0, -1);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
	}
	vm_unacct_memory(nr_accounted);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must be held for writing.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (vma_is_anonymous(vma)) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}
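
/*
 * For a complete in-file usage pattern see __install_special_mapping()
 * below: allocate with vm_area_alloc(), fill in vm_start, vm_end,
 * vm_flags, vm_page_prot and vm_ops, call insert_vm_struct(), and
 * vm_area_free() the vma on failure.
 */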

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			    vma->vm_userfaultfd_ctx);
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = vm_area_dup(vma);
		if (!new_vma)
			goto out;
		new_vma->vm_start = addr;
		new_vma->vm_end = addr + len;
		new_vma->vm_pgoff = pgoff;
		if (vma_dup_policy(vma, new_vma))
			goto out_free_vma;
		if (anon_vma_clone(new_vma, vma))
			goto out_free_mempol;
		if (new_vma->vm_file)
			get_file(new_vma->vm_file);
		if (new_vma->vm_ops && new_vma->vm_ops->open)
			new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
	}
	return new_vma;

out_free_mempol:
	mpol_put(vma_policy(new_vma));
out_free_vma:
	vm_area_free(new_vma);
out:
	return NULL;
}
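
/*
 * Illustrative sketch (mremap-style caller, not part of this file):
 * copy_vma() is the first half of a VMA move; the caller then migrates
 * the page tables and unmaps the old range, roughly:
 *
 *	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
 *			   &need_rmap_locks);
 *	if (!new_vma)
 *		return -ENOMEM;
 *	move the page tables from vma to new_vma, taking the rmap
 *	locks if need_rmap_locks, then munmap the old range
 */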

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
		return false;

	if (is_data_mapping(flags) &&
	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
		/* Workaround for Valgrind */
		if (rlimit(RLIMIT_DATA) == 0 &&
		    mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
			return true;

		pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
			     current->comm, current->pid,
			     (mm->data_vm + npages) << PAGE_SHIFT,
			     rlimit(RLIMIT_DATA),
			     ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

		if (!ignore_rlimit_data)
			return false;
	}

	return true;
}

void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
	mm->total_vm += npages;

	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}
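
/*
 * These two are used as a pair; an illustrative sketch of the mmap
 * path:
 *
 *	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
 *		return -ENOMEM;
 *	...
 *	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
 *
 * i.e. check the rlimits before creating a mapping, and account the
 * pages once the mapping is in place (with a negative npages when
 * tearing down, as in remove_vma_list() above).
 */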

static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

static int special_mapping_mremap(struct vm_area_struct *new_vma)
{
	struct vm_special_mapping *sm = new_vma->vm_private_data;

	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
		return -EFAULT;

	if (sm->mremap)
		return sm->mremap(sm, new_vma);

	return 0;
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
	.mremap = special_mapping_mremap,
	.name = special_mapping_name,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgoff_t pgoff;
	struct page **pages;

	if (vma->vm_ops == &legacy_special_mapping_vmops) {
		pages = vma->vm_private_data;
	} else {
		struct vm_special_mapping *sm = vma->vm_private_data;

		if (sm->fault)
			return sm->fault(sm, vmf->vma, vmf);

		pages = sm->pages;
	}

	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}

static struct vm_area_struct *__install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, void *priv,
	const struct vm_operations_struct *ops)
{
	int ret;
	struct vm_area_struct *vma;

	vma = vm_area_alloc(mm);
	if (unlikely(vma == NULL))
		return ERR_PTR(-ENOMEM);

	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = ops;
	vma->vm_private_data = priv;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);

	perf_event_mmap(vma);

	return vma;

out:
	vm_area_free(vma);
	return ERR_PTR(ret);
}

bool vma_is_special_mapping(const struct vm_area_struct *vma,
	const struct vm_special_mapping *sm)
{
	return vma->vm_private_data == sm &&
		(vma->vm_ops == &special_mapping_vmops ||
		 vma->vm_ops == &legacy_special_mapping_vmops);
}

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
struct vm_area_struct *_install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, const struct vm_special_mapping *spec)
{
	return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
					&special_mapping_vmops);
}

int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma = __install_special_mapping(
		mm, addr, len, vm_flags, (void *)pages,
		&legacy_special_mapping_vmops);

	return PTR_ERR_OR_ZERO(vma);
}
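
/*
 * Illustrative sketch (vdso-style caller; identifiers are placeholders):
 *
 *	static struct vm_special_mapping vdso_mapping = {
 *		.name	= "[vdso]",
 *		.pages	= vdso_pages,	NULL-terminated page array
 *	};
 *
 *	vma = _install_special_mapping(mm, addr, len,
 *			VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
 *			&vdso_mapping);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *
 * This is the usual way architecture code maps its vDSO; mmap_sem must
 * be held for writing as noted above.
 */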

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
	}
}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas to be associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * as the locks taken are not released until mm_drop_all_locks() is
 * called.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag
 * in mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, according to the comment at
 * the beginning of mm/rmap.c:
 *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem
 *     for hugetlb mappings);
 *   - all i_mmap_rwsem locks;
 *   - all anon_vma->rwsem locks.
 *
 * We can take all locks within these types randomly because the VM code
 * doesn't nest them, and we are protected from parallel
 * mm_take_all_locks() by mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				!is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
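
/*
 * Usage pattern (illustrative; mmu notifier registration is the main
 * in-tree user):
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (ret)
 *		goto out;		-EINTR: interrupted by a signal
 *	... operate on the frozen vma layout ...
 *	mm_drop_all_locks(mm);
 * out:
 *	up_write(&mm->mmap_sem);
 */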

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		i_mmap_unlock_write(mapping);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OOM_DISABLE mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top/kill at least. Systems with less than 256MB
 * will reserve min(3% of free memory, 8MB).
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
subsys_initcall(init_admin_reserve);

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		/* Default max is 8MB. Leave alone if modified by operator. */
		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
subsys_initcall(init_reserve_notifier);