/*
 * mm/mmap.c
 *
 * Virtual address space management: mmap/munmap/brk, VMA lookup,
 * merging and splitting, and the augmented rbtree of free gaps.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
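
/*
 * Map the low four VM_{READ,WRITE,EXEC,SHARED} flag bits to the
 * architecture's page protection bits: __P entries for private
 * mappings, __S entries for shared ones.
 */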
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif
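
/*
 * Combine the protection_map[] entry for the access bits in @vm_flags
 * with any architecture-specific protection bits.
 */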
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
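
/*
 * Unlink a file-based vma from its shared mapping.
 * Requires the mapping's i_mmap lock to be held for write.
 */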
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}
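
/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */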
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}
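
/*
 * Close a vm structure and free it, returning the next vma.
 */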
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
	return next;
}

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	bool downgraded = false;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	origbrk = mm->brk;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the
	 * test of oldbrk with newbrk then it can escape the test and let the
	 * data segment grow beyond its set limit in the case where the limit
	 * is not page aligned.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		mm->brk = brk;
		goto success;
	}

	/*
	 * Always allow shrinking brk.
	 * __do_munmap() may downgrade mmap_sem to read.
	 */
	if (brk <= mm->brk) {
		int ret;

		/*
		 * mm->brk must be protected by write mmap_sem so update it
		 * before downgrading mmap_sem. When __do_munmap() fails,
		 * mm->brk will be restored from origbrk.
		 */
		mm->brk = brk;
		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
		if (ret < 0) {
			mm->brk = origbrk;
			goto out;
		} else if (ret == 1) {
			downgraded = true;
		}
		goto success;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;
	mm->brk = brk;

success:
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	if (downgraded)
		up_read(&mm->mmap_sem);
	else
		up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = origbrk;
	up_write(&mm->mmap_sem);
	return retval;
}
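
/*
 * Compute the largest free gap in the subtree rooted at @vma: the
 * maximum of the gap just below this vma and the cached subtree gaps
 * of its rbtree children.
 */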
static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, prev_end, subtree_gap;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but keeps the code simpler.
	 */
	max = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (max > prev_end)
			max -= prev_end;
		else
			max = 0;
	}
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
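
/*
 * Heavyweight consistency checks of the vma list and rbtree, compiled
 * in only with CONFIG_DEBUG_VM_RB.
 */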
#ifdef CONFIG_DEBUG_VM_RB
static int browse_rb(struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		spin_lock(&mm->page_table_lock);
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		spin_unlock(&mm->page_table_lock);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma_chain *avc;

		if (anon_vma) {
			anon_vma_lock_read(anon_vma);
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				anon_vma_interval_tree_verify(avc);
			anon_vma_unlock_read(anon_vma);
		}

		highest_address = vm_end_gap(vma);
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(mm);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
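
/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_end, or the gap below the vma, has changed.
 */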
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
	 * function that does exactly what we want.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the "next" vma being erased if
	 * next->vm_start was reduced.
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the vma being erased.
	 */
	validate_mm_rb(root, vma);

	__vma_rb_erase(vma, root);
}
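
/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start, vm_end or vm_pgoff, the vma must
 * be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma(), and reinserted afterwards
 * with anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_sem and by
 * the root anon_vma's mutex.
 */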
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
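
/*
 * Find the insertion point for a new vma covering [addr, end): set
 * *pprev, *rb_link and *rb_parent, or return -ENOMEM if the range
 * overlaps an existing vma.
 */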
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree link, so
	 * the rb_subtree_gap values along the insertion path could not be
	 * updated on the way down. Insert the vma with a zero
	 * rb_subtree_gap first, then compute its real value and propagate
	 * it up through the augmented rbtree.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}
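
/*
 * Link a file-backed vma into its file's address_space interval tree,
 * adjusting the inode's i_writecount for VM_DENYWRITE and the mapping's
 * writable count for VM_SHARED.
 */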
static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			atomic_dec(&file_inode(file)->i_writecount);
		if (vma->vm_flags & VM_SHARED)
			atomic_inc(&mapping->i_mmap_writable);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev, rb_parent);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}
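
/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma
 * into the mm's list and rbtree. It has already been inserted into the
 * interval tree.
 */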
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink_common(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *prev,
						bool has_prev,
						struct vm_area_struct *ignore)
{
	struct vm_area_struct *next;

	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
	next = vma->vm_next;
	if (has_prev)
		prev->vm_next = next;
	else {
		prev = vma->vm_prev;
		if (prev)
			prev->vm_next = next;
		else
			mm->mmap = next;
	}
	if (next)
		next->vm_prev = prev;

	/* Kill the cache */
	vmacache_invalidate(mm);
}

static inline void __vma_unlink_prev(struct mm_struct *mm,
				     struct vm_area_struct *vma,
				     struct vm_area_struct *prev)
{
	__vma_unlink_common(mm, vma, prev, true, vma);
}
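
/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary. The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */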
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
	struct address_space *mapping = NULL;
	struct rb_root_cached *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL, *importer = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mprotect case 6).
			 * The only other cases that get here are
			 * case 1, case 7 and case 8.
			 */
			if (next == expand) {
				/*
				 * The only case where we don't expand "vma"
				 * and we expand "next" instead is case 8.
				 */
				VM_WARN_ON(end != next->vm_end);
				/*
				 * remove_next == 3 means we're
				 * removing "vma" and that to do so we
				 * swapped "vma" and "next".
				 */
				remove_next = 3;
				VM_WARN_ON(file != next->vm_file);
				swap(vma, next);
			} else {
				VM_WARN_ON(expand != vma);
				/*
				 * case 1, 6, 7, remove_next == 2 is case 6,
				 * remove_next == 1 is case 1 or 7.
				 */
				remove_next = 1 + (end > next->vm_end);
				VM_WARN_ON(remove_next == 2 &&
					   end != next->vm_next->vm_end);
				VM_WARN_ON(remove_next == 1 &&
					   end != next->vm_end);
				/* trim end to next, for case 6 first pass */
				end = next->vm_end;
			}

			exporter = next;
			importer = vma;

			/*
			 * If next doesn't have anon_vma, import from vma after
			 * next, if the vma overlaps with it.
			 */
			if (remove_next == 2 && !next->anon_vma)
				exporter = next->vm_next;

		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
			 * mprotect case 5 shifting the boundary up.
			 */
			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
			exporter = next;
			importer = vma;
			VM_WARN_ON(expand != importer);
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks, and !insert tells it's not
			 * split_vma inserting another: so it must be
			 * mprotect case 4 shifting the boundary down.
			 */
			adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
			exporter = vma;
			importer = next;
			VM_WARN_ON(expand != importer);
		}

		/*
		 * Easily overlooked: when mprotect shifts the boundary,
		 * make sure the expanding vma has anon_vma set if the
		 * shrinking vma had, to cover any anon pages imported.
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			int error;

			importer->anon_vma = exporter->anon_vma;
			error = anon_vma_clone(importer, exporter);
			if (error)
				return error;
		}
	}
again:
	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);

	if (file) {
		mapping = file->f_mapping;
		root = &mapping->i_mmap;
		uprobe_munmap(vma, vma->vm_start, vma->vm_end);

		if (adjust_next)
			uprobe_munmap(next, next->vm_start, next->vm_end);

		i_mmap_lock_write(mapping);
		if (insert) {
			/*
			 * Put into interval tree now, so instantiated pages
			 * are visible to arm/parisc __flush_dcache_page
			 * throughout; but we cannot insert into address
			 * space until vma start or end is updated.
			 */
			__vma_link_file(insert);
		}
	}

	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_WARN_ON(adjust_next && next->anon_vma &&
			   anon_vma != next->anon_vma);
		anon_vma_lock_write(anon_vma);
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	if (root) {
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	if (start != vma->vm_start) {
		vma->vm_start = start;
		start_changed = true;
	}
	if (end != vma->vm_end) {
		vma->vm_end = end;
		end_changed = true;
	}
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next << PAGE_SHIFT;
		next->vm_pgoff += adjust_next;
	}

	if (root) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma_merge has merged next into vma, and needs
		 * us to remove next before dropping the locks.
		 */
		if (remove_next != 3)
			__vma_unlink_prev(mm, next, vma);
		else
			/*
			 * vma is not before next if they've been
			 * swapped.
			 *
			 * pre-swap() next->vm_start was reduced so
			 * tell validate_mm_rb to ignore pre-swap()
			 * "next" (which is stored in post-swap()
			 * "vma").
			 */
			__vma_unlink_common(mm, next, NULL, false, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * split_vma has split insert from vma, and needs
		 * us to insert it before dropping the locks
		 * (it may either follow vma or precede it).
		 */
		__insert_vm_struct(mm, insert);
	} else {
		if (start_changed)
			vma_gap_update(vma);
		if (end_changed) {
			if (!next)
				mm->highest_vm_end = vm_end_gap(vma);
			else if (!adjust_next)
				vma_gap_update(next);
		}
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock_write(anon_vma);
	}
	if (mapping)
		i_mmap_unlock_write(mapping);

	if (root) {
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		vm_area_free(next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
		 * we must remove another next too. It would clutter
		 * up the code too much to do both in one go.
		 */
		if (remove_next != 3) {
			/*
			 * If "next" was removed and vma->vm_end was
			 * expanded (up) over it, in turn
			 * "next->vm_prev->vm_end" changed and the
			 * "vma->vm_next" gap must be updated.
			 */
			next = vma->vm_next;
		} else {
			/*
			 * For the scope of the comment "next" and
			 * "vma" considered pre-swap(): if "vma" was
			 * removed, next->vm_start was expanded (down)
			 * over it and the "next" gap must be updated.
			 * Because of the swap() the post-swap() "vma"
			 * actually points to pre-swap() "next"
			 * (post-swap() "next" as opposed is now a
			 * dangling pointer).
			 */
			next = vma;
		}
		if (remove_next == 2) {
			remove_next = 1;
			end = next->vm_end;
			goto again;
		}
		else if (next)
			vma_gap_update(next);
		else {
			/*
			 * If remove_next == 2 we obviously can't
			 * reach this path; if remove_next == 3 the
			 * pre-swap() "next" is never NULL, so we only
			 * get here in the remove_next == 1 case when
			 * the removed "next" was the highest vma of
			 * the mm. In that case next->vm_end == "end"
			 * and the extended "vma" has
			 * vma->vm_end == next->vm_end, so
			 * mm->highest_vm_end needs no update.
			 */
			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
		}
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}
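
/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */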
static inline int is_mergeable_vma(struct vm_area_struct *vma,
				struct file *file, unsigned long vm_flags,
				struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	/*
	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
	 * match the flags but dirty bit -- the caller should mark
	 * merged VMA as dirty. If dirty bit won't be excluded from
	 * comparison, we increase pressure on the memory system forcing
	 * the kernel to generate new VMAs when old one could be
	 * extended instead.
	 */
	if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMA cloned from
	 * parents. This can improve scalability caused by anon_vma_clone.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}
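
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but offsets
 * incompatible.
 */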
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
		     struct anon_vma *anon_vma, struct file *file,
		     pgoff_t vm_pgoff,
		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}
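
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but offsets
 * incompatible.
 */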
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
		    struct anon_vma *anon_vma, struct file *file,
		    pgoff_t vm_pgoff,
		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;
		vm_pglen = vma_pages(vma);
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}
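
/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The numbered cases (1-8, referenced by the "case N" comments in the
 * body and in __vma_adjust) cover the ways the new area can line up
 * against prev and next: merging with prev alone, with next alone, or
 * with both when the new area exactly fills the hole between them.
 */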
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy,
			struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	if (prev)
		next = prev->vm_next;
	else
		next = mm->mmap;
	area = next;
	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;

	/* verify some invariant that must be enforced by the caller */
	VM_WARN_ON(prev && addr <= prev->vm_start);
	VM_WARN_ON(area && end > area->vm_end);
	VM_WARN_ON(addr >= end);

	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
					    anon_vma, file, pgoff,
					    vm_userfaultfd_ctx)) {
		/*
		 * OK, it can.  Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
						     anon_vma, file,
						     pgoff+pglen,
						     vm_userfaultfd_ctx) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {
							/* cases 1, 6 */
			err = __vma_adjust(prev, prev->vm_start,
					 next->vm_end, prev->vm_pgoff, NULL,
					 prev);
		} else					/* cases 2, 5, 7 */
			err = __vma_adjust(prev, prev->vm_start,
					 end, prev->vm_pgoff, NULL, prev);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev, vm_flags);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					     anon_vma, file, pgoff+pglen,
					     vm_userfaultfd_ctx)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			err = __vma_adjust(prev, prev->vm_start,
					 addr, prev->vm_pgoff, NULL, next);
		else {					/* cases 3, 8 */
			err = __vma_adjust(area, addr, next->vm_end,
					 next->vm_pgoff - pglen, NULL, next);
			/*
			 * In case 3 area is already equal to next and
			 * this is a noop, but in case 8 "area" has been
			 * removed and next was expanded over it.
			 */
			area = next;
		}
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area, vm_flags);
		return area;
	}

	return NULL;
}
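
/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 *
 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
 * we can merge the two vma's. For example, we refuse to merge a vma if
 * there is a vm_ops->close() function, because that indicates that the
 * driver is doing some kind of reference counting. But that doesn't
 * really matter for the anon_vma sharing case.
 */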
static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
{
	return a->vm_end == b->vm_start &&
		mpol_equal(vma_policy(a), vma_policy(b)) &&
		a->vm_file == b->vm_file &&
		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
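
/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
 * the same as 'old', the other will be the new one that is trying
 * to share the anon_vma.
 */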
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
{
	if (anon_vma_compatible(a, b)) {
		struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);

		if (anon_vma && list_is_singular(&old->anon_vma_chain))
			return anon_vma;
	}
	return NULL;
}
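
/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off
 * to allocate a new one. A repetitive sequence of mprotects and
 * faults could otherwise allocate distinct anon_vmas, preventing
 * vma merge in subsequent mprotect calls.
 */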
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma;
	struct vm_area_struct *near;

	near = vma->vm_next;
	if (!near)
		goto try_prev;

	anon_vma = reusable_anon_vma(near, vma, near);
	if (anon_vma)
		return anon_vma;
try_prev:
	near = vma->vm_prev;
	if (!near)
		goto none;

	anon_vma = reusable_anon_vma(near, near, vma);
	if (anon_vma)
		return anon_vma;
none:
	/*
	 * There's no absolute need to look only at touching neighbours:
	 * we could search further afield for "compatible" anon_vmas.
	 * But it would probably just be a waste of time searching,
	 * or lead to too many vmas hanging off the same anon_vma.
	 * We're trying to allow mprotect remerging later on,
	 * not trying to minimize memory used for anon_vmas.
	 */
	return NULL;
}
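
/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr.
 */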
static inline unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	if (((void *)hint != NULL) &&
	    (hint < mmap_min_addr))
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}

static inline int mlock_future_check(struct mm_struct *mm,
				     unsigned long flags,
				     unsigned long len)
{
	unsigned long locked, lock_limit;

	/*  mlock MCL_FUTURE? */
	if (flags & VM_LOCKED) {
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}
	return 0;
}

static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		return MAX_LFS_FILESIZE;

	if (S_ISBLK(inode->i_mode))
		return MAX_LFS_FILESIZE;

	/* Special "we do even unsigned file positions" case */
	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
		return 0;

	/* Yes, random drivers might want more. But I'm tired of buggy drivers */
	return ULONG_MAX;
}

static inline bool file_mmap_ok(struct file *file, struct inode *inode,
				unsigned long pgoff, unsigned long len)
{
	u64 maxsize = file_mmap_size_max(file, inode);

	if (maxsize && len > maxsize)
		return false;
	maxsize -= len;
	if (pgoff > maxsize >> PAGE_SHIFT)
		return false;
	return true;
}
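
/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */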
unsigned long do_mmap(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, vm_flags_t vm_flags,
			unsigned long pgoff, unsigned long *populate,
			struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	int pkey = 0;

	*populate = 0;

	if (!len)
		return -EINVAL;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we dont add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && path_noexec(&file->f_path)))
			prot |= PROT_EXEC;

	/* force arch specific MAP_FIXED handling in get_unmapped_area */
	if (flags & MAP_FIXED_NOREPLACE)
		flags |= MAP_FIXED;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (offset_in_page(addr))
		return addr;

	if (flags & MAP_FIXED_NOREPLACE) {
		struct vm_area_struct *vma = find_vma(mm, addr);

		if (vma && vma->vm_start < addr + len)
			return -EEXIST;
	}

	if (prot == PROT_EXEC) {
		pkey = execute_only_pkey(mm);
		if (pkey < 0)
			pkey = 0;
	}

	/* Do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	if (mlock_future_check(mm, vm_flags, len))
		return -EAGAIN;

	if (file) {
		struct inode *inode = file_inode(file);
		unsigned long flags_mask;

		if (!file_mmap_ok(file, inode, pgoff, len))
			return -EOVERFLOW;

		flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;

		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/*
			 * Force use of MAP_SHARED_VALIDATE with non-legacy
			 * flags. E.g. MAP_SYNC is dangerous to use with
			 * MAP_SHARED as you don't know which consistency model
			 * you will get. We silently ignore unsupported flags
			 * with MAP_SHARED to preserve backward compatibility.
			 */
			flags &= LEGACY_MAP_MASK;
			/* fall through */
		case MAP_SHARED_VALIDATE:
			if (flags & ~flags_mask)
				return -EOPNOTSUPP;
			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

			/* fall through */
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (path_noexec(&file->f_path)) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op->mmap)
				return -ENODEV;
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			/*
			 * Ignore pgoff.
			 */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
		default:
			return -EINVAL;
		}
	}

	/*
	 * Set 'VM_NORESERVE' if we should not account for the
	 * memory use of this mapping.
	 */
	if (flags & MAP_NORESERVE) {
		/* We honor MAP_NORESERVE if allowed to overcommit */
		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;

		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}

	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
	if (!IS_ERR_VALUE(addr) &&
	    ((vm_flags & VM_LOCKED) ||
	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
		*populate = len;
	return addr;
}

1568
1569unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1570 unsigned long prot, unsigned long flags,
1571 unsigned long fd, unsigned long pgoff)
1572{
1573 struct file *file = NULL;
1574 unsigned long retval;
1575
1576 if (!(flags & MAP_ANONYMOUS)) {
1577 audit_mmap_fd(fd, flags);
1578 file = fget(fd);
1579 if (!file)
1580 return -EBADF;
1581 if (is_file_hugepages(file))
1582 len = ALIGN(len, huge_page_size(hstate_file(file)));
1583 retval = -EINVAL;
1584 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1585 goto out_fput;
1586 } else if (flags & MAP_HUGETLB) {
1587 struct user_struct *user = NULL;
1588 struct hstate *hs;
1589
1590 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1591 if (!hs)
1592 return -EINVAL;
1593
1594 len = ALIGN(len, huge_page_size(hs));
1595
1596
1597
1598
1599
1600
1601 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1602 VM_NORESERVE,
1603 &user, HUGETLB_ANONHUGE_INODE,
1604 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1605 if (IS_ERR(file))
1606 return PTR_ERR(file);
1607 }
1608
1609 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1610
1611 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1612out_fput:
1613 if (file)
1614 fput(file);
1615 return retval;
1616}
1617
1618SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1619 unsigned long, prot, unsigned long, flags,
1620 unsigned long, fd, unsigned long, pgoff)
1621{
1622 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1623}
1624
#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	if (offset_in_page(a.offset))
		return -EINVAL;

	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
			       a.offset >> PAGE_SHIFT);
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */
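
/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */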
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;

	/* The open routine did something to the protections that pgprot_modify
	 * won't preserve? */
	if (pgprot_val(vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
		return 0;

	/* Do we need to track softdirty? */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
		return 1;

	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_cap_account_dirty(vma->vm_file->f_mapping);
}
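
/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */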
static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
	/*
	 * hugetlb has its own accounting separate from the core VM.
	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
	 */
	if (file && is_file_hugepages(file))
		return 0;

	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;

	/* Check against address space limit. */
	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
		unsigned long nr_pages;

		/*
		 * MAP_FIXED may remove pages of mappings that intersects with
		 * requested mapping. Account for the pages it would unmap.
		 */
		nr_pages = count_vma_pages_range(mm, addr, addr + len);

		if (!may_expand_vm(mm, vm_flags,
					(len >> PAGE_SHIFT) - nr_pages))
			return -ENOMEM;
	}

	/* Clear old maps */
	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
			      &rb_parent)) {
		if (do_munmap(mm, addr, len, uf))
			return -ENOMEM;
	}

	/*
	 * Private writable mapping: check memory availability
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/*
	 * Can we just expand an old mapping?
	 */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
		 * and map writably if VM_SHARED is set. This usually means the
		 * new file must not have been exposed to user-space, yet.
		 */
		vma->vm_file = get_file(file);
		error = call_mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/* Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method.
		 * Bug: If addr is changed, prev, rb_link, rb_parent should
		 *      be updated for vma_link()
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		addr = vma->vm_start;
		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	} else {
		vma_set_anonymous(vma);
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
	file = vma->vm_file;
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
					is_vm_hugetlb_page(vma) ||
					vma == get_gate_vma(current->mm))
			vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
		else
			mm->locked_vm += (len >> PAGE_SHIFT);
	}

	if (file)
		uprobe_mmap(vma);

	/*
	 * New (or expanded) vma always get soft dirty status.
	 * Otherwise user-space soft-dirty page tracker won't
	 * be able to distinguish situation when vma area unmapped,
	 * then new mapped in-place (which must be aimed as
	 * a completely new data area).
	 */
	vma->vm_flags |= VM_SOFTDIRTY;

	vma_set_page_prot(vma);

	return addr;

unmap_and_free_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	charged = 0;
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
	vm_area_free(vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}

unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	/*
	 * We implement the search by looking for an rbtree node that
	 * immediately follows a suitable gap. That is,
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */

	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjusted search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/* Adjust search limits by the desired length */
	if (info->high_limit < length)
		return -ENOMEM;
	high_limit = info->high_limit - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		goto check_highest;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		goto check_highest;

	while (true) {
		/* Visit left subtree if it looks promising */
		gap_end = vm_start_gap(vma);
		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
check_current:
		/* Check if current node has a suitable gap */
		if (gap_start > high_limit)
			return -ENOMEM;
		if (gap_end >= low_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length)
			goto found;

		/* Visit right subtree if it looks promising */
		if (vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				goto check_highest;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_left) {
				gap_start = vm_end_gap(vma->vm_prev);
				gap_end = vm_start_gap(vma);
				goto check_current;
			}
		}
	}

check_highest:
	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
	if (gap_start > high_limit)
		return -ENOMEM;

found:
	/* We found a suitable gap. Clip it with the original low_limit. */
	if (gap_start < info->low_limit)
		gap_start = info->low_limit;

	/* Adjust gap address to the desired alignment */
	gap_start += (info->align_offset - gap_start) & info->align_mask;

	VM_BUG_ON(gap_start + info->length > info->high_limit);
	VM_BUG_ON(gap_start + info->length > gap_end);
	return gap_start;
}

unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjusted search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit)
		goto found_highest;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vm_start_gap(vma);
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length)
			goto found;

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vm_end_gap(vma->vm_prev) : 0;
				goto check_current;
			}
		}
	}

found:
	/* We found a suitable gap. Clip it with the original high_limit. */
	if (gap_end > info->high_limit)
		gap_end = info->high_limit;

found_highest:
	/* Compute highest gap address at the desired alignment */
	gap_end -= info->length;
	gap_end -= (gap_end - info->align_offset) & info->align_mask;

	VM_BUG_ON(gap_end < info->low_limit);
	VM_BUG_ON(gap_end < gap_start);
	return gap_end;
}

#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr)	(TASK_SIZE)
#endif

#ifndef arch_get_mmap_base
#define arch_get_mmap_base(addr, base) (base)
#endif
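
/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */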
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct vm_unmapped_area_info info;
	const unsigned long mmap_end = arch_get_mmap_end(addr);

	if (len > mmap_end - mmap_min_addr)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma_prev(mm, addr, &prev);
		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
		    (!vma || addr + len <= vm_start_gap(vma)) &&
		    (!prev || addr >= vm_end_gap(prev)))
			return addr;
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = mm->mmap_base;
	info.high_limit = mmap_end;
	info.align_mask = 0;
	return vm_unmapped_area(&info);
}
#endif

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
			  unsigned long len, unsigned long pgoff,
			  unsigned long flags)
{
	struct vm_area_struct *vma, *prev;
	struct mm_struct *mm = current->mm;
	struct vm_unmapped_area_info info;
	const unsigned long mmap_end = arch_get_mmap_end(addr);

	/* requested length too big for entire address space */
	if (len > mmap_end - mmap_min_addr)
		return -ENOMEM;

	if (flags & MAP_FIXED)
		return addr;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma_prev(mm, addr, &prev);
		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
				(!vma || addr + len <= vm_start_gap(vma)) &&
				(!prev || addr >= vm_end_gap(prev)))
			return addr;
	}

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
	info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
	info.align_mask = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (offset_in_page(addr)) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = mmap_end;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}
#endif

unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
				  unsigned long, unsigned long, unsigned long);

	unsigned long error = arch_mmap_check(addr, len, flags);
	if (error)
		return error;

	/* Careful about overflows.. */
	if (len > TASK_SIZE)
		return -ENOMEM;

	get_area = current->mm->get_unmapped_area;
	if (file) {
		if (file->f_op->get_unmapped_area)
			get_area = file->f_op->get_unmapped_area;
	} else if (flags & MAP_SHARED) {
		/*
		 * mmap_region() will call shmem_zero_setup() to create a file,
		 * so use shmem's get_unmapped_area in case it can be huge.
		 * do_mmap_pgoff() will clear pgoff, so match alignment.
		 */
		pgoff = 0;
		get_area = shmem_get_unmapped_area;
	}

	addr = get_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;

	if (addr > TASK_SIZE - len)
		return -ENOMEM;
	if (offset_in_page(addr))
		return -EINVAL;

	error = security_mmap_addr(addr);
	return error ? error : addr;
}

EXPORT_SYMBOL(get_unmapped_area);
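
/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */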
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
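
/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */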
struct vm_area_struct *
find_vma_prev(struct mm_struct *mm, unsigned long addr,
			struct vm_area_struct **pprev)
{
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (vma) {
		*pprev = vma->vm_prev;
	} else {
		struct rb_node *rb_node = mm->mm_rb.rb_node;
		*pprev = NULL;
		while (rb_node) {
			*pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
			rb_node = rb_node->rb_right;
		}
	}
	return vma;
}
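
/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */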
static int acct_stack_growth(struct vm_area_struct *vma,
			     unsigned long size, unsigned long grow)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long new_start;

	/* address space limit tests */
	if (!may_expand_vm(mm, vma->vm_flags, grow))
		return -ENOMEM;

	/* Stack limit test */
	if (size > rlimit(RLIMIT_STACK))
		return -ENOMEM;

	/* mlock limit tests */
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked;
		unsigned long limit;
		locked = mm->locked_vm + grow;
		limit = rlimit(RLIMIT_MEMLOCK);
		limit >>= PAGE_SHIFT;
		if (locked > limit && !capable(CAP_IPC_LOCK))
			return -ENOMEM;
	}

	/* Check to ensure the stack will not grow into a hugetlb-only region */
	new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
			vma->vm_end - size;
	if (is_hugepage_only_range(vma->vm_mm, new_start, size))
		return -EFAULT;

	/*
	 * Overcommit..  This must be the final test, as it will
	 * update security statistics.
	 */
	if (security_vm_enough_memory_mm(mm, grow))
		return -ENOMEM;

	return 0;
}

#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
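/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end. Have to extend vma.
 */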
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next;
	unsigned long gap_addr;
	int error = 0;

	if (!(vma->vm_flags & VM_GROWSUP))
		return -EFAULT;

	/* Guard against exceeding limits of the address space. */
	address &= PAGE_MASK;
	if (address >= (TASK_SIZE & PAGE_MASK))
		return -ENOMEM;
	address += PAGE_SIZE;

	/* Enforce stack_guard_gap */
	gap_addr = address + stack_guard_gap;

	/* Guard against overflow */
	if (gap_addr < address || gap_addr > TASK_SIZE)
		gap_addr = TASK_SIZE;

	next = vma->vm_next;
	if (next && next->vm_start < gap_addr &&
			(next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
		if (!(next->vm_flags & VM_GROWSUP))
			return -ENOMEM;
		/* Check that both stack segments have the same anon_vma? */
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address > vma->vm_end) {
		unsigned long size, grow;

		size = address - vma->vm_start;
		grow = (address - vma->vm_end) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_sem
				 * lock here, so we need to protect against
				 * concurrent vma expansions: the
				 * page_table_lock serializes the locked_vm
				 * and vm_stat updates as well as the
				 * vma_gap_update() call below.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_end = address;
				anon_vma_interval_tree_post_update_vma(vma);
				if (vma->vm_next)
					vma_gap_update(vma->vm_next);
				else
					mm->highest_vm_end = vm_end_gap(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
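
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */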
int expand_downwards(struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *prev;
	int error = 0;

	address &= PAGE_MASK;
	if (address < mmap_min_addr)
		return -EPERM;

	/* Enforce stack_guard_gap */
	prev = vma->vm_prev;
	/* Check that both stack segments have the same anon_vma? */
	if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
			(prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
		if (address - prev->vm_end < stack_guard_gap)
			return -ENOMEM;
	}

	/* We must make sure the anon_vma is allocated. */
	if (unlikely(anon_vma_prepare(vma)))
		return -ENOMEM;

	/*
	 * vma->vm_start/vm_end cannot change under us because the caller
	 * is required to hold the mmap_sem in read mode.  We need the
	 * anon_vma lock to serialize against concurrent expand_stacks.
	 */
	anon_vma_lock_write(vma->anon_vma);

	/* Somebody else might have raced and expanded it already */
	if (address < vma->vm_start) {
		unsigned long size, grow;

		size = vma->vm_end - address;
		grow = (vma->vm_start - address) >> PAGE_SHIFT;

		error = -ENOMEM;
		if (grow <= vma->vm_pgoff) {
			error = acct_stack_growth(vma, size, grow);
			if (!error) {
				/*
				 * vma_gap_update() doesn't support concurrent
				 * updates, but we only hold a shared mmap_sem
				 * lock here, so we need to protect against
				 * concurrent vma expansions: the
				 * page_table_lock serializes the locked_vm
				 * and vm_stat updates as well as the
				 * vma_gap_update() call below.
				 */
				spin_lock(&mm->page_table_lock);
				if (vma->vm_flags & VM_LOCKED)
					mm->locked_vm += grow;
				vm_stat_account(mm, vma->vm_flags, grow);
				anon_vma_interval_tree_pre_update_vma(vma);
				vma->vm_start = address;
				vma->vm_pgoff -= grow;
				anon_vma_interval_tree_post_update_vma(vma);
				vma_gap_update(vma);
				spin_unlock(&mm->page_table_lock);

				perf_event_mmap(vma);
			}
		}
	}
	anon_vma_unlock_write(vma->anon_vma);
	khugepaged_enter_vma_merge(vma, vma->vm_flags);
	validate_mm(mm);
	return error;
}
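
/* enforced gap between the expanding stack and other mappings. */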
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

static int __init cmdline_parse_stack_guard_gap(char *p)
{
	unsigned long val;
	char *endptr;

	val = simple_strtoul(p, &endptr, 10);
	if (!*endptr)
		stack_guard_gap = val << PAGE_SHIFT;

	return 0;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);

#ifdef CONFIG_STACK_GROWSUP
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_upwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma, *prev;

	addr &= PAGE_MASK;
	vma = find_vma_prev(mm, addr, &prev);
	if (vma && (vma->vm_start <= addr))
		return vma;
	/* don't alter vm_end if the coredump is running */
	if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
		return NULL;
	if (prev->vm_flags & VM_LOCKED)
		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
	return prev;
}
#else
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
	return expand_downwards(vma, address);
}

struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	unsigned long start;

	addr &= PAGE_MASK;
	vma = find_vma(mm, addr);
	if (!vma)
		return NULL;
	if (vma->vm_start <= addr)
		return vma;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		return NULL;
	/* don't alter vm_start if the coredump is running */
	if (!mmget_still_valid(mm))
		return NULL;
	start = vma->vm_start;
	if (expand_stack(vma, addr))
		return NULL;
	if (vma->vm_flags & VM_LOCKED)
		populate_vma_page_range(vma, addr, start, NULL);
	return vma;
}
#endif

EXPORT_SYMBOL_GPL(find_extend_vma);
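
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */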
static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
	unsigned long nr_accounted = 0;

	/* Update high watermark before we lower total_vm */
	update_hiwater_vm(mm);
	do {
		long nrpages = vma_pages(vma);

		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += nrpages;
		vm_stat_account(mm, vma->vm_flags, -nrpages);
		vma = remove_vma(vma);
	} while (vma);
	vm_unacct_memory(nr_accounted);
	validate_mm(mm);
}
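
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */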
static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
	struct mmu_gather tlb;

	lru_add_drain();
	tlb_gather_mmu(&tlb, mm, start, end);
	update_hiwater_rss(mm);
	unmap_vmas(&tlb, vma, start, end);
	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
				 next ? next->vm_start : USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, start, end);
}
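
/*
 * Create a list of vma's touched by the unmap, removing them from the
 * mm's vma list and rbtree as we go.
 */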
static void
detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, unsigned long end)
{
	struct vm_area_struct **insertion_point;
	struct vm_area_struct *tail_vma = NULL;

	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
	vma->vm_prev = NULL;
	do {
		vma_rb_erase(vma, &mm->mm_rb);
		mm->map_count--;
		tail_vma = vma;
		vma = vma->vm_next;
	} while (vma && vma->vm_start < end);
	*insertion_point = vma;
	if (vma) {
		vma->vm_prev = prev;
		vma_gap_update(vma);
	} else
		mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
	tail_vma->vm_next = NULL;

	/* Kill the cache */
	vmacache_invalidate(mm);
}
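
/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this
 * where it has already been checked or doesn't make sense to fail.
 */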
2651int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2652 unsigned long addr, int new_below)
2653{
2654 struct vm_area_struct *new;
2655 int err;
2656
2657 if (vma->vm_ops && vma->vm_ops->split) {
2658 err = vma->vm_ops->split(vma, addr);
2659 if (err)
2660 return err;
2661 }
2662
2663 new = vm_area_dup(vma);
2664 if (!new)
2665 return -ENOMEM;
2666
2667 if (new_below)
2668 new->vm_end = addr;
2669 else {
2670 new->vm_start = addr;
2671 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2672 }
2673
2674 err = vma_dup_policy(vma, new);
2675 if (err)
2676 goto out_free_vma;
2677
2678 err = anon_vma_clone(new, vma);
2679 if (err)
2680 goto out_free_mpol;
2681
2682 if (new->vm_file)
2683 get_file(new->vm_file);
2684
2685 if (new->vm_ops && new->vm_ops->open)
2686 new->vm_ops->open(new);
2687
2688 if (new_below)
2689 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2690 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2691 else
2692 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2693
2694
2695 if (!err)
2696 return 0;
2697
2698
2699 if (new->vm_ops && new->vm_ops->close)
2700 new->vm_ops->close(new);
2701 if (new->vm_file)
2702 fput(new->vm_file);
2703 unlink_anon_vmas(new);
2704 out_free_mpol:
2705 mpol_put(vma_policy(new));
2706 out_free_vma:
2707 vm_area_free(new);
2708 return err;
2709}

/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */
int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
	      unsigned long addr, int new_below)
{
	if (mm->map_count >= sysctl_max_map_count)
		return -ENOMEM;

	return __split_vma(mm, vma, addr, new_below);
}
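
/*
 * Range operations such as mprotect() and mlock() rely on split_vma() to
 * carve out exactly the span they act on, roughly (sketch, not a verbatim
 * quote of those callers):
 *
 *	if (start != vma->vm_start) {
 *		error = split_vma(mm, vma, start, 1);
 *		if (error)
 *			goto out;
 *	}
 */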

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardinge <jeremy@goop.org>
 */
int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
		struct list_head *uf, bool downgrade)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (len == 0)
		return -EINVAL;

	/*
	 * arch_unmap() might do unmaps itself.  It must be called
	 * and finish any rbtree manipulation before this code
	 * runs and also starts to manipulate the rbtree.
	 */
	arch_unmap(mm, start, end);

	/* Find the first overlapping VMA */
	vma = find_vma(mm, start);
	if (!vma)
		return 0;
	prev = vma->vm_prev;
	/* we have  start < vma->vm_end  */

	/* if it doesn't overlap, we have nothing.. */
	if (vma->vm_start >= end)
		return 0;

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
	if (start > vma->vm_start) {
		int error;

		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	/* Does it split the last one? */
	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = prev ? prev->vm_next : mm->mmap;

	if (unlikely(uf)) {
		/*
		 * If userfaultfd_unmap_prep returns an error the vmas
		 * will remain split, but userland will get a
		 * highly unlikely error anyway.
		 */
		int error = userfaultfd_unmap_prep(vma, start, end, uf);
		if (error)
			return error;
	}

	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */
	if (mm->locked_vm) {
		struct vm_area_struct *tmp = vma;
		while (tmp && tmp->vm_start < end) {
			if (tmp->vm_flags & VM_LOCKED) {
				mm->locked_vm -= vma_pages(tmp);
				munlock_vma_pages_all(tmp);
			}

			tmp = tmp->vm_next;
		}
	}

	/* Detach vmas from rbtree */
	detach_vmas_to_be_unmapped(mm, vma, prev, end);

	if (downgrade)
		downgrade_write(&mm->mmap_sem);

	unmap_region(mm, vma, prev, start, end);

	/* Fix up all other VM information */
	remove_vma_list(mm, vma);

	return downgrade ? 1 : 0;
}

int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
	      struct list_head *uf)
{
	return __do_munmap(mm, start, len, uf, false);
}

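/*
 * Common helper for vm_munmap() and the munmap() syscall. When @downgrade
 * is true, a successful __do_munmap() returns 1 and has already downgraded
 * mmap_sem to read mode, so only a read unlock is needed here.
 */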
static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
{
	int ret;
	struct mm_struct *mm = current->mm;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	ret = __do_munmap(mm, start, len, &uf, downgrade);
	/*
	 * Returning 1 indicates mmap_sem is downgraded.
	 * But 1 is not a legal return value of vm_munmap() and munmap(), so
	 * reset it to 0 before returning.
	 */
	if (ret == 1) {
		up_read(&mm->mmap_sem);
		ret = 0;
	} else
		up_write(&mm->mmap_sem);

	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}

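/*
 * vm_munmap() is exported for code that tears down a user mapping it
 * created earlier, e.g. via vm_mmap(). A minimal usage sketch
 * (hypothetical driver code, not taken from this file):
 *
 *	unsigned long uaddr = vm_mmap(filp, 0, size, PROT_READ,
 *				      MAP_SHARED, 0);
 *	...
 *	if (!IS_ERR_VALUE(uaddr))
 *		vm_munmap(uaddr, size);
 */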
int vm_munmap(unsigned long start, size_t len)
{
	return __vm_munmap(start, len, false);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	profile_munmap(addr);
	return __vm_munmap(addr, len, true);
}

/*
 * Emulation of deprecated remap_file_pages() syscall.
 */
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long populate = 0;
	unsigned long ret = -EINVAL;
	struct file *file;

	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
		     current->comm, current->pid);

	if (prot)
		return ret;
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	if (start + size <= start)
		return ret;

	/* Does pgoff wrap? */
	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return ret;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	vma = find_vma(mm, start);

	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (start < vma->vm_start)
		goto out;

	if (start + size > vma->vm_end) {
		struct vm_area_struct *next;

		for (next = vma->vm_next; next; next = next->vm_next) {
			/* hole between vmas ? */
			if (next->vm_start != next->vm_prev->vm_end)
				goto out;

			if (next->vm_file != vma->vm_file)
				goto out;

			if (next->vm_flags != vma->vm_flags)
				goto out;

			if (start + size <= next->vm_end)
				break;
		}

		if (!next)
			goto out;
	}

	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
	prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
	prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;

	flags &= MAP_NONBLOCK;
	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
	if (vma->vm_flags & VM_LOCKED) {
		struct vm_area_struct *tmp;
		flags |= MAP_LOCKED;

		/* drop PG_Mlocked flag for over-mapped range */
		for (tmp = vma; tmp && tmp->vm_start < start + size;
				tmp = tmp->vm_next) {
			/*
			 * Split pmd and munlock page on the border
			 * of the range.
			 */
			vma_adjust_trans_huge(tmp, start, start + size, 0);

			munlock_vma_pages_range(tmp,
					max(tmp->vm_start, start),
					min(tmp->vm_end, start + size));
		}
	}

	file = get_file(vma->vm_file);
	ret = do_mmap_pgoff(vma->vm_file, start, size,
			prot, flags, pgoff, &populate, NULL);
	fput(file);
out:
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(ret, populate);
	if (!IS_ERR_VALUE(ret))
		ret = 0;
	return ret;
}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;

	/* Until we need other flags, refuse anything except VM_EXEC. */
	if ((flags & (~VM_EXEC)) != 0)
		return -EINVAL;
	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
	if (offset_in_page(error))
		return error;

	error = mlock_future_check(mm, mm->def_flags, len);
	if (error)
		return error;

	/*
	 * Clear old maps.  this also does some error checking for us
	 */
	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
			      &rb_parent)) {
		if (do_munmap(mm, addr, len, uf))
			return -ENOMEM;
	}

	/* Check against address space limits *after* clearing old maps... */
	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	/* Can we just expand an old private anonymous mapping? */
	vma = vma_merge(mm, prev, addr, addr + len, flags,
			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	vma_set_anonymous(vma);
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	perf_event_mmap(vma);
	mm->total_vm += len >> PAGE_SHIFT;
	mm->data_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED)
		mm->locked_vm += (len >> PAGE_SHIFT);
	vma->vm_flags |= VM_SOFTDIRTY;
	return 0;
}

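/*
 * vm_brk_flags() maps an anonymous, brk-style region at @addr. Binary
 * loaders use it to set up zero-initialized regions such as .bss; a
 * minimal sketch (hypothetical loader code, not taken from this file):
 *
 *	if (bss_end > bss_start) {
 *		int error = vm_brk_flags(bss_start,
 *					 bss_end - bss_start, 0);
 *		if (error)
 *			return error;
 *	}
 */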
int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	unsigned long len;
	int ret;
	bool populate;
	LIST_HEAD(uf);

	len = PAGE_ALIGN(request);
	if (len < request)
		return -ENOMEM;
	if (!len)
		return 0;

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	ret = do_brk_flags(addr, len, flags, &uf);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate && !ret)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk_flags);

int vm_brk(unsigned long addr, unsigned long len)
{
	return vm_brk_flags(addr, len, 0);
}
EXPORT_SYMBOL(vm_brk);

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	/* mm's last user has gone, and it's about to be pulled down */
	mmu_notifier_release(mm);

	if (unlikely(mm_is_oom_victim(mm))) {
		/*
		 * Manually reap the mm to free as much memory as possible.
		 * Then, as the oom reaper does, set MMF_OOM_SKIP to
		 * disregard this mm from further consideration.  Taking
		 * mm->mmap_sem for write after setting MMF_OOM_SKIP will
		 * guarantee that the oom reaper will not run on this mm
		 * again after mmap_sem is dropped.
		 *
		 * This needs to be done before calling
		 * munlock_vma_pages_all(), which clears VM_LOCKED,
		 * otherwise the oom reaper cannot reliably test it.
		 */
		(void)__oom_reap_task_mm(mm);

		set_bit(MMF_OOM_SKIP, &mm->flags);
		down_write(&mm->mmap_sem);
		up_write(&mm->mmap_sem);
	}

	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	unmap_vmas(&tlb, vma, 0, -1);
	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, 0, -1);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
	}
	vm_unacct_memory(nr_accounted);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (vma_is_anonymous(vma)) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			    vma->vm_userfaultfd_ctx);
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearly between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = vm_area_dup(vma);
		if (!new_vma)
			goto out;
		new_vma->vm_start = addr;
		new_vma->vm_end = addr + len;
		new_vma->vm_pgoff = pgoff;
		if (vma_dup_policy(vma, new_vma))
			goto out_free_vma;
		if (anon_vma_clone(new_vma, vma))
			goto out_free_mempol;
		if (new_vma->vm_file)
			get_file(new_vma->vm_file);
		if (new_vma->vm_ops && new_vma->vm_ops->open)
			new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
	}
	return new_vma;

out_free_mempol:
	mpol_put(vma_policy(new_vma));
out_free_vma:
	vm_area_free(new_vma);
out:
	return NULL;
}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
		return false;

	if (is_data_mapping(flags) &&
	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
		/* Workaround for Valgrind */
		if (rlimit(RLIMIT_DATA) == 0 &&
		    mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
			return true;

		pr_warn_once("%s (%d): VmData %lu exceeds data ulimit %lu. Update limits%s.\n",
			     current->comm, current->pid,
			     (mm->data_vm + npages) << PAGE_SHIFT,
			     rlimit(RLIMIT_DATA),
			     ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

		if (!ignore_rlimit_data)
			return false;
	}

	return true;
}

void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
	mm->total_vm += npages;

	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}

static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

static int special_mapping_mremap(struct vm_area_struct *new_vma)
{
	struct vm_special_mapping *sm = new_vma->vm_private_data;

	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
		return -EFAULT;

	if (sm->mremap)
		return sm->mremap(sm, new_vma);

	return 0;
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
	.mremap = special_mapping_mremap,
	.name = special_mapping_name,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgoff_t pgoff;
	struct page **pages;

	if (vma->vm_ops == &legacy_special_mapping_vmops) {
		pages = vma->vm_private_data;
	} else {
		struct vm_special_mapping *sm = vma->vm_private_data;

		if (sm->fault)
			return sm->fault(sm, vmf->vma, vmf);

		pages = sm->pages;
	}

	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}
3386
3387static struct vm_area_struct *__install_special_mapping(
3388 struct mm_struct *mm,
3389 unsigned long addr, unsigned long len,
3390 unsigned long vm_flags, void *priv,
3391 const struct vm_operations_struct *ops)
3392{
3393 int ret;
3394 struct vm_area_struct *vma;
3395
3396 vma = vm_area_alloc(mm);
3397 if (unlikely(vma == NULL))
3398 return ERR_PTR(-ENOMEM);
3399
3400 vma->vm_start = addr;
3401 vma->vm_end = addr + len;
3402
3403 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3404 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3405
3406 vma->vm_ops = ops;
3407 vma->vm_private_data = priv;
3408
3409 ret = insert_vm_struct(mm, vma);
3410 if (ret)
3411 goto out;
3412
3413 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3414
3415 perf_event_mmap(vma);
3416
3417 return vma;
3418
3419out:
3420 vm_area_free(vma);
3421 return ERR_PTR(ret);
3422}
3423
3424bool vma_is_special_mapping(const struct vm_area_struct *vma,
3425 const struct vm_special_mapping *sm)
3426{
3427 return vma->vm_private_data == sm &&
3428 (vma->vm_ops == &special_mapping_vmops ||
3429 vma->vm_ops == &legacy_special_mapping_vmops);
3430}

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
struct vm_area_struct *_install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, const struct vm_special_mapping *spec)
{
	return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
					&special_mapping_vmops);
}

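/*
 * Legacy variant of _install_special_mapping(): callers pass a bare,
 * NULL-terminated array of pages instead of a struct vm_special_mapping.
 * A minimal sketch of vdso-style setup, where vdso_pages[1] stays NULL as
 * the terminator (hypothetical arch code, not taken from this file):
 *
 *	static struct page *vdso_pages[2];
 *	...
 *	err = install_special_mapping(mm, vdso_addr, PAGE_SIZE,
 *				      VM_READ | VM_EXEC |
 *				      VM_MAYREAD | VM_MAYEXEC,
 *				      vdso_pages);
 */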
int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma = __install_special_mapping(
		mm, addr, len, vm_flags, (void *)pages,
		&legacy_special_mapping_vmops);

	return PTR_ERR_OR_ZERO(vma);
}

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No atomic instructions are needed here: head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
	}
}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, according to the comment at the
 * beginning of mm/rmap.c:
 *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
 *     hugetlb mapping);
 *   - all i_mmap_rwsem locks;
 *   - all anon_vma->rwsem
 *
 * We can take all locks within these types randomly because the VM code
 * doesn't nest them and we are protected from parallel mm_take_all_locks()
 * by mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				!is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
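
/*
 * Typical mm_take_all_locks() usage, modeled on mmu notifier registration
 * (sketch, not a verbatim quote of that caller):
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (!ret) {
 *		... publish state that faults and rmap walks must see ...
 *		mm_drop_all_locks(mm);
 *	}
 *	up_write(&mm->mmap_sem);
 */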

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so that users of the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No atomic instructions are needed here: head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		i_mmap_unlock_write(mapping);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB).
 * 8MB is enough to recover with sshd, bash, and top in OVERCOMMIT_GUESS mode.
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
subsys_initcall(init_admin_reserve);

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		/* Default max is 8MB.  Leave alone if modified by operator. */
		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
subsys_initcall(init_reserve_notifier);