// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Virtual memory mapping: mmap, brk, munmap and the VMA bookkeeping
 * (linked list plus augmented rbtree) behind them.
 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include <linux/backing-dev.h>
15#include <linux/mm.h>
16#include <linux/vmacache.h>
17#include <linux/shm.h>
18#include <linux/mman.h>
19#include <linux/pagemap.h>
20#include <linux/swap.h>
21#include <linux/syscalls.h>
22#include <linux/capability.h>
23#include <linux/init.h>
24#include <linux/file.h>
25#include <linux/fs.h>
26#include <linux/personality.h>
27#include <linux/security.h>
28#include <linux/hugetlb.h>
29#include <linux/shmem_fs.h>
30#include <linux/profile.h>
31#include <linux/export.h>
32#include <linux/mount.h>
33#include <linux/mempolicy.h>
34#include <linux/rmap.h>
35#include <linux/mmu_notifier.h>
36#include <linux/mmdebug.h>
37#include <linux/perf_event.h>
38#include <linux/audit.h>
39#include <linux/khugepaged.h>
40#include <linux/uprobes.h>
41#include <linux/rbtree_augmented.h>
42#include <linux/notifier.h>
43#include <linux/memory.h>
44#include <linux/printk.h>
45#include <linux/userfaultfd_k.h>
46#include <linux/moduleparam.h>
47#include <linux/pkeys.h>
48#include <linux/oom.h>
49#include <linux/sched/mm.h>
50
51#include <linux/uaccess.h>
52#include <asm/cacheflush.h>
53#include <asm/tlb.h>
54#include <asm/mmu_context.h>
55
56#define CREATE_TRACE_POINTS
57#include <trace/events/mmap.h>
58
59#include "internal.h"
60
61#ifndef arch_mmap_check
62#define arch_mmap_check(addr, len, flags) (0)
63#endif
64
65#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
66const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
67const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
68int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
69#endif
70#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
71const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
72const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
73int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
74#endif
75
76static bool ignore_rlimit_data;
77core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
78
79static void unmap_region(struct mm_struct *mm,
80 struct vm_area_struct *vma, struct vm_area_struct *prev,
81 unsigned long start, unsigned long end);
82
/*
 * Default page protections for each combination of the
 * VM_READ|VM_WRITE|VM_EXEC|VM_SHARED bits.  The __P* entries describe
 * private (copy-on-write) mappings and the __S* entries shared mappings;
 * both sets are supplied by the architecture, which may not be able to
 * honour every combination exactly.
 */
103pgprot_t protection_map[16] __ro_after_init = {
104 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
105 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
106};
107
108#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
109static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
110{
111 return prot;
112}
113#endif
114
115pgprot_t vm_get_page_prot(unsigned long vm_flags)
116{
117 pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
118 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
119 pgprot_val(arch_vm_get_page_prot(vm_flags)));
120
121 return arch_filter_pgprot(ret);
122}
123EXPORT_SYMBOL(vm_get_page_prot);
124
125static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
126{
127 return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
128}
129
/* Update vma->vm_page_prot to reflect vma->vm_flags. */
131void vma_set_page_prot(struct vm_area_struct *vma)
132{
133 unsigned long vm_flags = vma->vm_flags;
134 pgprot_t vm_page_prot;
135
136 vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
137 if (vma_wants_writenotify(vma, vm_page_prot)) {
138 vm_flags &= ~VM_SHARED;
139 vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
140 }
141
142 WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
143}
144
/*
 * Remove a VMA from its file's shared-mapping bookkeeping.
 * Requires mapping->i_mmap_rwsem held for write.
 */
148static void __remove_shared_vm_struct(struct vm_area_struct *vma,
149 struct file *file, struct address_space *mapping)
150{
151 if (vma->vm_flags & VM_DENYWRITE)
152 allow_write_access(file);
153 if (vma->vm_flags & VM_SHARED)
154 mapping_unmap_writable(mapping);
155
156 flush_dcache_mmap_lock(mapping);
157 vma_interval_tree_remove(vma, &mapping->i_mmap);
158 flush_dcache_mmap_unlock(mapping);
159}
160
/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
165void unlink_file_vma(struct vm_area_struct *vma)
166{
167 struct file *file = vma->vm_file;
168
169 if (file) {
170 struct address_space *mapping = file->f_mapping;
171 i_mmap_lock_write(mapping);
172 __remove_shared_vm_struct(vma, file, mapping);
173 i_mmap_unlock_write(mapping);
174 }
175}
176
/*
 * Close a vm structure and free it, returning the next.
 */
180static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
181{
182 struct vm_area_struct *next = vma->vm_next;
183
184 might_sleep();
185 if (vma->vm_ops && vma->vm_ops->close)
186 vma->vm_ops->close(vma);
187 if (vma->vm_file)
188 fput(vma->vm_file);
189 mpol_put(vma_policy(vma));
190 vm_area_free(vma);
191 return next;
192}
193
194static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
195 struct list_head *uf);
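/*
 * brk(2): grow or shrink the heap.  On failure the previous break is
 * returned rather than an error code, matching the legacy syscall ABI.
 */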
196SYSCALL_DEFINE1(brk, unsigned long, brk)
197{
198 unsigned long newbrk, oldbrk, origbrk;
199 struct mm_struct *mm = current->mm;
200 struct vm_area_struct *next;
201 unsigned long min_brk;
202 bool populate;
203 bool downgraded = false;
204 LIST_HEAD(uf);
205
206 if (mmap_write_lock_killable(mm))
207 return -EINTR;
208
209 origbrk = mm->brk;
210
211#ifdef CONFIG_COMPAT_BRK
 /*
  * CONFIG_COMPAT_BRK can still be overridden by setting
  * randomize_va_space to 2, which will still cause mm->start_brk
  * to be arbitrarily shifted
  */
217 if (current->brk_randomized)
218 min_brk = mm->start_brk;
219 else
220 min_brk = mm->end_data;
221#else
222 min_brk = mm->start_brk;
223#endif
224 if (brk < min_brk)
225 goto out;
226
 /*
  * Check against RLIMIT_DATA here, before the values are page aligned;
  * doing it after the oldbrk/newbrk comparison could let the data
  * segment creep past the limit when the limit is smaller than a
  * page-sized brk() increment.
  */
233 if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
234 mm->end_data, mm->start_data))
235 goto out;
236
237 newbrk = PAGE_ALIGN(brk);
238 oldbrk = PAGE_ALIGN(mm->brk);
239 if (oldbrk == newbrk) {
240 mm->brk = brk;
241 goto success;
242 }
243
 /*
  * Always allow shrinking brk.
  * __do_munmap() may downgrade mmap_lock to read.
  */
248 if (brk <= mm->brk) {
249 int ret;
250
 /*
  * mm->brk must be protected by the write mmap_lock, so update it
  * before a possible downgrade; if __do_munmap() fails, mm->brk is
  * restored from origbrk.
  */
256 mm->brk = brk;
257 ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
258 if (ret < 0) {
259 mm->brk = origbrk;
260 goto out;
261 } else if (ret == 1) {
262 downgraded = true;
263 }
264 goto success;
265 }
266
 /* Check against existing mmap mappings. */
268 next = find_vma(mm, oldbrk);
269 if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
270 goto out;
271
272
273 if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
274 goto out;
275 mm->brk = brk;
276
277success:
278 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
279 if (downgraded)
280 mmap_read_unlock(mm);
281 else
282 mmap_write_unlock(mm);
283 userfaultfd_unmap_complete(mm, &uf);
284 if (populate)
285 mm_populate(oldbrk, newbrk - oldbrk);
286 return brk;
287
288out:
289 mmap_write_unlock(mm);
290 return origbrk;
291}
292
293static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
294{
295 unsigned long gap, prev_end;
296
297
298
299
300
301
302
303 gap = vm_start_gap(vma);
304 if (vma->vm_prev) {
305 prev_end = vm_end_gap(vma->vm_prev);
306 if (gap > prev_end)
307 gap -= prev_end;
308 else
309 gap = 0;
310 }
311 return gap;
312}
313
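/*
 * Heavyweight consistency checks of the VMA list and rbtree, compiled in
 * only when CONFIG_DEBUG_VM_RB is enabled.
 */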
314#ifdef CONFIG_DEBUG_VM_RB
315static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
316{
317 unsigned long max = vma_compute_gap(vma), subtree_gap;
318 if (vma->vm_rb.rb_left) {
319 subtree_gap = rb_entry(vma->vm_rb.rb_left,
320 struct vm_area_struct, vm_rb)->rb_subtree_gap;
321 if (subtree_gap > max)
322 max = subtree_gap;
323 }
324 if (vma->vm_rb.rb_right) {
325 subtree_gap = rb_entry(vma->vm_rb.rb_right,
326 struct vm_area_struct, vm_rb)->rb_subtree_gap;
327 if (subtree_gap > max)
328 max = subtree_gap;
329 }
330 return max;
331}
332
333static int browse_rb(struct mm_struct *mm)
334{
335 struct rb_root *root = &mm->mm_rb;
336 int i = 0, j, bug = 0;
337 struct rb_node *nd, *pn = NULL;
338 unsigned long prev = 0, pend = 0;
339
340 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
341 struct vm_area_struct *vma;
342 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
343 if (vma->vm_start < prev) {
344 pr_emerg("vm_start %lx < prev %lx\n",
345 vma->vm_start, prev);
346 bug = 1;
347 }
348 if (vma->vm_start < pend) {
349 pr_emerg("vm_start %lx < pend %lx\n",
350 vma->vm_start, pend);
351 bug = 1;
352 }
353 if (vma->vm_start > vma->vm_end) {
354 pr_emerg("vm_start %lx > vm_end %lx\n",
355 vma->vm_start, vma->vm_end);
356 bug = 1;
357 }
358 spin_lock(&mm->page_table_lock);
359 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
360 pr_emerg("free gap %lx, correct %lx\n",
361 vma->rb_subtree_gap,
362 vma_compute_subtree_gap(vma));
363 bug = 1;
364 }
365 spin_unlock(&mm->page_table_lock);
366 i++;
367 pn = nd;
368 prev = vma->vm_start;
369 pend = vma->vm_end;
370 }
371 j = 0;
372 for (nd = pn; nd; nd = rb_prev(nd))
373 j++;
374 if (i != j) {
375 pr_emerg("backwards %d, forwards %d\n", j, i);
376 bug = 1;
377 }
378 return bug ? -1 : i;
379}
380
381static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
382{
383 struct rb_node *nd;
384
385 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
386 struct vm_area_struct *vma;
387 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
388 VM_BUG_ON_VMA(vma != ignore &&
389 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
390 vma);
391 }
392}
393
394static void validate_mm(struct mm_struct *mm)
395{
396 int bug = 0;
397 int i = 0;
398 unsigned long highest_address = 0;
399 struct vm_area_struct *vma = mm->mmap;
400
401 while (vma) {
402 struct anon_vma *anon_vma = vma->anon_vma;
403 struct anon_vma_chain *avc;
404
405 if (anon_vma) {
406 anon_vma_lock_read(anon_vma);
407 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
408 anon_vma_interval_tree_verify(avc);
409 anon_vma_unlock_read(anon_vma);
410 }
411
412 highest_address = vm_end_gap(vma);
413 vma = vma->vm_next;
414 i++;
415 }
416 if (i != mm->map_count) {
417 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
418 bug = 1;
419 }
420 if (highest_address != mm->highest_vm_end) {
421 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
422 mm->highest_vm_end, highest_address);
423 bug = 1;
424 }
425 i = browse_rb(mm);
426 if (i != mm->map_count) {
427 if (i != -1)
428 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
429 bug = 1;
430 }
431 VM_BUG_ON_MM(bug, mm);
432}
433#else
434#define validate_mm_rb(root, ignore) do { } while (0)
435#define validate_mm(mm) do { } while (0)
436#endif
437
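/*
 * Generate the augmented-rbtree callbacks that keep each node's
 * rb_subtree_gap equal to the largest gap (as computed by
 * vma_compute_gap()) anywhere in its subtree.
 */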
438RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
439 struct vm_area_struct, vm_rb,
440 unsigned long, rb_subtree_gap, vma_compute_gap)
441
/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
447static void vma_gap_update(struct vm_area_struct *vma)
448{
 /*
  * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
  * a callback function that does exactly what we want.
  */
453 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
454}
455
456static inline void vma_rb_insert(struct vm_area_struct *vma,
457 struct rb_root *root)
{
 /* All rb_subtree_gap values must be consistent prior to insertion */
460 validate_mm_rb(root, NULL);
461
462 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
463}
464
465static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
466{
 /*
  * Note rb_erase_augmented is a fairly large inline function, so make
  * sure we instantiate it only once with our desired augmented rbtree
  * callbacks.
  */
472 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
473}
474
475static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
476 struct rb_root *root,
477 struct vm_area_struct *ignore)
{
 /*
  * All rb_subtree_gap values must be consistent prior to erase, with
  * the possible exception of the vma being skipped via @ignore (which
  * may be mid-update in __vma_adjust() or detach_vmas_to_be_unmapped()).
  */
488 validate_mm_rb(root, ignore);
489
490 __vma_rb_erase(vma, root);
491}
492
493static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
494 struct rb_root *root)
495{
496 vma_rb_erase_ignore(vma, root, vma);
497}
498
/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */
513static inline void
514anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
515{
516 struct anon_vma_chain *avc;
517
518 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
519 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
520}
521
522static inline void
523anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
524{
525 struct anon_vma_chain *avc;
526
527 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
528 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
529}
530
531static int find_vma_links(struct mm_struct *mm, unsigned long addr,
532 unsigned long end, struct vm_area_struct **pprev,
533 struct rb_node ***rb_link, struct rb_node **rb_parent)
534{
535 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
536
537 __rb_link = &mm->mm_rb.rb_node;
538 rb_prev = __rb_parent = NULL;
539
540 while (*__rb_link) {
541 struct vm_area_struct *vma_tmp;
542
543 __rb_parent = *__rb_link;
544 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
545
546 if (vma_tmp->vm_end > addr) {
547
548 if (vma_tmp->vm_start < end)
549 return -ENOMEM;
550 __rb_link = &__rb_parent->rb_left;
551 } else {
552 rb_prev = __rb_parent;
553 __rb_link = &__rb_parent->rb_right;
554 }
555 }
556
557 *pprev = NULL;
558 if (rb_prev)
559 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
560 *rb_link = __rb_link;
561 *rb_parent = __rb_parent;
562 return 0;
563}
564
/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
 */
574static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
575 struct vm_area_struct *vma)
576{
577 if (!vma)
578 return mm->mmap;
579
580 return vma->vm_next;
581}
582
/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer that will be set to the previous vm_area_struct
 * @link: the rb_node link pointer for a later insertion
 * @parent: the parent rb_node for a later insertion
 * @uf: list head for userfaultfd unmap events
 *
 * Find all the vm_area_structs that overlap [start, start + len) and
 * munmap them, leaving @pprev, @link and @parent set up for inserting a
 * new VMA in their place.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
597static inline int
598munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
599 struct vm_area_struct **pprev, struct rb_node ***link,
600 struct rb_node **parent, struct list_head *uf)
601{
602
603 while (find_vma_links(mm, start, start + len, pprev, link, parent))
604 if (do_munmap(mm, start, len, uf))
605 return -ENOMEM;
606
607 return 0;
608}
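/* Count how many pages in [addr, end) are already covered by existing VMAs. */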
609static unsigned long count_vma_pages_range(struct mm_struct *mm,
610 unsigned long addr, unsigned long end)
611{
612 unsigned long nr_pages = 0;
613 struct vm_area_struct *vma;
614
615
616 vma = find_vma_intersection(mm, addr, end);
617 if (!vma)
618 return 0;
619
620 nr_pages = (min(end, vma->vm_end) -
621 max(addr, vma->vm_start)) >> PAGE_SHIFT;
622
623
624 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
625 unsigned long overlap_len;
626
627 if (vma->vm_start > end)
628 break;
629
630 overlap_len = min(end, vma->vm_end) - vma->vm_start;
631 nr_pages += overlap_len >> PAGE_SHIFT;
632 }
633
634 return nr_pages;
635}
636
637void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
638 struct rb_node **rb_link, struct rb_node *rb_parent)
639{
640
641 if (vma->vm_next)
642 vma_gap_update(vma->vm_next);
643 else
644 mm->highest_vm_end = vm_end_gap(vma);
645
646
647
648
649
650
651
652
653
654
655 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
656 vma->rb_subtree_gap = 0;
657 vma_gap_update(vma);
658 vma_rb_insert(vma, &mm->mm_rb);
659}
660
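/*
 * Add a file-backed VMA to its address_space: honour the VM_DENYWRITE and
 * VM_SHARED bookkeeping and insert it into the i_mmap interval tree.
 * The caller must hold the mapping's i_mmap_rwsem for write.
 */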
661static void __vma_link_file(struct vm_area_struct *vma)
662{
663 struct file *file;
664
665 file = vma->vm_file;
666 if (file) {
667 struct address_space *mapping = file->f_mapping;
668
669 if (vma->vm_flags & VM_DENYWRITE)
670 put_write_access(file_inode(file));
671 if (vma->vm_flags & VM_SHARED)
672 mapping_allow_writable(mapping);
673
674 flush_dcache_mmap_lock(mapping);
675 vma_interval_tree_insert(vma, &mapping->i_mmap);
676 flush_dcache_mmap_unlock(mapping);
677 }
678}
679
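/* Insert the VMA into both the mm's VMA list and its rbtree. */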
680static void
681__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
682 struct vm_area_struct *prev, struct rb_node **rb_link,
683 struct rb_node *rb_parent)
684{
685 __vma_link_list(mm, vma, prev);
686 __vma_link_rb(mm, vma, rb_link, rb_parent);
687}
688
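/*
 * Fully link a new VMA: list, rbtree and (for file mappings) the i_mmap
 * interval tree, bumping mm->map_count.
 */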
689static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
690 struct vm_area_struct *prev, struct rb_node **rb_link,
691 struct rb_node *rb_parent)
692{
693 struct address_space *mapping = NULL;
694
695 if (vma->vm_file) {
696 mapping = vma->vm_file->f_mapping;
697 i_mmap_lock_write(mapping);
698 }
699
700 __vma_link(mm, vma, prev, rb_link, rb_parent);
701 __vma_link_file(vma);
702
703 if (mapping)
704 i_mmap_unlock_write(mapping);
705
706 mm->map_count++;
707 validate_mm(mm);
708}
709
710
711
712
713
714static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
715{
716 struct vm_area_struct *prev;
717 struct rb_node **rb_link, *rb_parent;
718
719 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
720 &prev, &rb_link, &rb_parent))
721 BUG();
722 __vma_link(mm, vma, prev, rb_link, rb_parent);
723 mm->map_count++;
724}
725
726static __always_inline void __vma_unlink(struct mm_struct *mm,
727 struct vm_area_struct *vma,
728 struct vm_area_struct *ignore)
729{
730 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
731 __vma_unlink_list(mm, vma);
732
733 vmacache_invalidate(mm);
734}
735
/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
743int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
744 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
745 struct vm_area_struct *expand)
746{
747 struct mm_struct *mm = vma->vm_mm;
748 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
749 struct address_space *mapping = NULL;
750 struct rb_root_cached *root = NULL;
751 struct anon_vma *anon_vma = NULL;
752 struct file *file = vma->vm_file;
753 bool start_changed = false, end_changed = false;
754 long adjust_next = 0;
755 int remove_next = 0;
756
757 if (next && !insert) {
758 struct vm_area_struct *exporter = NULL, *importer = NULL;
759
760 if (end >= next->vm_end) {
761
762
763
764
765
766
767 if (next == expand) {
768
769
770
771
772 VM_WARN_ON(end != next->vm_end);
773
774
775
776
777
778 remove_next = 3;
779 VM_WARN_ON(file != next->vm_file);
780 swap(vma, next);
781 } else {
782 VM_WARN_ON(expand != vma);
783
784
785
786
787 remove_next = 1 + (end > next->vm_end);
788 VM_WARN_ON(remove_next == 2 &&
789 end != next->vm_next->vm_end);
790
791 end = next->vm_end;
792 }
793
794 exporter = next;
795 importer = vma;
796
797
798
799
800
801 if (remove_next == 2 && !next->anon_vma)
802 exporter = next->vm_next;
803
804 } else if (end > next->vm_start) {
805
806
807
808
809 adjust_next = (end - next->vm_start);
810 exporter = next;
811 importer = vma;
812 VM_WARN_ON(expand != importer);
813 } else if (end < vma->vm_end) {
814
815
816
817
818
819 adjust_next = -(vma->vm_end - end);
820 exporter = vma;
821 importer = next;
822 VM_WARN_ON(expand != importer);
823 }
824
825
826
827
828
829
830 if (exporter && exporter->anon_vma && !importer->anon_vma) {
831 int error;
832
833 importer->anon_vma = exporter->anon_vma;
834 error = anon_vma_clone(importer, exporter);
835 if (error)
836 return error;
837 }
838 }
839again:
840 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
841
842 if (file) {
843 mapping = file->f_mapping;
844 root = &mapping->i_mmap;
845 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
846
847 if (adjust_next)
848 uprobe_munmap(next, next->vm_start, next->vm_end);
849
850 i_mmap_lock_write(mapping);
851 if (insert) {
852
853
854
855
856
857
858 __vma_link_file(insert);
859 }
860 }
861
862 anon_vma = vma->anon_vma;
863 if (!anon_vma && adjust_next)
864 anon_vma = next->anon_vma;
865 if (anon_vma) {
866 VM_WARN_ON(adjust_next && next->anon_vma &&
867 anon_vma != next->anon_vma);
868 anon_vma_lock_write(anon_vma);
869 anon_vma_interval_tree_pre_update_vma(vma);
870 if (adjust_next)
871 anon_vma_interval_tree_pre_update_vma(next);
872 }
873
874 if (file) {
875 flush_dcache_mmap_lock(mapping);
876 vma_interval_tree_remove(vma, root);
877 if (adjust_next)
878 vma_interval_tree_remove(next, root);
879 }
880
881 if (start != vma->vm_start) {
882 vma->vm_start = start;
883 start_changed = true;
884 }
885 if (end != vma->vm_end) {
886 vma->vm_end = end;
887 end_changed = true;
888 }
889 vma->vm_pgoff = pgoff;
890 if (adjust_next) {
891 next->vm_start += adjust_next;
892 next->vm_pgoff += adjust_next >> PAGE_SHIFT;
893 }
894
895 if (file) {
896 if (adjust_next)
897 vma_interval_tree_insert(next, root);
898 vma_interval_tree_insert(vma, root);
899 flush_dcache_mmap_unlock(mapping);
900 }
901
902 if (remove_next) {
903
904
905
906
907 if (remove_next != 3)
908 __vma_unlink(mm, next, next);
909 else
910
911
912
913
914
915
916
917
918
919 __vma_unlink(mm, next, vma);
920 if (file)
921 __remove_shared_vm_struct(next, file, mapping);
922 } else if (insert) {
923
924
925
926
927
928 __insert_vm_struct(mm, insert);
929 } else {
930 if (start_changed)
931 vma_gap_update(vma);
932 if (end_changed) {
933 if (!next)
934 mm->highest_vm_end = vm_end_gap(vma);
935 else if (!adjust_next)
936 vma_gap_update(next);
937 }
938 }
939
940 if (anon_vma) {
941 anon_vma_interval_tree_post_update_vma(vma);
942 if (adjust_next)
943 anon_vma_interval_tree_post_update_vma(next);
944 anon_vma_unlock_write(anon_vma);
945 }
946
947 if (file) {
948 i_mmap_unlock_write(mapping);
949 uprobe_mmap(vma);
950
951 if (adjust_next)
952 uprobe_mmap(next);
953 }
954
955 if (remove_next) {
956 if (file) {
957 uprobe_munmap(next, next->vm_start, next->vm_end);
958 fput(file);
959 }
960 if (next->anon_vma)
961 anon_vma_merge(vma, next);
962 mm->map_count--;
963 mpol_put(vma_policy(next));
964 vm_area_free(next);
965
966
967
968
969
970 if (remove_next != 3) {
971
972
973
974
975
976
977 next = vma->vm_next;
978 } else {
979
980
981
982
983
984
985
986
987
988
989 next = vma;
990 }
991 if (remove_next == 2) {
992 remove_next = 1;
993 end = next->vm_end;
994 goto again;
995 }
996 else if (next)
997 vma_gap_update(next);
998 else {
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
1019 }
1020 }
1021 if (insert && file)
1022 uprobe_mmap(insert);
1023
1024 validate_mm(mm);
1025
1026 return 0;
1027}
1028
/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
1033static inline int is_mergeable_vma(struct vm_area_struct *vma,
1034 struct file *file, unsigned long vm_flags,
1035 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1036{
1037
1038
1039
1040
1041
1042
1043
1044
1045 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
1046 return 0;
1047 if (vma->vm_file != file)
1048 return 0;
1049 if (vma->vm_ops && vma->vm_ops->close)
1050 return 0;
1051 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
1052 return 0;
1053 return 1;
1054}
1055
1056static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
1057 struct anon_vma *anon_vma2,
1058 struct vm_area_struct *vma)
1059{
1060
1061
1062
1063
1064 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1065 list_is_singular(&vma->anon_vma_chain)))
1066 return 1;
1067 return anon_vma1 == anon_vma2;
1068}
1069
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of
 * pagecache indices because do_mmap() does not permit mmap's which wrap,
 * nor mmaps which cover the final page at index -1UL.
 */
1081static int
1082can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1083 struct anon_vma *anon_vma, struct file *file,
1084 pgoff_t vm_pgoff,
1085 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1086{
1087 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1088 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1089 if (vma->vm_pgoff == vm_pgoff)
1090 return 1;
1091 }
1092 return 0;
1093}
1094
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 */
1102static int
1103can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1104 struct anon_vma *anon_vma, struct file *file,
1105 pgoff_t vm_pgoff,
1106 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1107{
1108 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1109 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1110 pgoff_t vm_pglen;
1111 vm_pglen = vma_pages(vma);
1112 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1113 return 1;
1114 }
1115 return 0;
1116}
1117
/*
 * Given a mapping request (addr, end, vm_flags, file, pgoff), figure out
 * whether it can be merged with its predecessor and/or its successor, or
 * whether it has to become a new VMA.  A merge is only possible when the
 * flags, file, offset, memory policy and userfaultfd context are all
 * compatible (see is_mergeable_vma() and is_mergeable_anon_vma()).
 *
 * The interesting special case is when the new range exactly fills the
 * hole between prev and next and both are mergeable: prev is then
 * extended over next and next is removed via __vma_adjust().
 *
 * Callers hold the mmap_lock for write.  On success the merged VMA is
 * returned; NULL means the caller must set up a new VMA.
 */
1161struct vm_area_struct *vma_merge(struct mm_struct *mm,
1162 struct vm_area_struct *prev, unsigned long addr,
1163 unsigned long end, unsigned long vm_flags,
1164 struct anon_vma *anon_vma, struct file *file,
1165 pgoff_t pgoff, struct mempolicy *policy,
1166 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1167{
1168 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1169 struct vm_area_struct *area, *next;
1170 int err;
1171
 /*
  * We later require that vma->vm_flags == vm_flags,
  * so this tests vma->vm_flags & VM_SPECIAL, too.
  */
1176 if (vm_flags & VM_SPECIAL)
1177 return NULL;
1178
1179 next = vma_next(mm, prev);
1180 area = next;
1181 if (area && area->vm_end == end)
1182 next = next->vm_next;
1183
1184
1185 VM_WARN_ON(prev && addr <= prev->vm_start);
1186 VM_WARN_ON(area && end > area->vm_end);
1187 VM_WARN_ON(addr >= end);
1188
 /*
  * Can it merge with the predecessor?
  */
1192 if (prev && prev->vm_end == addr &&
1193 mpol_equal(vma_policy(prev), policy) &&
1194 can_vma_merge_after(prev, vm_flags,
1195 anon_vma, file, pgoff,
1196 vm_userfaultfd_ctx)) {
1197
1198
1199
1200 if (next && end == next->vm_start &&
1201 mpol_equal(policy, vma_policy(next)) &&
1202 can_vma_merge_before(next, vm_flags,
1203 anon_vma, file,
1204 pgoff+pglen,
1205 vm_userfaultfd_ctx) &&
1206 is_mergeable_anon_vma(prev->anon_vma,
1207 next->anon_vma, NULL)) {
1208
1209 err = __vma_adjust(prev, prev->vm_start,
1210 next->vm_end, prev->vm_pgoff, NULL,
1211 prev);
1212 } else
1213 err = __vma_adjust(prev, prev->vm_start,
1214 end, prev->vm_pgoff, NULL, prev);
1215 if (err)
1216 return NULL;
1217 khugepaged_enter_vma_merge(prev, vm_flags);
1218 return prev;
1219 }
1220
 /*
  * Can this new request be merged in front of next?
  */
1224 if (next && end == next->vm_start &&
1225 mpol_equal(policy, vma_policy(next)) &&
1226 can_vma_merge_before(next, vm_flags,
1227 anon_vma, file, pgoff+pglen,
1228 vm_userfaultfd_ctx)) {
1229 if (prev && addr < prev->vm_end)
1230 err = __vma_adjust(prev, prev->vm_start,
1231 addr, prev->vm_pgoff, NULL, next);
1232 else {
1233 err = __vma_adjust(area, addr, next->vm_end,
1234 next->vm_pgoff - pglen, NULL, next);
1235
1236
1237
1238
1239
1240 area = next;
1241 }
1242 if (err)
1243 return NULL;
1244 khugepaged_enter_vma_merge(area, vm_flags);
1245 return area;
1246 }
1247
1248 return NULL;
1249}
1250
1251
/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 *
 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
 * we can merge the two vma's. For example, we refuse to merge a vma if
 * there is a vm_ops->close() function, because that indicates that the
 * driver is doing some kind of reference counting. But that doesn't
 * really matter for the anon_vma sharing case.
 */
1264static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1265{
1266 return a->vm_end == b->vm_start &&
1267 mpol_equal(vma_policy(a), vma_policy(b)) &&
1268 a->vm_file == b->vm_file &&
1269 !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
1270 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1271}
1272
/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'.  The 'a'/'b' vmas are, in address order, the expected
 * neighbours of the range being set up.
 *
 * We only reuse old->anon_vma if it is the sole entry on old's
 * anon_vma_chain: sharing a heavily-used anon_vma would lengthen rmap
 * walks for everybody.  The check is optimistic (READ_ONCE, no extra
 * locking); getting it "wrong" merely means a fresh anon_vma is
 * allocated later, which is always safe.
 */
1295static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1296{
1297 if (anon_vma_compatible(a, b)) {
1298 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1299
1300 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1301 return anon_vma;
1302 }
1303 return NULL;
1304}
1305
/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * any reusable anon_vma.
 *
 * We also make sure that the two vma's are compatible (adjacent,
 * and with the same memory policies). That's all stable, even with just
 * a read lock on the mmap_lock.
 */
1314struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1315{
1316 struct anon_vma *anon_vma = NULL;
1317
1318
1319 if (vma->vm_next) {
1320 anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
1321 if (anon_vma)
1322 return anon_vma;
1323 }
1324
1325
1326 if (vma->vm_prev)
1327 anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339 return anon_vma;
1340}
1341

/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr
 */
1346static inline unsigned long round_hint_to_min(unsigned long hint)
1347{
1348 hint &= PAGE_MASK;
1349 if (((void *)hint != NULL) &&
1350 (hint < mmap_min_addr))
1351 return PAGE_ALIGN(mmap_min_addr);
1352 return hint;
1353}
1354
1355int mlock_future_check(struct mm_struct *mm, unsigned long flags,
1356 unsigned long len)
1357{
1358 unsigned long locked, lock_limit;
1359
1360
1361 if (flags & VM_LOCKED) {
1362 locked = len >> PAGE_SHIFT;
1363 locked += mm->locked_vm;
1364 lock_limit = rlimit(RLIMIT_MEMLOCK);
1365 lock_limit >>= PAGE_SHIFT;
1366 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1367 return -EAGAIN;
1368 }
1369 return 0;
1370}
1371
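/* Maximum file size (in bytes) that this file/inode type allows to be mmapped. */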
1372static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
1373{
1374 if (S_ISREG(inode->i_mode))
1375 return MAX_LFS_FILESIZE;
1376
1377 if (S_ISBLK(inode->i_mode))
1378 return MAX_LFS_FILESIZE;
1379
1380 if (S_ISSOCK(inode->i_mode))
1381 return MAX_LFS_FILESIZE;
1382
1383
1384 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
1385 return 0;
1386
1387
1388 return ULONG_MAX;
1389}
1390
1391static inline bool file_mmap_ok(struct file *file, struct inode *inode,
1392 unsigned long pgoff, unsigned long len)
1393{
1394 u64 maxsize = file_mmap_size_max(file, inode);
1395
1396 if (maxsize && len > maxsize)
1397 return false;
1398 maxsize -= len;
1399 if (pgoff > maxsize >> PAGE_SHIFT)
1400 return false;
1401 return true;
1402}
1403
/*
 * The caller must write-lock current->mm->mmap_lock.
 */
1407unsigned long do_mmap(struct file *file, unsigned long addr,
1408 unsigned long len, unsigned long prot,
1409 unsigned long flags, unsigned long pgoff,
1410 unsigned long *populate, struct list_head *uf)
1411{
1412 struct mm_struct *mm = current->mm;
1413 vm_flags_t vm_flags;
1414 int pkey = 0;
1415
1416 *populate = 0;
1417
1418 if (!len)
1419 return -EINVAL;
1420
 /*
  * Does the application expect PROT_READ to imply PROT_EXEC?
  *
  * (the exception is when the underlying filesystem is noexec
  *  mounted, in which case we dont add PROT_EXEC.)
  */
1427 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1428 if (!(file && path_noexec(&file->f_path)))
1429 prot |= PROT_EXEC;
1430
1431
1432 if (flags & MAP_FIXED_NOREPLACE)
1433 flags |= MAP_FIXED;
1434
1435 if (!(flags & MAP_FIXED))
1436 addr = round_hint_to_min(addr);
1437
1438
1439 len = PAGE_ALIGN(len);
1440 if (!len)
1441 return -ENOMEM;
1442
1443
1444 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1445 return -EOVERFLOW;
1446
1447
1448 if (mm->map_count > sysctl_max_map_count)
1449 return -ENOMEM;
1450
1451
1452
1453
1454 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1455 if (IS_ERR_VALUE(addr))
1456 return addr;
1457
1458 if (flags & MAP_FIXED_NOREPLACE) {
1459 if (find_vma_intersection(mm, addr, addr + len))
1460 return -EEXIST;
1461 }
1462
1463 if (prot == PROT_EXEC) {
1464 pkey = execute_only_pkey(mm);
1465 if (pkey < 0)
1466 pkey = 0;
1467 }
1468
1469
1470
1471
1472
1473 vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1474 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1475
1476 if (flags & MAP_LOCKED)
1477 if (!can_do_mlock())
1478 return -EPERM;
1479
1480 if (mlock_future_check(mm, vm_flags, len))
1481 return -EAGAIN;
1482
1483 if (file) {
1484 struct inode *inode = file_inode(file);
1485 unsigned long flags_mask;
1486
1487 if (!file_mmap_ok(file, inode, pgoff, len))
1488 return -EOVERFLOW;
1489
1490 flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
1491
1492 switch (flags & MAP_TYPE) {
1493 case MAP_SHARED:
 /*
  * Force use of MAP_SHARED_VALIDATE with non-legacy
  * flags. E.g. MAP_SYNC is dangerous to use with
  * MAP_SHARED as you don't know which consistency model
  * you will get. We silently ignore unsupported flags
  * with MAP_SHARED to preserve backward compatibility.
  */
1501 flags &= LEGACY_MAP_MASK;
1502 fallthrough;
1503 case MAP_SHARED_VALIDATE:
1504 if (flags & ~flags_mask)
1505 return -EOPNOTSUPP;
1506 if (prot & PROT_WRITE) {
1507 if (!(file->f_mode & FMODE_WRITE))
1508 return -EACCES;
1509 if (IS_SWAPFILE(file->f_mapping->host))
1510 return -ETXTBSY;
1511 }
1512
 /*
  * Make sure we don't allow writing to an append-only
  * file..
  */
1517 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1518 return -EACCES;
1519
1520
1521
1522
1523 if (locks_verify_locked(file))
1524 return -EAGAIN;
1525
1526 vm_flags |= VM_SHARED | VM_MAYSHARE;
1527 if (!(file->f_mode & FMODE_WRITE))
1528 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1529 fallthrough;
1530 case MAP_PRIVATE:
1531 if (!(file->f_mode & FMODE_READ))
1532 return -EACCES;
1533 if (path_noexec(&file->f_path)) {
1534 if (vm_flags & VM_EXEC)
1535 return -EPERM;
1536 vm_flags &= ~VM_MAYEXEC;
1537 }
1538
1539 if (!file->f_op->mmap)
1540 return -ENODEV;
1541 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1542 return -EINVAL;
1543 break;
1544
1545 default:
1546 return -EINVAL;
1547 }
1548 } else {
1549 switch (flags & MAP_TYPE) {
1550 case MAP_SHARED:
1551 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1552 return -EINVAL;
1553
1554
1555
1556 pgoff = 0;
1557 vm_flags |= VM_SHARED | VM_MAYSHARE;
1558 break;
1559 case MAP_PRIVATE:
1560
1561
1562
1563 pgoff = addr >> PAGE_SHIFT;
1564 break;
1565 default:
1566 return -EINVAL;
1567 }
1568 }
1569
1570
1571
1572
1573
1574 if (flags & MAP_NORESERVE) {
1575
1576 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1577 vm_flags |= VM_NORESERVE;
1578
1579
1580 if (file && is_file_hugepages(file))
1581 vm_flags |= VM_NORESERVE;
1582 }
1583
1584 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1585 if (!IS_ERR_VALUE(addr) &&
1586 ((vm_flags & VM_LOCKED) ||
1587 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1588 *populate = len;
1589 return addr;
1590}
1591
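/*
 * Common implementation behind the mmap syscalls: resolve @fd to a file
 * (or set up an anonymous hugetlbfs file for MAP_HUGETLB) and hand off to
 * vm_mmap_pgoff().
 */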
1592unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1593 unsigned long prot, unsigned long flags,
1594 unsigned long fd, unsigned long pgoff)
1595{
1596 struct file *file = NULL;
1597 unsigned long retval;
1598
1599 if (!(flags & MAP_ANONYMOUS)) {
1600 audit_mmap_fd(fd, flags);
1601 file = fget(fd);
1602 if (!file)
1603 return -EBADF;
1604 if (is_file_hugepages(file)) {
1605 len = ALIGN(len, huge_page_size(hstate_file(file)));
1606 } else if (unlikely(flags & MAP_HUGETLB)) {
1607 retval = -EINVAL;
1608 goto out_fput;
1609 }
1610 } else if (flags & MAP_HUGETLB) {
1611 struct ucounts *ucounts = NULL;
1612 struct hstate *hs;
1613
1614 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1615 if (!hs)
1616 return -EINVAL;
1617
1618 len = ALIGN(len, huge_page_size(hs));
1619
1620
1621
1622
1623
1624
1625 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1626 VM_NORESERVE,
1627 &ucounts, HUGETLB_ANONHUGE_INODE,
1628 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1629 if (IS_ERR(file))
1630 return PTR_ERR(file);
1631 }
1632
1633 flags &= ~MAP_DENYWRITE;
1634
1635 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1636out_fput:
1637 if (file)
1638 fput(file);
1639 return retval;
1640}
1641
1642SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1643 unsigned long, prot, unsigned long, flags,
1644 unsigned long, fd, unsigned long, pgoff)
1645{
1646 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1647}
1648
1649#ifdef __ARCH_WANT_SYS_OLD_MMAP
1650struct mmap_arg_struct {
1651 unsigned long addr;
1652 unsigned long len;
1653 unsigned long prot;
1654 unsigned long flags;
1655 unsigned long fd;
1656 unsigned long offset;
1657};
1658
1659SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1660{
1661 struct mmap_arg_struct a;
1662
1663 if (copy_from_user(&a, arg, sizeof(a)))
1664 return -EFAULT;
1665 if (offset_in_page(a.offset))
1666 return -EINVAL;
1667
1668 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1669 a.offset >> PAGE_SHIFT);
1670}
1671#endif
1672
/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
1679int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1680{
1681 vm_flags_t vm_flags = vma->vm_flags;
1682 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1683
1684
1685 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1686 return 0;
1687
1688
1689 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1690 return 1;
1691
1692
1693
1694 if (pgprot_val(vm_page_prot) !=
1695 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1696 return 0;
1697
1698
1699 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1700 return 1;
1701
1702
1703 if (vm_flags & VM_PFNMAP)
1704 return 0;
1705
1706
1707 return vma->vm_file && vma->vm_file->f_mapping &&
1708 mapping_can_writeback(vma->vm_file->f_mapping);
1709}
1710
/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
1715static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1716{
1717
1718
1719
1720
1721 if (file && is_file_hugepages(file))
1722 return 0;
1723
1724 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1725}
1726
1727unsigned long mmap_region(struct file *file, unsigned long addr,
1728 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1729 struct list_head *uf)
1730{
1731 struct mm_struct *mm = current->mm;
1732 struct vm_area_struct *vma, *prev, *merge;
1733 int error;
1734 struct rb_node **rb_link, *rb_parent;
1735 unsigned long charged = 0;
1736
1737
1738 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1739 unsigned long nr_pages;
1740
1741
1742
1743
1744
1745 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1746
1747 if (!may_expand_vm(mm, vm_flags,
1748 (len >> PAGE_SHIFT) - nr_pages))
1749 return -ENOMEM;
1750 }
1751
1752
1753 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
1754 return -ENOMEM;
1755
1756
1757
1758 if (accountable_mapping(file, vm_flags)) {
1759 charged = len >> PAGE_SHIFT;
1760 if (security_vm_enough_memory_mm(mm, charged))
1761 return -ENOMEM;
1762 vm_flags |= VM_ACCOUNT;
1763 }
1764
 /*
  * Can we just expand an old mapping?
  */
1768 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1769 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1770 if (vma)
1771 goto out;
1772
 /*
  * Determine the object being mapped and call the appropriate
  * specific mapper. the address has already been validated, but
  * not unmapped, but the maps are removed from the list.
  */
1778 vma = vm_area_alloc(mm);
1779 if (!vma) {
1780 error = -ENOMEM;
1781 goto unacct_error;
1782 }
1783
1784 vma->vm_start = addr;
1785 vma->vm_end = addr + len;
1786 vma->vm_flags = vm_flags;
1787 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1788 vma->vm_pgoff = pgoff;
1789
1790 if (file) {
1791 if (vm_flags & VM_DENYWRITE) {
1792 error = deny_write_access(file);
1793 if (error)
1794 goto free_vma;
1795 }
1796 if (vm_flags & VM_SHARED) {
1797 error = mapping_map_writable(file->f_mapping);
1798 if (error)
1799 goto allow_write_and_free_vma;
1800 }
1801
1802
1803
1804
1805
1806
1807 vma->vm_file = get_file(file);
1808 error = call_mmap(file, vma);
1809 if (error)
1810 goto unmap_and_free_vma;
1811
1812
1813
1814
1815
1816
1817
1818
1819 WARN_ON_ONCE(addr != vma->vm_start);
1820
1821 addr = vma->vm_start;
1822
1823
1824
1825
1826 if (unlikely(vm_flags != vma->vm_flags && prev)) {
1827 merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
1828 NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
1829 if (merge) {
1830
1831
1832
1833
1834 fput(vma->vm_file);
1835 vm_area_free(vma);
1836 vma = merge;
1837
1838 vm_flags = vma->vm_flags;
1839 goto unmap_writable;
1840 }
1841 }
1842
1843 vm_flags = vma->vm_flags;
1844 } else if (vm_flags & VM_SHARED) {
1845 error = shmem_zero_setup(vma);
1846 if (error)
1847 goto free_vma;
1848 } else {
1849 vma_set_anonymous(vma);
1850 }
1851
1852
1853 if (!arch_validate_flags(vma->vm_flags)) {
1854 error = -EINVAL;
1855 if (file)
1856 goto unmap_and_free_vma;
1857 else
1858 goto free_vma;
1859 }
1860
1861 vma_link(mm, vma, prev, rb_link, rb_parent);
1862
1863 if (file) {
1864unmap_writable:
1865 if (vm_flags & VM_SHARED)
1866 mapping_unmap_writable(file->f_mapping);
1867 if (vm_flags & VM_DENYWRITE)
1868 allow_write_access(file);
1869 }
1870 file = vma->vm_file;
1871out:
1872 perf_event_mmap(vma);
1873
1874 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1875 if (vm_flags & VM_LOCKED) {
1876 if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
1877 is_vm_hugetlb_page(vma) ||
1878 vma == get_gate_vma(current->mm))
1879 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1880 else
1881 mm->locked_vm += (len >> PAGE_SHIFT);
1882 }
1883
1884 if (file)
1885 uprobe_mmap(vma);
1886
 /*
  * New (or expanded) vma always get soft dirty status.
  * Otherwise user-space soft-dirty page tracker won't
  * be able to distinguish situation when vma area unmapped,
  * then new mapped in-place (which must be aimed as
  * a completely new data area).
  */
1894 vma->vm_flags |= VM_SOFTDIRTY;
1895
1896 vma_set_page_prot(vma);
1897
1898 return addr;
1899
1900unmap_and_free_vma:
1901 fput(vma->vm_file);
1902 vma->vm_file = NULL;
1903
1904
1905 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1906 charged = 0;
1907 if (vm_flags & VM_SHARED)
1908 mapping_unmap_writable(file->f_mapping);
1909allow_write_and_free_vma:
1910 if (vm_flags & VM_DENYWRITE)
1911 allow_write_access(file);
1912free_vma:
1913 vm_area_free(vma);
1914unacct_error:
1915 if (charged)
1916 vm_unacct_memory(charged);
1917 return error;
1918}
1919
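/* Bottom-up search for a gap of @info->length bytes, using rb_subtree_gap. */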
1920static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
 /*
  * We implement the search by looking for an rbtree node that
  * immediately follows a suitable gap. That is,
  * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
  * - gap_end   = vma->vm_start        >= info->low_limit  + length;
  * - gap_end - gap_start >= length
  */
1929
1930 struct mm_struct *mm = current->mm;
1931 struct vm_area_struct *vma;
1932 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1933
1934
1935 length = info->length + info->align_mask;
1936 if (length < info->length)
1937 return -ENOMEM;
1938
1939
1940 if (info->high_limit < length)
1941 return -ENOMEM;
1942 high_limit = info->high_limit - length;
1943
1944 if (info->low_limit > high_limit)
1945 return -ENOMEM;
1946 low_limit = info->low_limit + length;
1947
1948
1949 if (RB_EMPTY_ROOT(&mm->mm_rb))
1950 goto check_highest;
1951 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1952 if (vma->rb_subtree_gap < length)
1953 goto check_highest;
1954
1955 while (true) {
1956
1957 gap_end = vm_start_gap(vma);
1958 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1959 struct vm_area_struct *left =
1960 rb_entry(vma->vm_rb.rb_left,
1961 struct vm_area_struct, vm_rb);
1962 if (left->rb_subtree_gap >= length) {
1963 vma = left;
1964 continue;
1965 }
1966 }
1967
1968 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1969check_current:
1970
1971 if (gap_start > high_limit)
1972 return -ENOMEM;
1973 if (gap_end >= low_limit &&
1974 gap_end > gap_start && gap_end - gap_start >= length)
1975 goto found;
1976
1977
1978 if (vma->vm_rb.rb_right) {
1979 struct vm_area_struct *right =
1980 rb_entry(vma->vm_rb.rb_right,
1981 struct vm_area_struct, vm_rb);
1982 if (right->rb_subtree_gap >= length) {
1983 vma = right;
1984 continue;
1985 }
1986 }
1987
1988
1989 while (true) {
1990 struct rb_node *prev = &vma->vm_rb;
1991 if (!rb_parent(prev))
1992 goto check_highest;
1993 vma = rb_entry(rb_parent(prev),
1994 struct vm_area_struct, vm_rb);
1995 if (prev == vma->vm_rb.rb_left) {
1996 gap_start = vm_end_gap(vma->vm_prev);
1997 gap_end = vm_start_gap(vma);
1998 goto check_current;
1999 }
2000 }
2001 }
2002
2003check_highest:
2004
2005 gap_start = mm->highest_vm_end;
2006 gap_end = ULONG_MAX;
2007 if (gap_start > high_limit)
2008 return -ENOMEM;
2009
2010found:
2011
2012 if (gap_start < info->low_limit)
2013 gap_start = info->low_limit;
2014
2015
2016 gap_start += (info->align_offset - gap_start) & info->align_mask;
2017
2018 VM_BUG_ON(gap_start + info->length > info->high_limit);
2019 VM_BUG_ON(gap_start + info->length > gap_end);
2020 return gap_start;
2021}
2022
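/*
 * Like unmapped_area(), but walks top-down so that the highest suitable
 * gap below info->high_limit is chosen.
 */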
2023static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
2024{
2025 struct mm_struct *mm = current->mm;
2026 struct vm_area_struct *vma;
2027 unsigned long length, low_limit, high_limit, gap_start, gap_end;
2028
2029
2030 length = info->length + info->align_mask;
2031 if (length < info->length)
2032 return -ENOMEM;
2033
2034
2035
2036
2037
2038 gap_end = info->high_limit;
2039 if (gap_end < length)
2040 return -ENOMEM;
2041 high_limit = gap_end - length;
2042
2043 if (info->low_limit > high_limit)
2044 return -ENOMEM;
2045 low_limit = info->low_limit + length;
2046
2047
2048 gap_start = mm->highest_vm_end;
2049 if (gap_start <= high_limit)
2050 goto found_highest;
2051
2052
2053 if (RB_EMPTY_ROOT(&mm->mm_rb))
2054 return -ENOMEM;
2055 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2056 if (vma->rb_subtree_gap < length)
2057 return -ENOMEM;
2058
2059 while (true) {
2060
2061 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2062 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2063 struct vm_area_struct *right =
2064 rb_entry(vma->vm_rb.rb_right,
2065 struct vm_area_struct, vm_rb);
2066 if (right->rb_subtree_gap >= length) {
2067 vma = right;
2068 continue;
2069 }
2070 }
2071
2072check_current:
2073
2074 gap_end = vm_start_gap(vma);
2075 if (gap_end < low_limit)
2076 return -ENOMEM;
2077 if (gap_start <= high_limit &&
2078 gap_end > gap_start && gap_end - gap_start >= length)
2079 goto found;
2080
2081
2082 if (vma->vm_rb.rb_left) {
2083 struct vm_area_struct *left =
2084 rb_entry(vma->vm_rb.rb_left,
2085 struct vm_area_struct, vm_rb);
2086 if (left->rb_subtree_gap >= length) {
2087 vma = left;
2088 continue;
2089 }
2090 }
2091
2092
2093 while (true) {
2094 struct rb_node *prev = &vma->vm_rb;
2095 if (!rb_parent(prev))
2096 return -ENOMEM;
2097 vma = rb_entry(rb_parent(prev),
2098 struct vm_area_struct, vm_rb);
2099 if (prev == vma->vm_rb.rb_right) {
2100 gap_start = vma->vm_prev ?
2101 vm_end_gap(vma->vm_prev) : 0;
2102 goto check_current;
2103 }
2104 }
2105 }
2106
2107found:
2108
2109 if (gap_end > info->high_limit)
2110 gap_end = info->high_limit;
2111
2112found_highest:
2113
2114 gap_end -= info->length;
2115 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2116
2117 VM_BUG_ON(gap_end < info->low_limit);
2118 VM_BUG_ON(gap_end < gap_start);
2119 return gap_end;
2120}
2121
/**
 * vm_unmapped_area() - Find an area between the low_limit and the
 * high_limit with the correct alignment and offset, all from @info.
 * Note: current->mm is used for the search.
 *
 * @info: The unmapped area information including the range [low_limit -
 * high_limit), the alignment offset and mask.
 *
 * Return: A memory address or -ENOMEM.
 */
2131unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
2132{
2133 unsigned long addr;
2134
2135 if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
2136 addr = unmapped_area_topdown(info);
2137 else
2138 addr = unmapped_area(info);
2139
2140 trace_vm_unmapped_area(addr, info);
2141 return addr;
2142}
2143
2144#ifndef arch_get_mmap_end
2145#define arch_get_mmap_end(addr) (TASK_SIZE)
2146#endif
2147
2148#ifndef arch_get_mmap_base
2149#define arch_get_mmap_base(addr, base) (base)
2150#endif
2151
/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *      if (ret & ~PAGE_MASK)
 *              error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
2163#ifndef HAVE_ARCH_UNMAPPED_AREA
2164unsigned long
2165arch_get_unmapped_area(struct file *filp, unsigned long addr,
2166 unsigned long len, unsigned long pgoff, unsigned long flags)
2167{
2168 struct mm_struct *mm = current->mm;
2169 struct vm_area_struct *vma, *prev;
2170 struct vm_unmapped_area_info info;
2171 const unsigned long mmap_end = arch_get_mmap_end(addr);
2172
2173 if (len > mmap_end - mmap_min_addr)
2174 return -ENOMEM;
2175
2176 if (flags & MAP_FIXED)
2177 return addr;
2178
2179 if (addr) {
2180 addr = PAGE_ALIGN(addr);
2181 vma = find_vma_prev(mm, addr, &prev);
2182 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2183 (!vma || addr + len <= vm_start_gap(vma)) &&
2184 (!prev || addr >= vm_end_gap(prev)))
2185 return addr;
2186 }
2187
2188 info.flags = 0;
2189 info.length = len;
2190 info.low_limit = mm->mmap_base;
2191 info.high_limit = mmap_end;
2192 info.align_mask = 0;
2193 info.align_offset = 0;
2194 return vm_unmapped_area(&info);
2195}
2196#endif
2197
/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
2202#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2203unsigned long
2204arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
2205 unsigned long len, unsigned long pgoff,
2206 unsigned long flags)
2207{
2208 struct vm_area_struct *vma, *prev;
2209 struct mm_struct *mm = current->mm;
2210 struct vm_unmapped_area_info info;
2211 const unsigned long mmap_end = arch_get_mmap_end(addr);
2212
2213
2214 if (len > mmap_end - mmap_min_addr)
2215 return -ENOMEM;
2216
2217 if (flags & MAP_FIXED)
2218 return addr;
2219
2220
2221 if (addr) {
2222 addr = PAGE_ALIGN(addr);
2223 vma = find_vma_prev(mm, addr, &prev);
2224 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2225 (!vma || addr + len <= vm_start_gap(vma)) &&
2226 (!prev || addr >= vm_end_gap(prev)))
2227 return addr;
2228 }
2229
2230 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2231 info.length = len;
2232 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2233 info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
2234 info.align_mask = 0;
2235 info.align_offset = 0;
2236 addr = vm_unmapped_area(&info);
2237
 /*
  * A failed mmap() very likely causes application failure,
  * so fall back to the bottom-up function here. This scenario
  * can happen with large stack limits and large mmap()
  * allocations.
  */
2244 if (offset_in_page(addr)) {
2245 VM_BUG_ON(addr != -ENOMEM);
2246 info.flags = 0;
2247 info.low_limit = TASK_UNMAPPED_BASE;
2248 info.high_limit = mmap_end;
2249 addr = vm_unmapped_area(&info);
2250 }
2251
2252 return addr;
2253}
2254#endif
2255
2256unsigned long
2257get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2258 unsigned long pgoff, unsigned long flags)
2259{
2260 unsigned long (*get_area)(struct file *, unsigned long,
2261 unsigned long, unsigned long, unsigned long);
2262
2263 unsigned long error = arch_mmap_check(addr, len, flags);
2264 if (error)
2265 return error;
2266
2267
2268 if (len > TASK_SIZE)
2269 return -ENOMEM;
2270
2271 get_area = current->mm->get_unmapped_area;
2272 if (file) {
2273 if (file->f_op->get_unmapped_area)
2274 get_area = file->f_op->get_unmapped_area;
2275 } else if (flags & MAP_SHARED) {
 /*
  * mmap_region() will call shmem_zero_setup() to create a file,
  * so use shmem's get_unmapped_area in case it can be huge.
  * do_mmap() will clear pgoff, so match alignment.
  */
2281 pgoff = 0;
2282 get_area = shmem_get_unmapped_area;
2283 }
2284
2285 addr = get_area(file, addr, len, pgoff, flags);
2286 if (IS_ERR_VALUE(addr))
2287 return addr;
2288
2289 if (addr > TASK_SIZE - len)
2290 return -ENOMEM;
2291 if (offset_in_page(addr))
2292 return -EINVAL;
2293
2294 error = security_mmap_addr(addr);
2295 return error ? error : addr;
2296}
2297
2298EXPORT_SYMBOL(get_unmapped_area);
2299
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
2301struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2302{
2303 struct rb_node *rb_node;
2304 struct vm_area_struct *vma;
2305
2306
2307 vma = vmacache_find(mm, addr);
2308 if (likely(vma))
2309 return vma;
2310
2311 rb_node = mm->mm_rb.rb_node;
2312
2313 while (rb_node) {
2314 struct vm_area_struct *tmp;
2315
2316 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2317
2318 if (tmp->vm_end > addr) {
2319 vma = tmp;
2320 if (tmp->vm_start <= addr)
2321 break;
2322 rb_node = rb_node->rb_left;
2323 } else
2324 rb_node = rb_node->rb_right;
2325 }
2326
2327 if (vma)
2328 vmacache_update(addr, vma);
2329 return vma;
2330}
2331
2332EXPORT_SYMBOL(find_vma);
2333
/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */
2337struct vm_area_struct *
2338find_vma_prev(struct mm_struct *mm, unsigned long addr,
2339 struct vm_area_struct **pprev)
2340{
2341 struct vm_area_struct *vma;
2342
2343 vma = find_vma(mm, addr);
2344 if (vma) {
2345 *pprev = vma->vm_prev;
2346 } else {
2347 struct rb_node *rb_node = rb_last(&mm->mm_rb);
2348
2349 *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
2350 }
2351 return vma;
2352}
2353
/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */
2359static int acct_stack_growth(struct vm_area_struct *vma,
2360 unsigned long size, unsigned long grow)
2361{
2362 struct mm_struct *mm = vma->vm_mm;
2363 unsigned long new_start;
2364
2365
2366 if (!may_expand_vm(mm, vma->vm_flags, grow))
2367 return -ENOMEM;
2368
2369
2370 if (size > rlimit(RLIMIT_STACK))
2371 return -ENOMEM;
2372
2373
2374 if (vma->vm_flags & VM_LOCKED) {
2375 unsigned long locked;
2376 unsigned long limit;
2377 locked = mm->locked_vm + grow;
2378 limit = rlimit(RLIMIT_MEMLOCK);
2379 limit >>= PAGE_SHIFT;
2380 if (locked > limit && !capable(CAP_IPC_LOCK))
2381 return -ENOMEM;
2382 }
2383
2384
2385 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2386 vma->vm_end - size;
2387 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2388 return -EFAULT;
2389
 /*
  * Overcommit..  This must be the final test, as it will
  * update security statistics.
  */
2394 if (security_vm_enough_memory_mm(mm, grow))
2395 return -ENOMEM;
2396
2397 return 0;
2398}
2399
2400#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */
2405int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2406{
2407 struct mm_struct *mm = vma->vm_mm;
2408 struct vm_area_struct *next;
2409 unsigned long gap_addr;
2410 int error = 0;
2411
2412 if (!(vma->vm_flags & VM_GROWSUP))
2413 return -EFAULT;
2414
2415
2416 address &= PAGE_MASK;
2417 if (address >= (TASK_SIZE & PAGE_MASK))
2418 return -ENOMEM;
2419 address += PAGE_SIZE;
2420
2421
2422 gap_addr = address + stack_guard_gap;
2423
2424
2425 if (gap_addr < address || gap_addr > TASK_SIZE)
2426 gap_addr = TASK_SIZE;
2427
2428 next = vma->vm_next;
2429 if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
2430 if (!(next->vm_flags & VM_GROWSUP))
2431 return -ENOMEM;
2432
2433 }
2434
2435
2436 if (unlikely(anon_vma_prepare(vma)))
2437 return -ENOMEM;
2438
 /*
  * vma->vm_start/vm_end cannot change under us because the caller
  * is required to hold the mmap_lock in read mode.  We need the
  * anon_vma lock to serialize against concurrent expand_stacks.
  */
2444 anon_vma_lock_write(vma->anon_vma);
2445
2446
2447 if (address > vma->vm_end) {
2448 unsigned long size, grow;
2449
2450 size = address - vma->vm_start;
2451 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2452
2453 error = -ENOMEM;
2454 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2455 error = acct_stack_growth(vma, size, grow);
2456 if (!error) {
 /*
  * vma_gap_update() does not support concurrent updates, but we
  * only hold a shared mmap_lock here.  anon_vma_lock_write() does
  * not help either, because growable vmas in an mm need not share
  * the same root anon_vma, so reuse mm->page_table_lock to guard
  * against concurrent vma expansions.
  */
2468 spin_lock(&mm->page_table_lock);
2469 if (vma->vm_flags & VM_LOCKED)
2470 mm->locked_vm += grow;
2471 vm_stat_account(mm, vma->vm_flags, grow);
2472 anon_vma_interval_tree_pre_update_vma(vma);
2473 vma->vm_end = address;
2474 anon_vma_interval_tree_post_update_vma(vma);
2475 if (vma->vm_next)
2476 vma_gap_update(vma->vm_next);
2477 else
2478 mm->highest_vm_end = vm_end_gap(vma);
2479 spin_unlock(&mm->page_table_lock);
2480
2481 perf_event_mmap(vma);
2482 }
2483 }
2484 }
2485 anon_vma_unlock_write(vma->anon_vma);
2486 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2487 validate_mm(mm);
2488 return error;
2489}
2490#endif
2491
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
2495int expand_downwards(struct vm_area_struct *vma,
2496 unsigned long address)
2497{
2498 struct mm_struct *mm = vma->vm_mm;
2499 struct vm_area_struct *prev;
2500 int error = 0;
2501
2502 address &= PAGE_MASK;
2503 if (address < mmap_min_addr)
2504 return -EPERM;
2505
2506
2507 prev = vma->vm_prev;
2508
2509 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2510 vma_is_accessible(prev)) {
2511 if (address - prev->vm_end < stack_guard_gap)
2512 return -ENOMEM;
2513 }
2514
2515
2516 if (unlikely(anon_vma_prepare(vma)))
2517 return -ENOMEM;
2518
 /*
  * vma->vm_start/vm_end cannot change under us because the caller
  * is required to hold the mmap_lock in read mode.  We need the
  * anon_vma lock to serialize against concurrent expand_stacks.
  */
2524 anon_vma_lock_write(vma->anon_vma);
2525
2526
2527 if (address < vma->vm_start) {
2528 unsigned long size, grow;
2529
2530 size = vma->vm_end - address;
2531 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2532
2533 error = -ENOMEM;
2534 if (grow <= vma->vm_pgoff) {
2535 error = acct_stack_growth(vma, size, grow);
2536 if (!error) {
 /*
  * vma_gap_update() does not support concurrent updates, but we
  * only hold a shared mmap_lock here.  anon_vma_lock_write() does
  * not help either, because growable vmas in an mm need not share
  * the same root anon_vma, so reuse mm->page_table_lock to guard
  * against concurrent vma expansions.
  */
2548 spin_lock(&mm->page_table_lock);
2549 if (vma->vm_flags & VM_LOCKED)
2550 mm->locked_vm += grow;
2551 vm_stat_account(mm, vma->vm_flags, grow);
2552 anon_vma_interval_tree_pre_update_vma(vma);
2553 vma->vm_start = address;
2554 vma->vm_pgoff -= grow;
2555 anon_vma_interval_tree_post_update_vma(vma);
2556 vma_gap_update(vma);
2557 spin_unlock(&mm->page_table_lock);
2558
2559 perf_event_mmap(vma);
2560 }
2561 }
2562 }
2563 anon_vma_unlock_write(vma->anon_vma);
2564 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2565 validate_mm(mm);
2566 return error;
2567}
2568
/* enforced gap between the expanding stack and other mappings. */
2570unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
2571
2572static int __init cmdline_parse_stack_guard_gap(char *p)
2573{
2574 unsigned long val;
2575 char *endptr;
2576
2577 val = simple_strtoul(p, &endptr, 10);
2578 if (!*endptr)
2579 stack_guard_gap = val << PAGE_SHIFT;
2580
2581 return 0;
2582}
2583__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2584
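/*
 * expand_stack()/find_extend_vma(): which direction the stack grows in
 * depends on the architecture (CONFIG_STACK_GROWSUP or not).
 */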
2585#ifdef CONFIG_STACK_GROWSUP
2586int expand_stack(struct vm_area_struct *vma, unsigned long address)
2587{
2588 return expand_upwards(vma, address);
2589}
2590
2591struct vm_area_struct *
2592find_extend_vma(struct mm_struct *mm, unsigned long addr)
2593{
2594 struct vm_area_struct *vma, *prev;
2595
2596 addr &= PAGE_MASK;
2597 vma = find_vma_prev(mm, addr, &prev);
2598 if (vma && (vma->vm_start <= addr))
2599 return vma;
2600
2601 if (!prev || expand_stack(prev, addr))
2602 return NULL;
2603 if (prev->vm_flags & VM_LOCKED)
2604 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2605 return prev;
2606}
2607#else
2608int expand_stack(struct vm_area_struct *vma, unsigned long address)
2609{
2610 return expand_downwards(vma, address);
2611}
2612
2613struct vm_area_struct *
2614find_extend_vma(struct mm_struct *mm, unsigned long addr)
2615{
2616 struct vm_area_struct *vma;
2617 unsigned long start;
2618
2619 addr &= PAGE_MASK;
2620 vma = find_vma(mm, addr);
2621 if (!vma)
2622 return NULL;
2623 if (vma->vm_start <= addr)
2624 return vma;
2625 if (!(vma->vm_flags & VM_GROWSDOWN))
2626 return NULL;
2627 start = vma->vm_start;
2628 if (expand_stack(vma, addr))
2629 return NULL;
2630 if (vma->vm_flags & VM_LOCKED)
2631 populate_vma_page_range(vma, addr, start, NULL);
2632 return vma;
2633}
2634#endif
2635
2636EXPORT_SYMBOL_GPL(find_extend_vma);
2637
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */
2644static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2645{
2646 unsigned long nr_accounted = 0;
2647
2648
2649 update_hiwater_vm(mm);
2650 do {
2651 long nrpages = vma_pages(vma);
2652
2653 if (vma->vm_flags & VM_ACCOUNT)
2654 nr_accounted += nrpages;
2655 vm_stat_account(mm, vma->vm_flags, -nrpages);
2656 vma = remove_vma(vma);
2657 } while (vma);
2658 vm_unacct_memory(nr_accounted);
2659 validate_mm(mm);
2660}
2661
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
2667static void unmap_region(struct mm_struct *mm,
2668 struct vm_area_struct *vma, struct vm_area_struct *prev,
2669 unsigned long start, unsigned long end)
2670{
2671 struct vm_area_struct *next = vma_next(mm, prev);
2672 struct mmu_gather tlb;
2673
2674 lru_add_drain();
2675 tlb_gather_mmu(&tlb, mm);
2676 update_hiwater_rss(mm);
2677 unmap_vmas(&tlb, vma, start, end);
2678 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2679 next ? next->vm_start : USER_PGTABLES_CEILING);
2680 tlb_finish_mmu(&tlb);
2681}
2682
/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go..
 */
2687static bool
2688detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2689 struct vm_area_struct *prev, unsigned long end)
2690{
2691 struct vm_area_struct **insertion_point;
2692 struct vm_area_struct *tail_vma = NULL;
2693
2694 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2695 vma->vm_prev = NULL;
2696 do {
2697 vma_rb_erase(vma, &mm->mm_rb);
2698 mm->map_count--;
2699 tail_vma = vma;
2700 vma = vma->vm_next;
2701 } while (vma && vma->vm_start < end);
2702 *insertion_point = vma;
2703 if (vma) {
2704 vma->vm_prev = prev;
2705 vma_gap_update(vma);
2706 } else
2707 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2708 tail_vma->vm_next = NULL;
2709
	/* Kill the cache: stale per-thread vma lookups may point at detached vmas */
2711 vmacache_invalidate(mm);
2712
	/*
	 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
	 * VM_GROWSUP VMA. Such VMAs can change their size under
	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
	 */
2718 if (vma && (vma->vm_flags & VM_GROWSDOWN))
2719 return false;
2720 if (prev && (prev->vm_flags & VM_GROWSUP))
2721 return false;
2722 return true;
2723}
2724
2725
2726
2727
2728
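/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */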
2729int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2730 unsigned long addr, int new_below)
2731{
2732 struct vm_area_struct *new;
2733 int err;
2734
2735 if (vma->vm_ops && vma->vm_ops->may_split) {
2736 err = vma->vm_ops->may_split(vma, addr);
2737 if (err)
2738 return err;
2739 }
2740
2741 new = vm_area_dup(vma);
2742 if (!new)
2743 return -ENOMEM;
2744
2745 if (new_below)
2746 new->vm_end = addr;
2747 else {
2748 new->vm_start = addr;
2749 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2750 }
2751
2752 err = vma_dup_policy(vma, new);
2753 if (err)
2754 goto out_free_vma;
2755
2756 err = anon_vma_clone(new, vma);
2757 if (err)
2758 goto out_free_mpol;
2759
2760 if (new->vm_file)
2761 get_file(new->vm_file);
2762
2763 if (new->vm_ops && new->vm_ops->open)
2764 new->vm_ops->open(new);
2765
2766 if (new_below)
2767 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2768 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2769 else
2770 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2771
2772
2773 if (!err)
2774 return 0;
2775
	/* Clean everything up if vma_adjust failed. */
2777 if (new->vm_ops && new->vm_ops->close)
2778 new->vm_ops->close(new);
2779 if (new->vm_file)
2780 fput(new->vm_file);
2781 unlink_anon_vmas(new);
2782 out_free_mpol:
2783 mpol_put(vma_policy(new));
2784 out_free_vma:
2785 vm_area_free(new);
2786 return err;
2787}
2788
2789
2790
2791
2792
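/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */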
2793int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2794 unsigned long addr, int new_below)
2795{
2796 if (mm->map_count >= sysctl_max_map_count)
2797 return -ENOMEM;
2798
2799 return __split_vma(mm, vma, addr, new_below);
2800}
2801
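/*
 * Walk the vma list from @start while vm_start < @limit, undoing the mlock
 * accounting and munlocking the pages of every VM_LOCKED vma on the way,
 * so that the pages can be unmapped afterwards.
 */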
2802static inline void
2803unlock_range(struct vm_area_struct *start, unsigned long limit)
2804{
2805 struct mm_struct *mm = start->vm_mm;
2806 struct vm_area_struct *tmp = start;
2807
2808 while (tmp && tmp->vm_start < limit) {
2809 if (tmp->vm_flags & VM_LOCKED) {
2810 mm->locked_vm -= vma_pages(tmp);
2811 munlock_vma_pages_all(tmp);
2812 }
2813
2814 tmp = tmp->vm_next;
2815 }
2816}
2817
2818
2819
2820
2821
2822
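/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 *
 * Returns a negative error code on failure, 0 on success with the
 * write lock still held, or 1 when @downgrade was requested and the
 * lock has been downgraded to a read lock.
 */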
2823int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2824 struct list_head *uf, bool downgrade)
2825{
2826 unsigned long end;
2827 struct vm_area_struct *vma, *prev, *last;
2828
2829 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2830 return -EINVAL;
2831
2832 len = PAGE_ALIGN(len);
2833 end = start + len;
2834 if (len == 0)
2835 return -EINVAL;
2836
	/*
	 * arch_unmap() might do unmaps itself.  It must be called and
	 * finish any rbtree manipulation before this code runs.
	 */
2842 arch_unmap(mm, start, end);
2843
	/* Find the first overlapping VMA; on return start < vma->vm_end */
2845 vma = find_vma_intersection(mm, start, end);
2846 if (!vma)
2847 return 0;
2848 prev = vma->vm_prev;
2849
	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */
2857 if (start > vma->vm_start) {
2858 int error;
2859
		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
2865 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2866 return -ENOMEM;
2867
2868 error = __split_vma(mm, vma, start, 0);
2869 if (error)
2870 return error;
2871 prev = vma;
2872 }
2873
	/* Does it split the last one? */
2875 last = find_vma(mm, end);
2876 if (last && end > last->vm_start) {
2877 int error = __split_vma(mm, last, end, 1);
2878 if (error)
2879 return error;
2880 }
2881 vma = vma_next(mm, prev);
2882
2883 if (unlikely(uf)) {
		/*
		 * If userfaultfd_unmap_prep returns an error the vmas
		 * will remain split, but userland will get a
		 * highly unusual error anyway. This is no
		 * different than the case where the first of the two
		 * __split_vma fails, but we don't undo the first
		 * split, despite we could. This is unlikely enough
		 * failure that it's not worth optimizing it for.
		 */
2893 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2894 if (error)
2895 return error;
2896 }
2897
	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */
2901 if (mm->locked_vm)
2902 unlock_range(vma, end);
2903
	/* Detach vmas from the rbtree and the vma list */
2905 if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
2906 downgrade = false;
2907
2908 if (downgrade)
2909 mmap_write_downgrade(mm);
2910
2911 unmap_region(mm, vma, prev, start, end);
2912
	/* Fix up all other VM information */
2914 remove_vma_list(mm, vma);
2915
2916 return downgrade ? 1 : 0;
2917}
2918
2919int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2920 struct list_head *uf)
2921{
2922 return __do_munmap(mm, start, len, uf, false);
2923}
2924
2925static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
2926{
2927 int ret;
2928 struct mm_struct *mm = current->mm;
2929 LIST_HEAD(uf);
2930
2931 if (mmap_write_lock_killable(mm))
2932 return -EINTR;
2933
2934 ret = __do_munmap(mm, start, len, &uf, downgrade);
2935
	/*
	 * Returning 1 indicates mmap_lock is downgraded.
	 * But 1 is not a legal return value of vm_munmap() and munmap(), so
	 * reset it to 0 before returning.
	 */
2940 if (ret == 1) {
2941 mmap_read_unlock(mm);
2942 ret = 0;
2943 } else
2944 mmap_write_unlock(mm);
2945
2946 userfaultfd_unmap_complete(mm, &uf);
2947 return ret;
2948}
2949
2950int vm_munmap(unsigned long start, size_t len)
2951{
2952 return __vm_munmap(start, len, false);
2953}
2954EXPORT_SYMBOL(vm_munmap);
2955
2956SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2957{
2958 addr = untagged_addr(addr);
2959 profile_munmap(addr);
2960 return __vm_munmap(addr, len, true);
2961}
2962
2963
2964
2965
2966
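/*
 * Emulation of deprecated remap_file_pages() syscall.
 */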
2967SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2968 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2969{
2970
2971 struct mm_struct *mm = current->mm;
2972 struct vm_area_struct *vma;
2973 unsigned long populate = 0;
2974 unsigned long ret = -EINVAL;
2975 struct file *file;
2976
2977 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
2978 current->comm, current->pid);
2979
2980 if (prot)
2981 return ret;
2982 start = start & PAGE_MASK;
2983 size = size & PAGE_MASK;
2984
2985 if (start + size <= start)
2986 return ret;
2987
2988
2989 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2990 return ret;
2991
2992 if (mmap_write_lock_killable(mm))
2993 return -EINTR;
2994
2995 vma = find_vma(mm, start);
2996
2997 if (!vma || !(vma->vm_flags & VM_SHARED))
2998 goto out;
2999
3000 if (start < vma->vm_start)
3001 goto out;
3002
3003 if (start + size > vma->vm_end) {
3004 struct vm_area_struct *next;
3005
3006 for (next = vma->vm_next; next; next = next->vm_next) {
3007
3008 if (next->vm_start != next->vm_prev->vm_end)
3009 goto out;
3010
3011 if (next->vm_file != vma->vm_file)
3012 goto out;
3013
3014 if (next->vm_flags != vma->vm_flags)
3015 goto out;
3016
3017 if (start + size <= next->vm_end)
3018 break;
3019 }
3020
3021 if (!next)
3022 goto out;
3023 }
3024
3025 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
3026 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
3027 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
3028
3029 flags &= MAP_NONBLOCK;
3030 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
3031 if (vma->vm_flags & VM_LOCKED)
3032 flags |= MAP_LOCKED;
3033
3034 file = get_file(vma->vm_file);
3035 ret = do_mmap(vma->vm_file, start, size,
3036 prot, flags, pgoff, &populate, NULL);
3037 fput(file);
3038out:
3039 mmap_write_unlock(mm);
3040 if (populate)
3041 mm_populate(ret, populate);
3042 if (!IS_ERR_VALUE(ret))
3043 ret = 0;
3044 return ret;
3045}
3046
3047
3048
3049
3050
3051
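/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */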
3052static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
3053{
3054 struct mm_struct *mm = current->mm;
3055 struct vm_area_struct *vma, *prev;
3056 struct rb_node **rb_link, *rb_parent;
3057 pgoff_t pgoff = addr >> PAGE_SHIFT;
3058 int error;
3059 unsigned long mapped_addr;
3060
	/* Until we need other flags, refuse anything except VM_EXEC. */
3062 if ((flags & (~VM_EXEC)) != 0)
3063 return -EINVAL;
3064 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
3065
3066 mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
3067 if (IS_ERR_VALUE(mapped_addr))
3068 return mapped_addr;
3069
3070 error = mlock_future_check(mm, mm->def_flags, len);
3071 if (error)
3072 return error;
3073
3074
3075 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
3076 return -ENOMEM;
3077
3078
3079 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
3080 return -ENOMEM;
3081
3082 if (mm->map_count > sysctl_max_map_count)
3083 return -ENOMEM;
3084
3085 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
3086 return -ENOMEM;
3087
	/* Can we just expand an old private anonymous mapping? */
3089 vma = vma_merge(mm, prev, addr, addr + len, flags,
3090 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
3091 if (vma)
3092 goto out;
3093
3094
3095
3096
3097 vma = vm_area_alloc(mm);
3098 if (!vma) {
3099 vm_unacct_memory(len >> PAGE_SHIFT);
3100 return -ENOMEM;
3101 }
3102
3103 vma_set_anonymous(vma);
3104 vma->vm_start = addr;
3105 vma->vm_end = addr + len;
3106 vma->vm_pgoff = pgoff;
3107 vma->vm_flags = flags;
3108 vma->vm_page_prot = vm_get_page_prot(flags);
3109 vma_link(mm, vma, prev, rb_link, rb_parent);
3110out:
3111 perf_event_mmap(vma);
3112 mm->total_vm += len >> PAGE_SHIFT;
3113 mm->data_vm += len >> PAGE_SHIFT;
3114 if (flags & VM_LOCKED)
3115 mm->locked_vm += (len >> PAGE_SHIFT);
3116 vma->vm_flags |= VM_SOFTDIRTY;
3117 return 0;
3118}
3119
3120int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
3121{
3122 struct mm_struct *mm = current->mm;
3123 unsigned long len;
3124 int ret;
3125 bool populate;
3126 LIST_HEAD(uf);
3127
3128 len = PAGE_ALIGN(request);
3129 if (len < request)
3130 return -ENOMEM;
3131 if (!len)
3132 return 0;
3133
3134 if (mmap_write_lock_killable(mm))
3135 return -EINTR;
3136
3137 ret = do_brk_flags(addr, len, flags, &uf);
3138 populate = ((mm->def_flags & VM_LOCKED) != 0);
3139 mmap_write_unlock(mm);
3140 userfaultfd_unmap_complete(mm, &uf);
3141 if (populate && !ret)
3142 mm_populate(addr, len);
3143 return ret;
3144}
3145EXPORT_SYMBOL(vm_brk_flags);
3146
3147int vm_brk(unsigned long addr, unsigned long len)
3148{
3149 return vm_brk_flags(addr, len, 0);
3150}
3151EXPORT_SYMBOL(vm_brk);
3152
3153
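/* Release all mmaps. */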
3154void exit_mmap(struct mm_struct *mm)
3155{
3156 struct mmu_gather tlb;
3157 struct vm_area_struct *vma;
3158 unsigned long nr_accounted = 0;
3159
	/* mm's last user has gone, and it's about to be pulled down */
3161 mmu_notifier_release(mm);
3162
3163 if (unlikely(mm_is_oom_victim(mm))) {
		/*
		 * Manually reap the mm to free as much memory as possible.
		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
		 * this mm from further consideration.  Taking mm->mmap_lock
		 * for write after setting MMF_OOM_SKIP will guarantee that the
		 * oom reaper will not run on this mm again after mmap_lock is
		 * dropped.
		 *
		 * Nothing can be holding mm->mmap_lock here and the above call
		 * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
		 * __oom_reap_task_mm() will not block.
		 */
3180 (void)__oom_reap_task_mm(mm);
3181
3182 set_bit(MMF_OOM_SKIP, &mm->flags);
3183 mmap_write_lock(mm);
3184 mmap_write_unlock(mm);
3185 }
3186
3187 if (mm->locked_vm)
3188 unlock_range(mm->mmap, ULONG_MAX);
3189
3190 arch_exit_mmap(mm);
3191
3192 vma = mm->mmap;
3193 if (!vma)
3194 return;
3195
3196 lru_add_drain();
3197 flush_cache_mm(mm);
3198 tlb_gather_mmu_fullmm(&tlb, mm);
3199
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
3201 unmap_vmas(&tlb, vma, 0, -1);
3202 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3203 tlb_finish_mmu(&tlb);
3204
	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
3209 while (vma) {
3210 if (vma->vm_flags & VM_ACCOUNT)
3211 nr_accounted += vma_pages(vma);
3212 vma = remove_vma(vma);
3213 cond_resched();
3214 }
3215 vm_unacct_memory(nr_accounted);
3216}
3217
3218
3219
3220
3221
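/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */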
3222int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3223{
3224 struct vm_area_struct *prev;
3225 struct rb_node **rb_link, *rb_parent;
3226
3227 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3228 &prev, &rb_link, &rb_parent))
3229 return -ENOMEM;
3230 if ((vma->vm_flags & VM_ACCOUNT) &&
3231 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3232 return -ENOMEM;
3233
	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap and in do_brk_flags.
	 */
3246 if (vma_is_anonymous(vma)) {
3247 BUG_ON(vma->anon_vma);
3248 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3249 }
3250
3251 vma_link(mm, vma, prev, rb_link, rb_parent);
3252 return 0;
3253}
3254
3255
3256
3257
3258
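/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */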
3259struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3260 unsigned long addr, unsigned long len, pgoff_t pgoff,
3261 bool *need_rmap_locks)
3262{
3263 struct vm_area_struct *vma = *vmap;
3264 unsigned long vma_start = vma->vm_start;
3265 struct mm_struct *mm = vma->vm_mm;
3266 struct vm_area_struct *new_vma, *prev;
3267 struct rb_node **rb_link, *rb_parent;
3268 bool faulted_in_anon_vma = true;
3269
	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
3274 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3275 pgoff = addr >> PAGE_SHIFT;
3276 faulted_in_anon_vma = false;
3277 }
3278
3279 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3280 return NULL;
3281 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3282 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3283 vma->vm_userfaultfd_ctx);
3284 if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
3288 if (unlikely(vma_start >= new_vma->vm_start &&
3289 vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * ourself during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
3302 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3303 *vmap = vma = new_vma;
3304 }
3305 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3306 } else {
3307 new_vma = vm_area_dup(vma);
3308 if (!new_vma)
3309 goto out;
3310 new_vma->vm_start = addr;
3311 new_vma->vm_end = addr + len;
3312 new_vma->vm_pgoff = pgoff;
3313 if (vma_dup_policy(vma, new_vma))
3314 goto out_free_vma;
3315 if (anon_vma_clone(new_vma, vma))
3316 goto out_free_mempol;
3317 if (new_vma->vm_file)
3318 get_file(new_vma->vm_file);
3319 if (new_vma->vm_ops && new_vma->vm_ops->open)
3320 new_vma->vm_ops->open(new_vma);
3321 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3322 *need_rmap_locks = false;
3323 }
3324 return new_vma;
3325
3326out_free_mempol:
3327 mpol_put(vma_policy(new_vma));
3328out_free_vma:
3329 vm_area_free(new_vma);
3330out:
3331 return NULL;
3332}
3333
3334
3335
3336
3337
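/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */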
3338bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3339{
3340 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3341 return false;
3342
3343 if (is_data_mapping(flags) &&
3344 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
3345
3346 if (rlimit(RLIMIT_DATA) == 0 &&
3347 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3348 return true;
3349
3350 pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
3351 current->comm, current->pid,
3352 (mm->data_vm + npages) << PAGE_SHIFT,
3353 rlimit(RLIMIT_DATA),
3354 ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
3355
3356 if (!ignore_rlimit_data)
3357 return false;
3358 }
3359
3360 return true;
3361}
3362
3363void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3364{
3365 mm->total_vm += npages;
3366
3367 if (is_exec_mapping(flags))
3368 mm->exec_vm += npages;
3369 else if (is_stack_mapping(flags))
3370 mm->stack_vm += npages;
3371 else if (is_data_mapping(flags))
3372 mm->data_vm += npages;
3373}
3374
3375static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
3376
3377
3378
3379
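/*
 * Close hook for special mappings: having a close hook prevents vma merging
 * regardless of flags.
 */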
3380static void special_mapping_close(struct vm_area_struct *vma)
3381{
3382}
3383
3384static const char *special_mapping_name(struct vm_area_struct *vma)
3385{
3386 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3387}
3388
3389static int special_mapping_mremap(struct vm_area_struct *new_vma)
3390{
3391 struct vm_special_mapping *sm = new_vma->vm_private_data;
3392
3393 if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
3394 return -EFAULT;
3395
3396 if (sm->mremap)
3397 return sm->mremap(sm, new_vma);
3398
3399 return 0;
3400}
3401
3402static int special_mapping_split(struct vm_area_struct *vma, unsigned long addr)
3403{
	/*
	 * Forbid splitting special mappings - kernel has expectations over
	 * the number of pages in mapping. Together with VM_DONTEXPAND
	 * the mappings are fixed in their address space.
	 */
3410 return -EINVAL;
3411}
3412
3413static const struct vm_operations_struct special_mapping_vmops = {
3414 .close = special_mapping_close,
3415 .fault = special_mapping_fault,
3416 .mremap = special_mapping_mremap,
3417 .name = special_mapping_name,
	/* vDSO code relies on VVAR pages not being accessible remotely */
3419 .access = NULL,
3420 .may_split = special_mapping_split,
3421};
3422
3423static const struct vm_operations_struct legacy_special_mapping_vmops = {
3424 .close = special_mapping_close,
3425 .fault = special_mapping_fault,
3426};
3427
3428static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
3429{
3430 struct vm_area_struct *vma = vmf->vma;
3431 pgoff_t pgoff;
3432 struct page **pages;
3433
3434 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3435 pages = vma->vm_private_data;
3436 } else {
3437 struct vm_special_mapping *sm = vma->vm_private_data;
3438
3439 if (sm->fault)
3440 return sm->fault(sm, vmf->vma, vmf);
3441
3442 pages = sm->pages;
3443 }
3444
3445 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3446 pgoff--;
3447
3448 if (*pages) {
3449 struct page *page = *pages;
3450 get_page(page);
3451 vmf->page = page;
3452 return 0;
3453 }
3454
3455 return VM_FAULT_SIGBUS;
3456}
3457
3458static struct vm_area_struct *__install_special_mapping(
3459 struct mm_struct *mm,
3460 unsigned long addr, unsigned long len,
3461 unsigned long vm_flags, void *priv,
3462 const struct vm_operations_struct *ops)
3463{
3464 int ret;
3465 struct vm_area_struct *vma;
3466
3467 vma = vm_area_alloc(mm);
3468 if (unlikely(vma == NULL))
3469 return ERR_PTR(-ENOMEM);
3470
3471 vma->vm_start = addr;
3472 vma->vm_end = addr + len;
3473
3474 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3475 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3476
3477 vma->vm_ops = ops;
3478 vma->vm_private_data = priv;
3479
3480 ret = insert_vm_struct(mm, vma);
3481 if (ret)
3482 goto out;
3483
3484 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3485
3486 perf_event_mmap(vma);
3487
3488 return vma;
3489
3490out:
3491 vm_area_free(vma);
3492 return ERR_PTR(ret);
3493}
3494
3495bool vma_is_special_mapping(const struct vm_area_struct *vma,
3496 const struct vm_special_mapping *sm)
3497{
3498 return vma->vm_private_data == sm &&
3499 (vma->vm_ops == &special_mapping_vmops ||
3500 vma->vm_ops == &legacy_special_mapping_vmops);
3501}
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
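/*
 * Called with mm->mmap_lock held for writing.
 *
 * Insert a new vma covering the given region, with the given flags, backed
 * by the vm_special_mapping descriptor @spec.  Unless @spec->fault is set,
 * faults are satisfied from the NULL-terminated @spec->pages array, and any
 * access past the last supplied page raises SIGBUS.
 *
 * Illustrative usage sketch (the "demo_*" identifiers are hypothetical and
 * not part of this file):
 *
 *	static struct page *demo_pages[2];	// {page, NULL}
 *	static const struct vm_special_mapping demo_spec = {
 *		.name  = "[demo]",
 *		.pages = demo_pages,
 *	};
 *	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
 *				       VM_READ | VM_MAYREAD, &demo_spec);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */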
3512struct vm_area_struct *_install_special_mapping(
3513 struct mm_struct *mm,
3514 unsigned long addr, unsigned long len,
3515 unsigned long vm_flags, const struct vm_special_mapping *spec)
3516{
3517 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3518 &special_mapping_vmops);
3519}
3520
3521int install_special_mapping(struct mm_struct *mm,
3522 unsigned long addr, unsigned long len,
3523 unsigned long vm_flags, struct page **pages)
3524{
3525 struct vm_area_struct *vma = __install_special_mapping(
3526 mm, addr, len, vm_flags, (void *)pages,
3527 &legacy_special_mapping_vmops);
3528
3529 return PTR_ERR_OR_ZERO(vma);
3530}
3531
3532static DEFINE_MUTEX(mm_all_locks_mutex);
3533
3534static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3535{
3536 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
3541 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
3542
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
3551 if (__test_and_set_bit(0, (unsigned long *)
3552 &anon_vma->root->rb_root.rb_root.rb_node))
3553 BUG();
3554 }
3555}
3556
3557static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3558{
3559 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
3569 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3570 BUG();
3571 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
3572 }
3573}
3574
/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_lock in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_lock until mm_drop_all_locks() returns.
 *
 * mmap_lock in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas to be associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * as it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, as documented at the top of
 * mm/rmap.c:
 *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
 *     hugetlb mappings);
 *   - all i_mmap_rwsem locks;
 *   - all anon_vma->rwsem locks
 *
 * We can take all locks within these types randomly because the VM code
 * doesn't nest them, and we are protected from parallel mm_take_all_locks()
 * by mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
3612int mm_take_all_locks(struct mm_struct *mm)
3613{
3614 struct vm_area_struct *vma;
3615 struct anon_vma_chain *avc;
3616
3617 BUG_ON(mmap_read_trylock(mm));
3618
3619 mutex_lock(&mm_all_locks_mutex);
3620
3621 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3622 if (signal_pending(current))
3623 goto out_unlock;
3624 if (vma->vm_file && vma->vm_file->f_mapping &&
3625 is_vm_hugetlb_page(vma))
3626 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3627 }
3628
3629 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3630 if (signal_pending(current))
3631 goto out_unlock;
3632 if (vma->vm_file && vma->vm_file->f_mapping &&
3633 !is_vm_hugetlb_page(vma))
3634 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3635 }
3636
3637 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3638 if (signal_pending(current))
3639 goto out_unlock;
3640 if (vma->anon_vma)
3641 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3642 vm_lock_anon_vma(mm, avc->anon_vma);
3643 }
3644
3645 return 0;
3646
3647out_unlock:
3648 mm_drop_all_locks(mm);
3649 return -EINTR;
3650}
3651
3652static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3653{
3654 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
3667 if (!__test_and_clear_bit(0, (unsigned long *)
3668 &anon_vma->root->rb_root.rb_root.rb_node))
3669 BUG();
3670 anon_vma_unlock_write(anon_vma);
3671 }
3672}
3673
3674static void vm_unlock_mapping(struct address_space *mapping)
3675{
3676 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
3681 i_mmap_unlock_write(mapping);
3682 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3683 &mapping->flags))
3684 BUG();
3685 }
3686}
3687
3688
3689
3690
3691
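/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */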
3692void mm_drop_all_locks(struct mm_struct *mm)
3693{
3694 struct vm_area_struct *vma;
3695 struct anon_vma_chain *avc;
3696
3697 BUG_ON(mmap_read_trylock(mm));
3698 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3699
3700 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3701 if (vma->anon_vma)
3702 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3703 vm_unlock_anon_vma(avc->anon_vma);
3704 if (vma->vm_file && vma->vm_file->f_mapping)
3705 vm_unlock_mapping(vma->vm_file->f_mapping);
3706 }
3707
3708 mutex_unlock(&mm_all_locks_mutex);
3709}
3710
3711
3712
3713
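/*
 * initialise the percpu counter for VM
 */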
3714void __init mmap_init(void)
3715{
3716 int ret;
3717
3718 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3719 VM_BUG_ON(ret);
3720}
3721
/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd and login, bash, and top/kill.
 */
3732static int init_user_reserve(void)
3733{
3734 unsigned long free_kbytes;
3735
3736 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3737
3738 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3739 return 0;
3740}
3741subsys_initcall(init_user_reserve);
3742
/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
 * only reserve 3% of free pages by default.
 */
3753static int init_admin_reserve(void)
3754{
3755 unsigned long free_kbytes;
3756
3757 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3758
3759 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3760 return 0;
3761}
3762subsys_initcall(init_admin_reserve);
3763
/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
3782static int reserve_mem_notifier(struct notifier_block *nb,
3783 unsigned long action, void *data)
3784{
3785 unsigned long tmp, free_kbytes;
3786
3787 switch (action) {
3788 case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
3790 tmp = sysctl_user_reserve_kbytes;
3791 if (0 < tmp && tmp < (1UL << 17))
3792 init_user_reserve();
3793
		/* Default max is 8MB. Leave alone if modified by operator. */
3795 tmp = sysctl_admin_reserve_kbytes;
3796 if (0 < tmp && tmp < (1UL << 13))
3797 init_admin_reserve();
3798
3799 break;
3800 case MEM_OFFLINE:
3801 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3802
3803 if (sysctl_user_reserve_kbytes > free_kbytes) {
3804 init_user_reserve();
3805 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3806 sysctl_user_reserve_kbytes);
3807 }
3808
3809 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3810 init_admin_reserve();
3811 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3812 sysctl_admin_reserve_kbytes);
3813 }
3814 break;
3815 default:
3816 break;
3817 }
3818 return NOTIFY_OK;
3819}
3820
3821static struct notifier_block reserve_mem_nb = {
3822 .notifier_call = reserve_mem_notifier,
3823};
3824
3825static int __meminit init_reserve_notifier(void)
3826{
3827 if (register_hotmemory_notifier(&reserve_mem_nb))
3828 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3829
3830 return 0;
3831}
3832subsys_initcall(init_reserve_notifier);
3833