// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Memory mapping and VMA management for the process address space.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * protection_map is indexed by the low four bits of vm_flags
 * (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED): the __P* entries describe
 * private (copy-on-write) mappings and the __S* entries describe shared
 * mappings, for each PROT_READ/WRITE/EXEC combination.  The protections
 * actually obtained depend on the architecture's page protection
 * hardware; x86 without NX, for example, cannot express "read, no exec".
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/*
 * Update vma->vm_page_prot to match vma->vm_flags.  Shared mappings that
 * want write-notification (dirty tracking) are mapped with the read-only
 * protections so that the first write faults and can be observed.
 */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}

	/* readers may look at vm_page_prot without holding mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem (held by the caller).
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		allow_write_access(file);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
	return next;
}
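
/*
 * sys_brk() adjusts the program break (the end of the data/heap segment).
 * On success it returns the new break; on failure it returns the current
 * break rather than a negative errno, so userspace detects failure by
 * comparing the return value with the address it asked for.
 */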

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	bool downgraded = false;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	origbrk = mm->brk;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here.  If this check were done later, after
	 * comparing oldbrk with newbrk, a request could escape the test and
	 * let the data segment grow beyond its limit when the limit is not
	 * page aligned.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		mm->brk = brk;
		goto success;
	}

	/*
	 * Always allow shrinking brk.
	 * __do_munmap() may downgrade mmap_lock to read.
	 */
	if (brk <= mm->brk) {
		int ret;

		/*
		 * mm->brk must be protected by the write mmap_lock, so update
		 * it before downgrading the lock.  When __do_munmap() fails,
		 * mm->brk is restored from origbrk.
		 */
		mm->brk = brk;
		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
		if (ret < 0) {
			mm->brk = origbrk;
			goto out;
		} else if (ret == 1) {
			downgraded = true;
		}
		goto success;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;
	mm->brk = brk;

success:
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	if (downgraded)
		mmap_read_unlock(mm);
	else
		mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = origbrk;
	mmap_write_unlock(mm);
	return retval;
}

static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
{
	unsigned long gap, prev_end;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but it keeps the code shorter.
	 */
	gap = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (gap > prev_end)
			gap -= prev_end;
		else
			gap = 0;
	}
	return gap;
}
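
/*
 * The rb_subtree_gap field of a VMA caches the largest gap (as computed by
 * vma_compute_gap()) found anywhere in that VMA's rbtree subtree.  It is
 * kept up to date by the augmented-rbtree callbacks declared below, which
 * is what lets the unmapped-area search skip whole subtrees that cannot
 * possibly contain a large enough hole.
 */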
309
310#ifdef CONFIG_DEBUG_VM_RB
311static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
312{
313 unsigned long max = vma_compute_gap(vma), subtree_gap;
314 if (vma->vm_rb.rb_left) {
315 subtree_gap = rb_entry(vma->vm_rb.rb_left,
316 struct vm_area_struct, vm_rb)->rb_subtree_gap;
317 if (subtree_gap > max)
318 max = subtree_gap;
319 }
320 if (vma->vm_rb.rb_right) {
321 subtree_gap = rb_entry(vma->vm_rb.rb_right,
322 struct vm_area_struct, vm_rb)->rb_subtree_gap;
323 if (subtree_gap > max)
324 max = subtree_gap;
325 }
326 return max;
327}
328
329static int browse_rb(struct mm_struct *mm)
330{
331 struct rb_root *root = &mm->mm_rb;
332 int i = 0, j, bug = 0;
333 struct rb_node *nd, *pn = NULL;
334 unsigned long prev = 0, pend = 0;
335
336 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
337 struct vm_area_struct *vma;
338 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
339 if (vma->vm_start < prev) {
340 pr_emerg("vm_start %lx < prev %lx\n",
341 vma->vm_start, prev);
342 bug = 1;
343 }
344 if (vma->vm_start < pend) {
345 pr_emerg("vm_start %lx < pend %lx\n",
346 vma->vm_start, pend);
347 bug = 1;
348 }
349 if (vma->vm_start > vma->vm_end) {
350 pr_emerg("vm_start %lx > vm_end %lx\n",
351 vma->vm_start, vma->vm_end);
352 bug = 1;
353 }
354 spin_lock(&mm->page_table_lock);
355 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
356 pr_emerg("free gap %lx, correct %lx\n",
357 vma->rb_subtree_gap,
358 vma_compute_subtree_gap(vma));
359 bug = 1;
360 }
361 spin_unlock(&mm->page_table_lock);
362 i++;
363 pn = nd;
364 prev = vma->vm_start;
365 pend = vma->vm_end;
366 }
367 j = 0;
368 for (nd = pn; nd; nd = rb_prev(nd))
369 j++;
370 if (i != j) {
371 pr_emerg("backwards %d, forwards %d\n", j, i);
372 bug = 1;
373 }
374 return bug ? -1 : i;
375}
376
377static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
378{
379 struct rb_node *nd;
380
381 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
382 struct vm_area_struct *vma;
383 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
384 VM_BUG_ON_VMA(vma != ignore &&
385 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
386 vma);
387 }
388}
389
390static void validate_mm(struct mm_struct *mm)
391{
392 int bug = 0;
393 int i = 0;
394 unsigned long highest_address = 0;
395 struct vm_area_struct *vma = mm->mmap;
396
397 while (vma) {
398 struct anon_vma *anon_vma = vma->anon_vma;
399 struct anon_vma_chain *avc;
400
401 if (anon_vma) {
402 anon_vma_lock_read(anon_vma);
403 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
404 anon_vma_interval_tree_verify(avc);
405 anon_vma_unlock_read(anon_vma);
406 }
407
408 highest_address = vm_end_gap(vma);
409 vma = vma->vm_next;
410 i++;
411 }
412 if (i != mm->map_count) {
413 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
414 bug = 1;
415 }
416 if (highest_address != mm->highest_vm_end) {
417 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
418 mm->highest_vm_end, highest_address);
419 bug = 1;
420 }
421 i = browse_rb(mm);
422 if (i != mm->map_count) {
423 if (i != -1)
424 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
425 bug = 1;
426 }
427 VM_BUG_ON_MM(bug, mm);
428}
429#else
430#define validate_mm_rb(root, ignore) do { } while (0)
431#define validate_mm(mm) do { } while (0)
432#endif

RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
			 struct vm_area_struct, vm_rb,
			 unsigned long, rb_subtree_gap, vma_compute_gap)

/*
 * Update the cached rb_subtree_gap values after vma->vm_start or
 * vma->vm_end has changed, propagating the new maximum up through the
 * rb_parent chain.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * The propagate callback generated by RB_DECLARE_CALLBACKS_MAX()
	 * already does exactly what we need here.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Note rb_erase_augmented is a fairly large inline function, so
	 * make sure we instantiate it only once with our desired augmented
	 * rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase, with
	 * the possible exception of the "ignore" vma, whose cached gap may
	 * already be stale because its owner (e.g. __vma_adjust()) is in
	 * the middle of updating it.
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	vma_rb_erase_ignore(vma, root, vma);
}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start, vm_end or vm_pgoff, the vma must
 * be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}
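
/*
 * Typical usage (see munmap_vma_range() and vma_link() below) is roughly:
 *
 *	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 *		handle the overlap (return -ENOMEM or munmap the range first);
 *	else
 *		vma_link(mm, vma, prev, rb_link, rb_parent);
 *
 * i.e. the function both detects overlaps and hands back the list/rbtree
 * insertion point for a new vma covering [addr, addr + len).
 */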

/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
 */
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
					      struct vm_area_struct *vma)
{
	if (!vma)
		return mm->mmap;

	return vma->vm_next;
}

/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer that will be set to the previous vm_area_struct
 * @link: the rb_node link pointer for the insertion point
 * @parent: the parent rb_node for the insertion point
 * @uf: the userfaultfd unmap list
 *
 * Find all the vm_area_structs that overlap [@start, @start + @len) and
 * munmap them, leaving *@pprev, *@link and *@parent describing where a
 * new vma for the range should be inserted.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
static inline int
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
		 struct vm_area_struct **pprev, struct rb_node ***link,
		 struct rb_node **parent, struct list_head *uf)
{
	while (find_vma_links(mm, start, start + len, pprev, link, parent))
		if (do_munmap(mm, start, len, uf))
			return -ENOMEM;

	return 0;
}

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree to find the
	 * correct insertion point for that vma. As a result, we could not
	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
	 * So, we first insert the vma with a zero rb_subtree_gap value
	 * (to be consistent with what we did on the way down), and then
	 * immediately update the gap to the correct value. Finally we
	 * rebalance the rbtree after all augmented values have been set.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			put_write_access(file_inode(file));
		if (vma->vm_flags & VM_SHARED)
			mapping_allow_writable(mapping);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into
 * the mm's list and rbtree.  It has already been inserted into the interval
 * tree.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *ignore)
{
	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
	__vma_unlink_list(mm, vma);
	/* Kill the cache */
	vmacache_invalidate(mm);
}
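
/*
 * A rough map of __vma_adjust()'s "remove_next" encoding, since the logic
 * below is dense: 1 means the old next vma is entirely covered by the
 * adjusted range and is removed; 2 means the removal extends past next, so
 * a second pass removes the vma after it as well; 3 is the case where
 * "vma" and "next" have been swapped because the expanded vma is the one
 * being kept.
 */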

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand)
{
743 struct mm_struct *mm = vma->vm_mm;
744 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
745 struct address_space *mapping = NULL;
746 struct rb_root_cached *root = NULL;
747 struct anon_vma *anon_vma = NULL;
748 struct file *file = vma->vm_file;
749 bool start_changed = false, end_changed = false;
750 long adjust_next = 0;
751 int remove_next = 0;
752
753 if (next && !insert) {
754 struct vm_area_struct *exporter = NULL, *importer = NULL;
755
756 if (end >= next->vm_end) {
757
758
759
760
761
762
763 if (next == expand) {
764
765
766
767
768 VM_WARN_ON(end != next->vm_end);
769
770
771
772
773
774 remove_next = 3;
775 VM_WARN_ON(file != next->vm_file);
776 swap(vma, next);
777 } else {
778 VM_WARN_ON(expand != vma);
779
780
781
782
783 remove_next = 1 + (end > next->vm_end);
784 VM_WARN_ON(remove_next == 2 &&
785 end != next->vm_next->vm_end);
786
787 end = next->vm_end;
788 }
789
790 exporter = next;
791 importer = vma;
792
793
794
795
796
797 if (remove_next == 2 && !next->anon_vma)
798 exporter = next->vm_next;
799
800 } else if (end > next->vm_start) {
801
802
803
804
805 adjust_next = (end - next->vm_start);
806 exporter = next;
807 importer = vma;
808 VM_WARN_ON(expand != importer);
809 } else if (end < vma->vm_end) {
810
811
812
813
814
815 adjust_next = -(vma->vm_end - end);
816 exporter = vma;
817 importer = next;
818 VM_WARN_ON(expand != importer);
819 }
820
821
822
823
824
825
826 if (exporter && exporter->anon_vma && !importer->anon_vma) {
827 int error;
828
829 importer->anon_vma = exporter->anon_vma;
830 error = anon_vma_clone(importer, exporter);
831 if (error)
832 return error;
833 }
834 }
835again:
836 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
837
838 if (file) {
839 mapping = file->f_mapping;
840 root = &mapping->i_mmap;
841 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
842
843 if (adjust_next)
844 uprobe_munmap(next, next->vm_start, next->vm_end);
845
846 i_mmap_lock_write(mapping);
847 if (insert) {
848
849
850
851
852
853
854 __vma_link_file(insert);
855 }
856 }
857
858 anon_vma = vma->anon_vma;
859 if (!anon_vma && adjust_next)
860 anon_vma = next->anon_vma;
861 if (anon_vma) {
862 VM_WARN_ON(adjust_next && next->anon_vma &&
863 anon_vma != next->anon_vma);
864 anon_vma_lock_write(anon_vma);
865 anon_vma_interval_tree_pre_update_vma(vma);
866 if (adjust_next)
867 anon_vma_interval_tree_pre_update_vma(next);
868 }
869
870 if (file) {
871 flush_dcache_mmap_lock(mapping);
872 vma_interval_tree_remove(vma, root);
873 if (adjust_next)
874 vma_interval_tree_remove(next, root);
875 }
876
877 if (start != vma->vm_start) {
878 vma->vm_start = start;
879 start_changed = true;
880 }
881 if (end != vma->vm_end) {
882 vma->vm_end = end;
883 end_changed = true;
884 }
885 vma->vm_pgoff = pgoff;
886 if (adjust_next) {
887 next->vm_start += adjust_next;
888 next->vm_pgoff += adjust_next >> PAGE_SHIFT;
889 }
890
891 if (file) {
892 if (adjust_next)
893 vma_interval_tree_insert(next, root);
894 vma_interval_tree_insert(vma, root);
895 flush_dcache_mmap_unlock(mapping);
896 }
897
898 if (remove_next) {
899
900
901
902
903 if (remove_next != 3)
904 __vma_unlink(mm, next, next);
905 else
906
907
908
909
910
911
912
913
914
915 __vma_unlink(mm, next, vma);
916 if (file)
917 __remove_shared_vm_struct(next, file, mapping);
918 } else if (insert) {
919
920
921
922
923
924 __insert_vm_struct(mm, insert);
925 } else {
926 if (start_changed)
927 vma_gap_update(vma);
928 if (end_changed) {
929 if (!next)
930 mm->highest_vm_end = vm_end_gap(vma);
931 else if (!adjust_next)
932 vma_gap_update(next);
933 }
934 }
935
936 if (anon_vma) {
937 anon_vma_interval_tree_post_update_vma(vma);
938 if (adjust_next)
939 anon_vma_interval_tree_post_update_vma(next);
940 anon_vma_unlock_write(anon_vma);
941 }
942
943 if (file) {
944 i_mmap_unlock_write(mapping);
945 uprobe_mmap(vma);
946
947 if (adjust_next)
948 uprobe_mmap(next);
949 }
950
951 if (remove_next) {
952 if (file) {
953 uprobe_munmap(next, next->vm_start, next->vm_end);
954 fput(file);
955 }
956 if (next->anon_vma)
957 anon_vma_merge(vma, next);
958 mm->map_count--;
959 mpol_put(vma_policy(next));
960 vm_area_free(next);
961
962
963
964
965
966 if (remove_next != 3) {
967
968
969
970
971
972
973 next = vma->vm_next;
974 } else {
975
976
977
978
979
980
981
982
983
984
985 next = vma;
986 }
987 if (remove_next == 2) {
988 remove_next = 1;
989 end = next->vm_end;
990 goto again;
991 }
992 else if (next)
993 vma_gap_update(next);
994 else {
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
1015 }
1016 }
1017 if (insert && file)
1018 uprobe_mmap(insert);
1019
1020 validate_mm(mm);
1021
1022 return 0;
1023}

/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
static inline int is_mergeable_vma(struct vm_area_struct *vma,
				struct file *file, unsigned long vm_flags,
				struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	/*
	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
	 * match the flags but dirty bit -- the caller should mark
	 * merged VMA as dirty. If dirty bit won't be excluded from
	 * comparison, we increase pressure on the memory system forcing
	 * the kernel to generate new VMAs when old one could be
	 * extended instead.
	 */
	if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMA cloned from
	 * parents. This can improve scalability caused by anon_vma lock.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of
 * pagecache indices because do_mmap() does not permit mmaps which wrap,
 * nor mmaps which cover the final page at index -1UL.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
		     struct anon_vma *anon_vma, struct file *file,
		     pgoff_t vm_pgoff,
		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 */
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
		    struct anon_vma *anon_vma, struct file *file,
		    pgoff_t vm_pgoff,
		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;

		vm_pglen = vma_pages(vma);
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor.
 * Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset or at the start of the vma).
 *
 * The caller passes in "prev", the vma preceding addr (or NULL).  On
 * success the merged vma is returned: either prev extended forward over
 * the new range, the following vma extended backward, or both collapsed
 * into a single vma when the range exactly fills the hole between them.
 * If no merge is possible, NULL is returned and the caller is expected
 * to allocate a new vma for the range.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy,
			struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
{
1164 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1165 struct vm_area_struct *area, *next;
1166 int err;
1167
1168
1169
1170
1171
1172 if (vm_flags & VM_SPECIAL)
1173 return NULL;
1174
1175 next = vma_next(mm, prev);
1176 area = next;
1177 if (area && area->vm_end == end)
1178 next = next->vm_next;
1179
1180
1181 VM_WARN_ON(prev && addr <= prev->vm_start);
1182 VM_WARN_ON(area && end > area->vm_end);
1183 VM_WARN_ON(addr >= end);
1184
1185
1186
1187
1188 if (prev && prev->vm_end == addr &&
1189 mpol_equal(vma_policy(prev), policy) &&
1190 can_vma_merge_after(prev, vm_flags,
1191 anon_vma, file, pgoff,
1192 vm_userfaultfd_ctx)) {
1193
1194
1195
1196 if (next && end == next->vm_start &&
1197 mpol_equal(policy, vma_policy(next)) &&
1198 can_vma_merge_before(next, vm_flags,
1199 anon_vma, file,
1200 pgoff+pglen,
1201 vm_userfaultfd_ctx) &&
1202 is_mergeable_anon_vma(prev->anon_vma,
1203 next->anon_vma, NULL)) {
1204
1205 err = __vma_adjust(prev, prev->vm_start,
1206 next->vm_end, prev->vm_pgoff, NULL,
1207 prev);
1208 } else
1209 err = __vma_adjust(prev, prev->vm_start,
1210 end, prev->vm_pgoff, NULL, prev);
1211 if (err)
1212 return NULL;
1213 khugepaged_enter_vma_merge(prev, vm_flags);
1214 return prev;
1215 }
1216
1217
1218
1219
1220 if (next && end == next->vm_start &&
1221 mpol_equal(policy, vma_policy(next)) &&
1222 can_vma_merge_before(next, vm_flags,
1223 anon_vma, file, pgoff+pglen,
1224 vm_userfaultfd_ctx)) {
1225 if (prev && addr < prev->vm_end)
1226 err = __vma_adjust(prev, prev->vm_start,
1227 addr, prev->vm_pgoff, NULL, next);
1228 else {
1229 err = __vma_adjust(area, addr, next->vm_end,
1230 next->vm_pgoff - pglen, NULL, next);
1231
1232
1233
1234
1235
1236 area = next;
1237 }
1238 if (err)
1239 return NULL;
1240 khugepaged_enter_vma_merge(area, vm_flags);
1241 return area;
1242 }
1243
1244 return NULL;
1245}

/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 *
 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
 * we can merge the two vma's. For example, we refuse to merge a vma if
 * there is a vm_ops->close() function, because that indicates that the
 * driver is doing some kind of reference counting. But that doesn't
 * really matter for the anon_vma sharing case.
 */
static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
{
	return a->vm_end == b->vm_start &&
		mpol_equal(vma_policy(a), vma_policy(b)) &&
		a->vm_file == b->vm_file &&
		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}

/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
 * the same as 'old', the other will be the new one that is trying
 * to share the anon_vma.
 *
 * This runs with the mmap lock held for reading, so old->anon_vma may be
 * set up concurrently by another page fault; the READ_ONCE() makes sure
 * we only load the pointer once.  An anon_vma that is still being set up
 * is necessarily a singleton, so reusing it optimistically is safe.
 */
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
{
	if (anon_vma_compatible(a, b)) {
		struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);

		if (anon_vma && list_is_singular(&old->anon_vma_chain))
			return anon_vma;
	}
	return NULL;
}
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1311{
1312 struct anon_vma *anon_vma = NULL;
1313
1314
1315 if (vma->vm_next) {
1316 anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
1317 if (anon_vma)
1318 return anon_vma;
1319 }
1320
1321
1322 if (vma->vm_prev)
1323 anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335 return anon_vma;
1336}

/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr
 */
static inline unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	if (((void *)hint != NULL) &&
	    (hint < mmap_min_addr))
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}
1350
1351static inline int mlock_future_check(struct mm_struct *mm,
1352 unsigned long flags,
1353 unsigned long len)
1354{
1355 unsigned long locked, lock_limit;
1356
1357
1358 if (flags & VM_LOCKED) {
1359 locked = len >> PAGE_SHIFT;
1360 locked += mm->locked_vm;
1361 lock_limit = rlimit(RLIMIT_MEMLOCK);
1362 lock_limit >>= PAGE_SHIFT;
1363 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1364 return -EAGAIN;
1365 }
1366 return 0;
1367}
1368
1369static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
1370{
1371 if (S_ISREG(inode->i_mode))
1372 return MAX_LFS_FILESIZE;
1373
1374 if (S_ISBLK(inode->i_mode))
1375 return MAX_LFS_FILESIZE;
1376
1377 if (S_ISSOCK(inode->i_mode))
1378 return MAX_LFS_FILESIZE;
1379
1380
1381 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
1382 return 0;
1383
1384
1385 return ULONG_MAX;
1386}
1387
1388static inline bool file_mmap_ok(struct file *file, struct inode *inode,
1389 unsigned long pgoff, unsigned long len)
1390{
1391 u64 maxsize = file_mmap_size_max(file, inode);
1392
1393 if (maxsize && len > maxsize)
1394 return false;
1395 maxsize -= len;
1396 if (pgoff > maxsize >> PAGE_SHIFT)
1397 return false;
1398 return true;
1399}

/*
 * The caller must hold the mmap write lock of current->mm.
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	vm_flags_t vm_flags;
	int pkey = 0;

	*populate = 0;

	if (!len)
		return -EINVAL;
1417
1418
1419
1420
1421
1422
1423
1424 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1425 if (!(file && path_noexec(&file->f_path)))
1426 prot |= PROT_EXEC;
1427
1428
1429 if (flags & MAP_FIXED_NOREPLACE)
1430 flags |= MAP_FIXED;
1431
1432 if (!(flags & MAP_FIXED))
1433 addr = round_hint_to_min(addr);
1434
1435
1436 len = PAGE_ALIGN(len);
1437 if (!len)
1438 return -ENOMEM;
1439
1440
1441 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1442 return -EOVERFLOW;
1443
1444
1445 if (mm->map_count > sysctl_max_map_count)
1446 return -ENOMEM;
1447
1448
1449
1450
1451 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1452 if (IS_ERR_VALUE(addr))
1453 return addr;
1454
1455 if (flags & MAP_FIXED_NOREPLACE) {
1456 struct vm_area_struct *vma = find_vma(mm, addr);
1457
1458 if (vma && vma->vm_start < addr + len)
1459 return -EEXIST;
1460 }
1461
1462 if (prot == PROT_EXEC) {
1463 pkey = execute_only_pkey(mm);
1464 if (pkey < 0)
1465 pkey = 0;
1466 }
1467
1468
1469
1470
1471
1472 vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1473 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1474
1475 if (flags & MAP_LOCKED)
1476 if (!can_do_mlock())
1477 return -EPERM;
1478
1479 if (mlock_future_check(mm, vm_flags, len))
1480 return -EAGAIN;
1481
1482 if (file) {
1483 struct inode *inode = file_inode(file);
1484 unsigned long flags_mask;
1485
1486 if (!file_mmap_ok(file, inode, pgoff, len))
1487 return -EOVERFLOW;
1488
1489 flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
1490
1491 switch (flags & MAP_TYPE) {
1492 case MAP_SHARED:
1493
1494
1495
1496
1497
1498
1499
1500 flags &= LEGACY_MAP_MASK;
1501 fallthrough;
1502 case MAP_SHARED_VALIDATE:
1503 if (flags & ~flags_mask)
1504 return -EOPNOTSUPP;
1505 if (prot & PROT_WRITE) {
1506 if (!(file->f_mode & FMODE_WRITE))
1507 return -EACCES;
1508 if (IS_SWAPFILE(file->f_mapping->host))
1509 return -ETXTBSY;
1510 }
1511
1512
1513
1514
1515
1516 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1517 return -EACCES;
1518
1519
1520
1521
1522 if (locks_verify_locked(file))
1523 return -EAGAIN;
1524
1525 vm_flags |= VM_SHARED | VM_MAYSHARE;
1526 if (!(file->f_mode & FMODE_WRITE))
1527 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1528 fallthrough;
1529 case MAP_PRIVATE:
1530 if (!(file->f_mode & FMODE_READ))
1531 return -EACCES;
1532 if (path_noexec(&file->f_path)) {
1533 if (vm_flags & VM_EXEC)
1534 return -EPERM;
1535 vm_flags &= ~VM_MAYEXEC;
1536 }
1537
1538 if (!file->f_op->mmap)
1539 return -ENODEV;
1540 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1541 return -EINVAL;
1542 break;
1543
1544 default:
1545 return -EINVAL;
1546 }
1547 } else {
1548 switch (flags & MAP_TYPE) {
1549 case MAP_SHARED:
1550 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1551 return -EINVAL;
1552
1553
1554
1555 pgoff = 0;
1556 vm_flags |= VM_SHARED | VM_MAYSHARE;
1557 break;
1558 case MAP_PRIVATE:
1559
1560
1561
1562 pgoff = addr >> PAGE_SHIFT;
1563 break;
1564 default:
1565 return -EINVAL;
1566 }
1567 }
1568
1569
1570
1571
1572
1573 if (flags & MAP_NORESERVE) {
1574
1575 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1576 vm_flags |= VM_NORESERVE;
1577
1578
1579 if (file && is_file_hugepages(file))
1580 vm_flags |= VM_NORESERVE;
1581 }
1582
1583 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1584 if (!IS_ERR_VALUE(addr) &&
1585 ((vm_flags & VM_LOCKED) ||
1586 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1587 *populate = len;
1588 return addr;
1589}
1590
1591unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1592 unsigned long prot, unsigned long flags,
1593 unsigned long fd, unsigned long pgoff)
1594{
1595 struct file *file = NULL;
1596 unsigned long retval;
1597
1598 if (!(flags & MAP_ANONYMOUS)) {
1599 audit_mmap_fd(fd, flags);
1600 file = fget(fd);
1601 if (!file)
1602 return -EBADF;
1603 if (is_file_hugepages(file)) {
1604 len = ALIGN(len, huge_page_size(hstate_file(file)));
1605 } else if (unlikely(flags & MAP_HUGETLB)) {
1606 retval = -EINVAL;
1607 goto out_fput;
1608 }
1609 } else if (flags & MAP_HUGETLB) {
1610 struct user_struct *user = NULL;
1611 struct hstate *hs;
1612
1613 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1614 if (!hs)
1615 return -EINVAL;
1616
1617 len = ALIGN(len, huge_page_size(hs));
1618
1619
1620
1621
1622
1623
1624 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1625 VM_NORESERVE,
1626 &user, HUGETLB_ANONHUGE_INODE,
1627 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1628 if (IS_ERR(file))
1629 return PTR_ERR(file);
1630 }
1631
1632 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1633
1634 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1635out_fput:
1636 if (file)
1637 fput(file);
1638 return retval;
1639}
1640
1641SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1642 unsigned long, prot, unsigned long, flags,
1643 unsigned long, fd, unsigned long, pgoff)
1644{
1645 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1646}

#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	if (offset_in_page(a.offset))
		return -EINVAL;

	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
			       a.offset >> PAGE_SHIFT);
}
#endif

/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;

	/* The open routine did something to the protections that pgprot_modify
	 * won't preserve? */
	if (pgprot_val(vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
		return 0;

	/* Do we need to track softdirty? */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
		return 1;

	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_can_writeback(vma->vm_file->f_mapping);
}

/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
	/*
	 * hugetlb has its own accounting separate from the core VM.
	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
	 */
	if (file && is_file_hugepages(file))
		return 0;

	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}
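
/*
 * For example, an anonymous MAP_PRIVATE|PROT_WRITE mapping is charged
 * against overcommit accounting (and gets VM_ACCOUNT in mmap_region()
 * below), while a MAP_SHARED file mapping or a MAP_NORESERVE mapping is
 * not charged here.
 */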
1725
1726unsigned long mmap_region(struct file *file, unsigned long addr,
1727 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1728 struct list_head *uf)
1729{
1730 struct mm_struct *mm = current->mm;
1731 struct vm_area_struct *vma, *prev, *merge;
1732 int error;
1733 struct rb_node **rb_link, *rb_parent;
1734 unsigned long charged = 0;
1735
1736
1737 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1738 unsigned long nr_pages;
1739
1740
1741
1742
1743
1744 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1745
1746 if (!may_expand_vm(mm, vm_flags,
1747 (len >> PAGE_SHIFT) - nr_pages))
1748 return -ENOMEM;
1749 }
1750
1751
1752 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
1753 return -ENOMEM;
1754
1755
1756
1757 if (accountable_mapping(file, vm_flags)) {
1758 charged = len >> PAGE_SHIFT;
1759 if (security_vm_enough_memory_mm(mm, charged))
1760 return -ENOMEM;
1761 vm_flags |= VM_ACCOUNT;
1762 }
1763
1764
1765
1766
1767 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1768 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1769 if (vma)
1770 goto out;
1771
1772
1773
1774
1775
1776
1777 vma = vm_area_alloc(mm);
1778 if (!vma) {
1779 error = -ENOMEM;
1780 goto unacct_error;
1781 }
1782
1783 vma->vm_start = addr;
1784 vma->vm_end = addr + len;
1785 vma->vm_flags = vm_flags;
1786 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1787 vma->vm_pgoff = pgoff;
1788
1789 if (file) {
1790 if (vm_flags & VM_DENYWRITE) {
1791 error = deny_write_access(file);
1792 if (error)
1793 goto free_vma;
1794 }
1795 if (vm_flags & VM_SHARED) {
1796 error = mapping_map_writable(file->f_mapping);
1797 if (error)
1798 goto allow_write_and_free_vma;
1799 }
1800
1801
1802
1803
1804
1805
1806 vma->vm_file = get_file(file);
1807 error = call_mmap(file, vma);
1808 if (error)
1809 goto unmap_and_free_vma;
1810
1811
1812
1813
1814
1815
1816
1817
1818 WARN_ON_ONCE(addr != vma->vm_start);
1819
1820 addr = vma->vm_start;
1821
1822
1823
1824
1825 if (unlikely(vm_flags != vma->vm_flags && prev)) {
1826 merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
1827 NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
1828 if (merge) {
1829
1830
1831
1832
1833 fput(vma->vm_file);
1834 vm_area_free(vma);
1835 vma = merge;
1836
1837 vm_flags = vma->vm_flags;
1838 goto unmap_writable;
1839 }
1840 }
1841
1842 vm_flags = vma->vm_flags;
1843 } else if (vm_flags & VM_SHARED) {
1844 error = shmem_zero_setup(vma);
1845 if (error)
1846 goto free_vma;
1847 } else {
1848 vma_set_anonymous(vma);
1849 }
1850
1851
1852 if (!arch_validate_flags(vma->vm_flags)) {
1853 error = -EINVAL;
1854 if (file)
1855 goto unmap_and_free_vma;
1856 else
1857 goto free_vma;
1858 }
1859
1860 vma_link(mm, vma, prev, rb_link, rb_parent);
1861
1862 if (file) {
1863unmap_writable:
1864 if (vm_flags & VM_SHARED)
1865 mapping_unmap_writable(file->f_mapping);
1866 if (vm_flags & VM_DENYWRITE)
1867 allow_write_access(file);
1868 }
1869 file = vma->vm_file;
1870out:
1871 perf_event_mmap(vma);
1872
1873 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1874 if (vm_flags & VM_LOCKED) {
1875 if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
1876 is_vm_hugetlb_page(vma) ||
1877 vma == get_gate_vma(current->mm))
1878 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1879 else
1880 mm->locked_vm += (len >> PAGE_SHIFT);
1881 }
1882
1883 if (file)
1884 uprobe_mmap(vma);
1885
1886
1887
1888
1889
1890
1891
1892
1893 vma->vm_flags |= VM_SOFTDIRTY;
1894
1895 vma_set_page_prot(vma);
1896
1897 return addr;
1898
1899unmap_and_free_vma:
1900 vma->vm_file = NULL;
1901 fput(file);
1902
1903
1904 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1905 charged = 0;
1906 if (vm_flags & VM_SHARED)
1907 mapping_unmap_writable(file->f_mapping);
1908allow_write_and_free_vma:
1909 if (vm_flags & VM_DENYWRITE)
1910 allow_write_access(file);
1911free_vma:
1912 vm_area_free(vma);
1913unacct_error:
1914 if (charged)
1915 vm_unacct_memory(charged);
1916 return error;
1917}
1918
1919static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1920{
1921
1922
1923
1924
1925
1926
1927
1928
1929 struct mm_struct *mm = current->mm;
1930 struct vm_area_struct *vma;
1931 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1932
1933
1934 length = info->length + info->align_mask;
1935 if (length < info->length)
1936 return -ENOMEM;
1937
1938
1939 if (info->high_limit < length)
1940 return -ENOMEM;
1941 high_limit = info->high_limit - length;
1942
1943 if (info->low_limit > high_limit)
1944 return -ENOMEM;
1945 low_limit = info->low_limit + length;
1946
1947
1948 if (RB_EMPTY_ROOT(&mm->mm_rb))
1949 goto check_highest;
1950 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1951 if (vma->rb_subtree_gap < length)
1952 goto check_highest;
1953
1954 while (true) {
1955
1956 gap_end = vm_start_gap(vma);
1957 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1958 struct vm_area_struct *left =
1959 rb_entry(vma->vm_rb.rb_left,
1960 struct vm_area_struct, vm_rb);
1961 if (left->rb_subtree_gap >= length) {
1962 vma = left;
1963 continue;
1964 }
1965 }
1966
1967 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1968check_current:
1969
1970 if (gap_start > high_limit)
1971 return -ENOMEM;
1972 if (gap_end >= low_limit &&
1973 gap_end > gap_start && gap_end - gap_start >= length)
1974 goto found;
1975
1976
1977 if (vma->vm_rb.rb_right) {
1978 struct vm_area_struct *right =
1979 rb_entry(vma->vm_rb.rb_right,
1980 struct vm_area_struct, vm_rb);
1981 if (right->rb_subtree_gap >= length) {
1982 vma = right;
1983 continue;
1984 }
1985 }
1986
1987
1988 while (true) {
1989 struct rb_node *prev = &vma->vm_rb;
1990 if (!rb_parent(prev))
1991 goto check_highest;
1992 vma = rb_entry(rb_parent(prev),
1993 struct vm_area_struct, vm_rb);
1994 if (prev == vma->vm_rb.rb_left) {
1995 gap_start = vm_end_gap(vma->vm_prev);
1996 gap_end = vm_start_gap(vma);
1997 goto check_current;
1998 }
1999 }
2000 }
2001
2002check_highest:
2003
2004 gap_start = mm->highest_vm_end;
2005 gap_end = ULONG_MAX;
2006 if (gap_start > high_limit)
2007 return -ENOMEM;
2008
2009found:
2010
2011 if (gap_start < info->low_limit)
2012 gap_start = info->low_limit;
2013
2014
2015 gap_start += (info->align_offset - gap_start) & info->align_mask;
2016
2017 VM_BUG_ON(gap_start + info->length > info->high_limit);
2018 VM_BUG_ON(gap_start + info->length > gap_end);
2019 return gap_start;
2020}
2021
2022static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
2023{
2024 struct mm_struct *mm = current->mm;
2025 struct vm_area_struct *vma;
2026 unsigned long length, low_limit, high_limit, gap_start, gap_end;
2027
2028
2029 length = info->length + info->align_mask;
2030 if (length < info->length)
2031 return -ENOMEM;
2032
2033
2034
2035
2036
2037 gap_end = info->high_limit;
2038 if (gap_end < length)
2039 return -ENOMEM;
2040 high_limit = gap_end - length;
2041
2042 if (info->low_limit > high_limit)
2043 return -ENOMEM;
2044 low_limit = info->low_limit + length;
2045
2046
2047 gap_start = mm->highest_vm_end;
2048 if (gap_start <= high_limit)
2049 goto found_highest;
2050
2051
2052 if (RB_EMPTY_ROOT(&mm->mm_rb))
2053 return -ENOMEM;
2054 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2055 if (vma->rb_subtree_gap < length)
2056 return -ENOMEM;
2057
2058 while (true) {
2059
2060 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2061 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2062 struct vm_area_struct *right =
2063 rb_entry(vma->vm_rb.rb_right,
2064 struct vm_area_struct, vm_rb);
2065 if (right->rb_subtree_gap >= length) {
2066 vma = right;
2067 continue;
2068 }
2069 }
2070
2071check_current:
2072
2073 gap_end = vm_start_gap(vma);
2074 if (gap_end < low_limit)
2075 return -ENOMEM;
2076 if (gap_start <= high_limit &&
2077 gap_end > gap_start && gap_end - gap_start >= length)
2078 goto found;
2079
2080
2081 if (vma->vm_rb.rb_left) {
2082 struct vm_area_struct *left =
2083 rb_entry(vma->vm_rb.rb_left,
2084 struct vm_area_struct, vm_rb);
2085 if (left->rb_subtree_gap >= length) {
2086 vma = left;
2087 continue;
2088 }
2089 }
2090
2091
2092 while (true) {
2093 struct rb_node *prev = &vma->vm_rb;
2094 if (!rb_parent(prev))
2095 return -ENOMEM;
2096 vma = rb_entry(rb_parent(prev),
2097 struct vm_area_struct, vm_rb);
2098 if (prev == vma->vm_rb.rb_right) {
2099 gap_start = vma->vm_prev ?
2100 vm_end_gap(vma->vm_prev) : 0;
2101 goto check_current;
2102 }
2103 }
2104 }
2105
2106found:
2107
2108 if (gap_end > info->high_limit)
2109 gap_end = info->high_limit;
2110
2111found_highest:
2112
2113 gap_end -= info->length;
2114 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2115
2116 VM_BUG_ON(gap_end < info->low_limit);
2117 VM_BUG_ON(gap_end < gap_start);
2118 return gap_end;
2119}

/**
 * vm_unmapped_area() - Find an area between the low_limit and the high_limit
 * with the correct alignment and offset, all from @info. Note: current->mm is
 * used for the search.
 *
 * @info: The unmapped area information including the range [low_limit -
 * high_limit), the alignment offset and mask.
 *
 * Return: A memory address or -ENOMEM.
 */
unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
{
	unsigned long addr;

	if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
		addr = unmapped_area_topdown(info);
	else
		addr = unmapped_area(info);

	trace_vm_unmapped_area(addr, info);
	return addr;
}
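
/*
 * A typical caller (see arch_get_unmapped_area() below) fills in a
 * struct vm_unmapped_area_info roughly like:
 *
 *	info.flags = 0;			/- or VM_UNMAPPED_AREA_TOPDOWN -/
 *	info.length = len;
 *	info.low_limit = mm->mmap_base;
 *	info.high_limit = TASK_SIZE;
 *	info.align_mask = 0;
 *	info.align_offset = 0;
 *	addr = vm_unmapped_area(&info);
 *
 * and then checks the result with IS_ERR_VALUE() or offset_in_page().
 */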

#ifndef arch_get_mmap_end
#define arch_get_mmap_end(addr)	(TASK_SIZE)
#endif

#ifndef arch_get_mmap_base
#define arch_get_mmap_base(addr, base) (base)
#endif

/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	addr = get_unmapped_area(file, addr, len, pgoff, flags);
 *	if (addr & ~PAGE_MASK)
 *		error = addr;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
2162#ifndef HAVE_ARCH_UNMAPPED_AREA
2163unsigned long
2164arch_get_unmapped_area(struct file *filp, unsigned long addr,
2165 unsigned long len, unsigned long pgoff, unsigned long flags)
2166{
2167 struct mm_struct *mm = current->mm;
2168 struct vm_area_struct *vma, *prev;
2169 struct vm_unmapped_area_info info;
2170 const unsigned long mmap_end = arch_get_mmap_end(addr);
2171
2172 if (len > mmap_end - mmap_min_addr)
2173 return -ENOMEM;
2174
2175 if (flags & MAP_FIXED)
2176 return addr;
2177
2178 if (addr) {
2179 addr = PAGE_ALIGN(addr);
2180 vma = find_vma_prev(mm, addr, &prev);
2181 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2182 (!vma || addr + len <= vm_start_gap(vma)) &&
2183 (!prev || addr >= vm_end_gap(prev)))
2184 return addr;
2185 }
2186
2187 info.flags = 0;
2188 info.length = len;
2189 info.low_limit = mm->mmap_base;
2190 info.high_limit = mmap_end;
2191 info.align_mask = 0;
2192 info.align_offset = 0;
2193 return vm_unmapped_area(&info);
2194}
2195#endif
2196
2197
2198
2199
2200
2201#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2202unsigned long
2203arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
2204 unsigned long len, unsigned long pgoff,
2205 unsigned long flags)
2206{
2207 struct vm_area_struct *vma, *prev;
2208 struct mm_struct *mm = current->mm;
2209 struct vm_unmapped_area_info info;
2210 const unsigned long mmap_end = arch_get_mmap_end(addr);
2211
2212
2213 if (len > mmap_end - mmap_min_addr)
2214 return -ENOMEM;
2215
2216 if (flags & MAP_FIXED)
2217 return addr;
2218
2219
2220 if (addr) {
2221 addr = PAGE_ALIGN(addr);
2222 vma = find_vma_prev(mm, addr, &prev);
2223 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2224 (!vma || addr + len <= vm_start_gap(vma)) &&
2225 (!prev || addr >= vm_end_gap(prev)))
2226 return addr;
2227 }
2228
2229 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2230 info.length = len;
2231 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2232 info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
2233 info.align_mask = 0;
2234 info.align_offset = 0;
2235 addr = vm_unmapped_area(&info);
2236
2237
2238
2239
2240
2241
2242
2243 if (offset_in_page(addr)) {
2244 VM_BUG_ON(addr != -ENOMEM);
2245 info.flags = 0;
2246 info.low_limit = TASK_UNMAPPED_BASE;
2247 info.high_limit = mmap_end;
2248 addr = vm_unmapped_area(&info);
2249 }
2250
2251 return addr;
2252}
2253#endif
2254
2255unsigned long
2256get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2257 unsigned long pgoff, unsigned long flags)
2258{
2259 unsigned long (*get_area)(struct file *, unsigned long,
2260 unsigned long, unsigned long, unsigned long);
2261
2262 unsigned long error = arch_mmap_check(addr, len, flags);
2263 if (error)
2264 return error;
2265
2266
2267 if (len > TASK_SIZE)
2268 return -ENOMEM;
2269
2270 get_area = current->mm->get_unmapped_area;
2271 if (file) {
2272 if (file->f_op->get_unmapped_area)
2273 get_area = file->f_op->get_unmapped_area;
2274 } else if (flags & MAP_SHARED) {
2275
2276
2277
2278
2279
2280 pgoff = 0;
2281 get_area = shmem_get_unmapped_area;
2282 }
2283
2284 addr = get_area(file, addr, len, pgoff, flags);
2285 if (IS_ERR_VALUE(addr))
2286 return addr;
2287
2288 if (addr > TASK_SIZE - len)
2289 return -ENOMEM;
2290 if (offset_in_page(addr))
2291 return -EINVAL;
2292
2293 error = security_mmap_addr(addr);
2294 return error ? error : addr;
2295}
2296
2297EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
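
/*
 * Note that find_vma() expects the caller to hold the mmap lock (read or
 * write); the result is only stable for as long as that lock is held.  The
 * per-thread vmacache consulted above is just a fast path and is
 * invalidated whenever VMAs are removed or rearranged (see the
 * vmacache_invalidate() calls in this file).
 */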

/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */
2336struct vm_area_struct *
2337find_vma_prev(struct mm_struct *mm, unsigned long addr,
2338 struct vm_area_struct **pprev)
2339{
2340 struct vm_area_struct *vma;
2341
2342 vma = find_vma(mm, addr);
2343 if (vma) {
2344 *pprev = vma->vm_prev;
2345 } else {
2346 struct rb_node *rb_node = rb_last(&mm->mm_rb);
2347
2348 *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
2349 }
2350 return vma;
2351}
2352
2353
2354
2355
2356
2357
2358static int acct_stack_growth(struct vm_area_struct *vma,
2359 unsigned long size, unsigned long grow)
2360{
2361 struct mm_struct *mm = vma->vm_mm;
2362 unsigned long new_start;
2363
2364
2365 if (!may_expand_vm(mm, vma->vm_flags, grow))
2366 return -ENOMEM;
2367
2368
2369 if (size > rlimit(RLIMIT_STACK))
2370 return -ENOMEM;
2371
2372
2373 if (vma->vm_flags & VM_LOCKED) {
2374 unsigned long locked;
2375 unsigned long limit;
2376 locked = mm->locked_vm + grow;
2377 limit = rlimit(RLIMIT_MEMLOCK);
2378 limit >>= PAGE_SHIFT;
2379 if (locked > limit && !capable(CAP_IPC_LOCK))
2380 return -ENOMEM;
2381 }
2382
2383
2384 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2385 vma->vm_end - size;
2386 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2387 return -EFAULT;
2388
2389
2390
2391
2392
2393 if (security_vm_enough_memory_mm(mm, grow))
2394 return -ENOMEM;
2395
2396 return 0;
2397}
2398
2399#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2400
2401
2402
2403
2404int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2405{
2406 struct mm_struct *mm = vma->vm_mm;
2407 struct vm_area_struct *next;
2408 unsigned long gap_addr;
2409 int error = 0;
2410
2411 if (!(vma->vm_flags & VM_GROWSUP))
2412 return -EFAULT;
2413
2414
2415 address &= PAGE_MASK;
2416 if (address >= (TASK_SIZE & PAGE_MASK))
2417 return -ENOMEM;
2418 address += PAGE_SIZE;
2419
2420
2421 gap_addr = address + stack_guard_gap;
2422
2423
2424 if (gap_addr < address || gap_addr > TASK_SIZE)
2425 gap_addr = TASK_SIZE;
2426
2427 next = vma->vm_next;
2428 if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
2429 if (!(next->vm_flags & VM_GROWSUP))
2430 return -ENOMEM;
2431
2432 }
2433
2434
2435 if (unlikely(anon_vma_prepare(vma)))
2436 return -ENOMEM;
2437
2438
2439
2440
2441
2442
2443 anon_vma_lock_write(vma->anon_vma);
2444
2445
2446 if (address > vma->vm_end) {
2447 unsigned long size, grow;
2448
2449 size = address - vma->vm_start;
2450 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2451
2452 error = -ENOMEM;
2453 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2454 error = acct_stack_growth(vma, size, grow);
2455 if (!error) {
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467 spin_lock(&mm->page_table_lock);
2468 if (vma->vm_flags & VM_LOCKED)
2469 mm->locked_vm += grow;
2470 vm_stat_account(mm, vma->vm_flags, grow);
2471 anon_vma_interval_tree_pre_update_vma(vma);
2472 vma->vm_end = address;
2473 anon_vma_interval_tree_post_update_vma(vma);
2474 if (vma->vm_next)
2475 vma_gap_update(vma->vm_next);
2476 else
2477 mm->highest_vm_end = vm_end_gap(vma);
2478 spin_unlock(&mm->page_table_lock);
2479
2480 perf_event_mmap(vma);
2481 }
2482 }
2483 }
2484 anon_vma_unlock_write(vma->anon_vma);
2485 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2486 validate_mm(mm);
2487 return error;
2488}
2489#endif
2490
2491
2492
2493
2494int expand_downwards(struct vm_area_struct *vma,
2495 unsigned long address)
2496{
2497 struct mm_struct *mm = vma->vm_mm;
2498 struct vm_area_struct *prev;
2499 int error = 0;
2500
2501 address &= PAGE_MASK;
2502 if (address < mmap_min_addr)
2503 return -EPERM;
2504
2505
2506 prev = vma->vm_prev;
2507
2508 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2509 vma_is_accessible(prev)) {
2510 if (address - prev->vm_end < stack_guard_gap)
2511 return -ENOMEM;
2512 }
2513
2514
2515 if (unlikely(anon_vma_prepare(vma)))
2516 return -ENOMEM;
2517
2518
2519
2520
2521
2522
2523 anon_vma_lock_write(vma->anon_vma);
2524
2525
2526 if (address < vma->vm_start) {
2527 unsigned long size, grow;
2528
2529 size = vma->vm_end - address;
2530 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2531
2532 error = -ENOMEM;
2533 if (grow <= vma->vm_pgoff) {
2534 error = acct_stack_growth(vma, size, grow);
2535 if (!error) {
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547 spin_lock(&mm->page_table_lock);
2548 if (vma->vm_flags & VM_LOCKED)
2549 mm->locked_vm += grow;
2550 vm_stat_account(mm, vma->vm_flags, grow);
2551 anon_vma_interval_tree_pre_update_vma(vma);
2552 vma->vm_start = address;
2553 vma->vm_pgoff -= grow;
2554 anon_vma_interval_tree_post_update_vma(vma);
2555 vma_gap_update(vma);
2556 spin_unlock(&mm->page_table_lock);
2557
2558 perf_event_mmap(vma);
2559 }
2560 }
2561 }
2562 anon_vma_unlock_write(vma->anon_vma);
2563 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2564 validate_mm(mm);
2565 return error;
2566}

/* enforced gap between the expanding stack and other mappings. */
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

static int __init cmdline_parse_stack_guard_gap(char *p)
{
	unsigned long val;
	char *endptr;

	val = simple_strtoul(p, &endptr, 10);
	if (!*endptr)
		stack_guard_gap = val << PAGE_SHIFT;

	return 0;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
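
/*
 * The boot parameter is given in pages: for example, "stack_guard_gap=256"
 * on the kernel command line keeps the default gap, which with 4 KiB pages
 * is 1 MiB between a growing stack and the nearest other mapping.
 */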
2583
2584#ifdef CONFIG_STACK_GROWSUP
2585int expand_stack(struct vm_area_struct *vma, unsigned long address)
2586{
2587 return expand_upwards(vma, address);
2588}
2589
2590struct vm_area_struct *
2591find_extend_vma(struct mm_struct *mm, unsigned long addr)
2592{
2593 struct vm_area_struct *vma, *prev;
2594
2595 addr &= PAGE_MASK;
2596 vma = find_vma_prev(mm, addr, &prev);
2597 if (vma && (vma->vm_start <= addr))
2598 return vma;
2599
2600 if (!prev || expand_stack(prev, addr))
2601 return NULL;
2602 if (prev->vm_flags & VM_LOCKED)
2603 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2604 return prev;
2605}
2606#else
2607int expand_stack(struct vm_area_struct *vma, unsigned long address)
2608{
2609 return expand_downwards(vma, address);
2610}
2611
2612struct vm_area_struct *
2613find_extend_vma(struct mm_struct *mm, unsigned long addr)
2614{
2615 struct vm_area_struct *vma;
2616 unsigned long start;
2617
2618 addr &= PAGE_MASK;
2619 vma = find_vma(mm, addr);
2620 if (!vma)
2621 return NULL;
2622 if (vma->vm_start <= addr)
2623 return vma;
2624 if (!(vma->vm_flags & VM_GROWSDOWN))
2625 return NULL;
2626 start = vma->vm_start;
2627 if (expand_stack(vma, addr))
2628 return NULL;
2629 if (vma->vm_flags & VM_LOCKED)
2630 populate_vma_page_range(vma, addr, start, NULL);
2631 return vma;
2632}
2633#endif
2634
2635EXPORT_SYMBOL_GPL(find_extend_vma);
2636
2637
2638
2639
2640
2641
2642
2643static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2644{
2645 unsigned long nr_accounted = 0;
2646
2647
2648 update_hiwater_vm(mm);
2649 do {
2650 long nrpages = vma_pages(vma);
2651
2652 if (vma->vm_flags & VM_ACCOUNT)
2653 nr_accounted += nrpages;
2654 vm_stat_account(mm, vma->vm_flags, -nrpages);
2655 vma = remove_vma(vma);
2656 } while (vma);
2657 vm_unacct_memory(nr_accounted);
2658 validate_mm(mm);
2659}
2660
2661
2662
2663
2664
2665
2666static void unmap_region(struct mm_struct *mm,
2667 struct vm_area_struct *vma, struct vm_area_struct *prev,
2668 unsigned long start, unsigned long end)
2669{
2670 struct vm_area_struct *next = vma_next(mm, prev);
2671 struct mmu_gather tlb;
2672
2673 lru_add_drain();
2674 tlb_gather_mmu(&tlb, mm, start, end);
2675 update_hiwater_rss(mm);
2676 unmap_vmas(&tlb, vma, start, end);
2677 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2678 next ? next->vm_start : USER_PGTABLES_CEILING);
2679 tlb_finish_mmu(&tlb, start, end);
2680}
2681
2682
2683
2684
2685
2686static bool
2687detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2688 struct vm_area_struct *prev, unsigned long end)
2689{
2690 struct vm_area_struct **insertion_point;
2691 struct vm_area_struct *tail_vma = NULL;
2692
2693 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2694 vma->vm_prev = NULL;
2695 do {
2696 vma_rb_erase(vma, &mm->mm_rb);
2697 mm->map_count--;
2698 tail_vma = vma;
2699 vma = vma->vm_next;
2700 } while (vma && vma->vm_start < end);
2701 *insertion_point = vma;
2702 if (vma) {
2703 vma->vm_prev = prev;
2704 vma_gap_update(vma);
2705 } else
2706 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2707 tail_vma->vm_next = NULL;
2708
2709
2710 vmacache_invalidate(mm);
2711
2712
2713
2714
2715
2716
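	/*
	 * Do not downgrade mmap_lock if we are next to a VM_GROWSDOWN or
	 * VM_GROWSUP VMA.  Such VMAs can change their size under
	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
	 */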
2717 if (vma && (vma->vm_flags & VM_GROWSDOWN))
2718 return false;
2719 if (prev && (prev->vm_flags & VM_GROWSUP))
2720 return false;
2721 return true;
2722}
2723
2724
2725
2726
2727
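/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */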
2728int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2729 unsigned long addr, int new_below)
2730{
2731 struct vm_area_struct *new;
2732 int err;
2733
2734 if (vma->vm_ops && vma->vm_ops->split) {
2735 err = vma->vm_ops->split(vma, addr);
2736 if (err)
2737 return err;
2738 }
2739
2740 new = vm_area_dup(vma);
2741 if (!new)
2742 return -ENOMEM;
2743
2744 if (new_below)
2745 new->vm_end = addr;
2746 else {
2747 new->vm_start = addr;
2748 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2749 }
2750
2751 err = vma_dup_policy(vma, new);
2752 if (err)
2753 goto out_free_vma;
2754
2755 err = anon_vma_clone(new, vma);
2756 if (err)
2757 goto out_free_mpol;
2758
2759 if (new->vm_file)
2760 get_file(new->vm_file);
2761
2762 if (new->vm_ops && new->vm_ops->open)
2763 new->vm_ops->open(new);
2764
2765 if (new_below)
2766 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2767 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2768 else
2769 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2770
2771
2772 if (!err)
2773 return 0;
2774
2775
2776 if (new->vm_ops && new->vm_ops->close)
2777 new->vm_ops->close(new);
2778 if (new->vm_file)
2779 fput(new->vm_file);
2780 unlink_anon_vmas(new);
2781 out_free_mpol:
2782 mpol_put(vma_policy(new));
2783 out_free_vma:
2784 vm_area_free(new);
2785 return err;
2786}
2787
2788
2789
2790
2791
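/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */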
2792int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2793 unsigned long addr, int new_below)
2794{
2795 if (mm->map_count >= sysctl_max_map_count)
2796 return -ENOMEM;
2797
2798 return __split_vma(mm, vma, addr, new_below);
2799}
2800
2801
2802
2803
2804
2805
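/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and unmap_region()/remove_vma_list() which do
 * the actual work.  Partial unmappings are handled by splitting the
 * VMAs that straddle the requested boundaries.
 */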
2806int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2807 struct list_head *uf, bool downgrade)
2808{
2809 unsigned long end;
2810 struct vm_area_struct *vma, *prev, *last;
2811
2812 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2813 return -EINVAL;
2814
2815 len = PAGE_ALIGN(len);
2816 end = start + len;
2817 if (len == 0)
2818 return -EINVAL;
2819
2820
2821
2822
2823
2824
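	/*
	 * arch_unmap() might do unmaps itself.  It must be called
	 * and finish any rbtree manipulation before this code
	 * runs and also starts to manipulate the rbtree.
	 */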
2825 arch_unmap(mm, start, end);
2826
2827
2828 vma = find_vma(mm, start);
2829 if (!vma)
2830 return 0;
2831 prev = vma->vm_prev;
2832
2833
2834
2835 if (vma->vm_start >= end)
2836 return 0;
2844
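	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so lower split_vma
	 * places tmp vma above, and higher split_vma places tmp vma below.
	 */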
2845 if (start > vma->vm_start) {
2846 int error;
2847
2848
2849
2850
2851
2852
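		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */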
2853 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2854 return -ENOMEM;
2855
2856 error = __split_vma(mm, vma, start, 0);
2857 if (error)
2858 return error;
2859 prev = vma;
2860 }
2861
2862
2863 last = find_vma(mm, end);
2864 if (last && end > last->vm_start) {
2865 int error = __split_vma(mm, last, end, 1);
2866 if (error)
2867 return error;
2868 }
2869 vma = vma_next(mm, prev);
2870
2871 if (unlikely(uf)) {
2880
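		/*
		 * If userfaultfd_unmap_prep() returns an error, the vmas
		 * will remain split, but userland will get a highly
		 * unexpected error anyway.  This is no different from the
		 * case where the first of the two __split_vma() calls fails
		 * and we do not undo the first split.  That failure is
		 * unlikely enough that it is not worth optimizing for.
		 */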
2881 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2882 if (error)
2883 return error;
2884 }
2885
2886
2887
2888
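	/*
	 * unlock any mlock()ed ranges before detaching vmas
	 */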
2889 if (mm->locked_vm) {
2890 struct vm_area_struct *tmp = vma;
2891 while (tmp && tmp->vm_start < end) {
2892 if (tmp->vm_flags & VM_LOCKED) {
2893 mm->locked_vm -= vma_pages(tmp);
2894 munlock_vma_pages_all(tmp);
2895 }
2896
2897 tmp = tmp->vm_next;
2898 }
2899 }
2900
2901
2902 if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
2903 downgrade = false;
2904
2905 if (downgrade)
2906 mmap_write_downgrade(mm);
2907
2908 unmap_region(mm, vma, prev, start, end);
2909
2910
2911 remove_vma_list(mm, vma);
2912
2913 return downgrade ? 1 : 0;
2914}
2915
2916int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2917 struct list_head *uf)
2918{
2919 return __do_munmap(mm, start, len, uf, false);
2920}
2921
2922static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
2923{
2924 int ret;
2925 struct mm_struct *mm = current->mm;
2926 LIST_HEAD(uf);
2927
2928 if (mmap_write_lock_killable(mm))
2929 return -EINTR;
2930
2931 ret = __do_munmap(mm, start, len, &uf, downgrade);
2932
2933
2934
2935
2936
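	/*
	 * Returning 1 indicates mmap_lock has been downgraded to read.
	 * But 1 is not a legal return value of vm_munmap() and munmap(),
	 * so reset it to 0 before returning.
	 */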
2937 if (ret == 1) {
2938 mmap_read_unlock(mm);
2939 ret = 0;
2940 } else
2941 mmap_write_unlock(mm);
2942
2943 userfaultfd_unmap_complete(mm, &uf);
2944 return ret;
2945}
2946
2947int vm_munmap(unsigned long start, size_t len)
2948{
2949 return __vm_munmap(start, len, false);
2950}
2951EXPORT_SYMBOL(vm_munmap);
2952
2953SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2954{
2955 addr = untagged_addr(addr);
2956 profile_munmap(addr);
2957 return __vm_munmap(addr, len, true);
2958}
2959
2960
2961
2962
2963
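/*
 * Emulation of the deprecated remap_file_pages() syscall.
 */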
2964SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2965 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2966{
2967
2968 struct mm_struct *mm = current->mm;
2969 struct vm_area_struct *vma;
2970 unsigned long populate = 0;
2971 unsigned long ret = -EINVAL;
2972 struct file *file;
2973
2974 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
2975 current->comm, current->pid);
2976
2977 if (prot)
2978 return ret;
2979 start = start & PAGE_MASK;
2980 size = size & PAGE_MASK;
2981
2982 if (start + size <= start)
2983 return ret;
2984
2985
2986 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2987 return ret;
2988
2989 if (mmap_write_lock_killable(mm))
2990 return -EINTR;
2991
2992 vma = find_vma(mm, start);
2993
2994 if (!vma || !(vma->vm_flags & VM_SHARED))
2995 goto out;
2996
2997 if (start < vma->vm_start)
2998 goto out;
2999
3000 if (start + size > vma->vm_end) {
3001 struct vm_area_struct *next;
3002
3003 for (next = vma->vm_next; next; next = next->vm_next) {
3004
3005 if (next->vm_start != next->vm_prev->vm_end)
3006 goto out;
3007
3008 if (next->vm_file != vma->vm_file)
3009 goto out;
3010
3011 if (next->vm_flags != vma->vm_flags)
3012 goto out;
3013
3014 if (start + size <= next->vm_end)
3015 break;
3016 }
3017
3018 if (!next)
3019 goto out;
3020 }
3021
3022 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
3023 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
3024 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
3025
3026 flags &= MAP_NONBLOCK;
3027 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
3028 if (vma->vm_flags & VM_LOCKED) {
3029 struct vm_area_struct *tmp;
3030 flags |= MAP_LOCKED;
3031
3032
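		/* drop PG_Mlocked flag for the over-mapped range */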
3033 for (tmp = vma; tmp->vm_start >= start + size;
3034 tmp = tmp->vm_next) {
3035
3036
3037
3038
3039 vma_adjust_trans_huge(tmp, start, start + size, 0);
3040
3041 munlock_vma_pages_range(tmp,
3042 max(tmp->vm_start, start),
3043 min(tmp->vm_end, start + size));
3044 }
3045 }
3046
3047 file = get_file(vma->vm_file);
3048 ret = do_mmap(vma->vm_file, start, size,
3049 prot, flags, pgoff, &populate, NULL);
3050 fput(file);
3051out:
3052 mmap_write_unlock(mm);
3053 if (populate)
3054 mm_populate(ret, populate);
3055 if (!IS_ERR_VALUE(ret))
3056 ret = 0;
3057 return ret;
3058}
3059
3060
3061
3062
3063
3064
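/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */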
3065static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
3066{
3067 struct mm_struct *mm = current->mm;
3068 struct vm_area_struct *vma, *prev;
3069 struct rb_node **rb_link, *rb_parent;
3070 pgoff_t pgoff = addr >> PAGE_SHIFT;
3071 int error;
3072 unsigned long mapped_addr;
3073
3074
3075 if ((flags & (~VM_EXEC)) != 0)
3076 return -EINVAL;
3077 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
3078
3079 mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
3080 if (IS_ERR_VALUE(mapped_addr))
3081 return mapped_addr;
3082
3083 error = mlock_future_check(mm, mm->def_flags, len);
3084 if (error)
3085 return error;
3086
3087
3088 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
3089 return -ENOMEM;
3090
3091
3092 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
3093 return -ENOMEM;
3094
3095 if (mm->map_count > sysctl_max_map_count)
3096 return -ENOMEM;
3097
3098 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
3099 return -ENOMEM;
3100
3101
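	/* Can we just expand an old private anonymous mapping? */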
3102 vma = vma_merge(mm, prev, addr, addr + len, flags,
3103 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
3104 if (vma)
3105 goto out;
3106
3107
3108
3109
3110 vma = vm_area_alloc(mm);
3111 if (!vma) {
3112 vm_unacct_memory(len >> PAGE_SHIFT);
3113 return -ENOMEM;
3114 }
3115
3116 vma_set_anonymous(vma);
3117 vma->vm_start = addr;
3118 vma->vm_end = addr + len;
3119 vma->vm_pgoff = pgoff;
3120 vma->vm_flags = flags;
3121 vma->vm_page_prot = vm_get_page_prot(flags);
3122 vma_link(mm, vma, prev, rb_link, rb_parent);
3123out:
3124 perf_event_mmap(vma);
3125 mm->total_vm += len >> PAGE_SHIFT;
3126 mm->data_vm += len >> PAGE_SHIFT;
3127 if (flags & VM_LOCKED)
3128 mm->locked_vm += (len >> PAGE_SHIFT);
3129 vma->vm_flags |= VM_SOFTDIRTY;
3130 return 0;
3131}
3132
3133int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
3134{
3135 struct mm_struct *mm = current->mm;
3136 unsigned long len;
3137 int ret;
3138 bool populate;
3139 LIST_HEAD(uf);
3140
3141 len = PAGE_ALIGN(request);
3142 if (len < request)
3143 return -ENOMEM;
3144 if (!len)
3145 return 0;
3146
3147 if (mmap_write_lock_killable(mm))
3148 return -EINTR;
3149
3150 ret = do_brk_flags(addr, len, flags, &uf);
3151 populate = ((mm->def_flags & VM_LOCKED) != 0);
3152 mmap_write_unlock(mm);
3153 userfaultfd_unmap_complete(mm, &uf);
3154 if (populate && !ret)
3155 mm_populate(addr, len);
3156 return ret;
3157}
3158EXPORT_SYMBOL(vm_brk_flags);
3159
3160int vm_brk(unsigned long addr, unsigned long len)
3161{
3162 return vm_brk_flags(addr, len, 0);
3163}
3164EXPORT_SYMBOL(vm_brk);
3165
3166
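/* Release all mmaps. */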
3167void exit_mmap(struct mm_struct *mm)
3168{
3169 struct mmu_gather tlb;
3170 struct vm_area_struct *vma;
3171 unsigned long nr_accounted = 0;
3172
3173
3174 mmu_notifier_release(mm);
3175
3176 if (unlikely(mm_is_oom_victim(mm))) {
3192
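		/*
		 * Manually reap the mm to free as much memory as possible.
		 * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
		 * this mm from further consideration.  Taking mmap_lock for
		 * write after setting MMF_OOM_SKIP guarantees that the oom
		 * reaper will not run on this mm again once mmap_lock is
		 * dropped.
		 *
		 * This must happen before munlock_vma_pages_all() below
		 * clears VM_LOCKED, which the oom reaper relies on.
		 */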
3193 (void)__oom_reap_task_mm(mm);
3194
3195 set_bit(MMF_OOM_SKIP, &mm->flags);
3196 mmap_write_lock(mm);
3197 mmap_write_unlock(mm);
3198 }
3199
3200 if (mm->locked_vm) {
3201 vma = mm->mmap;
3202 while (vma) {
3203 if (vma->vm_flags & VM_LOCKED)
3204 munlock_vma_pages_all(vma);
3205 vma = vma->vm_next;
3206 }
3207 }
3208
3209 arch_exit_mmap(mm);
3210
3211 vma = mm->mmap;
3212 if (!vma)
3213 return;
3214
3215 lru_add_drain();
3216 flush_cache_mm(mm);
3217 tlb_gather_mmu(&tlb, mm, 0, -1);
3218
3219
3220 unmap_vmas(&tlb, vma, 0, -1);
3221 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3222 tlb_finish_mmu(&tlb, 0, -1);
3223
3224
3225
3226
3227
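	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */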
3228 while (vma) {
3229 if (vma->vm_flags & VM_ACCOUNT)
3230 nr_accounted += vma_pages(vma);
3231 vma = remove_vma(vma);
3232 cond_resched();
3233 }
3234 vm_unacct_memory(nr_accounted);
3235}
3236
3237
3238
3239
3240
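/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must be shared.
 */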
3241int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3242{
3243 struct vm_area_struct *prev;
3244 struct rb_node **rb_link, *rb_parent;
3245
3246 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3247 &prev, &rb_link, &rb_parent))
3248 return -ENOMEM;
3249 if ((vma->vm_flags & VM_ACCOUNT) &&
3250 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3251 return -ENOMEM;
3264
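	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when the page's anon_vma and index
	 * are set.  Set it here to the value it will almost certainly end
	 * up with, so that /proc/pid/maps stays consistent and merges and
	 * splits can reuse the existing file pgoff checks.
	 */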
3265 if (vma_is_anonymous(vma)) {
3266 BUG_ON(vma->anon_vma);
3267 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3268 }
3269
3270 vma_link(mm, vma, prev, rb_link, rb_parent);
3271 return 0;
3272}
3273
3274
3275
3276
3277
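/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */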
3278struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3279 unsigned long addr, unsigned long len, pgoff_t pgoff,
3280 bool *need_rmap_locks)
3281{
3282 struct vm_area_struct *vma = *vmap;
3283 unsigned long vma_start = vma->vm_start;
3284 struct mm_struct *mm = vma->vm_mm;
3285 struct vm_area_struct *new_vma, *prev;
3286 struct rb_node **rb_link, *rb_parent;
3287 bool faulted_in_anon_vma = true;
3288
3289
3290
3291
3292
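	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */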
3293 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3294 pgoff = addr >> PAGE_SHIFT;
3295 faulted_in_anon_vma = false;
3296 }
3297
3298 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3299 return NULL;
3300 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3301 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3302 vma->vm_userfaultfd_ctx);
3303 if (new_vma) {
3304
3305
3306
3307 if (unlikely(vma_start >= new_vma->vm_start &&
3308 vma_start < new_vma->vm_end)) {
3320
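			/*
			 * The source vma was merged into new_vma.  This can
			 * only happen when the vma had not been faulted in
			 * yet, in which case pgoff was reset above to the
			 * destination address so the merge could succeed;
			 * such a vma must not carry faulted-in anon pages,
			 * hence the VM_BUG_ON below.
			 */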
3321 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3322 *vmap = vma = new_vma;
3323 }
3324 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3325 } else {
3326 new_vma = vm_area_dup(vma);
3327 if (!new_vma)
3328 goto out;
3329 new_vma->vm_start = addr;
3330 new_vma->vm_end = addr + len;
3331 new_vma->vm_pgoff = pgoff;
3332 if (vma_dup_policy(vma, new_vma))
3333 goto out_free_vma;
3334 if (anon_vma_clone(new_vma, vma))
3335 goto out_free_mempol;
3336 if (new_vma->vm_file)
3337 get_file(new_vma->vm_file);
3338 if (new_vma->vm_ops && new_vma->vm_ops->open)
3339 new_vma->vm_ops->open(new_vma);
3340 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3341 *need_rmap_locks = false;
3342 }
3343 return new_vma;
3344
3345out_free_mempol:
3346 mpol_put(vma_policy(new_vma));
3347out_free_vma:
3348 vm_area_free(new_vma);
3349out:
3350 return NULL;
3351}
3352
3353
3354
3355
3356
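/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */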
3357bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3358{
3359 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3360 return false;
3361
3362 if (is_data_mapping(flags) &&
3363 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
3364
3365 if (rlimit(RLIMIT_DATA) == 0 &&
3366 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3367 return true;
3368
		pr_warn_once("%s (%d): VmData %lu exceeds data ulimit %lu. Update limits%s.\n",
3370 current->comm, current->pid,
3371 (mm->data_vm + npages) << PAGE_SHIFT,
3372 rlimit(RLIMIT_DATA),
3373 ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
3374
3375 if (!ignore_rlimit_data)
3376 return false;
3377 }
3378
3379 return true;
3380}
3381
3382void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3383{
3384 mm->total_vm += npages;
3385
3386 if (is_exec_mapping(flags))
3387 mm->exec_vm += npages;
3388 else if (is_stack_mapping(flags))
3389 mm->stack_vm += npages;
3390 else if (is_data_mapping(flags))
3391 mm->data_vm += npages;
3392}
3393
3394static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
3395
3396
3397
3398
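/*
 * Having a close hook prevents vma merging regardless of flags.
 */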
3399static void special_mapping_close(struct vm_area_struct *vma)
3400{
3401}
3402
3403static const char *special_mapping_name(struct vm_area_struct *vma)
3404{
3405 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3406}
3407
3408static int special_mapping_mremap(struct vm_area_struct *new_vma)
3409{
3410 struct vm_special_mapping *sm = new_vma->vm_private_data;
3411
3412 if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
3413 return -EFAULT;
3414
3415 if (sm->mremap)
3416 return sm->mremap(sm, new_vma);
3417
3418 return 0;
3419}
3420
3421static const struct vm_operations_struct special_mapping_vmops = {
3422 .close = special_mapping_close,
3423 .fault = special_mapping_fault,
3424 .mremap = special_mapping_mremap,
3425 .name = special_mapping_name,
3426
3427 .access = NULL,
3428};
3429
3430static const struct vm_operations_struct legacy_special_mapping_vmops = {
3431 .close = special_mapping_close,
3432 .fault = special_mapping_fault,
3433};
3434
3435static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
3436{
3437 struct vm_area_struct *vma = vmf->vma;
3438 pgoff_t pgoff;
3439 struct page **pages;
3440
3441 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3442 pages = vma->vm_private_data;
3443 } else {
3444 struct vm_special_mapping *sm = vma->vm_private_data;
3445
3446 if (sm->fault)
3447 return sm->fault(sm, vmf->vma, vmf);
3448
3449 pages = sm->pages;
3450 }
3451
3452 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3453 pgoff--;
3454
3455 if (*pages) {
3456 struct page *page = *pages;
3457 get_page(page);
3458 vmf->page = page;
3459 return 0;
3460 }
3461
3462 return VM_FAULT_SIGBUS;
3463}
3464
3465static struct vm_area_struct *__install_special_mapping(
3466 struct mm_struct *mm,
3467 unsigned long addr, unsigned long len,
3468 unsigned long vm_flags, void *priv,
3469 const struct vm_operations_struct *ops)
3470{
3471 int ret;
3472 struct vm_area_struct *vma;
3473
3474 vma = vm_area_alloc(mm);
3475 if (unlikely(vma == NULL))
3476 return ERR_PTR(-ENOMEM);
3477
3478 vma->vm_start = addr;
3479 vma->vm_end = addr + len;
3480
3481 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3482 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3483
3484 vma->vm_ops = ops;
3485 vma->vm_private_data = priv;
3486
3487 ret = insert_vm_struct(mm, vma);
3488 if (ret)
3489 goto out;
3490
3491 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3492
3493 perf_event_mmap(vma);
3494
3495 return vma;
3496
3497out:
3498 vm_area_free(vma);
3499 return ERR_PTR(ret);
3500}
3501
3502bool vma_is_special_mapping(const struct vm_area_struct *vma,
3503 const struct vm_special_mapping *sm)
3504{
3505 return vma->vm_private_data == sm &&
3506 (vma->vm_ops == &special_mapping_vmops ||
3507 vma->vm_ops == &legacy_special_mapping_vmops);
3508}
3518
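/*
 * Called with mm->mmap_lock held for writing.
 * Insert a new vma covering the given region, with the given flags and
 * the given vm_special_mapping describing its pages.
 * The pages array used by special_mapping_fault() can be shorter than
 * len >> PAGE_SHIFT if it is NULL-terminated; faults past the last
 * supplied page produce SIGBUS.
 */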
3519struct vm_area_struct *_install_special_mapping(
3520 struct mm_struct *mm,
3521 unsigned long addr, unsigned long len,
3522 unsigned long vm_flags, const struct vm_special_mapping *spec)
3523{
3524 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3525 &special_mapping_vmops);
3526}
3527
3528int install_special_mapping(struct mm_struct *mm,
3529 unsigned long addr, unsigned long len,
3530 unsigned long vm_flags, struct page **pages)
3531{
3532 struct vm_area_struct *vma = __install_special_mapping(
3533 mm, addr, len, vm_flags, (void *)pages,
3534 &legacy_special_mapping_vmops);
3535
3536 return PTR_ERR_OR_ZERO(vma);
3537}
3538
3539static DEFINE_MUTEX(mm_all_locks_mutex);
3540
3541static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3542{
3543 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
3544
3545
3546
3547
3548 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558 if (__test_and_set_bit(0, (unsigned long *)
3559 &anon_vma->root->rb_root.rb_root.rb_node))
3560 BUG();
3561 }
3562}
3563
3564static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3565{
3566 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3577 BUG();
3578 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
3579 }
3580}
3618
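/*
 * Take all the per-mapping and per-anon_vma locks of an mm, so that no
 * pte/vma/mm modification can race with the caller.  Hugetlb mappings are
 * locked first, then the remaining file mappings, then the anon_vmas.
 *
 * The caller must hold the mmap_lock in write mode before calling
 * mm_take_all_locks(), and may not release it until mm_drop_all_locks()
 * has been called.  Only one mm_take_all_locks() can be in progress at a
 * time, serialized by mm_all_locks_mutex.
 *
 * Returns 0 on success, or -EINTR if a pending signal interrupted the walk
 * (any locks taken so far are dropped).
 */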
3619int mm_take_all_locks(struct mm_struct *mm)
3620{
3621 struct vm_area_struct *vma;
3622 struct anon_vma_chain *avc;
3623
3624 BUG_ON(mmap_read_trylock(mm));
3625
3626 mutex_lock(&mm_all_locks_mutex);
3627
3628 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3629 if (signal_pending(current))
3630 goto out_unlock;
3631 if (vma->vm_file && vma->vm_file->f_mapping &&
3632 is_vm_hugetlb_page(vma))
3633 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3634 }
3635
3636 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3637 if (signal_pending(current))
3638 goto out_unlock;
3639 if (vma->vm_file && vma->vm_file->f_mapping &&
3640 !is_vm_hugetlb_page(vma))
3641 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3642 }
3643
3644 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3645 if (signal_pending(current))
3646 goto out_unlock;
3647 if (vma->anon_vma)
3648 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3649 vm_lock_anon_vma(mm, avc->anon_vma);
3650 }
3651
3652 return 0;
3653
3654out_unlock:
3655 mm_drop_all_locks(mm);
3656 return -EINTR;
3657}
3658
3659static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3660{
3661 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674 if (!__test_and_clear_bit(0, (unsigned long *)
3675 &anon_vma->root->rb_root.rb_root.rb_node))
3676 BUG();
3677 anon_vma_unlock_write(anon_vma);
3678 }
3679}
3680
3681static void vm_unlock_mapping(struct address_space *mapping)
3682{
3683 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3684
3685
3686
3687
3688 i_mmap_unlock_write(mapping);
3689 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3690 &mapping->flags))
3691 BUG();
3692 }
3693}
3694
3695
3696
3697
3698
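/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */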
3699void mm_drop_all_locks(struct mm_struct *mm)
3700{
3701 struct vm_area_struct *vma;
3702 struct anon_vma_chain *avc;
3703
3704 BUG_ON(mmap_read_trylock(mm));
3705 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3706
3707 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3708 if (vma->anon_vma)
3709 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3710 vm_unlock_anon_vma(avc->anon_vma);
3711 if (vma->vm_file && vma->vm_file->f_mapping)
3712 vm_unlock_mapping(vma->vm_file->f_mapping);
3713 }
3714
3715 mutex_unlock(&mm_all_locks_mutex);
3716}
3717
3718
3719
3720
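/*
 * initialise the percpu counter(s) used by the VM
 */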
3721void __init mmap_init(void)
3722{
3723 int ret;
3724
3725 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3726 VM_BUG_ON(ret);
3727}
3738
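/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog).
 *
 * The default value is min(3% of free memory, 128MB).
 */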
3739static int init_user_reserve(void)
3740{
3741 unsigned long free_kbytes;
3742
3743 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3744
3745 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3746 return 0;
3747}
3748subsys_initcall(init_user_reserve);
3759
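/*
 * Initialise sysctl_admin_reserve_kbytes, the memory reserved to let the
 * admin log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB).
 */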
3760static int init_admin_reserve(void)
3761{
3762 unsigned long free_kbytes;
3763
3764 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3765
3766 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3767 return 0;
3768}
3769subsys_initcall(init_admin_reserve);
3788
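/*
 * Reinitialise the user and admin reserves when memory is added or removed.
 *
 * On hotplug add, re-derive a reserve only if it is still at or below its
 * default cap (i.e. the operator has not raised or disabled it).
 * On hotplug remove, re-derive a reserve if it now exceeds free memory.
 * Otherwise keep whatever the operator configured.
 */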
3789static int reserve_mem_notifier(struct notifier_block *nb,
3790 unsigned long action, void *data)
3791{
3792 unsigned long tmp, free_kbytes;
3793
3794 switch (action) {
3795 case MEM_ONLINE:
3796
3797 tmp = sysctl_user_reserve_kbytes;
3798 if (0 < tmp && tmp < (1UL << 17))
3799 init_user_reserve();
3800
3801
3802 tmp = sysctl_admin_reserve_kbytes;
3803 if (0 < tmp && tmp < (1UL << 13))
3804 init_admin_reserve();
3805
3806 break;
3807 case MEM_OFFLINE:
3808 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3809
3810 if (sysctl_user_reserve_kbytes > free_kbytes) {
3811 init_user_reserve();
3812 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3813 sysctl_user_reserve_kbytes);
3814 }
3815
3816 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3817 init_admin_reserve();
3818 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3819 sysctl_admin_reserve_kbytes);
3820 }
3821 break;
3822 default:
3823 break;
3824 }
3825 return NOTIFY_OK;
3826}
3827
3828static struct notifier_block reserve_mem_nb = {
3829 .notifier_call = reserve_mem_notifier,
3830};
3831
3832static int __meminit init_reserve_notifier(void)
3833{
3834 if (register_hotmemory_notifier(&reserve_mem_nb))
3835 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3836
3837 return 0;
3838}
3839subsys_initcall(init_reserve_notifier);
3840