/*
 * mm/mmap.c
 *
 * Memory mapping and VMA management: mmap(), munmap(), brk(), VMA
 * merging/splitting and the augmented rbtree of free gaps.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * protection_map maps the VM_READ/VM_WRITE/VM_EXEC/VM_SHARED bits of
 * vma->vm_flags to the architecture's __P/__S page protection values:
 * indices 0-7 are the private (copy-on-write) combinations, 8-15 the
 * shared ones.  vm_get_page_prot() below performs the lookup.
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}

	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Unlink @vma from @mapping's i_mmap interval tree.  The caller must
 * hold the mapping's i_mmap lock (see unlink_file_vma() below).
 */
149static void __remove_shared_vm_struct(struct vm_area_struct *vma,
150 struct file *file, struct address_space *mapping)
151{
152 if (vma->vm_flags & VM_SHARED)
153 mapping_unmap_writable(mapping);
154
155 flush_dcache_mmap_lock(mapping);
156 vma_interval_tree_remove(vma, &mapping->i_mmap);
157 flush_dcache_mmap_unlock(mapping);
158}

/*
 * Unlink a file-backed VMA from its address_space interval tree, so
 * rmap no longer sees it before the VMA is torn down.
 */
164void unlink_file_vma(struct vm_area_struct *vma)
165{
166 struct file *file = vma->vm_file;
167
168 if (file) {
169 struct address_space *mapping = file->f_mapping;
170 i_mmap_lock_write(mapping);
171 __remove_shared_vm_struct(vma, file, mapping);
172 i_mmap_unlock_write(mapping);
173 }
174}

/*
 * Close a vm_area_struct and free it, returning the next VMA in the list.
 */
179static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
180{
181 struct vm_area_struct *next = vma->vm_next;
182
183 might_sleep();
184 if (vma->vm_ops && vma->vm_ops->close)
185 vma->vm_ops->close(vma);
186 if (vma->vm_file)
187 fput(vma->vm_file);
188 mpol_put(vma_policy(vma));
189 vm_area_free(vma);
190 return next;
191}
192
193static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
194 struct list_head *uf);
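/*
 * brk() grows or shrinks the heap.  On success the new program break is
 * returned; on any failure the previous break (origbrk) is returned
 * unchanged, so userspace can detect failure by comparing values.
 */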
195SYSCALL_DEFINE1(brk, unsigned long, brk)
196{
197 unsigned long newbrk, oldbrk, origbrk;
198 struct mm_struct *mm = current->mm;
199 struct vm_area_struct *next;
200 unsigned long min_brk;
201 bool populate;
202 bool downgraded = false;
203 LIST_HEAD(uf);
204
205 if (mmap_write_lock_killable(mm))
206 return -EINTR;
207
208 origbrk = mm->brk;
209
210#ifdef CONFIG_COMPAT_BRK
211
212
213
214
215
216 if (current->brk_randomized)
217 min_brk = mm->start_brk;
218 else
219 min_brk = mm->end_data;
220#else
221 min_brk = mm->start_brk;
222#endif
223 if (brk < min_brk)
224 goto out;
225
226
227
228
229
230
231
232 if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
233 mm->end_data, mm->start_data))
234 goto out;
235
236 newbrk = PAGE_ALIGN(brk);
237 oldbrk = PAGE_ALIGN(mm->brk);
238 if (oldbrk == newbrk) {
239 mm->brk = brk;
240 goto success;
241 }
242
243
244
245
246
247 if (brk <= mm->brk) {
248 int ret;
249
250
251
252
253
254
255 mm->brk = brk;
256 ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
257 if (ret < 0) {
258 mm->brk = origbrk;
259 goto out;
260 } else if (ret == 1) {
261 downgraded = true;
262 }
263 goto success;
264 }
265
266
267 next = find_vma(mm, oldbrk);
268 if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
269 goto out;
270
271
272 if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
273 goto out;
274 mm->brk = brk;
275
276success:
277 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
278 if (downgraded)
279 mmap_read_unlock(mm);
280 else
281 mmap_write_unlock(mm);
282 userfaultfd_unmap_complete(mm, &uf);
283 if (populate)
284 mm_populate(oldbrk, newbrk - oldbrk);
285 return brk;
286
287out:
288 mmap_write_unlock(mm);
289 return origbrk;
290}
291
292static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
293{
294 unsigned long gap, prev_end;

	/*
	 * Gap between this VMA's start (including its stack guard gap,
	 * via vm_start_gap()) and the end of the previous VMA, or the
	 * start of the address space if there is no previous VMA.
	 */
302 gap = vm_start_gap(vma);
303 if (vma->vm_prev) {
304 prev_end = vm_end_gap(vma->vm_prev);
305 if (gap > prev_end)
306 gap -= prev_end;
307 else
308 gap = 0;
309 }
310 return gap;
311}
312
313#ifdef CONFIG_DEBUG_VM_RB
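/*
 * Debug helper: recompute the maximum gap in this VMA's rbtree subtree,
 * i.e. the largest vma_compute_gap() value below it, so it can be
 * compared against the cached vma->rb_subtree_gap.
 */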
314static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
315{
316 unsigned long max = vma_compute_gap(vma), subtree_gap;
317 if (vma->vm_rb.rb_left) {
318 subtree_gap = rb_entry(vma->vm_rb.rb_left,
319 struct vm_area_struct, vm_rb)->rb_subtree_gap;
320 if (subtree_gap > max)
321 max = subtree_gap;
322 }
323 if (vma->vm_rb.rb_right) {
324 subtree_gap = rb_entry(vma->vm_rb.rb_right,
325 struct vm_area_struct, vm_rb)->rb_subtree_gap;
326 if (subtree_gap > max)
327 max = subtree_gap;
328 }
329 return max;
330}
331
332static int browse_rb(struct mm_struct *mm)
333{
334 struct rb_root *root = &mm->mm_rb;
335 int i = 0, j, bug = 0;
336 struct rb_node *nd, *pn = NULL;
337 unsigned long prev = 0, pend = 0;
338
339 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
340 struct vm_area_struct *vma;
341 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
342 if (vma->vm_start < prev) {
343 pr_emerg("vm_start %lx < prev %lx\n",
344 vma->vm_start, prev);
345 bug = 1;
346 }
347 if (vma->vm_start < pend) {
348 pr_emerg("vm_start %lx < pend %lx\n",
349 vma->vm_start, pend);
350 bug = 1;
351 }
352 if (vma->vm_start > vma->vm_end) {
353 pr_emerg("vm_start %lx > vm_end %lx\n",
354 vma->vm_start, vma->vm_end);
355 bug = 1;
356 }
357 spin_lock(&mm->page_table_lock);
358 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
359 pr_emerg("free gap %lx, correct %lx\n",
360 vma->rb_subtree_gap,
361 vma_compute_subtree_gap(vma));
362 bug = 1;
363 }
364 spin_unlock(&mm->page_table_lock);
365 i++;
366 pn = nd;
367 prev = vma->vm_start;
368 pend = vma->vm_end;
369 }
370 j = 0;
371 for (nd = pn; nd; nd = rb_prev(nd))
372 j++;
373 if (i != j) {
374 pr_emerg("backwards %d, forwards %d\n", j, i);
375 bug = 1;
376 }
377 return bug ? -1 : i;
378}
379
380static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
381{
382 struct rb_node *nd;
383
384 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
385 struct vm_area_struct *vma;
386 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
387 VM_BUG_ON_VMA(vma != ignore &&
388 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
389 vma);
390 }
391}
392
393static void validate_mm(struct mm_struct *mm)
394{
395 int bug = 0;
396 int i = 0;
397 unsigned long highest_address = 0;
398 struct vm_area_struct *vma = mm->mmap;
399
400 while (vma) {
401 struct anon_vma *anon_vma = vma->anon_vma;
402 struct anon_vma_chain *avc;
403
404 if (anon_vma) {
405 anon_vma_lock_read(anon_vma);
406 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
407 anon_vma_interval_tree_verify(avc);
408 anon_vma_unlock_read(anon_vma);
409 }
410
411 highest_address = vm_end_gap(vma);
412 vma = vma->vm_next;
413 i++;
414 }
415 if (i != mm->map_count) {
416 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
417 bug = 1;
418 }
419 if (highest_address != mm->highest_vm_end) {
420 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
421 mm->highest_vm_end, highest_address);
422 bug = 1;
423 }
424 i = browse_rb(mm);
425 if (i != mm->map_count) {
426 if (i != -1)
427 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
428 bug = 1;
429 }
430 VM_BUG_ON_MM(bug, mm);
431}
432#else
433#define validate_mm_rb(root, ignore) do { } while (0)
434#define validate_mm(mm) do { } while (0)
435#endif
436
437RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
438 struct vm_area_struct, vm_rb,
439 unsigned long, rb_subtree_gap, vma_compute_gap)

/*
 * Update the cached rb_subtree_gap values after vm_start or vm_end of a
 * VMA changed, without changing the VMA's position in the rbtree.
 */
446static void vma_gap_update(struct vm_area_struct *vma)
447{
	/*
	 * RB_DECLARE_CALLBACKS_MAX() above generated the propagate
	 * callback that recomputes rb_subtree_gap up the tree.
	 */
452 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
453}
454
455static inline void vma_rb_insert(struct vm_area_struct *vma,
456 struct rb_root *root)
457{
458
459 validate_mm_rb(root, NULL);
460
461 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
462}
463
464static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
465{
466
467
468
469
470
471 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
472}
473
474static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
475 struct rb_root *root,
476 struct vm_area_struct *ignore)
477{
478
479
480
481
482
483
484
485
486
487 validate_mm_rb(root, ignore);
488
489 __vma_rb_erase(vma, root);
490}
491
492static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
493 struct rb_root *root)
494{
495 vma_rb_erase_ignore(vma, root, vma);
496}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start, vm_end or vm_pgoff, the vma must
 * be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's lock.
 */
512static inline void
513anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
514{
515 struct anon_vma_chain *avc;
516
517 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
518 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
519}
520
521static inline void
522anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
523{
524 struct anon_vma_chain *avc;
525
526 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
527 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
528}
529
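/*
 * Walk the rbtree looking for the slot where a VMA covering [addr, end)
 * would be inserted.  Returns 0 and fills in *pprev, *rb_link and
 * *rb_parent, or -ENOMEM if an existing VMA already overlaps the range.
 */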
530static int find_vma_links(struct mm_struct *mm, unsigned long addr,
531 unsigned long end, struct vm_area_struct **pprev,
532 struct rb_node ***rb_link, struct rb_node **rb_parent)
533{
534 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
535
536 mmap_assert_locked(mm);
537 __rb_link = &mm->mm_rb.rb_node;
538 rb_prev = __rb_parent = NULL;
539
540 while (*__rb_link) {
541 struct vm_area_struct *vma_tmp;
542
543 __rb_parent = *__rb_link;
544 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
545
546 if (vma_tmp->vm_end > addr) {
547
548 if (vma_tmp->vm_start < end)
549 return -ENOMEM;
550 __rb_link = &__rb_parent->rb_left;
551 } else {
552 rb_prev = __rb_parent;
553 __rb_link = &__rb_parent->rb_right;
554 }
555 }
556
557 *pprev = NULL;
558 if (rb_prev)
559 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
560 *rb_link = __rb_link;
561 *rb_parent = __rb_parent;
562 return 0;
563}

/*
 * vma_next() - get the VMA following @vma, or the first VMA in the mm
 * when @vma is NULL.
 */
574static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
575 struct vm_area_struct *vma)
576{
577 if (!vma)
578 return mm->mmap;
579
580 return vma->vm_next;
581}

/*
 * Unmap any existing mapping in [start, start + len) and find the rbtree
 * insertion point (*pprev, *link, *parent) for a new VMA covering it.
 * Returns 0 on success, -ENOMEM if do_munmap() fails.
 */
597static inline int
598munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
599 struct vm_area_struct **pprev, struct rb_node ***link,
600 struct rb_node **parent, struct list_head *uf)
601{
602
603 while (find_vma_links(mm, start, start + len, pprev, link, parent))
604 if (do_munmap(mm, start, len, uf))
605 return -ENOMEM;
606
607 return 0;
608}
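
/*
 * Count the number of pages in [addr, end) that are already covered by
 * existing VMAs.
 */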
609static unsigned long count_vma_pages_range(struct mm_struct *mm,
610 unsigned long addr, unsigned long end)
611{
612 unsigned long nr_pages = 0;
613 struct vm_area_struct *vma;
614
615
616 vma = find_vma_intersection(mm, addr, end);
617 if (!vma)
618 return 0;
619
620 nr_pages = (min(end, vma->vm_end) -
621 max(addr, vma->vm_start)) >> PAGE_SHIFT;
622
623
624 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
625 unsigned long overlap_len;
626
627 if (vma->vm_start > end)
628 break;
629
630 overlap_len = min(end, vma->vm_end) - vma->vm_start;
631 nr_pages += overlap_len >> PAGE_SHIFT;
632 }
633
634 return nr_pages;
635}
636
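/*
 * Insert @vma into the mm's augmented rbtree at the slot found earlier by
 * find_vma_links(), and update the cached free-gap information of its
 * neighbours.
 */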
637void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
638 struct rb_node **rb_link, struct rb_node *rb_parent)
639{
640
641 if (vma->vm_next)
642 vma_gap_update(vma->vm_next);
643 else
644 mm->highest_vm_end = vm_end_gap(vma);
645
646
647
648
649
650
651
652
653
654
655 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
656 vma->rb_subtree_gap = 0;
657 vma_gap_update(vma);
658 vma_rb_insert(vma, &mm->mm_rb);
659}
660
661static void __vma_link_file(struct vm_area_struct *vma)
662{
663 struct file *file;
664
665 file = vma->vm_file;
666 if (file) {
667 struct address_space *mapping = file->f_mapping;
668
669 if (vma->vm_flags & VM_SHARED)
670 mapping_allow_writable(mapping);
671
672 flush_dcache_mmap_lock(mapping);
673 vma_interval_tree_insert(vma, &mapping->i_mmap);
674 flush_dcache_mmap_unlock(mapping);
675 }
676}
677
678static void
679__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
680 struct vm_area_struct *prev, struct rb_node **rb_link,
681 struct rb_node *rb_parent)
682{
683 __vma_link_list(mm, vma, prev);
684 __vma_link_rb(mm, vma, rb_link, rb_parent);
685}
686
687static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
688 struct vm_area_struct *prev, struct rb_node **rb_link,
689 struct rb_node *rb_parent)
690{
691 struct address_space *mapping = NULL;
692
693 if (vma->vm_file) {
694 mapping = vma->vm_file->f_mapping;
695 i_mmap_lock_write(mapping);
696 }
697
698 __vma_link(mm, vma, prev, rb_link, rb_parent);
699 __vma_link_file(vma);
700
701 if (mapping)
702 i_mmap_unlock_write(mapping);
703
704 mm->map_count++;
705 validate_mm(mm);
706}
707
708
709
710
711
712static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
713{
714 struct vm_area_struct *prev;
715 struct rb_node **rb_link, *rb_parent;
716
717 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
718 &prev, &rb_link, &rb_parent))
719 BUG();
720 __vma_link(mm, vma, prev, rb_link, rb_parent);
721 mm->map_count++;
722}
723
724static __always_inline void __vma_unlink(struct mm_struct *mm,
725 struct vm_area_struct *vma,
726 struct vm_area_struct *ignore)
727{
728 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
729 __vma_unlink_list(mm, vma);
730
731 vmacache_invalidate(mm);
732}

/*
 * We cannot adjust vm_start, vm_end or vm_pgoff of a vma that is already
 * present in an i_mmap tree without also adjusting the tree.  The helper
 * below should be used when such adjustments are necessary; the "insert"
 * vma (if any) is inserted before the necessary locks are dropped.
 */
741int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
742 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
743 struct vm_area_struct *expand)
744{
745 struct mm_struct *mm = vma->vm_mm;
746 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
747 struct address_space *mapping = NULL;
748 struct rb_root_cached *root = NULL;
749 struct anon_vma *anon_vma = NULL;
750 struct file *file = vma->vm_file;
751 bool start_changed = false, end_changed = false;
752 long adjust_next = 0;
753 int remove_next = 0;
754
755 if (next && !insert) {
756 struct vm_area_struct *exporter = NULL, *importer = NULL;
757
758 if (end >= next->vm_end) {
759
760
761
762
763
764
765 if (next == expand) {
766
767
768
769
770 VM_WARN_ON(end != next->vm_end);
771
772
773
774
775
776 remove_next = 3;
777 VM_WARN_ON(file != next->vm_file);
778 swap(vma, next);
779 } else {
780 VM_WARN_ON(expand != vma);
781
782
783
784
785 remove_next = 1 + (end > next->vm_end);
786 VM_WARN_ON(remove_next == 2 &&
787 end != next->vm_next->vm_end);
788
789 end = next->vm_end;
790 }
791
792 exporter = next;
793 importer = vma;
794
795
796
797
798
799 if (remove_next == 2 && !next->anon_vma)
800 exporter = next->vm_next;
801
802 } else if (end > next->vm_start) {
803
804
805
806
807 adjust_next = (end - next->vm_start);
808 exporter = next;
809 importer = vma;
810 VM_WARN_ON(expand != importer);
811 } else if (end < vma->vm_end) {
812
813
814
815
816
817 adjust_next = -(vma->vm_end - end);
818 exporter = vma;
819 importer = next;
820 VM_WARN_ON(expand != importer);
821 }
822
823
824
825
826
827
828 if (exporter && exporter->anon_vma && !importer->anon_vma) {
829 int error;
830
831 importer->anon_vma = exporter->anon_vma;
832 error = anon_vma_clone(importer, exporter);
833 if (error)
834 return error;
835 }
836 }
837again:
838 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
839
840 if (file) {
841 mapping = file->f_mapping;
842 root = &mapping->i_mmap;
843 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
844
845 if (adjust_next)
846 uprobe_munmap(next, next->vm_start, next->vm_end);
847
848 i_mmap_lock_write(mapping);
849 if (insert) {
850
851
852
853
854
855
856 __vma_link_file(insert);
857 }
858 }
859
860 anon_vma = vma->anon_vma;
861 if (!anon_vma && adjust_next)
862 anon_vma = next->anon_vma;
863 if (anon_vma) {
864 VM_WARN_ON(adjust_next && next->anon_vma &&
865 anon_vma != next->anon_vma);
866 anon_vma_lock_write(anon_vma);
867 anon_vma_interval_tree_pre_update_vma(vma);
868 if (adjust_next)
869 anon_vma_interval_tree_pre_update_vma(next);
870 }
871
872 if (file) {
873 flush_dcache_mmap_lock(mapping);
874 vma_interval_tree_remove(vma, root);
875 if (adjust_next)
876 vma_interval_tree_remove(next, root);
877 }
878
879 if (start != vma->vm_start) {
880 vma->vm_start = start;
881 start_changed = true;
882 }
883 if (end != vma->vm_end) {
884 vma->vm_end = end;
885 end_changed = true;
886 }
887 vma->vm_pgoff = pgoff;
888 if (adjust_next) {
889 next->vm_start += adjust_next;
890 next->vm_pgoff += adjust_next >> PAGE_SHIFT;
891 }
892
893 if (file) {
894 if (adjust_next)
895 vma_interval_tree_insert(next, root);
896 vma_interval_tree_insert(vma, root);
897 flush_dcache_mmap_unlock(mapping);
898 }
899
900 if (remove_next) {
901
902
903
904
905 if (remove_next != 3)
906 __vma_unlink(mm, next, next);
907 else
908
909
910
911
912
913
914
915
916
917 __vma_unlink(mm, next, vma);
918 if (file)
919 __remove_shared_vm_struct(next, file, mapping);
920 } else if (insert) {
921
922
923
924
925
926 __insert_vm_struct(mm, insert);
927 } else {
928 if (start_changed)
929 vma_gap_update(vma);
930 if (end_changed) {
931 if (!next)
932 mm->highest_vm_end = vm_end_gap(vma);
933 else if (!adjust_next)
934 vma_gap_update(next);
935 }
936 }
937
938 if (anon_vma) {
939 anon_vma_interval_tree_post_update_vma(vma);
940 if (adjust_next)
941 anon_vma_interval_tree_post_update_vma(next);
942 anon_vma_unlock_write(anon_vma);
943 }
944
945 if (file) {
946 i_mmap_unlock_write(mapping);
947 uprobe_mmap(vma);
948
949 if (adjust_next)
950 uprobe_mmap(next);
951 }
952
953 if (remove_next) {
954 if (file) {
955 uprobe_munmap(next, next->vm_start, next->vm_end);
956 fput(file);
957 }
958 if (next->anon_vma)
959 anon_vma_merge(vma, next);
960 mm->map_count--;
961 mpol_put(vma_policy(next));
962 vm_area_free(next);
963
964
965
966
967
968 if (remove_next != 3) {
969
970
971
972
973
974
975 next = vma->vm_next;
976 } else {
977
978
979
980
981
982
983
984
985
986
987 next = vma;
988 }
989 if (remove_next == 2) {
990 remove_next = 1;
991 end = next->vm_end;
992 goto again;
993 }
994 else if (next)
995 vma_gap_update(next);
996 else {
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
1017 }
1018 }
1019 if (insert && file)
1020 uprobe_mmap(insert);
1021
1022 validate_mm(mm);
1023
1024 return 0;
1025}

/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
1031static inline int is_mergeable_vma(struct vm_area_struct *vma,
1032 struct file *file, unsigned long vm_flags,
1033 struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
1034 struct anon_vma_name *anon_name)
1035{
	/*
	 * VM_SOFTDIRTY should not prevent VMA merging: if the flags match
	 * apart from the dirty bit, the caller is expected to mark the
	 * merged VMA as dirty.  Including the dirty bit in the comparison
	 * would force the kernel to create new VMAs where an existing one
	 * could simply have been extended.
	 */
1044 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
1045 return 0;
1046 if (vma->vm_file != file)
1047 return 0;
1048 if (vma->vm_ops && vma->vm_ops->close)
1049 return 0;
1050 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
1051 return 0;
1052 if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
1053 return 0;
1054 return 1;
1055}
1056
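/*
 * Two VMAs may share a merged anon_vma only if at least one of them has
 * no anon_vma yet, or both already use the same one.
 */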
1057static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
1058 struct anon_vma *anon_vma2,
1059 struct vm_area_struct *vma)
1060{
	/*
	 * The list_is_singular() test avoids merging VMAs whose anon_vma
	 * chain was cloned from a parent, which would hurt scalability of
	 * the anon_vma lock.
	 */
1065 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1066 list_is_singular(&vma->anon_vma_chain)))
1067 return 1;
1068 return anon_vma1 == anon_vma2;
1069}

/*
 * Return true if we can merge this (vm_flags, anon_vma, file, vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but the offsets are
 * incompatible.
 */
1082static int
1083can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1084 struct anon_vma *anon_vma, struct file *file,
1085 pgoff_t vm_pgoff,
1086 struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
1087 struct anon_vma_name *anon_name)
1088{
1089 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
1090 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1091 if (vma->vm_pgoff == vm_pgoff)
1092 return 1;
1093 }
1094 return 0;
1095}

/*
 * Return true if we can merge this (vm_flags, anon_vma, file, vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but the offsets are
 * incompatible.
 */
1104static int
1105can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1106 struct anon_vma *anon_vma, struct file *file,
1107 pgoff_t vm_pgoff,
1108 struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
1109 struct anon_vma_name *anon_name)
1110{
1111 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
1112 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1113 pgoff_t vm_pglen;
1114 vm_pglen = vma_pages(vma);
1115 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1116 return 1;
1117 }
1118 return 0;
1119}

/*
 * Given a mapping request (addr, end, vm_flags, file, pgoff, anon_name),
 * figure out whether it can be merged with its predecessor and/or its
 * successor, or both (when it neatly fills a hole between them).
 *
 * Most of the time the already-existing neighbouring VMAs are extended:
 * "prev" grows forward, "next" grows backward, or both collapse into a
 * single VMA spanning the whole range.
 *
 * Returns the merged VMA that now covers the requested range, or NULL if
 * no merge was possible, in which case the caller must allocate and
 * insert a new VMA for [addr, end).
 */
1164struct vm_area_struct *vma_merge(struct mm_struct *mm,
1165 struct vm_area_struct *prev, unsigned long addr,
1166 unsigned long end, unsigned long vm_flags,
1167 struct anon_vma *anon_vma, struct file *file,
1168 pgoff_t pgoff, struct mempolicy *policy,
1169 struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
1170 struct anon_vma_name *anon_name)
1171{
1172 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1173 struct vm_area_struct *area, *next;
1174 int err;
1175
1176
1177
1178
1179
1180 if (vm_flags & VM_SPECIAL)
1181 return NULL;
1182
1183 next = vma_next(mm, prev);
1184 area = next;
1185 if (area && area->vm_end == end)
1186 next = next->vm_next;
1187
1188
1189 VM_WARN_ON(prev && addr <= prev->vm_start);
1190 VM_WARN_ON(area && end > area->vm_end);
1191 VM_WARN_ON(addr >= end);
1192
1193
1194
1195
1196 if (prev && prev->vm_end == addr &&
1197 mpol_equal(vma_policy(prev), policy) &&
1198 can_vma_merge_after(prev, vm_flags,
1199 anon_vma, file, pgoff,
1200 vm_userfaultfd_ctx, anon_name)) {
1201
1202
1203
1204 if (next && end == next->vm_start &&
1205 mpol_equal(policy, vma_policy(next)) &&
1206 can_vma_merge_before(next, vm_flags,
1207 anon_vma, file,
1208 pgoff+pglen,
1209 vm_userfaultfd_ctx, anon_name) &&
1210 is_mergeable_anon_vma(prev->anon_vma,
1211 next->anon_vma, NULL)) {
1212
1213 err = __vma_adjust(prev, prev->vm_start,
1214 next->vm_end, prev->vm_pgoff, NULL,
1215 prev);
1216 } else
1217 err = __vma_adjust(prev, prev->vm_start,
1218 end, prev->vm_pgoff, NULL, prev);
1219 if (err)
1220 return NULL;
1221 khugepaged_enter_vma_merge(prev, vm_flags);
1222 return prev;
1223 }
1224
1225
1226
1227
1228 if (next && end == next->vm_start &&
1229 mpol_equal(policy, vma_policy(next)) &&
1230 can_vma_merge_before(next, vm_flags,
1231 anon_vma, file, pgoff+pglen,
1232 vm_userfaultfd_ctx, anon_name)) {
1233 if (prev && addr < prev->vm_end)
1234 err = __vma_adjust(prev, prev->vm_start,
1235 addr, prev->vm_pgoff, NULL, next);
1236 else {
1237 err = __vma_adjust(area, addr, next->vm_end,
1238 next->vm_pgoff - pglen, NULL, next);
1239
1240
1241
1242
1243
1244 area = next;
1245 }
1246 if (err)
1247 return NULL;
1248 khugepaged_enter_vma_merge(area, vm_flags);
1249 return area;
1250 }
1251
1252 return NULL;
1253}

/*
 * Rough compatibility check to quickly see if it's even worth looking at
 * sharing an anon_vma: the two VMAs must be adjacent, have the same
 * vm_file and memory policy, matching file offsets, and flags that differ
 * only in bits mprotect() may change (and VM_SOFTDIRTY).
 */
1268static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1269{
1270 return a->vm_end == b->vm_start &&
1271 mpol_equal(vma_policy(a), vma_policy(b)) &&
1272 a->vm_file == b->vm_file &&
1273 !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
1274 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1275}

/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old': the 'a' and 'b' VMAs must be compatible neighbours, and
 * 'old' must have a single, stable anon_vma (READ_ONCE() because it is
 * peeked at without holding the anon_vma lock).
 */
1299static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1300{
1301 if (anon_vma_compatible(a, b)) {
1302 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1303
1304 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1305 return anon_vma;
1306 }
1307 return NULL;
1308}

/*
 * find_mergeable_anon_vma() is used by anon_vma_prepare() to check the
 * neighbouring vmas for a suitable anon_vma before allocating a new one.
 * Without this, a repetitive sequence of mprotects and faults could
 * create distinct anon_vmas and prevent later vma merging.
 */
1318struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1319{
1320 struct anon_vma *anon_vma = NULL;
1321
1322
1323 if (vma->vm_next) {
1324 anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
1325 if (anon_vma)
1326 return anon_vma;
1327 }
1328
1329
1330 if (vma->vm_prev)
1331 anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343 return anon_vma;
1344}

/*
 * If a hint addr is less than mmap_min_addr, change the hint to be as
 * low as possible while still greater than mmap_min_addr.
 */
1350static inline unsigned long round_hint_to_min(unsigned long hint)
1351{
1352 hint &= PAGE_MASK;
1353 if (((void *)hint != NULL) &&
1354 (hint < mmap_min_addr))
1355 return PAGE_ALIGN(mmap_min_addr);
1356 return hint;
1357}
1358
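/*
 * Check that adding @len bytes of VM_LOCKED memory would not push the
 * process over RLIMIT_MEMLOCK (unless it has CAP_IPC_LOCK).
 */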
1359int mlock_future_check(struct mm_struct *mm, unsigned long flags,
1360 unsigned long len)
1361{
1362 unsigned long locked, lock_limit;
1363
1364
1365 if (flags & VM_LOCKED) {
1366 locked = len >> PAGE_SHIFT;
1367 locked += mm->locked_vm;
1368 lock_limit = rlimit(RLIMIT_MEMLOCK);
1369 lock_limit >>= PAGE_SHIFT;
1370 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1371 return -EAGAIN;
1372 }
1373 return 0;
1374}
1375
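/*
 * Largest file offset (in bytes) that a mapping of this file may reach;
 * used below to reject pgoff/len combinations that would overflow.
 */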
1376static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
1377{
1378 if (S_ISREG(inode->i_mode))
1379 return MAX_LFS_FILESIZE;
1380
1381 if (S_ISBLK(inode->i_mode))
1382 return MAX_LFS_FILESIZE;
1383
1384 if (S_ISSOCK(inode->i_mode))
1385 return MAX_LFS_FILESIZE;
1386
1387
1388 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
1389 return 0;
1390
1391
1392 return ULONG_MAX;
1393}
1394
1395static inline bool file_mmap_ok(struct file *file, struct inode *inode,
1396 unsigned long pgoff, unsigned long len)
1397{
1398 u64 maxsize = file_mmap_size_max(file, inode);
1399
1400 if (maxsize && len > maxsize)
1401 return false;
1402 maxsize -= len;
1403 if (pgoff > maxsize >> PAGE_SHIFT)
1404 return false;
1405 return true;
1406}

/*
 * The caller must write-lock current->mm->mmap_lock.
 */
1411unsigned long do_mmap(struct file *file, unsigned long addr,
1412 unsigned long len, unsigned long prot,
1413 unsigned long flags, unsigned long pgoff,
1414 unsigned long *populate, struct list_head *uf)
1415{
1416 struct mm_struct *mm = current->mm;
1417 vm_flags_t vm_flags;
1418 int pkey = 0;
1419
1420 *populate = 0;
1421
1422 if (!len)
1423 return -EINVAL;
1424
1425
1426
1427
1428
1429
1430
1431 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1432 if (!(file && path_noexec(&file->f_path)))
1433 prot |= PROT_EXEC;
1434
1435
1436 if (flags & MAP_FIXED_NOREPLACE)
1437 flags |= MAP_FIXED;
1438
1439 if (!(flags & MAP_FIXED))
1440 addr = round_hint_to_min(addr);
1441
1442
1443 len = PAGE_ALIGN(len);
1444 if (!len)
1445 return -ENOMEM;
1446
1447
1448 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1449 return -EOVERFLOW;
1450
1451
1452 if (mm->map_count > sysctl_max_map_count)
1453 return -ENOMEM;
1454
1455
1456
1457
1458 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1459 if (IS_ERR_VALUE(addr))
1460 return addr;
1461
1462 if (flags & MAP_FIXED_NOREPLACE) {
1463 if (find_vma_intersection(mm, addr, addr + len))
1464 return -EEXIST;
1465 }
1466
1467 if (prot == PROT_EXEC) {
1468 pkey = execute_only_pkey(mm);
1469 if (pkey < 0)
1470 pkey = 0;
1471 }
1472
1473
1474
1475
1476
1477 vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1478 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1479
1480 if (flags & MAP_LOCKED)
1481 if (!can_do_mlock())
1482 return -EPERM;
1483
1484 if (mlock_future_check(mm, vm_flags, len))
1485 return -EAGAIN;
1486
1487 if (file) {
1488 struct inode *inode = file_inode(file);
1489 unsigned long flags_mask;
1490
1491 if (!file_mmap_ok(file, inode, pgoff, len))
1492 return -EOVERFLOW;
1493
1494 flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
1495
1496 switch (flags & MAP_TYPE) {
1497 case MAP_SHARED:
1498
1499
1500
1501
1502
1503
1504
1505 flags &= LEGACY_MAP_MASK;
1506 fallthrough;
1507 case MAP_SHARED_VALIDATE:
1508 if (flags & ~flags_mask)
1509 return -EOPNOTSUPP;
1510 if (prot & PROT_WRITE) {
1511 if (!(file->f_mode & FMODE_WRITE))
1512 return -EACCES;
1513 if (IS_SWAPFILE(file->f_mapping->host))
1514 return -ETXTBSY;
1515 }
1516
1517
1518
1519
1520
1521 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1522 return -EACCES;
1523
1524 vm_flags |= VM_SHARED | VM_MAYSHARE;
1525 if (!(file->f_mode & FMODE_WRITE))
1526 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1527 fallthrough;
1528 case MAP_PRIVATE:
1529 if (!(file->f_mode & FMODE_READ))
1530 return -EACCES;
1531 if (path_noexec(&file->f_path)) {
1532 if (vm_flags & VM_EXEC)
1533 return -EPERM;
1534 vm_flags &= ~VM_MAYEXEC;
1535 }
1536
1537 if (!file->f_op->mmap)
1538 return -ENODEV;
1539 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1540 return -EINVAL;
1541 break;
1542
1543 default:
1544 return -EINVAL;
1545 }
1546 } else {
1547 switch (flags & MAP_TYPE) {
1548 case MAP_SHARED:
1549 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1550 return -EINVAL;
1551
1552
1553
1554 pgoff = 0;
1555 vm_flags |= VM_SHARED | VM_MAYSHARE;
1556 break;
1557 case MAP_PRIVATE:
1558
1559
1560
1561 pgoff = addr >> PAGE_SHIFT;
1562 break;
1563 default:
1564 return -EINVAL;
1565 }
1566 }
1567
1568
1569
1570
1571
1572 if (flags & MAP_NORESERVE) {
1573
1574 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1575 vm_flags |= VM_NORESERVE;
1576
1577
1578 if (file && is_file_hugepages(file))
1579 vm_flags |= VM_NORESERVE;
1580 }
1581
1582 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1583 if (!IS_ERR_VALUE(addr) &&
1584 ((vm_flags & VM_LOCKED) ||
1585 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1586 *populate = len;
1587 return addr;
1588}
1589
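/*
 * Entry point shared by the mmap_pgoff and old_mmap syscalls: resolve the
 * file descriptor (or set up a hugetlbfs file for anonymous MAP_HUGETLB
 * requests) and hand off to vm_mmap_pgoff().
 */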
1590unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1591 unsigned long prot, unsigned long flags,
1592 unsigned long fd, unsigned long pgoff)
1593{
1594 struct file *file = NULL;
1595 unsigned long retval;
1596
1597 if (!(flags & MAP_ANONYMOUS)) {
1598 audit_mmap_fd(fd, flags);
1599 file = fget(fd);
1600 if (!file)
1601 return -EBADF;
1602 if (is_file_hugepages(file)) {
1603 len = ALIGN(len, huge_page_size(hstate_file(file)));
1604 } else if (unlikely(flags & MAP_HUGETLB)) {
1605 retval = -EINVAL;
1606 goto out_fput;
1607 }
1608 } else if (flags & MAP_HUGETLB) {
1609 struct hstate *hs;
1610
1611 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1612 if (!hs)
1613 return -EINVAL;
1614
1615 len = ALIGN(len, huge_page_size(hs));
1616
1617
1618
1619
1620 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1621 VM_NORESERVE,
1622 HUGETLB_ANONHUGE_INODE,
1623 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1624 if (IS_ERR(file))
1625 return PTR_ERR(file);
1626 }
1627
1628 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1629out_fput:
1630 if (file)
1631 fput(file);
1632 return retval;
1633}
1634
1635SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1636 unsigned long, prot, unsigned long, flags,
1637 unsigned long, fd, unsigned long, pgoff)
1638{
1639 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1640}
1641
1642#ifdef __ARCH_WANT_SYS_OLD_MMAP
1643struct mmap_arg_struct {
1644 unsigned long addr;
1645 unsigned long len;
1646 unsigned long prot;
1647 unsigned long flags;
1648 unsigned long fd;
1649 unsigned long offset;
1650};
1651
1652SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1653{
1654 struct mmap_arg_struct a;
1655
1656 if (copy_from_user(&a, arg, sizeof(a)))
1657 return -EFAULT;
1658 if (offset_in_page(a.offset))
1659 return -EINVAL;
1660
1661 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1662 a.offset >> PAGE_SHIFT);
1663}
1664#endif

/*
 * Some shared mappings want their pages marked read-only so that write
 * events can be tracked.  If so, vma_set_page_prot() downgrades
 * vm_page_prot to the private version (protection_map[] without the
 * VM_SHARED bit).
 */
1672int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1673{
1674 vm_flags_t vm_flags = vma->vm_flags;
1675 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1676
1677
1678 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1679 return 0;
1680
1681
1682 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1683 return 1;
1684
1685
1686
1687 if (pgprot_val(vm_page_prot) !=
1688 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1689 return 0;
1690
1691
1692 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1693 return 1;
1694
1695
1696 if (vm_flags & VM_PFNMAP)
1697 return 0;
1698
1699
1700 return vma->vm_file && vma->vm_file->f_mapping &&
1701 mapping_can_writeback(vma->vm_file->f_mapping);
1702}

/*
 * We account for memory if it's a private writable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
1708static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1709{
1710
1711
1712
1713
1714 if (file && is_file_hugepages(file))
1715 return 0;
1716
1717 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1718}
1719
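/*
 * Actually create the mapping: clear any old mappings in the range,
 * charge memory if needed, try to merge with a neighbouring VMA, and
 * otherwise allocate, set up and link a new vm_area_struct.
 */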
1720unsigned long mmap_region(struct file *file, unsigned long addr,
1721 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1722 struct list_head *uf)
1723{
1724 struct mm_struct *mm = current->mm;
1725 struct vm_area_struct *vma, *prev, *merge;
1726 int error;
1727 struct rb_node **rb_link, *rb_parent;
1728 unsigned long charged = 0;
1729
1730
1731 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1732 unsigned long nr_pages;
1733
1734
1735
1736
1737
1738 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1739
1740 if (!may_expand_vm(mm, vm_flags,
1741 (len >> PAGE_SHIFT) - nr_pages))
1742 return -ENOMEM;
1743 }
1744
1745
1746 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
1747 return -ENOMEM;
1748
1749
1750
1751 if (accountable_mapping(file, vm_flags)) {
1752 charged = len >> PAGE_SHIFT;
1753 if (security_vm_enough_memory_mm(mm, charged))
1754 return -ENOMEM;
1755 vm_flags |= VM_ACCOUNT;
1756 }
1757
1758
1759
1760
1761 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1762 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
1763 if (vma)
1764 goto out;
1765
1766
1767
1768
1769
1770
1771 vma = vm_area_alloc(mm);
1772 if (!vma) {
1773 error = -ENOMEM;
1774 goto unacct_error;
1775 }
1776
1777 vma->vm_start = addr;
1778 vma->vm_end = addr + len;
1779 vma->vm_flags = vm_flags;
1780 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1781 vma->vm_pgoff = pgoff;
1782
1783 if (file) {
1784 if (vm_flags & VM_SHARED) {
1785 error = mapping_map_writable(file->f_mapping);
1786 if (error)
1787 goto free_vma;
1788 }
1789
1790 vma->vm_file = get_file(file);
1791 error = call_mmap(file, vma);
1792 if (error)
1793 goto unmap_and_free_vma;
1794
1795
1796
1797
1798
1799
1800
1801
1802 WARN_ON_ONCE(addr != vma->vm_start);
1803
1804 addr = vma->vm_start;
1805
1806
1807
1808
1809 if (unlikely(vm_flags != vma->vm_flags && prev)) {
1810 merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
1811 NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
1812 if (merge) {
1813
1814
1815
1816
1817 fput(vma->vm_file);
1818 vm_area_free(vma);
1819 vma = merge;
1820
1821 vm_flags = vma->vm_flags;
1822 goto unmap_writable;
1823 }
1824 }
1825
1826 vm_flags = vma->vm_flags;
1827 } else if (vm_flags & VM_SHARED) {
1828 error = shmem_zero_setup(vma);
1829 if (error)
1830 goto free_vma;
1831 } else {
1832 vma_set_anonymous(vma);
1833 }
1834
1835
1836 if (!arch_validate_flags(vma->vm_flags)) {
1837 error = -EINVAL;
1838 if (file)
1839 goto unmap_and_free_vma;
1840 else
1841 goto free_vma;
1842 }
1843
1844 vma_link(mm, vma, prev, rb_link, rb_parent);
1845
1846unmap_writable:
1847 if (file && vm_flags & VM_SHARED)
1848 mapping_unmap_writable(file->f_mapping);
1849 file = vma->vm_file;
1850out:
1851 perf_event_mmap(vma);
1852
1853 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1854 if (vm_flags & VM_LOCKED) {
1855 if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
1856 is_vm_hugetlb_page(vma) ||
1857 vma == get_gate_vma(current->mm))
1858 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1859 else
1860 mm->locked_vm += (len >> PAGE_SHIFT);
1861 }
1862
1863 if (file)
1864 uprobe_mmap(vma);

	/*
	 * A new (or expanded) vma always gets soft-dirty status, so the
	 * user-space soft-dirty tracker can tell a freshly mapped area
	 * from one that was unmapped and re-mapped in place.
	 */
1873 vma->vm_flags |= VM_SOFTDIRTY;
1874
1875 vma_set_page_prot(vma);
1876
1877 return addr;
1878
1879unmap_and_free_vma:
1880 fput(vma->vm_file);
1881 vma->vm_file = NULL;
1882
1883
1884 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1885 charged = 0;
1886 if (vm_flags & VM_SHARED)
1887 mapping_unmap_writable(file->f_mapping);
1888free_vma:
1889 vm_area_free(vma);
1890unacct_error:
1891 if (charged)
1892 vm_unacct_memory(charged);
1893 return error;
1894}
1895
1896static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1897{
	/*
	 * We implement the search by looking for an rbtree node that
	 * immediately follows a suitable gap. That is,
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */
1906 struct mm_struct *mm = current->mm;
1907 struct vm_area_struct *vma;
1908 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1909
1910
1911 length = info->length + info->align_mask;
1912 if (length < info->length)
1913 return -ENOMEM;
1914
1915
1916 if (info->high_limit < length)
1917 return -ENOMEM;
1918 high_limit = info->high_limit - length;
1919
1920 if (info->low_limit > high_limit)
1921 return -ENOMEM;
1922 low_limit = info->low_limit + length;
1923
1924
1925 if (RB_EMPTY_ROOT(&mm->mm_rb))
1926 goto check_highest;
1927 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1928 if (vma->rb_subtree_gap < length)
1929 goto check_highest;
1930
1931 while (true) {
1932
1933 gap_end = vm_start_gap(vma);
1934 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1935 struct vm_area_struct *left =
1936 rb_entry(vma->vm_rb.rb_left,
1937 struct vm_area_struct, vm_rb);
1938 if (left->rb_subtree_gap >= length) {
1939 vma = left;
1940 continue;
1941 }
1942 }
1943
1944 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1945check_current:
1946
1947 if (gap_start > high_limit)
1948 return -ENOMEM;
1949 if (gap_end >= low_limit &&
1950 gap_end > gap_start && gap_end - gap_start >= length)
1951 goto found;
1952
1953
1954 if (vma->vm_rb.rb_right) {
1955 struct vm_area_struct *right =
1956 rb_entry(vma->vm_rb.rb_right,
1957 struct vm_area_struct, vm_rb);
1958 if (right->rb_subtree_gap >= length) {
1959 vma = right;
1960 continue;
1961 }
1962 }
1963
1964
1965 while (true) {
1966 struct rb_node *prev = &vma->vm_rb;
1967 if (!rb_parent(prev))
1968 goto check_highest;
1969 vma = rb_entry(rb_parent(prev),
1970 struct vm_area_struct, vm_rb);
1971 if (prev == vma->vm_rb.rb_left) {
1972 gap_start = vm_end_gap(vma->vm_prev);
1973 gap_end = vm_start_gap(vma);
1974 goto check_current;
1975 }
1976 }
1977 }
1978
1979check_highest:
1980
1981 gap_start = mm->highest_vm_end;
1982 gap_end = ULONG_MAX;
1983 if (gap_start > high_limit)
1984 return -ENOMEM;
1985
1986found:
1987
1988 if (gap_start < info->low_limit)
1989 gap_start = info->low_limit;
1990
1991
1992 gap_start += (info->align_offset - gap_start) & info->align_mask;
1993
1994 VM_BUG_ON(gap_start + info->length > info->high_limit);
1995 VM_BUG_ON(gap_start + info->length > gap_end);
1996 return gap_start;
1997}
1998
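/*
 * Like unmapped_area(), but walk the gaps top-down, returning the highest
 * suitable address below info->high_limit.
 */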
1999static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
2000{
2001 struct mm_struct *mm = current->mm;
2002 struct vm_area_struct *vma;
2003 unsigned long length, low_limit, high_limit, gap_start, gap_end;
2004
2005
2006 length = info->length + info->align_mask;
2007 if (length < info->length)
2008 return -ENOMEM;
2009
2010
2011
2012
2013
2014 gap_end = info->high_limit;
2015 if (gap_end < length)
2016 return -ENOMEM;
2017 high_limit = gap_end - length;
2018
2019 if (info->low_limit > high_limit)
2020 return -ENOMEM;
2021 low_limit = info->low_limit + length;
2022
2023
2024 gap_start = mm->highest_vm_end;
2025 if (gap_start <= high_limit)
2026 goto found_highest;
2027
2028
2029 if (RB_EMPTY_ROOT(&mm->mm_rb))
2030 return -ENOMEM;
2031 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2032 if (vma->rb_subtree_gap < length)
2033 return -ENOMEM;
2034
2035 while (true) {
2036
2037 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2038 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2039 struct vm_area_struct *right =
2040 rb_entry(vma->vm_rb.rb_right,
2041 struct vm_area_struct, vm_rb);
2042 if (right->rb_subtree_gap >= length) {
2043 vma = right;
2044 continue;
2045 }
2046 }
2047
2048check_current:
2049
2050 gap_end = vm_start_gap(vma);
2051 if (gap_end < low_limit)
2052 return -ENOMEM;
2053 if (gap_start <= high_limit &&
2054 gap_end > gap_start && gap_end - gap_start >= length)
2055 goto found;
2056
2057
2058 if (vma->vm_rb.rb_left) {
2059 struct vm_area_struct *left =
2060 rb_entry(vma->vm_rb.rb_left,
2061 struct vm_area_struct, vm_rb);
2062 if (left->rb_subtree_gap >= length) {
2063 vma = left;
2064 continue;
2065 }
2066 }
2067
2068
2069 while (true) {
2070 struct rb_node *prev = &vma->vm_rb;
2071 if (!rb_parent(prev))
2072 return -ENOMEM;
2073 vma = rb_entry(rb_parent(prev),
2074 struct vm_area_struct, vm_rb);
2075 if (prev == vma->vm_rb.rb_right) {
2076 gap_start = vma->vm_prev ?
2077 vm_end_gap(vma->vm_prev) : 0;
2078 goto check_current;
2079 }
2080 }
2081 }
2082
2083found:
2084
2085 if (gap_end > info->high_limit)
2086 gap_end = info->high_limit;
2087
2088found_highest:
2089
2090 gap_end -= info->length;
2091 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2092
2093 VM_BUG_ON(gap_end < info->low_limit);
2094 VM_BUG_ON(gap_end < gap_start);
2095 return gap_end;
2096}

/*
 * Search for an unmapped address range.
 *
 * We are looking for a range that:
 * - does not intersect with any VMA;
 * - is contained within the [low_limit, high_limit) interval;
 * - is at least the desired size;
 * - satisfies (begin_addr & align_mask) == (align_offset & align_mask).
 */
2107unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
2108{
2109 unsigned long addr;
2110
2111 if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
2112 addr = unmapped_area_topdown(info);
2113 else
2114 addr = unmapped_area(info);
2115
2116 trace_vm_unmapped_area(addr, info);
2117 return addr;
2118}

/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert: a return value with the low bits set
 * means an error value, i.e.
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 * This function "knows" that -ENOMEM has those bits set.
 */
2131#ifndef HAVE_ARCH_UNMAPPED_AREA
2132unsigned long
2133arch_get_unmapped_area(struct file *filp, unsigned long addr,
2134 unsigned long len, unsigned long pgoff, unsigned long flags)
2135{
2136 struct mm_struct *mm = current->mm;
2137 struct vm_area_struct *vma, *prev;
2138 struct vm_unmapped_area_info info;
2139 const unsigned long mmap_end = arch_get_mmap_end(addr);
2140
2141 if (len > mmap_end - mmap_min_addr)
2142 return -ENOMEM;
2143
2144 if (flags & MAP_FIXED)
2145 return addr;
2146
2147 if (addr) {
2148 addr = PAGE_ALIGN(addr);
2149 vma = find_vma_prev(mm, addr, &prev);
2150 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2151 (!vma || addr + len <= vm_start_gap(vma)) &&
2152 (!prev || addr >= vm_end_gap(prev)))
2153 return addr;
2154 }
2155
2156 info.flags = 0;
2157 info.length = len;
2158 info.low_limit = mm->mmap_base;
2159 info.high_limit = mmap_end;
2160 info.align_mask = 0;
2161 info.align_offset = 0;
2162 return vm_unmapped_area(&info);
2163}
2164#endif

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 */
2170#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2171unsigned long
2172arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
2173 unsigned long len, unsigned long pgoff,
2174 unsigned long flags)
2175{
2176 struct vm_area_struct *vma, *prev;
2177 struct mm_struct *mm = current->mm;
2178 struct vm_unmapped_area_info info;
2179 const unsigned long mmap_end = arch_get_mmap_end(addr);
2180
2181
2182 if (len > mmap_end - mmap_min_addr)
2183 return -ENOMEM;
2184
2185 if (flags & MAP_FIXED)
2186 return addr;
2187
2188
2189 if (addr) {
2190 addr = PAGE_ALIGN(addr);
2191 vma = find_vma_prev(mm, addr, &prev);
2192 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2193 (!vma || addr + len <= vm_start_gap(vma)) &&
2194 (!prev || addr >= vm_end_gap(prev)))
2195 return addr;
2196 }
2197
2198 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2199 info.length = len;
2200 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2201 info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
2202 info.align_mask = 0;
2203 info.align_offset = 0;
2204 addr = vm_unmapped_area(&info);
2205
2206
2207
2208
2209
2210
2211
2212 if (offset_in_page(addr)) {
2213 VM_BUG_ON(addr != -ENOMEM);
2214 info.flags = 0;
2215 info.low_limit = TASK_UNMAPPED_BASE;
2216 info.high_limit = mmap_end;
2217 addr = vm_unmapped_area(&info);
2218 }
2219
2220 return addr;
2221}
2222#endif
2223
2224unsigned long
2225get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2226 unsigned long pgoff, unsigned long flags)
2227{
2228 unsigned long (*get_area)(struct file *, unsigned long,
2229 unsigned long, unsigned long, unsigned long);
2230
2231 unsigned long error = arch_mmap_check(addr, len, flags);
2232 if (error)
2233 return error;
2234
2235
2236 if (len > TASK_SIZE)
2237 return -ENOMEM;
2238
2239 get_area = current->mm->get_unmapped_area;
2240 if (file) {
2241 if (file->f_op->get_unmapped_area)
2242 get_area = file->f_op->get_unmapped_area;
2243 } else if (flags & MAP_SHARED) {
2244
2245
2246
2247
2248
2249 pgoff = 0;
2250 get_area = shmem_get_unmapped_area;
2251 }
2252
2253 addr = get_area(file, addr, len, pgoff, flags);
2254 if (IS_ERR_VALUE(addr))
2255 return addr;
2256
2257 if (addr > TASK_SIZE - len)
2258 return -ENOMEM;
2259 if (offset_in_page(addr))
2260 return -EINVAL;
2261
2262 error = security_mmap_addr(addr);
2263 return error ? error : addr;
2264}
2265
2266EXPORT_SYMBOL(get_unmapped_area);
2267
2268
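/*
 * Look up the first VMA which satisfies addr < vm_end (NULL if none),
 * consulting the per-thread vmacache before falling back to the rbtree.
 * The caller must hold the mmap_lock.
 */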
2269struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2270{
2271 struct rb_node *rb_node;
2272 struct vm_area_struct *vma;
2273
2274 mmap_assert_locked(mm);
2275
2276 vma = vmacache_find(mm, addr);
2277 if (likely(vma))
2278 return vma;
2279
2280 rb_node = mm->mm_rb.rb_node;
2281
2282 while (rb_node) {
2283 struct vm_area_struct *tmp;
2284
2285 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2286
2287 if (tmp->vm_end > addr) {
2288 vma = tmp;
2289 if (tmp->vm_start <= addr)
2290 break;
2291 rb_node = rb_node->rb_left;
2292 } else
2293 rb_node = rb_node->rb_right;
2294 }
2295
2296 if (vma)
2297 vmacache_update(addr, vma);
2298 return vma;
2299}
2300
2301EXPORT_SYMBOL(find_vma);

/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */
2306struct vm_area_struct *
2307find_vma_prev(struct mm_struct *mm, unsigned long addr,
2308 struct vm_area_struct **pprev)
2309{
2310 struct vm_area_struct *vma;
2311
2312 vma = find_vma(mm, addr);
2313 if (vma) {
2314 *pprev = vma->vm_prev;
2315 } else {
2316 struct rb_node *rb_node = rb_last(&mm->mm_rb);
2317
2318 *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
2319 }
2320 return vma;
2321}

/*
 * Verify that the stack growth is acceptable and update the accounting.
 * This is shared with both the grow-up and grow-down cases.
 */
2328static int acct_stack_growth(struct vm_area_struct *vma,
2329 unsigned long size, unsigned long grow)
2330{
2331 struct mm_struct *mm = vma->vm_mm;
2332 unsigned long new_start;
2333
2334
2335 if (!may_expand_vm(mm, vma->vm_flags, grow))
2336 return -ENOMEM;
2337
2338
2339 if (size > rlimit(RLIMIT_STACK))
2340 return -ENOMEM;
2341
2342
2343 if (vma->vm_flags & VM_LOCKED) {
2344 unsigned long locked;
2345 unsigned long limit;
2346 locked = mm->locked_vm + grow;
2347 limit = rlimit(RLIMIT_MEMLOCK);
2348 limit >>= PAGE_SHIFT;
2349 if (locked > limit && !capable(CAP_IPC_LOCK))
2350 return -ENOMEM;
2351 }
2352
2353
2354 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2355 vma->vm_end - size;
2356 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2357 return -EFAULT;
2358
2359
2360
2361
2362
2363 if (security_vm_enough_memory_mm(mm, grow))
2364 return -ENOMEM;
2365
2366 return 0;
2367}
2368
2369#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
 * Grow the stack upwards (CONFIG_STACK_GROWSUP, and IA-64's register
 * backing store): vma is the last one with address > vma->vm_end, so
 * the vma has to be extended.
 */
2374int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2375{
2376 struct mm_struct *mm = vma->vm_mm;
2377 struct vm_area_struct *next;
2378 unsigned long gap_addr;
2379 int error = 0;
2380
2381 if (!(vma->vm_flags & VM_GROWSUP))
2382 return -EFAULT;
2383
2384
2385 address &= PAGE_MASK;
2386 if (address >= (TASK_SIZE & PAGE_MASK))
2387 return -ENOMEM;
2388 address += PAGE_SIZE;
2389
2390
2391 gap_addr = address + stack_guard_gap;
2392
2393
2394 if (gap_addr < address || gap_addr > TASK_SIZE)
2395 gap_addr = TASK_SIZE;
2396
2397 next = vma->vm_next;
2398 if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
2399 if (!(next->vm_flags & VM_GROWSUP))
2400 return -ENOMEM;
2401
2402 }
2403
2404
2405 if (unlikely(anon_vma_prepare(vma)))
2406 return -ENOMEM;
2407
2408
2409
2410
2411
2412
2413 anon_vma_lock_write(vma->anon_vma);
2414
2415
2416 if (address > vma->vm_end) {
2417 unsigned long size, grow;
2418
2419 size = address - vma->vm_start;
2420 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2421
2422 error = -ENOMEM;
2423 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2424 error = acct_stack_growth(vma, size, grow);
2425 if (!error) {
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437 spin_lock(&mm->page_table_lock);
2438 if (vma->vm_flags & VM_LOCKED)
2439 mm->locked_vm += grow;
2440 vm_stat_account(mm, vma->vm_flags, grow);
2441 anon_vma_interval_tree_pre_update_vma(vma);
2442 vma->vm_end = address;
2443 anon_vma_interval_tree_post_update_vma(vma);
2444 if (vma->vm_next)
2445 vma_gap_update(vma->vm_next);
2446 else
2447 mm->highest_vm_end = vm_end_gap(vma);
2448 spin_unlock(&mm->page_table_lock);
2449
2450 perf_event_mmap(vma);
2451 }
2452 }
2453 }
2454 anon_vma_unlock_write(vma->anon_vma);
2455 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2456 validate_mm(mm);
2457 return error;
2458}
2459#endif

/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */
2464int expand_downwards(struct vm_area_struct *vma,
2465 unsigned long address)
2466{
2467 struct mm_struct *mm = vma->vm_mm;
2468 struct vm_area_struct *prev;
2469 int error = 0;
2470
2471 address &= PAGE_MASK;
2472 if (address < mmap_min_addr)
2473 return -EPERM;
2474
2475
2476 prev = vma->vm_prev;
2477
2478 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2479 vma_is_accessible(prev)) {
2480 if (address - prev->vm_end < stack_guard_gap)
2481 return -ENOMEM;
2482 }
2483
2484
2485 if (unlikely(anon_vma_prepare(vma)))
2486 return -ENOMEM;
2487
2488
2489
2490
2491
2492
2493 anon_vma_lock_write(vma->anon_vma);
2494
2495
2496 if (address < vma->vm_start) {
2497 unsigned long size, grow;
2498
2499 size = vma->vm_end - address;
2500 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2501
2502 error = -ENOMEM;
2503 if (grow <= vma->vm_pgoff) {
2504 error = acct_stack_growth(vma, size, grow);
2505 if (!error) {
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517 spin_lock(&mm->page_table_lock);
2518 if (vma->vm_flags & VM_LOCKED)
2519 mm->locked_vm += grow;
2520 vm_stat_account(mm, vma->vm_flags, grow);
2521 anon_vma_interval_tree_pre_update_vma(vma);
2522 vma->vm_start = address;
2523 vma->vm_pgoff -= grow;
2524 anon_vma_interval_tree_post_update_vma(vma);
2525 vma_gap_update(vma);
2526 spin_unlock(&mm->page_table_lock);
2527
2528 perf_event_mmap(vma);
2529 }
2530 }
2531 }
2532 anon_vma_unlock_write(vma->anon_vma);
2533 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2534 validate_mm(mm);
2535 return error;
2536}

/* enforced gap between the expanding stack and other mappings */
2539unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
2540
2541static int __init cmdline_parse_stack_guard_gap(char *p)
2542{
2543 unsigned long val;
2544 char *endptr;
2545
2546 val = simple_strtoul(p, &endptr, 10);
2547 if (!*endptr)
2548 stack_guard_gap = val << PAGE_SHIFT;
2549
2550 return 1;
2551}
2552__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2553
2554#ifdef CONFIG_STACK_GROWSUP
2555int expand_stack(struct vm_area_struct *vma, unsigned long address)
2556{
2557 return expand_upwards(vma, address);
2558}
2559
2560struct vm_area_struct *
2561find_extend_vma(struct mm_struct *mm, unsigned long addr)
2562{
2563 struct vm_area_struct *vma, *prev;
2564
2565 addr &= PAGE_MASK;
2566 vma = find_vma_prev(mm, addr, &prev);
2567 if (vma && (vma->vm_start <= addr))
2568 return vma;
2569
2570 if (!prev || expand_stack(prev, addr))
2571 return NULL;
2572 if (prev->vm_flags & VM_LOCKED)
2573 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2574 return prev;
2575}
2576#else
2577int expand_stack(struct vm_area_struct *vma, unsigned long address)
2578{
2579 return expand_downwards(vma, address);
2580}
2581
2582struct vm_area_struct *
2583find_extend_vma(struct mm_struct *mm, unsigned long addr)
2584{
2585 struct vm_area_struct *vma;
2586 unsigned long start;
2587
2588 addr &= PAGE_MASK;
2589 vma = find_vma(mm, addr);
2590 if (!vma)
2591 return NULL;
2592 if (vma->vm_start <= addr)
2593 return vma;
2594 if (!(vma->vm_flags & VM_GROWSDOWN))
2595 return NULL;
2596 start = vma->vm_start;
2597 if (expand_stack(vma, addr))
2598 return NULL;
2599 if (vma->vm_flags & VM_LOCKED)
2600 populate_vma_page_range(vma, addr, start, NULL);
2601 return vma;
2602}
2603#endif
2604
2605EXPORT_SYMBOL_GPL(find_extend_vma);

/*
 * Ok - we have the memory areas we should free on a vma list, so release
 * them and do the vma accounting updates.
 *
 * Called with the mm semaphore held.
 */
2613static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2614{
2615 unsigned long nr_accounted = 0;
2616
2617
2618 update_hiwater_vm(mm);
2619 do {
2620 long nrpages = vma_pages(vma);
2621
2622 if (vma->vm_flags & VM_ACCOUNT)
2623 nr_accounted += nrpages;
2624 vm_stat_account(mm, vma->vm_flags, -nrpages);
2625 vma = remove_vma(vma);
2626 } while (vma);
2627 vm_unacct_memory(nr_accounted);
2628 validate_mm(mm);
2629}

/*
 * Get rid of the page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */
2636static void unmap_region(struct mm_struct *mm,
2637 struct vm_area_struct *vma, struct vm_area_struct *prev,
2638 unsigned long start, unsigned long end)
2639{
2640 struct vm_area_struct *next = vma_next(mm, prev);
2641 struct mmu_gather tlb;
2642
2643 lru_add_drain();
2644 tlb_gather_mmu(&tlb, mm);
2645 update_hiwater_rss(mm);
2646 unmap_vmas(&tlb, vma, start, end);
2647 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2648 next ? next->vm_start : USER_PGTABLES_CEILING);
2649 tlb_finish_mmu(&tlb);
2650}

/*
 * Create a list of the VMAs touched by the unmap, removing them from the
 * mm's vma list and rbtree as we go.
 */
2656static bool
2657detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2658 struct vm_area_struct *prev, unsigned long end)
2659{
2660 struct vm_area_struct **insertion_point;
2661 struct vm_area_struct *tail_vma = NULL;
2662
2663 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2664 vma->vm_prev = NULL;
2665 do {
2666 vma_rb_erase(vma, &mm->mm_rb);
2667 if (vma->vm_flags & VM_LOCKED)
2668 mm->locked_vm -= vma_pages(vma);
2669 mm->map_count--;
2670 tail_vma = vma;
2671 vma = vma->vm_next;
2672 } while (vma && vma->vm_start < end);
2673 *insertion_point = vma;
2674 if (vma) {
2675 vma->vm_prev = prev;
2676 vma_gap_update(vma);
2677 } else
2678 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2679 tail_vma->vm_next = NULL;

	/* Kill the per-thread vma cache */
2682 vmacache_invalidate(mm);

	/*
	 * Do not downgrade mmap_lock if we are next to a VM_GROWSDOWN or
	 * VM_GROWSUP VMA. Such VMAs can change their size under
	 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
	 */
2689 if (vma && (vma->vm_flags & VM_GROWSDOWN))
2690 return false;
2691 if (prev && (prev->vm_flags & VM_GROWSUP))
2692 return false;
2693 return true;
2694}

/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */
2700int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2701 unsigned long addr, int new_below)
2702{
2703 struct vm_area_struct *new;
2704 int err;
2705
2706 if (vma->vm_ops && vma->vm_ops->may_split) {
2707 err = vma->vm_ops->may_split(vma, addr);
2708 if (err)
2709 return err;
2710 }
2711
2712 new = vm_area_dup(vma);
2713 if (!new)
2714 return -ENOMEM;
2715
2716 if (new_below)
2717 new->vm_end = addr;
2718 else {
2719 new->vm_start = addr;
2720 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2721 }
2722
2723 err = vma_dup_policy(vma, new);
2724 if (err)
2725 goto out_free_vma;
2726
2727 err = anon_vma_clone(new, vma);
2728 if (err)
2729 goto out_free_mpol;
2730
2731 if (new->vm_file)
2732 get_file(new->vm_file);
2733
2734 if (new->vm_ops && new->vm_ops->open)
2735 new->vm_ops->open(new);
2736
2737 if (new_below)
2738 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2739 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2740 else
2741 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	/* Success. */
2744 if (!err)
2745 return 0;

	/* Clean everything up if vma_adjust failed. */
2748 if (new->vm_ops && new->vm_ops->close)
2749 new->vm_ops->close(new);
2750 if (new->vm_file)
2751 fput(new->vm_file);
2752 unlink_anon_vmas(new);
2753 out_free_mpol:
2754 mpol_put(vma_policy(new));
2755 out_free_vma:
2756 vm_area_free(new);
2757 return err;
2758}

/*
 * Split a vma into two pieces at address 'addr'; a new vma is allocated
 * for either the first part or the tail.
 */
2764int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2765 unsigned long addr, int new_below)
2766{
2767 if (mm->map_count >= sysctl_max_map_count)
2768 return -ENOMEM;
2769
2770 return __split_vma(mm, vma, addr, new_below);
2771}

/* Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 */
2778int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2779 struct list_head *uf, bool downgrade)
2780{
2781 unsigned long end;
2782 struct vm_area_struct *vma, *prev, *last;
2783
2784 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2785 return -EINVAL;
2786
2787 len = PAGE_ALIGN(len);
2788 end = start + len;
2789 if (len == 0)
2790 return -EINVAL;

	/*
	 * arch_unmap() might do unmaps itself.  It must be called
	 * and finish any rbtree manipulation before this code
	 * runs and also starts to manipulate the rbtree.
	 */
2797 arch_unmap(mm, start, end);

	/* Find the first overlapping VMA where start < vma->vm_end */
2800 vma = find_vma_intersection(mm, start, end);
2801 if (!vma)
2802 return 0;
2803 prev = vma->vm_prev;
	/* we have start < vma->vm_end */

	/*
	 * If we need to split any vma, do it now to save pain later.
	 *
	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
	 * unmapped vm_area_struct will remain in use: so the lower split_vma
	 * places tmp vma above, and the higher split_vma places tmp vma below.
	 */
2812 if (start > vma->vm_start) {
2813 int error;

		/*
		 * Make sure that map_count on return from munmap() will
		 * not exceed its limit; but let map_count go just above
		 * its limit temporarily, to help free resources as expected.
		 */
2819
2820 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2821 return -ENOMEM;
2822
2823 error = __split_vma(mm, vma, start, 0);
2824 if (error)
2825 return error;
2826 prev = vma;
2827 }

	/* Does it split the last one? */
2830 last = find_vma(mm, end);
2831 if (last && end > last->vm_start) {
2832 int error = __split_vma(mm, last, end, 1);
2833 if (error)
2834 return error;
2835 }
2836 vma = vma_next(mm, prev);
2837
2838 if (unlikely(uf)) {
		/*
		 * If userfaultfd_unmap_prep returns an error the vmas
		 * will remain split, but userland will get a
		 * highly unexpected error anyway. This is no
		 * different than the case where the first of the two
		 * __split_vma fails, but we don't undo the first
		 * split, despite we could. This is unlikely enough
		 * failure that it's not worth optimizing it for.
		 */
2848 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2849 if (error)
2850 return error;
2851 }

	/* Detach the vmas from the rbtree and the vma list */
2854 if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
2855 downgrade = false;
2856
2857 if (downgrade)
2858 mmap_write_downgrade(mm);
2859
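	/*
	 * The detached vmas are no longer reachable through the rbtree or the
	 * vma list, so unmapping them is safe even under a downgraded lock.
	 */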
2860 unmap_region(mm, vma, prev, start, end);

	/* Fix up all other VM information */
2863 remove_vma_list(mm, vma);
2864
2865 return downgrade ? 1 : 0;
2866}
2867
2868int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2869 struct list_head *uf)
2870{
2871 return __do_munmap(mm, start, len, uf, false);
2872}
2873
2874static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
2875{
2876 int ret;
2877 struct mm_struct *mm = current->mm;
2878 LIST_HEAD(uf);
2879
2880 if (mmap_write_lock_killable(mm))
2881 return -EINTR;
2882
2883 ret = __do_munmap(mm, start, len, &uf, downgrade);

	/*
	 * Returning 1 indicates mmap_lock has been downgraded.
	 * But 1 is not a legal return value of vm_munmap() and munmap(),
	 * so reset it to 0 before returning.
	 */
2889 if (ret == 1) {
2890 mmap_read_unlock(mm);
2891 ret = 0;
2892 } else
2893 mmap_write_unlock(mm);
2894
2895 userfaultfd_unmap_complete(mm, &uf);
2896 return ret;
2897}
2898
2899int vm_munmap(unsigned long start, size_t len)
2900{
2901 return __vm_munmap(start, len, false);
2902}
2903EXPORT_SYMBOL(vm_munmap);
2904
2905SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2906{
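	/* Strip any architecture tag bits from the userspace address. */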
2907 addr = untagged_addr(addr);
2908 return __vm_munmap(addr, len, true);
2909}

/*
 * Emulation of the deprecated remap_file_pages() syscall.
 */
2915SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2916 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2917{
2918
2919 struct mm_struct *mm = current->mm;
2920 struct vm_area_struct *vma;
2921 unsigned long populate = 0;
2922 unsigned long ret = -EINVAL;
2923 struct file *file;
2924
2925 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
2926 current->comm, current->pid);
2927
2928 if (prot)
2929 return ret;
2930 start = start & PAGE_MASK;
2931 size = size & PAGE_MASK;
2932
2933 if (start + size <= start)
2934 return ret;

	/* Does pgoff wrap? */
2937 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2938 return ret;
2939
2940 if (mmap_write_lock_killable(mm))
2941 return -EINTR;
2942
2943 vma = vma_lookup(mm, start);
2944
2945 if (!vma || !(vma->vm_flags & VM_SHARED))
2946 goto out;
2947
2948 if (start + size > vma->vm_end) {
2949 struct vm_area_struct *next;
2950
2951 for (next = vma->vm_next; next; next = next->vm_next) {
			/* hole between vmas? */
2953 if (next->vm_start != next->vm_prev->vm_end)
2954 goto out;
2955
2956 if (next->vm_file != vma->vm_file)
2957 goto out;
2958
2959 if (next->vm_flags != vma->vm_flags)
2960 goto out;
2961
2962 if (start + size <= next->vm_end)
2963 break;
2964 }
2965
2966 if (!next)
2967 goto out;
2968 }
2969
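	/* Rebuild the mmap prot and flags from the existing vma for do_mmap(). */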
2970 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2971 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2972 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2973
2974 flags &= MAP_NONBLOCK;
2975 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2976 if (vma->vm_flags & VM_LOCKED)
2977 flags |= MAP_LOCKED;
2978
2979 file = get_file(vma->vm_file);
2980 ret = do_mmap(vma->vm_file, start, size,
2981 prot, flags, pgoff, &populate, NULL);
2982 fput(file);
2983out:
2984 mmap_write_unlock(mm);
2985 if (populate)
2986 mm_populate(ret, populate);
2987 if (!IS_ERR_VALUE(ret))
2988 ret = 0;
2989 return ret;
2990}

/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */
2997static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
2998{
2999 struct mm_struct *mm = current->mm;
3000 struct vm_area_struct *vma, *prev;
3001 struct rb_node **rb_link, *rb_parent;
3002 pgoff_t pgoff = addr >> PAGE_SHIFT;
3003 int error;
3004 unsigned long mapped_addr;

	/* Until we need other flags, refuse anything except VM_EXEC. */
3007 if ((flags & (~VM_EXEC)) != 0)
3008 return -EINVAL;
3009 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
3010
3011 mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
3012 if (IS_ERR_VALUE(mapped_addr))
3013 return mapped_addr;
3014
3015 error = mlock_future_check(mm, mm->def_flags, len);
3016 if (error)
3017 return error;

	/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
3020 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
3021 return -ENOMEM;

	/* Check against address space limits *after* clearing old maps... */
3024 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
3025 return -ENOMEM;
3026
3027 if (mm->map_count > sysctl_max_map_count)
3028 return -ENOMEM;
3029
3030 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
3031 return -ENOMEM;

	/* Can we just expand an old private anonymous mapping? */
3034 vma = vma_merge(mm, prev, addr, addr + len, flags,
3035 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
3036 if (vma)
3037 goto out;

	/*
	 * create a vma struct for an anonymous mapping
	 */
3042 vma = vm_area_alloc(mm);
3043 if (!vma) {
3044 vm_unacct_memory(len >> PAGE_SHIFT);
3045 return -ENOMEM;
3046 }
3047
3048 vma_set_anonymous(vma);
3049 vma->vm_start = addr;
3050 vma->vm_end = addr + len;
3051 vma->vm_pgoff = pgoff;
3052 vma->vm_flags = flags;
3053 vma->vm_page_prot = vm_get_page_prot(flags);
3054 vma_link(mm, vma, prev, rb_link, rb_parent);
3055out:
3056 perf_event_mmap(vma);
3057 mm->total_vm += len >> PAGE_SHIFT;
3058 mm->data_vm += len >> PAGE_SHIFT;
3059 if (flags & VM_LOCKED)
3060 mm->locked_vm += (len >> PAGE_SHIFT);
3061 vma->vm_flags |= VM_SOFTDIRTY;
3062 return 0;
3063}
3064
3065int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
3066{
3067 struct mm_struct *mm = current->mm;
3068 unsigned long len;
3069 int ret;
3070 bool populate;
3071 LIST_HEAD(uf);
3072
3073 len = PAGE_ALIGN(request);
3074 if (len < request)
3075 return -ENOMEM;
3076 if (!len)
3077 return 0;
3078
3079 if (mmap_write_lock_killable(mm))
3080 return -EINTR;
3081
3082 ret = do_brk_flags(addr, len, flags, &uf);
3083 populate = ((mm->def_flags & VM_LOCKED) != 0);
3084 mmap_write_unlock(mm);
3085 userfaultfd_unmap_complete(mm, &uf);
3086 if (populate && !ret)
3087 mm_populate(addr, len);
3088 return ret;
3089}
3090EXPORT_SYMBOL(vm_brk_flags);
3091
3092int vm_brk(unsigned long addr, unsigned long len)
3093{
3094 return vm_brk_flags(addr, len, 0);
3095}
3096EXPORT_SYMBOL(vm_brk);

/* Release all mmaps. */
3099void exit_mmap(struct mm_struct *mm)
3100{
3101 struct mmu_gather tlb;
3102 struct vm_area_struct *vma;
3103 unsigned long nr_accounted = 0;

	/* mm's last user has gone, and it's about to be pulled down */
3106 mmu_notifier_release(mm);
3107
3108 if (unlikely(mm_is_oom_victim(mm))) {
		/*
		 * Manually reap the mm to free as much memory as possible.
		 * Then, as the oom reaper does, set MMF_OOM_SKIP to
		 * disregard this mm from further consideration.  Taking
		 * mm->mmap_lock for write after setting MMF_OOM_SKIP will
		 * guarantee that the oom reaper will not run on this mm
		 * again after mmap_lock is dropped.
		 *
		 * Nothing can be holding mm->mmap_lock here, and the above
		 * call to mmu_notifier_release(mm) ensures the notifiers are
		 * torn down before the reap.
		 */
3121 (void)__oom_reap_task_mm(mm);
3122 set_bit(MMF_OOM_SKIP, &mm->flags);
3123 }
3124
3125 mmap_write_lock(mm);
3126 arch_exit_mmap(mm);
3127
3128 vma = mm->mmap;
3129 if (!vma) {
		/* Can happen if dup_mmap() received an OOM */
3131 mmap_write_unlock(mm);
3132 return;
3133 }
3134
3135 lru_add_drain();
3136 flush_cache_mm(mm);
3137 tlb_gather_mmu_fullmm(&tlb, mm);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
3140 unmap_vmas(&tlb, vma, 0, -1);
3141 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3142 tlb_finish_mmu(&tlb);

	/* Walk the list again, actually closing and freeing each vma. */
3145 while (vma) {
3146 if (vma->vm_flags & VM_ACCOUNT)
3147 nr_accounted += vma_pages(vma);
3148 vma = remove_vma(vma);
3149 cond_resched();
3150 }
3151 mm->mmap = NULL;
3152 mmap_write_unlock(mm);
3153 vm_unacct_memory(nr_accounted);
3154}

/* Insert vm structure into the process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must be held for writing.
 */
3160int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3161{
3162 struct vm_area_struct *prev;
3163 struct rb_node **rb_link, *rb_parent;
3164
3165 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3166 &prev, &rb_link, &rb_parent))
3167 return -ENOMEM;
3168 if ((vma->vm_flags & VM_ACCOUNT) &&
3169 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3170 return -ENOMEM;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when the page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first fault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap and in do_brk_flags.
	 */
3184 if (vma_is_anonymous(vma)) {
3185 BUG_ON(vma->anon_vma);
3186 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3187 }
3188
3189 vma_link(mm, vma, prev, rb_link, rb_parent);
3190 return 0;
3191}

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
3197struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3198 unsigned long addr, unsigned long len, pgoff_t pgoff,
3199 bool *need_rmap_locks)
3200{
3201 struct vm_area_struct *vma = *vmap;
3202 unsigned long vma_start = vma->vm_start;
3203 struct mm_struct *mm = vma->vm_mm;
3204 struct vm_area_struct *new_vma, *prev;
3205 struct rb_node **rb_link, *rb_parent;
3206 bool faulted_in_anon_vma = true;

	/*
	 * If the anonymous vma has not yet been faulted, update the new pgoff
	 * to match the new location, to increase its chance of merging.
	 */
3212 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3213 pgoff = addr >> PAGE_SHIFT;
3214 faulted_in_anon_vma = false;
3215 }
3216
3217 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3218 return NULL;
3219 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3220 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3221 vma->vm_userfaultfd_ctx, anon_vma_name(vma));
3222 if (new_vma) {
		/*
		 * The source vma may have been merged into new_vma.
		 */
3226 if (unlikely(vma_start >= new_vma->vm_start &&
3227 vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
3240 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3241 *vmap = vma = new_vma;
3242 }
3243 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3244 } else {
3245 new_vma = vm_area_dup(vma);
3246 if (!new_vma)
3247 goto out;
3248 new_vma->vm_start = addr;
3249 new_vma->vm_end = addr + len;
3250 new_vma->vm_pgoff = pgoff;
3251 if (vma_dup_policy(vma, new_vma))
3252 goto out_free_vma;
3253 if (anon_vma_clone(new_vma, vma))
3254 goto out_free_mempol;
3255 if (new_vma->vm_file)
3256 get_file(new_vma->vm_file);
3257 if (new_vma->vm_ops && new_vma->vm_ops->open)
3258 new_vma->vm_ops->open(new_vma);
3259 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3260 *need_rmap_locks = false;
3261 }
3262 return new_vma;
3263
3264out_free_mempol:
3265 mpol_put(vma_policy(new_vma));
3266out_free_vma:
3267 vm_area_free(new_vma);
3268out:
3269 return NULL;
3270}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages.
 */
3276bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3277{
3278 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3279 return false;
3280
3281 if (is_data_mapping(flags) &&
3282 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
		/* A soft limit of 0 (e.g. as set up by Valgrind): allow it as long as the hard limit is respected. */
3284 if (rlimit(RLIMIT_DATA) == 0 &&
3285 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3286 return true;
3287
3288 pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
3289 current->comm, current->pid,
3290 (mm->data_vm + npages) << PAGE_SHIFT,
3291 rlimit(RLIMIT_DATA),
3292 ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");
3293
3294 if (!ignore_rlimit_data)
3295 return false;
3296 }
3297
3298 return true;
3299}
3300
3301void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3302{
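	/* total_vm may be read without mmap_lock held, hence WRITE_ONCE/READ_ONCE. */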
3303 WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages);
3304
3305 if (is_exec_mapping(flags))
3306 mm->exec_vm += npages;
3307 else if (is_stack_mapping(flags))
3308 mm->stack_vm += npages;
3309 else if (is_data_mapping(flags))
3310 mm->data_vm += npages;
3311}
3312
3313static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
3318static void special_mapping_close(struct vm_area_struct *vma)
3319{
3320}
3321
3322static const char *special_mapping_name(struct vm_area_struct *vma)
3323{
3324 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3325}
3326
3327static int special_mapping_mremap(struct vm_area_struct *new_vma)
3328{
3329 struct vm_special_mapping *sm = new_vma->vm_private_data;
3330
3331 if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
3332 return -EFAULT;
3333
3334 if (sm->mremap)
3335 return sm->mremap(sm, new_vma);
3336
3337 return 0;
3338}
3339
3340static int special_mapping_split(struct vm_area_struct *vma, unsigned long addr)
3341{
	/*
	 * Forbid splitting special mappings - the kernel has expectations
	 * over the number of pages in the mapping. Together with
	 * VM_DONTEXPAND, the size of the vma should stay the same over the
	 * special mapping's lifetime.
	 */
3348 return -EINVAL;
3349}
3350
3351static const struct vm_operations_struct special_mapping_vmops = {
3352 .close = special_mapping_close,
3353 .fault = special_mapping_fault,
3354 .mremap = special_mapping_mremap,
3355 .name = special_mapping_name,
	/* vDSO code relies on VVAR pages not being accessible remotely */
3357 .access = NULL,
3358 .may_split = special_mapping_split,
3359};
3360
3361static const struct vm_operations_struct legacy_special_mapping_vmops = {
3362 .close = special_mapping_close,
3363 .fault = special_mapping_fault,
3364};
3365
3366static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
3367{
3368 struct vm_area_struct *vma = vmf->vma;
3369 pgoff_t pgoff;
3370 struct page **pages;
3371
3372 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3373 pages = vma->vm_private_data;
3374 } else {
3375 struct vm_special_mapping *sm = vma->vm_private_data;
3376
3377 if (sm->fault)
3378 return sm->fault(sm, vmf->vma, vmf);
3379
3380 pages = sm->pages;
3381 }
3382
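	/* Walk the NULL-terminated pages array to the page backing this offset. */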
3383 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3384 pgoff--;
3385
3386 if (*pages) {
3387 struct page *page = *pages;
3388 get_page(page);
3389 vmf->page = page;
3390 return 0;
3391 }
3392
3393 return VM_FAULT_SIGBUS;
3394}
3395
3396static struct vm_area_struct *__install_special_mapping(
3397 struct mm_struct *mm,
3398 unsigned long addr, unsigned long len,
3399 unsigned long vm_flags, void *priv,
3400 const struct vm_operations_struct *ops)
3401{
3402 int ret;
3403 struct vm_area_struct *vma;
3404
3405 vma = vm_area_alloc(mm);
3406 if (unlikely(vma == NULL))
3407 return ERR_PTR(-ENOMEM);
3408
3409 vma->vm_start = addr;
3410 vma->vm_end = addr + len;
3411
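	/* Special mappings never expand and are never mlocked. */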
3412 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3413 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
3414 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3415
3416 vma->vm_ops = ops;
3417 vma->vm_private_data = priv;
3418
3419 ret = insert_vm_struct(mm, vma);
3420 if (ret)
3421 goto out;
3422
3423 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3424
3425 perf_event_mmap(vma);
3426
3427 return vma;
3428
3429out:
3430 vm_area_free(vma);
3431 return ERR_PTR(ret);
3432}
3433
3434bool vma_is_special_mapping(const struct vm_area_struct *vma,
3435 const struct vm_special_mapping *sm)
3436{
3437 return vma->vm_private_data == sm &&
3438 (vma->vm_ops == &special_mapping_vmops ||
3439 vma->vm_ops == &legacy_special_mapping_vmops);
3440}

/*
 * Called with mm->mmap_lock held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
3451struct vm_area_struct *_install_special_mapping(
3452 struct mm_struct *mm,
3453 unsigned long addr, unsigned long len,
3454 unsigned long vm_flags, const struct vm_special_mapping *spec)
3455{
3456 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3457 &special_mapping_vmops);
3458}
3459
3460int install_special_mapping(struct mm_struct *mm,
3461 unsigned long addr, unsigned long len,
3462 unsigned long vm_flags, struct page **pages)
3463{
3464 struct vm_area_struct *vma = __install_special_mapping(
3465 mm, addr, len, vm_flags, (void *)pages,
3466 &legacy_special_mapping_vmops);
3467
3468 return PTR_ERR_OR_ZERO(vma);
3469}
3470
3471static DEFINE_MUTEX(mm_all_locks_mutex);
3472
3473static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3474{
3475 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
3480 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
3490 if (__test_and_set_bit(0, (unsigned long *)
3491 &anon_vma->root->rb_root.rb_root.rb_node))
3492 BUG();
3493 }
3494}
3495
3496static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3497{
3498 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
3508 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3509 BUG();
3510 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
3511 }
3512}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_lock in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_lock until mm_drop_all_locks() returns.
 *
 * mmap_lock in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB of the anon_vma rb_root pointer and the AS_MM_ALL_LOCKS bitflag
 * in mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, matching the lock ordering
 * documented at the top of mm/rmap.c: hugetlbfs i_mmap_rwsem locks first,
 * then the remaining i_mmap_rwsem locks, then all anon_vma locks.  Within
 * each class the order is arbitrary, because the VM code does not depend
 * on it.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
3551int mm_take_all_locks(struct mm_struct *mm)
3552{
3553 struct vm_area_struct *vma;
3554 struct anon_vma_chain *avc;
3555
3556 BUG_ON(mmap_read_trylock(mm));
3557
3558 mutex_lock(&mm_all_locks_mutex);
3559
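	/* hugetlb mappings first, then other file mappings, then anon_vmas. */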
3560 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3561 if (signal_pending(current))
3562 goto out_unlock;
3563 if (vma->vm_file && vma->vm_file->f_mapping &&
3564 is_vm_hugetlb_page(vma))
3565 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3566 }
3567
3568 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3569 if (signal_pending(current))
3570 goto out_unlock;
3571 if (vma->vm_file && vma->vm_file->f_mapping &&
3572 !is_vm_hugetlb_page(vma))
3573 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3574 }
3575
3576 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3577 if (signal_pending(current))
3578 goto out_unlock;
3579 if (vma->anon_vma)
3580 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3581 vm_lock_anon_vma(mm, avc->anon_vma);
3582 }
3583
3584 return 0;
3585
3586out_unlock:
3587 mm_drop_all_locks(mm);
3588 return -EINTR;
3589}
3590
3591static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3592{
3593 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so that users of the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
3606 if (!__test_and_clear_bit(0, (unsigned long *)
3607 &anon_vma->root->rb_root.rb_root.rb_node))
3608 BUG();
3609 anon_vma_unlock_write(anon_vma);
3610 }
3611}
3612
3613static void vm_unlock_mapping(struct address_space *mapping)
3614{
3615 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
3620 i_mmap_unlock_write(mapping);
3621 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3622 &mapping->flags))
3623 BUG();
3624 }
3625}
3626

/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
3631void mm_drop_all_locks(struct mm_struct *mm)
3632{
3633 struct vm_area_struct *vma;
3634 struct anon_vma_chain *avc;
3635
3636 BUG_ON(mmap_read_trylock(mm));
3637 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3638
3639 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3640 if (vma->anon_vma)
3641 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3642 vm_unlock_anon_vma(avc->anon_vma);
3643 if (vma->vm_file && vma->vm_file->f_mapping)
3644 vm_unlock_mapping(vma->vm_file->f_mapping);
3645 }
3646
3647 mutex_unlock(&mm_all_locks_mutex);
3648}
3649

/*
 * initialise the percpu counter for VM
 */
3653void __init mmap_init(void)
3654{
3655 int ret;
3656
3657 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3658 VM_BUG_ON(ret);
3659}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
3671static int init_user_reserve(void)
3672{
3673 unsigned long free_kbytes;
3674
3675 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3676
3677 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3678 return 0;
3679}
3680subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
 * only reserve 3% of free pages by default.
 */
3692static int init_admin_reserve(void)
3693{
3694 unsigned long free_kbytes;
3695
3696 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3697
3698 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3699 return 0;
3700}
3701subsys_initcall(init_admin_reserve);

/*
 * Reinitialise the user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
3721static int reserve_mem_notifier(struct notifier_block *nb,
3722 unsigned long action, void *data)
3723{
3724 unsigned long tmp, free_kbytes;
3725
3726 switch (action) {
3727 case MEM_ONLINE:
		/* Default user reserve max is 128MB; leave it alone if the admin changed it. */
3729 tmp = sysctl_user_reserve_kbytes;
3730 if (0 < tmp && tmp < (1UL << 17))
3731 init_user_reserve();

		/* Default admin reserve max is 8MB; leave it alone if the admin changed it. */
3734 tmp = sysctl_admin_reserve_kbytes;
3735 if (0 < tmp && tmp < (1UL << 13))
3736 init_admin_reserve();
3737
3738 break;
3739 case MEM_OFFLINE:
3740 free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3741
3742 if (sysctl_user_reserve_kbytes > free_kbytes) {
3743 init_user_reserve();
3744 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3745 sysctl_user_reserve_kbytes);
3746 }
3747
3748 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3749 init_admin_reserve();
3750 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3751 sysctl_admin_reserve_kbytes);
3752 }
3753 break;
3754 default:
3755 break;
3756 }
3757 return NOTIFY_OK;
3758}
3759
3760static struct notifier_block reserve_mem_nb = {
3761 .notifier_call = reserve_mem_notifier,
3762};
3763
3764static int __meminit init_reserve_notifier(void)
3765{
3766 if (register_hotmemory_notifier(&reserve_mem_nb))
3767 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3768
3769 return 0;
3770}
3771subsys_initcall(init_reserve_notifier);
3772