/*
 * mm/mmap.c
 *
 * Virtual memory area (VMA) management for the memory-management
 * subsystem: mmap/munmap/brk, VMA merging and splitting, the augmented
 * rbtree with per-node gap tracking, unmapped-area search and stack
 * expansion.
 */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
12#include <linux/kernel.h>
13#include <linux/slab.h>
14#include <linux/backing-dev.h>
15#include <linux/mm.h>
16#include <linux/vmacache.h>
17#include <linux/shm.h>
18#include <linux/mman.h>
19#include <linux/pagemap.h>
20#include <linux/swap.h>
21#include <linux/syscalls.h>
22#include <linux/capability.h>
23#include <linux/init.h>
24#include <linux/file.h>
25#include <linux/fs.h>
26#include <linux/personality.h>
27#include <linux/security.h>
28#include <linux/hugetlb.h>
29#include <linux/shmem_fs.h>
30#include <linux/profile.h>
31#include <linux/export.h>
32#include <linux/mount.h>
33#include <linux/mempolicy.h>
34#include <linux/rmap.h>
35#include <linux/mmu_notifier.h>
36#include <linux/mmdebug.h>
37#include <linux/perf_event.h>
38#include <linux/audit.h>
39#include <linux/khugepaged.h>
40#include <linux/uprobes.h>
41#include <linux/rbtree_augmented.h>
42#include <linux/notifier.h>
43#include <linux/memory.h>
44#include <linux/printk.h>
45#include <linux/userfaultfd_k.h>
46#include <linux/moduleparam.h>
47#include <linux/pkeys.h>
48#include <linux/oom.h>
49#include <linux/sched/mm.h>
50
51#include <linux/uaccess.h>
52#include <asm/cacheflush.h>
53#include <asm/tlb.h>
54#include <asm/mmu_context.h>
55
56#define CREATE_TRACE_POINTS
57#include <trace/events/mmap.h>
58
59#include "internal.h"
60
61#ifndef arch_mmap_check
62#define arch_mmap_check(addr, len, flags) (0)
63#endif
64
65#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
66const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
67const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
68int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
69#endif
70#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
71const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
72const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
73int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
74#endif
75
76static bool ignore_rlimit_data;
77core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
78
79static void unmap_region(struct mm_struct *mm,
80 struct vm_area_struct *vma, struct vm_area_struct *prev,
81 unsigned long start, unsigned long end);
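
/*
 * protection_map[] translates the low four vm_flags bits
 * (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED) into the architecture's page
 * protection bits: the first eight entries are the private (__P) variants,
 * the last eight the shared (__S) variants.
 */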
103pgprot_t protection_map[16] __ro_after_init = {
104 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
105 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
106};
107
108#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
109static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
110{
111 return prot;
112}
113#endif
114
115pgprot_t vm_get_page_prot(unsigned long vm_flags)
116{
117 pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
118 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
119 pgprot_val(arch_vm_get_page_prot(vm_flags)));
120
121 return arch_filter_pgprot(ret);
122}
123EXPORT_SYMBOL(vm_get_page_prot);
124
125static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
126{
127 return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
128}
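
/*
 * Update vma->vm_page_prot to reflect vma->vm_flags.
 */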
131void vma_set_page_prot(struct vm_area_struct *vma)
132{
133 unsigned long vm_flags = vma->vm_flags;
134 pgprot_t vm_page_prot;
135
136 vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
137 if (vma_wants_writenotify(vma, vm_page_prot)) {
138 vm_flags &= ~VM_SHARED;
139 vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
140 }
141
142 WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
143}
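
/*
 * Unlink a file-backed vma from its address_space's interval tree.
 * The caller must hold the mapping's i_mmap_rwsem for writing.
 */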
148static void __remove_shared_vm_struct(struct vm_area_struct *vma,
149 struct file *file, struct address_space *mapping)
150{
151 if (vma->vm_flags & VM_SHARED)
152 mapping_unmap_writable(mapping);
153
154 flush_dcache_mmap_lock(mapping);
155 vma_interval_tree_remove(vma, &mapping->i_mmap);
156 flush_dcache_mmap_unlock(mapping);
157}
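
/*
 * Unlink a file-based vm structure from its address_space, to hide the
 * vma from rmap and truncation before freeing its page tables.
 */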
163void unlink_file_vma(struct vm_area_struct *vma)
164{
165 struct file *file = vma->vm_file;
166
167 if (file) {
168 struct address_space *mapping = file->f_mapping;
169 i_mmap_lock_write(mapping);
170 __remove_shared_vm_struct(vma, file, mapping);
171 i_mmap_unlock_write(mapping);
172 }
173}
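
/*
 * Close a vm structure and free it, returning the next vma in the list.
 */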
178static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
179{
180 struct vm_area_struct *next = vma->vm_next;
181
182 might_sleep();
183 if (vma->vm_ops && vma->vm_ops->close)
184 vma->vm_ops->close(vma);
185 if (vma->vm_file)
186 fput(vma->vm_file);
187 mpol_put(vma_policy(vma));
188 vm_area_free(vma);
189 return next;
190}
191
192static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
193 struct list_head *uf);
194SYSCALL_DEFINE1(brk, unsigned long, brk)
195{
196 unsigned long newbrk, oldbrk, origbrk;
197 struct mm_struct *mm = current->mm;
198 struct vm_area_struct *next;
199 unsigned long min_brk;
200 bool populate;
201 bool downgraded = false;
202 LIST_HEAD(uf);
203
204 if (mmap_write_lock_killable(mm))
205 return -EINTR;
206
207 origbrk = mm->brk;
208
209#ifdef CONFIG_COMPAT_BRK
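 /*
 * With CONFIG_COMPAT_BRK, legacy binaries may expect the heap to start
 * right after the data segment, so only raise the minimum to start_brk
 * when brk randomization was actually applied to this task.
 */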
215 if (current->brk_randomized)
216 min_brk = mm->start_brk;
217 else
218 min_brk = mm->end_data;
219#else
220 min_brk = mm->start_brk;
221#endif
222 if (brk < min_brk)
223 goto out;
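
 /*
 * Check against RLIMIT_DATA before comparing oldbrk with newbrk, so a
 * shrink followed by a grow cannot slip past a limit that was lowered
 * after the data segment was first set up.
 */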
231 if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
232 mm->end_data, mm->start_data))
233 goto out;
234
235 newbrk = PAGE_ALIGN(brk);
236 oldbrk = PAGE_ALIGN(mm->brk);
237 if (oldbrk == newbrk) {
238 mm->brk = brk;
239 goto success;
240 }
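
 /*
 * Always allow shrinking brk.
 * __do_munmap() may downgrade mmap_lock to read.
 */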
246 if (brk <= mm->brk) {
247 int ret;
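
 /*
 * mm->brk must be updated while mmap_lock is still held for write;
 * it is restored from origbrk if __do_munmap() fails.
 */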
254 mm->brk = brk;
255 ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
256 if (ret < 0) {
257 mm->brk = origbrk;
258 goto out;
259 } else if (ret == 1) {
260 downgraded = true;
261 }
262 goto success;
263 }
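
 /* Check against existing mmap mappings. */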
266 next = find_vma(mm, oldbrk);
267 if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
268 goto out;
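
 /* Ok, looks good - let it rip. */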
271 if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
272 goto out;
273 mm->brk = brk;
274
275success:
276 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
277 if (downgraded)
278 mmap_read_unlock(mm);
279 else
280 mmap_write_unlock(mm);
281 userfaultfd_unmap_complete(mm, &uf);
282 if (populate)
283 mm_populate(oldbrk, newbrk - oldbrk);
284 return brk;
285
286out:
287 mmap_write_unlock(mm);
288 return origbrk;
289}
290
291static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
292{
293 unsigned long gap, prev_end;
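
 /*
 * The gap is the space between this vma's guard-adjusted start and the
 * end (including guard gap) of the previous vma, or everything below
 * the vma when there is no predecessor.
 */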
301 gap = vm_start_gap(vma);
302 if (vma->vm_prev) {
303 prev_end = vm_end_gap(vma->vm_prev);
304 if (gap > prev_end)
305 gap -= prev_end;
306 else
307 gap = 0;
308 }
309 return gap;
310}
311
312#ifdef CONFIG_DEBUG_VM_RB
313static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
314{
315 unsigned long max = vma_compute_gap(vma), subtree_gap;
316 if (vma->vm_rb.rb_left) {
317 subtree_gap = rb_entry(vma->vm_rb.rb_left,
318 struct vm_area_struct, vm_rb)->rb_subtree_gap;
319 if (subtree_gap > max)
320 max = subtree_gap;
321 }
322 if (vma->vm_rb.rb_right) {
323 subtree_gap = rb_entry(vma->vm_rb.rb_right,
324 struct vm_area_struct, vm_rb)->rb_subtree_gap;
325 if (subtree_gap > max)
326 max = subtree_gap;
327 }
328 return max;
329}
330
331static int browse_rb(struct mm_struct *mm)
332{
333 struct rb_root *root = &mm->mm_rb;
334 int i = 0, j, bug = 0;
335 struct rb_node *nd, *pn = NULL;
336 unsigned long prev = 0, pend = 0;
337
338 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
339 struct vm_area_struct *vma;
340 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
341 if (vma->vm_start < prev) {
342 pr_emerg("vm_start %lx < prev %lx\n",
343 vma->vm_start, prev);
344 bug = 1;
345 }
346 if (vma->vm_start < pend) {
347 pr_emerg("vm_start %lx < pend %lx\n",
348 vma->vm_start, pend);
349 bug = 1;
350 }
351 if (vma->vm_start > vma->vm_end) {
352 pr_emerg("vm_start %lx > vm_end %lx\n",
353 vma->vm_start, vma->vm_end);
354 bug = 1;
355 }
356 spin_lock(&mm->page_table_lock);
357 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
358 pr_emerg("free gap %lx, correct %lx\n",
359 vma->rb_subtree_gap,
360 vma_compute_subtree_gap(vma));
361 bug = 1;
362 }
363 spin_unlock(&mm->page_table_lock);
364 i++;
365 pn = nd;
366 prev = vma->vm_start;
367 pend = vma->vm_end;
368 }
369 j = 0;
370 for (nd = pn; nd; nd = rb_prev(nd))
371 j++;
372 if (i != j) {
373 pr_emerg("backwards %d, forwards %d\n", j, i);
374 bug = 1;
375 }
376 return bug ? -1 : i;
377}
378
379static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
380{
381 struct rb_node *nd;
382
383 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
384 struct vm_area_struct *vma;
385 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
386 VM_BUG_ON_VMA(vma != ignore &&
387 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
388 vma);
389 }
390}
391
392static void validate_mm(struct mm_struct *mm)
393{
394 int bug = 0;
395 int i = 0;
396 unsigned long highest_address = 0;
397 struct vm_area_struct *vma = mm->mmap;
398
399 while (vma) {
400 struct anon_vma *anon_vma = vma->anon_vma;
401 struct anon_vma_chain *avc;
402
403 if (anon_vma) {
404 anon_vma_lock_read(anon_vma);
405 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
406 anon_vma_interval_tree_verify(avc);
407 anon_vma_unlock_read(anon_vma);
408 }
409
410 highest_address = vm_end_gap(vma);
411 vma = vma->vm_next;
412 i++;
413 }
414 if (i != mm->map_count) {
415 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
416 bug = 1;
417 }
418 if (highest_address != mm->highest_vm_end) {
419 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
420 mm->highest_vm_end, highest_address);
421 bug = 1;
422 }
423 i = browse_rb(mm);
424 if (i != mm->map_count) {
425 if (i != -1)
426 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
427 bug = 1;
428 }
429 VM_BUG_ON_MM(bug, mm);
430}
431#else
432#define validate_mm_rb(root, ignore) do { } while (0)
433#define validate_mm(mm) do { } while (0)
434#endif
435
436RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
437 struct vm_area_struct, vm_rb,
438 unsigned long, rb_subtree_gap, vma_compute_gap)
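
/*
 * Update the augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_end has changed, or after a neighbouring vma has been inserted or
 * removed.
 */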
445static void vma_gap_update(struct vm_area_struct *vma)
446{
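 /*
 * RB_DECLARE_CALLBACKS_MAX() generated a propagate callback that
 * recomputes rb_subtree_gap from this node up to the rbtree root.
 */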
451 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
452}
453
454static inline void vma_rb_insert(struct vm_area_struct *vma,
455 struct rb_root *root)
456{
457
458 validate_mm_rb(root, NULL);
459
460 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
461}
462
463static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
464{
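 /*
 * Note: rb_erase_augmented() is a fairly large inline function, so make
 * sure it is instantiated only here, with our gap callbacks.
 */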
470 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
471}
472
473static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
474 struct rb_root *root,
475 struct vm_area_struct *ignore)
476{
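 /*
 * All rb_subtree_gap values must be consistent prior to erase, with the
 * possible exception of the "ignore" vma, whose gap the caller may have
 * left stale while adjusting addresses in __vma_adjust().
 */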
486 validate_mm_rb(root, ignore);
487
488 __vma_rb_erase(vma, root);
489}
490
491static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
492 struct rb_root *root)
493{
494 vma_rb_erase_ignore(vma, root, vma);
495}
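
/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start, vm_end or vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */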
511static inline void
512anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
513{
514 struct anon_vma_chain *avc;
515
516 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
517 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
518}
519
520static inline void
521anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
522{
523 struct anon_vma_chain *avc;
524
525 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
526 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
527}
528
529static int find_vma_links(struct mm_struct *mm, unsigned long addr,
530 unsigned long end, struct vm_area_struct **pprev,
531 struct rb_node ***rb_link, struct rb_node **rb_parent)
532{
533 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
534
535 mmap_assert_locked(mm);
536 __rb_link = &mm->mm_rb.rb_node;
537 rb_prev = __rb_parent = NULL;
538
539 while (*__rb_link) {
540 struct vm_area_struct *vma_tmp;
541
542 __rb_parent = *__rb_link;
543 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
544
545 if (vma_tmp->vm_end > addr) {
546
547 if (vma_tmp->vm_start < end)
548 return -ENOMEM;
549 __rb_link = &__rb_parent->rb_left;
550 } else {
551 rb_prev = __rb_parent;
552 __rb_link = &__rb_parent->rb_right;
553 }
554 }
555
556 *pprev = NULL;
557 if (rb_prev)
558 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
559 *rb_link = __rb_link;
560 *rb_parent = __rb_parent;
561 return 0;
562}
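
/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
 */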
573static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
574 struct vm_area_struct *vma)
575{
576 if (!vma)
577 return mm->mmap;
578
579 return vma->vm_next;
580}
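
/*
 * munmap_vma_range() - munmap VMAs that overlap a range and find the rbtree
 * insertion point for a new mapping there.
 *
 * Unmaps any vma overlapping [@start, @start + @len) and sets @pprev, @link
 * and @parent as find_vma_links() does for the now-empty range.
 *
 * Returns: 0 on success, -ENOMEM if an unmap fails.
 */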
596static inline int
597munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
598 struct vm_area_struct **pprev, struct rb_node ***link,
599 struct rb_node **parent, struct list_head *uf)
600{
601
602 while (find_vma_links(mm, start, start + len, pprev, link, parent))
603 if (do_munmap(mm, start, len, uf))
604 return -ENOMEM;
605
606 return 0;
607}
608static unsigned long count_vma_pages_range(struct mm_struct *mm,
609 unsigned long addr, unsigned long end)
610{
611 unsigned long nr_pages = 0;
612 struct vm_area_struct *vma;
613
614
615 vma = find_vma_intersection(mm, addr, end);
616 if (!vma)
617 return 0;
618
619 nr_pages = (min(end, vma->vm_end) -
620 max(addr, vma->vm_start)) >> PAGE_SHIFT;
621
622
623 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
624 unsigned long overlap_len;
625
626 if (vma->vm_start > end)
627 break;
628
629 overlap_len = min(end, vma->vm_end) - vma->vm_start;
630 nr_pages += overlap_len >> PAGE_SHIFT;
631 }
632
633 return nr_pages;
634}
635
636void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
637 struct rb_node **rb_link, struct rb_node *rb_parent)
638{
639
640 if (vma->vm_next)
641 vma_gap_update(vma->vm_next);
642 else
643 mm->highest_vm_end = vm_end_gap(vma);
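
 /*
 * Link the node with a conservative rb_subtree_gap of 0, let
 * vma_gap_update() compute and propagate the real value, and only then
 * rebalance the tree in vma_rb_insert().
 */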
654 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
655 vma->rb_subtree_gap = 0;
656 vma_gap_update(vma);
657 vma_rb_insert(vma, &mm->mm_rb);
658}
659
660static void __vma_link_file(struct vm_area_struct *vma)
661{
662 struct file *file;
663
664 file = vma->vm_file;
665 if (file) {
666 struct address_space *mapping = file->f_mapping;
667
668 if (vma->vm_flags & VM_SHARED)
669 mapping_allow_writable(mapping);
670
671 flush_dcache_mmap_lock(mapping);
672 vma_interval_tree_insert(vma, &mapping->i_mmap);
673 flush_dcache_mmap_unlock(mapping);
674 }
675}
676
677static void
678__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
679 struct vm_area_struct *prev, struct rb_node **rb_link,
680 struct rb_node *rb_parent)
681{
682 __vma_link_list(mm, vma, prev);
683 __vma_link_rb(mm, vma, rb_link, rb_parent);
684}
685
686static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
687 struct vm_area_struct *prev, struct rb_node **rb_link,
688 struct rb_node *rb_parent)
689{
690 struct address_space *mapping = NULL;
691
692 if (vma->vm_file) {
693 mapping = vma->vm_file->f_mapping;
694 i_mmap_lock_write(mapping);
695 }
696
697 __vma_link(mm, vma, prev, rb_link, rb_parent);
698 __vma_link_file(vma);
699
700 if (mapping)
701 i_mmap_unlock_write(mapping);
702
703 mm->map_count++;
704 validate_mm(mm);
705}
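
/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into
 * the mm's rbtree and address-ordered list, without the file and anon_vma
 * bookkeeping that vma_link() also performs.
 */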
711static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
712{
713 struct vm_area_struct *prev;
714 struct rb_node **rb_link, *rb_parent;
715
716 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
717 &prev, &rb_link, &rb_parent))
718 BUG();
719 __vma_link(mm, vma, prev, rb_link, rb_parent);
720 mm->map_count++;
721}
722
723static __always_inline void __vma_unlink(struct mm_struct *mm,
724 struct vm_area_struct *vma,
725 struct vm_area_struct *ignore)
726{
727 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
728 __vma_unlink_list(mm, vma);
729
730 vmacache_invalidate(mm);
731}
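
/*
 * We cannot adjust vm_start, vm_end or vm_pgoff of a vma that is already
 * linked into the mm rbtree and interval trees without also updating those
 * trees.  __vma_adjust() performs such adjustments, optionally inserting
 * "insert" before dropping the locks, or expanding "expand" over a
 * neighbour that is being removed.
 */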
740int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
741 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
742 struct vm_area_struct *expand)
743{
744 struct mm_struct *mm = vma->vm_mm;
745 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
746 struct address_space *mapping = NULL;
747 struct rb_root_cached *root = NULL;
748 struct anon_vma *anon_vma = NULL;
749 struct file *file = vma->vm_file;
750 bool start_changed = false, end_changed = false;
751 long adjust_next = 0;
752 int remove_next = 0;
753
754 if (next && !insert) {
755 struct vm_area_struct *exporter = NULL, *importer = NULL;
756
757 if (end >= next->vm_end) {
764 if (next == expand) {
769 VM_WARN_ON(end != next->vm_end);
775 remove_next = 3;
776 VM_WARN_ON(file != next->vm_file);
777 swap(vma, next);
778 } else {
779 VM_WARN_ON(expand != vma);
784 remove_next = 1 + (end > next->vm_end);
785 VM_WARN_ON(remove_next == 2 &&
786 end != next->vm_next->vm_end);
787
788 end = next->vm_end;
789 }
790
791 exporter = next;
792 importer = vma;
798 if (remove_next == 2 && !next->anon_vma)
799 exporter = next->vm_next;
800
801 } else if (end > next->vm_start) {
806 adjust_next = (end - next->vm_start);
807 exporter = next;
808 importer = vma;
809 VM_WARN_ON(expand != importer);
810 } else if (end < vma->vm_end) {
816 adjust_next = -(vma->vm_end - end);
817 exporter = vma;
818 importer = next;
819 VM_WARN_ON(expand != importer);
820 }
827 if (exporter && exporter->anon_vma && !importer->anon_vma) {
828 int error;
829
830 importer->anon_vma = exporter->anon_vma;
831 error = anon_vma_clone(importer, exporter);
832 if (error)
833 return error;
834 }
835 }
836again:
837 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
838
839 if (file) {
840 mapping = file->f_mapping;
841 root = &mapping->i_mmap;
842 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
843
844 if (adjust_next)
845 uprobe_munmap(next, next->vm_start, next->vm_end);
846
847 i_mmap_lock_write(mapping);
848 if (insert) {
855 __vma_link_file(insert);
856 }
857 }
858
859 anon_vma = vma->anon_vma;
860 if (!anon_vma && adjust_next)
861 anon_vma = next->anon_vma;
862 if (anon_vma) {
863 VM_WARN_ON(adjust_next && next->anon_vma &&
864 anon_vma != next->anon_vma);
865 anon_vma_lock_write(anon_vma);
866 anon_vma_interval_tree_pre_update_vma(vma);
867 if (adjust_next)
868 anon_vma_interval_tree_pre_update_vma(next);
869 }
870
871 if (file) {
872 flush_dcache_mmap_lock(mapping);
873 vma_interval_tree_remove(vma, root);
874 if (adjust_next)
875 vma_interval_tree_remove(next, root);
876 }
877
878 if (start != vma->vm_start) {
879 vma->vm_start = start;
880 start_changed = true;
881 }
882 if (end != vma->vm_end) {
883 vma->vm_end = end;
884 end_changed = true;
885 }
886 vma->vm_pgoff = pgoff;
887 if (adjust_next) {
888 next->vm_start += adjust_next;
889 next->vm_pgoff += adjust_next >> PAGE_SHIFT;
890 }
891
892 if (file) {
893 if (adjust_next)
894 vma_interval_tree_insert(next, root);
895 vma_interval_tree_insert(vma, root);
896 flush_dcache_mmap_unlock(mapping);
897 }
898
899 if (remove_next) {
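 /*
 * vma_merge() has merged next into vma: remove next from the rbtree
 * and lists before the locks are dropped.
 */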
904 if (remove_next != 3)
905 __vma_unlink(mm, next, next);
906 else
916 __vma_unlink(mm, next, vma);
917 if (file)
918 __remove_shared_vm_struct(next, file, mapping);
919 } else if (insert) {
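 /*
 * split_vma() has split insert from vma, and needs us to insert it
 * before dropping the locks (it may either follow vma or precede it).
 */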
925 __insert_vm_struct(mm, insert);
926 } else {
927 if (start_changed)
928 vma_gap_update(vma);
929 if (end_changed) {
930 if (!next)
931 mm->highest_vm_end = vm_end_gap(vma);
932 else if (!adjust_next)
933 vma_gap_update(next);
934 }
935 }
936
937 if (anon_vma) {
938 anon_vma_interval_tree_post_update_vma(vma);
939 if (adjust_next)
940 anon_vma_interval_tree_post_update_vma(next);
941 anon_vma_unlock_write(anon_vma);
942 }
943
944 if (file) {
945 i_mmap_unlock_write(mapping);
946 uprobe_mmap(vma);
947
948 if (adjust_next)
949 uprobe_mmap(next);
950 }
951
952 if (remove_next) {
953 if (file) {
954 uprobe_munmap(next, next->vm_start, next->vm_end);
955 fput(file);
956 }
957 if (next->anon_vma)
958 anon_vma_merge(vma, next);
959 mm->map_count--;
960 mpol_put(vma_policy(next));
961 vm_area_free(next);
967 if (remove_next != 3) {
974 next = vma->vm_next;
975 } else {
986 next = vma;
987 }
988 if (remove_next == 2) {
989 remove_next = 1;
990 end = next->vm_end;
991 goto again;
992 }
993 else if (next)
994 vma_gap_update(next);
995 else {
1015 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
1016 }
1017 }
1018 if (insert && file)
1019 uprobe_mmap(insert);
1020
1021 validate_mm(mm);
1022
1023 return 0;
1024}
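
/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */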
1030static inline int is_mergeable_vma(struct vm_area_struct *vma,
1031 struct file *file, unsigned long vm_flags,
1032 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1033{
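 /*
 * VM_SOFTDIRTY should not prevent merging: the flags may differ only in
 * the soft-dirty bit and the caller is expected to mark the merged vma
 * soft-dirty anyway.  Comparing it here would just force new VMAs to be
 * created where existing ones could have been reused.
 */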
1042 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
1043 return 0;
1044 if (vma->vm_file != file)
1045 return 0;
1046 if (vma->vm_ops && vma->vm_ops->close)
1047 return 0;
1048 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
1049 return 0;
1050 return 1;
1051}
1052
1053static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
1054 struct anon_vma *anon_vma2,
1055 struct vm_area_struct *vma)
1056{
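 /*
 * A NULL anon_vma is considered mergeable with anything, provided the
 * vma (when given) has at most one anon_vma_chain entry; otherwise the
 * two anon_vmas must be the same object.
 */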
1061 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1062 list_is_singular(&vma->anon_vma_chain)))
1063 return 1;
1064 return anon_vma1 == anon_vma2;
1065}
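
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if the same anon_vma is assigned but offsets are
 * incompatible.
 */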
1078static int
1079can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1080 struct anon_vma *anon_vma, struct file *file,
1081 pgoff_t vm_pgoff,
1082 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1083{
1084 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1085 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1086 if (vma->vm_pgoff == vm_pgoff)
1087 return 1;
1088 }
1089 return 0;
1090}
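
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff+pglen)
 * beyond (at a higher virtual address and file offset than) the vma:
 * the new range must continue the vma's file offsets.
 */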
1099static int
1100can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1101 struct anon_vma *anon_vma, struct file *file,
1102 pgoff_t vm_pgoff,
1103 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1104{
1105 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1106 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1107 pgoff_t vm_pglen;
1108 vm_pglen = vma_pages(vma);
1109 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1110 return 1;
1111 }
1112 return 0;
1113}
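
/*
 * Given a mapping request (addr, end, vm_flags, file, pgoff), work out
 * whether it can be merged with its predecessor and/or successor: the new
 * area may extend the predecessor, extend the successor, or bridge the two
 * into a single vma.  The merge is carried out via __vma_adjust() and the
 * resulting vma is returned, or NULL when no merge is possible and the
 * caller has to allocate a new vma.  "prev" is the vma preceding the
 * candidate range, if any.
 */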
1158struct vm_area_struct *vma_merge(struct mm_struct *mm,
1159 struct vm_area_struct *prev, unsigned long addr,
1160 unsigned long end, unsigned long vm_flags,
1161 struct anon_vma *anon_vma, struct file *file,
1162 pgoff_t pgoff, struct mempolicy *policy,
1163 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1164{
1165 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1166 struct vm_area_struct *area, *next;
1167 int err;
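
 /*
 * Special mappings (VM_IO, VM_DONTEXPAND, VM_PFNMAP, VM_MIXEDMAP) are
 * never merged.
 */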
1173 if (vm_flags & VM_SPECIAL)
1174 return NULL;
1175
1176 next = vma_next(mm, prev);
1177 area = next;
1178 if (area && area->vm_end == end)
1179 next = next->vm_next;
1180
1181
1182 VM_WARN_ON(prev && addr <= prev->vm_start);
1183 VM_WARN_ON(area && end > area->vm_end);
1184 VM_WARN_ON(addr >= end);
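
 /*
 * Can it merge with the predecessor?
 */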
1189 if (prev && prev->vm_end == addr &&
1190 mpol_equal(vma_policy(prev), policy) &&
1191 can_vma_merge_after(prev, vm_flags,
1192 anon_vma, file, pgoff,
1193 vm_userfaultfd_ctx)) {
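 /*
 * OK, it can.  Can we now merge in the successor as well?
 */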
1197 if (next && end == next->vm_start &&
1198 mpol_equal(policy, vma_policy(next)) &&
1199 can_vma_merge_before(next, vm_flags,
1200 anon_vma, file,
1201 pgoff+pglen,
1202 vm_userfaultfd_ctx) &&
1203 is_mergeable_anon_vma(prev->anon_vma,
1204 next->anon_vma, NULL)) {
1205
1206 err = __vma_adjust(prev, prev->vm_start,
1207 next->vm_end, prev->vm_pgoff, NULL,
1208 prev);
1209 } else
1210 err = __vma_adjust(prev, prev->vm_start,
1211 end, prev->vm_pgoff, NULL, prev);
1212 if (err)
1213 return NULL;
1214 khugepaged_enter_vma_merge(prev, vm_flags);
1215 return prev;
1216 }
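
 /*
 * Can this new request be merged in front of next?
 */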
1221 if (next && end == next->vm_start &&
1222 mpol_equal(policy, vma_policy(next)) &&
1223 can_vma_merge_before(next, vm_flags,
1224 anon_vma, file, pgoff+pglen,
1225 vm_userfaultfd_ctx)) {
1226 if (prev && addr < prev->vm_end)
1227 err = __vma_adjust(prev, prev->vm_start,
1228 addr, prev->vm_pgoff, NULL, next);
1229 else {
1230 err = __vma_adjust(area, addr, next->vm_end,
1231 next->vm_pgoff - pglen, NULL, next);
1237 area = next;
1238 }
1239 if (err)
1240 return NULL;
1241 khugepaged_enter_vma_merge(area, vm_flags);
1242 return area;
1243 }
1244
1245 return NULL;
1246}
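
/*
 * Rough compatibility check to quickly see if it's even worth looking at
 * sharing an anon_vma: the two vmas must be adjacent, have the same policy
 * and file, contiguous file offsets, and flags that differ only in bits
 * mprotect() may change.
 */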
1261static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1262{
1263 return a->vm_end == b->vm_start &&
1264 mpol_equal(vma_policy(a), vma_policy(b)) &&
1265 a->vm_file == b->vm_file &&
1266 !((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
1267 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1268}
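
/*
 * Do some basic sanity checking to see if we can re-use the anon_vma from
 * 'old'.  'a' and 'b' are the candidate neighbouring vmas, 'a' at the lower
 * address.  The anon_vma is only reused when it is the sole entry on the
 * old vma's anon_vma_chain, i.e. it has not accumulated chains through
 * forking or earlier merges.
 */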
1292static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1293{
1294 if (anon_vma_compatible(a, b)) {
1295 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1296
1297 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1298 return anon_vma;
1299 }
1300 return NULL;
1301}
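
/*
 * find_mergeable_anon_vma() is used by anon_vma_prepare(): try to reuse a
 * neighbouring vma's anon_vma, so that adjacent anonymous vmas stay
 * mergeable by mprotect()/mremap() later on.
 */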
1311struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1312{
1313 struct anon_vma *anon_vma = NULL;
1314
1315
1316 if (vma->vm_next) {
1317 anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
1318 if (anon_vma)
1319 return anon_vma;
1320 }
1321
1322
1323 if (vma->vm_prev)
1324 anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
1336 return anon_vma;
1337}
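
/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr.
 */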
1343static inline unsigned long round_hint_to_min(unsigned long hint)
1344{
1345 hint &= PAGE_MASK;
1346 if (((void *)hint != NULL) &&
1347 (hint < mmap_min_addr))
1348 return PAGE_ALIGN(mmap_min_addr);
1349 return hint;
1350}
1351
1352int mlock_future_check(struct mm_struct *mm, unsigned long flags,
1353 unsigned long len)
1354{
1355 unsigned long locked, lock_limit;
1356
1357
1358 if (flags & VM_LOCKED) {
1359 locked = len >> PAGE_SHIFT;
1360 locked += mm->locked_vm;
1361 lock_limit = rlimit(RLIMIT_MEMLOCK);
1362 lock_limit >>= PAGE_SHIFT;
1363 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1364 return -EAGAIN;
1365 }
1366 return 0;
1367}
1368
1369static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
1370{
1371 if (S_ISREG(inode->i_mode))
1372 return MAX_LFS_FILESIZE;
1373
1374 if (S_ISBLK(inode->i_mode))
1375 return MAX_LFS_FILESIZE;
1376
1377 if (S_ISSOCK(inode->i_mode))
1378 return MAX_LFS_FILESIZE;
1379
1380
1381 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
1382 return 0;
1383
1384
1385 return ULONG_MAX;
1386}
1387
1388static inline bool file_mmap_ok(struct file *file, struct inode *inode,
1389 unsigned long pgoff, unsigned long len)
1390{
1391 u64 maxsize = file_mmap_size_max(file, inode);
1392
1393 if (maxsize && len > maxsize)
1394 return false;
1395 maxsize -= len;
1396 if (pgoff > maxsize >> PAGE_SHIFT)
1397 return false;
1398 return true;
1399}
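
/*
 * The caller must write-lock current->mm->mmap_lock.
 */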
1404unsigned long do_mmap(struct file *file, unsigned long addr,
1405 unsigned long len, unsigned long prot,
1406 unsigned long flags, unsigned long pgoff,
1407 unsigned long *populate, struct list_head *uf)
1408{
1409 struct mm_struct *mm = current->mm;
1410 vm_flags_t vm_flags;
1411 int pkey = 0;
1412
1413 *populate = 0;
1414
1415 if (!len)
1416 return -EINVAL;
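
 /*
 * Does the application expect PROT_READ to imply PROT_EXEC?
 *
 * (the exception is when the underlying filesystem is noexec
 *  mounted, in which case we don't add PROT_EXEC.)
 */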
1424 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1425 if (!(file && path_noexec(&file->f_path)))
1426 prot |= PROT_EXEC;
1427
1428
1429 if (flags & MAP_FIXED_NOREPLACE)
1430 flags |= MAP_FIXED;
1431
1432 if (!(flags & MAP_FIXED))
1433 addr = round_hint_to_min(addr);
1434
1435
1436 len = PAGE_ALIGN(len);
1437 if (!len)
1438 return -ENOMEM;
1439
1440
1441 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1442 return -EOVERFLOW;
1443
1444
1445 if (mm->map_count > sysctl_max_map_count)
1446 return -ENOMEM;
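
 /*
 * Obtain the address to map to.  We verify (or select) it and ensure
 * that it represents a valid section of the address space.
 */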
1451 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1452 if (IS_ERR_VALUE(addr))
1453 return addr;
1454
1455 if (flags & MAP_FIXED_NOREPLACE) {
1456 if (find_vma_intersection(mm, addr, addr + len))
1457 return -EEXIST;
1458 }
1459
1460 if (prot == PROT_EXEC) {
1461 pkey = execute_only_pkey(mm);
1462 if (pkey < 0)
1463 pkey = 0;
1464 }
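
 /*
 * Do simple checking here so the lower-level routines won't have to.
 * We assume access permissions have been handled by the open of the
 * memory object, so we don't do any here.
 */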
1470 vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1471 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1472
1473 if (flags & MAP_LOCKED)
1474 if (!can_do_mlock())
1475 return -EPERM;
1476
1477 if (mlock_future_check(mm, vm_flags, len))
1478 return -EAGAIN;
1479
1480 if (file) {
1481 struct inode *inode = file_inode(file);
1482 unsigned long flags_mask;
1483
1484 if (!file_mmap_ok(file, inode, pgoff, len))
1485 return -EOVERFLOW;
1486
1487 flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
1488
1489 switch (flags & MAP_TYPE) {
1490 case MAP_SHARED:
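 /*
 * Force use of MAP_SHARED_VALIDATE with non-legacy flags.  E.g.
 * MAP_SYNC is dangerous to use with MAP_SHARED as you don't know
 * which consistency model you will get.  We silently ignore
 * unsupported flags with MAP_SHARED to preserve backward
 * compatibility.
 */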
1498 flags &= LEGACY_MAP_MASK;
1499 fallthrough;
1500 case MAP_SHARED_VALIDATE:
1501 if (flags & ~flags_mask)
1502 return -EOPNOTSUPP;
1503 if (prot & PROT_WRITE) {
1504 if (!(file->f_mode & FMODE_WRITE))
1505 return -EACCES;
1506 if (IS_SWAPFILE(file->f_mapping->host))
1507 return -ETXTBSY;
1508 }
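
 /*
 * Make sure we don't allow writing to an append-only file.
 */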
1514 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1515 return -EACCES;
1516
1517 vm_flags |= VM_SHARED | VM_MAYSHARE;
1518 if (!(file->f_mode & FMODE_WRITE))
1519 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1520 fallthrough;
1521 case MAP_PRIVATE:
1522 if (!(file->f_mode & FMODE_READ))
1523 return -EACCES;
1524 if (path_noexec(&file->f_path)) {
1525 if (vm_flags & VM_EXEC)
1526 return -EPERM;
1527 vm_flags &= ~VM_MAYEXEC;
1528 }
1529
1530 if (!file->f_op->mmap)
1531 return -ENODEV;
1532 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1533 return -EINVAL;
1534 break;
1535
1536 default:
1537 return -EINVAL;
1538 }
1539 } else {
1540 switch (flags & MAP_TYPE) {
1541 case MAP_SHARED:
1542 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1543 return -EINVAL;
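 /*
 * Ignore pgoff for anonymous shared mappings.
 */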
1547 pgoff = 0;
1548 vm_flags |= VM_SHARED | VM_MAYSHARE;
1549 break;
1550 case MAP_PRIVATE:
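 /*
 * Set pgoff according to addr for anon_vma.
 */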
1554 pgoff = addr >> PAGE_SHIFT;
1555 break;
1556 default:
1557 return -EINVAL;
1558 }
1559 }
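
 /*
 * Set 'VM_NORESERVE' if we should not account for the
 * memory use of this mapping.
 */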
1565 if (flags & MAP_NORESERVE) {
1566
1567 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1568 vm_flags |= VM_NORESERVE;
1569
1570
1571 if (file && is_file_hugepages(file))
1572 vm_flags |= VM_NORESERVE;
1573 }
1574
1575 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1576 if (!IS_ERR_VALUE(addr) &&
1577 ((vm_flags & VM_LOCKED) ||
1578 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1579 *populate = len;
1580 return addr;
1581}
1582
1583unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
1584 unsigned long prot, unsigned long flags,
1585 unsigned long fd, unsigned long pgoff)
1586{
1587 struct file *file = NULL;
1588 unsigned long retval;
1589
1590 if (!(flags & MAP_ANONYMOUS)) {
1591 audit_mmap_fd(fd, flags);
1592 file = fget(fd);
1593 if (!file)
1594 return -EBADF;
1595 if (is_file_hugepages(file)) {
1596 len = ALIGN(len, huge_page_size(hstate_file(file)));
1597 } else if (unlikely(flags & MAP_HUGETLB)) {
1598 retval = -EINVAL;
1599 goto out_fput;
1600 }
1601 } else if (flags & MAP_HUGETLB) {
1602 struct ucounts *ucounts = NULL;
1603 struct hstate *hs;
1604
1605 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1606 if (!hs)
1607 return -EINVAL;
1608
1609 len = ALIGN(len, huge_page_size(hs));
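 /*
 * VM_NORESERVE is used because the reservations will be
 * taken when vm_ops->mmap() is called for the hugetlbfs file.
 */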
1616 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1617 VM_NORESERVE,
1618 &ucounts, HUGETLB_ANONHUGE_INODE,
1619 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1620 if (IS_ERR(file))
1621 return PTR_ERR(file);
1622 }
1623
1624 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1625out_fput:
1626 if (file)
1627 fput(file);
1628 return retval;
1629}
1630
1631SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1632 unsigned long, prot, unsigned long, flags,
1633 unsigned long, fd, unsigned long, pgoff)
1634{
1635 return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
1636}
1637
1638#ifdef __ARCH_WANT_SYS_OLD_MMAP
1639struct mmap_arg_struct {
1640 unsigned long addr;
1641 unsigned long len;
1642 unsigned long prot;
1643 unsigned long flags;
1644 unsigned long fd;
1645 unsigned long offset;
1646};
1647
1648SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1649{
1650 struct mmap_arg_struct a;
1651
1652 if (copy_from_user(&a, arg, sizeof(a)))
1653 return -EFAULT;
1654 if (offset_in_page(a.offset))
1655 return -EINVAL;
1656
1657 return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1658 a.offset >> PAGE_SHIFT);
1659}
1660#endif
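
/*
 * Some shared mappings will want the pages marked read-only
 * to track write events.  If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */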
1668int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1669{
1670 vm_flags_t vm_flags = vma->vm_flags;
1671 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1672
1673
1674 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1675 return 0;
1676
1677
1678 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1679 return 1;
1680
1681
1682
1683 if (pgprot_val(vm_page_prot) !=
1684 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1685 return 0;
1686
1687
1688 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1689 return 1;
1690
1691
1692 if (vm_flags & VM_PFNMAP)
1693 return 0;
1694
1695
1696 return vma->vm_file && vma->vm_file->f_mapping &&
1697 mapping_can_writeback(vma->vm_file->f_mapping);
1698}
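
/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */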
1704static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1705{
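 /*
 * hugetlb has its own accounting separate from the core VM.
 * VM_HUGETLB may not be set yet, so we can't check for that flag.
 */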
1710 if (file && is_file_hugepages(file))
1711 return 0;
1712
1713 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1714}
1715
1716unsigned long mmap_region(struct file *file, unsigned long addr,
1717 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1718 struct list_head *uf)
1719{
1720 struct mm_struct *mm = current->mm;
1721 struct vm_area_struct *vma, *prev, *merge;
1722 int error;
1723 struct rb_node **rb_link, *rb_parent;
1724 unsigned long charged = 0;
1725
1726
1727 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1728 unsigned long nr_pages;
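
 /*
 * MAP_FIXED may remove pages of mappings that intersect with the
 * requested mapping.  Account for the pages it would unmap.
 */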
1734 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1735
1736 if (!may_expand_vm(mm, vm_flags,
1737 (len >> PAGE_SHIFT) - nr_pages))
1738 return -ENOMEM;
1739 }
1740
1741
1742 if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
1743 return -ENOMEM;
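
 /*
 * Private writable mapping: check memory availability.
 */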
1747 if (accountable_mapping(file, vm_flags)) {
1748 charged = len >> PAGE_SHIFT;
1749 if (security_vm_enough_memory_mm(mm, charged))
1750 return -ENOMEM;
1751 vm_flags |= VM_ACCOUNT;
1752 }
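
 /*
 * Can we just expand an old mapping?
 */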
1757 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1758 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1759 if (vma)
1760 goto out;
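
 /*
 * Determine the object being mapped and call the appropriate
 * specific mapper.  The address has already been validated, but
 * not unmapped, but the maps are removed from the list.
 */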
1767 vma = vm_area_alloc(mm);
1768 if (!vma) {
1769 error = -ENOMEM;
1770 goto unacct_error;
1771 }
1772
1773 vma->vm_start = addr;
1774 vma->vm_end = addr + len;
1775 vma->vm_flags = vm_flags;
1776 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1777 vma->vm_pgoff = pgoff;
1778
1779 if (file) {
1780 if (vm_flags & VM_SHARED) {
1781 error = mapping_map_writable(file->f_mapping);
1782 if (error)
1783 goto free_vma;
1784 }
1785
1786 vma->vm_file = get_file(file);
1787 error = call_mmap(file, vma);
1788 if (error)
1789 goto unmap_and_free_vma;
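
 /*
 * Can addr have changed?  Yes: several device drivers change it in
 * their ->mmap() handler.  If it did change, the prev/rb_link/rb_parent
 * values computed earlier would be stale for vma_link(), hence the
 * warning below.
 */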
1798 WARN_ON_ONCE(addr != vma->vm_start);
1799
1800 addr = vma->vm_start;
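
 /*
 * If vm_flags changed after call_mmap(), we should try merging the
 * vma again, as we may succeed this time.
 */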
1805 if (unlikely(vm_flags != vma->vm_flags && prev)) {
1806 merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
1807 NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX);
1808 if (merge) {
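 /*
 * ->mmap() can change vma->vm_file and fput() the original file, so
 * fput() vma->vm_file here to keep the file reference counts balanced.
 */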
1813 fput(vma->vm_file);
1814 vm_area_free(vma);
1815 vma = merge;
1816
1817 vm_flags = vma->vm_flags;
1818 goto unmap_writable;
1819 }
1820 }
1821
1822 vm_flags = vma->vm_flags;
1823 } else if (vm_flags & VM_SHARED) {
1824 error = shmem_zero_setup(vma);
1825 if (error)
1826 goto free_vma;
1827 } else {
1828 vma_set_anonymous(vma);
1829 }
1830
1831
1832 if (!arch_validate_flags(vma->vm_flags)) {
1833 error = -EINVAL;
1834 if (file)
1835 goto unmap_and_free_vma;
1836 else
1837 goto free_vma;
1838 }
1839
1840 vma_link(mm, vma, prev, rb_link, rb_parent);
1841
1842unmap_writable:
1843 if (file && vm_flags & VM_SHARED)
1844 mapping_unmap_writable(file->f_mapping);
1845 file = vma->vm_file;
1846out:
1847 perf_event_mmap(vma);
1848
1849 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1850 if (vm_flags & VM_LOCKED) {
1851 if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
1852 is_vm_hugetlb_page(vma) ||
1853 vma == get_gate_vma(current->mm))
1854 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1855 else
1856 mm->locked_vm += (len >> PAGE_SHIFT);
1857 }
1858
1859 if (file)
1860 uprobe_mmap(vma);
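
 /*
 * New (or expanded) vmas always get the soft-dirty bit, otherwise a
 * user-space soft-dirty tracker could not tell an area that was
 * unmapped and then re-mapped in place from one it has already seen.
 */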
1869 vma->vm_flags |= VM_SOFTDIRTY;
1870
1871 vma_set_page_prot(vma);
1872
1873 return addr;
1874
1875unmap_and_free_vma:
1876 fput(vma->vm_file);
1877 vma->vm_file = NULL;
1878
1879
1880 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1881 charged = 0;
1882 if (vm_flags & VM_SHARED)
1883 mapping_unmap_writable(file->f_mapping);
1884free_vma:
1885 vm_area_free(vma);
1886unacct_error:
1887 if (charged)
1888 vm_unacct_memory(charged);
1889 return error;
1890}
1891
1892static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1893{
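 /*
 * We implement the search by looking for an rbtree node that
 * immediately follows a suitable gap.  That is,
 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
 * - gap_end - gap_start >= length
 */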
1902 struct mm_struct *mm = current->mm;
1903 struct vm_area_struct *vma;
1904 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1905
1906
1907 length = info->length + info->align_mask;
1908 if (length < info->length)
1909 return -ENOMEM;
1910
1911
1912 if (info->high_limit < length)
1913 return -ENOMEM;
1914 high_limit = info->high_limit - length;
1915
1916 if (info->low_limit > high_limit)
1917 return -ENOMEM;
1918 low_limit = info->low_limit + length;
1919
1920
1921 if (RB_EMPTY_ROOT(&mm->mm_rb))
1922 goto check_highest;
1923 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1924 if (vma->rb_subtree_gap < length)
1925 goto check_highest;
1926
1927 while (true) {
1928
1929 gap_end = vm_start_gap(vma);
1930 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1931 struct vm_area_struct *left =
1932 rb_entry(vma->vm_rb.rb_left,
1933 struct vm_area_struct, vm_rb);
1934 if (left->rb_subtree_gap >= length) {
1935 vma = left;
1936 continue;
1937 }
1938 }
1939
1940 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1941check_current:
1942
1943 if (gap_start > high_limit)
1944 return -ENOMEM;
1945 if (gap_end >= low_limit &&
1946 gap_end > gap_start && gap_end - gap_start >= length)
1947 goto found;
1948
1949
1950 if (vma->vm_rb.rb_right) {
1951 struct vm_area_struct *right =
1952 rb_entry(vma->vm_rb.rb_right,
1953 struct vm_area_struct, vm_rb);
1954 if (right->rb_subtree_gap >= length) {
1955 vma = right;
1956 continue;
1957 }
1958 }
1959
1960
1961 while (true) {
1962 struct rb_node *prev = &vma->vm_rb;
1963 if (!rb_parent(prev))
1964 goto check_highest;
1965 vma = rb_entry(rb_parent(prev),
1966 struct vm_area_struct, vm_rb);
1967 if (prev == vma->vm_rb.rb_left) {
1968 gap_start = vm_end_gap(vma->vm_prev);
1969 gap_end = vm_start_gap(vma);
1970 goto check_current;
1971 }
1972 }
1973 }
1974
1975check_highest:
1976
1977 gap_start = mm->highest_vm_end;
1978 gap_end = ULONG_MAX;
1979 if (gap_start > high_limit)
1980 return -ENOMEM;
1981
1982found:
1983
1984 if (gap_start < info->low_limit)
1985 gap_start = info->low_limit;
1986
1987
1988 gap_start += (info->align_offset - gap_start) & info->align_mask;
1989
1990 VM_BUG_ON(gap_start + info->length > info->high_limit);
1991 VM_BUG_ON(gap_start + info->length > gap_end);
1992 return gap_start;
1993}
1994
1995static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1996{
1997 struct mm_struct *mm = current->mm;
1998 struct vm_area_struct *vma;
1999 unsigned long length, low_limit, high_limit, gap_start, gap_end;
2000
2001
2002 length = info->length + info->align_mask;
2003 if (length < info->length)
2004 return -ENOMEM;
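
 /*
 * Adjust search limits by the desired length.
 * See implementation comment at top of unmapped_area().
 */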
2010 gap_end = info->high_limit;
2011 if (gap_end < length)
2012 return -ENOMEM;
2013 high_limit = gap_end - length;
2014
2015 if (info->low_limit > high_limit)
2016 return -ENOMEM;
2017 low_limit = info->low_limit + length;
2018
2019
2020 gap_start = mm->highest_vm_end;
2021 if (gap_start <= high_limit)
2022 goto found_highest;
2023
2024
2025 if (RB_EMPTY_ROOT(&mm->mm_rb))
2026 return -ENOMEM;
2027 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2028 if (vma->rb_subtree_gap < length)
2029 return -ENOMEM;
2030
2031 while (true) {
2032
2033 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2034 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2035 struct vm_area_struct *right =
2036 rb_entry(vma->vm_rb.rb_right,
2037 struct vm_area_struct, vm_rb);
2038 if (right->rb_subtree_gap >= length) {
2039 vma = right;
2040 continue;
2041 }
2042 }
2043
2044check_current:
2045
2046 gap_end = vm_start_gap(vma);
2047 if (gap_end < low_limit)
2048 return -ENOMEM;
2049 if (gap_start <= high_limit &&
2050 gap_end > gap_start && gap_end - gap_start >= length)
2051 goto found;
2052
2053
2054 if (vma->vm_rb.rb_left) {
2055 struct vm_area_struct *left =
2056 rb_entry(vma->vm_rb.rb_left,
2057 struct vm_area_struct, vm_rb);
2058 if (left->rb_subtree_gap >= length) {
2059 vma = left;
2060 continue;
2061 }
2062 }
2063
2064
2065 while (true) {
2066 struct rb_node *prev = &vma->vm_rb;
2067 if (!rb_parent(prev))
2068 return -ENOMEM;
2069 vma = rb_entry(rb_parent(prev),
2070 struct vm_area_struct, vm_rb);
2071 if (prev == vma->vm_rb.rb_right) {
2072 gap_start = vma->vm_prev ?
2073 vm_end_gap(vma->vm_prev) : 0;
2074 goto check_current;
2075 }
2076 }
2077 }
2078
2079found:
2080
2081 if (gap_end > info->high_limit)
2082 gap_end = info->high_limit;
2083
2084found_highest:
2085
2086 gap_end -= info->length;
2087 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2088
2089 VM_BUG_ON(gap_end < info->low_limit);
2090 VM_BUG_ON(gap_end < gap_start);
2091 return gap_end;
2092}
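
/*
 * Search for an unmapped address range.
 *
 * We are looking for a range that:
 * - does not intersect with any VMA;
 * - is contained within the [low_limit, high_limit) interval;
 * - is at least the desired size;
 * - satisfies (begin_addr & align_mask) == (align_offset & align_mask).
 */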
2103unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
2104{
2105 unsigned long addr;
2106
2107 if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
2108 addr = unmapped_area_topdown(info);
2109 else
2110 addr = unmapped_area(info);
2111
2112 trace_vm_unmapped_area(addr, info);
2113 return addr;
2114}
2115
2116#ifndef arch_get_mmap_end
2117#define arch_get_mmap_end(addr) (TASK_SIZE)
2118#endif
2119
2120#ifndef arch_get_mmap_base
2121#define arch_get_mmap_base(addr, base) (base)
2122#endif
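
/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *     if (ret & ~PAGE_MASK)
 *             error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */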
2135#ifndef HAVE_ARCH_UNMAPPED_AREA
2136unsigned long
2137arch_get_unmapped_area(struct file *filp, unsigned long addr,
2138 unsigned long len, unsigned long pgoff, unsigned long flags)
2139{
2140 struct mm_struct *mm = current->mm;
2141 struct vm_area_struct *vma, *prev;
2142 struct vm_unmapped_area_info info;
2143 const unsigned long mmap_end = arch_get_mmap_end(addr);
2144
2145 if (len > mmap_end - mmap_min_addr)
2146 return -ENOMEM;
2147
2148 if (flags & MAP_FIXED)
2149 return addr;
2150
2151 if (addr) {
2152 addr = PAGE_ALIGN(addr);
2153 vma = find_vma_prev(mm, addr, &prev);
2154 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2155 (!vma || addr + len <= vm_start_gap(vma)) &&
2156 (!prev || addr >= vm_end_gap(prev)))
2157 return addr;
2158 }
2159
2160 info.flags = 0;
2161 info.length = len;
2162 info.low_limit = mm->mmap_base;
2163 info.high_limit = mmap_end;
2164 info.align_mask = 0;
2165 info.align_offset = 0;
2166 return vm_unmapped_area(&info);
2167}
2168#endif
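
/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base).
 */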
2174#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2175unsigned long
2176arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
2177 unsigned long len, unsigned long pgoff,
2178 unsigned long flags)
2179{
2180 struct vm_area_struct *vma, *prev;
2181 struct mm_struct *mm = current->mm;
2182 struct vm_unmapped_area_info info;
2183 const unsigned long mmap_end = arch_get_mmap_end(addr);
2184
2185
2186 if (len > mmap_end - mmap_min_addr)
2187 return -ENOMEM;
2188
2189 if (flags & MAP_FIXED)
2190 return addr;
2191
2192
2193 if (addr) {
2194 addr = PAGE_ALIGN(addr);
2195 vma = find_vma_prev(mm, addr, &prev);
2196 if (mmap_end - len >= addr && addr >= mmap_min_addr &&
2197 (!vma || addr + len <= vm_start_gap(vma)) &&
2198 (!prev || addr >= vm_end_gap(prev)))
2199 return addr;
2200 }
2201
2202 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2203 info.length = len;
2204 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2205 info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
2206 info.align_mask = 0;
2207 info.align_offset = 0;
2208 addr = vm_unmapped_area(&info);
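
 /*
 * A failed mmap() very likely causes application failure,
 * so fall back to the bottom-up function here.  This scenario
 * can happen with large stack limits and large mmap()
 * allocations.
 */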
2216 if (offset_in_page(addr)) {
2217 VM_BUG_ON(addr != -ENOMEM);
2218 info.flags = 0;
2219 info.low_limit = TASK_UNMAPPED_BASE;
2220 info.high_limit = mmap_end;
2221 addr = vm_unmapped_area(&info);
2222 }
2223
2224 return addr;
2225}
2226#endif
2227
2228unsigned long
2229get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2230 unsigned long pgoff, unsigned long flags)
2231{
2232 unsigned long (*get_area)(struct file *, unsigned long,
2233 unsigned long, unsigned long, unsigned long);
2234
2235 unsigned long error = arch_mmap_check(addr, len, flags);
2236 if (error)
2237 return error;
2238
2239
2240 if (len > TASK_SIZE)
2241 return -ENOMEM;
2242
2243 get_area = current->mm->get_unmapped_area;
2244 if (file) {
2245 if (file->f_op->get_unmapped_area)
2246 get_area = file->f_op->get_unmapped_area;
2247 } else if (flags & MAP_SHARED) {
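 /*
 * mmap_region() will call shmem_zero_setup() to create a file,
 * so use shmem's get_unmapped_area in case it can be huge.
 * do_mmap() will clear pgoff, so match alignment.
 */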
2253 pgoff = 0;
2254 get_area = shmem_get_unmapped_area;
2255 }
2256
2257 addr = get_area(file, addr, len, pgoff, flags);
2258 if (IS_ERR_VALUE(addr))
2259 return addr;
2260
2261 if (addr > TASK_SIZE - len)
2262 return -ENOMEM;
2263 if (offset_in_page(addr))
2264 return -EINVAL;
2265
2266 error = security_mmap_addr(addr);
2267 return error ? error : addr;
2268}
2269
2270EXPORT_SYMBOL(get_unmapped_area);
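
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */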
2273struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2274{
2275 struct rb_node *rb_node;
2276 struct vm_area_struct *vma;
2277
2278 mmap_assert_locked(mm);
2279
2280 vma = vmacache_find(mm, addr);
2281 if (likely(vma))
2282 return vma;
2283
2284 rb_node = mm->mm_rb.rb_node;
2285
2286 while (rb_node) {
2287 struct vm_area_struct *tmp;
2288
2289 tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2290
2291 if (tmp->vm_end > addr) {
2292 vma = tmp;
2293 if (tmp->vm_start <= addr)
2294 break;
2295 rb_node = rb_node->rb_left;
2296 } else
2297 rb_node = rb_node->rb_right;
2298 }
2299
2300 if (vma)
2301 vmacache_update(addr, vma);
2302 return vma;
2303}
2304
2305EXPORT_SYMBOL(find_vma);
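
/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */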
2310struct vm_area_struct *
2311find_vma_prev(struct mm_struct *mm, unsigned long addr,
2312 struct vm_area_struct **pprev)
2313{
2314 struct vm_area_struct *vma;
2315
2316 vma = find_vma(mm, addr);
2317 if (vma) {
2318 *pprev = vma->vm_prev;
2319 } else {
2320 struct rb_node *rb_node = rb_last(&mm->mm_rb);
2321
2322 *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL;
2323 }
2324 return vma;
2325}
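
/*
 * Verify that the stack growth is acceptable and
 * update accounting.  This is shared with both the
 * grow-up and grow-down cases.
 */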
2332static int acct_stack_growth(struct vm_area_struct *vma,
2333 unsigned long size, unsigned long grow)
2334{
2335 struct mm_struct *mm = vma->vm_mm;
2336 unsigned long new_start;
2337
2338
2339 if (!may_expand_vm(mm, vma->vm_flags, grow))
2340 return -ENOMEM;
2341
2342
2343 if (size > rlimit(RLIMIT_STACK))
2344 return -ENOMEM;
2345
2346
2347 if (vma->vm_flags & VM_LOCKED) {
2348 unsigned long locked;
2349 unsigned long limit;
2350 locked = mm->locked_vm + grow;
2351 limit = rlimit(RLIMIT_MEMLOCK);
2352 limit >>= PAGE_SHIFT;
2353 if (locked > limit && !capable(CAP_IPC_LOCK))
2354 return -ENOMEM;
2355 }
2356
2357
2358 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2359 vma->vm_end - size;
2360 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2361 return -EFAULT;
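
 /*
 * Overcommit..  This must be the final test, as it will
 * update security statistics.
 */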
2367 if (security_vm_enough_memory_mm(mm, grow))
2368 return -ENOMEM;
2369
2370 return 0;
2371}
2372
2373#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
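/*
 * Expand an upwards-growing (VM_GROWSUP) vma so that it covers 'address'.
 */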
2378int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2379{
2380 struct mm_struct *mm = vma->vm_mm;
2381 struct vm_area_struct *next;
2382 unsigned long gap_addr;
2383 int error = 0;
2384
2385 if (!(vma->vm_flags & VM_GROWSUP))
2386 return -EFAULT;
2387
2388
2389 address &= PAGE_MASK;
2390 if (address >= (TASK_SIZE & PAGE_MASK))
2391 return -ENOMEM;
2392 address += PAGE_SIZE;
2393
2394
2395 gap_addr = address + stack_guard_gap;
2396
2397
2398 if (gap_addr < address || gap_addr > TASK_SIZE)
2399 gap_addr = TASK_SIZE;
2400
2401 next = vma->vm_next;
2402 if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
2403 if (!(next->vm_flags & VM_GROWSUP))
2404 return -ENOMEM;
2405
2406 }
2407
2408
2409 if (unlikely(anon_vma_prepare(vma)))
2410 return -ENOMEM;
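
 /*
 * vma->vm_start/vm_end cannot change under us because the caller
 * is required to hold the mmap_lock in read mode.  We need the
 * anon_vma lock to serialize against concurrent expand_stacks.
 */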
2417 anon_vma_lock_write(vma->anon_vma);
2418
2419
2420 if (address > vma->vm_end) {
2421 unsigned long size, grow;
2422
2423 size = address - vma->vm_start;
2424 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2425
2426 error = -ENOMEM;
2427 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2428 error = acct_stack_growth(vma, size, grow);
2429 if (!error) {
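 /*
 * vma_gap_update() doesn't support concurrent updates, but we only
 * hold a shared mmap_lock here, so we need to protect against
 * concurrent vma expansions.  anon_vma_lock_write() doesn't help,
 * as we don't guarantee that all growable vmas in a mm share the
 * same root anon_vma.  So, we reuse mm->page_table_lock to guard
 * against concurrent vma expansions.
 */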
2441 spin_lock(&mm->page_table_lock);
2442 if (vma->vm_flags & VM_LOCKED)
2443 mm->locked_vm += grow;
2444 vm_stat_account(mm, vma->vm_flags, grow);
2445 anon_vma_interval_tree_pre_update_vma(vma);
2446 vma->vm_end = address;
2447 anon_vma_interval_tree_post_update_vma(vma);
2448 if (vma->vm_next)
2449 vma_gap_update(vma->vm_next);
2450 else
2451 mm->highest_vm_end = vm_end_gap(vma);
2452 spin_unlock(&mm->page_table_lock);
2453
2454 perf_event_mmap(vma);
2455 }
2456 }
2457 }
2458 anon_vma_unlock_write(vma->anon_vma);
2459 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2460 validate_mm(mm);
2461 return error;
2462}
2463#endif
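
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */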
2468int expand_downwards(struct vm_area_struct *vma,
2469 unsigned long address)
2470{
2471 struct mm_struct *mm = vma->vm_mm;
2472 struct vm_area_struct *prev;
2473 int error = 0;
2474
2475 address &= PAGE_MASK;
2476 if (address < mmap_min_addr)
2477 return -EPERM;
2478
2479
2480 prev = vma->vm_prev;
2481
2482 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2483 vma_is_accessible(prev)) {
2484 if (address - prev->vm_end < stack_guard_gap)
2485 return -ENOMEM;
2486 }
2487
2488
2489 if (unlikely(anon_vma_prepare(vma)))
2490 return -ENOMEM;
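
 /*
 * vma->vm_start/vm_end cannot change under us because the caller
 * is required to hold the mmap_lock in read mode.  We need the
 * anon_vma lock to serialize against concurrent expand_stacks.
 */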
2497 anon_vma_lock_write(vma->anon_vma);
2498
2499
2500 if (address < vma->vm_start) {
2501 unsigned long size, grow;
2502
2503 size = vma->vm_end - address;
2504 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2505
2506 error = -ENOMEM;
2507 if (grow <= vma->vm_pgoff) {
2508 error = acct_stack_growth(vma, size, grow);
2509 if (!error) {
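 /*
 * vma_gap_update() doesn't support concurrent updates, but we only
 * hold a shared mmap_lock here, so we need to protect against
 * concurrent vma expansions.  anon_vma_lock_write() doesn't help,
 * as we don't guarantee that all growable vmas in a mm share the
 * same root anon_vma.  So, we reuse mm->page_table_lock to guard
 * against concurrent vma expansions.
 */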
2521 spin_lock(&mm->page_table_lock);
2522 if (vma->vm_flags & VM_LOCKED)
2523 mm->locked_vm += grow;
2524 vm_stat_account(mm, vma->vm_flags, grow);
2525 anon_vma_interval_tree_pre_update_vma(vma);
2526 vma->vm_start = address;
2527 vma->vm_pgoff -= grow;
2528 anon_vma_interval_tree_post_update_vma(vma);
2529 vma_gap_update(vma);
2530 spin_unlock(&mm->page_table_lock);
2531
2532 perf_event_mmap(vma);
2533 }
2534 }
2535 }
2536 anon_vma_unlock_write(vma->anon_vma);
2537 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2538 validate_mm(mm);
2539 return error;
2540}
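
/* enforced gap between the expanding stack and other mappings. */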
2543unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
2544
2545static int __init cmdline_parse_stack_guard_gap(char *p)
2546{
2547 unsigned long val;
2548 char *endptr;
2549
2550 val = simple_strtoul(p, &endptr, 10);
2551 if (!*endptr)
2552 stack_guard_gap = val << PAGE_SHIFT;
2553
2554 return 0;
2555}
2556__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2557
2558#ifdef CONFIG_STACK_GROWSUP
2559int expand_stack(struct vm_area_struct *vma, unsigned long address)
2560{
2561 return expand_upwards(vma, address);
2562}
2563
2564struct vm_area_struct *
2565find_extend_vma(struct mm_struct *mm, unsigned long addr)
2566{
2567 struct vm_area_struct *vma, *prev;
2568
2569 addr &= PAGE_MASK;
2570 vma = find_vma_prev(mm, addr, &prev);
2571 if (vma && (vma->vm_start <= addr))
2572 return vma;
2573
2574 if (!prev || expand_stack(prev, addr))
2575 return NULL;
2576 if (prev->vm_flags & VM_LOCKED)
2577 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2578 return prev;
2579}
2580#else
2581int expand_stack(struct vm_area_struct *vma, unsigned long address)
2582{
2583 return expand_downwards(vma, address);
2584}
2585
2586struct vm_area_struct *
2587find_extend_vma(struct mm_struct *mm, unsigned long addr)
2588{
2589 struct vm_area_struct *vma;
2590 unsigned long start;
2591
2592 addr &= PAGE_MASK;
2593 vma = find_vma(mm, addr);
2594 if (!vma)
2595 return NULL;
2596 if (vma->vm_start <= addr)
2597 return vma;
2598 if (!(vma->vm_flags & VM_GROWSDOWN))
2599 return NULL;
2600 start = vma->vm_start;
2601 if (expand_stack(vma, addr))
2602 return NULL;
2603 if (vma->vm_flags & VM_LOCKED)
2604 populate_vma_page_range(vma, addr, start, NULL);
2605 return vma;
2606}
2607#endif
2608
2609EXPORT_SYMBOL_GPL(find_extend_vma);
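
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */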
2617static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2618{
2619 unsigned long nr_accounted = 0;
2620
2621
2622 update_hiwater_vm(mm);
2623 do {
2624 long nrpages = vma_pages(vma);
2625
2626 if (vma->vm_flags & VM_ACCOUNT)
2627 nr_accounted += nrpages;
2628 vm_stat_account(mm, vma->vm_flags, -nrpages);
2629 vma = remove_vma(vma);
2630 } while (vma);
2631 vm_unacct_memory(nr_accounted);
2632 validate_mm(mm);
2633}
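
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */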
2640static void unmap_region(struct mm_struct *mm,
2641 struct vm_area_struct *vma, struct vm_area_struct *prev,
2642 unsigned long start, unsigned long end)
2643{
2644 struct vm_area_struct *next = vma_next(mm, prev);
2645 struct mmu_gather tlb;
2646
2647 lru_add_drain();
2648 tlb_gather_mmu(&tlb, mm);
2649 update_hiwater_rss(mm);
2650 unmap_vmas(&tlb, vma, start, end);
2651 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2652 next ? next->vm_start : USER_PGTABLES_CEILING);
2653 tlb_finish_mmu(&tlb);
2654}
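
/*
 * Create a list of vmas touched by the unmap, removing them from the mm's
 * vma list as we go.  Returns false if the caller must not downgrade
 * mmap_lock, because a neighbouring stack vma could still grow into the gap.
 */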
2660static bool
2661detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2662 struct vm_area_struct *prev, unsigned long end)
2663{
2664 struct vm_area_struct **insertion_point;
2665 struct vm_area_struct *tail_vma = NULL;
2666
2667 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2668 vma->vm_prev = NULL;
2669 do {
2670 vma_rb_erase(vma, &mm->mm_rb);
2671 mm->map_count--;
2672 tail_vma = vma;
2673 vma = vma->vm_next;
2674 } while (vma && vma->vm_start < end);
2675 *insertion_point = vma;
2676 if (vma) {
2677 vma->vm_prev = prev;
2678 vma_gap_update(vma);
2679 } else
2680 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2681 tail_vma->vm_next = NULL;
2682
2683
2684 vmacache_invalidate(mm);
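
 /*
 * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
 * VM_GROWSUP VMA.  Such VMAs can change their size under
 * down_read(mmap_lock) and collide with the VMA we are about to unmap.
 */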
2691 if (vma && (vma->vm_flags & VM_GROWSDOWN))
2692 return false;
2693 if (prev && (prev->vm_flags & VM_GROWSUP))
2694 return false;
2695 return true;
2696}
2697
2698
2699
2700
2701
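/*
 * __split_vma() splits a VMA into two pieces at address 'addr'.  A new
 * VMA is allocated for either the first or the second half; the
 * sysctl_max_map_count limit is deliberately not checked here, so
 * callers that must honour it should use split_vma() instead.
 */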
int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
	unsigned long addr, int new_below)
{
	struct vm_area_struct *new;
	int err;

	if (vma->vm_ops && vma->vm_ops->may_split) {
		err = vma->vm_ops->may_split(vma, addr);
		if (err)
			return err;
	}

	new = vm_area_dup(vma);
	if (!new)
		return -ENOMEM;

	if (new_below)
		new->vm_end = addr;
	else {
		new->vm_start = addr;
		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
	}

	err = vma_dup_policy(vma, new);
	if (err)
		goto out_free_vma;

	err = anon_vma_clone(new, vma);
	if (err)
		goto out_free_mpol;

	if (new->vm_file)
		get_file(new->vm_file);

	if (new->vm_ops && new->vm_ops->open)
		new->vm_ops->open(new);

	if (new_below)
		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
			((addr - new->vm_start) >> PAGE_SHIFT), new);
	else
		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

	if (!err)
		return 0;

	if (new->vm_ops && new->vm_ops->close)
		new->vm_ops->close(new);
	if (new->vm_file)
		fput(new->vm_file);
	unlink_anon_vmas(new);
 out_free_mpol:
	mpol_put(vma_policy(new));
 out_free_vma:
	vm_area_free(new);
	return err;
}

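/*
 * Split a vma into two pieces at address 'addr'; a new vma is allocated
 * either for the first part or the tail.  Unlike __split_vma(), this
 * checks the map_count limit first.
 */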
int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
	      unsigned long addr, int new_below)
{
	if (mm->map_count >= sysctl_max_map_count)
		return -ENOMEM;

	return __split_vma(mm, vma, addr, new_below);
}

static inline void
unlock_range(struct vm_area_struct *start, unsigned long limit)
{
	struct mm_struct *mm = start->vm_mm;
	struct vm_area_struct *tmp = start;

	while (tmp && tmp->vm_start < limit) {
		if (tmp->vm_flags & VM_LOCKED) {
			mm->locked_vm -= vma_pages(tmp);
			munlock_vma_pages_all(tmp);
		}

		tmp = tmp->vm_next;
	}
}

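/*
 * Munmap is split into two main parts -- this part, which finds what
 * needs doing and carves up the affected VMAs, and unmap_region(),
 * which does the actual tear-down.  Partial unmappings are handled by
 * splitting the first and/or last VMA.  Returns 1 (with the mmap lock
 * downgraded to read) when 'downgrade' was requested and is safe.
 */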
int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
		struct list_head *uf, bool downgrade)
{
	unsigned long end;
	struct vm_area_struct *vma, *prev, *last;

	if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
		return -EINVAL;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (len == 0)
		return -EINVAL;

	arch_unmap(mm, start, end);

	vma = find_vma_intersection(mm, start, end);
	if (!vma)
		return 0;
	prev = vma->vm_prev;

	if (start > vma->vm_start) {
		int error;

		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
			return -ENOMEM;

		error = __split_vma(mm, vma, start, 0);
		if (error)
			return error;
		prev = vma;
	}

	last = find_vma(mm, end);
	if (last && end > last->vm_start) {
		int error = __split_vma(mm, last, end, 1);
		if (error)
			return error;
	}
	vma = vma_next(mm, prev);

	if (unlikely(uf)) {
		int error = userfaultfd_unmap_prep(vma, start, end, uf);

		if (error)
			return error;
	}

	if (mm->locked_vm)
		unlock_range(vma, end);

	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
		downgrade = false;

	if (downgrade)
		mmap_write_downgrade(mm);

	unmap_region(mm, vma, prev, start, end);

	remove_vma_list(mm, vma);

	return downgrade ? 1 : 0;
}

int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
	      struct list_head *uf)
{
	return __do_munmap(mm, start, len, uf, false);
}

static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
{
	int ret;
	struct mm_struct *mm = current->mm;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __do_munmap(mm, start, len, &uf, downgrade);

	if (ret == 1) {
		mmap_read_unlock(mm);
		ret = 0;
	} else
		mmap_write_unlock(mm);

	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}

int vm_munmap(unsigned long start, size_t len)
{
	return __vm_munmap(start, len, false);
}
EXPORT_SYMBOL(vm_munmap);

SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
{
	addr = untagged_addr(addr);
	profile_munmap(addr);
	return __vm_munmap(addr, len, true);
}

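/*
 * Emulation of the deprecated remap_file_pages() syscall: simply remap
 * the range with a fresh call to do_mmap(), preserving the protection
 * bits of the existing shared mapping.
 */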
SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
		unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long populate = 0;
	unsigned long ret = -EINVAL;
	struct file *file;

	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
		     current->comm, current->pid);

	if (prot)
		return ret;
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	if (start + size <= start)
		return ret;

	if (pgoff + (size >> PAGE_SHIFT) < pgoff)
		return ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	vma = vma_lookup(mm, start);

	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (start + size > vma->vm_end) {
		struct vm_area_struct *next;

		for (next = vma->vm_next; next; next = next->vm_next) {
			if (next->vm_start != next->vm_prev->vm_end)
				goto out;

			if (next->vm_file != vma->vm_file)
				goto out;

			if (next->vm_flags != vma->vm_flags)
				goto out;

			if (start + size <= next->vm_end)
				break;
		}

		if (!next)
			goto out;
	}

	prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
	prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
	prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;

	flags &= MAP_NONBLOCK;
	flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
	if (vma->vm_flags & VM_LOCKED)
		flags |= MAP_LOCKED;

	file = get_file(vma->vm_file);
	ret = do_mmap(vma->vm_file, start, size,
			prot, flags, pgoff, &populate, NULL);
	fput(file);
out:
	mmap_write_unlock(mm);
	if (populate)
		mm_populate(ret, populate);
	if (!IS_ERR_VALUE(ret))
		ret = 0;
	return ret;
}

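/*
 * do_brk_flags() is a simplified do_mmap() used for brk()-style
 * anonymous mappings: it creates (or merges into) an anonymous mapping
 * at the given address.  Must be called with the mmap lock held for
 * writing.
 */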
static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	pgoff_t pgoff = addr >> PAGE_SHIFT;
	int error;
	unsigned long mapped_addr;

	if ((flags & (~VM_EXEC)) != 0)
		return -EINVAL;
	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

	mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
	if (IS_ERR_VALUE(mapped_addr))
		return mapped_addr;

	error = mlock_future_check(mm, mm->def_flags, len);
	if (error)
		return error;

	if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
		return -ENOMEM;

	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
		return -ENOMEM;

	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
		return -ENOMEM;

	vma = vma_merge(mm, prev, addr, addr + len, flags,
			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
	if (vma)
		goto out;

	vma = vm_area_alloc(mm);
	if (!vma) {
		vm_unacct_memory(len >> PAGE_SHIFT);
		return -ENOMEM;
	}

	vma_set_anonymous(vma);
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_pgoff = pgoff;
	vma->vm_flags = flags;
	vma->vm_page_prot = vm_get_page_prot(flags);
	vma_link(mm, vma, prev, rb_link, rb_parent);
out:
	perf_event_mmap(vma);
	mm->total_vm += len >> PAGE_SHIFT;
	mm->data_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED)
		mm->locked_vm += (len >> PAGE_SHIFT);
	vma->vm_flags |= VM_SOFTDIRTY;
	return 0;
}

int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	unsigned long len;
	int ret;
	bool populate;
	LIST_HEAD(uf);

	len = PAGE_ALIGN(request);
	if (len < request)
		return -ENOMEM;
	if (!len)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = do_brk_flags(addr, len, flags, &uf);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate && !ret)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk_flags);

int vm_brk(unsigned long addr, unsigned long len)
{
	return vm_brk_flags(addr, len, 0);
}
EXPORT_SYMBOL(vm_brk);

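/* Release all mmaps held by a dying mm. */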
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	mmu_notifier_release(mm);

	if (unlikely(mm_is_oom_victim(mm))) {
		(void)__oom_reap_task_mm(mm);

		set_bit(MMF_OOM_SKIP, &mm->flags);
		mmap_write_lock(mm);
		mmap_write_unlock(mm);
	}

	if (mm->locked_vm)
		unlock_range(mm->mmap, ULONG_MAX);

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu_fullmm(&tlb, mm);

	unmap_vmas(&tlb, vma, 0, -1);
	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb);

	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
		cond_resched();
	}
	vm_unacct_memory(nr_accounted);
}

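/*
 * Insert vm structure into the process's VMA list/rbtree and, for
 * file-backed mappings, into the inode's i_mmap tree.  The caller must
 * hold the mmap lock for writing.
 */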
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	if (vma_is_anonymous(vma)) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}

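/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */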
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			    vma->vm_userfaultfd_ctx);
	if (new_vma) {
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = vm_area_dup(vma);
		if (!new_vma)
			goto out;
		new_vma->vm_start = addr;
		new_vma->vm_end = addr + len;
		new_vma->vm_pgoff = pgoff;
		if (vma_dup_policy(vma, new_vma))
			goto out_free_vma;
		if (anon_vma_clone(new_vma, vma))
			goto out_free_mempol;
		if (new_vma->vm_file)
			get_file(new_vma->vm_file);
		if (new_vma->vm_ops && new_vma->vm_ops->open)
			new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
	}
	return new_vma;

out_free_mempol:
	mpol_put(vma_policy(new_vma));
out_free_vma:
	vm_area_free(new_vma);
out:
	return NULL;
}

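/*
 * Return true if the calling process may expand its vm space by the
 * passed number of pages without exceeding RLIMIT_AS or RLIMIT_DATA.
 */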
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
		return false;

	if (is_data_mapping(flags) &&
	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {

		if (rlimit(RLIMIT_DATA) == 0 &&
		    mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
			return true;

		pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
			     current->comm, current->pid,
			     (mm->data_vm + npages) << PAGE_SHIFT,
			     rlimit(RLIMIT_DATA),
			     ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

		if (!ignore_rlimit_data)
			return false;
	}

	return true;
}

void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
	mm->total_vm += npages;

	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}

static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

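/*
 * Having a close hook prevents vma merging regardless of flags.
 */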
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

static int special_mapping_mremap(struct vm_area_struct *new_vma)
{
	struct vm_special_mapping *sm = new_vma->vm_private_data;

	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
		return -EFAULT;

	if (sm->mremap)
		return sm->mremap(sm, new_vma);

	return 0;
}

static int special_mapping_split(struct vm_area_struct *vma, unsigned long addr)
{
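	/*
	 * Forbid splitting special mappings - the kernel has expectations
	 * about the number of pages in the mapping.  Together with
	 * VM_DONTEXPAND, the size of the vma should stay the same over the
	 * special mapping's lifetime.
	 */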
	return -EINVAL;
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
	.mremap = special_mapping_mremap,
	.name = special_mapping_name,

	.access = NULL,
	.may_split = special_mapping_split,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgoff_t pgoff;
	struct page **pages;

	if (vma->vm_ops == &legacy_special_mapping_vmops) {
		pages = vma->vm_private_data;
	} else {
		struct vm_special_mapping *sm = vma->vm_private_data;

		if (sm->fault)
			return sm->fault(sm, vmf->vma, vmf);

		pages = sm->pages;
	}

	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}

static struct vm_area_struct *__install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, void *priv,
	const struct vm_operations_struct *ops)
{
	int ret;
	struct vm_area_struct *vma;

	vma = vm_area_alloc(mm);
	if (unlikely(vma == NULL))
		return ERR_PTR(-ENOMEM);

	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = ops;
	vma->vm_private_data = priv;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);

	perf_event_mmap(vma);

	return vma;

out:
	vm_area_free(vma);
	return ERR_PTR(ret);
}

bool vma_is_special_mapping(const struct vm_area_struct *vma,
	const struct vm_special_mapping *sm)
{
	return vma->vm_private_data == sm &&
		(vma->vm_ops == &special_mapping_vmops ||
		 vma->vm_ops == &legacy_special_mapping_vmops);
}

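/*
 * Called with mm->mmap_lock held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *; the array
 * is NULL terminated, and faults past the last supplied page raise
 * SIGBUS (see special_mapping_fault()).
 */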
struct vm_area_struct *_install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, const struct vm_special_mapping *spec)
{
	return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
					&special_mapping_vmops);
}

int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma = __install_special_mapping(
		mm, addr, len, vm_flags, (void *)pages,
		&legacy_special_mapping_vmops);

	return PTR_ERR_OR_ZERO(vma);
}

static DEFINE_MUTEX(mm_all_locks_mutex);

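/*
 * Helpers for mm_take_all_locks(): take each anon_vma root lock and
 * each address_space lock exactly once, using a bit in the structure
 * (bit 0 of the rb_root node pointer, or AS_MM_ALL_LOCKS) to mark the
 * ones already held.
 */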
static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);

		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
	}
}

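/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm.  With the mmap
 * lock already held by the caller, it takes every anon_vma root lock
 * and every i_mmap_rwsem in the mm, so no rmap walk can run
 * concurrently.  Returns 0 on success or -EINTR if a signal arrived
 * while taking the locks.
 */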
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				!is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		i_mmap_unlock_write(mapping);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

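/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */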
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

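/* initialise the percpu counter for committed VM */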
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
}

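/*
 * Initialise sysctl_user_reserve_kbytes to ~3% of free memory, capped
 * at 128MB, so that in overcommit 'never' mode an ordinary user still
 * has enough headroom to recover from a memory-hogging process.
 */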
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
subsys_initcall(init_user_reserve);

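/*
 * Initialise sysctl_admin_reserve_kbytes to ~3% of free memory, capped
 * at 8MB, enough for the admin to log in and kill a runaway process
 * when overcommit is disabled.
 */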
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
subsys_initcall(init_admin_reserve);

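/*
 * Re-initialise the user and admin reserves if memory is added or
 * removed: grow them on hotplug-add while they are still below their
 * default caps, and shrink them on hotplug-remove if they exceed the
 * remaining free memory.
 */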
static int reserve_mem_notifier(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
subsys_initcall(init_reserve_notifier);