/*
 * mm/mmap.c
 *
 * Virtual memory area (VMA) management: creation, merging, splitting and
 * unmapping of mappings, plus the mmap/brk/munmap family of syscalls.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)	(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
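
/*
 * protection_map[] translates the VM_READ/VM_WRITE/VM_EXEC/VM_SHARED bits
 * of vma->vm_flags into architecture page protections: the __P* entries
 * cover private (copy-on-write) mappings, the __S* entries shared ones.
 */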
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;

	vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma)) {
		vm_flags &= ~VM_SHARED;
		vma->vm_page_prot = vm_pgprot_modify(vma->vm_page_prot,
						     vm_flags);
	}
}

/* Overcommit policy, map-count and reserve limits, tunable via sysctl. */
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

/*
 * Keep vm_committed_as on its own cacheline: it is a percpu counter that
 * is updated frequently from many CPUs via vm_acct_memory() and
 * vm_unacct_memory(), and read by the overcommit checks below.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * Return a (positive) snapshot of the committed address space; being a
 * percpu counter, the value is cheap to read but only approximate.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
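
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping, according to the current overcommit policy (always, guess or
 * never).  Returns 0 and charges @pages on success, -ENOMEM otherwise.
 */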
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	long free, allowed, reserve;

	VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
			-(s64)vm_committed_as_batch * num_online_cpus(),
			"memory commitment underflow");

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_page_state(NR_FREE_PAGES);
		free += global_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Reclaimable slab (dentries, inodes, ...) can be shrunk
		 * under memory pressure, so count it as available too.
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/*
		 * Reserve some for root
		 */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = vm_commit_limit();
	/*
	 * Reserve some for root
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

	/*
	 * Don't let a single process grow so big a user can't recover
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min_t(long, mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem held for writing.
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

static unsigned long do_brk(unsigned long addr, unsigned long len);
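
/*
 * sys_brk() - grow or shrink the heap.  Shrinking is implemented by
 * unmapping the tail; growing uses do_brk() and may need to populate the
 * new pages when the mm has VM_LOCKED in its default flags.
 */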
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;
	bool populate;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against the data rlimit here.  If this check were done later,
	 * after the oldbrk/newbrk comparison, a request could escape it and
	 * let the data segment grow beyond its limit when the limit is
	 * lowered along with the attempt to grow the data segment.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}

static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, subtree_gap;
	max = vma->vm_start;
	if (vma->vm_prev)
		max -= vma->vm_prev->vm_end;
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
376
377#ifdef CONFIG_DEBUG_VM_RB
378static int browse_rb(struct rb_root *root)
379{
380 int i = 0, j, bug = 0;
381 struct rb_node *nd, *pn = NULL;
382 unsigned long prev = 0, pend = 0;
383
384 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
385 struct vm_area_struct *vma;
386 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
387 if (vma->vm_start < prev) {
388 pr_emerg("vm_start %lx < prev %lx\n",
389 vma->vm_start, prev);
390 bug = 1;
391 }
392 if (vma->vm_start < pend) {
393 pr_emerg("vm_start %lx < pend %lx\n",
394 vma->vm_start, pend);
395 bug = 1;
396 }
397 if (vma->vm_start > vma->vm_end) {
398 pr_emerg("vm_start %lx > vm_end %lx\n",
399 vma->vm_start, vma->vm_end);
400 bug = 1;
401 }
402 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
403 pr_emerg("free gap %lx, correct %lx\n",
404 vma->rb_subtree_gap,
405 vma_compute_subtree_gap(vma));
406 bug = 1;
407 }
408 i++;
409 pn = nd;
410 prev = vma->vm_start;
411 pend = vma->vm_end;
412 }
413 j = 0;
414 for (nd = pn; nd; nd = rb_prev(nd))
415 j++;
416 if (i != j) {
417 pr_emerg("backwards %d, forwards %d\n", j, i);
418 bug = 1;
419 }
420 return bug ? -1 : i;
421}
422
423static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
424{
425 struct rb_node *nd;
426
427 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
428 struct vm_area_struct *vma;
429 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
430 VM_BUG_ON_VMA(vma != ignore &&
431 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
432 vma);
433 }
434}
435
436static void validate_mm(struct mm_struct *mm)
437{
438 int bug = 0;
439 int i = 0;
440 unsigned long highest_address = 0;
441 struct vm_area_struct *vma = mm->mmap;
442
443 while (vma) {
444 struct anon_vma_chain *avc;
445
446 vma_lock_anon_vma(vma);
447 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
448 anon_vma_interval_tree_verify(avc);
449 vma_unlock_anon_vma(vma);
450 highest_address = vma->vm_end;
451 vma = vma->vm_next;
452 i++;
453 }
454 if (i != mm->map_count) {
455 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
456 bug = 1;
457 }
458 if (highest_address != mm->highest_vm_end) {
459 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
460 mm->highest_vm_end, highest_address);
461 bug = 1;
462 }
463 i = browse_rb(&mm->mm_rb);
464 if (i != mm->map_count) {
465 if (i != -1)
466 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
467 bug = 1;
468 }
469 VM_BUG_ON_MM(bug, mm);
470}
471#else
472#define validate_mm_rb(root, ignore) do { } while (0)
473#define validate_mm(mm) do { } while (0)
474#endif
475
476RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
477 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
478
479
480
481
482
483
484static void vma_gap_update(struct vm_area_struct *vma)
485{
486
487
488
489
490 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
491}
492
493static inline void vma_rb_insert(struct vm_area_struct *vma,
494 struct rb_root *root)
495{
496
497 validate_mm_rb(root, NULL);
498
499 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
500}
501
502static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
503{
504
505
506
507
508 validate_mm_rb(root, vma);
509
510
511
512
513
514
515 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
516}
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532static inline void
533anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
534{
535 struct anon_vma_chain *avc;
536
537 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
538 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
539}
540
541static inline void
542anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
543{
544 struct anon_vma_chain *avc;
545
546 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
547 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
548}
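
/*
 * Locate the position in the rb-tree and VMA list where a new VMA
 * spanning [addr, end) would be linked.  Fills in *pprev, *rb_link and
 * *rb_parent on success; returns -ENOMEM if the range overlaps an
 * existing VMA.
 */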
550static int find_vma_links(struct mm_struct *mm, unsigned long addr,
551 unsigned long end, struct vm_area_struct **pprev,
552 struct rb_node ***rb_link, struct rb_node **rb_parent)
553{
554 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
555
556 __rb_link = &mm->mm_rb.rb_node;
557 rb_prev = __rb_parent = NULL;
558
559 while (*__rb_link) {
560 struct vm_area_struct *vma_tmp;
561
562 __rb_parent = *__rb_link;
563 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
564
565 if (vma_tmp->vm_end > addr) {
566
567 if (vma_tmp->vm_start < end)
568 return -ENOMEM;
569 __rb_link = &__rb_parent->rb_left;
570 } else {
571 rb_prev = __rb_parent;
572 __rb_link = &__rb_parent->rb_right;
573 }
574 }
575
576 *pprev = NULL;
577 if (rb_prev)
578 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
579 *rb_link = __rb_link;
580 *rb_parent = __rb_parent;
581 return 0;
582}
583
584static unsigned long count_vma_pages_range(struct mm_struct *mm,
585 unsigned long addr, unsigned long end)
586{
587 unsigned long nr_pages = 0;
588 struct vm_area_struct *vma;
589
590
591 vma = find_vma_intersection(mm, addr, end);
592 if (!vma)
593 return 0;
594
595 nr_pages = (min(end, vma->vm_end) -
596 max(addr, vma->vm_start)) >> PAGE_SHIFT;
597
598
599 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
600 unsigned long overlap_len;
601
602 if (vma->vm_start > end)
603 break;
604
605 overlap_len = min(end, vma->vm_end) - vma->vm_start;
606 nr_pages += overlap_len >> PAGE_SHIFT;
607 }
608
609 return nr_pages;
610}
611
612void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
613 struct rb_node **rb_link, struct rb_node *rb_parent)
614{
615
616 if (vma->vm_next)
617 vma_gap_update(vma->vm_next);
618 else
619 mm->highest_vm_end = vma->vm_end;
620
621
622
623
624
625
626
627
628
629
630 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
631 vma->rb_subtree_gap = 0;
632 vma_gap_update(vma);
633 vma_rb_insert(vma, &mm->mm_rb);
634}
635
636static void __vma_link_file(struct vm_area_struct *vma)
637{
638 struct file *file;
639
640 file = vma->vm_file;
641 if (file) {
642 struct address_space *mapping = file->f_mapping;
643
644 if (vma->vm_flags & VM_DENYWRITE)
645 atomic_dec(&file_inode(file)->i_writecount);
646 if (vma->vm_flags & VM_SHARED)
647 atomic_inc(&mapping->i_mmap_writable);
648
649 flush_dcache_mmap_lock(mapping);
650 vma_interval_tree_insert(vma, &mapping->i_mmap);
651 flush_dcache_mmap_unlock(mapping);
652 }
653}
654
655static void
656__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
657 struct vm_area_struct *prev, struct rb_node **rb_link,
658 struct rb_node *rb_parent)
659{
660 __vma_link_list(mm, vma, prev, rb_parent);
661 __vma_link_rb(mm, vma, rb_link, rb_parent);
662}
663
664static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
665 struct vm_area_struct *prev, struct rb_node **rb_link,
666 struct rb_node *rb_parent)
667{
668 struct address_space *mapping = NULL;
669
670 if (vma->vm_file) {
671 mapping = vma->vm_file->f_mapping;
672 i_mmap_lock_write(mapping);
673 }
674
675 __vma_link(mm, vma, prev, rb_link, rb_parent);
676 __vma_link_file(vma);
677
678 if (mapping)
679 i_mmap_unlock_write(mapping);
680
681 mm->map_count++;
682 validate_mm(mm);
683}
684
685
686
687
688
689static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
690{
691 struct vm_area_struct *prev;
692 struct rb_node **rb_link, *rb_parent;
693
694 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
695 &prev, &rb_link, &rb_parent))
696 BUG();
697 __vma_link(mm, vma, prev, rb_link, rb_parent);
698 mm->map_count++;
699}
700
701static inline void
702__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
703 struct vm_area_struct *prev)
704{
705 struct vm_area_struct *next;
706
707 vma_rb_erase(vma, &mm->mm_rb);
708 prev->vm_next = next = vma->vm_next;
709 if (next)
710 next->vm_prev = prev;
711
712
713 vmacache_invalidate(mm);
714}
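
/*
 * vma_adjust() - grow or shrink @vma to span [start, end) with the new
 * @pgoff, fixing up the rb-tree, the file interval tree, the anon_vma
 * interval trees and any neighbouring VMA that is absorbed ("remove_next")
 * or inserted (@insert) as a result.  This is the workhorse behind VMA
 * merging and splitting.
 */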
723int vma_adjust(struct vm_area_struct *vma, unsigned long start,
724 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
725{
726 struct mm_struct *mm = vma->vm_mm;
727 struct vm_area_struct *next = vma->vm_next;
728 struct vm_area_struct *importer = NULL;
729 struct address_space *mapping = NULL;
730 struct rb_root *root = NULL;
731 struct anon_vma *anon_vma = NULL;
732 struct file *file = vma->vm_file;
733 bool start_changed = false, end_changed = false;
734 long adjust_next = 0;
735 int remove_next = 0;
736
737 if (next && !insert) {
738 struct vm_area_struct *exporter = NULL;
739
740 if (end >= next->vm_end) {
741
742
743
744
745again: remove_next = 1 + (end > next->vm_end);
746 end = next->vm_end;
747 exporter = next;
748 importer = vma;
749 } else if (end > next->vm_start) {
750
751
752
753
754 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
755 exporter = next;
756 importer = vma;
757 } else if (end < vma->vm_end) {
758
759
760
761
762
763 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
764 exporter = vma;
765 importer = next;
766 }
767
768
769
770
771
772
773 if (exporter && exporter->anon_vma && !importer->anon_vma) {
774 int error;
775
776 importer->anon_vma = exporter->anon_vma;
777 error = anon_vma_clone(importer, exporter);
778 if (error)
779 return error;
780 }
781 }
782
783 if (file) {
784 mapping = file->f_mapping;
785 root = &mapping->i_mmap;
786 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
787
788 if (adjust_next)
789 uprobe_munmap(next, next->vm_start, next->vm_end);
790
791 i_mmap_lock_write(mapping);
792 if (insert) {
793
794
795
796
797
798
799 __vma_link_file(insert);
800 }
801 }
802
803 vma_adjust_trans_huge(vma, start, end, adjust_next);
804
805 anon_vma = vma->anon_vma;
806 if (!anon_vma && adjust_next)
807 anon_vma = next->anon_vma;
808 if (anon_vma) {
809 VM_BUG_ON_VMA(adjust_next && next->anon_vma &&
810 anon_vma != next->anon_vma, next);
811 anon_vma_lock_write(anon_vma);
812 anon_vma_interval_tree_pre_update_vma(vma);
813 if (adjust_next)
814 anon_vma_interval_tree_pre_update_vma(next);
815 }
816
817 if (root) {
818 flush_dcache_mmap_lock(mapping);
819 vma_interval_tree_remove(vma, root);
820 if (adjust_next)
821 vma_interval_tree_remove(next, root);
822 }
823
824 if (start != vma->vm_start) {
825 vma->vm_start = start;
826 start_changed = true;
827 }
828 if (end != vma->vm_end) {
829 vma->vm_end = end;
830 end_changed = true;
831 }
832 vma->vm_pgoff = pgoff;
833 if (adjust_next) {
834 next->vm_start += adjust_next << PAGE_SHIFT;
835 next->vm_pgoff += adjust_next;
836 }
837
838 if (root) {
839 if (adjust_next)
840 vma_interval_tree_insert(next, root);
841 vma_interval_tree_insert(vma, root);
842 flush_dcache_mmap_unlock(mapping);
843 }
844
845 if (remove_next) {
846
847
848
849
850 __vma_unlink(mm, next, vma);
851 if (file)
852 __remove_shared_vm_struct(next, file, mapping);
853 } else if (insert) {
854
855
856
857
858
859 __insert_vm_struct(mm, insert);
860 } else {
861 if (start_changed)
862 vma_gap_update(vma);
863 if (end_changed) {
864 if (!next)
865 mm->highest_vm_end = end;
866 else if (!adjust_next)
867 vma_gap_update(next);
868 }
869 }
870
871 if (anon_vma) {
872 anon_vma_interval_tree_post_update_vma(vma);
873 if (adjust_next)
874 anon_vma_interval_tree_post_update_vma(next);
875 anon_vma_unlock_write(anon_vma);
876 }
877 if (mapping)
878 i_mmap_unlock_write(mapping);
879
880 if (root) {
881 uprobe_mmap(vma);
882
883 if (adjust_next)
884 uprobe_mmap(next);
885 }
886
887 if (remove_next) {
888 if (file) {
889 uprobe_munmap(next, next->vm_start, next->vm_end);
890 fput(file);
891 }
892 if (next->anon_vma)
893 anon_vma_merge(vma, next);
894 mm->map_count--;
895 mpol_put(vma_policy(next));
896 kmem_cache_free(vm_area_cachep, next);
897
898
899
900
901
902 next = vma->vm_next;
903 if (remove_next == 2)
904 goto again;
905 else if (next)
906 vma_gap_update(next);
907 else
908 mm->highest_vm_end = end;
909 }
910 if (insert && file)
911 uprobe_mmap(insert);
912
913 validate_mm(mm);
914
915 return 0;
916}
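
/*
 * Helpers for vma_merge(): two VMAs are candidates for merging only when
 * their flags match (ignoring VM_SOFTDIRTY), they map the same file, have
 * no ->close() operation and share a compatible userfaultfd context.
 */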
922static inline int is_mergeable_vma(struct vm_area_struct *vma,
923 struct file *file, unsigned long vm_flags,
924 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
925{
926
927
928
929
930
931
932
933
934 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
935 return 0;
936 if (vma->vm_file != file)
937 return 0;
938 if (vma->vm_ops && vma->vm_ops->close)
939 return 0;
940 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
941 return 0;
942 return 1;
943}
944
945static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
946 struct anon_vma *anon_vma2,
947 struct vm_area_struct *vma)
948{
949
950
951
952
953 if ((!anon_vma1 || !anon_vma2) && (!vma ||
954 list_is_singular(&vma->anon_vma_chain)))
955 return 1;
956 return anon_vma1 == anon_vma2;
957}
958
959
960
961
962
963
964
965
966
967
968
969
970static int
971can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
972 struct anon_vma *anon_vma, struct file *file,
973 pgoff_t vm_pgoff,
974 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
975{
976 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
977 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
978 if (vma->vm_pgoff == vm_pgoff)
979 return 1;
980 }
981 return 0;
982}
983
984
985
986
987
988
989
990
991static int
992can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
993 struct anon_vma *anon_vma, struct file *file,
994 pgoff_t vm_pgoff,
995 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
996{
997 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
998 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
999 pgoff_t vm_pglen;
1000 vm_pglen = vma_pages(vma);
1001 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1002 return 1;
1003 }
1004 return 0;
1005}
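
/*
 * Given a mapping request (addr, end, vm_flags, file, pgoff, policy),
 * work out whether it can be merged with its predecessor and/or successor
 * VMA.  On success the existing VMA is extended via vma_adjust() and
 * returned; NULL means no merge is possible and a new VMA must be created.
 */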
1036struct vm_area_struct *vma_merge(struct mm_struct *mm,
1037 struct vm_area_struct *prev, unsigned long addr,
1038 unsigned long end, unsigned long vm_flags,
1039 struct anon_vma *anon_vma, struct file *file,
1040 pgoff_t pgoff, struct mempolicy *policy,
1041 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1042{
1043 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1044 struct vm_area_struct *area, *next;
1045 int err;
1046
1047
1048
1049
1050
1051 if (vm_flags & VM_SPECIAL)
1052 return NULL;
1053
1054 if (prev)
1055 next = prev->vm_next;
1056 else
1057 next = mm->mmap;
1058 area = next;
1059 if (next && next->vm_end == end)
1060 next = next->vm_next;
1061
1062
1063
1064
1065 if (prev && prev->vm_end == addr &&
1066 mpol_equal(vma_policy(prev), policy) &&
1067 can_vma_merge_after(prev, vm_flags,
1068 anon_vma, file, pgoff,
1069 vm_userfaultfd_ctx)) {
1070
1071
1072
1073 if (next && end == next->vm_start &&
1074 mpol_equal(policy, vma_policy(next)) &&
1075 can_vma_merge_before(next, vm_flags,
1076 anon_vma, file,
1077 pgoff+pglen,
1078 vm_userfaultfd_ctx) &&
1079 is_mergeable_anon_vma(prev->anon_vma,
1080 next->anon_vma, NULL)) {
1081
1082 err = vma_adjust(prev, prev->vm_start,
1083 next->vm_end, prev->vm_pgoff, NULL);
1084 } else
1085 err = vma_adjust(prev, prev->vm_start,
1086 end, prev->vm_pgoff, NULL);
1087 if (err)
1088 return NULL;
1089 khugepaged_enter_vma_merge(prev, vm_flags);
1090 return prev;
1091 }
1092
1093
1094
1095
1096 if (next && end == next->vm_start &&
1097 mpol_equal(policy, vma_policy(next)) &&
1098 can_vma_merge_before(next, vm_flags,
1099 anon_vma, file, pgoff+pglen,
1100 vm_userfaultfd_ctx)) {
1101 if (prev && addr < prev->vm_end)
1102 err = vma_adjust(prev, prev->vm_start,
1103 addr, prev->vm_pgoff, NULL);
1104 else
1105 err = vma_adjust(area, addr, next->vm_end,
1106 next->vm_pgoff - pglen, NULL);
1107 if (err)
1108 return NULL;
1109 khugepaged_enter_vma_merge(area, vm_flags);
1110 return area;
1111 }
1112
1113 return NULL;
1114}
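
/*
 * Two neighbouring VMAs can share an anon_vma only if they are adjacent,
 * map the same file with contiguous offsets, have equal mempolicies and
 * differ at most in VM_READ/VM_WRITE/VM_EXEC/VM_SOFTDIRTY.
 */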
1129static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1130{
1131 return a->vm_end == b->vm_start &&
1132 mpol_equal(vma_policy(a), vma_policy(b)) &&
1133 a->vm_file == b->vm_file &&
1134 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1135 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1136}
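
/*
 * Return old->anon_vma if it can safely be shared by the neighbouring
 * pair (a, b): they must be compatible and old must have exactly one
 * anon_vma on its chain.
 */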
1160static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1161{
1162 if (anon_vma_compatible(a, b)) {
1163 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1164
1165 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1166 return anon_vma;
1167 }
1168 return NULL;
1169}
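
/*
 * find_mergeable_anon_vma() - try to reuse the anon_vma of an adjacent
 * VMA (next first, then prev) so that mergeable neighbours end up
 * sharing the same anon_vma.
 */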
1179struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1180{
1181 struct anon_vma *anon_vma;
1182 struct vm_area_struct *near;
1183
1184 near = vma->vm_next;
1185 if (!near)
1186 goto try_prev;
1187
1188 anon_vma = reusable_anon_vma(near, vma, near);
1189 if (anon_vma)
1190 return anon_vma;
1191try_prev:
1192 near = vma->vm_prev;
1193 if (!near)
1194 goto none;
1195
1196 anon_vma = reusable_anon_vma(near, near, vma);
1197 if (anon_vma)
1198 return anon_vma;
1199none:
1200
1201
1202
1203
1204
1205
1206
1207
1208 return NULL;
1209}
1210
1211#ifdef CONFIG_PROC_FS
1212void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1213 struct file *file, long pages)
1214{
1215 const unsigned long stack_flags
1216 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1217
1218 mm->total_vm += pages;
1219
1220 if (file) {
1221 mm->shared_vm += pages;
1222 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1223 mm->exec_vm += pages;
1224 } else if (flags & stack_flags)
1225 mm->stack_vm += pages;
1226}
1227#endif
1228
1229
1230
1231
1232
1233static inline unsigned long round_hint_to_min(unsigned long hint)
1234{
1235 hint &= PAGE_MASK;
1236 if (((void *)hint != NULL) &&
1237 (hint < mmap_min_addr))
1238 return PAGE_ALIGN(mmap_min_addr);
1239 return hint;
1240}
1241
1242static inline int mlock_future_check(struct mm_struct *mm,
1243 unsigned long flags,
1244 unsigned long len)
1245{
1246 unsigned long locked, lock_limit;
1247
1248
1249 if (flags & VM_LOCKED) {
1250 locked = len >> PAGE_SHIFT;
1251 locked += mm->locked_vm;
1252 lock_limit = rlimit(RLIMIT_MEMLOCK);
1253 lock_limit >>= PAGE_SHIFT;
1254 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1255 return -EAGAIN;
1256 }
1257 return 0;
1258}
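
/*
 * do_mmap() - the core of mmap: validate the request, compute the final
 * vm_flags, pick an address and hand off to mmap_region().  *populate is
 * set when the caller should pre-fault the new range.
 */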
1263unsigned long do_mmap(struct file *file, unsigned long addr,
1264 unsigned long len, unsigned long prot,
1265 unsigned long flags, vm_flags_t vm_flags,
1266 unsigned long pgoff, unsigned long *populate)
1267{
1268 struct mm_struct *mm = current->mm;
1269
1270 *populate = 0;
1271
1272 if (!len)
1273 return -EINVAL;
1274
1275
1276
1277
1278
1279
1280
1281 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1282 if (!(file && path_noexec(&file->f_path)))
1283 prot |= PROT_EXEC;
1284
1285 if (!(flags & MAP_FIXED))
1286 addr = round_hint_to_min(addr);
1287
1288
1289 len = PAGE_ALIGN(len);
1290 if (!len)
1291 return -ENOMEM;
1292
1293
1294 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1295 return -EOVERFLOW;
1296
1297
1298 if (mm->map_count > sysctl_max_map_count)
1299 return -ENOMEM;
1300
1301
1302
1303
1304 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1305 if (addr & ~PAGE_MASK)
1306 return addr;
1307
1308
1309
1310
1311
1312 vm_flags |= calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1313 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1314
1315 if (flags & MAP_LOCKED)
1316 if (!can_do_mlock())
1317 return -EPERM;
1318
1319 if (mlock_future_check(mm, vm_flags, len))
1320 return -EAGAIN;
1321
1322 if (file) {
1323 struct inode *inode = file_inode(file);
1324
1325 switch (flags & MAP_TYPE) {
1326 case MAP_SHARED:
1327 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1328 return -EACCES;
1329
1330
1331
1332
1333
1334 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1335 return -EACCES;
1336
1337
1338
1339
1340 if (locks_verify_locked(file))
1341 return -EAGAIN;
1342
1343 vm_flags |= VM_SHARED | VM_MAYSHARE;
1344 if (!(file->f_mode & FMODE_WRITE))
1345 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1346
1347
1348 case MAP_PRIVATE:
1349 if (!(file->f_mode & FMODE_READ))
1350 return -EACCES;
1351 if (path_noexec(&file->f_path)) {
1352 if (vm_flags & VM_EXEC)
1353 return -EPERM;
1354 vm_flags &= ~VM_MAYEXEC;
1355 }
1356
1357 if (!file->f_op->mmap)
1358 return -ENODEV;
1359 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1360 return -EINVAL;
1361 break;
1362
1363 default:
1364 return -EINVAL;
1365 }
1366 } else {
1367 switch (flags & MAP_TYPE) {
1368 case MAP_SHARED:
1369 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1370 return -EINVAL;
1371
1372
1373
1374 pgoff = 0;
1375 vm_flags |= VM_SHARED | VM_MAYSHARE;
1376 break;
1377 case MAP_PRIVATE:
1378
1379
1380
1381 pgoff = addr >> PAGE_SHIFT;
1382 break;
1383 default:
1384 return -EINVAL;
1385 }
1386 }
1387
1388
1389
1390
1391
1392 if (flags & MAP_NORESERVE) {
1393
1394 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1395 vm_flags |= VM_NORESERVE;
1396
1397
1398 if (file && is_file_hugepages(file))
1399 vm_flags |= VM_NORESERVE;
1400 }
1401
1402 addr = mmap_region(file, addr, len, vm_flags, pgoff);
1403 if (!IS_ERR_VALUE(addr) &&
1404 ((vm_flags & VM_LOCKED) ||
1405 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1406 *populate = len;
1407 return addr;
1408}
1409
1410SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1411 unsigned long, prot, unsigned long, flags,
1412 unsigned long, fd, unsigned long, pgoff)
1413{
1414 struct file *file = NULL;
1415 unsigned long retval = -EBADF;
1416
1417 if (!(flags & MAP_ANONYMOUS)) {
1418 audit_mmap_fd(fd, flags);
1419 file = fget(fd);
1420 if (!file)
1421 goto out;
1422 if (is_file_hugepages(file))
1423 len = ALIGN(len, huge_page_size(hstate_file(file)));
1424 retval = -EINVAL;
1425 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1426 goto out_fput;
1427 } else if (flags & MAP_HUGETLB) {
1428 struct user_struct *user = NULL;
1429 struct hstate *hs;
1430
1431 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1432 if (!hs)
1433 return -EINVAL;
1434
1435 len = ALIGN(len, huge_page_size(hs));
1436
1437
1438
1439
1440
1441
1442 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1443 VM_NORESERVE,
1444 &user, HUGETLB_ANONHUGE_INODE,
1445 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1446 if (IS_ERR(file))
1447 return PTR_ERR(file);
1448 }
1449
1450 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1451
1452 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1453out_fput:
1454 if (file)
1455 fput(file);
1456out:
1457 return retval;
1458}
1459
1460#ifdef __ARCH_WANT_SYS_OLD_MMAP
1461struct mmap_arg_struct {
1462 unsigned long addr;
1463 unsigned long len;
1464 unsigned long prot;
1465 unsigned long flags;
1466 unsigned long fd;
1467 unsigned long offset;
1468};
1469
1470SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1471{
1472 struct mmap_arg_struct a;
1473
1474 if (copy_from_user(&a, arg, sizeof(a)))
1475 return -EFAULT;
1476 if (a.offset & ~PAGE_MASK)
1477 return -EINVAL;
1478
1479 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1480 a.offset >> PAGE_SHIFT);
1481}
1482#endif
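
/*
 * Does this VMA require the write-notify machinery, i.e. should shared
 * writable mappings be write-protected so that page_mkwrite()/dirty
 * tracking sees the first write to each page?
 */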
int vma_wants_writenotify(struct vm_area_struct *vma)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;

	/*
	 * The open routine did something to the protections that
	 * pgprot_modify() won't preserve?
	 */
	if (pgprot_val(vma->vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vma->vm_page_prot, vm_flags)))
		return 0;

	/* Do we need to track softdirty? */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
		return 1;

	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_cap_account_dirty(vma->vm_file->f_mapping);
}
1521
1522
1523
1524
1525
1526static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1527{
1528
1529
1530
1531
1532 if (file && is_file_hugepages(file))
1533 return 0;
1534
1535 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1536}
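
/*
 * mmap_region() - complete the mapping set up by do_mmap(): unmap any
 * overlapping regions, charge the memory, try to merge with neighbours
 * and otherwise allocate and link a new VMA, calling the file's ->mmap()
 * or shmem_zero_setup() as appropriate.
 */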
1538unsigned long mmap_region(struct file *file, unsigned long addr,
1539 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
1540{
1541 struct mm_struct *mm = current->mm;
1542 struct vm_area_struct *vma, *prev;
1543 int error;
1544 struct rb_node **rb_link, *rb_parent;
1545 unsigned long charged = 0;
1546
1547
1548 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1549 unsigned long nr_pages;
1550
1551
1552
1553
1554
1555 if (!(vm_flags & MAP_FIXED))
1556 return -ENOMEM;
1557
1558 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1559
1560 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1561 return -ENOMEM;
1562 }
1563
1564
1565 error = -ENOMEM;
1566 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1567 &rb_parent)) {
1568 if (do_munmap(mm, addr, len))
1569 return -ENOMEM;
1570 }
1571
1572
1573
1574
1575 if (accountable_mapping(file, vm_flags)) {
1576 charged = len >> PAGE_SHIFT;
1577 if (security_vm_enough_memory_mm(mm, charged))
1578 return -ENOMEM;
1579 vm_flags |= VM_ACCOUNT;
1580 }
1581
1582
1583
1584
1585 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1586 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1587 if (vma)
1588 goto out;
1589
1590
1591
1592
1593
1594
1595 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1596 if (!vma) {
1597 error = -ENOMEM;
1598 goto unacct_error;
1599 }
1600
1601 vma->vm_mm = mm;
1602 vma->vm_start = addr;
1603 vma->vm_end = addr + len;
1604 vma->vm_flags = vm_flags;
1605 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1606 vma->vm_pgoff = pgoff;
1607 INIT_LIST_HEAD(&vma->anon_vma_chain);
1608
1609 if (file) {
1610 if (vm_flags & VM_DENYWRITE) {
1611 error = deny_write_access(file);
1612 if (error)
1613 goto free_vma;
1614 }
1615 if (vm_flags & VM_SHARED) {
1616 error = mapping_map_writable(file->f_mapping);
1617 if (error)
1618 goto allow_write_and_free_vma;
1619 }
1620
1621
1622
1623
1624
1625
1626 vma->vm_file = get_file(file);
1627 error = file->f_op->mmap(file, vma);
1628 if (error)
1629 goto unmap_and_free_vma;
1630
1631
1632
1633
1634
1635
1636
1637
1638 WARN_ON_ONCE(addr != vma->vm_start);
1639
1640 addr = vma->vm_start;
1641 vm_flags = vma->vm_flags;
1642 } else if (vm_flags & VM_SHARED) {
1643 error = shmem_zero_setup(vma);
1644 if (error)
1645 goto free_vma;
1646 }
1647
1648 vma_link(mm, vma, prev, rb_link, rb_parent);
1649
1650 if (file) {
1651 if (vm_flags & VM_SHARED)
1652 mapping_unmap_writable(file->f_mapping);
1653 if (vm_flags & VM_DENYWRITE)
1654 allow_write_access(file);
1655 }
1656 file = vma->vm_file;
1657out:
1658 perf_event_mmap(vma);
1659
1660 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1661 if (vm_flags & VM_LOCKED) {
1662 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1663 vma == get_gate_vma(current->mm)))
1664 mm->locked_vm += (len >> PAGE_SHIFT);
1665 else
1666 vma->vm_flags &= ~VM_LOCKED;
1667 }
1668
1669 if (file)
1670 uprobe_mmap(vma);
1671
1672
1673
1674
1675
1676
1677
1678
1679 vma->vm_flags |= VM_SOFTDIRTY;
1680
1681 vma_set_page_prot(vma);
1682
1683 return addr;
1684
1685unmap_and_free_vma:
1686 vma->vm_file = NULL;
1687 fput(file);
1688
1689
1690 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1691 charged = 0;
1692 if (vm_flags & VM_SHARED)
1693 mapping_unmap_writable(file->f_mapping);
1694allow_write_and_free_vma:
1695 if (vm_flags & VM_DENYWRITE)
1696 allow_write_access(file);
1697free_vma:
1698 kmem_cache_free(vm_area_cachep, vma);
1699unacct_error:
1700 if (charged)
1701 vm_unacct_memory(charged);
1702 return error;
1703}
1704
1705unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1706{
	/*
	 * We implement the search by looking for an rbtree node that
	 * immediately follows a suitable gap. That is,
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */

1715 struct mm_struct *mm = current->mm;
1716 struct vm_area_struct *vma;
1717 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1718
1719
1720 length = info->length + info->align_mask;
1721 if (length < info->length)
1722 return -ENOMEM;
1723
1724
1725 if (info->high_limit < length)
1726 return -ENOMEM;
1727 high_limit = info->high_limit - length;
1728
1729 if (info->low_limit > high_limit)
1730 return -ENOMEM;
1731 low_limit = info->low_limit + length;
1732
1733
1734 if (RB_EMPTY_ROOT(&mm->mm_rb))
1735 goto check_highest;
1736 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1737 if (vma->rb_subtree_gap < length)
1738 goto check_highest;
1739
1740 while (true) {
1741
1742 gap_end = vma->vm_start;
1743 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1744 struct vm_area_struct *left =
1745 rb_entry(vma->vm_rb.rb_left,
1746 struct vm_area_struct, vm_rb);
1747 if (left->rb_subtree_gap >= length) {
1748 vma = left;
1749 continue;
1750 }
1751 }
1752
1753 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1754check_current:
1755
1756 if (gap_start > high_limit)
1757 return -ENOMEM;
1758 if (gap_end >= low_limit && gap_end - gap_start >= length)
1759 goto found;
1760
1761
1762 if (vma->vm_rb.rb_right) {
1763 struct vm_area_struct *right =
1764 rb_entry(vma->vm_rb.rb_right,
1765 struct vm_area_struct, vm_rb);
1766 if (right->rb_subtree_gap >= length) {
1767 vma = right;
1768 continue;
1769 }
1770 }
1771
1772
1773 while (true) {
1774 struct rb_node *prev = &vma->vm_rb;
1775 if (!rb_parent(prev))
1776 goto check_highest;
1777 vma = rb_entry(rb_parent(prev),
1778 struct vm_area_struct, vm_rb);
1779 if (prev == vma->vm_rb.rb_left) {
1780 gap_start = vma->vm_prev->vm_end;
1781 gap_end = vma->vm_start;
1782 goto check_current;
1783 }
1784 }
1785 }
1786
1787check_highest:
1788
1789 gap_start = mm->highest_vm_end;
1790 gap_end = ULONG_MAX;
1791 if (gap_start > high_limit)
1792 return -ENOMEM;
1793
1794found:
1795
1796 if (gap_start < info->low_limit)
1797 gap_start = info->low_limit;
1798
1799
1800 gap_start += (info->align_offset - gap_start) & info->align_mask;
1801
1802 VM_BUG_ON(gap_start + info->length > info->high_limit);
1803 VM_BUG_ON(gap_start + info->length > gap_end);
1804 return gap_start;
1805}
1806
1807unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1808{
1809 struct mm_struct *mm = current->mm;
1810 struct vm_area_struct *vma;
1811 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1812
1813
1814 length = info->length + info->align_mask;
1815 if (length < info->length)
1816 return -ENOMEM;
1817
1818
1819
1820
1821
1822 gap_end = info->high_limit;
1823 if (gap_end < length)
1824 return -ENOMEM;
1825 high_limit = gap_end - length;
1826
1827 if (info->low_limit > high_limit)
1828 return -ENOMEM;
1829 low_limit = info->low_limit + length;
1830
1831
1832 gap_start = mm->highest_vm_end;
1833 if (gap_start <= high_limit)
1834 goto found_highest;
1835
1836
1837 if (RB_EMPTY_ROOT(&mm->mm_rb))
1838 return -ENOMEM;
1839 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1840 if (vma->rb_subtree_gap < length)
1841 return -ENOMEM;
1842
1843 while (true) {
1844
1845 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1846 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1847 struct vm_area_struct *right =
1848 rb_entry(vma->vm_rb.rb_right,
1849 struct vm_area_struct, vm_rb);
1850 if (right->rb_subtree_gap >= length) {
1851 vma = right;
1852 continue;
1853 }
1854 }
1855
1856check_current:
1857
1858 gap_end = vma->vm_start;
1859 if (gap_end < low_limit)
1860 return -ENOMEM;
1861 if (gap_start <= high_limit && gap_end - gap_start >= length)
1862 goto found;
1863
1864
1865 if (vma->vm_rb.rb_left) {
1866 struct vm_area_struct *left =
1867 rb_entry(vma->vm_rb.rb_left,
1868 struct vm_area_struct, vm_rb);
1869 if (left->rb_subtree_gap >= length) {
1870 vma = left;
1871 continue;
1872 }
1873 }
1874
1875
1876 while (true) {
1877 struct rb_node *prev = &vma->vm_rb;
1878 if (!rb_parent(prev))
1879 return -ENOMEM;
1880 vma = rb_entry(rb_parent(prev),
1881 struct vm_area_struct, vm_rb);
1882 if (prev == vma->vm_rb.rb_right) {
1883 gap_start = vma->vm_prev ?
1884 vma->vm_prev->vm_end : 0;
1885 goto check_current;
1886 }
1887 }
1888 }
1889
1890found:
1891
1892 if (gap_end > info->high_limit)
1893 gap_end = info->high_limit;
1894
1895found_highest:
1896
1897 gap_end -= info->length;
1898 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1899
1900 VM_BUG_ON(gap_end < info->low_limit);
1901 VM_BUG_ON(gap_end < gap_start);
1902 return gap_end;
1903}
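
/*
 * Default bottom-up policy for finding an unmapped address range, used
 * when the architecture does not provide its own implementation.  Note
 * the calling convention: a return value with the low bits set is an
 * error code (e.g. -ENOMEM), otherwise it is the chosen address.
 */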
1916#ifndef HAVE_ARCH_UNMAPPED_AREA
1917unsigned long
1918arch_get_unmapped_area(struct file *filp, unsigned long addr,
1919 unsigned long len, unsigned long pgoff, unsigned long flags)
1920{
1921 struct mm_struct *mm = current->mm;
1922 struct vm_area_struct *vma;
1923 struct vm_unmapped_area_info info;
1924
1925 if (len > TASK_SIZE - mmap_min_addr)
1926 return -ENOMEM;
1927
1928 if (flags & MAP_FIXED)
1929 return addr;
1930
1931 if (addr) {
1932 addr = PAGE_ALIGN(addr);
1933 vma = find_vma(mm, addr);
1934 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1935 (!vma || addr + len <= vma->vm_start))
1936 return addr;
1937 }
1938
1939 info.flags = 0;
1940 info.length = len;
1941 info.low_limit = mm->mmap_base;
1942 info.high_limit = TASK_SIZE;
1943 info.align_mask = 0;
1944 return vm_unmapped_area(&info);
1945}
1946#endif
1947
1948
1949
1950
1951
1952#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1953unsigned long
1954arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1955 const unsigned long len, const unsigned long pgoff,
1956 const unsigned long flags)
1957{
1958 struct vm_area_struct *vma;
1959 struct mm_struct *mm = current->mm;
1960 unsigned long addr = addr0;
1961 struct vm_unmapped_area_info info;
1962
1963
1964 if (len > TASK_SIZE - mmap_min_addr)
1965 return -ENOMEM;
1966
1967 if (flags & MAP_FIXED)
1968 return addr;
1969
1970
1971 if (addr) {
1972 addr = PAGE_ALIGN(addr);
1973 vma = find_vma(mm, addr);
1974 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1975 (!vma || addr + len <= vma->vm_start))
1976 return addr;
1977 }
1978
1979 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1980 info.length = len;
1981 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
1982 info.high_limit = mm->mmap_base;
1983 info.align_mask = 0;
1984 addr = vm_unmapped_area(&info);
1985
1986
1987
1988
1989
1990
1991
1992 if (addr & ~PAGE_MASK) {
1993 VM_BUG_ON(addr != -ENOMEM);
1994 info.flags = 0;
1995 info.low_limit = TASK_UNMAPPED_BASE;
1996 info.high_limit = TASK_SIZE;
1997 addr = vm_unmapped_area(&info);
1998 }
1999
2000 return addr;
2001}
2002#endif
2003
2004unsigned long
2005get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2006 unsigned long pgoff, unsigned long flags)
2007{
2008 unsigned long (*get_area)(struct file *, unsigned long,
2009 unsigned long, unsigned long, unsigned long);
2010
2011 unsigned long error = arch_mmap_check(addr, len, flags);
2012 if (error)
2013 return error;
2014
2015
2016 if (len > TASK_SIZE)
2017 return -ENOMEM;
2018
2019 get_area = current->mm->get_unmapped_area;
2020 if (file && file->f_op->get_unmapped_area)
2021 get_area = file->f_op->get_unmapped_area;
2022 addr = get_area(file, addr, len, pgoff, flags);
2023 if (IS_ERR_VALUE(addr))
2024 return addr;
2025
2026 if (addr > TASK_SIZE - len)
2027 return -ENOMEM;
2028 if (addr & ~PAGE_MASK)
2029 return -EINVAL;
2030
2031 addr = arch_rebalance_pgtables(addr, len);
2032 error = security_mmap_addr(addr);
2033 return error ? error : addr;
2034}
2035
2036EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the per-thread VMA cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;
	vma = NULL;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
2072
2073
2074
2075
2076struct vm_area_struct *
2077find_vma_prev(struct mm_struct *mm, unsigned long addr,
2078 struct vm_area_struct **pprev)
2079{
2080 struct vm_area_struct *vma;
2081
2082 vma = find_vma(mm, addr);
2083 if (vma) {
2084 *pprev = vma->vm_prev;
2085 } else {
2086 struct rb_node *rb_node = mm->mm_rb.rb_node;
2087 *pprev = NULL;
2088 while (rb_node) {
2089 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2090 rb_node = rb_node->rb_right;
2091 }
2092 }
2093 return vma;
2094}
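
/*
 * Verify that the stack can grow by @grow pages: check address-space and
 * stack rlimits, mlock limits and the overcommit policy, and on success
 * charge the pages to the mm's accounting.
 */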
2101static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2102{
2103 struct mm_struct *mm = vma->vm_mm;
2104 struct rlimit *rlim = current->signal->rlim;
2105 unsigned long new_start, actual_size;
2106
2107
2108 if (!may_expand_vm(mm, grow))
2109 return -ENOMEM;
2110
2111
2112 actual_size = size;
2113 if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
2114 actual_size -= PAGE_SIZE;
2115 if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2116 return -ENOMEM;
2117
2118
2119 if (vma->vm_flags & VM_LOCKED) {
2120 unsigned long locked;
2121 unsigned long limit;
2122 locked = mm->locked_vm + grow;
2123 limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2124 limit >>= PAGE_SHIFT;
2125 if (locked > limit && !capable(CAP_IPC_LOCK))
2126 return -ENOMEM;
2127 }
2128
2129
2130 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2131 vma->vm_end - size;
2132 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2133 return -EFAULT;
2134
2135
2136
2137
2138
2139 if (security_vm_enough_memory_mm(mm, grow))
2140 return -ENOMEM;
2141
2142
2143 if (vma->vm_flags & VM_LOCKED)
2144 mm->locked_vm += grow;
2145 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2146 return 0;
2147}
2148
2149#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2150
2151
2152
2153
2154int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2155{
2156 int error;
2157
2158 if (!(vma->vm_flags & VM_GROWSUP))
2159 return -EFAULT;
2160
2161
2162
2163
2164
2165 if (unlikely(anon_vma_prepare(vma)))
2166 return -ENOMEM;
2167 vma_lock_anon_vma(vma);
2168
2169
2170
2171
2172
2173
2174
2175 if (address < PAGE_ALIGN(address+4))
2176 address = PAGE_ALIGN(address+4);
2177 else {
2178 vma_unlock_anon_vma(vma);
2179 return -ENOMEM;
2180 }
2181 error = 0;
2182
2183
2184 if (address > vma->vm_end) {
2185 unsigned long size, grow;
2186
2187 size = address - vma->vm_start;
2188 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2189
2190 error = -ENOMEM;
2191 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2192 error = acct_stack_growth(vma, size, grow);
2193 if (!error) {
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205 spin_lock(&vma->vm_mm->page_table_lock);
2206 anon_vma_interval_tree_pre_update_vma(vma);
2207 vma->vm_end = address;
2208 anon_vma_interval_tree_post_update_vma(vma);
2209 if (vma->vm_next)
2210 vma_gap_update(vma->vm_next);
2211 else
2212 vma->vm_mm->highest_vm_end = address;
2213 spin_unlock(&vma->vm_mm->page_table_lock);
2214
2215 perf_event_mmap(vma);
2216 }
2217 }
2218 }
2219 vma_unlock_anon_vma(vma);
2220 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2221 validate_mm(vma->vm_mm);
2222 return error;
2223}
2224#endif
2225
2226
2227
2228
2229int expand_downwards(struct vm_area_struct *vma,
2230 unsigned long address)
2231{
2232 int error;
2233
2234
2235
2236
2237
2238 if (unlikely(anon_vma_prepare(vma)))
2239 return -ENOMEM;
2240
2241 address &= PAGE_MASK;
2242 error = security_mmap_addr(address);
2243 if (error)
2244 return error;
2245
2246 vma_lock_anon_vma(vma);
2247
2248
2249
2250
2251
2252
2253
2254
2255 if (address < vma->vm_start) {
2256 unsigned long size, grow;
2257
2258 size = vma->vm_end - address;
2259 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2260
2261 error = -ENOMEM;
2262 if (grow <= vma->vm_pgoff) {
2263 error = acct_stack_growth(vma, size, grow);
2264 if (!error) {
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276 spin_lock(&vma->vm_mm->page_table_lock);
2277 anon_vma_interval_tree_pre_update_vma(vma);
2278 vma->vm_start = address;
2279 vma->vm_pgoff -= grow;
2280 anon_vma_interval_tree_post_update_vma(vma);
2281 vma_gap_update(vma);
2282 spin_unlock(&vma->vm_mm->page_table_lock);
2283
2284 perf_event_mmap(vma);
2285 }
2286 }
2287 }
2288 vma_unlock_anon_vma(vma);
2289 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2290 validate_mm(vma->vm_mm);
2291 return error;
2292}
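
/*
 * expand_stack() grows a stack VMA (upwards or downwards depending on the
 * architecture) to cover the faulting address, refusing the expansion if
 * it would butt up against a neighbouring VMA that is not itself a stack
 * growing in the same direction.
 */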
2305#ifdef CONFIG_STACK_GROWSUP
2306int expand_stack(struct vm_area_struct *vma, unsigned long address)
2307{
2308 struct vm_area_struct *next;
2309
2310 address &= PAGE_MASK;
2311 next = vma->vm_next;
2312 if (next && next->vm_start == address + PAGE_SIZE) {
2313 if (!(next->vm_flags & VM_GROWSUP))
2314 return -ENOMEM;
2315 }
2316 return expand_upwards(vma, address);
2317}
2318
2319struct vm_area_struct *
2320find_extend_vma(struct mm_struct *mm, unsigned long addr)
2321{
2322 struct vm_area_struct *vma, *prev;
2323
2324 addr &= PAGE_MASK;
2325 vma = find_vma_prev(mm, addr, &prev);
2326 if (vma && (vma->vm_start <= addr))
2327 return vma;
2328 if (!prev || expand_stack(prev, addr))
2329 return NULL;
2330 if (prev->vm_flags & VM_LOCKED)
2331 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2332 return prev;
2333}
2334#else
2335int expand_stack(struct vm_area_struct *vma, unsigned long address)
2336{
2337 struct vm_area_struct *prev;
2338
2339 address &= PAGE_MASK;
2340 prev = vma->vm_prev;
2341 if (prev && prev->vm_end == address) {
2342 if (!(prev->vm_flags & VM_GROWSDOWN))
2343 return -ENOMEM;
2344 }
2345 return expand_downwards(vma, address);
2346}
2347
2348struct vm_area_struct *
2349find_extend_vma(struct mm_struct *mm, unsigned long addr)
2350{
2351 struct vm_area_struct *vma;
2352 unsigned long start;
2353
2354 addr &= PAGE_MASK;
2355 vma = find_vma(mm, addr);
2356 if (!vma)
2357 return NULL;
2358 if (vma->vm_start <= addr)
2359 return vma;
2360 if (!(vma->vm_flags & VM_GROWSDOWN))
2361 return NULL;
2362 start = vma->vm_start;
2363 if (expand_stack(vma, addr))
2364 return NULL;
2365 if (vma->vm_flags & VM_LOCKED)
2366 populate_vma_page_range(vma, addr, start, NULL);
2367 return vma;
2368}
2369#endif
2370
2371EXPORT_SYMBOL_GPL(find_extend_vma);
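
/*
 * Release the VMAs on the list handed over by detach_vmas_to_be_unmapped(),
 * updating accounting as we go.  Called with the mmap_sem held.
 */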
2379static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2380{
2381 unsigned long nr_accounted = 0;
2382
2383
2384 update_hiwater_vm(mm);
2385 do {
2386 long nrpages = vma_pages(vma);
2387
2388 if (vma->vm_flags & VM_ACCOUNT)
2389 nr_accounted += nrpages;
2390 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2391 vma = remove_vma(vma);
2392 } while (vma);
2393 vm_unacct_memory(nr_accounted);
2394 validate_mm(mm);
2395}
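
/*
 * Tear down the page tables and TLB entries for [start, end) in the given
 * VMA chain.  Called with the mmap_sem held.
 */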
2402static void unmap_region(struct mm_struct *mm,
2403 struct vm_area_struct *vma, struct vm_area_struct *prev,
2404 unsigned long start, unsigned long end)
2405{
2406 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2407 struct mmu_gather tlb;
2408
2409 lru_add_drain();
2410 tlb_gather_mmu(&tlb, mm, start, end);
2411 update_hiwater_rss(mm);
2412 unmap_vmas(&tlb, vma, start, end);
2413 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2414 next ? next->vm_start : USER_PGTABLES_CEILING);
2415 tlb_finish_mmu(&tlb, start, end);
2416}
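
/*
 * Detach the VMAs that are about to be unmapped from the mm's rb-tree and
 * linked list, leaving them chained together starting at @vma for
 * unmap_region() and remove_vma_list().
 */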
2422static void
2423detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2424 struct vm_area_struct *prev, unsigned long end)
2425{
2426 struct vm_area_struct **insertion_point;
2427 struct vm_area_struct *tail_vma = NULL;
2428
2429 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2430 vma->vm_prev = NULL;
2431 do {
2432 vma_rb_erase(vma, &mm->mm_rb);
2433 mm->map_count--;
2434 tail_vma = vma;
2435 vma = vma->vm_next;
2436 } while (vma && vma->vm_start < end);
2437 *insertion_point = vma;
2438 if (vma) {
2439 vma->vm_prev = prev;
2440 vma_gap_update(vma);
2441 } else
2442 mm->highest_vm_end = prev ? prev->vm_end : 0;
2443 tail_vma->vm_next = NULL;
2444
2445
2446 vmacache_invalidate(mm);
2447}
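
/*
 * Split a VMA at @addr into two pieces.  __split_vma() skips the
 * sysctl_max_map_count check; split_vma() below enforces it.
 */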
2453static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2454 unsigned long addr, int new_below)
2455{
2456 struct vm_area_struct *new;
2457 int err;
2458
2459 if (is_vm_hugetlb_page(vma) && (addr &
2460 ~(huge_page_mask(hstate_vma(vma)))))
2461 return -EINVAL;
2462
2463 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2464 if (!new)
2465 return -ENOMEM;
2466
2467
2468 *new = *vma;
2469
2470 INIT_LIST_HEAD(&new->anon_vma_chain);
2471
2472 if (new_below)
2473 new->vm_end = addr;
2474 else {
2475 new->vm_start = addr;
2476 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2477 }
2478
2479 err = vma_dup_policy(vma, new);
2480 if (err)
2481 goto out_free_vma;
2482
2483 err = anon_vma_clone(new, vma);
2484 if (err)
2485 goto out_free_mpol;
2486
2487 if (new->vm_file)
2488 get_file(new->vm_file);
2489
2490 if (new->vm_ops && new->vm_ops->open)
2491 new->vm_ops->open(new);
2492
2493 if (new_below)
2494 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2495 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2496 else
2497 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2498
2499
2500 if (!err)
2501 return 0;
2502
2503
2504 if (new->vm_ops && new->vm_ops->close)
2505 new->vm_ops->close(new);
2506 if (new->vm_file)
2507 fput(new->vm_file);
2508 unlink_anon_vmas(new);
2509 out_free_mpol:
2510 mpol_put(vma_policy(new));
2511 out_free_vma:
2512 kmem_cache_free(vm_area_cachep, new);
2513 return err;
2514}
2515
2516
2517
2518
2519
2520int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2521 unsigned long addr, int new_below)
2522{
2523 if (mm->map_count >= sysctl_max_map_count)
2524 return -ENOMEM;
2525
2526 return __split_vma(mm, vma, addr, new_below);
2527}
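
/*
 * do_munmap() - unmap whatever lies in [start, start+len), splitting VMAs
 * at the boundaries where necessary.  Called with the mmap_sem held for
 * writing.
 */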
2534int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2535{
2536 unsigned long end;
2537 struct vm_area_struct *vma, *prev, *last;
2538
2539 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2540 return -EINVAL;
2541
2542 len = PAGE_ALIGN(len);
2543 if (len == 0)
2544 return -EINVAL;
2545
2546
2547 vma = find_vma(mm, start);
2548 if (!vma)
2549 return 0;
2550 prev = vma->vm_prev;
2551
2552
2553
2554 end = start + len;
2555 if (vma->vm_start >= end)
2556 return 0;
2557
2558
2559
2560
2561
2562
2563
2564
2565 if (start > vma->vm_start) {
2566 int error;
2567
2568
2569
2570
2571
2572
2573 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2574 return -ENOMEM;
2575
2576 error = __split_vma(mm, vma, start, 0);
2577 if (error)
2578 return error;
2579 prev = vma;
2580 }
2581
2582
2583 last = find_vma(mm, end);
2584 if (last && end > last->vm_start) {
2585 int error = __split_vma(mm, last, end, 1);
2586 if (error)
2587 return error;
2588 }
2589 vma = prev ? prev->vm_next : mm->mmap;
2590
2591
2592
2593
2594 if (mm->locked_vm) {
2595 struct vm_area_struct *tmp = vma;
2596 while (tmp && tmp->vm_start < end) {
2597 if (tmp->vm_flags & VM_LOCKED) {
2598 mm->locked_vm -= vma_pages(tmp);
2599 munlock_vma_pages_all(tmp);
2600 }
2601 tmp = tmp->vm_next;
2602 }
2603 }
2604
2605
2606
2607
2608 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2609 unmap_region(mm, vma, prev, start, end);
2610
2611 arch_unmap(mm, vma, start, end);
2612
2613
2614 remove_vma_list(mm, vma);
2615
2616 return 0;
2617}
2618
2619int vm_munmap(unsigned long start, size_t len)
2620{
2621 int ret;
2622 struct mm_struct *mm = current->mm;
2623
2624 down_write(&mm->mmap_sem);
2625 ret = do_munmap(mm, start, len);
2626 up_write(&mm->mmap_sem);
2627 return ret;
2628}
2629EXPORT_SYMBOL(vm_munmap);
2630
2631SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2632{
2633 profile_munmap(addr);
2634 return vm_munmap(addr, len);
2635}
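
/*
 * Emulation of the deprecated remap_file_pages() syscall, implemented here
 * as an ordinary MAP_SHARED|MAP_FIXED mmap of the same file.
 */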
2641SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2642 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2643{
2644
2645 struct mm_struct *mm = current->mm;
2646 struct vm_area_struct *vma;
2647 unsigned long populate = 0;
2648 unsigned long ret = -EINVAL;
2649 struct file *file;
2650
2651 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
2652 "See Documentation/vm/remap_file_pages.txt.\n",
2653 current->comm, current->pid);
2654
2655 if (prot)
2656 return ret;
2657 start = start & PAGE_MASK;
2658 size = size & PAGE_MASK;
2659
2660 if (start + size <= start)
2661 return ret;
2662
2663
2664 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2665 return ret;
2666
2667 down_write(&mm->mmap_sem);
2668 vma = find_vma(mm, start);
2669
2670 if (!vma || !(vma->vm_flags & VM_SHARED))
2671 goto out;
2672
2673 if (start < vma->vm_start || start + size > vma->vm_end)
2674 goto out;
2675
2676 if (pgoff == linear_page_index(vma, start)) {
2677 ret = 0;
2678 goto out;
2679 }
2680
2681 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2682 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2683 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2684
2685 flags &= MAP_NONBLOCK;
2686 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2687 if (vma->vm_flags & VM_LOCKED) {
2688 flags |= MAP_LOCKED;
2689
2690 munlock_vma_pages_range(vma, start, start + size);
2691 }
2692
2693 file = get_file(vma->vm_file);
2694 ret = do_mmap_pgoff(vma->vm_file, start, size,
2695 prot, flags, pgoff, &populate);
2696 fput(file);
2697out:
2698 up_write(&mm->mmap_sem);
2699 if (populate)
2700 mm_populate(ret, populate);
2701 if (!IS_ERR_VALUE(ret))
2702 ret = 0;
2703 return ret;
2704}
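
/*
 * Illustrative sketch (not part of this file): the emulation above turns a
 * call such as
 *
 *	remap_file_pages(start, size, 0, pgoff, 0);
 *
 * on a VM_SHARED mapping into the equivalent of
 *
 *	mmap(start, size, <prot copied from the vma>,
 *	     MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, pgoff << PAGE_SHIFT);
 *
 * i.e. the old "nonlinear" window is simply replaced by a fresh linear
 * mapping of the requested file offset over the same address range.
 */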

/*
 * Sanity check: in CONFIG_DEBUG_VM kernels, warn if the caller does not
 * already hold mmap_sem for writing (a successful read_trylock means no
 * writer holds it).
 */
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
        if (unlikely(down_read_trylock(&mm->mmap_sem))) {
                WARN_ON(1);
                up_read(&mm->mmap_sem);
        }
#endif
}

/*
 * this is really a simplified "do_mmap".  it only handles
 * anonymous maps.  eventually we may be able to do some
 * brk-specific accounting here.
 */
static unsigned long do_brk(unsigned long addr, unsigned long len)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
        unsigned long flags;
        struct rb_node **rb_link, *rb_parent;
        pgoff_t pgoff = addr >> PAGE_SHIFT;
        int error;

        len = PAGE_ALIGN(len);
        if (!len)
                return addr;

        flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;

        error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
        if (error & ~PAGE_MASK)
                return error;

        error = mlock_future_check(mm, mm->def_flags, len);
        if (error)
                return error;

        /*
         * mm->mmap_sem is required to protect against another thread
         * changing the mappings in case we sleep.
         */
        verify_mm_writelocked(mm);

        /*
         * Clear old maps.  this also does some error checking for us
         */
        while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
                              &rb_parent)) {
                if (do_munmap(mm, addr, len))
                        return -ENOMEM;
        }

        /* Check against address space limits *after* clearing old maps... */
        if (!may_expand_vm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;

        if (mm->map_count > sysctl_max_map_count)
                return -ENOMEM;

        if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;

        /* Can we just expand an old private anonymous mapping? */
        vma = vma_merge(mm, prev, addr, addr + len, flags,
                        NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
        if (vma)
                goto out;

        /*
         * create a vma struct for an anonymous mapping
         */
        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (!vma) {
                vm_unacct_memory(len >> PAGE_SHIFT);
                return -ENOMEM;
        }

        INIT_LIST_HEAD(&vma->anon_vma_chain);
        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;
        vma->vm_pgoff = pgoff;
        vma->vm_flags = flags;
        vma->vm_page_prot = vm_get_page_prot(flags);
        vma_link(mm, vma, prev, rb_link, rb_parent);
out:
        perf_event_mmap(vma);
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
        vma->vm_flags |= VM_SOFTDIRTY;
        return addr;
}

unsigned long vm_brk(unsigned long addr, unsigned long len)
{
        struct mm_struct *mm = current->mm;
        unsigned long ret;
        bool populate;

        down_write(&mm->mmap_sem);
        ret = do_brk(addr, len);
        populate = ((mm->def_flags & VM_LOCKED) != 0);
        up_write(&mm->mmap_sem);
        if (populate)
                mm_populate(addr, len);
        return ret;
}
EXPORT_SYMBOL(vm_brk);
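
/*
 * Illustrative sketch (not part of this file): binary loaders use vm_brk()
 * to set up anonymous, brk-style regions such as an executable's bss
 * (addr and len are hypothetical, page-aligned values here):
 *
 *	unsigned long ret = vm_brk(addr, len);
 *	if (IS_ERR_VALUE(ret))
 *		return ret;
 *
 * vm_brk() takes mmap_sem itself, so unlike do_brk() it must not be called
 * with the semaphore already held.
 */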

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;

        /* mm's last user has gone, and it's about to be pulled down */
        mmu_notifier_release(mm);

        if (mm->locked_vm) {
                vma = mm->mmap;
                while (vma) {
                        if (vma->vm_flags & VM_LOCKED)
                                munlock_vma_pages_all(vma);
                        vma = vma->vm_next;
                }
        }

        arch_exit_mmap(mm);

        vma = mm->mmap;
        if (!vma)       /* Can happen if dup_mmap() received an OOM */
                return;

        lru_add_drain();
        flush_cache_mm(mm);
        tlb_gather_mmu(&tlb, mm, 0, -1);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        unmap_vmas(&tlb, vma, 0, -1);

        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
        tlb_finish_mmu(&tlb, 0, -1);

        /*
         * Walk the list again, actually closing and freeing it,
         * with preemption enabled, without holding any MM locks.
         */
        while (vma) {
                if (vma->vm_flags & VM_ACCOUNT)
                        nr_accounted += vma_pages(vma);
                vma = remove_vma(vma);
        }
        vm_unacct_memory(nr_accounted);
}
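
/*
 * Illustrative note (not part of this file): exit_mmap() is reached from
 * mmput() once the final reference to the mm is dropped, e.g.
 *
 *	mmput(mm);	// when mm_users hits zero this ends up calling
 *			// exit_mmap(mm) before the mm itself is freed
 *
 * so by the time it runs no other task can still be faulting on this mm.
 */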

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_rwsem is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
        struct vm_area_struct *prev;
        struct rb_node **rb_link, *rb_parent;

        if (find_vma_links(mm, vma->vm_start, vma->vm_end,
                           &prev, &rb_link, &rb_parent))
                return -ENOMEM;
        if ((vma->vm_flags & VM_ACCOUNT) &&
             security_vm_enough_memory_mm(mm, vma_pages(vma)))
                return -ENOMEM;

        /*
         * The vm_pgoff of a purely anonymous vma should be irrelevant
         * until its first write fault, when page's anon_vma and index
         * are set.  But now set the vm_pgoff it will almost certainly
         * end up with (unless mremap moves it elsewhere before that
         * first fault), so /proc/pid/maps tells a consistent story.
         *
         * By setting it to reflect the virtual start address of the
         * vma, merges and splits can happen in a seamless way, just
         * using the existing file pgoff checks and manipulations.
         * Similarly in do_mmap_pgoff and in do_brk.
         */
        if (vma_is_anonymous(vma)) {
                BUG_ON(vma->anon_vma);
                vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
        }

        vma_link(mm, vma, prev, rb_link, rb_parent);
        return 0;
}
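
/*
 * Illustrative sketch (not part of this file): a caller builds the vma
 * first and only then links it in, much as __install_special_mapping()
 * below does:
 *
 *	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 *	if (!vma)
 *		return -ENOMEM;
 *	INIT_LIST_HEAD(&vma->anon_vma_chain);
 *	vma->vm_mm = mm;
 *	vma->vm_start = addr;
 *	vma->vm_end = addr + len;
 *	vma->vm_flags = vm_flags;
 *	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 *	ret = insert_vm_struct(mm, vma);	// mmap_sem held for write
 *
 * On failure the caller still owns the vma and must free it itself.
 */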

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        unsigned long addr, unsigned long len, pgoff_t pgoff,
        bool *need_rmap_locks)
{
        struct vm_area_struct *vma = *vmap;
        unsigned long vma_start = vma->vm_start;
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma, *prev;
        struct rb_node **rb_link, *rb_parent;
        bool faulted_in_anon_vma = true;

        /*
         * If anonymous vma has not yet been faulted, update new pgoff
         * to match new location, to increase its chance of merging.
         */
        if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
                pgoff = addr >> PAGE_SHIFT;
                faulted_in_anon_vma = false;
        }

        if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
                return NULL;
        new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
                            vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
                            vma->vm_userfaultfd_ctx);
        if (new_vma) {
                /*
                 * Source vma may have been merged into new_vma
                 */
                if (unlikely(vma_start >= new_vma->vm_start &&
                             vma_start < new_vma->vm_end)) {
                        /*
                         * The only way we can get a vma_merge with
                         * self during an mremap is if the vma hasn't
                         * been faulted in yet and we were allowed to
                         * reset the dst vma->vm_pgoff to the
                         * destination address of the mremap to allow
                         * the merge to happen. mremap must change the
                         * vm_pgoff linearity between src and dst vmas
                         * (in turn preventing a vma_merge) to be
                         * safe. It is only safe to keep the vm_pgoff
                         * valid so long as the src and dst vmas are
                         * not removed.
                         */
                        VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
                        *vmap = vma = new_vma;
                }
                *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
        } else {
                new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                if (!new_vma)
                        goto out;
                *new_vma = *vma;
                new_vma->vm_start = addr;
                new_vma->vm_end = addr + len;
                new_vma->vm_pgoff = pgoff;
                if (vma_dup_policy(vma, new_vma))
                        goto out_free_vma;
                INIT_LIST_HEAD(&new_vma->anon_vma_chain);
                if (anon_vma_clone(new_vma, vma))
                        goto out_free_mempol;
                if (new_vma->vm_file)
                        get_file(new_vma->vm_file);
                if (new_vma->vm_ops && new_vma->vm_ops->open)
                        new_vma->vm_ops->open(new_vma);
                vma_link(mm, new_vma, prev, rb_link, rb_parent);
                *need_rmap_locks = false;
        }
        return new_vma;

out_free_mempol:
        mpol_put(vma_policy(new_vma));
out_free_vma:
        kmem_cache_free(vm_area_cachep, new_vma);
out:
        return NULL;
}
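
/*
 * Illustrative sketch (not part of this file): mremap's move_vma() is the
 * caller of copy_vma(); it passes need_rmap_locks on to move_page_tables()
 * to decide whether the rmap locks must be held while ptes are shifted:
 *
 *	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
 *			   &need_rmap_locks);
 *	if (!new_vma)
 *		return -ENOMEM;
 *	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr,
 *				     old_len, need_rmap_locks);
 *
 * The locks are only needed when the copy lands at a lower-or-equal
 * vm_pgoff than the source, where an rmap walk ordered by pgoff could
 * otherwise observe the entry in neither location.
 */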

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages.
 */
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
{
        unsigned long cur = mm->total_vm;       /* pages */
        unsigned long lim;

        lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;

        if (cur + npages > lim)
                return 0;
        return 1;
}
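
/*
 * Worked example (illustrative numbers): with 4 KiB pages and RLIMIT_AS
 * set to 1 GiB, lim = (1 << 30) >> 12 = 262144 pages, so a process whose
 * total_vm is already 262000 pages may grow by at most 144 more pages
 * before may_expand_vm() starts returning 0.
 */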

static int special_mapping_fault(struct vm_area_struct *vma,
                                 struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
        return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

static const struct vm_operations_struct special_mapping_vmops = {
        .close = special_mapping_close,
        .fault = special_mapping_fault,
        .name = special_mapping_name,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
        .close = special_mapping_close,
        .fault = special_mapping_fault,
};

static int special_mapping_fault(struct vm_area_struct *vma,
                                 struct vm_fault *vmf)
{
        pgoff_t pgoff;
        struct page **pages;

        if (vma->vm_ops == &legacy_special_mapping_vmops)
                pages = vma->vm_private_data;
        else
                pages = ((struct vm_special_mapping *)vma->vm_private_data)->pages;

        /* Walk the NULL-terminated page array up to the faulting index. */
        for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                pgoff--;

        if (*pages) {
                struct page *page = *pages;
                get_page(page);
                vmf->page = page;
                return 0;
        }

        return VM_FAULT_SIGBUS;
}

static struct vm_area_struct *__install_special_mapping(
        struct mm_struct *mm,
        unsigned long addr, unsigned long len,
        unsigned long vm_flags, const struct vm_operations_struct *ops,
        void *priv)
{
        int ret;
        struct vm_area_struct *vma;

        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
        if (unlikely(vma == NULL))
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&vma->anon_vma_chain);
        vma->vm_mm = mm;
        vma->vm_start = addr;
        vma->vm_end = addr + len;

        vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

        vma->vm_ops = ops;
        vma->vm_private_data = priv;

        ret = insert_vm_struct(mm, vma);
        if (ret)
                goto out;

        mm->total_vm += len >> PAGE_SHIFT;

        perf_event_mmap(vma);

        return vma;

out:
        kmem_cache_free(vm_area_cachep, vma);
        return ERR_PTR(ret);
}

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
struct vm_area_struct *_install_special_mapping(
        struct mm_struct *mm,
        unsigned long addr, unsigned long len,
        unsigned long vm_flags, const struct vm_special_mapping *spec)
{
        return __install_special_mapping(mm, addr, len, vm_flags,
                                         &special_mapping_vmops, (void *)spec);
}

int install_special_mapping(struct mm_struct *mm,
                            unsigned long addr, unsigned long len,
                            unsigned long vm_flags, struct page **pages)
{
        struct vm_area_struct *vma = __install_special_mapping(
                mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
                (void *)pages);

        return PTR_ERR_OR_ZERO(vma);
}
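
/*
 * Illustrative sketch (not part of this file): architecture code typically
 * uses these helpers to map vDSO-like objects into a new process image,
 * with a NULL-terminated page array describing the backing pages
 * (vdso_page, vdso_pages and vdso_addr are hypothetical names):
 *
 *	static struct page *vdso_pages[] = { &vdso_page, NULL };
 *
 *	down_write(&mm->mmap_sem);
 *	ret = install_special_mapping(mm, vdso_addr, PAGE_SIZE,
 *				      VM_READ | VM_EXEC |
 *				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
 *				      vdso_pages);
 *	up_write(&mm->mmap_sem);
 *
 * _install_special_mapping() additionally returns the vma and lets the
 * caller name the mapping via struct vm_special_mapping.
 */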

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
        if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
                /*
                 * Bit 0 of anon_vma->root->rb_root.rb_node can't change
                 * from under us because we hold the mm_all_locks_mutex.
                 */
                down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
                /*
                 * We can safely modify the bit after taking the
                 * anon_vma->root->rwsem. If some other vma in this mm shares
                 * the same anon_vma we won't take it again.
                 *
                 * No need of atomic instructions here, the bit
                 * can't change from under us thanks to the
                 * anon_vma->root->rwsem.
                 */
                if (__test_and_set_bit(0, (unsigned long *)
                                       &anon_vma->root->rb_root.rb_node))
                        BUG();
        }
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
        if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
                /*
                 * AS_MM_ALL_LOCKS can't change from under us because
                 * we hold the mm_all_locks_mutex.
                 *
                 * Operations on ->flags have to be atomic because
                 * even if AS_MM_ALL_LOCKS is stable thanks to the
                 * mm_all_locks_mutex, there may be other cpus
                 * changing other bitflags in parallel to us.
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
                down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
        }
}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->root->rb_root.rb_node and the AS_MM_ALL_LOCKS
 * bitflag in mapping->flags avoid taking the same lock twice, if more
 * than one vma in this mm is backed by the same anon_vma or address_space.
 *
 * We can take all the locks in random order because the VM code
 * taking i_mmap_rwsem or anon_vma->rwsem outside the mmap_sem never
 * takes more than one of them in a row. Secondly we're protected
 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;

        BUG_ON(down_read_trylock(&mm->mmap_sem));

        mutex_lock(&mm_all_locks_mutex);

        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->vm_file && vma->vm_file->f_mapping)
                        vm_lock_mapping(mm, vma->vm_file->f_mapping);
        }

        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                if (signal_pending(current))
                        goto out_unlock;
                if (vma->anon_vma)
                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                                vm_lock_anon_vma(mm, avc->anon_vma);
        }

        return 0;

out_unlock:
        mm_drop_all_locks(mm);
        return -EINTR;
}
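
/*
 * Illustrative sketch (not part of this file): mmu notifier registration
 * is the classic user of this interface, wrapping its critical section as:
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (ret)
 *		goto out;		// -EINTR: a signal arrived
 *	// ... publish the notifier while every rmap lock is held ...
 *	mm_drop_all_locks(mm);
 * out:
 *	up_write(&mm->mmap_sem);
 */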

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
        if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
                /*
                 * Bit 0 of anon_vma->root->rb_root.rb_node can't change
                 * to 0 from under us because we hold the mm_all_locks_mutex.
                 *
                 * We must however clear the bitflag before unlocking
                 * the vma so the users of the anon_vma->rb_root will
                 * never see our bitflag.
                 *
                 * No need of atomic instructions here, the bit
                 * can't change from under us until we release the
                 * anon_vma->root->rwsem.
                 */
                if (!__test_and_clear_bit(0, (unsigned long *)
                                          &anon_vma->root->rb_root.rb_node))
                        BUG();
                anon_vma_unlock_write(anon_vma);
        }
}

static void vm_unlock_mapping(struct address_space *mapping)
{
        if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
                /*
                 * AS_MM_ALL_LOCKS can't change to 0 from under us
                 * because we hold the mm_all_locks_mutex.
                 */
                i_mmap_unlock_write(mapping);
                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                        &mapping->flags))
                        BUG();
        }
}

/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
        struct vm_area_struct *vma;
        struct anon_vma_chain *avc;

        BUG_ON(down_read_trylock(&mm->mmap_sem));
        BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                if (vma->anon_vma)
                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
                                vm_unlock_anon_vma(avc->anon_vma);
                if (vma->vm_file && vma->vm_file->f_mapping)
                        vm_unlock_mapping(vma->vm_file->f_mapping);
        }

        mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
        int ret;

        ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
        VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
        unsigned long free_kbytes;

        free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

        sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
        return 0;
}
subsys_initcall(init_user_reserve);
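
/*
 * Worked example (illustrative numbers): on a machine with 4 GiB free at
 * boot, free_kbytes = 4194304 and 4194304 / 32 = 131072 KiB, so the
 * reserve hits the 1UL << 17 (128 MiB) ceiling.  With only 1 GiB free,
 * 1048576 / 32 = 32768 KiB, i.e. a 32 MiB reserve.
 */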

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
 * only reserve 3% of free pages by default.
 */
static int init_admin_reserve(void)
{
        unsigned long free_kbytes;

        free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

        sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
        return 0;
}
subsys_initcall(init_admin_reserve);
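
/*
 * Worked example (illustrative numbers): with 256 MiB free, free_kbytes =
 * 262144 and 262144 / 32 = 8192 KiB, exactly the 1UL << 13 (8 MiB) cap, so
 * any machine with more free memory than that gets the full 8 MiB admin
 * reserve, while a 128 MiB machine gets 4096 KiB (4 MiB).
 */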

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
                                unsigned long action, void *data)
{
        unsigned long tmp, free_kbytes;

        switch (action) {
        case MEM_ONLINE:
                /* Default max is 128MB. Leave alone if modified by operator. */
                tmp = sysctl_user_reserve_kbytes;
                if (0 < tmp && tmp < (1UL << 17))
                        init_user_reserve();

                /* Default max is 8MB.  Leave alone if modified by operator. */
                tmp = sysctl_admin_reserve_kbytes;
                if (0 < tmp && tmp < (1UL << 13))
                        init_admin_reserve();

                break;
        case MEM_OFFLINE:
                free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

                if (sysctl_user_reserve_kbytes > free_kbytes) {
                        init_user_reserve();
                        pr_info("vm.user_reserve_kbytes reset to %lu\n",
                                sysctl_user_reserve_kbytes);
                }

                if (sysctl_admin_reserve_kbytes > free_kbytes) {
                        init_admin_reserve();
                        pr_info("vm.admin_reserve_kbytes reset to %lu\n",
                                sysctl_admin_reserve_kbytes);
                }
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
        .notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
        if (register_hotmemory_notifier(&reserve_mem_nb))
                pr_err("Failed registering memory add/remove notifier for admin reserve\n");

        return 0;
}
subsys_initcall(init_reserve_notifier);