/*
 * mm/mmap.c: memory mapping and VMA management for user address spaces.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
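
/*
 * Table translating the VM_READ/VM_WRITE/VM_EXEC/VM_SHARED bits of vm_flags
 * into the page protection used for PTEs of such a mapping.  The sixteen
 * entries are the architecture-provided __P000..__P111 (private) and
 * __S000..__S111 (shared) values.
 */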
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}

	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem held for write.
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later, after the
	 * test of oldbrk with newbrk, then it can escape the test and let the
	 * data segment grow beyond its set limit in the case where the limit
	 * is lower than brk().
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}

static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, prev_end, subtree_gap;

	/*
	 * The gap in front of this vma is the space between the end of the
	 * previous vma (including its guard gap) and this vma's start
	 * (including its own guard gap); also consider the largest gap
	 * recorded in either rbtree subtree.
	 */
	max = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (max > prev_end)
			max -= prev_end;
		else
			max = 0;
	}
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
288
289#ifdef CONFIG_DEBUG_VM_RB
290static int browse_rb(struct mm_struct *mm)
291{
292 struct rb_root *root = &mm->mm_rb;
293 int i = 0, j, bug = 0;
294 struct rb_node *nd, *pn = NULL;
295 unsigned long prev = 0, pend = 0;
296
297 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
298 struct vm_area_struct *vma;
299 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
300 if (vma->vm_start < prev) {
301 pr_emerg("vm_start %lx < prev %lx\n",
302 vma->vm_start, prev);
303 bug = 1;
304 }
305 if (vma->vm_start < pend) {
306 pr_emerg("vm_start %lx < pend %lx\n",
307 vma->vm_start, pend);
308 bug = 1;
309 }
310 if (vma->vm_start > vma->vm_end) {
311 pr_emerg("vm_start %lx > vm_end %lx\n",
312 vma->vm_start, vma->vm_end);
313 bug = 1;
314 }
315 spin_lock(&mm->page_table_lock);
316 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
317 pr_emerg("free gap %lx, correct %lx\n",
318 vma->rb_subtree_gap,
319 vma_compute_subtree_gap(vma));
320 bug = 1;
321 }
322 spin_unlock(&mm->page_table_lock);
323 i++;
324 pn = nd;
325 prev = vma->vm_start;
326 pend = vma->vm_end;
327 }
328 j = 0;
329 for (nd = pn; nd; nd = rb_prev(nd))
330 j++;
331 if (i != j) {
332 pr_emerg("backwards %d, forwards %d\n", j, i);
333 bug = 1;
334 }
335 return bug ? -1 : i;
336}
337
338static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
339{
340 struct rb_node *nd;
341
342 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
343 struct vm_area_struct *vma;
344 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
345 VM_BUG_ON_VMA(vma != ignore &&
346 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
347 vma);
348 }
349}
350
351static void validate_mm(struct mm_struct *mm)
352{
353 int bug = 0;
354 int i = 0;
355 unsigned long highest_address = 0;
356 struct vm_area_struct *vma = mm->mmap;
357
358 while (vma) {
359 struct anon_vma *anon_vma = vma->anon_vma;
360 struct anon_vma_chain *avc;
361
362 if (anon_vma) {
363 anon_vma_lock_read(anon_vma);
364 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
365 anon_vma_interval_tree_verify(avc);
366 anon_vma_unlock_read(anon_vma);
367 }
368
369 highest_address = vm_end_gap(vma);
370 vma = vma->vm_next;
371 i++;
372 }
373 if (i != mm->map_count) {
374 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
375 bug = 1;
376 }
377 if (highest_address != mm->highest_vm_end) {
378 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
379 mm->highest_vm_end, highest_address);
380 bug = 1;
381 }
382 i = browse_rb(mm);
383 if (i != mm->map_count) {
384 if (i != -1)
385 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
386 bug = 1;
387 }
388 VM_BUG_ON_MM(bug, mm);
389}
390#else
391#define validate_mm_rb(root, ignore) do { } while (0)
392#define validate_mm(mm) do { } while (0)
393#endif
394
395RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
396 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
397
398
399
400
401
402
403static void vma_gap_update(struct vm_area_struct *vma)
404{
405
406
407
408
409 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
410}
411
412static inline void vma_rb_insert(struct vm_area_struct *vma,
413 struct rb_root *root)
414{
415
416 validate_mm_rb(root, NULL);
417
418 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
419}
420
421static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
422{
423
424
425
426
427
428 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
429}
430
431static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
432 struct rb_root *root,
433 struct vm_area_struct *ignore)
434{
435
436
437
438
439
440 validate_mm_rb(root, ignore);
441
442 __vma_rb_erase(vma, root);
443}
444
445static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
446 struct rb_root *root)
447{
448
449
450
451
452 validate_mm_rb(root, vma);
453
454 __vma_rb_erase(vma, root);
455}
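
/*
 * Before a vma's vm_start, vm_end or vm_pgoff is changed, the vma must be
 * removed from every anon_vma interval tree it is queued on, and re-inserted
 * once the update is done.  These two helpers do exactly that; callers hold
 * mmap_sem for writing and the relevant anon_vma lock.
 */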
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}

static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}
522
523static unsigned long count_vma_pages_range(struct mm_struct *mm,
524 unsigned long addr, unsigned long end)
525{
526 unsigned long nr_pages = 0;
527 struct vm_area_struct *vma;
528
529
530 vma = find_vma_intersection(mm, addr, end);
531 if (!vma)
532 return 0;
533
534 nr_pages = (min(end, vma->vm_end) -
535 max(addr, vma->vm_start)) >> PAGE_SHIFT;
536
537
538 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
539 unsigned long overlap_len;
540
541 if (vma->vm_start > end)
542 break;
543
544 overlap_len = min(end, vma->vm_end) - vma->vm_start;
545 nr_pages += overlap_len >> PAGE_SHIFT;
546 }
547
548 return nr_pages;
549}
550
551void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
552 struct rb_node **rb_link, struct rb_node *rb_parent)
553{
554
555 if (vma->vm_next)
556 vma_gap_update(vma->vm_next);
557 else
558 mm->highest_vm_end = vm_end_gap(vma);
559
560
561
562
563
564
565
566
567
568
569 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
570 vma->rb_subtree_gap = 0;
571 vma_gap_update(vma);
572 vma_rb_insert(vma, &mm->mm_rb);
573}
574
575static void __vma_link_file(struct vm_area_struct *vma)
576{
577 struct file *file;
578
579 file = vma->vm_file;
580 if (file) {
581 struct address_space *mapping = file->f_mapping;
582
583 if (vma->vm_flags & VM_DENYWRITE)
584 atomic_dec(&file_inode(file)->i_writecount);
585 if (vma->vm_flags & VM_SHARED)
586 atomic_inc(&mapping->i_mmap_writable);
587
588 flush_dcache_mmap_lock(mapping);
589 vma_interval_tree_insert(vma, &mapping->i_mmap);
590 flush_dcache_mmap_unlock(mapping);
591 }
592}
593
594static void
595__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
596 struct vm_area_struct *prev, struct rb_node **rb_link,
597 struct rb_node *rb_parent)
598{
599 __vma_link_list(mm, vma, prev, rb_parent);
600 __vma_link_rb(mm, vma, rb_link, rb_parent);
601}
602
603static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
604 struct vm_area_struct *prev, struct rb_node **rb_link,
605 struct rb_node *rb_parent)
606{
607 struct address_space *mapping = NULL;
608
609 if (vma->vm_file) {
610 mapping = vma->vm_file->f_mapping;
611 i_mmap_lock_write(mapping);
612 }
613
614 __vma_link(mm, vma, prev, rb_link, rb_parent);
615 __vma_link_file(vma);
616
617 if (mapping)
618 i_mmap_unlock_write(mapping);
619
620 mm->map_count++;
621 validate_mm(mm);
622}
623
624
625
626
627
628static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
629{
630 struct vm_area_struct *prev;
631 struct rb_node **rb_link, *rb_parent;
632
633 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
634 &prev, &rb_link, &rb_parent))
635 BUG();
636 __vma_link(mm, vma, prev, rb_link, rb_parent);
637 mm->map_count++;
638}
639
640static __always_inline void __vma_unlink_common(struct mm_struct *mm,
641 struct vm_area_struct *vma,
642 struct vm_area_struct *prev,
643 bool has_prev,
644 struct vm_area_struct *ignore)
645{
646 struct vm_area_struct *next;
647
648 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
649 next = vma->vm_next;
650 if (has_prev)
651 prev->vm_next = next;
652 else {
653 prev = vma->vm_prev;
654 if (prev)
655 prev->vm_next = next;
656 else
657 mm->mmap = next;
658 }
659 if (next)
660 next->vm_prev = prev;
661
662
663 vmacache_invalidate(mm);
664}
665
666static inline void __vma_unlink_prev(struct mm_struct *mm,
667 struct vm_area_struct *vma,
668 struct vm_area_struct *prev)
669{
670 __vma_unlink_common(mm, vma, prev, true, vma);
671}
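
/*
 * __vma_adjust() updates a vma's vm_start, vm_end and vm_pgoff, fixing up
 * the rbtree, the vma list and the file/anon_vma interval trees as it goes.
 * "insert", when non-NULL, is an additional vma to link in as part of the
 * same update (used when splitting), and "expand" names the vma that grows
 * over its neighbour when areas are merged.
 */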
680int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
681 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
682 struct vm_area_struct *expand)
683{
684 struct mm_struct *mm = vma->vm_mm;
685 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
686 struct address_space *mapping = NULL;
687 struct rb_root *root = NULL;
688 struct anon_vma *anon_vma = NULL;
689 struct file *file = vma->vm_file;
690 bool start_changed = false, end_changed = false;
691 long adjust_next = 0;
692 int remove_next = 0;
693
694 if (next && !insert) {
695 struct vm_area_struct *exporter = NULL, *importer = NULL;
696
697 if (end >= next->vm_end) {
698
699
700
701
702
703
704 if (next == expand) {
705
706
707
708
709 VM_WARN_ON(end != next->vm_end);
710
711
712
713
714
715 remove_next = 3;
716 VM_WARN_ON(file != next->vm_file);
717 swap(vma, next);
718 } else {
719 VM_WARN_ON(expand != vma);
720
721
722
723
724 remove_next = 1 + (end > next->vm_end);
725 VM_WARN_ON(remove_next == 2 &&
726 end != next->vm_next->vm_end);
727 VM_WARN_ON(remove_next == 1 &&
728 end != next->vm_end);
729
730 end = next->vm_end;
731 }
732
733 exporter = next;
734 importer = vma;
735
736
737
738
739
740 if (remove_next == 2 && !next->anon_vma)
741 exporter = next->vm_next;
742
743 } else if (end > next->vm_start) {
744
745
746
747
748 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
749 exporter = next;
750 importer = vma;
751 VM_WARN_ON(expand != importer);
752 } else if (end < vma->vm_end) {
753
754
755
756
757
758 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
759 exporter = vma;
760 importer = next;
761 VM_WARN_ON(expand != importer);
762 }
763
764
765
766
767
768
769 if (exporter && exporter->anon_vma && !importer->anon_vma) {
770 int error;
771
772 importer->anon_vma = exporter->anon_vma;
773 error = anon_vma_clone(importer, exporter);
774 if (error)
775 return error;
776 }
777 }
778again:
779 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
780
781 if (file) {
782 mapping = file->f_mapping;
783 root = &mapping->i_mmap;
784 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
785
786 if (adjust_next)
787 uprobe_munmap(next, next->vm_start, next->vm_end);
788
789 i_mmap_lock_write(mapping);
790 if (insert) {
791
792
793
794
795
796
797 __vma_link_file(insert);
798 }
799 }
800
801 anon_vma = vma->anon_vma;
802 if (!anon_vma && adjust_next)
803 anon_vma = next->anon_vma;
804 if (anon_vma) {
805 VM_WARN_ON(adjust_next && next->anon_vma &&
806 anon_vma != next->anon_vma);
807 anon_vma_lock_write(anon_vma);
808 anon_vma_interval_tree_pre_update_vma(vma);
809 if (adjust_next)
810 anon_vma_interval_tree_pre_update_vma(next);
811 }
812
813 if (root) {
814 flush_dcache_mmap_lock(mapping);
815 vma_interval_tree_remove(vma, root);
816 if (adjust_next)
817 vma_interval_tree_remove(next, root);
818 }
819
820 if (start != vma->vm_start) {
821 vma->vm_start = start;
822 start_changed = true;
823 }
824 if (end != vma->vm_end) {
825 vma->vm_end = end;
826 end_changed = true;
827 }
828 vma->vm_pgoff = pgoff;
829 if (adjust_next) {
830 next->vm_start += adjust_next << PAGE_SHIFT;
831 next->vm_pgoff += adjust_next;
832 }
833
834 if (root) {
835 if (adjust_next)
836 vma_interval_tree_insert(next, root);
837 vma_interval_tree_insert(vma, root);
838 flush_dcache_mmap_unlock(mapping);
839 }
840
841 if (remove_next) {
842
843
844
845
846 if (remove_next != 3)
847 __vma_unlink_prev(mm, next, vma);
848 else
849
850
851
852
853
854
855
856
857
858 __vma_unlink_common(mm, next, NULL, false, vma);
859 if (file)
860 __remove_shared_vm_struct(next, file, mapping);
861 } else if (insert) {
862
863
864
865
866
867 __insert_vm_struct(mm, insert);
868 } else {
869 if (start_changed)
870 vma_gap_update(vma);
871 if (end_changed) {
872 if (!next)
873 mm->highest_vm_end = vm_end_gap(vma);
874 else if (!adjust_next)
875 vma_gap_update(next);
876 }
877 }
878
879 if (anon_vma) {
880 anon_vma_interval_tree_post_update_vma(vma);
881 if (adjust_next)
882 anon_vma_interval_tree_post_update_vma(next);
883 anon_vma_unlock_write(anon_vma);
884 }
885 if (mapping)
886 i_mmap_unlock_write(mapping);
887
888 if (root) {
889 uprobe_mmap(vma);
890
891 if (adjust_next)
892 uprobe_mmap(next);
893 }
894
895 if (remove_next) {
896 if (file) {
897 uprobe_munmap(next, next->vm_start, next->vm_end);
898 fput(file);
899 }
900 if (next->anon_vma)
901 anon_vma_merge(vma, next);
902 mm->map_count--;
903 mpol_put(vma_policy(next));
904 kmem_cache_free(vm_area_cachep, next);
905
906
907
908
909
910 if (remove_next != 3) {
911
912
913
914
915
916
917 next = vma->vm_next;
918 } else {
919
920
921
922
923
924
925
926
927
928
929 next = vma;
930 }
931 if (remove_next == 2) {
932 remove_next = 1;
933 end = next->vm_end;
934 goto again;
935 }
936 else if (next)
937 vma_gap_update(next);
938 else {
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
959 }
960 }
961 if (insert && file)
962 uprobe_mmap(insert);
963
964 validate_mm(mm);
965
966 return 0;
967}
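
/*
 * Helpers for vma_merge(): vmas are only considered mergeable when their
 * flags match (VM_SOFTDIRTY is ignored, as it can appear on either side),
 * they map the same file, share a compatible userfaultfd context, and
 * neither has a ->close() operation that would need to run on merge.
 */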
973static inline int is_mergeable_vma(struct vm_area_struct *vma,
974 struct file *file, unsigned long vm_flags,
975 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
976{
977
978
979
980
981
982
983
984
985 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
986 return 0;
987 if (vma->vm_file != file)
988 return 0;
989 if (vma->vm_ops && vma->vm_ops->close)
990 return 0;
991 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
992 return 0;
993 return 1;
994}
995
996static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
997 struct anon_vma *anon_vma2,
998 struct vm_area_struct *vma)
999{
1000
1001
1002
1003
1004 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1005 list_is_singular(&vma->anon_vma_chain)))
1006 return 1;
1007 return anon_vma1 == anon_vma2;
1008}
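
/*
 * Return 1 if the new area (vm_flags, anon_vma, file, vm_pgoff) can be
 * merged immediately in front of @vma, i.e. it ends where @vma starts and
 * the file offsets are contiguous.
 */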
1021static int
1022can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1023 struct anon_vma *anon_vma, struct file *file,
1024 pgoff_t vm_pgoff,
1025 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1026{
1027 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1028 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1029 if (vma->vm_pgoff == vm_pgoff)
1030 return 1;
1031 }
1032 return 0;
1033}
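
/*
 * Return 1 if the new area (vm_flags, anon_vma, file, vm_pgoff) can be
 * merged immediately after @vma, i.e. it starts where @vma ends and the
 * file offsets are contiguous.
 */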
1042static int
1043can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1044 struct anon_vma *anon_vma, struct file *file,
1045 pgoff_t vm_pgoff,
1046 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1047{
1048 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1049 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1050 pgoff_t vm_pglen;
1051 vm_pglen = vma_pages(vma);
1052 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1053 return 1;
1054 }
1055 return 0;
1056}
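
/*
 * Given a new mapping request (addr, end, vm_flags, file, pgoff) and the
 * vma preceding it (prev), decide whether the new area can be merged with
 * prev, with the following vma, or with both, and perform the merge via
 * __vma_adjust().  Returns the resulting vma, or NULL if no merge was
 * possible and the caller must allocate a new vma.
 */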
1098struct vm_area_struct *vma_merge(struct mm_struct *mm,
1099 struct vm_area_struct *prev, unsigned long addr,
1100 unsigned long end, unsigned long vm_flags,
1101 struct anon_vma *anon_vma, struct file *file,
1102 pgoff_t pgoff, struct mempolicy *policy,
1103 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1104{
1105 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1106 struct vm_area_struct *area, *next;
1107 int err;
1108
1109
1110
1111
1112
1113 if (vm_flags & VM_SPECIAL)
1114 return NULL;
1115
1116 if (prev)
1117 next = prev->vm_next;
1118 else
1119 next = mm->mmap;
1120 area = next;
1121 if (area && area->vm_end == end)
1122 next = next->vm_next;
1123
1124
1125 VM_WARN_ON(prev && addr <= prev->vm_start);
1126 VM_WARN_ON(area && end > area->vm_end);
1127 VM_WARN_ON(addr >= end);
1128
1129
1130
1131
1132 if (prev && prev->vm_end == addr &&
1133 mpol_equal(vma_policy(prev), policy) &&
1134 can_vma_merge_after(prev, vm_flags,
1135 anon_vma, file, pgoff,
1136 vm_userfaultfd_ctx)) {
1137
1138
1139
1140 if (next && end == next->vm_start &&
1141 mpol_equal(policy, vma_policy(next)) &&
1142 can_vma_merge_before(next, vm_flags,
1143 anon_vma, file,
1144 pgoff+pglen,
1145 vm_userfaultfd_ctx) &&
1146 is_mergeable_anon_vma(prev->anon_vma,
1147 next->anon_vma, NULL)) {
1148
1149 err = __vma_adjust(prev, prev->vm_start,
1150 next->vm_end, prev->vm_pgoff, NULL,
1151 prev);
1152 } else
1153 err = __vma_adjust(prev, prev->vm_start,
1154 end, prev->vm_pgoff, NULL, prev);
1155 if (err)
1156 return NULL;
1157 khugepaged_enter_vma_merge(prev, vm_flags);
1158 return prev;
1159 }
1160
1161
1162
1163
1164 if (next && end == next->vm_start &&
1165 mpol_equal(policy, vma_policy(next)) &&
1166 can_vma_merge_before(next, vm_flags,
1167 anon_vma, file, pgoff+pglen,
1168 vm_userfaultfd_ctx)) {
1169 if (prev && addr < prev->vm_end)
1170 err = __vma_adjust(prev, prev->vm_start,
1171 addr, prev->vm_pgoff, NULL, next);
1172 else {
1173 err = __vma_adjust(area, addr, next->vm_end,
1174 next->vm_pgoff - pglen, NULL, next);
1175
1176
1177
1178
1179
1180 area = next;
1181 }
1182 if (err)
1183 return NULL;
1184 khugepaged_enter_vma_merge(area, vm_flags);
1185 return area;
1186 }
1187
1188 return NULL;
1189}
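
/*
 * Rough compatibility check to quickly see if it's even worth looking at
 * sharing an anon_vma: the vmas must be adjacent, map the same file at a
 * contiguous offset, have the same memory policy and essentially the same
 * flags.
 */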
1204static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1205{
1206 return a->vm_end == b->vm_start &&
1207 mpol_equal(vma_policy(a), vma_policy(b)) &&
1208 a->vm_file == b->vm_file &&
1209 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1210 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1211}
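
/*
 * Return old->anon_vma if vmas a and b look compatible and old is the only
 * user of that anon_vma, so a neighbouring vma may reuse it instead of
 * allocating a new one.  The READ_ONCE() tolerates a racing update; a
 * spurious NULL merely means no reuse this time.
 */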
1235static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1236{
1237 if (anon_vma_compatible(a, b)) {
1238 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1239
1240 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1241 return anon_vma;
1242 }
1243 return NULL;
1244}
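
/*
 * find_mergeable_anon_vma() is used by anon_vma_prepare() to check whether
 * an anon_vma can be reused from a neighbouring mergeable vma rather than
 * allocating a fresh one.
 */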
1254struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1255{
1256 struct anon_vma *anon_vma;
1257 struct vm_area_struct *near;
1258
1259 near = vma->vm_next;
1260 if (!near)
1261 goto try_prev;
1262
1263 anon_vma = reusable_anon_vma(near, vma, near);
1264 if (anon_vma)
1265 return anon_vma;
1266try_prev:
1267 near = vma->vm_prev;
1268 if (!near)
1269 goto none;
1270
1271 anon_vma = reusable_anon_vma(near, near, vma);
1272 if (anon_vma)
1273 return anon_vma;
1274none:
1275
1276
1277
1278
1279
1280
1281
1282
1283 return NULL;
1284}
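
/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr.
 */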
1290static inline unsigned long round_hint_to_min(unsigned long hint)
1291{
1292 hint &= PAGE_MASK;
1293 if (((void *)hint != NULL) &&
1294 (hint < mmap_min_addr))
1295 return PAGE_ALIGN(mmap_min_addr);
1296 return hint;
1297}
1298
1299static inline int mlock_future_check(struct mm_struct *mm,
1300 unsigned long flags,
1301 unsigned long len)
1302{
1303 unsigned long locked, lock_limit;
1304
1305
1306 if (flags & VM_LOCKED) {
1307 locked = len >> PAGE_SHIFT;
1308 locked += mm->locked_vm;
1309 lock_limit = rlimit(RLIMIT_MEMLOCK);
1310 lock_limit >>= PAGE_SHIFT;
1311 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1312 return -EAGAIN;
1313 }
1314 return 0;
1315}
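
/*
 * do_mmap() does most of the mmap() work: it validates the request, picks
 * an address via get_unmapped_area(), translates the PROT_ and MAP_ bits
 * into VM_ flags and hands the result to mmap_region().  Called with
 * mmap_sem held for writing; *populate is set when the caller should
 * prefault the range afterwards (MAP_POPULATE or a locked mapping).
 */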
1320unsigned long do_mmap(struct file *file, unsigned long addr,
1321 unsigned long len, unsigned long prot,
1322 unsigned long flags, vm_flags_t vm_flags,
1323 unsigned long pgoff, unsigned long *populate,
1324 struct list_head *uf)
1325{
1326 struct mm_struct *mm = current->mm;
1327 int pkey = 0;
1328
1329 *populate = 0;
1330
1331 if (!len)
1332 return -EINVAL;
1333
1334
1335
1336
1337
1338
1339
1340 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1341 if (!(file && path_noexec(&file->f_path)))
1342 prot |= PROT_EXEC;
1343
1344 if (!(flags & MAP_FIXED))
1345 addr = round_hint_to_min(addr);
1346
1347
1348 len = PAGE_ALIGN(len);
1349 if (!len)
1350 return -ENOMEM;
1351
1352
1353 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1354 return -EOVERFLOW;
1355
1356
1357 if (mm->map_count > sysctl_max_map_count)
1358 return -ENOMEM;
1359
1360
1361
1362
1363 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1364 if (offset_in_page(addr))
1365 return addr;
1366
1367 if (prot == PROT_EXEC) {
1368 pkey = execute_only_pkey(mm);
1369 if (pkey < 0)
1370 pkey = 0;
1371 }
1372
1373
1374
1375
1376
1377 vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1378 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1379
1380 if (flags & MAP_LOCKED)
1381 if (!can_do_mlock())
1382 return -EPERM;
1383
1384 if (mlock_future_check(mm, vm_flags, len))
1385 return -EAGAIN;
1386
1387 if (file) {
1388 struct inode *inode = file_inode(file);
1389
1390 switch (flags & MAP_TYPE) {
1391 case MAP_SHARED:
1392 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1393 return -EACCES;
1394
1395
1396
1397
1398
1399 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1400 return -EACCES;
1401
1402
1403
1404
1405 if (locks_verify_locked(file))
1406 return -EAGAIN;
1407
1408 vm_flags |= VM_SHARED | VM_MAYSHARE;
1409 if (!(file->f_mode & FMODE_WRITE))
1410 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1411
1412
1413 case MAP_PRIVATE:
1414 if (!(file->f_mode & FMODE_READ))
1415 return -EACCES;
1416 if (path_noexec(&file->f_path)) {
1417 if (vm_flags & VM_EXEC)
1418 return -EPERM;
1419 vm_flags &= ~VM_MAYEXEC;
1420 }
1421
1422 if (!file->f_op->mmap)
1423 return -ENODEV;
1424 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1425 return -EINVAL;
1426 break;
1427
1428 default:
1429 return -EINVAL;
1430 }
1431 } else {
1432 switch (flags & MAP_TYPE) {
1433 case MAP_SHARED:
1434 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1435 return -EINVAL;
1436
1437
1438
1439 pgoff = 0;
1440 vm_flags |= VM_SHARED | VM_MAYSHARE;
1441 break;
1442 case MAP_PRIVATE:
1443
1444
1445
1446 pgoff = addr >> PAGE_SHIFT;
1447 break;
1448 default:
1449 return -EINVAL;
1450 }
1451 }
1452
1453
1454
1455
1456
1457 if (flags & MAP_NORESERVE) {
1458
1459 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1460 vm_flags |= VM_NORESERVE;
1461
1462
1463 if (file && is_file_hugepages(file))
1464 vm_flags |= VM_NORESERVE;
1465 }
1466
1467 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1468 if (!IS_ERR_VALUE(addr) &&
1469 ((vm_flags & VM_LOCKED) ||
1470 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1471 *populate = len;
1472 return addr;
1473}
1474
1475SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1476 unsigned long, prot, unsigned long, flags,
1477 unsigned long, fd, unsigned long, pgoff)
1478{
1479 struct file *file = NULL;
1480 unsigned long retval;
1481
1482 if (!(flags & MAP_ANONYMOUS)) {
1483 audit_mmap_fd(fd, flags);
1484 file = fget(fd);
1485 if (!file)
1486 return -EBADF;
1487 if (is_file_hugepages(file))
1488 len = ALIGN(len, huge_page_size(hstate_file(file)));
1489 retval = -EINVAL;
1490 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1491 goto out_fput;
1492 } else if (flags & MAP_HUGETLB) {
1493 struct user_struct *user = NULL;
1494 struct hstate *hs;
1495
1496 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1497 if (!hs)
1498 return -EINVAL;
1499
1500 len = ALIGN(len, huge_page_size(hs));
1501
1502
1503
1504
1505
1506
1507 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1508 VM_NORESERVE,
1509 &user, HUGETLB_ANONHUGE_INODE,
1510 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1511 if (IS_ERR(file))
1512 return PTR_ERR(file);
1513 }
1514
1515 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1516
1517 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1518out_fput:
1519 if (file)
1520 fput(file);
1521 return retval;
1522}
1523
1524#ifdef __ARCH_WANT_SYS_OLD_MMAP
1525struct mmap_arg_struct {
1526 unsigned long addr;
1527 unsigned long len;
1528 unsigned long prot;
1529 unsigned long flags;
1530 unsigned long fd;
1531 unsigned long offset;
1532};
1533
1534SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1535{
1536 struct mmap_arg_struct a;
1537
1538 if (copy_from_user(&a, arg, sizeof(a)))
1539 return -EFAULT;
1540 if (offset_in_page(a.offset))
1541 return -EINVAL;
1542
1543 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1544 a.offset >> PAGE_SHIFT);
1545}
1546#endif
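
/*
 * Some shared mappings want their pages mapped read-only so that the first
 * write triggers a fault and dirty tracking can be done there.  Return 1
 * if vm_page_prot should be downgraded to the write-notify (non-shared)
 * protection for this vma.
 */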
1554int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1555{
1556 vm_flags_t vm_flags = vma->vm_flags;
1557 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1558
1559
1560 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1561 return 0;
1562
1563
1564 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1565 return 1;
1566
1567
1568
1569 if (pgprot_val(vm_page_prot) !=
1570 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1571 return 0;
1572
1573
1574 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1575 return 1;
1576
1577
1578 if (vm_flags & VM_PFNMAP)
1579 return 0;
1580
1581
1582 return vma->vm_file && vma->vm_file->f_mapping &&
1583 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1584}
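
/*
 * We account for memory if it's a private writable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */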
1590static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1591{
1592
1593
1594
1595
1596 if (file && is_file_hugepages(file))
1597 return 0;
1598
1599 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1600}
1601
1602unsigned long mmap_region(struct file *file, unsigned long addr,
1603 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1604 struct list_head *uf)
1605{
1606 struct mm_struct *mm = current->mm;
1607 struct vm_area_struct *vma, *prev;
1608 int error;
1609 struct rb_node **rb_link, *rb_parent;
1610 unsigned long charged = 0;
1611
1612
1613 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1614 unsigned long nr_pages;
1615
1616
1617
1618
1619
1620 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1621
1622 if (!may_expand_vm(mm, vm_flags,
1623 (len >> PAGE_SHIFT) - nr_pages))
1624 return -ENOMEM;
1625 }
1626
1627
1628 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1629 &rb_parent)) {
1630 if (do_munmap(mm, addr, len, uf))
1631 return -ENOMEM;
1632 }
1633
1634
1635
1636
1637 if (accountable_mapping(file, vm_flags)) {
1638 charged = len >> PAGE_SHIFT;
1639 if (security_vm_enough_memory_mm(mm, charged))
1640 return -ENOMEM;
1641 vm_flags |= VM_ACCOUNT;
1642 }
1643
1644
1645
1646
1647 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1648 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1649 if (vma)
1650 goto out;
1651
1652
1653
1654
1655
1656
1657 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1658 if (!vma) {
1659 error = -ENOMEM;
1660 goto unacct_error;
1661 }
1662
1663 vma->vm_mm = mm;
1664 vma->vm_start = addr;
1665 vma->vm_end = addr + len;
1666 vma->vm_flags = vm_flags;
1667 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1668 vma->vm_pgoff = pgoff;
1669 INIT_LIST_HEAD(&vma->anon_vma_chain);
1670
1671 if (file) {
1672 if (vm_flags & VM_DENYWRITE) {
1673 error = deny_write_access(file);
1674 if (error)
1675 goto free_vma;
1676 }
1677 if (vm_flags & VM_SHARED) {
1678 error = mapping_map_writable(file->f_mapping);
1679 if (error)
1680 goto allow_write_and_free_vma;
1681 }
1682
1683
1684
1685
1686
1687
1688 vma->vm_file = get_file(file);
1689 error = call_mmap(file, vma);
1690 if (error)
1691 goto unmap_and_free_vma;
1692
1693
1694
1695
1696
1697
1698
1699
1700 WARN_ON_ONCE(addr != vma->vm_start);
1701
1702 addr = vma->vm_start;
1703 vm_flags = vma->vm_flags;
1704 } else if (vm_flags & VM_SHARED) {
1705 error = shmem_zero_setup(vma);
1706 if (error)
1707 goto free_vma;
1708 }
1709
1710 vma_link(mm, vma, prev, rb_link, rb_parent);
1711
1712 if (file) {
1713 if (vm_flags & VM_SHARED)
1714 mapping_unmap_writable(file->f_mapping);
1715 if (vm_flags & VM_DENYWRITE)
1716 allow_write_access(file);
1717 }
1718 file = vma->vm_file;
1719out:
1720 perf_event_mmap(vma);
1721
1722 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1723 if (vm_flags & VM_LOCKED) {
1724 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1725 vma == get_gate_vma(current->mm)))
1726 mm->locked_vm += (len >> PAGE_SHIFT);
1727 else
1728 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1729 }
1730
1731 if (file)
1732 uprobe_mmap(vma);
1733
1734
1735
1736
1737
1738
1739
1740
1741 vma->vm_flags |= VM_SOFTDIRTY;
1742
1743 vma_set_page_prot(vma);
1744
1745 return addr;
1746
1747unmap_and_free_vma:
1748 vma->vm_file = NULL;
1749 fput(file);
1750
1751
1752 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1753 charged = 0;
1754 if (vm_flags & VM_SHARED)
1755 mapping_unmap_writable(file->f_mapping);
1756allow_write_and_free_vma:
1757 if (vm_flags & VM_DENYWRITE)
1758 allow_write_access(file);
1759free_vma:
1760 kmem_cache_free(vm_area_cachep, vma);
1761unacct_error:
1762 if (charged)
1763 vm_unacct_memory(charged);
1764 return error;
1765}
1766
1767unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1768{
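	/*
	 * Search the rbtree for the first vma whose preceding gap is big
	 * enough: gap_start (the previous vma's guarded end) must not exceed
	 * high_limit, gap_end (this vma's guarded start) must be at least
	 * low_limit, and gap_end - gap_start must cover the requested length.
	 */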
1777 struct mm_struct *mm = current->mm;
1778 struct vm_area_struct *vma;
1779 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1780
1781
1782 length = info->length + info->align_mask;
1783 if (length < info->length)
1784 return -ENOMEM;
1785
1786
1787 if (info->high_limit < length)
1788 return -ENOMEM;
1789 high_limit = info->high_limit - length;
1790
1791 if (info->low_limit > high_limit)
1792 return -ENOMEM;
1793 low_limit = info->low_limit + length;
1794
1795
1796 if (RB_EMPTY_ROOT(&mm->mm_rb))
1797 goto check_highest;
1798 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1799 if (vma->rb_subtree_gap < length)
1800 goto check_highest;
1801
1802 while (true) {
1803
1804 gap_end = vm_start_gap(vma);
1805 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1806 struct vm_area_struct *left =
1807 rb_entry(vma->vm_rb.rb_left,
1808 struct vm_area_struct, vm_rb);
1809 if (left->rb_subtree_gap >= length) {
1810 vma = left;
1811 continue;
1812 }
1813 }
1814
1815 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1816check_current:
1817
1818 if (gap_start > high_limit)
1819 return -ENOMEM;
1820 if (gap_end >= low_limit &&
1821 gap_end > gap_start && gap_end - gap_start >= length)
1822 goto found;
1823
1824
1825 if (vma->vm_rb.rb_right) {
1826 struct vm_area_struct *right =
1827 rb_entry(vma->vm_rb.rb_right,
1828 struct vm_area_struct, vm_rb);
1829 if (right->rb_subtree_gap >= length) {
1830 vma = right;
1831 continue;
1832 }
1833 }
1834
1835
1836 while (true) {
1837 struct rb_node *prev = &vma->vm_rb;
1838 if (!rb_parent(prev))
1839 goto check_highest;
1840 vma = rb_entry(rb_parent(prev),
1841 struct vm_area_struct, vm_rb);
1842 if (prev == vma->vm_rb.rb_left) {
1843 gap_start = vm_end_gap(vma->vm_prev);
1844 gap_end = vm_start_gap(vma);
1845 goto check_current;
1846 }
1847 }
1848 }
1849
1850check_highest:
1851
1852 gap_start = mm->highest_vm_end;
1853 gap_end = ULONG_MAX;
1854 if (gap_start > high_limit)
1855 return -ENOMEM;
1856
1857found:
1858
1859 if (gap_start < info->low_limit)
1860 gap_start = info->low_limit;
1861
1862
1863 gap_start += (info->align_offset - gap_start) & info->align_mask;
1864
1865 VM_BUG_ON(gap_start + info->length > info->high_limit);
1866 VM_BUG_ON(gap_start + info->length > gap_end);
1867 return gap_start;
1868}
1869
1870unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1871{
1872 struct mm_struct *mm = current->mm;
1873 struct vm_area_struct *vma;
1874 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1875
1876
1877 length = info->length + info->align_mask;
1878 if (length < info->length)
1879 return -ENOMEM;
1880
1881
1882
1883
1884
1885 gap_end = info->high_limit;
1886 if (gap_end < length)
1887 return -ENOMEM;
1888 high_limit = gap_end - length;
1889
1890 if (info->low_limit > high_limit)
1891 return -ENOMEM;
1892 low_limit = info->low_limit + length;
1893
1894
1895 gap_start = mm->highest_vm_end;
1896 if (gap_start <= high_limit)
1897 goto found_highest;
1898
1899
1900 if (RB_EMPTY_ROOT(&mm->mm_rb))
1901 return -ENOMEM;
1902 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1903 if (vma->rb_subtree_gap < length)
1904 return -ENOMEM;
1905
1906 while (true) {
1907
1908 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1909 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1910 struct vm_area_struct *right =
1911 rb_entry(vma->vm_rb.rb_right,
1912 struct vm_area_struct, vm_rb);
1913 if (right->rb_subtree_gap >= length) {
1914 vma = right;
1915 continue;
1916 }
1917 }
1918
1919check_current:
1920
1921 gap_end = vm_start_gap(vma);
1922 if (gap_end < low_limit)
1923 return -ENOMEM;
1924 if (gap_start <= high_limit &&
1925 gap_end > gap_start && gap_end - gap_start >= length)
1926 goto found;
1927
1928
1929 if (vma->vm_rb.rb_left) {
1930 struct vm_area_struct *left =
1931 rb_entry(vma->vm_rb.rb_left,
1932 struct vm_area_struct, vm_rb);
1933 if (left->rb_subtree_gap >= length) {
1934 vma = left;
1935 continue;
1936 }
1937 }
1938
1939
1940 while (true) {
1941 struct rb_node *prev = &vma->vm_rb;
1942 if (!rb_parent(prev))
1943 return -ENOMEM;
1944 vma = rb_entry(rb_parent(prev),
1945 struct vm_area_struct, vm_rb);
1946 if (prev == vma->vm_rb.rb_right) {
1947 gap_start = vma->vm_prev ?
1948 vm_end_gap(vma->vm_prev) : 0;
1949 goto check_current;
1950 }
1951 }
1952 }
1953
1954found:
1955
1956 if (gap_end > info->high_limit)
1957 gap_end = info->high_limit;
1958
1959found_highest:
1960
1961 gap_end -= info->length;
1962 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1963
1964 VM_BUG_ON(gap_end < info->low_limit);
1965 VM_BUG_ON(gap_end < gap_start);
1966 return gap_end;
1967}
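
/*
 * Get an address range which is currently unmapped, searching bottom-up
 * from mm->mmap_base.  On error the returned value has its low bits set
 * (callers check it with offset_in_page()), e.g. -ENOMEM.
 */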
1980#ifndef HAVE_ARCH_UNMAPPED_AREA
1981unsigned long
1982arch_get_unmapped_area(struct file *filp, unsigned long addr,
1983 unsigned long len, unsigned long pgoff, unsigned long flags)
1984{
1985 struct mm_struct *mm = current->mm;
1986 struct vm_area_struct *vma, *prev;
1987 struct vm_unmapped_area_info info;
1988
1989 if (len > TASK_SIZE - mmap_min_addr)
1990 return -ENOMEM;
1991
1992 if (flags & MAP_FIXED)
1993 return addr;
1994
1995 if (addr) {
1996 addr = PAGE_ALIGN(addr);
1997 vma = find_vma_prev(mm, addr, &prev);
1998 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1999 (!vma || addr + len <= vm_start_gap(vma)) &&
2000 (!prev || addr >= vm_end_gap(prev)))
2001 return addr;
2002 }
2003
2004 info.flags = 0;
2005 info.length = len;
2006 info.low_limit = mm->mmap_base;
2007 info.high_limit = TASK_SIZE;
2008 info.align_mask = 0;
2009 return vm_unmapped_area(&info);
2010}
2011#endif
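
/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base).
 */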
2017#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2018unsigned long
2019arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
2020 const unsigned long len, const unsigned long pgoff,
2021 const unsigned long flags)
2022{
2023 struct vm_area_struct *vma, *prev;
2024 struct mm_struct *mm = current->mm;
2025 unsigned long addr = addr0;
2026 struct vm_unmapped_area_info info;
2027
2028
2029 if (len > TASK_SIZE - mmap_min_addr)
2030 return -ENOMEM;
2031
2032 if (flags & MAP_FIXED)
2033 return addr;
2034
2035
2036 if (addr) {
2037 addr = PAGE_ALIGN(addr);
2038 vma = find_vma_prev(mm, addr, &prev);
2039 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
2040 (!vma || addr + len <= vm_start_gap(vma)) &&
2041 (!prev || addr >= vm_end_gap(prev)))
2042 return addr;
2043 }
2044
2045 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2046 info.length = len;
2047 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2048 info.high_limit = mm->mmap_base;
2049 info.align_mask = 0;
2050 addr = vm_unmapped_area(&info);
2051
2052
2053
2054
2055
2056
2057
2058 if (offset_in_page(addr)) {
2059 VM_BUG_ON(addr != -ENOMEM);
2060 info.flags = 0;
2061 info.low_limit = TASK_UNMAPPED_BASE;
2062 info.high_limit = TASK_SIZE;
2063 addr = vm_unmapped_area(&info);
2064 }
2065
2066 return addr;
2067}
2068#endif
2069
2070unsigned long
2071get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2072 unsigned long pgoff, unsigned long flags)
2073{
2074 unsigned long (*get_area)(struct file *, unsigned long,
2075 unsigned long, unsigned long, unsigned long);
2076
2077 unsigned long error = arch_mmap_check(addr, len, flags);
2078 if (error)
2079 return error;
2080
2081
2082 if (len > TASK_SIZE)
2083 return -ENOMEM;
2084
2085 get_area = current->mm->get_unmapped_area;
2086 if (file) {
2087 if (file->f_op->get_unmapped_area)
2088 get_area = file->f_op->get_unmapped_area;
2089 } else if (flags & MAP_SHARED) {
2090
2091
2092
2093
2094
2095 pgoff = 0;
2096 get_area = shmem_get_unmapped_area;
2097 }
2098
2099 addr = get_area(file, addr, len, pgoff, flags);
2100 if (IS_ERR_VALUE(addr))
2101 return addr;
2102
2103 if (addr > TASK_SIZE - len)
2104 return -ENOMEM;
2105 if (offset_in_page(addr))
2106 return -EINVAL;
2107
2108 error = security_mmap_addr(addr);
2109 return error ? error : addr;
2110}
2111
2112EXPORT_SYMBOL(get_unmapped_area);

/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
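
/*
 * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
 */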
2151struct vm_area_struct *
2152find_vma_prev(struct mm_struct *mm, unsigned long addr,
2153 struct vm_area_struct **pprev)
2154{
2155 struct vm_area_struct *vma;
2156
2157 vma = find_vma(mm, addr);
2158 if (vma) {
2159 *pprev = vma->vm_prev;
2160 } else {
2161 struct rb_node *rb_node = mm->mm_rb.rb_node;
2162 *pprev = NULL;
2163 while (rb_node) {
2164 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2165 rb_node = rb_node->rb_right;
2166 }
2167 }
2168 return vma;
2169}
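
/*
 * Verify that the stack growth is acceptable and update the mm accounting:
 * check the address-space, stack and mlock limits, refuse growth into a
 * hugepage-only range, and charge the new pages with the security module.
 */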
2176static int acct_stack_growth(struct vm_area_struct *vma,
2177 unsigned long size, unsigned long grow)
2178{
2179 struct mm_struct *mm = vma->vm_mm;
2180 unsigned long new_start;
2181
2182
2183 if (!may_expand_vm(mm, vma->vm_flags, grow))
2184 return -ENOMEM;
2185
2186
2187 if (size > rlimit(RLIMIT_STACK))
2188 return -ENOMEM;
2189
2190
2191 if (vma->vm_flags & VM_LOCKED) {
2192 unsigned long locked;
2193 unsigned long limit;
2194 locked = mm->locked_vm + grow;
2195 limit = rlimit(RLIMIT_MEMLOCK);
2196 limit >>= PAGE_SHIFT;
2197 if (locked > limit && !capable(CAP_IPC_LOCK))
2198 return -ENOMEM;
2199 }
2200
2201
2202 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2203 vma->vm_end - size;
2204 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2205 return -EFAULT;
2206
2207
2208
2209
2210
2211 if (security_vm_enough_memory_mm(mm, grow))
2212 return -ENOMEM;
2213
2214 return 0;
2215}
2216
2217#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
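/*
 * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */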
2222int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2223{
2224 struct mm_struct *mm = vma->vm_mm;
2225 struct vm_area_struct *next;
2226 unsigned long gap_addr;
2227 int error = 0;
2228
2229 if (!(vma->vm_flags & VM_GROWSUP))
2230 return -EFAULT;
2231
2232
2233 address &= PAGE_MASK;
2234 if (address >= (TASK_SIZE & PAGE_MASK))
2235 return -ENOMEM;
2236 address += PAGE_SIZE;
2237
2238
2239 gap_addr = address + stack_guard_gap;
2240
2241
2242 if (gap_addr < address || gap_addr > TASK_SIZE)
2243 gap_addr = TASK_SIZE;
2244
2245 next = vma->vm_next;
2246 if (next && next->vm_start < gap_addr &&
2247 (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
2248 if (!(next->vm_flags & VM_GROWSUP))
2249 return -ENOMEM;
2250
2251 }
2252
2253
2254 if (unlikely(anon_vma_prepare(vma)))
2255 return -ENOMEM;
2256
2257
2258
2259
2260
2261
2262 anon_vma_lock_write(vma->anon_vma);
2263
2264
2265 if (address > vma->vm_end) {
2266 unsigned long size, grow;
2267
2268 size = address - vma->vm_start;
2269 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2270
2271 error = -ENOMEM;
2272 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2273 error = acct_stack_growth(vma, size, grow);
2274 if (!error) {
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286 spin_lock(&mm->page_table_lock);
2287 if (vma->vm_flags & VM_LOCKED)
2288 mm->locked_vm += grow;
2289 vm_stat_account(mm, vma->vm_flags, grow);
2290 anon_vma_interval_tree_pre_update_vma(vma);
2291 vma->vm_end = address;
2292 anon_vma_interval_tree_post_update_vma(vma);
2293 if (vma->vm_next)
2294 vma_gap_update(vma->vm_next);
2295 else
2296 mm->highest_vm_end = vm_end_gap(vma);
2297 spin_unlock(&mm->page_table_lock);
2298
2299 perf_event_mmap(vma);
2300 }
2301 }
2302 }
2303 anon_vma_unlock_write(vma->anon_vma);
2304 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2305 validate_mm(mm);
2306 return error;
2307}
2308#endif
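
/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma
 * downwards.
 */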
2313int expand_downwards(struct vm_area_struct *vma,
2314 unsigned long address)
2315{
2316 struct mm_struct *mm = vma->vm_mm;
2317 struct vm_area_struct *prev;
2318 int error;
2319
2320 address &= PAGE_MASK;
2321 error = security_mmap_addr(address);
2322 if (error)
2323 return error;
2324
2325
2326 prev = vma->vm_prev;
2327
2328 if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
2329 (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
2330 if (address - prev->vm_end < stack_guard_gap)
2331 return -ENOMEM;
2332 }
2333
2334
2335 if (unlikely(anon_vma_prepare(vma)))
2336 return -ENOMEM;
2337
2338
2339
2340
2341
2342
2343 anon_vma_lock_write(vma->anon_vma);
2344
2345
2346 if (address < vma->vm_start) {
2347 unsigned long size, grow;
2348
2349 size = vma->vm_end - address;
2350 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2351
2352 error = -ENOMEM;
2353 if (grow <= vma->vm_pgoff) {
2354 error = acct_stack_growth(vma, size, grow);
2355 if (!error) {
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367 spin_lock(&mm->page_table_lock);
2368 if (vma->vm_flags & VM_LOCKED)
2369 mm->locked_vm += grow;
2370 vm_stat_account(mm, vma->vm_flags, grow);
2371 anon_vma_interval_tree_pre_update_vma(vma);
2372 vma->vm_start = address;
2373 vma->vm_pgoff -= grow;
2374 anon_vma_interval_tree_post_update_vma(vma);
2375 vma_gap_update(vma);
2376 spin_unlock(&mm->page_table_lock);
2377
2378 perf_event_mmap(vma);
2379 }
2380 }
2381 }
2382 anon_vma_unlock_write(vma->anon_vma);
2383 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2384 validate_mm(mm);
2385 return error;
2386}

/* enforced gap between the expanding stack and other mappings */
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

static int __init cmdline_parse_stack_guard_gap(char *p)
{
	unsigned long val;
	char *endptr;

	val = simple_strtoul(p, &endptr, 10);
	if (!*endptr)
		stack_guard_gap = val << PAGE_SHIFT;

	return 0;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2403
2404#ifdef CONFIG_STACK_GROWSUP
2405int expand_stack(struct vm_area_struct *vma, unsigned long address)
2406{
2407 return expand_upwards(vma, address);
2408}
2409
2410struct vm_area_struct *
2411find_extend_vma(struct mm_struct *mm, unsigned long addr)
2412{
2413 struct vm_area_struct *vma, *prev;
2414
2415 addr &= PAGE_MASK;
2416 vma = find_vma_prev(mm, addr, &prev);
2417 if (vma && (vma->vm_start <= addr))
2418 return vma;
2419 if (!prev || expand_stack(prev, addr))
2420 return NULL;
2421 if (prev->vm_flags & VM_LOCKED)
2422 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2423 return prev;
2424}
2425#else
2426int expand_stack(struct vm_area_struct *vma, unsigned long address)
2427{
2428 return expand_downwards(vma, address);
2429}
2430
2431struct vm_area_struct *
2432find_extend_vma(struct mm_struct *mm, unsigned long addr)
2433{
2434 struct vm_area_struct *vma;
2435 unsigned long start;
2436
2437 addr &= PAGE_MASK;
2438 vma = find_vma(mm, addr);
2439 if (!vma)
2440 return NULL;
2441 if (vma->vm_start <= addr)
2442 return vma;
2443 if (!(vma->vm_flags & VM_GROWSDOWN))
2444 return NULL;
2445 start = vma->vm_start;
2446 if (expand_stack(vma, addr))
2447 return NULL;
2448 if (vma->vm_flags & VM_LOCKED)
2449 populate_vma_page_range(vma, addr, start, NULL);
2450 return vma;
2451}
2452#endif
2453
2454EXPORT_SYMBOL_GPL(find_extend_vma);
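
/*
 * Ok - we have the memory areas we should free on the vma list,
 * so release them, and do the vma updates.
 *
 * Called with the mm semaphore held.
 */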
2462static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2463{
2464 unsigned long nr_accounted = 0;
2465
2466
2467 update_hiwater_vm(mm);
2468 do {
2469 long nrpages = vma_pages(vma);
2470
2471 if (vma->vm_flags & VM_ACCOUNT)
2472 nr_accounted += nrpages;
2473 vm_stat_account(mm, vma->vm_flags, -nrpages);
2474 vma = remove_vma(vma);
2475 } while (vma);
2476 vm_unacct_memory(nr_accounted);
2477 validate_mm(mm);
2478}
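
/*
 * Get rid of page table information in the indicated region.
 *
 * Called with the mm semaphore held.
 */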
2485static void unmap_region(struct mm_struct *mm,
2486 struct vm_area_struct *vma, struct vm_area_struct *prev,
2487 unsigned long start, unsigned long end)
2488{
2489 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2490 struct mmu_gather tlb;
2491
2492 lru_add_drain();
2493 tlb_gather_mmu(&tlb, mm, start, end);
2494 update_hiwater_rss(mm);
2495 unmap_vmas(&tlb, vma, start, end);
2496 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2497 next ? next->vm_start : USER_PGTABLES_CEILING);
2498 tlb_finish_mmu(&tlb, start, end);
2499}
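
/*
 * Create a list of vma's touched by the unmap, removing them from the mm's
 * vma list as we go.
 */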
2505static void
2506detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2507 struct vm_area_struct *prev, unsigned long end)
2508{
2509 struct vm_area_struct **insertion_point;
2510 struct vm_area_struct *tail_vma = NULL;
2511
2512 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2513 vma->vm_prev = NULL;
2514 do {
2515 vma_rb_erase(vma, &mm->mm_rb);
2516 mm->map_count--;
2517 tail_vma = vma;
2518 vma = vma->vm_next;
2519 } while (vma && vma->vm_start < end);
2520 *insertion_point = vma;
2521 if (vma) {
2522 vma->vm_prev = prev;
2523 vma_gap_update(vma);
2524 } else
2525 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2526 tail_vma->vm_next = NULL;
2527
2528
2529 vmacache_invalidate(mm);
2530}
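
/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where
 * it has already been checked or doesn't make sense to fail.
 */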
2536int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2537 unsigned long addr, int new_below)
2538{
2539 struct vm_area_struct *new;
2540 int err;
2541
2542 if (is_vm_hugetlb_page(vma) && (addr &
2543 ~(huge_page_mask(hstate_vma(vma)))))
2544 return -EINVAL;
2545
2546 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2547 if (!new)
2548 return -ENOMEM;
2549
2550
2551 *new = *vma;
2552
2553 INIT_LIST_HEAD(&new->anon_vma_chain);
2554
2555 if (new_below)
2556 new->vm_end = addr;
2557 else {
2558 new->vm_start = addr;
2559 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2560 }
2561
2562 err = vma_dup_policy(vma, new);
2563 if (err)
2564 goto out_free_vma;
2565
2566 err = anon_vma_clone(new, vma);
2567 if (err)
2568 goto out_free_mpol;
2569
2570 if (new->vm_file)
2571 get_file(new->vm_file);
2572
2573 if (new->vm_ops && new->vm_ops->open)
2574 new->vm_ops->open(new);
2575
2576 if (new_below)
2577 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2578 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2579 else
2580 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2581
2582
2583 if (!err)
2584 return 0;
2585
2586
2587 if (new->vm_ops && new->vm_ops->close)
2588 new->vm_ops->close(new);
2589 if (new->vm_file)
2590 fput(new->vm_file);
2591 unlink_anon_vmas(new);
2592 out_free_mpol:
2593 mpol_put(vma_policy(new));
2594 out_free_vma:
2595 kmem_cache_free(vm_area_cachep, new);
2596 return err;
2597}
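
/*
 * Split a vma into two pieces at address 'addr'; a new vma is allocated
 * for either the first part or the tail.
 */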
2603int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2604 unsigned long addr, int new_below)
2605{
2606 if (mm->map_count >= sysctl_max_map_count)
2607 return -ENOMEM;
2608
2609 return __split_vma(mm, vma, addr, new_below);
2610}
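
/*
 * do_munmap() - unmap [start, start+len) from mm: split any vmas that
 * straddle the boundaries, detach the affected vmas from the mm, free
 * their page tables and release the areas.  Partial unmaps are handled.
 * Called with mmap_sem held for writing.
 */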
2617int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2618 struct list_head *uf)
2619{
2620 unsigned long end;
2621 struct vm_area_struct *vma, *prev, *last;
2622
2623 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2624 return -EINVAL;
2625
2626 len = PAGE_ALIGN(len);
2627 if (len == 0)
2628 return -EINVAL;
2629
2630
2631 vma = find_vma(mm, start);
2632 if (!vma)
2633 return 0;
2634 prev = vma->vm_prev;
2635
2636
2637
2638 end = start + len;
2639 if (vma->vm_start >= end)
2640 return 0;
2641
2642 if (uf) {
2643 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2644
2645 if (error)
2646 return error;
2647 }
2648
2649
2650
2651
2652
2653
2654
2655
2656 if (start > vma->vm_start) {
2657 int error;
2658
2659
2660
2661
2662
2663
2664 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2665 return -ENOMEM;
2666
2667 error = __split_vma(mm, vma, start, 0);
2668 if (error)
2669 return error;
2670 prev = vma;
2671 }
2672
2673
2674 last = find_vma(mm, end);
2675 if (last && end > last->vm_start) {
2676 int error = __split_vma(mm, last, end, 1);
2677 if (error)
2678 return error;
2679 }
2680 vma = prev ? prev->vm_next : mm->mmap;
2681
2682
2683
2684
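	/* Unlock any mlock()ed ranges before detaching the vmas. */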
2685 if (mm->locked_vm) {
2686 struct vm_area_struct *tmp = vma;
2687 while (tmp && tmp->vm_start < end) {
2688 if (tmp->vm_flags & VM_LOCKED) {
2689 mm->locked_vm -= vma_pages(tmp);
2690 munlock_vma_pages_all(tmp);
2691 }
2692 tmp = tmp->vm_next;
2693 }
2694 }
2695
2696
2697
2698
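	/* Detach the vmas from the mm, then unmap the pages they cover. */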
2699 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2700 unmap_region(mm, vma, prev, start, end);
2701
2702 arch_unmap(mm, vma, start, end);
2703
2704
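	/* Free the detached vmas and fix up the remaining VM accounting. */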
2705 remove_vma_list(mm, vma);
2706
2707 return 0;
2708}
2709
2710int vm_munmap(unsigned long start, size_t len)
2711{
2712 int ret;
2713 struct mm_struct *mm = current->mm;
2714 LIST_HEAD(uf);
2715
2716 if (down_write_killable(&mm->mmap_sem))
2717 return -EINTR;
2718
2719 ret = do_munmap(mm, start, len, &uf);
2720 up_write(&mm->mmap_sem);
2721 userfaultfd_unmap_complete(mm, &uf);
2722 return ret;
2723}
2724EXPORT_SYMBOL(vm_munmap);
2725
2726SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2727{
2728 profile_munmap(addr);
2729 return vm_munmap(addr, len);
2730}
2731
2732
2733
2734
2735
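/*
 * Emulation of the deprecated remap_file_pages() syscall: the request is
 * rewritten as a fresh MAP_SHARED|MAP_FIXED mapping of the same file at
 * the given page offset.
 */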
2736SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2737 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2738{
2739
2740 struct mm_struct *mm = current->mm;
2741 struct vm_area_struct *vma;
2742 unsigned long populate = 0;
2743 unsigned long ret = -EINVAL;
2744 struct file *file;
2745
2746 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
2747 current->comm, current->pid);
2748
2749 if (prot)
2750 return ret;
2751 start = start & PAGE_MASK;
2752 size = size & PAGE_MASK;
2753
2754 if (start + size <= start)
2755 return ret;
2756
2757
2758 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2759 return ret;
2760
2761 if (down_write_killable(&mm->mmap_sem))
2762 return -EINTR;
2763
2764 vma = find_vma(mm, start);
2765
2766 if (!vma || !(vma->vm_flags & VM_SHARED))
2767 goto out;
2768
2769 if (start < vma->vm_start)
2770 goto out;
2771
2772 if (start + size > vma->vm_end) {
2773 struct vm_area_struct *next;
2774
2775 for (next = vma->vm_next; next; next = next->vm_next) {
2776
2777 if (next->vm_start != next->vm_prev->vm_end)
2778 goto out;
2779
2780 if (next->vm_file != vma->vm_file)
2781 goto out;
2782
2783 if (next->vm_flags != vma->vm_flags)
2784 goto out;
2785
2786 if (start + size <= next->vm_end)
2787 break;
2788 }
2789
2790 if (!next)
2791 goto out;
2792 }
2793
2794 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2795 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2796 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2797
2798 flags &= MAP_NONBLOCK;
2799 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2800 if (vma->vm_flags & VM_LOCKED) {
2801 struct vm_area_struct *tmp;
2802 flags |= MAP_LOCKED;
2803
2804
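		/* Drop the mlock state on every vma overlapping the re-mapped range. */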
2805 for (tmp = vma; tmp && tmp->vm_start < start + size;
2806 tmp = tmp->vm_next) {
2807
2808
2809
2810
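			/*
			 * Split any huge pmd straddling the boundaries of
			 * the range before munlocking the overlap.
			 */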
2811 vma_adjust_trans_huge(tmp, start, start + size, 0);
2812
2813 munlock_vma_pages_range(tmp,
2814 max(tmp->vm_start, start),
2815 min(tmp->vm_end, start + size));
2816 }
2817 }
2818
2819 file = get_file(vma->vm_file);
2820 ret = do_mmap_pgoff(vma->vm_file, start, size,
2821 prot, flags, pgoff, &populate, NULL);
2822 fput(file);
2823out:
2824 up_write(&mm->mmap_sem);
2825 if (populate)
2826 mm_populate(ret, populate);
2827 if (!IS_ERR_VALUE(ret))
2828 ret = 0;
2829 return ret;
2830}
2831
2832static inline void verify_mm_writelocked(struct mm_struct *mm)
2833{
2834#ifdef CONFIG_DEBUG_VM
2835 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2836 WARN_ON(1);
2837 up_read(&mm->mmap_sem);
2838 }
2839#endif
2840}
2841
2842
2843
2844
2845
2846
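/*
 * A simplified do_mmap() for brk-style anonymous mappings: no file, no
 * meaningful pgoff, and nothing beyond VM_EXEC may be requested on top of
 * the default data flags.
 */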
2847static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
2848{
2849 struct mm_struct *mm = current->mm;
2850 struct vm_area_struct *vma, *prev;
2851 unsigned long len;
2852 struct rb_node **rb_link, *rb_parent;
2853 pgoff_t pgoff = addr >> PAGE_SHIFT;
2854 int error;
2855
2856 len = PAGE_ALIGN(request);
2857 if (len < request)
2858 return -ENOMEM;
2859 if (!len)
2860 return 0;
2861
2862
2863 if ((flags & (~VM_EXEC)) != 0)
2864 return -EINVAL;
2865 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2866
2867 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2868 if (offset_in_page(error))
2869 return error;
2870
2871 error = mlock_future_check(mm, mm->def_flags, len);
2872 if (error)
2873 return error;
2874
2875
2876
2877
2878
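	/*
	 * mm->mmap_sem must be held for writing here, to protect against
	 * another thread changing the mappings while we may sleep below.
	 */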
2879 verify_mm_writelocked(mm);
2880
2881
2882
2883
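	/* Clear any old mappings overlapping the requested range. */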
2884 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
2885 &rb_parent)) {
2886 if (do_munmap(mm, addr, len, uf))
2887 return -ENOMEM;
2888 }
2889
2890
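	/* Check against address space limits *after* clearing old maps. */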
2891 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
2892 return -ENOMEM;
2893
2894 if (mm->map_count > sysctl_max_map_count)
2895 return -ENOMEM;
2896
2897 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2898 return -ENOMEM;
2899
2900
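	/* Can we simply expand an existing anonymous mapping? */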
2901 vma = vma_merge(mm, prev, addr, addr + len, flags,
2902 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
2903 if (vma)
2904 goto out;
2905
2906
2907
2908
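	/* No merge was possible: allocate a fresh anonymous vma. */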
2909 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2910 if (!vma) {
2911 vm_unacct_memory(len >> PAGE_SHIFT);
2912 return -ENOMEM;
2913 }
2914
2915 INIT_LIST_HEAD(&vma->anon_vma_chain);
2916 vma->vm_mm = mm;
2917 vma->vm_start = addr;
2918 vma->vm_end = addr + len;
2919 vma->vm_pgoff = pgoff;
2920 vma->vm_flags = flags;
2921 vma->vm_page_prot = vm_get_page_prot(flags);
2922 vma_link(mm, vma, prev, rb_link, rb_parent);
2923out:
2924 perf_event_mmap(vma);
2925 mm->total_vm += len >> PAGE_SHIFT;
2926 mm->data_vm += len >> PAGE_SHIFT;
2927 if (flags & VM_LOCKED)
2928 mm->locked_vm += (len >> PAGE_SHIFT);
2929 vma->vm_flags |= VM_SOFTDIRTY;
2930 return 0;
2931}
2932
2933static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
2934{
2935 return do_brk_flags(addr, len, 0, uf);
2936}
2937
2938int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
2939{
2940 struct mm_struct *mm = current->mm;
2941 int ret;
2942 bool populate;
2943 LIST_HEAD(uf);
2944
2945 if (down_write_killable(&mm->mmap_sem))
2946 return -EINTR;
2947
2948 ret = do_brk_flags(addr, len, flags, &uf);
2949 populate = ((mm->def_flags & VM_LOCKED) != 0);
2950 up_write(&mm->mmap_sem);
2951 userfaultfd_unmap_complete(mm, &uf);
2952 if (populate && !ret)
2953 mm_populate(addr, len);
2954 return ret;
2955}
2956EXPORT_SYMBOL(vm_brk_flags);
2957
2958int vm_brk(unsigned long addr, unsigned long len)
2959{
2960 return vm_brk_flags(addr, len, 0);
2961}
2962EXPORT_SYMBOL(vm_brk);
2963
2964
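/* Tear down all mappings of an mm that is going away. */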
2965void exit_mmap(struct mm_struct *mm)
2966{
2967 struct mmu_gather tlb;
2968 struct vm_area_struct *vma;
2969 unsigned long nr_accounted = 0;
2970
2971
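	/* The mm is being torn down: let mmu notifiers release their state. */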
2972 mmu_notifier_release(mm);
2973
2974 if (mm->locked_vm) {
2975 vma = mm->mmap;
2976 while (vma) {
2977 if (vma->vm_flags & VM_LOCKED)
2978 munlock_vma_pages_all(vma);
2979 vma = vma->vm_next;
2980 }
2981 }
2982
2983 arch_exit_mmap(mm);
2984
2985 vma = mm->mmap;
2986 if (!vma)
2987 return;
2988
2989 lru_add_drain();
2990 flush_cache_mm(mm);
2991 tlb_gather_mmu(&tlb, mm, 0, -1);
2992
2993
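	/* Use the full 0..-1 range so every vma in the mm is unmapped. */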
2994 unmap_vmas(&tlb, vma, 0, -1);
2995
2996 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
2997 tlb_finish_mmu(&tlb, 0, -1);
2998
2999
3000
3001
3002
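	/*
	 * Walk the list again, this time freeing the vma structures and
	 * totting up the VM_ACCOUNT pages so they can be uncharged in one
	 * go below.
	 */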
3003 while (vma) {
3004 if (vma->vm_flags & VM_ACCOUNT)
3005 nr_accounted += vma_pages(vma);
3006 vma = remove_vma(vma);
3007 }
3008 vm_unacct_memory(nr_accounted);
3009}
3010
3011
3012
3013
3014
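/*
 * Insert the vm structure into the process list sorted by address and
 * into the inode's i_mmap tree if it is file-backed, charging the memory
 * first when the mapping is VM_ACCOUNT.
 */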
3015int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3016{
3017 struct vm_area_struct *prev;
3018 struct rb_node **rb_link, *rb_parent;
3019
3020 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3021 &prev, &rb_link, &rb_parent))
3022 return -ENOMEM;
3023 if ((vma->vm_flags & VM_ACCOUNT) &&
3024 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3025 return -ENOMEM;
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
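	/*
	 * The vm_pgoff of a purely anonymous vma is irrelevant until its
	 * first write fault, when the anon_vma and index are set.  Set it
	 * to mirror the virtual start address anyway, so merges and splits
	 * can reuse the existing pgoff checks and /proc/pid/maps stays
	 * consistent.
	 */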
3039 if (vma_is_anonymous(vma)) {
3040 BUG_ON(vma->anon_vma);
3041 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3042 }
3043
3044 vma_link(mm, vma, prev, rb_link, rb_parent);
3045 return 0;
3046}
3047
3048
3049
3050
3051
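/*
 * Copy the vma structure to a new location in the same mm, prior to
 * moving the page table entries, to effect an mremap move.
 */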
3052struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3053 unsigned long addr, unsigned long len, pgoff_t pgoff,
3054 bool *need_rmap_locks)
3055{
3056 struct vm_area_struct *vma = *vmap;
3057 unsigned long vma_start = vma->vm_start;
3058 struct mm_struct *mm = vma->vm_mm;
3059 struct vm_area_struct *new_vma, *prev;
3060 struct rb_node **rb_link, *rb_parent;
3061 bool faulted_in_anon_vma = true;
3062
3063
3064
3065
3066
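	/*
	 * If the anonymous vma has not been faulted yet, update the new
	 * pgoff to match the new location, to raise its chance of merging.
	 */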
3067 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3068 pgoff = addr >> PAGE_SHIFT;
3069 faulted_in_anon_vma = false;
3070 }
3071
3072 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3073 return NULL;
3074 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3075 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3076 vma->vm_userfaultfd_ctx);
3077 if (new_vma) {
3078
3079
3080
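		/* The source vma may have been merged into new_vma. */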
3081 if (unlikely(vma_start >= new_vma->vm_start &&
3082 vma_start < new_vma->vm_end)) {
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
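			/*
			 * vma_merge() can only have merged new_vma with the
			 * source vma itself if that vma has never been
			 * faulted in and its vm_pgoff was rebased above, so
			 * continuing to use it here is safe: nothing has
			 * been unmapped yet.
			 */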
3095 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3096 *vmap = vma = new_vma;
3097 }
3098 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3099 } else {
3100 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
3101 if (!new_vma)
3102 goto out;
3103 *new_vma = *vma;
3104 new_vma->vm_start = addr;
3105 new_vma->vm_end = addr + len;
3106 new_vma->vm_pgoff = pgoff;
3107 if (vma_dup_policy(vma, new_vma))
3108 goto out_free_vma;
3109 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
3110 if (anon_vma_clone(new_vma, vma))
3111 goto out_free_mempol;
3112 if (new_vma->vm_file)
3113 get_file(new_vma->vm_file);
3114 if (new_vma->vm_ops && new_vma->vm_ops->open)
3115 new_vma->vm_ops->open(new_vma);
3116 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3117 *need_rmap_locks = false;
3118 }
3119 return new_vma;
3120
3121out_free_mempol:
3122 mpol_put(vma_policy(new_vma));
3123out_free_vma:
3124 kmem_cache_free(vm_area_cachep, new_vma);
3125out:
3126 return NULL;
3127}
3128
3129
3130
3131
3132
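/*
 * Return true if the calling process may expand its vm space by the
 * passed number of pages.
 */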
3133bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3134{
3135 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3136 return false;
3137
3138 if (is_data_mapping(flags) &&
3139 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
3140
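		/* A zero soft limit is ignored while the hard limit still allows it. */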
3141 if (rlimit(RLIMIT_DATA) == 0 &&
3142 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3143 return true;
3144 if (!ignore_rlimit_data) {
3145 pr_warn_once("%s (%d): VmData %lu exceeds data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
3146 current->comm, current->pid,
3147 (mm->data_vm + npages) << PAGE_SHIFT,
3148 rlimit(RLIMIT_DATA));
3149 return false;
3150 }
3151 }
3152
3153 return true;
3154}
3155
3156void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3157{
3158 mm->total_vm += npages;
3159
3160 if (is_exec_mapping(flags))
3161 mm->exec_vm += npages;
3162 else if (is_stack_mapping(flags))
3163 mm->stack_vm += npages;
3164 else if (is_data_mapping(flags))
3165 mm->data_vm += npages;
3166}
3167
3168static int special_mapping_fault(struct vm_fault *vmf);
3169
3170
3171
3172
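/* An (empty) close hook is enough to keep special mappings from being merged. */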
3173static void special_mapping_close(struct vm_area_struct *vma)
3174{
3175}
3176
3177static const char *special_mapping_name(struct vm_area_struct *vma)
3178{
3179 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3180}
3181
3182static int special_mapping_mremap(struct vm_area_struct *new_vma)
3183{
3184 struct vm_special_mapping *sm = new_vma->vm_private_data;
3185
3186 if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
3187 return -EFAULT;
3188
3189 if (sm->mremap)
3190 return sm->mremap(sm, new_vma);
3191
3192 return 0;
3193}
3194
3195static const struct vm_operations_struct special_mapping_vmops = {
3196 .close = special_mapping_close,
3197 .fault = special_mapping_fault,
3198 .mremap = special_mapping_mremap,
3199 .name = special_mapping_name,
3200};
3201
3202static const struct vm_operations_struct legacy_special_mapping_vmops = {
3203 .close = special_mapping_close,
3204 .fault = special_mapping_fault,
3205};
3206
3207static int special_mapping_fault(struct vm_fault *vmf)
3208{
3209 struct vm_area_struct *vma = vmf->vma;
3210 pgoff_t pgoff;
3211 struct page **pages;
3212
3213 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3214 pages = vma->vm_private_data;
3215 } else {
3216 struct vm_special_mapping *sm = vma->vm_private_data;
3217
3218 if (sm->fault)
3219 return sm->fault(sm, vmf->vma, vmf);
3220
3221 pages = sm->pages;
3222 }
3223
3224 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3225 pgoff--;
3226
3227 if (*pages) {
3228 struct page *page = *pages;
3229 get_page(page);
3230 vmf->page = page;
3231 return 0;
3232 }
3233
3234 return VM_FAULT_SIGBUS;
3235}
3236
3237static struct vm_area_struct *__install_special_mapping(
3238 struct mm_struct *mm,
3239 unsigned long addr, unsigned long len,
3240 unsigned long vm_flags, void *priv,
3241 const struct vm_operations_struct *ops)
3242{
3243 int ret;
3244 struct vm_area_struct *vma;
3245
3246 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
3247 if (unlikely(vma == NULL))
3248 return ERR_PTR(-ENOMEM);
3249
3250 INIT_LIST_HEAD(&vma->anon_vma_chain);
3251 vma->vm_mm = mm;
3252 vma->vm_start = addr;
3253 vma->vm_end = addr + len;
3254
3255 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3256 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3257
3258 vma->vm_ops = ops;
3259 vma->vm_private_data = priv;
3260
3261 ret = insert_vm_struct(mm, vma);
3262 if (ret)
3263 goto out;
3264
3265 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3266
3267 perf_event_mmap(vma);
3268
3269 return vma;
3270
3271out:
3272 kmem_cache_free(vm_area_cachep, vma);
3273 return ERR_PTR(ret);
3274}
3275
3276bool vma_is_special_mapping(const struct vm_area_struct *vma,
3277 const struct vm_special_mapping *sm)
3278{
3279 return vma->vm_private_data == sm &&
3280 (vma->vm_ops == &special_mapping_vmops ||
3281 vma->vm_ops == &legacy_special_mapping_vmops);
3282}
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
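/*
 * Called with mm->mmap_sem held for writing.  Insert a new vma covering
 * the given region with the given flags; its pages come from the
 * vm_special_mapping's NULL-terminated page array (or its fault hook),
 * and faults past the last supplied page raise SIGBUS.
 */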
3293struct vm_area_struct *_install_special_mapping(
3294 struct mm_struct *mm,
3295 unsigned long addr, unsigned long len,
3296 unsigned long vm_flags, const struct vm_special_mapping *spec)
3297{
3298 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3299 &special_mapping_vmops);
3300}
3301
3302int install_special_mapping(struct mm_struct *mm,
3303 unsigned long addr, unsigned long len,
3304 unsigned long vm_flags, struct page **pages)
3305{
3306 struct vm_area_struct *vma = __install_special_mapping(
3307 mm, addr, len, vm_flags, (void *)pages,
3308 &legacy_special_mapping_vmops);
3309
3310 return PTR_ERR_OR_ZERO(vma);
3311}
3312
3313static DEFINE_MUTEX(mm_all_locks_mutex);
3314
3315static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3316{
3317 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3318
3319
3320
3321
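		/*
		 * The marker bit in rb_root.rb_node cannot change from
		 * under us because we hold the mm_all_locks_mutex.
		 */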
3322 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332 if (__test_and_set_bit(0, (unsigned long *)
3333 &anon_vma->root->rb_root.rb_node))
3334 BUG();
3335 }
3336}
3337
3338static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3339{
3340 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3341
3342
3343
3344
3345
3346
3347
3348
3349
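		/*
		 * AS_MM_ALL_LOCKS itself is stable under the
		 * mm_all_locks_mutex, but other bits in mapping->flags can
		 * change concurrently, so the update must stay atomic.
		 */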
3350 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3351 BUG();
3352 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
3353 }
3354}
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
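/*
 * mm_take_all_locks() takes, in a fixed order, every per-object lock that
 * can be used to modify this mm's mappings: the i_mmap_rwsem of hugetlb
 * file mappings, then of other file mappings, then every anon_vma lock.
 * The caller must already hold mmap_sem for writing, and concurrent
 * callers are serialised by mm_all_locks_mutex so the per-object
 * "already locked" markers stay consistent.  Returns 0 on success or
 * -EINTR if a signal arrives while locking; everything is released again
 * by mm_drop_all_locks().
 */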
3393int mm_take_all_locks(struct mm_struct *mm)
3394{
3395 struct vm_area_struct *vma;
3396 struct anon_vma_chain *avc;
3397
3398 BUG_ON(down_read_trylock(&mm->mmap_sem));
3399
3400 mutex_lock(&mm_all_locks_mutex);
3401
3402 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3403 if (signal_pending(current))
3404 goto out_unlock;
3405 if (vma->vm_file && vma->vm_file->f_mapping &&
3406 is_vm_hugetlb_page(vma))
3407 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3408 }
3409
3410 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3411 if (signal_pending(current))
3412 goto out_unlock;
3413 if (vma->vm_file && vma->vm_file->f_mapping &&
3414 !is_vm_hugetlb_page(vma))
3415 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3416 }
3417
3418 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3419 if (signal_pending(current))
3420 goto out_unlock;
3421 if (vma->anon_vma)
3422 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3423 vm_lock_anon_vma(mm, avc->anon_vma);
3424 }
3425
3426 return 0;
3427
3428out_unlock:
3429 mm_drop_all_locks(mm);
3430 return -EINTR;
3431}
3432
3433static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3434{
3435 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448 if (!__test_and_clear_bit(0, (unsigned long *)
3449 &anon_vma->root->rb_root.rb_node))
3450 BUG();
3451 anon_vma_unlock_write(anon_vma);
3452 }
3453}
3454
3455static void vm_unlock_mapping(struct address_space *mapping)
3456{
3457 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3458
3459
3460
3461
3462 i_mmap_unlock_write(mapping);
3463 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3464 &mapping->flags))
3465 BUG();
3466 }
3467}
3468
3469
3470
3471
3472
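/*
 * Release every lock taken by mm_take_all_locks().  The caller must not
 * drop mmap_sem before this has returned.
 */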
3473void mm_drop_all_locks(struct mm_struct *mm)
3474{
3475 struct vm_area_struct *vma;
3476 struct anon_vma_chain *avc;
3477
3478 BUG_ON(down_read_trylock(&mm->mmap_sem));
3479 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3480
3481 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3482 if (vma->anon_vma)
3483 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3484 vm_unlock_anon_vma(avc->anon_vma);
3485 if (vma->vm_file && vma->vm_file->f_mapping)
3486 vm_unlock_mapping(vma->vm_file->f_mapping);
3487 }
3488
3489 mutex_unlock(&mm_all_locks_mutex);
3490}
3491
3492
3493
3494
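/* Initialise the percpu counter used for overcommit accounting. */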
3495void __init mmap_init(void)
3496{
3497 int ret;
3498
3499 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3500 VM_BUG_ON(ret);
3501}
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
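/*
 * Initialise sysctl_user_reserve_kbytes to min(~3% of free memory, 128MB).
 * The reserve leaves an ordinary user enough headroom to recover (e.g.
 * kill a memory hog) when overcommit is disabled.
 */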
3513static int init_user_reserve(void)
3514{
3515 unsigned long free_kbytes;
3516
3517 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3518
3519 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3520 return 0;
3521}
3522subsys_initcall(init_user_reserve);
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
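/*
 * Initialise sysctl_admin_reserve_kbytes to min(~3% of free memory, 8MB),
 * enough for the admin to log in and kill a runaway process when
 * overcommit is disabled.
 */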
3534static int init_admin_reserve(void)
3535{
3536 unsigned long free_kbytes;
3537
3538 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3539
3540 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3541 return 0;
3542}
3543subsys_initcall(init_admin_reserve);
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
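/*
 * Keep the user and admin reserves in step with memory hotplug: re-derive
 * them when memory comes online (unless the operator raised them past the
 * default caps) and shrink them when offlining leaves less free memory
 * than the reserve itself.
 */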
3563static int reserve_mem_notifier(struct notifier_block *nb,
3564 unsigned long action, void *data)
3565{
3566 unsigned long tmp, free_kbytes;
3567
3568 switch (action) {
3569 case MEM_ONLINE:
3570
3571 tmp = sysctl_user_reserve_kbytes;
3572 if (0 < tmp && tmp < (1UL << 17))
3573 init_user_reserve();
3574
3575
3576 tmp = sysctl_admin_reserve_kbytes;
3577 if (0 < tmp && tmp < (1UL << 13))
3578 init_admin_reserve();
3579
3580 break;
3581 case MEM_OFFLINE:
3582 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3583
3584 if (sysctl_user_reserve_kbytes > free_kbytes) {
3585 init_user_reserve();
3586 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3587 sysctl_user_reserve_kbytes);
3588 }
3589
3590 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3591 init_admin_reserve();
3592 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3593 sysctl_admin_reserve_kbytes);
3594 }
3595 break;
3596 default:
3597 break;
3598 }
3599 return NOTIFY_OK;
3600}
3601
3602static struct notifier_block reserve_mem_nb = {
3603 .notifier_call = reserve_mem_notifier,
3604};
3605
3606static int __meminit init_reserve_notifier(void)
3607{
3608 if (register_hotmemory_notifier(&reserve_mem_nb))
3609 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3610
3611 return 0;
3612}
3613subsys_initcall(init_reserve_notifier);
3614