/*
 * mm/mmap.c
 *
 * Virtual memory area (VMA) management: mmap, munmap, brk and friends.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/*
 * Page protection values for each combination of the
 * VM_READ|VM_WRITE|VM_EXEC|VM_SHARED bits: private mappings (__P###)
 * in the first half, shared mappings (__S###) in the second.
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

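/*
 * vm_get_page_prot() - look up the page protection for a set of VM_* flags.
 *
 * Only the VM_READ|VM_WRITE|VM_EXEC|VM_SHARED bits of @vm_flags index
 * protection_map[]; the architecture may OR in extra bits through
 * arch_vm_get_page_prot().  Typical use, as in mmap_region() below:
 *
 *	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 */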
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* vm_page_prot has lockless readers, hence the WRITE_ONCE(). */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
129
130
131
132
133static void __remove_shared_vm_struct(struct vm_area_struct *vma,
134 struct file *file, struct address_space *mapping)
135{
136 if (vma->vm_flags & VM_DENYWRITE)
137 atomic_inc(&file_inode(file)->i_writecount);
138 if (vma->vm_flags & VM_SHARED)
139 mapping_unmap_writable(mapping);
140
141 flush_dcache_mmap_lock(mapping);
142 vma_interval_tree_remove(vma, &mapping->i_mmap);
143 flush_dcache_mmap_unlock(mapping);
144}
145
146
147
148
149
150void unlink_file_vma(struct vm_area_struct *vma)
151{
152 struct file *file = vma->vm_file;
153
154 if (file) {
155 struct address_space *mapping = file->f_mapping;
156 i_mmap_lock_write(mapping);
157 __remove_shared_vm_struct(vma, file, mapping);
158 i_mmap_unlock_write(mapping);
159 }
160}
161
162
163
164
165static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
166{
167 struct vm_area_struct *next = vma->vm_next;
168
169 might_sleep();
170 if (vma->vm_ops && vma->vm_ops->close)
171 vma->vm_ops->close(vma);
172 if (vma->vm_file)
173 fput(vma->vm_file);
174 mpol_put(vma_policy(vma));
175 kmem_cache_free(vm_area_cachep, vma);
176 return next;
177}
178
179static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
180
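/*
 * brk() system call: grow or shrink the program break.
 *
 * Shrinking unmaps [newbrk, oldbrk); growing extends the heap with an
 * anonymous mapping via do_brk(), refusing to come within one page of the
 * next mapping (taking its stack guard gap into account).  On failure the
 * unchanged mm->brk is returned, so callers of the raw syscall detect
 * errors by checking whether the returned break reached the requested
 * address (the libc brk()/sbrk() wrappers do exactly that).
 */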
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	LIST_HEAD(uf);

	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK keeps the heap immediately above the data
	 * segment for legacy binaries, unless this task's brk was
	 * randomized anyway.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/* Check against the RLIMIT_DATA limit on the data segment. */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
255
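/*
 * vma_compute_subtree_gap() - size of the largest free gap in the rbtree
 * subtree rooted at @vma; cached in vma->rb_subtree_gap and maintained by
 * the vma_gap_callbacks augmentation.  The gap directly below @vma is
 * vm_start_gap(vma) - vm_end_gap(vma->vm_prev), and the result is the
 * maximum of that and both children's cached values.
 */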
static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, prev_end, subtree_gap;

	max = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (max > prev_end)
			max -= prev_end;
		else
			max = 0;
	}
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
288
289#ifdef CONFIG_DEBUG_VM_RB
290static int browse_rb(struct mm_struct *mm)
291{
292 struct rb_root *root = &mm->mm_rb;
293 int i = 0, j, bug = 0;
294 struct rb_node *nd, *pn = NULL;
295 unsigned long prev = 0, pend = 0;
296
297 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
298 struct vm_area_struct *vma;
299 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
300 if (vma->vm_start < prev) {
301 pr_emerg("vm_start %lx < prev %lx\n",
302 vma->vm_start, prev);
303 bug = 1;
304 }
305 if (vma->vm_start < pend) {
306 pr_emerg("vm_start %lx < pend %lx\n",
307 vma->vm_start, pend);
308 bug = 1;
309 }
310 if (vma->vm_start > vma->vm_end) {
311 pr_emerg("vm_start %lx > vm_end %lx\n",
312 vma->vm_start, vma->vm_end);
313 bug = 1;
314 }
315 spin_lock(&mm->page_table_lock);
316 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
317 pr_emerg("free gap %lx, correct %lx\n",
318 vma->rb_subtree_gap,
319 vma_compute_subtree_gap(vma));
320 bug = 1;
321 }
322 spin_unlock(&mm->page_table_lock);
323 i++;
324 pn = nd;
325 prev = vma->vm_start;
326 pend = vma->vm_end;
327 }
328 j = 0;
329 for (nd = pn; nd; nd = rb_prev(nd))
330 j++;
331 if (i != j) {
332 pr_emerg("backwards %d, forwards %d\n", j, i);
333 bug = 1;
334 }
335 return bug ? -1 : i;
336}
337
338static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
339{
340 struct rb_node *nd;
341
342 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
343 struct vm_area_struct *vma;
344 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
345 VM_BUG_ON_VMA(vma != ignore &&
346 vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
347 vma);
348 }
349}
350
351static void validate_mm(struct mm_struct *mm)
352{
353 int bug = 0;
354 int i = 0;
355 unsigned long highest_address = 0;
356 struct vm_area_struct *vma = mm->mmap;
357
358 while (vma) {
359 struct anon_vma *anon_vma = vma->anon_vma;
360 struct anon_vma_chain *avc;
361
362 if (anon_vma) {
363 anon_vma_lock_read(anon_vma);
364 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
365 anon_vma_interval_tree_verify(avc);
366 anon_vma_unlock_read(anon_vma);
367 }
368
369 highest_address = vm_end_gap(vma);
370 vma = vma->vm_next;
371 i++;
372 }
373 if (i != mm->map_count) {
374 pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
375 bug = 1;
376 }
377 if (highest_address != mm->highest_vm_end) {
378 pr_emerg("mm->highest_vm_end %lx, found %lx\n",
379 mm->highest_vm_end, highest_address);
380 bug = 1;
381 }
382 i = browse_rb(mm);
383 if (i != mm->map_count) {
384 if (i != -1)
385 pr_emerg("map_count %d rb %d\n", mm->map_count, i);
386 bug = 1;
387 }
388 VM_BUG_ON_MM(bug, mm);
389}
390#else
391#define validate_mm_rb(root, ignore) do { } while (0)
392#define validate_mm(mm) do { } while (0)
393#endif
394
395RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
396 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
397
398
399
400
401
402
403static void vma_gap_update(struct vm_area_struct *vma)
404{
405
406
407
408
409 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
410}
411
412static inline void vma_rb_insert(struct vm_area_struct *vma,
413 struct rb_root *root)
414{
415
416 validate_mm_rb(root, NULL);
417
418 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
419}
420
421static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
422{
423
424
425
426
427
428 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
429}
430
431static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
432 struct rb_root *root,
433 struct vm_area_struct *ignore)
434{
435
436
437
438
439
440 validate_mm_rb(root, ignore);
441
442 __vma_rb_erase(vma, root);
443}
444
445static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
446 struct rb_root *root)
447{
448
449
450
451
452 validate_mm_rb(root, vma);
453
454 __vma_rb_erase(vma, root);
455}

/*
 * Before changing a VMA's vm_start/vm_end/vm_pgoff while it is already on
 * an anon_vma interval tree, the VMA must be taken off the tree with
 * anon_vma_interval_tree_pre_update_vma() and re-inserted afterwards with
 * anon_vma_interval_tree_post_update_vma(), with the anon_vma lock held
 * across the whole sequence.
 */
471static inline void
472anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
473{
474 struct anon_vma_chain *avc;
475
476 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
477 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
478}
479
480static inline void
481anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
482{
483 struct anon_vma_chain *avc;
484
485 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
486 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
487}
488
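/*
 * find_vma_links() - find where a new VMA [addr, end) would be linked.
 *
 * On success, *pprev, *rb_link and *rb_parent describe the list predecessor
 * and the rbtree insertion point; -ENOMEM is returned if an existing VMA
 * overlaps the range.  Typical use, as in mmap_region() below:
 *
 *	while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
 *			      &rb_parent)) {
 *		if (do_munmap(mm, addr, len, uf))
 *			return -ENOMEM;
 *	}
 */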
489static int find_vma_links(struct mm_struct *mm, unsigned long addr,
490 unsigned long end, struct vm_area_struct **pprev,
491 struct rb_node ***rb_link, struct rb_node **rb_parent)
492{
493 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
494
495 __rb_link = &mm->mm_rb.rb_node;
496 rb_prev = __rb_parent = NULL;
497
498 while (*__rb_link) {
499 struct vm_area_struct *vma_tmp;
500
501 __rb_parent = *__rb_link;
502 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
503
504 if (vma_tmp->vm_end > addr) {
505
506 if (vma_tmp->vm_start < end)
507 return -ENOMEM;
508 __rb_link = &__rb_parent->rb_left;
509 } else {
510 rb_prev = __rb_parent;
511 __rb_link = &__rb_parent->rb_right;
512 }
513 }
514
515 *pprev = NULL;
516 if (rb_prev)
517 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
518 *rb_link = __rb_link;
519 *rb_parent = __rb_parent;
520 return 0;
521}
522
523static unsigned long count_vma_pages_range(struct mm_struct *mm,
524 unsigned long addr, unsigned long end)
525{
526 unsigned long nr_pages = 0;
527 struct vm_area_struct *vma;
528
529
530 vma = find_vma_intersection(mm, addr, end);
531 if (!vma)
532 return 0;
533
534 nr_pages = (min(end, vma->vm_end) -
535 max(addr, vma->vm_start)) >> PAGE_SHIFT;
536
537
538 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
539 unsigned long overlap_len;
540
541 if (vma->vm_start > end)
542 break;
543
544 overlap_len = min(end, vma->vm_end) - vma->vm_start;
545 nr_pages += overlap_len >> PAGE_SHIFT;
546 }
547
548 return nr_pages;
549}
550
551void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
552 struct rb_node **rb_link, struct rb_node *rb_parent)
553{
554
555 if (vma->vm_next)
556 vma_gap_update(vma->vm_next);
557 else
558 mm->highest_vm_end = vm_end_gap(vma);
559
560
561
562
563
564
565
566
567
568
569 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
570 vma->rb_subtree_gap = 0;
571 vma_gap_update(vma);
572 vma_rb_insert(vma, &mm->mm_rb);
573}
574
575static void __vma_link_file(struct vm_area_struct *vma)
576{
577 struct file *file;
578
579 file = vma->vm_file;
580 if (file) {
581 struct address_space *mapping = file->f_mapping;
582
583 if (vma->vm_flags & VM_DENYWRITE)
584 atomic_dec(&file_inode(file)->i_writecount);
585 if (vma->vm_flags & VM_SHARED)
586 atomic_inc(&mapping->i_mmap_writable);
587
588 flush_dcache_mmap_lock(mapping);
589 vma_interval_tree_insert(vma, &mapping->i_mmap);
590 flush_dcache_mmap_unlock(mapping);
591 }
592}
593
594static void
595__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
596 struct vm_area_struct *prev, struct rb_node **rb_link,
597 struct rb_node *rb_parent)
598{
599 __vma_link_list(mm, vma, prev, rb_parent);
600 __vma_link_rb(mm, vma, rb_link, rb_parent);
601}
602
603static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
604 struct vm_area_struct *prev, struct rb_node **rb_link,
605 struct rb_node *rb_parent)
606{
607 struct address_space *mapping = NULL;
608
609 if (vma->vm_file) {
610 mapping = vma->vm_file->f_mapping;
611 i_mmap_lock_write(mapping);
612 }
613
614 __vma_link(mm, vma, prev, rb_link, rb_parent);
615 __vma_link_file(vma);
616
617 if (mapping)
618 i_mmap_unlock_write(mapping);
619
620 mm->map_count++;
621 validate_mm(mm);
622}
623
624
625
626
627
628static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
629{
630 struct vm_area_struct *prev;
631 struct rb_node **rb_link, *rb_parent;
632
633 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
634 &prev, &rb_link, &rb_parent))
635 BUG();
636 __vma_link(mm, vma, prev, rb_link, rb_parent);
637 mm->map_count++;
638}
639
640static __always_inline void __vma_unlink_common(struct mm_struct *mm,
641 struct vm_area_struct *vma,
642 struct vm_area_struct *prev,
643 bool has_prev,
644 struct vm_area_struct *ignore)
645{
646 struct vm_area_struct *next;
647
648 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
649 next = vma->vm_next;
650 if (has_prev)
651 prev->vm_next = next;
652 else {
653 prev = vma->vm_prev;
654 if (prev)
655 prev->vm_next = next;
656 else
657 mm->mmap = next;
658 }
659 if (next)
660 next->vm_prev = prev;
661
662
663 vmacache_invalidate(mm);
664}
665
666static inline void __vma_unlink_prev(struct mm_struct *mm,
667 struct vm_area_struct *vma,
668 struct vm_area_struct *prev)
669{
670 __vma_unlink_common(mm, vma, prev, true, vma);
671}
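/*
 * __vma_adjust() - change @vma's vm_start/vm_end/vm_pgoff and fix up
 * everything that depends on them: the VMA list and rbtree, the file
 * interval tree, the anon_vma interval tree, and any neighbouring VMA that
 * is grown, shrunk, removed (the remove_next cases) or newly inserted
 * (@insert).  Runs with mmap_sem held for writing; vma_merge() and the
 * split/insert paths all funnel through here.
 */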
680int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
681 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
682 struct vm_area_struct *expand)
683{
684 struct mm_struct *mm = vma->vm_mm;
685 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
686 struct address_space *mapping = NULL;
687 struct rb_root *root = NULL;
688 struct anon_vma *anon_vma = NULL;
689 struct file *file = vma->vm_file;
690 bool start_changed = false, end_changed = false;
691 long adjust_next = 0;
692 int remove_next = 0;
693
694 if (next && !insert) {
695 struct vm_area_struct *exporter = NULL, *importer = NULL;
696
697 if (end >= next->vm_end) {
698
699
700
701
702
703
704 if (next == expand) {
705
706
707
708
709 VM_WARN_ON(end != next->vm_end);
710
711
712
713
714
715 remove_next = 3;
716 VM_WARN_ON(file != next->vm_file);
717 swap(vma, next);
718 } else {
719 VM_WARN_ON(expand != vma);
720
721
722
723
724 remove_next = 1 + (end > next->vm_end);
725 VM_WARN_ON(remove_next == 2 &&
726 end != next->vm_next->vm_end);
727 VM_WARN_ON(remove_next == 1 &&
728 end != next->vm_end);
729
730 end = next->vm_end;
731 }
732
733 exporter = next;
734 importer = vma;
735
736
737
738
739
740 if (remove_next == 2 && !next->anon_vma)
741 exporter = next->vm_next;
742
743 } else if (end > next->vm_start) {
744
745
746
747
748 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
749 exporter = next;
750 importer = vma;
751 VM_WARN_ON(expand != importer);
752 } else if (end < vma->vm_end) {
753
754
755
756
757
758 adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT);
759 exporter = vma;
760 importer = next;
761 VM_WARN_ON(expand != importer);
762 }
763
764
765
766
767
768
769 if (exporter && exporter->anon_vma && !importer->anon_vma) {
770 int error;
771
772 importer->anon_vma = exporter->anon_vma;
773 error = anon_vma_clone(importer, exporter);
774 if (error)
775 return error;
776 }
777 }
778again:
779 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
780
781 if (file) {
782 mapping = file->f_mapping;
783 root = &mapping->i_mmap;
784 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
785
786 if (adjust_next)
787 uprobe_munmap(next, next->vm_start, next->vm_end);
788
789 i_mmap_lock_write(mapping);
790 if (insert) {
791
792
793
794
795
796
797 __vma_link_file(insert);
798 }
799 }
800
801 anon_vma = vma->anon_vma;
802 if (!anon_vma && adjust_next)
803 anon_vma = next->anon_vma;
804 if (anon_vma) {
805 VM_WARN_ON(adjust_next && next->anon_vma &&
806 anon_vma != next->anon_vma);
807 anon_vma_lock_write(anon_vma);
808 anon_vma_interval_tree_pre_update_vma(vma);
809 if (adjust_next)
810 anon_vma_interval_tree_pre_update_vma(next);
811 }
812
813 if (root) {
814 flush_dcache_mmap_lock(mapping);
815 vma_interval_tree_remove(vma, root);
816 if (adjust_next)
817 vma_interval_tree_remove(next, root);
818 }
819
820 if (start != vma->vm_start) {
821 vma->vm_start = start;
822 start_changed = true;
823 }
824 if (end != vma->vm_end) {
825 vma->vm_end = end;
826 end_changed = true;
827 }
828 vma->vm_pgoff = pgoff;
829 if (adjust_next) {
830 next->vm_start += adjust_next << PAGE_SHIFT;
831 next->vm_pgoff += adjust_next;
832 }
833
834 if (root) {
835 if (adjust_next)
836 vma_interval_tree_insert(next, root);
837 vma_interval_tree_insert(vma, root);
838 flush_dcache_mmap_unlock(mapping);
839 }
840
841 if (remove_next) {
842
843
844
845
846 if (remove_next != 3)
847 __vma_unlink_prev(mm, next, vma);
848 else
849
850
851
852
853
854
855
856
857
858 __vma_unlink_common(mm, next, NULL, false, vma);
859 if (file)
860 __remove_shared_vm_struct(next, file, mapping);
861 } else if (insert) {
862
863
864
865
866
867 __insert_vm_struct(mm, insert);
868 } else {
869 if (start_changed)
870 vma_gap_update(vma);
871 if (end_changed) {
872 if (!next)
873 mm->highest_vm_end = vm_end_gap(vma);
874 else if (!adjust_next)
875 vma_gap_update(next);
876 }
877 }
878
879 if (anon_vma) {
880 anon_vma_interval_tree_post_update_vma(vma);
881 if (adjust_next)
882 anon_vma_interval_tree_post_update_vma(next);
883 anon_vma_unlock_write(anon_vma);
884 }
885 if (mapping)
886 i_mmap_unlock_write(mapping);
887
888 if (root) {
889 uprobe_mmap(vma);
890
891 if (adjust_next)
892 uprobe_mmap(next);
893 }
894
895 if (remove_next) {
896 if (file) {
897 uprobe_munmap(next, next->vm_start, next->vm_end);
898 fput(file);
899 }
900 if (next->anon_vma)
901 anon_vma_merge(vma, next);
902 mm->map_count--;
903 mpol_put(vma_policy(next));
904 kmem_cache_free(vm_area_cachep, next);
905
906
907
908
909
910 if (remove_next != 3) {
911
912
913
914
915
916
917 next = vma->vm_next;
918 } else {
929 next = vma;
930 }
931 if (remove_next == 2) {
932 remove_next = 1;
933 end = next->vm_end;
934 goto again;
935 }
936 else if (next)
937 vma_gap_update(next);
938 else {
958 VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
959 }
960 }
961 if (insert && file)
962 uprobe_mmap(insert);
963
964 validate_mm(mm);
965
966 return 0;
967}
968
969
970
971
972
973static inline int is_mergeable_vma(struct vm_area_struct *vma,
974 struct file *file, unsigned long vm_flags,
975 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
976{
977
978
979
980
981
982
983
984
985 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
986 return 0;
987 if (vma->vm_file != file)
988 return 0;
989 if (vma->vm_ops && vma->vm_ops->close)
990 return 0;
991 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
992 return 0;
993 return 1;
994}
995
996static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
997 struct anon_vma *anon_vma2,
998 struct vm_area_struct *vma)
999{
1000
1001
1002
1003
1004 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1005 list_is_singular(&vma->anon_vma_chain)))
1006 return 1;
1007 return anon_vma1 == anon_vma2;
1008}
1021static int
1022can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1023 struct anon_vma *anon_vma, struct file *file,
1024 pgoff_t vm_pgoff,
1025 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1026{
1027 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1028 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1029 if (vma->vm_pgoff == vm_pgoff)
1030 return 1;
1031 }
1032 return 0;
1033}
1034
1035
1036
1037
1038
1039
1040
1041
1042static int
1043can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1044 struct anon_vma *anon_vma, struct file *file,
1045 pgoff_t vm_pgoff,
1046 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1047{
1048 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1049 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1050 pgoff_t vm_pglen;
1051 vm_pglen = vma_pages(vma);
1052 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1053 return 1;
1054 }
1055 return 0;
1056}
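/*
 * vma_merge() - try to extend @prev and/or absorb the following VMA so that
 * a new mapping [addr, end) with @vm_flags does not need a VMA of its own.
 *
 * Returns the merged VMA, or NULL if no merge was possible and the caller
 * must allocate a new vm_area_struct, as mmap_region() does below:
 *
 *	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
 *			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
 *	if (!vma)
 *		... allocate, initialise and vma_link() a new VMA ...
 */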
1098struct vm_area_struct *vma_merge(struct mm_struct *mm,
1099 struct vm_area_struct *prev, unsigned long addr,
1100 unsigned long end, unsigned long vm_flags,
1101 struct anon_vma *anon_vma, struct file *file,
1102 pgoff_t pgoff, struct mempolicy *policy,
1103 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1104{
1105 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1106 struct vm_area_struct *area, *next;
1107 int err;
1108
1109
1110
1111
1112
1113 if (vm_flags & VM_SPECIAL)
1114 return NULL;
1115
1116 if (prev)
1117 next = prev->vm_next;
1118 else
1119 next = mm->mmap;
1120 area = next;
1121 if (area && area->vm_end == end)
1122 next = next->vm_next;
1123
1124
1125 VM_WARN_ON(prev && addr <= prev->vm_start);
1126 VM_WARN_ON(area && end > area->vm_end);
1127 VM_WARN_ON(addr >= end);
1128
1129
1130
1131
1132 if (prev && prev->vm_end == addr &&
1133 mpol_equal(vma_policy(prev), policy) &&
1134 can_vma_merge_after(prev, vm_flags,
1135 anon_vma, file, pgoff,
1136 vm_userfaultfd_ctx)) {
1137
1138
1139
1140 if (next && end == next->vm_start &&
1141 mpol_equal(policy, vma_policy(next)) &&
1142 can_vma_merge_before(next, vm_flags,
1143 anon_vma, file,
1144 pgoff+pglen,
1145 vm_userfaultfd_ctx) &&
1146 is_mergeable_anon_vma(prev->anon_vma,
1147 next->anon_vma, NULL)) {
1148
1149 err = __vma_adjust(prev, prev->vm_start,
1150 next->vm_end, prev->vm_pgoff, NULL,
1151 prev);
1152 } else
1153 err = __vma_adjust(prev, prev->vm_start,
1154 end, prev->vm_pgoff, NULL, prev);
1155 if (err)
1156 return NULL;
1157 khugepaged_enter_vma_merge(prev, vm_flags);
1158 return prev;
1159 }
1160
1161
1162
1163
1164 if (next && end == next->vm_start &&
1165 mpol_equal(policy, vma_policy(next)) &&
1166 can_vma_merge_before(next, vm_flags,
1167 anon_vma, file, pgoff+pglen,
1168 vm_userfaultfd_ctx)) {
1169 if (prev && addr < prev->vm_end)
1170 err = __vma_adjust(prev, prev->vm_start,
1171 addr, prev->vm_pgoff, NULL, next);
1172 else {
1173 err = __vma_adjust(area, addr, next->vm_end,
1174 next->vm_pgoff - pglen, NULL, next);
1175
1176
1177
1178
1179
1180 area = next;
1181 }
1182 if (err)
1183 return NULL;
1184 khugepaged_enter_vma_merge(area, vm_flags);
1185 return area;
1186 }
1187
1188 return NULL;
1189}
1204static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1205{
1206 return a->vm_end == b->vm_start &&
1207 mpol_equal(vma_policy(a), vma_policy(b)) &&
1208 a->vm_file == b->vm_file &&
1209 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1210 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1211}
1235static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1236{
1237 if (anon_vma_compatible(a, b)) {
1238 struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
1239
1240 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1241 return anon_vma;
1242 }
1243 return NULL;
1244}
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1255{
1256 struct anon_vma *anon_vma;
1257 struct vm_area_struct *near;
1258
1259 near = vma->vm_next;
1260 if (!near)
1261 goto try_prev;
1262
1263 anon_vma = reusable_anon_vma(near, vma, near);
1264 if (anon_vma)
1265 return anon_vma;
1266try_prev:
1267 near = vma->vm_prev;
1268 if (!near)
1269 goto none;
1270
1271 anon_vma = reusable_anon_vma(near, near, vma);
1272 if (anon_vma)
1273 return anon_vma;
1274none:
1275
1276
1277
1278
1279
1280
1281
1282
1283 return NULL;
1284}
1285
1286
1287
1288
1289
1290static inline unsigned long round_hint_to_min(unsigned long hint)
1291{
1292 hint &= PAGE_MASK;
1293 if (((void *)hint != NULL) &&
1294 (hint < mmap_min_addr))
1295 return PAGE_ALIGN(mmap_min_addr);
1296 return hint;
1297}
1298
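/*
 * mlock_future_check() - if the mapping is to be VM_LOCKED, check whether
 * locking @len more bytes would push the mm past RLIMIT_MEMLOCK.  Returns 0
 * if the caller may proceed, -EAGAIN otherwise; CAP_IPC_LOCK bypasses the
 * limit.
 */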
1299static inline int mlock_future_check(struct mm_struct *mm,
1300 unsigned long flags,
1301 unsigned long len)
1302{
1303 unsigned long locked, lock_limit;
1304
1305
1306 if (flags & VM_LOCKED) {
1307 locked = len >> PAGE_SHIFT;
1308 locked += mm->locked_vm;
1309 lock_limit = rlimit(RLIMIT_MEMLOCK);
1310 lock_limit >>= PAGE_SHIFT;
1311 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1312 return -EAGAIN;
1313 }
1314 return 0;
1315}
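/*
 * do_mmap() - the core of mmap(): validate @prot and @flags, translate them
 * into VM_* flags and hand the range over to mmap_region().  The caller must
 * hold mm->mmap_sem for writing.  *populate is set to the length that should
 * be pre-faulted (MAP_POPULATE or a locked mapping) once the lock is
 * dropped; a sketch of the calling convention (cf. vm_mmap_pgoff()):
 *
 *	addr = do_mmap(file, addr, len, prot, flags, 0, pgoff, &populate, &uf);
 *	up_write(&mm->mmap_sem);
 *	userfaultfd_unmap_complete(mm, &uf);
 *	if (populate && !IS_ERR_VALUE(addr))
 *		mm_populate(addr, populate);
 */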
1320unsigned long do_mmap(struct file *file, unsigned long addr,
1321 unsigned long len, unsigned long prot,
1322 unsigned long flags, vm_flags_t vm_flags,
1323 unsigned long pgoff, unsigned long *populate,
1324 struct list_head *uf)
1325{
1326 struct mm_struct *mm = current->mm;
1327 int pkey = 0;
1328
1329 *populate = 0;
1330
1331 if (!len)
1332 return -EINVAL;
1333
1334
1335
1336
1337
1338
1339
1340 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1341 if (!(file && path_noexec(&file->f_path)))
1342 prot |= PROT_EXEC;
1343
1344 if (!(flags & MAP_FIXED))
1345 addr = round_hint_to_min(addr);
1346
1347
1348 len = PAGE_ALIGN(len);
1349 if (!len)
1350 return -ENOMEM;
1351
1352
1353 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1354 return -EOVERFLOW;
1355
1356
1357 if (mm->map_count > sysctl_max_map_count)
1358 return -ENOMEM;
1359
1360
1361
1362
1363 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1364 if (offset_in_page(addr))
1365 return addr;
1366
1367 if (prot == PROT_EXEC) {
1368 pkey = execute_only_pkey(mm);
1369 if (pkey < 0)
1370 pkey = 0;
1371 }
1372
1373
1374
1375
1376
1377 vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1378 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1379
1380 if (flags & MAP_LOCKED)
1381 if (!can_do_mlock())
1382 return -EPERM;
1383
1384 if (mlock_future_check(mm, vm_flags, len))
1385 return -EAGAIN;
1386
1387 if (file) {
1388 struct inode *inode = file_inode(file);
1389
1390 switch (flags & MAP_TYPE) {
1391 case MAP_SHARED:
1392 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1393 return -EACCES;
1394
1395
1396
1397
1398
1399 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1400 return -EACCES;
1401
1402
1403
1404
1405 if (locks_verify_locked(file))
1406 return -EAGAIN;
1407
1408 vm_flags |= VM_SHARED | VM_MAYSHARE;
1409 if (!(file->f_mode & FMODE_WRITE))
1410 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1411
1412
1413 case MAP_PRIVATE:
1414 if (!(file->f_mode & FMODE_READ))
1415 return -EACCES;
1416 if (path_noexec(&file->f_path)) {
1417 if (vm_flags & VM_EXEC)
1418 return -EPERM;
1419 vm_flags &= ~VM_MAYEXEC;
1420 }
1421
1422 if (!file->f_op->mmap)
1423 return -ENODEV;
1424 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1425 return -EINVAL;
1426 break;
1427
1428 default:
1429 return -EINVAL;
1430 }
1431 } else {
1432 switch (flags & MAP_TYPE) {
1433 case MAP_SHARED:
1434 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1435 return -EINVAL;
1436
1437
1438
1439 pgoff = 0;
1440 vm_flags |= VM_SHARED | VM_MAYSHARE;
1441 break;
1442 case MAP_PRIVATE:
1443
1444
1445
1446 pgoff = addr >> PAGE_SHIFT;
1447 break;
1448 default:
1449 return -EINVAL;
1450 }
1451 }
1452
1453
1454
1455
1456
1457 if (flags & MAP_NORESERVE) {
1458
1459 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1460 vm_flags |= VM_NORESERVE;
1461
1462
1463 if (file && is_file_hugepages(file))
1464 vm_flags |= VM_NORESERVE;
1465 }
1466
1467 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1468 if (!IS_ERR_VALUE(addr) &&
1469 ((vm_flags & VM_LOCKED) ||
1470 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1471 *populate = len;
1472 return addr;
1473}
1474
1475SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1476 unsigned long, prot, unsigned long, flags,
1477 unsigned long, fd, unsigned long, pgoff)
1478{
1479 struct file *file = NULL;
1480 unsigned long retval;
1481
1482 if (!(flags & MAP_ANONYMOUS)) {
1483 audit_mmap_fd(fd, flags);
1484 file = fget(fd);
1485 if (!file)
1486 return -EBADF;
1487 if (is_file_hugepages(file))
1488 len = ALIGN(len, huge_page_size(hstate_file(file)));
1489 retval = -EINVAL;
1490 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1491 goto out_fput;
1492 } else if (flags & MAP_HUGETLB) {
1493 struct user_struct *user = NULL;
1494 struct hstate *hs;
1495
1496 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1497 if (!hs)
1498 return -EINVAL;
1499
1500 len = ALIGN(len, huge_page_size(hs));
1501
1502
1503
1504
1505
1506
1507 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1508 VM_NORESERVE,
1509 &user, HUGETLB_ANONHUGE_INODE,
1510 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1511 if (IS_ERR(file))
1512 return PTR_ERR(file);
1513 }
1514
1515 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1516
1517 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1518out_fput:
1519 if (file)
1520 fput(file);
1521 return retval;
1522}
1523
1524#ifdef __ARCH_WANT_SYS_OLD_MMAP
1525struct mmap_arg_struct {
1526 unsigned long addr;
1527 unsigned long len;
1528 unsigned long prot;
1529 unsigned long flags;
1530 unsigned long fd;
1531 unsigned long offset;
1532};
1533
1534SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1535{
1536 struct mmap_arg_struct a;
1537
1538 if (copy_from_user(&a, arg, sizeof(a)))
1539 return -EFAULT;
1540 if (offset_in_page(a.offset))
1541 return -EINVAL;
1542
1543 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1544 a.offset >> PAGE_SHIFT);
1545}
1546#endif
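/*
 * vma_wants_writenotify() - should writable shared pages start out
 * write-protected so that first writes fault and can be intercepted (dirty
 * accounting, ->page_mkwrite()/->pfn_mkwrite(), soft-dirty tracking)?
 * Returns 1 if vma_set_page_prot() must drop the write bit from
 * vm_page_prot.
 */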
1554int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1555{
1556 vm_flags_t vm_flags = vma->vm_flags;
1557 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1558
1559
1560 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1561 return 0;
1562
1563
1564 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1565 return 1;
1566
1567
1568
1569 if (pgprot_val(vm_page_prot) !=
1570 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1571 return 0;
1572
1573
1574 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1575 return 1;
1576
1577
1578 if (vm_flags & VM_PFNMAP)
1579 return 0;
1580
1581
1582 return vma->vm_file && vma->vm_file->f_mapping &&
1583 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1584}
1585
1586
1587
1588
1589
1590static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1591{
1592
1593
1594
1595
1596 if (file && is_file_hugepages(file))
1597 return 0;
1598
1599 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1600}
1601
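/*
 * mmap_region() - carve out [addr, addr + len) for a new mapping: unmap any
 * overlapping old VMAs, charge the memory if needed, try vma_merge(), and
 * otherwise allocate a fresh VMA, call the file's ->mmap() (or
 * shmem_zero_setup() for shared anonymous memory) and link it into the mm.
 */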
1602unsigned long mmap_region(struct file *file, unsigned long addr,
1603 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1604 struct list_head *uf)
1605{
1606 struct mm_struct *mm = current->mm;
1607 struct vm_area_struct *vma, *prev;
1608 int error;
1609 struct rb_node **rb_link, *rb_parent;
1610 unsigned long charged = 0;
1611
1612
1613 if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
1614 unsigned long nr_pages;
1615
1616
1617
1618
1619
1620 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1621
1622 if (!may_expand_vm(mm, vm_flags,
1623 (len >> PAGE_SHIFT) - nr_pages))
1624 return -ENOMEM;
1625 }
1626
1627
1628 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
1629 &rb_parent)) {
1630 if (do_munmap(mm, addr, len, uf))
1631 return -ENOMEM;
1632 }
1633
1634
1635
1636
1637 if (accountable_mapping(file, vm_flags)) {
1638 charged = len >> PAGE_SHIFT;
1639 if (security_vm_enough_memory_mm(mm, charged))
1640 return -ENOMEM;
1641 vm_flags |= VM_ACCOUNT;
1642 }
1643
1644
1645
1646
1647 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1648 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1649 if (vma)
1650 goto out;
1651
1652
1653
1654
1655
1656
1657 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1658 if (!vma) {
1659 error = -ENOMEM;
1660 goto unacct_error;
1661 }
1662
1663 vma->vm_mm = mm;
1664 vma->vm_start = addr;
1665 vma->vm_end = addr + len;
1666 vma->vm_flags = vm_flags;
1667 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1668 vma->vm_pgoff = pgoff;
1669 INIT_LIST_HEAD(&vma->anon_vma_chain);
1670
1671 if (file) {
1672 if (vm_flags & VM_DENYWRITE) {
1673 error = deny_write_access(file);
1674 if (error)
1675 goto free_vma;
1676 }
1677 if (vm_flags & VM_SHARED) {
1678 error = mapping_map_writable(file->f_mapping);
1679 if (error)
1680 goto allow_write_and_free_vma;
1681 }
1682
1683
1684
1685
1686
1687
1688 vma->vm_file = get_file(file);
1689 error = call_mmap(file, vma);
1690 if (error)
1691 goto unmap_and_free_vma;
1692
1693
1694
1695
1696
1697
1698
1699
1700 WARN_ON_ONCE(addr != vma->vm_start);
1701
1702 addr = vma->vm_start;
1703 vm_flags = vma->vm_flags;
1704 } else if (vm_flags & VM_SHARED) {
1705 error = shmem_zero_setup(vma);
1706 if (error)
1707 goto free_vma;
1708 }
1709
1710 vma_link(mm, vma, prev, rb_link, rb_parent);
1711
1712 if (file) {
1713 if (vm_flags & VM_SHARED)
1714 mapping_unmap_writable(file->f_mapping);
1715 if (vm_flags & VM_DENYWRITE)
1716 allow_write_access(file);
1717 }
1718 file = vma->vm_file;
1719out:
1720 perf_event_mmap(vma);
1721
1722 vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
1723 if (vm_flags & VM_LOCKED) {
1724 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1725 vma == get_gate_vma(current->mm)))
1726 mm->locked_vm += (len >> PAGE_SHIFT);
1727 else
1728 vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
1729 }
1730
1731 if (file)
1732 uprobe_mmap(vma);
1733
1734
1735
1736
1737
1738
1739
1740
1741 vma->vm_flags |= VM_SOFTDIRTY;
1742
1743 vma_set_page_prot(vma);
1744
1745 return addr;
1746
1747unmap_and_free_vma:
1748 vma->vm_file = NULL;
1749 fput(file);
1750
1751
1752 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1753 charged = 0;
1754 if (vm_flags & VM_SHARED)
1755 mapping_unmap_writable(file->f_mapping);
1756allow_write_and_free_vma:
1757 if (vm_flags & VM_DENYWRITE)
1758 allow_write_access(file);
1759free_vma:
1760 kmem_cache_free(vm_area_cachep, vma);
1761unacct_error:
1762 if (charged)
1763 vm_unacct_memory(charged);
1764 return error;
1765}
1766
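/*
 * unmapped_area() - bottom-up search for a free gap of at least info->length
 * bytes between info->low_limit and info->high_limit, honouring
 * info->align_mask/align_offset.  The rb_subtree_gap augmentation of the VMA
 * rbtree lets whole subtrees be skipped when their largest gap is too small.
 * Callers fill in a struct vm_unmapped_area_info and go through
 * vm_unmapped_area(), as arch_get_unmapped_area() does below.
 */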
1767unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1768{
1777 struct mm_struct *mm = current->mm;
1778 struct vm_area_struct *vma;
1779 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1780
1781
1782 length = info->length + info->align_mask;
1783 if (length < info->length)
1784 return -ENOMEM;
1785
1786
1787 if (info->high_limit < length)
1788 return -ENOMEM;
1789 high_limit = info->high_limit - length;
1790
1791 if (info->low_limit > high_limit)
1792 return -ENOMEM;
1793 low_limit = info->low_limit + length;
1794
1795
1796 if (RB_EMPTY_ROOT(&mm->mm_rb))
1797 goto check_highest;
1798 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1799 if (vma->rb_subtree_gap < length)
1800 goto check_highest;
1801
1802 while (true) {
1803
1804 gap_end = vm_start_gap(vma);
1805 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1806 struct vm_area_struct *left =
1807 rb_entry(vma->vm_rb.rb_left,
1808 struct vm_area_struct, vm_rb);
1809 if (left->rb_subtree_gap >= length) {
1810 vma = left;
1811 continue;
1812 }
1813 }
1814
1815 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1816check_current:
1817
1818 if (gap_start > high_limit)
1819 return -ENOMEM;
1820 if (gap_end >= low_limit &&
1821 gap_end > gap_start && gap_end - gap_start >= length)
1822 goto found;
1823
1824
1825 if (vma->vm_rb.rb_right) {
1826 struct vm_area_struct *right =
1827 rb_entry(vma->vm_rb.rb_right,
1828 struct vm_area_struct, vm_rb);
1829 if (right->rb_subtree_gap >= length) {
1830 vma = right;
1831 continue;
1832 }
1833 }
1834
1835
1836 while (true) {
1837 struct rb_node *prev = &vma->vm_rb;
1838 if (!rb_parent(prev))
1839 goto check_highest;
1840 vma = rb_entry(rb_parent(prev),
1841 struct vm_area_struct, vm_rb);
1842 if (prev == vma->vm_rb.rb_left) {
1843 gap_start = vm_end_gap(vma->vm_prev);
1844 gap_end = vm_start_gap(vma);
1845 goto check_current;
1846 }
1847 }
1848 }
1849
1850check_highest:
1851
1852 gap_start = mm->highest_vm_end;
1853 gap_end = ULONG_MAX;
1854 if (gap_start > high_limit)
1855 return -ENOMEM;
1856
1857found:
1858
1859 if (gap_start < info->low_limit)
1860 gap_start = info->low_limit;
1861
1862
1863 gap_start += (info->align_offset - gap_start) & info->align_mask;
1864
1865 VM_BUG_ON(gap_start + info->length > info->high_limit);
1866 VM_BUG_ON(gap_start + info->length > gap_end);
1867 return gap_start;
1868}
1869
1870unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1871{
1872 struct mm_struct *mm = current->mm;
1873 struct vm_area_struct *vma;
1874 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1875
1876
1877 length = info->length + info->align_mask;
1878 if (length < info->length)
1879 return -ENOMEM;
1880
1881
1882
1883
1884
1885 gap_end = info->high_limit;
1886 if (gap_end < length)
1887 return -ENOMEM;
1888 high_limit = gap_end - length;
1889
1890 if (info->low_limit > high_limit)
1891 return -ENOMEM;
1892 low_limit = info->low_limit + length;
1893
1894
1895 gap_start = mm->highest_vm_end;
1896 if (gap_start <= high_limit)
1897 goto found_highest;
1898
1899
1900 if (RB_EMPTY_ROOT(&mm->mm_rb))
1901 return -ENOMEM;
1902 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1903 if (vma->rb_subtree_gap < length)
1904 return -ENOMEM;
1905
1906 while (true) {
1907
1908 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1909 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1910 struct vm_area_struct *right =
1911 rb_entry(vma->vm_rb.rb_right,
1912 struct vm_area_struct, vm_rb);
1913 if (right->rb_subtree_gap >= length) {
1914 vma = right;
1915 continue;
1916 }
1917 }
1918
1919check_current:
1920
1921 gap_end = vm_start_gap(vma);
1922 if (gap_end < low_limit)
1923 return -ENOMEM;
1924 if (gap_start <= high_limit &&
1925 gap_end > gap_start && gap_end - gap_start >= length)
1926 goto found;
1927
1928
1929 if (vma->vm_rb.rb_left) {
1930 struct vm_area_struct *left =
1931 rb_entry(vma->vm_rb.rb_left,
1932 struct vm_area_struct, vm_rb);
1933 if (left->rb_subtree_gap >= length) {
1934 vma = left;
1935 continue;
1936 }
1937 }
1938
1939
1940 while (true) {
1941 struct rb_node *prev = &vma->vm_rb;
1942 if (!rb_parent(prev))
1943 return -ENOMEM;
1944 vma = rb_entry(rb_parent(prev),
1945 struct vm_area_struct, vm_rb);
1946 if (prev == vma->vm_rb.rb_right) {
1947 gap_start = vma->vm_prev ?
1948 vm_end_gap(vma->vm_prev) : 0;
1949 goto check_current;
1950 }
1951 }
1952 }
1953
1954found:
1955
1956 if (gap_end > info->high_limit)
1957 gap_end = info->high_limit;
1958
1959found_highest:
1960
1961 gap_end -= info->length;
1962 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1963
1964 VM_BUG_ON(gap_end < info->low_limit);
1965 VM_BUG_ON(gap_end < gap_start);
1966 return gap_end;
1967}
1980#ifndef HAVE_ARCH_UNMAPPED_AREA
1981unsigned long
1982arch_get_unmapped_area(struct file *filp, unsigned long addr,
1983 unsigned long len, unsigned long pgoff, unsigned long flags)
1984{
1985 struct mm_struct *mm = current->mm;
1986 struct vm_area_struct *vma, *prev;
1987 struct vm_unmapped_area_info info;
1988
1989 if (len > TASK_SIZE - mmap_min_addr)
1990 return -ENOMEM;
1991
1992 if (flags & MAP_FIXED)
1993 return addr;
1994
1995 if (addr) {
1996 addr = PAGE_ALIGN(addr);
1997 vma = find_vma_prev(mm, addr, &prev);
1998 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
1999 (!vma || addr + len <= vm_start_gap(vma)) &&
2000 (!prev || addr >= vm_end_gap(prev)))
2001 return addr;
2002 }
2003
2004 info.flags = 0;
2005 info.length = len;
2006 info.low_limit = mm->mmap_base;
2007 info.high_limit = TASK_SIZE;
2008 info.align_mask = 0;
2009 return vm_unmapped_area(&info);
2010}
2011#endif
2012
2013
2014
2015
2016
2017#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2018unsigned long
2019arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
2020 const unsigned long len, const unsigned long pgoff,
2021 const unsigned long flags)
2022{
2023 struct vm_area_struct *vma, *prev;
2024 struct mm_struct *mm = current->mm;
2025 unsigned long addr = addr0;
2026 struct vm_unmapped_area_info info;
2027
2028
2029 if (len > TASK_SIZE - mmap_min_addr)
2030 return -ENOMEM;
2031
2032 if (flags & MAP_FIXED)
2033 return addr;
2034
2035
2036 if (addr) {
2037 addr = PAGE_ALIGN(addr);
2038 vma = find_vma_prev(mm, addr, &prev);
2039 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
2040 (!vma || addr + len <= vm_start_gap(vma)) &&
2041 (!prev || addr >= vm_end_gap(prev)))
2042 return addr;
2043 }
2044
2045 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2046 info.length = len;
2047 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2048 info.high_limit = mm->mmap_base;
2049 info.align_mask = 0;
2050 addr = vm_unmapped_area(&info);
2051
2052
2053
2054
2055
2056
2057
2058 if (offset_in_page(addr)) {
2059 VM_BUG_ON(addr != -ENOMEM);
2060 info.flags = 0;
2061 info.low_limit = TASK_UNMAPPED_BASE;
2062 info.high_limit = TASK_SIZE;
2063 addr = vm_unmapped_area(&info);
2064 }
2065
2066 return addr;
2067}
2068#endif
2069
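/*
 * get_unmapped_area() - pick an address for a new mapping, delegating to the
 * mm's default hook, the file's ->get_unmapped_area(), or
 * shmem_get_unmapped_area() for shared anonymous mappings (so huge-page
 * friendly placement can be used).  The result is range-checked and passed
 * through security_mmap_addr() before being returned.
 */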
2070unsigned long
2071get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2072 unsigned long pgoff, unsigned long flags)
2073{
2074 unsigned long (*get_area)(struct file *, unsigned long,
2075 unsigned long, unsigned long, unsigned long);
2076
2077 unsigned long error = arch_mmap_check(addr, len, flags);
2078 if (error)
2079 return error;
2080
2081
2082 if (len > TASK_SIZE)
2083 return -ENOMEM;
2084
2085 get_area = current->mm->get_unmapped_area;
2086 if (file) {
2087 if (file->f_op->get_unmapped_area)
2088 get_area = file->f_op->get_unmapped_area;
2089 } else if (flags & MAP_SHARED) {
2090
2091
2092
2093
2094
2095 pgoff = 0;
2096 get_area = shmem_get_unmapped_area;
2097 }
2098
2099 addr = get_area(file, addr, len, pgoff, flags);
2100 if (IS_ERR_VALUE(addr))
2101 return addr;
2102
2103 if (addr > TASK_SIZE - len)
2104 return -ENOMEM;
2105 if (offset_in_page(addr))
2106 return -EINVAL;
2107
2108 error = security_mmap_addr(addr);
2109 return error ? error : addr;
2110}
2111
2112EXPORT_SYMBOL(get_unmapped_area);
2113
2114
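/*
 * find_vma() - look up the first VMA for which addr < vm_end; returns NULL
 * if there is none.  The returned VMA may start above @addr, so callers that
 * need "addr lies inside the VMA" must also check vm_start, e.g.:
 *
 *	vma = find_vma(mm, addr);
 *	if (vma && vma->vm_start <= addr)
 *		... addr is mapped by vma ...
 */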
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct rb_node *rb_node;
	struct vm_area_struct *vma;

	/* Check the per-thread VMA cache first. */
	vma = vmacache_find(mm, addr);
	if (likely(vma))
		return vma;

	rb_node = mm->mm_rb.rb_node;

	while (rb_node) {
		struct vm_area_struct *tmp;

		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);

		if (tmp->vm_end > addr) {
			vma = tmp;
			if (tmp->vm_start <= addr)
				break;
			rb_node = rb_node->rb_left;
		} else
			rb_node = rb_node->rb_right;
	}

	if (vma)
		vmacache_update(addr, vma);
	return vma;
}

EXPORT_SYMBOL(find_vma);
2147
2148
2149
2150
2151struct vm_area_struct *
2152find_vma_prev(struct mm_struct *mm, unsigned long addr,
2153 struct vm_area_struct **pprev)
2154{
2155 struct vm_area_struct *vma;
2156
2157 vma = find_vma(mm, addr);
2158 if (vma) {
2159 *pprev = vma->vm_prev;
2160 } else {
2161 struct rb_node *rb_node = mm->mm_rb.rb_node;
2162 *pprev = NULL;
2163 while (rb_node) {
2164 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2165 rb_node = rb_node->rb_right;
2166 }
2167 }
2168 return vma;
2169}
2170
2171
2172
2173
2174
2175
2176static int acct_stack_growth(struct vm_area_struct *vma,
2177 unsigned long size, unsigned long grow)
2178{
2179 struct mm_struct *mm = vma->vm_mm;
2180 struct rlimit *rlim = current->signal->rlim;
2181 unsigned long new_start;
2182
2183
2184 if (!may_expand_vm(mm, vma->vm_flags, grow))
2185 return -ENOMEM;
2186
2187
2188 if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2189 return -ENOMEM;
2190
2191
2192 if (vma->vm_flags & VM_LOCKED) {
2193 unsigned long locked;
2194 unsigned long limit;
2195 locked = mm->locked_vm + grow;
2196 limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2197 limit >>= PAGE_SHIFT;
2198 if (locked > limit && !capable(CAP_IPC_LOCK))
2199 return -ENOMEM;
2200 }
2201
2202
2203 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2204 vma->vm_end - size;
2205 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2206 return -EFAULT;
2207
2208
2209
2210
2211
2212 if (security_vm_enough_memory_mm(mm, grow))
2213 return -ENOMEM;
2214
2215 return 0;
2216}
2217
2218#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2219
2220
2221
2222
2223int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2224{
2225 struct mm_struct *mm = vma->vm_mm;
2226 struct vm_area_struct *next;
2227 unsigned long gap_addr;
2228 int error = 0;
2229
2230 if (!(vma->vm_flags & VM_GROWSUP))
2231 return -EFAULT;
2232
2233
2234 address &= PAGE_MASK;
2235 if (address >= TASK_SIZE)
2236 return -ENOMEM;
2237 address += PAGE_SIZE;
2238
2239
2240 gap_addr = address + stack_guard_gap;
2241
2242
2243 if (gap_addr < address || gap_addr > TASK_SIZE)
2244 gap_addr = TASK_SIZE;
2245
2246 next = vma->vm_next;
2247 if (next && next->vm_start < gap_addr) {
2248 if (!(next->vm_flags & VM_GROWSUP))
2249 return -ENOMEM;
2250
2251 }
2252
2253
2254 if (unlikely(anon_vma_prepare(vma)))
2255 return -ENOMEM;
2256
2257
2258
2259
2260
2261
2262 anon_vma_lock_write(vma->anon_vma);
2263
2264
2265 if (address > vma->vm_end) {
2266 unsigned long size, grow;
2267
2268 size = address - vma->vm_start;
2269 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2270
2271 error = -ENOMEM;
2272 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2273 error = acct_stack_growth(vma, size, grow);
2274 if (!error) {
				/*
				 * Only a shared mmap_sem is held here, so
				 * take mm->page_table_lock to serialise the
				 * gap-tree and statistics updates below
				 * against concurrent stack expansions.
				 */
2286 spin_lock(&mm->page_table_lock);
2287 if (vma->vm_flags & VM_LOCKED)
2288 mm->locked_vm += grow;
2289 vm_stat_account(mm, vma->vm_flags, grow);
2290 anon_vma_interval_tree_pre_update_vma(vma);
2291 vma->vm_end = address;
2292 anon_vma_interval_tree_post_update_vma(vma);
2293 if (vma->vm_next)
2294 vma_gap_update(vma->vm_next);
2295 else
2296 mm->highest_vm_end = vm_end_gap(vma);
2297 spin_unlock(&mm->page_table_lock);
2298
2299 perf_event_mmap(vma);
2300 }
2301 }
2302 }
2303 anon_vma_unlock_write(vma->anon_vma);
2304 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2305 validate_mm(mm);
2306 return error;
2307}
2308#endif
2309
2310
2311
2312
2313int expand_downwards(struct vm_area_struct *vma,
2314 unsigned long address)
2315{
2316 struct mm_struct *mm = vma->vm_mm;
2317 struct vm_area_struct *prev;
2318 unsigned long gap_addr;
2319 int error;
2320
2321 address &= PAGE_MASK;
2322 error = security_mmap_addr(address);
2323 if (error)
2324 return error;
2325
2326
2327 gap_addr = address - stack_guard_gap;
2328 if (gap_addr > address)
2329 return -ENOMEM;
2330 prev = vma->vm_prev;
2331 if (prev && prev->vm_end > gap_addr) {
2332 if (!(prev->vm_flags & VM_GROWSDOWN))
2333 return -ENOMEM;
2334
2335 }
2336
2337
2338 if (unlikely(anon_vma_prepare(vma)))
2339 return -ENOMEM;
2340
2341
2342
2343
2344
2345
2346 anon_vma_lock_write(vma->anon_vma);
2347
2348
2349 if (address < vma->vm_start) {
2350 unsigned long size, grow;
2351
2352 size = vma->vm_end - address;
2353 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2354
2355 error = -ENOMEM;
2356 if (grow <= vma->vm_pgoff) {
2357 error = acct_stack_growth(vma, size, grow);
2358 if (!error) {
				/*
				 * Only a shared mmap_sem is held here, so
				 * take mm->page_table_lock to serialise the
				 * gap-tree and statistics updates below
				 * against concurrent stack expansions.
				 */
2370 spin_lock(&mm->page_table_lock);
2371 if (vma->vm_flags & VM_LOCKED)
2372 mm->locked_vm += grow;
2373 vm_stat_account(mm, vma->vm_flags, grow);
2374 anon_vma_interval_tree_pre_update_vma(vma);
2375 vma->vm_start = address;
2376 vma->vm_pgoff -= grow;
2377 anon_vma_interval_tree_post_update_vma(vma);
2378 vma_gap_update(vma);
2379 spin_unlock(&mm->page_table_lock);
2380
2381 perf_event_mmap(vma);
2382 }
2383 }
2384 }
2385 anon_vma_unlock_write(vma->anon_vma);
2386 khugepaged_enter_vma_merge(vma, vma->vm_flags);
2387 validate_mm(mm);
2388 return error;
2389}
2390
2391
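/*
 * Gap, in bytes, enforced between an expanding stack and the closest
 * neighbouring mapping.  Defaults to 256 pages and can be changed at boot
 * with stack_guard_gap=<pages>.
 */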
unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;

static int __init cmdline_parse_stack_guard_gap(char *p)
{
	unsigned long val;
	char *endptr;

	val = simple_strtoul(p, &endptr, 10);
	if (!*endptr)
		stack_guard_gap = val << PAGE_SHIFT;

	return 0;
}
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2406
2407#ifdef CONFIG_STACK_GROWSUP
2408int expand_stack(struct vm_area_struct *vma, unsigned long address)
2409{
2410 return expand_upwards(vma, address);
2411}
2412
2413struct vm_area_struct *
2414find_extend_vma(struct mm_struct *mm, unsigned long addr)
2415{
2416 struct vm_area_struct *vma, *prev;
2417
2418 addr &= PAGE_MASK;
2419 vma = find_vma_prev(mm, addr, &prev);
2420 if (vma && (vma->vm_start <= addr))
2421 return vma;
2422 if (!prev || expand_stack(prev, addr))
2423 return NULL;
2424 if (prev->vm_flags & VM_LOCKED)
2425 populate_vma_page_range(prev, addr, prev->vm_end, NULL);
2426 return prev;
2427}
2428#else
2429int expand_stack(struct vm_area_struct *vma, unsigned long address)
2430{
2431 return expand_downwards(vma, address);
2432}
2433
2434struct vm_area_struct *
2435find_extend_vma(struct mm_struct *mm, unsigned long addr)
2436{
2437 struct vm_area_struct *vma;
2438 unsigned long start;
2439
2440 addr &= PAGE_MASK;
2441 vma = find_vma(mm, addr);
2442 if (!vma)
2443 return NULL;
2444 if (vma->vm_start <= addr)
2445 return vma;
2446 if (!(vma->vm_flags & VM_GROWSDOWN))
2447 return NULL;
2448 start = vma->vm_start;
2449 if (expand_stack(vma, addr))
2450 return NULL;
2451 if (vma->vm_flags & VM_LOCKED)
2452 populate_vma_page_range(vma, addr, start, NULL);
2453 return vma;
2454}
2455#endif
2456
2457EXPORT_SYMBOL_GPL(find_extend_vma);
2458
2459
2460
2461
2462
2463
2464
2465static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2466{
2467 unsigned long nr_accounted = 0;
2468
2469
2470 update_hiwater_vm(mm);
2471 do {
2472 long nrpages = vma_pages(vma);
2473
2474 if (vma->vm_flags & VM_ACCOUNT)
2475 nr_accounted += nrpages;
2476 vm_stat_account(mm, vma->vm_flags, -nrpages);
2477 vma = remove_vma(vma);
2478 } while (vma);
2479 vm_unacct_memory(nr_accounted);
2480 validate_mm(mm);
2481}
2482
2483
2484
2485
2486
2487
2488static void unmap_region(struct mm_struct *mm,
2489 struct vm_area_struct *vma, struct vm_area_struct *prev,
2490 unsigned long start, unsigned long end)
2491{
2492 struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap;
2493 struct mmu_gather tlb;
2494
2495 lru_add_drain();
2496 tlb_gather_mmu(&tlb, mm, start, end);
2497 update_hiwater_rss(mm);
2498 unmap_vmas(&tlb, vma, start, end);
2499 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2500 next ? next->vm_start : USER_PGTABLES_CEILING);
2501 tlb_finish_mmu(&tlb, start, end);
2502}
2503
2504
2505
2506
2507
2508static void
2509detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2510 struct vm_area_struct *prev, unsigned long end)
2511{
2512 struct vm_area_struct **insertion_point;
2513 struct vm_area_struct *tail_vma = NULL;
2514
2515 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2516 vma->vm_prev = NULL;
2517 do {
2518 vma_rb_erase(vma, &mm->mm_rb);
2519 mm->map_count--;
2520 tail_vma = vma;
2521 vma = vma->vm_next;
2522 } while (vma && vma->vm_start < end);
2523 *insertion_point = vma;
2524 if (vma) {
2525 vma->vm_prev = prev;
2526 vma_gap_update(vma);
2527 } else
2528 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2529 tail_vma->vm_next = NULL;
2530
2531
2532 vmacache_invalidate(mm);
2533}
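/*
 * __split_vma() - split @vma in two at @addr; the new VMA covers the lower
 * part if @new_below is set, the upper part otherwise.  Unlike split_vma()
 * it does not enforce sysctl_max_map_count itself; do_munmap() below does
 * its own check before calling it.
 */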
2539int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2540 unsigned long addr, int new_below)
2541{
2542 struct vm_area_struct *new;
2543 int err;
2544
2545 if (is_vm_hugetlb_page(vma) && (addr &
2546 ~(huge_page_mask(hstate_vma(vma)))))
2547 return -EINVAL;
2548
2549 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2550 if (!new)
2551 return -ENOMEM;
2552
2553
2554 *new = *vma;
2555
2556 INIT_LIST_HEAD(&new->anon_vma_chain);
2557
2558 if (new_below)
2559 new->vm_end = addr;
2560 else {
2561 new->vm_start = addr;
2562 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2563 }
2564
2565 err = vma_dup_policy(vma, new);
2566 if (err)
2567 goto out_free_vma;
2568
2569 err = anon_vma_clone(new, vma);
2570 if (err)
2571 goto out_free_mpol;
2572
2573 if (new->vm_file)
2574 get_file(new->vm_file);
2575
2576 if (new->vm_ops && new->vm_ops->open)
2577 new->vm_ops->open(new);
2578
2579 if (new_below)
2580 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2581 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2582 else
2583 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2584
2585
2586 if (!err)
2587 return 0;
2588
2589
2590 if (new->vm_ops && new->vm_ops->close)
2591 new->vm_ops->close(new);
2592 if (new->vm_file)
2593 fput(new->vm_file);
2594 unlink_anon_vmas(new);
2595 out_free_mpol:
2596 mpol_put(vma_policy(new));
2597 out_free_vma:
2598 kmem_cache_free(vm_area_cachep, new);
2599 return err;
2600}
2601
2602
2603
2604
2605
2606int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2607 unsigned long addr, int new_below)
2608{
2609 if (mm->map_count >= sysctl_max_map_count)
2610 return -ENOMEM;
2611
2612 return __split_vma(mm, vma, addr, new_below);
2613}
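/*
 * do_munmap() - unmap whatever lies in [start, start + len): split VMAs that
 * straddle either end, detach the affected VMAs from the mm, tear down their
 * page tables and free them.  Returns 0 even if nothing was mapped in the
 * range.  The caller must hold mm->mmap_sem for writing.
 */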
2620int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2621 struct list_head *uf)
2622{
2623 unsigned long end;
2624 struct vm_area_struct *vma, *prev, *last;
2625
2626 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2627 return -EINVAL;
2628
2629 len = PAGE_ALIGN(len);
2630 if (len == 0)
2631 return -EINVAL;
2632
2633
2634 vma = find_vma(mm, start);
2635 if (!vma)
2636 return 0;
2637 prev = vma->vm_prev;
2638
2639
2640
2641 end = start + len;
2642 if (vma->vm_start >= end)
2643 return 0;
2644
2645 if (uf) {
2646 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2647
2648 if (error)
2649 return error;
2650 }
2651
2652
2653
2654
2655
2656
2657
2658
2659 if (start > vma->vm_start) {
2660 int error;
2661
2662
2663
2664
2665
2666
2667 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2668 return -ENOMEM;
2669
2670 error = __split_vma(mm, vma, start, 0);
2671 if (error)
2672 return error;
2673 prev = vma;
2674 }
2675
2676
2677 last = find_vma(mm, end);
2678 if (last && end > last->vm_start) {
2679 int error = __split_vma(mm, last, end, 1);
2680 if (error)
2681 return error;
2682 }
2683 vma = prev ? prev->vm_next : mm->mmap;
2684
2685
2686
2687
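 /*
  * unlock any mlock()ed ranges before detaching vmas
  */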
2688 if (mm->locked_vm) {
2689 struct vm_area_struct *tmp = vma;
2690 while (tmp && tmp->vm_start < end) {
2691 if (tmp->vm_flags & VM_LOCKED) {
2692 mm->locked_vm -= vma_pages(tmp);
2693 munlock_vma_pages_all(tmp);
2694 }
2695 tmp = tmp->vm_next;
2696 }
2697 }
2698
2699
2700
2701
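 /*
  * Remove the vma's, and unmap the actual pages
  */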
2702 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2703 unmap_region(mm, vma, prev, start, end);
2704
2705 arch_unmap(mm, vma, start, end);
2706
2707
2708 remove_vma_list(mm, vma);
2709
2710 return 0;
2711}
2712
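/*
 * vm_munmap() takes mmap_sem for writing; in-kernel callers that already
 * hold the semaphore should call do_munmap() directly instead.
 */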
2713int vm_munmap(unsigned long start, size_t len)
2714{
2715 int ret;
2716 struct mm_struct *mm = current->mm;
2717 LIST_HEAD(uf);
2718
2719 if (down_write_killable(&mm->mmap_sem))
2720 return -EINTR;
2721
2722 ret = do_munmap(mm, start, len, &uf);
2723 up_write(&mm->mmap_sem);
2724 userfaultfd_unmap_complete(mm, &uf);
2725 return ret;
2726}
2727EXPORT_SYMBOL(vm_munmap);
2728
2729SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2730{
2731 profile_munmap(addr);
2732 return vm_munmap(addr, len);
2733}
2734
2735
2736
2737
2738
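/*
 * Emulation of the deprecated remap_file_pages() syscall.
 */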
2739SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2740 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2741{
2742
2743 struct mm_struct *mm = current->mm;
2744 struct vm_area_struct *vma;
2745 unsigned long populate = 0;
2746 unsigned long ret = -EINVAL;
2747 struct file *file;
2748
2749 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
2750 current->comm, current->pid);
2751
2752 if (prot)
2753 return ret;
2754 start = start & PAGE_MASK;
2755 size = size & PAGE_MASK;
2756
2757 if (start + size <= start)
2758 return ret;
2759
2760
2761 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2762 return ret;
2763
2764 if (down_write_killable(&mm->mmap_sem))
2765 return -EINTR;
2766
2767 vma = find_vma(mm, start);
2768
2769 if (!vma || !(vma->vm_flags & VM_SHARED))
2770 goto out;
2771
2772 if (start < vma->vm_start)
2773 goto out;
2774
2775 if (start + size > vma->vm_end) {
2776 struct vm_area_struct *next;
2777
2778 for (next = vma->vm_next; next; next = next->vm_next) {
2779
2780 if (next->vm_start != next->vm_prev->vm_end)
2781 goto out;
2782
2783 if (next->vm_file != vma->vm_file)
2784 goto out;
2785
2786 if (next->vm_flags != vma->vm_flags)
2787 goto out;
2788
2789 if (start + size <= next->vm_end)
2790 break;
2791 }
2792
2793 if (!next)
2794 goto out;
2795 }
2796
2797 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2798 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2799 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2800
2801 flags &= MAP_NONBLOCK;
2802 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2803 if (vma->vm_flags & VM_LOCKED) {
2804 struct vm_area_struct *tmp;
2805 flags |= MAP_LOCKED;
2806
2807
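 /* drop PG_Mlocked flag for over-mapped range */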
2808 for (tmp = vma; tmp->vm_start >= start + size;
2809 tmp = tmp->vm_next) {
2810
2811
2812
2813
2814 vma_adjust_trans_huge(tmp, start, start + size, 0);
2815
2816 munlock_vma_pages_range(tmp,
2817 max(tmp->vm_start, start),
2818 min(tmp->vm_end, start + size));
2819 }
2820 }
2821
2822 file = get_file(vma->vm_file);
2823 ret = do_mmap_pgoff(vma->vm_file, start, size,
2824 prot, flags, pgoff, &populate, NULL);
2825 fput(file);
2826out:
2827 up_write(&mm->mmap_sem);
2828 if (populate)
2829 mm_populate(ret, populate);
2830 if (!IS_ERR_VALUE(ret))
2831 ret = 0;
2832 return ret;
2833}
2834
2835static inline void verify_mm_writelocked(struct mm_struct *mm)
2836{
2837#ifdef CONFIG_DEBUG_VM
2838 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2839 WARN_ON(1);
2840 up_read(&mm->mmap_sem);
2841 }
2842#endif
2843}
2844
2845
2846
2847
2848
2849
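/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */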
2850static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
2851{
2852 struct mm_struct *mm = current->mm;
2853 struct vm_area_struct *vma, *prev;
2854 unsigned long len;
2855 struct rb_node **rb_link, *rb_parent;
2856 pgoff_t pgoff = addr >> PAGE_SHIFT;
2857 int error;
2858
2859 len = PAGE_ALIGN(request);
2860 if (len < request)
2861 return -ENOMEM;
2862 if (!len)
2863 return 0;
2864
2865
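 /* Until we need other flags, refuse anything except VM_EXEC. */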
2866 if ((flags & (~VM_EXEC)) != 0)
2867 return -EINVAL;
2868 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2869
2870 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2871 if (offset_in_page(error))
2872 return error;
2873
2874 error = mlock_future_check(mm, mm->def_flags, len);
2875 if (error)
2876 return error;
2877
2878
2879
2880
2881
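 /*
  * mm->mmap_sem is required to protect against another thread
  * changing the mappings in case we sleep.
  */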
2882 verify_mm_writelocked(mm);
2883
2884
2885
2886
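 /*
  * Clear old maps.  This also does some error checking for us.
  */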
2887 while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
2888 &rb_parent)) {
2889 if (do_munmap(mm, addr, len, uf))
2890 return -ENOMEM;
2891 }
2892
2893
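 /* Check against address space limits *after* clearing old maps... */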
2894 if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
2895 return -ENOMEM;
2896
2897 if (mm->map_count > sysctl_max_map_count)
2898 return -ENOMEM;
2899
2900 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2901 return -ENOMEM;
2902
2903
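 /* Can we just expand an old private anonymous mapping? */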
2904 vma = vma_merge(mm, prev, addr, addr + len, flags,
2905 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
2906 if (vma)
2907 goto out;
2908
2909
2910
2911
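 /*
  * create a vma struct for an anonymous mapping
  */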
2912 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2913 if (!vma) {
2914 vm_unacct_memory(len >> PAGE_SHIFT);
2915 return -ENOMEM;
2916 }
2917
2918 INIT_LIST_HEAD(&vma->anon_vma_chain);
2919 vma->vm_mm = mm;
2920 vma->vm_start = addr;
2921 vma->vm_end = addr + len;
2922 vma->vm_pgoff = pgoff;
2923 vma->vm_flags = flags;
2924 vma->vm_page_prot = vm_get_page_prot(flags);
2925 vma_link(mm, vma, prev, rb_link, rb_parent);
2926out:
2927 perf_event_mmap(vma);
2928 mm->total_vm += len >> PAGE_SHIFT;
2929 mm->data_vm += len >> PAGE_SHIFT;
2930 if (flags & VM_LOCKED)
2931 mm->locked_vm += (len >> PAGE_SHIFT);
2932 vma->vm_flags |= VM_SOFTDIRTY;
2933 return 0;
2934}
2935
2936static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
2937{
2938 return do_brk_flags(addr, len, 0, uf);
2939}
2940
2941int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
2942{
2943 struct mm_struct *mm = current->mm;
2944 int ret;
2945 bool populate;
2946 LIST_HEAD(uf);
2947
2948 if (down_write_killable(&mm->mmap_sem))
2949 return -EINTR;
2950
2951 ret = do_brk_flags(addr, len, flags, &uf);
2952 populate = ((mm->def_flags & VM_LOCKED) != 0);
2953 up_write(&mm->mmap_sem);
2954 userfaultfd_unmap_complete(mm, &uf);
2955 if (populate && !ret)
2956 mm_populate(addr, len);
2957 return ret;
2958}
2959EXPORT_SYMBOL(vm_brk_flags);
2960
2961int vm_brk(unsigned long addr, unsigned long len)
2962{
2963 return vm_brk_flags(addr, len, 0);
2964}
2965EXPORT_SYMBOL(vm_brk);
2966
2967
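/* Release all mmaps. */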
2968void exit_mmap(struct mm_struct *mm)
2969{
2970 struct mmu_gather tlb;
2971 struct vm_area_struct *vma;
2972 unsigned long nr_accounted = 0;
2973
2974
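 /* mm's last user has gone, and its about to be pulled down */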
2975 mmu_notifier_release(mm);
2976
2977 if (mm->locked_vm) {
2978 vma = mm->mmap;
2979 while (vma) {
2980 if (vma->vm_flags & VM_LOCKED)
2981 munlock_vma_pages_all(vma);
2982 vma = vma->vm_next;
2983 }
2984 }
2985
2986 arch_exit_mmap(mm);
2987
2988 vma = mm->mmap;
2989 if (!vma)
2990 return;
2991
2992 lru_add_drain();
2993 flush_cache_mm(mm);
2994 tlb_gather_mmu(&tlb, mm, 0, -1);
2995
2996
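 /* update_hiwater_rss(mm) here? but nobody should be looking */
 /* Use 0 .. -1 here to ensure all VMAs in the mm are unmapped */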
2997 unmap_vmas(&tlb, vma, 0, -1);
2998
2999 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3000 tlb_finish_mmu(&tlb, 0, -1);
3001
3002
3003
3004
3005
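 /*
  * Walk the list again, actually closing and freeing it,
  * with preemption enabled, without holding any MM locks.
  */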
3006 while (vma) {
3007 if (vma->vm_flags & VM_ACCOUNT)
3008 nr_accounted += vma_pages(vma);
3009 vma = remove_vma(vma);
3010 }
3011 vm_unacct_memory(nr_accounted);
3012}
3013
3014
3015
3016
3017
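/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then the i_mmap tree must be locked (to prevent truncation races).
 */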
3018int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3019{
3020 struct vm_area_struct *prev;
3021 struct rb_node **rb_link, *rb_parent;
3022
3023 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3024 &prev, &rb_link, &rb_parent))
3025 return -ENOMEM;
3026 if ((vma->vm_flags & VM_ACCOUNT) &&
3027 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3028 return -ENOMEM;
3029
3041
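 /*
  * The vm_pgoff of a purely anonymous vma should be irrelevant
  * until its first write fault, when page's anon_vma and index
  * are set.  But now set the vm_pgoff it will almost certainly
  * end up with (unless mremap moves it elsewhere before that
  * first wfault), so /proc/pid/maps tells a consistent story.
  *
  * By setting it to reflect the virtual start address of the
  * vma, merges and splits can happen in a seamless way, just
  * using the existing file pgoff checks and manipulations.
  * Similarly in do_mmap_pgoff and in do_brk.
  */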
3042 if (vma_is_anonymous(vma)) {
3043 BUG_ON(vma->anon_vma);
3044 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3045 }
3046
3047 vma_link(mm, vma, prev, rb_link, rb_parent);
3048 return 0;
3049}
3050
3051
3052
3053
3054
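/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */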
3055struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3056 unsigned long addr, unsigned long len, pgoff_t pgoff,
3057 bool *need_rmap_locks)
3058{
3059 struct vm_area_struct *vma = *vmap;
3060 unsigned long vma_start = vma->vm_start;
3061 struct mm_struct *mm = vma->vm_mm;
3062 struct vm_area_struct *new_vma, *prev;
3063 struct rb_node **rb_link, *rb_parent;
3064 bool faulted_in_anon_vma = true;
3065
3066
3067
3068
3069
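 /*
  * If anonymous vma has not yet been faulted, update new pgoff
  * to match new location, to increase its chance of merging.
  */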
3070 if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
3071 pgoff = addr >> PAGE_SHIFT;
3072 faulted_in_anon_vma = false;
3073 }
3074
3075 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3076 return NULL;
3077 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3078 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3079 vma->vm_userfaultfd_ctx);
3080 if (new_vma) {
3081
3082
3083
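 /*
  * Source vma may have been merged into new_vma
  */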
3084 if (unlikely(vma_start >= new_vma->vm_start &&
3085 vma_start < new_vma->vm_end)) {
3097
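 /*
  * The only way we can get a vma_merge with
  * self during an mremap is if the vma hasn't
  * been faulted in yet and we were allowed to
  * reset the dst vma->vm_pgoff to the
  * destination address of the mremap to allow
  * the merge to happen. mremap must change the
  * vm_pgoff linearity between src and dst vmas
  * (in turn preventing a vma_merge) to be
  * safe. It is only safe to keep the vm_pgoff
  * linear if there are no pages mapped yet.
  */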
3098 VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
3099 *vmap = vma = new_vma;
3100 }
3101 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3102 } else {
3103 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
3104 if (!new_vma)
3105 goto out;
3106 *new_vma = *vma;
3107 new_vma->vm_start = addr;
3108 new_vma->vm_end = addr + len;
3109 new_vma->vm_pgoff = pgoff;
3110 if (vma_dup_policy(vma, new_vma))
3111 goto out_free_vma;
3112 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
3113 if (anon_vma_clone(new_vma, vma))
3114 goto out_free_mempol;
3115 if (new_vma->vm_file)
3116 get_file(new_vma->vm_file);
3117 if (new_vma->vm_ops && new_vma->vm_ops->open)
3118 new_vma->vm_ops->open(new_vma);
3119 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3120 *need_rmap_locks = false;
3121 }
3122 return new_vma;
3123
3124out_free_mempol:
3125 mpol_put(vma_policy(new_vma));
3126out_free_vma:
3127 kmem_cache_free(vm_area_cachep, new_vma);
3128out:
3129 return NULL;
3130}
3131
3132
3133
3134
3135
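/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */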
3136bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
3137{
3138 if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
3139 return false;
3140
3141 if (is_data_mapping(flags) &&
3142 mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
3143
3144 if (rlimit(RLIMIT_DATA) == 0 &&
3145 mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
3146 return true;
3147 if (!ignore_rlimit_data) {
3148 pr_warn_once("%s (%d): VmData %lu exceeds data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n",
3149 current->comm, current->pid,
3150 (mm->data_vm + npages) << PAGE_SHIFT,
3151 rlimit(RLIMIT_DATA));
3152 return false;
3153 }
3154 }
3155
3156 return true;
3157}
3158
3159void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
3160{
3161 mm->total_vm += npages;
3162
3163 if (is_exec_mapping(flags))
3164 mm->exec_vm += npages;
3165 else if (is_stack_mapping(flags))
3166 mm->stack_vm += npages;
3167 else if (is_data_mapping(flags))
3168 mm->data_vm += npages;
3169}
3170
3171static int special_mapping_fault(struct vm_fault *vmf);
3172
3173
3174
3175
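/*
 * Having a close hook prevents vma merging regardless of flags.
 */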
3176static void special_mapping_close(struct vm_area_struct *vma)
3177{
3178}
3179
3180static const char *special_mapping_name(struct vm_area_struct *vma)
3181{
3182 return ((struct vm_special_mapping *)vma->vm_private_data)->name;
3183}
3184
3185static int special_mapping_mremap(struct vm_area_struct *new_vma)
3186{
3187 struct vm_special_mapping *sm = new_vma->vm_private_data;
3188
3189 if (sm->mremap)
3190 return sm->mremap(sm, new_vma);
3191 return 0;
3192}
3193
3194static const struct vm_operations_struct special_mapping_vmops = {
3195 .close = special_mapping_close,
3196 .fault = special_mapping_fault,
3197 .mremap = special_mapping_mremap,
3198 .name = special_mapping_name,
3199};
3200
3201static const struct vm_operations_struct legacy_special_mapping_vmops = {
3202 .close = special_mapping_close,
3203 .fault = special_mapping_fault,
3204};
3205
3206static int special_mapping_fault(struct vm_fault *vmf)
3207{
3208 struct vm_area_struct *vma = vmf->vma;
3209 pgoff_t pgoff;
3210 struct page **pages;
3211
3212 if (vma->vm_ops == &legacy_special_mapping_vmops) {
3213 pages = vma->vm_private_data;
3214 } else {
3215 struct vm_special_mapping *sm = vma->vm_private_data;
3216
3217 if (sm->fault)
3218 return sm->fault(sm, vmf->vma, vmf);
3219
3220 pages = sm->pages;
3221 }
3222
3223 for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
3224 pgoff--;
3225
3226 if (*pages) {
3227 struct page *page = *pages;
3228 get_page(page);
3229 vmf->page = page;
3230 return 0;
3231 }
3232
3233 return VM_FAULT_SIGBUS;
3234}
3235
3236static struct vm_area_struct *__install_special_mapping(
3237 struct mm_struct *mm,
3238 unsigned long addr, unsigned long len,
3239 unsigned long vm_flags, void *priv,
3240 const struct vm_operations_struct *ops)
3241{
3242 int ret;
3243 struct vm_area_struct *vma;
3244
3245 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
3246 if (unlikely(vma == NULL))
3247 return ERR_PTR(-ENOMEM);
3248
3249 INIT_LIST_HEAD(&vma->anon_vma_chain);
3250 vma->vm_mm = mm;
3251 vma->vm_start = addr;
3252 vma->vm_end = addr + len;
3253
3254 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3255 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3256
3257 vma->vm_ops = ops;
3258 vma->vm_private_data = priv;
3259
3260 ret = insert_vm_struct(mm, vma);
3261 if (ret)
3262 goto out;
3263
3264 vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);
3265
3266 perf_event_mmap(vma);
3267
3268 return vma;
3269
3270out:
3271 kmem_cache_free(vm_area_cachep, vma);
3272 return ERR_PTR(ret);
3273}
3274
3275bool vma_is_special_mapping(const struct vm_area_struct *vma,
3276 const struct vm_special_mapping *sm)
3277{
3278 return vma->vm_private_data == sm &&
3279 (vma->vm_ops == &special_mapping_vmops ||
3280 vma->vm_ops == &legacy_special_mapping_vmops);
3281}
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
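/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new special mapping covering the given region, with the given
 * flags.  Faults are satisfied either by spec->fault, if set, or from the
 * NULL-terminated spec->pages array; addresses past the last supplied page
 * produce SIGBUS.  The spec, and the pages it points to, must stay alive
 * for as long as the mapping might exist.
 *
 * A sketch of a typical caller, e.g. an arch vDSO setup (the names below
 * are illustrative, not defined in this file):
 *
 *	static const struct vm_special_mapping vdso_mapping = {
 *		.name	= "[vdso]",
 *		.pages	= vdso_pages,
 *	};
 *
 *	vma = _install_special_mapping(mm, addr, len,
 *			VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
 *			&vdso_mapping);
 */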
3292struct vm_area_struct *_install_special_mapping(
3293 struct mm_struct *mm,
3294 unsigned long addr, unsigned long len,
3295 unsigned long vm_flags, const struct vm_special_mapping *spec)
3296{
3297 return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
3298 &special_mapping_vmops);
3299}
3300
3301int install_special_mapping(struct mm_struct *mm,
3302 unsigned long addr, unsigned long len,
3303 unsigned long vm_flags, struct page **pages)
3304{
3305 struct vm_area_struct *vma = __install_special_mapping(
3306 mm, addr, len, vm_flags, (void *)pages,
3307 &legacy_special_mapping_vmops);
3308
3309 return PTR_ERR_OR_ZERO(vma);
3310}
3311
3312static DEFINE_MUTEX(mm_all_locks_mutex);
3313
3314static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3315{
3316 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3317
3318
3319
3320
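 /*
  * The LSB of head.next can't change from under us
  * because we hold the mm_all_locks_mutex.
  */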
3321 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331 if (__test_and_set_bit(0, (unsigned long *)
3332 &anon_vma->root->rb_root.rb_node))
3333 BUG();
3334 }
3335}
3336
3337static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3338{
3339 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3350 BUG();
3351 down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
3352 }
3353}
3354
3355
3391
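/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout.  It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row,
 * as the locks can only be released by mm_drop_all_locks().
 *
 * Locking is done per anon_vma root and per address_space; the
 * AS_MM_ALL_LOCKS bit and the anon_vma rb_root LSB mark what is already
 * held so nothing is taken twice, and mm_all_locks_mutex serializes
 * concurrent callers.
 */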
3392int mm_take_all_locks(struct mm_struct *mm)
3393{
3394 struct vm_area_struct *vma;
3395 struct anon_vma_chain *avc;
3396
3397 BUG_ON(down_read_trylock(&mm->mmap_sem));
3398
3399 mutex_lock(&mm_all_locks_mutex);
3400
3401 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3402 if (signal_pending(current))
3403 goto out_unlock;
3404 if (vma->vm_file && vma->vm_file->f_mapping &&
3405 is_vm_hugetlb_page(vma))
3406 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3407 }
3408
3409 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3410 if (signal_pending(current))
3411 goto out_unlock;
3412 if (vma->vm_file && vma->vm_file->f_mapping &&
3413 !is_vm_hugetlb_page(vma))
3414 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3415 }
3416
3417 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3418 if (signal_pending(current))
3419 goto out_unlock;
3420 if (vma->anon_vma)
3421 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3422 vm_lock_anon_vma(mm, avc->anon_vma);
3423 }
3424
3425 return 0;
3426
3427out_unlock:
3428 mm_drop_all_locks(mm);
3429 return -EINTR;
3430}
3431
3432static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3433{
3434 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447 if (!__test_and_clear_bit(0, (unsigned long *)
3448 &anon_vma->root->rb_root.rb_node))
3449 BUG();
3450 anon_vma_unlock_write(anon_vma);
3451 }
3452}
3453
3454static void vm_unlock_mapping(struct address_space *mapping)
3455{
3456 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3457
3458
3459
3460
3461 i_mmap_unlock_write(mapping);
3462 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3463 &mapping->flags))
3464 BUG();
3465 }
3466}
3467
3468
3469
3470
3471
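/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */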
3472void mm_drop_all_locks(struct mm_struct *mm)
3473{
3474 struct vm_area_struct *vma;
3475 struct anon_vma_chain *avc;
3476
3477 BUG_ON(down_read_trylock(&mm->mmap_sem));
3478 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3479
3480 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3481 if (vma->anon_vma)
3482 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3483 vm_unlock_anon_vma(avc->anon_vma);
3484 if (vma->vm_file && vma->vm_file->f_mapping)
3485 vm_unlock_mapping(vma->vm_file->f_mapping);
3486 }
3487
3488 mutex_unlock(&mm_all_locks_mutex);
3489}
3490
3491
3492
3493
3494void __init mmap_init(void)
3495{
3496 int ret;
3497
3498 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3499 VM_BUG_ON(ret);
3500}
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
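/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OOM_DISABLE mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */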
3512static int init_user_reserve(void)
3513{
3514 unsigned long free_kbytes;
3515
3516 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3517
3518 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3519 return 0;
3520}
3521subsys_initcall(init_user_reserve);
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
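/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB), enough to recover
 * with sshd, bash, and top/kill at least.
 */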
3533static int init_admin_reserve(void)
3534{
3535 unsigned long free_kbytes;
3536
3537 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3538
3539 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3540 return 0;
3541}
3542subsys_initcall(init_admin_reserve);
3543
3561
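/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */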
3562static int reserve_mem_notifier(struct notifier_block *nb,
3563 unsigned long action, void *data)
3564{
3565 unsigned long tmp, free_kbytes;
3566
3567 switch (action) {
3568 case MEM_ONLINE:
3569
3570 tmp = sysctl_user_reserve_kbytes;
3571 if (0 < tmp && tmp < (1UL << 17))
3572 init_user_reserve();
3573
3574
3575 tmp = sysctl_admin_reserve_kbytes;
3576 if (0 < tmp && tmp < (1UL << 13))
3577 init_admin_reserve();
3578
3579 break;
3580 case MEM_OFFLINE:
3581 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3582
3583 if (sysctl_user_reserve_kbytes > free_kbytes) {
3584 init_user_reserve();
3585 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3586 sysctl_user_reserve_kbytes);
3587 }
3588
3589 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3590 init_admin_reserve();
3591 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3592 sysctl_admin_reserve_kbytes);
3593 }
3594 break;
3595 default:
3596 break;
3597 }
3598 return NOTIFY_OK;
3599}
3600
3601static struct notifier_block reserve_mem_nb = {
3602 .notifier_call = reserve_mem_notifier,
3603};
3604
3605static int __meminit init_reserve_notifier(void)
3606{
3607 if (register_hotmemory_notifier(&reserve_mem_nb))
3608 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3609
3610 return 0;
3611}
3612subsys_initcall(init_reserve_notifier);
3613