/*
 * mm/mmap.c
 *
 * Memory mapping and virtual memory area (VMA) management.
 */
9#include <linux/kernel.h>
10#include <linux/slab.h>
11#include <linux/backing-dev.h>
12#include <linux/mm.h>
13#include <linux/shm.h>
14#include <linux/mman.h>
15#include <linux/pagemap.h>
16#include <linux/swap.h>
17#include <linux/syscalls.h>
18#include <linux/capability.h>
19#include <linux/init.h>
20#include <linux/file.h>
21#include <linux/fs.h>
22#include <linux/personality.h>
23#include <linux/security.h>
24#include <linux/hugetlb.h>
25#include <linux/profile.h>
26#include <linux/export.h>
27#include <linux/mount.h>
28#include <linux/mempolicy.h>
29#include <linux/rmap.h>
30#include <linux/mmu_notifier.h>
31#include <linux/perf_event.h>
32#include <linux/audit.h>
33#include <linux/khugepaged.h>
34#include <linux/uprobes.h>
35#include <linux/rbtree_augmented.h>
36#include <linux/sched/sysctl.h>
37#include <linux/notifier.h>
38#include <linux/memory.h>
39#include <linux/userfaultfd_k.h>
40#include <linux/pkeys.h>
41
42#include <asm/uaccess.h>
43#include <asm/cacheflush.h>
44#include <asm/tlb.h>
45#include <asm/mmu_context.h>
46
47#include "internal.h"
48
49#ifndef arch_mmap_check
50#define arch_mmap_check(addr, len, flags) (0)
51#endif
52
53#ifndef arch_rebalance_pgtables
54#define arch_rebalance_pgtables(addr, len) (addr)
55#endif
56
57#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
58const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
59const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
60int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
61#endif
62#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
63const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
64const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
65int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
66#endif
67
68
69static void unmap_region(struct mm_struct *mm,
70 struct vm_area_struct *vma, struct vm_area_struct *prev,
71 unsigned long start, unsigned long end);
72
/*
 * protection_map[] translates the low VM_READ|VM_WRITE|VM_EXEC|VM_SHARED
 * bits of vm_flags into architecture page protections: the __P<rwx>
 * entries describe private (copy-on-write) mappings and the __S<rwx>
 * entries describe shared mappings.
 */
88pgprot_t protection_map[16] = {
89 __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
90 __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
91};
92
93pgprot_t vm_get_page_prot(unsigned long vm_flags)
94{
95 return __pgprot(pgprot_val(protection_map[vm_flags &
96 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
97 pgprot_val(arch_vm_get_page_prot(vm_flags)));
98}
99EXPORT_SYMBOL(vm_get_page_prot);
100
101static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
102{
103 return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
104}
105
106
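/* Update vma->vm_page_prot to reflect vma->vm_flags. */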
107void vma_set_page_prot(struct vm_area_struct *vma)
108{
109 unsigned long vm_flags = vma->vm_flags;
110 pgprot_t vm_page_prot;
111
112 vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
113 if (vma_wants_writenotify(vma, vm_page_prot)) {
114 vm_flags &= ~VM_SHARED;
115 vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
116 }
117
118
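	/*
	 * vm_page_prot may be read locklessly elsewhere, so publish the
	 * new value with WRITE_ONCE().
	 */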
119 WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
120}
121
122
123int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
124int sysctl_overcommit_ratio __read_mostly = 50;
125unsigned long sysctl_overcommit_kbytes __read_mostly;
126int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
127unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17;
128unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13;
129
130
131
132
133struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
134
135
136
137
138
139
140
141
142
143unsigned long vm_memory_committed(void)
144{
145 return percpu_counter_read_positive(&vm_committed_as);
146}
147EXPORT_SYMBOL_GPL(vm_memory_committed);
148
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping.  Returns 0 if there is enough memory (the pages are then
 * charged via vm_acct_memory()) and -ENOMEM if there is not, in which
 * case the accounting is undone before returning.
 *
 * The behaviour depends on sysctl_overcommit_memory: OVERCOMMIT_ALWAYS
 * never refuses, OVERCOMMIT_GUESS uses a free-memory heuristic, and the
 * strict mode compares the total commitment against vm_commit_limit(),
 * minus reserves kept for root (cap_sys_admin) and for the current
 * process.
 */
165int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
166{
167 long free, allowed, reserve;
168
169 vm_acct_memory(pages);
170
171
172
173
174 if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
175 return 0;
176
177 if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
178 free = global_page_state(NR_FREE_PAGES);
179 free += global_page_state(NR_FILE_PAGES);
180
181
182
183
184
185
186
187 free -= global_page_state(NR_SHMEM);
188
189 free += get_nr_swap_pages();
190
191
192
193
194
195
196
197 free += global_page_state(NR_SLAB_RECLAIMABLE);
198
199
200
201
202 if (free <= totalreserve_pages)
203 goto error;
204 else
205 free -= totalreserve_pages;
206
207
208
209
210 if (!cap_sys_admin)
211 free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
212
213 if (free > pages)
214 return 0;
215
216 goto error;
217 }
218
219 allowed = vm_commit_limit();
220
221
222
223 if (!cap_sys_admin)
224 allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
225
226
227
228
229 if (mm) {
230 reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
231 allowed -= min_t(long, mm->total_vm / 32, reserve);
232 }
233
234 if (percpu_counter_read_positive(&vm_committed_as) < allowed)
235 return 0;
236error:
237 vm_unacct_memory(pages);
238
239 return -ENOMEM;
240}
241
242
243
244
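/*
 * Remove a vma from its file mapping's i_mmap structures.  The caller
 * must hold mapping->i_mmap_mutex.
 */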
245static void __remove_shared_vm_struct(struct vm_area_struct *vma,
246 struct file *file, struct address_space *mapping)
247{
248 if (vma->vm_flags & VM_DENYWRITE)
249 atomic_inc(&file_inode(file)->i_writecount);
250 if (vma->vm_flags & VM_SHARED)
251 mapping_unmap_writable(mapping);
252
253 flush_dcache_mmap_lock(mapping);
254 if (unlikely(vma->vm_flags & VM_NONLINEAR))
255 list_del_init(&vma->shared.nonlinear);
256 else
257 vma_interval_tree_remove(vma, &mapping->i_mmap);
258 flush_dcache_mmap_unlock(mapping);
259}
260
261
262
263
264
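/*
 * Unlink a file-backed vma from its address_space so rmap and
 * truncation can no longer find it before its page tables are freed.
 */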
265void unlink_file_vma(struct vm_area_struct *vma)
266{
267 struct file *file = vma->vm_file;
268
269 if (file) {
270 struct address_space *mapping = file->f_mapping;
271 mutex_lock(&mapping->i_mmap_mutex);
272 __remove_shared_vm_struct(vma, file, mapping);
273 mutex_unlock(&mapping->i_mmap_mutex);
274 }
275}
276
277
278
279
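/*
 * Close a vm structure and free it, returning the next vma in the list.
 */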
280static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
281{
282 struct vm_area_struct *next = vma->vm_next;
283
284 might_sleep();
285 if (vma->vm_ops && vma->vm_ops->close)
286 vma->vm_ops->close(vma);
287 if (vma->vm_file)
288 fput(vma->vm_file);
289 mpol_put(vma_policy(vma));
290 kmem_cache_free(vm_area_cachep, vma);
291 return next;
292}
293
294static unsigned long do_brk(unsigned long addr, unsigned long len,
295 struct list_head *uf);
296
297SYSCALL_DEFINE1(brk, unsigned long, brk)
298{
299 unsigned long rlim, retval;
300 unsigned long newbrk, oldbrk;
301 struct mm_struct *mm = current->mm;
302 struct vm_area_struct *next;
303 unsigned long min_brk;
304 bool populate;
305 LIST_HEAD(uf);
306
307 down_write(&mm->mmap_sem);
308
309#ifdef CONFIG_COMPAT_BRK
310
311
312
313
314
315 if (current->brk_randomized)
316 min_brk = mm->start_brk;
317 else
318 min_brk = mm->end_data;
319#else
320 min_brk = mm->start_brk;
321#endif
322 if (brk < min_brk)
323 goto out;
324
325
326
327
328
329
330
331 rlim = rlimit(RLIMIT_DATA);
332 if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
333 (mm->end_data - mm->start_data) > rlim)
334 goto out;
335
336 newbrk = PAGE_ALIGN(brk);
337 oldbrk = PAGE_ALIGN(mm->brk);
338 if (oldbrk == newbrk)
339 goto set_brk;
340
341
342 if (brk <= mm->brk) {
343 if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
344 goto set_brk;
345 goto out;
346 }
347
348
349 next = find_vma(mm, oldbrk);
350 if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
351 goto out;
352
353
354 if (do_brk(oldbrk, newbrk-oldbrk, &uf) != oldbrk)
355 goto out;
356
357set_brk:
358 mm->brk = brk;
359 populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
360 up_write(&mm->mmap_sem);
361 userfaultfd_unmap_complete(mm, &uf);
362 if (populate)
363 mm_populate(oldbrk, newbrk - oldbrk);
364 return brk;
365
366out:
367 retval = mm->brk;
368 up_write(&mm->mmap_sem);
369 return retval;
370}
371
372static long vma_compute_subtree_gap(struct vm_area_struct *vma)
373{
374 unsigned long max, prev_end, subtree_gap;
375
376
377
378
379
380
381
382 max = vm_start_gap(vma);
383 if (vma->vm_prev) {
384 prev_end = vm_end_gap(vma->vm_prev);
385 if (max > prev_end)
386 max -= prev_end;
387 else
388 max = 0;
389 }
390 if (vma->vm_rb.rb_left) {
391 subtree_gap = rb_entry(vma->vm_rb.rb_left,
392 struct vm_area_struct, vm_rb)->rb_subtree_gap;
393 if (subtree_gap > max)
394 max = subtree_gap;
395 }
396 if (vma->vm_rb.rb_right) {
397 subtree_gap = rb_entry(vma->vm_rb.rb_right,
398 struct vm_area_struct, vm_rb)->rb_subtree_gap;
399 if (subtree_gap > max)
400 max = subtree_gap;
401 }
402 return max;
403}
404
405#ifdef CONFIG_DEBUG_VM_RB
406static int browse_rb(struct rb_root *root)
407{
408 int i = 0, j, bug = 0;
409 struct rb_node *nd, *pn = NULL;
410 unsigned long prev = 0, pend = 0;
411
412 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
413 struct vm_area_struct *vma;
414 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
415 if (vma->vm_start < prev) {
416 printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
417 bug = 1;
418 }
419 if (vma->vm_start < pend) {
420 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
421 bug = 1;
422 }
423 if (vma->vm_start > vma->vm_end) {
424 printk("vm_end %lx < vm_start %lx\n",
425 vma->vm_end, vma->vm_start);
426 bug = 1;
427 }
428 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
429 printk("free gap %lx, correct %lx\n",
430 vma->rb_subtree_gap,
431 vma_compute_subtree_gap(vma));
432 bug = 1;
433 }
434 i++;
435 pn = nd;
436 prev = vma->vm_start;
437 pend = vma->vm_end;
438 }
439 j = 0;
440 for (nd = pn; nd; nd = rb_prev(nd))
441 j++;
442 if (i != j) {
443 printk("backwards %d, forwards %d\n", j, i);
444 bug = 1;
445 }
446 return bug ? -1 : i;
447}
448
449static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
450{
451 struct rb_node *nd;
452
453 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
454 struct vm_area_struct *vma;
455 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
456 BUG_ON(vma != ignore &&
457 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
458 }
459}
460
461void validate_mm(struct mm_struct *mm)
462{
463 int bug = 0;
464 int i = 0;
465 unsigned long highest_address = 0;
466 struct vm_area_struct *vma = mm->mmap;
467 while (vma) {
468 struct anon_vma_chain *avc;
469 vma_lock_anon_vma(vma);
470 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
471 anon_vma_interval_tree_verify(avc);
472 vma_unlock_anon_vma(vma);
473 highest_address = vm_end_gap(vma);
474 vma = vma->vm_next;
475 i++;
476 }
477 if (i != mm->map_count) {
478 printk("map_count %d vm_next %d\n", mm->map_count, i);
479 bug = 1;
480 }
481 if (highest_address != mm->highest_vm_end) {
482 printk("mm->highest_vm_end %lx, found %lx\n",
483 mm->highest_vm_end, highest_address);
484 bug = 1;
485 }
486 i = browse_rb(&mm->mm_rb);
487 if (i != mm->map_count) {
488 printk("map_count %d rb %d\n", mm->map_count, i);
489 bug = 1;
490 }
491 BUG_ON(bug);
492}
493#else
494#define validate_mm_rb(root, ignore) do { } while (0)
495#define validate_mm(mm) do { } while (0)
496#endif
497
498RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
499 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
500
501
502
503
504
505
506static void vma_gap_update(struct vm_area_struct *vma)
507{
508
509
510
511
512 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
513}
514
515static inline void vma_rb_insert(struct vm_area_struct *vma,
516 struct rb_root *root)
517{
518
519 validate_mm_rb(root, NULL);
520
521 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
522}
523
524static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
525{
526
527
528
529
530
531 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
532}
533
534static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
535 struct rb_root *root,
536 struct vm_area_struct *ignore)
537{
538
539
540
541
542
543 validate_mm_rb(root, ignore);
544
545 __vma_rb_erase(vma, root);
546}
547
548static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
549 struct rb_root *root)
550{
551
552
553
554
555 validate_mm_rb(root, vma);
556
557 __vma_rb_erase(vma, root);
558}
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
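/*
 * Helpers for __vma_adjust() and the stack expanders: drop all of a
 * vma's anon_vma interval tree nodes before vm_start/vm_end/vm_pgoff
 * are changed, and re-insert them afterwards so the interval tree stays
 * consistent with the new layout.
 */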
574static inline void
575anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
576{
577 struct anon_vma_chain *avc;
578
579 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
580 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
581}
582
583static inline void
584anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
585{
586 struct anon_vma_chain *avc;
587
588 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
589 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
590}
591
592static int find_vma_links(struct mm_struct *mm, unsigned long addr,
593 unsigned long end, struct vm_area_struct **pprev,
594 struct rb_node ***rb_link, struct rb_node **rb_parent)
595{
596 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
597
598 __rb_link = &mm->mm_rb.rb_node;
599 rb_prev = __rb_parent = NULL;
600
601 while (*__rb_link) {
602 struct vm_area_struct *vma_tmp;
603
604 __rb_parent = *__rb_link;
605 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
606
607 if (vma_tmp->vm_end > addr) {
608
609 if (vma_tmp->vm_start < end)
610 return -ENOMEM;
611 __rb_link = &__rb_parent->rb_left;
612 } else {
613 rb_prev = __rb_parent;
614 __rb_link = &__rb_parent->rb_right;
615 }
616 }
617
618 *pprev = NULL;
619 if (rb_prev)
620 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
621 *rb_link = __rb_link;
622 *rb_parent = __rb_parent;
623 return 0;
624}
625
626static unsigned long count_vma_pages_range(struct mm_struct *mm,
627 unsigned long addr, unsigned long end)
628{
629 unsigned long nr_pages = 0;
630 struct vm_area_struct *vma;
631
632
633 vma = find_vma_intersection(mm, addr, end);
634 if (!vma)
635 return 0;
636
637 nr_pages = (min(end, vma->vm_end) -
638 max(addr, vma->vm_start)) >> PAGE_SHIFT;
639
640
641 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
642 unsigned long overlap_len;
643
644 if (vma->vm_start > end)
645 break;
646
647 overlap_len = min(end, vma->vm_end) - vma->vm_start;
648 nr_pages += overlap_len >> PAGE_SHIFT;
649 }
650
651 return nr_pages;
652}
653
654void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
655 struct rb_node **rb_link, struct rb_node *rb_parent)
656{
657
658 if (vma->vm_next)
659 vma_gap_update(vma->vm_next);
660 else
661 mm->highest_vm_end = vm_end_gap(vma);
662
663
664
665
666
667
668
669
670
671
672 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
673 vma->rb_subtree_gap = 0;
674 vma_gap_update(vma);
675 vma_rb_insert(vma, &mm->mm_rb);
676}
677
678static void __vma_link_file(struct vm_area_struct *vma)
679{
680 struct file *file;
681
682 file = vma->vm_file;
683 if (file) {
684 struct address_space *mapping = file->f_mapping;
685
686 if (vma->vm_flags & VM_DENYWRITE)
687 atomic_dec(&file_inode(file)->i_writecount);
688 if (vma->vm_flags & VM_SHARED)
689 atomic_inc(&mapping->i_mmap_writable);
690
691 flush_dcache_mmap_lock(mapping);
692 if (unlikely(vma->vm_flags & VM_NONLINEAR))
693 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
694 else
695 vma_interval_tree_insert(vma, &mapping->i_mmap);
696 flush_dcache_mmap_unlock(mapping);
697 }
698}
699
700static void
701__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
702 struct vm_area_struct *prev, struct rb_node **rb_link,
703 struct rb_node *rb_parent)
704{
705 __vma_link_list(mm, vma, prev, rb_parent);
706 __vma_link_rb(mm, vma, rb_link, rb_parent);
707}
708
709static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
710 struct vm_area_struct *prev, struct rb_node **rb_link,
711 struct rb_node *rb_parent)
712{
713 struct address_space *mapping = NULL;
714
715 if (vma->vm_file)
716 mapping = vma->vm_file->f_mapping;
717
718 if (mapping)
719 mutex_lock(&mapping->i_mmap_mutex);
720
721 __vma_link(mm, vma, prev, rb_link, rb_parent);
722 __vma_link_file(vma);
723
724 if (mapping)
725 mutex_unlock(&mapping->i_mmap_mutex);
726
727 mm->map_count++;
728 validate_mm(mm);
729}
730
731
732
733
734
735static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
736{
737 struct vm_area_struct *prev;
738 struct rb_node **rb_link, *rb_parent;
739
740 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
741 &prev, &rb_link, &rb_parent))
742 BUG();
743 __vma_link(mm, vma, prev, rb_link, rb_parent);
744 mm->map_count++;
745}
746
747static __always_inline void __vma_unlink_common(struct mm_struct *mm,
748 struct vm_area_struct *vma,
749 struct vm_area_struct *prev,
750 bool has_prev,
751 struct vm_area_struct *ignore)
752{
753 struct vm_area_struct *next;
754
755 vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
756 next = vma->vm_next;
757 if (has_prev)
758 prev->vm_next = next;
759 else {
760 prev = vma->vm_prev;
761 if (prev)
762 prev->vm_next = next;
763 else
764 mm->mmap = next;
765 }
766 if (next)
767 next->vm_prev = prev;
768 if (mm->mmap_cache == vma)
769 mm->mmap_cache = prev;
770}
771
772static inline void __vma_unlink_prev(struct mm_struct *mm,
773 struct vm_area_struct *vma,
774 struct vm_area_struct *prev)
775{
776 __vma_unlink_common(mm, vma, prev, true, vma);
777}
778
/*
 * We cannot adjust vm_start, vm_end or vm_pgoff of a vma that is linked
 * into the i_mmap or anon_vma interval trees without also updating the
 * trees.  __vma_adjust() does the adjustment and the tree maintenance
 * together, optionally inserting a new vma ('insert') or removing and
 * freeing the following vma when the adjustment swallows it.
 */
786int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
787 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
788 struct vm_area_struct *expand)
789{
790 struct mm_struct *mm = vma->vm_mm;
791 struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
792 struct address_space *mapping = NULL;
793 struct rb_root *root = NULL;
794 struct anon_vma *anon_vma = NULL;
795 struct file *file = vma->vm_file;
796 bool start_changed = false, end_changed = false;
797 long adjust_next = 0;
798 int remove_next = 0;
799
800 if (next && !insert) {
801 struct vm_area_struct *exporter = NULL, *importer = NULL;
802
803 if (end >= next->vm_end) {
804
805
806
807
808
809
810 if (next == expand) {
811
812
813
814
815 VM_WARN_ON(end != next->vm_end);
816
817
818
819
820
821 remove_next = 3;
822 VM_WARN_ON(file != next->vm_file);
823 swap(vma, next);
824 } else {
825 VM_WARN_ON(expand != vma);
826
827
828
829
830 remove_next = 1 + (end > next->vm_end);
831 VM_WARN_ON(remove_next == 2 &&
832 end != next->vm_next->vm_end);
833
834 end = next->vm_end;
835 }
836
837 exporter = next;
838 importer = vma;
839
840
841
842
843
844 if (remove_next == 2 && !next->anon_vma)
845 exporter = next->vm_next;
846
847 } else if (end > next->vm_start) {
848
849
850
851
852 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
853 exporter = next;
854 importer = vma;
855 VM_WARN_ON(expand != importer);
856 } else if (end < vma->vm_end) {
857
858
859
860
861
862 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
863 exporter = vma;
864 importer = next;
865 VM_WARN_ON(expand != importer);
866 }
867
868
869
870
871
872
873 if (exporter && exporter->anon_vma && !importer->anon_vma) {
874 importer->anon_vma = exporter->anon_vma;
875 if (anon_vma_clone(importer, exporter))
876 return -ENOMEM;
877 }
878 }
879again:
880 vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
881 if (file) {
882 mapping = file->f_mapping;
883 if (!(vma->vm_flags & VM_NONLINEAR)) {
884 root = &mapping->i_mmap;
885 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
886
887 if (adjust_next)
888 uprobe_munmap(next, next->vm_start,
889 next->vm_end);
890 }
891
892 mutex_lock(&mapping->i_mmap_mutex);
893 if (insert) {
894
895
896
897
898
899
900 __vma_link_file(insert);
901 }
902 }
903
904 anon_vma = vma->anon_vma;
905 if (!anon_vma && adjust_next)
906 anon_vma = next->anon_vma;
907 if (anon_vma) {
908 VM_BUG_ON(adjust_next && next->anon_vma &&
909 anon_vma != next->anon_vma);
910 anon_vma_lock_write(anon_vma);
911 anon_vma_interval_tree_pre_update_vma(vma);
912 if (adjust_next)
913 anon_vma_interval_tree_pre_update_vma(next);
914 }
915
916 if (root) {
917 flush_dcache_mmap_lock(mapping);
918 vma_interval_tree_remove(vma, root);
919 if (adjust_next)
920 vma_interval_tree_remove(next, root);
921 }
922
923 if (start != vma->vm_start) {
924 vma->vm_start = start;
925 start_changed = true;
926 }
927 if (end != vma->vm_end) {
928 vma->vm_end = end;
929 end_changed = true;
930 }
931 vma->vm_pgoff = pgoff;
932 if (adjust_next) {
933 next->vm_start += adjust_next << PAGE_SHIFT;
934 next->vm_pgoff += adjust_next;
935 }
936
937 if (root) {
938 if (adjust_next)
939 vma_interval_tree_insert(next, root);
940 vma_interval_tree_insert(vma, root);
941 flush_dcache_mmap_unlock(mapping);
942 }
943
944 if (remove_next) {
945
946
947
948
949 if (remove_next != 3)
950 __vma_unlink_prev(mm, next, vma);
951 else
952
953
954
955
956
957
958
959
960
961 __vma_unlink_common(mm, next, NULL, false, vma);
962 if (file)
963 __remove_shared_vm_struct(next, file, mapping);
964 } else if (insert) {
965
966
967
968
969
970 __insert_vm_struct(mm, insert);
971 } else {
972 if (start_changed)
973 vma_gap_update(vma);
974 if (end_changed) {
975 if (!next)
976 mm->highest_vm_end = vm_end_gap(vma);
977 else if (!adjust_next)
978 vma_gap_update(next);
979 }
980 }
981
982 if (anon_vma) {
983 anon_vma_interval_tree_post_update_vma(vma);
984 if (adjust_next)
985 anon_vma_interval_tree_post_update_vma(next);
986 anon_vma_unlock_write(anon_vma);
987 }
988 if (mapping)
989 mutex_unlock(&mapping->i_mmap_mutex);
990
991 if (root) {
992 uprobe_mmap(vma);
993
994 if (adjust_next)
995 uprobe_mmap(next);
996 }
997
998 if (remove_next) {
999 if (file) {
1000 uprobe_munmap(next, next->vm_start, next->vm_end);
1001 fput(file);
1002 }
1003 if (next->anon_vma)
1004 anon_vma_merge(vma, next);
1005 mm->map_count--;
1006 mpol_put(vma_policy(next));
1007 kmem_cache_free(vm_area_cachep, next);
1008
1009
1010
1011
1012
1013 if (remove_next != 3) {
1014
1015
1016
1017
1018
1019
1020 next = vma->vm_next;
1021 } else {
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032 next = vma;
1033 }
1034 if (remove_next == 2) {
1035 remove_next = 1;
1036 end = next->vm_end;
1037 goto again;
1038 }
1039 else if (next)
1040 vma_gap_update(next);
1041 else
1042 mm->highest_vm_end = end;
1043 }
1044 if (insert && file)
1045 uprobe_mmap(insert);
1046
1047 validate_mm(mm);
1048
1049 return 0;
1050}
1051
1052
1053
1054
1055
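/*
 * Two vmas are mergeable only if their flags (ignoring VM_SOFTDIRTY),
 * backing file and userfaultfd context match, and the vma has no
 * driver ->close() callback that might need per-vma state.
 */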
1056static inline int is_mergeable_vma(struct vm_area_struct *vma,
1057 struct file *file, unsigned long vm_flags,
1058 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1059{
1060
1061
1062
1063
1064
1065
1066
1067
1068 if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
1069 return 0;
1070 if (vma->vm_file != file)
1071 return 0;
1072 if (vma->vm_ops && vma->vm_ops->close)
1073 return 0;
1074 if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
1075 return 0;
1076 return 1;
1077}
1078
1079static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
1080 struct anon_vma *anon_vma2,
1081 struct vm_area_struct *vma)
1082{
1083
1084
1085
1086
1087 if ((!anon_vma1 || !anon_vma2) && (!vma ||
1088 list_is_singular(&vma->anon_vma_chain)))
1089 return 1;
1090 return anon_vma1 == anon_vma2;
1091}
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
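/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 */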
1104static int
1105can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
1106 struct anon_vma *anon_vma, struct file *file,
1107 pgoff_t vm_pgoff,
1108 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1109{
1110 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1111 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1112 if (vma->vm_pgoff == vm_pgoff)
1113 return 1;
1114 }
1115 return 0;
1116}
1117
1118
1119
1120
1121
1122
1123
1124
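/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 */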
1125static int
1126can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
1127 struct anon_vma *anon_vma, struct file *file,
1128 pgoff_t vm_pgoff,
1129 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1130{
1131 if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx) &&
1132 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
1133 pgoff_t vm_pglen;
1134 vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
1135 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
1136 return 1;
1137 }
1138 return 0;
1139}
1140
/*
 * Given a mapping request (addr, end, vm_flags, file, pgoff, ...) and
 * the vma preceding addr (prev, or NULL), decide whether the new range
 * can simply be merged into an existing vma instead of allocating a
 * new one.  The merge can extend prev forward, extend the following
 * vma backward, or join prev and the following vma when the new range
 * exactly bridges them and every attribute (flags, file, offset,
 * mempolicy, anon_vma, userfaultfd context) is compatible.
 *
 * Returns the resulting vma on success, or NULL if the caller has to
 * create a new vma.  Mappings with VM_SPECIAL set are never merged.
 */
1181struct vm_area_struct *vma_merge(struct mm_struct *mm,
1182 struct vm_area_struct *prev, unsigned long addr,
1183 unsigned long end, unsigned long vm_flags,
1184 struct anon_vma *anon_vma, struct file *file,
1185 pgoff_t pgoff, struct mempolicy *policy,
1186 struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
1187{
1188 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1189 struct vm_area_struct *area, *next;
1190 int err;
1191
1192
1193
1194
1195
1196 if (vm_flags & VM_SPECIAL)
1197 return NULL;
1198
1199 if (prev)
1200 next = prev->vm_next;
1201 else
1202 next = mm->mmap;
1203 area = next;
1204 if (area && area->vm_end == end)
1205 next = next->vm_next;
1206
1207
1208 VM_WARN_ON(prev && addr <= prev->vm_start);
1209 VM_WARN_ON(area && end > area->vm_end);
1210 VM_WARN_ON(addr >= end);
1211
1212
1213
1214
1215 if (prev && prev->vm_end == addr &&
1216 mpol_equal(vma_policy(prev), policy) &&
1217 can_vma_merge_after(prev, vm_flags,
1218 anon_vma, file, pgoff,
1219 vm_userfaultfd_ctx)) {
1220
1221
1222
1223 if (next && end == next->vm_start &&
1224 mpol_equal(policy, vma_policy(next)) &&
1225 can_vma_merge_before(next, vm_flags,
1226 anon_vma, file,
1227 pgoff+pglen,
1228 vm_userfaultfd_ctx) &&
1229 is_mergeable_anon_vma(prev->anon_vma,
1230 next->anon_vma, NULL)) {
1231
1232 err = __vma_adjust(prev, prev->vm_start,
1233 next->vm_end, prev->vm_pgoff, NULL,
1234 prev);
1235 } else
1236 err = __vma_adjust(prev, prev->vm_start,
1237 end, prev->vm_pgoff, NULL, prev);
1238 if (err)
1239 return NULL;
1240 khugepaged_enter_vma_merge(prev);
1241 return prev;
1242 }
1243
1244
1245
1246
1247 if (next && end == next->vm_start &&
1248 mpol_equal(policy, vma_policy(next)) &&
1249 can_vma_merge_before(next, vm_flags,
1250 anon_vma, file, pgoff+pglen,
1251 vm_userfaultfd_ctx)) {
1252 if (prev && addr < prev->vm_end)
1253 err = __vma_adjust(prev, prev->vm_start,
1254 addr, prev->vm_pgoff, NULL, next);
1255 else {
1256 err = __vma_adjust(area, addr, next->vm_end,
1257 next->vm_pgoff - pglen, NULL, next);
1258
1259
1260
1261
1262
1263 area = next;
1264 }
1265 if (err)
1266 return NULL;
1267 khugepaged_enter_vma_merge(area);
1268 return area;
1269 }
1270
1271 return NULL;
1272}
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
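/*
 * Return true if the two vmas are adjacent and alike enough that they
 * could in principle share an anon_vma: same file, policy and file
 * offset, with flags differing only in the access bits and VM_SOFTDIRTY.
 */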
1287static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1288{
1289 return a->vm_end == b->vm_start &&
1290 mpol_equal(vma_policy(a), vma_policy(b)) &&
1291 a->vm_file == b->vm_file &&
1292 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
1293 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1294}
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
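/*
 * Return old's anon_vma if a and b are compatible neighbours and the
 * anon_vma can safely be shared (it is the only one on old's chain);
 * otherwise return NULL.
 */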
1318static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1319{
1320 if (anon_vma_compatible(a, b)) {
1321 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
1322
1323 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1324 return anon_vma;
1325 }
1326 return NULL;
1327}
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1338{
1339 struct anon_vma *anon_vma;
1340 struct vm_area_struct *near;
1341
1342 near = vma->vm_next;
1343 if (!near)
1344 goto try_prev;
1345
1346 anon_vma = reusable_anon_vma(near, vma, near);
1347 if (anon_vma)
1348 return anon_vma;
1349try_prev:
1350 near = vma->vm_prev;
1351 if (!near)
1352 goto none;
1353
1354 anon_vma = reusable_anon_vma(near, near, vma);
1355 if (anon_vma)
1356 return anon_vma;
1357none:
1358
1359
1360
1361
1362
1363
1364
1365
1366 return NULL;
1367}
1368
1369#ifdef CONFIG_PROC_FS
1370void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1371 struct file *file, long pages)
1372{
1373 const unsigned long stack_flags
1374 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1375
1376 mm->total_vm += pages;
1377
1378 if (file) {
1379 mm->shared_vm += pages;
1380 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1381 mm->exec_vm += pages;
1382 } else if (flags & stack_flags)
1383 mm->stack_vm += pages;
1384}
1385#endif
1386
1387
1388
1389
1390
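/*
 * If a hint address is non-zero but below mmap_min_addr, round it up to
 * the lowest address that is still permitted.
 */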
1391static inline unsigned long round_hint_to_min(unsigned long hint)
1392{
1393 hint &= PAGE_MASK;
1394 if (((void *)hint != NULL) &&
1395 (hint < mmap_min_addr))
1396 return PAGE_ALIGN(mmap_min_addr);
1397 return hint;
1398}
1399
1400
1401
1402
1403
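/*
 * The caller must hold a write lock on current->mm->mmap_sem.
 */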
1404unsigned long do_mmap(struct file *file, unsigned long addr,
1405 unsigned long len, unsigned long prot,
1406 unsigned long flags, vm_flags_t vm_flags,
1407 unsigned long pgoff, unsigned long *populate,
1408 struct list_head *uf)
1409{
1410 struct mm_struct * mm = current->mm;
1411 int pkey = 0;
1412
1413 *populate = 0;
1414
1415
1416
1417
1418
1419
1420
1421 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1422 if (!(file && path_noexec(&file->f_path)))
1423 prot |= PROT_EXEC;
1424
1425 if (!len)
1426 return -EINVAL;
1427
1428 if (!(flags & MAP_FIXED))
1429 addr = round_hint_to_min(addr);
1430
1431
1432 len = PAGE_ALIGN(len);
1433 if (!len)
1434 return -ENOMEM;
1435
1436
1437 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1438 return -EOVERFLOW;
1439
1440
1441 if (mm->map_count > sysctl_max_map_count)
1442 return -ENOMEM;
1443
1444
1445
1446
1447 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1448 if (offset_in_page(addr))
1449 return addr;
1450
1451 if (prot == PROT_EXEC) {
1452 pkey = execute_only_pkey(mm);
1453 if (pkey < 0)
1454 pkey = 0;
1455 }
1456
1457
1458
1459
1460
1461 vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
1462 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1463
1464 if (flags & MAP_LOCKED)
1465 if (!can_do_mlock())
1466 return -EPERM;
1467
1468
1469 if (vm_flags & VM_LOCKED) {
1470 unsigned long locked, lock_limit;
1471 locked = len >> PAGE_SHIFT;
1472 locked += mm->locked_vm;
1473 lock_limit = rlimit(RLIMIT_MEMLOCK);
1474 lock_limit >>= PAGE_SHIFT;
1475 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1476 return -EAGAIN;
1477 }
1478
1479 if (file) {
1480 struct inode *inode = file_inode(file);
1481
1482 switch (flags & MAP_TYPE) {
1483 case MAP_SHARED:
1484 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1485 return -EACCES;
1486
1487
1488
1489
1490
1491 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1492 return -EACCES;
1493
1494
1495
1496
1497 if (locks_verify_locked(file))
1498 return -EAGAIN;
1499
1500 vm_flags |= VM_SHARED | VM_MAYSHARE;
1501 if (!(file->f_mode & FMODE_WRITE))
1502 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1503
1504
1505 case MAP_PRIVATE:
1506 if (!(file->f_mode & FMODE_READ))
1507 return -EACCES;
1508 if (path_noexec(&file->f_path)) {
1509 if (vm_flags & VM_EXEC)
1510 return -EPERM;
1511 vm_flags &= ~VM_MAYEXEC;
1512 }
1513
1514 if (!file->f_op || !file->f_op->mmap)
1515 return -ENODEV;
1516 break;
1517
1518 default:
1519 return -EINVAL;
1520 }
1521 } else {
1522 switch (flags & MAP_TYPE) {
1523 case MAP_SHARED:
1524
1525
1526
1527 pgoff = 0;
1528 vm_flags |= VM_SHARED | VM_MAYSHARE;
1529 break;
1530 case MAP_PRIVATE:
1531
1532
1533
1534 pgoff = addr >> PAGE_SHIFT;
1535 break;
1536 default:
1537 return -EINVAL;
1538 }
1539 }
1540
1541
1542
1543
1544
1545 if (flags & MAP_NORESERVE) {
1546
1547 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1548 vm_flags |= VM_NORESERVE;
1549
1550
1551 if (file && is_file_hugepages(file))
1552 vm_flags |= VM_NORESERVE;
1553 }
1554
1555 addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
1556 if (!IS_ERR_VALUE(addr) &&
1557 ((vm_flags & VM_LOCKED) ||
1558 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1559 *populate = len;
1560 return addr;
1561}
1562
1563SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1564 unsigned long, prot, unsigned long, flags,
1565 unsigned long, fd, unsigned long, pgoff)
1566{
1567 struct file *file = NULL;
1568 unsigned long retval = -EBADF;
1569
1570 if (!(flags & MAP_ANONYMOUS)) {
1571 audit_mmap_fd(fd, flags);
1572 if (unlikely(flags & MAP_HUGETLB))
1573 return -EINVAL;
1574 file = fget(fd);
1575 if (!file)
1576 goto out;
1577 if (is_file_hugepages(file))
1578 len = ALIGN(len, huge_page_size(hstate_file(file)));
1579 } else if (flags & MAP_HUGETLB) {
1580 struct user_struct *user = NULL;
1581 struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) &
1582 SHM_HUGE_MASK);
1583
1584 if (!hs)
1585 return -EINVAL;
1586
1587 len = ALIGN(len, huge_page_size(hs));
1588
1589
1590
1591
1592
1593
1594 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1595 VM_NORESERVE,
1596 &user, HUGETLB_ANONHUGE_INODE,
1597 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1598 if (IS_ERR(file))
1599 return PTR_ERR(file);
1600 }
1601
1602 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1603
1604 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1605 if (file)
1606 fput(file);
1607out:
1608 return retval;
1609}
1610
1611#ifdef __ARCH_WANT_SYS_OLD_MMAP
1612struct mmap_arg_struct {
1613 unsigned long addr;
1614 unsigned long len;
1615 unsigned long prot;
1616 unsigned long flags;
1617 unsigned long fd;
1618 unsigned long offset;
1619};
1620
1621SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1622{
1623 struct mmap_arg_struct a;
1624
1625 if (copy_from_user(&a, arg, sizeof(a)))
1626 return -EFAULT;
1627 if (offset_in_page(a.offset))
1628 return -EINVAL;
1629
1630 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1631 a.offset >> PAGE_SHIFT);
1632}
1633#endif
1634
1635
1636
1637
1638
1639
1640
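/*
 * Some shared mappings want their pages mapped read-only so that write
 * faults can be caught (dirty tracking, soft-dirty, page_mkwrite).
 * Return 1 if this vma needs such write notification; in that case
 * vma_set_page_prot() drops VM_SHARED when computing vm_page_prot.
 */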
1641int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
1642{
1643 vm_flags_t vm_flags = vma->vm_flags;
1644 const struct vm_operations_struct *vm_ops = vma->vm_ops;
1645
1646
1647 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1648 return 0;
1649
1650
1651 if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
1652 return 1;
1653
1654
1655
1656 if (pgprot_val(vm_page_prot) !=
1657 pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
1658 return 0;
1659
1660
1661 if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
1662 return 1;
1663
1664
1665 if (vm_flags & VM_PFNMAP)
1666 return 0;
1667
1668
1669 return vma->vm_file && vma->vm_file->f_mapping &&
1670 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1671}
1672
1673
1674
1675
1676
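/*
 * We account for memory if it is a private writable mapping that is
 * not backed by hugetlbfs and VM_NORESERVE was not requested.
 */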
1677static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1678{
1679
1680
1681
1682
1683 if (file && is_file_hugepages(file))
1684 return 0;
1685
1686 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1687}
1688
1689unsigned long mmap_region(struct file *file, unsigned long addr,
1690 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
1691 struct list_head *uf)
1692{
1693 struct mm_struct *mm = current->mm;
1694 struct vm_area_struct *vma, *prev;
1695 int error;
1696 struct rb_node **rb_link, *rb_parent;
1697 unsigned long charged = 0;
1698
1699
1700 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1701 unsigned long nr_pages;
1702
1703
1704
1705
1706
1707 if (!(vm_flags & MAP_FIXED))
1708 return -ENOMEM;
1709
1710 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1711
1712 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1713 return -ENOMEM;
1714 }
1715
1716
1717 error = -ENOMEM;
1718munmap_back:
1719 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
1720 if (do_munmap(mm, addr, len, uf))
1721 return -ENOMEM;
1722 goto munmap_back;
1723 }
1724
1725
1726
1727
1728 if (accountable_mapping(file, vm_flags)) {
1729 charged = len >> PAGE_SHIFT;
1730 if (security_vm_enough_memory_mm(mm, charged))
1731 return -ENOMEM;
1732 vm_flags |= VM_ACCOUNT;
1733 }
1734
1735
1736
1737
1738 vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
1739 NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
1740 if (vma)
1741 goto out;
1742
1743
1744
1745
1746
1747
1748 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1749 if (!vma) {
1750 error = -ENOMEM;
1751 goto unacct_error;
1752 }
1753
1754 vma->vm_mm = mm;
1755 vma->vm_start = addr;
1756 vma->vm_end = addr + len;
1757 vma->vm_flags = vm_flags;
1758 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1759 vma->vm_pgoff = pgoff;
1760 INIT_LIST_HEAD(&vma->anon_vma_chain);
1761
1762 error = -EINVAL;
1763
1764 if (file) {
1765 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1766 goto free_vma;
1767 if (vm_flags & VM_DENYWRITE) {
1768 error = deny_write_access(file);
1769 if (error)
1770 goto free_vma;
1771 }
1772 if (vm_flags & VM_SHARED) {
1773 error = mapping_map_writable(file->f_mapping);
1774 if (error)
1775 goto allow_write_and_free_vma;
1776 }
1777
1778
1779
1780
1781
1782
1783 vma->vm_file = get_file(file);
1784 error = file->f_op->mmap(file, vma);
1785 if (error)
1786 goto unmap_and_free_vma;
1787
1788
1789
1790
1791
1792
1793
1794
1795 WARN_ON_ONCE(addr != vma->vm_start);
1796
1797 addr = vma->vm_start;
1798 pgoff = vma->vm_pgoff;
1799 vm_flags = vma->vm_flags;
1800 } else if (vm_flags & VM_SHARED) {
1801 if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
1802 goto free_vma;
1803 error = shmem_zero_setup(vma);
1804 if (error)
1805 goto free_vma;
1806 }
1807
1808 vma_link(mm, vma, prev, rb_link, rb_parent);
1809
1810 if (file) {
1811 if (vm_flags & VM_SHARED)
1812 mapping_unmap_writable(file->f_mapping);
1813 if (vm_flags & VM_DENYWRITE)
1814 allow_write_access(file);
1815 }
1816 file = vma->vm_file;
1817out:
1818 perf_event_mmap(vma);
1819
1820 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1821 if (vm_flags & VM_LOCKED) {
1822 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1823 vma == get_gate_vma(current->mm)))
1824 mm->locked_vm += (len >> PAGE_SHIFT);
1825 else
1826 vma->vm_flags &= ~VM_LOCKED;
1827 }
1828
1829 if (file)
1830 uprobe_mmap(vma);
1831
1832
1833
1834
1835
1836
1837
1838
1839 vma->vm_flags |= VM_SOFTDIRTY;
1840
1841 vma_set_page_prot(vma);
1842
1843 return addr;
1844
1845unmap_and_free_vma:
1846 vma->vm_file = NULL;
1847 fput(file);
1848
1849
1850 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1851 charged = 0;
1852 if (vm_flags & VM_SHARED)
1853 mapping_unmap_writable(file->f_mapping);
1854allow_write_and_free_vma:
1855 if (vm_flags & VM_DENYWRITE)
1856 allow_write_access(file);
1857free_vma:
1858 kmem_cache_free(vm_area_cachep, vma);
1859unacct_error:
1860 if (charged)
1861 vm_unacct_memory(charged);
1862 return error;
1863}
1864
1865unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	/*
	 * Find the lowest suitable gap: walk the augmented rbtree looking
	 * for a vma whose preceding gap (from vm_end_gap() of the previous
	 * vma up to vm_start_gap() of this one) is at least 'length' bytes
	 * and lies within [info->low_limit, info->high_limit].
	 */
1867
1868
1869
1870
1871
1872
1873
1874
1875 struct mm_struct *mm = current->mm;
1876 struct vm_area_struct *vma;
1877 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1878
1879
1880 length = info->length + info->align_mask;
1881 if (length < info->length)
1882 return -ENOMEM;
1883
1884
1885 if (info->high_limit < length)
1886 return -ENOMEM;
1887 high_limit = info->high_limit - length;
1888
1889 if (info->low_limit > high_limit)
1890 return -ENOMEM;
1891 low_limit = info->low_limit + length;
1892
1893
1894 if (RB_EMPTY_ROOT(&mm->mm_rb))
1895 goto check_highest;
1896 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1897 if (vma->rb_subtree_gap < length)
1898 goto check_highest;
1899
1900 while (true) {
1901
1902 gap_end = vm_start_gap(vma);
1903 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1904 struct vm_area_struct *left =
1905 rb_entry(vma->vm_rb.rb_left,
1906 struct vm_area_struct, vm_rb);
1907 if (left->rb_subtree_gap >= length) {
1908 vma = left;
1909 continue;
1910 }
1911 }
1912
1913 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
1914check_current:
1915
1916 if (gap_start > high_limit)
1917 return -ENOMEM;
1918 if (gap_end >= low_limit &&
1919 gap_end > gap_start && gap_end - gap_start >= length)
1920 goto found;
1921
1922
1923 if (vma->vm_rb.rb_right) {
1924 struct vm_area_struct *right =
1925 rb_entry(vma->vm_rb.rb_right,
1926 struct vm_area_struct, vm_rb);
1927 if (right->rb_subtree_gap >= length) {
1928 vma = right;
1929 continue;
1930 }
1931 }
1932
1933
1934 while (true) {
1935 struct rb_node *prev = &vma->vm_rb;
1936 if (!rb_parent(prev))
1937 goto check_highest;
1938 vma = rb_entry(rb_parent(prev),
1939 struct vm_area_struct, vm_rb);
1940 if (prev == vma->vm_rb.rb_left) {
1941 gap_start = vm_end_gap(vma->vm_prev);
1942 gap_end = vm_start_gap(vma);
1943 goto check_current;
1944 }
1945 }
1946 }
1947
1948check_highest:
1949
1950 gap_start = mm->highest_vm_end;
1951 gap_end = ULONG_MAX;
1952 if (gap_start > high_limit)
1953 return -ENOMEM;
1954
1955found:
1956
1957 if (gap_start < info->low_limit)
1958 gap_start = info->low_limit;
1959
1960
1961 gap_start += (info->align_offset - gap_start) & info->align_mask;
1962
1963 VM_BUG_ON(gap_start + info->length > info->high_limit);
1964 VM_BUG_ON(gap_start + info->length > gap_end);
1965 return gap_start;
1966}
1967
1968unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1969{
1970 struct mm_struct *mm = current->mm;
1971 struct vm_area_struct *vma;
1972 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1973
1974
1975 length = info->length + info->align_mask;
1976 if (length < info->length)
1977 return -ENOMEM;
1978
1979
1980
1981
1982
1983 gap_end = info->high_limit;
1984 if (gap_end < length)
1985 return -ENOMEM;
1986 high_limit = gap_end - length;
1987
1988 if (info->low_limit > high_limit)
1989 return -ENOMEM;
1990 low_limit = info->low_limit + length;
1991
1992
1993 gap_start = mm->highest_vm_end;
1994 if (gap_start <= high_limit)
1995 goto found_highest;
1996
1997
1998 if (RB_EMPTY_ROOT(&mm->mm_rb))
1999 return -ENOMEM;
2000 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
2001 if (vma->rb_subtree_gap < length)
2002 return -ENOMEM;
2003
2004 while (true) {
2005
2006 gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
2007 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
2008 struct vm_area_struct *right =
2009 rb_entry(vma->vm_rb.rb_right,
2010 struct vm_area_struct, vm_rb);
2011 if (right->rb_subtree_gap >= length) {
2012 vma = right;
2013 continue;
2014 }
2015 }
2016
2017check_current:
2018
2019 gap_end = vm_start_gap(vma);
2020 if (gap_end < low_limit)
2021 return -ENOMEM;
2022 if (gap_start <= high_limit &&
2023 gap_end > gap_start && gap_end - gap_start >= length)
2024 goto found;
2025
2026
2027 if (vma->vm_rb.rb_left) {
2028 struct vm_area_struct *left =
2029 rb_entry(vma->vm_rb.rb_left,
2030 struct vm_area_struct, vm_rb);
2031 if (left->rb_subtree_gap >= length) {
2032 vma = left;
2033 continue;
2034 }
2035 }
2036
2037
2038 while (true) {
2039 struct rb_node *prev = &vma->vm_rb;
2040 if (!rb_parent(prev))
2041 return -ENOMEM;
2042 vma = rb_entry(rb_parent(prev),
2043 struct vm_area_struct, vm_rb);
2044 if (prev == vma->vm_rb.rb_right) {
2045 gap_start = vma->vm_prev ?
2046 vm_end_gap(vma->vm_prev) : 0;
2047 goto check_current;
2048 }
2049 }
2050 }
2051
2052found:
2053
2054 if (gap_end > info->high_limit)
2055 gap_end = info->high_limit;
2056
2057found_highest:
2058
2059 gap_end -= info->length;
2060 gap_end -= (gap_end - info->align_offset) & info->align_mask;
2061
2062 VM_BUG_ON(gap_end < info->low_limit);
2063 VM_BUG_ON(gap_end < gap_start);
2064 return gap_end;
2065}
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
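/*
 * Get an address range which is currently unmapped.  The return value
 * is either a page-aligned address or a negative errno encoded in the
 * low bits (checked with offset_in_page()/IS_ERR_VALUE()).
 */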
2078#ifndef HAVE_ARCH_UNMAPPED_AREA
2079unsigned long
2080arch_get_unmapped_area(struct file *filp, unsigned long addr,
2081 unsigned long len, unsigned long pgoff, unsigned long flags)
2082{
2083 struct mm_struct *mm = current->mm;
2084 struct vm_area_struct *vma, *prev;
2085 struct vm_unmapped_area_info info;
2086
2087 if (len > TASK_SIZE - mmap_min_addr)
2088 return -ENOMEM;
2089
2090 if (flags & MAP_FIXED)
2091 return addr;
2092
2093 if (addr) {
2094 addr = PAGE_ALIGN(addr);
2095 vma = find_vma_prev(mm, addr, &prev);
2096 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
2097 (!vma || addr + len <= vm_start_gap(vma)) &&
2098 (!prev || addr >= vm_end_gap(prev)))
2099 return addr;
2100 }
2101
2102 info.flags = 0;
2103 info.length = len;
2104 info.low_limit = TASK_UNMAPPED_BASE;
2105 info.high_limit = TASK_SIZE;
2106 info.align_mask = 0;
2107 return vm_unmapped_area(&info);
2108}
2109#endif
2110
2111void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
2112{
2113
2114
2115
2116 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
2117 mm->free_area_cache = addr;
2118}
2119
2120
2121
2122
2123
2124#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
2125unsigned long
2126arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
2127 const unsigned long len, const unsigned long pgoff,
2128 const unsigned long flags)
2129{
2130 struct vm_area_struct *vma, *prev;
2131 struct mm_struct *mm = current->mm;
2132 unsigned long addr = addr0;
2133 struct vm_unmapped_area_info info;
2134
2135
2136 if (len > TASK_SIZE - mmap_min_addr)
2137 return -ENOMEM;
2138
2139 if (flags & MAP_FIXED)
2140 return addr;
2141
2142
2143 if (addr) {
2144 addr = PAGE_ALIGN(addr);
2145 vma = find_vma_prev(mm, addr, &prev);
2146 if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
2147 (!vma || addr + len <= vm_start_gap(vma)) &&
2148 (!prev || addr >= vm_end_gap(prev)))
2149 return addr;
2150 }
2151
2152 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
2153 info.length = len;
2154 info.low_limit = max(PAGE_SIZE, mmap_min_addr);
2155 info.high_limit = mm->mmap_base;
2156 info.align_mask = 0;
2157 addr = vm_unmapped_area(&info);
2158
2159
2160
2161
2162
2163
2164
2165 if (offset_in_page(addr)) {
2166 VM_BUG_ON(addr != -ENOMEM);
2167 info.flags = 0;
2168 info.low_limit = TASK_UNMAPPED_BASE;
2169 info.high_limit = TASK_SIZE;
2170 addr = vm_unmapped_area(&info);
2171 }
2172
2173 return addr;
2174}
2175#endif
2176
2177void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
2178{
2179
2180
2181
2182 if (addr > mm->free_area_cache)
2183 mm->free_area_cache = addr;
2184
2185
2186 if (mm->free_area_cache > mm->mmap_base)
2187 mm->free_area_cache = mm->mmap_base;
2188}
2189
2190unsigned long
2191get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
2192 unsigned long pgoff, unsigned long flags)
2193{
2194 unsigned long (*get_area)(struct file *, unsigned long,
2195 unsigned long, unsigned long, unsigned long);
2196
2197 unsigned long error = arch_mmap_check(addr, len, flags);
2198 if (error)
2199 return error;
2200
2201
2202 if (len > TASK_SIZE)
2203 return -ENOMEM;
2204
2205 get_area = current->mm->get_unmapped_area;
2206 if (file && file->f_op && file->f_op->get_unmapped_area)
2207 get_area = file->f_op->get_unmapped_area;
2208 addr = get_area(file, addr, len, pgoff, flags);
2209 if (IS_ERR_VALUE(addr))
2210 return addr;
2211
2212 if (addr > TASK_SIZE - len)
2213 return -ENOMEM;
2214 if (offset_in_page(addr))
2215 return -EINVAL;
2216
2217 addr = arch_rebalance_pgtables(addr, len);
2218 error = security_mmap_addr(addr);
2219 return error ? error : addr;
2220}
2221
2222EXPORT_SYMBOL(get_unmapped_area);
2223
2224
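/* Look up the first VMA which satisfies addr < vm_end; NULL if none. */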
2225struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
2226{
2227 struct vm_area_struct *vma = NULL;
2228
2229
2230
2231 vma = ACCESS_ONCE(mm->mmap_cache);
2232 if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
2233 struct rb_node *rb_node;
2234
2235 rb_node = mm->mm_rb.rb_node;
2236 vma = NULL;
2237
2238 while (rb_node) {
2239 struct vm_area_struct *vma_tmp;
2240
2241 vma_tmp = rb_entry(rb_node,
2242 struct vm_area_struct, vm_rb);
2243
2244 if (vma_tmp->vm_end > addr) {
2245 vma = vma_tmp;
2246 if (vma_tmp->vm_start <= addr)
2247 break;
2248 rb_node = rb_node->rb_left;
2249 } else
2250 rb_node = rb_node->rb_right;
2251 }
2252 if (vma)
2253 mm->mmap_cache = vma;
2254 }
2255 return vma;
2256}
2257
2258EXPORT_SYMBOL(find_vma);
2259
2260
2261
2262
2263struct vm_area_struct *
2264find_vma_prev(struct mm_struct *mm, unsigned long addr,
2265 struct vm_area_struct **pprev)
2266{
2267 struct vm_area_struct *vma;
2268
2269 vma = find_vma(mm, addr);
2270 if (vma) {
2271 *pprev = vma->vm_prev;
2272 } else {
2273 struct rb_node *rb_node = mm->mm_rb.rb_node;
2274 *pprev = NULL;
2275 while (rb_node) {
2276 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2277 rb_node = rb_node->rb_right;
2278 }
2279 }
2280 return vma;
2281}
2282
2283
2284
2285
2286
2287
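/*
 * Verify that the stack growth is acceptable (RLIMIT_STACK,
 * RLIMIT_MEMLOCK, overall address space limits), charge the new pages,
 * and update the mm accounting on success.
 */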
2288static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2289{
2290 struct mm_struct *mm = vma->vm_mm;
2291 struct rlimit *rlim = current->signal->rlim;
2292 unsigned long new_start;
2293
2294
2295 if (!may_expand_vm(mm, grow))
2296 return -ENOMEM;
2297
2298
2299 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2300 return -ENOMEM;
2301
2302
2303 if (vma->vm_flags & VM_LOCKED) {
2304 unsigned long locked;
2305 unsigned long limit;
2306 locked = mm->locked_vm + grow;
2307 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2308 limit >>= PAGE_SHIFT;
2309 if (locked > limit && !capable(CAP_IPC_LOCK))
2310 return -ENOMEM;
2311 }
2312
2313
2314 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2315 vma->vm_end - size;
2316 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2317 return -EFAULT;
2318
2319
2320
2321
2322
2323 if (security_vm_enough_memory_mm(mm, grow))
2324 return -ENOMEM;
2325
2326
2327 if (vma->vm_flags & VM_LOCKED)
2328 mm->locked_vm += grow;
2329 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2330 return 0;
2331}
2332
2333#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2334
2335
2336
2337
2338int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2339{
2340 struct vm_area_struct *next;
2341 unsigned long gap_addr;
2342 int error;
2343
2344 if (!(vma->vm_flags & VM_GROWSUP))
2345 return -EFAULT;
2346
2347
2348
2349
2350
2351 if (unlikely(anon_vma_prepare(vma)))
2352 return -ENOMEM;
2353 vma_lock_anon_vma(vma);
2354
2355
2356
2357
2358
2359
2360
2361 address &= PAGE_MASK;
2362 address += PAGE_SIZE;
2363 if (!address) {
2364 vma_unlock_anon_vma(vma);
2365 return -ENOMEM;
2366 }
2367 error = 0;
2368
2369
2370 gap_addr = address + stack_guard_gap;
2371 if (gap_addr < address) {
2372 vma_unlock_anon_vma(vma);
2373 return -ENOMEM;
2374 }
2375 next = vma->vm_next;
2376 if (next && next->vm_start < gap_addr) {
2377 if (!(next->vm_flags & VM_GROWSUP)) {
2378 vma_unlock_anon_vma(vma);
2379 return -ENOMEM;
2380 }
2381
2382 }
2383
2384
2385 if (address > vma->vm_end) {
2386 unsigned long size, grow;
2387
2388 size = address - vma->vm_start;
2389 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2390
2391 error = -ENOMEM;
2392 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2393 error = acct_stack_growth(vma, size, grow);
2394 if (!error) {
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406 spin_lock(&vma->vm_mm->page_table_lock);
2407 anon_vma_interval_tree_pre_update_vma(vma);
2408 vma->vm_end = address;
2409 anon_vma_interval_tree_post_update_vma(vma);
2410 if (vma->vm_next)
2411 vma_gap_update(vma->vm_next);
2412 else
2413 vma->vm_mm->highest_vm_end = vm_end_gap(vma);
2414 spin_unlock(&vma->vm_mm->page_table_lock);
2415
2416 perf_event_mmap(vma);
2417 }
2418 }
2419 }
2420 vma_unlock_anon_vma(vma);
2421 khugepaged_enter_vma_merge(vma);
2422 validate_mm(vma->vm_mm);
2423 return error;
2424}
2425#endif
2426
2427
2428
2429
2430int expand_downwards(struct vm_area_struct *vma,
2431 unsigned long address)
2432{
2433 struct vm_area_struct *prev;
2434 unsigned long gap_addr;
2435 int error;
2436
2437
2438
2439
2440
2441 if (unlikely(anon_vma_prepare(vma)))
2442 return -ENOMEM;
2443
2444 address &= PAGE_MASK;
2445 error = security_mmap_addr(address);
2446 if (error)
2447 return error;
2448
2449
2450 gap_addr = address - stack_guard_gap;
2451 if (gap_addr > address)
2452 return -ENOMEM;
2453
2454 prev = vma->vm_prev;
2455 if (prev && prev->vm_end > gap_addr) {
2456 if (!(prev->vm_flags & VM_GROWSDOWN))
2457 return -ENOMEM;
2458
2459 }
2460
2461 vma_lock_anon_vma(vma);
2462
2463
2464
2465
2466
2467
2468
2469 if (address < vma->vm_start) {
2470 unsigned long size, grow;
2471
2472 size = vma->vm_end - address;
2473 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2474
2475 error = -ENOMEM;
2476 if (grow <= vma->vm_pgoff) {
2477 error = acct_stack_growth(vma, size, grow);
2478 if (!error) {
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490 spin_lock(&vma->vm_mm->page_table_lock);
2491 anon_vma_interval_tree_pre_update_vma(vma);
2492 vma->vm_start = address;
2493 vma->vm_pgoff -= grow;
2494 anon_vma_interval_tree_post_update_vma(vma);
2495 vma_gap_update(vma);
2496 spin_unlock(&vma->vm_mm->page_table_lock);
2497
2498 perf_event_mmap(vma);
2499 }
2500 }
2501 }
2502 vma_unlock_anon_vma(vma);
2503 khugepaged_enter_vma_merge(vma);
2504 validate_mm(vma->vm_mm);
2505 return error;
2506}
2507
2508
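/* enforced gap between the expanding stack and other mappings */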
2509unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
2510
2511static int __init cmdline_parse_stack_guard_gap(char *p)
2512{
2513 unsigned long val;
2514 char *endptr;
2515
2516 val = simple_strtoul(p, &endptr, 10);
2517 if (!*endptr)
2518 stack_guard_gap = val << PAGE_SHIFT;
2519
2520 return 0;
2521}
2522__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
2523
2524#ifdef CONFIG_STACK_GROWSUP
2525int expand_stack(struct vm_area_struct *vma, unsigned long address)
2526{
2527 return expand_upwards(vma, address);
2528}
2529
2530struct vm_area_struct *
2531find_extend_vma(struct mm_struct *mm, unsigned long addr)
2532{
2533 struct vm_area_struct *vma, *prev;
2534
2535 addr &= PAGE_MASK;
2536 vma = find_vma_prev(mm, addr, &prev);
2537 if (vma && (vma->vm_start <= addr))
2538 return vma;
2539 if (!prev || expand_stack(prev, addr))
2540 return NULL;
2541 if (prev->vm_flags & VM_LOCKED)
2542 __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
2543 return prev;
2544}
2545#else
2546int expand_stack(struct vm_area_struct *vma, unsigned long address)
2547{
2548 return expand_downwards(vma, address);
2549}
2550
2551struct vm_area_struct *
2552find_extend_vma(struct mm_struct * mm, unsigned long addr)
2553{
2554 struct vm_area_struct * vma;
2555 unsigned long start;
2556
2557 addr &= PAGE_MASK;
2558 vma = find_vma(mm,addr);
2559 if (!vma)
2560 return NULL;
2561 if (vma->vm_start <= addr)
2562 return vma;
2563 if (!(vma->vm_flags & VM_GROWSDOWN))
2564 return NULL;
2565 start = vma->vm_start;
2566 if (expand_stack(vma, addr))
2567 return NULL;
2568 if (vma->vm_flags & VM_LOCKED)
2569 __mlock_vma_pages_range(vma, addr, start, NULL);
2570 return vma;
2571}
2572#endif
2573
2574EXPORT_SYMBOL_GPL(find_extend_vma);
2575
2576
2577
2578
2579
2580
2581
2582static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2583{
2584 unsigned long nr_accounted = 0;
2585
2586
2587 update_hiwater_vm(mm);
2588 do {
2589 long nrpages = vma_pages(vma);
2590
2591 if (vma->vm_flags & VM_ACCOUNT)
2592 nr_accounted += nrpages;
2593 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2594 vma = remove_vma(vma);
2595 } while (vma);
2596 vm_unacct_memory(nr_accounted);
2597 validate_mm(mm);
2598}
2599
2600
2601
2602
2603
2604
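/*
 * Get rid of the page table entries and page tables in the indicated
 * region.  Called with the mm semaphore held.
 */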
2605static void unmap_region(struct mm_struct *mm,
2606 struct vm_area_struct *vma, struct vm_area_struct *prev,
2607 unsigned long start, unsigned long end)
2608{
2609 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2610 struct mmu_gather tlb;
2611
2612 lru_add_drain();
2613 tlb_gather_mmu(&tlb, mm, start, end);
2614 update_hiwater_rss(mm);
2615 unmap_vmas(&tlb, vma, start, end);
2616 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2617 next ? next->vm_start : USER_PGTABLES_CEILING);
2618 tlb_finish_mmu(&tlb, start, end);
2619}
2620
2621
2622
2623
2624
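/*
 * Detach the vmas that start below 'end' (beginning at 'vma') from the
 * mm's rbtree and linked list, so they can be unmapped and freed.
 */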
2625static void
2626detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2627 struct vm_area_struct *prev, unsigned long end)
2628{
2629 struct vm_area_struct **insertion_point;
2630 struct vm_area_struct *tail_vma = NULL;
2631 unsigned long addr;
2632
2633 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2634 vma->vm_prev = NULL;
2635 do {
2636 vma_rb_erase(vma, &mm->mm_rb);
2637 mm->map_count--;
2638 tail_vma = vma;
2639 vma = vma->vm_next;
2640 } while (vma && vma->vm_start < end);
2641 *insertion_point = vma;
2642 if (vma) {
2643 vma->vm_prev = prev;
2644 vma_gap_update(vma);
2645 } else
2646 mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
2647 tail_vma->vm_next = NULL;
2648 if (mm->unmap_area == arch_unmap_area)
2649 addr = prev ? prev->vm_end : mm->mmap_base;
2650 else
2651 addr = vma ? vma->vm_start : mm->mmap_base;
2652 mm->unmap_area(mm, addr);
2653 mm->mmap_cache = NULL;
2654}
2655
2656
2657
2658
2659
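/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 */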
2660static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2661 unsigned long addr, int new_below)
2662{
2663 struct mempolicy *pol;
2664 struct vm_area_struct *new;
2665 int err = -ENOMEM;
2666
2667 if (vma->vm_ops && vma->vm_ops->split) {
2668 err = vma->vm_ops->split(vma, addr);
2669 if (err)
2670 return err;
2671 }
2672
2673 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2674 if (!new)
2675 goto out_err;
2676
2677
2678 *new = *vma;
2679
2680 INIT_LIST_HEAD(&new->anon_vma_chain);
2681
2682 if (new_below)
2683 new->vm_end = addr;
2684 else {
2685 new->vm_start = addr;
2686 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2687 }
2688
2689 pol = mpol_dup(vma_policy(vma));
2690 if (IS_ERR(pol)) {
2691 err = PTR_ERR(pol);
2692 goto out_free_vma;
2693 }
2694 vma_set_policy(new, pol);
2695
2696 if (anon_vma_clone(new, vma))
2697 goto out_free_mpol;
2698
2699 if (new->vm_file)
2700 get_file(new->vm_file);
2701
2702 if (new->vm_ops && new->vm_ops->open)
2703 new->vm_ops->open(new);
2704
2705 if (new_below)
2706 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2707 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2708 else
2709 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2710
2711
2712 if (!err)
2713 return 0;
2714
2715
2716 if (new->vm_ops && new->vm_ops->close)
2717 new->vm_ops->close(new);
2718 if (new->vm_file)
2719 fput(new->vm_file);
2720 unlink_anon_vmas(new);
2721 out_free_mpol:
2722 mpol_put(pol);
2723 out_free_vma:
2724 kmem_cache_free(vm_area_cachep, new);
2725 out_err:
2726 return err;
2727}
2728
2729
2730
2731
2732
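/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */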
2733int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2734 unsigned long addr, int new_below)
2735{
2736 if (mm->map_count >= sysctl_max_map_count)
2737 return -ENOMEM;
2738
2739 return __split_vma(mm, vma, addr, new_below);
2740}
2741
2742
2743
2744
2745
2746
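/*
 * Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * The caller must hold mm->mmap_sem for writing.
 */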
2747int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
2748 struct list_head *uf)
2749{
2750 unsigned long end;
2751 struct vm_area_struct *vma, *prev, *last;
2752
2753 if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
2754 return -EINVAL;
2755
2756 if ((len = PAGE_ALIGN(len)) == 0)
2757 return -EINVAL;
2758
2759
2760 vma = find_vma(mm, start);
2761 if (!vma)
2762 return 0;
2763 prev = vma->vm_prev;
2764
2765
2766
2767 end = start + len;
2768 if (vma->vm_start >= end)
2769 return 0;
2770
2771 if (uf) {
2772 int error = userfaultfd_unmap_prep(vma, start, end, uf);
2773
2774 if (error)
2775 return error;
2776 }
2777
2778
2779
2780
2781
2782
2783
2784
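 /*
  * If we need to split any vma, do it now to save pain later.
  *
  * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
  * unmapped vm_area_struct will remain in use: so lower split_vma
  * places the vma above the address and so eliminates any need to
  * adjust it later.
  */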
2785 if (start > vma->vm_start) {
2786 int error;
2787
2788
2789
2790
2791
2792
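 /*
  * Splitting the first vma only adds a map entry permanently when
  * the unmap does not also cover its tail (end < vma->vm_end), so
  * enforce the map_count ceiling for that case here.
  */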
2793 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2794 return -ENOMEM;
2795
2796 error = __split_vma(mm, vma, start, 0);
2797 if (error)
2798 return error;
2799 prev = vma;
2800 }
2801
2802
2803 last = find_vma(mm, end);
2804 if (last && end > last->vm_start) {
2805 int error = __split_vma(mm, last, end, 1);
2806 if (error)
2807 return error;
2808 }
2809 vma = prev ? prev->vm_next : mm->mmap;
2810
2811
2812
2813
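 /*
  * unlock any mlock()ed ranges before detaching vmas
  */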
2814 if (mm->locked_vm) {
2815 struct vm_area_struct *tmp = vma;
2816 while (tmp && tmp->vm_start < end) {
2817 if (tmp->vm_flags & VM_LOCKED) {
2818 mm->locked_vm -= vma_pages(tmp);
2819 munlock_vma_pages_all(tmp);
2820 }
2821 tmp = tmp->vm_next;
2822 }
2823 }
2824
2825
2826
2827
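 /*
  * Remove the vma's, and unmap the actual pages
  */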
2828 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2829 unmap_region(mm, vma, prev, start, end);
2830
2831 arch_unmap(mm, vma, start, end);
2832
2833
2834 remove_vma_list(mm, vma);
2835
2836 return 0;
2837}
2838
2839int vm_munmap(unsigned long start, size_t len)
2840{
2841 int ret;
2842 struct mm_struct *mm = current->mm;
2843 LIST_HEAD(uf);
2844
2845 down_write(&mm->mmap_sem);
2846 ret = do_munmap(mm, start, len, &uf);
2847 up_write(&mm->mmap_sem);
2848 userfaultfd_unmap_complete(mm, &uf);
2849 return ret;
2850}
2851EXPORT_SYMBOL(vm_munmap);
2852
2853SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2854{
2855 profile_munmap(addr);
2856 return vm_munmap(addr, len);
2857}
2858
2859static inline void verify_mm_writelocked(struct mm_struct *mm)
2860{
2861#ifdef CONFIG_DEBUG_VM
2862 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2863 WARN_ON(1);
2864 up_read(&mm->mmap_sem);
2865 }
2866#endif
2867}
2868
2869
2870
2871
2872
2873
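/*
 *  this is really a simplified "do_mmap".  it only handles
 *  anonymous maps.  eventually we may be able to do some
 *  brk-specific accounting here.
 */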
2874static unsigned long do_brk_flags(unsigned long addr, unsigned long len, struct list_head *uf, unsigned long flags)
2875{
2876 struct mm_struct *mm = current->mm;
2877 struct vm_area_struct *vma, *prev;
2878 struct rb_node **rb_link, *rb_parent;
2879 pgoff_t pgoff = addr >> PAGE_SHIFT;
2880 int error;
2881
2882 len = PAGE_ALIGN(len);
2883 if (!len)
2884 return addr;
2885
2886
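 /* Until we need other flags, refuse anything except VM_EXEC. */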
2887 if ((flags & (~VM_EXEC)) != 0)
2888 return -EINVAL;
2889 flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2890
2891 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2892 if (offset_in_page(error))
2893 return error;
2894
2895
2896
2897
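 /*
  * mlock MCL_FUTURE?
  */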
2898 if (mm->def_flags & VM_LOCKED) {
2899 unsigned long locked, lock_limit;
2900 locked = len >> PAGE_SHIFT;
2901 locked += mm->locked_vm;
2902 lock_limit = rlimit(RLIMIT_MEMLOCK);
2903 lock_limit >>= PAGE_SHIFT;
2904 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2905 return -EAGAIN;
2906 }
2907
2908
2909
2910
2911
2912 verify_mm_writelocked(mm);
2913
2914
2915
2916
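 /*
  * Clear old maps.  this also does some error checking for us
  */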
2917 munmap_back:
2918 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2919 if (do_munmap(mm, addr, len, uf))
2920 return -ENOMEM;
2921 goto munmap_back;
2922 }
2923
2924
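 /* Check against address space limits *after* clearing old maps... */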
2925 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2926 return -ENOMEM;
2927
2928 if (mm->map_count > sysctl_max_map_count)
2929 return -ENOMEM;
2930
2931 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2932 return -ENOMEM;
2933
2934
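 /* Can we just expand an old private anonymous mapping? */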
2935 vma = vma_merge(mm, prev, addr, addr + len, flags,
2936 NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
2937 if (vma)
2938 goto out;
2939
2940
2941
2942
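 /*
  * create a vma struct for an anonymous mapping
  */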
2943 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2944 if (!vma) {
2945 vm_unacct_memory(len >> PAGE_SHIFT);
2946 return -ENOMEM;
2947 }
2948
2949 INIT_LIST_HEAD(&vma->anon_vma_chain);
2950 vma->vm_mm = mm;
2951 vma->vm_start = addr;
2952 vma->vm_end = addr + len;
2953 vma->vm_pgoff = pgoff;
2954 vma->vm_flags = flags;
2955 vma->vm_page_prot = vm_get_page_prot(flags);
2956 vma_link(mm, vma, prev, rb_link, rb_parent);
2957out:
2958 perf_event_mmap(vma);
2959 mm->total_vm += len >> PAGE_SHIFT;
2960 if (flags & VM_LOCKED)
2961 mm->locked_vm += (len >> PAGE_SHIFT);
2962 vma->vm_flags |= VM_SOFTDIRTY;
2963 return addr;
2964}
2965
2966static unsigned long do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
2967{
2968 return do_brk_flags(addr, len, uf, 0);
2969}
2970
2971unsigned long vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
2972{
2973 struct mm_struct *mm = current->mm;
2974 unsigned long ret;
2975 bool populate;
2976 LIST_HEAD(uf);
2977
2978 down_write(&mm->mmap_sem);
2979 ret = do_brk_flags(addr, len, &uf, flags);
2980 populate = ((mm->def_flags & VM_LOCKED) != 0);
2981 up_write(&mm->mmap_sem);
2982 userfaultfd_unmap_complete(mm, &uf);
2983 if (populate)
2984 mm_populate(addr, len);
2985 return ret;
2986}
2987EXPORT_SYMBOL(vm_brk_flags);
2988
2989unsigned long vm_brk(unsigned long addr, unsigned long len)
2990{
2991 return vm_brk_flags(addr, len, 0);
2992}
2993EXPORT_SYMBOL(vm_brk);
2994
2995
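/* Release all mmaps. */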
2996void exit_mmap(struct mm_struct *mm)
2997{
2998 struct mmu_gather tlb;
2999 struct vm_area_struct *vma;
3000 unsigned long nr_accounted = 0;
3001
3002
3003 mmu_notifier_release(mm);
3004
3005 if (mm->locked_vm) {
3006 vma = mm->mmap;
3007 while (vma) {
3008 if (vma->vm_flags & VM_LOCKED)
3009 munlock_vma_pages_all(vma);
3010 vma = vma->vm_next;
3011 }
3012 }
3013
3014 arch_exit_mmap(mm);
3015
3016 vma = mm->mmap;
3017 if (!vma)
3018 return;
3019
3020 lru_add_drain();
3021 flush_cache_mm(mm);
3022 tlb_gather_mmu(&tlb, mm, 0, -1);
3023
3024
3025 unmap_vmas(&tlb, vma, 0, -1);
3026
3027 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
3028 tlb_finish_mmu(&tlb, 0, -1);
3029
3030
3031
3032
3033
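 /*
  * Walk the list again, actually closing and freeing it,
  * with preemption enabled, without holding any MM locks.
  */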
3034 while (vma) {
3035 if (vma->vm_flags & VM_ACCOUNT)
3036 nr_accounted += vma_pages(vma);
3037 vma = remove_vma(vma);
3038 }
3039 vm_unacct_memory(nr_accounted);
3040
3041 WARN_ON(atomic_long_read(&mm->nr_ptes) >
3042 (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
3043}
3044
3045
3046
3047
3048
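/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_mutex is taken here.
 */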
3049int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
3050{
3051 struct vm_area_struct *prev;
3052 struct rb_node **rb_link, *rb_parent;
3053
3065
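 /*
  * The vm_pgoff of a purely anonymous vma should be irrelevant
  * until its first write fault, when page's anon_vma and index
  * are set.  But now set the vm_pgoff it will almost certainly
  * end up with (unless mremap moves it elsewhere before that
  * first wfault), so /proc/pid/maps tells a consistent story.
  *
  * By setting it to reflect the virtual start address of the
  * vma, merges and splits can happen in a seamless way, just
  * using the existing file pgoff checks and manipulations.
  */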
3066 if (!vma->vm_file) {
3067 BUG_ON(vma->anon_vma);
3068 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
3069 }
3070 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
3071 &prev, &rb_link, &rb_parent))
3072 return -ENOMEM;
3073 if ((vma->vm_flags & VM_ACCOUNT) &&
3074 security_vm_enough_memory_mm(mm, vma_pages(vma)))
3075 return -ENOMEM;
3076
3077 vma_link(mm, vma, prev, rb_link, rb_parent);
3078 return 0;
3079}
3080
3081
3082
3083
3084
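/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */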
3085struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
3086 unsigned long addr, unsigned long len, pgoff_t pgoff,
3087 bool *need_rmap_locks)
3088{
3089 struct vm_area_struct *vma = *vmap;
3090 unsigned long vma_start = vma->vm_start;
3091 struct mm_struct *mm = vma->vm_mm;
3092 struct vm_area_struct *new_vma, *prev;
3093 struct rb_node **rb_link, *rb_parent;
3094 struct mempolicy *pol;
3095 bool faulted_in_anon_vma = true;
3096
3097
3098
3099
3100
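 /*
  * If anonymous vma has not yet been faulted, update new pgoff
  * to match new location, to increase its chance of merging.
  */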
3101 if (unlikely(!vma->vm_file && !vma->anon_vma)) {
3102 pgoff = addr >> PAGE_SHIFT;
3103 faulted_in_anon_vma = false;
3104 }
3105
3106 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
3107 return NULL;
3108 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
3109 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
3110 vma->vm_userfaultfd_ctx);
3111 if (new_vma) {
3112
3113
3114
3115 if (unlikely(vma_start >= new_vma->vm_start &&
3116 vma_start < new_vma->vm_end)) {
3128
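 /*
  * The only way we can get a vma_merge with
  * self during an mremap is if the vma hasn't
  * been faulted in yet and we were allowed to
  * reset the dst vma->vm_pgoff to the
  * destination address of the mremap to allow
  * the merge to happen.  mremap must change the
  * vm_pgoff linearity between src and dst vmas
  * (in turn preventing a vma_merge) to be
  * safe.  It is only safe to keep the vm_pgoff
  * linear if there are no pages mapped yet.
  */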
3129 VM_BUG_ON(faulted_in_anon_vma);
3130 *vmap = vma = new_vma;
3131 }
3132 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
3133 } else {
3134 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
3135 if (new_vma) {
3136 *new_vma = *vma;
3137 new_vma->vm_start = addr;
3138 new_vma->vm_end = addr + len;
3139 new_vma->vm_pgoff = pgoff;
3140 pol = mpol_dup(vma_policy(vma));
3141 if (IS_ERR(pol))
3142 goto out_free_vma;
3143 vma_set_policy(new_vma, pol);
3144 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
3145 if (anon_vma_clone(new_vma, vma))
3146 goto out_free_mempol;
3147 if (new_vma->vm_file)
3148 get_file(new_vma->vm_file);
3149 if (new_vma->vm_ops && new_vma->vm_ops->open)
3150 new_vma->vm_ops->open(new_vma);
3151 vma_link(mm, new_vma, prev, rb_link, rb_parent);
3152 *need_rmap_locks = false;
3153 }
3154 }
3155 return new_vma;
3156
3157 out_free_mempol:
3158 mpol_put(pol);
3159 out_free_vma:
3160 kmem_cache_free(vm_area_cachep, new_vma);
3161 return NULL;
3162}
3163
3164
3165
3166
3167
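/*
 * Return true if the calling process may expand its vm space
 * by the passed number of pages
 */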
3168int may_expand_vm(struct mm_struct *mm, unsigned long npages)
3169{
3170 unsigned long cur = mm->total_vm;
3171 unsigned long lim;
3172
3173 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
3174
3175 if (cur + npages > lim)
3176 return 0;
3177 return 1;
3178}
3179
3180
3181static int special_mapping_fault(struct vm_area_struct *vma,
3182 struct vm_fault *vmf)
3183{
3184 pgoff_t pgoff;
3185 struct page **pages;
3186
3187
3188
3189
3190
3191
3192
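 /*
  * special mappings have no vm_file, and in that case, the mm
  * uses vm_pgoff internally. So we have to subtract it from here.
  * We are allowed to do this because we are the mm; do not copy
  * this code into drivers!
  */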
3193 pgoff = vmf->pgoff - vma->vm_pgoff;
3194
3195 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
3196 pgoff--;
3197
3198 if (*pages) {
3199 struct page *page = *pages;
3200 get_page(page);
3201 vmf->page = page;
3202 return 0;
3203 }
3204
3205 return VM_FAULT_SIGBUS;
3206}
3207
3208
3209
3210
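/*
 * Having a close hook prevents vma merging regardless of flags.
 */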
3211static void special_mapping_close(struct vm_area_struct *vma)
3212{
3213}
3214
3215static const struct vm_operations_struct special_mapping_vmops = {
3216 .close = special_mapping_close,
3217 .fault = special_mapping_fault,
3218};
3219
3228
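/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */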
3229int install_special_mapping(struct mm_struct *mm,
3230 unsigned long addr, unsigned long len,
3231 unsigned long vm_flags, struct page **pages)
3232{
3233 int ret;
3234 struct vm_area_struct *vma;
3235
3236 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
3237 if (unlikely(vma == NULL))
3238 return -ENOMEM;
3239
3240 INIT_LIST_HEAD(&vma->anon_vma_chain);
3241 vma->vm_mm = mm;
3242 vma->vm_start = addr;
3243 vma->vm_end = addr + len;
3244
3245 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
3246 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
3247
3248 vma->vm_ops = &special_mapping_vmops;
3249 vma->vm_private_data = pages;
3250
3251 ret = insert_vm_struct(mm, vma);
3252 if (ret)
3253 goto out;
3254
3255 mm->total_vm += len >> PAGE_SHIFT;
3256
3257 perf_event_mmap(vma);
3258
3259 return 0;
3260
3261out:
3262 kmem_cache_free(vm_area_cachep, vma);
3263 return ret;
3264}
3265
3266static DEFINE_MUTEX(mm_all_locks_mutex);
3267
3268static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
3269{
3270 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3271
3272
3273
3274
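 /*
  * The LSB of head.next can't change from under us
  * because we hold the mm_all_locks_mutex.
  */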
3275 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
3284
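 /*
  * We can safely modify head.next after taking the
  * anon_vma->root->rwsem. If some other vma in this mm shares
  * the same anon_vma we won't take it again.
  *
  * No need of atomic instructions here, head.next
  * can't change from under us thanks to the
  * anon_vma->root->rwsem.
  */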
3285 if (__test_and_set_bit(0, (unsigned long *)
3286 &anon_vma->root->rb_root.rb_node))
3287 BUG();
3288 }
3289}
3290
3291static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3292{
3293 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3302
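 /*
  * AS_MM_ALL_LOCKS can't change from under us because
  * we hold the mm_all_locks_mutex.
  *
  * Operations on ->flags have to be atomic because
  * even if AS_MM_ALL_LOCKS is stable thanks to the
  * mm_all_locks_mutex, there may be other cpus
  * changing other bitflags in parallel to us.
  */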
3303 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
3304 BUG();
3305 mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
3306 }
3307}
3308
3346
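/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas to be associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->root->rb_root.rb_node and the AS_MM_ALL_LOCKS
 * bitflag in mapping->flags mark the anon_vma/address_space as already
 * locked, so that multiple vmas sharing the same anon_vma or mapping
 * are only locked once.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */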
3347int mm_take_all_locks(struct mm_struct *mm)
3348{
3349 struct vm_area_struct *vma;
3350 struct anon_vma_chain *avc;
3351
3352 BUG_ON(down_read_trylock(&mm->mmap_sem));
3353
3354 mutex_lock(&mm_all_locks_mutex);
3355
3356 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3357 if (signal_pending(current))
3358 goto out_unlock;
3359 if (vma->vm_file && vma->vm_file->f_mapping &&
3360 is_vm_hugetlb_page(vma))
3361 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3362 }
3363
3364 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3365 if (signal_pending(current))
3366 goto out_unlock;
3367 if (vma->vm_file && vma->vm_file->f_mapping &&
3368 !is_vm_hugetlb_page(vma))
3369 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3370 }
3371
3372 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3373 if (signal_pending(current))
3374 goto out_unlock;
3375 if (vma->anon_vma)
3376 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3377 vm_lock_anon_vma(mm, avc->anon_vma);
3378 }
3379
3380 return 0;
3381
3382out_unlock:
3383 mm_drop_all_locks(mm);
3384 return -EINTR;
3385}
3386
3387static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3388{
3389 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
3401
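 /*
  * The LSB of head.next can't change to 0 from under
  * us because we hold the mm_all_locks_mutex.
  *
  * We must however clear the bitflag before unlocking
  * the vma so the users using the anon_vma->rb_root will
  * never see our bitflag.
  *
  * No need of atomic instructions here, head.next
  * can't change from under us until we release the
  * anon_vma->root->rwsem.
  */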
3402 if (!__test_and_clear_bit(0, (unsigned long *)
3403 &anon_vma->root->rb_root.rb_node))
3404 BUG();
3405 anon_vma_unlock_write(anon_vma);
3406 }
3407}
3408
3409static void vm_unlock_mapping(struct address_space *mapping)
3410{
3411 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
3412
3413
3414
3415
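 /*
  * AS_MM_ALL_LOCKS can't change to 0 from under us
  * because we hold the mm_all_locks_mutex.
  */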
3416 mutex_unlock(&mapping->i_mmap_mutex);
3417 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3418 &mapping->flags))
3419 BUG();
3420 }
3421}
3422
3423
3424
3425
3426
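/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */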
3427void mm_drop_all_locks(struct mm_struct *mm)
3428{
3429 struct vm_area_struct *vma;
3430 struct anon_vma_chain *avc;
3431
3432 BUG_ON(down_read_trylock(&mm->mmap_sem));
3433 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3434
3435 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3436 if (vma->anon_vma)
3437 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3438 vm_unlock_anon_vma(avc->anon_vma);
3439 if (vma->vm_file && vma->vm_file->f_mapping)
3440 vm_unlock_mapping(vma->vm_file->f_mapping);
3441 }
3442
3443 mutex_unlock(&mm_all_locks_mutex);
3444}
3445
3446
3447
3448
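/*
 * initialise the percpu counter for VM
 */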
3449void __init mmap_init(void)
3450{
3451 int ret;
3452
3453 ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
3454 VM_BUG_ON(ret);
3455}
3456
3466
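/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd or login, bash, and top/kill.
 */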
3467static int init_user_reserve(void)
3468{
3469 unsigned long free_kbytes;
3470
3471 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3472
3473 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3474 return 0;
3475}
3476module_init(init_user_reserve)
3477
3487
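/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB), enough to recover
 * with sshd, bash, and top/kill.
 */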
3488static int init_admin_reserve(void)
3489{
3490 unsigned long free_kbytes;
3491
3492 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3493
3494 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3495 return 0;
3496}
3497module_init(init_admin_reserve)
3498
3516
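/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * On memory hot-add, recompute the reserves if they are still below their
 * default caps (128MB user, 8MB admin), i.e. were derived from free memory.
 * On memory hot-remove, shrink the reserves if they now exceed the amount
 * of free memory, and log the new values.
 */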
3517static int reserve_mem_notifier(struct notifier_block *nb,
3518 unsigned long action, void *data)
3519{
3520 unsigned long tmp, free_kbytes;
3521
3522 switch (action) {
3523 case MEM_ONLINE:
3524
3525 tmp = sysctl_user_reserve_kbytes;
3526 if (0 < tmp && tmp < (1UL << 17))
3527 init_user_reserve();
3528
3529
3530 tmp = sysctl_admin_reserve_kbytes;
3531 if (0 < tmp && tmp < (1UL << 13))
3532 init_admin_reserve();
3533
3534 break;
3535 case MEM_OFFLINE:
3536 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3537
3538 if (sysctl_user_reserve_kbytes > free_kbytes) {
3539 init_user_reserve();
3540 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3541 sysctl_user_reserve_kbytes);
3542 }
3543
3544 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3545 init_admin_reserve();
3546 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3547 sysctl_admin_reserve_kbytes);
3548 }
3549 break;
3550 default:
3551 break;
3552 }
3553 return NOTIFY_OK;
3554}
3555
3556static struct notifier_block reserve_mem_nb = {
3557 .notifier_call = reserve_mem_notifier,
3558};
3559
3560static int __meminit init_reserve_notifier(void)
3561{
3562 if (register_hotmemory_notifier(&reserve_mem_nb))
3563 pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3564
3565 return 0;
3566}
3567module_init(init_reserve_notifier)
3568