/*
 * mm/mmap.c
 *
 * Memory mapping and VMA management: mmap, munmap, brk, and the
 * bookkeeping that keeps the mm's vma list and rbtree consistent.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)	(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
58
/*
 * Default page protections for the 16 combinations of the VM_READ,
 * VM_WRITE, VM_EXEC and VM_SHARED flags: the first eight entries
 * (__P...) are used for private (copy-on-write) mappings, the last
 * eight (__S...) for shared mappings.
 */
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};
78
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17;
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13;
92
/*
 * Make sure vm_committed_as sits on its own cacheline and is not shared
 * with other variables: it can be updated frequently by several CPUs.
 */
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;

/*
 * The global memory commitment made in the system can be a metric
 * that can be used to drive ballooning decisions when Linux runs
 * hosted on other operating systems such as Microsoft Windows that
 * have a dedicated kernel mode driver to monitor this metric. This
 * allows other kernel components to make use of the committed value.
 */
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
111
/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
 *
 * We currently support three overcommit policies, which are set via the
 * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting.
 *
 * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
 *
 * Note this is a helper function intended to be used by LSMs which
 * wish to use this logic.
 */
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed, reserve;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have.
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_page_state(NR_FREE_PAGES);
		free += global_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this.
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave reserved pages. The pages are not for anonymous pages.
		 */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/*
		 * Reserve some for root.
		 */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;

	/*
	 * Reserve some for root.
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
	allowed += total_swap_pages;

	/*
	 * Don't let a single process grow so big a user can't recover.
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min(mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
206
/*
 * Requires inode->i_mapping->i_mmap_mutex
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping->i_mmap_writable--;

	flush_dcache_mmap_lock(mapping);
	if (unlikely(vma->vm_flags & VM_NONLINEAR))
		list_del_init(&vma->shared.nonlinear);
	else
		vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}
225
/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		mutex_lock(&mapping->i_mmap_mutex);
		__remove_shared_vm_struct(vma, file, mapping);
		mutex_unlock(&mapping->i_mmap_mutex);
	}
}

/*
 * Close a vm structure and free it, returning the next.
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}
258
static unsigned long do_brk(unsigned long addr, unsigned long len);

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;
	bool populate;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the
	 * test of oldbrk with newbrk then it can escape the test and let the
	 * data segment grow beyond its set limit in the case where the limit
	 * is lower than brk().
	 */
	rlim = rlimit(RLIMIT_DATA);
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}
331
332static long vma_compute_subtree_gap(struct vm_area_struct *vma)
333{
334 unsigned long max, subtree_gap;
335 max = vma->vm_start;
336 if (vma->vm_prev)
337 max -= vma->vm_prev->vm_end;
338 if (vma->vm_rb.rb_left) {
339 subtree_gap = rb_entry(vma->vm_rb.rb_left,
340 struct vm_area_struct, vm_rb)->rb_subtree_gap;
341 if (subtree_gap > max)
342 max = subtree_gap;
343 }
344 if (vma->vm_rb.rb_right) {
345 subtree_gap = rb_entry(vma->vm_rb.rb_right,
346 struct vm_area_struct, vm_rb)->rb_subtree_gap;
347 if (subtree_gap > max)
348 max = subtree_gap;
349 }
350 return max;
351}
352
353#ifdef CONFIG_DEBUG_VM_RB
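/*
 * Debug helper: walk the vma rbtree in order and check that the vmas are
 * correctly ordered and that every node's rb_subtree_gap is up to date.
 * Returns the number of nodes found, or -1 if an inconsistency was hit.
 */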
354static int browse_rb(struct rb_root *root)
355{
356 int i = 0, j, bug = 0;
357 struct rb_node *nd, *pn = NULL;
358 unsigned long prev = 0, pend = 0;
359
360 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
361 struct vm_area_struct *vma;
362 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
363 if (vma->vm_start < prev) {
364 printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
365 bug = 1;
366 }
367 if (vma->vm_start < pend) {
368 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
369 bug = 1;
370 }
371 if (vma->vm_start > vma->vm_end) {
372 printk("vm_end %lx < vm_start %lx\n",
373 vma->vm_end, vma->vm_start);
374 bug = 1;
375 }
376 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
377 printk("free gap %lx, correct %lx\n",
378 vma->rb_subtree_gap,
379 vma_compute_subtree_gap(vma));
380 bug = 1;
381 }
382 i++;
383 pn = nd;
384 prev = vma->vm_start;
385 pend = vma->vm_end;
386 }
387 j = 0;
388 for (nd = pn; nd; nd = rb_prev(nd))
389 j++;
390 if (i != j) {
391 printk("backwards %d, forwards %d\n", j, i);
392 bug = 1;
393 }
394 return bug ? -1 : i;
395}
396
397static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
398{
399 struct rb_node *nd;
400
401 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
402 struct vm_area_struct *vma;
403 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
404 BUG_ON(vma != ignore &&
405 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
406 }
407}
408
409void validate_mm(struct mm_struct *mm)
410{
411 int bug = 0;
412 int i = 0;
413 unsigned long highest_address = 0;
414 struct vm_area_struct *vma = mm->mmap;
415 while (vma) {
416 struct anon_vma_chain *avc;
417 vma_lock_anon_vma(vma);
418 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
419 anon_vma_interval_tree_verify(avc);
420 vma_unlock_anon_vma(vma);
421 highest_address = vma->vm_end;
422 vma = vma->vm_next;
423 i++;
424 }
425 if (i != mm->map_count) {
426 printk("map_count %d vm_next %d\n", mm->map_count, i);
427 bug = 1;
428 }
429 if (highest_address != mm->highest_vm_end) {
430 printk("mm->highest_vm_end %lx, found %lx\n",
431 mm->highest_vm_end, highest_address);
432 bug = 1;
433 }
434 i = browse_rb(&mm->mm_rb);
435 if (i != mm->map_count) {
436 printk("map_count %d rb %d\n", mm->map_count, i);
437 bug = 1;
438 }
439 BUG_ON(bug);
440}
441#else
442#define validate_mm_rb(root, ignore) do { } while (0)
443#define validate_mm(mm) do { } while (0)
444#endif
445
446RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
447 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
448
/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * RB_DECLARE_CALLBACKS() above already generated a propagate
	 * callback that recomputes rb_subtree_gap on the path to the root.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}
462
static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of the vma being erased.
	 */
	validate_mm_rb(root, vma);

	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}
487
/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_sem and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
519
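/*
 * Find where a new vma covering [addr, end) would be linked into the mm.
 * On success, *pprev, *rb_link and *rb_parent describe the insertion point;
 * -ENOMEM is returned if an existing vma already overlaps the range.
 */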
520static int find_vma_links(struct mm_struct *mm, unsigned long addr,
521 unsigned long end, struct vm_area_struct **pprev,
522 struct rb_node ***rb_link, struct rb_node **rb_parent)
523{
524 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
525
526 __rb_link = &mm->mm_rb.rb_node;
527 rb_prev = __rb_parent = NULL;
528
529 while (*__rb_link) {
530 struct vm_area_struct *vma_tmp;
531
532 __rb_parent = *__rb_link;
533 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
534
535 if (vma_tmp->vm_end > addr) {
536
537 if (vma_tmp->vm_start < end)
538 return -ENOMEM;
539 __rb_link = &__rb_parent->rb_left;
540 } else {
541 rb_prev = __rb_parent;
542 __rb_link = &__rb_parent->rb_right;
543 }
544 }
545
546 *pprev = NULL;
547 if (rb_prev)
548 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
549 *rb_link = __rb_link;
550 *rb_parent = __rb_parent;
551 return 0;
552}
553
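/* Count the pages already mapped by vmas intersecting [addr, end). */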
554static unsigned long count_vma_pages_range(struct mm_struct *mm,
555 unsigned long addr, unsigned long end)
556{
557 unsigned long nr_pages = 0;
558 struct vm_area_struct *vma;
559
560
561 vma = find_vma_intersection(mm, addr, end);
562 if (!vma)
563 return 0;
564
565 nr_pages = (min(end, vma->vm_end) -
566 max(addr, vma->vm_start)) >> PAGE_SHIFT;
567
568
569 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
570 unsigned long overlap_len;
571
572 if (vma->vm_start > end)
573 break;
574
575 overlap_len = min(end, vma->vm_end) - vma->vm_start;
576 nr_pages += overlap_len >> PAGE_SHIFT;
577 }
578
579 return nr_pages;
580}
581
582void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
583 struct rb_node **rb_link, struct rb_node *rb_parent)
584{
585
586 if (vma->vm_next)
587 vma_gap_update(vma->vm_next);
588 else
589 mm->highest_vm_end = vma->vm_end;
590
591
592
593
594
595
596
597
598
599
600 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
601 vma->rb_subtree_gap = 0;
602 vma_gap_update(vma);
603 vma_rb_insert(vma, &mm->mm_rb);
604}
605
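/*
 * Link the vma into its file's address_space: the i_mmap interval tree,
 * or the nonlinear list for VM_NONLINEAR mappings.  The caller holds
 * the mapping's i_mmap_mutex.
 */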
606static void __vma_link_file(struct vm_area_struct *vma)
607{
608 struct file *file;
609
610 file = vma->vm_file;
611 if (file) {
612 struct address_space *mapping = file->f_mapping;
613
614 if (vma->vm_flags & VM_DENYWRITE)
615 atomic_dec(&file_inode(file)->i_writecount);
616 if (vma->vm_flags & VM_SHARED)
617 mapping->i_mmap_writable++;
618
619 flush_dcache_mmap_lock(mapping);
620 if (unlikely(vma->vm_flags & VM_NONLINEAR))
621 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
622 else
623 vma_interval_tree_insert(vma, &mapping->i_mmap);
624 flush_dcache_mmap_unlock(mapping);
625 }
626}
627
628static void
629__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
630 struct vm_area_struct *prev, struct rb_node **rb_link,
631 struct rb_node *rb_parent)
632{
633 __vma_link_list(mm, vma, prev, rb_parent);
634 __vma_link_rb(mm, vma, rb_link, rb_parent);
635}
636
637static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
638 struct vm_area_struct *prev, struct rb_node **rb_link,
639 struct rb_node *rb_parent)
640{
641 struct address_space *mapping = NULL;
642
643 if (vma->vm_file)
644 mapping = vma->vm_file->f_mapping;
645
646 if (mapping)
647 mutex_lock(&mapping->i_mmap_mutex);
648
649 __vma_link(mm, vma, prev, rb_link, rb_parent);
650 __vma_link_file(vma);
651
652 if (mapping)
653 mutex_unlock(&mapping->i_mmap_mutex);
654
655 mm->map_count++;
656 validate_mm(mm);
657}
658
659
660
661
662
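/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into
 * the mm's list and rbtree.  It has already been inserted into the
 * interval tree.
 */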
663static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
664{
665 struct vm_area_struct *prev;
666 struct rb_node **rb_link, *rb_parent;
667
668 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
669 &prev, &rb_link, &rb_parent))
670 BUG();
671 __vma_link(mm, vma, prev, rb_link, rb_parent);
672 mm->map_count++;
673}
674
675static inline void
676__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
677 struct vm_area_struct *prev)
678{
679 struct vm_area_struct *next;
680
681 vma_rb_erase(vma, &mm->mm_rb);
682 prev->vm_next = next = vma->vm_next;
683 if (next)
684 next->vm_prev = prev;
685 if (mm->mmap_cache == vma)
686 mm->mmap_cache = prev;
687}
688
/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
698{
699 struct mm_struct *mm = vma->vm_mm;
700 struct vm_area_struct *next = vma->vm_next;
701 struct vm_area_struct *importer = NULL;
702 struct address_space *mapping = NULL;
703 struct rb_root *root = NULL;
704 struct anon_vma *anon_vma = NULL;
705 struct file *file = vma->vm_file;
706 bool start_changed = false, end_changed = false;
707 long adjust_next = 0;
708 int remove_next = 0;
709
710 if (next && !insert) {
711 struct vm_area_struct *exporter = NULL;
712
713 if (end >= next->vm_end) {
714
715
716
717
718again: remove_next = 1 + (end > next->vm_end);
719 end = next->vm_end;
720 exporter = next;
721 importer = vma;
722 } else if (end > next->vm_start) {
723
724
725
726
727 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
728 exporter = next;
729 importer = vma;
730 } else if (end < vma->vm_end) {
731
732
733
734
735
736 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
737 exporter = vma;
738 importer = next;
739 }
740
741
742
743
744
745
746 if (exporter && exporter->anon_vma && !importer->anon_vma) {
747 if (anon_vma_clone(importer, exporter))
748 return -ENOMEM;
749 importer->anon_vma = exporter->anon_vma;
750 }
751 }
752
753 if (file) {
754 mapping = file->f_mapping;
755 if (!(vma->vm_flags & VM_NONLINEAR)) {
756 root = &mapping->i_mmap;
757 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
758
759 if (adjust_next)
760 uprobe_munmap(next, next->vm_start,
761 next->vm_end);
762 }
763
764 mutex_lock(&mapping->i_mmap_mutex);
765 if (insert) {
766
767
768
769
770
771
772 __vma_link_file(insert);
773 }
774 }
775
776 vma_adjust_trans_huge(vma, start, end, adjust_next);
777
778 anon_vma = vma->anon_vma;
779 if (!anon_vma && adjust_next)
780 anon_vma = next->anon_vma;
781 if (anon_vma) {
782 VM_BUG_ON(adjust_next && next->anon_vma &&
783 anon_vma != next->anon_vma);
784 anon_vma_lock_write(anon_vma);
785 anon_vma_interval_tree_pre_update_vma(vma);
786 if (adjust_next)
787 anon_vma_interval_tree_pre_update_vma(next);
788 }
789
790 if (root) {
791 flush_dcache_mmap_lock(mapping);
792 vma_interval_tree_remove(vma, root);
793 if (adjust_next)
794 vma_interval_tree_remove(next, root);
795 }
796
797 if (start != vma->vm_start) {
798 vma->vm_start = start;
799 start_changed = true;
800 }
801 if (end != vma->vm_end) {
802 vma->vm_end = end;
803 end_changed = true;
804 }
805 vma->vm_pgoff = pgoff;
806 if (adjust_next) {
807 next->vm_start += adjust_next << PAGE_SHIFT;
808 next->vm_pgoff += adjust_next;
809 }
810
811 if (root) {
812 if (adjust_next)
813 vma_interval_tree_insert(next, root);
814 vma_interval_tree_insert(vma, root);
815 flush_dcache_mmap_unlock(mapping);
816 }
817
818 if (remove_next) {
819
820
821
822
823 __vma_unlink(mm, next, vma);
824 if (file)
825 __remove_shared_vm_struct(next, file, mapping);
826 } else if (insert) {
827
828
829
830
831
832 __insert_vm_struct(mm, insert);
833 } else {
834 if (start_changed)
835 vma_gap_update(vma);
836 if (end_changed) {
837 if (!next)
838 mm->highest_vm_end = end;
839 else if (!adjust_next)
840 vma_gap_update(next);
841 }
842 }
843
844 if (anon_vma) {
845 anon_vma_interval_tree_post_update_vma(vma);
846 if (adjust_next)
847 anon_vma_interval_tree_post_update_vma(next);
848 anon_vma_unlock_write(anon_vma);
849 }
850 if (mapping)
851 mutex_unlock(&mapping->i_mmap_mutex);
852
853 if (root) {
854 uprobe_mmap(vma);
855
856 if (adjust_next)
857 uprobe_mmap(next);
858 }
859
860 if (remove_next) {
861 if (file) {
862 uprobe_munmap(next, next->vm_start, next->vm_end);
863 fput(file);
864 }
865 if (next->anon_vma)
866 anon_vma_merge(vma, next);
867 mm->map_count--;
868 mpol_put(vma_policy(next));
869 kmem_cache_free(vm_area_cachep, next);
870
871
872
873
874
875 next = vma->vm_next;
876 if (remove_next == 2)
877 goto again;
878 else if (next)
879 vma_gap_update(next);
880 else
881 mm->highest_vm_end = end;
882 }
883 if (insert && file)
884 uprobe_mmap(insert);
885
886 validate_mm(mm);
887
888 return 0;
889}
890
/*
 * If the vma has a ->close operation then the driver probably needs to
 * release per-vma resources, so we don't attempt to merge those.
 */
static inline int is_mergeable_vma(struct vm_area_struct *vma,
			struct file *file, unsigned long vm_flags)
{
	if (vma->vm_flags ^ vm_flags)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMAs cloned from
	 * parents. This can improve scalability caused by anon_vma lock.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}
920
/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
{
	if (is_mergeable_vma(vma, file, vm_flags) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas.
 */
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
{
	if (is_mergeable_vma(vma, file, vm_flags) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;
		vm_pglen = vma_pages(vma);
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}
964
/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
 * whether that can be merged with its predecessor or its successor,
 * or both (when it neatly fills a hole).
 *
 * Callers pass in "prev" pointing to the vma before addr (or NULL).
 * vma_merge() returns NULL if no merge was possible, otherwise the vma
 * that now covers the requested range; vma_adjust() does the hard work
 * of moving vm_start/vm_end/vm_pgoff and fixing up the rbtree, the
 * anon_vma interval trees and the file's i_mmap tree.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;
1010
1011 if (prev)
1012 next = prev->vm_next;
1013 else
1014 next = mm->mmap;
1015 area = next;
1016 if (next && next->vm_end == end)
1017 next = next->vm_next;
1018
1019
1020
1021
1022 if (prev && prev->vm_end == addr &&
1023 mpol_equal(vma_policy(prev), policy) &&
1024 can_vma_merge_after(prev, vm_flags,
1025 anon_vma, file, pgoff)) {
1026
1027
1028
1029 if (next && end == next->vm_start &&
1030 mpol_equal(policy, vma_policy(next)) &&
1031 can_vma_merge_before(next, vm_flags,
1032 anon_vma, file, pgoff+pglen) &&
1033 is_mergeable_anon_vma(prev->anon_vma,
1034 next->anon_vma, NULL)) {
1035
1036 err = vma_adjust(prev, prev->vm_start,
1037 next->vm_end, prev->vm_pgoff, NULL);
1038 } else
1039 err = vma_adjust(prev, prev->vm_start,
1040 end, prev->vm_pgoff, NULL);
1041 if (err)
1042 return NULL;
1043 khugepaged_enter_vma_merge(prev);
1044 return prev;
1045 }
1046
1047
1048
1049
1050 if (next && end == next->vm_start &&
1051 mpol_equal(policy, vma_policy(next)) &&
1052 can_vma_merge_before(next, vm_flags,
1053 anon_vma, file, pgoff+pglen)) {
1054 if (prev && addr < prev->vm_end)
1055 err = vma_adjust(prev, prev->vm_start,
1056 addr, prev->vm_pgoff, NULL);
1057 else
1058 err = vma_adjust(area, addr, next->vm_end,
1059 next->vm_pgoff - pglen, NULL);
1060 if (err)
1061 return NULL;
1062 khugepaged_enter_vma_merge(area);
1063 return area;
1064 }
1065
1066 return NULL;
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
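/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma: the two vmas must be adjacent, have the same
 * vm_file and mempolicy, and their flags may only differ in bits that
 * mprotect can change.
 */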
1082static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1083{
1084 return a->vm_end == b->vm_start &&
1085 mpol_equal(vma_policy(a), vma_policy(b)) &&
1086 a->vm_file == b->vm_file &&
1087 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
1088 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1089}
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1114{
1115 if (anon_vma_compatible(a, b)) {
1116 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
1117
1118 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1119 return anon_vma;
1120 }
1121 return NULL;
1122}
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1133{
1134 struct anon_vma *anon_vma;
1135 struct vm_area_struct *near;
1136
1137 near = vma->vm_next;
1138 if (!near)
1139 goto try_prev;
1140
1141 anon_vma = reusable_anon_vma(near, vma, near);
1142 if (anon_vma)
1143 return anon_vma;
1144try_prev:
1145 near = vma->vm_prev;
1146 if (!near)
1147 goto none;
1148
1149 anon_vma = reusable_anon_vma(near, near, vma);
1150 if (anon_vma)
1151 return anon_vma;
1152none:
1153
1154
1155
1156
1157
1158
1159
1160
1161 return NULL;
1162}
1163
1164#ifdef CONFIG_PROC_FS
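/*
 * Update the mm's per-type VM accounting (total_vm, shared_vm, exec_vm,
 * stack_vm) by @pages; pages is negative when a mapping is removed.
 */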
1165void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1166 struct file *file, long pages)
1167{
1168 const unsigned long stack_flags
1169 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1170
1171 mm->total_vm += pages;
1172
1173 if (file) {
1174 mm->shared_vm += pages;
1175 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1176 mm->exec_vm += pages;
1177 } else if (flags & stack_flags)
1178 mm->stack_vm += pages;
1179}
1180#endif
1181
1182
1183
1184
1185
1186static inline unsigned long round_hint_to_min(unsigned long hint)
1187{
1188 hint &= PAGE_MASK;
1189 if (((void *)hint != NULL) &&
1190 (hint < mmap_min_addr))
1191 return PAGE_ALIGN(mmap_min_addr);
1192 return hint;
1193}
1194
1195
1196
1197
1198
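/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */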
1199unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1200 unsigned long len, unsigned long prot,
1201 unsigned long flags, unsigned long pgoff,
1202 unsigned long *populate)
1203{
1204 struct mm_struct * mm = current->mm;
1205 vm_flags_t vm_flags;
1206
1207 *populate = 0;
1208
1209
1210
1211
1212
1213
1214
1215 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1216 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
1217 prot |= PROT_EXEC;
1218
1219 if (!len)
1220 return -EINVAL;
1221
1222 if (!(flags & MAP_FIXED))
1223 addr = round_hint_to_min(addr);
1224
1225
1226 len = PAGE_ALIGN(len);
1227 if (!len)
1228 return -ENOMEM;
1229
1230
1231 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1232 return -EOVERFLOW;
1233
1234
1235 if (mm->map_count > sysctl_max_map_count)
1236 return -ENOMEM;
1237
1238
1239
1240
1241 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1242 if (addr & ~PAGE_MASK)
1243 return addr;
1244
1245
1246
1247
1248
1249 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1250 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1251
1252 if (flags & MAP_LOCKED)
1253 if (!can_do_mlock())
1254 return -EPERM;
1255
1256
1257 if (vm_flags & VM_LOCKED) {
1258 unsigned long locked, lock_limit;
1259 locked = len >> PAGE_SHIFT;
1260 locked += mm->locked_vm;
1261 lock_limit = rlimit(RLIMIT_MEMLOCK);
1262 lock_limit >>= PAGE_SHIFT;
1263 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1264 return -EAGAIN;
1265 }
1266
1267 if (file) {
1268 struct inode *inode = file_inode(file);
1269
1270 switch (flags & MAP_TYPE) {
1271 case MAP_SHARED:
1272 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1273 return -EACCES;
1274
1275
1276
1277
1278
1279 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1280 return -EACCES;
1281
1282
1283
1284
1285 if (locks_verify_locked(inode))
1286 return -EAGAIN;
1287
1288 vm_flags |= VM_SHARED | VM_MAYSHARE;
1289 if (!(file->f_mode & FMODE_WRITE))
1290 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1291
1292
1293 case MAP_PRIVATE:
1294 if (!(file->f_mode & FMODE_READ))
1295 return -EACCES;
1296 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1297 if (vm_flags & VM_EXEC)
1298 return -EPERM;
1299 vm_flags &= ~VM_MAYEXEC;
1300 }
1301
1302 if (!file->f_op || !file->f_op->mmap)
1303 return -ENODEV;
1304 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1305 return -EINVAL;
1306 break;
1307
1308 default:
1309 return -EINVAL;
1310 }
1311 } else {
1312 switch (flags & MAP_TYPE) {
1313 case MAP_SHARED:
1314 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1315 return -EINVAL;
1316
1317
1318
1319 pgoff = 0;
1320 vm_flags |= VM_SHARED | VM_MAYSHARE;
1321 break;
1322 case MAP_PRIVATE:
1323
1324
1325
1326 pgoff = addr >> PAGE_SHIFT;
1327 break;
1328 default:
1329 return -EINVAL;
1330 }
1331 }
1332
1333
1334
1335
1336
1337 if (flags & MAP_NORESERVE) {
1338
1339 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1340 vm_flags |= VM_NORESERVE;
1341
1342
1343 if (file && is_file_hugepages(file))
1344 vm_flags |= VM_NORESERVE;
1345 }
1346
1347 addr = mmap_region(file, addr, len, vm_flags, pgoff);
1348 if (!IS_ERR_VALUE(addr) &&
1349 ((vm_flags & VM_LOCKED) ||
1350 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1351 *populate = len;
1352 return addr;
1353}
1354
1355SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1356 unsigned long, prot, unsigned long, flags,
1357 unsigned long, fd, unsigned long, pgoff)
1358{
1359 struct file *file = NULL;
1360 unsigned long retval = -EBADF;
1361
1362 if (!(flags & MAP_ANONYMOUS)) {
1363 audit_mmap_fd(fd, flags);
1364 file = fget(fd);
1365 if (!file)
1366 goto out;
1367 if (is_file_hugepages(file))
1368 len = ALIGN(len, huge_page_size(hstate_file(file)));
1369 retval = -EINVAL;
1370 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1371 goto out_fput;
1372 } else if (flags & MAP_HUGETLB) {
1373 struct user_struct *user = NULL;
1374 struct hstate *hs;
1375
1376 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1377 if (!hs)
1378 return -EINVAL;
1379
1380 len = ALIGN(len, huge_page_size(hs));
1381
1382
1383
1384
1385
1386
1387 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1388 VM_NORESERVE,
1389 &user, HUGETLB_ANONHUGE_INODE,
1390 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1391 if (IS_ERR(file))
1392 return PTR_ERR(file);
1393 }
1394
1395 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1396
1397 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1398out_fput:
1399 if (file)
1400 fput(file);
1401out:
1402 return retval;
1403}
1404
1405#ifdef __ARCH_WANT_SYS_OLD_MMAP
1406struct mmap_arg_struct {
1407 unsigned long addr;
1408 unsigned long len;
1409 unsigned long prot;
1410 unsigned long flags;
1411 unsigned long fd;
1412 unsigned long offset;
1413};
1414
1415SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1416{
1417 struct mmap_arg_struct a;
1418
1419 if (copy_from_user(&a, arg, sizeof(a)))
1420 return -EFAULT;
1421 if (a.offset & ~PAGE_MASK)
1422 return -EINVAL;
1423
1424 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1425 a.offset >> PAGE_SHIFT);
1426}
1427#endif
1428
1429
1430
1431
1432
1433
1434
1435int vma_wants_writenotify(struct vm_area_struct *vma)
1436{
1437 vm_flags_t vm_flags = vma->vm_flags;
1438
1439
1440 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1441 return 0;
1442
1443
1444 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1445 return 1;
1446
1447
1448 if (pgprot_val(vma->vm_page_prot) !=
1449 pgprot_val(vm_get_page_prot(vm_flags)))
1450 return 0;
1451
1452
1453 if (vm_flags & VM_PFNMAP)
1454 return 0;
1455
1456
1457 return vma->vm_file && vma->vm_file->f_mapping &&
1458 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1459}
1460
1461
1462
1463
1464
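/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */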
1465static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1466{
1467
1468
1469
1470
1471 if (file && is_file_hugepages(file))
1472 return 0;
1473
1474 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1475}
1476
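/*
 * Carry out the actual mapping: charge the memory if needed, try to merge
 * the new range into a neighbouring vma, otherwise allocate, set up and
 * link a new vma (calling the file's ->mmap method for file mappings).
 */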
1477unsigned long mmap_region(struct file *file, unsigned long addr,
1478 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
1479{
1480 struct mm_struct *mm = current->mm;
1481 struct vm_area_struct *vma, *prev;
1482 int error;
1483 struct rb_node **rb_link, *rb_parent;
1484 unsigned long charged = 0;
1485
1486
1487 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1488 unsigned long nr_pages;
1489
1490
1491
1492
1493
1494 if (!(vm_flags & MAP_FIXED))
1495 return -ENOMEM;
1496
1497 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1498
1499 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1500 return -ENOMEM;
1501 }
1502
1503
1504 error = -ENOMEM;
1505munmap_back:
1506 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
1507 if (do_munmap(mm, addr, len))
1508 return -ENOMEM;
1509 goto munmap_back;
1510 }
1511
1512
1513
1514
1515 if (accountable_mapping(file, vm_flags)) {
1516 charged = len >> PAGE_SHIFT;
1517 if (security_vm_enough_memory_mm(mm, charged))
1518 return -ENOMEM;
1519 vm_flags |= VM_ACCOUNT;
1520 }
1521
1522
1523
1524
1525 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1526 if (vma)
1527 goto out;
1528
1529
1530
1531
1532
1533
1534 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1535 if (!vma) {
1536 error = -ENOMEM;
1537 goto unacct_error;
1538 }
1539
1540 vma->vm_mm = mm;
1541 vma->vm_start = addr;
1542 vma->vm_end = addr + len;
1543 vma->vm_flags = vm_flags;
1544 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1545 vma->vm_pgoff = pgoff;
1546 INIT_LIST_HEAD(&vma->anon_vma_chain);
1547
1548 if (file) {
1549 if (vm_flags & VM_DENYWRITE) {
1550 error = deny_write_access(file);
1551 if (error)
1552 goto free_vma;
1553 }
1554 vma->vm_file = get_file(file);
1555 error = file->f_op->mmap(file, vma);
1556 if (error)
1557 goto unmap_and_free_vma;
1558
1559
1560
1561
1562
1563
1564
1565
1566 WARN_ON_ONCE(addr != vma->vm_start);
1567
1568 addr = vma->vm_start;
1569 vm_flags = vma->vm_flags;
1570 } else if (vm_flags & VM_SHARED) {
1571 error = shmem_zero_setup(vma);
1572 if (error)
1573 goto free_vma;
1574 }
1575
1576 if (vma_wants_writenotify(vma)) {
1577 pgprot_t pprot = vma->vm_page_prot;
1578
1579
1580
1581
1582
1583
1584
1585
1586 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1587 if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
1588 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1589 }
1590
1591 vma_link(mm, vma, prev, rb_link, rb_parent);
1592
1593 if (vm_flags & VM_DENYWRITE)
1594 allow_write_access(file);
1595 file = vma->vm_file;
1596out:
1597 perf_event_mmap(vma);
1598
1599 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1600 if (vm_flags & VM_LOCKED) {
1601 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1602 vma == get_gate_vma(current->mm)))
1603 mm->locked_vm += (len >> PAGE_SHIFT);
1604 else
1605 vma->vm_flags &= ~VM_LOCKED;
1606 }
1607
1608 if (file)
1609 uprobe_mmap(vma);
1610
1611
1612
1613
1614
1615
1616
1617
1618 vma->vm_flags |= VM_SOFTDIRTY;
1619
1620 return addr;
1621
1622unmap_and_free_vma:
1623 if (vm_flags & VM_DENYWRITE)
1624 allow_write_access(file);
1625 vma->vm_file = NULL;
1626 fput(file);
1627
1628
1629 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1630 charged = 0;
1631free_vma:
1632 kmem_cache_free(vm_area_cachep, vma);
1633unacct_error:
1634 if (charged)
1635 vm_unacct_memory(charged);
1636 return error;
1637}
1638
1639unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1640{
1641
1642
1643
1644
1645
1646
1647
1648
1649 struct mm_struct *mm = current->mm;
1650 struct vm_area_struct *vma;
1651 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1652
1653
1654 length = info->length + info->align_mask;
1655 if (length < info->length)
1656 return -ENOMEM;
1657
1658
1659 if (info->high_limit < length)
1660 return -ENOMEM;
1661 high_limit = info->high_limit - length;
1662
1663 if (info->low_limit > high_limit)
1664 return -ENOMEM;
1665 low_limit = info->low_limit + length;
1666
1667
1668 if (RB_EMPTY_ROOT(&mm->mm_rb))
1669 goto check_highest;
1670 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1671 if (vma->rb_subtree_gap < length)
1672 goto check_highest;
1673
1674 while (true) {
1675
1676 gap_end = vma->vm_start;
1677 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1678 struct vm_area_struct *left =
1679 rb_entry(vma->vm_rb.rb_left,
1680 struct vm_area_struct, vm_rb);
1681 if (left->rb_subtree_gap >= length) {
1682 vma = left;
1683 continue;
1684 }
1685 }
1686
1687 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1688check_current:
1689
1690 if (gap_start > high_limit)
1691 return -ENOMEM;
1692 if (gap_end >= low_limit && gap_end - gap_start >= length)
1693 goto found;
1694
1695
1696 if (vma->vm_rb.rb_right) {
1697 struct vm_area_struct *right =
1698 rb_entry(vma->vm_rb.rb_right,
1699 struct vm_area_struct, vm_rb);
1700 if (right->rb_subtree_gap >= length) {
1701 vma = right;
1702 continue;
1703 }
1704 }
1705
1706
1707 while (true) {
1708 struct rb_node *prev = &vma->vm_rb;
1709 if (!rb_parent(prev))
1710 goto check_highest;
1711 vma = rb_entry(rb_parent(prev),
1712 struct vm_area_struct, vm_rb);
1713 if (prev == vma->vm_rb.rb_left) {
1714 gap_start = vma->vm_prev->vm_end;
1715 gap_end = vma->vm_start;
1716 goto check_current;
1717 }
1718 }
1719 }
1720
1721check_highest:
1722
1723 gap_start = mm->highest_vm_end;
1724 gap_end = ULONG_MAX;
1725 if (gap_start > high_limit)
1726 return -ENOMEM;
1727
1728found:
1729
1730 if (gap_start < info->low_limit)
1731 gap_start = info->low_limit;
1732
1733
1734 gap_start += (info->align_offset - gap_start) & info->align_mask;
1735
1736 VM_BUG_ON(gap_start + info->length > info->high_limit);
1737 VM_BUG_ON(gap_start + info->length > gap_end);
1738 return gap_start;
1739}
1740
1741unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1742{
1743 struct mm_struct *mm = current->mm;
1744 struct vm_area_struct *vma;
1745 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1746
1747
1748 length = info->length + info->align_mask;
1749 if (length < info->length)
1750 return -ENOMEM;
1751
1752
1753
1754
1755
1756 gap_end = info->high_limit;
1757 if (gap_end < length)
1758 return -ENOMEM;
1759 high_limit = gap_end - length;
1760
1761 if (info->low_limit > high_limit)
1762 return -ENOMEM;
1763 low_limit = info->low_limit + length;
1764
1765
1766 gap_start = mm->highest_vm_end;
1767 if (gap_start <= high_limit)
1768 goto found_highest;
1769
1770
1771 if (RB_EMPTY_ROOT(&mm->mm_rb))
1772 return -ENOMEM;
1773 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1774 if (vma->rb_subtree_gap < length)
1775 return -ENOMEM;
1776
1777 while (true) {
1778
1779 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1780 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1781 struct vm_area_struct *right =
1782 rb_entry(vma->vm_rb.rb_right,
1783 struct vm_area_struct, vm_rb);
1784 if (right->rb_subtree_gap >= length) {
1785 vma = right;
1786 continue;
1787 }
1788 }
1789
1790check_current:
1791
1792 gap_end = vma->vm_start;
1793 if (gap_end < low_limit)
1794 return -ENOMEM;
1795 if (gap_start <= high_limit && gap_end - gap_start >= length)
1796 goto found;
1797
1798
1799 if (vma->vm_rb.rb_left) {
1800 struct vm_area_struct *left =
1801 rb_entry(vma->vm_rb.rb_left,
1802 struct vm_area_struct, vm_rb);
1803 if (left->rb_subtree_gap >= length) {
1804 vma = left;
1805 continue;
1806 }
1807 }
1808
1809
1810 while (true) {
1811 struct rb_node *prev = &vma->vm_rb;
1812 if (!rb_parent(prev))
1813 return -ENOMEM;
1814 vma = rb_entry(rb_parent(prev),
1815 struct vm_area_struct, vm_rb);
1816 if (prev == vma->vm_rb.rb_right) {
1817 gap_start = vma->vm_prev ?
1818 vma->vm_prev->vm_end : 0;
1819 goto check_current;
1820 }
1821 }
1822 }
1823
1824found:
1825
1826 if (gap_end > info->high_limit)
1827 gap_end = info->high_limit;
1828
1829found_highest:
1830
1831 gap_end -= info->length;
1832 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1833
1834 VM_BUG_ON(gap_end < info->low_limit);
1835 VM_BUG_ON(gap_end < gap_start);
1836 return gap_end;
1837}
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
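/*
 * Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */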
1850#ifndef HAVE_ARCH_UNMAPPED_AREA
1851unsigned long
1852arch_get_unmapped_area(struct file *filp, unsigned long addr,
1853 unsigned long len, unsigned long pgoff, unsigned long flags)
1854{
1855 struct mm_struct *mm = current->mm;
1856 struct vm_area_struct *vma;
1857 struct vm_unmapped_area_info info;
1858
1859 if (len > TASK_SIZE)
1860 return -ENOMEM;
1861
1862 if (flags & MAP_FIXED)
1863 return addr;
1864
1865 if (addr) {
1866 addr = PAGE_ALIGN(addr);
1867 vma = find_vma(mm, addr);
1868 if (TASK_SIZE - len >= addr &&
1869 (!vma || addr + len <= vma->vm_start))
1870 return addr;
1871 }
1872
1873 info.flags = 0;
1874 info.length = len;
1875 info.low_limit = TASK_UNMAPPED_BASE;
1876 info.high_limit = TASK_SIZE;
1877 info.align_mask = 0;
1878 return vm_unmapped_area(&info);
1879}
1880#endif
1881
1882
1883
1884
1885
1886#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1887unsigned long
1888arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1889 const unsigned long len, const unsigned long pgoff,
1890 const unsigned long flags)
1891{
1892 struct vm_area_struct *vma;
1893 struct mm_struct *mm = current->mm;
1894 unsigned long addr = addr0;
1895 struct vm_unmapped_area_info info;
1896
1897
1898 if (len > TASK_SIZE)
1899 return -ENOMEM;
1900
1901 if (flags & MAP_FIXED)
1902 return addr;
1903
1904
1905 if (addr) {
1906 addr = PAGE_ALIGN(addr);
1907 vma = find_vma(mm, addr);
1908 if (TASK_SIZE - len >= addr &&
1909 (!vma || addr + len <= vma->vm_start))
1910 return addr;
1911 }
1912
1913 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1914 info.length = len;
1915 info.low_limit = PAGE_SIZE;
1916 info.high_limit = mm->mmap_base;
1917 info.align_mask = 0;
1918 addr = vm_unmapped_area(&info);
1919
1920
1921
1922
1923
1924
1925
1926 if (addr & ~PAGE_MASK) {
1927 VM_BUG_ON(addr != -ENOMEM);
1928 info.flags = 0;
1929 info.low_limit = TASK_UNMAPPED_BASE;
1930 info.high_limit = TASK_SIZE;
1931 addr = vm_unmapped_area(&info);
1932 }
1933
1934 return addr;
1935}
1936#endif
1937
1938unsigned long
1939get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1940 unsigned long pgoff, unsigned long flags)
1941{
1942 unsigned long (*get_area)(struct file *, unsigned long,
1943 unsigned long, unsigned long, unsigned long);
1944
1945 unsigned long error = arch_mmap_check(addr, len, flags);
1946 if (error)
1947 return error;
1948
1949
1950 if (len > TASK_SIZE)
1951 return -ENOMEM;
1952
1953 get_area = current->mm->get_unmapped_area;
1954 if (file && file->f_op && file->f_op->get_unmapped_area)
1955 get_area = file->f_op->get_unmapped_area;
1956 addr = get_area(file, addr, len, pgoff, flags);
1957 if (IS_ERR_VALUE(addr))
1958 return addr;
1959
1960 if (addr > TASK_SIZE - len)
1961 return -ENOMEM;
1962 if (addr & ~PAGE_MASK)
1963 return -EINVAL;
1964
1965 addr = arch_rebalance_pgtables(addr, len);
1966 error = security_mmap_addr(addr);
1967 return error ? error : addr;
1968}
1969
1970EXPORT_SYMBOL(get_unmapped_area);
1971
/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = NULL;

	/* Check the cache first. */
	/* (Cache hit rate is typically around 35%.) */
	vma = ACCESS_ONCE(mm->mmap_cache);
	if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
		struct rb_node *rb_node;

		rb_node = mm->mm_rb.rb_node;
		vma = NULL;

		while (rb_node) {
			struct vm_area_struct *vma_tmp;

			vma_tmp = rb_entry(rb_node,
					   struct vm_area_struct, vm_rb);

			if (vma_tmp->vm_end > addr) {
				vma = vma_tmp;
				if (vma_tmp->vm_start <= addr)
					break;
				rb_node = rb_node->rb_left;
			} else
				rb_node = rb_node->rb_right;
		}
		if (vma)
			mm->mmap_cache = vma;
	}
	return vma;
}

EXPORT_SYMBOL(find_vma);
2007
2008
2009
2010
2011struct vm_area_struct *
2012find_vma_prev(struct mm_struct *mm, unsigned long addr,
2013 struct vm_area_struct **pprev)
2014{
2015 struct vm_area_struct *vma;
2016
2017 vma = find_vma(mm, addr);
2018 if (vma) {
2019 *pprev = vma->vm_prev;
2020 } else {
2021 struct rb_node *rb_node = mm->mm_rb.rb_node;
2022 *pprev = NULL;
2023 while (rb_node) {
2024 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2025 rb_node = rb_node->rb_right;
2026 }
2027 }
2028 return vma;
2029}
2030
2031
2032
2033
2034
2035
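/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */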
2036static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2037{
2038 struct mm_struct *mm = vma->vm_mm;
2039 struct rlimit *rlim = current->signal->rlim;
2040 unsigned long new_start;
2041
2042
2043 if (!may_expand_vm(mm, grow))
2044 return -ENOMEM;
2045
2046
2047 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2048 return -ENOMEM;
2049
2050
2051 if (vma->vm_flags & VM_LOCKED) {
2052 unsigned long locked;
2053 unsigned long limit;
2054 locked = mm->locked_vm + grow;
2055 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2056 limit >>= PAGE_SHIFT;
2057 if (locked > limit && !capable(CAP_IPC_LOCK))
2058 return -ENOMEM;
2059 }
2060
2061
2062 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2063 vma->vm_end - size;
2064 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2065 return -EFAULT;
2066
2067
2068
2069
2070
2071 if (security_vm_enough_memory_mm(mm, grow))
2072 return -ENOMEM;
2073
2074
2075 if (vma->vm_flags & VM_LOCKED)
2076 mm->locked_vm += grow;
2077 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2078 return 0;
2079}
2080
2081#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2082
2083
2084
2085
2086int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2087{
2088 int error;
2089
2090 if (!(vma->vm_flags & VM_GROWSUP))
2091 return -EFAULT;
2092
2093
2094
2095
2096
2097 if (unlikely(anon_vma_prepare(vma)))
2098 return -ENOMEM;
2099 vma_lock_anon_vma(vma);
2100
2101
2102
2103
2104
2105
2106
2107 if (address < PAGE_ALIGN(address+4))
2108 address = PAGE_ALIGN(address+4);
2109 else {
2110 vma_unlock_anon_vma(vma);
2111 return -ENOMEM;
2112 }
2113 error = 0;
2114
2115
2116 if (address > vma->vm_end) {
2117 unsigned long size, grow;
2118
2119 size = address - vma->vm_start;
2120 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2121
2122 error = -ENOMEM;
2123 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2124 error = acct_stack_growth(vma, size, grow);
2125 if (!error) {
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137 spin_lock(&vma->vm_mm->page_table_lock);
2138 anon_vma_interval_tree_pre_update_vma(vma);
2139 vma->vm_end = address;
2140 anon_vma_interval_tree_post_update_vma(vma);
2141 if (vma->vm_next)
2142 vma_gap_update(vma->vm_next);
2143 else
2144 vma->vm_mm->highest_vm_end = address;
2145 spin_unlock(&vma->vm_mm->page_table_lock);
2146
2147 perf_event_mmap(vma);
2148 }
2149 }
2150 }
2151 vma_unlock_anon_vma(vma);
2152 khugepaged_enter_vma_merge(vma);
2153 validate_mm(vma->vm_mm);
2154 return error;
2155}
2156#endif
2157
2158
2159
2160
2161int expand_downwards(struct vm_area_struct *vma,
2162 unsigned long address)
2163{
2164 int error;
2165
2166
2167
2168
2169
2170 if (unlikely(anon_vma_prepare(vma)))
2171 return -ENOMEM;
2172
2173 address &= PAGE_MASK;
2174 error = security_mmap_addr(address);
2175 if (error)
2176 return error;
2177
2178 vma_lock_anon_vma(vma);
2179
2180
2181
2182
2183
2184
2185
2186
2187 if (address < vma->vm_start) {
2188 unsigned long size, grow;
2189
2190 size = vma->vm_end - address;
2191 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2192
2193 error = -ENOMEM;
2194 if (grow <= vma->vm_pgoff) {
2195 error = acct_stack_growth(vma, size, grow);
2196 if (!error) {
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208 spin_lock(&vma->vm_mm->page_table_lock);
2209 anon_vma_interval_tree_pre_update_vma(vma);
2210 vma->vm_start = address;
2211 vma->vm_pgoff -= grow;
2212 anon_vma_interval_tree_post_update_vma(vma);
2213 vma_gap_update(vma);
2214 spin_unlock(&vma->vm_mm->page_table_lock);
2215
2216 perf_event_mmap(vma);
2217 }
2218 }
2219 }
2220 vma_unlock_anon_vma(vma);
2221 khugepaged_enter_vma_merge(vma);
2222 validate_mm(vma->vm_mm);
2223 return error;
2224}
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237#ifdef CONFIG_STACK_GROWSUP
2238int expand_stack(struct vm_area_struct *vma, unsigned long address)
2239{
2240 struct vm_area_struct *next;
2241
2242 address &= PAGE_MASK;
2243 next = vma->vm_next;
2244 if (next && next->vm_start == address + PAGE_SIZE) {
2245 if (!(next->vm_flags & VM_GROWSUP))
2246 return -ENOMEM;
2247 }
2248 return expand_upwards(vma, address);
2249}
2250
2251struct vm_area_struct *
2252find_extend_vma(struct mm_struct *mm, unsigned long addr)
2253{
2254 struct vm_area_struct *vma, *prev;
2255
2256 addr &= PAGE_MASK;
2257 vma = find_vma_prev(mm, addr, &prev);
2258 if (vma && (vma->vm_start <= addr))
2259 return vma;
2260 if (!prev || expand_stack(prev, addr))
2261 return NULL;
2262 if (prev->vm_flags & VM_LOCKED)
2263 __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
2264 return prev;
2265}
2266#else
2267int expand_stack(struct vm_area_struct *vma, unsigned long address)
2268{
2269 struct vm_area_struct *prev;
2270
2271 address &= PAGE_MASK;
2272 prev = vma->vm_prev;
2273 if (prev && prev->vm_end == address) {
2274 if (!(prev->vm_flags & VM_GROWSDOWN))
2275 return -ENOMEM;
2276 }
2277 return expand_downwards(vma, address);
2278}
2279
2280struct vm_area_struct *
2281find_extend_vma(struct mm_struct * mm, unsigned long addr)
2282{
2283 struct vm_area_struct * vma;
2284 unsigned long start;
2285
2286 addr &= PAGE_MASK;
2287 vma = find_vma(mm,addr);
2288 if (!vma)
2289 return NULL;
2290 if (vma->vm_start <= addr)
2291 return vma;
2292 if (!(vma->vm_flags & VM_GROWSDOWN))
2293 return NULL;
2294 start = vma->vm_start;
2295 if (expand_stack(vma, addr))
2296 return NULL;
2297 if (vma->vm_flags & VM_LOCKED)
2298 __mlock_vma_pages_range(vma, addr, start, NULL);
2299 return vma;
2300}
2301#endif
2302
2303
2304
2305
2306
2307
2308
2309static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2310{
2311 unsigned long nr_accounted = 0;
2312
2313
2314 update_hiwater_vm(mm);
2315 do {
2316 long nrpages = vma_pages(vma);
2317
2318 if (vma->vm_flags & VM_ACCOUNT)
2319 nr_accounted += nrpages;
2320 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2321 vma = remove_vma(vma);
2322 } while (vma);
2323 vm_unacct_memory(nr_accounted);
2324 validate_mm(mm);
2325}
2326
2327
2328
2329
2330
2331
2332static void unmap_region(struct mm_struct *mm,
2333 struct vm_area_struct *vma, struct vm_area_struct *prev,
2334 unsigned long start, unsigned long end)
2335{
2336 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2337 struct mmu_gather tlb;
2338
2339 lru_add_drain();
2340 tlb_gather_mmu(&tlb, mm, start, end);
2341 update_hiwater_rss(mm);
2342 unmap_vmas(&tlb, vma, start, end);
2343 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2344 next ? next->vm_start : USER_PGTABLES_CEILING);
2345 tlb_finish_mmu(&tlb, start, end);
2346}
2347
2348
2349
2350
2351
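/*
 * Create a list of vma's touched by the unmap, removing them from the
 * mm's vma list and rbtree as we go.
 */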
2352static void
2353detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2354 struct vm_area_struct *prev, unsigned long end)
2355{
2356 struct vm_area_struct **insertion_point;
2357 struct vm_area_struct *tail_vma = NULL;
2358
2359 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2360 vma->vm_prev = NULL;
2361 do {
2362 vma_rb_erase(vma, &mm->mm_rb);
2363 mm->map_count--;
2364 tail_vma = vma;
2365 vma = vma->vm_next;
2366 } while (vma && vma->vm_start < end);
2367 *insertion_point = vma;
2368 if (vma) {
2369 vma->vm_prev = prev;
2370 vma_gap_update(vma);
2371 } else
2372 mm->highest_vm_end = prev ? prev->vm_end : 0;
2373 tail_vma->vm_next = NULL;
2374 mm->mmap_cache = NULL;
2375}
2376
2377
2378
2379
2380
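/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
 * munmap path where it doesn't make sense to fail.
 */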
2381static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2382 unsigned long addr, int new_below)
2383{
2384 struct vm_area_struct *new;
2385 int err = -ENOMEM;
2386
2387 if (is_vm_hugetlb_page(vma) && (addr &
2388 ~(huge_page_mask(hstate_vma(vma)))))
2389 return -EINVAL;
2390
2391 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2392 if (!new)
2393 goto out_err;
2394
2395
2396 *new = *vma;
2397
2398 INIT_LIST_HEAD(&new->anon_vma_chain);
2399
2400 if (new_below)
2401 new->vm_end = addr;
2402 else {
2403 new->vm_start = addr;
2404 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2405 }
2406
2407 err = vma_dup_policy(vma, new);
2408 if (err)
2409 goto out_free_vma;
2410
2411 if (anon_vma_clone(new, vma))
2412 goto out_free_mpol;
2413
2414 if (new->vm_file)
2415 get_file(new->vm_file);
2416
2417 if (new->vm_ops && new->vm_ops->open)
2418 new->vm_ops->open(new);
2419
2420 if (new_below)
2421 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2422 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2423 else
2424 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2425
2426
2427 if (!err)
2428 return 0;
2429
2430
2431 if (new->vm_ops && new->vm_ops->close)
2432 new->vm_ops->close(new);
2433 if (new->vm_file)
2434 fput(new->vm_file);
2435 unlink_anon_vmas(new);
2436 out_free_mpol:
2437 mpol_put(vma_policy(new));
2438 out_free_vma:
2439 kmem_cache_free(vm_area_cachep, new);
2440 out_err:
2441 return err;
2442}
2443
2444
2445
2446
2447
2448int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2449 unsigned long addr, int new_below)
2450{
2451 if (mm->map_count >= sysctl_max_map_count)
2452 return -ENOMEM;
2453
2454 return __split_vma(mm, vma, addr, new_below);
2455}
2456
2457
2458
2459
2460
2461
2462int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2463{
2464 unsigned long end;
2465 struct vm_area_struct *vma, *prev, *last;
2466
2467 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2468 return -EINVAL;
2469
2470 if ((len = PAGE_ALIGN(len)) == 0)
2471 return -EINVAL;
2472
2473
2474 vma = find_vma(mm, start);
2475 if (!vma)
2476 return 0;
2477 prev = vma->vm_prev;
2478
2479
2480
2481 end = start + len;
2482 if (vma->vm_start >= end)
2483 return 0;
2484
2485
2486
2487
2488
2489
2490
2491
2492 if (start > vma->vm_start) {
2493 int error;
2494
2495
2496
2497
2498
2499
2500 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2501 return -ENOMEM;
2502
2503 error = __split_vma(mm, vma, start, 0);
2504 if (error)
2505 return error;
2506 prev = vma;
2507 }
2508
2509
2510 last = find_vma(mm, end);
2511 if (last && end > last->vm_start) {
2512 int error = __split_vma(mm, last, end, 1);
2513 if (error)
2514 return error;
2515 }
2516 vma = prev? prev->vm_next: mm->mmap;
2517
2518
2519
2520
2521 if (mm->locked_vm) {
2522 struct vm_area_struct *tmp = vma;
2523 while (tmp && tmp->vm_start < end) {
2524 if (tmp->vm_flags & VM_LOCKED) {
2525 mm->locked_vm -= vma_pages(tmp);
2526 munlock_vma_pages_all(tmp);
2527 }
2528 tmp = tmp->vm_next;
2529 }
2530 }
2531
2532
2533
2534
2535 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2536 unmap_region(mm, vma, prev, start, end);
2537
2538
2539 remove_vma_list(mm, vma);
2540
2541 return 0;
2542}
2543
2544int vm_munmap(unsigned long start, size_t len)
2545{
2546 int ret;
2547 struct mm_struct *mm = current->mm;
2548
2549 down_write(&mm->mmap_sem);
2550 ret = do_munmap(mm, start, len);
2551 up_write(&mm->mmap_sem);
2552 return ret;
2553}
2554EXPORT_SYMBOL(vm_munmap);
2555
2556SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2557{
2558 profile_munmap(addr);
2559 return vm_munmap(addr, len);
2560}
2561
2562static inline void verify_mm_writelocked(struct mm_struct *mm)
2563{
2564#ifdef CONFIG_DEBUG_VM
2565 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2566 WARN_ON(1);
2567 up_read(&mm->mmap_sem);
2568 }
2569#endif
2570}
2571
2572
2573
2574
2575
2576
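/*
 * this is really a simplified "do_mmap".  it only handles
 * anonymous maps.  eventually we may be able to do some
 * brk-specific accounting here.
 */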
2577static unsigned long do_brk(unsigned long addr, unsigned long len)
2578{
2579 struct mm_struct * mm = current->mm;
2580 struct vm_area_struct * vma, * prev;
2581 unsigned long flags;
2582 struct rb_node ** rb_link, * rb_parent;
2583 pgoff_t pgoff = addr >> PAGE_SHIFT;
2584 int error;
2585
2586 len = PAGE_ALIGN(len);
2587 if (!len)
2588 return addr;
2589
2590 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2591
2592 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2593 if (error & ~PAGE_MASK)
2594 return error;
2595
2596
2597
2598
2599 if (mm->def_flags & VM_LOCKED) {
2600 unsigned long locked, lock_limit;
2601 locked = len >> PAGE_SHIFT;
2602 locked += mm->locked_vm;
2603 lock_limit = rlimit(RLIMIT_MEMLOCK);
2604 lock_limit >>= PAGE_SHIFT;
2605 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2606 return -EAGAIN;
2607 }
2608
2609
2610
2611
2612
2613 verify_mm_writelocked(mm);
2614
2615
2616
2617
2618 munmap_back:
2619 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2620 if (do_munmap(mm, addr, len))
2621 return -ENOMEM;
2622 goto munmap_back;
2623 }
2624
2625
2626 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2627 return -ENOMEM;
2628
2629 if (mm->map_count > sysctl_max_map_count)
2630 return -ENOMEM;
2631
2632 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2633 return -ENOMEM;
2634
2635
2636 vma = vma_merge(mm, prev, addr, addr + len, flags,
2637 NULL, NULL, pgoff, NULL);
2638 if (vma)
2639 goto out;
2640
2641
2642
2643
2644 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2645 if (!vma) {
2646 vm_unacct_memory(len >> PAGE_SHIFT);
2647 return -ENOMEM;
2648 }
2649
2650 INIT_LIST_HEAD(&vma->anon_vma_chain);
2651 vma->vm_mm = mm;
2652 vma->vm_start = addr;
2653 vma->vm_end = addr + len;
2654 vma->vm_pgoff = pgoff;
2655 vma->vm_flags = flags;
2656 vma->vm_page_prot = vm_get_page_prot(flags);
2657 vma_link(mm, vma, prev, rb_link, rb_parent);
2658out:
2659 perf_event_mmap(vma);
2660 mm->total_vm += len >> PAGE_SHIFT;
2661 if (flags & VM_LOCKED)
2662 mm->locked_vm += (len >> PAGE_SHIFT);
2663 vma->vm_flags |= VM_SOFTDIRTY;
2664 return addr;
2665}

unsigned long vm_brk(unsigned long addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	unsigned long ret;
	bool populate;

	down_write(&mm->mmap_sem);
	ret = do_brk(addr, len);
	populate = ((mm->def_flags & VM_LOCKED) != 0);
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(addr, len);
	return ret;
}
EXPORT_SYMBOL(vm_brk);
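
/*
 * Illustrative sketch (not part of the original source): binary format
 * loaders typically use vm_brk() to create anonymous, zero-filled regions
 * such as a bss segment after mapping the file-backed parts of an
 * executable:
 *
 *	unsigned long ret = vm_brk(bss_start, bss_len);
 *	if (IS_ERR_VALUE(ret))
 *		return ret;
 *
 * On success the requested start address is returned; the region is
 * populated immediately only if the mm defaults to VM_LOCKED.
 */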

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
	struct mmu_gather tlb;
	struct vm_area_struct *vma;
	unsigned long nr_accounted = 0;

	/* mm's last user has gone, and it's about to be pulled down */
	mmu_notifier_release(mm);

	if (mm->locked_vm) {
		vma = mm->mmap;
		while (vma) {
			if (vma->vm_flags & VM_LOCKED)
				munlock_vma_pages_all(vma);
			vma = vma->vm_next;
		}
	}

	arch_exit_mmap(mm);

	vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
		return;

	lru_add_drain();
	flush_cache_mm(mm);
	tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
	unmap_vmas(&tlb, vma, 0, -1);

	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
	tlb_finish_mmu(&tlb, 0, -1);

	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
	while (vma) {
		if (vma->vm_flags & VM_ACCOUNT)
			nr_accounted += vma_pages(vma);
		vma = remove_vma(vma);
	}
	vm_unacct_memory(nr_accounted);

	WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}

/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap_mutex is taken here.
 */
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first fault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
	if (!vma->vm_file) {
		BUG_ON(vma->anon_vma);
		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
	}
	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		return -ENOMEM;
	if ((vma->vm_flags & VM_ACCOUNT) &&
	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
		return -ENOMEM;

	vma_link(mm, vma, prev, rb_link, rb_parent);
	return 0;
}

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * valid so long as the merge is not allowed to
			 * happen, in the first place.
			 */
			VM_BUG_ON(faulted_in_anon_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
		if (new_vma) {
			*new_vma = *vma;
			new_vma->vm_start = addr;
			new_vma->vm_end = addr + len;
			new_vma->vm_pgoff = pgoff;
			if (vma_dup_policy(vma, new_vma))
				goto out_free_vma;
			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
			if (anon_vma_clone(new_vma, vma))
				goto out_free_mempol;
			if (new_vma->vm_file)
				get_file(new_vma->vm_file);
			if (new_vma->vm_ops && new_vma->vm_ops->open)
				new_vma->vm_ops->open(new_vma);
			vma_link(mm, new_vma, prev, rb_link, rb_parent);
			*need_rmap_locks = false;
		}
	}
	return new_vma;

 out_free_mempol:
	mpol_put(vma_policy(new_vma));
 out_free_vma:
	kmem_cache_free(vm_area_cachep, new_vma);
	return NULL;
}

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
int may_expand_vm(struct mm_struct *mm, unsigned long npages)
{
	unsigned long cur = mm->total_vm;	/* pages */
	unsigned long lim;

	lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;

	if (cur + npages > lim)
		return 0;
	return 1;
}

static int special_mapping_fault(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	pgoff_t pgoff;
	struct page **pages;

	/*
	 * special mappings have no vm_file, and in that case, the mm
	 * uses vm_pgoff internally. So we have to subtract it from here.
	 * We are allowed to do this because we are the mm; do not copy
	 * this code into drivers!
	 */
	pgoff = vmf->pgoff - vma->vm_pgoff;

	for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	int ret;
	struct vm_area_struct *vma;

	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (unlikely(vma == NULL))
		return -ENOMEM;

	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = &special_mapping_vmops;
	vma->vm_private_data = pages;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	mm->total_vm += len >> PAGE_SHIFT;

	perf_event_mmap(vma);

	return 0;

out:
	kmem_cache_free(vm_area_cachep, vma);
	return ret;
}
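
/*
 * Illustrative sketch (not part of the original source): architecture code
 * that maps a vDSO-style page into a new process would typically call this
 * with a NULL-terminated page array while holding mmap_sem for writing:
 *
 *	static struct page *vdso_pages[2];	(last slot stays NULL)
 *
 *	down_write(&mm->mmap_sem);
 *	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
 *				      VM_READ | VM_EXEC |
 *				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
 *				      vdso_pages);
 *	up_write(&mm->mmap_sem);
 *
 * vdso_base and vdso_pages are hypothetical names used only for this sketch.
 */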

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
	}
}

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag
 * in mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We can take all the locks in random order because the VM code
 * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
 * takes more than one of them in a row. Secondly we're protected
 * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
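
/*
 * Illustrative sketch (not part of the original source): a typical caller,
 * such as mmu notifier registration, brackets its critical section like this:
 *
 *	down_write(&mm->mmap_sem);
 *	ret = mm_take_all_locks(mm);
 *	if (!ret) {
 *		... the vma/anon_vma/mapping state cannot change here ...
 *		mm_drop_all_locks(mm);
 *	}
 *	up_write(&mm->mmap_sem);
 */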

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		mutex_unlock(&mapping->i_mmap_mutex);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(down_read_trylock(&mm->mmap_sem));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OOM_DISABLE mode.
 *
 * The default value is min(3% of free memory, 128MB).
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
module_init(init_user_reserve)

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * The default value is min(3% of free memory, 8MB).
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OOM_DISABLE mode.
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
module_init(init_admin_reserve)

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may not be enough to recover from a full
 * desktop environment like KDE. It may also not be enough if the admin
 * has a minimal userspace.
 *
 * Adjust the reserves only if they have not been raised above the
 * defaults by an operator; on memory hot-remove, shrink them so they
 * never exceed the amount of free memory that remains.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		/* Default max is 8MB. Leave alone if modified by operator. */
		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
module_init(init_reserve_notifier)