/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifndef arch_rebalance_pgtables
#define arch_rebalance_pgtables(addr, len)	(addr)
#endif

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);
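
/*
 * protection_map[] is indexed by the low VM_READ|VM_WRITE|VM_EXEC|VM_SHARED
 * bits of vm_flags and yields the page protection used for user PTEs;
 * vm_get_page_prot() below performs that lookup and lets the architecture
 * fold in additional protection bits.
 */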
pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));
}
EXPORT_SYMBOL(vm_get_page_prot);

int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17;
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13;
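
/*
 * vm_committed_as is the global count of pages the kernel has committed
 * to provide; keep it on its own cacheline because it is updated
 * frequently from many CPUs.
 */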
struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
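
/*
 * vm_memory_committed() reports the current global commitment as a page
 * count; it is useful for overcommit policy and for drivers (for example
 * memory balloons) that need to track it.
 */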
unsigned long vm_memory_committed(void)
{
	return percpu_counter_read_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
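
/*
 * __vm_enough_memory() checks whether the process may commit "pages" more
 * pages of virtual memory.  Returns 0 if the commitment is allowed and
 * -ENOMEM otherwise, honouring the overcommit policy selected through
 * sysctl_overcommit_memory (guess, always, or never).
 */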
int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
	unsigned long free, allowed, reserve;

	vm_acct_memory(pages);

	/*
	 * Sometimes we want to use more memory than we have.
	 */
	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
		return 0;

	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
		free = global_page_state(NR_FREE_PAGES);
		free += global_page_state(NR_FILE_PAGES);

		/*
		 * shmem pages shouldn't be counted as free in this
		 * case, they can't be purged, only swapped out, and
		 * that won't affect the overall amount of available
		 * memory in the system.
		 */
		free -= global_page_state(NR_SHMEM);

		free += get_nr_swap_pages();

		/*
		 * Any slabs which are created with the
		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
		 * which are reclaimable, under pressure.  The dentry
		 * cache and most inode caches should fall into this.
		 */
		free += global_page_state(NR_SLAB_RECLAIMABLE);

		/*
		 * Leave reserved pages alone; they are not for anonymous use.
		 */
		if (free <= totalreserve_pages)
			goto error;
		else
			free -= totalreserve_pages;

		/*
		 * Reserve some for root.
		 */
		if (!cap_sys_admin)
			free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);

		if (free > pages)
			return 0;

		goto error;
	}

	allowed = (totalram_pages - hugetlb_total_pages())
		* sysctl_overcommit_ratio / 100;

	/*
	 * Reserve some for root.
	 */
	if (!cap_sys_admin)
		allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10);
	allowed += total_swap_pages;

	/*
	 * Don't let a single process grow so big a user can't recover.
	 */
	if (mm) {
		reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10);
		allowed -= min(mm->total_vm / 32, reserve);
	}

	if (percpu_counter_read_positive(&vm_committed_as) < allowed)
		return 0;
error:
	vm_unacct_memory(pages);

	return -ENOMEM;
}
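
/*
 * Requires the file's mapping->i_mmap_mutex to be held by the caller.
 */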
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		atomic_inc(&file_inode(file)->i_writecount);
	if (vma->vm_flags & VM_SHARED)
		mapping->i_mmap_writable--;

	flush_dcache_mmap_lock(mapping);
	if (unlikely(vma->vm_flags & VM_NONLINEAR))
		list_del_init(&vma->shared.nonlinear);
	else
		vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}
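
/*
 * Unlink a file-based vma from its address_space's interval tree, hiding
 * it from rmap and truncation before its page tables are freed.
 */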
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		mutex_lock(&mapping->i_mmap_mutex);
		__remove_shared_vm_struct(vma, file, mapping);
		mutex_unlock(&mapping->i_mmap_mutex);
	}
}
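
/*
 * Close a vm structure and free it, returning the next vma in the list.
 */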
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	kmem_cache_free(vm_area_cachep, vma);
	return next;
}

static unsigned long do_brk(unsigned long addr, unsigned long len);

SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long rlim, retval;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;
	unsigned long min_brk;
	bool populate;

	down_write(&mm->mmap_sem);

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted.
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against RLIMIT_DATA here, before comparing oldbrk with
	 * newbrk, so that a request cannot slip past the limit check and
	 * grow the data segment beyond its configured limit.
	 */
	rlim = rlimit(RLIMIT_DATA);
	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
			(mm->end_data - mm->start_data) > rlim)
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		goto set_brk;

	/* Always allow shrinking brk. */
	if (brk <= mm->brk) {
		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
			goto set_brk;
		goto out;
	}

	/* Check against existing mmap mappings. */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
		goto out;

set_brk:
	mm->brk = brk;
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	up_write(&mm->mmap_sem);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = mm->brk;
	up_write(&mm->mmap_sem);
	return retval;
}

static long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max, subtree_gap;
	max = vma->vm_start;
	if (vma->vm_prev)
		max -= vma->vm_prev->vm_end;
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}
352
353#ifdef CONFIG_DEBUG_VM_RB
354static int browse_rb(struct rb_root *root)
355{
356 int i = 0, j, bug = 0;
357 struct rb_node *nd, *pn = NULL;
358 unsigned long prev = 0, pend = 0;
359
360 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
361 struct vm_area_struct *vma;
362 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
363 if (vma->vm_start < prev) {
364 printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
365 bug = 1;
366 }
367 if (vma->vm_start < pend) {
368 printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
369 bug = 1;
370 }
371 if (vma->vm_start > vma->vm_end) {
372 printk("vm_end %lx < vm_start %lx\n",
373 vma->vm_end, vma->vm_start);
374 bug = 1;
375 }
376 if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
377 printk("free gap %lx, correct %lx\n",
378 vma->rb_subtree_gap,
379 vma_compute_subtree_gap(vma));
380 bug = 1;
381 }
382 i++;
383 pn = nd;
384 prev = vma->vm_start;
385 pend = vma->vm_end;
386 }
387 j = 0;
388 for (nd = pn; nd; nd = rb_prev(nd))
389 j++;
390 if (i != j) {
391 printk("backwards %d, forwards %d\n", j, i);
392 bug = 1;
393 }
394 return bug ? -1 : i;
395}
396
397static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
398{
399 struct rb_node *nd;
400
401 for (nd = rb_first(root); nd; nd = rb_next(nd)) {
402 struct vm_area_struct *vma;
403 vma = rb_entry(nd, struct vm_area_struct, vm_rb);
404 BUG_ON(vma != ignore &&
405 vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
406 }
407}
408
409void validate_mm(struct mm_struct *mm)
410{
411 int bug = 0;
412 int i = 0;
413 unsigned long highest_address = 0;
414 struct vm_area_struct *vma = mm->mmap;
415 while (vma) {
416 struct anon_vma_chain *avc;
417 vma_lock_anon_vma(vma);
418 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
419 anon_vma_interval_tree_verify(avc);
420 vma_unlock_anon_vma(vma);
421 highest_address = vma->vm_end;
422 vma = vma->vm_next;
423 i++;
424 }
425 if (i != mm->map_count) {
426 printk("map_count %d vm_next %d\n", mm->map_count, i);
427 bug = 1;
428 }
429 if (highest_address != mm->highest_vm_end) {
430 printk("mm->highest_vm_end %lx, found %lx\n",
431 mm->highest_vm_end, highest_address);
432 bug = 1;
433 }
434 i = browse_rb(&mm->mm_rb);
435 if (i != mm->map_count) {
436 printk("map_count %d rb %d\n", mm->map_count, i);
437 bug = 1;
438 }
439 BUG_ON(bug);
440}
441#else
442#define validate_mm_rb(root, ignore) do { } while (0)
443#define validate_mm(mm) do { } while (0)
444#endif
445
446RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
447 unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
448
449
450
451
452
453
454static void vma_gap_update(struct vm_area_struct *vma)
455{
456
457
458
459
460 vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
461}
462
463static inline void vma_rb_insert(struct vm_area_struct *vma,
464 struct rb_root *root)
465{
466
467 validate_mm_rb(root, NULL);
468
469 rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
470}
471
472static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
473{
474
475
476
477
478 validate_mm_rb(root, vma);
479
480
481
482
483
484
485 rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
486}
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502static inline void
503anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
504{
505 struct anon_vma_chain *avc;
506
507 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
508 anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
509}
510
511static inline void
512anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
513{
514 struct anon_vma_chain *avc;
515
516 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
517 anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
518}
519
520static int find_vma_links(struct mm_struct *mm, unsigned long addr,
521 unsigned long end, struct vm_area_struct **pprev,
522 struct rb_node ***rb_link, struct rb_node **rb_parent)
523{
524 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
525
526 __rb_link = &mm->mm_rb.rb_node;
527 rb_prev = __rb_parent = NULL;
528
529 while (*__rb_link) {
530 struct vm_area_struct *vma_tmp;
531
532 __rb_parent = *__rb_link;
533 vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
534
535 if (vma_tmp->vm_end > addr) {
536
537 if (vma_tmp->vm_start < end)
538 return -ENOMEM;
539 __rb_link = &__rb_parent->rb_left;
540 } else {
541 rb_prev = __rb_parent;
542 __rb_link = &__rb_parent->rb_right;
543 }
544 }
545
546 *pprev = NULL;
547 if (rb_prev)
548 *pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
549 *rb_link = __rb_link;
550 *rb_parent = __rb_parent;
551 return 0;
552}
553
554static unsigned long count_vma_pages_range(struct mm_struct *mm,
555 unsigned long addr, unsigned long end)
556{
557 unsigned long nr_pages = 0;
558 struct vm_area_struct *vma;
559
560
561 vma = find_vma_intersection(mm, addr, end);
562 if (!vma)
563 return 0;
564
565 nr_pages = (min(end, vma->vm_end) -
566 max(addr, vma->vm_start)) >> PAGE_SHIFT;
567
568
569 for (vma = vma->vm_next; vma; vma = vma->vm_next) {
570 unsigned long overlap_len;
571
572 if (vma->vm_start > end)
573 break;
574
575 overlap_len = min(end, vma->vm_end) - vma->vm_start;
576 nr_pages += overlap_len >> PAGE_SHIFT;
577 }
578
579 return nr_pages;
580}
581
582void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
583 struct rb_node **rb_link, struct rb_node *rb_parent)
584{
585
586 if (vma->vm_next)
587 vma_gap_update(vma->vm_next);
588 else
589 mm->highest_vm_end = vma->vm_end;
590
591
592
593
594
595
596
597
598
599
600 rb_link_node(&vma->vm_rb, rb_parent, rb_link);
601 vma->rb_subtree_gap = 0;
602 vma_gap_update(vma);
603 vma_rb_insert(vma, &mm->mm_rb);
604}
605
606static void __vma_link_file(struct vm_area_struct *vma)
607{
608 struct file *file;
609
610 file = vma->vm_file;
611 if (file) {
612 struct address_space *mapping = file->f_mapping;
613
614 if (vma->vm_flags & VM_DENYWRITE)
615 atomic_dec(&file_inode(file)->i_writecount);
616 if (vma->vm_flags & VM_SHARED)
617 mapping->i_mmap_writable++;
618
619 flush_dcache_mmap_lock(mapping);
620 if (unlikely(vma->vm_flags & VM_NONLINEAR))
621 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
622 else
623 vma_interval_tree_insert(vma, &mapping->i_mmap);
624 flush_dcache_mmap_unlock(mapping);
625 }
626}
627
628static void
629__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
630 struct vm_area_struct *prev, struct rb_node **rb_link,
631 struct rb_node *rb_parent)
632{
633 __vma_link_list(mm, vma, prev, rb_parent);
634 __vma_link_rb(mm, vma, rb_link, rb_parent);
635}
636
637static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
638 struct vm_area_struct *prev, struct rb_node **rb_link,
639 struct rb_node *rb_parent)
640{
641 struct address_space *mapping = NULL;
642
643 if (vma->vm_file)
644 mapping = vma->vm_file->f_mapping;
645
646 if (mapping)
647 mutex_lock(&mapping->i_mmap_mutex);
648
649 __vma_link(mm, vma, prev, rb_link, rb_parent);
650 __vma_link_file(vma);
651
652 if (mapping)
653 mutex_unlock(&mapping->i_mmap_mutex);
654
655 mm->map_count++;
656 validate_mm(mm);
657}
658
659
660
661
662
663static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
664{
665 struct vm_area_struct *prev;
666 struct rb_node **rb_link, *rb_parent;
667
668 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
669 &prev, &rb_link, &rb_parent))
670 BUG();
671 __vma_link(mm, vma, prev, rb_link, rb_parent);
672 mm->map_count++;
673}
674
675static inline void
676__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
677 struct vm_area_struct *prev)
678{
679 struct vm_area_struct *next;
680
681 vma_rb_erase(vma, &mm->mm_rb);
682 prev->vm_next = next = vma->vm_next;
683 if (next)
684 next->vm_prev = prev;
685 if (mm->mmap_cache == vma)
686 mm->mmap_cache = prev;
687}
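
/*
 * vma_adjust() updates vm_start, vm_end and vm_pgoff of @vma (and, when
 * needed, of its successor) while keeping the mm's vma list, rbtree,
 * anon_vma and file interval trees consistent.  "insert" is a new vma to
 * be linked as part of the adjustment (used when splitting); the successor
 * may also end up being removed when the adjusted range swallows it.
 */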
696int vma_adjust(struct vm_area_struct *vma, unsigned long start,
697 unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
698{
699 struct mm_struct *mm = vma->vm_mm;
700 struct vm_area_struct *next = vma->vm_next;
701 struct vm_area_struct *importer = NULL;
702 struct address_space *mapping = NULL;
703 struct rb_root *root = NULL;
704 struct anon_vma *anon_vma = NULL;
705 struct file *file = vma->vm_file;
706 bool start_changed = false, end_changed = false;
707 long adjust_next = 0;
708 int remove_next = 0;
709
710 if (next && !insert) {
711 struct vm_area_struct *exporter = NULL;
712
713 if (end >= next->vm_end) {
714
715
716
717
718again: remove_next = 1 + (end > next->vm_end);
719 end = next->vm_end;
720 exporter = next;
721 importer = vma;
722 } else if (end > next->vm_start) {
723
724
725
726
727 adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
728 exporter = next;
729 importer = vma;
730 } else if (end < vma->vm_end) {
731
732
733
734
735
736 adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
737 exporter = vma;
738 importer = next;
739 }
740
741
742
743
744
745
746 if (exporter && exporter->anon_vma && !importer->anon_vma) {
747 if (anon_vma_clone(importer, exporter))
748 return -ENOMEM;
749 importer->anon_vma = exporter->anon_vma;
750 }
751 }
752
753 if (file) {
754 mapping = file->f_mapping;
755 if (!(vma->vm_flags & VM_NONLINEAR)) {
756 root = &mapping->i_mmap;
757 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
758
759 if (adjust_next)
760 uprobe_munmap(next, next->vm_start,
761 next->vm_end);
762 }
763
764 mutex_lock(&mapping->i_mmap_mutex);
765 if (insert) {
766
767
768
769
770
771
772 __vma_link_file(insert);
773 }
774 }
775
776 vma_adjust_trans_huge(vma, start, end, adjust_next);
777
778 anon_vma = vma->anon_vma;
779 if (!anon_vma && adjust_next)
780 anon_vma = next->anon_vma;
781 if (anon_vma) {
782 VM_BUG_ON(adjust_next && next->anon_vma &&
783 anon_vma != next->anon_vma);
784 anon_vma_lock_write(anon_vma);
785 anon_vma_interval_tree_pre_update_vma(vma);
786 if (adjust_next)
787 anon_vma_interval_tree_pre_update_vma(next);
788 }
789
790 if (root) {
791 flush_dcache_mmap_lock(mapping);
792 vma_interval_tree_remove(vma, root);
793 if (adjust_next)
794 vma_interval_tree_remove(next, root);
795 }
796
797 if (start != vma->vm_start) {
798 vma->vm_start = start;
799 start_changed = true;
800 }
801 if (end != vma->vm_end) {
802 vma->vm_end = end;
803 end_changed = true;
804 }
805 vma->vm_pgoff = pgoff;
806 if (adjust_next) {
807 next->vm_start += adjust_next << PAGE_SHIFT;
808 next->vm_pgoff += adjust_next;
809 }
810
811 if (root) {
812 if (adjust_next)
813 vma_interval_tree_insert(next, root);
814 vma_interval_tree_insert(vma, root);
815 flush_dcache_mmap_unlock(mapping);
816 }
817
818 if (remove_next) {
819
820
821
822
823 __vma_unlink(mm, next, vma);
824 if (file)
825 __remove_shared_vm_struct(next, file, mapping);
826 } else if (insert) {
827
828
829
830
831
832 __insert_vm_struct(mm, insert);
833 } else {
834 if (start_changed)
835 vma_gap_update(vma);
836 if (end_changed) {
837 if (!next)
838 mm->highest_vm_end = end;
839 else if (!adjust_next)
840 vma_gap_update(next);
841 }
842 }
843
844 if (anon_vma) {
845 anon_vma_interval_tree_post_update_vma(vma);
846 if (adjust_next)
847 anon_vma_interval_tree_post_update_vma(next);
848 anon_vma_unlock_write(anon_vma);
849 }
850 if (mapping)
851 mutex_unlock(&mapping->i_mmap_mutex);
852
853 if (root) {
854 uprobe_mmap(vma);
855
856 if (adjust_next)
857 uprobe_mmap(next);
858 }
859
860 if (remove_next) {
861 if (file) {
862 uprobe_munmap(next, next->vm_start, next->vm_end);
863 fput(file);
864 }
865 if (next->anon_vma)
866 anon_vma_merge(vma, next);
867 mm->map_count--;
868 mpol_put(vma_policy(next));
869 kmem_cache_free(vm_area_cachep, next);
870
871
872
873
874
875 next = vma->vm_next;
876 if (remove_next == 2)
877 goto again;
878 else if (next)
879 vma_gap_update(next);
880 else
881 mm->highest_vm_end = end;
882 }
883 if (insert && file)
884 uprobe_mmap(insert);
885
886 validate_mm(mm);
887
888 return 0;
889}
890
891
892
893
894
895static inline int is_mergeable_vma(struct vm_area_struct *vma,
896 struct file *file, unsigned long vm_flags)
897{
898 if (vma->vm_flags ^ vm_flags)
899 return 0;
900 if (vma->vm_file != file)
901 return 0;
902 if (vma->vm_ops && vma->vm_ops->close)
903 return 0;
904 return 1;
905}
906
907static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
908 struct anon_vma *anon_vma2,
909 struct vm_area_struct *vma)
910{
911
912
913
914
915 if ((!anon_vma1 || !anon_vma2) && (!vma ||
916 list_is_singular(&vma->anon_vma_chain)))
917 return 1;
918 return anon_vma1 == anon_vma2;
919}
920
921
922
923
924
925
926
927
928
929
930
931
932static int
933can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
934 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
935{
936 if (is_mergeable_vma(vma, file, vm_flags) &&
937 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
938 if (vma->vm_pgoff == vm_pgoff)
939 return 1;
940 }
941 return 0;
942}
943
944
945
946
947
948
949
950
951static int
952can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
953 struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
954{
955 if (is_mergeable_vma(vma, file, vm_flags) &&
956 is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
957 pgoff_t vm_pglen;
958 vm_pglen = vma_pages(vma);
959 if (vma->vm_pgoff + vm_pglen == vm_pgoff)
960 return 1;
961 }
962 return 0;
963}
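
/*
 * vma_merge() is given an address range about to be mapped (or remapped)
 * with the supplied flags, file and policy, where "prev" is the vma
 * preceding the range.  If the new range can be merged into prev, into
 * the following vma, or can bridge the two, the existing vma(s) are
 * expanded via vma_adjust() and the resulting vma is returned; otherwise
 * NULL is returned and the caller must allocate a new vma.
 */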
994struct vm_area_struct *vma_merge(struct mm_struct *mm,
995 struct vm_area_struct *prev, unsigned long addr,
996 unsigned long end, unsigned long vm_flags,
997 struct anon_vma *anon_vma, struct file *file,
998 pgoff_t pgoff, struct mempolicy *policy)
999{
1000 pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
1001 struct vm_area_struct *area, *next;
1002 int err;
1003
1004
1005
1006
1007
1008 if (vm_flags & VM_SPECIAL)
1009 return NULL;
1010
1011 if (prev)
1012 next = prev->vm_next;
1013 else
1014 next = mm->mmap;
1015 area = next;
1016 if (next && next->vm_end == end)
1017 next = next->vm_next;
1018
1019
1020
1021
1022 if (prev && prev->vm_end == addr &&
1023 mpol_equal(vma_policy(prev), policy) &&
1024 can_vma_merge_after(prev, vm_flags,
1025 anon_vma, file, pgoff)) {
1026
1027
1028
1029 if (next && end == next->vm_start &&
1030 mpol_equal(policy, vma_policy(next)) &&
1031 can_vma_merge_before(next, vm_flags,
1032 anon_vma, file, pgoff+pglen) &&
1033 is_mergeable_anon_vma(prev->anon_vma,
1034 next->anon_vma, NULL)) {
1035
1036 err = vma_adjust(prev, prev->vm_start,
1037 next->vm_end, prev->vm_pgoff, NULL);
1038 } else
1039 err = vma_adjust(prev, prev->vm_start,
1040 end, prev->vm_pgoff, NULL);
1041 if (err)
1042 return NULL;
1043 khugepaged_enter_vma_merge(prev);
1044 return prev;
1045 }
1046
1047
1048
1049
1050 if (next && end == next->vm_start &&
1051 mpol_equal(policy, vma_policy(next)) &&
1052 can_vma_merge_before(next, vm_flags,
1053 anon_vma, file, pgoff+pglen)) {
1054 if (prev && addr < prev->vm_end)
1055 err = vma_adjust(prev, prev->vm_start,
1056 addr, prev->vm_pgoff, NULL);
1057 else
1058 err = vma_adjust(area, addr, next->vm_end,
1059 next->vm_pgoff - pglen, NULL);
1060 if (err)
1061 return NULL;
1062 khugepaged_enter_vma_merge(area);
1063 return area;
1064 }
1065
1066 return NULL;
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
1083{
1084 return a->vm_end == b->vm_start &&
1085 mpol_equal(vma_policy(a), vma_policy(b)) &&
1086 a->vm_file == b->vm_file &&
1087 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
1088 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
1089}
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
1114{
1115 if (anon_vma_compatible(a, b)) {
1116 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
1117
1118 if (anon_vma && list_is_singular(&old->anon_vma_chain))
1119 return anon_vma;
1120 }
1121 return NULL;
1122}
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
1133{
1134 struct anon_vma *anon_vma;
1135 struct vm_area_struct *near;
1136
1137 near = vma->vm_next;
1138 if (!near)
1139 goto try_prev;
1140
1141 anon_vma = reusable_anon_vma(near, vma, near);
1142 if (anon_vma)
1143 return anon_vma;
1144try_prev:
1145 near = vma->vm_prev;
1146 if (!near)
1147 goto none;
1148
1149 anon_vma = reusable_anon_vma(near, near, vma);
1150 if (anon_vma)
1151 return anon_vma;
1152none:
1153
1154
1155
1156
1157
1158
1159
1160
1161 return NULL;
1162}
1163
1164#ifdef CONFIG_PROC_FS
1165void vm_stat_account(struct mm_struct *mm, unsigned long flags,
1166 struct file *file, long pages)
1167{
1168 const unsigned long stack_flags
1169 = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
1170
1171 mm->total_vm += pages;
1172
1173 if (file) {
1174 mm->shared_vm += pages;
1175 if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
1176 mm->exec_vm += pages;
1177 } else if (flags & stack_flags)
1178 mm->stack_vm += pages;
1179}
1180#endif
1181
1182
1183
1184
1185
1186static inline unsigned long round_hint_to_min(unsigned long hint)
1187{
1188 hint &= PAGE_MASK;
1189 if (((void *)hint != NULL) &&
1190 (hint < mmap_min_addr))
1191 return PAGE_ALIGN(mmap_min_addr);
1192 return hint;
1193}
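
/*
 * The caller must hold down_write(&current->mm->mmap_sem).
 */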
1199unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1200 unsigned long len, unsigned long prot,
1201 unsigned long flags, unsigned long pgoff,
1202 unsigned long *populate)
1203{
1204 struct mm_struct * mm = current->mm;
1205 struct inode *inode;
1206 vm_flags_t vm_flags;
1207
1208 *populate = 0;
1209
1210
1211
1212
1213
1214
1215
1216 if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
1217 if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
1218 prot |= PROT_EXEC;
1219
1220 if (!len)
1221 return -EINVAL;
1222
1223 if (!(flags & MAP_FIXED))
1224 addr = round_hint_to_min(addr);
1225
1226
1227 len = PAGE_ALIGN(len);
1228 if (!len)
1229 return -ENOMEM;
1230
1231
1232 if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
1233 return -EOVERFLOW;
1234
1235
1236 if (mm->map_count > sysctl_max_map_count)
1237 return -ENOMEM;
1238
1239
1240
1241
1242 addr = get_unmapped_area(file, addr, len, pgoff, flags);
1243 if (addr & ~PAGE_MASK)
1244 return addr;
1245
1246
1247
1248
1249
1250 vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
1251 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
1252
1253 if (flags & MAP_LOCKED)
1254 if (!can_do_mlock())
1255 return -EPERM;
1256
1257
1258 if (vm_flags & VM_LOCKED) {
1259 unsigned long locked, lock_limit;
1260 locked = len >> PAGE_SHIFT;
1261 locked += mm->locked_vm;
1262 lock_limit = rlimit(RLIMIT_MEMLOCK);
1263 lock_limit >>= PAGE_SHIFT;
1264 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
1265 return -EAGAIN;
1266 }
1267
1268 inode = file ? file_inode(file) : NULL;
1269
1270 if (file) {
1271 switch (flags & MAP_TYPE) {
1272 case MAP_SHARED:
1273 if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
1274 return -EACCES;
1275
1276
1277
1278
1279
1280 if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
1281 return -EACCES;
1282
1283
1284
1285
1286 if (locks_verify_locked(inode))
1287 return -EAGAIN;
1288
1289 vm_flags |= VM_SHARED | VM_MAYSHARE;
1290 if (!(file->f_mode & FMODE_WRITE))
1291 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
1292
1293
1294 case MAP_PRIVATE:
1295 if (!(file->f_mode & FMODE_READ))
1296 return -EACCES;
1297 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
1298 if (vm_flags & VM_EXEC)
1299 return -EPERM;
1300 vm_flags &= ~VM_MAYEXEC;
1301 }
1302
1303 if (!file->f_op || !file->f_op->mmap)
1304 return -ENODEV;
1305 break;
1306
1307 default:
1308 return -EINVAL;
1309 }
1310 } else {
1311 switch (flags & MAP_TYPE) {
1312 case MAP_SHARED:
1313
1314
1315
1316 pgoff = 0;
1317 vm_flags |= VM_SHARED | VM_MAYSHARE;
1318 break;
1319 case MAP_PRIVATE:
1320
1321
1322
1323 pgoff = addr >> PAGE_SHIFT;
1324 break;
1325 default:
1326 return -EINVAL;
1327 }
1328 }
1329
1330
1331
1332
1333
1334 if (flags & MAP_NORESERVE) {
1335
1336 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1337 vm_flags |= VM_NORESERVE;
1338
1339
1340 if (file && is_file_hugepages(file))
1341 vm_flags |= VM_NORESERVE;
1342 }
1343
1344 addr = mmap_region(file, addr, len, vm_flags, pgoff);
1345 if (!IS_ERR_VALUE(addr) &&
1346 ((vm_flags & VM_LOCKED) ||
1347 (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
1348 *populate = len;
1349 return addr;
1350}
1351
1352SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
1353 unsigned long, prot, unsigned long, flags,
1354 unsigned long, fd, unsigned long, pgoff)
1355{
1356 struct file *file = NULL;
1357 unsigned long retval = -EBADF;
1358
1359 if (!(flags & MAP_ANONYMOUS)) {
1360 audit_mmap_fd(fd, flags);
1361 file = fget(fd);
1362 if (!file)
1363 goto out;
1364 if (is_file_hugepages(file))
1365 len = ALIGN(len, huge_page_size(hstate_file(file)));
1366 retval = -EINVAL;
1367 if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
1368 goto out_fput;
1369 } else if (flags & MAP_HUGETLB) {
1370 struct user_struct *user = NULL;
1371 struct hstate *hs;
1372
1373 hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & SHM_HUGE_MASK);
1374 if (!hs)
1375 return -EINVAL;
1376
1377 len = ALIGN(len, huge_page_size(hs));
1378
1379
1380
1381
1382
1383
1384 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
1385 VM_NORESERVE,
1386 &user, HUGETLB_ANONHUGE_INODE,
1387 (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
1388 if (IS_ERR(file))
1389 return PTR_ERR(file);
1390 }
1391
1392 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
1393
1394 retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
1395out_fput:
1396 if (file)
1397 fput(file);
1398out:
1399 return retval;
1400}
1401
1402#ifdef __ARCH_WANT_SYS_OLD_MMAP
1403struct mmap_arg_struct {
1404 unsigned long addr;
1405 unsigned long len;
1406 unsigned long prot;
1407 unsigned long flags;
1408 unsigned long fd;
1409 unsigned long offset;
1410};
1411
1412SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
1413{
1414 struct mmap_arg_struct a;
1415
1416 if (copy_from_user(&a, arg, sizeof(a)))
1417 return -EFAULT;
1418 if (a.offset & ~PAGE_MASK)
1419 return -EINVAL;
1420
1421 return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
1422 a.offset >> PAGE_SHIFT);
1423}
1424#endif
1425
1426
1427
1428
1429
1430
1431
1432int vma_wants_writenotify(struct vm_area_struct *vma)
1433{
1434 vm_flags_t vm_flags = vma->vm_flags;
1435
1436
1437 if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
1438 return 0;
1439
1440
1441 if (vma->vm_ops && vma->vm_ops->page_mkwrite)
1442 return 1;
1443
1444
1445 if (pgprot_val(vma->vm_page_prot) !=
1446 pgprot_val(vm_get_page_prot(vm_flags)))
1447 return 0;
1448
1449
1450 if (vm_flags & VM_PFNMAP)
1451 return 0;
1452
1453
1454 return vma->vm_file && vma->vm_file->f_mapping &&
1455 mapping_cap_account_dirty(vma->vm_file->f_mapping);
1456}
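
/*
 * We account for memory if it is a private writable mapping, is not
 * hugetlb-backed, and VM_NORESERVE was not set.
 */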
1462static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
1463{
1464
1465
1466
1467
1468 if (file && is_file_hugepages(file))
1469 return 0;
1470
1471 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1472}
1473
1474unsigned long mmap_region(struct file *file, unsigned long addr,
1475 unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
1476{
1477 struct mm_struct *mm = current->mm;
1478 struct vm_area_struct *vma, *prev;
1479 int correct_wcount = 0;
1480 int error;
1481 struct rb_node **rb_link, *rb_parent;
1482 unsigned long charged = 0;
1483 struct inode *inode = file ? file_inode(file) : NULL;
1484
1485
1486 if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
1487 unsigned long nr_pages;
1488
1489
1490
1491
1492
1493 if (!(vm_flags & MAP_FIXED))
1494 return -ENOMEM;
1495
1496 nr_pages = count_vma_pages_range(mm, addr, addr + len);
1497
1498 if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
1499 return -ENOMEM;
1500 }
1501
1502
1503 error = -ENOMEM;
1504munmap_back:
1505 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
1506 if (do_munmap(mm, addr, len))
1507 return -ENOMEM;
1508 goto munmap_back;
1509 }
1510
1511
1512
1513
1514 if (accountable_mapping(file, vm_flags)) {
1515 charged = len >> PAGE_SHIFT;
1516 if (security_vm_enough_memory_mm(mm, charged))
1517 return -ENOMEM;
1518 vm_flags |= VM_ACCOUNT;
1519 }
1520
1521
1522
1523
1524 vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
1525 if (vma)
1526 goto out;
1527
1528
1529
1530
1531
1532
1533 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
1534 if (!vma) {
1535 error = -ENOMEM;
1536 goto unacct_error;
1537 }
1538
1539 vma->vm_mm = mm;
1540 vma->vm_start = addr;
1541 vma->vm_end = addr + len;
1542 vma->vm_flags = vm_flags;
1543 vma->vm_page_prot = vm_get_page_prot(vm_flags);
1544 vma->vm_pgoff = pgoff;
1545 INIT_LIST_HEAD(&vma->anon_vma_chain);
1546
1547 error = -EINVAL;
1548
1549 if (file) {
1550 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
1551 goto free_vma;
1552 if (vm_flags & VM_DENYWRITE) {
1553 error = deny_write_access(file);
1554 if (error)
1555 goto free_vma;
1556 correct_wcount = 1;
1557 }
1558 vma->vm_file = get_file(file);
1559 error = file->f_op->mmap(file, vma);
1560 if (error)
1561 goto unmap_and_free_vma;
1562
1563
1564
1565
1566
1567
1568
1569
1570 WARN_ON_ONCE(addr != vma->vm_start);
1571
1572 addr = vma->vm_start;
1573 pgoff = vma->vm_pgoff;
1574 vm_flags = vma->vm_flags;
1575 } else if (vm_flags & VM_SHARED) {
1576 if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
1577 goto free_vma;
1578 error = shmem_zero_setup(vma);
1579 if (error)
1580 goto free_vma;
1581 }
1582
1583 if (vma_wants_writenotify(vma)) {
1584 pgprot_t pprot = vma->vm_page_prot;
1585
1586
1587
1588
1589
1590
1591
1592
1593 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
1594 if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
1595 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1596 }
1597
1598 vma_link(mm, vma, prev, rb_link, rb_parent);
1599 file = vma->vm_file;
1600
1601
1602 if (correct_wcount)
1603 atomic_inc(&inode->i_writecount);
1604out:
1605 perf_event_mmap(vma);
1606
1607 vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
1608 if (vm_flags & VM_LOCKED) {
1609 if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
1610 vma == get_gate_vma(current->mm)))
1611 mm->locked_vm += (len >> PAGE_SHIFT);
1612 else
1613 vma->vm_flags &= ~VM_LOCKED;
1614 }
1615
1616 if (file)
1617 uprobe_mmap(vma);
1618
1619 return addr;
1620
1621unmap_and_free_vma:
1622 if (correct_wcount)
1623 atomic_inc(&inode->i_writecount);
1624 vma->vm_file = NULL;
1625 fput(file);
1626
1627
1628 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
1629 charged = 0;
1630free_vma:
1631 kmem_cache_free(vm_area_cachep, vma);
1632unacct_error:
1633 if (charged)
1634 vm_unacct_memory(charged);
1635 return error;
1636}
1637
1638unsigned long unmapped_area(struct vm_unmapped_area_info *info)
1639{
1640
1641
1642
1643
1644
1645
1646
1647
1648 struct mm_struct *mm = current->mm;
1649 struct vm_area_struct *vma;
1650 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1651
1652
1653 length = info->length + info->align_mask;
1654 if (length < info->length)
1655 return -ENOMEM;
1656
1657
1658 if (info->high_limit < length)
1659 return -ENOMEM;
1660 high_limit = info->high_limit - length;
1661
1662 if (info->low_limit > high_limit)
1663 return -ENOMEM;
1664 low_limit = info->low_limit + length;
1665
1666
1667 if (RB_EMPTY_ROOT(&mm->mm_rb))
1668 goto check_highest;
1669 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1670 if (vma->rb_subtree_gap < length)
1671 goto check_highest;
1672
1673 while (true) {
1674
1675 gap_end = vma->vm_start;
1676 if (gap_end >= low_limit && vma->vm_rb.rb_left) {
1677 struct vm_area_struct *left =
1678 rb_entry(vma->vm_rb.rb_left,
1679 struct vm_area_struct, vm_rb);
1680 if (left->rb_subtree_gap >= length) {
1681 vma = left;
1682 continue;
1683 }
1684 }
1685
1686 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1687check_current:
1688
1689 if (gap_start > high_limit)
1690 return -ENOMEM;
1691 if (gap_end >= low_limit && gap_end - gap_start >= length)
1692 goto found;
1693
1694
1695 if (vma->vm_rb.rb_right) {
1696 struct vm_area_struct *right =
1697 rb_entry(vma->vm_rb.rb_right,
1698 struct vm_area_struct, vm_rb);
1699 if (right->rb_subtree_gap >= length) {
1700 vma = right;
1701 continue;
1702 }
1703 }
1704
1705
1706 while (true) {
1707 struct rb_node *prev = &vma->vm_rb;
1708 if (!rb_parent(prev))
1709 goto check_highest;
1710 vma = rb_entry(rb_parent(prev),
1711 struct vm_area_struct, vm_rb);
1712 if (prev == vma->vm_rb.rb_left) {
1713 gap_start = vma->vm_prev->vm_end;
1714 gap_end = vma->vm_start;
1715 goto check_current;
1716 }
1717 }
1718 }
1719
1720check_highest:
1721
1722 gap_start = mm->highest_vm_end;
1723 gap_end = ULONG_MAX;
1724 if (gap_start > high_limit)
1725 return -ENOMEM;
1726
1727found:
1728
1729 if (gap_start < info->low_limit)
1730 gap_start = info->low_limit;
1731
1732
1733 gap_start += (info->align_offset - gap_start) & info->align_mask;
1734
1735 VM_BUG_ON(gap_start + info->length > info->high_limit);
1736 VM_BUG_ON(gap_start + info->length > gap_end);
1737 return gap_start;
1738}
1739
1740unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
1741{
1742 struct mm_struct *mm = current->mm;
1743 struct vm_area_struct *vma;
1744 unsigned long length, low_limit, high_limit, gap_start, gap_end;
1745
1746
1747 length = info->length + info->align_mask;
1748 if (length < info->length)
1749 return -ENOMEM;
1750
1751
1752
1753
1754
1755 gap_end = info->high_limit;
1756 if (gap_end < length)
1757 return -ENOMEM;
1758 high_limit = gap_end - length;
1759
1760 if (info->low_limit > high_limit)
1761 return -ENOMEM;
1762 low_limit = info->low_limit + length;
1763
1764
1765 gap_start = mm->highest_vm_end;
1766 if (gap_start <= high_limit)
1767 goto found_highest;
1768
1769
1770 if (RB_EMPTY_ROOT(&mm->mm_rb))
1771 return -ENOMEM;
1772 vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
1773 if (vma->rb_subtree_gap < length)
1774 return -ENOMEM;
1775
1776 while (true) {
1777
1778 gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
1779 if (gap_start <= high_limit && vma->vm_rb.rb_right) {
1780 struct vm_area_struct *right =
1781 rb_entry(vma->vm_rb.rb_right,
1782 struct vm_area_struct, vm_rb);
1783 if (right->rb_subtree_gap >= length) {
1784 vma = right;
1785 continue;
1786 }
1787 }
1788
1789check_current:
1790
1791 gap_end = vma->vm_start;
1792 if (gap_end < low_limit)
1793 return -ENOMEM;
1794 if (gap_start <= high_limit && gap_end - gap_start >= length)
1795 goto found;
1796
1797
1798 if (vma->vm_rb.rb_left) {
1799 struct vm_area_struct *left =
1800 rb_entry(vma->vm_rb.rb_left,
1801 struct vm_area_struct, vm_rb);
1802 if (left->rb_subtree_gap >= length) {
1803 vma = left;
1804 continue;
1805 }
1806 }
1807
1808
1809 while (true) {
1810 struct rb_node *prev = &vma->vm_rb;
1811 if (!rb_parent(prev))
1812 return -ENOMEM;
1813 vma = rb_entry(rb_parent(prev),
1814 struct vm_area_struct, vm_rb);
1815 if (prev == vma->vm_rb.rb_right) {
1816 gap_start = vma->vm_prev ?
1817 vma->vm_prev->vm_end : 0;
1818 goto check_current;
1819 }
1820 }
1821 }
1822
1823found:
1824
1825 if (gap_end > info->high_limit)
1826 gap_end = info->high_limit;
1827
1828found_highest:
1829
1830 gap_end -= info->length;
1831 gap_end -= (gap_end - info->align_offset) & info->align_mask;
1832
1833 VM_BUG_ON(gap_end < info->low_limit);
1834 VM_BUG_ON(gap_end < gap_start);
1835 return gap_end;
1836}
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849#ifndef HAVE_ARCH_UNMAPPED_AREA
1850unsigned long
1851arch_get_unmapped_area(struct file *filp, unsigned long addr,
1852 unsigned long len, unsigned long pgoff, unsigned long flags)
1853{
1854 struct mm_struct *mm = current->mm;
1855 struct vm_area_struct *vma;
1856 struct vm_unmapped_area_info info;
1857
1858 if (len > TASK_SIZE)
1859 return -ENOMEM;
1860
1861 if (flags & MAP_FIXED)
1862 return addr;
1863
1864 if (addr) {
1865 addr = PAGE_ALIGN(addr);
1866 vma = find_vma(mm, addr);
1867 if (TASK_SIZE - len >= addr &&
1868 (!vma || addr + len <= vma->vm_start))
1869 return addr;
1870 }
1871
1872 info.flags = 0;
1873 info.length = len;
1874 info.low_limit = TASK_UNMAPPED_BASE;
1875 info.high_limit = TASK_SIZE;
1876 info.align_mask = 0;
1877 return vm_unmapped_area(&info);
1878}
1879#endif
1880
1881
1882
1883
1884
1885#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
1886unsigned long
1887arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1888 const unsigned long len, const unsigned long pgoff,
1889 const unsigned long flags)
1890{
1891 struct vm_area_struct *vma;
1892 struct mm_struct *mm = current->mm;
1893 unsigned long addr = addr0;
1894 struct vm_unmapped_area_info info;
1895
1896
1897 if (len > TASK_SIZE)
1898 return -ENOMEM;
1899
1900 if (flags & MAP_FIXED)
1901 return addr;
1902
1903
1904 if (addr) {
1905 addr = PAGE_ALIGN(addr);
1906 vma = find_vma(mm, addr);
1907 if (TASK_SIZE - len >= addr &&
1908 (!vma || addr + len <= vma->vm_start))
1909 return addr;
1910 }
1911
1912 info.flags = VM_UNMAPPED_AREA_TOPDOWN;
1913 info.length = len;
1914 info.low_limit = PAGE_SIZE;
1915 info.high_limit = mm->mmap_base;
1916 info.align_mask = 0;
1917 addr = vm_unmapped_area(&info);
1918
1919
1920
1921
1922
1923
1924
1925 if (addr & ~PAGE_MASK) {
1926 VM_BUG_ON(addr != -ENOMEM);
1927 info.flags = 0;
1928 info.low_limit = TASK_UNMAPPED_BASE;
1929 info.high_limit = TASK_SIZE;
1930 addr = vm_unmapped_area(&info);
1931 }
1932
1933 return addr;
1934}
1935#endif
1936
1937unsigned long
1938get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1939 unsigned long pgoff, unsigned long flags)
1940{
1941 unsigned long (*get_area)(struct file *, unsigned long,
1942 unsigned long, unsigned long, unsigned long);
1943
1944 unsigned long error = arch_mmap_check(addr, len, flags);
1945 if (error)
1946 return error;
1947
1948
1949 if (len > TASK_SIZE)
1950 return -ENOMEM;
1951
1952 get_area = current->mm->get_unmapped_area;
1953 if (file && file->f_op && file->f_op->get_unmapped_area)
1954 get_area = file->f_op->get_unmapped_area;
1955 addr = get_area(file, addr, len, pgoff, flags);
1956 if (IS_ERR_VALUE(addr))
1957 return addr;
1958
1959 if (addr > TASK_SIZE - len)
1960 return -ENOMEM;
1961 if (addr & ~PAGE_MASK)
1962 return -EINVAL;
1963
1964 addr = arch_rebalance_pgtables(addr, len);
1965 error = security_mmap_addr(addr);
1966 return error ? error : addr;
1967}
1968
1969EXPORT_SYMBOL(get_unmapped_area);
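
/* Look up the first VMA which satisfies addr < vm_end, or NULL if none. */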
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = NULL;

	/* Check the per-mm cache first. */
	vma = ACCESS_ONCE(mm->mmap_cache);
	if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
		struct rb_node *rb_node;

		rb_node = mm->mm_rb.rb_node;
		vma = NULL;

		while (rb_node) {
			struct vm_area_struct *vma_tmp;

			vma_tmp = rb_entry(rb_node,
					struct vm_area_struct, vm_rb);

			if (vma_tmp->vm_end > addr) {
				vma = vma_tmp;
				if (vma_tmp->vm_start <= addr)
					break;
				rb_node = rb_node->rb_left;
			} else
				rb_node = rb_node->rb_right;
		}
		if (vma)
			mm->mmap_cache = vma;
	}
	return vma;
}

EXPORT_SYMBOL(find_vma);
2006
2007
2008
2009
2010struct vm_area_struct *
2011find_vma_prev(struct mm_struct *mm, unsigned long addr,
2012 struct vm_area_struct **pprev)
2013{
2014 struct vm_area_struct *vma;
2015
2016 vma = find_vma(mm, addr);
2017 if (vma) {
2018 *pprev = vma->vm_prev;
2019 } else {
2020 struct rb_node *rb_node = mm->mm_rb.rb_node;
2021 *pprev = NULL;
2022 while (rb_node) {
2023 *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
2024 rb_node = rb_node->rb_right;
2025 }
2026 }
2027 return vma;
2028}
2029
2030
2031
2032
2033
2034
2035static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
2036{
2037 struct mm_struct *mm = vma->vm_mm;
2038 struct rlimit *rlim = current->signal->rlim;
2039 unsigned long new_start;
2040
2041
2042 if (!may_expand_vm(mm, grow))
2043 return -ENOMEM;
2044
2045
2046 if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
2047 return -ENOMEM;
2048
2049
2050 if (vma->vm_flags & VM_LOCKED) {
2051 unsigned long locked;
2052 unsigned long limit;
2053 locked = mm->locked_vm + grow;
2054 limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
2055 limit >>= PAGE_SHIFT;
2056 if (locked > limit && !capable(CAP_IPC_LOCK))
2057 return -ENOMEM;
2058 }
2059
2060
2061 new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
2062 vma->vm_end - size;
2063 if (is_hugepage_only_range(vma->vm_mm, new_start, size))
2064 return -EFAULT;
2065
2066
2067
2068
2069
2070 if (security_vm_enough_memory_mm(mm, grow))
2071 return -ENOMEM;
2072
2073
2074 if (vma->vm_flags & VM_LOCKED)
2075 mm->locked_vm += grow;
2076 vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
2077 return 0;
2078}
2079
2080#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
2081
2082
2083
2084
2085int expand_upwards(struct vm_area_struct *vma, unsigned long address)
2086{
2087 int error;
2088
2089 if (!(vma->vm_flags & VM_GROWSUP))
2090 return -EFAULT;
2091
2092
2093
2094
2095
2096 if (unlikely(anon_vma_prepare(vma)))
2097 return -ENOMEM;
2098 vma_lock_anon_vma(vma);
2099
2100
2101
2102
2103
2104
2105
2106 if (address < PAGE_ALIGN(address+4))
2107 address = PAGE_ALIGN(address+4);
2108 else {
2109 vma_unlock_anon_vma(vma);
2110 return -ENOMEM;
2111 }
2112 error = 0;
2113
2114
2115 if (address > vma->vm_end) {
2116 unsigned long size, grow;
2117
2118 size = address - vma->vm_start;
2119 grow = (address - vma->vm_end) >> PAGE_SHIFT;
2120
2121 error = -ENOMEM;
2122 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
2123 error = acct_stack_growth(vma, size, grow);
2124 if (!error) {
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136 spin_lock(&vma->vm_mm->page_table_lock);
2137 anon_vma_interval_tree_pre_update_vma(vma);
2138 vma->vm_end = address;
2139 anon_vma_interval_tree_post_update_vma(vma);
2140 if (vma->vm_next)
2141 vma_gap_update(vma->vm_next);
2142 else
2143 vma->vm_mm->highest_vm_end = address;
2144 spin_unlock(&vma->vm_mm->page_table_lock);
2145
2146 perf_event_mmap(vma);
2147 }
2148 }
2149 }
2150 vma_unlock_anon_vma(vma);
2151 khugepaged_enter_vma_merge(vma);
2152 validate_mm(vma->vm_mm);
2153 return error;
2154}
2155#endif
2156
2157
2158
2159
2160int expand_downwards(struct vm_area_struct *vma,
2161 unsigned long address)
2162{
2163 int error;
2164
2165
2166
2167
2168
2169 if (unlikely(anon_vma_prepare(vma)))
2170 return -ENOMEM;
2171
2172 address &= PAGE_MASK;
2173 error = security_mmap_addr(address);
2174 if (error)
2175 return error;
2176
2177 vma_lock_anon_vma(vma);
2178
2179
2180
2181
2182
2183
2184
2185
2186 if (address < vma->vm_start) {
2187 unsigned long size, grow;
2188
2189 size = vma->vm_end - address;
2190 grow = (vma->vm_start - address) >> PAGE_SHIFT;
2191
2192 error = -ENOMEM;
2193 if (grow <= vma->vm_pgoff) {
2194 error = acct_stack_growth(vma, size, grow);
2195 if (!error) {
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207 spin_lock(&vma->vm_mm->page_table_lock);
2208 anon_vma_interval_tree_pre_update_vma(vma);
2209 vma->vm_start = address;
2210 vma->vm_pgoff -= grow;
2211 anon_vma_interval_tree_post_update_vma(vma);
2212 vma_gap_update(vma);
2213 spin_unlock(&vma->vm_mm->page_table_lock);
2214
2215 perf_event_mmap(vma);
2216 }
2217 }
2218 }
2219 vma_unlock_anon_vma(vma);
2220 khugepaged_enter_vma_merge(vma);
2221 validate_mm(vma->vm_mm);
2222 return error;
2223}
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236#ifdef CONFIG_STACK_GROWSUP
2237int expand_stack(struct vm_area_struct *vma, unsigned long address)
2238{
2239 struct vm_area_struct *next;
2240
2241 address &= PAGE_MASK;
2242 next = vma->vm_next;
2243 if (next && next->vm_start == address + PAGE_SIZE) {
2244 if (!(next->vm_flags & VM_GROWSUP))
2245 return -ENOMEM;
2246 }
2247 return expand_upwards(vma, address);
2248}
2249
2250struct vm_area_struct *
2251find_extend_vma(struct mm_struct *mm, unsigned long addr)
2252{
2253 struct vm_area_struct *vma, *prev;
2254
2255 addr &= PAGE_MASK;
2256 vma = find_vma_prev(mm, addr, &prev);
2257 if (vma && (vma->vm_start <= addr))
2258 return vma;
2259 if (!prev || expand_stack(prev, addr))
2260 return NULL;
2261 if (prev->vm_flags & VM_LOCKED)
2262 __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
2263 return prev;
2264}
2265#else
2266int expand_stack(struct vm_area_struct *vma, unsigned long address)
2267{
2268 struct vm_area_struct *prev;
2269
2270 address &= PAGE_MASK;
2271 prev = vma->vm_prev;
2272 if (prev && prev->vm_end == address) {
2273 if (!(prev->vm_flags & VM_GROWSDOWN))
2274 return -ENOMEM;
2275 }
2276 return expand_downwards(vma, address);
2277}
2278
2279struct vm_area_struct *
2280find_extend_vma(struct mm_struct * mm, unsigned long addr)
2281{
2282 struct vm_area_struct * vma;
2283 unsigned long start;
2284
2285 addr &= PAGE_MASK;
2286 vma = find_vma(mm,addr);
2287 if (!vma)
2288 return NULL;
2289 if (vma->vm_start <= addr)
2290 return vma;
2291 if (!(vma->vm_flags & VM_GROWSDOWN))
2292 return NULL;
2293 start = vma->vm_start;
2294 if (expand_stack(vma, addr))
2295 return NULL;
2296 if (vma->vm_flags & VM_LOCKED)
2297 __mlock_vma_pages_range(vma, addr, start, NULL);
2298 return vma;
2299}
2300#endif
2301
2302
2303
2304
2305
2306
2307
2308static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
2309{
2310 unsigned long nr_accounted = 0;
2311
2312
2313 update_hiwater_vm(mm);
2314 do {
2315 long nrpages = vma_pages(vma);
2316
2317 if (vma->vm_flags & VM_ACCOUNT)
2318 nr_accounted += nrpages;
2319 vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
2320 vma = remove_vma(vma);
2321 } while (vma);
2322 vm_unacct_memory(nr_accounted);
2323 validate_mm(mm);
2324}
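
/*
 * Get rid of the page table entries (and their pages) covering the
 * indicated region.  Called with the mm semaphore held.
 */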
2331static void unmap_region(struct mm_struct *mm,
2332 struct vm_area_struct *vma, struct vm_area_struct *prev,
2333 unsigned long start, unsigned long end)
2334{
2335 struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
2336 struct mmu_gather tlb;
2337
2338 lru_add_drain();
2339 tlb_gather_mmu(&tlb, mm, start, end);
2340 update_hiwater_rss(mm);
2341 unmap_vmas(&tlb, vma, start, end);
2342 free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
2343 next ? next->vm_start : USER_PGTABLES_CEILING);
2344 tlb_finish_mmu(&tlb, start, end);
2345}
2346
2347
2348
2349
2350
2351static void
2352detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
2353 struct vm_area_struct *prev, unsigned long end)
2354{
2355 struct vm_area_struct **insertion_point;
2356 struct vm_area_struct *tail_vma = NULL;
2357
2358 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
2359 vma->vm_prev = NULL;
2360 do {
2361 vma_rb_erase(vma, &mm->mm_rb);
2362 mm->map_count--;
2363 tail_vma = vma;
2364 vma = vma->vm_next;
2365 } while (vma && vma->vm_start < end);
2366 *insertion_point = vma;
2367 if (vma) {
2368 vma->vm_prev = prev;
2369 vma_gap_update(vma);
2370 } else
2371 mm->highest_vm_end = prev ? prev->vm_end : 0;
2372 tail_vma->vm_next = NULL;
2373 mm->mmap_cache = NULL;
2374}
2375
2376
2377
2378
2379
2380static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
2381 unsigned long addr, int new_below)
2382{
2383 struct mempolicy *pol;
2384 struct vm_area_struct *new;
2385 int err = -ENOMEM;
2386
2387 if (is_vm_hugetlb_page(vma) && (addr &
2388 ~(huge_page_mask(hstate_vma(vma)))))
2389 return -EINVAL;
2390
2391 new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2392 if (!new)
2393 goto out_err;
2394
2395
2396 *new = *vma;
2397
2398 INIT_LIST_HEAD(&new->anon_vma_chain);
2399
2400 if (new_below)
2401 new->vm_end = addr;
2402 else {
2403 new->vm_start = addr;
2404 new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
2405 }
2406
2407 pol = mpol_dup(vma_policy(vma));
2408 if (IS_ERR(pol)) {
2409 err = PTR_ERR(pol);
2410 goto out_free_vma;
2411 }
2412 vma_set_policy(new, pol);
2413
2414 if (anon_vma_clone(new, vma))
2415 goto out_free_mpol;
2416
2417 if (new->vm_file)
2418 get_file(new->vm_file);
2419
2420 if (new->vm_ops && new->vm_ops->open)
2421 new->vm_ops->open(new);
2422
2423 if (new_below)
2424 err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
2425 ((addr - new->vm_start) >> PAGE_SHIFT), new);
2426 else
2427 err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
2428
2429
2430 if (!err)
2431 return 0;
2432
2433
2434 if (new->vm_ops && new->vm_ops->close)
2435 new->vm_ops->close(new);
2436 if (new->vm_file)
2437 fput(new->vm_file);
2438 unlink_anon_vmas(new);
2439 out_free_mpol:
2440 mpol_put(pol);
2441 out_free_vma:
2442 kmem_cache_free(vm_area_cachep, new);
2443 out_err:
2444 return err;
2445}
2446
2447
2448
2449
2450
2451int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
2452 unsigned long addr, int new_below)
2453{
2454 if (mm->map_count >= sysctl_max_map_count)
2455 return -ENOMEM;
2456
2457 return __split_vma(mm, vma, addr, new_below);
2458}
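
/*
 * Munmap is split into 2 main parts -- this part, which finds what needs
 * doing, and the areas themselves, which do the actual work.  This now
 * handles partial unmappings (splitting vmas where necessary).
 */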
2465int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
2466{
2467 unsigned long end;
2468 struct vm_area_struct *vma, *prev, *last;
2469
2470 if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
2471 return -EINVAL;
2472
2473 if ((len = PAGE_ALIGN(len)) == 0)
2474 return -EINVAL;
2475
2476
2477 vma = find_vma(mm, start);
2478 if (!vma)
2479 return 0;
2480 prev = vma->vm_prev;
2481
2482
2483
2484 end = start + len;
2485 if (vma->vm_start >= end)
2486 return 0;
2487
2488
2489
2490
2491
2492
2493
2494
2495 if (start > vma->vm_start) {
2496 int error;
2497
2498
2499
2500
2501
2502
2503 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
2504 return -ENOMEM;
2505
2506 error = __split_vma(mm, vma, start, 0);
2507 if (error)
2508 return error;
2509 prev = vma;
2510 }
2511
2512
2513 last = find_vma(mm, end);
2514 if (last && end > last->vm_start) {
2515 int error = __split_vma(mm, last, end, 1);
2516 if (error)
2517 return error;
2518 }
2519 vma = prev? prev->vm_next: mm->mmap;
2520
2521
2522
2523
2524 if (mm->locked_vm) {
2525 struct vm_area_struct *tmp = vma;
2526 while (tmp && tmp->vm_start < end) {
2527 if (tmp->vm_flags & VM_LOCKED) {
2528 mm->locked_vm -= vma_pages(tmp);
2529 munlock_vma_pages_all(tmp);
2530 }
2531 tmp = tmp->vm_next;
2532 }
2533 }
2534
2535
2536
2537
2538 detach_vmas_to_be_unmapped(mm, vma, prev, end);
2539 unmap_region(mm, vma, prev, start, end);
2540
2541
2542 remove_vma_list(mm, vma);
2543
2544 return 0;
2545}
2546
2547int vm_munmap(unsigned long start, size_t len)
2548{
2549 int ret;
2550 struct mm_struct *mm = current->mm;
2551
2552 down_write(&mm->mmap_sem);
2553 ret = do_munmap(mm, start, len);
2554 up_write(&mm->mmap_sem);
2555 return ret;
2556}
2557EXPORT_SYMBOL(vm_munmap);
2558
2559SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2560{
2561 profile_munmap(addr);
2562 return vm_munmap(addr, len);
2563}
2564
2565static inline void verify_mm_writelocked(struct mm_struct *mm)
2566{
2567#ifdef CONFIG_DEBUG_VM
2568 if (unlikely(down_read_trylock(&mm->mmap_sem))) {
2569 WARN_ON(1);
2570 up_read(&mm->mmap_sem);
2571 }
2572#endif
2573}
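
/*
 * do_brk() is a simplified do_mmap() that only handles anonymous,
 * non-file-backed mappings for the brk area; the caller must already
 * hold mmap_sem for writing.
 */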
2580static unsigned long do_brk(unsigned long addr, unsigned long len)
2581{
2582 struct mm_struct * mm = current->mm;
2583 struct vm_area_struct * vma, * prev;
2584 unsigned long flags;
2585 struct rb_node ** rb_link, * rb_parent;
2586 pgoff_t pgoff = addr >> PAGE_SHIFT;
2587 int error;
2588
2589 len = PAGE_ALIGN(len);
2590 if (!len)
2591 return addr;
2592
2593 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2594
2595 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2596 if (error & ~PAGE_MASK)
2597 return error;
2598
2599
2600
2601
2602 if (mm->def_flags & VM_LOCKED) {
2603 unsigned long locked, lock_limit;
2604 locked = len >> PAGE_SHIFT;
2605 locked += mm->locked_vm;
2606 lock_limit = rlimit(RLIMIT_MEMLOCK);
2607 lock_limit >>= PAGE_SHIFT;
2608 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
2609 return -EAGAIN;
2610 }
2611
2612
2613
2614
2615
2616 verify_mm_writelocked(mm);
2617
2618
2619
2620
2621 munmap_back:
2622 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
2623 if (do_munmap(mm, addr, len))
2624 return -ENOMEM;
2625 goto munmap_back;
2626 }
2627
2628
2629 if (!may_expand_vm(mm, len >> PAGE_SHIFT))
2630 return -ENOMEM;
2631
2632 if (mm->map_count > sysctl_max_map_count)
2633 return -ENOMEM;
2634
2635 if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
2636 return -ENOMEM;
2637
2638
2639 vma = vma_merge(mm, prev, addr, addr + len, flags,
2640 NULL, NULL, pgoff, NULL);
2641 if (vma)
2642 goto out;
2643
2644
2645
2646
2647 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2648 if (!vma) {
2649 vm_unacct_memory(len >> PAGE_SHIFT);
2650 return -ENOMEM;
2651 }
2652
2653 INIT_LIST_HEAD(&vma->anon_vma_chain);
2654 vma->vm_mm = mm;
2655 vma->vm_start = addr;
2656 vma->vm_end = addr + len;
2657 vma->vm_pgoff = pgoff;
2658 vma->vm_flags = flags;
2659 vma->vm_page_prot = vm_get_page_prot(flags);
2660 vma_link(mm, vma, prev, rb_link, rb_parent);
2661out:
2662 perf_event_mmap(vma);
2663 mm->total_vm += len >> PAGE_SHIFT;
2664 if (flags & VM_LOCKED)
2665 mm->locked_vm += (len >> PAGE_SHIFT);
2666 return addr;
2667}
2668
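/*
 * Kernel-internal brk-style allocation: runs do_brk() under mmap_sem and
 * populates the new range afterwards when mm->def_flags requests VM_LOCKED.
 */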
2669unsigned long vm_brk(unsigned long addr, unsigned long len)
2670{
2671 struct mm_struct *mm = current->mm;
2672 unsigned long ret;
2673 bool populate;
2674
2675 down_write(&mm->mmap_sem);
2676 ret = do_brk(addr, len);
2677 populate = ((mm->def_flags & VM_LOCKED) != 0);
2678 up_write(&mm->mmap_sem);
2679 if (populate)
2680 mm_populate(addr, len);
2681 return ret;
2682}
2683EXPORT_SYMBOL(vm_brk);
2684
/* Release all mmaps. */
2686void exit_mmap(struct mm_struct *mm)
2687{
2688 struct mmu_gather tlb;
2689 struct vm_area_struct *vma;
2690 unsigned long nr_accounted = 0;
2691
	/* mm's last user has gone, and its about to be pulled down */
2693 mmu_notifier_release(mm);
2694
2695 if (mm->locked_vm) {
2696 vma = mm->mmap;
2697 while (vma) {
2698 if (vma->vm_flags & VM_LOCKED)
2699 munlock_vma_pages_all(vma);
2700 vma = vma->vm_next;
2701 }
2702 }
2703
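	/* Architecture-specific teardown of the address space. */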
2704 arch_exit_mmap(mm);
2705
2706 vma = mm->mmap;
	if (!vma)	/* Can happen if dup_mmap() received an OOM */
2708 return;
2709
2710 lru_add_drain();
2711 flush_cache_mm(mm);
2712 tlb_gather_mmu(&tlb, mm, 0, -1);
	/* update_hiwater_rss(mm) here? but nobody should be looking */
	/* Use -1 here to ensure all VMAs in the mm are unmapped */
2715 unmap_vmas(&tlb, vma, 0, -1);
2716
2717 free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
2718 tlb_finish_mmu(&tlb, 0, -1);
2719
	/*
	 * Walk the list again, actually closing and freeing it,
	 * with preemption enabled, without holding any MM locks.
	 */
2724 while (vma) {
2725 if (vma->vm_flags & VM_ACCOUNT)
2726 nr_accounted += vma_pages(vma);
2727 vma = remove_vma(vma);
2728 }
2729 vm_unacct_memory(nr_accounted);
2730
2731 WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2732}
2733
/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must be locked.
 */
2738int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
2739{
2740 struct vm_area_struct *prev;
2741 struct rb_node **rb_link, *rb_parent;
2742
	/*
	 * The vm_pgoff of a purely anonymous vma should be irrelevant
	 * until its first write fault, when page's anon_vma and index
	 * are set.  But now set the vm_pgoff it will almost certainly
	 * end up with (unless mremap moves it elsewhere before that
	 * first wfault), so /proc/pid/maps tells a consistent story.
	 *
	 * By setting it to reflect the virtual start address of the
	 * vma, merges and splits can happen in a seamless way, just
	 * using the existing file pgoff checks and manipulations.
	 * Similarly in do_mmap_pgoff and in do_brk.
	 */
2755 if (!vma->vm_file) {
2756 BUG_ON(vma->anon_vma);
2757 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2758 }
2759 if (find_vma_links(mm, vma->vm_start, vma->vm_end,
2760 &prev, &rb_link, &rb_parent))
2761 return -ENOMEM;
2762 if ((vma->vm_flags & VM_ACCOUNT) &&
2763 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2764 return -ENOMEM;
2765
2766 vma_link(mm, vma, prev, rb_link, rb_parent);
2767 return 0;
2768}
2769
/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
2774struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2775 unsigned long addr, unsigned long len, pgoff_t pgoff,
2776 bool *need_rmap_locks)
2777{
2778 struct vm_area_struct *vma = *vmap;
2779 unsigned long vma_start = vma->vm_start;
2780 struct mm_struct *mm = vma->vm_mm;
2781 struct vm_area_struct *new_vma, *prev;
2782 struct rb_node **rb_link, *rb_parent;
2783 struct mempolicy *pol;
2784 bool faulted_in_anon_vma = true;
2785
	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
2790 if (unlikely(!vma->vm_file && !vma->anon_vma)) {
2791 pgoff = addr >> PAGE_SHIFT;
2792 faulted_in_anon_vma = false;
2793 }
2794
2795 if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
2796 return NULL;
2797 new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
2798 vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
2799 if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
2803 if (unlikely(vma_start >= new_vma->vm_start &&
2804 vma_start < new_vma->vm_end)) {
			/*
			 * The only way the source vma can merge with
			 * itself here is if it has not been faulted in
			 * yet: mremap was then allowed to reset
			 * vm_pgoff to the destination address, so
			 * vma_merge() merged the source vma into the
			 * new location.
			 */
2817 VM_BUG_ON(faulted_in_anon_vma);
2818 *vmap = vma = new_vma;
2819 }
2820 *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
2821 } else {
2822 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
2823 if (new_vma) {
2824 *new_vma = *vma;
2825 new_vma->vm_start = addr;
2826 new_vma->vm_end = addr + len;
2827 new_vma->vm_pgoff = pgoff;
2828 pol = mpol_dup(vma_policy(vma));
2829 if (IS_ERR(pol))
2830 goto out_free_vma;
2831 vma_set_policy(new_vma, pol);
2832 INIT_LIST_HEAD(&new_vma->anon_vma_chain);
2833 if (anon_vma_clone(new_vma, vma))
2834 goto out_free_mempol;
2835 if (new_vma->vm_file)
2836 get_file(new_vma->vm_file);
2837 if (new_vma->vm_ops && new_vma->vm_ops->open)
2838 new_vma->vm_ops->open(new_vma);
2839 vma_link(mm, new_vma, prev, rb_link, rb_parent);
2840 *need_rmap_locks = false;
2841 }
2842 }
2843 return new_vma;
2844
2845 out_free_mempol:
2846 mpol_put(pol);
2847 out_free_vma:
2848 kmem_cache_free(vm_area_cachep, new_vma);
2849 return NULL;
2850}
2851
/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
2856int may_expand_vm(struct mm_struct *mm, unsigned long npages)
2857{
	unsigned long cur = mm->total_vm;	/* pages */
2859 unsigned long lim;
2860
2861 lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
2862
2863 if (cur + npages > lim)
2864 return 0;
2865 return 1;
2866}
2867
2868
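/*
 * Fault handler for "special" mappings (see install_special_mapping below):
 * the backing pages come from a NULL-terminated array of struct page *.
 */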
2869static int special_mapping_fault(struct vm_area_struct *vma,
2870 struct vm_fault *vmf)
2871{
2872 pgoff_t pgoff;
2873 struct page **pages;
2874
	/*
	 * special mappings have no vm_file, and in that case, the mm
	 * uses vm_pgoff internally. So we have to subtract it from here.
	 * We are allowed to do this because we are the mm; do not copy
	 * this code into drivers!
	 */
2881 pgoff = vmf->pgoff - vma->vm_pgoff;
2882
2883 for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
2884 pgoff--;
2885
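	/* The array is NULL-terminated; a non-NULL entry backs this offset. */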
2886 if (*pages) {
2887 struct page *page = *pages;
2888 get_page(page);
2889 vmf->page = page;
2890 return 0;
2891 }
2892
2893 return VM_FAULT_SIGBUS;
2894}
2895
/*
 * Having a close hook prevents vma merging regardless of flags.
 */
2899static void special_mapping_close(struct vm_area_struct *vma)
2900{
2901}
2902
2903static const struct vm_operations_struct special_mapping_vmops = {
2904 .close = special_mapping_close,
2905 .fault = special_mapping_fault,
2906};
2907
/*
 * Called with mm->mmap_sem held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
2917int install_special_mapping(struct mm_struct *mm,
2918 unsigned long addr, unsigned long len,
2919 unsigned long vm_flags, struct page **pages)
2920{
2921 int ret;
2922 struct vm_area_struct *vma;
2923
2924 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2925 if (unlikely(vma == NULL))
2926 return -ENOMEM;
2927
2928 INIT_LIST_HEAD(&vma->anon_vma_chain);
2929 vma->vm_mm = mm;
2930 vma->vm_start = addr;
2931 vma->vm_end = addr + len;
2932
2933 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
2934 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
2935
2936 vma->vm_ops = &special_mapping_vmops;
2937 vma->vm_private_data = pages;
2938
2939 ret = insert_vm_struct(mm, vma);
2940 if (ret)
2941 goto out;
2942
2943 mm->total_vm += len >> PAGE_SHIFT;
2944
2945 perf_event_mmap(vma);
2946
2947 return 0;
2948
2949out:
2950 kmem_cache_free(vm_area_cachep, vma);
2951 return ret;
2952}
2953
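/* Serializes mm_take_all_locks() callers system-wide. */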
2954static DEFINE_MUTEX(mm_all_locks_mutex);
2955
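/*
 * Lock one anon_vma root, at most once per mm_take_all_locks(): the LSB of
 * root->rb_root.rb_node is used as the "already taken" marker.
 */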
2956static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
2957{
2958 if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
2963 down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
2973 if (__test_and_set_bit(0, (unsigned long *)
2974 &anon_vma->root->rb_root.rb_node))
2975 BUG();
2976 }
2977}
2978
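/*
 * Lock one address_space, at most once per mm_take_all_locks(): the
 * AS_MM_ALL_LOCKS bit in mapping->flags marks it as already taken.
 */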
2979static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
2980{
2981 if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
2991 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
2992 BUG();
2993 mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
2994 }
2995}
2996
/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_sem in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_sem until mm_drop_all_locks() returns.
 *
 * mmap_sem in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas to be associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * as it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid to take the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We can take all the locks in random order because the VM code
 * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
 * takes more than one of them in a row. Secondly we're serialized by
 * the mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
3029int mm_take_all_locks(struct mm_struct *mm)
3030{
3031 struct vm_area_struct *vma;
3032 struct anon_vma_chain *avc;
3033
3034 BUG_ON(down_read_trylock(&mm->mmap_sem));
3035
3036 mutex_lock(&mm_all_locks_mutex);
3037
3038 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3039 if (signal_pending(current))
3040 goto out_unlock;
3041 if (vma->vm_file && vma->vm_file->f_mapping)
3042 vm_lock_mapping(mm, vma->vm_file->f_mapping);
3043 }
3044
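	/*
	 * File-backed mappings are locked; now take each anon_vma root lock,
	 * marking it as visited via the LSB of its rb_node.
	 */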
3045 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3046 if (signal_pending(current))
3047 goto out_unlock;
3048 if (vma->anon_vma)
3049 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3050 vm_lock_anon_vma(mm, avc->anon_vma);
3051 }
3052
3053 return 0;
3054
3055out_unlock:
3056 mm_drop_all_locks(mm);
3057 return -EINTR;
3058}
3059
3060static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
3061{
3062 if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need of atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
3075 if (!__test_and_clear_bit(0, (unsigned long *)
3076 &anon_vma->root->rb_root.rb_node))
3077 BUG();
3078 anon_vma_unlock_write(anon_vma);
3079 }
3080}
3081
3082static void vm_unlock_mapping(struct address_space *mapping)
3083{
3084 if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
3089 mutex_unlock(&mapping->i_mmap_mutex);
3090 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
3091 &mapping->flags))
3092 BUG();
3093 }
3094}
3095
/*
 * The mmap_sem cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
3100void mm_drop_all_locks(struct mm_struct *mm)
3101{
3102 struct vm_area_struct *vma;
3103 struct anon_vma_chain *avc;
3104
3105 BUG_ON(down_read_trylock(&mm->mmap_sem));
3106 BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
3107
3108 for (vma = mm->mmap; vma; vma = vma->vm_next) {
3109 if (vma->anon_vma)
3110 list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
3111 vm_unlock_anon_vma(avc->anon_vma);
3112 if (vma->vm_file && vma->vm_file->f_mapping)
3113 vm_unlock_mapping(vma->vm_file->f_mapping);
3114 }
3115
3116 mutex_unlock(&mm_all_locks_mutex);
3117}
3118
/*
 * Initialise the percpu counter used for memory accounting (vm_committed_as).
 */
3122void __init mmap_init(void)
3123{
3124 int ret;
3125
3126 ret = percpu_counter_init(&vm_committed_as, 0);
3127 VM_BUG_ON(ret);
3128}
3129
/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
3140static int init_user_reserve(void)
3141{
3142 unsigned long free_kbytes;
3143
3144 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3145
3146 sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
3147 return 0;
3148}
3149module_init(init_user_reserve)
3150
/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS mode. Smaller systems will
 * only reserve 3% of free pages by default.
 */
3161static int init_admin_reserve(void)
3162{
3163 unsigned long free_kbytes;
3164
3165 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3166
3167 sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
3168 return 0;
3169}
3170module_init(init_admin_reserve)
3171
/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
3190static int reserve_mem_notifier(struct notifier_block *nb,
3191 unsigned long action, void *data)
3192{
3193 unsigned long tmp, free_kbytes;
3194
3195 switch (action) {
3196 case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
3198 tmp = sysctl_user_reserve_kbytes;
3199 if (0 < tmp && tmp < (1UL << 17))
3200 init_user_reserve();
3201
		/* Default max is 8MB. Leave alone if modified by operator. */
3203 tmp = sysctl_admin_reserve_kbytes;
3204 if (0 < tmp && tmp < (1UL << 13))
3205 init_admin_reserve();
3206
3207 break;
3208 case MEM_OFFLINE:
3209 free_kbytes = global_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);
3210
3211 if (sysctl_user_reserve_kbytes > free_kbytes) {
3212 init_user_reserve();
3213 pr_info("vm.user_reserve_kbytes reset to %lu\n",
3214 sysctl_user_reserve_kbytes);
3215 }
3216
3217 if (sysctl_admin_reserve_kbytes > free_kbytes) {
3218 init_admin_reserve();
3219 pr_info("vm.admin_reserve_kbytes reset to %lu\n",
3220 sysctl_admin_reserve_kbytes);
3221 }
3222 break;
3223 default:
3224 break;
3225 }
3226 return NOTIFY_OK;
3227}
3228
3229static struct notifier_block reserve_mem_nb = {
3230 .notifier_call = reserve_mem_notifier,
3231};
3232
3233static int __meminit init_reserve_notifier(void)
3234{
3235 if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");
3237
3238 return 0;
3239}
3240module_init(init_reserve_notifier)
3241