/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Lock ordering in mm (as exercised in this file):
 *
 *   mm->mmap_sem
 *     page->flags PG_locked (lock_page)
 *       mapping->i_mmap_lock
 *         anon_vma->lock
 *           mm->page_table_lock or pte_lock
 *             mmlist_lock (in try_to_unmap_one)
 */
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>

#include <asm/tlbflush.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
}

void anon_vma_free(struct anon_vma *anon_vma)
{
	kmem_cache_free(anon_vma_cachep, anon_vma);
}

static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

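/*
 * Note on the data structures: each anon_vma_chain (avc) ties one vma to
 * one anon_vma, making the relation many-to-many.  A vma reaches all the
 * anon_vmas it belongs to through vma->anon_vma_chain (linked by
 * avc->same_vma); an anon_vma reaches all the vmas that might contain its
 * pages through anon_vma->head (linked by avc->same_anon_vma).
 */
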
/*
 * anon_vma_prepare - attach an anon_vma to a memory region
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, but if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma()
 * and that may actually touch the spinlock even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even for
 * the new allocation.  At the same time, we do not want to
 * do any locking for the common case of already having an
 * anon_vma.
 *
 * This must be called with the mmap_sem held for reading.
 */
int anon_vma_prepare(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	struct anon_vma_chain *avc;

	might_sleep();
	if (unlikely(!anon_vma)) {
		struct mm_struct *mm = vma->vm_mm;
		struct anon_vma *allocated;

		avc = anon_vma_chain_alloc();
		if (!avc)
			goto out_enomem;

		anon_vma = find_mergeable_anon_vma(vma);
		allocated = NULL;
		if (!anon_vma) {
			anon_vma = anon_vma_alloc();
			if (unlikely(!anon_vma))
				goto out_enomem_free_avc;
			allocated = anon_vma;
			/*
			 * This VMA had no anon_vma yet.  This anon_vma is
			 * the root of any anon_vma tree that might form.
			 */
			anon_vma->root = anon_vma;
		}

		anon_vma_lock(anon_vma);
		/* page_table_lock to protect against threads */
		spin_lock(&mm->page_table_lock);
		if (likely(!vma->anon_vma)) {
			vma->anon_vma = anon_vma;
			avc->anon_vma = anon_vma;
			avc->vma = vma;
			list_add(&avc->same_vma, &vma->anon_vma_chain);
			list_add_tail(&avc->same_anon_vma, &anon_vma->head);
			allocated = NULL;
			avc = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		anon_vma_unlock(anon_vma);

		if (unlikely(allocated))
			anon_vma_free(allocated);
		if (unlikely(avc))
			anon_vma_chain_free(avc);
	}
	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}

static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);

	anon_vma_lock(anon_vma);
	/*
	 * It's critical to add new vmas to the tail of the anon_vma,
	 * see comment in huge_memory.c:__split_huge_page().
	 */
	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
	anon_vma_unlock(anon_vma);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 */
int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
	struct anon_vma_chain *avc, *pavc;

	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
		avc = anon_vma_chain_alloc();
		if (!avc)
			goto enomem_failure;
		anon_vma_chain_link(dst, avc, pavc->anon_vma);
	}
	return 0;

 enomem_failure:
	unlink_anon_vmas(dst);
	return -ENOMEM;
}

/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
	struct anon_vma_chain *avc;
	struct anon_vma *anon_vma;

	/* Don't bother if the parent process has no anon_vma here. */
	if (!pvma->anon_vma)
		return 0;

	/*
	 * First, attach the new VMA to the parent VMA's anon_vmas,
	 * so rmap can find non-COWed pages in child processes.
	 */
	if (anon_vma_clone(vma, pvma))
		return -ENOMEM;

	/* Then add our own anon_vma. */
	anon_vma = anon_vma_alloc();
	if (!anon_vma)
		goto out_error;
	avc = anon_vma_chain_alloc();
	if (!avc)
		goto out_error_free_anon_vma;

	/*
	 * The root anon_vma's spinlock is the lock actually used when we
	 * lock any of the anon_vmas in this anon_vma tree.
	 */
	anon_vma->root = pvma->anon_vma->root;
	/*
	 * With refcounts, an anon_vma can stay around longer than the
	 * process it belongs to.  The root anon_vma needs to be pinned
	 * until this anon_vma is freed, because the lock lives in the root.
	 */
	get_anon_vma(anon_vma->root);
	/* Mark this anon_vma as the one where our new (COWed) pages go. */
	vma->anon_vma = anon_vma;
	anon_vma_chain_link(vma, avc, anon_vma);

	return 0;

 out_error_free_anon_vma:
	anon_vma_free(anon_vma);
 out_error:
	unlink_anon_vmas(vma);
	return -ENOMEM;
}

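/*
 * The net effect: after anon_vma_fork() the child vma carries one
 * anon_vma_chain per ancestral anon_vma (set up by anon_vma_clone())
 * plus one chain for its own fresh anon_vma, and every anon_vma in the
 * resulting tree shares the parent's root, so they all serialize on
 * root->lock.
 */
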
static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
{
	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
	int empty;

	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
	if (!anon_vma)
		return;

	anon_vma_lock(anon_vma);
	list_del(&anon_vma_chain->same_anon_vma);

	/* We must garbage collect the anon_vma if it's empty */
	empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
	anon_vma_unlock(anon_vma);

	if (empty) {
		/* We no longer need the root anon_vma */
		if (anon_vma->root != anon_vma)
			drop_anon_vma(anon_vma->root);
		anon_vma_free(anon_vma);
	}
}

void unlink_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc, *next;

	/*
	 * Unlink each anon_vma chained to the VMA.  The list is ordered
	 * from newest to oldest, so the root anon_vma is freed last.
	 */
	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
		anon_vma_unlink(avc);
		list_del(&avc->same_vma);
		anon_vma_chain_free(avc);
	}
}

static void anon_vma_ctor(void *data)
{
	struct anon_vma *anon_vma = data;

	spin_lock_init(&anon_vma->lock);
	anonvma_external_refcount_init(anon_vma);
	INIT_LIST_HEAD(&anon_vma->head);
}

void __init anon_vma_init(void)
{
	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}

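/*
 * Note on SLAB_DESTROY_BY_RCU: a freed anon_vma's memory may be reused
 * immediately, but only for another anon_vma, and it is not returned to
 * the page allocator until an RCU grace period has passed.  That is what
 * lets __page_lock_anon_vma() below dereference page->mapping and take
 * the spinlock under rcu_read_lock() without holding a reference.
 */
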
/*
 * Getting a lock on a stable anon_vma from a page off the LRU is
 * tricky: page_lock_anon_vma relies on RCU to guard against the races.
 */
struct anon_vma *__page_lock_anon_vma(struct page *page)
{
	struct anon_vma *anon_vma, *root_anon_vma;
	unsigned long anon_mapping;

	rcu_read_lock();
	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
		goto out;
	if (!page_mapped(page))
		goto out;

	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
	root_anon_vma = ACCESS_ONCE(anon_vma->root);
	spin_lock(&root_anon_vma->lock);

	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed.  But if it has been unmapped, we have no security against
	 * the anon_vma structure being freed and reused (it might even be
	 * a different anon_vma by now: SLAB_DESTROY_BY_RCU only guarantees
	 * the memory stays an anon_vma, so the spin_lock above cannot
	 * corrupt memory).  In that case, back out: drop the lock and
	 * return NULL so the caller never touches a stale anon_vma.
	 */
	if (page_mapped(page))
		return anon_vma;

	spin_unlock(&root_anon_vma->lock);
out:
	rcu_read_unlock();
	return NULL;
}

void page_unlock_anon_vma(struct anon_vma *anon_vma)
	__releases(&anon_vma->root->lock)
	__releases(RCU)
{
	anon_vma_unlock(anon_vma);
	rcu_read_unlock();
}

/*
 * At what user virtual address is page expected in @vma?
 * Returns virtual address or -EFAULT if page's index/offset is not
 * within the range mapped by @vma.
 */
inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	unsigned long address;

	if (unlikely(is_vm_hugetlb_page(vma)))
		pgoff = page->index << huge_page_order(page_hstate(page));
	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
		/* page should be within @vma mapping range */
		return -EFAULT;
	}
	return address;
}

/*
 * At what user virtual address is page expected in vma?
 * Caller should check the page is actually part of the vma.
 */
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
	if (PageAnon(page)) {
		struct anon_vma *page__anon_vma = page_anon_vma(page);
		/*
		 * Note: swapoff's unuse_vma() is more efficient with this
		 * check, and needs it to match anon_vma when KSM is active.
		 */
		if (!vma->anon_vma || !page__anon_vma ||
		    vma->anon_vma->root != page__anon_vma->root)
			return -EFAULT;
	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
		if (!vma->vm_file ||
		    vma->vm_file->f_mapping != page->mapping)
			return -EFAULT;
	} else
		return -EFAULT;
	return vma_address(page, vma);
}

/*
 * Check that @page is mapped at @address into @mm.
 *
 * If @sync is false, page_check_address may perform a racy check to avoid
 * the page table lock when the pte is not present (helpful when reclaiming
 * highly shared pages).
 *
 * On success returns with pte mapped and locked.
 */
pte_t *__page_check_address(struct page *page, struct mm_struct *mm,
			  unsigned long address, spinlock_t **ptlp, int sync)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	if (unlikely(PageHuge(page))) {
		pte = huge_pte_offset(mm, address);
		ptl = &mm->page_table_lock;
		goto check;
	}

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return NULL;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return NULL;
	if (pmd_trans_huge(*pmd))
		return NULL;

	pte = pte_offset_map(pmd, address);
	/* Make a quick check before getting the lock */
	if (!sync && !pte_present(*pte)) {
		pte_unmap(pte);
		return NULL;
	}

	ptl = pte_lockptr(mm, pmd);
check:
	spin_lock(ptl);
	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
		*ptlp = ptl;
		return pte;
	}
	pte_unmap_unlock(pte, ptl);
	return NULL;
}

/**
 * page_mapped_in_vma - check whether a page is really mapped in a VMA
 * @page: the page to test
 * @vma: the VMA to test
 *
 * Returns 1 if the page is mapped into the page tables of the VMA, 0
 * if the page is not mapped into the page tables of this VMA.  Only
 * valid for normal file or anonymous VMAs.
 */
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address;
	pte_t *pte;
	spinlock_t *ptl;

	address = vma_address(page, vma);
	if (address == -EFAULT)
		return 0;
	pte = page_check_address(page, vma->vm_mm, address, &ptl, 1);
	if (!pte)
		return 0;
	pte_unmap_unlock(pte, ptl);

	return 1;
}

/*
 * Subfunctions of page_referenced: page_referenced_one called
 * repeatedly from either page_referenced_anon or page_referenced_file.
 */
int page_referenced_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, unsigned int *mapcount,
			unsigned long *vm_flags)
{
	struct mm_struct *mm = vma->vm_mm;
	int referenced = 0;

	if (unlikely(PageTransHuge(page))) {
		pmd_t *pmd;

		spin_lock(&mm->page_table_lock);
		/*
		 * rmap might return false positives; we must filter
		 * these out using page_check_address_pmd().
		 */
		pmd = page_check_address_pmd(page, mm, address,
					     PAGE_CHECK_ADDRESS_PMD_FLAG);
		if (!pmd) {
			spin_unlock(&mm->page_table_lock);
			goto out;
		}

		if (vma->vm_flags & VM_LOCKED) {
			spin_unlock(&mm->page_table_lock);
			*mapcount = 0;	/* break early from loop */
			*vm_flags |= VM_LOCKED;
			goto out;
		}

		/* go ahead even if the pmd is pmd_trans_splitting() */
		if (pmdp_clear_flush_young_notify(vma, address, pmd))
			referenced++;
		spin_unlock(&mm->page_table_lock);
	} else {
		pte_t *pte;
		spinlock_t *ptl;

		/*
		 * rmap might return false positives; we must filter
		 * these out using page_check_address().
		 */
		pte = page_check_address(page, mm, address, &ptl, 0);
		if (!pte)
			goto out;

		if (vma->vm_flags & VM_LOCKED) {
			pte_unmap_unlock(pte, ptl);
			*mapcount = 0;	/* break early from loop */
			*vm_flags |= VM_LOCKED;
			goto out;
		}

		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			/*
			 * Don't treat a reference through a sequentially read
			 * mapping as such.  If the page has been used in
			 * another mapping, we will catch it; if this other
			 * mapping is already gone, the unmap path will have
			 * set PG_referenced or activated the page.
			 */
			if (likely(!VM_SequentialReadHint(vma)))
				referenced++;
		}
		pte_unmap_unlock(pte, ptl);
	}

	/* Pretend the page is referenced if the task has the
	   swap token and is in the middle of a page fault. */
	if (mm != current->mm && has_swap_token(mm) &&
			rwsem_is_locked(&mm->mmap_sem))
		referenced++;

	(*mapcount)--;

	if (referenced)
		*vm_flags |= vma->vm_flags;
out:
	return referenced;
}

static int page_referenced_anon(struct page *page,
				struct mem_cgroup *mem_cont,
				unsigned long *vm_flags)
{
	unsigned int mapcount;
	struct anon_vma *anon_vma;
	struct anon_vma_chain *avc;
	int referenced = 0;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return referenced;

	mapcount = page_mapcount(page);
	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		/*
		 * If we are reclaiming on behalf of a cgroup, skip
		 * counting on behalf of references from different
		 * cgroups
		 */
		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
			continue;
		referenced += page_referenced_one(page, vma, address,
						  &mapcount, vm_flags);
		if (!mapcount)
			break;
	}

	page_unlock_anon_vma(anon_vma);
	return referenced;
}

/**
 * page_referenced_file - referenced check for object-based rmap
 * @page: the page we're checking references on.
 * @mem_cont: target memory controller
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
 * For an object-based mapped page, find all the places it is mapped and
 * check/clear the referenced flag.  This is done by following the page->mapping
 * pointer, then walking the chain of vmas it holds.  It returns the number
 * of references it found.
 *
 * This function is only called from page_referenced for object-based pages.
 */
static int page_referenced_file(struct page *page,
				struct mem_cgroup *mem_cont,
				unsigned long *vm_flags)
{
	unsigned int mapcount;
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int referenced = 0;

	/*
	 * The caller's checks on page->mapping and !PageAnon have made
	 * sure that this is a file page: the check for page->mapping
	 * excludes the case just before it gets set on an anon page.
	 */
	BUG_ON(PageAnon(page));

	/*
	 * The page lock not only makes sure that page->mapping cannot
	 * suddenly be NULLified by truncation, it makes sure that the
	 * structure at mapping cannot be freed and reused yet,
	 * so we can safely take mapping->i_mmap_lock.
	 */
	BUG_ON(!PageLocked(page));

	spin_lock(&mapping->i_mmap_lock);

	/*
	 * i_mmap_lock does not stabilize mapcount at all, but mapcount
	 * is more likely to be accurate if we note it after spinning.
	 */
	mapcount = page_mapcount(page);

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		/*
		 * If we are reclaiming on behalf of a cgroup, skip
		 * counting on behalf of references from different
		 * cgroups
		 */
		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
			continue;
		referenced += page_referenced_one(page, vma, address,
						  &mapcount, vm_flags);
		if (!mapcount)
			break;
	}

	spin_unlock(&mapping->i_mmap_lock);
	return referenced;
}

/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 * @mem_cont: target memory controller
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
int page_referenced(struct page *page,
		    int is_locked,
		    struct mem_cgroup *mem_cont,
		    unsigned long *vm_flags)
{
	int referenced = 0;
	int we_locked = 0;

	*vm_flags = 0;
	if (page_mapped(page) && page_rmapping(page)) {
		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
			we_locked = trylock_page(page);
			if (!we_locked) {
				referenced++;
				goto out;
			}
		}
		if (unlikely(PageKsm(page)))
			referenced += page_referenced_ksm(page, mem_cont,
								vm_flags);
		else if (PageAnon(page))
			referenced += page_referenced_anon(page, mem_cont,
								vm_flags);
		else if (page->mapping)
			referenced += page_referenced_file(page, mem_cont,
								vm_flags);
		if (we_locked)
			unlock_page(page);
	}
out:
	if (page_test_and_clear_young(page))
		referenced++;

	return referenced;
}

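/*
 * Usage note: page reclaim is the main caller of page_referenced();
 * vmscan uses both the returned count and the collected vm_flags
 * (e.g. VM_LOCKED, VM_EXEC) when deciding whether to keep a page
 * active or try to unmap it.
 */
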
static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
			    unsigned long address)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	spinlock_t *ptl;
	int ret = 0;

	pte = page_check_address(page, mm, address, &ptl, 1);
	if (!pte)
		goto out;

	if (pte_dirty(*pte) || pte_write(*pte)) {
		pte_t entry;

		flush_cache_page(vma, address, pte_pfn(*pte));
		entry = ptep_clear_flush_notify(vma, address, pte);
		entry = pte_wrprotect(entry);
		entry = pte_mkclean(entry);
		set_pte_at(mm, address, pte, entry);
		ret = 1;
	}

	pte_unmap_unlock(pte, ptl);
out:
	return ret;
}

static int page_mkclean_file(struct address_space *mapping, struct page *page)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int ret = 0;

	BUG_ON(PageAnon(page));

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		if (vma->vm_flags & VM_SHARED) {
			unsigned long address = vma_address(page, vma);
			if (address == -EFAULT)
				continue;
			ret += page_mkclean_one(page, vma, address);
		}
	}
	spin_unlock(&mapping->i_mmap_lock);
	return ret;
}

int page_mkclean(struct page *page)
{
	int ret = 0;

	BUG_ON(!PageLocked(page));

	if (page_mapped(page)) {
		struct address_space *mapping = page_mapping(page);
		if (mapping) {
			ret = page_mkclean_file(mapping, page);
			if (page_test_dirty(page)) {
				page_clear_dirty(page, 1);
				ret = 1;
			}
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(page_mkclean);

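/*
 * Usage note: page_mkclean() is how the dirty-tracking side of writeback
 * re-protects a shared mmap'ed page.  By write-protecting and cleaning
 * every pte that maps the page, the next store from userspace faults and
 * goes through page_mkwrite(), so the filesystem sees the page become
 * dirty again.
 */
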
/**
 * page_move_anon_rmap - move a page to our anon_vma
 * @page:	the page to move to our anon_vma
 * @vma:	the vma the page belongs to
 * @address:	the user virtual address mapped
 *
 * When a page belongs exclusively to one process after a COW event,
 * that page can be moved into the anon_vma that belongs to just that
 * process, so the rmap code will not search the parent or sibling
 * processes.
 */
void page_move_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(!anon_vma);
	VM_BUG_ON(page->index != linear_page_index(vma, address));

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
}

/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 * @exclusive:	the page is exclusively owned by the current process
 */
static void __page_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;

	/*
	 * If the page isn't exclusively mapped into this vma,
	 * we must use the _oldest_ possible anon_vma for the
	 * page mapping!
	 */
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}

/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 */
static void __page_check_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
#ifdef CONFIG_DEBUG_VM
	/*
	 * The page's anon-rmap details (mapping and index) are guaranteed to
	 * be set up correctly at this point.
	 *
	 * We have exclusion against page_add_anon_rmap because the caller
	 * always holds the page locked, except if called from page_dup_rmap,
	 * in which case the page is already known to be setup.
	 *
	 * We have exclusion against page_add_new_anon_rmap because those pages
	 * are initially only visible via the pagetables, and the pte is locked
	 * over the call to page_add_new_anon_rmap.
	 */
	BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root);
	BUG_ON(page->index != linear_page_index(vma, address));
#endif
}

/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * The caller needs to hold the pte lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that PageAnon is not being upgraded racily to PageKsm
 * (but PageKsm is never downgraded to PageAnon).
 */
void page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	do_page_add_anon_rmap(page, vma, address, 0);
}

/*
 * Special version of the above for do_swap_page, which often runs
 * into pages that are exclusively owned by the current process.
 * Everybody else should continue to prefer page_add_anon_rmap above.
 */
void do_page_add_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	int first = atomic_inc_and_test(&page->_mapcount);
	if (first) {
		if (!PageTransHuge(page))
			__inc_zone_page_state(page, NR_ANON_PAGES);
		else
			__inc_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
	}
	if (unlikely(PageKsm(page)))
		return;

	VM_BUG_ON(!PageLocked(page));
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	if (first)
		__page_set_anon_rmap(page, vma, address, exclusive);
	else
		__page_check_anon_rmap(page, vma, address);
}

/**
 * page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	SetPageSwapBacked(page);
	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
	if (!PageTransHuge(page))
		__inc_zone_page_state(page, NR_ANON_PAGES);
	else
		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
	__page_set_anon_rmap(page, vma, address, 1);
	if (page_evictable(page, vma))
		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
	else
		add_page_to_unevictable_list(page);
}

/**
 * page_add_file_rmap - add pte mapping to a file page
 * @page: the page to add the mapping to
 *
 * The caller needs to hold the pte lock.
 */
void page_add_file_rmap(struct page *page)
{
	if (atomic_inc_and_test(&page->_mapcount)) {
		__inc_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
	}
}

/**
 * page_remove_rmap - take down pte mapping from a page
 * @page: page to remove mapping from
 *
 * The caller needs to hold the pte lock.
 */
void page_remove_rmap(struct page *page)
{
	/* page still mapped by someone else? */
	if (!atomic_add_negative(-1, &page->_mapcount))
		return;

	/*
	 * Now that the last pte has gone, s390 must transfer the dirty
	 * flag from the storage key to struct page, so that a final pte
	 * dirty bit is not lost.  Only swapcache and file-backed pages
	 * need this; a fully unmapped anon page can no longer be dirtied.
	 */
	if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) {
		page_clear_dirty(page, 1);
		set_page_dirty(page);
	}
	/*
	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
	 * and not charged by memcg for now.
	 */
	if (unlikely(PageHuge(page)))
		return;
	if (PageAnon(page)) {
		mem_cgroup_uncharge_page(page);
		if (!PageTransHuge(page))
			__dec_zone_page_state(page, NR_ANON_PAGES);
		else
			__dec_zone_page_state(page,
					      NR_ANON_TRANSPARENT_HUGEPAGES);
	} else {
		__dec_zone_page_state(page, NR_FILE_MAPPED);
		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
	}
	/*
	 * It would be tidy to reset the PageAnon mapping here,
	 * but that might overwrite a racing page_add_anon_rmap
	 * which increments mapcount after us but sets mapping
	 * before us: so leave the reset to free_hot_cold_page,
	 * and remember that it's only reliable while mapped.
	 * Leaving it set also helps swapoff to reinstate ptes
	 * faster for those pages still in swapcache.
	 */
}

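/*
 * Note on _mapcount: it starts at -1 for an unmapped page, so
 * atomic_add_negative(-1, ...) above is true exactly when the last
 * mapping went away, and atomic_inc_and_test() in the add paths is
 * true exactly for the first mapping.
 */
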
/*
 * Subfunctions of try_to_unmap: try_to_unmap_one called
 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
 */
int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, enum ttu_flags flags)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	int ret = SWAP_AGAIN;

	pte = page_check_address(page, mm, address, &ptl, 0);
	if (!pte)
		goto out;

	/*
	 * If the page is mlock()d, we cannot swap it out.
	 * If it's recently referenced (perhaps page_referenced
	 * skipped over this mm) then we should reactivate it.
	 */
	if (!(flags & TTU_IGNORE_MLOCK)) {
		if (vma->vm_flags & VM_LOCKED)
			goto out_mlock;

		if (TTU_ACTION(flags) == TTU_MUNLOCK)
			goto out_unmap;
	}
	if (!(flags & TTU_IGNORE_ACCESS)) {
		if (ptep_clear_flush_young_notify(vma, address, pte)) {
			ret = SWAP_FAIL;
			goto out_unmap;
		}
	}

	/* Nuke the page table entry. */
	flush_cache_page(vma, address, page_to_pfn(page));
	pteval = ptep_clear_flush_notify(vma, address, pte);

	/* Move the dirty bit to the physical page now the pte is gone. */
	if (pte_dirty(pteval))
		set_page_dirty(page);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
		set_pte_at(mm, address, pte,
			   swp_entry_to_pte(make_hwpoison_entry(page)));
	} else if (PageAnon(page)) {
		swp_entry_t entry = { .val = page_private(page) };

		if (PageSwapCache(page)) {
			/*
			 * Store the swap location in the pte.
			 * See handle_pte_fault() ...
			 */
			if (swap_duplicate(entry) < 0) {
				set_pte_at(mm, address, pte, pteval);
				ret = SWAP_FAIL;
				goto out_unmap;
			}
			if (list_empty(&mm->mmlist)) {
				spin_lock(&mmlist_lock);
				if (list_empty(&mm->mmlist))
					list_add(&mm->mmlist, &init_mm.mmlist);
				spin_unlock(&mmlist_lock);
			}
			dec_mm_counter(mm, MM_ANONPAGES);
			inc_mm_counter(mm, MM_SWAPENTS);
		} else if (PAGE_MIGRATION) {
			/*
			 * Store the pfn of the page in a special migration
			 * pte. do_swap_page() will wait until the migration
			 * pte is removed and then restart fault handling.
			 */
			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
			entry = make_migration_entry(page, pte_write(pteval));
		}
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
		BUG_ON(pte_file(*pte));
	} else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
		/* Establish migration entry for a file page */
		swp_entry_t entry;
		entry = make_migration_entry(page, pte_write(pteval));
		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
	} else
		dec_mm_counter(mm, MM_FILEPAGES);

	page_remove_rmap(page);
	page_cache_release(page);

out_unmap:
	pte_unmap_unlock(pte, ptl);
out:
	return ret;

out_mlock:
	pte_unmap_unlock(pte, ptl);

	/*
	 * We need mmap_sem locking, otherwise the VM_LOCKED check gives
	 * an unstable result and races.  Plus, we can't wait here because
	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
	 * If the trylock fails, the page remains on the evictable lru and
	 * later vmscan can retry to move the page to the unevictable lru
	 * if the page is actually mlocked.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		if (vma->vm_flags & VM_LOCKED) {
			mlock_vma_page(page);
			ret = SWAP_MLOCK;
		}
		up_read(&vma->vm_mm->mmap_sem);
	}
	return ret;
}

/*
 * objrmap doesn't work for nonlinear VMAs because the assumption that
 * offset-into-file correlates with offset-into-virtual-addresses does not hold.
 * Consequently, given a particular page and its ->index, we cannot locate the
 * ptes which are mapping that page without an exhaustive linear search.
 *
 * So what this code does is a mini "virtual scan" of each nonlinear VMA which
 * maps the file to which the target page belongs.  The ->vm_private_data field
 * holds the current cursor into that scan.  Successive searches will circulate
 * around the vma's virtual address space.
 *
 * So as more replacement pressure is applied to the pages in a nonlinear VMA,
 * more scanning pressure is placed against them as well.  Eventually pages
 * will become fully unmapped and are eligible for eviction.
 *
 * For very sparsely populated VMAs this is a little inefficient - chances are
 * there won't be many ptes located within the scan cluster.  In this case
 * maybe we could scan further - to the end of the pte page, perhaps.
 *
 * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
 * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
 * rather than unmapping them.  If we encounter the "check_page" that vmscan is
 * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
 */
#define CLUSTER_SIZE	min(32*PAGE_SIZE, PMD_SIZE)
#define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
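
/*
 * Worked example: with 4K pages and a 2M PMD, CLUSTER_SIZE is
 * min(32 * 4K, 2M) = 128K, i.e. 32 ptes per scan cluster, and
 * CLUSTER_MASK rounds an address down to a 128K boundary.
 */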

static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
		struct vm_area_struct *vma, struct page *check_page)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	struct page *page;
	unsigned long address;
	unsigned long end;
	int ret = SWAP_AGAIN;
	int locked_vma = 0;

	address = (vma->vm_start + cursor) & CLUSTER_MASK;
	end = address + CLUSTER_SIZE;
	if (address < vma->vm_start)
		address = vma->vm_start;
	if (end > vma->vm_end)
		end = vma->vm_end;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		return ret;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return ret;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return ret;

	/*
	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
	 * keep the sem while scanning the cluster for mlocking pages.
	 */
	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
		locked_vma = (vma->vm_flags & VM_LOCKED);
		if (!locked_vma)
			up_read(&vma->vm_mm->mmap_sem); /* don't need it */
	}

	pte = pte_offset_map_lock(mm, pmd, address, &ptl);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	for (; address < end; pte++, address += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, address, *pte);
		BUG_ON(!page || PageAnon(page));

		if (locked_vma) {
			mlock_vma_page(page);	/* no-op if already mlocked */
			if (page == check_page)
				ret = SWAP_MLOCK;
			continue;	/* don't unmap */
		}

		if (ptep_clear_flush_young_notify(vma, address, pte))
			continue;

		/* Nuke the page table entry. */
		flush_cache_page(vma, address, pte_pfn(*pte));
		pteval = ptep_clear_flush_notify(vma, address, pte);

		/* If nonlinear, store the file page offset in the pte. */
		if (page->index != linear_page_index(vma, address))
			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));

		/* Move the dirty bit to the physical page now the pte is gone. */
		if (pte_dirty(pteval))
			set_page_dirty(page);

		page_remove_rmap(page);
		page_cache_release(page);
		dec_mm_counter(mm, MM_FILEPAGES);
		(*mapcount)--;
	}
	pte_unmap_unlock(pte - 1, ptl);
	if (locked_vma)
		up_read(&vma->vm_mm->mmap_sem);
	return ret;
}

bool is_vma_temporary_stack(struct vm_area_struct *vma)
{
	int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);

	if (!maybe_stack)
		return false;

	if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
						VM_STACK_INCOMPLETE_SETUP)
		return true;

	return false;
}

/**
 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
 * rmap method
 * @page: the page to unmap/unlock
 * @flags: action and flags
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
 * This function is only called from try_to_unmap/try_to_munlock for
 * anonymous pages.
 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
	struct anon_vma *anon_vma;
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	anon_vma = page_lock_anon_vma(page);
	if (!anon_vma)
		return ret;

	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address;

		/*
		 * During exec, a temporary VMA is setup and later moved.
		 * The VMA is moved under the anon_vma lock but not the
		 * page tables leading to a race where migration cannot
		 * find the migration ptes. Rather than increasing the
		 * locking requirements of exec(), migration skips
		 * temporary VMAs until after exec() completes.
		 */
		if (PAGE_MIGRATION && (flags & TTU_MIGRATION) &&
				is_vma_temporary_stack(vma))
			continue;

		address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		ret = try_to_unmap_one(page, vma, address, flags);
		if (ret != SWAP_AGAIN || !page_mapped(page))
			break;
	}

	page_unlock_anon_vma(anon_vma);
	return ret;
}

/**
 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
 * @page: the page to unmap/unlock
 * @flags: action and flags
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * This function is only called from try_to_unmap/try_to_munlock for
 * object-based pages.
 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int ret = SWAP_AGAIN;
	unsigned long cursor;
	unsigned long max_nl_cursor = 0;
	unsigned long max_nl_size = 0;
	unsigned int mapcount;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		ret = try_to_unmap_one(page, vma, address, flags);
		if (ret != SWAP_AGAIN || !page_mapped(page))
			goto out;
	}

	if (list_empty(&mapping->i_mmap_nonlinear))
		goto out;

	/*
	 * We don't bother to try to find the munlocked page in nonlinears.
	 * It's costly. Instead, later, page reclaim logic may call
	 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
	 */
	if (TTU_ACTION(flags) == TTU_MUNLOCK)
		goto out;

	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
						shared.vm_set.list) {
		cursor = (unsigned long) vma->vm_private_data;
		if (cursor > max_nl_cursor)
			max_nl_cursor = cursor;
		cursor = vma->vm_end - vma->vm_start;
		if (cursor > max_nl_size)
			max_nl_size = cursor;
	}

	if (max_nl_size == 0) {	/* all nonlinears locked or reserved? */
		ret = SWAP_FAIL;
		goto out;
	}

	/*
	 * We don't try to search for this page in the nonlinear vmas,
	 * and page_referenced wouldn't have found it anyway.  Instead
	 * just walk the nonlinear vmas trying to age and unmap some.
	 * The mapcount of the page we came in with is irrelevant,
	 * but even so use it as a guide to how hard we should try?
	 */
	mapcount = page_mapcount(page);
	if (!mapcount)
		goto out;
	cond_resched_lock(&mapping->i_mmap_lock);

	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
	if (max_nl_cursor == 0)
		max_nl_cursor = CLUSTER_SIZE;

	do {
		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
						shared.vm_set.list) {
			cursor = (unsigned long) vma->vm_private_data;
			while ( cursor < max_nl_cursor &&
				cursor < vma->vm_end - vma->vm_start) {
				if (try_to_unmap_cluster(cursor, &mapcount,
						vma, page) == SWAP_MLOCK)
					ret = SWAP_MLOCK;
				cursor += CLUSTER_SIZE;
				vma->vm_private_data = (void *) cursor;
				if ((int)mapcount <= 0)
					goto out;
			}
			vma->vm_private_data = (void *) max_nl_cursor;
		}
		cond_resched_lock(&mapping->i_mmap_lock);
		max_nl_cursor += CLUSTER_SIZE;
	} while (max_nl_cursor <= max_nl_size);

	/*
	 * Don't loop forever (perhaps all the remaining pages are
	 * in locked vmas).  Reset cursor on all unreserved nonlinear
	 * vmas, now forgetting on which ones it had fallen behind.
	 */
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
		vma->vm_private_data = NULL;
out:
	spin_unlock(&mapping->i_mmap_lock);
	return ret;
}

/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path.  Caller must hold the page lock.
 * Return values are:
 *
 * SWAP_SUCCESS	- we succeeded in removing all mappings
 * SWAP_AGAIN	- we missed a mapping, try again later
 * SWAP_FAIL	- the page is unswappable
 * SWAP_MLOCK	- page is mlocked.
 */
int try_to_unmap(struct page *page, enum ttu_flags flags)
{
	int ret;

	BUG_ON(!PageLocked(page));
	VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));

	if (unlikely(PageKsm(page)))
		ret = try_to_unmap_ksm(page, flags);
	else if (PageAnon(page))
		ret = try_to_unmap_anon(page, flags);
	else
		ret = try_to_unmap_file(page, flags);
	if (ret != SWAP_MLOCK && !page_mapped(page))
		ret = SWAP_SUCCESS;
	return ret;
}
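
/*
 * Usage note: vmscan treats the return values differently: SWAP_SUCCESS
 * lets the page proceed toward pageout and freeing, SWAP_AGAIN and
 * SWAP_FAIL keep or reactivate it, and SWAP_MLOCK sends it to the
 * unevictable list.
 */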

/**
 * try_to_munlock - try to munlock a page
 * @page: the page to be munlocked
 *
 * Called from munlock code.  Checks all of the VMAs mapping the page
 * to make sure nobody else has this page mlocked. The page will be
 * returned with PG_mlocked cleared if no other vmas have it mlocked.
 *
 * Return values are:
 *
 * SWAP_AGAIN	- no vma is holding page mlocked, or,
 * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
 * SWAP_FAIL	- page cannot be located at present
 * SWAP_MLOCK	- page is now mlocked.
 */
int try_to_munlock(struct page *page)
{
	VM_BUG_ON(!PageLocked(page) || PageLRU(page));

	if (unlikely(PageKsm(page)))
		return try_to_unmap_ksm(page, TTU_MUNLOCK);
	else if (PageAnon(page))
		return try_to_unmap_anon(page, TTU_MUNLOCK);
	else
		return try_to_unmap_file(page, TTU_MUNLOCK);
}

#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
/*
 * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
 * if necessary.  Be careful to do all the tests under the lock.  Once
 * we know we are the last user, nobody else can get a reference and we
 * can do the freeing without the lock.
 */
void drop_anon_vma(struct anon_vma *anon_vma)
{
	BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0);
	if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
		struct anon_vma *root = anon_vma->root;
		int empty = list_empty(&anon_vma->head);
		int last_root_user = 0;
		int root_empty = 0;

		/*
		 * The refcount on a non-root anon_vma got dropped.  Drop
		 * the refcount on the root and check if we need to free it.
		 */
		if (empty && anon_vma != root) {
			BUG_ON(atomic_read(&root->external_refcount) <= 0);
			last_root_user = atomic_dec_and_test(&root->external_refcount);
			root_empty = list_empty(&root->head);
		}
		anon_vma_unlock(anon_vma);

		if (empty) {
			anon_vma_free(anon_vma);
			if (root_empty && last_root_user)
				anon_vma_free(root);
		}
	}
}
#endif

#ifdef CONFIG_MIGRATION
/*
 * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
 * Called by migrate.c to remove migration ptes, but might be used
 * more later.
 */
static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct anon_vma *anon_vma;
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	/*
	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
	 * because that depends on page_mapped(); but not all its usages
	 * are holding mmap_sem.  Users without mmap_sem are required to
	 * take a reference count to prevent the anon_vma disappearing.
	 */
	anon_vma = page_anon_vma(page);
	if (!anon_vma)
		return ret;
	anon_vma_lock(anon_vma);
	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		ret = rmap_one(page, vma, address, arg);
		if (ret != SWAP_AGAIN)
			break;
	}
	anon_vma_unlock(anon_vma);
	return ret;
}

static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int ret = SWAP_AGAIN;

	if (!mapping)
		return ret;
	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		unsigned long address = vma_address(page, vma);
		if (address == -EFAULT)
			continue;
		ret = rmap_one(page, vma, address, arg);
		if (ret != SWAP_AGAIN)
			break;
	}
	/*
	 * No nonlinear handling: being always shared, nonlinear vmas
	 * never contain migration ptes.  Decide what needs doing if
	 * they are ever added.
	 */
	spin_unlock(&mapping->i_mmap_lock);
	return ret;
}

int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);
	else if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);
	else
		return rmap_walk_file(page, rmap_one, arg);
}
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following three functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
static void __hugepage_set_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address, int exclusive)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);

	if (PageAnon(page))
		return;
	if (!exclusive)
		anon_vma = anon_vma->root;

	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
	page->mapping = (struct address_space *) anon_vma;
	page->index = linear_page_index(vma, address);
}

void hugepage_add_anon_rmap(struct page *page,
			    struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;
	int first;

	BUG_ON(!PageLocked(page));
	BUG_ON(!anon_vma);
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	first = atomic_inc_and_test(&page->_mapcount);
	if (first)
		__hugepage_set_anon_rmap(page, vma, address, 0);
}

void hugepage_add_new_anon_rmap(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
	atomic_set(&page->_mapcount, 0);
	__hugepage_set_anon_rmap(page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */