/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and
 * the file methods track pages belonging to an inode.
 *
 * Lock ordering: anon_vma->rwsem nests inside mmap_lock, and the pte
 * lock / mm->page_table_lock nest inside anon_vma->rwsem.
 */
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/memcontrol.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>

#include <asm/tlbflush.h>

#include <trace/events/tlb.h>

#include "internal.h"

static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;

static inline struct anon_vma *anon_vma_alloc(void)
{
	struct anon_vma *anon_vma;

	anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
	if (anon_vma) {
		atomic_set(&anon_vma->refcount, 1);
		anon_vma->degree = 1;	/* Reference for first vma */
		anon_vma->parent = anon_vma;
		/*
		 * Initialise the anon_vma root to point to itself. If called
		 * from fork, the root will be reset to the parent's anon_vma.
		 */
		anon_vma->root = anon_vma;
	}

	return anon_vma;
}

static inline void anon_vma_free(struct anon_vma *anon_vma)
{
	VM_BUG_ON(atomic_read(&anon_vma->refcount));

	/*
	 * Synchronize against page_lock_anon_vma_read() such that
	 * we can safely hold the lock without the anon_vma getting
	 * freed.
	 *
	 * Relies on the full mb implied by the atomic_dec_and_test() from
	 * put_anon_vma() against the acquire barrier implied by
	 * down_read_trylock() from page_lock_anon_vma_read(). This orders:
	 *
	 * page_lock_anon_vma_read()	VS	put_anon_vma()
	 *   down_read_trylock()		  atomic_dec_and_test()
	 *   LOCK				  MB
	 *   atomic_read()			  rwsem_is_locked()
	 *
	 * LOCK should suffice since the actual taking of the lock must
	 * happen _before_ what follows.
	 */
	might_sleep();
	if (rwsem_is_locked(&anon_vma->root->rwsem)) {
		anon_vma_lock_write(anon_vma);
		anon_vma_unlock_write(anon_vma);
	}

	kmem_cache_free(anon_vma_cachep, anon_vma);
}

static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
}

static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
{
	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
}

static void anon_vma_chain_link(struct vm_area_struct *vma,
				struct anon_vma_chain *avc,
				struct anon_vma *anon_vma)
{
	avc->vma = vma;
	avc->anon_vma = anon_vma;
	list_add(&avc->same_vma, &vma->anon_vma_chain);
	anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
}

/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in page_lock_anon_vma_read()
 * and that may actually touch the rwsem even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 *
 * This must be called with the mmap_lock held for reading.
 */
int __anon_vma_prepare(struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	struct anon_vma *anon_vma, *allocated;
	struct anon_vma_chain *avc;

	might_sleep();

	avc = anon_vma_chain_alloc(GFP_KERNEL);
	if (!avc)
		goto out_enomem;

	anon_vma = find_mergeable_anon_vma(vma);
	allocated = NULL;
	if (!anon_vma) {
		anon_vma = anon_vma_alloc();
		if (unlikely(!anon_vma))
			goto out_enomem_free_avc;
		allocated = anon_vma;
	}

	anon_vma_lock_write(anon_vma);
	/* page_table_lock to protect against threads */
	spin_lock(&mm->page_table_lock);
	if (likely(!vma->anon_vma)) {
		vma->anon_vma = anon_vma;
		anon_vma_chain_link(vma, avc, anon_vma);
		/* vma reference or self-parent link for new root */
		anon_vma->degree++;
		allocated = NULL;
		avc = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	anon_vma_unlock_write(anon_vma);

	if (unlikely(allocated))
		put_anon_vma(allocated);
	if (unlikely(avc))
		anon_vma_chain_free(avc);

	return 0;

 out_enomem_free_avc:
	anon_vma_chain_free(avc);
 out_enomem:
	return -ENOMEM;
}

/*
 * This is a useful helper function for locking the anon_vma root as
 * we traverse the vma->anon_vma_chain, looping over anon_vma's that
 * have the same vma.
 *
 * Such anon_vma's should have the same root, so you'd expect to see
 * just a single lock/unlock for the whole traversal.
 */
static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
{
	struct anon_vma *new_root = anon_vma->root;
	if (new_root != root) {
		if (WARN_ON_ONCE(root))
			up_write(&root->rwsem);
		root = new_root;
		down_write(&root->rwsem);
	}
	return root;
}

static inline void unlock_anon_vma_root(struct anon_vma *root)
{
	if (root)
		up_write(&root->rwsem);
}

/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
 * anon_vma_fork(). The first three want an exact copy of src, while the last
 * one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
 * endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
 * we can identify this case by checking (!dst->anon_vma && src->anon_vma).
 *
 * If (!dst->anon_vma && src->anon_vma) is true, this function tries to find
 * and reuse an existing anon_vma which has no vmas and only one child
 * anon_vma. This prevents degradation of the anon_vma hierarchy to an endless
 * linear chain in the case of a constantly forking task. On the other hand,
 * an anon_vma with more than one child isn't reused even if there was no
 * alive vma, thus the rmap walker has a good chance of avoiding scanning the
 * whole hierarchy when it searches where the page is mapped.
 */
275int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
276{
277 struct anon_vma_chain *avc, *pavc;
278 struct anon_vma *root = NULL;
279
280 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
281 struct anon_vma *anon_vma;
282
283 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
284 if (unlikely(!avc)) {
285 unlock_anon_vma_root(root);
286 root = NULL;
287 avc = anon_vma_chain_alloc(GFP_KERNEL);
288 if (!avc)
289 goto enomem_failure;
290 }
291 anon_vma = pavc->anon_vma;
292 root = lock_anon_vma_root(root, anon_vma);
293 anon_vma_chain_link(dst, avc, anon_vma);
		/*
		 * Reuse an existing anon_vma if its degree is lower than two,
		 * that means it has no vma and only one anon_vma child.
		 *
		 * Do not choose the parent anon_vma, otherwise the first child
		 * will always reuse it. The root anon_vma is never reused:
		 * it has a self-parent reference and at least one child.
		 */
303 if (!dst->anon_vma && src->anon_vma &&
304 anon_vma != src->anon_vma && anon_vma->degree < 2)
305 dst->anon_vma = anon_vma;
306 }
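	/*
	 * If an anon_vma was chosen for reuse above, dst now points at it,
	 * so bump its degree to account for the extra VMA reference.
	 */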
307 if (dst->anon_vma)
308 dst->anon_vma->degree++;
309 unlock_anon_vma_root(root);
310 return 0;
311
312 enomem_failure:
	/*
	 * dst->anon_vma is dropped here otherwise its degree can be
	 * incorrectly decremented in unlink_anon_vmas().
	 * We can safely do this because callers of anon_vma_clone() don't care
	 * about dst->anon_vma if anon_vma_clone() failed.
	 */
319 dst->anon_vma = NULL;
320 unlink_anon_vmas(dst);
321 return -ENOMEM;
322}
323
/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
329int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
330{
331 struct anon_vma_chain *avc;
332 struct anon_vma *anon_vma;
333 int error;
334
335
336 if (!pvma->anon_vma)
337 return 0;
338
339
340 vma->anon_vma = NULL;
341
342
343
344
345
346 error = anon_vma_clone(vma, pvma);
347 if (error)
348 return error;
349
350
351 if (vma->anon_vma)
352 return 0;
353
354
355 anon_vma = anon_vma_alloc();
356 if (!anon_vma)
357 goto out_error;
358 avc = anon_vma_chain_alloc(GFP_KERNEL);
359 if (!avc)
360 goto out_error_free_anon_vma;
361
362
363
364
365
366 anon_vma->root = pvma->anon_vma->root;
367 anon_vma->parent = pvma->anon_vma;
368
369
370
371
372
373 get_anon_vma(anon_vma->root);
374
375 vma->anon_vma = anon_vma;
376 anon_vma_lock_write(anon_vma);
377 anon_vma_chain_link(vma, avc, anon_vma);
378 anon_vma->parent->degree++;
379 anon_vma_unlock_write(anon_vma);
380
381 return 0;
382
383 out_error_free_anon_vma:
384 put_anon_vma(anon_vma);
385 out_error:
386 unlink_anon_vmas(vma);
387 return -ENOMEM;
388}
389
390void unlink_anon_vmas(struct vm_area_struct *vma)
391{
392 struct anon_vma_chain *avc, *next;
393 struct anon_vma *root = NULL;
394
	/*
	 * Unlink each anon_vma chained to the VMA.  This list is ordered
	 * from newest to oldest, ensuring the root anon_vma gets freed last.
	 */
399 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
400 struct anon_vma *anon_vma = avc->anon_vma;
401
402 root = lock_anon_vma_root(root, anon_vma);
403 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
404
		/*
		 * Leave empty anon_vmas on the list - we'll need
		 * to free them outside the lock.
		 */
409 if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
410 anon_vma->parent->degree--;
411 continue;
412 }
413
414 list_del(&avc->same_vma);
415 anon_vma_chain_free(avc);
416 }
417 if (vma->anon_vma) {
418 vma->anon_vma->degree--;
419
420
421
422
423
424 vma->anon_vma = NULL;
425 }
426 unlock_anon_vma_root(root);
427
	/*
	 * Iterate the list once more, it now only contains empty and unlinked
	 * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
	 * needing to write-acquire the anon_vma->root->rwsem.
	 */
433 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
434 struct anon_vma *anon_vma = avc->anon_vma;
435
436 VM_WARN_ON(anon_vma->degree);
437 put_anon_vma(anon_vma);
438
439 list_del(&avc->same_vma);
440 anon_vma_chain_free(avc);
441 }
442}
443
444static void anon_vma_ctor(void *data)
445{
446 struct anon_vma *anon_vma = data;
447
448 init_rwsem(&anon_vma->rwsem);
449 atomic_set(&anon_vma->refcount, 0);
450 anon_vma->rb_root = RB_ROOT_CACHED;
451}
452
453void __init anon_vma_init(void)
454{
455 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
456 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
457 anon_vma_ctor);
458 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
459 SLAB_PANIC|SLAB_ACCOUNT);
460}
461
/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against page_remove_rmap(),
 * the best this function can do is return a refcount increased anon_vma
 * that might have been relevant to this page.
 *
 * The page might have been remapped to a different anon_vma or the anon_vma
 * returned may already be freed (and even reused).
 *
 * In case it was remapped to a different anon_vma, the new anon_vma will be a
 * child of the old anon_vma, and the anon_vma lifetime rules will therefore
 * ensure that any anon_vma obtained from the page will still be valid for as
 * long as we observe page_mapped() [ hence all those page_mapped() tests ].
 *
 * All users of this function must be very careful when walking the anon_vma
 * chain and verify that the page in question is indeed mapped in it
 * [ something equivalent to page_mapped_in_vma() ].
 *
 * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
 * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
 * if there is a mapcount, we can dereference the anon_vma after observing
 * those.
 */
486struct anon_vma *page_get_anon_vma(struct page *page)
487{
488 struct anon_vma *anon_vma = NULL;
489 unsigned long anon_mapping;
490
491 rcu_read_lock();
492 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
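	/*
	 * For PageAnon() pages, page->mapping stores the anon_vma pointer
	 * with the PAGE_MAPPING_ANON bit set in the low bits.
	 */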
493 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
494 goto out;
495 if (!page_mapped(page))
496 goto out;
497
498 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
499 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
500 anon_vma = NULL;
501 goto out;
502 }
503
	/*
	 * If this page is still mapped, then its anon_vma cannot have been
	 * freed.  But if it has been unmapped, we have no security against the
	 * anon_vma structure being freed and reused (for another anon_vma:
	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero()
	 * above cannot corrupt).
	 */
511 if (!page_mapped(page)) {
512 rcu_read_unlock();
513 put_anon_vma(anon_vma);
514 return NULL;
515 }
516out:
517 rcu_read_unlock();
518
519 return anon_vma;
520}
521
/*
 * Similar to page_get_anon_vma() except it locks the anon_vma.
 *
 * It's a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with page_get_anon_vma() and then block on the rwsem.
 */
529struct anon_vma *page_lock_anon_vma_read(struct page *page)
530{
531 struct anon_vma *anon_vma = NULL;
532 struct anon_vma *root_anon_vma;
533 unsigned long anon_mapping;
534
535 rcu_read_lock();
536 anon_mapping = (unsigned long)READ_ONCE(page->mapping);
537 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
538 goto out;
539 if (!page_mapped(page))
540 goto out;
541
542 anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
543 root_anon_vma = READ_ONCE(anon_vma->root);
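	/*
	 * The rwsem lives in the root of the anon_vma tree; try the cheap
	 * trylock fast path on it before falling back to taking a reference
	 * and sleeping on the lock.
	 */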
544 if (down_read_trylock(&root_anon_vma->rwsem)) {
545
546
547
548
549
550 if (!page_mapped(page)) {
551 up_read(&root_anon_vma->rwsem);
552 anon_vma = NULL;
553 }
554 goto out;
555 }
556
557
558 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
559 anon_vma = NULL;
560 goto out;
561 }
562
563 if (!page_mapped(page)) {
564 rcu_read_unlock();
565 put_anon_vma(anon_vma);
566 return NULL;
567 }
568
569
570 rcu_read_unlock();
571 anon_vma_lock_read(anon_vma);
572
573 if (atomic_dec_and_test(&anon_vma->refcount)) {
574
575
576
577
578
579 anon_vma_unlock_read(anon_vma);
580 __put_anon_vma(anon_vma);
581 anon_vma = NULL;
582 }
583
584 return anon_vma;
585
586out:
587 rcu_read_unlock();
588 return anon_vma;
589}
590
591void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
592{
593 anon_vma_unlock_read(anon_vma);
594}
595
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH

/*
 * Flush TLB entries for recently unmapped pages from remote CPUs. It is
 * important if a PTE was dirty when it was unmapped that it's flushed
 * before any IO is initiated on the page to prevent lost writes.
 */
void try_to_unmap_flush(void)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	if (!tlb_ubc->flush_required)
		return;

	arch_tlbbatch_flush(&tlb_ubc->arch);
	tlb_ubc->flush_required = false;
	tlb_ubc->writable = false;
}

/* Flush iff there are potentially writable TLB entries that can race with IO */
void try_to_unmap_flush_dirty(void)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	if (tlb_ubc->writable)
		try_to_unmap_flush();
}

static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
	tlb_ubc->flush_required = true;

	/*
	 * Ensure the compiler does not re-order the setting of
	 * tlb_flush_batched before the PTE is cleared.
	 */
	barrier();
	mm->tlb_flush_batched = true;

	/*
	 * If the PTE was dirty then it's best to assume it's writable. The
	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
	 * before the page is queued for IO.
	 */
	if (writable)
		tlb_ubc->writable = true;
}

/*
 * Returns true if the TLB flush should be deferred to the end of a batch of
 * unmap operations to reduce IPIs.
 */
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	bool should_defer = false;

	if (!(flags & TTU_BATCH_FLUSH))
		return false;

	/* If remote CPUs need to be flushed then defer batch the flush */
	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
		should_defer = true;
	put_cpu();

	return should_defer;
}

/*
 * Reclaim unmaps pages under the PTL but does not flush the TLB before
 * releasing the PTL if TLB flushes are batched. It's possible for a parallel
 * operation such as mprotect or munmap to race between reclaim unmapping
 * the page and flushing the page. If this race occurs, it potentially allows
 * access to data via a stale TLB entry. Tracking batched TLB flushes on a
 * per-mm basis allows such races to be detected and the TLB flushed here
 * before the page-table entry is reused.
 *
 * The mm->tlb_flush_batched flag is read without a lock; a spurious extra
 * flush is harmless.
 */
void flush_tlb_batched_pending(struct mm_struct *mm)
{
	if (data_race(mm->tlb_flush_batched)) {
		flush_tlb_mm(mm);

		/*
		 * Do not allow the compiler to re-order the clearing of
		 * tlb_flush_batched before the tlb is flushed.
		 */
		barrier();
		mm->tlb_flush_batched = false;
	}
}
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
{
}

static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	return false;
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
704
/*
 * At what user virtual address is page expected in vma?
 * Caller should check the page is actually part of the vma.
 */
709unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
710{
711 if (PageAnon(page)) {
712 struct anon_vma *page__anon_vma = page_anon_vma(page);
713
714
715
716
717 if (!vma->anon_vma || !page__anon_vma ||
718 vma->anon_vma->root != page__anon_vma->root)
719 return -EFAULT;
720 } else if (!vma->vm_file) {
721 return -EFAULT;
722 } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
723 return -EFAULT;
724 }
725
726 return vma_address(page, vma);
727}
728
729pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
730{
731 pgd_t *pgd;
732 p4d_t *p4d;
733 pud_t *pud;
734 pmd_t *pmd = NULL;
735 pmd_t pmde;
736
737 pgd = pgd_offset(mm, address);
738 if (!pgd_present(*pgd))
739 goto out;
740
741 p4d = p4d_offset(pgd, address);
742 if (!p4d_present(*p4d))
743 goto out;
744
745 pud = pud_offset(p4d, address);
746 if (!pud_present(*pud))
747 goto out;
748
749 pmd = pmd_offset(pud, address);
750
	/*
	 * Some THP functions use the sequence pmdp_huge_clear_flush(),
	 * set_pmd_at() without holding the anon_vma lock for write.  So when
	 * looking for a genuine pmde (in which to find the pte), test present
	 * and !THP together.
	 */
755 pmde = *pmd;
756 barrier();
757 if (!pmd_present(pmde) || pmd_trans_huge(pmde))
758 pmd = NULL;
759out:
760 return pmd;
761}
762
763struct page_referenced_arg {
764 int mapcount;
765 int referenced;
766 unsigned long vm_flags;
767 struct mem_cgroup *memcg;
768};
769
770
771
772static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
773 unsigned long address, void *arg)
774{
775 struct page_referenced_arg *pra = arg;
776 struct page_vma_mapped_walk pvmw = {
777 .page = page,
778 .vma = vma,
779 .address = address,
780 };
781 int referenced = 0;
782
783 while (page_vma_mapped_walk(&pvmw)) {
784 address = pvmw.address;
785
786 if (vma->vm_flags & VM_LOCKED) {
787 page_vma_mapped_walk_done(&pvmw);
788 pra->vm_flags |= VM_LOCKED;
789 return false;
790 }
791
792 if (pvmw.pte) {
793 if (ptep_clear_flush_young_notify(vma, address,
794 pvmw.pte)) {
795
796
797
798
799
800
801
802
803 if (likely(!(vma->vm_flags & VM_SEQ_READ)))
804 referenced++;
805 }
806 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
807 if (pmdp_clear_flush_young_notify(vma, address,
808 pvmw.pmd))
809 referenced++;
810 } else {
811
812 WARN_ON_ONCE(1);
813 }
814
815 pra->mapcount--;
816 }
817
818 if (referenced)
819 clear_page_idle(page);
820 if (test_and_clear_page_young(page))
821 referenced++;
822
823 if (referenced) {
824 pra->referenced++;
825 pra->vm_flags |= vma->vm_flags;
826 }
827
828 if (!pra->mapcount)
829 return false;
830
831 return true;
832}
833
834static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
835{
836 struct page_referenced_arg *pra = arg;
837 struct mem_cgroup *memcg = pra->memcg;
838
839 if (!mm_match_cgroup(vma->vm_mm, memcg))
840 return true;
841
842 return false;
843}
844
/**
 * page_referenced - test if the page was referenced
 * @page: the page to test
 * @is_locked: caller holds lock on the page
 * @memcg: target memory cgroup
 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
 *
 * Quick test_and_clear_referenced for all mappings to a page,
 * returns the number of ptes which referenced the page.
 */
855int page_referenced(struct page *page,
856 int is_locked,
857 struct mem_cgroup *memcg,
858 unsigned long *vm_flags)
859{
860 int we_locked = 0;
861 struct page_referenced_arg pra = {
862 .mapcount = total_mapcount(page),
863 .memcg = memcg,
864 };
865 struct rmap_walk_control rwc = {
866 .rmap_one = page_referenced_one,
867 .arg = (void *)&pra,
868 .anon_lock = page_lock_anon_vma_read,
869 };
870
871 *vm_flags = 0;
872 if (!pra.mapcount)
873 return 0;
874
875 if (!page_rmapping(page))
876 return 0;
877
878 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
879 we_locked = trylock_page(page);
880 if (!we_locked)
881 return 1;
882 }
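	/*
	 * The page lock stabilises page->mapping for the file and KSM rmap
	 * walks; if it cannot be taken, report the page as referenced so
	 * that reclaim leaves it alone for now.
	 */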
883
	/*
	 * If we are reclaiming on behalf of a cgroup, skip
	 * counting on behalf of references from different
	 * cgroups.
	 */
889 if (memcg) {
890 rwc.invalid_vma = invalid_page_referenced_vma;
891 }
892
893 rmap_walk(page, &rwc);
894 *vm_flags = pra.vm_flags;
895
896 if (we_locked)
897 unlock_page(page);
898
899 return pra.referenced;
900}
901
902static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
903 unsigned long address, void *arg)
904{
905 struct page_vma_mapped_walk pvmw = {
906 .page = page,
907 .vma = vma,
908 .address = address,
909 .flags = PVMW_SYNC,
910 };
911 struct mmu_notifier_range range;
912 int *cleaned = arg;
913
914
915
916
917
918 mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
919 0, vma, vma->vm_mm, address,
920 vma_address_end(page, vma));
921 mmu_notifier_invalidate_range_start(&range);
922
923 while (page_vma_mapped_walk(&pvmw)) {
924 int ret = 0;
925
926 address = pvmw.address;
927 if (pvmw.pte) {
928 pte_t entry;
929 pte_t *pte = pvmw.pte;
930
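			/* Nothing to do if the pte is already clean and write-protected. */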
931 if (!pte_dirty(*pte) && !pte_write(*pte))
932 continue;
933
934 flush_cache_page(vma, address, pte_pfn(*pte));
935 entry = ptep_clear_flush(vma, address, pte);
936 entry = pte_wrprotect(entry);
937 entry = pte_mkclean(entry);
938 set_pte_at(vma->vm_mm, address, pte, entry);
939 ret = 1;
940 } else {
941#ifdef CONFIG_TRANSPARENT_HUGEPAGE
942 pmd_t *pmd = pvmw.pmd;
943 pmd_t entry;
944
945 if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
946 continue;
947
948 flush_cache_page(vma, address, page_to_pfn(page));
949 entry = pmdp_invalidate(vma, address, pmd);
950 entry = pmd_wrprotect(entry);
951 entry = pmd_mkclean(entry);
952 set_pmd_at(vma->vm_mm, address, pmd, entry);
953 ret = 1;
954#else
955
956 WARN_ON_ONCE(1);
957#endif
958 }
959
960
961
962
963
964
965
966
967 if (ret)
968 (*cleaned)++;
969 }
970
971 mmu_notifier_invalidate_range_end(&range);
972
973 return true;
974}
975
976static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
977{
978 if (vma->vm_flags & VM_SHARED)
979 return false;
980
981 return true;
982}
983
984int page_mkclean(struct page *page)
985{
986 int cleaned = 0;
987 struct address_space *mapping;
988 struct rmap_walk_control rwc = {
989 .arg = (void *)&cleaned,
990 .rmap_one = page_mkclean_one,
991 .invalid_vma = invalid_mkclean_vma,
992 };
993
994 BUG_ON(!PageLocked(page));
995
996 if (!page_mapped(page))
997 return 0;
998
999 mapping = page_mapping(page);
1000 if (!mapping)
1001 return 0;
1002
1003 rmap_walk(page, &rwc);
1004
1005 return cleaned;
1006}
1007EXPORT_SYMBOL_GPL(page_mkclean);
1008
/**
 * page_move_anon_rmap - move a page to our anon_vma
 * @page:	the page to move to our anon_vma
 * @vma:	the vma the page belongs to
 *
 * When a page belongs exclusively to one process after a COW event,
 * that page can be moved into the anon_vma that belongs to just that
 * process, so the rmap code will not search the parent or sibling
 * processes.
 */
1019void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
1020{
1021 struct anon_vma *anon_vma = vma->anon_vma;
1022
1023 page = compound_head(page);
1024
1025 VM_BUG_ON_PAGE(!PageLocked(page), page);
1026 VM_BUG_ON_VMA(!anon_vma, vma);
1027
1028 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1029
1030
1031
1032
1033
1034 WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
1035}
1036
/**
 * __page_set_anon_rmap - set up new anonymous rmap
 * @page:	Page or Hugepage to add to rmap
 * @vma:	VM area to add page to.
 * @address:	User virtual address of the mapping
 * @exclusive:	the page is exclusively owned by the current process
 */
1044static void __page_set_anon_rmap(struct page *page,
1045 struct vm_area_struct *vma, unsigned long address, int exclusive)
1046{
1047 struct anon_vma *anon_vma = vma->anon_vma;
1048
1049 BUG_ON(!anon_vma);
1050
1051 if (PageAnon(page))
1052 return;
1053
1054
1055
1056
1057
1058
1059 if (!exclusive)
1060 anon_vma = anon_vma->root;
1061
1062
1063
1064
1065
1066
1067
1068 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
1069 WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);
1070 page->index = linear_page_index(vma, address);
1071}
1072
/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 */
1079static void __page_check_anon_rmap(struct page *page,
1080 struct vm_area_struct *vma, unsigned long address)
1081{
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page);
1094 VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),
1095 page);
1096}
1097
/**
 * page_add_anon_rmap - add pte mapping to an anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 * @compound:	charge the page as compound or small page
 *
 * The caller needs to hold the pte lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that PageAnon is not being truncated while it is mapped.
 */
1110void page_add_anon_rmap(struct page *page,
1111 struct vm_area_struct *vma, unsigned long address, bool compound)
1112{
1113 do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0);
1114}
1115
1116
1117
1118
1119
1120
1121void do_page_add_anon_rmap(struct page *page,
1122 struct vm_area_struct *vma, unsigned long address, int flags)
1123{
1124 bool compound = flags & RMAP_COMPOUND;
1125 bool first;
1126
1127 if (unlikely(PageKsm(page)))
1128 lock_page_memcg(page);
1129 else
1130 VM_BUG_ON_PAGE(!PageLocked(page), page);
1131
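	/*
	 * The relevant mapcount starts at -1, so atomic_inc_and_test() is
	 * true only for the first mapping of the (sub)page.
	 */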
1132 if (compound) {
1133 atomic_t *mapcount;
1134 VM_BUG_ON_PAGE(!PageLocked(page), page);
1135 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
1136 mapcount = compound_mapcount_ptr(page);
1137 first = atomic_inc_and_test(mapcount);
1138 } else {
1139 first = atomic_inc_and_test(&page->_mapcount);
1140 }
1141
1142 if (first) {
1143 int nr = compound ? thp_nr_pages(page) : 1;
1144
1145
1146
1147
1148
1149
1150 if (compound)
1151 __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
1152 __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
1153 }
1154
1155 if (unlikely(PageKsm(page))) {
1156 unlock_page_memcg(page);
1157 return;
1158 }
1159
1160
1161 if (first)
1162 __page_set_anon_rmap(page, vma, address,
1163 flags & RMAP_EXCLUSIVE);
1164 else
1165 __page_check_anon_rmap(page, vma, address);
1166}
1167
/**
 * page_add_new_anon_rmap - add pte mapping to a new anonymous page
 * @page:	the page to add the mapping to
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 * @compound:	charge the page as compound or small page
 *
 * Same as page_add_anon_rmap but must only be called on *new* pages.
 * This means the inc-and-test can be bypassed.
 * Page does not have to be locked.
 */
1179void page_add_new_anon_rmap(struct page *page,
1180 struct vm_area_struct *vma, unsigned long address, bool compound)
1181{
1182 int nr = compound ? thp_nr_pages(page) : 1;
1183
1184 VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
1185 __SetPageSwapBacked(page);
1186 if (compound) {
1187 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
1188
1189 atomic_set(compound_mapcount_ptr(page), 0);
1190 if (hpage_pincount_available(page))
1191 atomic_set(compound_pincount_ptr(page), 0);
1192
1193 __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
1194 } else {
1195
1196 VM_BUG_ON_PAGE(PageTransCompound(page), page);
1197
1198 atomic_set(&page->_mapcount, 0);
1199 }
1200 __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
1201 __page_set_anon_rmap(page, vma, address, 1);
1202}
1203
/**
 * page_add_file_rmap - add pte mapping to a file page
 * @page:	the page to add the mapping to
 * @compound:	charge the page as compound or small page
 *
 * The caller needs to hold the pte lock.
 */
1211void page_add_file_rmap(struct page *page, bool compound)
1212{
1213 int i, nr = 1;
1214
1215 VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
1216 lock_page_memcg(page);
1217 if (compound && PageTransHuge(page)) {
1218 int nr_pages = thp_nr_pages(page);
1219
1220 for (i = 0, nr = 0; i < nr_pages; i++) {
1221 if (atomic_inc_and_test(&page[i]._mapcount))
1222 nr++;
1223 }
1224 if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
1225 goto out;
1226 if (PageSwapBacked(page))
1227 __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
1228 nr_pages);
1229 else
1230 __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
1231 nr_pages);
1232 } else {
1233 if (PageTransCompound(page) && page_mapping(page)) {
1234 struct page *head = compound_head(page);
1235
1236 VM_WARN_ON_ONCE(!PageLocked(page));
1237
1238 SetPageDoubleMap(head);
1239 if (PageMlocked(page))
1240 clear_page_mlock(head);
1241 }
1242 if (!atomic_inc_and_test(&page->_mapcount))
1243 goto out;
1244 }
1245 __mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
1246out:
1247 unlock_page_memcg(page);
1248}
1249
1250static void page_remove_file_rmap(struct page *page, bool compound)
1251{
1252 int i, nr = 1;
1253
1254 VM_BUG_ON_PAGE(compound && !PageHead(page), page);
1255
1256
1257 if (unlikely(PageHuge(page))) {
1258
1259 atomic_dec(compound_mapcount_ptr(page));
1260 return;
1261 }
1262
1263
1264 if (compound && PageTransHuge(page)) {
1265 int nr_pages = thp_nr_pages(page);
1266
1267 for (i = 0, nr = 0; i < nr_pages; i++) {
1268 if (atomic_add_negative(-1, &page[i]._mapcount))
1269 nr++;
1270 }
1271 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
1272 return;
1273 if (PageSwapBacked(page))
1274 __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
1275 -nr_pages);
1276 else
1277 __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
1278 -nr_pages);
1279 } else {
1280 if (!atomic_add_negative(-1, &page->_mapcount))
1281 return;
1282 }
1283
1284
1285
1286
1287
1288
1289 __mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
1290
1291 if (unlikely(PageMlocked(page)))
1292 clear_page_mlock(page);
1293}
1294
1295static void page_remove_anon_compound_rmap(struct page *page)
1296{
1297 int i, nr;
1298
1299 if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
1300 return;
1301
1302
1303 if (unlikely(PageHuge(page)))
1304 return;
1305
1306 if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1307 return;
1308
1309 __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
1310
1311 if (TestClearPageDoubleMap(page)) {
1312
1313
1314
1315
1316 for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
1317 if (atomic_add_negative(-1, &page[i]._mapcount))
1318 nr++;
1319 }
1320
1321
1322
1323
1324
1325
1326 if (nr && nr < thp_nr_pages(page))
1327 deferred_split_huge_page(page);
1328 } else {
1329 nr = thp_nr_pages(page);
1330 }
1331
1332 if (unlikely(PageMlocked(page)))
1333 clear_page_mlock(page);
1334
1335 if (nr)
1336 __mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
1337}
1338
/**
 * page_remove_rmap - take down pte mapping from a page
 * @page:	page to remove mapping from
 * @compound:	uncharge the page as compound or small page
 *
 * The caller needs to hold the pte lock.
 */
1346void page_remove_rmap(struct page *page, bool compound)
1347{
1348 lock_page_memcg(page);
1349
1350 if (!PageAnon(page)) {
1351 page_remove_file_rmap(page, compound);
1352 goto out;
1353 }
1354
1355 if (compound) {
1356 page_remove_anon_compound_rmap(page);
1357 goto out;
1358 }
1359
1360
1361 if (!atomic_add_negative(-1, &page->_mapcount))
1362 goto out;
1363
1364
1365
1366
1367
1368
1369 __dec_lruvec_page_state(page, NR_ANON_MAPPED);
1370
1371 if (unlikely(PageMlocked(page)))
1372 clear_page_mlock(page);
1373
1374 if (PageTransCompound(page))
1375 deferred_split_huge_page(compound_head(page));
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386out:
1387 unlock_page_memcg(page);
1388}
1389
/*
 * @arg: enum ttu_flags will be passed to this argument
 */
1393static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1394 unsigned long address, void *arg)
1395{
1396 struct mm_struct *mm = vma->vm_mm;
1397 struct page_vma_mapped_walk pvmw = {
1398 .page = page,
1399 .vma = vma,
1400 .address = address,
1401 };
1402 pte_t pteval;
1403 struct page *subpage;
1404 bool ret = true;
1405 struct mmu_notifier_range range;
1406 enum ttu_flags flags = (enum ttu_flags)(long)arg;
1407
1408
1409
1410
1411
1412
1413
1414 if (flags & TTU_SYNC)
1415 pvmw.flags = PVMW_SYNC;
1416
1417 if (flags & TTU_SPLIT_HUGE_PMD)
1418 split_huge_pmd_address(vma, address, false, page);
1419
	/*
	 * For THP, we have to assume the worse case ie pmd for invalidation.
	 * For hugetlb, it could be much worse if we need to do pud
	 * invalidation in the case of pmd sharing.
	 *
	 * Note that the page can not be freed in this function as call of
	 * try_to_unmap() must hold a reference on the page.
	 */
1428 range.end = PageKsm(page) ?
1429 address + PAGE_SIZE : vma_address_end(page, vma);
1430 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1431 address, range.end);
1432 if (PageHuge(page)) {
1433
1434
1435
1436
1437 adjust_range_if_pmd_sharing_possible(vma, &range.start,
1438 &range.end);
1439 }
1440 mmu_notifier_invalidate_range_start(&range);
1441
1442 while (page_vma_mapped_walk(&pvmw)) {
1443
1444
1445
1446 if (!(flags & TTU_IGNORE_MLOCK) &&
1447 (vma->vm_flags & VM_LOCKED)) {
1448
1449
1450
1451
1452
1453
1454 if (!PageTransCompound(page) || (PageHead(page) &&
1455 !PageDoubleMap(page) && !PageAnon(page)))
1456 mlock_vma_page(page);
1457 page_vma_mapped_walk_done(&pvmw);
1458 ret = false;
1459 break;
1460 }
1461
1462
1463 VM_BUG_ON_PAGE(!pvmw.pte, page);
1464
1465 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
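		/* subpage is the precise base page mapped by this pte within a compound page. */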
1466 address = pvmw.address;
1467
1468 if (PageHuge(page) && !PageAnon(page)) {
1469
1470
1471
1472
1473
1474 VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
1475 if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
1476
1477
1478
1479
1480
1481
1482
1483 flush_cache_range(vma, range.start, range.end);
1484 flush_tlb_range(vma, range.start, range.end);
1485 mmu_notifier_invalidate_range(mm, range.start,
1486 range.end);
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497 page_vma_mapped_walk_done(&pvmw);
1498 break;
1499 }
1500 }
1501
1502
1503 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
1504 if (should_defer_flush(mm, flags)) {
			/*
			 * We clear the PTE but do not flush so potentially
			 * a remote CPU could still be writing to the page.
			 * If the entry was previously clean then the
			 * architecture must guarantee that a clear->dirty
			 * transition on a cached TLB entry is written through
			 * and traps if the PTE is unmapped.
			 */
1513 pteval = ptep_get_and_clear(mm, address, pvmw.pte);
1514
1515 set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
1516 } else {
1517 pteval = ptep_clear_flush(vma, address, pvmw.pte);
1518 }
1519
1520
1521 if (pte_dirty(pteval))
1522 set_page_dirty(page);
1523
1524
1525 update_hiwater_rss(mm);
1526
1527 if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
1528 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
1529 if (PageHuge(page)) {
1530 hugetlb_count_sub(compound_nr(page), mm);
1531 set_huge_swap_pte_at(mm, address,
1532 pvmw.pte, pteval,
1533 vma_mmu_pagesize(vma));
1534 } else {
1535 dec_mm_counter(mm, mm_counter(page));
1536 set_pte_at(mm, address, pvmw.pte, pteval);
1537 }
1538
1539 } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550 dec_mm_counter(mm, mm_counter(page));
1551
1552 mmu_notifier_invalidate_range(mm, address,
1553 address + PAGE_SIZE);
1554 } else if (PageAnon(page)) {
1555 swp_entry_t entry = { .val = page_private(subpage) };
1556 pte_t swp_pte;
1557
1558
1559
1560
1561 if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) {
1562 WARN_ON_ONCE(1);
1563 ret = false;
1564
1565 mmu_notifier_invalidate_range(mm, address,
1566 address + PAGE_SIZE);
1567 page_vma_mapped_walk_done(&pvmw);
1568 break;
1569 }
1570
1571
1572 if (!PageSwapBacked(page)) {
1573 if (!PageDirty(page)) {
1574
1575 mmu_notifier_invalidate_range(mm,
1576 address, address + PAGE_SIZE);
1577 dec_mm_counter(mm, MM_ANONPAGES);
1578 goto discard;
1579 }
1580
1581
1582
1583
1584
1585 set_pte_at(mm, address, pvmw.pte, pteval);
1586 SetPageSwapBacked(page);
1587 ret = false;
1588 page_vma_mapped_walk_done(&pvmw);
1589 break;
1590 }
1591
1592 if (swap_duplicate(entry) < 0) {
1593 set_pte_at(mm, address, pvmw.pte, pteval);
1594 ret = false;
1595 page_vma_mapped_walk_done(&pvmw);
1596 break;
1597 }
1598 if (arch_unmap_one(mm, vma, address, pteval) < 0) {
1599 set_pte_at(mm, address, pvmw.pte, pteval);
1600 ret = false;
1601 page_vma_mapped_walk_done(&pvmw);
1602 break;
1603 }
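			/*
			 * Make sure this mm is on the global mmlist so that
			 * swapoff can later find and restore these swap
			 * entries.
			 */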
1604 if (list_empty(&mm->mmlist)) {
1605 spin_lock(&mmlist_lock);
1606 if (list_empty(&mm->mmlist))
1607 list_add(&mm->mmlist, &init_mm.mmlist);
1608 spin_unlock(&mmlist_lock);
1609 }
1610 dec_mm_counter(mm, MM_ANONPAGES);
1611 inc_mm_counter(mm, MM_SWAPENTS);
1612 swp_pte = swp_entry_to_pte(entry);
1613 if (pte_soft_dirty(pteval))
1614 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1615 if (pte_uffd_wp(pteval))
1616 swp_pte = pte_swp_mkuffd_wp(swp_pte);
1617 set_pte_at(mm, address, pvmw.pte, swp_pte);
1618
1619 mmu_notifier_invalidate_range(mm, address,
1620 address + PAGE_SIZE);
1621 } else {
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632 dec_mm_counter(mm, mm_counter_file(page));
1633 }
1634discard:
1635
1636
1637
1638
1639
1640
1641
1642 page_remove_rmap(subpage, PageHuge(page));
1643 put_page(page);
1644 }
1645
1646 mmu_notifier_invalidate_range_end(&range);
1647
1648 return ret;
1649}
1650
1651static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
1652{
1653 return vma_is_temporary_stack(vma);
1654}
1655
1656static int page_not_mapped(struct page *page)
1657{
1658 return !page_mapped(page);
1659}
1660
/**
 * try_to_unmap - try to remove all page table mappings to a page
 * @page: the page to get unmapped
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * page, used in the pageout path.  Caller must hold the page lock.
 *
 * It is the caller's responsibility to check if the page is still
 * mapped if needed (use TTU_SYNC to prevent accounting races).
 */
1672void try_to_unmap(struct page *page, enum ttu_flags flags)
1673{
1674 struct rmap_walk_control rwc = {
1675 .rmap_one = try_to_unmap_one,
1676 .arg = (void *)flags,
1677 .done = page_not_mapped,
1678 .anon_lock = page_lock_anon_vma_read,
1679 };
1680
1681 if (flags & TTU_RMAP_LOCKED)
1682 rmap_walk_locked(page, &rwc);
1683 else
1684 rmap_walk(page, &rwc);
1685}
1686
1687
1688
1689
1690
1691
1692
1693static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
1694 unsigned long address, void *arg)
1695{
1696 struct mm_struct *mm = vma->vm_mm;
1697 struct page_vma_mapped_walk pvmw = {
1698 .page = page,
1699 .vma = vma,
1700 .address = address,
1701 };
1702 pte_t pteval;
1703 struct page *subpage;
1704 bool ret = true;
1705 struct mmu_notifier_range range;
1706 enum ttu_flags flags = (enum ttu_flags)(long)arg;
1707
1708
1709
1710
1711
1712
1713
1714 if (flags & TTU_SYNC)
1715 pvmw.flags = PVMW_SYNC;
1716
1717
1718
1719
1720
1721 if (flags & TTU_SPLIT_HUGE_PMD)
1722 split_huge_pmd_address(vma, address, true, page);
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732 range.end = PageKsm(page) ?
1733 address + PAGE_SIZE : vma_address_end(page, vma);
1734 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1735 address, range.end);
1736 if (PageHuge(page)) {
1737
1738
1739
1740
1741 adjust_range_if_pmd_sharing_possible(vma, &range.start,
1742 &range.end);
1743 }
1744 mmu_notifier_invalidate_range_start(&range);
1745
1746 while (page_vma_mapped_walk(&pvmw)) {
1747#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
1748
1749 if (!pvmw.pte) {
1750 VM_BUG_ON_PAGE(PageHuge(page) ||
1751 !PageTransCompound(page), page);
1752
1753 set_pmd_migration_entry(&pvmw, page);
1754 continue;
1755 }
1756#endif
1757
1758
1759 VM_BUG_ON_PAGE(!pvmw.pte, page);
1760
1761 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
1762 address = pvmw.address;
1763
1764 if (PageHuge(page) && !PageAnon(page)) {
1765
1766
1767
1768
1769
1770 VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
1771 if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) {
1772
1773
1774
1775
1776
1777
1778
1779 flush_cache_range(vma, range.start, range.end);
1780 flush_tlb_range(vma, range.start, range.end);
1781 mmu_notifier_invalidate_range(mm, range.start,
1782 range.end);
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793 page_vma_mapped_walk_done(&pvmw);
1794 break;
1795 }
1796 }
1797
1798
1799 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
1800 pteval = ptep_clear_flush(vma, address, pvmw.pte);
1801
1802
1803 if (pte_dirty(pteval))
1804 set_page_dirty(page);
1805
1806
1807 update_hiwater_rss(mm);
1808
1809 if (is_zone_device_page(page)) {
1810 swp_entry_t entry;
1811 pte_t swp_pte;
1812
1813
1814
1815
1816
1817
1818 entry = make_readable_migration_entry(
1819 page_to_pfn(page));
1820 swp_pte = swp_entry_to_pte(entry);
1821
1822
1823
1824
1825
1826 if (pte_swp_soft_dirty(pteval))
1827 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1828 if (pte_swp_uffd_wp(pteval))
1829 swp_pte = pte_swp_mkuffd_wp(swp_pte);
1830 set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842 subpage = page;
1843 } else if (PageHWPoison(page)) {
1844 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
1845 if (PageHuge(page)) {
1846 hugetlb_count_sub(compound_nr(page), mm);
1847 set_huge_swap_pte_at(mm, address,
1848 pvmw.pte, pteval,
1849 vma_mmu_pagesize(vma));
1850 } else {
1851 dec_mm_counter(mm, mm_counter(page));
1852 set_pte_at(mm, address, pvmw.pte, pteval);
1853 }
1854
1855 } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866 dec_mm_counter(mm, mm_counter(page));
1867
1868 mmu_notifier_invalidate_range(mm, address,
1869 address + PAGE_SIZE);
1870 } else {
1871 swp_entry_t entry;
1872 pte_t swp_pte;
1873
1874 if (arch_unmap_one(mm, vma, address, pteval) < 0) {
1875 set_pte_at(mm, address, pvmw.pte, pteval);
1876 ret = false;
1877 page_vma_mapped_walk_done(&pvmw);
1878 break;
1879 }
1880
1881
1882
1883
1884
1885
1886 if (pte_write(pteval))
1887 entry = make_writable_migration_entry(
1888 page_to_pfn(subpage));
1889 else
1890 entry = make_readable_migration_entry(
1891 page_to_pfn(subpage));
1892
1893 swp_pte = swp_entry_to_pte(entry);
1894 if (pte_soft_dirty(pteval))
1895 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1896 if (pte_uffd_wp(pteval))
1897 swp_pte = pte_swp_mkuffd_wp(swp_pte);
1898 set_pte_at(mm, address, pvmw.pte, swp_pte);
1899
1900
1901
1902
1903 }
1904
1905
1906
1907
1908
1909
1910
1911
1912 page_remove_rmap(subpage, PageHuge(page));
1913 put_page(page);
1914 }
1915
1916 mmu_notifier_invalidate_range_end(&range);
1917
1918 return ret;
1919}
1920
/**
 * try_to_migrate - try to replace all page table mappings with swap entries
 * @page: the page to replace page table entries for
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this page and
 * replace them with special swap entries. Caller must hold the page lock.
 */
1929void try_to_migrate(struct page *page, enum ttu_flags flags)
1930{
1931 struct rmap_walk_control rwc = {
1932 .rmap_one = try_to_migrate_one,
1933 .arg = (void *)flags,
1934 .done = page_not_mapped,
1935 .anon_lock = page_lock_anon_vma_read,
1936 };
1937
1938
1939
1940
1941
1942 if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
1943 TTU_SYNC)))
1944 return;
1945
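	/*
	 * Among ZONE_DEVICE pages, only device private pages can be migrated
	 * via this path; skip the rest.
	 */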
1946 if (is_zone_device_page(page) && !is_device_private_page(page))
1947 return;
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957 if (!PageKsm(page) && PageAnon(page))
1958 rwc.invalid_vma = invalid_migration_vma;
1959
1960 if (flags & TTU_RMAP_LOCKED)
1961 rmap_walk_locked(page, &rwc);
1962 else
1963 rmap_walk(page, &rwc);
1964}
1965
1966
1967
1968
1969
1970static bool page_mlock_one(struct page *page, struct vm_area_struct *vma,
1971 unsigned long address, void *unused)
1972{
1973 struct page_vma_mapped_walk pvmw = {
1974 .page = page,
1975 .vma = vma,
1976 .address = address,
1977 };
1978
1979
1980 if (!(vma->vm_flags & VM_LOCKED))
1981 return true;
1982
1983 while (page_vma_mapped_walk(&pvmw)) {
1984
1985
1986
1987
1988
1989 if (vma->vm_flags & VM_LOCKED) {
1990
1991
1992
1993
1994
1995
1996 mlock_vma_page(page);
1997
1998
1999
2000
2001 page_vma_mapped_walk_done(&pvmw);
2002 return false;
2003 }
2004 }
2005
2006 return true;
2007}
2008
/**
 * page_mlock - try to mlock a page
 * @page: the page to be mlocked
 *
 * Called from munlock code. Checks all of the VMAs mapping the page and mlocks
 * the page if any are found. The page will be returned with PG_mlocked cleared
 * if it is not mapped by any locked vmas.
 */
2017void page_mlock(struct page *page)
2018{
2019 struct rmap_walk_control rwc = {
2020 .rmap_one = page_mlock_one,
2021 .done = page_not_mapped,
2022 .anon_lock = page_lock_anon_vma_read,
2023
2024 };
2025
2026 VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
2027 VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page);
2028
2029
2030 if (PageTransCompound(page) && PageAnon(page))
2031 return;
2032
2033 rmap_walk(page, &rwc);
2034}
2035
2036#ifdef CONFIG_DEVICE_PRIVATE
2037struct make_exclusive_args {
2038 struct mm_struct *mm;
2039 unsigned long address;
2040 void *owner;
2041 bool valid;
2042};
2043
2044static bool page_make_device_exclusive_one(struct page *page,
2045 struct vm_area_struct *vma, unsigned long address, void *priv)
2046{
2047 struct mm_struct *mm = vma->vm_mm;
2048 struct page_vma_mapped_walk pvmw = {
2049 .page = page,
2050 .vma = vma,
2051 .address = address,
2052 };
2053 struct make_exclusive_args *args = priv;
2054 pte_t pteval;
2055 struct page *subpage;
2056 bool ret = true;
2057 struct mmu_notifier_range range;
2058 swp_entry_t entry;
2059 pte_t swp_pte;
2060
2061 mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
2062 vma->vm_mm, address, min(vma->vm_end,
2063 address + page_size(page)), args->owner);
2064 mmu_notifier_invalidate_range_start(&range);
2065
2066 while (page_vma_mapped_walk(&pvmw)) {
2067
2068 VM_BUG_ON_PAGE(!pvmw.pte, page);
2069
2070 if (!pte_present(*pvmw.pte)) {
2071 ret = false;
2072 page_vma_mapped_walk_done(&pvmw);
2073 break;
2074 }
2075
2076 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
2077 address = pvmw.address;
2078
2079
2080 flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
2081 pteval = ptep_clear_flush(vma, address, pvmw.pte);
2082
2083
2084 if (pte_dirty(pteval))
2085 set_page_dirty(page);
2086
2087
2088
2089
2090
2091 if (args->mm == mm && args->address == address &&
2092 pte_write(pteval))
2093 args->valid = true;
2094
2095
2096
2097
2098
2099
2100 if (pte_write(pteval))
2101 entry = make_writable_device_exclusive_entry(
2102 page_to_pfn(subpage));
2103 else
2104 entry = make_readable_device_exclusive_entry(
2105 page_to_pfn(subpage));
2106 swp_pte = swp_entry_to_pte(entry);
2107 if (pte_soft_dirty(pteval))
2108 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2109 if (pte_uffd_wp(pteval))
2110 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2111
2112 set_pte_at(mm, address, pvmw.pte, swp_pte);
2113
2114
2115
2116
2117
2118 page_remove_rmap(subpage, false);
2119 }
2120
2121 mmu_notifier_invalidate_range_end(&range);
2122
2123 return ret;
2124}
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,
2141 unsigned long address, void *owner)
2142{
2143 struct make_exclusive_args args = {
2144 .mm = mm,
2145 .address = address,
2146 .owner = owner,
2147 .valid = false,
2148 };
2149 struct rmap_walk_control rwc = {
2150 .rmap_one = page_make_device_exclusive_one,
2151 .done = page_not_mapped,
2152 .anon_lock = page_lock_anon_vma_read,
2153 .arg = &args,
2154 };
2155
2156
2157
2158
2159
2160
2161 if (!PageAnon(page) || PageTail(page))
2162 return false;
2163
2164 rmap_walk(page, &rwc);
2165
2166 return args.valid && !page_mapcount(page);
2167}
2168
/**
 * make_device_exclusive_range() - Mark a range for exclusive use by a device
 * @mm: mm_struct of associated target process
 * @start: start of the region to mark for exclusive device access
 * @end: end address of region
 * @pages: returns the pages which were successfully marked for exclusive access
 * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
 *
 * Returns: number of pages found in the range by GUP. A page is marked for
 * exclusive access only if the page pointer is non-NULL.
 *
 * This function finds ptes mapping page(s) to the given address range, locks
 * them and replaces the mappings with special swap entries preventing
 * userspace CPU access. On fault these entries are replaced with the original
 * mapping after calling MMU notifiers.
 *
 * A driver using this to program access from a device must use an mmu
 * notifier registered with the owner value to manage the MMU_NOTIFY_EXCLUSIVE
 * callbacks used here, otherwise it will be notified of the changes and lose
 * device access to the page.
 */
2190int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
2191 unsigned long end, struct page **pages,
2192 void *owner)
2193{
2194 long npages = (end - start) >> PAGE_SHIFT;
2195 long i;
2196
2197 npages = get_user_pages_remote(mm, start, npages,
2198 FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
2199 pages, NULL, NULL);
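	/*
	 * get_user_pages_remote() faults the range in and takes a reference
	 * on each page; FOLL_SPLIT_PMD ensures THPs are split so every page
	 * is mapped by ptes.
	 */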
2200 if (npages < 0)
2201 return npages;
2202
2203 for (i = 0; i < npages; i++, start += PAGE_SIZE) {
2204 if (!trylock_page(pages[i])) {
2205 put_page(pages[i]);
2206 pages[i] = NULL;
2207 continue;
2208 }
2209
2210 if (!page_make_device_exclusive(pages[i], mm, start, owner)) {
2211 unlock_page(pages[i]);
2212 put_page(pages[i]);
2213 pages[i] = NULL;
2214 }
2215 }
2216
2217 return npages;
2218}
2219EXPORT_SYMBOL_GPL(make_device_exclusive_range);
2220#endif
2221
2222void __put_anon_vma(struct anon_vma *anon_vma)
2223{
2224 struct anon_vma *root = anon_vma->root;
2225
2226 anon_vma_free(anon_vma);
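	/*
	 * Drop the reference this anon_vma held on its root; free the root
	 * too if that was the last reference.
	 */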
2227 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
2228 anon_vma_free(root);
2229}
2230
2231static struct anon_vma *rmap_walk_anon_lock(struct page *page,
2232 struct rmap_walk_control *rwc)
2233{
2234 struct anon_vma *anon_vma;
2235
2236 if (rwc->anon_lock)
2237 return rwc->anon_lock(page);
2238
2239
2240
2241
2242
2243
2244
2245 anon_vma = page_anon_vma(page);
2246 if (!anon_vma)
2247 return NULL;
2248
2249 anon_vma_lock_read(anon_vma);
2250 return anon_vma;
2251}
2252
/*
 * rmap_walk_anon - do something to anonymous page using the object-based
 * rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the anon_vma struct it points to.
 *
 * When called from page_mlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
2267static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
2268 bool locked)
2269{
2270 struct anon_vma *anon_vma;
2271 pgoff_t pgoff_start, pgoff_end;
2272 struct anon_vma_chain *avc;
2273
2274 if (locked) {
2275 anon_vma = page_anon_vma(page);
2276
2277 VM_BUG_ON_PAGE(!anon_vma, page);
2278 } else {
2279 anon_vma = rmap_walk_anon_lock(page, rwc);
2280 }
2281 if (!anon_vma)
2282 return;
2283
2284 pgoff_start = page_to_pgoff(page);
2285 pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
2286 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
2287 pgoff_start, pgoff_end) {
2288 struct vm_area_struct *vma = avc->vma;
2289 unsigned long address = vma_address(page, vma);
2290
2291 VM_BUG_ON_VMA(address == -EFAULT, vma);
2292 cond_resched();
2293
2294 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2295 continue;
2296
2297 if (!rwc->rmap_one(page, vma, address, rwc->arg))
2298 break;
2299 if (rwc->done && rwc->done(page))
2300 break;
2301 }
2302
2303 if (!locked)
2304 anon_vma_unlock_read(anon_vma);
2305}
2306
/*
 * rmap_walk_file - do something to file page using the object-based rmap method
 * @page: the page to be handled
 * @rwc: control variable according to each walk type
 *
 * Find all the mappings of a page using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 *
 * When called from page_mlock(), the mmap_lock of the mm containing the vma
 * where the page was found will be held for write.  So, we won't recheck
 * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
 * VM_LOCKED.
 */
2320static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
2321 bool locked)
2322{
2323 struct address_space *mapping = page_mapping(page);
2324 pgoff_t pgoff_start, pgoff_end;
2325 struct vm_area_struct *vma;
2326
2327
2328
2329
2330
2331
2332
2333 VM_BUG_ON_PAGE(!PageLocked(page), page);
2334
2335 if (!mapping)
2336 return;
2337
2338 pgoff_start = page_to_pgoff(page);
2339 pgoff_end = pgoff_start + thp_nr_pages(page) - 1;
2340 if (!locked)
2341 i_mmap_lock_read(mapping);
2342 vma_interval_tree_foreach(vma, &mapping->i_mmap,
2343 pgoff_start, pgoff_end) {
2344 unsigned long address = vma_address(page, vma);
2345
2346 VM_BUG_ON_VMA(address == -EFAULT, vma);
2347 cond_resched();
2348
2349 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2350 continue;
2351
2352 if (!rwc->rmap_one(page, vma, address, rwc->arg))
2353 goto done;
2354 if (rwc->done && rwc->done(page))
2355 goto done;
2356 }
2357
2358done:
2359 if (!locked)
2360 i_mmap_unlock_read(mapping);
2361}
2362
2363void rmap_walk(struct page *page, struct rmap_walk_control *rwc)
2364{
2365 if (unlikely(PageKsm(page)))
2366 rmap_walk_ksm(page, rwc);
2367 else if (PageAnon(page))
2368 rmap_walk_anon(page, rwc, false);
2369 else
2370 rmap_walk_file(page, rwc, false);
2371}
2372
2373
2374void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc)
2375{
2376
2377 VM_BUG_ON_PAGE(PageKsm(page), page);
2378 if (PageAnon(page))
2379 rmap_walk_anon(page, rwc, true);
2380 else
2381 rmap_walk_file(page, rwc, true);
2382}
2383
2384#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following two functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
2390void hugepage_add_anon_rmap(struct page *page,
2391 struct vm_area_struct *vma, unsigned long address)
2392{
2393 struct anon_vma *anon_vma = vma->anon_vma;
2394 int first;
2395
2396 BUG_ON(!PageLocked(page));
2397 BUG_ON(!anon_vma);
2398
2399 first = atomic_inc_and_test(compound_mapcount_ptr(page));
2400 if (first)
2401 __page_set_anon_rmap(page, vma, address, 0);
2402}
2403
2404void hugepage_add_new_anon_rmap(struct page *page,
2405 struct vm_area_struct *vma, unsigned long address)
2406{
2407 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
2408 atomic_set(compound_mapcount_ptr(page), 0);
2409 if (hpage_pincount_available(page))
2410 atomic_set(compound_pincount_ptr(page), 0);
2411
2412 __page_set_anon_rmap(page, vma, address, 1);
2413}
2414#endif
2415