// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/rmap.c - physical to virtual reverse mappings
 *
 * Copyright 2001, Rik van Riel <riel@conectiva.com.br>
 * Released under the General Public License (GPL).
 *
 * Simple, low overhead reverse mapping scheme.
 * Please try to keep this thing as modular as possible.
 *
 * Provides methods for unmapping each kind of mapped page:
 * the anon methods track anonymous pages, and the file methods
 * track pages belonging to an inode.
 */

/*
 * Lock ordering in mm:
 *
 * inode->i_rwsem	(while writing or truncating, not reading or faulting)
 *   mm->mmap_lock
 *     mapping->invalidate_lock (in filemap_fault)
 *       folio_lock
 *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
 *           vma_start_write
 *             mapping->i_mmap_rwsem
 *               anon_vma->rwsem
 *                 mm->page_table_lock or pte_lock
 *                   swap_lock (in swap_duplicate, swap_info_get)
 *                     mmlist_lock (in mmput, drain_mmlist and others)
 *                     mapping->private_lock (in block_dirty_folio)
 *                       i_pages lock (widely used)
 *                         lruvec->lru_lock (in folio_lruvec_lock_irq)
 *                     inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 *                     bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 *                       sb_lock (within inode_lock in fs/fs-writeback.c)
 *                       i_pages lock (widely used, in set_page_dirty,
 *                                     in arch-dependent flush_dcache_mmap_lock,
 *                                     within bdi.wb->list_lock in __sync_single_inode)
 *
 * anon_vma->rwsem, mapping->i_mmap_rwsem (memory_failure, collect_procs_anon)
 *   ->tasklist_lock
 *
 * hugetlbfs PageHuge() pages take locks in this order:
 *   hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 *     vma_lock (hugetlb specific lock for pmd_sharing)
 *       mapping->i_mmap_rwsem (also used for hugetlb pmd sharing)
 *         folio_lock
 */
55#include <linux/mm.h>
56#include <linux/sched/mm.h>
57#include <linux/sched/task.h>
58#include <linux/pagemap.h>
59#include <linux/swap.h>
60#include <linux/swapops.h>
61#include <linux/slab.h>
62#include <linux/init.h>
63#include <linux/ksm.h>
64#include <linux/rmap.h>
65#include <linux/rcupdate.h>
66#include <linux/export.h>
67#include <linux/memcontrol.h>
68#include <linux/mmu_notifier.h>
69#include <linux/migrate.h>
70#include <linux/hugetlb.h>
71#include <linux/huge_mm.h>
72#include <linux/backing-dev.h>
73#include <linux/page_idle.h>
74#include <linux/memremap.h>
75#include <linux/userfaultfd_k.h>
76#include <linux/mm_inline.h>
77#include <linux/oom.h>
78
79#include <asm/tlbflush.h>
80
81#define CREATE_TRACE_POINTS
82#include <trace/events/tlb.h>
83#include <trace/events/migrate.h>
84
85#include "internal.h"
86
87static struct kmem_cache *anon_vma_cachep;
88static struct kmem_cache *anon_vma_chain_cachep;
89
90static inline struct anon_vma *anon_vma_alloc(void)
91{
92 struct anon_vma *anon_vma;
93
94 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
95 if (anon_vma) {
96 atomic_set(&anon_vma->refcount, 1);
97 anon_vma->num_children = 0;
98 anon_vma->num_active_vmas = 0;
99 anon_vma->parent = anon_vma;
 /*
  * Initialise the anon_vma root to point to itself. If called
  * from fork, the root will be reset to the parent's.
  */
104 anon_vma->root = anon_vma;
105 }
106
107 return anon_vma;
108}
109
110static inline void anon_vma_free(struct anon_vma *anon_vma)
111{
112 VM_BUG_ON(atomic_read(&anon_vma->refcount));
 /*
  * Synchronize against folio_lock_anon_vma_read() such that
  * we can safely hold the lock without the anon_vma getting
  * freed.
  *
  * Relies on the full mb implied by the atomic_dec_and_test() from
  * put_anon_vma() against the acquire barrier implied by
  * down_read_trylock() from folio_lock_anon_vma_read(). This orders:
  *
  * folio_lock_anon_vma_read()	VS	put_anon_vma()
  *   down_read_trylock()		  atomic_dec_and_test()
  *   LOCK				  MB
  *   atomic_read()			  rwsem_is_locked()
  *
  * LOCK should suffice since the actual taking of the lock must
  * happen _before_ what follows.
  */
131 might_sleep();
132 if (rwsem_is_locked(&anon_vma->root->rwsem)) {
133 anon_vma_lock_write(anon_vma);
134 anon_vma_unlock_write(anon_vma);
135 }
136
137 kmem_cache_free(anon_vma_cachep, anon_vma);
138}
139
140static inline struct anon_vma_chain *anon_vma_chain_alloc(gfp_t gfp)
141{
142 return kmem_cache_alloc(anon_vma_chain_cachep, gfp);
143}
144
145static void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
146{
147 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
148}
149
150static void anon_vma_chain_link(struct vm_area_struct *vma,
151 struct anon_vma_chain *avc,
152 struct anon_vma *anon_vma)
153{
154 avc->vma = vma;
155 avc->anon_vma = anon_vma;
156 list_add(&avc->same_vma, &vma->anon_vma_chain);
157 anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
158}
159
/**
 * __anon_vma_prepare - attach an anon_vma to a memory region
 * @vma: the memory region in question
 *
 * This makes sure the memory mapping described by 'vma' has
 * an 'anon_vma' attached to it, so that we can associate the
 * anonymous pages mapped into it with that anon_vma.
 *
 * The common case will be that we already have one, which
 * is handled inline by anon_vma_prepare(). But if
 * not we either need to find an adjacent mapping that we
 * can re-use the anon_vma from (very common when the only
 * reason for splitting a vma has been mprotect()), or we
 * allocate a new one.
 *
 * Anon-vma allocations are very subtle, because we may have
 * optimistically looked up an anon_vma in folio_lock_anon_vma_read()
 * and that may actually touch the rwsem even in the newly
 * allocated vma (it depends on RCU to make sure that the
 * anon_vma isn't actually destroyed).
 *
 * As a result, we need to do proper anon_vma locking even
 * for the new allocation. At the same time, we do not want
 * to do any locking for the common case of already having
 * an anon_vma.
 */
186int __anon_vma_prepare(struct vm_area_struct *vma)
187{
188 struct mm_struct *mm = vma->vm_mm;
189 struct anon_vma *anon_vma, *allocated;
190 struct anon_vma_chain *avc;
191
192 mmap_assert_locked(mm);
193 might_sleep();
194
195 avc = anon_vma_chain_alloc(GFP_KERNEL);
196 if (!avc)
197 goto out_enomem;
198
199 anon_vma = find_mergeable_anon_vma(vma);
200 allocated = NULL;
201 if (!anon_vma) {
202 anon_vma = anon_vma_alloc();
203 if (unlikely(!anon_vma))
204 goto out_enomem_free_avc;
205 anon_vma->num_children++;
206 allocated = anon_vma;
207 }
208
209 anon_vma_lock_write(anon_vma);
210
211 spin_lock(&mm->page_table_lock);
212 if (likely(!vma->anon_vma)) {
213 vma->anon_vma = anon_vma;
214 anon_vma_chain_link(vma, avc, anon_vma);
215 anon_vma->num_active_vmas++;
216 allocated = NULL;
217 avc = NULL;
218 }
219 spin_unlock(&mm->page_table_lock);
220 anon_vma_unlock_write(anon_vma);
221
222 if (unlikely(allocated))
223 put_anon_vma(allocated);
224 if (unlikely(avc))
225 anon_vma_chain_free(avc);
226
227 return 0;
228
229 out_enomem_free_avc:
230 anon_vma_chain_free(avc);
231 out_enomem:
232 return -ENOMEM;
233}
234
/*
 * Helper for locking the anon_vma root as we traverse the
 * vma->anon_vma_chain, looping over anon_vma's that belong to the
 * same vma. Such anon_vma's should share the same root, so we expect
 * to take the root rwsem only once for the whole traversal.
 */
243static inline struct anon_vma *lock_anon_vma_root(struct anon_vma *root, struct anon_vma *anon_vma)
244{
245 struct anon_vma *new_root = anon_vma->root;
246 if (new_root != root) {
247 if (WARN_ON_ONCE(root))
248 up_write(&root->rwsem);
249 root = new_root;
250 down_write(&root->rwsem);
251 }
252 return root;
253}
254
255static inline void unlock_anon_vma_root(struct anon_vma *root)
256{
257 if (root)
258 up_write(&root->rwsem);
259}
260
/*
 * Attach the anon_vmas from src to dst.
 * Returns 0 on success, -ENOMEM on failure.
 *
 * Callers that split, merge or copy a VMA want an exact copy of src's
 * anon_vma_chain, while anon_vma_fork() may instead try to reuse an existing
 * anon_vma to prevent endless growth of the anon_vma hierarchy. Since
 * anon_vma_fork() sets dst->anon_vma to NULL before the call, that case can
 * be identified by checking (!dst->anon_vma && src->anon_vma).
 *
 * In that case this function tries to find and reuse an existing anon_vma
 * that has no active vmas and only one child. This prevents degradation of
 * the anon_vma hierarchy into an endless linear chain for a constantly
 * forking task, while still giving the rmap walker a good chance of skipping
 * most of the hierarchy when it searches for where a page is mapped.
 */
280int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
281{
282 struct anon_vma_chain *avc, *pavc;
283 struct anon_vma *root = NULL;
284
285 list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
286 struct anon_vma *anon_vma;
287
288 avc = anon_vma_chain_alloc(GFP_NOWAIT | __GFP_NOWARN);
289 if (unlikely(!avc)) {
290 unlock_anon_vma_root(root);
291 root = NULL;
292 avc = anon_vma_chain_alloc(GFP_KERNEL);
293 if (!avc)
294 goto enomem_failure;
295 }
296 anon_vma = pavc->anon_vma;
297 root = lock_anon_vma_root(root, anon_vma);
298 anon_vma_chain_link(dst, avc, anon_vma);
299
 /*
  * Reuse an existing anon_vma if it has no active vmas and
  * only one child: see the reuse rules described above
  * anon_vma_clone(). The root anon_vma is never reused, as it
  * has a self-parent reference and at least one child.
  */
307 if (!dst->anon_vma && src->anon_vma &&
308 anon_vma->num_children < 2 &&
309 anon_vma->num_active_vmas == 0)
310 dst->anon_vma = anon_vma;
311 }
312 if (dst->anon_vma)
313 dst->anon_vma->num_active_vmas++;
314 unlock_anon_vma_root(root);
315 return 0;
316
317 enomem_failure:
 /*
  * dst->anon_vma is dropped here, otherwise its num_active_vmas could
  * be incorrectly decremented in unlink_anon_vmas(). We can safely do
  * this because callers of anon_vma_clone() don't care about
  * dst->anon_vma if anon_vma_clone() fails.
  */
324 dst->anon_vma = NULL;
325 unlink_anon_vmas(dst);
326 return -ENOMEM;
327}
328
/*
 * Attach vma to its own anon_vma, as well as to the anon_vmas that
 * the corresponding VMA in the parent process is attached to.
 * Returns 0 on success, non-zero on failure.
 */
334int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
335{
336 struct anon_vma_chain *avc;
337 struct anon_vma *anon_vma;
338 int error;
339
340
341 if (!pvma->anon_vma)
342 return 0;
343
344
345 vma->anon_vma = NULL;
346
 /*
  * First, attach the new VMA to the parent VMA's anon_vmas,
  * so rmap can find non-COWed pages in child processes.
  */
351 error = anon_vma_clone(vma, pvma);
352 if (error)
353 return error;
354
355
356 if (vma->anon_vma)
357 return 0;
358
359
360 anon_vma = anon_vma_alloc();
361 if (!anon_vma)
362 goto out_error;
363 anon_vma->num_active_vmas++;
364 avc = anon_vma_chain_alloc(GFP_KERNEL);
365 if (!avc)
366 goto out_error_free_anon_vma;
367
 /*
  * The root anon_vma's rwsem is the lock actually used when we
  * lock any of the anon_vmas in this anon_vma tree.
  */
372 anon_vma->root = pvma->anon_vma->root;
373 anon_vma->parent = pvma->anon_vma;
374
375
376
377
378
379 get_anon_vma(anon_vma->root);
380
381 vma->anon_vma = anon_vma;
382 anon_vma_lock_write(anon_vma);
383 anon_vma_chain_link(vma, avc, anon_vma);
384 anon_vma->parent->num_children++;
385 anon_vma_unlock_write(anon_vma);
386
387 return 0;
388
389 out_error_free_anon_vma:
390 put_anon_vma(anon_vma);
391 out_error:
392 unlink_anon_vmas(vma);
393 return -ENOMEM;
394}
395
396void unlink_anon_vmas(struct vm_area_struct *vma)
397{
398 struct anon_vma_chain *avc, *next;
399 struct anon_vma *root = NULL;
400
 /*
  * Unlink each anon_vma chained to the VMA.  This list is ordered
  * from newest to oldest, ensuring the root anon_vma gets freed last.
  */
405 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
406 struct anon_vma *anon_vma = avc->anon_vma;
407
408 root = lock_anon_vma_root(root, anon_vma);
409 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
 /*
  * Leave empty anon_vmas on the list - we'll need
  * to free them outside the lock.
  */
415 if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
416 anon_vma->parent->num_children--;
417 continue;
418 }
419
420 list_del(&avc->same_vma);
421 anon_vma_chain_free(avc);
422 }
423 if (vma->anon_vma) {
424 vma->anon_vma->num_active_vmas--;
425
426
427
428
429
430 vma->anon_vma = NULL;
431 }
432 unlock_anon_vma_root(root);
433
 /*
  * Iterate the list once more, it now only contains empty and unlinked
  * anon_vmas, destroy them. Could not do before due to __put_anon_vma()
  * needing to write-acquire the anon_vma->root->rwsem.
  */
439 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
440 struct anon_vma *anon_vma = avc->anon_vma;
441
442 VM_WARN_ON(anon_vma->num_children);
443 VM_WARN_ON(anon_vma->num_active_vmas);
444 put_anon_vma(anon_vma);
445
446 list_del(&avc->same_vma);
447 anon_vma_chain_free(avc);
448 }
449}
450
451static void anon_vma_ctor(void *data)
452{
453 struct anon_vma *anon_vma = data;
454
455 init_rwsem(&anon_vma->rwsem);
456 atomic_set(&anon_vma->refcount, 0);
457 anon_vma->rb_root = RB_ROOT_CACHED;
458}
459
460void __init anon_vma_init(void)
461{
462 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
463 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT,
464 anon_vma_ctor);
465 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain,
466 SLAB_PANIC|SLAB_ACCOUNT);
467}
468
/*
 * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
 *
 * Since there is no serialization whatsoever against folio_remove_rmap_*(),
 * the best this function can do is return a refcount-elevated anon_vma that
 * *might* have been relevant to this folio.
 *
 * The folio might have been remapped to a different anon_vma, or the anon_vma
 * returned may already have been freed (and even reused).
 *
 * If the folio was remapped, the new anon_vma will be a child of the old one,
 * and the anon_vma lifetime rules therefore guarantee that any anon_vma
 * obtained from the folio is still valid for as long as we observe
 * folio_mapped() -- hence all the folio_mapped() tests below.
 *
 * Note that SLAB_TYPESAFE_BY_RCU only guarantees the memory still holds an
 * anon_vma while we are under rcu_read_lock(); it may have been freed and
 * reinitialised, which is why the refcount is taken with
 * atomic_inc_not_zero() and the mapping is rechecked afterwards.
 */
499struct anon_vma *folio_get_anon_vma(const struct folio *folio)
500{
501 struct anon_vma *anon_vma = NULL;
502 unsigned long anon_mapping;
503
504 rcu_read_lock();
505 anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
506 if ((anon_mapping & FOLIO_MAPPING_FLAGS) != FOLIO_MAPPING_ANON)
507 goto out;
508 if (!folio_mapped(folio))
509 goto out;
510
511 anon_vma = (struct anon_vma *) (anon_mapping - FOLIO_MAPPING_ANON);
512 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
513 anon_vma = NULL;
514 goto out;
515 }
516
517
518
519
520
521
522
523
524 if (!folio_mapped(folio)) {
525 rcu_read_unlock();
526 put_anon_vma(anon_vma);
527 return NULL;
528 }
529out:
530 rcu_read_unlock();
531
532 return anon_vma;
533}
534
/*
 * Similar to folio_get_anon_vma() except it locks the anon_vma.
 *
 * Its a little more complex as it tries to keep the fast path to a single
 * atomic op -- the trylock. If we fail the trylock, we fall back to getting a
 * reference like with folio_get_anon_vma() and then block on the anon_vma
 * rwsem (unless rwc->try_lock is set, in which case we bail out as contended).
 */
543struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
544 struct rmap_walk_control *rwc)
545{
546 struct anon_vma *anon_vma = NULL;
547 struct anon_vma *root_anon_vma;
548 unsigned long anon_mapping;
549
550retry:
551 rcu_read_lock();
552 anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
553 if ((anon_mapping & FOLIO_MAPPING_FLAGS) != FOLIO_MAPPING_ANON)
554 goto out;
555 if (!folio_mapped(folio))
556 goto out;
557
558 anon_vma = (struct anon_vma *) (anon_mapping - FOLIO_MAPPING_ANON);
559 root_anon_vma = READ_ONCE(anon_vma->root);
560 if (down_read_trylock(&root_anon_vma->rwsem)) {
561
562
563
564
565 if (unlikely((unsigned long)READ_ONCE(folio->mapping) !=
566 anon_mapping)) {
567 up_read(&root_anon_vma->rwsem);
568 rcu_read_unlock();
569 goto retry;
570 }
571
572
573
574
575
576
577 if (!folio_mapped(folio)) {
578 up_read(&root_anon_vma->rwsem);
579 anon_vma = NULL;
580 }
581 goto out;
582 }
583
584 if (rwc && rwc->try_lock) {
585 anon_vma = NULL;
586 rwc->contended = true;
587 goto out;
588 }
589
590
591 if (!atomic_inc_not_zero(&anon_vma->refcount)) {
592 anon_vma = NULL;
593 goto out;
594 }
595
596 if (!folio_mapped(folio)) {
597 rcu_read_unlock();
598 put_anon_vma(anon_vma);
599 return NULL;
600 }
601
602
603 rcu_read_unlock();
604 anon_vma_lock_read(anon_vma);
605
606
607
608
609
610 if (unlikely((unsigned long)READ_ONCE(folio->mapping) !=
611 anon_mapping)) {
612 anon_vma_unlock_read(anon_vma);
613 put_anon_vma(anon_vma);
614 anon_vma = NULL;
615 goto retry;
616 }
617
618 if (atomic_dec_and_test(&anon_vma->refcount)) {
619
620
621
622
623
624 anon_vma_unlock_read(anon_vma);
625 __put_anon_vma(anon_vma);
626 anon_vma = NULL;
627 }
628
629 return anon_vma;
630
631out:
632 rcu_read_unlock();
633 return anon_vma;
634}
635
636#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
/*
 * Flush TLB entries for recently unmapped pages from remote CPUs. It is
 * important if a PTE was dirty when it was unmapped that it's flushed
 * before any IO is initiated on the page to prevent lost writes. Similarly
 * it must be flushed before freeing to prevent data leakage.
 */
643void try_to_unmap_flush(void)
644{
645 struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
646
647 if (!tlb_ubc->flush_required)
648 return;
649
650 arch_tlbbatch_flush(&tlb_ubc->arch);
651 tlb_ubc->flush_required = false;
652 tlb_ubc->writable = false;
653}
654
/* Flush iff there are potentially writable TLB entries that can race with IO */
656void try_to_unmap_flush_dirty(void)
657{
658 struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
659
660 if (tlb_ubc->writable)
661 try_to_unmap_flush();
662}
663
/*
 * Bits 0-14 of mm->tlb_flush_batched record pending generations.
 * Bits 16-30 of mm->tlb_flush_batched record flushed generations.
 */
668#define TLB_FLUSH_BATCH_FLUSHED_SHIFT 16
669#define TLB_FLUSH_BATCH_PENDING_MASK \
670 ((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)
671#define TLB_FLUSH_BATCH_PENDING_LARGE \
672 (TLB_FLUSH_BATCH_PENDING_MASK / 2)
673
674static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
675 unsigned long start, unsigned long end)
676{
677 struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
678 int batch;
679 bool writable = pte_dirty(pteval);
680
681 if (!pte_accessible(mm, pteval))
682 return;
683
684 arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, start, end);
685 tlb_ubc->flush_required = true;
 /*
  * Ensure the compiler does not re-order the setting of tlb_flush_batched
  * before the PTE is cleared.
  */
691 barrier();
692 batch = atomic_read(&mm->tlb_flush_batched);
693retry:
694 if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) {
695
696
697
698
699
700 if (!atomic_try_cmpxchg(&mm->tlb_flush_batched, &batch, 1))
701 goto retry;
702 } else {
703 atomic_inc(&mm->tlb_flush_batched);
704 }
705
706
707
708
709
710
711 if (writable)
712 tlb_ubc->writable = true;
713}
714
/*
 * Returns true if the TLB flush should be deferred to the end of a batch of
 * unmap operations to save a TLB flush per page.
 */
719static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
720{
721 if (!(flags & TTU_BATCH_FLUSH))
722 return false;
723
724 return arch_tlbbatch_should_defer(mm);
725}
726
/*
 * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
 * releasing the PTL if TLB flushes are batched. It's possible for a parallel
 * operation such as mprotect or munmap to race between reclaim unmapping
 * the page and flushing the page. If this race occurs, it potentially allows
 * access to data via a stale TLB entry. Tracking all mm's that have TLB
 * batching pending would be expensive during reclaim, so instead track
 * whether TLB batching occurred in the past and if so then do a flush here
 * if required. This will cost one additional flush per reclaim cycle paid
 * by the first operation at risk such as mprotect and munmap.
 *
 * This must be called under the PTL so that an access to tlb_flush_batched
 * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
 * via the PTL.
 */
742void flush_tlb_batched_pending(struct mm_struct *mm)
743{
744 int batch = atomic_read(&mm->tlb_flush_batched);
745 int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK;
746 int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;
747
748 if (pending != flushed) {
749 flush_tlb_mm(mm);
750
751
752
753
754 atomic_cmpxchg(&mm->tlb_flush_batched, batch,
755 pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));
756 }
757}
758#else
759static void set_tlb_ubc_flush_pending(struct mm_struct *mm, pte_t pteval,
760 unsigned long start, unsigned long end)
761{
762}
763
764static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
765{
766 return false;
767}
768#endif
769
/**
 * page_address_in_vma - The virtual address of a page in this VMA.
 * @folio: The folio containing the page.
 * @page: The page within the folio.
 * @vma: The VMA we need to know the address in.
 *
 * Calculates the address that the page is mapped at in this VMA.
 * It is the responsibility of the caller to check that the folio and
 * VMA can possibly be related: anon folios must belong to the same
 * anon_vma root as @vma, and file folios must belong to @vma's file
 * mapping, otherwise -EFAULT is returned.
 *
 * Return: The virtual address corresponding to this page in the VMA,
 * or -EFAULT if the page cannot be mapped in @vma.
 */
788unsigned long page_address_in_vma(const struct folio *folio,
789 const struct page *page, const struct vm_area_struct *vma)
790{
791 if (folio_test_anon(folio)) {
792 struct anon_vma *anon_vma = folio_anon_vma(folio);
793
794
795
796
797 if (!vma->anon_vma || !anon_vma ||
798 vma->anon_vma->root != anon_vma->root)
799 return -EFAULT;
800 } else if (!vma->vm_file) {
801 return -EFAULT;
802 } else if (vma->vm_file->f_mapping != folio->mapping) {
803 return -EFAULT;
804 }
805
806
807 return vma_address(vma, page_pgoff(folio, page), 1);
808}
809
/*
 * Returns the actual pmd_t* where we expect 'address' to be mapped from, or
 * NULL if it doesn't exist.  No guarantees / checks on what the pmd_t*
 * returned points to, it may not be a huge pmd.
 */
815pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
816{
817 pgd_t *pgd;
818 p4d_t *p4d;
819 pud_t *pud;
820 pmd_t *pmd = NULL;
821
822 pgd = pgd_offset(mm, address);
823 if (!pgd_present(*pgd))
824 goto out;
825
826 p4d = p4d_offset(pgd, address);
827 if (!p4d_present(*p4d))
828 goto out;
829
830 pud = pud_offset(p4d, address);
831 if (!pud_present(*pud))
832 goto out;
833
834 pmd = pmd_offset(pud, address);
835out:
836 return pmd;
837}
838
839struct folio_referenced_arg {
840 int mapcount;
841 int referenced;
842 vm_flags_t vm_flags;
843 struct mem_cgroup *memcg;
844};
845
/*
 * arg: folio_referenced_arg will be passed
 */
849static bool folio_referenced_one(struct folio *folio,
850 struct vm_area_struct *vma, unsigned long address, void *arg)
851{
852 struct folio_referenced_arg *pra = arg;
853 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
854 int referenced = 0;
855 unsigned long start = address, ptes = 0;
856
857 while (page_vma_mapped_walk(&pvmw)) {
858 address = pvmw.address;
859
860 if (vma->vm_flags & VM_LOCKED) {
861 if (!folio_test_large(folio) || !pvmw.pte) {
862
863 mlock_vma_folio(folio, vma);
864 page_vma_mapped_walk_done(&pvmw);
865 pra->vm_flags |= VM_LOCKED;
866 return false;
867 }
868
869
870
871
872
873
874
875
876
877
878
879 ptes++;
880 pra->mapcount--;
881 continue;
882 }
883
884
885
886
887
888
889 if ((!atomic_read(&vma->vm_mm->mm_users) ||
890 check_stable_address_space(vma->vm_mm)) &&
891 folio_test_anon(folio) && folio_test_swapbacked(folio) &&
892 !folio_maybe_mapped_shared(folio)) {
893 pra->referenced = -1;
894 page_vma_mapped_walk_done(&pvmw);
895 return false;
896 }
897
898 if (lru_gen_enabled() && pvmw.pte) {
899 if (lru_gen_look_around(&pvmw))
900 referenced++;
901 } else if (pvmw.pte) {
902 if (ptep_clear_flush_young_notify(vma, address,
903 pvmw.pte))
904 referenced++;
905 } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
906 if (pmdp_clear_flush_young_notify(vma, address,
907 pvmw.pmd))
908 referenced++;
909 } else {
910
911 WARN_ON_ONCE(1);
912 }
913
914 pra->mapcount--;
915 }
916
917 if ((vma->vm_flags & VM_LOCKED) &&
918 folio_test_large(folio) &&
919 folio_within_vma(folio, vma)) {
920 unsigned long s_align, e_align;
921
922 s_align = ALIGN_DOWN(start, PMD_SIZE);
923 e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
924
925
926 if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
927
928 mlock_vma_folio(folio, vma);
929 pra->vm_flags |= VM_LOCKED;
930 return false;
931 }
932 }
933
934 if (referenced)
935 folio_clear_idle(folio);
936 if (folio_test_clear_young(folio))
937 referenced++;
938
939 if (referenced) {
940 pra->referenced++;
941 pra->vm_flags |= vma->vm_flags & ~VM_LOCKED;
942 }
943
944 if (!pra->mapcount)
945 return false;
946
947 return true;
948}
949
950static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
951{
952 struct folio_referenced_arg *pra = arg;
953 struct mem_cgroup *memcg = pra->memcg;
954
955
956
957
958
959
960
961 if (!vma_has_recency(vma))
962 return true;
963
964
965
966
967
968 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
969 return true;
970
971 return false;
972}
973
/**
 * folio_referenced() - Test if the folio was referenced.
 * @folio: The folio to test.
 * @is_locked: Caller holds lock on the folio.
 * @memcg: target memory cgroup
 * @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
 *
 * Quick test_and_clear_referenced for all mappings of a folio.
 *
 * Return: The number of mappings which referenced the folio. Return -1 if
 * the function bailed out due to rmap lock contention.
 */
986int folio_referenced(struct folio *folio, int is_locked,
987 struct mem_cgroup *memcg, vm_flags_t *vm_flags)
988{
989 bool we_locked = false;
990 struct folio_referenced_arg pra = {
991 .mapcount = folio_mapcount(folio),
992 .memcg = memcg,
993 };
994 struct rmap_walk_control rwc = {
995 .rmap_one = folio_referenced_one,
996 .arg = (void *)&pra,
997 .anon_lock = folio_lock_anon_vma_read,
998 .try_lock = true,
999 .invalid_vma = invalid_folio_referenced_vma,
1000 };
1001
1002 *vm_flags = 0;
1003 if (!pra.mapcount)
1004 return 0;
1005
1006 if (!folio_raw_mapping(folio))
1007 return 0;
1008
1009 if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) {
1010 we_locked = folio_trylock(folio);
1011 if (!we_locked)
1012 return 1;
1013 }
1014
1015 rmap_walk(folio, &rwc);
1016 *vm_flags = pra.vm_flags;
1017
1018 if (we_locked)
1019 folio_unlock(folio);
1020
1021 return rwc.contended ? -1 : pra.referenced;
1022}
1023
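/*
 * Walk the page table entries covered by @pvmw and write-protect + clean each
 * one that is currently writable or dirty, issuing the required cache/TLB
 * flushes and MMU notifier calls. Returns the number of entries cleaned.
 */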
1024static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw)
1025{
1026 int cleaned = 0;
1027 struct vm_area_struct *vma = pvmw->vma;
1028 struct mmu_notifier_range range;
1029 unsigned long address = pvmw->address;
1030
1031
1032
1033
1034
1035 mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
1036 vma->vm_mm, address, vma_address_end(pvmw));
1037 mmu_notifier_invalidate_range_start(&range);
1038
1039 while (page_vma_mapped_walk(pvmw)) {
1040 int ret = 0;
1041
1042 address = pvmw->address;
1043 if (pvmw->pte) {
1044 pte_t *pte = pvmw->pte;
1045 pte_t entry = ptep_get(pte);
1046
1047
1048
1049
1050
1051
1052
1053 if (!pte_present(entry))
1054 continue;
1055 if (!pte_dirty(entry) && !pte_write(entry))
1056 continue;
1057
1058 flush_cache_page(vma, address, pte_pfn(entry));
1059 entry = ptep_clear_flush(vma, address, pte);
1060 entry = pte_wrprotect(entry);
1061 entry = pte_mkclean(entry);
1062 set_pte_at(vma->vm_mm, address, pte, entry);
1063 ret = 1;
1064 } else {
1065#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1066 pmd_t *pmd = pvmw->pmd;
1067 pmd_t entry;
1068
1069 if (!pmd_dirty(*pmd) && !pmd_write(*pmd))
1070 continue;
1071
1072 flush_cache_range(vma, address,
1073 address + HPAGE_PMD_SIZE);
1074 entry = pmdp_invalidate(vma, address, pmd);
1075 entry = pmd_wrprotect(entry);
1076 entry = pmd_mkclean(entry);
1077 set_pmd_at(vma->vm_mm, address, pmd, entry);
1078 ret = 1;
1079#else
1080
1081 WARN_ON_ONCE(1);
1082#endif
1083 }
1084
1085 if (ret)
1086 cleaned++;
1087 }
1088
1089 mmu_notifier_invalidate_range_end(&range);
1090
1091 return cleaned;
1092}
1093
1094static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma,
1095 unsigned long address, void *arg)
1096{
1097 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC);
1098 int *cleaned = arg;
1099
1100 *cleaned += page_vma_mkclean_one(&pvmw);
1101
1102 return true;
1103}
1104
1105static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
1106{
1107 if (vma->vm_flags & VM_SHARED)
1108 return false;
1109
1110 return true;
1111}
1112
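/*
 * folio_mkclean - Cleans and write-protects all PTEs which map this folio.
 * @folio: The locked folio to clean.
 *
 * Returns the number of page table entries which were cleaned.
 */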
1113int folio_mkclean(struct folio *folio)
1114{
1115 int cleaned = 0;
1116 struct address_space *mapping;
1117 struct rmap_walk_control rwc = {
1118 .arg = (void *)&cleaned,
1119 .rmap_one = page_mkclean_one,
1120 .invalid_vma = invalid_mkclean_vma,
1121 };
1122
1123 BUG_ON(!folio_test_locked(folio));
1124
1125 if (!folio_mapped(folio))
1126 return 0;
1127
1128 mapping = folio_mapping(folio);
1129 if (!mapping)
1130 return 0;
1131
1132 rmap_walk(folio, &rwc);
1133
1134 return cleaned;
1135}
1136EXPORT_SYMBOL_GPL(folio_mkclean);
1137
1138struct wrprotect_file_state {
1139 int cleaned;
1140 pgoff_t pgoff;
1141 unsigned long pfn;
1142 unsigned long nr_pages;
1143};
1144
1145static bool mapping_wrprotect_range_one(struct folio *folio,
1146 struct vm_area_struct *vma, unsigned long address, void *arg)
1147{
1148 struct wrprotect_file_state *state = (struct wrprotect_file_state *)arg;
1149 struct page_vma_mapped_walk pvmw = {
1150 .pfn = state->pfn,
1151 .nr_pages = state->nr_pages,
1152 .pgoff = state->pgoff,
1153 .vma = vma,
1154 .address = address,
1155 .flags = PVMW_SYNC,
1156 };
1157
1158 state->cleaned += page_vma_mkclean_one(&pvmw);
1159
1160 return true;
1161}
1162
1163static void __rmap_walk_file(struct folio *folio, struct address_space *mapping,
1164 pgoff_t pgoff_start, unsigned long nr_pages,
1165 struct rmap_walk_control *rwc, bool locked);
1166
/**
 * mapping_wrprotect_range() - Write-protect all mappings in a specified range.
 * @mapping:	The mapping whose reverse mapping should be traversed.
 * @pgoff:	The page offset at which @pfn is mapped within @mapping.
 * @pfn:	The PFN of the first page mapped in @mapping at @pgoff.
 * @nr_pages:	The number of physically contiguous base pages spanned.
 *
 * Traverses the reverse mapping, finding all VMAs which contain a shared
 * mapping of the pages in the specified range in @mapping, and write-protects
 * them (that is, updates the page tables to mark the mappings read-only such
 * that any write attempts cause a page fault).
 *
 * This is useful for ranges which are not backed by folios the caller has at
 * hand, only a PFN range: the walk is keyed purely on @mapping, @pgoff and
 * @pfn.
 *
 * Return: the number of write-protected page table entries.
 */
1187int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff,
1188 unsigned long pfn, unsigned long nr_pages)
1189{
1190 struct wrprotect_file_state state = {
1191 .cleaned = 0,
1192 .pgoff = pgoff,
1193 .pfn = pfn,
1194 .nr_pages = nr_pages,
1195 };
1196 struct rmap_walk_control rwc = {
1197 .arg = (void *)&state,
1198 .rmap_one = mapping_wrprotect_range_one,
1199 .invalid_vma = invalid_mkclean_vma,
1200 };
1201
1202 if (!mapping)
1203 return 0;
1204
1205 __rmap_walk_file(NULL, mapping, pgoff, nr_pages, &rwc,
1206 false);
1207
1208 return state.cleaned;
1209}
1210EXPORT_SYMBOL_GPL(mapping_wrprotect_range);
1211
/*
 * pfn_mkclean_range - Cleans the PTEs (including PMDs) mapped with range of
 *                     [@pfn, @pfn + @nr_pages) at the specific offset (@pgoff)
 *                     within the @vma of shared mappings. And since clean PTEs
 *                     should also be readonly, write protects them too.
 * @pfn: start pfn.
 * @nr_pages: number of physically contiguous pages surrounded by @pfn.
 * @pgoff: page offset that the @pfn mapped with.
 * @vma: vma that @pfn mapped within.
 *
 * Returns the number of cleaned PTEs (including PMDs).
 */
1224int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
1225 struct vm_area_struct *vma)
1226{
1227 struct page_vma_mapped_walk pvmw = {
1228 .pfn = pfn,
1229 .nr_pages = nr_pages,
1230 .pgoff = pgoff,
1231 .vma = vma,
1232 .flags = PVMW_SYNC,
1233 };
1234
1235 if (invalid_mkclean_vma(vma, NULL))
1236 return 0;
1237
1238 pvmw.address = vma_address(vma, pgoff, nr_pages);
1239 VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma);
1240
1241 return page_vma_mkclean_one(&pvmw);
1242}
1243
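/*
 * Account a new rmap of @nr_pages pages of @folio, starting at @page, at the
 * given @level. Returns the number of pages that transitioned from unmapped
 * to mapped (for the NR_*_MAPPED counters); if a PMD mapping was the first
 * entire mapping, *@nr_pmdmapped is set to the folio's page count.
 */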
1244static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
1245 struct page *page, int nr_pages, struct vm_area_struct *vma,
1246 enum rmap_level level, int *nr_pmdmapped)
1247{
1248 atomic_t *mapped = &folio->_nr_pages_mapped;
1249 const int orig_nr_pages = nr_pages;
1250 int first = 0, nr = 0;
1251
1252 __folio_rmap_sanity_checks(folio, page, nr_pages, level);
1253
1254 switch (level) {
1255 case RMAP_LEVEL_PTE:
1256 if (!folio_test_large(folio)) {
1257 nr = atomic_inc_and_test(&folio->_mapcount);
1258 break;
1259 }
1260
1261 if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
1262 nr = folio_add_return_large_mapcount(folio, orig_nr_pages, vma);
1263 if (nr == orig_nr_pages)
1264
1265 nr = folio_large_nr_pages(folio);
1266 else
1267 nr = 0;
1268 break;
1269 }
1270
1271 do {
1272 first += atomic_inc_and_test(&page->_mapcount);
1273 } while (page++, --nr_pages > 0);
1274
1275 if (first &&
1276 atomic_add_return_relaxed(first, mapped) < ENTIRELY_MAPPED)
1277 nr = first;
1278
1279 folio_add_large_mapcount(folio, orig_nr_pages, vma);
1280 break;
1281 case RMAP_LEVEL_PMD:
1282 case RMAP_LEVEL_PUD:
1283 first = atomic_inc_and_test(&folio->_entire_mapcount);
1284 if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
1285 if (level == RMAP_LEVEL_PMD && first)
1286 *nr_pmdmapped = folio_large_nr_pages(folio);
1287 nr = folio_inc_return_large_mapcount(folio, vma);
1288 if (nr == 1)
1289
1290 nr = folio_large_nr_pages(folio);
1291 else
1292 nr = 0;
1293 break;
1294 }
1295
1296 if (first) {
1297 nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped);
1298 if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) {
1299 nr_pages = folio_large_nr_pages(folio);
1300
1301
1302
1303
1304 if (level == RMAP_LEVEL_PMD)
1305 *nr_pmdmapped = nr_pages;
1306 nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
1307
1308 if (unlikely(nr < 0))
1309 nr = 0;
1310 } else {
1311
1312 nr = 0;
1313 }
1314 }
1315 folio_inc_large_mapcount(folio, vma);
1316 break;
1317 }
1318 return nr;
1319}
1320
/**
 * folio_move_anon_rmap - move a folio to our anon_vma
 * @folio:	The folio to move to our anon_vma
 * @vma:	The vma the folio belongs to
 *
 * When a folio belongs exclusively to one process after a COW event,
 * that folio can be moved into the anon_vma that belongs to just that
 * process, so the rmap code will not search the parent or sibling processes.
 */
1330void folio_move_anon_rmap(struct folio *folio, struct vm_area_struct *vma)
1331{
1332 void *anon_vma = vma->anon_vma;
1333
1334 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
1335 VM_BUG_ON_VMA(!anon_vma, vma);
1336
1337 anon_vma += FOLIO_MAPPING_ANON;
1338
1339
1340
1341
1342
1343 WRITE_ONCE(folio->mapping, anon_vma);
1344}
1345
/**
 * __folio_set_anon - set up a new anonymous rmap for a folio
 * @folio:	The folio to set up the new anonymous rmap for.
 * @vma:	VM area to add the folio to.
 * @address:	User virtual address of the mapping
 * @exclusive:	Whether the folio is exclusive to the process.
 */
1353static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma,
1354 unsigned long address, bool exclusive)
1355{
1356 struct anon_vma *anon_vma = vma->anon_vma;
1357
1358 BUG_ON(!anon_vma);
1359
1360
1361
1362
1363
1364 if (!exclusive)
1365 anon_vma = anon_vma->root;
1366
1367
1368
1369
1370
1371
1372
1373 anon_vma = (void *) anon_vma + FOLIO_MAPPING_ANON;
1374 WRITE_ONCE(folio->mapping, (struct address_space *) anon_vma);
1375 folio->index = linear_page_index(vma, address);
1376}
1377
/**
 * __page_check_anon_rmap - sanity check anonymous rmap addition
 * @folio:	The folio containing @page.
 * @page:	the page to check the mapping of
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 */
1385static void __page_check_anon_rmap(const struct folio *folio,
1386 const struct page *page, struct vm_area_struct *vma,
1387 unsigned long address)
1388{
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400 VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
1401 folio);
1402 VM_BUG_ON_PAGE(page_pgoff(folio, page) != linear_page_index(vma, address),
1403 page);
1404}
1405
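/*
 * Update the per-lruvec / per-node mapped-page counters after an rmap change:
 * @nr pages for NR_ANON_MAPPED/NR_FILE_MAPPED and @nr_pmdmapped pages for the
 * THP / PMD-mapped counters. Negative values decrement the counters.
 */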
1406static void __folio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
1407{
1408 int idx;
1409
1410 if (nr) {
1411 idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
1412 __lruvec_stat_mod_folio(folio, idx, nr);
1413 }
1414 if (nr_pmdmapped) {
1415 if (folio_test_anon(folio)) {
1416 idx = NR_ANON_THPS;
1417 __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
1418 } else {
1419
1420 idx = folio_test_swapbacked(folio) ?
1421 NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
1422 __mod_node_page_state(folio_pgdat(folio), idx,
1423 nr_pmdmapped);
1424 }
1425 }
1426}
1427
1428static __always_inline void __folio_add_anon_rmap(struct folio *folio,
1429 struct page *page, int nr_pages, struct vm_area_struct *vma,
1430 unsigned long address, rmap_t flags, enum rmap_level level)
1431{
1432 int i, nr, nr_pmdmapped = 0;
1433
1434 VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
1435
1436 nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped);
1437
1438 if (likely(!folio_test_ksm(folio)))
1439 __page_check_anon_rmap(folio, page, vma, address);
1440
1441 __folio_mod_stat(folio, nr, nr_pmdmapped);
1442
1443 if (flags & RMAP_EXCLUSIVE) {
1444 switch (level) {
1445 case RMAP_LEVEL_PTE:
1446 for (i = 0; i < nr_pages; i++)
1447 SetPageAnonExclusive(page + i);
1448 break;
1449 case RMAP_LEVEL_PMD:
1450 SetPageAnonExclusive(page);
1451 break;
1452 case RMAP_LEVEL_PUD:
1453
1454
1455
1456
1457 WARN_ON_ONCE(1);
1458 break;
1459 }
1460 }
1461
1462 VM_WARN_ON_FOLIO(!folio_test_large(folio) && PageAnonExclusive(page) &&
1463 atomic_read(&folio->_mapcount) > 0, folio);
1464 for (i = 0; i < nr_pages; i++) {
1465 struct page *cur_page = page + i;
1466
1467 VM_WARN_ON_FOLIO(folio_test_large(folio) &&
1468 folio_entire_mapcount(folio) > 1 &&
1469 PageAnonExclusive(cur_page), folio);
1470 if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT))
1471 continue;
1472
1473
1474
1475
1476
1477 VM_WARN_ON_FOLIO(atomic_read(&cur_page->_mapcount) > 0 &&
1478 PageAnonExclusive(cur_page), folio);
1479 }
1480
1481
1482
1483
1484
1485
1486
1487 if (!folio_test_large(folio))
1488 mlock_vma_folio(folio, vma);
1489}
1490
/**
 * folio_add_anon_rmap_ptes - add PTE mappings to a page range of an anon folio
 * @folio:	The folio to add the mappings to
 * @page:	The first page to add
 * @nr_pages:	The number of pages which will be mapped
 * @vma:	The vm area in which the mappings are added
 * @address:	The user virtual address of the first page to map
 * @flags:	The rmap flags
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting,
 * and to ensure that an anon folio is not being upgraded racily to a KSM folio
 * (but KSM folios are never downgraded).
 */
1507void folio_add_anon_rmap_ptes(struct folio *folio, struct page *page,
1508 int nr_pages, struct vm_area_struct *vma, unsigned long address,
1509 rmap_t flags)
1510{
1511 __folio_add_anon_rmap(folio, page, nr_pages, vma, address, flags,
1512 RMAP_LEVEL_PTE);
1513}
1514
/**
 * folio_add_anon_rmap_pmd - add a PMD mapping to a page range of an anon folio
 * @folio:	The folio to add the mapping to
 * @page:	The first page to add
 * @vma:	The vm area in which the mapping is added
 * @address:	The user virtual address of the first page to map
 * @flags:	The rmap flags
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock, and the page must be locked in
 * the anon_vma case: to serialize mapping,index checking after setting.
 */
1528void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
1529 struct vm_area_struct *vma, unsigned long address, rmap_t flags)
1530{
1531#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1532 __folio_add_anon_rmap(folio, page, HPAGE_PMD_NR, vma, address, flags,
1533 RMAP_LEVEL_PMD);
1534#else
1535 WARN_ON_ONCE(true);
1536#endif
1537}
1538
/**
 * folio_add_new_anon_rmap - Add mapping to a new anonymous folio.
 * @folio:	The folio to add the mapping to.
 * @vma:	the vm area in which the mapping is added
 * @address:	the user virtual address mapped
 * @flags:	The rmap flags
 *
 * Like folio_add_anon_rmap_*() but must only be called on *new* folios.
 * This means the inc-and-test can be bypassed.
 * The folio doesn't necessarily need to be locked while it's exclusive
 * unless two threads map it concurrently. However, the folio must be
 * locked if it's shared.
 *
 * If the folio is pmd-mappable, it is accounted as a THP.
 */
1554void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
1555 unsigned long address, rmap_t flags)
1556{
1557 const bool exclusive = flags & RMAP_EXCLUSIVE;
1558 int nr = 1, nr_pmdmapped = 0;
1559
1560 VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
1561 VM_WARN_ON_FOLIO(!exclusive && !folio_test_locked(folio), folio);
1562
1563
1564
1565
1566
1567 if (!folio_test_swapbacked(folio) && !(vma->vm_flags & VM_DROPPABLE))
1568 __folio_set_swapbacked(folio);
1569 __folio_set_anon(folio, vma, address, exclusive);
1570
1571 if (likely(!folio_test_large(folio))) {
1572
1573 atomic_set(&folio->_mapcount, 0);
1574 if (exclusive)
1575 SetPageAnonExclusive(&folio->page);
1576 } else if (!folio_test_pmd_mappable(folio)) {
1577 int i;
1578
1579 nr = folio_large_nr_pages(folio);
1580 for (i = 0; i < nr; i++) {
1581 struct page *page = folio_page(folio, i);
1582
1583 if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
1584
1585 atomic_set(&page->_mapcount, 0);
1586 if (exclusive)
1587 SetPageAnonExclusive(page);
1588 }
1589
1590 folio_set_large_mapcount(folio, nr, vma);
1591 if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
1592 atomic_set(&folio->_nr_pages_mapped, nr);
1593 } else {
1594 nr = folio_large_nr_pages(folio);
1595
1596 atomic_set(&folio->_entire_mapcount, 0);
1597 folio_set_large_mapcount(folio, 1, vma);
1598 if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
1599 atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
1600 if (exclusive)
1601 SetPageAnonExclusive(&folio->page);
1602 nr_pmdmapped = nr;
1603 }
1604
1605 VM_WARN_ON_ONCE(address < vma->vm_start ||
1606 address + (nr << PAGE_SHIFT) > vma->vm_end);
1607
1608 __folio_mod_stat(folio, nr, nr_pmdmapped);
1609 mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON, 1);
1610}
1611
1612static __always_inline void __folio_add_file_rmap(struct folio *folio,
1613 struct page *page, int nr_pages, struct vm_area_struct *vma,
1614 enum rmap_level level)
1615{
1616 int nr, nr_pmdmapped = 0;
1617
1618 VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
1619
1620 nr = __folio_add_rmap(folio, page, nr_pages, vma, level, &nr_pmdmapped);
1621 __folio_mod_stat(folio, nr, nr_pmdmapped);
1622
1623
1624 if (!folio_test_large(folio))
1625 mlock_vma_folio(folio, vma);
1626}
1627
/**
 * folio_add_file_rmap_ptes - add PTE mappings to a page range of a folio
 * @folio:	The folio to add the mappings to
 * @page:	The first page to add
 * @nr_pages:	The number of pages that will be mapped using PTEs
 * @vma:	The vm area in which the mappings are added
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock.
 */
1639void folio_add_file_rmap_ptes(struct folio *folio, struct page *page,
1640 int nr_pages, struct vm_area_struct *vma)
1641{
1642 __folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
1643}
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
1656 struct vm_area_struct *vma)
1657{
1658#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1659 __folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
1660#else
1661 WARN_ON_ONCE(true);
1662#endif
1663}
1664
/**
 * folio_add_file_rmap_pud - add a PUD mapping to a page range of a folio
 * @folio:	The folio to add the mapping to
 * @page:	The first page to add
 * @vma:	The vm area in which the mapping is added
 *
 * The page range of the folio is defined by [page, page + HPAGE_PUD_NR)
 *
 * The caller needs to hold the page table lock.
 */
1675void folio_add_file_rmap_pud(struct folio *folio, struct page *page,
1676 struct vm_area_struct *vma)
1677{
1678#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1679 defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1680 __folio_add_file_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
1681#else
1682 WARN_ON_ONCE(true);
1683#endif
1684}
1685
1686static __always_inline void __folio_remove_rmap(struct folio *folio,
1687 struct page *page, int nr_pages, struct vm_area_struct *vma,
1688 enum rmap_level level)
1689{
1690 atomic_t *mapped = &folio->_nr_pages_mapped;
1691 int last = 0, nr = 0, nr_pmdmapped = 0;
1692 bool partially_mapped = false;
1693
1694 __folio_rmap_sanity_checks(folio, page, nr_pages, level);
1695
1696 switch (level) {
1697 case RMAP_LEVEL_PTE:
1698 if (!folio_test_large(folio)) {
1699 nr = atomic_add_negative(-1, &folio->_mapcount);
1700 break;
1701 }
1702
1703 if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
1704 nr = folio_sub_return_large_mapcount(folio, nr_pages, vma);
1705 if (!nr) {
1706
1707 nr = folio_nr_pages(folio);
1708 } else {
1709 partially_mapped = nr < folio_large_nr_pages(folio) &&
1710 !folio_entire_mapcount(folio);
1711 nr = 0;
1712 }
1713 break;
1714 }
1715
1716 folio_sub_large_mapcount(folio, nr_pages, vma);
1717 do {
1718 last += atomic_add_negative(-1, &page->_mapcount);
1719 } while (page++, --nr_pages > 0);
1720
1721 if (last &&
1722 atomic_sub_return_relaxed(last, mapped) < ENTIRELY_MAPPED)
1723 nr = last;
1724
1725 partially_mapped = nr && atomic_read(mapped);
1726 break;
1727 case RMAP_LEVEL_PMD:
1728 case RMAP_LEVEL_PUD:
1729 if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
1730 last = atomic_add_negative(-1, &folio->_entire_mapcount);
1731 if (level == RMAP_LEVEL_PMD && last)
1732 nr_pmdmapped = folio_large_nr_pages(folio);
1733 nr = folio_dec_return_large_mapcount(folio, vma);
1734 if (!nr) {
1735
1736 nr = folio_large_nr_pages(folio);
1737 } else {
1738 partially_mapped = last &&
1739 nr < folio_large_nr_pages(folio);
1740 nr = 0;
1741 }
1742 break;
1743 }
1744
1745 folio_dec_large_mapcount(folio, vma);
1746 last = atomic_add_negative(-1, &folio->_entire_mapcount);
1747 if (last) {
1748 nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
1749 if (likely(nr < ENTIRELY_MAPPED)) {
1750 nr_pages = folio_large_nr_pages(folio);
1751 if (level == RMAP_LEVEL_PMD)
1752 nr_pmdmapped = nr_pages;
1753 nr = nr_pages - (nr & FOLIO_PAGES_MAPPED);
1754
1755 if (unlikely(nr < 0))
1756 nr = 0;
1757 } else {
1758
1759 nr = 0;
1760 }
1761 }
1762
1763 partially_mapped = nr && nr < nr_pmdmapped;
1764 break;
1765 }
1766
1767
1768
1769
1770
1771
1772
1773 if (partially_mapped && folio_test_anon(folio) &&
1774 !folio_test_partially_mapped(folio))
1775 deferred_split_folio(folio, true);
1776
1777 __folio_mod_stat(folio, -nr, -nr_pmdmapped);
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787 munlock_vma_folio(folio, vma);
1788}
1789
/**
 * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
 * @folio:	The folio to remove the mappings from
 * @page:	The first page to remove
 * @nr_pages:	The number of pages that will be removed from the mapping
 * @vma:	The vm area from which the mappings are removed
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock.
 */
1801void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
1802 int nr_pages, struct vm_area_struct *vma)
1803{
1804 __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
1805}
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
1818 struct vm_area_struct *vma)
1819{
1820#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1821 __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
1822#else
1823 WARN_ON_ONCE(true);
1824#endif
1825}
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837void folio_remove_rmap_pud(struct folio *folio, struct page *page,
1838 struct vm_area_struct *vma)
1839{
1840#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
1841 defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
1842 __folio_remove_rmap(folio, page, HPAGE_PUD_NR, vma, RMAP_LEVEL_PUD);
1843#else
1844 WARN_ON_ONCE(true);
1845#endif
1846}
1847
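/*
 * Decide how many consecutive PTEs of @folio, starting at pvmw->address, can
 * be unmapped in one batch. Only large anonymous folios that are not
 * swap-backed (i.e. lazyfree candidates) are batched, bounded by the end of
 * the PMD and the VMA; everything else is unmapped one PTE at a time.
 */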
1848static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
1849 struct page_vma_mapped_walk *pvmw,
1850 enum ttu_flags flags, pte_t pte)
1851{
1852 unsigned long end_addr, addr = pvmw->address;
1853 struct vm_area_struct *vma = pvmw->vma;
1854 unsigned int max_nr;
1855
1856 if (flags & TTU_HWPOISON)
1857 return 1;
1858 if (!folio_test_large(folio))
1859 return 1;
1860
1861
1862 end_addr = pmd_addr_end(addr, vma->vm_end);
1863 max_nr = (end_addr - addr) >> PAGE_SHIFT;
1864
1865
1866 if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
1867 return 1;
1868 if (pte_unused(pte))
1869 return 1;
1870
1871 return folio_pte_batch(folio, pvmw->pte, pte, max_nr);
1872}
1873
/*
 * @arg: enum ttu_flags will be passed to this argument
 */
1877static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
1878 unsigned long address, void *arg)
1879{
1880 struct mm_struct *mm = vma->vm_mm;
1881 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
1882 bool anon_exclusive, ret = true;
1883 pte_t pteval;
1884 struct page *subpage;
1885 struct mmu_notifier_range range;
1886 enum ttu_flags flags = (enum ttu_flags)(long)arg;
1887 unsigned long nr_pages = 1, end_addr;
1888 unsigned long pfn;
1889 unsigned long hsz = 0;
1890
1891
1892
1893
1894
1895
1896
1897 if (flags & TTU_SYNC)
1898 pvmw.flags = PVMW_SYNC;
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908 range.end = vma_address_end(&pvmw);
1909 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
1910 address, range.end);
1911 if (folio_test_hugetlb(folio)) {
1912
1913
1914
1915
1916 adjust_range_if_pmd_sharing_possible(vma, &range.start,
1917 &range.end);
1918
1919
1920 hsz = huge_page_size(hstate_vma(vma));
1921 }
1922 mmu_notifier_invalidate_range_start(&range);
1923
1924 while (page_vma_mapped_walk(&pvmw)) {
1925
1926
1927
1928 if (!(flags & TTU_IGNORE_MLOCK) &&
1929 (vma->vm_flags & VM_LOCKED)) {
1930
1931 if (!folio_test_large(folio))
1932 mlock_vma_folio(folio, vma);
1933 goto walk_abort;
1934 }
1935
1936 if (!pvmw.pte) {
1937 if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) {
1938 if (unmap_huge_pmd_locked(vma, pvmw.address, pvmw.pmd, folio))
1939 goto walk_done;
1940
1941
1942
1943
1944
1945 goto walk_abort;
1946 }
1947
1948 if (flags & TTU_SPLIT_HUGE_PMD) {
1949
1950
1951
1952
1953 split_huge_pmd_locked(vma, pvmw.address,
1954 pvmw.pmd, false);
1955 flags &= ~TTU_SPLIT_HUGE_PMD;
1956 page_vma_mapped_walk_restart(&pvmw);
1957 continue;
1958 }
1959 }
1960
1961
1962 VM_BUG_ON_FOLIO(!pvmw.pte, folio);
1963
1964
1965
1966
1967
1968 pteval = ptep_get(pvmw.pte);
1969 if (likely(pte_present(pteval))) {
1970 pfn = pte_pfn(pteval);
1971 } else {
1972 pfn = swp_offset_pfn(pte_to_swp_entry(pteval));
1973 VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
1974 }
1975
1976 subpage = folio_page(folio, pfn - folio_pfn(folio));
1977 address = pvmw.address;
1978 anon_exclusive = folio_test_anon(folio) &&
1979 PageAnonExclusive(subpage);
1980
1981 if (folio_test_hugetlb(folio)) {
1982 bool anon = folio_test_anon(folio);
1983
1984
1985
1986
1987
1988 VM_BUG_ON_PAGE(!PageHWPoison(subpage), subpage);
1989
1990
1991
1992
1993
1994
1995
1996 flush_cache_range(vma, range.start, range.end);
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008 if (!anon) {
2009 VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
2010 if (!hugetlb_vma_trylock_write(vma))
2011 goto walk_abort;
2012 if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
2013 hugetlb_vma_unlock_write(vma);
2014 flush_tlb_range(vma,
2015 range.start, range.end);
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026 goto walk_done;
2027 }
2028 hugetlb_vma_unlock_write(vma);
2029 }
2030 pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
2031 if (pte_dirty(pteval))
2032 folio_mark_dirty(folio);
2033 } else if (likely(pte_present(pteval))) {
2034 nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
2035 end_addr = address + nr_pages * PAGE_SIZE;
2036 flush_cache_range(vma, address, end_addr);
2037
2038
2039 pteval = get_and_clear_ptes(mm, address, pvmw.pte, nr_pages);
2040
2041
2042
2043
2044
2045
2046
2047
2048 if (should_defer_flush(mm, flags))
2049 set_tlb_ubc_flush_pending(mm, pteval, address, end_addr);
2050 else
2051 flush_tlb_range(vma, address, end_addr);
2052 if (pte_dirty(pteval))
2053 folio_mark_dirty(folio);
2054 } else {
2055 pte_clear(mm, address, pvmw.pte);
2056 }
2057
2058
2059
2060
2061
2062
2063 pte_install_uffd_wp_if_needed(vma, address, pvmw.pte, pteval);
2064
2065
2066 update_hiwater_rss(mm);
2067
2068 if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
2069 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
2070 if (folio_test_hugetlb(folio)) {
2071 hugetlb_count_sub(folio_nr_pages(folio), mm);
2072 set_huge_pte_at(mm, address, pvmw.pte, pteval,
2073 hsz);
2074 } else {
2075 dec_mm_counter(mm, mm_counter(folio));
2076 set_pte_at(mm, address, pvmw.pte, pteval);
2077 }
2078 } else if (likely(pte_present(pteval)) && pte_unused(pteval) &&
2079 !userfaultfd_armed(vma)) {
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090 dec_mm_counter(mm, mm_counter(folio));
2091 } else if (folio_test_anon(folio)) {
2092 swp_entry_t entry = page_swap_entry(subpage);
2093 pte_t swp_pte;
2094
2095
2096
2097
2098 if (unlikely(folio_test_swapbacked(folio) !=
2099 folio_test_swapcache(folio))) {
2100 WARN_ON_ONCE(1);
2101 goto walk_abort;
2102 }
2103
2104
2105 if (!folio_test_swapbacked(folio)) {
2106 int ref_count, map_count;
2107
2108
2109
2110
2111
2112
2113 smp_mb();
2114
2115 ref_count = folio_ref_count(folio);
2116 map_count = folio_mapcount(folio);
2117
2118
2119
2120
2121
2122 smp_rmb();
2123
2124 if (folio_test_dirty(folio) && !(vma->vm_flags & VM_DROPPABLE)) {
2125
2126
2127
2128
2129 set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
2130 folio_set_swapbacked(folio);
2131 goto walk_abort;
2132 } else if (ref_count != 1 + map_count) {
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142 set_ptes(mm, address, pvmw.pte, pteval, nr_pages);
2143 goto walk_abort;
2144 }
2145 add_mm_counter(mm, MM_ANONPAGES, -nr_pages);
2146 goto discard;
2147 }
2148
2149 if (swap_duplicate(entry) < 0) {
2150 set_pte_at(mm, address, pvmw.pte, pteval);
2151 goto walk_abort;
2152 }
2153
2154
2155
2156
2157
2158
2159 if (arch_unmap_one(mm, vma, address, pteval) < 0) {
2160 swap_free(entry);
2161 set_pte_at(mm, address, pvmw.pte, pteval);
2162 goto walk_abort;
2163 }
2164
2165
2166 if (anon_exclusive &&
2167 folio_try_share_anon_rmap_pte(folio, subpage)) {
2168 swap_free(entry);
2169 set_pte_at(mm, address, pvmw.pte, pteval);
2170 goto walk_abort;
2171 }
2172 if (list_empty(&mm->mmlist)) {
2173 spin_lock(&mmlist_lock);
2174 if (list_empty(&mm->mmlist))
2175 list_add(&mm->mmlist, &init_mm.mmlist);
2176 spin_unlock(&mmlist_lock);
2177 }
2178 dec_mm_counter(mm, MM_ANONPAGES);
2179 inc_mm_counter(mm, MM_SWAPENTS);
2180 swp_pte = swp_entry_to_pte(entry);
2181 if (anon_exclusive)
2182 swp_pte = pte_swp_mkexclusive(swp_pte);
2183 if (likely(pte_present(pteval))) {
2184 if (pte_soft_dirty(pteval))
2185 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2186 if (pte_uffd_wp(pteval))
2187 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2188 } else {
2189 if (pte_swp_soft_dirty(pteval))
2190 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2191 if (pte_swp_uffd_wp(pteval))
2192 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2193 }
2194 set_pte_at(mm, address, pvmw.pte, swp_pte);
2195 } else {
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207 dec_mm_counter(mm, mm_counter_file(folio));
2208 }
2209discard:
2210 if (unlikely(folio_test_hugetlb(folio))) {
2211 hugetlb_remove_rmap(folio);
2212 } else {
2213 folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
2214 }
2215 if (vma->vm_flags & VM_LOCKED)
2216 mlock_drain_local();
2217 folio_put_refs(folio, nr_pages);
2218
2219
2220
2221
2222
2223 if (nr_pages == folio_nr_pages(folio))
2224 goto walk_done;
2225 continue;
2226walk_abort:
2227 ret = false;
2228walk_done:
2229 page_vma_mapped_walk_done(&pvmw);
2230 break;
2231 }
2232
2233 mmu_notifier_invalidate_range_end(&range);
2234
2235 return ret;
2236}
2237
2238static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
2239{
2240 return vma_is_temporary_stack(vma);
2241}
2242
2243static int folio_not_mapped(struct folio *folio)
2244{
2245 return !folio_mapped(folio);
2246}
2247
/**
 * try_to_unmap - Try to remove all page table mappings to a folio.
 * @folio: The folio to unmap.
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this
 * folio.  It is the caller's responsibility to check if the folio is
 * still mapped if needed (use TTU_SYNC to prevent accounting races).
 *
 * Context: Caller must hold the folio lock.
 */
2259void try_to_unmap(struct folio *folio, enum ttu_flags flags)
2260{
2261 struct rmap_walk_control rwc = {
2262 .rmap_one = try_to_unmap_one,
2263 .arg = (void *)flags,
2264 .done = folio_not_mapped,
2265 .anon_lock = folio_lock_anon_vma_read,
2266 };
2267
2268 if (flags & TTU_RMAP_LOCKED)
2269 rmap_walk_locked(folio, &rwc);
2270 else
2271 rmap_walk(folio, &rwc);
2272}
2273
/*
 * @arg: enum ttu_flags will be passed to this argument.
 *
 * If TTU_SPLIT_HUGE_PMD is specified, any PMD mappings will be split into PTEs
 * containing migration entries.
 */
2280static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
2281 unsigned long address, void *arg)
2282{
2283 struct mm_struct *mm = vma->vm_mm;
2284 DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
2285 bool anon_exclusive, writable, ret = true;
2286 pte_t pteval;
2287 struct page *subpage;
2288 struct mmu_notifier_range range;
2289 enum ttu_flags flags = (enum ttu_flags)(long)arg;
2290 unsigned long pfn;
2291 unsigned long hsz = 0;
2292
2293
2294
2295
2296
2297
2298
2299 if (flags & TTU_SYNC)
2300 pvmw.flags = PVMW_SYNC;
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310 range.end = vma_address_end(&pvmw);
2311 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
2312 address, range.end);
2313 if (folio_test_hugetlb(folio)) {
2314
2315
2316
2317
2318 adjust_range_if_pmd_sharing_possible(vma, &range.start,
2319 &range.end);
2320
2321
2322 hsz = huge_page_size(hstate_vma(vma));
2323 }
2324 mmu_notifier_invalidate_range_start(&range);
2325
2326 while (page_vma_mapped_walk(&pvmw)) {
2327
2328 if (!pvmw.pte) {
2329 if (flags & TTU_SPLIT_HUGE_PMD) {
2330 split_huge_pmd_locked(vma, pvmw.address,
2331 pvmw.pmd, true);
2332 ret = false;
2333 page_vma_mapped_walk_done(&pvmw);
2334 break;
2335 }
2336#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
2337 subpage = folio_page(folio,
2338 pmd_pfn(*pvmw.pmd) - folio_pfn(folio));
2339 VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) ||
2340 !folio_test_pmd_mappable(folio), folio);
2341
2342 if (set_pmd_migration_entry(&pvmw, subpage)) {
2343 ret = false;
2344 page_vma_mapped_walk_done(&pvmw);
2345 break;
2346 }
2347 continue;
2348#endif
2349 }
2350
2351
2352 VM_BUG_ON_FOLIO(!pvmw.pte, folio);
2353
2354
2355
2356
2357
2358 pteval = ptep_get(pvmw.pte);
2359 if (likely(pte_present(pteval))) {
2360 pfn = pte_pfn(pteval);
2361 } else {
2362 pfn = swp_offset_pfn(pte_to_swp_entry(pteval));
2363 VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
2364 }
2365
2366 subpage = folio_page(folio, pfn - folio_pfn(folio));
2367 address = pvmw.address;
2368 anon_exclusive = folio_test_anon(folio) &&
2369 PageAnonExclusive(subpage);
2370
2371 if (folio_test_hugetlb(folio)) {
2372 bool anon = folio_test_anon(folio);
2373
2374
2375
2376
2377
2378
2379
2380
2381 flush_cache_range(vma, range.start, range.end);
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393 if (!anon) {
2394 VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
2395 if (!hugetlb_vma_trylock_write(vma)) {
2396 page_vma_mapped_walk_done(&pvmw);
2397 ret = false;
2398 break;
2399 }
2400 if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
2401 hugetlb_vma_unlock_write(vma);
2402 flush_tlb_range(vma,
2403 range.start, range.end);
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415 page_vma_mapped_walk_done(&pvmw);
2416 break;
2417 }
2418 hugetlb_vma_unlock_write(vma);
2419 }
2420
2421 pteval = huge_ptep_clear_flush(vma, address, pvmw.pte);
2422 if (pte_dirty(pteval))
2423 folio_mark_dirty(folio);
2424 writable = pte_write(pteval);
2425 } else if (likely(pte_present(pteval))) {
2426 flush_cache_page(vma, address, pfn);
2427
2428 if (should_defer_flush(mm, flags)) {
2429
2430
2431
2432
2433
2434
2435
2436
2437 pteval = ptep_get_and_clear(mm, address, pvmw.pte);
2438
2439 set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE);
2440 } else {
2441 pteval = ptep_clear_flush(vma, address, pvmw.pte);
2442 }
2443 if (pte_dirty(pteval))
2444 folio_mark_dirty(folio);
2445 writable = pte_write(pteval);
2446 } else {
2447 pte_clear(mm, address, pvmw.pte);
2448 writable = is_writable_device_private_entry(pte_to_swp_entry(pteval));
2449 }
2450
2451 VM_WARN_ON_FOLIO(writable && folio_test_anon(folio) &&
2452 !anon_exclusive, folio);
2453
2454
2455 update_hiwater_rss(mm);
2456
2457 if (PageHWPoison(subpage)) {
2458 VM_WARN_ON_FOLIO(folio_is_device_private(folio), folio);
2459
2460 pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
2461 if (folio_test_hugetlb(folio)) {
2462 hugetlb_count_sub(folio_nr_pages(folio), mm);
2463 set_huge_pte_at(mm, address, pvmw.pte, pteval,
2464 hsz);
2465 } else {
2466 dec_mm_counter(mm, mm_counter(folio));
2467 set_pte_at(mm, address, pvmw.pte, pteval);
2468 }
2469 } else if (likely(pte_present(pteval)) && pte_unused(pteval) &&
2470 !userfaultfd_armed(vma)) {
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481 dec_mm_counter(mm, mm_counter(folio));
2482 } else {
2483 swp_entry_t entry;
2484 pte_t swp_pte;
2485
2486
2487
2488
2489
2490
2491 if (arch_unmap_one(mm, vma, address, pteval) < 0) {
2492 if (folio_test_hugetlb(folio))
2493 set_huge_pte_at(mm, address, pvmw.pte,
2494 pteval, hsz);
2495 else
2496 set_pte_at(mm, address, pvmw.pte, pteval);
2497 ret = false;
2498 page_vma_mapped_walk_done(&pvmw);
2499 break;
2500 }
2501
2502
2503 if (folio_test_hugetlb(folio)) {
2504 if (anon_exclusive &&
2505 hugetlb_try_share_anon_rmap(folio)) {
2506 set_huge_pte_at(mm, address, pvmw.pte,
2507 pteval, hsz);
2508 ret = false;
2509 page_vma_mapped_walk_done(&pvmw);
2510 break;
2511 }
2512 } else if (anon_exclusive &&
2513 folio_try_share_anon_rmap_pte(folio, subpage)) {
2514 set_pte_at(mm, address, pvmw.pte, pteval);
2515 ret = false;
2516 page_vma_mapped_walk_done(&pvmw);
2517 break;
2518 }
2519
2520
2521
2522
2523
2524
2525 if (writable)
2526 entry = make_writable_migration_entry(
2527 page_to_pfn(subpage));
2528 else if (anon_exclusive)
2529 entry = make_readable_exclusive_migration_entry(
2530 page_to_pfn(subpage));
2531 else
2532 entry = make_readable_migration_entry(
2533 page_to_pfn(subpage));
2534 if (likely(pte_present(pteval))) {
2535 if (pte_young(pteval))
2536 entry = make_migration_entry_young(entry);
2537 if (pte_dirty(pteval))
2538 entry = make_migration_entry_dirty(entry);
2539 swp_pte = swp_entry_to_pte(entry);
2540 if (pte_soft_dirty(pteval))
2541 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2542 if (pte_uffd_wp(pteval))
2543 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2544 } else {
2545 swp_pte = swp_entry_to_pte(entry);
2546 if (pte_swp_soft_dirty(pteval))
2547 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2548 if (pte_swp_uffd_wp(pteval))
2549 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2550 }
2551 if (folio_test_hugetlb(folio))
2552 set_huge_pte_at(mm, address, pvmw.pte, swp_pte,
2553 hsz);
2554 else
2555 set_pte_at(mm, address, pvmw.pte, swp_pte);
2556 trace_set_migration_pte(address, pte_val(swp_pte),
2557 folio_order(folio));
2558
2559
2560
2561
2562 }
2563
2564 if (unlikely(folio_test_hugetlb(folio)))
2565 hugetlb_remove_rmap(folio);
2566 else
2567 folio_remove_rmap_pte(folio, subpage, vma);
2568 if (vma->vm_flags & VM_LOCKED)
2569 mlock_drain_local();
2570 folio_put(folio);
2571 }
2572
2573 mmu_notifier_invalidate_range_end(&range);
2574
2575 return ret;
2576}
2577
/**
 * try_to_migrate - try to replace all page table mappings with swap entries
 * @folio: the folio to replace page table entries for
 * @flags: action and flags
 *
 * Tries to remove all the page table entries which are mapping this folio and
 * replace them with special swap entries. Caller must hold the folio lock.
 */
2586void try_to_migrate(struct folio *folio, enum ttu_flags flags)
2587{
2588 struct rmap_walk_control rwc = {
2589 .rmap_one = try_to_migrate_one,
2590 .arg = (void *)flags,
2591 .done = folio_not_mapped,
2592 .anon_lock = folio_lock_anon_vma_read,
2593 };
2594
 /*
  * Migration always ignores mlock and only supports TTU_RMAP_LOCKED,
  * TTU_SPLIT_HUGE_PMD, TTU_SYNC, and TTU_BATCH_FLUSH flags.
  */
2599 if (WARN_ON_ONCE(flags & ~(TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
2600 TTU_SYNC | TTU_BATCH_FLUSH)))
2601 return;
2602
2603 if (folio_is_zone_device(folio) &&
2604 (!folio_is_device_private(folio) && !folio_is_device_coherent(folio)))
2605 return;
2606
 /*
  * During exec, a temporary VMA is setup and later moved.
  * The VMA is moved under the anon_vma lock but not the
  * page tables leading to a race where migration cannot
  * find the migration ptes. Rather than increasing the
  * locking requirements of exec(), migration skips
  * temporary VMAs until after exec() completes.
  */
2615 if (!folio_test_ksm(folio) && folio_test_anon(folio))
2616 rwc.invalid_vma = invalid_migration_vma;
2617
2618 if (flags & TTU_RMAP_LOCKED)
2619 rmap_walk_locked(folio, &rwc);
2620 else
2621 rmap_walk(folio, &rwc);
2622}
2623
2624#ifdef CONFIG_DEVICE_PRIVATE
/**
 * make_device_exclusive() - Mark a page for exclusive use by a device
 * @mm: mm_struct of associated target process
 * @addr: the virtual address to mark for exclusive device access
 * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier to allow filtering
 * @foliop: folio pointer will be stored here on success
 *
 * This function looks up the page mapped writably at @addr, grabs a GUP
 * reference, locks the folio and replaces the PTE with a special
 * device-exclusive non-swap entry. CPU access to the page is blocked while
 * the entry exists; the next CPU fault on @addr will restore the original
 * mapping, so exclusive access is only guaranteed while the returned folio
 * remains locked.
 *
 * Only anonymous, non-hugetlb folios are supported, and the mapping must be
 * writable; otherwise -EOPNOTSUPP (or the GUP error) is returned.
 *
 * The caller must hold mm->mmap_lock.
 *
 * Returns: the mapped page on success, otherwise an ERR_PTR().
 */
2663struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
2664 void *owner, struct folio **foliop)
2665{
2666 struct mmu_notifier_range range;
2667 struct folio *folio, *fw_folio;
2668 struct vm_area_struct *vma;
2669 struct folio_walk fw;
2670 struct page *page;
2671 swp_entry_t entry;
2672 pte_t swp_pte;
2673 int ret;
2674
2675 mmap_assert_locked(mm);
2676 addr = PAGE_ALIGN_DOWN(addr);
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687retry:
2688 page = get_user_page_vma_remote(mm, addr,
2689 FOLL_GET | FOLL_WRITE | FOLL_SPLIT_PMD,
2690 &vma);
2691 if (IS_ERR(page))
2692 return page;
2693 folio = page_folio(page);
2694
2695 if (!folio_test_anon(folio) || folio_test_hugetlb(folio)) {
2696 folio_put(folio);
2697 return ERR_PTR(-EOPNOTSUPP);
2698 }
2699
2700 ret = folio_lock_killable(folio);
2701 if (ret) {
2702 folio_put(folio);
2703 return ERR_PTR(ret);
2704 }
2705
2706
2707
2708
2709
2710
2711 mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
2712 mm, addr, addr + PAGE_SIZE, owner);
2713 mmu_notifier_invalidate_range_start(&range);
2714
2715
2716
2717
2718
2719
2720
2721 fw_folio = folio_walk_start(&fw, vma, addr, 0);
2722 if (fw_folio != folio || fw.page != page ||
2723 fw.level != FW_LEVEL_PTE || !pte_write(fw.pte)) {
2724 if (fw_folio)
2725 folio_walk_end(&fw, vma);
2726 mmu_notifier_invalidate_range_end(&range);
2727 folio_unlock(folio);
2728 folio_put(folio);
2729 goto retry;
2730 }
2731
2732
2733 flush_cache_page(vma, addr, page_to_pfn(page));
2734 fw.pte = ptep_clear_flush(vma, addr, fw.ptep);
2735
2736
2737 if (pte_dirty(fw.pte))
2738 folio_mark_dirty(folio);
2739
2740
2741
2742
2743
2744
2745 entry = make_device_exclusive_entry(page_to_pfn(page));
2746 swp_pte = swp_entry_to_pte(entry);
2747 if (pte_soft_dirty(fw.pte))
2748 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2749
2750 set_pte_at(mm, addr, fw.ptep, swp_pte);
2751
2752 folio_walk_end(&fw, vma);
2753 mmu_notifier_invalidate_range_end(&range);
2754 *foliop = folio;
2755 return page;
2756}
2757EXPORT_SYMBOL_GPL(make_device_exclusive);
2758#endif
2759
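/*
 * Drop the final reference to @anon_vma and free it; if this also drops the
 * last reference to its root, free the root as well.
 */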
2760void __put_anon_vma(struct anon_vma *anon_vma)
2761{
2762 struct anon_vma *root = anon_vma->root;
2763
2764 anon_vma_free(anon_vma);
2765 if (root != anon_vma && atomic_dec_and_test(&root->refcount))
2766 anon_vma_free(root);
2767}
2768
2769static struct anon_vma *rmap_walk_anon_lock(const struct folio *folio,
2770 struct rmap_walk_control *rwc)
2771{
2772 struct anon_vma *anon_vma;
2773
2774 if (rwc->anon_lock)
2775 return rwc->anon_lock(folio, rwc);
2776
2777
2778
2779
2780
2781
2782
2783 anon_vma = folio_anon_vma(folio);
2784 if (!anon_vma)
2785 return NULL;
2786
2787 if (anon_vma_trylock_read(anon_vma))
2788 goto out;
2789
2790 if (rwc->try_lock) {
2791 anon_vma = NULL;
2792 rwc->contended = true;
2793 goto out;
2794 }
2795
2796 anon_vma_lock_read(anon_vma);
2797out:
2798 return anon_vma;
2799}
2800
/*
 * rmap_walk_anon - do something to anonymous page using the object-based
 * rmap method
 * @folio: the folio to be handled
 * @rwc: control variable according to each walk type
 * @locked: caller holds relevant rmap lock
 *
 * Find all the mappings of a folio using the mapping pointer and the vma
 * chains contained in the anon_vma struct it points to.
 */
2811static void rmap_walk_anon(struct folio *folio,
2812 struct rmap_walk_control *rwc, bool locked)
2813{
2814 struct anon_vma *anon_vma;
2815 pgoff_t pgoff_start, pgoff_end;
2816 struct anon_vma_chain *avc;
2817
2818 if (locked) {
2819 anon_vma = folio_anon_vma(folio);
2820
2821 VM_BUG_ON_FOLIO(!anon_vma, folio);
2822 } else {
2823 anon_vma = rmap_walk_anon_lock(folio, rwc);
2824 }
2825 if (!anon_vma)
2826 return;
2827
2828 pgoff_start = folio_pgoff(folio);
2829 pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
2830 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
2831 pgoff_start, pgoff_end) {
2832 struct vm_area_struct *vma = avc->vma;
2833 unsigned long address = vma_address(vma, pgoff_start,
2834 folio_nr_pages(folio));
2835
2836 VM_BUG_ON_VMA(address == -EFAULT, vma);
2837 cond_resched();
2838
2839 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2840 continue;
2841
2842 if (!rwc->rmap_one(folio, vma, address, rwc->arg))
2843 break;
2844 if (rwc->done && rwc->done(folio))
2845 break;
2846 }
2847
2848 if (!locked)
2849 anon_vma_unlock_read(anon_vma);
2850}
2851
/*
 * __rmap_walk_file() - Traverse the reverse mapping for a file-backed mapping
 * of a page mapped within a specified page cache object at a specified offset.
 *
 * @folio:		Either the folio whose mappings to traverse, or NULL if
 *			the callbacks in @rwc can look up mappings without one.
 * @mapping:		The page cache object whose mapping VMAs we intend to
 *			traverse. If @folio is non-NULL, this must equal
 *			folio_mapping(folio).
 * @pgoff_start:	The offset within @mapping of the page we are looking
 *			up. If @folio is non-NULL, this must equal
 *			folio_pgoff(folio).
 * @nr_pages:		The number of pages mapped by the mapping. If @folio is
 *			non-NULL, this must equal folio_nr_pages(folio).
 * @rwc:		The reverse mapping walk control object describing how
 *			the traversal should be performed.
 * @locked:		Is the @mapping already locked? If not, we acquire the
 *			lock.
 */
2872static void __rmap_walk_file(struct folio *folio, struct address_space *mapping,
2873 pgoff_t pgoff_start, unsigned long nr_pages,
2874 struct rmap_walk_control *rwc, bool locked)
2875{
2876 pgoff_t pgoff_end = pgoff_start + nr_pages - 1;
2877 struct vm_area_struct *vma;
2878
2879 VM_WARN_ON_FOLIO(folio && mapping != folio_mapping(folio), folio);
2880 VM_WARN_ON_FOLIO(folio && pgoff_start != folio_pgoff(folio), folio);
2881 VM_WARN_ON_FOLIO(folio && nr_pages != folio_nr_pages(folio), folio);
2882
2883 if (!locked) {
2884 if (i_mmap_trylock_read(mapping))
2885 goto lookup;
2886
2887 if (rwc->try_lock) {
2888 rwc->contended = true;
2889 return;
2890 }
2891
2892 i_mmap_lock_read(mapping);
2893 }
2894lookup:
2895 vma_interval_tree_foreach(vma, &mapping->i_mmap,
2896 pgoff_start, pgoff_end) {
2897 unsigned long address = vma_address(vma, pgoff_start, nr_pages);
2898
2899 VM_BUG_ON_VMA(address == -EFAULT, vma);
2900 cond_resched();
2901
2902 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
2903 continue;
2904
2905 if (!rwc->rmap_one(folio, vma, address, rwc->arg))
2906 goto done;
2907 if (rwc->done && rwc->done(folio))
2908 goto done;
2909 }
2910done:
2911 if (!locked)
2912 i_mmap_unlock_read(mapping);
2913}
2914
/*
 * rmap_walk_file - do something to file page using the object-based rmap method
 * @folio: the folio to be handled
 * @rwc: control variable according to each walk type
 * @locked: caller holds relevant rmap lock
 *
 * Find all the mappings of a folio using the mapping pointer and the vma chains
 * contained in the address_space struct it points to.
 */
2924static void rmap_walk_file(struct folio *folio,
2925 struct rmap_walk_control *rwc, bool locked)
2926{
2927
2928
2929
2930
2931
2932
2933 VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
2934
2935 if (!folio->mapping)
2936 return;
2937
2938 __rmap_walk_file(folio, folio->mapping, folio->index,
2939 folio_nr_pages(folio), rwc, locked);
2940}
2941
2942void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc)
2943{
2944 if (unlikely(folio_test_ksm(folio)))
2945 rmap_walk_ksm(folio, rwc);
2946 else if (folio_test_anon(folio))
2947 rmap_walk_anon(folio, rwc, false);
2948 else
2949 rmap_walk_file(folio, rwc, false);
2950}
2951
/* Like rmap_walk, but caller holds relevant rmap lock */
2953void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc)
2954{
2955
2956 VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio);
2957 if (folio_test_anon(folio))
2958 rmap_walk_anon(folio, rwc, true);
2959 else
2960 rmap_walk_file(folio, rwc, true);
2961}
2962
2963#ifdef CONFIG_HUGETLB_PAGE
/*
 * The following two functions are for anonymous (private mapped) hugepages.
 * Unlike common anonymous pages, anonymous hugepages have no accounting code
 * and no lru code, because we handle hugepages differently from common pages.
 */
2969void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
2970 unsigned long address, rmap_t flags)
2971{
2972 VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
2973 VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
2974
2975 atomic_inc(&folio->_entire_mapcount);
2976 atomic_inc(&folio->_large_mapcount);
2977 if (flags & RMAP_EXCLUSIVE)
2978 SetPageAnonExclusive(&folio->page);
2979 VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
2980 PageAnonExclusive(&folio->page), folio);
2981}
2982
2983void hugetlb_add_new_anon_rmap(struct folio *folio,
2984 struct vm_area_struct *vma, unsigned long address)
2985{
2986 VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
2987
2988 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
2989
2990 atomic_set(&folio->_entire_mapcount, 0);
2991 atomic_set(&folio->_large_mapcount, 0);
2992 folio_clear_hugetlb_restore_reserve(folio);
2993 __folio_set_anon(folio, vma, address, true);
2994 SetPageAnonExclusive(&folio->page);
2995}
2996#endif
2997