// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"

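/*
 * isolate_movable_page() - try to isolate a non-LRU movable page for
 * migration. On success the page has been isolated through its driver's
 * isolate_page() callback, PG_isolated is set and an extra reference is
 * held; returns -EBUSY on any failure.
 */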
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount preventing __free_pages() from doing its job,
	 * the desired page release will instead happen in the put_page()
	 * at the tail of this function.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before holding a PG_lock because page's owner
	 * assumes anybody doesn't touch PG_lock of newly allocated page
	 * so unconditionally grabbing the lock ruins page's owner side.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;
	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as race against the releasing a page.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * lets be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	__SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}

static void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	__ClearPageIsolated(page);
}

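/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 */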
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated non-lru movable page so here we can use
		 * __PageMovable because LRU page's mapping cannot have
		 * PAGE_MAPPING_MOVABLE.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}

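/*
 * Restore a potential migration pte to a working pte entry
 */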
static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct page_vma_mapped_walk pvmw = {
		.page = old,
		.vma = vma,
		.address = addr,
		.flags = PVMW_SYNC | PVMW_MIGRATION,
	};
	struct page *new;
	pte_t pte;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(PageTail(page), page);
	while (page_vma_mapped_walk(&pvmw)) {
		if (PageKsm(page))
			new = page;
		else
			new = page - pvmw.page->index +
				linear_page_index(vma, pvmw.address);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif

		get_page(new);
		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
		if (pte_swp_soft_dirty(*pvmw.pte))
			pte = pte_mksoft_dirty(pte);

		/*
		 * Recheck VMA as permissions can change since migration started
		 */
		entry = pte_to_swp_entry(*pvmw.pte);
		if (is_writable_migration_entry(entry))
			pte = maybe_mkwrite(pte, vma);
		else if (pte_swp_uffd_wp(*pvmw.pte))
			pte = pte_mkuffd_wp(pte);

		if (unlikely(is_device_private_page(new))) {
			if (pte_write(pte))
				entry = make_writable_device_private_entry(
							page_to_pfn(new));
			else
				entry = make_readable_device_private_entry(
							page_to_pfn(new));
			pte = swp_entry_to_pte(entry);
			if (pte_swp_soft_dirty(*pvmw.pte))
				pte = pte_swp_mksoft_dirty(pte);
			if (pte_swp_uffd_wp(*pvmw.pte))
				pte = pte_swp_mkuffd_wp(pte);
		}

#ifdef CONFIG_HUGETLB_PAGE
		if (PageHuge(new)) {
			unsigned int shift = huge_page_shift(hstate_vma(vma));

			pte = pte_mkhuge(pte);
			pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
			if (PageAnon(new))
				hugepage_add_anon_rmap(new, vma, pvmw.address);
			else
				page_dup_rmap(new, true);
		} else
#endif
		{
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);

			if (PageAnon(new))
				page_add_anon_rmap(new, vma, pvmw.address, false);
			else
				page_add_file_rmap(new, false);
		}
		if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
			mlock_vma_page(new);

		if (PageTransHuge(page) && PageMlocked(page))
			clear_page_mlock(page);

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}

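/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */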
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
	};

	if (locked)
		rmap_walk_locked(new, &rwc);
	else
		rmap_walk(new, &rwc);
}

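/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */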
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = pfn_swap_entry_to_page(entry);
	page = compound_head(page);

	/*
	 * Once page cache replacement of page migration started, page_count
	 * is zero; but we must not call put_and_wait_on_page_locked() without
	 * a ref. Use get_page_unless_zero(), and just fault again if it fails.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

static int expected_page_refs(struct address_space *mapping, struct page *page)
{
	int expected_count = 1;

	/*
	 * Device private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
	expected_count += is_device_private_page(page);
	if (mapping)
		expected_count += thp_nr_pages(page) + page_has_private(page);

	return expected_count;
}

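/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */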
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page, int extra_count)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	struct zone *oldzone, *newzone;
	int dirty;
	int expected_count = expected_page_refs(mapping, page) + extra_count;
	int nr = thp_nr_pages(page);

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;

		/* No turning back from here */
		newpage->index = page->index;
		newpage->mapping = page->mapping;
		if (PageSwapBacked(page))
			__SetPageSwapBacked(newpage);

		return MIGRATEPAGE_SUCCESS;
	}

	oldzone = page_zone(page);
	newzone = page_zone(newpage);

	xas_lock_irq(&xas);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	page_ref_add(newpage, nr); /* add cache reference */
	if (PageSwapBacked(page)) {
		__SetPageSwapBacked(newpage);
		if (PageSwapCache(page)) {
			SetPageSwapCache(newpage);
			set_page_private(newpage, page_private(page));
		}
	} else {
		VM_BUG_ON_PAGE(PageSwapCache(page), page);
	}

	/* Move dirty while page refs frozen and newpage not yet exposed */
	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);
		SetPageDirty(newpage);
	}

	xas_store(&xas, newpage);
	if (PageTransHuge(page)) {
		int i;

		for (i = 1; i < nr; i++) {
			xas_next(&xas);
			xas_store(&xas, newpage);
		}
	}

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_ref_unfreeze(page, expected_count - nr);

	xas_unlock(&xas);
	/* Leave irq disabled to prevent preemption while updating stats */

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
	 * are mapped to swap space.
	 */
	if (newzone != oldzone) {
		struct lruvec *old_lruvec, *new_lruvec;
		struct mem_cgroup *memcg;

		memcg = page_memcg(page);
		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);

		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
		if (PageSwapBacked(page) && !PageSwapCache(page)) {
			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
		}
#ifdef CONFIG_SWAP
		if (PageSwapCache(page)) {
			__mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
			__mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
		}
#endif
		if (dirty && mapping_can_writeback(mapping)) {
			__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
			__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
			__mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
			__mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
		}
	}
	local_irq_enable();

	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page_move_mapping);

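/*
 * The expected number of remaining references is the same
 * as that of migrate_page_move_mapping().
 */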
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	XA_STATE(xas, &mapping->i_pages, page_index(page));
	int expected_count;

	xas_lock_irq(&xas);
	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count || xas_load(&xas) != page) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	newpage->index = page->index;
	newpage->mapping = page->mapping;

	get_page(newpage);

	xas_store(&xas, newpage);

	page_ref_unfreeze(page, expected_count - 1);

	xas_unlock_irq(&xas);

	return MIGRATEPAGE_SUCCESS;
}

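/*
 * Copy the page to its new location
 */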
void migrate_page_states(struct page *newpage, struct page *page)
{
	int cpupid;

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageWorkingset(page))
		SetPageWorkingset(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	/* Move dirty on pages not done by migrate_page_move_mapping() */
	if (PageDirty(page))
		SetPageDirty(newpage);

	if (page_is_young(page))
		set_page_young(newpage);
	if (page_is_idle(page))
		set_page_idle(newpage);

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	if (PageSwapCache(page))
		ClearPageSwapCache(page);
	ClearPagePrivate(page);

	/* page->private contains hugetlb specific flags */
	if (!PageHuge(page))
		set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);

	/*
	 * PG_readahead shares the same bit with PG_reclaim.  The above
	 * end_page_writeback() may clear PG_readahead mistakenly, so set the
	 * bit after that.
	 */
	if (PageReadahead(page))
		SetPageReadahead(newpage);

	copy_page_owner(page, newpage);

	if (!PageHuge(page))
		mem_cgroup_migrate(page, newpage);
}
EXPORT_SYMBOL(migrate_page_states);

void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	migrate_page_states(newpage, page);
}
EXPORT_SYMBOL(migrate_page_copy);

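/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */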
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* async case, we cannot block on lock_buffer so use trylock_buffer */
	do {
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the taken locks
			 */
			struct buffer_head *failed_bh = bh;
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}

static int __buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode,
		bool check_refs)
{
	struct buffer_head *bh, *head;
	int rc;
	int expected_count;

	if (!page_has_buffers(page))
		return migrate_page(mapping, newpage, page, mode);

	/* Check whether page does not have extra refs before we do more work */
	expected_count = expected_page_refs(mapping, page);
	if (page_count(page) != expected_count)
		return -EAGAIN;

	head = page_buffers(page);
	if (!buffer_migrate_lock_buffers(head, mode))
		return -EAGAIN;

	if (check_refs) {
		bool busy;
		bool invalidated = false;

recheck_buffers:
		busy = false;
		spin_lock(&mapping->private_lock);
		bh = head;
		do {
			if (atomic_read(&bh->b_count)) {
				busy = true;
				break;
			}
			bh = bh->b_this_page;
		} while (bh != head);
		if (busy) {
			if (invalidated) {
				rc = -EAGAIN;
				goto unlock_buffers;
			}
			spin_unlock(&mapping->private_lock);
			invalidate_bh_lrus();
			invalidated = true;
			goto recheck_buffers;
		}
	}

	rc = migrate_page_move_mapping(mapping, newpage, page, 0);
	if (rc != MIGRATEPAGE_SUCCESS)
		goto unlock_buffers;

	attach_page_private(newpage, detach_page_private(page));

	bh = head;
	do {
		set_bh_page(bh, newpage, bh_offset(bh));
		bh = bh->b_this_page;

	} while (bh != head);

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	rc = MIGRATEPAGE_SUCCESS;
unlock_buffers:
	if (check_refs)
		spin_unlock(&mapping->private_lock);
	bh = head;
	do {
		unlock_buffer(bh);
		bh = bh->b_this_page;

	} while (bh != head);

	return rc;
}

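/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist. For example attached buffer heads are accessed only under page lock.
 */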
int buffer_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	return __buffer_migrate_page(mapping, newpage, page, mode, false);
}
EXPORT_SYMBOL(buffer_migrate_page);

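/*
 * Same as above except that this variant is more careful and checks that there
 * are also no buffer head references. This function is the right one for
 * mappings where buffer heads are directly looked up and referenced (such as
 * block device mappings).
 */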
int buffer_migrate_page_norefs(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	return __buffer_migrate_page(mapping, newpage, page, mode, true);
}
#endif /* CONFIG_BLOCK */

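/*
 * Writeback a page to clean the dirty state
 */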
static int writeout(struct address_space *mapping, struct page *page)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = 1,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_reclaim = 1
	};
	int rc;

	if (!mapping->a_ops->writepage)
		/* No write method for the address space */
		return -EINVAL;

	if (!clear_page_dirty_for_io(page))
		/* Someone else already triggered a write */
		return -EAGAIN;

	/*
	 * A dirty page may imply that the underlying filesystem has
	 * the page on some queue. So the page must be clean for
	 * migration. Writeout may mean we lose the lock and the
	 * page state is no longer what we checked for earlier.
	 * At this point we know that the migration attempt cannot
	 * be successful.
	 */
	remove_migration_ptes(page, page, false);

	rc = mapping->a_ops->writepage(page, &wbc);

	if (rc != AOP_WRITEPAGE_ACTIVATE)
		/* unlocked. Relock */
		lock_page(page);

	return (rc < 0) ? -EIO : -EAGAIN;
}

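/*
 * Default handling if a filesystem does not provide a migration function.
 */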
static int fallback_migrate_page(struct address_space *mapping,
	struct page *newpage, struct page *page, enum migrate_mode mode)
{
	if (PageDirty(page)) {
		/* Only writeback pages in full synchronous migration */
		switch (mode) {
		case MIGRATE_SYNC:
		case MIGRATE_SYNC_NO_COPY:
			break;
		default:
			return -EBUSY;
		}
		return writeout(mapping, page);
	}

	/*
	 * Buffers may be managed in a filesystem specific way.
	 * We must have no buffers or drop them.
	 */
	if (page_has_private(page) &&
	    !try_to_release_page(page, GFP_KERNEL))
		return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;

	return migrate_page(mapping, newpage, page, mode);
}

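/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */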
static int move_to_new_page(struct page *newpage, struct page *page,
				enum migrate_mode mode)
{
	struct address_space *mapping;
	int rc = -EAGAIN;
	bool is_lru = !__PageMovable(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	mapping = page_mapping(page);

	if (likely(is_lru)) {
		if (!mapping)
			rc = migrate_page(mapping, newpage, page, mode);
		else if (mapping->a_ops->migratepage)
			/*
			 * Most pages have a mapping and most filesystems
			 * provide a migratepage callback. Anonymous pages
			 * are part of swap space which also has its own
			 * migratepage callback. This is the most common path
			 * for page migration.
			 */
			rc = mapping->a_ops->migratepage(mapping, newpage,
							page, mode);
		else
			rc = fallback_migrate_page(mapping, newpage,
							page, mode);
	} else {
		/*
		 * In case of non-lru page, it could be released after
		 * isolation step. In that case, we shouldn't try migration.
		 */
		VM_BUG_ON_PAGE(!PageIsolated(page), page);
		if (!PageMovable(page)) {
			rc = MIGRATEPAGE_SUCCESS;
			__ClearPageIsolated(page);
			goto out;
		}

		rc = mapping->a_ops->migratepage(mapping, newpage,
						page, mode);
		WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
			!PageIsolated(page));
	}

	/*
	 * When successful, old pagecache page->mapping must be cleared before
	 * page is freed; but stats require that PageAnon be left as PageAnon.
	 */
	if (rc == MIGRATEPAGE_SUCCESS) {
		if (__PageMovable(page)) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);

			/*
			 * We clear PG_movable under page_lock so any compactor
			 * cannot try to migrate this page.
			 */
			__ClearPageIsolated(page);
		}

		/*
		 * Anonymous and movable page->mapping will be cleared by
		 * free_pages_prepare so don't reset it here for keeping
		 * the type to work PageAnon, for example.
		 */
		if (!PageMappingFlags(page))
			page->mapping = NULL;

		if (likely(!is_zone_device_page(newpage)))
			flush_dcache_page(newpage);

	}
out:
	return rc;
}

static int __unmap_and_move(struct page *page, struct page *newpage,
				int force, enum migrate_mode mode)
{
	int rc = -EAGAIN;
	int page_was_mapped = 0;
	struct anon_vma *anon_vma = NULL;
	bool is_lru = !__PageMovable(page);

	if (!trylock_page(page)) {
		if (!force || mode == MIGRATE_ASYNC)
			goto out;

		/*
		 * It's not safe for direct compaction to call lock_page.
		 * For example, during page readahead pages are added locked
		 * to the LRU. Later, when the IO completes the pages are
		 * marked uptodate and unlocked. However, the queueing
		 * could be merging multiple pages for one bio (e.g.
		 * mpage_readahead). If an allocation happens for the
		 * second or third page, the process can end up locking
		 * the same page twice and deadlocking. Rather than
		 * trying to be clever about what pages can be locked,
		 * avoid the use of lock_page for direct compaction
		 * altogether.
		 */
		if (current->flags & PF_MEMALLOC)
			goto out;

		lock_page(page);
	}

	if (PageWriteback(page)) {
		/*
		 * Only in the case of a full synchronous migration is it
		 * necessary to wait for PageWriteback. In the async case,
		 * the retry loop is too short and in the sync-light case,
		 * the overhead of stalling is too much
		 */
		switch (mode) {
		case MIGRATE_SYNC:
		case MIGRATE_SYNC_NO_COPY:
			break;
		default:
			rc = -EBUSY;
			goto out_unlock;
		}
		if (!force)
			goto out_unlock;
		wait_on_page_writeback(page);
	}

	/*
	 * By try_to_migrate(), page->mapcount goes down to 0 here. In this
	 * case, we cannot notice that anon_vma is freed while we migrate a
	 * page. This get_anon_vma() delays freeing anon_vma pointer until the
	 * end of migration. File cache pages are no problem because of
	 * page_lock(). File Caches may use write_page() or lock_page() in
	 * migration, then, just care Anon page here.
	 *
	 * Only page_get_anon_vma() understands the subtleties of
	 * getting a hold on an anon_vma from outside one of its mms.
	 * But if we cannot get anon_vma, then we won't need it anyway,
	 * because that implies that the anon page is no longer mapped
	 * (and cannot be remapped so long as we hold the page lock).
	 */
	if (PageAnon(page) && !PageKsm(page))
		anon_vma = page_get_anon_vma(page);

	/*
	 * Block others from accessing the new page when we get around to
	 * establishing additional references. We are usually the only one
	 * holding a reference to newpage at this point. We used to have a BUG
	 * here if trylock_page(newpage) fails, but would like to allow for
	 * cases where there might be a race with the previous use of newpage.
	 * This is much like races on refcount of oldpage: just don't BUG().
	 */
	if (unlikely(!trylock_page(newpage)))
		goto out_unlock;

	if (unlikely(!is_lru)) {
		rc = move_to_new_page(newpage, page, mode);
		goto out_unlock_both;
	}

	/*
	 * Corner case handling:
	 * 1. When a new swap-cache page is read into, it is added to the LRU
	 * and treated as swapcache but it has no rmap yet.
	 * Calling try_to_unmap() against a page->mapping==NULL page will
	 * trigger a BUG.  So handle it here.
	 * 2. An orphaned page (see truncate_cleanup_page) might have
	 * fs-private metadata. The page can be picked up due to memory
	 * offlining.  Everywhere else except page reclaim, the page is
	 * invisible to the vm, so the page can not be migrated.  So try to
	 * free the metadata, so the page can be freed.
	 */
	if (!page->mapping) {
		VM_BUG_ON_PAGE(PageAnon(page), page);
		if (page_has_private(page)) {
			try_to_free_buffers(page);
			goto out_unlock_both;
		}
	} else if (page_mapped(page)) {
		/* Establish migration ptes */
		VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
				page);
		try_to_migrate(page, 0);
		page_was_mapped = 1;
	}

	if (!page_mapped(page))
		rc = move_to_new_page(newpage, page, mode);

	if (page_was_mapped)
		remove_migration_ptes(page,
			rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);

out_unlock_both:
	unlock_page(newpage);
out_unlock:
	/* Drop an anon_vma reference if we took one */
	if (anon_vma)
		put_anon_vma(anon_vma);
	unlock_page(page);
out:
	/*
	 * If migration is successful, decrease refcount of the newpage,
	 * which will not free the page because new page owner increased
	 * refcounter.  As well, if it is LRU page, add the page to LRU
	 * list in here. Use the old state of the isolated source page to
	 * determine if we migrated a LRU page. newpage was already unlocked
	 * and possibly modified by its owner - don't rely on the page
	 * state.
	 */
	if (rc == MIGRATEPAGE_SUCCESS) {
		if (unlikely(!is_lru))
			put_page(newpage);
		else
			putback_lru_page(newpage);
	}

	return rc;
}

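/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */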
static int unmap_and_move(new_page_t get_new_page,
				   free_page_t put_new_page,
				   unsigned long private, struct page *page,
				   int force, enum migrate_mode mode,
				   enum migrate_reason reason,
				   struct list_head *ret)
{
	int rc = MIGRATEPAGE_SUCCESS;
	struct page *newpage = NULL;

	if (!thp_migration_supported() && PageTransHuge(page))
		return -ENOSYS;

	if (page_count(page) == 1) {
		/* page was freed from under us. So we are done. */
		ClearPageActive(page);
		ClearPageUnevictable(page);
		if (unlikely(__PageMovable(page))) {
			lock_page(page);
			if (!PageMovable(page))
				__ClearPageIsolated(page);
			unlock_page(page);
		}
		goto out;
	}

	newpage = get_new_page(page, private);
	if (!newpage)
		return -ENOMEM;

	rc = __unmap_and_move(page, newpage, force, mode);
	if (rc == MIGRATEPAGE_SUCCESS)
		set_page_owner_migrate_reason(newpage, reason);

out:
	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be restored.
		 */
		list_del(&page->lru);
	}

	/*
	 * If migration is successful, releases reference grabbed during
	 * isolation. Otherwise, restore the page to right list unless
	 * we want to retry.
	 */
	if (rc == MIGRATEPAGE_SUCCESS) {
		/*
		 * Compaction can migrate also non-LRU pages which are
		 * not accounted to NR_ISOLATED_*. They can be recognized
		 * as __PageMovable
		 */
		if (likely(!__PageMovable(page)))
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));

		if (reason != MR_MEMORY_FAILURE)
			/*
			 * We release the page in page_handle_poison.
			 */
			put_page(page);
	} else {
		if (rc != -EAGAIN)
			list_add_tail(&page->lru, ret);

		if (put_new_page)
			put_new_page(newpage, private);
		else
			put_page(newpage);
	}

	return rc;
}

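/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 */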
static int unmap_and_move_huge_page(new_page_t get_new_page,
				free_page_t put_new_page, unsigned long private,
				struct page *hpage, int force,
				enum migrate_mode mode, int reason,
				struct list_head *ret)
{
	int rc = -EAGAIN;
	int page_was_mapped = 0;
	struct page *new_hpage;
	struct anon_vma *anon_vma = NULL;
	struct address_space *mapping = NULL;

	/*
	 * Migratability of hugepages depends on architectures and their size.
	 * This check is necessary because some callers of hugepage migration
	 * like soft offline and memory hotremove don't walk through page
	 * tables or check whether the hugepage is pmd-based or not before
	 * kicking migration.
	 */
	if (!hugepage_migration_supported(page_hstate(hpage))) {
		list_move_tail(&hpage->lru, ret);
		return -ENOSYS;
	}

	if (page_count(hpage) == 1) {
		/* page was freed from under us. So we are done. */
		putback_active_hugepage(hpage);
		return MIGRATEPAGE_SUCCESS;
	}

	new_hpage = get_new_page(hpage, private);
	if (!new_hpage)
		return -ENOMEM;

	if (!trylock_page(hpage)) {
		if (!force)
			goto out;
		switch (mode) {
		case MIGRATE_SYNC:
		case MIGRATE_SYNC_NO_COPY:
			break;
		default:
			goto out;
		}
		lock_page(hpage);
	}

	/*
	 * A hugetlb page that is being freed still has a subpool pointer
	 * but no mapping any more; don't try to migrate such a page.
	 */
	if (hugetlb_page_subpool(hpage) && !page_mapping(hpage)) {
		rc = -EBUSY;
		goto out_unlock;
	}

	if (PageAnon(hpage))
		anon_vma = page_get_anon_vma(hpage);

	if (unlikely(!trylock_page(new_hpage)))
		goto put_anon;

	if (page_mapped(hpage)) {
		bool mapping_locked = false;
		enum ttu_flags ttu = 0;

		if (!PageAnon(hpage)) {
			/*
			 * In shared mappings, try_to_unmap could potentially
			 * call huge_pmd_unshare.  Because of this, take
			 * semaphore in write mode here and set TTU_RMAP_LOCKED
			 * to let lower levels know we have taken the lock.
			 */
			mapping = hugetlb_page_mapping_lock_write(hpage);
			if (unlikely(!mapping))
				goto unlock_put_anon;

			mapping_locked = true;
			ttu |= TTU_RMAP_LOCKED;
		}

		try_to_migrate(hpage, ttu);
		page_was_mapped = 1;

		if (mapping_locked)
			i_mmap_unlock_write(mapping);
	}

	if (!page_mapped(hpage))
		rc = move_to_new_page(new_hpage, hpage, mode);

	if (page_was_mapped)
		remove_migration_ptes(hpage,
			rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);

unlock_put_anon:
	unlock_page(new_hpage);

put_anon:
	if (anon_vma)
		put_anon_vma(anon_vma);

	if (rc == MIGRATEPAGE_SUCCESS) {
		move_hugetlb_state(hpage, new_hpage, reason);
		put_new_page = NULL;
	}

out_unlock:
	unlock_page(hpage);
out:
	if (rc == MIGRATEPAGE_SUCCESS)
		putback_active_hugepage(hpage);
	else if (rc != -EAGAIN)
		list_move_tail(&hpage->lru, ret);

	/*
	 * If migration was not successful and there's a freeing callback,
	 * use it.  Otherwise, putback_active_hugepage() will put the new
	 * page back on the right list when it drops the last reference.
	 */
	if (put_new_page)
		put_new_page(new_hpage, private);
	else
		putback_active_hugepage(new_hpage);

	return rc;
}

static inline int try_split_thp(struct page *page, struct page **page2,
				struct list_head *from)
{
	int rc = 0;

	lock_page(page);
	rc = split_huge_page_to_list(page, from);
	unlock_page(page);
	if (!rc)
		list_safe_reset_next(page, *page2, lru);

	return rc;
}

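/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * It is caller's responsibility to call putback_movable_pages() to return
 * pages to the LRU or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */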
int migrate_pages(struct list_head *from, new_page_t get_new_page,
		free_page_t put_new_page, unsigned long private,
		enum migrate_mode mode, int reason)
{
	int retry = 1;
	int thp_retry = 1;
	int nr_failed = 0;
	int nr_succeeded = 0;
	int nr_thp_succeeded = 0;
	int nr_thp_failed = 0;
	int nr_thp_split = 0;
	int pass = 0;
	bool is_thp = false;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc, nr_subpages;
	LIST_HEAD(ret_pages);
	bool nosplit = (reason == MR_NUMA_MISPLACED);

	trace_mm_migrate_pages_start(mode, reason);

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
		retry = 0;
		thp_retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
retry:
			/*
			 * THP statistics is based on the source huge page.
			 * Capture required information that might get lost
			 * during migration.
			 */
			is_thp = PageTransHuge(page) && !PageHuge(page);
			nr_subpages = thp_nr_pages(page);
			cond_resched();

			if (PageHuge(page))
				rc = unmap_and_move_huge_page(get_new_page,
						put_new_page, private, page,
						pass > 2, mode, reason,
						&ret_pages);
			else
				rc = unmap_and_move(get_new_page, put_new_page,
						private, page, pass > 2, mode,
						reason, &ret_pages);
			/*
			 * The rules are:
			 *	Success: non hugetlb page will be freed, hugetlb
			 *		 page will be put back
			 *	-EAGAIN: stay on the from list
			 *	-ENOMEM: stay on the from list
			 *	Other errno: put on ret_pages list then splice to
			 *		     from list
			 */
			switch(rc) {
			/*
			 * THP migration might be unsupported or the
			 * allocation could've failed so we should
			 * retry on the same page with the THP split
			 * to base pages.
			 *
			 * Head page is retried immediately and tail
			 * pages are added to the tail of the list so
			 * we encounter them after the rest of the list
			 * is processed.
			 */
			case -ENOSYS:
				/* THP migration is unsupported */
				if (is_thp) {
					if (!try_split_thp(page, &page2, from)) {
						nr_thp_split++;
						goto retry;
					}

					nr_thp_failed++;
					nr_failed += nr_subpages;
					break;
				}

				/* Hugetlb migration is unsupported */
				nr_failed++;
				break;
			case -ENOMEM:
				/*
				 * When memory is low, don't bother to try to migrate
				 * other pages, just exit.
				 * THP NUMA faulting doesn't split THP to retry.
				 */
				if (is_thp && !nosplit) {
					if (!try_split_thp(page, &page2, from)) {
						nr_thp_split++;
						goto retry;
					}

					nr_thp_failed++;
					nr_failed += nr_subpages;
					goto out;
				}
				nr_failed++;
				goto out;
			case -EAGAIN:
				if (is_thp) {
					thp_retry++;
					break;
				}
				retry++;
				break;
			case MIGRATEPAGE_SUCCESS:
				if (is_thp) {
					nr_thp_succeeded++;
					nr_succeeded += nr_subpages;
					break;
				}
				nr_succeeded++;
				break;
			default:
				/*
				 * Permanent failure (-EBUSY, etc.):
				 * unlike -EAGAIN case, the failed page is
				 * removed from migration page list and not
				 * retried in the next outer loop.
				 */
				if (is_thp) {
					nr_thp_failed++;
					nr_failed += nr_subpages;
					break;
				}
				nr_failed++;
				break;
			}
		}
	}
	nr_failed += retry + thp_retry;
	nr_thp_failed += thp_retry;
	rc = nr_failed;
out:
	/*
	 * Put the permanent failure page back to migration list, they
	 * will be put back to the right list by the caller.
	 */
	list_splice(&ret_pages, from);

	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
	count_vm_events(PGMIGRATE_FAIL, nr_failed);
	count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
	count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
	count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
	trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
			       nr_thp_failed, nr_thp_split, mode, reason);

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return rc;
}

struct page *alloc_migration_target(struct page *page, unsigned long private)
{
	struct migration_target_control *mtc;
	gfp_t gfp_mask;
	unsigned int order = 0;
	struct page *new_page = NULL;
	int nid;
	int zidx;

	mtc = (struct migration_target_control *)private;
	gfp_mask = mtc->gfp_mask;
	nid = mtc->nid;
	if (nid == NUMA_NO_NODE)
		nid = page_to_nid(page);

	if (PageHuge(page)) {
		struct hstate *h = page_hstate(compound_head(page));

		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
	}

	if (PageTransHuge(page)) {
		/*
		 * clear __GFP_RECLAIM to make the migration callback
		 * consistent with regular THP allocations.
		 */
		gfp_mask &= ~__GFP_RECLAIM;
		gfp_mask |= GFP_TRANSHUGE;
		order = HPAGE_PMD_ORDER;
	}
	zidx = zone_idx(page_zone(page));
	if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
		gfp_mask |= __GFP_HIGHMEM;

	new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask);

	if (new_page && PageTransHuge(new_page))
		prep_transhuge_page(new_page);

	return new_page;
}

#ifdef CONFIG_NUMA

static int store_status(int __user *status, int start, int value, int nr)
{
	while (nr-- > 0) {
		if (put_user(value, status + start))
			return -EFAULT;
		start++;
	}

	return 0;
}

static int do_move_pages_to_node(struct mm_struct *mm,
		struct list_head *pagelist, int node)
{
	int err;
	struct migration_target_control mtc = {
		.nid = node,
		.gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
	};

	err = migrate_pages(pagelist, alloc_migration_target, NULL,
		(unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
	if (err)
		putback_movable_pages(pagelist);
	return err;
}

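/*
 * Resolves the given address to a struct page, isolates it from the LRU and
 * puts it to the given pagelist.
 * Returns:
 *     errno - if the page cannot be found/isolated
 *     0 - when it doesn't have to be migrated because it is already on the
 *         target node
 *     1 - when it has been queued
 */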
static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
		int node, struct list_head *pagelist, bool migrate_all)
{
	struct vm_area_struct *vma;
	struct page *page;
	unsigned int follflags;
	int err;

	mmap_read_lock(mm);
	err = -EFAULT;
	vma = find_vma(mm, addr);
	if (!vma || addr < vma->vm_start || !vma_migratable(vma))
		goto out;

	/* FOLL_DUMP to ignore special (like zero) pages */
	follflags = FOLL_GET | FOLL_DUMP;
	page = follow_page(vma, addr, follflags);

	err = PTR_ERR(page);
	if (IS_ERR(page))
		goto out;

	err = -ENOENT;
	if (!page)
		goto out;

	err = 0;
	if (page_to_nid(page) == node)
		goto out_putpage;

	err = -EACCES;
	if (page_mapcount(page) > 1 && !migrate_all)
		goto out_putpage;

	if (PageHuge(page)) {
		if (PageHead(page)) {
			isolate_huge_page(page, pagelist);
			err = 1;
		}
	} else {
		struct page *head;

		head = compound_head(page);
		err = isolate_lru_page(head);
		if (err)
			goto out_putpage;

		err = 1;
		list_add_tail(&head->lru, pagelist);
		mod_node_page_state(page_pgdat(head),
			NR_ISOLATED_ANON + page_is_file_lru(head),
			thp_nr_pages(head));
	}
out_putpage:
	/*
	 * Either remove the duplicate refcount from
	 * isolate_lru_page() or drop the page ref if it was
	 * not isolated.
	 */
	put_page(page);
out:
	mmap_read_unlock(mm);
	return err;
}

static int move_pages_and_store_status(struct mm_struct *mm, int node,
		struct list_head *pagelist, int __user *status,
		int start, int i, unsigned long nr_pages)
{
	int err;

	if (list_empty(pagelist))
		return 0;

	err = do_move_pages_to_node(mm, pagelist, node);
	if (err) {
		/*
		 * Positive err means the number of failed pages to migrate.
		 * Since we are going to abandon this attempt, also count the
		 * remaining, not-yet-attempted pages in the request as
		 * failures.
		 */
		if (err > 0)
			err += nr_pages - i - 1;
		return err;
	}
	return store_status(status, start, node, i - start);
}

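/*
 * Migrate an array of page address onto an array of nodes and fill
 * the corresponding array of status.
 */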
static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
			 unsigned long nr_pages,
			 const void __user * __user *pages,
			 const int __user *nodes,
			 int __user *status, int flags)
{
	int current_node = NUMA_NO_NODE;
	LIST_HEAD(pagelist);
	int start, i;
	int err = 0, err1;

	lru_cache_disable();

	for (i = start = 0; i < nr_pages; i++) {
		const void __user *p;
		unsigned long addr;
		int node;

		err = -EFAULT;
		if (get_user(p, pages + i))
			goto out_flush;
		if (get_user(node, nodes + i))
			goto out_flush;
		addr = (unsigned long)untagged_addr(p);

		err = -ENODEV;
		if (node < 0 || node >= MAX_NUMNODES)
			goto out_flush;
		if (!node_state(node, N_MEMORY))
			goto out_flush;

		err = -EACCES;
		if (!node_isset(node, task_nodes))
			goto out_flush;

		if (current_node == NUMA_NO_NODE) {
			current_node = node;
			start = i;
		} else if (node != current_node) {
			err = move_pages_and_store_status(mm, current_node,
					&pagelist, status, start, i, nr_pages);
			if (err)
				goto out;
			start = i;
			current_node = node;
		}

		/*
		 * Errors in the page lookup or isolation are not fatal and we
		 * simply report them via the status array.
		 */
		err = add_page_for_migration(mm, addr, current_node,
				&pagelist, flags & MPOL_MF_MOVE_ALL);

		if (err > 0) {
			/* The page is successfully queued for migration */
			continue;
		}

		/*
		 * If the page is already on the target node (!err), store the
		 * node, otherwise, store the err.
		 */
		err = store_status(status, i, err ? : current_node, 1);
		if (err)
			goto out_flush;

		err = move_pages_and_store_status(mm, current_node, &pagelist,
				status, start, i, nr_pages);
		if (err)
			goto out;
		current_node = NUMA_NO_NODE;
	}
out_flush:
	/* Make sure we do not overwrite the existing error */
	err1 = move_pages_and_store_status(mm, current_node, &pagelist,
				status, start, i, nr_pages);
	if (err >= 0)
		err = err1;
out:
	lru_cache_enable();
	return err;
}

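/*
 * Determine the nodes of an array of pages and store it in an array of status.
 */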
static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
				const void __user **pages, int *status)
{
	unsigned long i;

	mmap_read_lock(mm);

	for (i = 0; i < nr_pages; i++) {
		unsigned long addr = (unsigned long)(*pages);
		struct vm_area_struct *vma;
		struct page *page;
		int err = -EFAULT;

		vma = vma_lookup(mm, addr);
		if (!vma)
			goto set_status;

		/* FOLL_DUMP to ignore special (like zero) pages */
		page = follow_page(vma, addr, FOLL_DUMP);

		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto set_status;

		err = page ? page_to_nid(page) : -ENOENT;
set_status:
		*status = err;

		pages++;
		status++;
	}

	mmap_read_unlock(mm);
}

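/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */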
static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
			 const void __user * __user *pages,
			 int __user *status)
{
#define DO_PAGES_STAT_CHUNK_NR 16
	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
	int chunk_status[DO_PAGES_STAT_CHUNK_NR];

	while (nr_pages) {
		unsigned long chunk_nr;

		chunk_nr = nr_pages;
		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
			chunk_nr = DO_PAGES_STAT_CHUNK_NR;

		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
			break;

		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);

		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
			break;

		pages += chunk_nr;
		status += chunk_nr;
		nr_pages -= chunk_nr;
	}
	return nr_pages ? -EFAULT : 0;
}

static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
{
	struct task_struct *task;
	struct mm_struct *mm;

	/*
	 * A pid of 0 means "move pages in the current task's own address
	 * space", for which no extra permission checks are needed.
	 */
	if (!pid) {
		mmget(current->mm);
		*mem_nodes = cpuset_mems_allowed(current);
		return current->mm;
	}

	/* Find the mm_struct */
	rcu_read_lock();
	task = find_task_by_vpid(pid);
	if (!task) {
		rcu_read_unlock();
		return ERR_PTR(-ESRCH);
	}
	get_task_struct(task);

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		rcu_read_unlock();
		mm = ERR_PTR(-EPERM);
		goto out;
	}
	rcu_read_unlock();

	mm = ERR_PTR(security_task_movememory(task));
	if (IS_ERR(mm))
		goto out;
	*mem_nodes = cpuset_mems_allowed(task);
	mm = get_task_mm(task);
out:
	put_task_struct(task);
	if (!mm)
		mm = ERR_PTR(-EINVAL);
	return mm;
}

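/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */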
static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
			     const void __user * __user *pages,
			     const int __user *nodes,
			     int __user *status, int flags)
{
	struct mm_struct *mm;
	int err;
	nodemask_t task_nodes;

	/* Check flags */
	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
		return -EINVAL;

	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
		return -EPERM;

	mm = find_mm_struct(pid, &task_nodes);
	if (IS_ERR(mm))
		return PTR_ERR(mm);

	if (nodes)
		err = do_pages_move(mm, task_nodes, nr_pages, pages,
				    nodes, status, flags);
	else
		err = do_pages_stat(mm, nr_pages, pages, status);

	mmput(mm);
	return err;
}

SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
		const void __user * __user *, pages,
		const int __user *, nodes,
		int __user *, status, int, flags)
{
	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
		       compat_uptr_t __user *, pages32,
		       const int __user *, nodes,
		       int __user *, status,
		       int, flags)
{
	const void __user * __user *pages;
	int i;

	pages = compat_alloc_user_space(nr_pages * sizeof(void *));
	for (i = 0; i < nr_pages; i++) {
		compat_uptr_t p;

		if (get_user(p, pages32 + i) ||
			put_user(compat_ptr(p), pages + i))
			return -EFAULT;
	}
	return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
}
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_NUMA_BALANCING
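/*
 * Returns true if this is a safe migration target node for misplaced NUMA
 * pages. Currently it only checks the watermarks, which is crude.
 */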
static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
				   unsigned long nr_migrate_pages)
{
	int z;

	for (z = pgdat->nr_zones - 1; z >= 0; z--) {
		struct zone *zone = pgdat->node_zones + z;

		if (!populated_zone(zone))
			continue;

		/* Avoid waking kswapd by allocating pages_to_migrate pages. */
		if (!zone_watermark_ok(zone, 0,
				       high_wmark_pages(zone) +
				       nr_migrate_pages,
				       ZONE_MOVABLE, 0))
			continue;
		return true;
	}
	return false;
}

static struct page *alloc_misplaced_dst_page(struct page *page,
					   unsigned long data)
{
	int nid = (int) data;
	struct page *newpage;

	newpage = __alloc_pages_node(nid,
					 (GFP_HIGHUSER_MOVABLE |
					  __GFP_THISNODE | __GFP_NOMEMALLOC |
					  __GFP_NORETRY | __GFP_NOWARN) &
					 ~__GFP_RECLAIM, 0);

	return newpage;
}

static struct page *alloc_misplaced_dst_page_thp(struct page *page,
						 unsigned long data)
{
	int nid = (int) data;
	struct page *newpage;

	newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
				   HPAGE_PMD_ORDER);
	if (!newpage)
		goto out;

	prep_transhuge_page(newpage);

out:
	return newpage;
}

static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
	int page_lru;

	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);

	/* Do not migrate THP mapped by multiple processes */
	if (PageTransHuge(page) && total_mapcount(page) > 1)
		return 0;

	/* Avoid migrating to a node that is nearly full */
	if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
		return 0;

	if (isolate_lru_page(page))
		return 0;

	page_lru = page_is_file_lru(page);
	mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
			    thp_nr_pages(page));

	/*
	 * Isolating the page has taken another reference, so the
	 * caller's reference can be safely dropped without the page
	 * disappearing underneath us during migration.
	 */
	put_page(page);
	return 1;
}

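/*
 * Attempt to migrate a misplaced page to the specified destination
 * node. Caller is expected to have an elevated reference count on
 * the page that will be dropped by this function before returning.
 */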
int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
			   int node)
{
	pg_data_t *pgdat = NODE_DATA(node);
	int isolated;
	int nr_remaining;
	LIST_HEAD(migratepages);
	new_page_t *new;
	bool compound;
	int nr_pages = thp_nr_pages(page);

	/*
	 * PTE mapped THP or HugeTLB page can't reach here so the page could
	 * be either base page or THP.  And it must be head page if it is
	 * THP.
	 */
	compound = PageTransHuge(page);

	if (compound)
		new = alloc_misplaced_dst_page_thp;
	else
		new = alloc_misplaced_dst_page;

	/*
	 * Don't migrate file pages that are mapped in multiple processes
	 * with execute permissions as they are probably shared libraries.
	 */
	if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
	    (vma->vm_flags & VM_EXEC))
		goto out;

	/*
	 * Also do not migrate dirty pages as not all filesystems can move
	 * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
	 */
	if (page_is_file_lru(page) && PageDirty(page))
		goto out;

	isolated = numamigrate_isolate_page(pgdat, page);
	if (!isolated)
		goto out;

	list_add(&page->lru, &migratepages);
	nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
				     MIGRATE_ASYNC, MR_NUMA_MISPLACED);
	if (nr_remaining) {
		if (!list_empty(&migratepages)) {
			list_del(&page->lru);
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -nr_pages);
			putback_lru_page(page);
		}
		isolated = 0;
	} else
		count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
	BUG_ON(!list_empty(&migratepages));
	return isolated;

out:
	put_page(page);
	return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_NUMA */

#ifdef CONFIG_DEVICE_PRIVATE
static int migrate_vma_collect_skip(unsigned long start,
				    unsigned long end,
				    struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	unsigned long addr;

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		migrate->dst[migrate->npages] = 0;
		migrate->src[migrate->npages++] = 0;
	}

	return 0;
}

static int migrate_vma_collect_hole(unsigned long start,
				    unsigned long end,
				    __always_unused int depth,
				    struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	unsigned long addr;

	/* Only allow populating anonymous memory. */
	if (!vma_is_anonymous(walk->vma))
		return migrate_vma_collect_skip(start, end, walk);

	for (addr = start; addr < end; addr += PAGE_SIZE) {
		migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
		migrate->dst[migrate->npages] = 0;
		migrate->npages++;
		migrate->cpages++;
	}

	return 0;
}

static int migrate_vma_collect_pmd(pmd_t *pmdp,
				   unsigned long start,
				   unsigned long end,
				   struct mm_walk *walk)
{
	struct migrate_vma *migrate = walk->private;
	struct vm_area_struct *vma = walk->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start, unmapped = 0;
	spinlock_t *ptl;
	pte_t *ptep;

again:
	if (pmd_none(*pmdp))
		return migrate_vma_collect_hole(start, end, -1, walk);

	if (pmd_trans_huge(*pmdp)) {
		struct page *page;

		ptl = pmd_lock(mm, pmdp);
		if (unlikely(!pmd_trans_huge(*pmdp))) {
			spin_unlock(ptl);
			goto again;
		}

		page = pmd_page(*pmdp);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			split_huge_pmd(vma, pmdp, addr);
			if (pmd_trans_unstable(pmdp))
				return migrate_vma_collect_skip(start, end,
								walk);
		} else {
			int ret;

			get_page(page);
			spin_unlock(ptl);
			if (unlikely(!trylock_page(page)))
				return migrate_vma_collect_skip(start, end,
								walk);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (ret)
				return migrate_vma_collect_skip(start, end,
								walk);
			if (pmd_none(*pmdp))
				return migrate_vma_collect_hole(start, end, -1,
								walk);
		}
	}

	if (unlikely(pmd_bad(*pmdp)))
		return migrate_vma_collect_skip(start, end, walk);

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	arch_enter_lazy_mmu_mode();

	for (; addr < end; addr += PAGE_SIZE, ptep++) {
		unsigned long mpfn = 0, pfn;
		struct page *page;
		swp_entry_t entry;
		pte_t pte;

		pte = *ptep;

		if (pte_none(pte)) {
			if (vma_is_anonymous(vma)) {
				mpfn = MIGRATE_PFN_MIGRATE;
				migrate->cpages++;
			}
			goto next;
		}

		if (!pte_present(pte)) {
			/*
			 * Only care about unaddressable device page special
			 * page table entry. Other special swap entries are not
			 * migratable, and we ignore regular swapped page.
			 */
			entry = pte_to_swp_entry(pte);
			if (!is_device_private_entry(entry))
				goto next;

			page = pfn_swap_entry_to_page(entry);
			if (!(migrate->flags &
				MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
			    page->pgmap->owner != migrate->pgmap_owner)
				goto next;

			mpfn = migrate_pfn(page_to_pfn(page)) |
					MIGRATE_PFN_MIGRATE;
			if (is_writable_device_private_entry(entry))
				mpfn |= MIGRATE_PFN_WRITE;
		} else {
			if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
				goto next;
			pfn = pte_pfn(pte);
			if (is_zero_pfn(pfn)) {
				mpfn = MIGRATE_PFN_MIGRATE;
				migrate->cpages++;
				goto next;
			}
			page = vm_normal_page(migrate->vma, addr, pte);
			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
		}

		/* FIXME support THP */
		if (!page || !page->mapping || PageTransCompound(page)) {
			mpfn = 0;
			goto next;
		}

		/*
		 * By getting a reference on the page we pin it and that blocks
		 * any kind of migration. Side effect is that it "freezes" the
		 * pte.
		 *
		 * We drop this reference after isolating the page from the lru
		 * for non device page (device page are not on the lru and thus
		 * can't be dropped from it).
		 */
		get_page(page);
		migrate->cpages++;

		/*
		 * Optimize for the common case where page is only mapped once
		 * in one process. If we can lock the page, then we can safely
		 * set up a special migration page table entry now.
		 */
		if (trylock_page(page)) {
			pte_t swp_pte;

			mpfn |= MIGRATE_PFN_LOCKED;
			ptep_get_and_clear(mm, addr, ptep);

			/* Setup special migration page table entry */
			if (mpfn & MIGRATE_PFN_WRITE)
				entry = make_writable_migration_entry(
							page_to_pfn(page));
			else
				entry = make_readable_migration_entry(
							page_to_pfn(page));
			swp_pte = swp_entry_to_pte(entry);
			if (pte_present(pte)) {
				if (pte_soft_dirty(pte))
					swp_pte = pte_swp_mksoft_dirty(swp_pte);
				if (pte_uffd_wp(pte))
					swp_pte = pte_swp_mkuffd_wp(swp_pte);
			} else {
				if (pte_swp_soft_dirty(pte))
					swp_pte = pte_swp_mksoft_dirty(swp_pte);
				if (pte_swp_uffd_wp(pte))
					swp_pte = pte_swp_mkuffd_wp(swp_pte);
			}
			set_pte_at(mm, addr, ptep, swp_pte);

			/*
			 * The pte the page was mapped by is gone: drop the
			 * mapcount and the reference that mapping held. The
			 * reference taken above keeps the page pinned for
			 * migration.
			 */
			page_remove_rmap(page, false);
			put_page(page);

			if (pte_present(pte))
				unmapped++;
		}

next:
		migrate->dst[migrate->npages] = 0;
		migrate->src[migrate->npages++] = mpfn;
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(ptep - 1, ptl);

	/* Only flush the TLB if we actually modified any entries */
	if (unmapped)
		flush_tlb_range(walk->vma, start, end);

	return 0;
}

static const struct mm_walk_ops migrate_vma_walk_ops = {
	.pmd_entry		= migrate_vma_collect_pmd,
	.pte_hole		= migrate_vma_collect_hole,
};

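/*
 * migrate_vma_collect() - collect pages over a range of virtual addresses
 * @migrate: migrate struct containing all migration information
 *
 * This will walk the CPU page table. For each virtual address backed by a
 * valid page, it updates the src array and takes a reference on the page, in
 * order to pin the page until we lock it and unmap it.
 */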
static void migrate_vma_collect(struct migrate_vma *migrate)
{
	struct mmu_notifier_range range;

	/*
	 * Note that the pgmap_owner is passed to the mmu notifier callback so
	 * that the registered device driver can skip invalidating device
	 * private page mappings that won't be migrated.
	 */
	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
		migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
		migrate->pgmap_owner);
	mmu_notifier_invalidate_range_start(&range);

	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
			&migrate_vma_walk_ops, migrate);

	mmu_notifier_invalidate_range_end(&range);
	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
}

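/*
 * migrate_vma_check_page() - check if page is pinned or not
 * @page: struct page to check
 *
 * Pinned pages cannot be migrated. This is the same test as in
 * migrate_page_move_mapping(), except that here we allow migration of a
 * ZONE_DEVICE page.
 */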
static bool migrate_vma_check_page(struct page *page)
{
	/*
	 * One extra ref because caller holds an extra reference, either from
	 * isolate_lru_page() for a regular page, or migrate_vma_collect() for
	 * a device page.
	 */
	int extra = 1;

	/*
	 * FIXME support THP (transparent huge page), it is bit more complex to
	 * check them than regular pages, because they can be mapped with a pmd
	 * or with a pte (split pte mapping).
	 */
	if (PageCompound(page))
		return false;

	/* Page from ZONE_DEVICE have one extra reference */
	if (is_zone_device_page(page)) {
		/*
		 * Device private pages can only be referenced by their owner
		 * and by migration itself, so the extra reference accounted
		 * above is all that needs checking; other ZONE_DEVICE page
		 * types are not migratable here.
		 */
		return is_device_private_page(page);
	}

	/* For file backed page, the page cache holds an extra reference */
	if (page_mapping(page))
		extra += 1 + page_has_private(page);

	if ((page_count(page) - extra) > page_mapcount(page))
		return false;

	return true;
}

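/*
 * migrate_vma_prepare() - lock pages and isolate them from the lru
 * @migrate: migrate struct containing all migration information
 *
 * This locks pages that have been collected by migrate_vma_collect(). Once
 * each page is locked it is isolated from the lru (for non-device pages).
 * Finally, the ref taken by migrate_vma_collect() is dropped, as locked pages
 * cannot be migrated by concurrent migration threads.
 */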
static void migrate_vma_prepare(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	const unsigned long start = migrate->start;
	unsigned long addr, i, restore = 0;
	bool allow_drain = true;

	lru_add_drain();

	for (i = 0; (i < npages) && migrate->cpages; i++) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);
		bool remap = true;

		if (!page)
			continue;

		if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
			/*
			 * Because we are migrating several pages there can be
			 * a deadlock between 2 concurrent migration where each
			 * are waiting on each other page lock.
			 *
			 * Make migrate_vma() a best effort thing and backoff
			 * for any page we can not lock right away.
			 */
			if (!trylock_page(page)) {
				migrate->src[i] = 0;
				migrate->cpages--;
				put_page(page);
				continue;
			}
			remap = false;
			migrate->src[i] |= MIGRATE_PFN_LOCKED;
		}

		/* ZONE_DEVICE pages are not on LRU */
		if (!is_zone_device_page(page)) {
			if (!PageLRU(page) && allow_drain) {
				/* Drain CPU's pagevec */
				lru_add_drain_all();
				allow_drain = false;
			}

			if (isolate_lru_page(page)) {
				if (remap) {
					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
					migrate->cpages--;
					restore++;
				} else {
					migrate->src[i] = 0;
					unlock_page(page);
					migrate->cpages--;
					put_page(page);
				}
				continue;
			}

			/* Drop the reference we took in collect */
			put_page(page);
		}

		if (!migrate_vma_check_page(page)) {
			if (remap) {
				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
				migrate->cpages--;
				restore++;

				if (!is_zone_device_page(page)) {
					get_page(page);
					putback_lru_page(page);
				}
			} else {
				migrate->src[i] = 0;
				unlock_page(page);
				migrate->cpages--;

				if (!is_zone_device_page(page))
					putback_lru_page(page);
				else
					put_page(page);
			}
		}
	}

	for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);

		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		remove_migration_pte(page, migrate->vma, addr, page);

		migrate->src[i] = 0;
		unlock_page(page);
		put_page(page);
		restore--;
	}
}

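/*
 * migrate_vma_unmap() - replace page mapping with special migration pte entry
 * @migrate: migrate struct containing all migration information
 *
 * Replace page mapping (CPU page table pte) with a special migration pte
 * entry and check again if it has been pinned. Pinned pages are restored
 * because we cannot migrate them.
 *
 * This is the last step before we call the device driver callback to allocate
 * destination memory and copy contents of original page over to new page.
 */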
static void migrate_vma_unmap(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	const unsigned long start = migrate->start;
	unsigned long addr, i, restore = 0;

	for (i = 0; i < npages; i++) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);

		if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (page_mapped(page)) {
			try_to_migrate(page, 0);
			if (page_mapped(page))
				goto restore;
		}

		if (migrate_vma_check_page(page))
			continue;

restore:
		migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
		migrate->cpages--;
		restore++;
	}

	for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
		struct page *page = migrate_pfn_to_page(migrate->src[i]);

		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		remove_migration_ptes(page, page, false);

		migrate->src[i] = 0;
		unlock_page(page);
		restore--;

		if (is_zone_device_page(page))
			put_page(page);
		else
			putback_lru_page(page);
	}
}

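/**
 * migrate_vma_setup() - prepare to migrate a range of memory
 * @args: contains the vma, start, and pfns arrays for the migration
 *
 * Returns: negative errno on failures, 0 when 0 or more pages were migrated
 * without an error.
 *
 * Prepare to migrate a range of memory virtual address range by collecting
 * all the pages backing each virtual address in the range, saving them inside
 * the src array.  Then lock those pages and unmap them. Once the pages are
 * locked and unmapped, check whether each page is pinned or not.  Pages that
 * aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in
 * the corresponding src array entry.  Then restores any pages that are
 * pinned, by remapping and unlocking those pages.
 *
 * The caller should then allocate destination memory and copy source memory
 * to it for all those entries (ie with MIGRATE_PFN_VALID and
 * MIGRATE_PFN_MIGRATE flag set).  Once these are allocated and copied, the
 * caller must update each corresponding entry in the dst array with the pfn
 * value of the destination page and with the MIGRATE_PFN_VALID and
 * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct
 * pages locked, via lock_page()).
 *
 * Note that the caller does not have to migrate all the pages that are marked
 * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
 * device memory to system memory.  If the caller cannot migrate a device page
 * back to system memory, then it must return VM_FAULT_SIGBUS, which has
 * severe consequences for the userspace process, so it should be avoided if
 * at all possible.
 *
 * For empty entries inside CPU page table (pte_none() or pmd_none() is true)
 * we do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array
 * thus allowing the caller to allocate device memory for those unbacked
 * virtual addresses.  For this the caller simply has to allocate device
 * memory and properly set the destination entry like for regular migration.
 * Note that this can still fail, and thus inside the device driver the code
 * must check if the migration was successful for those entries after calling
 * migrate_vma_pages(), just like for regular migration.
 *
 * After that, the callers must call migrate_vma_pages() to go over each entry
 * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
 * flag set.  If the corresponding entry in dst array has MIGRATE_PFN_VALID
 * flag set, migration of the page is attempted; on failure the
 * MIGRATE_PFN_MIGRATE flag is cleared in the src array entry.
 *
 * It is safe for a driver to update its device page table after
 * migrate_vma_pages() because both the source and destination pages are still
 * locked, and the mmap_lock is held in read mode (hence no one can unmap the
 * range being migrated).
 *
 * Once the caller is done cleaning up things and updating its page table (if
 * it chose to do so, this is not an obligation) it finally calls
 * migrate_vma_finalize() to update the CPU page table to point to new pages
 * for successfully migrated pages or otherwise restore the CPU page table to
 * point to the original source pages.
 */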
int migrate_vma_setup(struct migrate_vma *args)
{
	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;

	args->start &= PAGE_MASK;
	args->end &= PAGE_MASK;
	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
		return -EINVAL;
	if (nr_pages <= 0)
		return -EINVAL;
	if (args->start < args->vma->vm_start ||
	    args->start >= args->vma->vm_end)
		return -EINVAL;
	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
		return -EINVAL;
	if (!args->src || !args->dst)
		return -EINVAL;

	memset(args->src, 0, sizeof(*args->src) * nr_pages);
	args->cpages = 0;
	args->npages = 0;

	migrate_vma_collect(args);

	if (args->cpages)
		migrate_vma_prepare(args);
	if (args->cpages)
		migrate_vma_unmap(args);

	/*
	 * At this point pages are locked and unmapped, and thus they have
	 * stable content and can safely be copied to destination memory that
	 * is allocated by the drivers.
	 */
	return 0;
}
EXPORT_SYMBOL(migrate_vma_setup);

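/*
 * This code closely matches the code in:
 *   __handle_mm_fault()
 *     handle_pte_fault()
 *       do_anonymous_page()
 * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
 * private page.
 */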
static void migrate_vma_insert_page(struct migrate_vma *migrate,
				    unsigned long addr,
				    struct page *page,
				    unsigned long *src)
{
	struct vm_area_struct *vma = migrate->vma;
	struct mm_struct *mm = vma->vm_mm;
	bool flush = false;
	spinlock_t *ptl;
	pte_t entry;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	/* Only allow populating anonymous memory */
	if (!vma_is_anonymous(vma))
		goto abort;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (!p4dp)
		goto abort;
	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		goto abort;
	pmdp = pmd_alloc(mm, pudp, addr);
	if (!pmdp)
		goto abort;

	if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
		goto abort;

	/*
	 * Use pte_alloc() instead of pte_alloc_map().  We can't run
	 * pte_offset_map() on pmds where a huge pmd might be created
	 * from a different thread.
	 *
	 * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
	 * parallel threads are excluded by other means.
	 *
	 * Here we only have mmap_read_lock(mm).
	 */
	if (pte_alloc(mm, pmdp))
		goto abort;

	/* See the comment in pte_alloc_one_map() */
	if (unlikely(pmd_trans_unstable(pmdp)))
		goto abort;

	if (unlikely(anon_vma_prepare(vma)))
		goto abort;
	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
		goto abort;

	/*
	 * The memory barrier inside __SetPageUptodate makes sure that
	 * preceding stores to the page contents become visible before
	 * the set_pte_at() write.
	 */
	__SetPageUptodate(page);

	if (is_zone_device_page(page)) {
		if (is_device_private_page(page)) {
			swp_entry_t swp_entry;

			if (vma->vm_flags & VM_WRITE)
				swp_entry = make_writable_device_private_entry(
							page_to_pfn(page));
			else
				swp_entry = make_readable_device_private_entry(
							page_to_pfn(page));
			entry = swp_entry_to_pte(swp_entry);
		} else {
			/*
			 * For now we only support migrating to un-addressable
			 * device memory.
			 */
			pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
			goto abort;
		}
	} else {
		entry = mk_pte(page, vma->vm_page_prot);
		if (vma->vm_flags & VM_WRITE)
			entry = pte_mkwrite(pte_mkdirty(entry));
	}

	ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);

	if (check_stable_address_space(mm))
		goto unlock_abort;

	if (pte_present(*ptep)) {
		unsigned long pfn = pte_pfn(*ptep);

		if (!is_zero_pfn(pfn))
			goto unlock_abort;
		flush = true;
	} else if (!pte_none(*ptep))
		goto unlock_abort;

	/*
	 * Check for userfaultfd but do not deliver the fault. Instead,
	 * just back off.
	 */
	if (userfaultfd_missing(vma))
		goto unlock_abort;

	inc_mm_counter(mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, vma, addr, false);
	if (!is_zone_device_page(page))
		lru_cache_add_inactive_or_unevictable(page, vma);
	get_page(page);

	if (flush) {
		flush_cache_page(vma, addr, pte_pfn(*ptep));
		ptep_clear_flush_notify(vma, addr, ptep);
		set_pte_at_notify(mm, addr, ptep, entry);
		update_mmu_cache(vma, addr, ptep);
	} else {
		/* No need to invalidate - it was non-present before */
		set_pte_at(mm, addr, ptep, entry);
		update_mmu_cache(vma, addr, ptep);
	}

	pte_unmap_unlock(ptep, ptl);
	*src = MIGRATE_PFN_MIGRATE;
	return;

unlock_abort:
	pte_unmap_unlock(ptep, ptl);
abort:
	*src &= ~MIGRATE_PFN_MIGRATE;
}

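/**
 * migrate_vma_pages() - migrate meta-data from src page to dst page
 * @migrate: migrate struct containing all migration information
 *
 * This migrates struct page meta-data from source struct page to destination
 * struct page. This effectively finishes the migration from source page to
 * the destination page.
 */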
void migrate_vma_pages(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	const unsigned long start = migrate->start;
	struct mmu_notifier_range range;
	unsigned long addr, i;
	bool notified = false;

	for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
		struct page *page = migrate_pfn_to_page(migrate->src[i]);
		struct address_space *mapping;
		int r;

		if (!newpage) {
			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
			continue;
		}

		if (!page) {
			if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
				continue;
			if (!notified) {
				notified = true;

				mmu_notifier_range_init_owner(&range,
					MMU_NOTIFY_MIGRATE, 0, migrate->vma,
					migrate->vma->vm_mm, addr, migrate->end,
					migrate->pgmap_owner);
				mmu_notifier_invalidate_range_start(&range);
			}
			migrate_vma_insert_page(migrate, addr, newpage,
						&migrate->src[i]);
			continue;
		}

		mapping = page_mapping(page);

		if (is_zone_device_page(newpage)) {
			if (is_device_private_page(newpage)) {
				/*
				 * For now only support private anonymous when
				 * migrating to un-addressable device memory.
				 */
				if (mapping) {
					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
					continue;
				}
			} else {
				/*
				 * Other types of ZONE_DEVICE page are not
				 * supported.
				 */
				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
				continue;
			}
		}

		r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
		if (r != MIGRATEPAGE_SUCCESS)
			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
	}

	/*
	 * No need to double call mmu_notifier->invalidate_range() callback as
	 * the above ptep_clear_flush_notify() inside migrate_vma_insert_page()
	 * did already call it.
	 */
	if (notified)
		mmu_notifier_invalidate_range_only_end(&range);
}
EXPORT_SYMBOL(migrate_vma_pages);

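/**
 * migrate_vma_finalize() - restore CPU page table entry
 * @migrate: migrate struct containing all migration information
 *
 * This replaces the special migration pte entry with either a mapping to the
 * new page if migration was successful for that page, or to the original page
 * otherwise.
 *
 * This also unlocks the pages and puts them back on the lru, or drops the
 * extra refcount, for device pages.
 */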
void migrate_vma_finalize(struct migrate_vma *migrate)
{
	const unsigned long npages = migrate->npages;
	unsigned long i;

	for (i = 0; i < npages; i++) {
		struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
		struct page *page = migrate_pfn_to_page(migrate->src[i]);

		if (!page) {
			if (newpage) {
				unlock_page(newpage);
				put_page(newpage);
			}
			continue;
		}

		if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
			if (newpage) {
				unlock_page(newpage);
				put_page(newpage);
			}
			newpage = page;
		}

		remove_migration_ptes(page, newpage, false);
		unlock_page(page);

		if (is_zone_device_page(page))
			put_page(page);
		else
			putback_lru_page(page);

		if (newpage != page) {
			unlock_page(newpage);
			if (is_zone_device_page(newpage))
				put_page(newpage);
			else
				putback_lru_page(newpage);
		}
	}
}
EXPORT_SYMBOL(migrate_vma_finalize);
#endif /* CONFIG_DEVICE_PRIVATE */