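// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality.
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project.
 */
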
#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"

/*
 * migrate_prep() needs to be called before we start compiling a list of pages
 * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
 * undesirable, use migrate_prep_local().
 */
void migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();
}

/* Do the necessary work of migrate_prep but not if it involves other CPUs */
void migrate_prep_local(void)
{
	lru_add_drain();
}

int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount preventing __free_pages() from doing its job,
	 * the put_page() at the end of this block will take care of
	 * releasing this page, thus avoiding a nasty leakage.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before holding a PG_lock because page's owner
	 * assumes anybody doesn't touch PG_lock of newly allocated page
	 * so unconditionally grabbing the lock ruins page's owner side.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;

	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as race against the releasing a page.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * lets be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	__SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}

/* It should be called on page which is PG_movable */
void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	__ClearPageIsolated(page);
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated non-lru movable page so here we can use
		 * __PageMovable because LRU page's mapping cannot have
		 * PAGE_MAPPING_MOVABLE.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
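
/*
 * Restore a potential migration pte to a working pte entry
 */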
201static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
202 unsigned long addr, void *old)
203{
204 struct page_vma_mapped_walk pvmw = {
205 .page = old,
206 .vma = vma,
207 .address = addr,
208 .flags = PVMW_SYNC | PVMW_MIGRATION,
209 };
210 struct page *new;
211 pte_t pte;
212 swp_entry_t entry;
213
214 VM_BUG_ON_PAGE(PageTail(page), page);
215 while (page_vma_mapped_walk(&pvmw)) {
216 if (PageKsm(page))
217 new = page;
218 else
219 new = page - pvmw.page->index +
220 linear_page_index(vma, pvmw.address);
221
222#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
223
224 if (!pvmw.pte) {
225 VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
226 remove_migration_pmd(&pvmw, new);
227 continue;
228 }
229#endif
230
231 get_page(new);
232 pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
233 if (pte_swp_soft_dirty(*pvmw.pte))
234 pte = pte_mksoft_dirty(pte);
235
236
237
238
239 entry = pte_to_swp_entry(*pvmw.pte);
240 if (is_write_migration_entry(entry))
241 pte = maybe_mkwrite(pte, vma);
242 else if (pte_swp_uffd_wp(*pvmw.pte))
243 pte = pte_mkuffd_wp(pte);
244
245 if (unlikely(is_device_private_page(new))) {
246 entry = make_device_private_entry(new, pte_write(pte));
247 pte = swp_entry_to_pte(entry);
248 if (pte_swp_soft_dirty(*pvmw.pte))
249 pte = pte_swp_mksoft_dirty(pte);
250 if (pte_swp_uffd_wp(*pvmw.pte))
251 pte = pte_swp_mkuffd_wp(pte);
252 }
253
254#ifdef CONFIG_HUGETLB_PAGE
255 if (PageHuge(new)) {
256 pte = pte_mkhuge(pte);
257 pte = arch_make_huge_pte(pte, vma, new, 0);
258 set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
259 if (PageAnon(new))
260 hugepage_add_anon_rmap(new, vma, pvmw.address);
261 else
262 page_dup_rmap(new, true);
263 } else
264#endif
265 {
266 set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
267
268 if (PageAnon(new))
269 page_add_anon_rmap(new, vma, pvmw.address, false);
270 else
271 page_add_file_rmap(new, false);
272 }
273 if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
274 mlock_vma_page(new);
275
276 if (PageTransHuge(page) && PageMlocked(page))
277 clear_page_mlock(page);
278
279
280 update_mmu_cache(vma, pvmw.address, pvmw.pte);
281 }
282
283 return true;
284}
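
/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */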
290void remove_migration_ptes(struct page *old, struct page *new, bool locked)
291{
292 struct rmap_walk_control rwc = {
293 .rmap_one = remove_migration_pte,
294 .arg = old,
295 };
296
297 if (locked)
298 rmap_walk_locked(new, &rwc);
299 else
300 rmap_walk(new, &rwc);
301}
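
/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */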
308void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
309 spinlock_t *ptl)
310{
311 pte_t pte;
312 swp_entry_t entry;
313 struct page *page;
314
315 spin_lock(ptl);
316 pte = *ptep;
317 if (!is_swap_pte(pte))
318 goto out;
319
320 entry = pte_to_swp_entry(pte);
321 if (!is_migration_entry(entry))
322 goto out;
323
324 page = migration_entry_to_page(entry);
325
326
327
328
329
330
331 if (!get_page_unless_zero(page))
332 goto out;
333 pte_unmap_unlock(ptep, ptl);
334 put_and_wait_on_page_locked(page);
335 return;
336out:
337 pte_unmap_unlock(ptep, ptl);
338}
339
340void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
341 unsigned long address)
342{
343 spinlock_t *ptl = pte_lockptr(mm, pmd);
344 pte_t *ptep = pte_offset_map(pmd, address);
345 __migration_entry_wait(mm, ptep, ptl);
346}
347
348void migration_entry_wait_huge(struct vm_area_struct *vma,
349 struct mm_struct *mm, pte_t *pte)
350{
351 spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
352 __migration_entry_wait(mm, pte, ptl);
353}
354
355#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
356void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
357{
358 spinlock_t *ptl;
359 struct page *page;
360
361 ptl = pmd_lock(mm, pmd);
362 if (!is_pmd_migration_entry(*pmd))
363 goto unlock;
364 page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
365 if (!get_page_unless_zero(page))
366 goto unlock;
367 spin_unlock(ptl);
368 put_and_wait_on_page_locked(page);
369 return;
370unlock:
371 spin_unlock(ptl);
372}
373#endif
374
375static int expected_page_refs(struct address_space *mapping, struct page *page)
376{
377 int expected_count = 1;
378
379
380
381
382
383 expected_count += is_device_private_page(page);
384 if (mapping)
385 expected_count += thp_nr_pages(page) + page_has_private(page);
386
387 return expected_count;
388}
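
/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */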
398int migrate_page_move_mapping(struct address_space *mapping,
399 struct page *newpage, struct page *page, int extra_count)
400{
401 XA_STATE(xas, &mapping->i_pages, page_index(page));
402 struct zone *oldzone, *newzone;
403 int dirty;
404 int expected_count = expected_page_refs(mapping, page) + extra_count;
405 int nr = thp_nr_pages(page);
406
407 if (!mapping) {
408
409 if (page_count(page) != expected_count)
410 return -EAGAIN;
411
412
413 newpage->index = page->index;
414 newpage->mapping = page->mapping;
415 if (PageSwapBacked(page))
416 __SetPageSwapBacked(newpage);
417
418 return MIGRATEPAGE_SUCCESS;
419 }
420
421 oldzone = page_zone(page);
422 newzone = page_zone(newpage);
423
424 xas_lock_irq(&xas);
425 if (page_count(page) != expected_count || xas_load(&xas) != page) {
426 xas_unlock_irq(&xas);
427 return -EAGAIN;
428 }
429
430 if (!page_ref_freeze(page, expected_count)) {
431 xas_unlock_irq(&xas);
432 return -EAGAIN;
433 }
434
435
436
437
438
439 newpage->index = page->index;
440 newpage->mapping = page->mapping;
441 page_ref_add(newpage, nr);
442 if (PageSwapBacked(page)) {
443 __SetPageSwapBacked(newpage);
444 if (PageSwapCache(page)) {
445 SetPageSwapCache(newpage);
446 set_page_private(newpage, page_private(page));
447 }
448 } else {
449 VM_BUG_ON_PAGE(PageSwapCache(page), page);
450 }
451
452
453 dirty = PageDirty(page);
454 if (dirty) {
455 ClearPageDirty(page);
456 SetPageDirty(newpage);
457 }
458
459 xas_store(&xas, newpage);
460 if (PageTransHuge(page)) {
461 int i;
462
463 for (i = 1; i < nr; i++) {
464 xas_next(&xas);
465 xas_store(&xas, newpage);
466 }
467 }
468
469
470
471
472
473
474 page_ref_unfreeze(page, expected_count - nr);
475
476 xas_unlock(&xas);
477
478
479
480
481
482
483
484
485
486
487
488
489 if (newzone != oldzone) {
490 struct lruvec *old_lruvec, *new_lruvec;
491 struct mem_cgroup *memcg;
492
493 memcg = page_memcg(page);
494 old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
495 new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);
496
497 __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
498 __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
499 if (PageSwapBacked(page) && !PageSwapCache(page)) {
500 __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
501 __mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
502 }
503 if (dirty && mapping_can_writeback(mapping)) {
504 __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
505 __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
506 __mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
507 __mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
508 }
509 }
510 local_irq_enable();
511
512 return MIGRATEPAGE_SUCCESS;
513}
514EXPORT_SYMBOL(migrate_page_move_mapping);
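
/*
 * The expected number of remaining references is the same
 * as that of migrate_page_move_mapping().
 */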
520int migrate_huge_page_move_mapping(struct address_space *mapping,
521 struct page *newpage, struct page *page)
522{
523 XA_STATE(xas, &mapping->i_pages, page_index(page));
524 int expected_count;
525
526 xas_lock_irq(&xas);
527 expected_count = 2 + page_has_private(page);
528 if (page_count(page) != expected_count || xas_load(&xas) != page) {
529 xas_unlock_irq(&xas);
530 return -EAGAIN;
531 }
532
533 if (!page_ref_freeze(page, expected_count)) {
534 xas_unlock_irq(&xas);
535 return -EAGAIN;
536 }
537
538 newpage->index = page->index;
539 newpage->mapping = page->mapping;
540
541 get_page(newpage);
542
543 xas_store(&xas, newpage);
544
545 page_ref_unfreeze(page, expected_count - 1);
546
547 xas_unlock_irq(&xas);
548
549 return MIGRATEPAGE_SUCCESS;
550}
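
/*
 * Gigantic pages are so huge that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page.  We need something more
 * specialized.
 */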
557static void __copy_gigantic_page(struct page *dst, struct page *src,
558 int nr_pages)
559{
560 int i;
561 struct page *dst_base = dst;
562 struct page *src_base = src;
563
564 for (i = 0; i < nr_pages; ) {
565 cond_resched();
566 copy_highpage(dst, src);
567
568 i++;
569 dst = mem_map_next(dst, dst_base, i);
570 src = mem_map_next(src, src_base, i);
571 }
572}
573
574static void copy_huge_page(struct page *dst, struct page *src)
575{
576 int i;
577 int nr_pages;
578
579 if (PageHuge(src)) {
580
581 struct hstate *h = page_hstate(src);
582 nr_pages = pages_per_huge_page(h);
583
584 if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
585 __copy_gigantic_page(dst, src, nr_pages);
586 return;
587 }
588 } else {
589
590 BUG_ON(!PageTransHuge(src));
591 nr_pages = thp_nr_pages(src);
592 }
593
594 for (i = 0; i < nr_pages; i++) {
595 cond_resched();
596 copy_highpage(dst + i, src + i);
597 }
598}
599
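
/*
 * Copy the flags and some other ancillary information
 */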
603void migrate_page_states(struct page *newpage, struct page *page)
604{
605 int cpupid;
606
607 if (PageError(page))
608 SetPageError(newpage);
609 if (PageReferenced(page))
610 SetPageReferenced(newpage);
611 if (PageUptodate(page))
612 SetPageUptodate(newpage);
613 if (TestClearPageActive(page)) {
614 VM_BUG_ON_PAGE(PageUnevictable(page), page);
615 SetPageActive(newpage);
616 } else if (TestClearPageUnevictable(page))
617 SetPageUnevictable(newpage);
618 if (PageWorkingset(page))
619 SetPageWorkingset(newpage);
620 if (PageChecked(page))
621 SetPageChecked(newpage);
622 if (PageMappedToDisk(page))
623 SetPageMappedToDisk(newpage);
624
625
626 if (PageDirty(page))
627 SetPageDirty(newpage);
628
629 if (page_is_young(page))
630 set_page_young(newpage);
631 if (page_is_idle(page))
632 set_page_idle(newpage);
633
634
635
636
637
638 cpupid = page_cpupid_xchg_last(page, -1);
639 page_cpupid_xchg_last(newpage, cpupid);
640
641 ksm_migrate_page(newpage, page);
642
643
644
645
646 if (PageSwapCache(page))
647 ClearPageSwapCache(page);
648 ClearPagePrivate(page);
649 set_page_private(page, 0);
650
651
652
653
654
655 if (PageWriteback(newpage))
656 end_page_writeback(newpage);
657
658
659
660
661
662
663 if (PageReadahead(page))
664 SetPageReadahead(newpage);
665
666 copy_page_owner(page, newpage);
667
668 if (!PageHuge(page))
669 mem_cgroup_migrate(page, newpage);
670}
671EXPORT_SYMBOL(migrate_page_states);
672
673void migrate_page_copy(struct page *newpage, struct page *page)
674{
675 if (PageHuge(page) || PageTransHuge(page))
676 copy_huge_page(newpage, page);
677 else
678 copy_highpage(newpage, page);
679
680 migrate_page_states(newpage, page);
681}
682EXPORT_SYMBOL(migrate_page_copy);
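
/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */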
694int migrate_page(struct address_space *mapping,
695 struct page *newpage, struct page *page,
696 enum migrate_mode mode)
697{
698 int rc;
699
700 BUG_ON(PageWriteback(page));
701
702 rc = migrate_page_move_mapping(mapping, newpage, page, 0);
703
704 if (rc != MIGRATEPAGE_SUCCESS)
705 return rc;
706
707 if (mode != MIGRATE_SYNC_NO_COPY)
708 migrate_page_copy(newpage, page);
709 else
710 migrate_page_states(newpage, page);
711 return MIGRATEPAGE_SUCCESS;
712}
713EXPORT_SYMBOL(migrate_page);
714
715#ifdef CONFIG_BLOCK
716
717static bool buffer_migrate_lock_buffers(struct buffer_head *head,
718 enum migrate_mode mode)
719{
720 struct buffer_head *bh = head;
721
722
723 if (mode != MIGRATE_ASYNC) {
724 do {
725 lock_buffer(bh);
726 bh = bh->b_this_page;
727
728 } while (bh != head);
729
730 return true;
731 }
732
733
734 do {
735 if (!trylock_buffer(bh)) {
736
737
738
739
740 struct buffer_head *failed_bh = bh;
741 bh = head;
742 while (bh != failed_bh) {
743 unlock_buffer(bh);
744 bh = bh->b_this_page;
745 }
746 return false;
747 }
748
749 bh = bh->b_this_page;
750 } while (bh != head);
751 return true;
752}
753
754static int __buffer_migrate_page(struct address_space *mapping,
755 struct page *newpage, struct page *page, enum migrate_mode mode,
756 bool check_refs)
757{
758 struct buffer_head *bh, *head;
759 int rc;
760 int expected_count;
761
762 if (!page_has_buffers(page))
763 return migrate_page(mapping, newpage, page, mode);
764
765
766 expected_count = expected_page_refs(mapping, page);
767 if (page_count(page) != expected_count)
768 return -EAGAIN;
769
770 head = page_buffers(page);
771 if (!buffer_migrate_lock_buffers(head, mode))
772 return -EAGAIN;
773
774 if (check_refs) {
775 bool busy;
776 bool invalidated = false;
777
778recheck_buffers:
779 busy = false;
780 spin_lock(&mapping->private_lock);
781 bh = head;
782 do {
783 if (atomic_read(&bh->b_count)) {
784 busy = true;
785 break;
786 }
787 bh = bh->b_this_page;
788 } while (bh != head);
789 if (busy) {
790 if (invalidated) {
791 rc = -EAGAIN;
792 goto unlock_buffers;
793 }
794 spin_unlock(&mapping->private_lock);
795 invalidate_bh_lrus();
796 invalidated = true;
797 goto recheck_buffers;
798 }
799 }
800
801 rc = migrate_page_move_mapping(mapping, newpage, page, 0);
802 if (rc != MIGRATEPAGE_SUCCESS)
803 goto unlock_buffers;
804
805 attach_page_private(newpage, detach_page_private(page));
806
807 bh = head;
808 do {
809 set_bh_page(bh, newpage, bh_offset(bh));
810 bh = bh->b_this_page;
811
812 } while (bh != head);
813
814 if (mode != MIGRATE_SYNC_NO_COPY)
815 migrate_page_copy(newpage, page);
816 else
817 migrate_page_states(newpage, page);
818
819 rc = MIGRATEPAGE_SUCCESS;
820unlock_buffers:
821 if (check_refs)
822 spin_unlock(&mapping->private_lock);
823 bh = head;
824 do {
825 unlock_buffer(bh);
826 bh = bh->b_this_page;
827
828 } while (bh != head);
829
830 return rc;
831}
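
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist. For example attached buffer heads are accessed only under page lock.
 */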
838int buffer_migrate_page(struct address_space *mapping,
839 struct page *newpage, struct page *page, enum migrate_mode mode)
840{
841 return __buffer_migrate_page(mapping, newpage, page, mode, false);
842}
843EXPORT_SYMBOL(buffer_migrate_page);
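
/*
 * Same as above except that this variant is more careful and checks that there
 * are also no buffer head references. This function is the right one for
 * mappings where buffer heads are directly looked up and referenced (such as
 * block device mappings).
 */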
851int buffer_migrate_page_norefs(struct address_space *mapping,
852 struct page *newpage, struct page *page, enum migrate_mode mode)
853{
854 return __buffer_migrate_page(mapping, newpage, page, mode, true);
855}
856#endif
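
/*
 * Writeback a page to clean the dirty state
 */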
861static int writeout(struct address_space *mapping, struct page *page)
862{
863 struct writeback_control wbc = {
864 .sync_mode = WB_SYNC_NONE,
865 .nr_to_write = 1,
866 .range_start = 0,
867 .range_end = LLONG_MAX,
868 .for_reclaim = 1
869 };
870 int rc;
871
872 if (!mapping->a_ops->writepage)
873
874 return -EINVAL;
875
876 if (!clear_page_dirty_for_io(page))
877
878 return -EAGAIN;
879
880
881
882
883
884
885
886
887
888 remove_migration_ptes(page, page, false);
889
890 rc = mapping->a_ops->writepage(page, &wbc);
891
892 if (rc != AOP_WRITEPAGE_ACTIVATE)
893
894 lock_page(page);
895
896 return (rc < 0) ? -EIO : -EAGAIN;
897}
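
/*
 * Default handling if a filesystem does not provide a migration function.
 */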
902static int fallback_migrate_page(struct address_space *mapping,
903 struct page *newpage, struct page *page, enum migrate_mode mode)
904{
905 if (PageDirty(page)) {
906
907 switch (mode) {
908 case MIGRATE_SYNC:
909 case MIGRATE_SYNC_NO_COPY:
910 break;
911 default:
912 return -EBUSY;
913 }
914 return writeout(mapping, page);
915 }
916
917
918
919
920
921 if (page_has_private(page) &&
922 !try_to_release_page(page, GFP_KERNEL))
923 return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
924
925 return migrate_page(mapping, newpage, page, mode);
926}
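
/*
 * Move a page to a newly allocated page
 * The page is locked. All ptes have been replaced by migration
 * entries and the page is dirty if it was dirty before.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */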
939static int move_to_new_page(struct page *newpage, struct page *page,
940 enum migrate_mode mode)
941{
942 struct address_space *mapping;
943 int rc = -EAGAIN;
944 bool is_lru = !__PageMovable(page);
945
946 VM_BUG_ON_PAGE(!PageLocked(page), page);
947 VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
948
949 mapping = page_mapping(page);
950
951 if (likely(is_lru)) {
952 if (!mapping)
953 rc = migrate_page(mapping, newpage, page, mode);
954 else if (mapping->a_ops->migratepage)
955
956
957
958
959
960
961
962 rc = mapping->a_ops->migratepage(mapping, newpage,
963 page, mode);
964 else
965 rc = fallback_migrate_page(mapping, newpage,
966 page, mode);
967 } else {
968
969
970
971
972 VM_BUG_ON_PAGE(!PageIsolated(page), page);
973 if (!PageMovable(page)) {
974 rc = MIGRATEPAGE_SUCCESS;
975 __ClearPageIsolated(page);
976 goto out;
977 }
978
979 rc = mapping->a_ops->migratepage(mapping, newpage,
980 page, mode);
981 WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
982 !PageIsolated(page));
983 }
984
985
986
987
988
989 if (rc == MIGRATEPAGE_SUCCESS) {
990 if (__PageMovable(page)) {
991 VM_BUG_ON_PAGE(!PageIsolated(page), page);
992
993
994
995
996
997 __ClearPageIsolated(page);
998 }
999
1000
1001
1002
1003
1004
1005 if (!PageMappingFlags(page))
1006 page->mapping = NULL;
1007
1008 if (likely(!is_zone_device_page(newpage)))
1009 flush_dcache_page(newpage);
1010
1011 }
1012out:
1013 return rc;
1014}
1015
1016static int __unmap_and_move(struct page *page, struct page *newpage,
1017 int force, enum migrate_mode mode)
1018{
1019 int rc = -EAGAIN;
1020 int page_was_mapped = 0;
1021 struct anon_vma *anon_vma = NULL;
1022 bool is_lru = !__PageMovable(page);
1023
1024 if (!trylock_page(page)) {
1025 if (!force || mode == MIGRATE_ASYNC)
1026 goto out;
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041 if (current->flags & PF_MEMALLOC)
1042 goto out;
1043
1044 lock_page(page);
1045 }
1046
1047 if (PageWriteback(page)) {
1048
1049
1050
1051
1052
1053
1054 switch (mode) {
1055 case MIGRATE_SYNC:
1056 case MIGRATE_SYNC_NO_COPY:
1057 break;
1058 default:
1059 rc = -EBUSY;
1060 goto out_unlock;
1061 }
1062 if (!force)
1063 goto out_unlock;
1064 wait_on_page_writeback(page);
1065 }
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081 if (PageAnon(page) && !PageKsm(page))
1082 anon_vma = page_get_anon_vma(page);
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092 if (unlikely(!trylock_page(newpage)))
1093 goto out_unlock;
1094
1095 if (unlikely(!is_lru)) {
1096 rc = move_to_new_page(newpage, page, mode);
1097 goto out_unlock_both;
1098 }
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112 if (!page->mapping) {
1113 VM_BUG_ON_PAGE(PageAnon(page), page);
1114 if (page_has_private(page)) {
1115 try_to_free_buffers(page);
1116 goto out_unlock_both;
1117 }
1118 } else if (page_mapped(page)) {
1119
1120 VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
1121 page);
1122 try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK);
1123 page_was_mapped = 1;
1124 }
1125
1126 if (!page_mapped(page))
1127 rc = move_to_new_page(newpage, page, mode);
1128
1129 if (page_was_mapped)
1130 remove_migration_ptes(page,
1131 rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
1132
1133out_unlock_both:
1134 unlock_page(newpage);
1135out_unlock:
1136
1137 if (anon_vma)
1138 put_anon_vma(anon_vma);
1139 unlock_page(page);
1140out:
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150 if (rc == MIGRATEPAGE_SUCCESS) {
1151 if (unlikely(!is_lru))
1152 put_page(newpage);
1153 else
1154 putback_lru_page(newpage);
1155 }
1156
1157 return rc;
1158}
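
/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */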
1164static int unmap_and_move(new_page_t get_new_page,
1165 free_page_t put_new_page,
1166 unsigned long private, struct page *page,
1167 int force, enum migrate_mode mode,
1168 enum migrate_reason reason,
1169 struct list_head *ret)
1170{
1171 int rc = MIGRATEPAGE_SUCCESS;
1172 struct page *newpage = NULL;
1173
1174 if (!thp_migration_supported() && PageTransHuge(page))
1175 return -ENOSYS;
1176
1177 if (page_count(page) == 1) {
1178
1179 ClearPageActive(page);
1180 ClearPageUnevictable(page);
1181 if (unlikely(__PageMovable(page))) {
1182 lock_page(page);
1183 if (!PageMovable(page))
1184 __ClearPageIsolated(page);
1185 unlock_page(page);
1186 }
1187 goto out;
1188 }
1189
1190 newpage = get_new_page(page, private);
1191 if (!newpage)
1192 return -ENOMEM;
1193
1194 rc = __unmap_and_move(page, newpage, force, mode);
1195 if (rc == MIGRATEPAGE_SUCCESS)
1196 set_page_owner_migrate_reason(newpage, reason);
1197
1198out:
1199 if (rc != -EAGAIN) {
1200
1201
1202
1203
1204
1205 list_del(&page->lru);
1206 }
1207
1208
1209
1210
1211
1212
1213 if (rc == MIGRATEPAGE_SUCCESS) {
1214
1215
1216
1217
1218
1219 if (likely(!__PageMovable(page)))
1220 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
1221 page_is_file_lru(page), -thp_nr_pages(page));
1222
1223 if (reason != MR_MEMORY_FAILURE)
1224
1225
1226
1227 put_page(page);
1228 } else {
1229 if (rc != -EAGAIN)
1230 list_add_tail(&page->lru, ret);
1231
1232 if (put_new_page)
1233 put_new_page(newpage, private);
1234 else
1235 put_page(newpage);
1236 }
1237
1238 return rc;
1239}
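
/*
 * Counterpart of unmap_and_move_page() for hugepage migration.
 *
 * This function doesn't wait the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 */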
1259static int unmap_and_move_huge_page(new_page_t get_new_page,
1260 free_page_t put_new_page, unsigned long private,
1261 struct page *hpage, int force,
1262 enum migrate_mode mode, int reason,
1263 struct list_head *ret)
1264{
1265 int rc = -EAGAIN;
1266 int page_was_mapped = 0;
1267 struct page *new_hpage;
1268 struct anon_vma *anon_vma = NULL;
1269 struct address_space *mapping = NULL;
1270
1271
1272
1273
1274
1275
1276
1277
1278 if (!hugepage_migration_supported(page_hstate(hpage))) {
1279 list_move_tail(&hpage->lru, ret);
1280 return -ENOSYS;
1281 }
1282
1283 if (page_count(hpage) == 1) {
1284
1285 putback_active_hugepage(hpage);
1286 return MIGRATEPAGE_SUCCESS;
1287 }
1288
1289 new_hpage = get_new_page(hpage, private);
1290 if (!new_hpage)
1291 return -ENOMEM;
1292
1293 if (!trylock_page(hpage)) {
1294 if (!force)
1295 goto out;
1296 switch (mode) {
1297 case MIGRATE_SYNC:
1298 case MIGRATE_SYNC_NO_COPY:
1299 break;
1300 default:
1301 goto out;
1302 }
1303 lock_page(hpage);
1304 }
1305
1306
1307
1308
1309
1310
1311 if (page_private(hpage) && !page_mapping(hpage)) {
1312 rc = -EBUSY;
1313 goto out_unlock;
1314 }
1315
1316 if (PageAnon(hpage))
1317 anon_vma = page_get_anon_vma(hpage);
1318
1319 if (unlikely(!trylock_page(new_hpage)))
1320 goto put_anon;
1321
1322 if (page_mapped(hpage)) {
1323 bool mapping_locked = false;
1324 enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK;
1325
1326 if (!PageAnon(hpage)) {
1327
1328
1329
1330
1331
1332
1333 mapping = hugetlb_page_mapping_lock_write(hpage);
1334 if (unlikely(!mapping))
1335 goto unlock_put_anon;
1336
1337 mapping_locked = true;
1338 ttu |= TTU_RMAP_LOCKED;
1339 }
1340
1341 try_to_unmap(hpage, ttu);
1342 page_was_mapped = 1;
1343
1344 if (mapping_locked)
1345 i_mmap_unlock_write(mapping);
1346 }
1347
1348 if (!page_mapped(hpage))
1349 rc = move_to_new_page(new_hpage, hpage, mode);
1350
1351 if (page_was_mapped)
1352 remove_migration_ptes(hpage,
1353 rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
1354
1355unlock_put_anon:
1356 unlock_page(new_hpage);
1357
1358put_anon:
1359 if (anon_vma)
1360 put_anon_vma(anon_vma);
1361
1362 if (rc == MIGRATEPAGE_SUCCESS) {
1363 move_hugetlb_state(hpage, new_hpage, reason);
1364 put_new_page = NULL;
1365 }
1366
1367out_unlock:
1368 unlock_page(hpage);
1369out:
1370 if (rc == MIGRATEPAGE_SUCCESS)
1371 putback_active_hugepage(hpage);
1372 else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
1373 list_move_tail(&hpage->lru, ret);
1374
1375
1376
1377
1378
1379
1380 if (put_new_page)
1381 put_new_page(new_hpage, private);
1382 else
1383 putback_active_hugepage(new_hpage);
1384
1385 return rc;
1386}
1387
1388static inline int try_split_thp(struct page *page, struct page **page2,
1389 struct list_head *from)
1390{
1391 int rc = 0;
1392
1393 lock_page(page);
1394 rc = split_huge_page_to_list(page, from);
1395 unlock_page(page);
1396 if (!rc)
1397 list_safe_reset_next(page, *page2, lru);
1398
1399 return rc;
1400}
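
/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the conditions for
 *			page migration.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * It is caller's responsibility to call putback_movable_pages() to return
 * pages to the LRU or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */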
1423int migrate_pages(struct list_head *from, new_page_t get_new_page,
1424 free_page_t put_new_page, unsigned long private,
1425 enum migrate_mode mode, int reason)
1426{
1427 int retry = 1;
1428 int thp_retry = 1;
1429 int nr_failed = 0;
1430 int nr_succeeded = 0;
1431 int nr_thp_succeeded = 0;
1432 int nr_thp_failed = 0;
1433 int nr_thp_split = 0;
1434 int pass = 0;
1435 bool is_thp = false;
1436 struct page *page;
1437 struct page *page2;
1438 int swapwrite = current->flags & PF_SWAPWRITE;
1439 int rc, nr_subpages;
1440 LIST_HEAD(ret_pages);
1441
1442 if (!swapwrite)
1443 current->flags |= PF_SWAPWRITE;
1444
1445 for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
1446 retry = 0;
1447 thp_retry = 0;
1448
1449 list_for_each_entry_safe(page, page2, from, lru) {
1450retry:
1451
1452
1453
1454
1455
1456 is_thp = PageTransHuge(page) && !PageHuge(page);
1457 nr_subpages = thp_nr_pages(page);
1458 cond_resched();
1459
1460 if (PageHuge(page))
1461 rc = unmap_and_move_huge_page(get_new_page,
1462 put_new_page, private, page,
1463 pass > 2, mode, reason,
1464 &ret_pages);
1465 else
1466 rc = unmap_and_move(get_new_page, put_new_page,
1467 private, page, pass > 2, mode,
1468 reason, &ret_pages);
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478 switch(rc) {
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490 case -ENOSYS:
1491
1492 if (is_thp) {
1493 if (!try_split_thp(page, &page2, from)) {
1494 nr_thp_split++;
1495 goto retry;
1496 }
1497
1498 nr_thp_failed++;
1499 nr_failed += nr_subpages;
1500 break;
1501 }
1502
1503
1504 nr_failed++;
1505 break;
1506 case -ENOMEM:
1507
1508
1509
1510
1511 if (is_thp) {
1512 if (!try_split_thp(page, &page2, from)) {
1513 nr_thp_split++;
1514 goto retry;
1515 }
1516
1517 nr_thp_failed++;
1518 nr_failed += nr_subpages;
1519 goto out;
1520 }
1521 nr_failed++;
1522 goto out;
1523 case -EAGAIN:
1524 if (is_thp) {
1525 thp_retry++;
1526 break;
1527 }
1528 retry++;
1529 break;
1530 case MIGRATEPAGE_SUCCESS:
1531 if (is_thp) {
1532 nr_thp_succeeded++;
1533 nr_succeeded += nr_subpages;
1534 break;
1535 }
1536 nr_succeeded++;
1537 break;
1538 default:
1539
1540
1541
1542
1543
1544
1545 if (is_thp) {
1546 nr_thp_failed++;
1547 nr_failed += nr_subpages;
1548 break;
1549 }
1550 nr_failed++;
1551 break;
1552 }
1553 }
1554 }
1555 nr_failed += retry + thp_retry;
1556 nr_thp_failed += thp_retry;
1557 rc = nr_failed;
1558out:
1559
1560
1561
1562
1563 list_splice(&ret_pages, from);
1564
1565 count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
1566 count_vm_events(PGMIGRATE_FAIL, nr_failed);
1567 count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
1568 count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
1569 count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
1570 trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
1571 nr_thp_failed, nr_thp_split, mode, reason);
1572
1573 if (!swapwrite)
1574 current->flags &= ~PF_SWAPWRITE;
1575
1576 return rc;
1577}
1578
1579struct page *alloc_migration_target(struct page *page, unsigned long private)
1580{
1581 struct migration_target_control *mtc;
1582 gfp_t gfp_mask;
1583 unsigned int order = 0;
1584 struct page *new_page = NULL;
1585 int nid;
1586 int zidx;
1587
1588 mtc = (struct migration_target_control *)private;
1589 gfp_mask = mtc->gfp_mask;
1590 nid = mtc->nid;
1591 if (nid == NUMA_NO_NODE)
1592 nid = page_to_nid(page);
1593
1594 if (PageHuge(page)) {
1595 struct hstate *h = page_hstate(compound_head(page));
1596
1597 gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
1598 return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
1599 }
1600
1601 if (PageTransHuge(page)) {
1602
1603
1604
1605
1606 gfp_mask &= ~__GFP_RECLAIM;
1607 gfp_mask |= GFP_TRANSHUGE;
1608 order = HPAGE_PMD_ORDER;
1609 }
1610 zidx = zone_idx(page_zone(page));
1611 if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
1612 gfp_mask |= __GFP_HIGHMEM;
1613
1614 new_page = __alloc_pages_nodemask(gfp_mask, order, nid, mtc->nmask);
1615
1616 if (new_page && PageTransHuge(new_page))
1617 prep_transhuge_page(new_page);
1618
1619 return new_page;
1620}
1621
1622#ifdef CONFIG_NUMA
1623
1624static int store_status(int __user *status, int start, int value, int nr)
1625{
1626 while (nr-- > 0) {
1627 if (put_user(value, status + start))
1628 return -EFAULT;
1629 start++;
1630 }
1631
1632 return 0;
1633}
1634
1635static int do_move_pages_to_node(struct mm_struct *mm,
1636 struct list_head *pagelist, int node)
1637{
1638 int err;
1639 struct migration_target_control mtc = {
1640 .nid = node,
1641 .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
1642 };
1643
1644 err = migrate_pages(pagelist, alloc_migration_target, NULL,
1645 (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL);
1646 if (err)
1647 putback_movable_pages(pagelist);
1648 return err;
1649}
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
1661 int node, struct list_head *pagelist, bool migrate_all)
1662{
1663 struct vm_area_struct *vma;
1664 struct page *page;
1665 unsigned int follflags;
1666 int err;
1667
1668 mmap_read_lock(mm);
1669 err = -EFAULT;
1670 vma = find_vma(mm, addr);
1671 if (!vma || addr < vma->vm_start || !vma_migratable(vma))
1672 goto out;
1673
1674
1675 follflags = FOLL_GET | FOLL_DUMP;
1676 page = follow_page(vma, addr, follflags);
1677
1678 err = PTR_ERR(page);
1679 if (IS_ERR(page))
1680 goto out;
1681
1682 err = -ENOENT;
1683 if (!page)
1684 goto out;
1685
1686 err = 0;
1687 if (page_to_nid(page) == node)
1688 goto out_putpage;
1689
1690 err = -EACCES;
1691 if (page_mapcount(page) > 1 && !migrate_all)
1692 goto out_putpage;
1693
1694 if (PageHuge(page)) {
1695 if (PageHead(page)) {
1696 isolate_huge_page(page, pagelist);
1697 err = 1;
1698 }
1699 } else {
1700 struct page *head;
1701
1702 head = compound_head(page);
1703 err = isolate_lru_page(head);
1704 if (err)
1705 goto out_putpage;
1706
1707 err = 1;
1708 list_add_tail(&head->lru, pagelist);
1709 mod_node_page_state(page_pgdat(head),
1710 NR_ISOLATED_ANON + page_is_file_lru(head),
1711 thp_nr_pages(head));
1712 }
1713out_putpage:
1714
1715
1716
1717
1718
1719 put_page(page);
1720out:
1721 mmap_read_unlock(mm);
1722 return err;
1723}
1724
1725static int move_pages_and_store_status(struct mm_struct *mm, int node,
1726 struct list_head *pagelist, int __user *status,
1727 int start, int i, unsigned long nr_pages)
1728{
1729 int err;
1730
1731 if (list_empty(pagelist))
1732 return 0;
1733
1734 err = do_move_pages_to_node(mm, pagelist, node);
1735 if (err) {
1736
1737
1738
1739
1740
1741
1742
1743
1744 if (err > 0)
1745 err += nr_pages - i - 1;
1746 return err;
1747 }
1748 return store_status(status, start, node, i - start);
1749}
1750
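
/*
 * Migrate an array of page address onto an array of nodes and fill
 * the corresponding array of status.
 */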
1755static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1756 unsigned long nr_pages,
1757 const void __user * __user *pages,
1758 const int __user *nodes,
1759 int __user *status, int flags)
1760{
1761 int current_node = NUMA_NO_NODE;
1762 LIST_HEAD(pagelist);
1763 int start, i;
1764 int err = 0, err1;
1765
1766 migrate_prep();
1767
1768 for (i = start = 0; i < nr_pages; i++) {
1769 const void __user *p;
1770 unsigned long addr;
1771 int node;
1772
1773 err = -EFAULT;
1774 if (get_user(p, pages + i))
1775 goto out_flush;
1776 if (get_user(node, nodes + i))
1777 goto out_flush;
1778 addr = (unsigned long)untagged_addr(p);
1779
1780 err = -ENODEV;
1781 if (node < 0 || node >= MAX_NUMNODES)
1782 goto out_flush;
1783 if (!node_state(node, N_MEMORY))
1784 goto out_flush;
1785
1786 err = -EACCES;
1787 if (!node_isset(node, task_nodes))
1788 goto out_flush;
1789
1790 if (current_node == NUMA_NO_NODE) {
1791 current_node = node;
1792 start = i;
1793 } else if (node != current_node) {
1794 err = move_pages_and_store_status(mm, current_node,
1795 &pagelist, status, start, i, nr_pages);
1796 if (err)
1797 goto out;
1798 start = i;
1799 current_node = node;
1800 }
1801
1802
1803
1804
1805
1806 err = add_page_for_migration(mm, addr, current_node,
1807 &pagelist, flags & MPOL_MF_MOVE_ALL);
1808
1809 if (err > 0) {
1810
1811 continue;
1812 }
1813
1814
1815
1816
1817
1818 err = store_status(status, i, err ? : current_node, 1);
1819 if (err)
1820 goto out_flush;
1821
1822 err = move_pages_and_store_status(mm, current_node, &pagelist,
1823 status, start, i, nr_pages);
1824 if (err)
1825 goto out;
1826 current_node = NUMA_NO_NODE;
1827 }
1828out_flush:
1829
1830 err1 = move_pages_and_store_status(mm, current_node, &pagelist,
1831 status, start, i, nr_pages);
1832 if (err >= 0)
1833 err = err1;
1834out:
1835 return err;
1836}
1837
1838
1839
1840
1841static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
1842 const void __user **pages, int *status)
1843{
1844 unsigned long i;
1845
1846 mmap_read_lock(mm);
1847
1848 for (i = 0; i < nr_pages; i++) {
1849 unsigned long addr = (unsigned long)(*pages);
1850 struct vm_area_struct *vma;
1851 struct page *page;
1852 int err = -EFAULT;
1853
1854 vma = find_vma(mm, addr);
1855 if (!vma || addr < vma->vm_start)
1856 goto set_status;
1857
1858
1859 page = follow_page(vma, addr, FOLL_DUMP);
1860
1861 err = PTR_ERR(page);
1862 if (IS_ERR(page))
1863 goto set_status;
1864
1865 err = page ? page_to_nid(page) : -ENOENT;
1866set_status:
1867 *status = err;
1868
1869 pages++;
1870 status++;
1871 }
1872
1873 mmap_read_unlock(mm);
1874}
1875
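
/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */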
1880static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1881 const void __user * __user *pages,
1882 int __user *status)
1883{
1884#define DO_PAGES_STAT_CHUNK_NR 16
1885 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1886 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1887
1888 while (nr_pages) {
1889 unsigned long chunk_nr;
1890
1891 chunk_nr = nr_pages;
1892 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
1893 chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1894
1895 if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
1896 break;
1897
1898 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1899
1900 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
1901 break;
1902
1903 pages += chunk_nr;
1904 status += chunk_nr;
1905 nr_pages -= chunk_nr;
1906 }
1907 return nr_pages ? -EFAULT : 0;
1908}
1909
1910static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
1911{
1912 struct task_struct *task;
1913 struct mm_struct *mm;
1914
1915
1916
1917
1918
1919 if (!pid) {
1920 mmget(current->mm);
1921 *mem_nodes = cpuset_mems_allowed(current);
1922 return current->mm;
1923 }
1924
1925
1926 rcu_read_lock();
1927 task = find_task_by_vpid(pid);
1928 if (!task) {
1929 rcu_read_unlock();
1930 return ERR_PTR(-ESRCH);
1931 }
1932 get_task_struct(task);
1933
1934
1935
1936
1937
1938 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
1939 rcu_read_unlock();
1940 mm = ERR_PTR(-EPERM);
1941 goto out;
1942 }
1943 rcu_read_unlock();
1944
1945 mm = ERR_PTR(security_task_movememory(task));
1946 if (IS_ERR(mm))
1947 goto out;
1948 *mem_nodes = cpuset_mems_allowed(task);
1949 mm = get_task_mm(task);
1950out:
1951 put_task_struct(task);
1952 if (!mm)
1953 mm = ERR_PTR(-EINVAL);
1954 return mm;
1955}
1956
1957
1958
1959
1960
1961static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
1962 const void __user * __user *pages,
1963 const int __user *nodes,
1964 int __user *status, int flags)
1965{
1966 struct mm_struct *mm;
1967 int err;
1968 nodemask_t task_nodes;
1969
1970
1971 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
1972 return -EINVAL;
1973
1974 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
1975 return -EPERM;
1976
1977 mm = find_mm_struct(pid, &task_nodes);
1978 if (IS_ERR(mm))
1979 return PTR_ERR(mm);
1980
1981 if (nodes)
1982 err = do_pages_move(mm, task_nodes, nr_pages, pages,
1983 nodes, status, flags);
1984 else
1985 err = do_pages_stat(mm, nr_pages, pages, status);
1986
1987 mmput(mm);
1988 return err;
1989}
1990
1991SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1992 const void __user * __user *, pages,
1993 const int __user *, nodes,
1994 int __user *, status, int, flags)
1995{
1996 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
1997}
1998
1999#ifdef CONFIG_COMPAT
2000COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
2001 compat_uptr_t __user *, pages32,
2002 const int __user *, nodes,
2003 int __user *, status,
2004 int, flags)
2005{
2006 const void __user * __user *pages;
2007 int i;
2008
2009 pages = compat_alloc_user_space(nr_pages * sizeof(void *));
2010 for (i = 0; i < nr_pages; i++) {
2011 compat_uptr_t p;
2012
2013 if (get_user(p, pages32 + i) ||
2014 put_user(compat_ptr(p), pages + i))
2015 return -EFAULT;
2016 }
2017 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
2018}
2019#endif
2020
2021#ifdef CONFIG_NUMA_BALANCING
2022
2023
2024
2025
2026static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
2027 unsigned long nr_migrate_pages)
2028{
2029 int z;
2030
2031 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
2032 struct zone *zone = pgdat->node_zones + z;
2033
2034 if (!populated_zone(zone))
2035 continue;
2036
2037
2038 if (!zone_watermark_ok(zone, 0,
2039 high_wmark_pages(zone) +
2040 nr_migrate_pages,
2041 ZONE_MOVABLE, 0))
2042 continue;
2043 return true;
2044 }
2045 return false;
2046}
2047
2048static struct page *alloc_misplaced_dst_page(struct page *page,
2049 unsigned long data)
2050{
2051 int nid = (int) data;
2052 struct page *newpage;
2053
2054 newpage = __alloc_pages_node(nid,
2055 (GFP_HIGHUSER_MOVABLE |
2056 __GFP_THISNODE | __GFP_NOMEMALLOC |
2057 __GFP_NORETRY | __GFP_NOWARN) &
2058 ~__GFP_RECLAIM, 0);
2059
2060 return newpage;
2061}
2062
2063static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
2064{
2065 int page_lru;
2066
2067 VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
2068
2069
2070 if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
2071 return 0;
2072
2073 if (isolate_lru_page(page))
2074 return 0;
2075
2076
2077
2078
2079
2080
2081
2082
2083 if (PageTransHuge(page) && page_count(page) != 3) {
2084 putback_lru_page(page);
2085 return 0;
2086 }
2087
2088 page_lru = page_is_file_lru(page);
2089 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
2090 thp_nr_pages(page));
2091
2092
2093
2094
2095
2096
2097 put_page(page);
2098 return 1;
2099}
2100
2101bool pmd_trans_migrating(pmd_t pmd)
2102{
2103 struct page *page = pmd_page(pmd);
2104 return PageLocked(page);
2105}
2106
2107static inline bool is_shared_exec_page(struct vm_area_struct *vma,
2108 struct page *page)
2109{
2110 if (page_mapcount(page) != 1 &&
2111 (page_is_file_lru(page) || vma_is_shmem(vma)) &&
2112 (vma->vm_flags & VM_EXEC))
2113 return true;
2114
2115 return false;
2116}
2117
2118
2119
2120
2121
2122
2123int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
2124 int node)
2125{
2126 pg_data_t *pgdat = NODE_DATA(node);
2127 int isolated;
2128 int nr_remaining;
2129 LIST_HEAD(migratepages);
2130
2131
2132
2133
2134
2135 if (is_shared_exec_page(vma, page))
2136 goto out;
2137
2138
2139
2140
2141
2142 if (page_is_file_lru(page) && PageDirty(page))
2143 goto out;
2144
2145 isolated = numamigrate_isolate_page(pgdat, page);
2146 if (!isolated)
2147 goto out;
2148
2149 list_add(&page->lru, &migratepages);
2150 nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
2151 NULL, node, MIGRATE_ASYNC,
2152 MR_NUMA_MISPLACED);
2153 if (nr_remaining) {
2154 if (!list_empty(&migratepages)) {
2155 list_del(&page->lru);
2156 dec_node_page_state(page, NR_ISOLATED_ANON +
2157 page_is_file_lru(page));
2158 putback_lru_page(page);
2159 }
2160 isolated = 0;
2161 } else
2162 count_vm_numa_event(NUMA_PAGE_MIGRATE);
2163 BUG_ON(!list_empty(&migratepages));
2164 return isolated;
2165
2166out:
2167 put_page(page);
2168 return 0;
2169}
2170#endif
2171
2172#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
2173
2174
2175
2176
2177int migrate_misplaced_transhuge_page(struct mm_struct *mm,
2178 struct vm_area_struct *vma,
2179 pmd_t *pmd, pmd_t entry,
2180 unsigned long address,
2181 struct page *page, int node)
2182{
2183 spinlock_t *ptl;
2184 pg_data_t *pgdat = NODE_DATA(node);
2185 int isolated = 0;
2186 struct page *new_page = NULL;
2187 int page_lru = page_is_file_lru(page);
2188 unsigned long start = address & HPAGE_PMD_MASK;
2189
2190 if (is_shared_exec_page(vma, page))
2191 goto out;
2192
2193 new_page = alloc_pages_node(node,
2194 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
2195 HPAGE_PMD_ORDER);
2196 if (!new_page)
2197 goto out_fail;
2198 prep_transhuge_page(new_page);
2199
2200 isolated = numamigrate_isolate_page(pgdat, page);
2201 if (!isolated) {
2202 put_page(new_page);
2203 goto out_fail;
2204 }
2205
2206
2207 __SetPageLocked(new_page);
2208 if (PageSwapBacked(page))
2209 __SetPageSwapBacked(new_page);
2210
2211
2212 new_page->mapping = page->mapping;
2213 new_page->index = page->index;
2214
2215 flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
2216 migrate_page_copy(new_page, page);
2217 WARN_ON(PageLRU(new_page));
2218
2219
2220 ptl = pmd_lock(mm, pmd);
2221 if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
2222 spin_unlock(ptl);
2223
2224
2225 if (TestClearPageActive(new_page))
2226 SetPageActive(page);
2227 if (TestClearPageUnevictable(new_page))
2228 SetPageUnevictable(page);
2229
2230 unlock_page(new_page);
2231 put_page(new_page);
2232
2233
2234 get_page(page);
2235 putback_lru_page(page);
2236 mod_node_page_state(page_pgdat(page),
2237 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
2238
2239 goto out_unlock;
2240 }
2241
2242 entry = mk_huge_pmd(new_page, vma->vm_page_prot);
2243 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253 page_add_anon_rmap(new_page, vma, start, true);
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265 set_pmd_at(mm, start, pmd, entry);
2266 update_mmu_cache_pmd(vma, address, &entry);
2267
2268 page_ref_unfreeze(page, 2);
2269 mlock_migrate_page(new_page, page);
2270 page_remove_rmap(page, true);
2271 set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
2272
2273 spin_unlock(ptl);
2274
2275
2276 get_page(new_page);
2277 putback_lru_page(new_page);
2278
2279 unlock_page(new_page);
2280 unlock_page(page);
2281 put_page(page);
2282 put_page(page);
2283
2284 count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
2285 count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
2286
2287 mod_node_page_state(page_pgdat(page),
2288 NR_ISOLATED_ANON + page_lru,
2289 -HPAGE_PMD_NR);
2290 return isolated;
2291
2292out_fail:
2293 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
2294 ptl = pmd_lock(mm, pmd);
2295 if (pmd_same(*pmd, entry)) {
2296 entry = pmd_modify(entry, vma->vm_page_prot);
2297 set_pmd_at(mm, start, pmd, entry);
2298 update_mmu_cache_pmd(vma, address, &entry);
2299 }
2300 spin_unlock(ptl);
2301
2302out_unlock:
2303 unlock_page(page);
2304out:
2305 put_page(page);
2306 return 0;
2307}
2308#endif
2309
2310#endif
2311
2312#ifdef CONFIG_DEVICE_PRIVATE
2313static int migrate_vma_collect_hole(unsigned long start,
2314 unsigned long end,
2315 __always_unused int depth,
2316 struct mm_walk *walk)
2317{
2318 struct migrate_vma *migrate = walk->private;
2319 unsigned long addr;
2320
2321
2322 if (!vma_is_anonymous(walk->vma)) {
2323 for (addr = start; addr < end; addr += PAGE_SIZE) {
2324 migrate->src[migrate->npages] = 0;
2325 migrate->dst[migrate->npages] = 0;
2326 migrate->npages++;
2327 }
2328 return 0;
2329 }
2330
2331 for (addr = start; addr < end; addr += PAGE_SIZE) {
2332 migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
2333 migrate->dst[migrate->npages] = 0;
2334 migrate->npages++;
2335 migrate->cpages++;
2336 }
2337
2338 return 0;
2339}
2340
2341static int migrate_vma_collect_skip(unsigned long start,
2342 unsigned long end,
2343 struct mm_walk *walk)
2344{
2345 struct migrate_vma *migrate = walk->private;
2346 unsigned long addr;
2347
2348 for (addr = start; addr < end; addr += PAGE_SIZE) {
2349 migrate->dst[migrate->npages] = 0;
2350 migrate->src[migrate->npages++] = 0;
2351 }
2352
2353 return 0;
2354}
2355
2356static int migrate_vma_collect_pmd(pmd_t *pmdp,
2357 unsigned long start,
2358 unsigned long end,
2359 struct mm_walk *walk)
2360{
2361 struct migrate_vma *migrate = walk->private;
2362 struct vm_area_struct *vma = walk->vma;
2363 struct mm_struct *mm = vma->vm_mm;
2364 unsigned long addr = start, unmapped = 0;
2365 spinlock_t *ptl;
2366 pte_t *ptep;
2367
2368again:
2369 if (pmd_none(*pmdp))
2370 return migrate_vma_collect_hole(start, end, -1, walk);
2371
2372 if (pmd_trans_huge(*pmdp)) {
2373 struct page *page;
2374
2375 ptl = pmd_lock(mm, pmdp);
2376 if (unlikely(!pmd_trans_huge(*pmdp))) {
2377 spin_unlock(ptl);
2378 goto again;
2379 }
2380
2381 page = pmd_page(*pmdp);
2382 if (is_huge_zero_page(page)) {
2383 spin_unlock(ptl);
2384 split_huge_pmd(vma, pmdp, addr);
2385 if (pmd_trans_unstable(pmdp))
2386 return migrate_vma_collect_skip(start, end,
2387 walk);
2388 } else {
2389 int ret;
2390
2391 get_page(page);
2392 spin_unlock(ptl);
2393 if (unlikely(!trylock_page(page)))
2394 return migrate_vma_collect_skip(start, end,
2395 walk);
2396 ret = split_huge_page(page);
2397 unlock_page(page);
2398 put_page(page);
2399 if (ret)
2400 return migrate_vma_collect_skip(start, end,
2401 walk);
2402 if (pmd_none(*pmdp))
2403 return migrate_vma_collect_hole(start, end, -1,
2404 walk);
2405 }
2406 }
2407
2408 if (unlikely(pmd_bad(*pmdp)))
2409 return migrate_vma_collect_skip(start, end, walk);
2410
2411 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2412 arch_enter_lazy_mmu_mode();
2413
2414 for (; addr < end; addr += PAGE_SIZE, ptep++) {
2415 unsigned long mpfn = 0, pfn;
2416 struct page *page;
2417 swp_entry_t entry;
2418 pte_t pte;
2419
2420 pte = *ptep;
2421
2422 if (pte_none(pte)) {
2423 if (vma_is_anonymous(vma)) {
2424 mpfn = MIGRATE_PFN_MIGRATE;
2425 migrate->cpages++;
2426 }
2427 goto next;
2428 }
2429
2430 if (!pte_present(pte)) {
2431
2432
2433
2434
2435
2436 entry = pte_to_swp_entry(pte);
2437 if (!is_device_private_entry(entry))
2438 goto next;
2439
2440 page = device_private_entry_to_page(entry);
2441 if (!(migrate->flags &
2442 MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
2443 page->pgmap->owner != migrate->pgmap_owner)
2444 goto next;
2445
2446 mpfn = migrate_pfn(page_to_pfn(page)) |
2447 MIGRATE_PFN_MIGRATE;
2448 if (is_write_device_private_entry(entry))
2449 mpfn |= MIGRATE_PFN_WRITE;
2450 } else {
2451 if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
2452 goto next;
2453 pfn = pte_pfn(pte);
2454 if (is_zero_pfn(pfn)) {
2455 mpfn = MIGRATE_PFN_MIGRATE;
2456 migrate->cpages++;
2457 goto next;
2458 }
2459 page = vm_normal_page(migrate->vma, addr, pte);
2460 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
2461 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
2462 }
2463
2464
2465 if (!page || !page->mapping || PageTransCompound(page)) {
2466 mpfn = 0;
2467 goto next;
2468 }
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479 get_page(page);
2480 migrate->cpages++;
2481
2482
2483
2484
2485
2486
2487 if (trylock_page(page)) {
2488 pte_t swp_pte;
2489
2490 mpfn |= MIGRATE_PFN_LOCKED;
2491 ptep_get_and_clear(mm, addr, ptep);
2492
2493
2494 entry = make_migration_entry(page, mpfn &
2495 MIGRATE_PFN_WRITE);
2496 swp_pte = swp_entry_to_pte(entry);
2497 if (pte_present(pte)) {
2498 if (pte_soft_dirty(pte))
2499 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2500 if (pte_uffd_wp(pte))
2501 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2502 } else {
2503 if (pte_swp_soft_dirty(pte))
2504 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2505 if (pte_swp_uffd_wp(pte))
2506 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2507 }
2508 set_pte_at(mm, addr, ptep, swp_pte);
2509
2510
2511
2512
2513
2514
2515 page_remove_rmap(page, false);
2516 put_page(page);
2517
2518 if (pte_present(pte))
2519 unmapped++;
2520 }
2521
2522next:
2523 migrate->dst[migrate->npages] = 0;
2524 migrate->src[migrate->npages++] = mpfn;
2525 }
2526 arch_leave_lazy_mmu_mode();
2527 pte_unmap_unlock(ptep - 1, ptl);
2528
2529
2530 if (unmapped)
2531 flush_tlb_range(walk->vma, start, end);
2532
2533 return 0;
2534}
2535
2536static const struct mm_walk_ops migrate_vma_walk_ops = {
2537 .pmd_entry = migrate_vma_collect_pmd,
2538 .pte_hole = migrate_vma_collect_hole,
2539};
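
/*
 * migrate_vma_collect() - collect pages over a range of virtual addresses
 * @migrate: migrate struct containing all migration information
 *
 * This will walk the CPU page table. For each virtual address backed by a
 * valid page, it updates the src array and takes a reference on the page, in
 * order to pin the page until we lock it and unmap it.
 */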
2549static void migrate_vma_collect(struct migrate_vma *migrate)
2550{
2551 struct mmu_notifier_range range;
2552
2553
2554
2555
2556
2557
2558 mmu_notifier_range_init_migrate(&range, 0, migrate->vma,
2559 migrate->vma->vm_mm, migrate->start, migrate->end,
2560 migrate->pgmap_owner);
2561 mmu_notifier_invalidate_range_start(&range);
2562
2563 walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
2564 &migrate_vma_walk_ops, migrate);
2565
2566 mmu_notifier_invalidate_range_end(&range);
2567 migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
2568}
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578static bool migrate_vma_check_page(struct page *page)
2579{
2580
2581
2582
2583
2584
2585 int extra = 1;
2586
2587
2588
2589
2590
2591
2592 if (PageCompound(page))
2593 return false;
2594
2595
2596 if (is_zone_device_page(page)) {
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610 return is_device_private_page(page);
2611 }
2612
2613
2614 if (page_mapping(page))
2615 extra += 1 + page_has_private(page);
2616
2617 if ((page_count(page) - extra) > page_mapcount(page))
2618 return false;
2619
2620 return true;
2621}
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632static void migrate_vma_prepare(struct migrate_vma *migrate)
2633{
2634 const unsigned long npages = migrate->npages;
2635 const unsigned long start = migrate->start;
2636 unsigned long addr, i, restore = 0;
2637 bool allow_drain = true;
2638
2639 lru_add_drain();
2640
2641 for (i = 0; (i < npages) && migrate->cpages; i++) {
2642 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2643 bool remap = true;
2644
2645 if (!page)
2646 continue;
2647
2648 if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
2649
2650
2651
2652
2653
2654
2655
2656
2657 if (!trylock_page(page)) {
2658 migrate->src[i] = 0;
2659 migrate->cpages--;
2660 put_page(page);
2661 continue;
2662 }
2663 remap = false;
2664 migrate->src[i] |= MIGRATE_PFN_LOCKED;
2665 }
2666
2667
2668 if (!is_zone_device_page(page)) {
2669 if (!PageLRU(page) && allow_drain) {
2670
2671 lru_add_drain_all();
2672 allow_drain = false;
2673 }
2674
2675 if (isolate_lru_page(page)) {
2676 if (remap) {
2677 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2678 migrate->cpages--;
2679 restore++;
2680 } else {
2681 migrate->src[i] = 0;
2682 unlock_page(page);
2683 migrate->cpages--;
2684 put_page(page);
2685 }
2686 continue;
2687 }
2688
2689
2690 put_page(page);
2691 }
2692
2693 if (!migrate_vma_check_page(page)) {
2694 if (remap) {
2695 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2696 migrate->cpages--;
2697 restore++;
2698
2699 if (!is_zone_device_page(page)) {
2700 get_page(page);
2701 putback_lru_page(page);
2702 }
2703 } else {
2704 migrate->src[i] = 0;
2705 unlock_page(page);
2706 migrate->cpages--;
2707
2708 if (!is_zone_device_page(page))
2709 putback_lru_page(page);
2710 else
2711 put_page(page);
2712 }
2713 }
2714 }
2715
2716 for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
2717 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2718
2719 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2720 continue;
2721
2722 remove_migration_pte(page, migrate->vma, addr, page);
2723
2724 migrate->src[i] = 0;
2725 unlock_page(page);
2726 put_page(page);
2727 restore--;
2728 }
2729}
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742static void migrate_vma_unmap(struct migrate_vma *migrate)
2743{
2744 int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK;
2745 const unsigned long npages = migrate->npages;
2746 const unsigned long start = migrate->start;
2747 unsigned long addr, i, restore = 0;
2748
2749 for (i = 0; i < npages; i++) {
2750 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2751
2752 if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
2753 continue;
2754
2755 if (page_mapped(page)) {
2756 try_to_unmap(page, flags);
2757 if (page_mapped(page))
2758 goto restore;
2759 }
2760
2761 if (migrate_vma_check_page(page))
2762 continue;
2763
2764restore:
2765 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2766 migrate->cpages--;
2767 restore++;
2768 }
2769
2770 for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
2771 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2772
2773 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2774 continue;
2775
2776 remove_migration_ptes(page, page, false);
2777
2778 migrate->src[i] = 0;
2779 unlock_page(page);
2780 restore--;
2781
2782 if (is_zone_device_page(page))
2783 put_page(page);
2784 else
2785 putback_lru_page(page);
2786 }
2787}
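
/**
 * migrate_vma_setup() - prepare to migrate a range of memory
 * @args: contains the vma, start, and pfns arrays for the migration
 *
 * Returns: negative errno on failures, 0 when 0 or more pages were migrated
 * without an error.
 *
 * Prepare to migrate a range of memory by collecting all the pages backing
 * each virtual address in the range, saving them inside the src array.  Then
 * lock those pages and unmap them. Once the pages are locked and unmapped,
 * check whether each page is pinned or not.  Pages that aren't pinned have
 * the MIGRATE_PFN_MIGRATE flag set (by this function) in the corresponding
 * src array entry.  Pages that are pinned are restored (remapped and
 * unlocked) and their src entry is cleared.
 *
 * The caller should then allocate destination memory and copy source memory
 * to it for all those entries (ie with MIGRATE_PFN_VALID and
 * MIGRATE_PFN_MIGRATE flag set).  Once these are allocated and copied, the
 * caller must update each corresponding entry in the dst array with the pfn
 * value of the destination page and with MIGRATE_PFN_VALID and
 * MIGRATE_PFN_LOCKED set (destination pages must have their struct pages
 * locked, via lock_page()).
 *
 * After that, the caller must call migrate_vma_pages() to migrate the struct
 * page data to the new pages, and then migrate_vma_finalize() to update the
 * CPU page table to point at the new pages for successfully migrated entries,
 * or to restore the original entries otherwise.
 */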
2853int migrate_vma_setup(struct migrate_vma *args)
2854{
2855 long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
2856
2857 args->start &= PAGE_MASK;
2858 args->end &= PAGE_MASK;
2859 if (!args->vma || is_vm_hugetlb_page(args->vma) ||
2860 (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
2861 return -EINVAL;
2862 if (nr_pages <= 0)
2863 return -EINVAL;
2864 if (args->start < args->vma->vm_start ||
2865 args->start >= args->vma->vm_end)
2866 return -EINVAL;
2867 if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
2868 return -EINVAL;
2869 if (!args->src || !args->dst)
2870 return -EINVAL;
2871
2872 memset(args->src, 0, sizeof(*args->src) * nr_pages);
2873 args->cpages = 0;
2874 args->npages = 0;
2875
2876 migrate_vma_collect(args);
2877
2878 if (args->cpages)
2879 migrate_vma_prepare(args);
2880 if (args->cpages)
2881 migrate_vma_unmap(args);
2882
2883
2884
2885
2886
2887
2888 return 0;
2889
2890}
2891EXPORT_SYMBOL(migrate_vma_setup);
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901static void migrate_vma_insert_page(struct migrate_vma *migrate,
2902 unsigned long addr,
2903 struct page *page,
2904 unsigned long *src)
2905{
2906 struct vm_area_struct *vma = migrate->vma;
2907 struct mm_struct *mm = vma->vm_mm;
2908 bool flush = false;
2909 spinlock_t *ptl;
2910 pte_t entry;
2911 pgd_t *pgdp;
2912 p4d_t *p4dp;
2913 pud_t *pudp;
2914 pmd_t *pmdp;
2915 pte_t *ptep;
2916
2917
2918 if (!vma_is_anonymous(vma))
2919 goto abort;
2920
2921 pgdp = pgd_offset(mm, addr);
2922 p4dp = p4d_alloc(mm, pgdp, addr);
2923 if (!p4dp)
2924 goto abort;
2925 pudp = pud_alloc(mm, p4dp, addr);
2926 if (!pudp)
2927 goto abort;
2928 pmdp = pmd_alloc(mm, pudp, addr);
2929 if (!pmdp)
2930 goto abort;
2931
2932 if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
2933 goto abort;
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945 if (pte_alloc(mm, pmdp))
2946 goto abort;
2947
2948
2949 if (unlikely(pmd_trans_unstable(pmdp)))
2950 goto abort;
2951
2952 if (unlikely(anon_vma_prepare(vma)))
2953 goto abort;
2954 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
2955 goto abort;
2956
2957
2958
2959
2960
2961
2962 __SetPageUptodate(page);
2963
2964 if (is_zone_device_page(page)) {
2965 if (is_device_private_page(page)) {
2966 swp_entry_t swp_entry;
2967
2968 swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
2969 entry = swp_entry_to_pte(swp_entry);
2970 }
2971 } else {
2972 entry = mk_pte(page, vma->vm_page_prot);
2973 if (vma->vm_flags & VM_WRITE)
2974 entry = pte_mkwrite(pte_mkdirty(entry));
2975 }
2976
2977 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2978
2979 if (check_stable_address_space(mm))
2980 goto unlock_abort;
2981
2982 if (pte_present(*ptep)) {
2983 unsigned long pfn = pte_pfn(*ptep);
2984
2985 if (!is_zero_pfn(pfn))
2986 goto unlock_abort;
2987 flush = true;
2988 } else if (!pte_none(*ptep))
2989 goto unlock_abort;
2990
2991
2992
2993
2994
2995 if (userfaultfd_missing(vma))
2996 goto unlock_abort;
2997
2998 inc_mm_counter(mm, MM_ANONPAGES);
2999 page_add_new_anon_rmap(page, vma, addr, false);
3000 if (!is_zone_device_page(page))
3001 lru_cache_add_inactive_or_unevictable(page, vma);
3002 get_page(page);
3003
3004 if (flush) {
3005 flush_cache_page(vma, addr, pte_pfn(*ptep));
3006 ptep_clear_flush_notify(vma, addr, ptep);
3007 set_pte_at_notify(mm, addr, ptep, entry);
3008 update_mmu_cache(vma, addr, ptep);
3009 } else {
3010
3011 set_pte_at(mm, addr, ptep, entry);
3012 update_mmu_cache(vma, addr, ptep);
3013 }
3014
3015 pte_unmap_unlock(ptep, ptl);
3016 *src = MIGRATE_PFN_MIGRATE;
3017 return;
3018
3019unlock_abort:
3020 pte_unmap_unlock(ptep, ptl);
3021abort:
3022 *src &= ~MIGRATE_PFN_MIGRATE;
3023}
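
/*
 * migrate_vma_pages() - migrate meta-data from src page to dst page
 * @migrate: migrate struct containing all migration information
 *
 * This migrates struct page meta-data from source struct page to destination
 * struct page. This effectively finishes the migration from source page to
 * the destination page.
 */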
3033void migrate_vma_pages(struct migrate_vma *migrate)
3034{
3035 const unsigned long npages = migrate->npages;
3036 const unsigned long start = migrate->start;
3037 struct mmu_notifier_range range;
3038 unsigned long addr, i;
3039 bool notified = false;
3040
3041 for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
3042 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
3043 struct page *page = migrate_pfn_to_page(migrate->src[i]);
3044 struct address_space *mapping;
3045 int r;
3046
3047 if (!newpage) {
3048 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
3049 continue;
3050 }
3051
3052 if (!page) {
3053 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
3054 continue;
3055 if (!notified) {
3056 notified = true;
3057
3058 mmu_notifier_range_init_migrate(&range, 0,
3059 migrate->vma, migrate->vma->vm_mm,
3060 addr, migrate->end,
3061 migrate->pgmap_owner);
3062 mmu_notifier_invalidate_range_start(&range);
3063 }
3064 migrate_vma_insert_page(migrate, addr, newpage,
3065 &migrate->src[i]);
3066 continue;
3067 }
3068
3069 mapping = page_mapping(page);
3070
3071 if (is_zone_device_page(newpage)) {
3072 if (is_device_private_page(newpage)) {
3073
3074
3075
3076
3077 if (mapping) {
3078 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
3079 continue;
3080 }
3081 } else {
3082
3083
3084
3085
3086 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
3087 continue;
3088 }
3089 }
3090
3091 r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
3092 if (r != MIGRATEPAGE_SUCCESS)
3093 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
3094 }
3095
3096
3097
3098
3099
3100
3101 if (notified)
3102 mmu_notifier_invalidate_range_only_end(&range);
3103}
3104EXPORT_SYMBOL(migrate_vma_pages);
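
/*
 * migrate_vma_finalize() - restore CPU page table entry
 * @migrate: migrate struct containing all migration information
 *
 * This replaces the special migration pte entry with either a mapping to the
 * new page if migration was successful for that page, or to the original page
 * otherwise.
 *
 * This also unlocks the pages and puts them back on the lru, or drops the
 * extra refcount, for device pages.
 */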
3117void migrate_vma_finalize(struct migrate_vma *migrate)
3118{
3119 const unsigned long npages = migrate->npages;
3120 unsigned long i;
3121
3122 for (i = 0; i < npages; i++) {
3123 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
3124 struct page *page = migrate_pfn_to_page(migrate->src[i]);
3125
3126 if (!page) {
3127 if (newpage) {
3128 unlock_page(newpage);
3129 put_page(newpage);
3130 }
3131 continue;
3132 }
3133
3134 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
3135 if (newpage) {
3136 unlock_page(newpage);
3137 put_page(newpage);
3138 }
3139 newpage = page;
3140 }
3141
3142 remove_migration_ptes(page, newpage, false);
3143 unlock_page(page);
3144
3145 if (is_zone_device_page(page))
3146 put_page(page);
3147 else
3148 putback_lru_page(page);
3149
3150 if (newpage != page) {
3151 unlock_page(newpage);
3152 if (is_zone_device_page(newpage))
3153 put_page(newpage);
3154 else
3155 putback_lru_page(newpage);
3156 }
3157 }
3158}
3159EXPORT_SYMBOL(migrate_vma_finalize);
3160#endif
3161