// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */
#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"
int migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}
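
/* Do the necessary work of migrate_prep but not if it involves other CPUs */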
int migrate_prep_local(void)
{
	lru_add_drain();

	return 0;
}
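
/*
 * isolate_movable_page() - isolate a non-LRU movable page for migration.
 *
 * Pin the page, check that its driver still reports it movable, and ask the
 * driver's isolate_page() callback to isolate it under the page lock.
 * Returns 0 on success and -EBUSY if the page cannot be isolated.
 */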
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us. If we "win" a race with a page being
	 * freed, the put_page() at the end of this block drops the extra
	 * reference we take here.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before holding PG_lock because the page's owner
	 * assumes that nobody touches the lock of a newly allocated page.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;

	/*
	 * Take the page lock so that an already isolated movable page is not
	 * re-isolated while it is under migration, and so that we do not race
	 * against the page being released.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	__SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}
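
/* It should be called on page which is PG_movable */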
void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	__ClearPageIsolated(page);
}
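
/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * This function shall be used by drivers as an entry point for
 * isolated pages which are not migrated at the end of the migration
 * path.
 */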
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated non-lru movable page so here we can use
		 * __PageMovable because LRU page's mapping cannot have
		 * PAGE_MAPPING_MOVABLE.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_cache(page), -hpage_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
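
/*
 * Restore a potential migration pte to a working pte entry
 */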
static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct page_vma_mapped_walk pvmw = {
		.page = old,
		.vma = vma,
		.address = addr,
		.flags = PVMW_SYNC | PVMW_MIGRATION,
	};
	struct page *new;
	pte_t pte;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(PageTail(page), page);
	while (page_vma_mapped_walk(&pvmw)) {
		if (PageKsm(page))
			new = page;
		else
			new = page - pvmw.page->index +
				linear_page_index(vma, pvmw.address);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif

		get_page(new);
		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
		if (pte_swp_soft_dirty(*pvmw.pte))
			pte = pte_mksoft_dirty(pte);

		/*
		 * Recheck VMA as permissions can change since migration started
		 */
		entry = pte_to_swp_entry(*pvmw.pte);
		if (is_write_migration_entry(entry))
			pte = maybe_mkwrite(pte, vma);

		if (unlikely(is_zone_device_page(new))) {
			if (is_device_private_page(new)) {
				entry = make_device_private_entry(new, pte_write(pte));
				pte = swp_entry_to_pte(entry);
			} else if (is_device_public_page(new)) {
				pte = pte_mkdevmap(pte);
				flush_dcache_page(new);
			}
		} else
			flush_dcache_page(new);

#ifdef CONFIG_HUGETLB_PAGE
		if (PageHuge(new)) {
			pte = pte_mkhuge(pte);
			pte = arch_make_huge_pte(pte, vma, new, 0);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
			if (PageAnon(new))
				hugepage_add_anon_rmap(new, vma, pvmw.address);
			else
				page_dup_rmap(new, true);
		} else
#endif
		{
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);

			if (PageAnon(new))
				page_add_anon_rmap(new, vma, pvmw.address, false);
			else
				page_add_file_rmap(new, false);
		}
		if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
			mlock_vma_page(new);

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}
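
/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */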
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
	};

	if (locked)
		rmap_walk_locked(new, &rwc);
	else
		rmap_walk(new, &rwc);
}
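
/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */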
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);

	/*
	 * Once page cache replacement of page migration started, page_count
	 * is zero; but we must not call put_and_wait_on_page_locked() without
	 * a ref. Use get_page_unless_zero(), and just fault again if it fails.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	put_and_wait_on_page_locked(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	put_and_wait_on_page_locked(page);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			get_bh(bh);
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* async case, we cannot block on lock_buffer so use trylock_buffer */
	do {
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the taken locks.
			 */
			struct buffer_head *failed_bh = bh;
			put_bh(failed_bh);
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				put_bh(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}
#else
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	return true;
}
#endif
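
/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */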
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page,
		struct buffer_head *head, enum migrate_mode mode,
		int extra_count)
{
	struct zone *oldzone, *newzone;
	int dirty;
	int expected_count = 1 + extra_count;
	void **pslot;

	/*
	 * Device public or private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
	expected_count += is_device_private_page(page);
	expected_count += is_device_public_page(page);

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;

		/* No turning back from here */
		newpage->index = page->index;
		newpage->mapping = page->mapping;
		if (PageSwapBacked(page))
			__SetPageSwapBacked(newpage);

		return MIGRATEPAGE_SUCCESS;
	}

	oldzone = page_zone(page);
	newzone = page_zone(newpage);

	xa_lock_irq(&mapping->i_pages);

	pslot = radix_tree_lookup_slot(&mapping->i_pages,
					page_index(page));

	expected_count += hpage_nr_pages(page) + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot,
					&mapping->i_pages.xa_lock) != page) {
		xa_unlock_irq(&mapping->i_pages);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xa_unlock_irq(&mapping->i_pages);
		return -EAGAIN;
	}

	/*
	 * In the async migration case of moving a page with buffers, lock the
	 * buffers using trylock before the mapping is moved. If the mapping
	 * was moved, we later failed to lock the buffers and could not move
	 * the mapping back due to an elevated page count, we would have to
	 * block waiting on other references to be dropped.
	 */
	if (mode == MIGRATE_ASYNC && head &&
			!buffer_migrate_lock_buffers(head, mode)) {
		page_ref_unfreeze(page, expected_count);
		xa_unlock_irq(&mapping->i_pages);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	page_ref_add(newpage, hpage_nr_pages(page));	/* add cache reference */
	if (PageSwapBacked(page)) {
		__SetPageSwapBacked(newpage);
		if (PageSwapCache(page)) {
			SetPageSwapCache(newpage);
			set_page_private(newpage, page_private(page));
		}
	} else {
		VM_BUG_ON_PAGE(PageSwapCache(page), page);
	}

	/* Move dirty while page refs frozen and newpage not yet exposed */
	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);
		SetPageDirty(newpage);
	}

	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
	if (PageTransHuge(page)) {
		int i;
		int index = page_index(page);

		for (i = 1; i < HPAGE_PMD_NR; i++) {
			pslot = radix_tree_lookup_slot(&mapping->i_pages,
						       index + i);
			radix_tree_replace_slot(&mapping->i_pages, pslot,
						newpage + i);
		}
	}

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_ref_unfreeze(page, expected_count - hpage_nr_pages(page));

	xa_unlock(&mapping->i_pages);
	/* Leave irq disabled to prevent preemption while updating stats */

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
	 * are mapped to swap space.
	 */
	if (newzone != oldzone) {
		__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
		__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
		if (PageSwapBacked(page) && !PageSwapCache(page)) {
			__dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
			__inc_node_state(newzone->zone_pgdat, NR_SHMEM);
		}
		if (dirty && mapping_cap_account_dirty(mapping)) {
			__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
			__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
			__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
			__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
		}
	}
	local_irq_enable();

	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page_move_mapping);
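
/*
 * The expected number of remaining references is the same
 * as that of migrate_page_move_mapping().
 */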
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int expected_count;
	void **pslot;

	xa_lock_irq(&mapping->i_pages);

	pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));

	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
		xa_unlock_irq(&mapping->i_pages);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		xa_unlock_irq(&mapping->i_pages);
		return -EAGAIN;
	}

	newpage->index = page->index;
	newpage->mapping = page->mapping;

	get_page(newpage);

	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);

	page_ref_unfreeze(page, expected_count - 1);

	xa_unlock_irq(&mapping->i_pages);

	return MIGRATEPAGE_SUCCESS;
}
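
/*
 * Gigantic pages are so large that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page.  We need something more
 * specialized.
 */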
static void __copy_gigantic_page(struct page *dst, struct page *src,
				int nr_pages)
{
	int i;
	struct page *dst_base = dst;
	struct page *src_base = src;

	for (i = 0; i < nr_pages; ) {
		cond_resched();
		copy_highpage(dst, src);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}

static void copy_huge_page(struct page *dst, struct page *src)
{
	int i;
	int nr_pages;

	if (PageHuge(src)) {
		/* hugetlbfs page */
		struct hstate *h = page_hstate(src);
		nr_pages = pages_per_huge_page(h);

		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
			__copy_gigantic_page(dst, src, nr_pages);
			return;
		}
	} else {
		/* thp page */
		BUG_ON(!PageTransHuge(src));
		nr_pages = hpage_nr_pages(src);
	}

	for (i = 0; i < nr_pages; i++) {
		cond_resched();
		copy_highpage(dst + i, src + i);
	}
}
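
/*
 * Copy the page to its new location
 */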
void migrate_page_states(struct page *newpage, struct page *page)
{
	int cpupid;

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	/* Move dirty on pages not done by migrate_page_move_mapping() */
	if (PageDirty(page))
		SetPageDirty(newpage);

	if (page_is_young(page))
		set_page_young(newpage);
	if (page_is_idle(page))
		set_page_idle(newpage);

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * scanning of the new page immediately after migration.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	if (PageSwapCache(page))
		ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);

	copy_page_owner(page, newpage);

	mem_cgroup_migrate(page, newpage);
}
EXPORT_SYMBOL(migrate_page_states);

void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	migrate_page_states(newpage, page);
}
EXPORT_SYMBOL(migrate_page_copy);
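
/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */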
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);
766
767#ifdef CONFIG_BLOCK
768
769
770
771
772
773int buffer_migrate_page(struct address_space *mapping,
774 struct page *newpage, struct page *page, enum migrate_mode mode)
775{
776 struct buffer_head *bh, *head;
777 int rc;
778
779 if (!page_has_buffers(page))
780 return migrate_page(mapping, newpage, page, mode);
781
782 head = page_buffers(page);
783
784 rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
785
786 if (rc != MIGRATEPAGE_SUCCESS)
787 return rc;
788
789
790
791
792
793
794 if (mode != MIGRATE_ASYNC)
795 BUG_ON(!buffer_migrate_lock_buffers(head, mode));
796
797 ClearPagePrivate(page);
798 set_page_private(newpage, page_private(page));
799 set_page_private(page, 0);
800 put_page(page);
801 get_page(newpage);
802
803 bh = head;
804 do {
805 set_bh_page(bh, newpage, bh_offset(bh));
806 bh = bh->b_this_page;
807
808 } while (bh != head);
809
810 SetPagePrivate(newpage);
811
812 if (mode != MIGRATE_SYNC_NO_COPY)
813 migrate_page_copy(newpage, page);
814 else
815 migrate_page_states(newpage, page);
816
817 bh = head;
818 do {
819 unlock_buffer(bh);
820 put_bh(bh);
821 bh = bh->b_this_page;
822
823 } while (bh != head);
824
825 return MIGRATEPAGE_SUCCESS;
826}
827EXPORT_SYMBOL(buffer_migrate_page);
828#endif
829
830
831
832
833static int writeout(struct address_space *mapping, struct page *page)
834{
835 struct writeback_control wbc = {
836 .sync_mode = WB_SYNC_NONE,
837 .nr_to_write = 1,
838 .range_start = 0,
839 .range_end = LLONG_MAX,
840 .for_reclaim = 1
841 };
842 int rc;
843
844 if (!mapping->a_ops->writepage)
845
846 return -EINVAL;
847
848 if (!clear_page_dirty_for_io(page))
849
850 return -EAGAIN;
851
852
853
854
855
856
857
858
859
860 remove_migration_ptes(page, page, false);
861
862 rc = mapping->a_ops->writepage(page, &wbc);
863
864 if (rc != AOP_WRITEPAGE_ACTIVATE)
865
866 lock_page(page);
867
868 return (rc < 0) ? -EIO : -EAGAIN;
869}
870
871
872
873
874static int fallback_migrate_page(struct address_space *mapping,
875 struct page *newpage, struct page *page, enum migrate_mode mode)
876{
877 if (PageDirty(page)) {
878
879 switch (mode) {
880 case MIGRATE_SYNC:
881 case MIGRATE_SYNC_NO_COPY:
882 break;
883 default:
884 return -EBUSY;
885 }
886 return writeout(mapping, page);
887 }
888
889
890
891
892
893 if (page_has_private(page) &&
894 !try_to_release_page(page, GFP_KERNEL))
895 return -EAGAIN;
896
897 return migrate_page(mapping, newpage, page, mode);
898}
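
/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */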
static int move_to_new_page(struct page *newpage, struct page *page,
				enum migrate_mode mode)
{
	struct address_space *mapping;
	int rc = -EAGAIN;
	bool is_lru = !__PageMovable(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);

	mapping = page_mapping(page);

	if (likely(is_lru)) {
		if (!mapping)
			rc = migrate_page(mapping, newpage, page, mode);
		else if (mapping->a_ops->migratepage)
			/*
			 * Most pages have a mapping and most filesystems
			 * provide a migratepage callback. Anonymous pages
			 * are part of swap space which also has its own
			 * migratepage callback. This is the most common path
			 * for page migration.
			 */
			rc = mapping->a_ops->migratepage(mapping, newpage,
							page, mode);
		else
			rc = fallback_migrate_page(mapping, newpage,
							page, mode);
	} else {
		/*
		 * In case of a non-lru page, it could have been released
		 * after isolation, so re-check it under the page lock.
		 */
		VM_BUG_ON_PAGE(!PageIsolated(page), page);
		if (!PageMovable(page)) {
			rc = MIGRATEPAGE_SUCCESS;
			__ClearPageIsolated(page);
			goto out;
		}

		rc = mapping->a_ops->migratepage(mapping, newpage,
						page, mode);
		WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
			!PageIsolated(page));
	}

	/*
	 * When successful, the old pagecache page->mapping must be cleared
	 * before the page is freed back to the allocator.
	 */
	if (rc == MIGRATEPAGE_SUCCESS) {
		if (__PageMovable(page)) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);

			/*
			 * We clear PG_movable under page_lock so any compactor
			 * cannot try to migrate this page.
			 */
			__ClearPageIsolated(page);
		}

		/*
		 * Anonymous and movable page->mapping will be cleared by
		 * free_pages_prepare so don't reset it here for keeping
		 * the type to work PageAnon, for example.
		 */
		if (!PageMappingFlags(page))
			page->mapping = NULL;
	}
out:
	return rc;
}
983
984static int __unmap_and_move(struct page *page, struct page *newpage,
985 int force, enum migrate_mode mode)
986{
987 int rc = -EAGAIN;
988 int page_was_mapped = 0;
989 struct anon_vma *anon_vma = NULL;
990 bool is_lru = !__PageMovable(page);
991
992 if (!trylock_page(page)) {
993 if (!force || mode == MIGRATE_ASYNC)
994 goto out;
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009 if (current->flags & PF_MEMALLOC)
1010 goto out;
1011
1012 lock_page(page);
1013 }
1014
1015 if (PageWriteback(page)) {
1016
1017
1018
1019
1020
1021
1022 switch (mode) {
1023 case MIGRATE_SYNC:
1024 case MIGRATE_SYNC_NO_COPY:
1025 break;
1026 default:
1027 rc = -EBUSY;
1028 goto out_unlock;
1029 }
1030 if (!force)
1031 goto out_unlock;
1032 wait_on_page_writeback(page);
1033 }
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049 if (PageAnon(page) && !PageKsm(page))
1050 anon_vma = page_get_anon_vma(page);
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060 if (unlikely(!trylock_page(newpage)))
1061 goto out_unlock;
1062
1063 if (unlikely(!is_lru)) {
1064 rc = move_to_new_page(newpage, page, mode);
1065 goto out_unlock_both;
1066 }
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 if (!page->mapping) {
1081 VM_BUG_ON_PAGE(PageAnon(page), page);
1082 if (page_has_private(page)) {
1083 try_to_free_buffers(page);
1084 goto out_unlock_both;
1085 }
1086 } else if (page_mapped(page)) {
1087
1088 VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
1089 page);
1090 try_to_unmap(page,
1091 TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
1092 page_was_mapped = 1;
1093 }
1094
1095 if (!page_mapped(page))
1096 rc = move_to_new_page(newpage, page, mode);
1097
1098 if (page_was_mapped)
1099 remove_migration_ptes(page,
1100 rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
1101
1102out_unlock_both:
1103 unlock_page(newpage);
1104out_unlock:
1105
1106 if (anon_vma)
1107 put_anon_vma(anon_vma);
1108 unlock_page(page);
1109out:
1110
1111
1112
1113
1114
1115
1116 if (rc == MIGRATEPAGE_SUCCESS) {
1117 if (unlikely(__PageMovable(newpage)))
1118 put_page(newpage);
1119 else
1120 putback_lru_page(newpage);
1121 }
1122
1123 return rc;
1124}
1125
1126
1127
1128
1129
1130#if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM)
1131#define ICE_noinline noinline
1132#else
1133#define ICE_noinline
1134#endif
1135
1136
1137
1138
1139
1140static ICE_noinline int unmap_and_move(new_page_t get_new_page,
1141 free_page_t put_new_page,
1142 unsigned long private, struct page *page,
1143 int force, enum migrate_mode mode,
1144 enum migrate_reason reason)
1145{
1146 int rc = MIGRATEPAGE_SUCCESS;
1147 struct page *newpage;
1148
1149 if (!thp_migration_supported() && PageTransHuge(page))
1150 return -ENOMEM;
1151
1152 newpage = get_new_page(page, private);
1153 if (!newpage)
1154 return -ENOMEM;
1155
1156 if (page_count(page) == 1) {
1157
1158 ClearPageActive(page);
1159 ClearPageUnevictable(page);
1160 if (unlikely(__PageMovable(page))) {
1161 lock_page(page);
1162 if (!PageMovable(page))
1163 __ClearPageIsolated(page);
1164 unlock_page(page);
1165 }
1166 if (put_new_page)
1167 put_new_page(newpage, private);
1168 else
1169 put_page(newpage);
1170 goto out;
1171 }
1172
1173 rc = __unmap_and_move(page, newpage, force, mode);
1174 if (rc == MIGRATEPAGE_SUCCESS)
1175 set_page_owner_migrate_reason(newpage, reason);
1176
1177out:
1178 if (rc != -EAGAIN) {
1179
1180
1181
1182
1183
1184
1185 list_del(&page->lru);
1186
1187
1188
1189
1190
1191
1192 if (likely(!__PageMovable(page)))
1193 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
1194 page_is_file_cache(page), -hpage_nr_pages(page));
1195 }
1196
1197
1198
1199
1200
1201
1202 if (rc == MIGRATEPAGE_SUCCESS) {
1203 put_page(page);
1204 if (reason == MR_MEMORY_FAILURE) {
1205
1206
1207
1208
1209
1210 if (!test_set_page_hwpoison(page))
1211 num_poisoned_pages_inc();
1212 }
1213 } else {
1214 if (rc != -EAGAIN) {
1215 if (likely(!__PageMovable(page))) {
1216 putback_lru_page(page);
1217 goto put_new;
1218 }
1219
1220 lock_page(page);
1221 if (PageMovable(page))
1222 putback_movable_page(page);
1223 else
1224 __ClearPageIsolated(page);
1225 unlock_page(page);
1226 put_page(page);
1227 }
1228put_new:
1229 if (put_new_page)
1230 put_new_page(newpage, private);
1231 else
1232 put_page(newpage);
1233 }
1234
1235 return rc;
1236}
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256static int unmap_and_move_huge_page(new_page_t get_new_page,
1257 free_page_t put_new_page, unsigned long private,
1258 struct page *hpage, int force,
1259 enum migrate_mode mode, int reason)
1260{
1261 int rc = -EAGAIN;
1262 int page_was_mapped = 0;
1263 struct page *new_hpage;
1264 struct anon_vma *anon_vma = NULL;
1265
1266
1267
1268
1269
1270
1271
1272
1273 if (!hugepage_migration_supported(page_hstate(hpage))) {
1274 putback_active_hugepage(hpage);
1275 return -ENOSYS;
1276 }
1277
1278 new_hpage = get_new_page(hpage, private);
1279 if (!new_hpage)
1280 return -ENOMEM;
1281
1282 if (!trylock_page(hpage)) {
1283 if (!force)
1284 goto out;
1285 switch (mode) {
1286 case MIGRATE_SYNC:
1287 case MIGRATE_SYNC_NO_COPY:
1288 break;
1289 default:
1290 goto out;
1291 }
1292 lock_page(hpage);
1293 }
1294
1295 if (PageAnon(hpage))
1296 anon_vma = page_get_anon_vma(hpage);
1297
1298 if (unlikely(!trylock_page(new_hpage)))
1299 goto put_anon;
1300
1301 if (page_mapped(hpage)) {
1302 try_to_unmap(hpage,
1303 TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
1304 page_was_mapped = 1;
1305 }
1306
1307 if (!page_mapped(hpage))
1308 rc = move_to_new_page(new_hpage, hpage, mode);
1309
1310 if (page_was_mapped)
1311 remove_migration_ptes(hpage,
1312 rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
1313
1314 unlock_page(new_hpage);
1315
1316put_anon:
1317 if (anon_vma)
1318 put_anon_vma(anon_vma);
1319
1320 if (rc == MIGRATEPAGE_SUCCESS) {
1321 move_hugetlb_state(hpage, new_hpage, reason);
1322 put_new_page = NULL;
1323 }
1324
1325 unlock_page(hpage);
1326out:
1327 if (rc != -EAGAIN)
1328 putback_active_hugepage(hpage);
1329 if (reason == MR_MEMORY_FAILURE && !test_set_page_hwpoison(hpage))
1330 num_poisoned_pages_inc();
1331
1332
1333
1334
1335
1336
1337 if (put_new_page)
1338 put_new_page(new_hpage, private);
1339 else
1340 putback_active_hugepage(new_hpage);
1341
1342 return rc;
1343}
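
/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * The caller should call putback_movable_pages() to return pages to the LRU
 * or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */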
int migrate_pages(struct list_head *from, new_page_t get_new_page,
		free_page_t put_new_page, unsigned long private,
		enum migrate_mode mode, int reason)
{
	int retry = 1;
	int nr_failed = 0;
	int nr_succeeded = 0;
	int pass = 0;
	struct page *page;
	struct page *page2;
	int swapwrite = current->flags & PF_SWAPWRITE;
	int rc;

	if (!swapwrite)
		current->flags |= PF_SWAPWRITE;

	for(pass = 0; pass < 10 && retry; pass++) {
		retry = 0;

		list_for_each_entry_safe(page, page2, from, lru) {
retry:
			cond_resched();

			if (PageHuge(page))
				rc = unmap_and_move_huge_page(get_new_page,
						put_new_page, private, page,
						pass > 2, mode, reason);
			else
				rc = unmap_and_move(get_new_page, put_new_page,
						private, page, pass > 2, mode,
						reason);

			switch(rc) {
			case -ENOMEM:
				/*
				 * THP migration might be unsupported or the
				 * allocation could've failed so we should
				 * retry on the same page with the THP split
				 * to base pages.
				 *
				 * Head page is retried immediately and tail
				 * pages are added to the tail of the list so
				 * we encounter them after the rest of the list
				 * is processed.
				 */
				if (PageTransHuge(page)) {
					lock_page(page);
					rc = split_huge_page_to_list(page, from);
					unlock_page(page);
					if (!rc) {
						list_safe_reset_next(page, page2, lru);
						goto retry;
					}
				}
				nr_failed++;
				goto out;
			case -EAGAIN:
				retry++;
				break;
			case MIGRATEPAGE_SUCCESS:
				nr_succeeded++;
				break;
			default:
				/*
				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
				 * unlike the -EAGAIN case, the failed page is
				 * removed from the migration page list and is
				 * not retried in the next outer loop.
				 */
				nr_failed++;
				break;
			}
		}
	}
	nr_failed += retry;
	rc = nr_failed;
out:
	if (nr_succeeded)
		count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
	if (nr_failed)
		count_vm_events(PGMIGRATE_FAIL, nr_failed);
	trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);

	if (!swapwrite)
		current->flags &= ~PF_SWAPWRITE;

	return rc;
}
1454
1455#ifdef CONFIG_NUMA
1456
1457static int store_status(int __user *status, int start, int value, int nr)
1458{
1459 while (nr-- > 0) {
1460 if (put_user(value, status + start))
1461 return -EFAULT;
1462 start++;
1463 }
1464
1465 return 0;
1466}
1467
1468static int do_move_pages_to_node(struct mm_struct *mm,
1469 struct list_head *pagelist, int node)
1470{
1471 int err;
1472
1473 if (list_empty(pagelist))
1474 return 0;
1475
1476 err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
1477 MIGRATE_SYNC, MR_SYSCALL);
1478 if (err)
1479 putback_movable_pages(pagelist);
1480 return err;
1481}
1482
1483
1484
1485
1486
1487
1488
1489
1490static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
1491 int node, struct list_head *pagelist, bool migrate_all)
1492{
1493 struct vm_area_struct *vma;
1494 struct page *page;
1495 unsigned int follflags;
1496 int err;
1497
1498 down_read(&mm->mmap_sem);
1499 err = -EFAULT;
1500 vma = find_vma(mm, addr);
1501 if (!vma || addr < vma->vm_start || !vma_migratable(vma))
1502 goto out;
1503
1504
1505 follflags = FOLL_GET | FOLL_DUMP;
1506 page = follow_page(vma, addr, follflags);
1507
1508 err = PTR_ERR(page);
1509 if (IS_ERR(page))
1510 goto out;
1511
1512 err = -ENOENT;
1513 if (!page)
1514 goto out;
1515
1516 err = 0;
1517 if (page_to_nid(page) == node)
1518 goto out_putpage;
1519
1520 err = -EACCES;
1521 if (page_mapcount(page) > 1 && !migrate_all)
1522 goto out_putpage;
1523
1524 if (PageHuge(page)) {
1525 if (PageHead(page)) {
1526 isolate_huge_page(page, pagelist);
1527 err = 0;
1528 }
1529 } else {
1530 struct page *head;
1531
1532 head = compound_head(page);
1533 err = isolate_lru_page(head);
1534 if (err)
1535 goto out_putpage;
1536
1537 err = 0;
1538 list_add_tail(&head->lru, pagelist);
1539 mod_node_page_state(page_pgdat(head),
1540 NR_ISOLATED_ANON + page_is_file_cache(head),
1541 hpage_nr_pages(head));
1542 }
1543out_putpage:
1544
1545
1546
1547
1548
1549 put_page(page);
1550out:
1551 up_read(&mm->mmap_sem);
1552 return err;
1553}
1554
1555
1556
1557
1558
1559static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1560 unsigned long nr_pages,
1561 const void __user * __user *pages,
1562 const int __user *nodes,
1563 int __user *status, int flags)
1564{
1565 int current_node = NUMA_NO_NODE;
1566 LIST_HEAD(pagelist);
1567 int start, i;
1568 int err = 0, err1;
1569
1570 migrate_prep();
1571
1572 for (i = start = 0; i < nr_pages; i++) {
1573 const void __user *p;
1574 unsigned long addr;
1575 int node;
1576
1577 err = -EFAULT;
1578 if (get_user(p, pages + i))
1579 goto out_flush;
1580 if (get_user(node, nodes + i))
1581 goto out_flush;
1582 addr = (unsigned long)p;
1583
1584 err = -ENODEV;
1585 if (node < 0 || node >= MAX_NUMNODES)
1586 goto out_flush;
1587 if (!node_state(node, N_MEMORY))
1588 goto out_flush;
1589
1590 err = -EACCES;
1591 if (!node_isset(node, task_nodes))
1592 goto out_flush;
1593
1594 if (current_node == NUMA_NO_NODE) {
1595 current_node = node;
1596 start = i;
1597 } else if (node != current_node) {
1598 err = do_move_pages_to_node(mm, &pagelist, current_node);
1599 if (err)
1600 goto out;
1601 err = store_status(status, start, current_node, i - start);
1602 if (err)
1603 goto out;
1604 start = i;
1605 current_node = node;
1606 }
1607
1608
1609
1610
1611
1612 err = add_page_for_migration(mm, addr, current_node,
1613 &pagelist, flags & MPOL_MF_MOVE_ALL);
1614 if (!err)
1615 continue;
1616
1617 err = store_status(status, i, err, 1);
1618 if (err)
1619 goto out_flush;
1620
1621 err = do_move_pages_to_node(mm, &pagelist, current_node);
1622 if (err)
1623 goto out;
1624 if (i > start) {
1625 err = store_status(status, start, current_node, i - start);
1626 if (err)
1627 goto out;
1628 }
1629 current_node = NUMA_NO_NODE;
1630 }
1631out_flush:
1632 if (list_empty(&pagelist))
1633 return err;
1634
1635
1636 err1 = do_move_pages_to_node(mm, &pagelist, current_node);
1637 if (!err1)
1638 err1 = store_status(status, start, current_node, i - start);
1639 if (!err)
1640 err = err1;
1641out:
1642 return err;
1643}
1644
1645
1646
1647
1648static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
1649 const void __user **pages, int *status)
1650{
1651 unsigned long i;
1652
1653 down_read(&mm->mmap_sem);
1654
1655 for (i = 0; i < nr_pages; i++) {
1656 unsigned long addr = (unsigned long)(*pages);
1657 struct vm_area_struct *vma;
1658 struct page *page;
1659 int err = -EFAULT;
1660
1661 vma = find_vma(mm, addr);
1662 if (!vma || addr < vma->vm_start)
1663 goto set_status;
1664
1665
1666 page = follow_page(vma, addr, FOLL_DUMP);
1667
1668 err = PTR_ERR(page);
1669 if (IS_ERR(page))
1670 goto set_status;
1671
1672 err = page ? page_to_nid(page) : -ENOENT;
1673set_status:
1674 *status = err;
1675
1676 pages++;
1677 status++;
1678 }
1679
1680 up_read(&mm->mmap_sem);
1681}
1682
1683
1684
1685
1686
1687static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1688 const void __user * __user *pages,
1689 int __user *status)
1690{
1691#define DO_PAGES_STAT_CHUNK_NR 16
1692 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1693 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1694
1695 while (nr_pages) {
1696 unsigned long chunk_nr;
1697
1698 chunk_nr = nr_pages;
1699 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
1700 chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1701
1702 if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
1703 break;
1704
1705 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1706
1707 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
1708 break;
1709
1710 pages += chunk_nr;
1711 status += chunk_nr;
1712 nr_pages -= chunk_nr;
1713 }
1714 return nr_pages ? -EFAULT : 0;
1715}
1716
1717
1718
1719
1720
1721static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
1722 const void __user * __user *pages,
1723 const int __user *nodes,
1724 int __user *status, int flags)
1725{
1726 struct task_struct *task;
1727 struct mm_struct *mm;
1728 int err;
1729 nodemask_t task_nodes;
1730
1731
1732 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
1733 return -EINVAL;
1734
1735 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
1736 return -EPERM;
1737
1738
1739 rcu_read_lock();
1740 task = pid ? find_task_by_vpid(pid) : current;
1741 if (!task) {
1742 rcu_read_unlock();
1743 return -ESRCH;
1744 }
1745 get_task_struct(task);
1746
1747
1748
1749
1750
1751 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
1752 rcu_read_unlock();
1753 err = -EPERM;
1754 goto out;
1755 }
1756 rcu_read_unlock();
1757
1758 err = security_task_movememory(task);
1759 if (err)
1760 goto out;
1761
1762 task_nodes = cpuset_mems_allowed(task);
1763 mm = get_task_mm(task);
1764 put_task_struct(task);
1765
1766 if (!mm)
1767 return -EINVAL;
1768
1769 if (nodes)
1770 err = do_pages_move(mm, task_nodes, nr_pages, pages,
1771 nodes, status, flags);
1772 else
1773 err = do_pages_stat(mm, nr_pages, pages, status);
1774
1775 mmput(mm);
1776 return err;
1777
1778out:
1779 put_task_struct(task);
1780 return err;
1781}
1782
1783SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1784 const void __user * __user *, pages,
1785 const int __user *, nodes,
1786 int __user *, status, int, flags)
1787{
1788 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
1789}
1790
1791#ifdef CONFIG_COMPAT
1792COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
1793 compat_uptr_t __user *, pages32,
1794 const int __user *, nodes,
1795 int __user *, status,
1796 int, flags)
1797{
1798 const void __user * __user *pages;
1799 int i;
1800
1801 pages = compat_alloc_user_space(nr_pages * sizeof(void *));
1802 for (i = 0; i < nr_pages; i++) {
1803 compat_uptr_t p;
1804
1805 if (get_user(p, pages32 + i) ||
1806 put_user(compat_ptr(p), pages + i))
1807 return -EFAULT;
1808 }
1809 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
1810}
1811#endif
1812
1813#ifdef CONFIG_NUMA_BALANCING
1814
1815
1816
1817
1818static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
1819 unsigned long nr_migrate_pages)
1820{
1821 int z;
1822
1823 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
1824 struct zone *zone = pgdat->node_zones + z;
1825
1826 if (!populated_zone(zone))
1827 continue;
1828
1829
1830 if (!zone_watermark_ok(zone, 0,
1831 high_wmark_pages(zone) +
1832 nr_migrate_pages,
1833 0, 0))
1834 continue;
1835 return true;
1836 }
1837 return false;
1838}
1839
1840static struct page *alloc_misplaced_dst_page(struct page *page,
1841 unsigned long data)
1842{
1843 int nid = (int) data;
1844 struct page *newpage;
1845
1846 newpage = __alloc_pages_node(nid,
1847 (GFP_HIGHUSER_MOVABLE |
1848 __GFP_THISNODE | __GFP_NOMEMALLOC |
1849 __GFP_NORETRY | __GFP_NOWARN) &
1850 ~__GFP_RECLAIM, 0);
1851
1852 return newpage;
1853}
1854
1855
1856
1857
1858
1859
1860static unsigned int migrate_interval_millisecs __read_mostly = 100;
1861static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
1862
1863
1864static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
1865 unsigned long nr_pages)
1866{
1867
1868
1869
1870
1871
1872 if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
1873 spin_lock(&pgdat->numabalancing_migrate_lock);
1874 pgdat->numabalancing_migrate_nr_pages = 0;
1875 pgdat->numabalancing_migrate_next_window = jiffies +
1876 msecs_to_jiffies(migrate_interval_millisecs);
1877 spin_unlock(&pgdat->numabalancing_migrate_lock);
1878 }
1879 if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
1880 trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
1881 nr_pages);
1882 return true;
1883 }
1884
1885
1886
1887
1888
1889
1890
1891 pgdat->numabalancing_migrate_nr_pages += nr_pages;
1892 return false;
1893}
1894
1895static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
1896{
1897 int page_lru;
1898
1899 VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
1900
1901
1902 if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
1903 return 0;
1904
1905 if (isolate_lru_page(page))
1906 return 0;
1907
1908
1909
1910
1911
1912
1913
1914
1915 if (PageTransHuge(page) && page_count(page) != 3) {
1916 putback_lru_page(page);
1917 return 0;
1918 }
1919
1920 page_lru = page_is_file_cache(page);
1921 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
1922 hpage_nr_pages(page));
1923
1924
1925
1926
1927
1928
1929 put_page(page);
1930 return 1;
1931}
1932
1933bool pmd_trans_migrating(pmd_t pmd)
1934{
1935 struct page *page = pmd_page(pmd);
1936 return PageLocked(page);
1937}
1938
1939
1940
1941
1942
1943
1944int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
1945 int node)
1946{
1947 pg_data_t *pgdat = NODE_DATA(node);
1948 int isolated;
1949 int nr_remaining;
1950 LIST_HEAD(migratepages);
1951
1952
1953
1954
1955
1956 if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
1957 (vma->vm_flags & VM_EXEC))
1958 goto out;
1959
1960
1961
1962
1963
1964 if (page_is_file_cache(page) && PageDirty(page))
1965 goto out;
1966
1967
1968
1969
1970
1971
1972 if (numamigrate_update_ratelimit(pgdat, 1))
1973 goto out;
1974
1975 isolated = numamigrate_isolate_page(pgdat, page);
1976 if (!isolated)
1977 goto out;
1978
1979 list_add(&page->lru, &migratepages);
1980 nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
1981 NULL, node, MIGRATE_ASYNC,
1982 MR_NUMA_MISPLACED);
1983 if (nr_remaining) {
1984 if (!list_empty(&migratepages)) {
1985 list_del(&page->lru);
1986 dec_node_page_state(page, NR_ISOLATED_ANON +
1987 page_is_file_cache(page));
1988 putback_lru_page(page);
1989 }
1990 isolated = 0;
1991 } else
1992 count_vm_numa_event(NUMA_PAGE_MIGRATE);
1993 BUG_ON(!list_empty(&migratepages));
1994 return isolated;
1995
1996out:
1997 put_page(page);
1998 return 0;
1999}
2000#endif
2001
2002#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
2003
2004
2005
2006
2007int migrate_misplaced_transhuge_page(struct mm_struct *mm,
2008 struct vm_area_struct *vma,
2009 pmd_t *pmd, pmd_t entry,
2010 unsigned long address,
2011 struct page *page, int node)
2012{
2013 spinlock_t *ptl;
2014 pg_data_t *pgdat = NODE_DATA(node);
2015 int isolated = 0;
2016 struct page *new_page = NULL;
2017 int page_lru = page_is_file_cache(page);
2018 unsigned long mmun_start = address & HPAGE_PMD_MASK;
2019 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
2020
2021
2022
2023
2024
2025
2026 if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
2027 goto out_dropref;
2028
2029 new_page = alloc_pages_node(node,
2030 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
2031 HPAGE_PMD_ORDER);
2032 if (!new_page)
2033 goto out_fail;
2034 prep_transhuge_page(new_page);
2035
2036 isolated = numamigrate_isolate_page(pgdat, page);
2037 if (!isolated) {
2038 put_page(new_page);
2039 goto out_fail;
2040 }
2041
2042
2043 __SetPageLocked(new_page);
2044 if (PageSwapBacked(page))
2045 __SetPageSwapBacked(new_page);
2046
2047
2048 new_page->mapping = page->mapping;
2049 new_page->index = page->index;
2050 migrate_page_copy(new_page, page);
2051 WARN_ON(PageLRU(new_page));
2052
2053
2054 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2055 ptl = pmd_lock(mm, pmd);
2056 if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
2057 spin_unlock(ptl);
2058 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2059
2060
2061 if (TestClearPageActive(new_page))
2062 SetPageActive(page);
2063 if (TestClearPageUnevictable(new_page))
2064 SetPageUnevictable(page);
2065
2066 unlock_page(new_page);
2067 put_page(new_page);
2068
2069
2070 get_page(page);
2071 putback_lru_page(page);
2072 mod_node_page_state(page_pgdat(page),
2073 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
2074
2075 goto out_unlock;
2076 }
2077
2078 entry = mk_huge_pmd(new_page, vma->vm_page_prot);
2079 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
2080
2081
2082
2083
2084
2085
2086
2087
2088 flush_cache_range(vma, mmun_start, mmun_end);
2089 page_add_anon_rmap(new_page, vma, mmun_start, true);
2090 pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
2091 set_pmd_at(mm, mmun_start, pmd, entry);
2092 update_mmu_cache_pmd(vma, address, &entry);
2093
2094 page_ref_unfreeze(page, 2);
2095 mlock_migrate_page(new_page, page);
2096 page_remove_rmap(page, true);
2097 set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
2098
2099 spin_unlock(ptl);
2100
2101
2102
2103
2104 mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
2105
2106
2107 get_page(new_page);
2108 putback_lru_page(new_page);
2109
2110 unlock_page(new_page);
2111 unlock_page(page);
2112 put_page(page);
2113 put_page(page);
2114
2115 count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
2116 count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
2117
2118 mod_node_page_state(page_pgdat(page),
2119 NR_ISOLATED_ANON + page_lru,
2120 -HPAGE_PMD_NR);
2121 return isolated;
2122
2123out_fail:
2124 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
2125out_dropref:
2126 ptl = pmd_lock(mm, pmd);
2127 if (pmd_same(*pmd, entry)) {
2128 entry = pmd_modify(entry, vma->vm_page_prot);
2129 set_pmd_at(mm, mmun_start, pmd, entry);
2130 update_mmu_cache_pmd(vma, address, &entry);
2131 }
2132 spin_unlock(ptl);
2133
2134out_unlock:
2135 unlock_page(page);
2136 put_page(page);
2137 return 0;
2138}
2139#endif
2140
2141#endif
2142
2143#if defined(CONFIG_MIGRATE_VMA_HELPER)
2144struct migrate_vma {
2145 struct vm_area_struct *vma;
2146 unsigned long *dst;
2147 unsigned long *src;
2148 unsigned long cpages;
2149 unsigned long npages;
2150 unsigned long start;
2151 unsigned long end;
2152};
2153
2154static int migrate_vma_collect_hole(unsigned long start,
2155 unsigned long end,
2156 struct mm_walk *walk)
2157{
2158 struct migrate_vma *migrate = walk->private;
2159 unsigned long addr;
2160
2161 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
2162 migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
2163 migrate->dst[migrate->npages] = 0;
2164 migrate->npages++;
2165 migrate->cpages++;
2166 }
2167
2168 return 0;
2169}
2170
2171static int migrate_vma_collect_skip(unsigned long start,
2172 unsigned long end,
2173 struct mm_walk *walk)
2174{
2175 struct migrate_vma *migrate = walk->private;
2176 unsigned long addr;
2177
2178 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
2179 migrate->dst[migrate->npages] = 0;
2180 migrate->src[migrate->npages++] = 0;
2181 }
2182
2183 return 0;
2184}
2185
2186static int migrate_vma_collect_pmd(pmd_t *pmdp,
2187 unsigned long start,
2188 unsigned long end,
2189 struct mm_walk *walk)
2190{
2191 struct migrate_vma *migrate = walk->private;
2192 struct vm_area_struct *vma = walk->vma;
2193 struct mm_struct *mm = vma->vm_mm;
2194 unsigned long addr = start, unmapped = 0;
2195 spinlock_t *ptl;
2196 pte_t *ptep;
2197
2198again:
2199 if (pmd_none(*pmdp))
2200 return migrate_vma_collect_hole(start, end, walk);
2201
2202 if (pmd_trans_huge(*pmdp)) {
2203 struct page *page;
2204
2205 ptl = pmd_lock(mm, pmdp);
2206 if (unlikely(!pmd_trans_huge(*pmdp))) {
2207 spin_unlock(ptl);
2208 goto again;
2209 }
2210
2211 page = pmd_page(*pmdp);
2212 if (is_huge_zero_page(page)) {
2213 spin_unlock(ptl);
2214 split_huge_pmd(vma, pmdp, addr);
2215 if (pmd_trans_unstable(pmdp))
2216 return migrate_vma_collect_skip(start, end,
2217 walk);
2218 } else {
2219 int ret;
2220
2221 get_page(page);
2222 spin_unlock(ptl);
2223 if (unlikely(!trylock_page(page)))
2224 return migrate_vma_collect_skip(start, end,
2225 walk);
2226 ret = split_huge_page(page);
2227 unlock_page(page);
2228 put_page(page);
2229 if (ret)
2230 return migrate_vma_collect_skip(start, end,
2231 walk);
2232 if (pmd_none(*pmdp))
2233 return migrate_vma_collect_hole(start, end,
2234 walk);
2235 }
2236 }
2237
2238 if (unlikely(pmd_bad(*pmdp)))
2239 return migrate_vma_collect_skip(start, end, walk);
2240
2241 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2242 arch_enter_lazy_mmu_mode();
2243
2244 for (; addr < end; addr += PAGE_SIZE, ptep++) {
2245 unsigned long mpfn, pfn;
2246 struct page *page;
2247 swp_entry_t entry;
2248 pte_t pte;
2249
2250 pte = *ptep;
2251 pfn = pte_pfn(pte);
2252
2253 if (pte_none(pte)) {
2254 mpfn = MIGRATE_PFN_MIGRATE;
2255 migrate->cpages++;
2256 pfn = 0;
2257 goto next;
2258 }
2259
2260 if (!pte_present(pte)) {
2261 mpfn = pfn = 0;
2262
2263
2264
2265
2266
2267
2268 entry = pte_to_swp_entry(pte);
2269 if (!is_device_private_entry(entry))
2270 goto next;
2271
2272 page = device_private_entry_to_page(entry);
2273 mpfn = migrate_pfn(page_to_pfn(page))|
2274 MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
2275 if (is_write_device_private_entry(entry))
2276 mpfn |= MIGRATE_PFN_WRITE;
2277 } else {
2278 if (is_zero_pfn(pfn)) {
2279 mpfn = MIGRATE_PFN_MIGRATE;
2280 migrate->cpages++;
2281 pfn = 0;
2282 goto next;
2283 }
2284 page = _vm_normal_page(migrate->vma, addr, pte, true);
2285 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
2286 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
2287 }
2288
2289
2290 if (!page || !page->mapping || PageTransCompound(page)) {
2291 mpfn = pfn = 0;
2292 goto next;
2293 }
2294 pfn = page_to_pfn(page);
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305 get_page(page);
2306 migrate->cpages++;
2307
2308
2309
2310
2311
2312
2313 if (trylock_page(page)) {
2314 pte_t swp_pte;
2315
2316 mpfn |= MIGRATE_PFN_LOCKED;
2317 ptep_get_and_clear(mm, addr, ptep);
2318
2319
2320 entry = make_migration_entry(page, mpfn &
2321 MIGRATE_PFN_WRITE);
2322 swp_pte = swp_entry_to_pte(entry);
2323 if (pte_soft_dirty(pte))
2324 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2325 set_pte_at(mm, addr, ptep, swp_pte);
2326
2327
2328
2329
2330
2331
2332 page_remove_rmap(page, false);
2333 put_page(page);
2334
2335 if (pte_present(pte))
2336 unmapped++;
2337 }
2338
2339next:
2340 migrate->dst[migrate->npages] = 0;
2341 migrate->src[migrate->npages++] = mpfn;
2342 }
2343 arch_leave_lazy_mmu_mode();
2344 pte_unmap_unlock(ptep - 1, ptl);
2345
2346
2347 if (unmapped)
2348 flush_tlb_range(walk->vma, start, end);
2349
2350 return 0;
2351}
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361static void migrate_vma_collect(struct migrate_vma *migrate)
2362{
2363 struct mm_walk mm_walk;
2364
2365 mm_walk.pmd_entry = migrate_vma_collect_pmd;
2366 mm_walk.pte_entry = NULL;
2367 mm_walk.pte_hole = migrate_vma_collect_hole;
2368 mm_walk.hugetlb_entry = NULL;
2369 mm_walk.test_walk = NULL;
2370 mm_walk.vma = migrate->vma;
2371 mm_walk.mm = migrate->vma->vm_mm;
2372 mm_walk.private = migrate;
2373
2374 mmu_notifier_invalidate_range_start(mm_walk.mm,
2375 migrate->start,
2376 migrate->end);
2377 walk_page_range(migrate->start, migrate->end, &mm_walk);
2378 mmu_notifier_invalidate_range_end(mm_walk.mm,
2379 migrate->start,
2380 migrate->end);
2381
2382 migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
2383}
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393static bool migrate_vma_check_page(struct page *page)
2394{
2395
2396
2397
2398
2399
2400 int extra = 1;
2401
2402
2403
2404
2405
2406
2407 if (PageCompound(page))
2408 return false;
2409
2410
2411 if (is_zone_device_page(page)) {
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425 if (is_device_private_page(page))
2426 return true;
2427
2428
2429
2430
2431
2432 if (!is_device_public_page(page))
2433 return false;
2434 extra++;
2435 }
2436
2437
2438 if (page_mapping(page))
2439 extra += 1 + page_has_private(page);
2440
2441 if ((page_count(page) - extra) > page_mapcount(page))
2442 return false;
2443
2444 return true;
2445}
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456static void migrate_vma_prepare(struct migrate_vma *migrate)
2457{
2458 const unsigned long npages = migrate->npages;
2459 const unsigned long start = migrate->start;
2460 unsigned long addr, i, restore = 0;
2461 bool allow_drain = true;
2462
2463 lru_add_drain();
2464
2465 for (i = 0; (i < npages) && migrate->cpages; i++) {
2466 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2467 bool remap = true;
2468
2469 if (!page)
2470 continue;
2471
2472 if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
2473
2474
2475
2476
2477
2478
2479
2480
2481 if (!trylock_page(page)) {
2482 migrate->src[i] = 0;
2483 migrate->cpages--;
2484 put_page(page);
2485 continue;
2486 }
2487 remap = false;
2488 migrate->src[i] |= MIGRATE_PFN_LOCKED;
2489 }
2490
2491
2492 if (!is_zone_device_page(page)) {
2493 if (!PageLRU(page) && allow_drain) {
2494
2495 lru_add_drain_all();
2496 allow_drain = false;
2497 }
2498
2499 if (isolate_lru_page(page)) {
2500 if (remap) {
2501 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2502 migrate->cpages--;
2503 restore++;
2504 } else {
2505 migrate->src[i] = 0;
2506 unlock_page(page);
2507 migrate->cpages--;
2508 put_page(page);
2509 }
2510 continue;
2511 }
2512
2513
2514 put_page(page);
2515 }
2516
2517 if (!migrate_vma_check_page(page)) {
2518 if (remap) {
2519 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2520 migrate->cpages--;
2521 restore++;
2522
2523 if (!is_zone_device_page(page)) {
2524 get_page(page);
2525 putback_lru_page(page);
2526 }
2527 } else {
2528 migrate->src[i] = 0;
2529 unlock_page(page);
2530 migrate->cpages--;
2531
2532 if (!is_zone_device_page(page))
2533 putback_lru_page(page);
2534 else
2535 put_page(page);
2536 }
2537 }
2538 }
2539
2540 for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
2541 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2542
2543 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2544 continue;
2545
2546 remove_migration_pte(page, migrate->vma, addr, page);
2547
2548 migrate->src[i] = 0;
2549 unlock_page(page);
2550 put_page(page);
2551 restore--;
2552 }
2553}
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566static void migrate_vma_unmap(struct migrate_vma *migrate)
2567{
2568 int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
2569 const unsigned long npages = migrate->npages;
2570 const unsigned long start = migrate->start;
2571 unsigned long addr, i, restore = 0;
2572
2573 for (i = 0; i < npages; i++) {
2574 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2575
2576 if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
2577 continue;
2578
2579 if (page_mapped(page)) {
2580 try_to_unmap(page, flags);
2581 if (page_mapped(page))
2582 goto restore;
2583 }
2584
2585 if (migrate_vma_check_page(page))
2586 continue;
2587
2588restore:
2589 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2590 migrate->cpages--;
2591 restore++;
2592 }
2593
2594 for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
2595 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2596
2597 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2598 continue;
2599
2600 remove_migration_ptes(page, page, false);
2601
2602 migrate->src[i] = 0;
2603 unlock_page(page);
2604 restore--;
2605
2606 if (is_zone_device_page(page))
2607 put_page(page);
2608 else
2609 putback_lru_page(page);
2610 }
2611}
2612
2613static void migrate_vma_insert_page(struct migrate_vma *migrate,
2614 unsigned long addr,
2615 struct page *page,
2616 unsigned long *src,
2617 unsigned long *dst)
2618{
2619 struct vm_area_struct *vma = migrate->vma;
2620 struct mm_struct *mm = vma->vm_mm;
2621 struct mem_cgroup *memcg;
2622 bool flush = false;
2623 spinlock_t *ptl;
2624 pte_t entry;
2625 pgd_t *pgdp;
2626 p4d_t *p4dp;
2627 pud_t *pudp;
2628 pmd_t *pmdp;
2629 pte_t *ptep;
2630
2631
2632 if (!vma_is_anonymous(vma))
2633 goto abort;
2634
2635 pgdp = pgd_offset(mm, addr);
2636 p4dp = p4d_alloc(mm, pgdp, addr);
2637 if (!p4dp)
2638 goto abort;
2639 pudp = pud_alloc(mm, p4dp, addr);
2640 if (!pudp)
2641 goto abort;
2642 pmdp = pmd_alloc(mm, pudp, addr);
2643 if (!pmdp)
2644 goto abort;
2645
2646 if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
2647 goto abort;
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659 if (pte_alloc(mm, pmdp, addr))
2660 goto abort;
2661
2662
2663 if (unlikely(pmd_trans_unstable(pmdp)))
2664 goto abort;
2665
2666 if (unlikely(anon_vma_prepare(vma)))
2667 goto abort;
2668 if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
2669 goto abort;
2670
2671
2672
2673
2674
2675
2676 __SetPageUptodate(page);
2677
2678 if (is_zone_device_page(page)) {
2679 if (is_device_private_page(page)) {
2680 swp_entry_t swp_entry;
2681
2682 swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
2683 entry = swp_entry_to_pte(swp_entry);
2684 } else if (is_device_public_page(page)) {
2685 entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
2686 if (vma->vm_flags & VM_WRITE)
2687 entry = pte_mkwrite(pte_mkdirty(entry));
2688 entry = pte_mkdevmap(entry);
2689 }
2690 } else {
2691 entry = mk_pte(page, vma->vm_page_prot);
2692 if (vma->vm_flags & VM_WRITE)
2693 entry = pte_mkwrite(pte_mkdirty(entry));
2694 }
2695
2696 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2697
2698 if (pte_present(*ptep)) {
2699 unsigned long pfn = pte_pfn(*ptep);
2700
2701 if (!is_zero_pfn(pfn)) {
2702 pte_unmap_unlock(ptep, ptl);
2703 mem_cgroup_cancel_charge(page, memcg, false);
2704 goto abort;
2705 }
2706 flush = true;
2707 } else if (!pte_none(*ptep)) {
2708 pte_unmap_unlock(ptep, ptl);
2709 mem_cgroup_cancel_charge(page, memcg, false);
2710 goto abort;
2711 }
2712
2713
2714
2715
2716
2717 if (userfaultfd_missing(vma)) {
2718 pte_unmap_unlock(ptep, ptl);
2719 mem_cgroup_cancel_charge(page, memcg, false);
2720 goto abort;
2721 }
2722
2723 inc_mm_counter(mm, MM_ANONPAGES);
2724 page_add_new_anon_rmap(page, vma, addr, false);
2725 mem_cgroup_commit_charge(page, memcg, false, false);
2726 if (!is_zone_device_page(page))
2727 lru_cache_add_active_or_unevictable(page, vma);
2728 get_page(page);
2729
2730 if (flush) {
2731 flush_cache_page(vma, addr, pte_pfn(*ptep));
2732 ptep_clear_flush_notify(vma, addr, ptep);
2733 set_pte_at_notify(mm, addr, ptep, entry);
2734 update_mmu_cache(vma, addr, ptep);
2735 } else {
2736
2737 set_pte_at(mm, addr, ptep, entry);
2738 update_mmu_cache(vma, addr, ptep);
2739 }
2740
2741 pte_unmap_unlock(ptep, ptl);
2742 *src = MIGRATE_PFN_MIGRATE;
2743 return;
2744
2745abort:
2746 *src &= ~MIGRATE_PFN_MIGRATE;
2747}
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757static void migrate_vma_pages(struct migrate_vma *migrate)
2758{
2759 const unsigned long npages = migrate->npages;
2760 const unsigned long start = migrate->start;
2761 struct vm_area_struct *vma = migrate->vma;
2762 struct mm_struct *mm = vma->vm_mm;
2763 unsigned long addr, i, mmu_start;
2764 bool notified = false;
2765
2766 for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
2767 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
2768 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2769 struct address_space *mapping;
2770 int r;
2771
2772 if (!newpage) {
2773 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2774 continue;
2775 }
2776
2777 if (!page) {
2778 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE)) {
2779 continue;
2780 }
2781 if (!notified) {
2782 mmu_start = addr;
2783 notified = true;
2784 mmu_notifier_invalidate_range_start(mm,
2785 mmu_start,
2786 migrate->end);
2787 }
2788 migrate_vma_insert_page(migrate, addr, newpage,
2789 &migrate->src[i],
2790 &migrate->dst[i]);
2791 continue;
2792 }
2793
2794 mapping = page_mapping(page);
2795
2796 if (is_zone_device_page(newpage)) {
2797 if (is_device_private_page(newpage)) {
2798
2799
2800
2801
2802 if (mapping) {
2803 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2804 continue;
2805 }
2806 } else if (!is_device_public_page(newpage)) {
2807
2808
2809
2810
2811 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2812 continue;
2813 }
2814 }
2815
2816 r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
2817 if (r != MIGRATEPAGE_SUCCESS)
2818 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2819 }
2820
2821
2822
2823
2824
2825
2826 if (notified)
2827 mmu_notifier_invalidate_range_only_end(mm, mmu_start,
2828 migrate->end);
2829}
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842static void migrate_vma_finalize(struct migrate_vma *migrate)
2843{
2844 const unsigned long npages = migrate->npages;
2845 unsigned long i;
2846
2847 for (i = 0; i < npages; i++) {
2848 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
2849 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2850
2851 if (!page) {
2852 if (newpage) {
2853 unlock_page(newpage);
2854 put_page(newpage);
2855 }
2856 continue;
2857 }
2858
2859 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
2860 if (newpage) {
2861 unlock_page(newpage);
2862 put_page(newpage);
2863 }
2864 newpage = page;
2865 }
2866
2867 remove_migration_ptes(page, newpage, false);
2868 unlock_page(page);
2869 migrate->cpages--;
2870
2871 if (is_zone_device_page(page))
2872 put_page(page);
2873 else
2874 putback_lru_page(page);
2875
2876 if (newpage != page) {
2877 unlock_page(newpage);
2878 if (is_zone_device_page(newpage))
2879 put_page(newpage);
2880 else
2881 putback_lru_page(newpage);
2882 }
2883 }
2884}
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937int migrate_vma(const struct migrate_vma_ops *ops,
2938 struct vm_area_struct *vma,
2939 unsigned long start,
2940 unsigned long end,
2941 unsigned long *src,
2942 unsigned long *dst,
2943 void *private)
2944{
2945 struct migrate_vma migrate;
2946
2947
2948 start &= PAGE_MASK;
2949 end &= PAGE_MASK;
2950 if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
2951 vma_is_dax(vma))
2952 return -EINVAL;
2953 if (start < vma->vm_start || start >= vma->vm_end)
2954 return -EINVAL;
2955 if (end <= vma->vm_start || end > vma->vm_end)
2956 return -EINVAL;
2957 if (!ops || !src || !dst || start >= end)
2958 return -EINVAL;
2959
2960 memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
2961 migrate.src = src;
2962 migrate.dst = dst;
2963 migrate.start = start;
2964 migrate.npages = 0;
2965 migrate.cpages = 0;
2966 migrate.end = end;
2967 migrate.vma = vma;
2968
2969
2970 migrate_vma_collect(&migrate);
2971 if (!migrate.cpages)
2972 return 0;
2973
2974
2975 migrate_vma_prepare(&migrate);
2976 if (!migrate.cpages)
2977 return 0;
2978
2979
2980 migrate_vma_unmap(&migrate);
2981 if (!migrate.cpages)
2982 return 0;
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992 ops->alloc_and_copy(vma, src, dst, start, end, private);
2993
2994
2995 migrate_vma_pages(&migrate);
2996
2997 ops->finalize_and_map(vma, src, dst, start, end, private);
2998
2999
3000 migrate_vma_finalize(&migrate);
3001
3002 return 0;
3003}
3004EXPORT_SYMBOL(migrate_vma);
3005#endif
3006