/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Page migration was first developed in the context of the memory hotplug
 * project.  It moves the contents and state of pages to newly allocated
 * target pages while updating every reference to them, and is now also used
 * by memory compaction, NUMA balancing, mbind()/move_pages() and the
 * device memory migration helpers.
 */
#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"

/*
 * migrate_prep() needs to be called before we start compiling a list of
 * pages to be migrated using isolate_lru_page().  If scheduling work on
 * other CPUs is undesirable, use migrate_prep_local().
 */
int migrate_prep(void)
{
	/*
	 * Clear the LRU lists so pages can be isolated.
	 * Note that pages may be moved off the LRU after we have
	 * drained them. Those pages will fail to migrate like other
	 * pages that may be busy.
	 */
	lru_add_drain_all();

	return 0;
}

/* Do the necessary work of migrate_prep but not if it involves other CPUs */
int migrate_prep_local(void)
{
	lru_add_drain();

	return 0;
}

int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages()
	 * or just got freed under us: if we cannot take a reference the page
	 * is not a candidate for isolation and we must not touch it further.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check __PageMovable() before taking the page lock, because the
	 * page's owner assumes that nobody touches the PG_locked bit of a
	 * freshly allocated page.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;

	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as against the page being released.  To avoid
	 * re-isolating a page that is already under migration, or
	 * isolating a page that is being freed, make sure we hold the
	 * page lock before proceeding with the isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	__SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}

/* It should be called on page which is PG_movable */
void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(!PageMovable(page), page);
	VM_BUG_ON_PAGE(!PageIsolated(page), page);

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	__ClearPageIsolated(page);
}

/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated non-lru movable pages, so we can rely on
		 * __PageMovable() here: an LRU page's mapping can never
		 * have PAGE_MAPPING_MOVABLE set.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_cache(page), -hpage_nr_pages(page));
			putback_lru_page(page);
		}
	}
}

/*
 * Restore a potential migration pte to a working pte entry
 */
static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
				 unsigned long addr, void *old)
{
	struct page_vma_mapped_walk pvmw = {
		.page = old,
		.vma = vma,
		.address = addr,
		.flags = PVMW_SYNC | PVMW_MIGRATION,
	};
	struct page *new;
	pte_t pte;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(PageTail(page), page);
	while (page_vma_mapped_walk(&pvmw)) {
		if (PageKsm(page))
			new = page;
		else
			new = page - pvmw.page->index +
				linear_page_index(vma, pvmw.address);

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
		/* PMD-mapped THP migration entry */
		if (!pvmw.pte) {
			VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
			remove_migration_pmd(&pvmw, new);
			continue;
		}
#endif

		get_page(new);
		pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
		if (pte_swp_soft_dirty(*pvmw.pte))
			pte = pte_mksoft_dirty(pte);

		/*
		 * Recheck VMA as permissions can change since migration started
		 */
		entry = pte_to_swp_entry(*pvmw.pte);
		if (is_write_migration_entry(entry))
			pte = maybe_mkwrite(pte, vma);

		if (unlikely(is_zone_device_page(new))) {
			if (is_device_private_page(new)) {
				entry = make_device_private_entry(new, pte_write(pte));
				pte = swp_entry_to_pte(entry);
			} else if (is_device_public_page(new)) {
				pte = pte_mkdevmap(pte);
				flush_dcache_page(new);
			}
		} else
			flush_dcache_page(new);

#ifdef CONFIG_HUGETLB_PAGE
		if (PageHuge(new)) {
			pte = pte_mkhuge(pte);
			pte = arch_make_huge_pte(pte, vma, new, 0);
			set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
			if (PageAnon(new))
				hugepage_add_anon_rmap(new, vma, pvmw.address);
			else
				page_dup_rmap(new, true);
		} else
#endif
		{
			set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);

			if (PageAnon(new))
				page_add_anon_rmap(new, vma, pvmw.address, false);
			else
				page_add_file_rmap(new, false);
		}
		if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
			mlock_vma_page(new);

		/* No need to invalidate - it was non-present before */
		update_mmu_cache(vma, pvmw.address, pvmw.pte);
	}

	return true;
}

/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
void remove_migration_ptes(struct page *old, struct page *new, bool locked)
{
	struct rmap_walk_control rwc = {
		.rmap_one = remove_migration_pte,
		.arg = old,
	};

	if (locked)
		rmap_walk_locked(new, &rwc);
	else
		rmap_walk(new, &rwc);
}

/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
				spinlock_t *ptl)
{
	pte_t pte;
	swp_entry_t entry;
	struct page *page;

	spin_lock(ptl);
	pte = *ptep;
	if (!is_swap_pte(pte))
		goto out;

	entry = pte_to_swp_entry(pte);
	if (!is_migration_entry(entry))
		goto out;

	page = migration_entry_to_page(entry);

	/*
	 * We need a reference on the page before we can safely call
	 * wait_on_page_locked().  If the refcount already dropped to
	 * zero the migration is finishing and the fault will simply be
	 * retried.
	 */
	if (!get_page_unless_zero(page))
		goto out;
	pte_unmap_unlock(ptep, ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
out:
	pte_unmap_unlock(ptep, ptl);
}

void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
				unsigned long address)
{
	spinlock_t *ptl = pte_lockptr(mm, pmd);
	pte_t *ptep = pte_offset_map(pmd, address);
	__migration_entry_wait(mm, ptep, ptl);
}

void migration_entry_wait_huge(struct vm_area_struct *vma,
		struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
	__migration_entry_wait(mm, pte, ptl);
}

#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;
	struct page *page;

	ptl = pmd_lock(mm, pmd);
	if (!is_pmd_migration_entry(*pmd))
		goto unlock;
	page = migration_entry_to_page(pmd_to_swp_entry(*pmd));
	if (!get_page_unless_zero(page))
		goto unlock;
	spin_unlock(ptl);
	wait_on_page_locked(page);
	put_page(page);
	return;
unlock:
	spin_unlock(ptl);
}
#endif

#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	struct buffer_head *bh = head;

	/* Simple case, sync compaction */
	if (mode != MIGRATE_ASYNC) {
		do {
			get_bh(bh);
			lock_buffer(bh);
			bh = bh->b_this_page;

		} while (bh != head);

		return true;
	}

	/* async case, we cannot block on lock_buffer so use trylock_buffer */
	do {
		get_bh(bh);
		if (!trylock_buffer(bh)) {
			/*
			 * We failed to lock the buffer and cannot stall in
			 * async migration. Release the taken locks
			 */
			struct buffer_head *failed_bh = bh;
			put_bh(failed_bh);
			bh = head;
			while (bh != failed_bh) {
				unlock_buffer(bh);
				put_bh(bh);
				bh = bh->b_this_page;
			}
			return false;
		}

		bh = bh->b_this_page;
	} while (bh != head);
	return true;
}
#else
static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
							enum migrate_mode mode)
{
	return true;
}
#endif

/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page,
		struct buffer_head *head, enum migrate_mode mode,
		int extra_count)
{
	struct zone *oldzone, *newzone;
	int dirty;
	int expected_count = 1 + extra_count;
	void **pslot;

	/*
	 * Device public or private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
	expected_count += is_device_private_page(page);
	expected_count += is_device_public_page(page);

	if (!mapping) {
		/* Anonymous page without mapping */
		if (page_count(page) != expected_count)
			return -EAGAIN;

		/* No turning back from here */
		newpage->index = page->index;
		newpage->mapping = page->mapping;
		if (PageSwapBacked(page))
			__SetPageSwapBacked(newpage);

		return MIGRATEPAGE_SUCCESS;
	}

	oldzone = page_zone(page);
	newzone = page_zone(newpage);

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));

	expected_count += 1 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * In the async migration case of moving a page with buffers, lock the
	 * buffers using trylock before the mapping is moved.  If we could not
	 * lock them afterwards, we would have to undo the mapping move while
	 * other references may still be pointing at the old page, so back off
	 * early here instead.
	 */
	if (mode == MIGRATE_ASYNC && head &&
			!buffer_migrate_lock_buffers(head, mode)) {
		page_ref_unfreeze(page, expected_count);
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
	newpage->index = page->index;
	newpage->mapping = page->mapping;
	get_page(newpage);	/* add cache reference */
	if (PageSwapBacked(page)) {
		__SetPageSwapBacked(newpage);
		if (PageSwapCache(page)) {
			SetPageSwapCache(newpage);
			set_page_private(newpage, page_private(page));
		}
	} else {
		VM_BUG_ON_PAGE(PageSwapCache(page), page);
	}

	/* Move dirty while page refs frozen and newpage not yet exposed */
	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);
		SetPageDirty(newpage);
	}

	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);

	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
	page_ref_unfreeze(page, expected_count - 1);

	spin_unlock(&mapping->tree_lock);
	/* Leave irq disabled to prevent preemption while updating stats */

	/*
	 * If moved to a different zone then also account
	 * the page for that zone. Other VM counters will be
	 * taken care of when we establish references to the
	 * new page and drop references to the old page.
	 *
	 * Note that anonymous pages are accounted for
	 * via NR_FILE_PAGES and NR_SHMEM here because they
	 * are counted as part of the page/swap cache.
	 */
	if (newzone != oldzone) {
		__dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
		__inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
		if (PageSwapBacked(page) && !PageSwapCache(page)) {
			__dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
			__inc_node_state(newzone->zone_pgdat, NR_SHMEM);
		}
		if (dirty && mapping_cap_account_dirty(mapping)) {
			__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
			__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
			__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
			__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
		}
	}
	local_irq_enable();

	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page_move_mapping);

/*
 * The expected number of remaining references is the same
 * as that of migrate_page_move_mapping().
 */
int migrate_huge_page_move_mapping(struct address_space *mapping,
				   struct page *newpage, struct page *page)
{
	int expected_count;
	void **pslot;

	spin_lock_irq(&mapping->tree_lock);

	pslot = radix_tree_lookup_slot(&mapping->page_tree,
					page_index(page));

	expected_count = 2 + page_has_private(page);
	if (page_count(page) != expected_count ||
		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	if (!page_ref_freeze(page, expected_count)) {
		spin_unlock_irq(&mapping->tree_lock);
		return -EAGAIN;
	}

	newpage->index = page->index;
	newpage->mapping = page->mapping;

	get_page(newpage);

	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);

	page_ref_unfreeze(page, expected_count - 1);

	spin_unlock_irq(&mapping->tree_lock);

	return MIGRATEPAGE_SUCCESS;
}

/*
 * Gigantic pages are so large that we do not guarantee that page++ pointer
 * arithmetic will work across the entire page.  We need something more
 * specialized.
 */
static void __copy_gigantic_page(struct page *dst, struct page *src,
				int nr_pages)
{
	int i;
	struct page *dst_base = dst;
	struct page *src_base = src;

	for (i = 0; i < nr_pages; ) {
		cond_resched();
		copy_highpage(dst, src);

		i++;
		dst = mem_map_next(dst, dst_base, i);
		src = mem_map_next(src, src_base, i);
	}
}

static void copy_huge_page(struct page *dst, struct page *src)
{
	int i;
	int nr_pages;

	if (PageHuge(src)) {
		/* hugetlbfs page */
		struct hstate *h = page_hstate(src);
		nr_pages = pages_per_huge_page(h);

		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
			__copy_gigantic_page(dst, src, nr_pages);
			return;
		}
	} else {
		/* thp page */
		BUG_ON(!PageTransHuge(src));
		nr_pages = hpage_nr_pages(src);
	}

	for (i = 0; i < nr_pages; i++) {
		cond_resched();
		copy_highpage(dst + i, src + i);
	}
}

/*
 * Copy the page to its new location
 */
void migrate_page_states(struct page *newpage, struct page *page)
{
	int cpupid;

	if (PageError(page))
		SetPageError(newpage);
	if (PageReferenced(page))
		SetPageReferenced(newpage);
	if (PageUptodate(page))
		SetPageUptodate(newpage);
	if (TestClearPageActive(page)) {
		VM_BUG_ON_PAGE(PageUnevictable(page), page);
		SetPageActive(newpage);
	} else if (TestClearPageUnevictable(page))
		SetPageUnevictable(newpage);
	if (PageChecked(page))
		SetPageChecked(newpage);
	if (PageMappedToDisk(page))
		SetPageMappedToDisk(newpage);

	/* Move dirty on pages not done by migrate_page_move_mapping() */
	if (PageDirty(page))
		SetPageDirty(newpage);

	if (page_is_young(page))
		set_page_young(newpage);
	if (page_is_idle(page))
		set_page_idle(newpage);

	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
	cpupid = page_cpupid_xchg_last(page, -1);
	page_cpupid_xchg_last(newpage, cpupid);

	ksm_migrate_page(newpage, page);
	/*
	 * Please do not reorder this without considering how mm/ksm.c's
	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
	 */
	if (PageSwapCache(page))
		ClearPageSwapCache(page);
	ClearPagePrivate(page);
	set_page_private(page, 0);

	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
	if (PageWriteback(newpage))
		end_page_writeback(newpage);

	copy_page_owner(page, newpage);

	mem_cgroup_migrate(page, newpage);
}
EXPORT_SYMBOL(migrate_page_states);

void migrate_page_copy(struct page *newpage, struct page *page)
{
	if (PageHuge(page) || PageTransHuge(page))
		copy_huge_page(newpage, page);
	else
		copy_highpage(newpage, page);

	migrate_page_states(newpage, page);
}
EXPORT_SYMBOL(migrate_page_copy);

/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
int migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page,
		enum migrate_mode mode)
{
	int rc;

	BUG_ON(PageWriteback(page));	/* Writeback must be complete */

	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);
	return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);

759#ifdef CONFIG_BLOCK
760
761
762
763
764
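/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist.
 */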
765int buffer_migrate_page(struct address_space *mapping,
766 struct page *newpage, struct page *page, enum migrate_mode mode)
767{
768 struct buffer_head *bh, *head;
769 int rc;
770
771 if (!page_has_buffers(page))
772 return migrate_page(mapping, newpage, page, mode);
773
774 head = page_buffers(page);
775
776 rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
777
778 if (rc != MIGRATEPAGE_SUCCESS)
779 return rc;
780
781
782
783
784
785
786 if (mode != MIGRATE_ASYNC)
787 BUG_ON(!buffer_migrate_lock_buffers(head, mode));
788
789 ClearPagePrivate(page);
790 set_page_private(newpage, page_private(page));
791 set_page_private(page, 0);
792 put_page(page);
793 get_page(newpage);
794
795 bh = head;
796 do {
797 set_bh_page(bh, newpage, bh_offset(bh));
798 bh = bh->b_this_page;
799
800 } while (bh != head);
801
802 SetPagePrivate(newpage);
803
804 if (mode != MIGRATE_SYNC_NO_COPY)
805 migrate_page_copy(newpage, page);
806 else
807 migrate_page_states(newpage, page);
808
809 bh = head;
810 do {
811 unlock_buffer(bh);
812 put_bh(bh);
813 bh = bh->b_this_page;
814
815 } while (bh != head);
816
817 return MIGRATEPAGE_SUCCESS;
818}
819EXPORT_SYMBOL(buffer_migrate_page);
820#endif
821
822
823
824
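/*
 * Writeback a page to clean the dirty state
 */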
825static int writeout(struct address_space *mapping, struct page *page)
826{
827 struct writeback_control wbc = {
828 .sync_mode = WB_SYNC_NONE,
829 .nr_to_write = 1,
830 .range_start = 0,
831 .range_end = LLONG_MAX,
832 .for_reclaim = 1
833 };
834 int rc;
835
836 if (!mapping->a_ops->writepage)
837
838 return -EINVAL;
839
840 if (!clear_page_dirty_for_io(page))
841
842 return -EAGAIN;
843
844
845
846
847
848
849
850
851
852 remove_migration_ptes(page, page, false);
853
854 rc = mapping->a_ops->writepage(page, &wbc);
855
856 if (rc != AOP_WRITEPAGE_ACTIVATE)
857
858 lock_page(page);
859
860 return (rc < 0) ? -EIO : -EAGAIN;
861}
862
863
864
865
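/*
 * Default handling if a filesystem does not provide a migration function.
 */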
866static int fallback_migrate_page(struct address_space *mapping,
867 struct page *newpage, struct page *page, enum migrate_mode mode)
868{
869 if (PageDirty(page)) {
870
871 switch (mode) {
872 case MIGRATE_SYNC:
873 case MIGRATE_SYNC_NO_COPY:
874 break;
875 default:
876 return -EBUSY;
877 }
878 return writeout(mapping, page);
879 }
880
881
882
883
884
885 if (page_has_private(page) &&
886 !try_to_release_page(page, GFP_KERNEL))
887 return -EAGAIN;
888
889 return migrate_page(mapping, newpage, page, mode);
890}
891
892
893
894
895
896
897
898
899
900
901
902
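/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */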
903static int move_to_new_page(struct page *newpage, struct page *page,
904 enum migrate_mode mode)
905{
906 struct address_space *mapping;
907 int rc = -EAGAIN;
908 bool is_lru = !__PageMovable(page);
909
910 VM_BUG_ON_PAGE(!PageLocked(page), page);
911 VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
912
913 mapping = page_mapping(page);
914
915 if (likely(is_lru)) {
916 if (!mapping)
917 rc = migrate_page(mapping, newpage, page, mode);
918 else if (mapping->a_ops->migratepage)
919
920
921
922
923
924
925
926 rc = mapping->a_ops->migratepage(mapping, newpage,
927 page, mode);
928 else
929 rc = fallback_migrate_page(mapping, newpage,
930 page, mode);
931 } else {
932
933
934
935
936 VM_BUG_ON_PAGE(!PageIsolated(page), page);
937 if (!PageMovable(page)) {
938 rc = MIGRATEPAGE_SUCCESS;
939 __ClearPageIsolated(page);
940 goto out;
941 }
942
943 rc = mapping->a_ops->migratepage(mapping, newpage,
944 page, mode);
945 WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
946 !PageIsolated(page));
947 }
948
949
950
951
952
953 if (rc == MIGRATEPAGE_SUCCESS) {
954 if (__PageMovable(page)) {
955 VM_BUG_ON_PAGE(!PageIsolated(page), page);
956
957
958
959
960
961 __ClearPageIsolated(page);
962 }
963
964
965
966
967
968
969 if (!PageMappingFlags(page))
970 page->mapping = NULL;
971 }
972out:
973 return rc;
974}
975
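/*
 * Lock the old and new pages, unmap the old page and move it to @newpage.
 * Returns MIGRATEPAGE_SUCCESS on success, -EAGAIN if the operation should be
 * retried, or another error code on permanent failure.
 */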
976static int __unmap_and_move(struct page *page, struct page *newpage,
977 int force, enum migrate_mode mode)
978{
979 int rc = -EAGAIN;
980 int page_was_mapped = 0;
981 struct anon_vma *anon_vma = NULL;
982 bool is_lru = !__PageMovable(page);
983
984 if (!trylock_page(page)) {
985 if (!force || mode == MIGRATE_ASYNC)
986 goto out;
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001 if (current->flags & PF_MEMALLOC)
1002 goto out;
1003
1004 lock_page(page);
1005 }
1006
1007 if (PageWriteback(page)) {
1008
1009
1010
1011
1012
1013
1014 switch (mode) {
1015 case MIGRATE_SYNC:
1016 case MIGRATE_SYNC_NO_COPY:
1017 break;
1018 default:
1019 rc = -EBUSY;
1020 goto out_unlock;
1021 }
1022 if (!force)
1023 goto out_unlock;
1024 wait_on_page_writeback(page);
1025 }
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041 if (PageAnon(page) && !PageKsm(page))
1042 anon_vma = page_get_anon_vma(page);
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 if (unlikely(!trylock_page(newpage)))
1053 goto out_unlock;
1054
1055 if (unlikely(!is_lru)) {
1056 rc = move_to_new_page(newpage, page, mode);
1057 goto out_unlock_both;
1058 }
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072 if (!page->mapping) {
1073 VM_BUG_ON_PAGE(PageAnon(page), page);
1074 if (page_has_private(page)) {
1075 try_to_free_buffers(page);
1076 goto out_unlock_both;
1077 }
1078 } else if (page_mapped(page)) {
1079
1080 VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
1081 page);
1082 try_to_unmap(page,
1083 TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
1084 page_was_mapped = 1;
1085 }
1086
1087 if (!page_mapped(page))
1088 rc = move_to_new_page(newpage, page, mode);
1089
1090 if (page_was_mapped)
1091 remove_migration_ptes(page,
1092 rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
1093
1094out_unlock_both:
1095 unlock_page(newpage);
1096out_unlock:
1097
1098 if (anon_vma)
1099 put_anon_vma(anon_vma);
1100 unlock_page(page);
1101out:
1102
1103
1104
1105
1106
1107
1108 if (rc == MIGRATEPAGE_SUCCESS) {
1109 if (unlikely(__PageMovable(newpage)))
1110 put_page(newpage);
1111 else
1112 putback_lru_page(newpage);
1113 }
1114
1115 return rc;
1116}
1117
/*
 * gcc 4.7 and 4.8 on arm get an ICE when inlining unmap_and_move().  Work
 * around it by forcing the function out of line.
 */
#if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM)
#define ICE_noinline noinline
#else
#define ICE_noinline
#endif
1127
1128
1129
1130
1131
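/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */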
1132static ICE_noinline int unmap_and_move(new_page_t get_new_page,
1133 free_page_t put_new_page,
1134 unsigned long private, struct page *page,
1135 int force, enum migrate_mode mode,
1136 enum migrate_reason reason)
1137{
1138 int rc = MIGRATEPAGE_SUCCESS;
1139 int *result = NULL;
1140 struct page *newpage;
1141
1142 newpage = get_new_page(page, private, &result);
1143 if (!newpage)
1144 return -ENOMEM;
1145
1146 if (page_count(page) == 1) {
1147
1148 ClearPageActive(page);
1149 ClearPageUnevictable(page);
1150 if (unlikely(__PageMovable(page))) {
1151 lock_page(page);
1152 if (!PageMovable(page))
1153 __ClearPageIsolated(page);
1154 unlock_page(page);
1155 }
1156 if (put_new_page)
1157 put_new_page(newpage, private);
1158 else
1159 put_page(newpage);
1160 goto out;
1161 }
1162
1163 if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
1164 lock_page(page);
1165 rc = split_huge_page(page);
1166 unlock_page(page);
1167 if (rc)
1168 goto out;
1169 }
1170
1171 rc = __unmap_and_move(page, newpage, force, mode);
1172 if (rc == MIGRATEPAGE_SUCCESS)
1173 set_page_owner_migrate_reason(newpage, reason);
1174
1175out:
1176 if (rc != -EAGAIN) {
1177
1178
1179
1180
1181
1182
1183 list_del(&page->lru);
1184
1185
1186
1187
1188
1189
1190 if (likely(!__PageMovable(page)))
1191 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
1192 page_is_file_cache(page), -hpage_nr_pages(page));
1193 }
1194
1195
1196
1197
1198
1199
1200 if (rc == MIGRATEPAGE_SUCCESS) {
1201 put_page(page);
1202 if (reason == MR_MEMORY_FAILURE) {
1203
1204
1205
1206
1207
1208 if (!test_set_page_hwpoison(page))
1209 num_poisoned_pages_inc();
1210 }
1211 } else {
1212 if (rc != -EAGAIN) {
1213 if (likely(!__PageMovable(page))) {
1214 putback_lru_page(page);
1215 goto put_new;
1216 }
1217
1218 lock_page(page);
1219 if (PageMovable(page))
1220 putback_movable_page(page);
1221 else
1222 __ClearPageIsolated(page);
1223 unlock_page(page);
1224 put_page(page);
1225 }
1226put_new:
1227 if (put_new_page)
1228 put_new_page(newpage, private);
1229 else
1230 put_page(newpage);
1231 }
1232
1233 if (result) {
1234 if (rc)
1235 *result = rc;
1236 else
1237 *result = page_to_nid(newpage);
1238 }
1239 return rc;
1240}
1241
/*
 * Counterpart of unmap_and_move() for hugetlb page migration.
 *
 * hugetlb pages are not kept on the LRU; the caller passes an isolated
 * hpage with an elevated refcount.  If migration of this hstate is not
 * supported, the page is put back immediately and -ENOSYS is returned.
 */
1260static int unmap_and_move_huge_page(new_page_t get_new_page,
1261 free_page_t put_new_page, unsigned long private,
1262 struct page *hpage, int force,
1263 enum migrate_mode mode, int reason)
1264{
1265 int rc = -EAGAIN;
1266 int *result = NULL;
1267 int page_was_mapped = 0;
1268 struct page *new_hpage;
1269 struct anon_vma *anon_vma = NULL;
1270
1271
1272
1273
1274
1275
1276
1277
1278 if (!hugepage_migration_supported(page_hstate(hpage))) {
1279 putback_active_hugepage(hpage);
1280 return -ENOSYS;
1281 }
1282
1283 new_hpage = get_new_page(hpage, private, &result);
1284 if (!new_hpage)
1285 return -ENOMEM;
1286
1287 if (!trylock_page(hpage)) {
1288 if (!force)
1289 goto out;
1290 switch (mode) {
1291 case MIGRATE_SYNC:
1292 case MIGRATE_SYNC_NO_COPY:
1293 break;
1294 default:
1295 goto out;
1296 }
1297 lock_page(hpage);
1298 }
1299
1300 if (PageAnon(hpage))
1301 anon_vma = page_get_anon_vma(hpage);
1302
1303 if (unlikely(!trylock_page(new_hpage)))
1304 goto put_anon;
1305
1306 if (page_mapped(hpage)) {
1307 try_to_unmap(hpage,
1308 TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
1309 page_was_mapped = 1;
1310 }
1311
1312 if (!page_mapped(hpage))
1313 rc = move_to_new_page(new_hpage, hpage, mode);
1314
1315 if (page_was_mapped)
1316 remove_migration_ptes(hpage,
1317 rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
1318
1319 unlock_page(new_hpage);
1320
1321put_anon:
1322 if (anon_vma)
1323 put_anon_vma(anon_vma);
1324
1325 if (rc == MIGRATEPAGE_SUCCESS) {
1326 hugetlb_cgroup_migrate(hpage, new_hpage);
1327 put_new_page = NULL;
1328 set_page_owner_migrate_reason(new_hpage, reason);
1329 }
1330
1331 unlock_page(hpage);
1332out:
1333 if (rc != -EAGAIN)
1334 putback_active_hugepage(hpage);
1335 if (reason == MR_MEMORY_FAILURE && !test_set_page_hwpoison(hpage))
1336 num_poisoned_pages_inc();
1337
1338
1339
1340
1341
1342
1343 if (put_new_page)
1344 put_new_page(new_hpage, private);
1345 else
1346 putback_active_hugepage(new_hpage);
1347
1348 if (result) {
1349 if (rc)
1350 *result = rc;
1351 else
1352 *result = page_to_nid(new_hpage);
1353 }
1354 return rc;
1355}
1356
/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * The caller should call putback_movable_pages() to return pages to the LRU
 * or free list only if ret != 0.
 *
 * Returns the number of pages that were not migrated, or an error code.
 */
1378int migrate_pages(struct list_head *from, new_page_t get_new_page,
1379 free_page_t put_new_page, unsigned long private,
1380 enum migrate_mode mode, int reason)
1381{
1382 int retry = 1;
1383 int nr_failed = 0;
1384 int nr_succeeded = 0;
1385 int pass = 0;
1386 struct page *page;
1387 struct page *page2;
1388 int swapwrite = current->flags & PF_SWAPWRITE;
1389 int rc;
1390
1391 if (!swapwrite)
1392 current->flags |= PF_SWAPWRITE;
1393
1394 for(pass = 0; pass < 10 && retry; pass++) {
1395 retry = 0;
1396
1397 list_for_each_entry_safe(page, page2, from, lru) {
1398 cond_resched();
1399
1400 if (PageHuge(page))
1401 rc = unmap_and_move_huge_page(get_new_page,
1402 put_new_page, private, page,
1403 pass > 2, mode, reason);
1404 else
1405 rc = unmap_and_move(get_new_page, put_new_page,
1406 private, page, pass > 2, mode,
1407 reason);
1408
1409 switch(rc) {
1410 case -ENOMEM:
1411 nr_failed++;
1412 goto out;
1413 case -EAGAIN:
1414 retry++;
1415 break;
1416 case MIGRATEPAGE_SUCCESS:
1417 nr_succeeded++;
1418 break;
1419 default:
1420
1421
1422
1423
1424
1425
1426 nr_failed++;
1427 break;
1428 }
1429 }
1430 }
1431 nr_failed += retry;
1432 rc = nr_failed;
1433out:
1434 if (nr_succeeded)
1435 count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
1436 if (nr_failed)
1437 count_vm_events(PGMIGRATE_FAIL, nr_failed);
1438 trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
1439
1440 if (!swapwrite)
1441 current->flags &= ~PF_SWAPWRITE;
1442
1443 return rc;
1444}
1445
1446#ifdef CONFIG_NUMA
1447
1448
1449
1450struct page_to_node {
1451 unsigned long addr;
1452 struct page *page;
1453 int node;
1454 int status;
1455};
1456
1457static struct page *new_page_node(struct page *p, unsigned long private,
1458 int **result)
1459{
1460 struct page_to_node *pm = (struct page_to_node *)private;
1461
1462 while (pm->node != MAX_NUMNODES && pm->page != p)
1463 pm++;
1464
1465 if (pm->node == MAX_NUMNODES)
1466 return NULL;
1467
1468 *result = &pm->status;
1469
1470 if (PageHuge(p))
1471 return alloc_huge_page_node(page_hstate(compound_head(p)),
1472 pm->node);
1473 else if (thp_migration_supported() && PageTransHuge(p)) {
1474 struct page *thp;
1475
1476 thp = alloc_pages_node(pm->node,
1477 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
1478 HPAGE_PMD_ORDER);
1479 if (!thp)
1480 return NULL;
1481 prep_transhuge_page(thp);
1482 return thp;
1483 } else
1484 return __alloc_pages_node(pm->node,
1485 GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
1486}
1487
1488
1489
1490
1491
1492
1493
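/*
 * Move a set of pages as indicated in the pm array. The addr
 * field must be set to the virtual address of the page to be moved
 * and the node field to the target node that the page should be
 * moved to.  The pm array is terminated by an entry with
 * node == MAX_NUMNODES.
 */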
1494static int do_move_page_to_node_array(struct mm_struct *mm,
1495 struct page_to_node *pm,
1496 int migrate_all)
1497{
1498 int err;
1499 struct page_to_node *pp;
1500 LIST_HEAD(pagelist);
1501
1502 down_read(&mm->mmap_sem);
1503
1504
1505
1506
1507 for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
1508 struct vm_area_struct *vma;
1509 struct page *page;
1510 struct page *head;
1511 unsigned int follflags;
1512
1513 err = -EFAULT;
1514 vma = find_vma(mm, pp->addr);
1515 if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
1516 goto set_status;
1517
1518
1519 follflags = FOLL_GET | FOLL_DUMP;
1520 if (!thp_migration_supported())
1521 follflags |= FOLL_SPLIT;
1522 page = follow_page(vma, pp->addr, follflags);
1523
1524 err = PTR_ERR(page);
1525 if (IS_ERR(page))
1526 goto set_status;
1527
1528 err = -ENOENT;
1529 if (!page)
1530 goto set_status;
1531
1532 err = page_to_nid(page);
1533
1534 if (err == pp->node)
1535
1536
1537
1538 goto put_and_set;
1539
1540 err = -EACCES;
1541 if (page_mapcount(page) > 1 &&
1542 !migrate_all)
1543 goto put_and_set;
1544
1545 if (PageHuge(page)) {
1546 if (PageHead(page)) {
1547 isolate_huge_page(page, &pagelist);
1548 err = 0;
1549 pp->page = page;
1550 }
1551 goto put_and_set;
1552 }
1553
1554 pp->page = compound_head(page);
1555 head = compound_head(page);
1556 err = isolate_lru_page(head);
1557 if (!err) {
1558 list_add_tail(&head->lru, &pagelist);
1559 mod_node_page_state(page_pgdat(head),
1560 NR_ISOLATED_ANON + page_is_file_cache(head),
1561 hpage_nr_pages(head));
1562 }
1563put_and_set:
1564
1565
1566
1567
1568
1569 put_page(page);
1570set_status:
1571 pp->status = err;
1572 }
1573
1574 err = 0;
1575 if (!list_empty(&pagelist)) {
1576 err = migrate_pages(&pagelist, new_page_node, NULL,
1577 (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
1578 if (err)
1579 putback_movable_pages(&pagelist);
1580 }
1581
1582 up_read(&mm->mmap_sem);
1583 return err;
1584}
1585
1586
1587
1588
1589
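/*
 * Migrate an array of page address onto an array of nodes and fill
 * the corresponding array of status.
 */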
1590static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1591 unsigned long nr_pages,
1592 const void __user * __user *pages,
1593 const int __user *nodes,
1594 int __user *status, int flags)
1595{
1596 struct page_to_node *pm;
1597 unsigned long chunk_nr_pages;
1598 unsigned long chunk_start;
1599 int err;
1600
1601 err = -ENOMEM;
1602 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
1603 if (!pm)
1604 goto out;
1605
1606 migrate_prep();
1607
1608
1609
1610
1611
1612 chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
1613
1614 for (chunk_start = 0;
1615 chunk_start < nr_pages;
1616 chunk_start += chunk_nr_pages) {
1617 int j;
1618
1619 if (chunk_start + chunk_nr_pages > nr_pages)
1620 chunk_nr_pages = nr_pages - chunk_start;
1621
1622
1623 for (j = 0; j < chunk_nr_pages; j++) {
1624 const void __user *p;
1625 int node;
1626
1627 err = -EFAULT;
1628 if (get_user(p, pages + j + chunk_start))
1629 goto out_pm;
1630 pm[j].addr = (unsigned long) p;
1631
1632 if (get_user(node, nodes + j + chunk_start))
1633 goto out_pm;
1634
1635 err = -ENODEV;
1636 if (node < 0 || node >= MAX_NUMNODES)
1637 goto out_pm;
1638
1639 if (!node_state(node, N_MEMORY))
1640 goto out_pm;
1641
1642 err = -EACCES;
1643 if (!node_isset(node, task_nodes))
1644 goto out_pm;
1645
1646 pm[j].node = node;
1647 }
1648
1649
1650 pm[chunk_nr_pages].node = MAX_NUMNODES;
1651
1652
1653 err = do_move_page_to_node_array(mm, pm,
1654 flags & MPOL_MF_MOVE_ALL);
1655 if (err < 0)
1656 goto out_pm;
1657
1658
1659 for (j = 0; j < chunk_nr_pages; j++)
1660 if (put_user(pm[j].status, status + j + chunk_start)) {
1661 err = -EFAULT;
1662 goto out_pm;
1663 }
1664 }
1665 err = 0;
1666
1667out_pm:
1668 free_page((unsigned long)pm);
1669out:
1670 return err;
1671}
1672
1673
1674
1675
1676static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
1677 const void __user **pages, int *status)
1678{
1679 unsigned long i;
1680
1681 down_read(&mm->mmap_sem);
1682
1683 for (i = 0; i < nr_pages; i++) {
1684 unsigned long addr = (unsigned long)(*pages);
1685 struct vm_area_struct *vma;
1686 struct page *page;
1687 int err = -EFAULT;
1688
1689 vma = find_vma(mm, addr);
1690 if (!vma || addr < vma->vm_start)
1691 goto set_status;
1692
1693
1694 page = follow_page(vma, addr, FOLL_DUMP);
1695
1696 err = PTR_ERR(page);
1697 if (IS_ERR(page))
1698 goto set_status;
1699
1700 err = page ? page_to_nid(page) : -ENOENT;
1701set_status:
1702 *status = err;
1703
1704 pages++;
1705 status++;
1706 }
1707
1708 up_read(&mm->mmap_sem);
1709}
1710
1711
1712
1713
1714
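/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */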
1715static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1716 const void __user * __user *pages,
1717 int __user *status)
1718{
1719#define DO_PAGES_STAT_CHUNK_NR 16
1720 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1721 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1722
1723 while (nr_pages) {
1724 unsigned long chunk_nr;
1725
1726 chunk_nr = nr_pages;
1727 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
1728 chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1729
1730 if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
1731 break;
1732
1733 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1734
1735 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
1736 break;
1737
1738 pages += chunk_nr;
1739 status += chunk_nr;
1740 nr_pages -= chunk_nr;
1741 }
1742 return nr_pages ? -EFAULT : 0;
1743}
1744
1745
1746
1747
1748
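/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */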
1749SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1750 const void __user * __user *, pages,
1751 const int __user *, nodes,
1752 int __user *, status, int, flags)
1753{
1754 struct task_struct *task;
1755 struct mm_struct *mm;
1756 int err;
1757 nodemask_t task_nodes;
1758
1759
1760 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
1761 return -EINVAL;
1762
1763 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
1764 return -EPERM;
1765
1766
1767 rcu_read_lock();
1768 task = pid ? find_task_by_vpid(pid) : current;
1769 if (!task) {
1770 rcu_read_unlock();
1771 return -ESRCH;
1772 }
1773 get_task_struct(task);
1774
1775
1776
1777
1778
1779 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
1780 rcu_read_unlock();
1781 err = -EPERM;
1782 goto out;
1783 }
1784 rcu_read_unlock();
1785
1786 err = security_task_movememory(task);
1787 if (err)
1788 goto out;
1789
1790 task_nodes = cpuset_mems_allowed(task);
1791 mm = get_task_mm(task);
1792 put_task_struct(task);
1793
1794 if (!mm)
1795 return -EINVAL;
1796
1797 if (nodes)
1798 err = do_pages_move(mm, task_nodes, nr_pages, pages,
1799 nodes, status, flags);
1800 else
1801 err = do_pages_stat(mm, nr_pages, pages, status);
1802
1803 mmput(mm);
1804 return err;
1805
1806out:
1807 put_task_struct(task);
1808 return err;
1809}
1810
1811#ifdef CONFIG_NUMA_BALANCING
1812
1813
1814
1815
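/*
 * Returns true if this is a safe migration target node for misplaced NUMA
 * pages. Currently it only checks the watermarks which is crude.
 */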
1816static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
1817 unsigned long nr_migrate_pages)
1818{
1819 int z;
1820
1821 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
1822 struct zone *zone = pgdat->node_zones + z;
1823
1824 if (!populated_zone(zone))
1825 continue;
1826
1827
1828 if (!zone_watermark_ok(zone, 0,
1829 high_wmark_pages(zone) +
1830 nr_migrate_pages,
1831 0, 0))
1832 continue;
1833 return true;
1834 }
1835 return false;
1836}
1837
1838static struct page *alloc_misplaced_dst_page(struct page *page,
1839 unsigned long data,
1840 int **result)
1841{
1842 int nid = (int) data;
1843 struct page *newpage;
1844
1845 newpage = __alloc_pages_node(nid,
1846 (GFP_HIGHUSER_MOVABLE |
1847 __GFP_THISNODE | __GFP_NOMEMALLOC |
1848 __GFP_NORETRY | __GFP_NOWARN) &
1849 ~__GFP_RECLAIM, 0);
1850
1851 return newpage;
1852}
1853
1854
1855
1856
1857
1858
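/*
 * NUMA-balancing page migration rate limiting: do not migrate more than
 * ratelimit_pages within each migrate_interval_millisecs window per node.
 */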
1859static unsigned int migrate_interval_millisecs __read_mostly = 100;
1860static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
1861
1862
1863static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
1864 unsigned long nr_pages)
1865{
1866
1867
1868
1869
1870
1871 if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
1872 spin_lock(&pgdat->numabalancing_migrate_lock);
1873 pgdat->numabalancing_migrate_nr_pages = 0;
1874 pgdat->numabalancing_migrate_next_window = jiffies +
1875 msecs_to_jiffies(migrate_interval_millisecs);
1876 spin_unlock(&pgdat->numabalancing_migrate_lock);
1877 }
1878 if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
1879 trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
1880 nr_pages);
1881 return true;
1882 }
1883
1884
1885
1886
1887
1888
1889
1890 pgdat->numabalancing_migrate_nr_pages += nr_pages;
1891 return false;
1892}
1893
1894static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
1895{
1896 int page_lru;
1897
1898 VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
1899
1900
1901 if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
1902 return 0;
1903
1904 if (isolate_lru_page(page))
1905 return 0;
1906
1907
1908
1909
1910
1911
1912
1913
1914 if (PageTransHuge(page) && page_count(page) != 3) {
1915 putback_lru_page(page);
1916 return 0;
1917 }
1918
1919 page_lru = page_is_file_cache(page);
1920 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
1921 hpage_nr_pages(page));
1922
1923
1924
1925
1926
1927
1928 put_page(page);
1929 return 1;
1930}
1931
1932bool pmd_trans_migrating(pmd_t pmd)
1933{
1934 struct page *page = pmd_page(pmd);
1935 return PageLocked(page);
1936}
1937
1938
1939
1940
1941
1942
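/*
 * Attempt to migrate a misplaced page to the specified destination
 * node. Caller is expected to have an elevated reference count on
 * the page that will be dropped by this function before returning.
 */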
1943int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
1944 int node)
1945{
1946 pg_data_t *pgdat = NODE_DATA(node);
1947 int isolated;
1948 int nr_remaining;
1949 LIST_HEAD(migratepages);
1950
1951
1952
1953
1954
1955 if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
1956 (vma->vm_flags & VM_EXEC))
1957 goto out;
1958
1959
1960
1961
1962
1963
1964 if (numamigrate_update_ratelimit(pgdat, 1))
1965 goto out;
1966
1967 isolated = numamigrate_isolate_page(pgdat, page);
1968 if (!isolated)
1969 goto out;
1970
1971 list_add(&page->lru, &migratepages);
1972 nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
1973 NULL, node, MIGRATE_ASYNC,
1974 MR_NUMA_MISPLACED);
1975 if (nr_remaining) {
1976 if (!list_empty(&migratepages)) {
1977 list_del(&page->lru);
1978 dec_node_page_state(page, NR_ISOLATED_ANON +
1979 page_is_file_cache(page));
1980 putback_lru_page(page);
1981 }
1982 isolated = 0;
1983 } else
1984 count_vm_numa_event(NUMA_PAGE_MIGRATE);
1985 BUG_ON(!list_empty(&migratepages));
1986 return isolated;
1987
1988out:
1989 put_page(page);
1990 return 0;
1991}
1992#endif
1993
1994#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1995
1996
1997
1998
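/*
 * Migrates a THP to a given target node. page must be locked and is unlocked
 * before returning.
 */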
1999int migrate_misplaced_transhuge_page(struct mm_struct *mm,
2000 struct vm_area_struct *vma,
2001 pmd_t *pmd, pmd_t entry,
2002 unsigned long address,
2003 struct page *page, int node)
2004{
2005 spinlock_t *ptl;
2006 pg_data_t *pgdat = NODE_DATA(node);
2007 int isolated = 0;
2008 struct page *new_page = NULL;
2009 int page_lru = page_is_file_cache(page);
2010 unsigned long mmun_start = address & HPAGE_PMD_MASK;
2011 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
2012
2013
2014
2015
2016
2017
2018 if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
2019 goto out_dropref;
2020
2021 new_page = alloc_pages_node(node,
2022 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
2023 HPAGE_PMD_ORDER);
2024 if (!new_page)
2025 goto out_fail;
2026 prep_transhuge_page(new_page);
2027
2028 isolated = numamigrate_isolate_page(pgdat, page);
2029 if (!isolated) {
2030 put_page(new_page);
2031 goto out_fail;
2032 }
2033
2034
2035 __SetPageLocked(new_page);
2036 if (PageSwapBacked(page))
2037 __SetPageSwapBacked(new_page);
2038
2039
2040 new_page->mapping = page->mapping;
2041 new_page->index = page->index;
2042 migrate_page_copy(new_page, page);
2043 WARN_ON(PageLRU(new_page));
2044
2045
2046 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2047 ptl = pmd_lock(mm, pmd);
2048 if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
2049 spin_unlock(ptl);
2050 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2051
2052
2053 if (TestClearPageActive(new_page))
2054 SetPageActive(page);
2055 if (TestClearPageUnevictable(new_page))
2056 SetPageUnevictable(page);
2057
2058 unlock_page(new_page);
2059 put_page(new_page);
2060
2061
2062 get_page(page);
2063 putback_lru_page(page);
2064 mod_node_page_state(page_pgdat(page),
2065 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
2066
2067 goto out_unlock;
2068 }
2069
2070 entry = mk_huge_pmd(new_page, vma->vm_page_prot);
2071 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
2072
2073
2074
2075
2076
2077
2078
2079
2080 flush_cache_range(vma, mmun_start, mmun_end);
2081 page_add_anon_rmap(new_page, vma, mmun_start, true);
2082 pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
2083 set_pmd_at(mm, mmun_start, pmd, entry);
2084 update_mmu_cache_pmd(vma, address, &entry);
2085
2086 page_ref_unfreeze(page, 2);
2087 mlock_migrate_page(new_page, page);
2088 page_remove_rmap(page, true);
2089 set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
2090
2091 spin_unlock(ptl);
2092 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2093
2094
2095 get_page(new_page);
2096 putback_lru_page(new_page);
2097
2098 unlock_page(new_page);
2099 unlock_page(page);
2100 put_page(page);
2101 put_page(page);
2102
2103 count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
2104 count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
2105
2106 mod_node_page_state(page_pgdat(page),
2107 NR_ISOLATED_ANON + page_lru,
2108 -HPAGE_PMD_NR);
2109 return isolated;
2110
2111out_fail:
2112 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
2113out_dropref:
2114 ptl = pmd_lock(mm, pmd);
2115 if (pmd_same(*pmd, entry)) {
2116 entry = pmd_modify(entry, vma->vm_page_prot);
2117 set_pmd_at(mm, mmun_start, pmd, entry);
2118 update_mmu_cache_pmd(vma, address, &entry);
2119 }
2120 spin_unlock(ptl);
2121
2122out_unlock:
2123 unlock_page(page);
2124 put_page(page);
2125 return 0;
2126}
2127#endif
2128
2129#endif
2130
2131#if defined(CONFIG_MIGRATE_VMA_HELPER)
2132struct migrate_vma {
2133 struct vm_area_struct *vma;
2134 unsigned long *dst;
2135 unsigned long *src;
2136 unsigned long cpages;
2137 unsigned long npages;
2138 unsigned long start;
2139 unsigned long end;
2140};
2141
2142static int migrate_vma_collect_hole(unsigned long start,
2143 unsigned long end,
2144 struct mm_walk *walk)
2145{
2146 struct migrate_vma *migrate = walk->private;
2147 unsigned long addr;
2148
2149 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
2150 migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
2151 migrate->dst[migrate->npages] = 0;
2152 migrate->npages++;
2153 migrate->cpages++;
2154 }
2155
2156 return 0;
2157}
2158
2159static int migrate_vma_collect_skip(unsigned long start,
2160 unsigned long end,
2161 struct mm_walk *walk)
2162{
2163 struct migrate_vma *migrate = walk->private;
2164 unsigned long addr;
2165
2166 for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
2167 migrate->dst[migrate->npages] = 0;
2168 migrate->src[migrate->npages++] = 0;
2169 }
2170
2171 return 0;
2172}
2173
2174static int migrate_vma_collect_pmd(pmd_t *pmdp,
2175 unsigned long start,
2176 unsigned long end,
2177 struct mm_walk *walk)
2178{
2179 struct migrate_vma *migrate = walk->private;
2180 struct vm_area_struct *vma = walk->vma;
2181 struct mm_struct *mm = vma->vm_mm;
2182 unsigned long addr = start, unmapped = 0;
2183 spinlock_t *ptl;
2184 pte_t *ptep;
2185
2186again:
2187 if (pmd_none(*pmdp))
2188 return migrate_vma_collect_hole(start, end, walk);
2189
2190 if (pmd_trans_huge(*pmdp)) {
2191 struct page *page;
2192
2193 ptl = pmd_lock(mm, pmdp);
2194 if (unlikely(!pmd_trans_huge(*pmdp))) {
2195 spin_unlock(ptl);
2196 goto again;
2197 }
2198
2199 page = pmd_page(*pmdp);
2200 if (is_huge_zero_page(page)) {
2201 spin_unlock(ptl);
2202 split_huge_pmd(vma, pmdp, addr);
2203 if (pmd_trans_unstable(pmdp))
2204 return migrate_vma_collect_skip(start, end,
2205 walk);
2206 } else {
2207 int ret;
2208
2209 get_page(page);
2210 spin_unlock(ptl);
2211 if (unlikely(!trylock_page(page)))
2212 return migrate_vma_collect_skip(start, end,
2213 walk);
2214 ret = split_huge_page(page);
2215 unlock_page(page);
2216 put_page(page);
2217 if (ret)
2218 return migrate_vma_collect_skip(start, end,
2219 walk);
2220 if (pmd_none(*pmdp))
2221 return migrate_vma_collect_hole(start, end,
2222 walk);
2223 }
2224 }
2225
2226 if (unlikely(pmd_bad(*pmdp)))
2227 return migrate_vma_collect_skip(start, end, walk);
2228
2229 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2230 arch_enter_lazy_mmu_mode();
2231
2232 for (; addr < end; addr += PAGE_SIZE, ptep++) {
2233 unsigned long mpfn, pfn;
2234 struct page *page;
2235 swp_entry_t entry;
2236 pte_t pte;
2237
2238 pte = *ptep;
2239 pfn = pte_pfn(pte);
2240
2241 if (pte_none(pte)) {
2242 mpfn = MIGRATE_PFN_MIGRATE;
2243 migrate->cpages++;
2244 pfn = 0;
2245 goto next;
2246 }
2247
2248 if (!pte_present(pte)) {
2249 mpfn = pfn = 0;
2250
2251
2252
2253
2254
2255
2256 entry = pte_to_swp_entry(pte);
2257 if (!is_device_private_entry(entry))
2258 goto next;
2259
2260 page = device_private_entry_to_page(entry);
2261 mpfn = migrate_pfn(page_to_pfn(page))|
2262 MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
2263 if (is_write_device_private_entry(entry))
2264 mpfn |= MIGRATE_PFN_WRITE;
2265 } else {
2266 if (is_zero_pfn(pfn)) {
2267 mpfn = MIGRATE_PFN_MIGRATE;
2268 migrate->cpages++;
2269 pfn = 0;
2270 goto next;
2271 }
2272 page = _vm_normal_page(migrate->vma, addr, pte, true);
2273 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
2274 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
2275 }
2276
2277
2278 if (!page || !page->mapping || PageTransCompound(page)) {
2279 mpfn = pfn = 0;
2280 goto next;
2281 }
2282 pfn = page_to_pfn(page);
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293 get_page(page);
2294 migrate->cpages++;
2295
2296
2297
2298
2299
2300
2301 if (trylock_page(page)) {
2302 pte_t swp_pte;
2303
2304 mpfn |= MIGRATE_PFN_LOCKED;
2305 ptep_get_and_clear(mm, addr, ptep);
2306
2307
2308 entry = make_migration_entry(page, pte_write(pte));
2309 swp_pte = swp_entry_to_pte(entry);
2310 if (pte_soft_dirty(pte))
2311 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2312 set_pte_at(mm, addr, ptep, swp_pte);
2313
2314
2315
2316
2317
2318
2319 page_remove_rmap(page, false);
2320 put_page(page);
2321
2322 if (pte_present(pte))
2323 unmapped++;
2324 }
2325
2326next:
2327 migrate->dst[migrate->npages] = 0;
2328 migrate->src[migrate->npages++] = mpfn;
2329 }
2330 arch_leave_lazy_mmu_mode();
2331 pte_unmap_unlock(ptep - 1, ptl);
2332
2333
2334 if (unmapped)
2335 flush_tlb_range(walk->vma, start, end);
2336
2337 return 0;
2338}
2339
2340
2341
2342
2343
2344
2345
2346
2347
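/*
 * migrate_vma_collect() - collect pages over a range of virtual addresses
 * @migrate: migrate struct containing all migration information
 *
 * This will walk the CPU page table. For each virtual address backed by a
 * valid page, it updates the src array and takes a reference on the page,
 * in order to pin the page until we lock it and unmap it.
 */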
2348static void migrate_vma_collect(struct migrate_vma *migrate)
2349{
2350 struct mm_walk mm_walk;
2351
2352 mm_walk.pmd_entry = migrate_vma_collect_pmd;
2353 mm_walk.pte_entry = NULL;
2354 mm_walk.pte_hole = migrate_vma_collect_hole;
2355 mm_walk.hugetlb_entry = NULL;
2356 mm_walk.test_walk = NULL;
2357 mm_walk.vma = migrate->vma;
2358 mm_walk.mm = migrate->vma->vm_mm;
2359 mm_walk.private = migrate;
2360
2361 mmu_notifier_invalidate_range_start(mm_walk.mm,
2362 migrate->start,
2363 migrate->end);
2364 walk_page_range(migrate->start, migrate->end, &mm_walk);
2365 mmu_notifier_invalidate_range_end(mm_walk.mm,
2366 migrate->start,
2367 migrate->end);
2368
2369 migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
2370}
2371
2372
2373
2374
2375
2376
2377
2378
2379
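/*
 * migrate_vma_check_page() - check if page is pinned or not
 * @page: struct page to check
 *
 * Pinned pages cannot be migrated. This is the same test as in
 * migrate_page_move_mapping(), except that here we allow migration of a
 * ZONE_DEVICE page.
 */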
2380static bool migrate_vma_check_page(struct page *page)
2381{
2382
2383
2384
2385
2386
2387 int extra = 1;
2388
2389
2390
2391
2392
2393
2394 if (PageCompound(page))
2395 return false;
2396
2397
2398 if (is_zone_device_page(page)) {
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412 if (is_device_private_page(page))
2413 return true;
2414
2415
2416
2417
2418
2419 if (!is_device_public_page(page))
2420 return false;
2421 extra++;
2422 }
2423
2424
2425 if (page_mapping(page))
2426 extra += 1 + page_has_private(page);
2427
2428 if ((page_count(page) - extra) > page_mapcount(page))
2429 return false;
2430
2431 return true;
2432}
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
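/*
 * migrate_vma_prepare() - lock pages and isolate them from the lru
 * @migrate: migrate struct containing all migration information
 *
 * This locks pages that have been collected by migrate_vma_collect(). Once
 * each page is locked it is isolated from the lru (for regular pages). Pages
 * that cannot be locked or isolated have their entry in the src array
 * cleared and, if they had already been unmapped, their mapping is restored.
 */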
2443static void migrate_vma_prepare(struct migrate_vma *migrate)
2444{
2445 const unsigned long npages = migrate->npages;
2446 const unsigned long start = migrate->start;
2447 unsigned long addr, i, restore = 0;
2448 bool allow_drain = true;
2449
2450 lru_add_drain();
2451
2452 for (i = 0; (i < npages) && migrate->cpages; i++) {
2453 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2454 bool remap = true;
2455
2456 if (!page)
2457 continue;
2458
2459 if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
2460
2461
2462
2463
2464
2465
2466
2467
2468 if (!trylock_page(page)) {
2469 migrate->src[i] = 0;
2470 migrate->cpages--;
2471 put_page(page);
2472 continue;
2473 }
2474 remap = false;
2475 migrate->src[i] |= MIGRATE_PFN_LOCKED;
2476 }
2477
2478
2479 if (!is_zone_device_page(page)) {
2480 if (!PageLRU(page) && allow_drain) {
2481
2482 lru_add_drain_all();
2483 allow_drain = false;
2484 }
2485
2486 if (isolate_lru_page(page)) {
2487 if (remap) {
2488 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2489 migrate->cpages--;
2490 restore++;
2491 } else {
2492 migrate->src[i] = 0;
2493 unlock_page(page);
2494 migrate->cpages--;
2495 put_page(page);
2496 }
2497 continue;
2498 }
2499
2500
2501 put_page(page);
2502 }
2503
2504 if (!migrate_vma_check_page(page)) {
2505 if (remap) {
2506 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2507 migrate->cpages--;
2508 restore++;
2509
2510 if (!is_zone_device_page(page)) {
2511 get_page(page);
2512 putback_lru_page(page);
2513 }
2514 } else {
2515 migrate->src[i] = 0;
2516 unlock_page(page);
2517 migrate->cpages--;
2518
2519 if (!is_zone_device_page(page))
2520 putback_lru_page(page);
2521 else
2522 put_page(page);
2523 }
2524 }
2525 }
2526
2527 for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
2528 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2529
2530 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2531 continue;
2532
2533 remove_migration_pte(page, migrate->vma, addr, page);
2534
2535 migrate->src[i] = 0;
2536 unlock_page(page);
2537 put_page(page);
2538 restore--;
2539 }
2540}
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
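/*
 * migrate_vma_unmap() - replace page mapping with special migration pte entry
 * @migrate: migrate struct containing all migration information
 *
 * Replace page mapping (CPU page table pte) with a special migration pte
 * entry and check again if it has been pinned. Pinned pages are restored
 * because we cannot migrate them.
 */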
2553static void migrate_vma_unmap(struct migrate_vma *migrate)
2554{
2555 int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
2556 const unsigned long npages = migrate->npages;
2557 const unsigned long start = migrate->start;
2558 unsigned long addr, i, restore = 0;
2559
2560 for (i = 0; i < npages; i++) {
2561 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2562
2563 if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
2564 continue;
2565
2566 if (page_mapped(page)) {
2567 try_to_unmap(page, flags);
2568 if (page_mapped(page))
2569 goto restore;
2570 }
2571
2572 if (migrate_vma_check_page(page))
2573 continue;
2574
2575restore:
2576 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2577 migrate->cpages--;
2578 restore++;
2579 }
2580
2581 for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
2582 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2583
2584 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2585 continue;
2586
2587 remove_migration_ptes(page, page, false);
2588
2589 migrate->src[i] = 0;
2590 unlock_page(page);
2591 restore--;
2592
2593 if (is_zone_device_page(page))
2594 put_page(page);
2595 else
2596 putback_lru_page(page);
2597 }
2598}
2599
2600static void migrate_vma_insert_page(struct migrate_vma *migrate,
2601 unsigned long addr,
2602 struct page *page,
2603 unsigned long *src,
2604 unsigned long *dst)
2605{
2606 struct vm_area_struct *vma = migrate->vma;
2607 struct mm_struct *mm = vma->vm_mm;
2608 struct mem_cgroup *memcg;
2609 bool flush = false;
2610 spinlock_t *ptl;
2611 pte_t entry;
2612 pgd_t *pgdp;
2613 p4d_t *p4dp;
2614 pud_t *pudp;
2615 pmd_t *pmdp;
2616 pte_t *ptep;
2617
2618
2619 if (!vma_is_anonymous(vma))
2620 goto abort;
2621
2622 pgdp = pgd_offset(mm, addr);
2623 p4dp = p4d_alloc(mm, pgdp, addr);
2624 if (!p4dp)
2625 goto abort;
2626 pudp = pud_alloc(mm, p4dp, addr);
2627 if (!pudp)
2628 goto abort;
2629 pmdp = pmd_alloc(mm, pudp, addr);
2630 if (!pmdp)
2631 goto abort;
2632
2633 if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
2634 goto abort;
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646 if (pte_alloc(mm, pmdp, addr))
2647 goto abort;
2648
2649
2650 if (unlikely(pmd_trans_unstable(pmdp)))
2651 goto abort;
2652
2653 if (unlikely(anon_vma_prepare(vma)))
2654 goto abort;
2655 if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
2656 goto abort;
2657
2658
2659
2660
2661
2662
2663 __SetPageUptodate(page);
2664
2665 if (is_zone_device_page(page)) {
2666 if (is_device_private_page(page)) {
2667 swp_entry_t swp_entry;
2668
2669 swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
2670 entry = swp_entry_to_pte(swp_entry);
2671 } else if (is_device_public_page(page)) {
2672 entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot)));
2673 if (vma->vm_flags & VM_WRITE)
2674 entry = pte_mkwrite(pte_mkdirty(entry));
2675 entry = pte_mkdevmap(entry);
2676 }
2677 } else {
2678 entry = mk_pte(page, vma->vm_page_prot);
2679 if (vma->vm_flags & VM_WRITE)
2680 entry = pte_mkwrite(pte_mkdirty(entry));
2681 }
2682
2683 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2684
2685 if (pte_present(*ptep)) {
2686 unsigned long pfn = pte_pfn(*ptep);
2687
2688 if (!is_zero_pfn(pfn)) {
2689 pte_unmap_unlock(ptep, ptl);
2690 mem_cgroup_cancel_charge(page, memcg, false);
2691 goto abort;
2692 }
2693 flush = true;
2694 } else if (!pte_none(*ptep)) {
2695 pte_unmap_unlock(ptep, ptl);
2696 mem_cgroup_cancel_charge(page, memcg, false);
2697 goto abort;
2698 }
2699
2700
2701
2702
2703
2704 if (userfaultfd_missing(vma)) {
2705 pte_unmap_unlock(ptep, ptl);
2706 mem_cgroup_cancel_charge(page, memcg, false);
2707 goto abort;
2708 }
2709
2710 inc_mm_counter(mm, MM_ANONPAGES);
2711 page_add_new_anon_rmap(page, vma, addr, false);
2712 mem_cgroup_commit_charge(page, memcg, false, false);
2713 if (!is_zone_device_page(page))
2714 lru_cache_add_active_or_unevictable(page, vma);
2715 get_page(page);
2716
2717 if (flush) {
2718 flush_cache_page(vma, addr, pte_pfn(*ptep));
2719 ptep_clear_flush_notify(vma, addr, ptep);
2720 set_pte_at_notify(mm, addr, ptep, entry);
2721 update_mmu_cache(vma, addr, ptep);
2722 } else {
2723
2724 set_pte_at(mm, addr, ptep, entry);
2725 update_mmu_cache(vma, addr, ptep);
2726 }
2727
2728 pte_unmap_unlock(ptep, ptl);
2729 *src = MIGRATE_PFN_MIGRATE;
2730 return;
2731
2732abort:
2733 *src &= ~MIGRATE_PFN_MIGRATE;
2734}
2735
2736
2737
2738
2739
2740
2741
2742
2743
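/*
 * migrate_vma_pages() - migrate meta-data from src page to dst page
 * @migrate: migrate struct containing all migration information
 *
 * This migrates struct page meta-data from source struct page to destination
 * struct page. This effectively finishes the migration from the CPU point of
 * view.
 */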
2744static void migrate_vma_pages(struct migrate_vma *migrate)
2745{
2746 const unsigned long npages = migrate->npages;
2747 const unsigned long start = migrate->start;
2748 struct vm_area_struct *vma = migrate->vma;
2749 struct mm_struct *mm = vma->vm_mm;
2750 unsigned long addr, i, mmu_start;
2751 bool notified = false;
2752
2753 for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
2754 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
2755 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2756 struct address_space *mapping;
2757 int r;
2758
2759 if (!newpage) {
2760 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2761 continue;
2762 }
2763
2764 if (!page) {
2765 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE)) {
2766 continue;
2767 }
2768 if (!notified) {
2769 mmu_start = addr;
2770 notified = true;
2771 mmu_notifier_invalidate_range_start(mm,
2772 mmu_start,
2773 migrate->end);
2774 }
2775 migrate_vma_insert_page(migrate, addr, newpage,
2776 &migrate->src[i],
2777 &migrate->dst[i]);
2778 continue;
2779 }
2780
2781 mapping = page_mapping(page);
2782
2783 if (is_zone_device_page(newpage)) {
2784 if (is_device_private_page(newpage)) {
2785
2786
2787
2788
2789 if (mapping) {
2790 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2791 continue;
2792 }
2793 } else if (!is_device_public_page(newpage)) {
2794
2795
2796
2797
2798 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2799 continue;
2800 }
2801 }
2802
2803 r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
2804 if (r != MIGRATEPAGE_SUCCESS)
2805 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2806 }
2807
2808 if (notified)
2809 mmu_notifier_invalidate_range_end(mm, mmu_start,
2810 migrate->end);
2811}
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
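/*
 * migrate_vma_finalize() - restore CPU page table entry
 * @migrate: migrate struct containing all migration information
 *
 * This replaces the special migration pte entry with either a mapping to the
 * new page if migration was successful for that page, or to the original page
 * otherwise.
 *
 * This also unlocks the pages and puts them back on the lru, or drops the
 * extra refcount, for device pages.
 */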
2824static void migrate_vma_finalize(struct migrate_vma *migrate)
2825{
2826 const unsigned long npages = migrate->npages;
2827 unsigned long i;
2828
2829 for (i = 0; i < npages; i++) {
2830 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
2831 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2832
2833 if (!page) {
2834 if (newpage) {
2835 unlock_page(newpage);
2836 put_page(newpage);
2837 }
2838 continue;
2839 }
2840
2841 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
2842 if (newpage) {
2843 unlock_page(newpage);
2844 put_page(newpage);
2845 }
2846 newpage = page;
2847 }
2848
2849 remove_migration_ptes(page, newpage, false);
2850 unlock_page(page);
2851 migrate->cpages--;
2852
2853 if (is_zone_device_page(page))
2854 put_page(page);
2855 else
2856 putback_lru_page(page);
2857
2858 if (newpage != page) {
2859 unlock_page(newpage);
2860 if (is_zone_device_page(newpage))
2861 put_page(newpage);
2862 else
2863 putback_lru_page(newpage);
2864 }
2865 }
2866}
2867
/*
 * migrate_vma() - migrate a range of memory inside a vma
 *
 * @ops:     migration callbacks (alloc_and_copy() and finalize_and_map())
 * @vma:     virtual memory area containing the range to be migrated
 * @start:   start address of the range to migrate (inclusive)
 * @end:     end address of the range to migrate (exclusive)
 * @src:     array of migrate_pfn entries describing the source pages
 * @dst:     array of migrate_pfn entries to be filled with destination pages
 * @private: opaque pointer passed back to the callbacks
 *
 * Returns 0 on success (even if no page ended up being migrated), or a
 * negative error code on invalid arguments.
 *
 * The range is processed in several steps: migrate_vma_collect() walks the
 * CPU page table and fills @src, migrate_vma_prepare() locks and isolates
 * the source pages, migrate_vma_unmap() replaces their mappings with
 * migration entries, the caller's alloc_and_copy() callback allocates
 * destination pages and copies the data, migrate_vma_pages() moves the
 * struct page metadata, and finalize_and_map() lets the caller update its
 * own (for example device) page tables before migrate_vma_finalize()
 * restores the CPU page table entries.  Entries in @src that lose the
 * MIGRATE_PFN_MIGRATE flag along the way are not migrated and are restored
 * to their original state.
 */
2919int migrate_vma(const struct migrate_vma_ops *ops,
2920 struct vm_area_struct *vma,
2921 unsigned long start,
2922 unsigned long end,
2923 unsigned long *src,
2924 unsigned long *dst,
2925 void *private)
2926{
2927 struct migrate_vma migrate;
2928
2929
2930 start &= PAGE_MASK;
2931 end &= PAGE_MASK;
2932 if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL))
2933 return -EINVAL;
2934 if (start < vma->vm_start || start >= vma->vm_end)
2935 return -EINVAL;
2936 if (end <= vma->vm_start || end > vma->vm_end)
2937 return -EINVAL;
2938 if (!ops || !src || !dst || start >= end)
2939 return -EINVAL;
2940
2941 memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
2942 migrate.src = src;
2943 migrate.dst = dst;
2944 migrate.start = start;
2945 migrate.npages = 0;
2946 migrate.cpages = 0;
2947 migrate.end = end;
2948 migrate.vma = vma;
2949
2950
2951 migrate_vma_collect(&migrate);
2952 if (!migrate.cpages)
2953 return 0;
2954
2955
2956 migrate_vma_prepare(&migrate);
2957 if (!migrate.cpages)
2958 return 0;
2959
2960
2961 migrate_vma_unmap(&migrate);
2962 if (!migrate.cpages)
2963 return 0;
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973 ops->alloc_and_copy(vma, src, dst, start, end, private);
2974
2975
2976 migrate_vma_pages(&migrate);
2977
2978 ops->finalize_and_map(vma, src, dst, start, end, private);
2979
2980
2981 migrate_vma_finalize(&migrate);
2982
2983 return 0;
2984}
2985EXPORT_SYMBOL(migrate_vma);
2986#endif
2987