// SPDX-License-Identifier: GPL-2.0
/*
 * Memory Migration functionality - linux/mm/migrate.c
 *
 * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
 *
 * Page migration was first developed in the context of the memory hotplug
 * project. The main authors of the migration code are:
 *
 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
 * Hirokazu Takahashi <taka@valinux.co.jp>
 * Dave Hansen <haveblue@us.ibm.com>
 * Christoph Lameter
 */

#include <linux/migrate.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/mm_inline.h>
#include <linux/nsproxy.h>
#include <linux/pagevec.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/topology.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/pagewalk.h>
#include <linux/pfn_t.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/page_owner.h>
#include <linux/sched/mm.h>
#include <linux/ptrace.h>
#include <linux/oom.h>
#include <linux/memory.h>

#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <trace/events/migrate.h>

#include "internal.h"
60
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
	struct address_space *mapping;

	/*
	 * Avoid burning cycles with pages that are yet under __free_pages(),
	 * or just got freed under us.
	 *
	 * In case we 'win' a race for a movable page being freed under us and
	 * raise its refcount preventing __free_pages() from doing its job
	 * the put_page() at the end of this block will take care of
	 * release this page, thus avoiding a nasty leakage.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		goto out;

	/*
	 * Check PageMovable before holding a PG_lock because page's owner
	 * assumes anybody doesn't touch PG_lock of newly allocated page
	 * so unconditionally grabbing the lock ruins page's owner side.
	 */
	if (unlikely(!__PageMovable(page)))
		goto out_putpage;

	/*
	 * As movable pages are not isolated from LRU lists, concurrent
	 * compaction threads can race against page migration functions
	 * as well as race against the releasing a page.
	 *
	 * In order to avoid having an already isolated movable page
	 * being (wrongly) re-isolated while it is under migration,
	 * or to avoid attempting to isolate pages being released,
	 * lets be sure we have the page lock
	 * before proceeding with the movable page isolation steps.
	 */
	if (unlikely(!trylock_page(page)))
		goto out_putpage;

	if (!PageMovable(page) || PageIsolated(page))
		goto out_no_isolated;

	mapping = page_mapping(page);
	VM_BUG_ON_PAGE(!mapping, page);

	if (!mapping->a_ops->isolate_page(page, mode))
		goto out_no_isolated;

	/* Driver shouldn't use PG_isolated bit of page->flags */
	WARN_ON_ONCE(PageIsolated(page));
	__SetPageIsolated(page);
	unlock_page(page);

	return 0;

out_no_isolated:
	unlock_page(page);
out_putpage:
	put_page(page);
out:
	return -EBUSY;
}
121
static void putback_movable_page(struct page *page)
{
	struct address_space *mapping;

	mapping = page_mapping(page);
	mapping->a_ops->putback_page(page);
	__ClearPageIsolated(page);
}
130
/*
 * Put previously isolated pages back onto the appropriate lists
 * from where they were once taken off for compaction/migration.
 *
 * Pages isolated from an LRU list go back to the LRU; non-LRU movable
 * pages are handed back to their driver via the putback_page() callback;
 * hugetlb pages are returned to the active hugepage list.
 */
void putback_movable_pages(struct list_head *l)
{
	struct page *page;
	struct page *page2;

	list_for_each_entry_safe(page, page2, l, lru) {
		if (unlikely(PageHuge(page))) {
			putback_active_hugepage(page);
			continue;
		}
		list_del(&page->lru);
		/*
		 * We isolated non-lru movable page so here we can use
		 * __PageMovable because LRU page's mapping cannot have
		 * PAGE_MAPPING_MOVABLE.
		 */
		if (unlikely(__PageMovable(page))) {
			VM_BUG_ON_PAGE(!PageIsolated(page), page);
			lock_page(page);
			if (PageMovable(page))
				putback_movable_page(page);
			else
				__ClearPageIsolated(page);
			unlock_page(page);
			put_page(page);
		} else {
			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
					page_is_file_lru(page), -thp_nr_pages(page));
			putback_lru_page(page);
		}
	}
}
171
/*
 * Restore a potential migration pte to a working pte entry
 */
175static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
176 unsigned long addr, void *old)
177{
178 struct page_vma_mapped_walk pvmw = {
179 .page = old,
180 .vma = vma,
181 .address = addr,
182 .flags = PVMW_SYNC | PVMW_MIGRATION,
183 };
184 struct page *new;
185 pte_t pte;
186 swp_entry_t entry;
187
188 VM_BUG_ON_PAGE(PageTail(page), page);
189 while (page_vma_mapped_walk(&pvmw)) {
190 if (PageKsm(page))
191 new = page;
192 else
193 new = page - pvmw.page->index +
194 linear_page_index(vma, pvmw.address);
195
196#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
197
198 if (!pvmw.pte) {
199 VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
200 remove_migration_pmd(&pvmw, new);
201 continue;
202 }
203#endif
204
205 get_page(new);
206 pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot)));
207 if (pte_swp_soft_dirty(*pvmw.pte))
208 pte = pte_mksoft_dirty(pte);
209
		/*
		 * Recheck VMA as permissions can change since migration started
		 */
213 entry = pte_to_swp_entry(*pvmw.pte);
214 if (is_writable_migration_entry(entry))
215 pte = maybe_mkwrite(pte, vma);
216 else if (pte_swp_uffd_wp(*pvmw.pte))
217 pte = pte_mkuffd_wp(pte);
218
219 if (unlikely(is_device_private_page(new))) {
220 if (pte_write(pte))
221 entry = make_writable_device_private_entry(
222 page_to_pfn(new));
223 else
224 entry = make_readable_device_private_entry(
225 page_to_pfn(new));
226 pte = swp_entry_to_pte(entry);
227 if (pte_swp_soft_dirty(*pvmw.pte))
228 pte = pte_swp_mksoft_dirty(pte);
229 if (pte_swp_uffd_wp(*pvmw.pte))
230 pte = pte_swp_mkuffd_wp(pte);
231 }
232
233#ifdef CONFIG_HUGETLB_PAGE
234 if (PageHuge(new)) {
235 unsigned int shift = huge_page_shift(hstate_vma(vma));
236
237 pte = pte_mkhuge(pte);
238 pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
239 set_huge_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
240 if (PageAnon(new))
241 hugepage_add_anon_rmap(new, vma, pvmw.address);
242 else
243 page_dup_rmap(new, true);
244 } else
245#endif
246 {
247 set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
248
249 if (PageAnon(new))
250 page_add_anon_rmap(new, vma, pvmw.address, false);
251 else
252 page_add_file_rmap(new, false);
253 }
254 if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
255 mlock_vma_page(new);
256
257 if (PageTransHuge(page) && PageMlocked(page))
258 clear_page_mlock(page);
259
260
261 update_mmu_cache(vma, pvmw.address, pvmw.pte);
262 }
263
264 return true;
265}
266
/*
 * Get rid of all migration entries and replace them by
 * references to the indicated page.
 */
271void remove_migration_ptes(struct page *old, struct page *new, bool locked)
272{
273 struct rmap_walk_control rwc = {
274 .rmap_one = remove_migration_pte,
275 .arg = old,
276 };
277
278 if (locked)
279 rmap_walk_locked(new, &rwc);
280 else
281 rmap_walk(new, &rwc);
282}
283
/*
 * Something used the pte of a page under migration. We need to
 * get to the page and wait until migration is finished.
 * When we return from this function the fault will be retried.
 */
289void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
290 spinlock_t *ptl)
291{
292 pte_t pte;
293 swp_entry_t entry;
294 struct page *page;
295
296 spin_lock(ptl);
297 pte = *ptep;
298 if (!is_swap_pte(pte))
299 goto out;
300
301 entry = pte_to_swp_entry(pte);
302 if (!is_migration_entry(entry))
303 goto out;
304
305 page = pfn_swap_entry_to_page(entry);
306 page = compound_head(page);
307
	/*
	 * Once page cache replacement of page migration started, page_count
	 * is zero; but we must not call put_and_wait_on_page_locked() without
	 * a ref. Use get_page_unless_zero(), and just fault again if it fails.
	 */
313 if (!get_page_unless_zero(page))
314 goto out;
315 pte_unmap_unlock(ptep, ptl);
316 put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
317 return;
318out:
319 pte_unmap_unlock(ptep, ptl);
320}
321
322void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
323 unsigned long address)
324{
325 spinlock_t *ptl = pte_lockptr(mm, pmd);
326 pte_t *ptep = pte_offset_map(pmd, address);
327 __migration_entry_wait(mm, ptep, ptl);
328}
329
330void migration_entry_wait_huge(struct vm_area_struct *vma,
331 struct mm_struct *mm, pte_t *pte)
332{
333 spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
334 __migration_entry_wait(mm, pte, ptl);
335}
336
337#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
338void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
339{
340 spinlock_t *ptl;
341 struct page *page;
342
343 ptl = pmd_lock(mm, pmd);
344 if (!is_pmd_migration_entry(*pmd))
345 goto unlock;
346 page = pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd));
347 if (!get_page_unless_zero(page))
348 goto unlock;
349 spin_unlock(ptl);
350 put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
351 return;
352unlock:
353 spin_unlock(ptl);
354}
355#endif
356
357static int expected_page_refs(struct address_space *mapping, struct page *page)
358{
359 int expected_count = 1;
360
	/*
	 * Device private pages have an extra refcount as they are
	 * ZONE_DEVICE pages.
	 */
365 expected_count += is_device_private_page(page);
366 if (mapping)
367 expected_count += thp_nr_pages(page) + page_has_private(page);
368
369 return expected_count;
370}
371
/*
 * Replace the page in the mapping.
 *
 * The number of remaining references must be:
 * 1 for anonymous pages without a mapping
 * 2 for pages with a mapping
 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
 */
380int migrate_page_move_mapping(struct address_space *mapping,
381 struct page *newpage, struct page *page, int extra_count)
382{
383 XA_STATE(xas, &mapping->i_pages, page_index(page));
384 struct zone *oldzone, *newzone;
385 int dirty;
386 int expected_count = expected_page_refs(mapping, page) + extra_count;
387 int nr = thp_nr_pages(page);
388
389 if (!mapping) {
390
391 if (page_count(page) != expected_count)
392 return -EAGAIN;
393
394
395 newpage->index = page->index;
396 newpage->mapping = page->mapping;
397 if (PageSwapBacked(page))
398 __SetPageSwapBacked(newpage);
399
400 return MIGRATEPAGE_SUCCESS;
401 }
402
403 oldzone = page_zone(page);
404 newzone = page_zone(newpage);
405
406 xas_lock_irq(&xas);
407 if (page_count(page) != expected_count || xas_load(&xas) != page) {
408 xas_unlock_irq(&xas);
409 return -EAGAIN;
410 }
411
412 if (!page_ref_freeze(page, expected_count)) {
413 xas_unlock_irq(&xas);
414 return -EAGAIN;
415 }
416
	/*
	 * Now we know that no one else is looking at the page:
	 * no turning back from here.
	 */
421 newpage->index = page->index;
422 newpage->mapping = page->mapping;
423 page_ref_add(newpage, nr);
424 if (PageSwapBacked(page)) {
425 __SetPageSwapBacked(newpage);
426 if (PageSwapCache(page)) {
427 SetPageSwapCache(newpage);
428 set_page_private(newpage, page_private(page));
429 }
430 } else {
431 VM_BUG_ON_PAGE(PageSwapCache(page), page);
432 }
433
434
435 dirty = PageDirty(page);
436 if (dirty) {
437 ClearPageDirty(page);
438 SetPageDirty(newpage);
439 }
440
441 xas_store(&xas, newpage);
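	/*
	 * For a THP, every tail slot in the page cache XArray must also point
	 * at the new page, so walk and store the remaining nr - 1 slots too.
	 */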
442 if (PageTransHuge(page)) {
443 int i;
444
445 for (i = 1; i < nr; i++) {
446 xas_next(&xas);
447 xas_store(&xas, newpage);
448 }
449 }
450
	/*
	 * Drop cache reference from old page by unfreezing
	 * to one less reference.
	 * We know this isn't the last reference.
	 */
456 page_ref_unfreeze(page, expected_count - nr);
457
458 xas_unlock(&xas);
459
460
461
462
463
464
465
466
467
468
469
470
471 if (newzone != oldzone) {
472 struct lruvec *old_lruvec, *new_lruvec;
473 struct mem_cgroup *memcg;
474
475 memcg = page_memcg(page);
476 old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
477 new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);
478
479 __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
480 __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
481 if (PageSwapBacked(page) && !PageSwapCache(page)) {
482 __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
483 __mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
484 }
485#ifdef CONFIG_SWAP
486 if (PageSwapCache(page)) {
487 __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
488 __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
489 }
490#endif
491 if (dirty && mapping_can_writeback(mapping)) {
492 __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
493 __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
494 __mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
495 __mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
496 }
497 }
498 local_irq_enable();
499
500 return MIGRATEPAGE_SUCCESS;
501}
502EXPORT_SYMBOL(migrate_page_move_mapping);
503
/*
 * The expected number of remaining references is the same as that
 * of migrate_page_move_mapping().
 */
508int migrate_huge_page_move_mapping(struct address_space *mapping,
509 struct page *newpage, struct page *page)
510{
511 XA_STATE(xas, &mapping->i_pages, page_index(page));
512 int expected_count;
513
514 xas_lock_irq(&xas);
515 expected_count = 2 + page_has_private(page);
516 if (page_count(page) != expected_count || xas_load(&xas) != page) {
517 xas_unlock_irq(&xas);
518 return -EAGAIN;
519 }
520
521 if (!page_ref_freeze(page, expected_count)) {
522 xas_unlock_irq(&xas);
523 return -EAGAIN;
524 }
525
526 newpage->index = page->index;
527 newpage->mapping = page->mapping;
528
529 get_page(newpage);
530
531 xas_store(&xas, newpage);
532
533 page_ref_unfreeze(page, expected_count - 1);
534
535 xas_unlock_irq(&xas);
536
537 return MIGRATEPAGE_SUCCESS;
538}
539
/*
 * Copy the page to its new location
 */
543void migrate_page_states(struct page *newpage, struct page *page)
544{
545 int cpupid;
546
547 if (PageError(page))
548 SetPageError(newpage);
549 if (PageReferenced(page))
550 SetPageReferenced(newpage);
551 if (PageUptodate(page))
552 SetPageUptodate(newpage);
553 if (TestClearPageActive(page)) {
554 VM_BUG_ON_PAGE(PageUnevictable(page), page);
555 SetPageActive(newpage);
556 } else if (TestClearPageUnevictable(page))
557 SetPageUnevictable(newpage);
558 if (PageWorkingset(page))
559 SetPageWorkingset(newpage);
560 if (PageChecked(page))
561 SetPageChecked(newpage);
562 if (PageMappedToDisk(page))
563 SetPageMappedToDisk(newpage);
564
565
566 if (PageDirty(page))
567 SetPageDirty(newpage);
568
569 if (page_is_young(page))
570 set_page_young(newpage);
571 if (page_is_idle(page))
572 set_page_idle(newpage);
573
	/*
	 * Copy NUMA information to the new page, to prevent over-eager
	 * future migrations of this same page.
	 */
578 cpupid = page_cpupid_xchg_last(page, -1);
579 page_cpupid_xchg_last(newpage, cpupid);
580
581 ksm_migrate_page(newpage, page);
582
583
584
585
586 if (PageSwapCache(page))
587 ClearPageSwapCache(page);
588 ClearPagePrivate(page);
589
590
591 if (!PageHuge(page))
592 set_page_private(page, 0);
593
	/*
	 * If any waiters have accumulated on the new page then
	 * wake them up.
	 */
598 if (PageWriteback(newpage))
599 end_page_writeback(newpage);
600
601
602
603
604
605
606 if (PageReadahead(page))
607 SetPageReadahead(newpage);
608
609 copy_page_owner(page, newpage);
610
611 if (!PageHuge(page))
612 mem_cgroup_migrate(page, newpage);
613}
614EXPORT_SYMBOL(migrate_page_states);
615
616void migrate_page_copy(struct page *newpage, struct page *page)
617{
618 if (PageHuge(page) || PageTransHuge(page))
619 copy_huge_page(newpage, page);
620 else
621 copy_highpage(newpage, page);
622
623 migrate_page_states(newpage, page);
624}
625EXPORT_SYMBOL(migrate_page_copy);
626
/************************************************************
 *                    Migration functions
 ***********************************************************/

/*
 * Common logic to directly migrate a single LRU page suitable for
 * pages that do not use PagePrivate/PagePrivate2.
 *
 * Pages are locked upon entry and exit.
 */
637int migrate_page(struct address_space *mapping,
638 struct page *newpage, struct page *page,
639 enum migrate_mode mode)
640{
641 int rc;
642
643 BUG_ON(PageWriteback(page));
644
645 rc = migrate_page_move_mapping(mapping, newpage, page, 0);
646
647 if (rc != MIGRATEPAGE_SUCCESS)
648 return rc;
649
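	/* With MIGRATE_SYNC_NO_COPY the caller copies the data; only move state. */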
650 if (mode != MIGRATE_SYNC_NO_COPY)
651 migrate_page_copy(newpage, page);
652 else
653 migrate_page_states(newpage, page);
654 return MIGRATEPAGE_SUCCESS;
655}
656EXPORT_SYMBOL(migrate_page);
657
658#ifdef CONFIG_BLOCK
659
660static bool buffer_migrate_lock_buffers(struct buffer_head *head,
661 enum migrate_mode mode)
662{
663 struct buffer_head *bh = head;
664
665
666 if (mode != MIGRATE_ASYNC) {
667 do {
668 lock_buffer(bh);
669 bh = bh->b_this_page;
670
671 } while (bh != head);
672
673 return true;
674 }
675
676
677 do {
678 if (!trylock_buffer(bh)) {
679
680
681
682
683 struct buffer_head *failed_bh = bh;
684 bh = head;
685 while (bh != failed_bh) {
686 unlock_buffer(bh);
687 bh = bh->b_this_page;
688 }
689 return false;
690 }
691
692 bh = bh->b_this_page;
693 } while (bh != head);
694 return true;
695}
696
697static int __buffer_migrate_page(struct address_space *mapping,
698 struct page *newpage, struct page *page, enum migrate_mode mode,
699 bool check_refs)
700{
701 struct buffer_head *bh, *head;
702 int rc;
703 int expected_count;
704
705 if (!page_has_buffers(page))
706 return migrate_page(mapping, newpage, page, mode);
707
708
709 expected_count = expected_page_refs(mapping, page);
710 if (page_count(page) != expected_count)
711 return -EAGAIN;
712
713 head = page_buffers(page);
714 if (!buffer_migrate_lock_buffers(head, mode))
715 return -EAGAIN;
716
717 if (check_refs) {
718 bool busy;
719 bool invalidated = false;
720
721recheck_buffers:
722 busy = false;
723 spin_lock(&mapping->private_lock);
724 bh = head;
725 do {
726 if (atomic_read(&bh->b_count)) {
727 busy = true;
728 break;
729 }
730 bh = bh->b_this_page;
731 } while (bh != head);
732 if (busy) {
733 if (invalidated) {
734 rc = -EAGAIN;
735 goto unlock_buffers;
736 }
737 spin_unlock(&mapping->private_lock);
738 invalidate_bh_lrus();
739 invalidated = true;
740 goto recheck_buffers;
741 }
742 }
743
744 rc = migrate_page_move_mapping(mapping, newpage, page, 0);
745 if (rc != MIGRATEPAGE_SUCCESS)
746 goto unlock_buffers;
747
748 attach_page_private(newpage, detach_page_private(page));
749
750 bh = head;
751 do {
752 set_bh_page(bh, newpage, bh_offset(bh));
753 bh = bh->b_this_page;
754
755 } while (bh != head);
756
757 if (mode != MIGRATE_SYNC_NO_COPY)
758 migrate_page_copy(newpage, page);
759 else
760 migrate_page_states(newpage, page);
761
762 rc = MIGRATEPAGE_SUCCESS;
763unlock_buffers:
764 if (check_refs)
765 spin_unlock(&mapping->private_lock);
766 bh = head;
767 do {
768 unlock_buffer(bh);
769 bh = bh->b_this_page;
770
771 } while (bh != head);
772
773 return rc;
774}
775
/*
 * Migration function for pages with buffers. This function can only be used
 * if the underlying filesystem guarantees that no other references to "page"
 * exist. For example attached buffer heads are accessed only under page lock.
 */
781int buffer_migrate_page(struct address_space *mapping,
782 struct page *newpage, struct page *page, enum migrate_mode mode)
783{
784 return __buffer_migrate_page(mapping, newpage, page, mode, false);
785}
786EXPORT_SYMBOL(buffer_migrate_page);
787
/*
 * Same as above except that this variant is more careful and checks that there
 * are also no buffer head references. This function is the right one for
 * mappings where buffer heads are directly looked up and referenced (such as
 * block device mappings).
 */
794int buffer_migrate_page_norefs(struct address_space *mapping,
795 struct page *newpage, struct page *page, enum migrate_mode mode)
796{
797 return __buffer_migrate_page(mapping, newpage, page, mode, true);
798}
799#endif
800
/*
 * Writeback a page to clean the dirty state
 */
804static int writeout(struct address_space *mapping, struct page *page)
805{
806 struct writeback_control wbc = {
807 .sync_mode = WB_SYNC_NONE,
808 .nr_to_write = 1,
809 .range_start = 0,
810 .range_end = LLONG_MAX,
811 .for_reclaim = 1
812 };
813 int rc;
814
815 if (!mapping->a_ops->writepage)
816
817 return -EINVAL;
818
819 if (!clear_page_dirty_for_io(page))
820
821 return -EAGAIN;
822
823
824
825
826
827
828
829
830
831 remove_migration_ptes(page, page, false);
832
833 rc = mapping->a_ops->writepage(page, &wbc);
834
835 if (rc != AOP_WRITEPAGE_ACTIVATE)
836
837 lock_page(page);
838
839 return (rc < 0) ? -EIO : -EAGAIN;
840}
841
/*
 * Default handling if a filesystem does not provide a migration function.
 */
845static int fallback_migrate_page(struct address_space *mapping,
846 struct page *newpage, struct page *page, enum migrate_mode mode)
847{
848 if (PageDirty(page)) {
849
850 switch (mode) {
851 case MIGRATE_SYNC:
852 case MIGRATE_SYNC_NO_COPY:
853 break;
854 default:
855 return -EBUSY;
856 }
857 return writeout(mapping, page);
858 }
859
860
861
862
863
864 if (page_has_private(page) &&
865 !try_to_release_page(page, GFP_KERNEL))
866 return mode == MIGRATE_SYNC ? -EAGAIN : -EBUSY;
867
868 return migrate_page(mapping, newpage, page, mode);
869}
870
/*
 * Move a page to a newly allocated page
 * The page is locked and all ptes have been successfully removed.
 *
 * The new page will have replaced the old page if this function
 * is successful.
 *
 * Return value:
 *   < 0 - error code
 *  MIGRATEPAGE_SUCCESS - success
 */
882static int move_to_new_page(struct page *newpage, struct page *page,
883 enum migrate_mode mode)
884{
885 struct address_space *mapping;
886 int rc = -EAGAIN;
887 bool is_lru = !__PageMovable(page);
888
889 VM_BUG_ON_PAGE(!PageLocked(page), page);
890 VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
891
892 mapping = page_mapping(page);
893
894 if (likely(is_lru)) {
895 if (!mapping)
896 rc = migrate_page(mapping, newpage, page, mode);
897 else if (mapping->a_ops->migratepage)
898
899
900
901
902
903
904
905 rc = mapping->a_ops->migratepage(mapping, newpage,
906 page, mode);
907 else
908 rc = fallback_migrate_page(mapping, newpage,
909 page, mode);
910 } else {
911
912
913
914
915 VM_BUG_ON_PAGE(!PageIsolated(page), page);
916 if (!PageMovable(page)) {
917 rc = MIGRATEPAGE_SUCCESS;
918 __ClearPageIsolated(page);
919 goto out;
920 }
921
922 rc = mapping->a_ops->migratepage(mapping, newpage,
923 page, mode);
924 WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
925 !PageIsolated(page));
926 }
927
928
929
930
931
932 if (rc == MIGRATEPAGE_SUCCESS) {
933 if (__PageMovable(page)) {
934 VM_BUG_ON_PAGE(!PageIsolated(page), page);
935
936
937
938
939
940 __ClearPageIsolated(page);
941 }
942
943
944
945
946
947
948 if (!PageMappingFlags(page))
949 page->mapping = NULL;
950
951 if (likely(!is_zone_device_page(newpage)))
952 flush_dcache_page(newpage);
953
954 }
955out:
956 return rc;
957}
958
959static int __unmap_and_move(struct page *page, struct page *newpage,
960 int force, enum migrate_mode mode)
961{
962 int rc = -EAGAIN;
963 bool page_was_mapped = false;
964 struct anon_vma *anon_vma = NULL;
965 bool is_lru = !__PageMovable(page);
966
967 if (!trylock_page(page)) {
968 if (!force || mode == MIGRATE_ASYNC)
969 goto out;
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984 if (current->flags & PF_MEMALLOC)
985 goto out;
986
987 lock_page(page);
988 }
989
990 if (PageWriteback(page)) {
991
992
993
994
995
996
997 switch (mode) {
998 case MIGRATE_SYNC:
999 case MIGRATE_SYNC_NO_COPY:
1000 break;
1001 default:
1002 rc = -EBUSY;
1003 goto out_unlock;
1004 }
1005 if (!force)
1006 goto out_unlock;
1007 wait_on_page_writeback(page);
1008 }
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024 if (PageAnon(page) && !PageKsm(page))
1025 anon_vma = page_get_anon_vma(page);
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035 if (unlikely(!trylock_page(newpage)))
1036 goto out_unlock;
1037
1038 if (unlikely(!is_lru)) {
1039 rc = move_to_new_page(newpage, page, mode);
1040 goto out_unlock_both;
1041 }
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055 if (!page->mapping) {
1056 VM_BUG_ON_PAGE(PageAnon(page), page);
1057 if (page_has_private(page)) {
1058 try_to_free_buffers(page);
1059 goto out_unlock_both;
1060 }
1061 } else if (page_mapped(page)) {
1062
1063 VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
1064 page);
1065 try_to_migrate(page, 0);
1066 page_was_mapped = true;
1067 }
1068
1069 if (!page_mapped(page))
1070 rc = move_to_new_page(newpage, page, mode);
1071
1072 if (page_was_mapped)
1073 remove_migration_ptes(page,
1074 rc == MIGRATEPAGE_SUCCESS ? newpage : page, false);
1075
1076out_unlock_both:
1077 unlock_page(newpage);
1078out_unlock:
1079
1080 if (anon_vma)
1081 put_anon_vma(anon_vma);
1082 unlock_page(page);
1083out:
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 if (rc == MIGRATEPAGE_SUCCESS) {
1094 if (unlikely(!is_lru))
1095 put_page(newpage);
1096 else
1097 putback_lru_page(newpage);
1098 }
1099
1100 return rc;
1101}
1102
/*
 * node_demotion[] example:
 *
 * Consider a system with two sockets.  Each socket has three classes of
 * memory attached: fast, medium and slow.  Each memory class is placed
 * in its own NUMA node.  The CPUs are placed in the node with the
 * "fast" memory.  The 6 NUMA nodes (0-5) might be split among the
 * sockets like this:
 *
 *	Socket A: 0, 1, 2
 *	Socket B: 3, 4, 5
 *
 * When Node 0 fills up, its memory should be migrated to Node 1.  When
 * Node 1 fills up, it should be migrated to Node 2.  The migration path
 * starts on the nodes with the processors (since allocations default to
 * this node) and fast memory, progresses through medium and ends with
 * the slow memory:
 *
 *	0 -> 1 -> 2 -> stop
 *	3 -> 4 -> 5 -> stop
 *
 * node_demotion[] stores the target node for each source node, with
 * NUMA_NO_NODE marking nodes that do not demote any further.
 */
static int node_demotion[MAX_NUMNODES] __read_mostly =
	{[0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE};
1147
/**
 * next_demotion_node() - Get the next node in the demotion path
 * @node: The starting node to lookup the next node
 *
 * Return: node id for next memory node in the demotion path hierarchy
 * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
 * @node online or guarantee that it *continues* to be the next demotion
 * target.
 */
1157int next_demotion_node(int node)
1158{
1159 int target;
1160
	/*
	 * node_demotion[] is updated without excluding this
	 * function from running.  RCU doesn't provide any
	 * compiler barriers, so the READ_ONCE() is required
	 * to avoid compiler reordering or read merging.
	 *
	 * Make sure to use RCU over entire code blocks if
	 * node_demotion[] reads need to be consistent.
	 */
1170 rcu_read_lock();
1171 target = READ_ONCE(node_demotion[node]);
1172 rcu_read_unlock();
1173
1174 return target;
1175}
1176
/*
 * Obtain the lock on page, remove all ptes and migrate the page
 * to the newly allocated page in newpage.
 */
1181static int unmap_and_move(new_page_t get_new_page,
1182 free_page_t put_new_page,
1183 unsigned long private, struct page *page,
1184 int force, enum migrate_mode mode,
1185 enum migrate_reason reason,
1186 struct list_head *ret)
1187{
1188 int rc = MIGRATEPAGE_SUCCESS;
1189 struct page *newpage = NULL;
1190
1191 if (!thp_migration_supported() && PageTransHuge(page))
1192 return -ENOSYS;
1193
1194 if (page_count(page) == 1) {
1195
1196 ClearPageActive(page);
1197 ClearPageUnevictable(page);
1198 if (unlikely(__PageMovable(page))) {
1199 lock_page(page);
1200 if (!PageMovable(page))
1201 __ClearPageIsolated(page);
1202 unlock_page(page);
1203 }
1204 goto out;
1205 }
1206
1207 newpage = get_new_page(page, private);
1208 if (!newpage)
1209 return -ENOMEM;
1210
1211 rc = __unmap_and_move(page, newpage, force, mode);
1212 if (rc == MIGRATEPAGE_SUCCESS)
1213 set_page_owner_migrate_reason(newpage, reason);
1214
1215out:
1216 if (rc != -EAGAIN) {
1217
1218
1219
1220
1221
1222 list_del(&page->lru);
1223 }
1224
1225
1226
1227
1228
1229
1230 if (rc == MIGRATEPAGE_SUCCESS) {
1231
1232
1233
1234
1235
1236 if (likely(!__PageMovable(page)))
1237 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
1238 page_is_file_lru(page), -thp_nr_pages(page));
1239
1240 if (reason != MR_MEMORY_FAILURE)
1241
1242
1243
1244 put_page(page);
1245 } else {
1246 if (rc != -EAGAIN)
1247 list_add_tail(&page->lru, ret);
1248
1249 if (put_new_page)
1250 put_new_page(newpage, private);
1251 else
1252 put_page(newpage);
1253 }
1254
1255 return rc;
1256}
1257
/*
 * Counterpart of unmap_and_move() for hugepage migration.
 *
 * This function doesn't wait the completion of hugepage I/O
 * because there is no race between I/O and migration for hugepage.
 * Note that currently hugepage I/O occurs only in direct I/O
 * where no lock is held and PG_writeback is irrelevant,
 * and writeback status of all subpages are counted in the reference
 * count of the head page (i.e. if all subpages of a 2MB hugepage are
 * under direct I/O, the reference of the head page is 512 and a bit more.)
 * This means that when we try to migrate hugepage whose subpages are
 * doing direct I/O, some references remain after try_to_unmap() and
 * hugepage migration fails without data corruption.
 *
 * There is also no race when direct I/O is issued on the page under migration,
 * because then pte is replaced with migration swap entry and direct I/O code
 * will wait in the page fault for migration to complete.
 */
1276static int unmap_and_move_huge_page(new_page_t get_new_page,
1277 free_page_t put_new_page, unsigned long private,
1278 struct page *hpage, int force,
1279 enum migrate_mode mode, int reason,
1280 struct list_head *ret)
1281{
1282 int rc = -EAGAIN;
1283 int page_was_mapped = 0;
1284 struct page *new_hpage;
1285 struct anon_vma *anon_vma = NULL;
1286 struct address_space *mapping = NULL;
1287
1288
1289
1290
1291
1292
1293
1294
1295 if (!hugepage_migration_supported(page_hstate(hpage))) {
1296 list_move_tail(&hpage->lru, ret);
1297 return -ENOSYS;
1298 }
1299
1300 if (page_count(hpage) == 1) {
1301
1302 putback_active_hugepage(hpage);
1303 return MIGRATEPAGE_SUCCESS;
1304 }
1305
1306 new_hpage = get_new_page(hpage, private);
1307 if (!new_hpage)
1308 return -ENOMEM;
1309
1310 if (!trylock_page(hpage)) {
1311 if (!force)
1312 goto out;
1313 switch (mode) {
1314 case MIGRATE_SYNC:
1315 case MIGRATE_SYNC_NO_COPY:
1316 break;
1317 default:
1318 goto out;
1319 }
1320 lock_page(hpage);
1321 }
1322
1323
1324
1325
1326
1327
1328 if (hugetlb_page_subpool(hpage) && !page_mapping(hpage)) {
1329 rc = -EBUSY;
1330 goto out_unlock;
1331 }
1332
1333 if (PageAnon(hpage))
1334 anon_vma = page_get_anon_vma(hpage);
1335
1336 if (unlikely(!trylock_page(new_hpage)))
1337 goto put_anon;
1338
1339 if (page_mapped(hpage)) {
1340 bool mapping_locked = false;
1341 enum ttu_flags ttu = 0;
1342
1343 if (!PageAnon(hpage)) {
1344
1345
1346
1347
1348
1349
1350 mapping = hugetlb_page_mapping_lock_write(hpage);
1351 if (unlikely(!mapping))
1352 goto unlock_put_anon;
1353
1354 mapping_locked = true;
1355 ttu |= TTU_RMAP_LOCKED;
1356 }
1357
1358 try_to_migrate(hpage, ttu);
1359 page_was_mapped = 1;
1360
1361 if (mapping_locked)
1362 i_mmap_unlock_write(mapping);
1363 }
1364
1365 if (!page_mapped(hpage))
1366 rc = move_to_new_page(new_hpage, hpage, mode);
1367
1368 if (page_was_mapped)
1369 remove_migration_ptes(hpage,
1370 rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false);
1371
1372unlock_put_anon:
1373 unlock_page(new_hpage);
1374
1375put_anon:
1376 if (anon_vma)
1377 put_anon_vma(anon_vma);
1378
1379 if (rc == MIGRATEPAGE_SUCCESS) {
1380 move_hugetlb_state(hpage, new_hpage, reason);
1381 put_new_page = NULL;
1382 }
1383
1384out_unlock:
1385 unlock_page(hpage);
1386out:
1387 if (rc == MIGRATEPAGE_SUCCESS)
1388 putback_active_hugepage(hpage);
1389 else if (rc != -EAGAIN)
1390 list_move_tail(&hpage->lru, ret);
1391
1392
1393
1394
1395
1396
1397 if (put_new_page)
1398 put_new_page(new_hpage, private);
1399 else
1400 putback_active_hugepage(new_hpage);
1401
1402 return rc;
1403}
1404
1405static inline int try_split_thp(struct page *page, struct page **page2,
1406 struct list_head *from)
1407{
1408 int rc = 0;
1409
1410 lock_page(page);
1411 rc = split_huge_page_to_list(page, from);
1412 unlock_page(page);
1413 if (!rc)
1414 list_safe_reset_next(page, *page2, lru);
1415
1416 return rc;
1417}
1418
/*
 * migrate_pages - migrate the pages specified in a list, to the free pages
 *		   supplied as the target for the page migration
 *
 * @from:		The list of pages to be migrated.
 * @get_new_page:	The function used to allocate free pages to be used
 *			as the target of the page migration.
 * @put_new_page:	The function used to free target pages if migration
 *			fails, or NULL if no special handling is necessary.
 * @private:		Private data to be passed on to get_new_page()
 * @mode:		The migration mode that specifies the constraints for
 *			page migration, if any.
 * @reason:		The reason for page migration.
 * @ret_succeeded:	Set to the number of normal pages migrated successfully
 *			if the caller passes a non-NULL pointer.
 *
 * The function returns after 10 attempts or if no pages are movable any more
 * because the list has become empty or no retryable pages exist any more.
 * It is the caller's responsibility to call putback_movable_pages() to return
 * pages to the LRU or free list only if ret != 0.
 *
 * Returns the number of {normal page, THP} that were not migrated, or an error code.
 */
1442int migrate_pages(struct list_head *from, new_page_t get_new_page,
1443 free_page_t put_new_page, unsigned long private,
1444 enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
1445{
1446 int retry = 1;
1447 int thp_retry = 1;
1448 int nr_failed = 0;
1449 int nr_succeeded = 0;
1450 int nr_thp_succeeded = 0;
1451 int nr_thp_failed = 0;
1452 int nr_thp_split = 0;
1453 int pass = 0;
1454 bool is_thp = false;
1455 struct page *page;
1456 struct page *page2;
1457 int swapwrite = current->flags & PF_SWAPWRITE;
1458 int rc, nr_subpages;
1459 LIST_HEAD(ret_pages);
1460 bool nosplit = (reason == MR_NUMA_MISPLACED);
1461
1462 trace_mm_migrate_pages_start(mode, reason);
1463
1464 if (!swapwrite)
1465 current->flags |= PF_SWAPWRITE;
1466
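	/* Retry the whole list for up to 10 passes while any page reports -EAGAIN. */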
1467 for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
1468 retry = 0;
1469 thp_retry = 0;
1470
1471 list_for_each_entry_safe(page, page2, from, lru) {
1472retry:
1473
1474
1475
1476
1477
1478 is_thp = PageTransHuge(page) && !PageHuge(page);
1479 nr_subpages = thp_nr_pages(page);
1480 cond_resched();
1481
1482 if (PageHuge(page))
1483 rc = unmap_and_move_huge_page(get_new_page,
1484 put_new_page, private, page,
1485 pass > 2, mode, reason,
1486 &ret_pages);
1487 else
1488 rc = unmap_and_move(get_new_page, put_new_page,
1489 private, page, pass > 2, mode,
1490 reason, &ret_pages);
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 switch(rc) {
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 case -ENOSYS:
1513
1514 if (is_thp) {
1515 if (!try_split_thp(page, &page2, from)) {
1516 nr_thp_split++;
1517 goto retry;
1518 }
1519
1520 nr_thp_failed++;
1521 nr_failed += nr_subpages;
1522 break;
1523 }
1524
1525
1526 nr_failed++;
1527 break;
1528 case -ENOMEM:
1529
1530
1531
1532
1533
1534 if (is_thp && !nosplit) {
1535 if (!try_split_thp(page, &page2, from)) {
1536 nr_thp_split++;
1537 goto retry;
1538 }
1539
1540 nr_thp_failed++;
1541 nr_failed += nr_subpages;
1542 goto out;
1543 }
1544 nr_failed++;
1545 goto out;
1546 case -EAGAIN:
1547 if (is_thp) {
1548 thp_retry++;
1549 break;
1550 }
1551 retry++;
1552 break;
1553 case MIGRATEPAGE_SUCCESS:
1554 if (is_thp) {
1555 nr_thp_succeeded++;
1556 nr_succeeded += nr_subpages;
1557 break;
1558 }
1559 nr_succeeded++;
1560 break;
1561 default:
1562
1563
1564
1565
1566
1567
1568 if (is_thp) {
1569 nr_thp_failed++;
1570 nr_failed += nr_subpages;
1571 break;
1572 }
1573 nr_failed++;
1574 break;
1575 }
1576 }
1577 }
1578 nr_failed += retry + thp_retry;
1579 nr_thp_failed += thp_retry;
1580 rc = nr_failed;
1581out:
1582
1583
1584
1585
1586 list_splice(&ret_pages, from);
1587
1588 count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
1589 count_vm_events(PGMIGRATE_FAIL, nr_failed);
1590 count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
1591 count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
1592 count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
1593 trace_mm_migrate_pages(nr_succeeded, nr_failed, nr_thp_succeeded,
1594 nr_thp_failed, nr_thp_split, mode, reason);
1595
1596 if (!swapwrite)
1597 current->flags &= ~PF_SWAPWRITE;
1598
1599 if (ret_succeeded)
1600 *ret_succeeded = nr_succeeded;
1601
1602 return rc;
1603}
1604
1605struct page *alloc_migration_target(struct page *page, unsigned long private)
1606{
1607 struct migration_target_control *mtc;
1608 gfp_t gfp_mask;
1609 unsigned int order = 0;
1610 struct page *new_page = NULL;
1611 int nid;
1612 int zidx;
1613
1614 mtc = (struct migration_target_control *)private;
1615 gfp_mask = mtc->gfp_mask;
1616 nid = mtc->nid;
1617 if (nid == NUMA_NO_NODE)
1618 nid = page_to_nid(page);
1619
1620 if (PageHuge(page)) {
1621 struct hstate *h = page_hstate(compound_head(page));
1622
1623 gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
1624 return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
1625 }
1626
1627 if (PageTransHuge(page)) {
1628
1629
1630
1631
1632 gfp_mask &= ~__GFP_RECLAIM;
1633 gfp_mask |= GFP_TRANSHUGE;
1634 order = HPAGE_PMD_ORDER;
1635 }
1636 zidx = zone_idx(page_zone(page));
1637 if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
1638 gfp_mask |= __GFP_HIGHMEM;
1639
1640 new_page = __alloc_pages(gfp_mask, order, nid, mtc->nmask);
1641
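	/* Finish THP setup (compound destructor, deferred split list) for the new page. */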
1642 if (new_page && PageTransHuge(new_page))
1643 prep_transhuge_page(new_page);
1644
1645 return new_page;
1646}
1647
1648#ifdef CONFIG_NUMA
1649
1650static int store_status(int __user *status, int start, int value, int nr)
1651{
1652 while (nr-- > 0) {
1653 if (put_user(value, status + start))
1654 return -EFAULT;
1655 start++;
1656 }
1657
1658 return 0;
1659}
1660
1661static int do_move_pages_to_node(struct mm_struct *mm,
1662 struct list_head *pagelist, int node)
1663{
1664 int err;
1665 struct migration_target_control mtc = {
1666 .nid = node,
1667 .gfp_mask = GFP_HIGHUSER_MOVABLE | __GFP_THISNODE,
1668 };
1669
1670 err = migrate_pages(pagelist, alloc_migration_target, NULL,
1671 (unsigned long)&mtc, MIGRATE_SYNC, MR_SYSCALL, NULL);
1672 if (err)
1673 putback_movable_pages(pagelist);
1674 return err;
1675}
1676
/*
 * Resolves the given address to a struct page, isolates it from the LRU and
 * puts it to the given pagelist.
 * Returns:
 *     errno - if the page cannot be found/isolated
 *     0 - when it doesn't have to be migrated because it is already on the
 *         target node
 *     1 - when it has been queued
 */
1686static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
1687 int node, struct list_head *pagelist, bool migrate_all)
1688{
1689 struct vm_area_struct *vma;
1690 struct page *page;
1691 unsigned int follflags;
1692 int err;
1693
1694 mmap_read_lock(mm);
1695 err = -EFAULT;
1696 vma = find_vma(mm, addr);
1697 if (!vma || addr < vma->vm_start || !vma_migratable(vma))
1698 goto out;
1699
1700
1701 follflags = FOLL_GET | FOLL_DUMP;
1702 page = follow_page(vma, addr, follflags);
1703
1704 err = PTR_ERR(page);
1705 if (IS_ERR(page))
1706 goto out;
1707
1708 err = -ENOENT;
1709 if (!page)
1710 goto out;
1711
1712 err = 0;
1713 if (page_to_nid(page) == node)
1714 goto out_putpage;
1715
1716 err = -EACCES;
1717 if (page_mapcount(page) > 1 && !migrate_all)
1718 goto out_putpage;
1719
1720 if (PageHuge(page)) {
1721 if (PageHead(page)) {
1722 isolate_huge_page(page, pagelist);
1723 err = 1;
1724 }
1725 } else {
1726 struct page *head;
1727
1728 head = compound_head(page);
1729 err = isolate_lru_page(head);
1730 if (err)
1731 goto out_putpage;
1732
1733 err = 1;
1734 list_add_tail(&head->lru, pagelist);
1735 mod_node_page_state(page_pgdat(head),
1736 NR_ISOLATED_ANON + page_is_file_lru(head),
1737 thp_nr_pages(head));
1738 }
1739out_putpage:
1740
1741
1742
1743
1744
1745 put_page(page);
1746out:
1747 mmap_read_unlock(mm);
1748 return err;
1749}
1750
1751static int move_pages_and_store_status(struct mm_struct *mm, int node,
1752 struct list_head *pagelist, int __user *status,
1753 int start, int i, unsigned long nr_pages)
1754{
1755 int err;
1756
1757 if (list_empty(pagelist))
1758 return 0;
1759
1760 err = do_move_pages_to_node(mm, pagelist, node);
1761 if (err) {
1762
1763
1764
1765
1766
1767
1768
1769
1770 if (err > 0)
1771 err += nr_pages - i - 1;
1772 return err;
1773 }
1774 return store_status(status, start, node, i - start);
1775}
1776
/*
 * Migrate an array of page address onto an array of nodes and fill
 * the corresponding array of status.
 */
1781static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1782 unsigned long nr_pages,
1783 const void __user * __user *pages,
1784 const int __user *nodes,
1785 int __user *status, int flags)
1786{
1787 int current_node = NUMA_NO_NODE;
1788 LIST_HEAD(pagelist);
1789 int start, i;
1790 int err = 0, err1;
1791
1792 lru_cache_disable();
1793
1794 for (i = start = 0; i < nr_pages; i++) {
1795 const void __user *p;
1796 unsigned long addr;
1797 int node;
1798
1799 err = -EFAULT;
1800 if (get_user(p, pages + i))
1801 goto out_flush;
1802 if (get_user(node, nodes + i))
1803 goto out_flush;
1804 addr = (unsigned long)untagged_addr(p);
1805
1806 err = -ENODEV;
1807 if (node < 0 || node >= MAX_NUMNODES)
1808 goto out_flush;
1809 if (!node_state(node, N_MEMORY))
1810 goto out_flush;
1811
1812 err = -EACCES;
1813 if (!node_isset(node, task_nodes))
1814 goto out_flush;
1815
1816 if (current_node == NUMA_NO_NODE) {
1817 current_node = node;
1818 start = i;
1819 } else if (node != current_node) {
1820 err = move_pages_and_store_status(mm, current_node,
1821 &pagelist, status, start, i, nr_pages);
1822 if (err)
1823 goto out;
1824 start = i;
1825 current_node = node;
1826 }
1827
1828
1829
1830
1831
1832 err = add_page_for_migration(mm, addr, current_node,
1833 &pagelist, flags & MPOL_MF_MOVE_ALL);
1834
1835 if (err > 0) {
1836
1837 continue;
1838 }
1839
1840
1841
1842
1843
1844 err = store_status(status, i, err ? : current_node, 1);
1845 if (err)
1846 goto out_flush;
1847
1848 err = move_pages_and_store_status(mm, current_node, &pagelist,
1849 status, start, i, nr_pages);
1850 if (err)
1851 goto out;
1852 current_node = NUMA_NO_NODE;
1853 }
1854out_flush:
1855
1856 err1 = move_pages_and_store_status(mm, current_node, &pagelist,
1857 status, start, i, nr_pages);
1858 if (err >= 0)
1859 err = err1;
1860out:
1861 lru_cache_enable();
1862 return err;
1863}
1864
/*
 * Determine the nodes of an array of pages and store it in an array of status.
 */
1868static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
1869 const void __user **pages, int *status)
1870{
1871 unsigned long i;
1872
1873 mmap_read_lock(mm);
1874
1875 for (i = 0; i < nr_pages; i++) {
1876 unsigned long addr = (unsigned long)(*pages);
1877 struct vm_area_struct *vma;
1878 struct page *page;
1879 int err = -EFAULT;
1880
1881 vma = vma_lookup(mm, addr);
1882 if (!vma)
1883 goto set_status;
1884
1885
1886 page = follow_page(vma, addr, FOLL_DUMP);
1887
1888 err = PTR_ERR(page);
1889 if (IS_ERR(page))
1890 goto set_status;
1891
1892 err = page ? page_to_nid(page) : -ENOENT;
1893set_status:
1894 *status = err;
1895
1896 pages++;
1897 status++;
1898 }
1899
1900 mmap_read_unlock(mm);
1901}
1902
1903static int get_compat_pages_array(const void __user *chunk_pages[],
1904 const void __user * __user *pages,
1905 unsigned long chunk_nr)
1906{
1907 compat_uptr_t __user *pages32 = (compat_uptr_t __user *)pages;
1908 compat_uptr_t p;
1909 int i;
1910
1911 for (i = 0; i < chunk_nr; i++) {
1912 if (get_user(p, pages32 + i))
1913 return -EFAULT;
1914 chunk_pages[i] = compat_ptr(p);
1915 }
1916
1917 return 0;
1918}
1919
/*
 * Determine the nodes of a user array of pages and store it in
 * a user array of status.
 */
1924static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1925 const void __user * __user *pages,
1926 int __user *status)
1927{
1928#define DO_PAGES_STAT_CHUNK_NR 16
1929 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1930 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1931
1932 while (nr_pages) {
1933 unsigned long chunk_nr;
1934
1935 chunk_nr = nr_pages;
1936 if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
1937 chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1938
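		/* Compat callers pass 32-bit pointers which must be widened first. */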
1939 if (in_compat_syscall()) {
1940 if (get_compat_pages_array(chunk_pages, pages,
1941 chunk_nr))
1942 break;
1943 } else {
1944 if (copy_from_user(chunk_pages, pages,
1945 chunk_nr * sizeof(*chunk_pages)))
1946 break;
1947 }
1948
1949 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1950
1951 if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
1952 break;
1953
1954 pages += chunk_nr;
1955 status += chunk_nr;
1956 nr_pages -= chunk_nr;
1957 }
1958 return nr_pages ? -EFAULT : 0;
1959}
1960
1961static struct mm_struct *find_mm_struct(pid_t pid, nodemask_t *mem_nodes)
1962{
1963 struct task_struct *task;
1964 struct mm_struct *mm;
1965
1966
1967
1968
1969
1970 if (!pid) {
1971 mmget(current->mm);
1972 *mem_nodes = cpuset_mems_allowed(current);
1973 return current->mm;
1974 }
1975
1976
1977 rcu_read_lock();
1978 task = find_task_by_vpid(pid);
1979 if (!task) {
1980 rcu_read_unlock();
1981 return ERR_PTR(-ESRCH);
1982 }
1983 get_task_struct(task);
1984
1985
1986
1987
1988
1989 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
1990 rcu_read_unlock();
1991 mm = ERR_PTR(-EPERM);
1992 goto out;
1993 }
1994 rcu_read_unlock();
1995
1996 mm = ERR_PTR(security_task_movememory(task));
1997 if (IS_ERR(mm))
1998 goto out;
1999 *mem_nodes = cpuset_mems_allowed(task);
2000 mm = get_task_mm(task);
2001out:
2002 put_task_struct(task);
2003 if (!mm)
2004 mm = ERR_PTR(-EINVAL);
2005 return mm;
2006}
2007
/*
 * Move a list of pages in the address space of the currently executing
 * process.
 */
2012static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
2013 const void __user * __user *pages,
2014 const int __user *nodes,
2015 int __user *status, int flags)
2016{
2017 struct mm_struct *mm;
2018 int err;
2019 nodemask_t task_nodes;
2020
2021
2022 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
2023 return -EINVAL;
2024
2025 if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
2026 return -EPERM;
2027
2028 mm = find_mm_struct(pid, &task_nodes);
2029 if (IS_ERR(mm))
2030 return PTR_ERR(mm);
2031
2032 if (nodes)
2033 err = do_pages_move(mm, task_nodes, nr_pages, pages,
2034 nodes, status, flags);
2035 else
2036 err = do_pages_stat(mm, nr_pages, pages, status);
2037
2038 mmput(mm);
2039 return err;
2040}
2041
2042SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
2043 const void __user * __user *, pages,
2044 const int __user *, nodes,
2045 int __user *, status, int, flags)
2046{
2047 return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
2048}
2049
2050#ifdef CONFIG_NUMA_BALANCING
/*
 * Returns true if this is a safe migration target node for misplaced NUMA
 * pages. Currently it only checks the watermarks which is crude.
 */
2055static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
2056 unsigned long nr_migrate_pages)
2057{
2058 int z;
2059
2060 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
2061 struct zone *zone = pgdat->node_zones + z;
2062
2063 if (!populated_zone(zone))
2064 continue;
2065
2066
2067 if (!zone_watermark_ok(zone, 0,
2068 high_wmark_pages(zone) +
2069 nr_migrate_pages,
2070 ZONE_MOVABLE, 0))
2071 continue;
2072 return true;
2073 }
2074 return false;
2075}
2076
2077static struct page *alloc_misplaced_dst_page(struct page *page,
2078 unsigned long data)
2079{
2080 int nid = (int) data;
2081 struct page *newpage;
2082
2083 newpage = __alloc_pages_node(nid,
2084 (GFP_HIGHUSER_MOVABLE |
2085 __GFP_THISNODE | __GFP_NOMEMALLOC |
2086 __GFP_NORETRY | __GFP_NOWARN) &
2087 ~__GFP_RECLAIM, 0);
2088
2089 return newpage;
2090}
2091
2092static struct page *alloc_misplaced_dst_page_thp(struct page *page,
2093 unsigned long data)
2094{
2095 int nid = (int) data;
2096 struct page *newpage;
2097
2098 newpage = alloc_pages_node(nid, (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
2099 HPAGE_PMD_ORDER);
2100 if (!newpage)
2101 goto out;
2102
2103 prep_transhuge_page(newpage);
2104
2105out:
2106 return newpage;
2107}
2108
2109static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
2110{
2111 int page_lru;
2112 int nr_pages = thp_nr_pages(page);
2113
2114 VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
2115
2116
2117 if (PageTransHuge(page) && total_mapcount(page) > 1)
2118 return 0;
2119
2120
2121 if (!migrate_balanced_pgdat(pgdat, nr_pages))
2122 return 0;
2123
2124 if (isolate_lru_page(page))
2125 return 0;
2126
2127 page_lru = page_is_file_lru(page);
2128 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
2129 nr_pages);
2130
2131
2132
2133
2134
2135
2136 put_page(page);
2137 return 1;
2138}
2139
/*
 * Attempt to migrate a misplaced page to the specified destination
 * node. Caller is expected to have an elevated reference count on
 * the page that will be dropped by this function before returning.
 */
2145int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
2146 int node)
2147{
2148 pg_data_t *pgdat = NODE_DATA(node);
2149 int isolated;
2150 int nr_remaining;
2151 LIST_HEAD(migratepages);
2152 new_page_t *new;
2153 bool compound;
2154 int nr_pages = thp_nr_pages(page);
2155
2156
2157
2158
2159
2160
2161 compound = PageTransHuge(page);
2162
2163 if (compound)
2164 new = alloc_misplaced_dst_page_thp;
2165 else
2166 new = alloc_misplaced_dst_page;
2167
2168
2169
2170
2171
2172 if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
2173 (vma->vm_flags & VM_EXEC))
2174 goto out;
2175
2176
2177
2178
2179
2180 if (page_is_file_lru(page) && PageDirty(page))
2181 goto out;
2182
2183 isolated = numamigrate_isolate_page(pgdat, page);
2184 if (!isolated)
2185 goto out;
2186
2187 list_add(&page->lru, &migratepages);
2188 nr_remaining = migrate_pages(&migratepages, *new, NULL, node,
2189 MIGRATE_ASYNC, MR_NUMA_MISPLACED, NULL);
2190 if (nr_remaining) {
2191 if (!list_empty(&migratepages)) {
2192 list_del(&page->lru);
2193 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
2194 page_is_file_lru(page), -nr_pages);
2195 putback_lru_page(page);
2196 }
2197 isolated = 0;
2198 } else
2199 count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_pages);
2200 BUG_ON(!list_empty(&migratepages));
2201 return isolated;
2202
2203out:
2204 put_page(page);
2205 return 0;
2206}
2207#endif
2208#endif
2209
2210#ifdef CONFIG_DEVICE_PRIVATE
2211static int migrate_vma_collect_skip(unsigned long start,
2212 unsigned long end,
2213 struct mm_walk *walk)
2214{
2215 struct migrate_vma *migrate = walk->private;
2216 unsigned long addr;
2217
2218 for (addr = start; addr < end; addr += PAGE_SIZE) {
2219 migrate->dst[migrate->npages] = 0;
2220 migrate->src[migrate->npages++] = 0;
2221 }
2222
2223 return 0;
2224}
2225
2226static int migrate_vma_collect_hole(unsigned long start,
2227 unsigned long end,
2228 __always_unused int depth,
2229 struct mm_walk *walk)
2230{
2231 struct migrate_vma *migrate = walk->private;
2232 unsigned long addr;
2233
2234
2235 if (!vma_is_anonymous(walk->vma))
2236 return migrate_vma_collect_skip(start, end, walk);
2237
2238 for (addr = start; addr < end; addr += PAGE_SIZE) {
2239 migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
2240 migrate->dst[migrate->npages] = 0;
2241 migrate->npages++;
2242 migrate->cpages++;
2243 }
2244
2245 return 0;
2246}
2247
2248static int migrate_vma_collect_pmd(pmd_t *pmdp,
2249 unsigned long start,
2250 unsigned long end,
2251 struct mm_walk *walk)
2252{
2253 struct migrate_vma *migrate = walk->private;
2254 struct vm_area_struct *vma = walk->vma;
2255 struct mm_struct *mm = vma->vm_mm;
2256 unsigned long addr = start, unmapped = 0;
2257 spinlock_t *ptl;
2258 pte_t *ptep;
2259
2260again:
2261 if (pmd_none(*pmdp))
2262 return migrate_vma_collect_hole(start, end, -1, walk);
2263
2264 if (pmd_trans_huge(*pmdp)) {
2265 struct page *page;
2266
2267 ptl = pmd_lock(mm, pmdp);
2268 if (unlikely(!pmd_trans_huge(*pmdp))) {
2269 spin_unlock(ptl);
2270 goto again;
2271 }
2272
2273 page = pmd_page(*pmdp);
2274 if (is_huge_zero_page(page)) {
2275 spin_unlock(ptl);
2276 split_huge_pmd(vma, pmdp, addr);
2277 if (pmd_trans_unstable(pmdp))
2278 return migrate_vma_collect_skip(start, end,
2279 walk);
2280 } else {
2281 int ret;
2282
2283 get_page(page);
2284 spin_unlock(ptl);
2285 if (unlikely(!trylock_page(page)))
2286 return migrate_vma_collect_skip(start, end,
2287 walk);
2288 ret = split_huge_page(page);
2289 unlock_page(page);
2290 put_page(page);
2291 if (ret)
2292 return migrate_vma_collect_skip(start, end,
2293 walk);
2294 if (pmd_none(*pmdp))
2295 return migrate_vma_collect_hole(start, end, -1,
2296 walk);
2297 }
2298 }
2299
2300 if (unlikely(pmd_bad(*pmdp)))
2301 return migrate_vma_collect_skip(start, end, walk);
2302
2303 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2304 arch_enter_lazy_mmu_mode();
2305
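	/* Walk each pte in the range, recording candidate pfns in migrate->src[]. */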
2306 for (; addr < end; addr += PAGE_SIZE, ptep++) {
2307 unsigned long mpfn = 0, pfn;
2308 struct page *page;
2309 swp_entry_t entry;
2310 pte_t pte;
2311
2312 pte = *ptep;
2313
2314 if (pte_none(pte)) {
2315 if (vma_is_anonymous(vma)) {
2316 mpfn = MIGRATE_PFN_MIGRATE;
2317 migrate->cpages++;
2318 }
2319 goto next;
2320 }
2321
2322 if (!pte_present(pte)) {
2323
2324
2325
2326
2327
2328 entry = pte_to_swp_entry(pte);
2329 if (!is_device_private_entry(entry))
2330 goto next;
2331
2332 page = pfn_swap_entry_to_page(entry);
2333 if (!(migrate->flags &
2334 MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
2335 page->pgmap->owner != migrate->pgmap_owner)
2336 goto next;
2337
2338 mpfn = migrate_pfn(page_to_pfn(page)) |
2339 MIGRATE_PFN_MIGRATE;
2340 if (is_writable_device_private_entry(entry))
2341 mpfn |= MIGRATE_PFN_WRITE;
2342 } else {
2343 if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
2344 goto next;
2345 pfn = pte_pfn(pte);
2346 if (is_zero_pfn(pfn)) {
2347 mpfn = MIGRATE_PFN_MIGRATE;
2348 migrate->cpages++;
2349 goto next;
2350 }
2351 page = vm_normal_page(migrate->vma, addr, pte);
2352 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
2353 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
2354 }
2355
2356
2357 if (!page || !page->mapping || PageTransCompound(page)) {
2358 mpfn = 0;
2359 goto next;
2360 }
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371 get_page(page);
2372 migrate->cpages++;
2373
2374
2375
2376
2377
2378
2379 if (trylock_page(page)) {
2380 pte_t swp_pte;
2381
2382 mpfn |= MIGRATE_PFN_LOCKED;
2383 ptep_get_and_clear(mm, addr, ptep);
2384
2385
2386 if (mpfn & MIGRATE_PFN_WRITE)
2387 entry = make_writable_migration_entry(
2388 page_to_pfn(page));
2389 else
2390 entry = make_readable_migration_entry(
2391 page_to_pfn(page));
2392 swp_pte = swp_entry_to_pte(entry);
2393 if (pte_present(pte)) {
2394 if (pte_soft_dirty(pte))
2395 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2396 if (pte_uffd_wp(pte))
2397 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2398 } else {
2399 if (pte_swp_soft_dirty(pte))
2400 swp_pte = pte_swp_mksoft_dirty(swp_pte);
2401 if (pte_swp_uffd_wp(pte))
2402 swp_pte = pte_swp_mkuffd_wp(swp_pte);
2403 }
2404 set_pte_at(mm, addr, ptep, swp_pte);
2405
2406
2407
2408
2409
2410
2411 page_remove_rmap(page, false);
2412 put_page(page);
2413
2414 if (pte_present(pte))
2415 unmapped++;
2416 }
2417
2418next:
2419 migrate->dst[migrate->npages] = 0;
2420 migrate->src[migrate->npages++] = mpfn;
2421 }
2422 arch_leave_lazy_mmu_mode();
2423 pte_unmap_unlock(ptep - 1, ptl);
2424
2425
2426 if (unmapped)
2427 flush_tlb_range(walk->vma, start, end);
2428
2429 return 0;
2430}
2431
2432static const struct mm_walk_ops migrate_vma_walk_ops = {
2433 .pmd_entry = migrate_vma_collect_pmd,
2434 .pte_hole = migrate_vma_collect_hole,
2435};
2436
/*
 * migrate_vma_collect() - collect pages over a range of virtual addresses
 * @migrate: migrate struct containing all migration information
 *
 * This will walk the CPU page table. For each virtual address backed by a
 * valid page, it updates the src array and takes a reference on the page, in
 * order to pin the page until we lock it and unmap it.
 */
2445static void migrate_vma_collect(struct migrate_vma *migrate)
2446{
2447 struct mmu_notifier_range range;
2448
2449
2450
2451
2452
2453
2454 mmu_notifier_range_init_owner(&range, MMU_NOTIFY_MIGRATE, 0,
2455 migrate->vma, migrate->vma->vm_mm, migrate->start, migrate->end,
2456 migrate->pgmap_owner);
2457 mmu_notifier_invalidate_range_start(&range);
2458
2459 walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
2460 &migrate_vma_walk_ops, migrate);
2461
2462 mmu_notifier_invalidate_range_end(&range);
2463 migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
2464}
2465
/*
 * migrate_vma_check_page() - check if page is pinned or not
 * @page: struct page to check
 *
 * Pinned pages cannot be migrated. This is the same test as in
 * migrate_page_move_mapping(), except that here we allow migration of a
 * ZONE_DEVICE page.
 */
2474static bool migrate_vma_check_page(struct page *page)
2475{
2476
2477
2478
2479
2480
2481 int extra = 1;
2482
2483
2484
2485
2486
2487
2488 if (PageCompound(page))
2489 return false;
2490
2491
2492 if (is_zone_device_page(page)) {
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506 return is_device_private_page(page);
2507 }
2508
2509
2510 if (page_mapping(page))
2511 extra += 1 + page_has_private(page);
2512
2513 if ((page_count(page) - extra) > page_mapcount(page))
2514 return false;
2515
2516 return true;
2517}
2518
/*
 * migrate_vma_prepare() - lock pages and isolate them from the lru
 * @migrate: migrate struct containing all migration information
 *
 * This locks pages that have been collected by migrate_vma_collect(). Once each
 * page is locked it is isolated from the lru (for non-device pages). Finally,
 * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
 * migrated by concurrent kernel threads.
 */
2528static void migrate_vma_prepare(struct migrate_vma *migrate)
2529{
2530 const unsigned long npages = migrate->npages;
2531 const unsigned long start = migrate->start;
2532 unsigned long addr, i, restore = 0;
2533 bool allow_drain = true;
2534
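	/* Drain the local pagevecs so recently-added pages can be isolated from the LRU. */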
2535 lru_add_drain();
2536
2537 for (i = 0; (i < npages) && migrate->cpages; i++) {
2538 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2539 bool remap = true;
2540
2541 if (!page)
2542 continue;
2543
2544 if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
2545
2546
2547
2548
2549
2550
2551
2552
2553 if (!trylock_page(page)) {
2554 migrate->src[i] = 0;
2555 migrate->cpages--;
2556 put_page(page);
2557 continue;
2558 }
2559 remap = false;
2560 migrate->src[i] |= MIGRATE_PFN_LOCKED;
2561 }
2562
2563
2564 if (!is_zone_device_page(page)) {
2565 if (!PageLRU(page) && allow_drain) {
2566
2567 lru_add_drain_all();
2568 allow_drain = false;
2569 }
2570
2571 if (isolate_lru_page(page)) {
2572 if (remap) {
2573 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2574 migrate->cpages--;
2575 restore++;
2576 } else {
2577 migrate->src[i] = 0;
2578 unlock_page(page);
2579 migrate->cpages--;
2580 put_page(page);
2581 }
2582 continue;
2583 }
2584
2585
2586 put_page(page);
2587 }
2588
2589 if (!migrate_vma_check_page(page)) {
2590 if (remap) {
2591 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2592 migrate->cpages--;
2593 restore++;
2594
2595 if (!is_zone_device_page(page)) {
2596 get_page(page);
2597 putback_lru_page(page);
2598 }
2599 } else {
2600 migrate->src[i] = 0;
2601 unlock_page(page);
2602 migrate->cpages--;
2603
2604 if (!is_zone_device_page(page))
2605 putback_lru_page(page);
2606 else
2607 put_page(page);
2608 }
2609 }
2610 }
2611
2612 for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
2613 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2614
2615 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2616 continue;
2617
2618 remove_migration_pte(page, migrate->vma, addr, page);
2619
2620 migrate->src[i] = 0;
2621 unlock_page(page);
2622 put_page(page);
2623 restore--;
2624 }
2625}
2626
/*
 * migrate_vma_unmap() - replace page mapping with special migration pte entry
 * @migrate: migrate struct containing all migration information
 *
 * Replace page mapping (CPU page table pte) with a special migration pte entry
 * and check again if it has been pinned. Pinned pages are restored because we
 * cannot migrate them.
 *
 * This is the last step before we call the device driver callback to allocate
 * destination memory and copy contents of original page over to new page.
 */
2638static void migrate_vma_unmap(struct migrate_vma *migrate)
2639{
2640 const unsigned long npages = migrate->npages;
2641 const unsigned long start = migrate->start;
2642 unsigned long addr, i, restore = 0;
2643
2644 for (i = 0; i < npages; i++) {
2645 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2646
2647 if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
2648 continue;
2649
2650 if (page_mapped(page)) {
2651 try_to_migrate(page, 0);
2652 if (page_mapped(page))
2653 goto restore;
2654 }
2655
2656 if (migrate_vma_check_page(page))
2657 continue;
2658
2659restore:
2660 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2661 migrate->cpages--;
2662 restore++;
2663 }
2664
2665 for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
2666 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2667
2668 if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
2669 continue;
2670
2671 remove_migration_ptes(page, page, false);
2672
2673 migrate->src[i] = 0;
2674 unlock_page(page);
2675 restore--;
2676
2677 if (is_zone_device_page(page))
2678 put_page(page);
2679 else
2680 putback_lru_page(page);
2681 }
2682}
2683
/**
 * migrate_vma_setup() - prepare to migrate a range of memory
 * @args: contains the vma, start, and pfns arrays for the migration
 *
 * Returns: negative errno on failures, 0 when 0 or more pages were migrated
 * without an error.
 *
 * Prepare to migrate a range of memory virtual address range by collecting all
 * the pages backing each virtual address in the range, saving them inside the
 * src array.  Then lock those pages and unmap them. Once the pages are locked
 * and unmapped, check whether each page is pinned or not.  Pages that aren't
 * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
 * corresponding src array entry.  Any pages that are found to be pinned are
 * restored by remapping and unlocking them.
 *
 * The caller should then allocate destination memory and copy source memory to
 * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
 * flag set).  Once these are allocated and copied, the caller must update each
 * corresponding entry in the dst array with the pfn value of the destination
 * page and with MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
 * (destination pages must have their struct pages locked, via lock_page()).
 *
 * Note that the caller does not have to migrate all the pages that are marked
 * with MIGRATE_PFN_MIGRATE flag in the src array unless this is a migration
 * from device memory back to system memory.
 *
 * Empty entries in the CPU page table (pte_none() or pmd_none()) also get
 * MIGRATE_PFN_MIGRATE set in the src array, allowing the caller to allocate
 * device memory for those unbacked virtual addresses.
 *
 * After that, the caller calls migrate_vma_pages() to migrate struct page
 * information from the source struct page to the destination struct page;
 * entries that could not be migrated have MIGRATE_PFN_MIGRATE cleared in the
 * src array.  Finally, the caller calls migrate_vma_finalize() to update the
 * CPU page table to point to the newly migrated pages, or to restore the
 * original mappings for pages that could not be migrated.
 */
2748int migrate_vma_setup(struct migrate_vma *args)
2749{
2750 long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
2751
2752 args->start &= PAGE_MASK;
2753 args->end &= PAGE_MASK;
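	/* The range must lie within a single ordinary VMA (no hugetlb, VM_SPECIAL or DAX). */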
2754 if (!args->vma || is_vm_hugetlb_page(args->vma) ||
2755 (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
2756 return -EINVAL;
2757 if (nr_pages <= 0)
2758 return -EINVAL;
2759 if (args->start < args->vma->vm_start ||
2760 args->start >= args->vma->vm_end)
2761 return -EINVAL;
2762 if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
2763 return -EINVAL;
2764 if (!args->src || !args->dst)
2765 return -EINVAL;
2766
2767 memset(args->src, 0, sizeof(*args->src) * nr_pages);
2768 args->cpages = 0;
2769 args->npages = 0;
2770
2771 migrate_vma_collect(args);
2772
2773 if (args->cpages)
2774 migrate_vma_prepare(args);
2775 if (args->cpages)
2776 migrate_vma_unmap(args);
2777
2778
2779
2780
2781
2782
2783 return 0;
2784
2785}
2786EXPORT_SYMBOL(migrate_vma_setup);
2787
/*
 * This code closely matches the code in:
 *   __handle_mm_fault()
 *     handle_pte_fault()
 *       do_anonymous_page()
 * to map in an anonymous zero page but the struct page will be a ZONE_DEVICE
 * private page.
 */
2796static void migrate_vma_insert_page(struct migrate_vma *migrate,
2797 unsigned long addr,
2798 struct page *page,
2799 unsigned long *src)
2800{
2801 struct vm_area_struct *vma = migrate->vma;
2802 struct mm_struct *mm = vma->vm_mm;
2803 bool flush = false;
2804 spinlock_t *ptl;
2805 pte_t entry;
2806 pgd_t *pgdp;
2807 p4d_t *p4dp;
2808 pud_t *pudp;
2809 pmd_t *pmdp;
2810 pte_t *ptep;
2811
2812
2813 if (!vma_is_anonymous(vma))
2814 goto abort;
2815
2816 pgdp = pgd_offset(mm, addr);
2817 p4dp = p4d_alloc(mm, pgdp, addr);
2818 if (!p4dp)
2819 goto abort;
2820 pudp = pud_alloc(mm, p4dp, addr);
2821 if (!pudp)
2822 goto abort;
2823 pmdp = pmd_alloc(mm, pudp, addr);
2824 if (!pmdp)
2825 goto abort;
2826
2827 if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
2828 goto abort;
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840 if (pte_alloc(mm, pmdp))
2841 goto abort;
2842
2843
2844 if (unlikely(pmd_trans_unstable(pmdp)))
2845 goto abort;
2846
2847 if (unlikely(anon_vma_prepare(vma)))
2848 goto abort;
2849 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
2850 goto abort;
2851
2852
2853
2854
2855
2856
2857 __SetPageUptodate(page);
2858
2859 if (is_zone_device_page(page)) {
2860 if (is_device_private_page(page)) {
2861 swp_entry_t swp_entry;
2862
2863 if (vma->vm_flags & VM_WRITE)
2864 swp_entry = make_writable_device_private_entry(
2865 page_to_pfn(page));
2866 else
2867 swp_entry = make_readable_device_private_entry(
2868 page_to_pfn(page));
2869 entry = swp_entry_to_pte(swp_entry);
2870 } else {
2871
2872
2873
2874
2875 pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
2876 goto abort;
2877 }
2878 } else {
2879 entry = mk_pte(page, vma->vm_page_prot);
2880 if (vma->vm_flags & VM_WRITE)
2881 entry = pte_mkwrite(pte_mkdirty(entry));
2882 }
2883
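	/* Map and lock the destination pte, then re-check that it is still safe to populate. */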
2884 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
2885
2886 if (check_stable_address_space(mm))
2887 goto unlock_abort;
2888
2889 if (pte_present(*ptep)) {
2890 unsigned long pfn = pte_pfn(*ptep);
2891
2892 if (!is_zero_pfn(pfn))
2893 goto unlock_abort;
2894 flush = true;
2895 } else if (!pte_none(*ptep))
2896 goto unlock_abort;
2897
2898
2899
2900
2901
2902 if (userfaultfd_missing(vma))
2903 goto unlock_abort;
2904
2905 inc_mm_counter(mm, MM_ANONPAGES);
2906 page_add_new_anon_rmap(page, vma, addr, false);
2907 if (!is_zone_device_page(page))
2908 lru_cache_add_inactive_or_unevictable(page, vma);
2909 get_page(page);
2910
2911 if (flush) {
2912 flush_cache_page(vma, addr, pte_pfn(*ptep));
2913 ptep_clear_flush_notify(vma, addr, ptep);
2914 set_pte_at_notify(mm, addr, ptep, entry);
2915 update_mmu_cache(vma, addr, ptep);
2916 } else {
2917
2918 set_pte_at(mm, addr, ptep, entry);
2919 update_mmu_cache(vma, addr, ptep);
2920 }
2921
2922 pte_unmap_unlock(ptep, ptl);
2923 *src = MIGRATE_PFN_MIGRATE;
2924 return;
2925
2926unlock_abort:
2927 pte_unmap_unlock(ptep, ptl);
2928abort:
2929 *src &= ~MIGRATE_PFN_MIGRATE;
2930}
2931
/**
 * migrate_vma_pages() - migrate meta-data from src page to dst page
 * @migrate: migrate struct containing all migration information
 *
 * This migrates struct page meta-data from source struct page to destination
 * struct page. This effectively finishes the migration from source page to the
 * destination page.
 */
2940void migrate_vma_pages(struct migrate_vma *migrate)
2941{
2942 const unsigned long npages = migrate->npages;
2943 const unsigned long start = migrate->start;
2944 struct mmu_notifier_range range;
2945 unsigned long addr, i;
2946 bool notified = false;
2947
2948 for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
2949 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
2950 struct page *page = migrate_pfn_to_page(migrate->src[i]);
2951 struct address_space *mapping;
2952 int r;
2953
2954 if (!newpage) {
2955 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2956 continue;
2957 }
2958
2959 if (!page) {
2960 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE))
2961 continue;
2962 if (!notified) {
2963 notified = true;
2964
2965 mmu_notifier_range_init_owner(&range,
2966 MMU_NOTIFY_MIGRATE, 0, migrate->vma,
2967 migrate->vma->vm_mm, addr, migrate->end,
2968 migrate->pgmap_owner);
2969 mmu_notifier_invalidate_range_start(&range);
2970 }
2971 migrate_vma_insert_page(migrate, addr, newpage,
2972 &migrate->src[i]);
2973 continue;
2974 }
2975
2976 mapping = page_mapping(page);
2977
2978 if (is_zone_device_page(newpage)) {
2979 if (is_device_private_page(newpage)) {
2980
2981
2982
2983
2984 if (mapping) {
2985 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2986 continue;
2987 }
2988 } else {
2989
2990
2991
2992
2993 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
2994 continue;
2995 }
2996 }
2997
2998 r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
2999 if (r != MIGRATEPAGE_SUCCESS)
3000 migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
3001 }
3002
3003
3004
3005
3006
3007
3008 if (notified)
3009 mmu_notifier_invalidate_range_only_end(&range);
3010}
3011EXPORT_SYMBOL(migrate_vma_pages);
3012
/**
 * migrate_vma_finalize() - restore CPU page table entry
 * @migrate: migrate struct containing all migration information
 *
 * This replaces the special migration pte entry with either a mapping to the
 * new page if migration was successful for that page, or to the original page
 * otherwise.
 *
 * This also unlocks the pages and puts them back on the lru, or drops the extra
 * refcount, for device pages.
 */
3024void migrate_vma_finalize(struct migrate_vma *migrate)
3025{
3026 const unsigned long npages = migrate->npages;
3027 unsigned long i;
3028
3029 for (i = 0; i < npages; i++) {
3030 struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
3031 struct page *page = migrate_pfn_to_page(migrate->src[i]);
3032
3033 if (!page) {
3034 if (newpage) {
3035 unlock_page(newpage);
3036 put_page(newpage);
3037 }
3038 continue;
3039 }
3040
3041 if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
3042 if (newpage) {
3043 unlock_page(newpage);
3044 put_page(newpage);
3045 }
3046 newpage = page;
3047 }
3048
3049 remove_migration_ptes(page, newpage, false);
3050 unlock_page(page);
3051
3052 if (is_zone_device_page(page))
3053 put_page(page);
3054 else
3055 putback_lru_page(page);
3056
3057 if (newpage != page) {
3058 unlock_page(newpage);
3059 if (is_zone_device_page(newpage))
3060 put_page(newpage);
3061 else
3062 putback_lru_page(newpage);
3063 }
3064 }
3065}
3066EXPORT_SYMBOL(migrate_vma_finalize);
3067#endif
3068
3069#if defined(CONFIG_HOTPLUG_CPU)
3070
3071static void __disable_all_migrate_targets(void)
3072{
3073 int node;
3074
3075 for_each_online_node(node)
3076 node_demotion[node] = NUMA_NO_NODE;
3077}
3078
3079static void disable_all_migrate_targets(void)
3080{
3081 __disable_all_migrate_targets();
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095 synchronize_rcu();
3096}
3097
3098
3099
3100
3101
3102
3103static int establish_migrate_target(int node, nodemask_t *used)
3104{
3105 int migration_target;
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115 if (node_demotion[node] != NUMA_NO_NODE)
3116 return NUMA_NO_NODE;
3117
3118 migration_target = find_next_best_node(node, used);
3119 if (migration_target == NUMA_NO_NODE)
3120 return NUMA_NO_NODE;
3121
3122 node_demotion[node] = migration_target;
3123
3124 return migration_target;
3125}
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145static void __set_migration_target_nodes(void)
3146{
3147 nodemask_t next_pass = NODE_MASK_NONE;
3148 nodemask_t this_pass = NODE_MASK_NONE;
3149 nodemask_t used_targets = NODE_MASK_NONE;
3150 int node;
3151
3152
3153
3154
3155
3156
3157 disable_all_migrate_targets();
3158
3159
3160
3161
3162
3163 next_pass = node_states[N_CPU];
3164again:
3165 this_pass = next_pass;
3166 next_pass = NODE_MASK_NONE;
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178 nodes_or(used_targets, used_targets, this_pass);
3179 for_each_node_mask(node, this_pass) {
3180 int target_node = establish_migrate_target(node, &used_targets);
3181
3182 if (target_node == NUMA_NO_NODE)
3183 continue;
3184
3185
3186
3187
3188
3189
3190 node_set(target_node, next_pass);
3191 }
3192
3193
3194
3195
3196
3197 if (!nodes_empty(next_pass))
3198 goto again;
3199}
3200
3201
3202
3203
3204static void set_migration_target_nodes(void)
3205{
3206 get_online_mems();
3207 __set_migration_target_nodes();
3208 put_online_mems();
3209}
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
3223 unsigned long action, void *_arg)
3224{
3225 struct memory_notify *arg = _arg;
3226
3227
3228
3229
3230
3231
3232 if (arg->status_change_nid < 0)
3233 return notifier_from_errno(0);
3234
3235 switch (action) {
3236 case MEM_GOING_OFFLINE:
3237
3238
3239
3240
3241
3242
3243 disable_all_migrate_targets();
3244 break;
3245 case MEM_OFFLINE:
3246 case MEM_ONLINE:
3247
3248
3249
3250
3251 __set_migration_target_nodes();
3252 break;
3253 case MEM_CANCEL_OFFLINE:
3254
3255
3256
3257
3258 __set_migration_target_nodes();
3259 break;
3260 case MEM_GOING_ONLINE:
3261 case MEM_CANCEL_ONLINE:
3262 break;
3263 }
3264
3265 return notifier_from_errno(0);
3266}
3267
3268
3269
3270
3271
3272
3273
3274
3275static int migration_online_cpu(unsigned int cpu)
3276{
3277 set_migration_target_nodes();
3278 return 0;
3279}
3280
3281static int migration_offline_cpu(unsigned int cpu)
3282{
3283 set_migration_target_nodes();
3284 return 0;
3285}
3286
3287static int __init migrate_on_reclaim_init(void)
3288{
3289 int ret;
3290
3291 ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
3292 NULL, migration_offline_cpu);
3293
3294
3295
3296
3297
3298
3299 WARN_ON(ret < 0);
3300 ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
3301 migration_online_cpu, NULL);
3302 WARN_ON(ret < 0);
3303
3304 hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
3305 return 0;
3306}
3307late_initcall(migrate_on_reclaim_init);
3308#endif
3309