// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/err.h>
5#include <linux/spinlock.h>
6
7#include <linux/mm.h>
8#include <linux/memremap.h>
9#include <linux/pagemap.h>
10#include <linux/rmap.h>
11#include <linux/swap.h>
12#include <linux/swapops.h>
13#include <linux/secretmem.h>
14
15#include <linux/sched/signal.h>
16#include <linux/rwsem.h>
17#include <linux/hugetlb.h>
18#include <linux/migrate.h>
19#include <linux/mm_inline.h>
20#include <linux/sched/mm.h>
21
22#include <asm/mmu_context.h>
23#include <asm/tlbflush.h>
24
25#include "internal.h"
26
27struct follow_page_context {
28 struct dev_pagemap *pgmap;
29 unsigned int page_mask;
30};
31
32static inline void sanity_check_pinned_pages(struct page **pages,
33 unsigned long npages)
34{
35 if (!IS_ENABLED(CONFIG_DEBUG_VM))
36 return;
37
	/*
	 * We only pin anonymous pages if they are exclusive. Once pinned, we
	 * can no longer turn them possibly shared, and PageAnonExclusive()
	 * will stick around until the page is freed.
	 *
	 * We'd like to verify that our pinned anonymous pages are still
	 * mapped exclusively. For anon THP we don't know how the pages were
	 * mapped when pinning them; however, either the given page
	 * (PTE-mapped THP) or the head page (PMD-mapped THP) should be
	 * PageAnonExclusive(). If neither is the case, something is wrong.
	 */
50 for (; npages; npages--, pages++) {
51 struct page *page = *pages;
52 struct folio *folio = page_folio(page);
53
54 if (!folio_test_anon(folio))
55 continue;
56 if (!folio_test_large(folio) || folio_test_hugetlb(folio))
57 VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
58 else
			/* Either a PTE-mapped or a PMD-mapped THP. */
60 VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page) &&
61 !PageAnonExclusive(page), page);
62 }
63}
64
/*
 * Return the folio with ref appropriately incremented,
 * or NULL if that failed.
 */
69static inline struct folio *try_get_folio(struct page *page, int refs)
70{
71 struct folio *folio;
72
73retry:
74 folio = page_folio(page);
75 if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
76 return NULL;
77 if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
78 return NULL;
79
	/*
	 * At this point we have a stable reference to the folio; but it
	 * could be that between calling page_folio() and the refcount
	 * increment, the folio was split, in which case we'd end up holding
	 * a reference on a folio that has nothing to do with the page we
	 * were given anymore.
	 * So now that the folio is stable, recheck that the page still
	 * belongs to this folio.
	 */
89 if (unlikely(page_folio(page) != folio)) {
90 if (!put_devmap_managed_page_refs(&folio->page, refs))
91 folio_put_refs(folio, refs);
92 goto retry;
93 }
94
95 return folio;
96}
97
/**
 * try_grab_folio() - Attempt to get or pin a folio.
 * @page:  pointer to page to be grabbed
 * @refs:  the value to (effectively) add to the folio's refcount
 * @flags: gup flags: these are the FOLL_* flag values.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount".
 *
 * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
 * same time. (That's true throughout the get_user_pages*() and
 * pin_user_pages*() APIs.) Cases:
 *
 *    FOLL_GET: folio's refcount will be incremented by @refs.
 *
 *    FOLL_PIN on large folios: folio's refcount will be incremented by
 *    @refs, and its exact pincount will be incremented by @refs.
 *
 *    FOLL_PIN on single-page folios: folio's refcount will be incremented by
 *    @refs * GUP_PIN_COUNTING_BIAS.
 *
 * Return: The folio containing @page (with refcount appropriately
 * incremented) for success, or NULL upon failure. If neither FOLL_GET
 * nor FOLL_PIN was set, that's considered failure, and furthermore,
 * a likely bug in the caller, so a warning is also emitted.
 */
124struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
125{
126 if (flags & FOLL_GET)
127 return try_get_folio(page, refs);
128 else if (flags & FOLL_PIN) {
129 struct folio *folio;
130
131
132
133
134
135
136 if (unlikely((flags & FOLL_LONGTERM) &&
137 !is_pinnable_page(page)))
138 return NULL;
139
140
141
142
143
144 folio = try_get_folio(page, refs);
145 if (!folio)
146 return NULL;
147
		/*
		 * When pinning a large folio, use an exact count to track it.
		 *
		 * However, be sure to *also* increment the normal folio
		 * refcount field at least once, so that the folio really is
		 * pinned. That's why the refcount taken by the earlier
		 * try_get_folio() is left intact.
		 */
156 if (folio_test_large(folio))
157 atomic_add(refs, folio_pincount_ptr(folio));
158 else
159 folio_ref_add(folio,
160 refs * (GUP_PIN_COUNTING_BIAS - 1));
161 node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
162
163 return folio;
164 }
165
166 WARN_ON_ONCE(1);
167 return NULL;
168}
169
170static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
171{
172 if (flags & FOLL_PIN) {
173 node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
174 if (folio_test_large(folio))
175 atomic_sub(refs, folio_pincount_ptr(folio));
176 else
177 refs *= GUP_PIN_COUNTING_BIAS;
178 }
179
180 if (!put_devmap_managed_page_refs(&folio->page, refs))
181 folio_put_refs(folio, refs);
182}
183
/**
 * try_grab_page() - elevate a page's refcount by a flag-dependent amount
 * @page:    pointer to page to be grabbed
 * @flags:   gup flags: these are the FOLL_* flag values.
 *
 * This might not do anything at all, depending on the flags argument.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount".
 *
 * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
 * same time. (That's true throughout the get_user_pages*() and
 * pin_user_pages*() APIs.)
 *
 * Return: true for success, or if no action was required (if neither
 * FOLL_PIN nor FOLL_GET was set, nothing is done). false for failure:
 * FOLL_GET or FOLL_PIN was set, but the page could not be grabbed.
 */
202bool __must_check try_grab_page(struct page *page, unsigned int flags)
203{
204 struct folio *folio = page_folio(page);
205
206 WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
207 if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
208 return false;
209
210 if (flags & FOLL_GET)
211 folio_ref_inc(folio);
212 else if (flags & FOLL_PIN) {
213
214
215
216
217
218 if (folio_test_large(folio)) {
219 folio_ref_add(folio, 1);
220 atomic_add(1, folio_pincount_ptr(folio));
221 } else {
222 folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
223 }
224
225 node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
226 }
227
228 return true;
229}
230
/**
 * unpin_user_page() - release a dma-pinned page
 * @page:            pointer to page to be released
 *
 * Pages that were pinned via pin_user_pages*() must be released via either
 * unpin_user_page(), or one of the unpin_user_pages*() functions. This is so
 * that such pages can be separately tracked and uniquely handled. In
 * particular, interactions with RDMA and filesystems need special handling.
 */
240void unpin_user_page(struct page *page)
241{
242 sanity_check_pinned_pages(&page, 1);
243 gup_put_folio(page_folio(page), 1, FOLL_PIN);
244}
245EXPORT_SYMBOL(unpin_user_page);
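
/*
 * Worked example of the refcount arithmetic behind FOLL_PIN (illustrative
 * note, not original to this file): for a single-page (non-large) folio,
 * each pin adds GUP_PIN_COUNTING_BIAS (1024) to the refcount, so one pin
 * plus one ordinary reference yields 1 + 1024 = 1025. page_maybe_dma_pinned()
 * can then infer "probably pinned" purely from refcount >=
 * GUP_PIN_COUNTING_BIAS, at the cost of false positives once a page has on
 * the order of 1024 ordinary references. Large folios instead keep an exact
 * pin count in the folio's pincount field, as try_grab_folio() above shows.
 */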
246
247static inline struct folio *gup_folio_range_next(struct page *start,
248 unsigned long npages, unsigned long i, unsigned int *ntails)
249{
250 struct page *next = nth_page(start, i);
251 struct folio *folio = page_folio(next);
252 unsigned int nr = 1;
253
254 if (folio_test_large(folio))
255 nr = min_t(unsigned int, npages - i,
256 folio_nr_pages(folio) - folio_page_idx(folio, next));
257
258 *ntails = nr;
259 return folio;
260}
261
262static inline struct folio *gup_folio_next(struct page **list,
263 unsigned long npages, unsigned long i, unsigned int *ntails)
264{
265 struct folio *folio = page_folio(list[i]);
266 unsigned int nr;
267
268 for (nr = i + 1; nr < npages; nr++) {
269 if (page_folio(list[nr]) != folio)
270 break;
271 }
272
273 *ntails = nr - i;
274 return folio;
275}
276
/**
 * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
 * @pages:  array of pages to be maybe marked dirty, and definitely released.
 * @npages: number of pages in the @pages array.
 * @make_dirty: whether to mark the pages dirty
 *
 * "gup-pinned page" refers to a page that has had one of the pin_user_pages()
 * variants called on it.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if @make_dirty is true, and if the page was previously
 * listed as clean. In any case, release all pages using unpin_user_page().
 *
 * Please see the unpin_user_page() documentation for details.
 *
 * Dirtying is done under the page lock, as in set_page_dirty_lock(). Callers
 * that need plain set_page_dirty() semantics should dirty the pages
 * themselves and then call unpin_user_pages().
 */
299void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
300 bool make_dirty)
301{
302 unsigned long i;
303 struct folio *folio;
304 unsigned int nr;
305
306 if (!make_dirty) {
307 unpin_user_pages(pages, npages);
308 return;
309 }
310
311 sanity_check_pinned_pages(pages, npages);
312 for (i = 0; i < npages; i += nr) {
313 folio = gup_folio_next(pages, npages, i, &nr);
		/*
		 * Checking the dirty flag here may race with
		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
		 *
		 * 1) This code sees the page as already dirty, so it skips
		 * the call to folio_mark_dirty(). That could happen because
		 * clear_page_dirty_for_io() called page_mkclean(), followed
		 * by set_page_dirty(). However, now the page is going to get
		 * written back, which meets the original intention of
		 * setting it dirty, so all is well.
		 *
		 * 2) This code sees the page as clean, so it calls
		 * folio_mark_dirty(). The page stays dirty, despite being
		 * written back, so it gets written back again in the next
		 * writeback cycle. This is harmless.
		 */
334 if (!folio_test_dirty(folio)) {
335 folio_lock(folio);
336 folio_mark_dirty(folio);
337 folio_unlock(folio);
338 }
339 gup_put_folio(folio, nr, FOLL_PIN);
340 }
341}
342EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
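
/*
 * Illustrative caller-side pairing (sketch, not part of this file's code):
 * a driver pins with pin_user_pages_fast(), lets a device write into the
 * pages, then releases and dirties them with unpin_user_pages_dirty_lock().
 * Only the gup/pup calls below are real kernel APIs; do_device_dma() is a
 * hypothetical placeholder for the driver's own DMA step.
 *
 *	nr = pin_user_pages_fast(user_addr, nr_pages, FOLL_WRITE, pages);
 *	if (nr <= 0)
 *		return nr ? nr : -EFAULT;
 *	ret = do_device_dma(pages, nr);			// hypothetical
 *	unpin_user_pages_dirty_lock(pages, nr, true);	// device wrote data
 */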
343
/**
 * unpin_user_page_range_dirty_lock() - release and optionally dirty
 * gup-pinned page range
 *
 * @page:  the starting page of a range maybe marked dirty, and definitely released.
 * @npages: number of consecutive pages to release.
 * @make_dirty: whether to mark the pages dirty
 *
 * "gup-pinned page range" refers to a range of pages that has had one of the
 * pin_user_pages() variants called on it.
 *
 * For the page range defined by [page .. page+npages], make that range (or
 * its head pages, if compound pages) dirty, if @make_dirty is true, and if
 * the page range was previously listed as clean. In any case, release the
 * whole range using gup_put_folio(), one folio at a time.
 *
 * Unlike unpin_user_pages_dirty_lock(), this works on a physically
 * contiguous range starting at @page rather than on an array of page
 * pointers, via the folio lookup in gup_folio_range_next().
 */
365void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
366 bool make_dirty)
367{
368 unsigned long i;
369 struct folio *folio;
370 unsigned int nr;
371
372 for (i = 0; i < npages; i += nr) {
373 folio = gup_folio_range_next(page, npages, i, &nr);
374 if (make_dirty && !folio_test_dirty(folio)) {
375 folio_lock(folio);
376 folio_mark_dirty(folio);
377 folio_unlock(folio);
378 }
379 gup_put_folio(folio, nr, FOLL_PIN);
380 }
381}
382EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
383
384static void unpin_user_pages_lockless(struct page **pages, unsigned long npages)
385{
386 unsigned long i;
387 struct folio *folio;
388 unsigned int nr;
389
390
391
392
393
394
395 for (i = 0; i < npages; i += nr) {
396 folio = gup_folio_next(pages, npages, i, &nr);
397 gup_put_folio(folio, nr, FOLL_PIN);
398 }
399}
400
/**
 * unpin_user_pages() - release an array of gup-pinned pages.
 * @pages:  array of pages to be released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, release the page using unpin_user_page().
 *
 * Please see the unpin_user_page() documentation for details.
 */
410void unpin_user_pages(struct page **pages, unsigned long npages)
411{
412 unsigned long i;
413 struct folio *folio;
414 unsigned int nr;
415
416
417
418
419
420
421 if (WARN_ON(IS_ERR_VALUE(npages)))
422 return;
423
424 sanity_check_pinned_pages(pages, npages);
425 for (i = 0; i < npages; i += nr) {
426 folio = gup_folio_next(pages, npages, i, &nr);
427 gup_put_folio(folio, nr, FOLL_PIN);
428 }
429}
430EXPORT_SYMBOL(unpin_user_pages);
431
/*
 * Set the MMF_HAS_PINNED flag if it is not set yet; once set, it sticks
 * around for the lifetime of the mm. The test_bit() check up front avoids
 * unnecessarily dirtying the shared cache line with set_bit() when the flag
 * is already set.
 */
437static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
438{
439 if (!test_bit(MMF_HAS_PINNED, mm_flags))
440 set_bit(MMF_HAS_PINNED, mm_flags);
441}
442
443#ifdef CONFIG_MMU
444static struct page *no_page_table(struct vm_area_struct *vma,
445 unsigned int flags)
446{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
455 if ((flags & FOLL_DUMP) &&
456 (vma_is_anonymous(vma) || !vma->vm_ops->fault))
457 return ERR_PTR(-EFAULT);
458 return NULL;
459}
460
461static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
462 pte_t *pte, unsigned int flags)
463{
464 if (flags & FOLL_TOUCH) {
465 pte_t entry = *pte;
466
467 if (flags & FOLL_WRITE)
468 entry = pte_mkdirty(entry);
469 entry = pte_mkyoung(entry);
470
471 if (!pte_same(*pte, entry)) {
472 set_pte_at(vma->vm_mm, address, pte, entry);
473 update_mmu_cache(vma, address, pte);
474 }
475 }
476
477
478 return -EEXIST;
479}
480
481
482
483
484
485static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
486{
487 return pte_write(pte) ||
488 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
489}
490
491static struct page *follow_page_pte(struct vm_area_struct *vma,
492 unsigned long address, pmd_t *pmd, unsigned int flags,
493 struct dev_pagemap **pgmap)
494{
495 struct mm_struct *mm = vma->vm_mm;
496 struct page *page;
497 spinlock_t *ptl;
498 pte_t *ptep, pte;
499 int ret;
500
501
502 if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
503 (FOLL_PIN | FOLL_GET)))
504 return ERR_PTR(-EINVAL);
505retry:
506 if (unlikely(pmd_bad(*pmd)))
507 return no_page_table(vma, flags);
508
509 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
510 pte = *ptep;
511 if (!pte_present(pte)) {
512 swp_entry_t entry;
513
514
515
516
517
518 if (likely(!(flags & FOLL_MIGRATION)))
519 goto no_page;
520 if (pte_none(pte))
521 goto no_page;
522 entry = pte_to_swp_entry(pte);
523 if (!is_migration_entry(entry))
524 goto no_page;
525 pte_unmap_unlock(ptep, ptl);
526 migration_entry_wait(mm, pmd, address);
527 goto retry;
528 }
529 if ((flags & FOLL_NUMA) && pte_protnone(pte))
530 goto no_page;
531 if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
532 pte_unmap_unlock(ptep, ptl);
533 return NULL;
534 }
535
536 page = vm_normal_page(vma, address, pte);
537 if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
538
539
540
541
542
543 *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
544 if (*pgmap)
545 page = pte_page(pte);
546 else
547 goto no_page;
548 } else if (unlikely(!page)) {
549 if (flags & FOLL_DUMP) {
550
551 page = ERR_PTR(-EFAULT);
552 goto out;
553 }
554
555 if (is_zero_pfn(pte_pfn(pte))) {
556 page = pte_page(pte);
557 } else {
558 ret = follow_pfn_pte(vma, address, ptep, flags);
559 page = ERR_PTR(ret);
560 goto out;
561 }
562 }
563
564 if (!pte_write(pte) && gup_must_unshare(flags, page)) {
565 page = ERR_PTR(-EMLINK);
566 goto out;
567 }
568
569 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
570 !PageAnonExclusive(page), page);
571
572
573 if (unlikely(!try_grab_page(page, flags))) {
574 page = ERR_PTR(-ENOMEM);
575 goto out;
576 }
577
578
579
580
581
582 if (flags & FOLL_PIN) {
583 ret = arch_make_page_accessible(page);
584 if (ret) {
585 unpin_user_page(page);
586 page = ERR_PTR(ret);
587 goto out;
588 }
589 }
590 if (flags & FOLL_TOUCH) {
591 if ((flags & FOLL_WRITE) &&
592 !pte_dirty(pte) && !PageDirty(page))
593 set_page_dirty(page);
594
595
596
597
598
599 mark_page_accessed(page);
600 }
601out:
602 pte_unmap_unlock(ptep, ptl);
603 return page;
604no_page:
605 pte_unmap_unlock(ptep, ptl);
606 if (!pte_none(pte))
607 return NULL;
608 return no_page_table(vma, flags);
609}
610
611static struct page *follow_pmd_mask(struct vm_area_struct *vma,
612 unsigned long address, pud_t *pudp,
613 unsigned int flags,
614 struct follow_page_context *ctx)
615{
616 pmd_t *pmd, pmdval;
617 spinlock_t *ptl;
618 struct page *page;
619 struct mm_struct *mm = vma->vm_mm;
620
621 pmd = pmd_offset(pudp, address);
622
623
624
625
626 pmdval = READ_ONCE(*pmd);
627 if (pmd_none(pmdval))
628 return no_page_table(vma, flags);
629 if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
630 page = follow_huge_pmd(mm, address, pmd, flags);
631 if (page)
632 return page;
633 return no_page_table(vma, flags);
634 }
635 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
636 page = follow_huge_pd(vma, address,
637 __hugepd(pmd_val(pmdval)), flags,
638 PMD_SHIFT);
639 if (page)
640 return page;
641 return no_page_table(vma, flags);
642 }
643retry:
644 if (!pmd_present(pmdval)) {
645
646
647
648
649 VM_BUG_ON(!thp_migration_supported() ||
650 !is_pmd_migration_entry(pmdval));
651
652 if (likely(!(flags & FOLL_MIGRATION)))
653 return no_page_table(vma, flags);
654
655 pmd_migration_entry_wait(mm, pmd);
656 pmdval = READ_ONCE(*pmd);
657
658
659
660
661 if (pmd_none(pmdval))
662 return no_page_table(vma, flags);
663 goto retry;
664 }
665 if (pmd_devmap(pmdval)) {
666 ptl = pmd_lock(mm, pmd);
667 page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
668 spin_unlock(ptl);
669 if (page)
670 return page;
671 }
672 if (likely(!pmd_trans_huge(pmdval)))
673 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
674
675 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
676 return no_page_table(vma, flags);
677
678retry_locked:
679 ptl = pmd_lock(mm, pmd);
680 if (unlikely(pmd_none(*pmd))) {
681 spin_unlock(ptl);
682 return no_page_table(vma, flags);
683 }
684 if (unlikely(!pmd_present(*pmd))) {
685 spin_unlock(ptl);
686 if (likely(!(flags & FOLL_MIGRATION)))
687 return no_page_table(vma, flags);
688 pmd_migration_entry_wait(mm, pmd);
689 goto retry_locked;
690 }
691 if (unlikely(!pmd_trans_huge(*pmd))) {
692 spin_unlock(ptl);
693 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
694 }
695 if (flags & FOLL_SPLIT_PMD) {
696 int ret;
697 page = pmd_page(*pmd);
698 if (is_huge_zero_page(page)) {
699 spin_unlock(ptl);
700 ret = 0;
701 split_huge_pmd(vma, pmd, address);
702 if (pmd_trans_unstable(pmd))
703 ret = -EBUSY;
704 } else {
705 spin_unlock(ptl);
706 split_huge_pmd(vma, pmd, address);
707 ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
708 }
709
710 return ret ? ERR_PTR(ret) :
711 follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
712 }
713 page = follow_trans_huge_pmd(vma, address, pmd, flags);
714 spin_unlock(ptl);
715 ctx->page_mask = HPAGE_PMD_NR - 1;
716 return page;
717}
718
719static struct page *follow_pud_mask(struct vm_area_struct *vma,
720 unsigned long address, p4d_t *p4dp,
721 unsigned int flags,
722 struct follow_page_context *ctx)
723{
724 pud_t *pud;
725 spinlock_t *ptl;
726 struct page *page;
727 struct mm_struct *mm = vma->vm_mm;
728
729 pud = pud_offset(p4dp, address);
730 if (pud_none(*pud))
731 return no_page_table(vma, flags);
732 if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
733 page = follow_huge_pud(mm, address, pud, flags);
734 if (page)
735 return page;
736 return no_page_table(vma, flags);
737 }
738 if (is_hugepd(__hugepd(pud_val(*pud)))) {
739 page = follow_huge_pd(vma, address,
740 __hugepd(pud_val(*pud)), flags,
741 PUD_SHIFT);
742 if (page)
743 return page;
744 return no_page_table(vma, flags);
745 }
746 if (pud_devmap(*pud)) {
747 ptl = pud_lock(mm, pud);
748 page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
749 spin_unlock(ptl);
750 if (page)
751 return page;
752 }
753 if (unlikely(pud_bad(*pud)))
754 return no_page_table(vma, flags);
755
756 return follow_pmd_mask(vma, address, pud, flags, ctx);
757}
758
759static struct page *follow_p4d_mask(struct vm_area_struct *vma,
760 unsigned long address, pgd_t *pgdp,
761 unsigned int flags,
762 struct follow_page_context *ctx)
763{
764 p4d_t *p4d;
765 struct page *page;
766
767 p4d = p4d_offset(pgdp, address);
768 if (p4d_none(*p4d))
769 return no_page_table(vma, flags);
770 BUILD_BUG_ON(p4d_huge(*p4d));
771 if (unlikely(p4d_bad(*p4d)))
772 return no_page_table(vma, flags);
773
774 if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
775 page = follow_huge_pd(vma, address,
776 __hugepd(p4d_val(*p4d)), flags,
777 P4D_SHIFT);
778 if (page)
779 return page;
780 return no_page_table(vma, flags);
781 }
782 return follow_pud_mask(vma, address, p4d, flags, ctx);
783}
784
/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
 *       pointer to output page_mask
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
 * the device's dev_pagemap metadata to avoid repeating expensive lookups.
 *
 * When getting an anonymous page and the caller has to trigger unsharing
 * of a possibly shared anonymous page first, -EMLINK is returned. The
 * caller should then retry with a fault that has FAULT_FLAG_UNSHARE set.
 *
 * On output, @ctx->page_mask is set according to the size of the page.
 *
 * Return: the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
809static struct page *follow_page_mask(struct vm_area_struct *vma,
810 unsigned long address, unsigned int flags,
811 struct follow_page_context *ctx)
812{
813 pgd_t *pgd;
814 struct page *page;
815 struct mm_struct *mm = vma->vm_mm;
816
817 ctx->page_mask = 0;
818
819
820 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
821 if (!IS_ERR(page)) {
822 WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
823 return page;
824 }
825
826 pgd = pgd_offset(mm, address);
827
828 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
829 return no_page_table(vma, flags);
830
831 if (pgd_huge(*pgd)) {
832 page = follow_huge_pgd(mm, address, pgd, flags);
833 if (page)
834 return page;
835 return no_page_table(vma, flags);
836 }
837 if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
838 page = follow_huge_pd(vma, address,
839 __hugepd(pgd_val(*pgd)), flags,
840 PGDIR_SHIFT);
841 if (page)
842 return page;
843 return no_page_table(vma, flags);
844 }
845
846 return follow_p4d_mask(vma, address, pgd, flags, ctx);
847}
848
849struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
850 unsigned int foll_flags)
851{
852 struct follow_page_context ctx = { NULL };
853 struct page *page;
854
855 if (vma_is_secretmem(vma))
856 return NULL;
857
858 if (foll_flags & FOLL_PIN)
859 return NULL;
860
861 page = follow_page_mask(vma, address, foll_flags, &ctx);
862 if (ctx.pgmap)
863 put_dev_pagemap(ctx.pgmap);
864 return page;
865}
866
867static int get_gate_page(struct mm_struct *mm, unsigned long address,
868 unsigned int gup_flags, struct vm_area_struct **vma,
869 struct page **page)
870{
871 pgd_t *pgd;
872 p4d_t *p4d;
873 pud_t *pud;
874 pmd_t *pmd;
875 pte_t *pte;
876 int ret = -EFAULT;
877
878
879 if (gup_flags & FOLL_WRITE)
880 return -EFAULT;
881 if (address > TASK_SIZE)
882 pgd = pgd_offset_k(address);
883 else
884 pgd = pgd_offset_gate(mm, address);
885 if (pgd_none(*pgd))
886 return -EFAULT;
887 p4d = p4d_offset(pgd, address);
888 if (p4d_none(*p4d))
889 return -EFAULT;
890 pud = pud_offset(p4d, address);
891 if (pud_none(*pud))
892 return -EFAULT;
893 pmd = pmd_offset(pud, address);
894 if (!pmd_present(*pmd))
895 return -EFAULT;
896 VM_BUG_ON(pmd_trans_huge(*pmd));
897 pte = pte_offset_map(pmd, address);
898 if (pte_none(*pte))
899 goto unmap;
900 *vma = get_gate_vma(mm);
901 if (!page)
902 goto out;
903 *page = vm_normal_page(*vma, address, *pte);
904 if (!*page) {
905 if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
906 goto unmap;
907 *page = pte_page(*pte);
908 }
909 if (unlikely(!try_grab_page(*page, gup_flags))) {
910 ret = -ENOMEM;
911 goto unmap;
912 }
913out:
914 ret = 0;
915unmap:
916 pte_unmap(pte);
917 return ret;
918}
919
920
921
922
923
924
925static int faultin_page(struct vm_area_struct *vma,
926 unsigned long address, unsigned int *flags, bool unshare,
927 int *locked)
928{
929 unsigned int fault_flags = 0;
930 vm_fault_t ret;
931
932 if (*flags & FOLL_NOFAULT)
933 return -EFAULT;
934 if (*flags & FOLL_WRITE)
935 fault_flags |= FAULT_FLAG_WRITE;
936 if (*flags & FOLL_REMOTE)
937 fault_flags |= FAULT_FLAG_REMOTE;
938 if (locked)
939 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
940 if (*flags & FOLL_NOWAIT)
941 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
942 if (*flags & FOLL_TRIED) {
943
944
945
946
947 fault_flags |= FAULT_FLAG_TRIED;
948 }
949 if (unshare) {
950 fault_flags |= FAULT_FLAG_UNSHARE;
951
952 VM_BUG_ON(fault_flags & FAULT_FLAG_WRITE);
953 }
954
955 ret = handle_mm_fault(vma, address, fault_flags, NULL);
956 if (ret & VM_FAULT_ERROR) {
957 int err = vm_fault_to_errno(ret, *flags);
958
959 if (err)
960 return err;
961 BUG();
962 }
963
964 if (ret & VM_FAULT_RETRY) {
965 if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
966 *locked = 0;
967 return -EBUSY;
968 }
969
970
971
972
973
974
975
976
977
978
979 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
980 *flags |= FOLL_COW;
981 return 0;
982}
983
984static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
985{
986 vm_flags_t vm_flags = vma->vm_flags;
987 int write = (gup_flags & FOLL_WRITE);
988 int foreign = (gup_flags & FOLL_REMOTE);
989
990 if (vm_flags & (VM_IO | VM_PFNMAP))
991 return -EFAULT;
992
993 if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
994 return -EFAULT;
995
996 if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
997 return -EOPNOTSUPP;
998
999 if (vma_is_secretmem(vma))
1000 return -EFAULT;
1001
1002 if (write) {
1003 if (!(vm_flags & VM_WRITE)) {
1004 if (!(gup_flags & FOLL_FORCE))
1005 return -EFAULT;
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015 if (!is_cow_mapping(vm_flags))
1016 return -EFAULT;
1017 }
1018 } else if (!(vm_flags & VM_READ)) {
1019 if (!(gup_flags & FOLL_FORCE))
1020 return -EFAULT;
1021
1022
1023
1024
1025 if (!(vm_flags & VM_MAYREAD))
1026 return -EFAULT;
1027 }
1028
1029
1030
1031
1032 if (!arch_vma_access_permitted(vma, write, false, foreign))
1033 return -EFAULT;
1034 return 0;
1035}
1036
/**
 * __get_user_pages() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	whether we're still with the mmap_lock held
 *
 * Returns either the number of pages pinned (which may be less than the
 * number requested), or an error:
 *
 * -- If nr_pages is 0, returns 0.
 * -- If nr_pages is >0, but no pages were pinned, returns -errno.
 * -- If nr_pages is >0, and some pages were pinned, returns the number of
 *    pages pinned. Again, this may be less than nr_pages.
 *
 * The caller is responsible for releasing returned @pages, via put_page().
 *
 * @vmas are valid only as long as mmap_lock is held.
 *
 * Must be called with mmap_lock held. It may be released: if @locked is
 * non-NULL and fault handling drops the lock, *@locked is set to 0.
 *
 * __get_user_pages() walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given instant.
 * It does not guarantee that the page remains mapped at that address
 * afterwards, only that the page itself won't be freed completely.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If the
 * page is written to, set_page_dirty() (or set_page_dirty_lock(), as
 * appropriate) must be called after the page is finished with, and before
 * put_page() is called.
 */
1097static long __get_user_pages(struct mm_struct *mm,
1098 unsigned long start, unsigned long nr_pages,
1099 unsigned int gup_flags, struct page **pages,
1100 struct vm_area_struct **vmas, int *locked)
1101{
1102 long ret = 0, i = 0;
1103 struct vm_area_struct *vma = NULL;
1104 struct follow_page_context ctx = { NULL };
1105
1106 if (!nr_pages)
1107 return 0;
1108
1109 start = untagged_addr(start);
1110
1111 VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
1112
1113
1114
1115
1116
1117
1118 if (!(gup_flags & FOLL_FORCE))
1119 gup_flags |= FOLL_NUMA;
1120
1121 do {
1122 struct page *page;
1123 unsigned int foll_flags = gup_flags;
1124 unsigned int page_increm;
1125
1126
1127 if (!vma || start >= vma->vm_end) {
1128 vma = find_extend_vma(mm, start);
1129 if (!vma && in_gate_area(mm, start)) {
1130 ret = get_gate_page(mm, start & PAGE_MASK,
1131 gup_flags, &vma,
1132 pages ? &pages[i] : NULL);
1133 if (ret)
1134 goto out;
1135 ctx.page_mask = 0;
1136 goto next_page;
1137 }
1138
1139 if (!vma) {
1140 ret = -EFAULT;
1141 goto out;
1142 }
1143 ret = check_vma_flags(vma, gup_flags);
1144 if (ret)
1145 goto out;
1146
1147 if (is_vm_hugetlb_page(vma)) {
1148 i = follow_hugetlb_page(mm, vma, pages, vmas,
1149 &start, &nr_pages, i,
1150 gup_flags, locked);
1151 if (locked && *locked == 0) {
1152
1153
1154
1155
1156
1157 BUG_ON(gup_flags & FOLL_NOWAIT);
1158 goto out;
1159 }
1160 continue;
1161 }
1162 }
1163retry:
1164
1165
1166
1167
1168 if (fatal_signal_pending(current)) {
1169 ret = -EINTR;
1170 goto out;
1171 }
1172 cond_resched();
1173
1174 page = follow_page_mask(vma, start, foll_flags, &ctx);
1175 if (!page || PTR_ERR(page) == -EMLINK) {
1176 ret = faultin_page(vma, start, &foll_flags,
1177 PTR_ERR(page) == -EMLINK, locked);
1178 switch (ret) {
1179 case 0:
1180 goto retry;
1181 case -EBUSY:
1182 ret = 0;
1183 fallthrough;
1184 case -EFAULT:
1185 case -ENOMEM:
1186 case -EHWPOISON:
1187 goto out;
1188 }
1189 BUG();
1190 } else if (PTR_ERR(page) == -EEXIST) {
1191
1192
1193
1194
1195
1196
1197 if (pages) {
1198 ret = PTR_ERR(page);
1199 goto out;
1200 }
1201
1202 goto next_page;
1203 } else if (IS_ERR(page)) {
1204 ret = PTR_ERR(page);
1205 goto out;
1206 }
1207 if (pages) {
1208 pages[i] = page;
1209 flush_anon_page(vma, page, start);
1210 flush_dcache_page(page);
1211 ctx.page_mask = 0;
1212 }
1213next_page:
1214 if (vmas) {
1215 vmas[i] = vma;
1216 ctx.page_mask = 0;
1217 }
1218 page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
1219 if (page_increm > nr_pages)
1220 page_increm = nr_pages;
1221 i += page_increm;
1222 start += page_increm * PAGE_SIZE;
1223 nr_pages -= page_increm;
1224 } while (nr_pages);
1225out:
1226 if (ctx.pgmap)
1227 put_dev_pagemap(ctx.pgmap);
1228 return i ? i : ret;
1229}
1230
1231static bool vma_permits_fault(struct vm_area_struct *vma,
1232 unsigned int fault_flags)
1233{
1234 bool write = !!(fault_flags & FAULT_FLAG_WRITE);
1235 bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
1236 vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
1237
1238 if (!(vm_flags & vma->vm_flags))
1239 return false;
1240
1241
1242
1243
1244
1245
1246
1247
1248 if (!arch_vma_access_permitted(vma, write, false, foreign))
1249 return false;
1250
1251 return true;
1252}
1253
/**
 * fixup_user_fault() - manually resolve a user page fault
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags:flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_lock while retrying; may be NULL if the
 *		caller does not allow retry. If NULL, the caller must
 *		guarantee that fault_flags does not contain
 *		FAULT_FLAG_ALLOW_RETRY.
 *
 * This is meant to be called in the specific scenario where for locking
 * reasons we try to access user memory in atomic context (within a
 * pagefault_disable() section), this returns -EFAULT, and we want to
 * resolve the user fault before trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page. This
 * matters on architectures where those bits are maintained in software and
 * gate access permission to the page.
 *
 * This function will not return with an unlocked mmap_lock: if the lock had
 * to be dropped for a retry, it is re-taken and *@unlocked is set to true.
 */
1283int fixup_user_fault(struct mm_struct *mm,
1284 unsigned long address, unsigned int fault_flags,
1285 bool *unlocked)
1286{
1287 struct vm_area_struct *vma;
1288 vm_fault_t ret;
1289
1290 address = untagged_addr(address);
1291
1292 if (unlocked)
1293 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1294
1295retry:
1296 vma = find_extend_vma(mm, address);
1297 if (!vma || address < vma->vm_start)
1298 return -EFAULT;
1299
1300 if (!vma_permits_fault(vma, fault_flags))
1301 return -EFAULT;
1302
1303 if ((fault_flags & FAULT_FLAG_KILLABLE) &&
1304 fatal_signal_pending(current))
1305 return -EINTR;
1306
1307 ret = handle_mm_fault(vma, address, fault_flags, NULL);
1308 if (ret & VM_FAULT_ERROR) {
1309 int err = vm_fault_to_errno(ret, 0);
1310
1311 if (err)
1312 return err;
1313 BUG();
1314 }
1315
1316 if (ret & VM_FAULT_RETRY) {
1317 mmap_read_lock(mm);
1318 *unlocked = true;
1319 fault_flags |= FAULT_FLAG_TRIED;
1320 goto retry;
1321 }
1322
1323 return 0;
1324}
1325EXPORT_SYMBOL_GPL(fixup_user_fault);
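
/*
 * Illustrative caller pattern (sketch only): futex-style code that failed an
 * atomic user access resolves the fault and then retries the access. The
 * retry label is hypothetical; fixup_user_fault(), mmap_read_lock() and
 * FAULT_FLAG_WRITE are the real interfaces shown.
 *
 *	mmap_read_lock(mm);
 *	ret = fixup_user_fault(mm, (unsigned long)uaddr,
 *			       FAULT_FLAG_WRITE, NULL);
 *	mmap_read_unlock(mm);
 *	if (!ret)
 *		goto retry_atomic_access;	// hypothetical retry point
 */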
1326
/*
 * Please note that this function, unlike __get_user_pages(), will not return
 * 0 for nr_pages > 0, unless FOLL_NOFAULT is used.
 */
1331static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
1332 unsigned long start,
1333 unsigned long nr_pages,
1334 struct page **pages,
1335 struct vm_area_struct **vmas,
1336 int *locked,
1337 unsigned int flags)
1338{
1339 long ret, pages_done;
1340 bool lock_dropped;
1341
1342 if (locked) {
1343
1344 BUG_ON(vmas);
1345
1346 BUG_ON(*locked != 1);
1347 }
1348
1349 if (flags & FOLL_PIN)
1350 mm_set_has_pinned_flag(&mm->flags);
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361 if (pages && !(flags & FOLL_PIN))
1362 flags |= FOLL_GET;
1363
1364 pages_done = 0;
1365 lock_dropped = false;
1366 for (;;) {
1367 ret = __get_user_pages(mm, start, nr_pages, flags, pages,
1368 vmas, locked);
1369 if (!locked)
1370
1371 return ret;
1372
1373
1374 if (!*locked) {
1375 BUG_ON(ret < 0);
1376 BUG_ON(ret >= nr_pages);
1377 }
1378
1379 if (ret > 0) {
1380 nr_pages -= ret;
1381 pages_done += ret;
1382 if (!nr_pages)
1383 break;
1384 }
1385 if (*locked) {
1386
1387
1388
1389
1390 if (!pages_done)
1391 pages_done = ret;
1392 break;
1393 }
1394
1395
1396
1397
1398 if (likely(pages))
1399 pages += ret;
1400 start += ret << PAGE_SHIFT;
1401 lock_dropped = true;
1402
1403retry:
1404
1405
1406
1407
1408
1409
1410
1411
1412 if (fatal_signal_pending(current)) {
1413 if (!pages_done)
1414 pages_done = -EINTR;
1415 break;
1416 }
1417
1418 ret = mmap_read_lock_killable(mm);
1419 if (ret) {
1420 BUG_ON(ret > 0);
1421 if (!pages_done)
1422 pages_done = ret;
1423 break;
1424 }
1425
1426 *locked = 1;
1427 ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
1428 pages, NULL, locked);
1429 if (!*locked) {
1430
1431 BUG_ON(ret != 0);
1432 goto retry;
1433 }
1434 if (ret != 1) {
1435 BUG_ON(ret > 1);
1436 if (!pages_done)
1437 pages_done = ret;
1438 break;
1439 }
1440 nr_pages--;
1441 pages_done++;
1442 if (!nr_pages)
1443 break;
1444 if (likely(pages))
1445 pages++;
1446 start += PAGE_SIZE;
1447 }
1448 if (lock_dropped && *locked) {
1449
1450
1451
1452
1453 mmap_read_unlock(mm);
1454 *locked = 0;
1455 }
1456 return pages_done;
1457}
1458
/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @locked: whether the mmap_lock is still held
 *
 * This takes care of touching the pages so they are populated, and of
 * mlocking them if VM_LOCKED is set.
 *
 * Return either number of pages pinned in the vma, or a negative error
 * code on error.
 *
 * vma->vm_mm->mmap_lock must be held.
 *
 * If @locked is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @locked is non-NULL, it must be held for read only and may be
 * released. If it's released, *@locked will be set to 0.
 */
1479long populate_vma_page_range(struct vm_area_struct *vma,
1480 unsigned long start, unsigned long end, int *locked)
1481{
1482 struct mm_struct *mm = vma->vm_mm;
1483 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1484 int gup_flags;
1485 long ret;
1486
1487 VM_BUG_ON(!PAGE_ALIGNED(start));
1488 VM_BUG_ON(!PAGE_ALIGNED(end));
1489 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1490 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1491 mmap_assert_locked(mm);
1492
1493
1494
1495
1496
1497 if (vma->vm_flags & VM_LOCKONFAULT)
1498 return nr_pages;
1499
1500 gup_flags = FOLL_TOUCH;
1501
1502
1503
1504
1505
1506 if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
1507 gup_flags |= FOLL_WRITE;
1508
1509
1510
1511
1512
1513 if (vma_is_accessible(vma))
1514 gup_flags |= FOLL_FORCE;
1515
1516
1517
1518
1519
1520 ret = __get_user_pages(mm, start, nr_pages, gup_flags,
1521 NULL, NULL, locked);
1522 lru_add_drain();
1523 return ret;
1524}
1525
/*
 * faultin_vma_page_range() - populate (prefault) page tables inside the
 *			      given VMA range readable/writable
 *
 * This takes care of mlocking the pages, too, if VM_LOCKED is set.
 *
 * @vma: target vma
 * @start: start address
 * @end: end address
 * @write: whether to prefault readable or writable
 * @locked: whether the mmap_lock is still held
 *
 * Returns either the number of processed pages in the vma, or a negative
 * error code on error (see __get_user_pages()).
 *
 * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
 * covered by the VMA.
 *
 * If @locked is NULL, it may be held for read or write and will be
 * unperturbed.
 *
 * If @locked is non-NULL, it must be held for read only and may be
 * released. If it's released, *@locked will be set to 0.
 */
1549long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
1550 unsigned long end, bool write, int *locked)
1551{
1552 struct mm_struct *mm = vma->vm_mm;
1553 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1554 int gup_flags;
1555 long ret;
1556
1557 VM_BUG_ON(!PAGE_ALIGNED(start));
1558 VM_BUG_ON(!PAGE_ALIGNED(end));
1559 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1560 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1561 mmap_assert_locked(mm);
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572 gup_flags = FOLL_TOUCH | FOLL_HWPOISON;
1573 if (write)
1574 gup_flags |= FOLL_WRITE;
1575
1576
1577
1578
1579
1580 if (check_vma_flags(vma, gup_flags))
1581 return -EINVAL;
1582
1583 ret = __get_user_pages(mm, start, nr_pages, gup_flags,
1584 NULL, NULL, locked);
1585 lru_add_drain();
1586 return ret;
1587}
1588
/*
 * __mm_populate - populate and/or mlock pages within a range of address space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must already be marked with the desired vm_flags, and
 * mmap_lock must not be held.
 */
1596int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
1597{
1598 struct mm_struct *mm = current->mm;
1599 unsigned long end, nstart, nend;
1600 struct vm_area_struct *vma = NULL;
1601 int locked = 0;
1602 long ret = 0;
1603
1604 end = start + len;
1605
1606 for (nstart = start; nstart < end; nstart = nend) {
1607
1608
1609
1610
1611 if (!locked) {
1612 locked = 1;
1613 mmap_read_lock(mm);
1614 vma = find_vma(mm, nstart);
1615 } else if (nstart >= vma->vm_end)
1616 vma = vma->vm_next;
1617 if (!vma || vma->vm_start >= end)
1618 break;
1619
1620
1621
1622
1623 nend = min(end, vma->vm_end);
1624 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1625 continue;
1626 if (nstart < vma->vm_start)
1627 nstart = vma->vm_start;
1628
1629
1630
1631
1632
1633 ret = populate_vma_page_range(vma, nstart, nend, &locked);
1634 if (ret < 0) {
1635 if (ignore_errors) {
1636 ret = 0;
1637 continue;
1638 }
1639 break;
1640 }
1641 nend = nstart + ret * PAGE_SIZE;
1642 ret = 0;
1643 }
1644 if (locked)
1645 mmap_read_unlock(mm);
1646 return ret;
1647}
1648#else
1649static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
1650 unsigned long nr_pages, struct page **pages,
1651 struct vm_area_struct **vmas, int *locked,
1652 unsigned int foll_flags)
1653{
1654 struct vm_area_struct *vma;
1655 unsigned long vm_flags;
1656 long i;
1657
1658
1659
1660
1661 vm_flags = (foll_flags & FOLL_WRITE) ?
1662 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1663 vm_flags &= (foll_flags & FOLL_FORCE) ?
1664 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1665
1666 for (i = 0; i < nr_pages; i++) {
1667 vma = find_vma(mm, start);
1668 if (!vma)
1669 goto finish_or_fault;
1670
1671
1672 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1673 !(vm_flags & vma->vm_flags))
1674 goto finish_or_fault;
1675
1676 if (pages) {
1677 pages[i] = virt_to_page(start);
1678 if (pages[i])
1679 get_page(pages[i]);
1680 }
1681 if (vmas)
1682 vmas[i] = vma;
1683 start = (start + PAGE_SIZE) & PAGE_MASK;
1684 }
1685
1686 return i;
1687
1688finish_or_fault:
1689 return i ? : -EFAULT;
1690}
1691#endif
1692
/**
 * fault_in_writeable - fault in userspace address range for writing
 * @uaddr: start of address range
 * @size: size of address range
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 */
1701size_t fault_in_writeable(char __user *uaddr, size_t size)
1702{
1703 char __user *start = uaddr, *end;
1704
1705 if (unlikely(size == 0))
1706 return 0;
1707 if (!user_write_access_begin(uaddr, size))
1708 return size;
1709 if (!PAGE_ALIGNED(uaddr)) {
1710 unsafe_put_user(0, uaddr, out);
1711 uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
1712 }
1713 end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
1714 if (unlikely(end < start))
1715 end = NULL;
1716 while (uaddr != end) {
1717 unsafe_put_user(0, uaddr, out);
1718 uaddr += PAGE_SIZE;
1719 }
1720
1721out:
1722 user_write_access_end();
1723 if (size > uaddr - start)
1724 return size - (uaddr - start);
1725 return 0;
1726}
1727EXPORT_SYMBOL(fault_in_writeable);
1728
/**
 * fault_in_subpage_writeable - fault in an address range for writing
 * @uaddr: start of address range
 * @size: size of address range
 *
 * Fault in a user address range for writing while checking for permissions
 * at sub-page granularity (e.g. arm64 MTE). This function should be used
 * when the caller cannot guarantee forward progress of a copy_to_user() or
 * copy_from_user() loop.
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 */
1741size_t fault_in_subpage_writeable(char __user *uaddr, size_t size)
1742{
1743 size_t faulted_in;
1744
1745
1746
1747
1748
1749
1750 faulted_in = size - fault_in_writeable(uaddr, size);
1751 if (faulted_in)
1752 faulted_in -= probe_subpage_writeable(uaddr, faulted_in);
1753
1754 return size - faulted_in;
1755}
1756EXPORT_SYMBOL(fault_in_subpage_writeable);
1757
/*
 * fault_in_safe_writeable - fault in an address range for writing
 * @uaddr: start of address range
 * @size: length of address range
 *
 * Faults in an address range for writing. This is primarily useful when we
 * already know that some or all of the pages in the address range aren't in
 * memory.
 *
 * Unlike fault_in_writeable(), this function is non-destructive: it does not
 * write to the user pages, it only triggers write faults via
 * fixup_user_fault().
 *
 * Note that we don't pin or otherwise hold the pages referenced that we
 * fault in. There's no guarantee that they'll stay in memory for any
 * duration of time.
 *
 * Returns the number of bytes not faulted in, like copy_to_user() and
 * copy_from_user().
 */
1776size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
1777{
1778 unsigned long start = (unsigned long)uaddr, end;
1779 struct mm_struct *mm = current->mm;
1780 bool unlocked = false;
1781
1782 if (unlikely(size == 0))
1783 return 0;
1784 end = PAGE_ALIGN(start + size);
1785 if (end < start)
1786 end = 0;
1787
1788 mmap_read_lock(mm);
1789 do {
1790 if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
1791 break;
1792 start = (start + PAGE_SIZE) & PAGE_MASK;
1793 } while (start != end);
1794 mmap_read_unlock(mm);
1795
1796 if (size > (unsigned long)uaddr - start)
1797 return size - ((unsigned long)uaddr - start);
1798 return 0;
1799}
1800EXPORT_SYMBOL(fault_in_safe_writeable);
1801
/**
 * fault_in_readable - fault in userspace address range for reading
 * @uaddr: start of user address range
 * @size: size of user address range
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 */
1810size_t fault_in_readable(const char __user *uaddr, size_t size)
1811{
1812 const char __user *start = uaddr, *end;
1813 volatile char c;
1814
1815 if (unlikely(size == 0))
1816 return 0;
1817 if (!user_read_access_begin(uaddr, size))
1818 return size;
1819 if (!PAGE_ALIGNED(uaddr)) {
1820 unsafe_get_user(c, uaddr, out);
1821 uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
1822 }
1823 end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
1824 if (unlikely(end < start))
1825 end = NULL;
1826 while (uaddr != end) {
1827 unsafe_get_user(c, uaddr, out);
1828 uaddr += PAGE_SIZE;
1829 }
1830
1831out:
1832 user_read_access_end();
1833 (void)c;
1834 if (size > uaddr - start)
1835 return size - (uaddr - start);
1836 return 0;
1837}
1838EXPORT_SYMBOL(fault_in_readable);
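
/*
 * Illustrative use of the fault_in_*() helpers (sketch): callers typically
 * attempt a copy with page faults disabled (for example while holding their
 * own locks), and on a short copy fault the range in and retry.
 * my_copy_from_user() below is a hypothetical stand-in that returns the
 * number of bytes copied; only pagefault_disable()/enable() and
 * fault_in_readable() are real kernel interfaces here.
 *
 *	while (left) {
 *		pagefault_disable();
 *		copied = my_copy_from_user(dst, uaddr, left);	// hypothetical
 *		pagefault_enable();
 *		dst += copied;
 *		uaddr += copied;
 *		left -= copied;
 *		if (left && fault_in_readable(uaddr, left) == left)
 *			return -EFAULT;	// nothing could be faulted in
 *	}
 */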
1839
/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, to preserve alignment with its headers; and also returns
 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
 * allowing a hole to be left in the corefile to save disk space.
 *
 * Called without mmap_lock (takes and releases the mmap_lock by itself).
 */
1854#ifdef CONFIG_ELF_CORE
1855struct page *get_dump_page(unsigned long addr)
1856{
1857 struct mm_struct *mm = current->mm;
1858 struct page *page;
1859 int locked = 1;
1860 int ret;
1861
1862 if (mmap_read_lock_killable(mm))
1863 return NULL;
1864 ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
1865 FOLL_FORCE | FOLL_DUMP | FOLL_GET);
1866 if (locked)
1867 mmap_read_unlock(mm);
1868 return (ret == 1) ? page : NULL;
1869}
1870#endif
1871
1872#ifdef CONFIG_MIGRATION
/*
 * Check whether all pages are pinnable. If so, return the number of pages.
 * If some pages are not pinnable, migrate them and unpin all pages. Return
 * zero if pages were migrated or if some pages could not be isolated, so
 * that the caller retries. Return a negative error if migration fails.
 */
1879static long check_and_migrate_movable_pages(unsigned long nr_pages,
1880 struct page **pages,
1881 unsigned int gup_flags)
1882{
1883 unsigned long isolation_error_count = 0, i;
1884 struct folio *prev_folio = NULL;
1885 LIST_HEAD(movable_page_list);
1886 bool drain_allow = true;
1887 int ret = 0;
1888
1889 for (i = 0; i < nr_pages; i++) {
1890 struct folio *folio = page_folio(pages[i]);
1891
1892 if (folio == prev_folio)
1893 continue;
1894 prev_folio = folio;
1895
1896 if (folio_is_pinnable(folio))
1897 continue;
1898
1899
1900
1901
1902 if (folio_test_hugetlb(folio)) {
1903 if (!isolate_huge_page(&folio->page,
1904 &movable_page_list))
1905 isolation_error_count++;
1906 continue;
1907 }
1908
1909 if (!folio_test_lru(folio) && drain_allow) {
1910 lru_add_drain_all();
1911 drain_allow = false;
1912 }
1913
1914 if (folio_isolate_lru(folio)) {
1915 isolation_error_count++;
1916 continue;
1917 }
1918 list_add_tail(&folio->lru, &movable_page_list);
1919 node_stat_mod_folio(folio,
1920 NR_ISOLATED_ANON + folio_is_file_lru(folio),
1921 folio_nr_pages(folio));
1922 }
1923
1924 if (!list_empty(&movable_page_list) || isolation_error_count)
1925 goto unpin_pages;
1926
1927
1928
1929
1930
1931 return nr_pages;
1932
1933unpin_pages:
1934 if (gup_flags & FOLL_PIN) {
1935 unpin_user_pages(pages, nr_pages);
1936 } else {
1937 for (i = 0; i < nr_pages; i++)
1938 put_page(pages[i]);
1939 }
1940
1941 if (!list_empty(&movable_page_list)) {
1942 struct migration_target_control mtc = {
1943 .nid = NUMA_NO_NODE,
1944 .gfp_mask = GFP_USER | __GFP_NOWARN,
1945 };
1946
1947 ret = migrate_pages(&movable_page_list, alloc_migration_target,
1948 NULL, (unsigned long)&mtc, MIGRATE_SYNC,
1949 MR_LONGTERM_PIN, NULL);
1950 if (ret > 0)
1951 ret = -ENOMEM;
1952 }
1953
1954 if (ret && !list_empty(&movable_page_list))
1955 putback_movable_pages(&movable_page_list);
1956 return ret;
1957}
1958#else
1959static long check_and_migrate_movable_pages(unsigned long nr_pages,
1960 struct page **pages,
1961 unsigned int gup_flags)
1962{
1963 return nr_pages;
1964}
1965#endif
/*
 * __gup_longterm_locked() is a wrapper for __get_user_pages_locked() which
 * allows us to process the FOLL_LONGTERM flag: it retries until all pinned
 * pages are in pinnable (migratable-away-from) memory.
 */
1971static long __gup_longterm_locked(struct mm_struct *mm,
1972 unsigned long start,
1973 unsigned long nr_pages,
1974 struct page **pages,
1975 struct vm_area_struct **vmas,
1976 unsigned int gup_flags)
1977{
1978 unsigned int flags;
1979 long rc;
1980
1981 if (!(gup_flags & FOLL_LONGTERM))
1982 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
1983 NULL, gup_flags);
1984 flags = memalloc_pin_save();
1985 do {
1986 rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
1987 NULL, gup_flags);
1988 if (rc <= 0)
1989 break;
1990 rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
1991 } while (!rc);
1992 memalloc_pin_restore(flags);
1993
1994 return rc;
1995}
1996
1997static bool is_valid_gup_flags(unsigned int gup_flags)
1998{
1999
2000
2001
2002
2003 if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
2004 return false;
2005
2006
2007
2008
2009
2010 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2011 return false;
2012
2013 return true;
2014}
2015
2016#ifdef CONFIG_MMU
2017static long __get_user_pages_remote(struct mm_struct *mm,
2018 unsigned long start, unsigned long nr_pages,
2019 unsigned int gup_flags, struct page **pages,
2020 struct vm_area_struct **vmas, int *locked)
2021{
2022
2023
2024
2025
2026
2027
2028
2029 if (gup_flags & FOLL_LONGTERM) {
2030 if (WARN_ON_ONCE(locked))
2031 return -EINVAL;
2032
2033
2034
2035
2036 return __gup_longterm_locked(mm, start, nr_pages, pages,
2037 vmas, gup_flags | FOLL_TOUCH |
2038 FOLL_REMOTE);
2039 }
2040
2041 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
2042 locked,
2043 gup_flags | FOLL_TOUCH | FOLL_REMOTE);
2044}
2045
/**
 * get_user_pages_remote() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether the lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. The lock must initially be held.
 *
 * Returns either the number of pages pinned (which may be less than the
 * number requested), or an error:
 *
 * -- If nr_pages is 0, returns 0.
 * -- If nr_pages is >0, but no pages were pinned, returns -errno.
 * -- If nr_pages is >0, and some pages were pinned, returns the number of
 *    pages pinned. Again, this may be less than nr_pages.
 *
 * The caller is responsible for releasing returned @pages, via put_page().
 *
 * @vmas are valid only as long as mmap_lock is held.
 *
 * Must be called with mmap_lock held for read or write.
 *
 * get_user_pages_remote() walks a process's page tables and takes a
 * reference to each struct page that each user address corresponds to at a
 * given instant. It does not guarantee that the page remains mapped there
 * afterwards, only that the page won't be freed completely.
 *
 * This is typically used to get a handle on user memory by some means other
 * than accesses via the user virtual addresses, e.g. for DMA or access
 * through the kernel mapping. Care should be taken to use the correct cache
 * flushing APIs. See also get_user_pages_fast() for performance-critical
 * callers operating on current->mm.
 */
2106long get_user_pages_remote(struct mm_struct *mm,
2107 unsigned long start, unsigned long nr_pages,
2108 unsigned int gup_flags, struct page **pages,
2109 struct vm_area_struct **vmas, int *locked)
2110{
2111 if (!is_valid_gup_flags(gup_flags))
2112 return -EINVAL;
2113
2114 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
2115 pages, vmas, locked);
2116}
2117EXPORT_SYMBOL(get_user_pages_remote);
2118
2119#else
2120long get_user_pages_remote(struct mm_struct *mm,
2121 unsigned long start, unsigned long nr_pages,
2122 unsigned int gup_flags, struct page **pages,
2123 struct vm_area_struct **vmas, int *locked)
2124{
2125 return 0;
2126}
2127
2128static long __get_user_pages_remote(struct mm_struct *mm,
2129 unsigned long start, unsigned long nr_pages,
2130 unsigned int gup_flags, struct page **pages,
2131 struct vm_area_struct **vmas, int *locked)
2132{
2133 return 0;
2134}
2135#endif
2136
/**
 * get_user_pages() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @gup_flags:  flags modifying lookup behaviour
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long. Or NULL, if caller
 *              only intends to ensure the pages are faulted in.
 * @vmas:       array of pointers to vmas corresponding to each page.
 *              Or NULL if the caller does not require them.
 *
 * This is the same as get_user_pages_remote(), just with a less-flexible
 * calling convention where we assume that the mm being operated on belongs
 * to the current task, and doesn't allow passing of a locked parameter. We
 * also obviously don't pass FOLL_REMOTE in here.
 */
2153long get_user_pages(unsigned long start, unsigned long nr_pages,
2154 unsigned int gup_flags, struct page **pages,
2155 struct vm_area_struct **vmas)
2156{
2157 if (!is_valid_gup_flags(gup_flags))
2158 return -EINVAL;
2159
2160 return __gup_longterm_locked(current->mm, start, nr_pages,
2161 pages, vmas, gup_flags | FOLL_TOUCH);
2162}
2163EXPORT_SYMBOL(get_user_pages);
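
/*
 * Illustrative caller pattern (sketch): pages obtained with the FOLL_GET
 * based get_user_pages() are released with put_page(), not
 * unpin_user_page(), and the caller must hold mmap_lock around the call.
 * Error handling is abbreviated; "got", "pages" and "start" are caller
 * variables, not names from this file.
 *
 *	mmap_read_lock(current->mm);
 *	got = get_user_pages(start, nr, FOLL_WRITE, pages, NULL);
 *	mmap_read_unlock(current->mm);
 *	if (got <= 0)
 *		return got ? got : -EFAULT;
 *	...
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */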
2164
/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      mmap_read_lock(mm);
 *      get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *      mmap_read_unlock(mm);
 *
 *  with:
 *
 *      get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 *
 * It is functionally equivalent to get_user_pages_fast(), so
 * get_user_pages_fast() should be used instead if specific gup_flags
 * (e.g. FOLL_FORCE) are not required.
 */
2180long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
2181 struct page **pages, unsigned int gup_flags)
2182{
2183 struct mm_struct *mm = current->mm;
2184 int locked = 1;
2185 long ret;
2186
2187
2188
2189
2190
2191
2192
2193 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2194 return -EINVAL;
2195
2196 mmap_read_lock(mm);
2197 ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
2198 &locked, gup_flags | FOLL_TOUCH);
2199 if (locked)
2200 mmap_read_unlock(mm);
2201 return ret;
2202}
2203EXPORT_SYMBOL(get_user_pages_unlocked);
2204
/*
 * Fast GUP
 *
 * get_user_pages_fast() attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free
 * those pages. Disabling interrupts will allow the fast_gup walker to both
 * block the rcu_sched callback, and an IPI that we broadcast for splitting
 * THPs (which is a relatively rare event). The code below adopts this
 * strategy.
 *
 * Before activating this code, please be aware that the following
 * assumptions are currently made:
 *
 *  *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table()
 *  is used to free pages containing page tables, or TLB flushing requires
 *  IPI broadcast.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper
 * functions.
 */
2238#ifdef CONFIG_HAVE_FAST_GUP
2239
2240static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
2241 unsigned int flags,
2242 struct page **pages)
2243{
2244 while ((*nr) - nr_start) {
2245 struct page *page = pages[--(*nr)];
2246
2247 ClearPageReferenced(page);
2248 if (flags & FOLL_PIN)
2249 unpin_user_page(page);
2250 else
2251 put_page(page);
2252 }
2253}
2254
2255#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
2256static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
2257 unsigned int flags, struct page **pages, int *nr)
2258{
2259 struct dev_pagemap *pgmap = NULL;
2260 int nr_start = *nr, ret = 0;
2261 pte_t *ptep, *ptem;
2262
2263 ptem = ptep = pte_offset_map(&pmd, addr);
2264 do {
2265 pte_t pte = ptep_get_lockless(ptep);
2266 struct page *page;
2267 struct folio *folio;
2268
2269
2270
2271
2272
2273 if (pte_protnone(pte))
2274 goto pte_unmap;
2275
2276 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2277 goto pte_unmap;
2278
2279 if (pte_devmap(pte)) {
2280 if (unlikely(flags & FOLL_LONGTERM))
2281 goto pte_unmap;
2282
2283 pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
2284 if (unlikely(!pgmap)) {
2285 undo_dev_pagemap(nr, nr_start, flags, pages);
2286 goto pte_unmap;
2287 }
2288 } else if (pte_special(pte))
2289 goto pte_unmap;
2290
2291 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2292 page = pte_page(pte);
2293
2294 folio = try_grab_folio(page, 1, flags);
2295 if (!folio)
2296 goto pte_unmap;
2297
2298 if (unlikely(page_is_secretmem(page))) {
2299 gup_put_folio(folio, 1, flags);
2300 goto pte_unmap;
2301 }
2302
2303 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2304 gup_put_folio(folio, 1, flags);
2305 goto pte_unmap;
2306 }
2307
2308 if (!pte_write(pte) && gup_must_unshare(flags, page)) {
2309 gup_put_folio(folio, 1, flags);
2310 goto pte_unmap;
2311 }
2312
2313
2314
2315
2316
2317
2318
2319 if (flags & FOLL_PIN) {
2320 ret = arch_make_page_accessible(page);
2321 if (ret) {
2322 gup_put_folio(folio, 1, flags);
2323 goto pte_unmap;
2324 }
2325 }
2326 folio_set_referenced(folio);
2327 pages[*nr] = page;
2328 (*nr)++;
2329 } while (ptep++, addr += PAGE_SIZE, addr != end);
2330
2331 ret = 1;
2332
2333pte_unmap:
2334 if (pgmap)
2335 put_dev_pagemap(pgmap);
2336 pte_unmap(ptem);
2337 return ret;
2338}
2339#else
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
2351 unsigned int flags, struct page **pages, int *nr)
2352{
2353 return 0;
2354}
2355#endif
2356
2357#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
2358static int __gup_device_huge(unsigned long pfn, unsigned long addr,
2359 unsigned long end, unsigned int flags,
2360 struct page **pages, int *nr)
2361{
2362 int nr_start = *nr;
2363 struct dev_pagemap *pgmap = NULL;
2364
2365 do {
2366 struct page *page = pfn_to_page(pfn);
2367
2368 pgmap = get_dev_pagemap(pfn, pgmap);
2369 if (unlikely(!pgmap)) {
2370 undo_dev_pagemap(nr, nr_start, flags, pages);
2371 break;
2372 }
2373 SetPageReferenced(page);
2374 pages[*nr] = page;
2375 if (unlikely(!try_grab_page(page, flags))) {
2376 undo_dev_pagemap(nr, nr_start, flags, pages);
2377 break;
2378 }
2379 (*nr)++;
2380 pfn++;
2381 } while (addr += PAGE_SIZE, addr != end);
2382
2383 put_dev_pagemap(pgmap);
2384 return addr == end;
2385}
2386
2387static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2388 unsigned long end, unsigned int flags,
2389 struct page **pages, int *nr)
2390{
2391 unsigned long fault_pfn;
2392 int nr_start = *nr;
2393
2394 fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
2395 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2396 return 0;
2397
2398 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2399 undo_dev_pagemap(nr, nr_start, flags, pages);
2400 return 0;
2401 }
2402 return 1;
2403}
2404
2405static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2406 unsigned long end, unsigned int flags,
2407 struct page **pages, int *nr)
2408{
2409 unsigned long fault_pfn;
2410 int nr_start = *nr;
2411
2412 fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
2413 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2414 return 0;
2415
2416 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2417 undo_dev_pagemap(nr, nr_start, flags, pages);
2418 return 0;
2419 }
2420 return 1;
2421}
2422#else
2423static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2424 unsigned long end, unsigned int flags,
2425 struct page **pages, int *nr)
2426{
2427 BUILD_BUG();
2428 return 0;
2429}
2430
2431static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
2432 unsigned long end, unsigned int flags,
2433 struct page **pages, int *nr)
2434{
2435 BUILD_BUG();
2436 return 0;
2437}
2438#endif
2439
2440static int record_subpages(struct page *page, unsigned long addr,
2441 unsigned long end, struct page **pages)
2442{
2443 int nr;
2444
2445 for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
2446 pages[nr] = nth_page(page, nr);
2447
2448 return nr;
2449}
2450
2451#ifdef CONFIG_ARCH_HAS_HUGEPD
2452static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
2453 unsigned long sz)
2454{
2455 unsigned long __boundary = (addr + sz) & ~(sz-1);
2456 return (__boundary - 1 < end - 1) ? __boundary : end;
2457}
2458
2459static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
2460 unsigned long end, unsigned int flags,
2461 struct page **pages, int *nr)
2462{
2463 unsigned long pte_end;
2464 struct page *page;
2465 struct folio *folio;
2466 pte_t pte;
2467 int refs;
2468
2469 pte_end = (addr + sz) & ~(sz-1);
2470 if (pte_end < end)
2471 end = pte_end;
2472
2473 pte = huge_ptep_get(ptep);
2474
2475 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2476 return 0;
2477
2478
2479 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2480
2481 page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
2482 refs = record_subpages(page, addr, end, pages + *nr);
2483
2484 folio = try_grab_folio(page, refs, flags);
2485 if (!folio)
2486 return 0;
2487
2488 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2489 gup_put_folio(folio, refs, flags);
2490 return 0;
2491 }
2492
2493 if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) {
2494 gup_put_folio(folio, refs, flags);
2495 return 0;
2496 }
2497
2498 *nr += refs;
2499 folio_set_referenced(folio);
2500 return 1;
2501}
2502
2503static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2504 unsigned int pdshift, unsigned long end, unsigned int flags,
2505 struct page **pages, int *nr)
2506{
2507 pte_t *ptep;
2508 unsigned long sz = 1UL << hugepd_shift(hugepd);
2509 unsigned long next;
2510
2511 ptep = hugepte_offset(hugepd, addr, pdshift);
2512 do {
2513 next = hugepte_addr_end(addr, end, sz);
2514 if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
2515 return 0;
2516 } while (ptep++, addr = next, addr != end);
2517
2518 return 1;
2519}
2520#else
2521static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2522 unsigned int pdshift, unsigned long end, unsigned int flags,
2523 struct page **pages, int *nr)
2524{
2525 return 0;
2526}
2527#endif
2528
2529static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2530 unsigned long end, unsigned int flags,
2531 struct page **pages, int *nr)
2532{
2533 struct page *page;
2534 struct folio *folio;
2535 int refs;
2536
2537 if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
2538 return 0;
2539
2540 if (pmd_devmap(orig)) {
2541 if (unlikely(flags & FOLL_LONGTERM))
2542 return 0;
2543 return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
2544 pages, nr);
2545 }
2546
2547 page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
2548 refs = record_subpages(page, addr, end, pages + *nr);
2549
2550 folio = try_grab_folio(page, refs, flags);
2551 if (!folio)
2552 return 0;
2553
2554 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2555 gup_put_folio(folio, refs, flags);
2556 return 0;
2557 }
2558
2559 if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) {
2560 gup_put_folio(folio, refs, flags);
2561 return 0;
2562 }
2563
2564 *nr += refs;
2565 folio_set_referenced(folio);
2566 return 1;
2567}
2568
2569static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2570 unsigned long end, unsigned int flags,
2571 struct page **pages, int *nr)
2572{
2573 struct page *page;
2574 struct folio *folio;
2575 int refs;
2576
2577 if (!pud_access_permitted(orig, flags & FOLL_WRITE))
2578 return 0;
2579
2580 if (pud_devmap(orig)) {
2581 if (unlikely(flags & FOLL_LONGTERM))
2582 return 0;
2583 return __gup_device_huge_pud(orig, pudp, addr, end, flags,
2584 pages, nr);
2585 }
2586
2587 page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
2588 refs = record_subpages(page, addr, end, pages + *nr);
2589
2590 folio = try_grab_folio(page, refs, flags);
2591 if (!folio)
2592 return 0;
2593
2594 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2595 gup_put_folio(folio, refs, flags);
2596 return 0;
2597 }
2598
2599 if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) {
2600 gup_put_folio(folio, refs, flags);
2601 return 0;
2602 }
2603
2604 *nr += refs;
2605 folio_set_referenced(folio);
2606 return 1;
2607}
2608
2609static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
2610 unsigned long end, unsigned int flags,
2611 struct page **pages, int *nr)
2612{
2613 int refs;
2614 struct page *page;
2615 struct folio *folio;
2616
2617 if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
2618 return 0;
2619
2620 BUILD_BUG_ON(pgd_devmap(orig));
2621
2622 page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
2623 refs = record_subpages(page, addr, end, pages + *nr);
2624
2625 folio = try_grab_folio(page, refs, flags);
2626 if (!folio)
2627 return 0;
2628
2629 if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
2630 gup_put_folio(folio, refs, flags);
2631 return 0;
2632 }
2633
2634 *nr += refs;
2635 folio_set_referenced(folio);
2636 return 1;
2637}
2638
2639static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
2640 unsigned int flags, struct page **pages, int *nr)
2641{
2642 unsigned long next;
2643 pmd_t *pmdp;
2644
2645 pmdp = pmd_offset_lockless(pudp, pud, addr);
2646 do {
2647 pmd_t pmd = READ_ONCE(*pmdp);
2648
2649 next = pmd_addr_end(addr, end);
2650 if (!pmd_present(pmd))
2651 return 0;
2652
2653 if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
2654 pmd_devmap(pmd))) {
2655
2656
2657
2658
2659
2660 if (pmd_protnone(pmd))
2661 return 0;
2662
2663 if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
2664 pages, nr))
2665 return 0;
2666
2667 } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
2668
2669
2670
2671
2672 if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
2673 PMD_SHIFT, next, flags, pages, nr))
2674 return 0;
2675 } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
2676 return 0;
2677 } while (pmdp++, addr = next, addr != end);
2678
2679 return 1;
2680}
2681
2682static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
2683 unsigned int flags, struct page **pages, int *nr)
2684{
2685 unsigned long next;
2686 pud_t *pudp;
2687
2688 pudp = pud_offset_lockless(p4dp, p4d, addr);
2689 do {
2690 pud_t pud = READ_ONCE(*pudp);
2691
2692 next = pud_addr_end(addr, end);
2693 if (unlikely(!pud_present(pud)))
2694 return 0;
2695 if (unlikely(pud_huge(pud))) {
2696 if (!gup_huge_pud(pud, pudp, addr, next, flags,
2697 pages, nr))
2698 return 0;
2699 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
2700 if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
2701 PUD_SHIFT, next, flags, pages, nr))
2702 return 0;
2703 } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
2704 return 0;
2705 } while (pudp++, addr = next, addr != end);
2706
2707 return 1;
2708}
2709
2710static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
2711 unsigned int flags, struct page **pages, int *nr)
2712{
2713 unsigned long next;
2714 p4d_t *p4dp;
2715
2716 p4dp = p4d_offset_lockless(pgdp, pgd, addr);
2717 do {
2718 p4d_t p4d = READ_ONCE(*p4dp);
2719
2720 next = p4d_addr_end(addr, end);
2721 if (p4d_none(p4d))
2722 return 0;
2723 BUILD_BUG_ON(p4d_huge(p4d));
2724 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
2725 if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
2726 P4D_SHIFT, next, flags, pages, nr))
2727 return 0;
2728 } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
2729 return 0;
2730 } while (p4dp++, addr = next, addr != end);
2731
2732 return 1;
2733}
2734
2735static void gup_pgd_range(unsigned long addr, unsigned long end,
2736 unsigned int flags, struct page **pages, int *nr)
2737{
2738 unsigned long next;
2739 pgd_t *pgdp;
2740
2741 pgdp = pgd_offset(current->mm, addr);
2742 do {
2743 pgd_t pgd = READ_ONCE(*pgdp);
2744
2745 next = pgd_addr_end(addr, end);
2746 if (pgd_none(pgd))
2747 return;
2748 if (unlikely(pgd_huge(pgd))) {
2749 if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
2750 pages, nr))
2751 return;
2752 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
2753 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
2754 PGDIR_SHIFT, next, flags, pages, nr))
2755 return;
2756 } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
2757 return;
2758 } while (pgdp++, addr = next, addr != end);
2759}
2760#else
2761static inline void gup_pgd_range(unsigned long addr, unsigned long end,
2762 unsigned int flags, struct page **pages, int *nr)
2763{
2764}
2765#endif
2766
2767#ifndef gup_fast_permitted
2768
2769
2770
2771
2772static bool gup_fast_permitted(unsigned long start, unsigned long end)
2773{
2774 return true;
2775}
2776#endif
2777
2778static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
2779 unsigned int gup_flags, struct page **pages)
2780{
2781 int ret;
2782
2783
2784
2785
2786
2787 if (gup_flags & FOLL_LONGTERM) {
2788 mmap_read_lock(current->mm);
2789 ret = __gup_longterm_locked(current->mm,
2790 start, nr_pages,
2791 pages, NULL, gup_flags);
2792 mmap_read_unlock(current->mm);
2793 } else {
2794 ret = get_user_pages_unlocked(start, nr_pages,
2795 pages, gup_flags);
2796 }
2797
2798 return ret;
2799}
2800
2801static unsigned long lockless_pages_from_mm(unsigned long start,
2802 unsigned long end,
2803 unsigned int gup_flags,
2804 struct page **pages)
2805{
2806 unsigned long flags;
2807 int nr_pinned = 0;
2808 unsigned seq;
2809
2810 if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
2811 !gup_fast_permitted(start, end))
2812 return 0;
2813
2814 if (gup_flags & FOLL_PIN) {
		seq = raw_read_seqcount(&current->mm->write_protect_seq);
2816 if (seq & 1)
2817 return 0;
2818 }
2819
	/*
	 * Disable interrupts. The nested form is used, in order to allow
	 * full, general purpose use of this routine.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock() here as we also want to block
	 * IPIs that come from THPs splitting.
	 */
2831 local_irq_save(flags);
2832 gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
2833 local_irq_restore(flags);
2834
	/*
	 * When pinning pages for DMA there could be a concurrent write
	 * protect from fork() via copy_page_range(); in this case always
	 * fail fast GUP.
	 */
2839 if (gup_flags & FOLL_PIN) {
		if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
2841 unpin_user_pages_lockless(pages, nr_pinned);
2842 return 0;
2843 } else {
2844 sanity_check_pinned_pages(pages, nr_pinned);
2845 }
2846 }
2847 return nr_pinned;
2848}
2849
2850static int internal_get_user_pages_fast(unsigned long start,
2851 unsigned long nr_pages,
2852 unsigned int gup_flags,
2853 struct page **pages)
2854{
2855 unsigned long len, end;
2856 unsigned long nr_pinned;
2857 int ret;
2858
2859 if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
2860 FOLL_FORCE | FOLL_PIN | FOLL_GET |
2861 FOLL_FAST_ONLY | FOLL_NOFAULT)))
2862 return -EINVAL;
2863
2864 if (gup_flags & FOLL_PIN)
		mm_set_has_pinned_flag(&current->mm->flags);
2866
2867 if (!(gup_flags & FOLL_FAST_ONLY))
		might_lock_read(&current->mm->mmap_lock);
2869
2870 start = untagged_addr(start) & PAGE_MASK;
2871 len = nr_pages << PAGE_SHIFT;
2872 if (check_add_overflow(start, len, &end))
2873 return 0;
2874 if (unlikely(!access_ok((void __user *)start, len)))
2875 return -EFAULT;
2876
2877 nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
2878 if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
2879 return nr_pinned;
2880
2881
2882 start += nr_pinned << PAGE_SHIFT;
2883 pages += nr_pinned;
2884 ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
2885 pages);
2886 if (ret < 0) {
2887
2888
2889
2890
2891 if (nr_pinned)
2892 return nr_pinned;
2893 return ret;
2894 }
2895 return ret + nr_pinned;
2896}
2897
/**
 * get_user_pages_fast_only() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @gup_flags:  flags modifying pin behaviour
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Like get_user_pages_fast(), except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 *
 * Note a difference with get_user_pages_fast(): this always returns the
 * number of pages pinned, 0 if no pages were pinned, never a negative error.
 *
 * If the architecture does not support this function, simply return with no
 * pages pinned.
 *
 * Careful, careful! COW breaking can go either way, so a non-write access
 * can get ambiguous page results. If you call this function without 'write'
 * set, you'd better be sure that you're OK with that ambiguity.
 */
2918int get_user_pages_fast_only(unsigned long start, int nr_pages,
2919 unsigned int gup_flags, struct page **pages)
2920{
2921 int nr_pinned;
2922
2923
2924
2925
2926
2927
2928
2929 gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
2930
2931 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
2932 pages);
2933
2934
2935
2936
2937
2938
2939
2940 if (nr_pinned < 0)
2941 nr_pinned = 0;
2942
2943 return nr_pinned;
2944}
2945EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
2946
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @gup_flags:  flags modifying pin behaviour
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_lock.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns the number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages were
 * pinned, returns -errno.
 */
2963int get_user_pages_fast(unsigned long start, int nr_pages,
2964 unsigned int gup_flags, struct page **pages)
2965{
2966 if (!is_valid_gup_flags(gup_flags))
2967 return -EINVAL;
2968
2969
2970
2971
2972
2973
2974
2975 gup_flags |= FOLL_GET;
2976 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
2977}
2978EXPORT_SYMBOL_GPL(get_user_pages_fast);
2979
/**
 * pin_user_pages_fast() - pin user pages in memory without taking locks
 *
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @gup_flags:  flags modifying pin behaviour
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See
 * get_user_pages_fast() for documentation on the function arguments, because
 * the arguments here are identical.
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page().
 * Please see Documentation/core-api/pin_user_pages.rst for further details.
 */
2996int pin_user_pages_fast(unsigned long start, int nr_pages,
2997 unsigned int gup_flags, struct page **pages)
2998{
2999
3000 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3001 return -EINVAL;
3002
3003 if (WARN_ON_ONCE(!pages))
3004 return -EINVAL;
3005
3006 gup_flags |= FOLL_PIN;
3007 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
3008}
3009EXPORT_SYMBOL_GPL(pin_user_pages_fast);
3010
/*
 * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
 * is the same, except that this one sets FOLL_PIN instead of FOLL_GET, and
 * rejects FOLL_GET. As with the other *_fast_only() variants, it never
 * falls back to the slow path and never returns a negative error.
 */
3017int pin_user_pages_fast_only(unsigned long start, int nr_pages,
3018 unsigned int gup_flags, struct page **pages)
3019{
3020 int nr_pinned;
3021
3022
3023
3024
3025
3026 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3027 return 0;
3028
3029 if (WARN_ON_ONCE(!pages))
3030 return 0;
3031
3032
3033
3034
3035 gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
3036 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
3037 pages);
3038
3039
3040
3041
3042
3043 if (nr_pinned < 0)
3044 nr_pinned = 0;
3045
3046 return nr_pinned;
3047}
3048EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
3049
/**
 * pin_user_pages_remote() - pin pages of a remote process
 *
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether the lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. The lock must initially be held.
 *
 * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set.
 * See get_user_pages_remote() for documentation on the function arguments,
 * because the arguments here are identical.
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page().
 * Please see Documentation/core-api/pin_user_pages.rst for details.
 */
3072long pin_user_pages_remote(struct mm_struct *mm,
3073 unsigned long start, unsigned long nr_pages,
3074 unsigned int gup_flags, struct page **pages,
3075 struct vm_area_struct **vmas, int *locked)
3076{
3077
3078 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3079 return -EINVAL;
3080
3081 if (WARN_ON_ONCE(!pages))
3082 return -EINVAL;
3083
3084 gup_flags |= FOLL_PIN;
3085 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
3086 pages, vmas, locked);
3087}
3088EXPORT_SYMBOL(pin_user_pages_remote);
3089
/**
 * pin_user_pages() - pin user pages in memory for use by other devices
 *
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 *
 * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set,
 * and FOLL_PIN is set.
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page().
 * Please see Documentation/core-api/pin_user_pages.rst for details.
 */
3107long pin_user_pages(unsigned long start, unsigned long nr_pages,
3108 unsigned int gup_flags, struct page **pages,
3109 struct vm_area_struct **vmas)
3110{
3111
3112 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3113 return -EINVAL;
3114
3115 if (WARN_ON_ONCE(!pages))
3116 return -EINVAL;
3117
3118 gup_flags |= FOLL_PIN;
3119 return __gup_longterm_locked(current->mm, start, nr_pages,
3120 pages, vmas, gup_flags);
3121}
3122EXPORT_SYMBOL(pin_user_pages);
3123
/*
 * pin_user_pages_unlocked() is the FOLL_PIN variant of
 * get_user_pages_unlocked(). Behavior is the same, except that this one sets
 * FOLL_PIN and rejects FOLL_GET.
 */
3129long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
3130 struct page **pages, unsigned int gup_flags)
3131{
3132
3133 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3134 return -EINVAL;
3135
3136 if (WARN_ON_ONCE(!pages))
3137 return -EINVAL;
3138
3139 gup_flags |= FOLL_PIN;
3140 return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
3141}
3142EXPORT_SYMBOL(pin_user_pages_unlocked);
3143