1
2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/err.h>
5#include <linux/spinlock.h>
6
7#include <linux/mm.h>
8#include <linux/memremap.h>
9#include <linux/pagemap.h>
10#include <linux/rmap.h>
11#include <linux/swap.h>
12#include <linux/swapops.h>
13#include <linux/secretmem.h>
14
15#include <linux/sched/signal.h>
16#include <linux/rwsem.h>
17#include <linux/hugetlb.h>
18#include <linux/migrate.h>
19#include <linux/mm_inline.h>
20#include <linux/sched/mm.h>
21
22#include <asm/mmu_context.h>
23#include <asm/tlbflush.h>
24
25#include "internal.h"
26
27struct follow_page_context {
28 struct dev_pagemap *pgmap;
29 unsigned int page_mask;
30};
31
32static void hpage_pincount_add(struct page *page, int refs)
33{
34 VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
35 VM_BUG_ON_PAGE(page != compound_head(page), page);
36
37 atomic_add(refs, compound_pincount_ptr(page));
38}
39
40static void hpage_pincount_sub(struct page *page, int refs)
41{
42 VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
43 VM_BUG_ON_PAGE(page != compound_head(page), page);
44
45 atomic_sub(refs, compound_pincount_ptr(page));
46}
47
48
49static void put_page_refs(struct page *page, int refs)
50{
51#ifdef CONFIG_DEBUG_VM
52 if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
53 return;
54#endif
	/*
	 * Calling put_page() for each ref is unnecessarily slow. Only the last
	 * ref needs a put_page() that will handle the zero-refcount case.
	 */
	if (refs > 1)
		page_ref_sub(page, refs - 1);
	put_page(page);
}
64
/*
 * Return the compound head page with ref appropriately incremented,
 * or NULL if that failed.
 */
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
	struct page *head = compound_head(page);

	if (WARN_ON_ONCE(page_ref_count(head) < 0))
		return NULL;
	if (unlikely(!page_cache_add_speculative(head, refs)))
		return NULL;

	/*
	 * At this point we have a stable reference to the head page; but it
	 * could be that between looking up the compound head and grabbing the
	 * reference, the compound page was split, in which case we would end
	 * up holding a reference on a page that has nothing to do with the
	 * page we were given anymore.
	 * So now that the head page is stable, recheck that the pages still
	 * belong together.
	 */
	if (unlikely(compound_head(page) != head)) {
		put_page_refs(head, refs);
		return NULL;
	}

	return head;
}
94
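/*
 * try_grab_compound_head() - attempt to elevate a page's refcount, by a
 * flags-dependent amount.
 *
 * Either FOLL_GET or FOLL_PIN must be set, but not both. With FOLL_GET, the
 * head page's refcount is incremented by @refs. With FOLL_PIN, the pin is
 * recorded either in the compound pincount (when available) or by adding
 * @refs * GUP_PIN_COUNTING_BIAS to the refcount, and NR_FOLL_PIN_ACQUIRED
 * is bumped accordingly. FOLL_LONGTERM additionally refuses pages that are
 * not long-term pinnable.
 *
 * Return: the head page, with its refcount appropriately incremented, or
 * NULL on failure. Passing neither FOLL_GET nor FOLL_PIN is treated as a
 * caller bug and triggers a warning.
 */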
127struct page *try_grab_compound_head(struct page *page,
128 int refs, unsigned int flags)
129{
130 if (flags & FOLL_GET)
131 return try_get_compound_head(page, refs);
	else if (flags & FOLL_PIN) {
		/*
		 * FOLL_LONGTERM pins must only take pages that can remain
		 * pinned long term; otherwise fail so the caller can fall
		 * back to the slow path (which can migrate the page first).
		 */
		if (unlikely((flags & FOLL_LONGTERM) &&
			     !is_pinnable_page(page)))
			return NULL;

		page = try_get_compound_head(page, refs);
		if (!page)
			return NULL;

		/*
		 * When pinning a compound page with an available pincount
		 * field, track the pin there with an exact count. Otherwise
		 * fold the pin into the refcount, using
		 * GUP_PIN_COUNTING_BIAS per pin. The single reference taken
		 * by try_get_compound_head() above is kept in both cases.
		 */
		if (hpage_pincount_available(page))
			hpage_pincount_add(page, refs);
		else
			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
164
165 mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
166 refs);
167
168 return page;
169 }
170
171 WARN_ON_ONCE(1);
172 return NULL;
173}
174
175static void put_compound_head(struct page *page, int refs, unsigned int flags)
176{
177 if (flags & FOLL_PIN) {
178 mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
179 refs);
180
181 if (hpage_pincount_available(page))
182 hpage_pincount_sub(page, refs);
183 else
184 refs *= GUP_PIN_COUNTING_BIAS;
185 }
186
187 put_page_refs(page, refs);
188}
189
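/**
 * try_grab_page() - elevate a page's refcount by a flag-dependent amount
 * @page:	pointer to page to be grabbed
 * @flags:	gup flags: these are the FOLL_* flag values
 *
 * "grab" in this file means: look at the flags to decide whether FOLL_PIN or
 * FOLL_GET behavior is wanted when taking the reference.
 *
 * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the
 * same time.
 *
 * Return: true on success, or if no action was required (neither FOLL_PIN
 * nor FOLL_GET was set, so nothing was done). false if the page could not
 * be grabbed.
 */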
209bool __must_check try_grab_page(struct page *page, unsigned int flags)
210{
211 if (!(flags & (FOLL_GET | FOLL_PIN)))
212 return true;
213
214 return try_grab_compound_head(page, 1, flags);
215}
216
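/**
 * unpin_user_page() - release a dma-pinned page
 * @page:	pointer to page to be released
 *
 * Pages that were pinned via pin_user_pages*() must be released via either
 * unpin_user_page(), or one of the unpin_user_pages*() routines, so that
 * such pages can be separately tracked and uniquely handled.
 */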
226void unpin_user_page(struct page *page)
227{
228 put_compound_head(compound_head(page), 1, FOLL_PIN);
229}
230EXPORT_SYMBOL(unpin_user_page);
231
232static inline void compound_range_next(unsigned long i, unsigned long npages,
233 struct page **list, struct page **head,
234 unsigned int *ntails)
235{
236 struct page *next, *page;
237 unsigned int nr = 1;
238
239 if (i >= npages)
240 return;
241
242 next = *list + i;
243 page = compound_head(next);
244 if (PageCompound(page) && compound_order(page) >= 1)
245 nr = min_t(unsigned int,
246 page + compound_nr(page) - next, npages - i);
247
248 *head = page;
249 *ntails = nr;
250}
251
252#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
253 for (__i = 0, \
254 compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
255 __i < __npages; __i += __ntails, \
256 compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
257
258static inline void compound_next(unsigned long i, unsigned long npages,
259 struct page **list, struct page **head,
260 unsigned int *ntails)
261{
262 struct page *page;
263 unsigned int nr;
264
265 if (i >= npages)
266 return;
267
268 page = compound_head(list[i]);
269 for (nr = i + 1; nr < npages; nr++) {
270 if (compound_head(list[nr]) != page)
271 break;
272 }
273
274 *head = page;
275 *ntails = nr - i;
276}
277
278#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
279 for (__i = 0, \
280 compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
281 __i < __npages; __i += __ntails, \
282 compound_next(__i, __npages, __list, &(__head), &(__ntails)))
283
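/**
 * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
 * @pages:	array of pages to be maybe marked dirty, and definitely released
 * @npages:	number of pages in the @pages array
 * @make_dirty:	whether to mark the pages dirty
 *
 * For each compound head covering the @pages array, mark the head dirty (if
 * @make_dirty is true and it was previously clean) and drop the FOLL_PIN
 * reference. When @make_dirty is false this degenerates to
 * unpin_user_pages().
 *
 * set_page_dirty_lock() is used rather than plain set_page_dirty() because
 * the caller is not assumed to hold the page lock here.
 */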
306void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
307 bool make_dirty)
308{
309 unsigned long index;
310 struct page *head;
311 unsigned int ntails;
312
313 if (!make_dirty) {
314 unpin_user_pages(pages, npages);
315 return;
316 }
317
318 for_each_compound_head(index, pages, npages, head, ntails) {
		/*
		 * Checking PageDirty here can race with
		 * clear_page_dirty_for_io(), but the race is harmless:
		 * either the page is already on its way to being written
		 * back (so the data reaches storage anyway), or it simply
		 * gets written back once more in the next writeback cycle.
		 */
		if (!PageDirty(head))
			set_page_dirty_lock(head);
341 put_compound_head(head, ntails, FOLL_PIN);
342 }
343}
344EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
345
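/**
 * unpin_user_page_range_dirty_lock() - release and optionally dirty
 * gup-pinned page range
 * @page:	the starting page of a range that was pinned
 * @npages:	number of consecutive pages to release
 * @make_dirty:	whether to mark the pages dirty
 *
 * Walks the physically contiguous range one compound head at a time,
 * optionally dirtying each head (with set_page_dirty_lock()) and dropping
 * the FOLL_PIN references.
 */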
367void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
368 bool make_dirty)
369{
370 unsigned long index;
371 struct page *head;
372 unsigned int ntails;
373
374 for_each_compound_range(index, &page, npages, head, ntails) {
375 if (make_dirty && !PageDirty(head))
376 set_page_dirty_lock(head);
377 put_compound_head(head, ntails, FOLL_PIN);
378 }
379}
380EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
381
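/**
 * unpin_user_pages() - release an array of gup-pinned pages.
 * @pages:	array of pages to be released
 * @npages:	number of pages in the @pages array
 *
 * For each page in the @pages array, release the page via a FOLL_PIN-style
 * put. Please see the unpin_user_page() documentation for details.
 */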
391void unpin_user_pages(struct page **pages, unsigned long npages)
392{
393 unsigned long index;
394 struct page *head;
395 unsigned int ntails;
396
	/*
	 * If this WARN_ON() fires, then the system *might* be leaking pages
	 * (by leaving them pinned), but probably not. More likely, gup/pup
	 * returned a hard -ERRNO error to the caller, who erroneously passed
	 * it here.
	 */
	if (WARN_ON(IS_ERR_VALUE(npages)))
		return;
404
405 for_each_compound_head(index, pages, npages, head, ntails)
406 put_compound_head(head, ntails, FOLL_PIN);
407}
408EXPORT_SYMBOL(unpin_user_pages);
409
410
411
412
413
414
415static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
416{
417 if (!test_bit(MMF_HAS_PINNED, mm_flags))
418 set_bit(MMF_HAS_PINNED, mm_flags);
419}
420
421#ifdef CONFIG_MMU
422static struct page *no_page_table(struct vm_area_struct *vma,
423 unsigned int flags)
424{
425
426
427
428
429
430
431
432
433 if ((flags & FOLL_DUMP) &&
434 (vma_is_anonymous(vma) || !vma->vm_ops->fault))
435 return ERR_PTR(-EFAULT);
436 return NULL;
437}
438
439static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
440 pte_t *pte, unsigned int flags)
441{
442
443 if (flags & FOLL_GET)
444 return -EFAULT;
445
446 if (flags & FOLL_TOUCH) {
447 pte_t entry = *pte;
448
449 if (flags & FOLL_WRITE)
450 entry = pte_mkdirty(entry);
451 entry = pte_mkyoung(entry);
452
453 if (!pte_same(*pte, entry)) {
454 set_pte_at(vma->vm_mm, address, pte, entry);
455 update_mmu_cache(vma, address, pte);
456 }
457 }
458
459
460 return -EEXIST;
461}
462
463
464
465
466
467static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
468{
469 return pte_write(pte) ||
470 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
471}
472
473static struct page *follow_page_pte(struct vm_area_struct *vma,
474 unsigned long address, pmd_t *pmd, unsigned int flags,
475 struct dev_pagemap **pgmap)
476{
477 struct mm_struct *mm = vma->vm_mm;
478 struct page *page;
479 spinlock_t *ptl;
480 pte_t *ptep, pte;
481 int ret;
482
483
484 if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
485 (FOLL_PIN | FOLL_GET)))
486 return ERR_PTR(-EINVAL);
487retry:
488 if (unlikely(pmd_bad(*pmd)))
489 return no_page_table(vma, flags);
490
491 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
492 pte = *ptep;
493 if (!pte_present(pte)) {
494 swp_entry_t entry;
495
496
497
498
499
500 if (likely(!(flags & FOLL_MIGRATION)))
501 goto no_page;
502 if (pte_none(pte))
503 goto no_page;
504 entry = pte_to_swp_entry(pte);
505 if (!is_migration_entry(entry))
506 goto no_page;
507 pte_unmap_unlock(ptep, ptl);
508 migration_entry_wait(mm, pmd, address);
509 goto retry;
510 }
511 if ((flags & FOLL_NUMA) && pte_protnone(pte))
512 goto no_page;
513 if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
514 pte_unmap_unlock(ptep, ptl);
515 return NULL;
516 }
517
518 page = vm_normal_page(vma, address, pte);
519 if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
520
521
522
523
524
525 *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
526 if (*pgmap)
527 page = pte_page(pte);
528 else
529 goto no_page;
530 } else if (unlikely(!page)) {
531 if (flags & FOLL_DUMP) {
532
533 page = ERR_PTR(-EFAULT);
534 goto out;
535 }
536
537 if (is_zero_pfn(pte_pfn(pte))) {
538 page = pte_page(pte);
539 } else {
540 ret = follow_pfn_pte(vma, address, ptep, flags);
541 page = ERR_PTR(ret);
542 goto out;
543 }
544 }
545
546
547 if (unlikely(!try_grab_page(page, flags))) {
548 page = ERR_PTR(-ENOMEM);
549 goto out;
550 }
551
552
553
554
555
556 if (flags & FOLL_PIN) {
557 ret = arch_make_page_accessible(page);
558 if (ret) {
559 unpin_user_page(page);
560 page = ERR_PTR(ret);
561 goto out;
562 }
563 }
564 if (flags & FOLL_TOUCH) {
565 if ((flags & FOLL_WRITE) &&
566 !pte_dirty(pte) && !PageDirty(page))
567 set_page_dirty(page);
568
569
570
571
572
573 mark_page_accessed(page);
574 }
575 if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
576
577 if (PageTransCompound(page))
578 goto out;
579
580
581
582
583
584
585
586
587
588
589 if (page->mapping && trylock_page(page)) {
590 lru_add_drain();
591
592
593
594
595
596
597 mlock_vma_page(page);
598 unlock_page(page);
599 }
600 }
601out:
602 pte_unmap_unlock(ptep, ptl);
603 return page;
604no_page:
605 pte_unmap_unlock(ptep, ptl);
606 if (!pte_none(pte))
607 return NULL;
608 return no_page_table(vma, flags);
609}
610
611static struct page *follow_pmd_mask(struct vm_area_struct *vma,
612 unsigned long address, pud_t *pudp,
613 unsigned int flags,
614 struct follow_page_context *ctx)
615{
616 pmd_t *pmd, pmdval;
617 spinlock_t *ptl;
618 struct page *page;
619 struct mm_struct *mm = vma->vm_mm;
620
621 pmd = pmd_offset(pudp, address);
622
623
624
625
626 pmdval = READ_ONCE(*pmd);
627 if (pmd_none(pmdval))
628 return no_page_table(vma, flags);
629 if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
630 page = follow_huge_pmd(mm, address, pmd, flags);
631 if (page)
632 return page;
633 return no_page_table(vma, flags);
634 }
635 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
636 page = follow_huge_pd(vma, address,
637 __hugepd(pmd_val(pmdval)), flags,
638 PMD_SHIFT);
639 if (page)
640 return page;
641 return no_page_table(vma, flags);
642 }
643retry:
644 if (!pmd_present(pmdval)) {
645 if (likely(!(flags & FOLL_MIGRATION)))
646 return no_page_table(vma, flags);
647 VM_BUG_ON(thp_migration_supported() &&
648 !is_pmd_migration_entry(pmdval));
649 if (is_pmd_migration_entry(pmdval))
650 pmd_migration_entry_wait(mm, pmd);
651 pmdval = READ_ONCE(*pmd);
652
653
654
655
656 if (pmd_none(pmdval))
657 return no_page_table(vma, flags);
658 goto retry;
659 }
660 if (pmd_devmap(pmdval)) {
661 ptl = pmd_lock(mm, pmd);
662 page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
663 spin_unlock(ptl);
664 if (page)
665 return page;
666 }
667 if (likely(!pmd_trans_huge(pmdval)))
668 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
669
670 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
671 return no_page_table(vma, flags);
672
673retry_locked:
674 ptl = pmd_lock(mm, pmd);
675 if (unlikely(pmd_none(*pmd))) {
676 spin_unlock(ptl);
677 return no_page_table(vma, flags);
678 }
679 if (unlikely(!pmd_present(*pmd))) {
680 spin_unlock(ptl);
681 if (likely(!(flags & FOLL_MIGRATION)))
682 return no_page_table(vma, flags);
683 pmd_migration_entry_wait(mm, pmd);
684 goto retry_locked;
685 }
686 if (unlikely(!pmd_trans_huge(*pmd))) {
687 spin_unlock(ptl);
688 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
689 }
690 if (flags & FOLL_SPLIT_PMD) {
691 int ret;
692 page = pmd_page(*pmd);
693 if (is_huge_zero_page(page)) {
694 spin_unlock(ptl);
695 ret = 0;
696 split_huge_pmd(vma, pmd, address);
697 if (pmd_trans_unstable(pmd))
698 ret = -EBUSY;
699 } else {
700 spin_unlock(ptl);
701 split_huge_pmd(vma, pmd, address);
702 ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
703 }
704
705 return ret ? ERR_PTR(ret) :
706 follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
707 }
708 page = follow_trans_huge_pmd(vma, address, pmd, flags);
709 spin_unlock(ptl);
710 ctx->page_mask = HPAGE_PMD_NR - 1;
711 return page;
712}
713
714static struct page *follow_pud_mask(struct vm_area_struct *vma,
715 unsigned long address, p4d_t *p4dp,
716 unsigned int flags,
717 struct follow_page_context *ctx)
718{
719 pud_t *pud;
720 spinlock_t *ptl;
721 struct page *page;
722 struct mm_struct *mm = vma->vm_mm;
723
724 pud = pud_offset(p4dp, address);
725 if (pud_none(*pud))
726 return no_page_table(vma, flags);
727 if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
728 page = follow_huge_pud(mm, address, pud, flags);
729 if (page)
730 return page;
731 return no_page_table(vma, flags);
732 }
733 if (is_hugepd(__hugepd(pud_val(*pud)))) {
734 page = follow_huge_pd(vma, address,
735 __hugepd(pud_val(*pud)), flags,
736 PUD_SHIFT);
737 if (page)
738 return page;
739 return no_page_table(vma, flags);
740 }
741 if (pud_devmap(*pud)) {
742 ptl = pud_lock(mm, pud);
743 page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
744 spin_unlock(ptl);
745 if (page)
746 return page;
747 }
748 if (unlikely(pud_bad(*pud)))
749 return no_page_table(vma, flags);
750
751 return follow_pmd_mask(vma, address, pud, flags, ctx);
752}
753
754static struct page *follow_p4d_mask(struct vm_area_struct *vma,
755 unsigned long address, pgd_t *pgdp,
756 unsigned int flags,
757 struct follow_page_context *ctx)
758{
759 p4d_t *p4d;
760 struct page *page;
761
762 p4d = p4d_offset(pgdp, address);
763 if (p4d_none(*p4d))
764 return no_page_table(vma, flags);
765 BUILD_BUG_ON(p4d_huge(*p4d));
766 if (unlikely(p4d_bad(*p4d)))
767 return no_page_table(vma, flags);
768
769 if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
770 page = follow_huge_pd(vma, address,
771 __hugepd(p4d_val(*p4d)), flags,
772 P4D_SHIFT);
773 if (page)
774 return page;
775 return no_page_table(vma, flags);
776 }
777 return follow_pud_mask(vma, address, p4d, flags, ctx);
778}
779
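/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma:	vm_area_struct mapping @address
 * @address:	virtual address to look up
 * @flags:	flags modifying lookup behaviour
 * @ctx:	contains dev_pagemap for %ZONE_DEVICE memory pinning and a
 *		pointer to output page_mask
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>.
 *
 * When getting pages from ZONE_DEVICE memory, the dev_pagemap is cached in
 * @ctx->pgmap to avoid repeated expensive lookups; the caller must put it
 * when done. On output, @ctx->page_mask is set according to the size of
 * the page that was found.
 *
 * Return: the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */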
799static struct page *follow_page_mask(struct vm_area_struct *vma,
800 unsigned long address, unsigned int flags,
801 struct follow_page_context *ctx)
802{
803 pgd_t *pgd;
804 struct page *page;
805 struct mm_struct *mm = vma->vm_mm;
806
807 ctx->page_mask = 0;
808
809
810 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
811 if (!IS_ERR(page)) {
812 WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
813 return page;
814 }
815
816 pgd = pgd_offset(mm, address);
817
818 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
819 return no_page_table(vma, flags);
820
821 if (pgd_huge(*pgd)) {
822 page = follow_huge_pgd(mm, address, pgd, flags);
823 if (page)
824 return page;
825 return no_page_table(vma, flags);
826 }
827 if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
828 page = follow_huge_pd(vma, address,
829 __hugepd(pgd_val(*pgd)), flags,
830 PGDIR_SHIFT);
831 if (page)
832 return page;
833 return no_page_table(vma, flags);
834 }
835
836 return follow_p4d_mask(vma, address, pgd, flags, ctx);
837}
838
839struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
840 unsigned int foll_flags)
841{
842 struct follow_page_context ctx = { NULL };
843 struct page *page;
844
845 if (vma_is_secretmem(vma))
846 return NULL;
847
848 page = follow_page_mask(vma, address, foll_flags, &ctx);
849 if (ctx.pgmap)
850 put_dev_pagemap(ctx.pgmap);
851 return page;
852}
853
854static int get_gate_page(struct mm_struct *mm, unsigned long address,
855 unsigned int gup_flags, struct vm_area_struct **vma,
856 struct page **page)
857{
858 pgd_t *pgd;
859 p4d_t *p4d;
860 pud_t *pud;
861 pmd_t *pmd;
862 pte_t *pte;
863 int ret = -EFAULT;
864
865
866 if (gup_flags & FOLL_WRITE)
867 return -EFAULT;
868 if (address > TASK_SIZE)
869 pgd = pgd_offset_k(address);
870 else
871 pgd = pgd_offset_gate(mm, address);
872 if (pgd_none(*pgd))
873 return -EFAULT;
874 p4d = p4d_offset(pgd, address);
875 if (p4d_none(*p4d))
876 return -EFAULT;
877 pud = pud_offset(p4d, address);
878 if (pud_none(*pud))
879 return -EFAULT;
880 pmd = pmd_offset(pud, address);
881 if (!pmd_present(*pmd))
882 return -EFAULT;
883 VM_BUG_ON(pmd_trans_huge(*pmd));
884 pte = pte_offset_map(pmd, address);
885 if (pte_none(*pte))
886 goto unmap;
887 *vma = get_gate_vma(mm);
888 if (!page)
889 goto out;
890 *page = vm_normal_page(*vma, address, *pte);
891 if (!*page) {
892 if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
893 goto unmap;
894 *page = pte_page(*pte);
895 }
896 if (unlikely(!try_grab_page(*page, gup_flags))) {
897 ret = -ENOMEM;
898 goto unmap;
899 }
900out:
901 ret = 0;
902unmap:
903 pte_unmap(pte);
904 return ret;
905}
906
907
908
909
910
911
912static int faultin_page(struct vm_area_struct *vma,
913 unsigned long address, unsigned int *flags, int *locked)
914{
915 unsigned int fault_flags = 0;
916 vm_fault_t ret;
917
918
919 if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
920 return -ENOENT;
921 if (*flags & FOLL_NOFAULT)
922 return -EFAULT;
923 if (*flags & FOLL_WRITE)
924 fault_flags |= FAULT_FLAG_WRITE;
925 if (*flags & FOLL_REMOTE)
926 fault_flags |= FAULT_FLAG_REMOTE;
927 if (locked)
928 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
929 if (*flags & FOLL_NOWAIT)
930 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
931 if (*flags & FOLL_TRIED) {
932
933
934
935
936 fault_flags |= FAULT_FLAG_TRIED;
937 }
938
939 ret = handle_mm_fault(vma, address, fault_flags, NULL);
940 if (ret & VM_FAULT_ERROR) {
941 int err = vm_fault_to_errno(ret, *flags);
942
943 if (err)
944 return err;
945 BUG();
946 }
947
948 if (ret & VM_FAULT_RETRY) {
949 if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
950 *locked = 0;
951 return -EBUSY;
952 }
953
954
955
956
957
958
959
960
961
962
963 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
964 *flags |= FOLL_COW;
965 return 0;
966}
967
968static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
969{
970 vm_flags_t vm_flags = vma->vm_flags;
971 int write = (gup_flags & FOLL_WRITE);
972 int foreign = (gup_flags & FOLL_REMOTE);
973
974 if (vm_flags & (VM_IO | VM_PFNMAP))
975 return -EFAULT;
976
977 if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
978 return -EFAULT;
979
980 if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
981 return -EOPNOTSUPP;
982
983 if (vma_is_secretmem(vma))
984 return -EFAULT;
985
986 if (write) {
987 if (!(vm_flags & VM_WRITE)) {
988 if (!(gup_flags & FOLL_FORCE))
989 return -EFAULT;
990
991
992
993
994
995
996
997
998
999 if (!is_cow_mapping(vm_flags))
1000 return -EFAULT;
1001 }
1002 } else if (!(vm_flags & VM_READ)) {
1003 if (!(gup_flags & FOLL_FORCE))
1004 return -EFAULT;
1005
1006
1007
1008
1009 if (!(vm_flags & VM_MAYREAD))
1010 return -EFAULT;
1011 }
1012
1013
1014
1015
1016 if (!arch_vma_access_permitted(vma, write, false, foreign))
1017 return -EFAULT;
1018 return 0;
1019}
1020
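/**
 * __get_user_pages() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned; should be
 *		at least nr_pages long, or NULL if the caller only wants the
 *		pages faulted in
 * @vmas:	array of pointers to vmas corresponding to each page, or NULL
 * @locked:	whether we're still with the mmap_lock held
 *
 * Must be called with mmap_lock held; the lock may be dropped, in which
 * case *@locked is set to 0.
 *
 * Walks the process page tables and takes a reference to each struct page
 * that each user address corresponds to at that instant. There is no
 * guarantee the page is still mapped when this returns, only that it will
 * not be freed completely.
 *
 * Returns the number of pages pinned (which may be fewer than requested),
 * 0 if @nr_pages was 0 or the fault needs to be retried, or a negative
 * errno if no pages were pinned. If @gup_flags does not include FOLL_WRITE,
 * the returned pages must not be written to.
 */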
1081static long __get_user_pages(struct mm_struct *mm,
1082 unsigned long start, unsigned long nr_pages,
1083 unsigned int gup_flags, struct page **pages,
1084 struct vm_area_struct **vmas, int *locked)
1085{
1086 long ret = 0, i = 0;
1087 struct vm_area_struct *vma = NULL;
1088 struct follow_page_context ctx = { NULL };
1089
1090 if (!nr_pages)
1091 return 0;
1092
1093 start = untagged_addr(start);
1094
1095 VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
1096
1097
1098
1099
1100
1101
1102 if (!(gup_flags & FOLL_FORCE))
1103 gup_flags |= FOLL_NUMA;
1104
1105 do {
1106 struct page *page;
1107 unsigned int foll_flags = gup_flags;
1108 unsigned int page_increm;
1109
1110
1111 if (!vma || start >= vma->vm_end) {
1112 vma = find_extend_vma(mm, start);
1113 if (!vma && in_gate_area(mm, start)) {
1114 ret = get_gate_page(mm, start & PAGE_MASK,
1115 gup_flags, &vma,
1116 pages ? &pages[i] : NULL);
1117 if (ret)
1118 goto out;
1119 ctx.page_mask = 0;
1120 goto next_page;
1121 }
1122
1123 if (!vma) {
1124 ret = -EFAULT;
1125 goto out;
1126 }
1127 ret = check_vma_flags(vma, gup_flags);
1128 if (ret)
1129 goto out;
1130
1131 if (is_vm_hugetlb_page(vma)) {
1132 i = follow_hugetlb_page(mm, vma, pages, vmas,
1133 &start, &nr_pages, i,
1134 gup_flags, locked);
1135 if (locked && *locked == 0) {
1136
1137
1138
1139
1140
1141 BUG_ON(gup_flags & FOLL_NOWAIT);
1142 goto out;
1143 }
1144 continue;
1145 }
1146 }
1147retry:
1148
1149
1150
1151
1152 if (fatal_signal_pending(current)) {
1153 ret = -EINTR;
1154 goto out;
1155 }
1156 cond_resched();
1157
1158 page = follow_page_mask(vma, start, foll_flags, &ctx);
1159 if (!page) {
1160 ret = faultin_page(vma, start, &foll_flags, locked);
1161 switch (ret) {
1162 case 0:
1163 goto retry;
1164 case -EBUSY:
1165 ret = 0;
1166 fallthrough;
1167 case -EFAULT:
1168 case -ENOMEM:
1169 case -EHWPOISON:
1170 goto out;
1171 case -ENOENT:
1172 goto next_page;
1173 }
1174 BUG();
1175 } else if (PTR_ERR(page) == -EEXIST) {
1176
1177
1178
1179
1180 goto next_page;
1181 } else if (IS_ERR(page)) {
1182 ret = PTR_ERR(page);
1183 goto out;
1184 }
1185 if (pages) {
1186 pages[i] = page;
1187 flush_anon_page(vma, page, start);
1188 flush_dcache_page(page);
1189 ctx.page_mask = 0;
1190 }
1191next_page:
1192 if (vmas) {
1193 vmas[i] = vma;
1194 ctx.page_mask = 0;
1195 }
1196 page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
1197 if (page_increm > nr_pages)
1198 page_increm = nr_pages;
1199 i += page_increm;
1200 start += page_increm * PAGE_SIZE;
1201 nr_pages -= page_increm;
1202 } while (nr_pages);
1203out:
1204 if (ctx.pgmap)
1205 put_dev_pagemap(ctx.pgmap);
1206 return i ? i : ret;
1207}
1208
1209static bool vma_permits_fault(struct vm_area_struct *vma,
1210 unsigned int fault_flags)
1211{
1212 bool write = !!(fault_flags & FAULT_FLAG_WRITE);
1213 bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
1214 vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
1215
1216 if (!(vm_flags & vma->vm_flags))
1217 return false;
1218
1219
1220
1221
1222
1223
1224
1225
1226 if (!arch_vma_access_permitted(vma, write, false, foreign))
1227 return false;
1228
1229 return true;
1230}
1231
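/**
 * fixup_user_fault() - manually resolve a user page fault
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_lock while retrying; may be NULL if
 *		the caller does not want FAULT_FLAG_ALLOW_RETRY behaviour
 *
 * This is meant for the scenario where user memory was accessed in atomic
 * context (within a pagefault_disable() section), that access returned
 * -EFAULT, and the fault needs to be resolved before trying again.
 * Typically this is used by the futex code.
 *
 * Return: 0 on success, or a negative errno on failure. If
 * FAULT_FLAG_ALLOW_RETRY was set and the lock was dropped, *@unlocked is
 * set to true and the mmap_lock is re-taken before retrying.
 */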
1261int fixup_user_fault(struct mm_struct *mm,
1262 unsigned long address, unsigned int fault_flags,
1263 bool *unlocked)
1264{
1265 struct vm_area_struct *vma;
1266 vm_fault_t ret;
1267
1268 address = untagged_addr(address);
1269
1270 if (unlocked)
1271 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
1272
1273retry:
1274 vma = find_extend_vma(mm, address);
1275 if (!vma || address < vma->vm_start)
1276 return -EFAULT;
1277
1278 if (!vma_permits_fault(vma, fault_flags))
1279 return -EFAULT;
1280
1281 if ((fault_flags & FAULT_FLAG_KILLABLE) &&
1282 fatal_signal_pending(current))
1283 return -EINTR;
1284
1285 ret = handle_mm_fault(vma, address, fault_flags, NULL);
1286 if (ret & VM_FAULT_ERROR) {
1287 int err = vm_fault_to_errno(ret, 0);
1288
1289 if (err)
1290 return err;
1291 BUG();
1292 }
1293
1294 if (ret & VM_FAULT_RETRY) {
1295 mmap_read_lock(mm);
1296 *unlocked = true;
1297 fault_flags |= FAULT_FLAG_TRIED;
1298 goto retry;
1299 }
1300
1301 return 0;
1302}
1303EXPORT_SYMBOL_GPL(fixup_user_fault);
1304
1305
1306
1307
1308
1309static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
1310 unsigned long start,
1311 unsigned long nr_pages,
1312 struct page **pages,
1313 struct vm_area_struct **vmas,
1314 int *locked,
1315 unsigned int flags)
1316{
1317 long ret, pages_done;
1318 bool lock_dropped;
1319
1320 if (locked) {
1321
1322 BUG_ON(vmas);
1323
1324 BUG_ON(*locked != 1);
1325 }
1326
1327 if (flags & FOLL_PIN)
1328 mm_set_has_pinned_flag(&mm->flags);
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339 if (pages && !(flags & FOLL_PIN))
1340 flags |= FOLL_GET;
1341
1342 pages_done = 0;
1343 lock_dropped = false;
1344 for (;;) {
1345 ret = __get_user_pages(mm, start, nr_pages, flags, pages,
1346 vmas, locked);
1347 if (!locked)
1348
1349 return ret;
1350
1351
1352 if (!*locked) {
1353 BUG_ON(ret < 0);
1354 BUG_ON(ret >= nr_pages);
1355 }
1356
1357 if (ret > 0) {
1358 nr_pages -= ret;
1359 pages_done += ret;
1360 if (!nr_pages)
1361 break;
1362 }
1363 if (*locked) {
1364
1365
1366
1367
1368 if (!pages_done)
1369 pages_done = ret;
1370 break;
1371 }
1372
1373
1374
1375
1376 if (likely(pages))
1377 pages += ret;
1378 start += ret << PAGE_SHIFT;
1379 lock_dropped = true;
1380
1381retry:
1382
1383
1384
1385
1386
1387
1388
1389
1390 if (fatal_signal_pending(current)) {
1391 if (!pages_done)
1392 pages_done = -EINTR;
1393 break;
1394 }
1395
1396 ret = mmap_read_lock_killable(mm);
1397 if (ret) {
1398 BUG_ON(ret > 0);
1399 if (!pages_done)
1400 pages_done = ret;
1401 break;
1402 }
1403
1404 *locked = 1;
1405 ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
1406 pages, NULL, locked);
1407 if (!*locked) {
1408
1409 BUG_ON(ret != 0);
1410 goto retry;
1411 }
1412 if (ret != 1) {
1413 BUG_ON(ret > 1);
1414 if (!pages_done)
1415 pages_done = ret;
1416 break;
1417 }
1418 nr_pages--;
1419 pages_done++;
1420 if (!nr_pages)
1421 break;
1422 if (likely(pages))
1423 pages++;
1424 start += PAGE_SIZE;
1425 }
1426 if (lock_dropped && *locked) {
1427
1428
1429
1430
1431 mmap_read_unlock(mm);
1432 *locked = 0;
1433 }
1434 return pages_done;
1435}
1436
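/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:	target vma
 * @start:	start address
 * @end:	end address
 * @locked:	whether the mmap_lock is still held
 *
 * This takes care of mlocking the pages, too, if VM_LOCKED is set.
 *
 * Return either the number of pages populated in the vma, or a negative
 * error code on error.
 *
 * vma->vm_mm->mmap_lock must be held. If @locked is non-NULL and the lock
 * is dropped, *@locked will be set to 0.
 */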
1457long populate_vma_page_range(struct vm_area_struct *vma,
1458 unsigned long start, unsigned long end, int *locked)
1459{
1460 struct mm_struct *mm = vma->vm_mm;
1461 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1462 int gup_flags;
1463
1464 VM_BUG_ON(!PAGE_ALIGNED(start));
1465 VM_BUG_ON(!PAGE_ALIGNED(end));
1466 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1467 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1468 mmap_assert_locked(mm);
1469
1470 gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
1471 if (vma->vm_flags & VM_LOCKONFAULT)
1472 gup_flags &= ~FOLL_POPULATE;
1473
1474
1475
1476
1477
1478 if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
1479 gup_flags |= FOLL_WRITE;
1480
1481
1482
1483
1484
1485 if (vma_is_accessible(vma))
1486 gup_flags |= FOLL_FORCE;
1487
1488
1489
1490
1491
1492 return __get_user_pages(mm, start, nr_pages, gup_flags,
1493 NULL, NULL, locked);
1494}
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
1520 unsigned long end, bool write, int *locked)
1521{
1522 struct mm_struct *mm = vma->vm_mm;
1523 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1524 int gup_flags;
1525
1526 VM_BUG_ON(!PAGE_ALIGNED(start));
1527 VM_BUG_ON(!PAGE_ALIGNED(end));
1528 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1529 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1530 mmap_assert_locked(mm);
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542 gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
1543 if (write)
1544 gup_flags |= FOLL_WRITE;
1545
1546
1547
1548
1549
1550 if (check_vma_flags(vma, gup_flags))
1551 return -EINVAL;
1552
1553 return __get_user_pages(mm, start, nr_pages, gup_flags,
1554 NULL, NULL, locked);
1555}
1556
1557
1558
1559
1560
1561
1562
1563
1564int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
1565{
1566 struct mm_struct *mm = current->mm;
1567 unsigned long end, nstart, nend;
1568 struct vm_area_struct *vma = NULL;
1569 int locked = 0;
1570 long ret = 0;
1571
1572 end = start + len;
1573
1574 for (nstart = start; nstart < end; nstart = nend) {
1575
1576
1577
1578
1579 if (!locked) {
1580 locked = 1;
1581 mmap_read_lock(mm);
1582 vma = find_vma(mm, nstart);
1583 } else if (nstart >= vma->vm_end)
1584 vma = vma->vm_next;
1585 if (!vma || vma->vm_start >= end)
1586 break;
1587
1588
1589
1590
1591 nend = min(end, vma->vm_end);
1592 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1593 continue;
1594 if (nstart < vma->vm_start)
1595 nstart = vma->vm_start;
1596
1597
1598
1599
1600
1601 ret = populate_vma_page_range(vma, nstart, nend, &locked);
1602 if (ret < 0) {
1603 if (ignore_errors) {
1604 ret = 0;
1605 continue;
1606 }
1607 break;
1608 }
1609 nend = nstart + ret * PAGE_SIZE;
1610 ret = 0;
1611 }
1612 if (locked)
1613 mmap_read_unlock(mm);
1614 return ret;
1615}
1616#else
1617static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
1618 unsigned long nr_pages, struct page **pages,
1619 struct vm_area_struct **vmas, int *locked,
1620 unsigned int foll_flags)
1621{
1622 struct vm_area_struct *vma;
1623 unsigned long vm_flags;
1624 long i;
1625
1626
1627
1628
1629 vm_flags = (foll_flags & FOLL_WRITE) ?
1630 (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
1631 vm_flags &= (foll_flags & FOLL_FORCE) ?
1632 (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
1633
1634 for (i = 0; i < nr_pages; i++) {
1635 vma = find_vma(mm, start);
1636 if (!vma)
1637 goto finish_or_fault;
1638
1639
1640 if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
1641 !(vm_flags & vma->vm_flags))
1642 goto finish_or_fault;
1643
1644 if (pages) {
1645 pages[i] = virt_to_page(start);
1646 if (pages[i])
1647 get_page(pages[i]);
1648 }
1649 if (vmas)
1650 vmas[i] = vma;
1651 start = (start + PAGE_SIZE) & PAGE_MASK;
1652 }
1653
1654 return i;
1655
1656finish_or_fault:
1657 return i ? : -EFAULT;
1658}
1659#endif
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669size_t fault_in_writeable(char __user *uaddr, size_t size)
1670{
1671 char __user *start = uaddr, *end;
1672
1673 if (unlikely(size == 0))
1674 return 0;
1675 if (!PAGE_ALIGNED(uaddr)) {
1676 if (unlikely(__put_user(0, uaddr) != 0))
1677 return size;
1678 uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
1679 }
1680 end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
1681 if (unlikely(end < start))
1682 end = NULL;
1683 while (uaddr != end) {
1684 if (unlikely(__put_user(0, uaddr) != 0))
1685 goto out;
1686 uaddr += PAGE_SIZE;
1687 }
1688
1689out:
1690 if (size > uaddr - start)
1691 return size - (uaddr - start);
1692 return 0;
1693}
1694EXPORT_SYMBOL(fault_in_writeable);
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
1715{
1716 unsigned long start = (unsigned long)untagged_addr(uaddr);
1717 unsigned long end, nstart, nend;
1718 struct mm_struct *mm = current->mm;
1719 struct vm_area_struct *vma = NULL;
1720 int locked = 0;
1721
1722 nstart = start & PAGE_MASK;
1723 end = PAGE_ALIGN(start + size);
1724 if (end < nstart)
1725 end = 0;
1726 for (; nstart != end; nstart = nend) {
1727 unsigned long nr_pages;
1728 long ret;
1729
1730 if (!locked) {
1731 locked = 1;
1732 mmap_read_lock(mm);
1733 vma = find_vma(mm, nstart);
1734 } else if (nstart >= vma->vm_end)
1735 vma = vma->vm_next;
1736 if (!vma || vma->vm_start >= end)
1737 break;
1738 nend = end ? min(end, vma->vm_end) : vma->vm_end;
1739 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1740 continue;
1741 if (nstart < vma->vm_start)
1742 nstart = vma->vm_start;
1743 nr_pages = (nend - nstart) / PAGE_SIZE;
1744 ret = __get_user_pages_locked(mm, nstart, nr_pages,
1745 NULL, NULL, &locked,
1746 FOLL_TOUCH | FOLL_WRITE);
1747 if (ret <= 0)
1748 break;
1749 nend = nstart + ret * PAGE_SIZE;
1750 }
1751 if (locked)
1752 mmap_read_unlock(mm);
1753 if (nstart == end)
1754 return 0;
1755 return size - min_t(size_t, nstart - start, size);
1756}
1757EXPORT_SYMBOL(fault_in_safe_writeable);
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767size_t fault_in_readable(const char __user *uaddr, size_t size)
1768{
1769 const char __user *start = uaddr, *end;
1770 volatile char c;
1771
1772 if (unlikely(size == 0))
1773 return 0;
1774 if (!PAGE_ALIGNED(uaddr)) {
1775 if (unlikely(__get_user(c, uaddr) != 0))
1776 return size;
1777 uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
1778 }
1779 end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
1780 if (unlikely(end < start))
1781 end = NULL;
1782 while (uaddr != end) {
1783 if (unlikely(__get_user(c, uaddr) != 0))
1784 goto out;
1785 uaddr += PAGE_SIZE;
1786 }
1787
1788out:
1789 (void)c;
1790 if (size > uaddr - start)
1791 return size - (uaddr - start);
1792 return 0;
1793}
1794EXPORT_SYMBOL(fault_in_readable);
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810#ifdef CONFIG_ELF_CORE
1811struct page *get_dump_page(unsigned long addr)
1812{
1813 struct mm_struct *mm = current->mm;
1814 struct page *page;
1815 int locked = 1;
1816 int ret;
1817
1818 if (mmap_read_lock_killable(mm))
1819 return NULL;
1820 ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
1821 FOLL_FORCE | FOLL_DUMP | FOLL_GET);
1822 if (locked)
1823 mmap_read_unlock(mm);
1824 return (ret == 1) ? page : NULL;
1825}
1826#endif
1827
1828#ifdef CONFIG_MIGRATION
1829
1830
1831
1832
1833
1834
1835static long check_and_migrate_movable_pages(unsigned long nr_pages,
1836 struct page **pages,
1837 unsigned int gup_flags)
1838{
1839 unsigned long i;
1840 unsigned long isolation_error_count = 0;
1841 bool drain_allow = true;
1842 LIST_HEAD(movable_page_list);
1843 long ret = 0;
1844 struct page *prev_head = NULL;
1845 struct page *head;
1846 struct migration_target_control mtc = {
1847 .nid = NUMA_NO_NODE,
1848 .gfp_mask = GFP_USER | __GFP_NOWARN,
1849 };
1850
1851 for (i = 0; i < nr_pages; i++) {
1852 head = compound_head(pages[i]);
1853 if (head == prev_head)
1854 continue;
1855 prev_head = head;
1856
1857
1858
1859
1860 if (!is_pinnable_page(head)) {
1861 if (PageHuge(head)) {
1862 if (!isolate_huge_page(head, &movable_page_list))
1863 isolation_error_count++;
1864 } else {
1865 if (!PageLRU(head) && drain_allow) {
1866 lru_add_drain_all();
1867 drain_allow = false;
1868 }
1869
1870 if (isolate_lru_page(head)) {
1871 isolation_error_count++;
1872 continue;
1873 }
1874 list_add_tail(&head->lru, &movable_page_list);
1875 mod_node_page_state(page_pgdat(head),
1876 NR_ISOLATED_ANON +
1877 page_is_file_lru(head),
1878 thp_nr_pages(head));
1879 }
1880 }
1881 }
1882
1883
1884
1885
1886
1887 if (list_empty(&movable_page_list) && !isolation_error_count)
1888 return nr_pages;
1889
1890 if (gup_flags & FOLL_PIN) {
1891 unpin_user_pages(pages, nr_pages);
1892 } else {
1893 for (i = 0; i < nr_pages; i++)
1894 put_page(pages[i]);
1895 }
1896 if (!list_empty(&movable_page_list)) {
1897 ret = migrate_pages(&movable_page_list, alloc_migration_target,
1898 NULL, (unsigned long)&mtc, MIGRATE_SYNC,
1899 MR_LONGTERM_PIN, NULL);
1900 if (ret && !list_empty(&movable_page_list))
1901 putback_movable_pages(&movable_page_list);
1902 }
1903
1904 return ret > 0 ? -ENOMEM : ret;
1905}
1906#else
1907static long check_and_migrate_movable_pages(unsigned long nr_pages,
1908 struct page **pages,
1909 unsigned int gup_flags)
1910{
1911 return nr_pages;
1912}
1913#endif
1914
1915
1916
1917
1918
1919static long __gup_longterm_locked(struct mm_struct *mm,
1920 unsigned long start,
1921 unsigned long nr_pages,
1922 struct page **pages,
1923 struct vm_area_struct **vmas,
1924 unsigned int gup_flags)
1925{
1926 unsigned int flags;
1927 long rc;
1928
1929 if (!(gup_flags & FOLL_LONGTERM))
1930 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
1931 NULL, gup_flags);
1932 flags = memalloc_pin_save();
1933 do {
1934 rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
1935 NULL, gup_flags);
1936 if (rc <= 0)
1937 break;
1938 rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
1939 } while (!rc);
1940 memalloc_pin_restore(flags);
1941
1942 return rc;
1943}
1944
1945static bool is_valid_gup_flags(unsigned int gup_flags)
1946{
1947
1948
1949
1950
1951 if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
1952 return false;
1953
1954
1955
1956
1957
1958 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1959 return false;
1960
1961 return true;
1962}
1963
1964#ifdef CONFIG_MMU
1965static long __get_user_pages_remote(struct mm_struct *mm,
1966 unsigned long start, unsigned long nr_pages,
1967 unsigned int gup_flags, struct page **pages,
1968 struct vm_area_struct **vmas, int *locked)
1969{
1970
1971
1972
1973
1974
1975
1976
1977 if (gup_flags & FOLL_LONGTERM) {
1978 if (WARN_ON_ONCE(locked))
1979 return -EINVAL;
1980
1981
1982
1983
1984 return __gup_longterm_locked(mm, start, nr_pages, pages,
1985 vmas, gup_flags | FOLL_TOUCH |
1986 FOLL_REMOTE);
1987 }
1988
1989 return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
1990 locked,
1991 gup_flags | FOLL_TOUCH | FOLL_REMOTE);
1992}
1993
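/**
 * get_user_pages_remote() - pin user pages in memory
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned; should be
 *		at least nr_pages long, or NULL
 * @vmas:	array of pointers to vmas corresponding to each page, or NULL
 * @locked:	pointer to lock flag indicating whether the lock is still
 *		held and must be cleared when it is dropped, or NULL
 *
 * Returns either the number of pages pinned (which may be less than the
 * number requested), or an error. See __get_user_pages() for the detailed
 * rules on return values.
 *
 * The caller is responsible for releasing the returned @pages, via
 * put_page(). @vmas are valid only as long as mmap_lock is held.
 *
 * Must be called with mmap_lock held.
 *
 * This is the same as get_user_pages(), except that it can operate on a
 * different mm (the one described by @mm rather than current->mm), which is
 * useful for things like ptrace and access_process_vm().
 */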
2054long get_user_pages_remote(struct mm_struct *mm,
2055 unsigned long start, unsigned long nr_pages,
2056 unsigned int gup_flags, struct page **pages,
2057 struct vm_area_struct **vmas, int *locked)
2058{
2059 if (!is_valid_gup_flags(gup_flags))
2060 return -EINVAL;
2061
2062 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
2063 pages, vmas, locked);
2064}
2065EXPORT_SYMBOL(get_user_pages_remote);
2066
2067#else
2068long get_user_pages_remote(struct mm_struct *mm,
2069 unsigned long start, unsigned long nr_pages,
2070 unsigned int gup_flags, struct page **pages,
2071 struct vm_area_struct **vmas, int *locked)
2072{
2073 return 0;
2074}
2075
2076static long __get_user_pages_remote(struct mm_struct *mm,
2077 unsigned long start, unsigned long nr_pages,
2078 unsigned int gup_flags, struct page **pages,
2079 struct vm_area_struct **vmas, int *locked)
2080{
2081 return 0;
2082}
2083#endif
2084
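/**
 * get_user_pages() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned; should be
 *		at least nr_pages long, or NULL
 * @vmas:	array of pointers to vmas corresponding to each page, or NULL
 *
 * This is the same as get_user_pages_remote(), just with a less-flexible
 * calling convention where we assume that the mm being operated on belongs
 * to the current task, and doesn't allow passing of a locked parameter.
 * We also don't pass FOLL_REMOTE in here.
 */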
2101long get_user_pages(unsigned long start, unsigned long nr_pages,
2102 unsigned int gup_flags, struct page **pages,
2103 struct vm_area_struct **vmas)
2104{
2105 if (!is_valid_gup_flags(gup_flags))
2106 return -EINVAL;
2107
2108 return __gup_longterm_locked(current->mm, start, nr_pages,
2109 pages, vmas, gup_flags | FOLL_TOUCH);
2110}
2111EXPORT_SYMBOL(get_user_pages);
2112
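/*
 * get_user_pages_locked() lets the mmap_lock be dropped and re-taken
 * internally when a fault must sleep. A typical conversion looks roughly
 * like replacing:
 *
 *	mmap_read_lock(mm);
 *	do_something();
 *	get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *	mmap_read_unlock(mm);
 *
 * with:
 *
 *	int locked = 1;
 *
 *	mmap_read_lock(mm);
 *	do_something();
 *	get_user_pages_locked(start, nr_pages, gup_flags, pages, &locked);
 *	if (locked)
 *		mmap_read_unlock(mm);
 *
 * so that the VM_FAULT_RETRY path in the fault handler can be used: the
 * lock release / re-acquisition is handled here rather than by the caller.
 *
 * FOLL_LONGTERM and FOLL_PIN are rejected by this entry point (see the
 * checks below); callers needing those must use the pin_user_pages*() APIs.
 */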
2147long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
2148 unsigned int gup_flags, struct page **pages,
2149 int *locked)
2150{
2151
2152
2153
2154
2155
2156
2157 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2158 return -EINVAL;
2159
2160
2161
2162
2163 if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
2164 return -EINVAL;
2165
2166 return __get_user_pages_locked(current->mm, start, nr_pages,
2167 pages, NULL, locked,
2168 gup_flags | FOLL_TOUCH);
2169}
2170EXPORT_SYMBOL(get_user_pages_locked);
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
2188 struct page **pages, unsigned int gup_flags)
2189{
2190 struct mm_struct *mm = current->mm;
2191 int locked = 1;
2192 long ret;
2193
2194
2195
2196
2197
2198
2199
2200 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
2201 return -EINVAL;
2202
2203 mmap_read_lock(mm);
2204 ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
2205 &locked, gup_flags | FOLL_TOUCH);
2206 if (locked)
2207 mmap_read_unlock(mm);
2208 return ret;
2209}
2210EXPORT_SYMBOL(get_user_pages_unlocked);
2211
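/*
 * Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * This is achieved by disabling interrupts around the walk: the page-table
 * freeing paths either broadcast an IPI before freeing or batch the pages
 * for an RCU-sched callback, and both are held off while IRQs are disabled
 * on the walking CPU; the IPI used for splitting THPs is blocked as well.
 * In addition, ptes must be readable atomically by the architecture, and
 * access_ok() must be sufficient to validate userspace address ranges.
 */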
2245#ifdef CONFIG_HAVE_FAST_GUP
2246
2247static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
2248 unsigned int flags,
2249 struct page **pages)
2250{
2251 while ((*nr) - nr_start) {
2252 struct page *page = pages[--(*nr)];
2253
2254 ClearPageReferenced(page);
2255 if (flags & FOLL_PIN)
2256 unpin_user_page(page);
2257 else
2258 put_page(page);
2259 }
2260}
2261
2262#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
2263static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
2264 unsigned int flags, struct page **pages, int *nr)
2265{
2266 struct dev_pagemap *pgmap = NULL;
2267 int nr_start = *nr, ret = 0;
2268 pte_t *ptep, *ptem;
2269
2270 ptem = ptep = pte_offset_map(&pmd, addr);
2271 do {
2272 pte_t pte = ptep_get_lockless(ptep);
2273 struct page *head, *page;
2274
2275
2276
2277
2278
2279 if (pte_protnone(pte))
2280 goto pte_unmap;
2281
2282 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2283 goto pte_unmap;
2284
2285 if (pte_devmap(pte)) {
2286 if (unlikely(flags & FOLL_LONGTERM))
2287 goto pte_unmap;
2288
2289 pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
2290 if (unlikely(!pgmap)) {
2291 undo_dev_pagemap(nr, nr_start, flags, pages);
2292 goto pte_unmap;
2293 }
2294 } else if (pte_special(pte))
2295 goto pte_unmap;
2296
2297 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2298 page = pte_page(pte);
2299
2300 head = try_grab_compound_head(page, 1, flags);
2301 if (!head)
2302 goto pte_unmap;
2303
2304 if (unlikely(page_is_secretmem(page))) {
2305 put_compound_head(head, 1, flags);
2306 goto pte_unmap;
2307 }
2308
2309 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2310 put_compound_head(head, 1, flags);
2311 goto pte_unmap;
2312 }
2313
2314 VM_BUG_ON_PAGE(compound_head(page) != head, page);
2315
2316
2317
2318
2319
2320
2321
2322 if (flags & FOLL_PIN) {
2323 ret = arch_make_page_accessible(page);
2324 if (ret) {
2325 unpin_user_page(page);
2326 goto pte_unmap;
2327 }
2328 }
2329 SetPageReferenced(page);
2330 pages[*nr] = page;
2331 (*nr)++;
2332
2333 } while (ptep++, addr += PAGE_SIZE, addr != end);
2334
2335 ret = 1;
2336
2337pte_unmap:
2338 if (pgmap)
2339 put_dev_pagemap(pgmap);
2340 pte_unmap(ptem);
2341 return ret;
2342}
2343#else
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
2355 unsigned int flags, struct page **pages, int *nr)
2356{
2357 return 0;
2358}
2359#endif
2360
2361#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
2362static int __gup_device_huge(unsigned long pfn, unsigned long addr,
2363 unsigned long end, unsigned int flags,
2364 struct page **pages, int *nr)
2365{
2366 int nr_start = *nr;
2367 struct dev_pagemap *pgmap = NULL;
2368
2369 do {
2370 struct page *page = pfn_to_page(pfn);
2371
2372 pgmap = get_dev_pagemap(pfn, pgmap);
2373 if (unlikely(!pgmap)) {
2374 undo_dev_pagemap(nr, nr_start, flags, pages);
2375 break;
2376 }
2377 SetPageReferenced(page);
2378 pages[*nr] = page;
2379 if (unlikely(!try_grab_page(page, flags))) {
2380 undo_dev_pagemap(nr, nr_start, flags, pages);
2381 break;
2382 }
2383 (*nr)++;
2384 pfn++;
2385 } while (addr += PAGE_SIZE, addr != end);
2386
2387 put_dev_pagemap(pgmap);
2388 return addr == end;
2389}
2390
2391static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2392 unsigned long end, unsigned int flags,
2393 struct page **pages, int *nr)
2394{
2395 unsigned long fault_pfn;
2396 int nr_start = *nr;
2397
2398 fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
2399 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2400 return 0;
2401
2402 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2403 undo_dev_pagemap(nr, nr_start, flags, pages);
2404 return 0;
2405 }
2406 return 1;
2407}
2408
2409static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2410 unsigned long end, unsigned int flags,
2411 struct page **pages, int *nr)
2412{
2413 unsigned long fault_pfn;
2414 int nr_start = *nr;
2415
2416 fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
2417 if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
2418 return 0;
2419
2420 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2421 undo_dev_pagemap(nr, nr_start, flags, pages);
2422 return 0;
2423 }
2424 return 1;
2425}
2426#else
2427static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2428 unsigned long end, unsigned int flags,
2429 struct page **pages, int *nr)
2430{
2431 BUILD_BUG();
2432 return 0;
2433}
2434
2435static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
2436 unsigned long end, unsigned int flags,
2437 struct page **pages, int *nr)
2438{
2439 BUILD_BUG();
2440 return 0;
2441}
2442#endif
2443
2444static int record_subpages(struct page *page, unsigned long addr,
2445 unsigned long end, struct page **pages)
2446{
2447 int nr;
2448
2449 for (nr = 0; addr != end; addr += PAGE_SIZE)
2450 pages[nr++] = page++;
2451
2452 return nr;
2453}
2454
2455#ifdef CONFIG_ARCH_HAS_HUGEPD
2456static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
2457 unsigned long sz)
2458{
2459 unsigned long __boundary = (addr + sz) & ~(sz-1);
2460 return (__boundary - 1 < end - 1) ? __boundary : end;
2461}
2462
2463static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
2464 unsigned long end, unsigned int flags,
2465 struct page **pages, int *nr)
2466{
2467 unsigned long pte_end;
2468 struct page *head, *page;
2469 pte_t pte;
2470 int refs;
2471
2472 pte_end = (addr + sz) & ~(sz-1);
2473 if (pte_end < end)
2474 end = pte_end;
2475
2476 pte = huge_ptep_get(ptep);
2477
2478 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
2479 return 0;
2480
2481
2482 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
2483
2484 head = pte_page(pte);
2485 page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
2486 refs = record_subpages(page, addr, end, pages + *nr);
2487
2488 head = try_grab_compound_head(head, refs, flags);
2489 if (!head)
2490 return 0;
2491
2492 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
2493 put_compound_head(head, refs, flags);
2494 return 0;
2495 }
2496
2497 *nr += refs;
2498 SetPageReferenced(head);
2499 return 1;
2500}
2501
2502static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2503 unsigned int pdshift, unsigned long end, unsigned int flags,
2504 struct page **pages, int *nr)
2505{
2506 pte_t *ptep;
2507 unsigned long sz = 1UL << hugepd_shift(hugepd);
2508 unsigned long next;
2509
2510 ptep = hugepte_offset(hugepd, addr, pdshift);
2511 do {
2512 next = hugepte_addr_end(addr, end, sz);
2513 if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
2514 return 0;
2515 } while (ptep++, addr = next, addr != end);
2516
2517 return 1;
2518}
2519#else
2520static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
2521 unsigned int pdshift, unsigned long end, unsigned int flags,
2522 struct page **pages, int *nr)
2523{
2524 return 0;
2525}
2526#endif
2527
2528static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
2529 unsigned long end, unsigned int flags,
2530 struct page **pages, int *nr)
2531{
2532 struct page *head, *page;
2533 int refs;
2534
2535 if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
2536 return 0;
2537
2538 if (pmd_devmap(orig)) {
2539 if (unlikely(flags & FOLL_LONGTERM))
2540 return 0;
2541 return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
2542 pages, nr);
2543 }
2544
2545 page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
2546 refs = record_subpages(page, addr, end, pages + *nr);
2547
2548 head = try_grab_compound_head(pmd_page(orig), refs, flags);
2549 if (!head)
2550 return 0;
2551
2552 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
2553 put_compound_head(head, refs, flags);
2554 return 0;
2555 }
2556
2557 *nr += refs;
2558 SetPageReferenced(head);
2559 return 1;
2560}
2561
2562static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
2563 unsigned long end, unsigned int flags,
2564 struct page **pages, int *nr)
2565{
2566 struct page *head, *page;
2567 int refs;
2568
2569 if (!pud_access_permitted(orig, flags & FOLL_WRITE))
2570 return 0;
2571
2572 if (pud_devmap(orig)) {
2573 if (unlikely(flags & FOLL_LONGTERM))
2574 return 0;
2575 return __gup_device_huge_pud(orig, pudp, addr, end, flags,
2576 pages, nr);
2577 }
2578
2579 page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
2580 refs = record_subpages(page, addr, end, pages + *nr);
2581
2582 head = try_grab_compound_head(pud_page(orig), refs, flags);
2583 if (!head)
2584 return 0;
2585
2586 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
2587 put_compound_head(head, refs, flags);
2588 return 0;
2589 }
2590
2591 *nr += refs;
2592 SetPageReferenced(head);
2593 return 1;
2594}
2595
2596static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
2597 unsigned long end, unsigned int flags,
2598 struct page **pages, int *nr)
2599{
2600 int refs;
2601 struct page *head, *page;
2602
2603 if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
2604 return 0;
2605
2606 BUILD_BUG_ON(pgd_devmap(orig));
2607
2608 page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
2609 refs = record_subpages(page, addr, end, pages + *nr);
2610
2611 head = try_grab_compound_head(pgd_page(orig), refs, flags);
2612 if (!head)
2613 return 0;
2614
2615 if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
2616 put_compound_head(head, refs, flags);
2617 return 0;
2618 }
2619
2620 *nr += refs;
2621 SetPageReferenced(head);
2622 return 1;
2623}
2624
2625static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
2626 unsigned int flags, struct page **pages, int *nr)
2627{
2628 unsigned long next;
2629 pmd_t *pmdp;
2630
2631 pmdp = pmd_offset_lockless(pudp, pud, addr);
2632 do {
2633 pmd_t pmd = READ_ONCE(*pmdp);
2634
2635 next = pmd_addr_end(addr, end);
2636 if (!pmd_present(pmd))
2637 return 0;
2638
2639 if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
2640 pmd_devmap(pmd))) {
2641
2642
2643
2644
2645
2646 if (pmd_protnone(pmd))
2647 return 0;
2648
2649 if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
2650 pages, nr))
2651 return 0;
2652
2653 } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
2654
2655
2656
2657
2658 if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
2659 PMD_SHIFT, next, flags, pages, nr))
2660 return 0;
2661 } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
2662 return 0;
2663 } while (pmdp++, addr = next, addr != end);
2664
2665 return 1;
2666}
2667
2668static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
2669 unsigned int flags, struct page **pages, int *nr)
2670{
2671 unsigned long next;
2672 pud_t *pudp;
2673
2674 pudp = pud_offset_lockless(p4dp, p4d, addr);
2675 do {
2676 pud_t pud = READ_ONCE(*pudp);
2677
2678 next = pud_addr_end(addr, end);
2679 if (unlikely(!pud_present(pud)))
2680 return 0;
2681 if (unlikely(pud_huge(pud))) {
2682 if (!gup_huge_pud(pud, pudp, addr, next, flags,
2683 pages, nr))
2684 return 0;
2685 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
2686 if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
2687 PUD_SHIFT, next, flags, pages, nr))
2688 return 0;
2689 } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
2690 return 0;
2691 } while (pudp++, addr = next, addr != end);
2692
2693 return 1;
2694}
2695
2696static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
2697 unsigned int flags, struct page **pages, int *nr)
2698{
2699 unsigned long next;
2700 p4d_t *p4dp;
2701
2702 p4dp = p4d_offset_lockless(pgdp, pgd, addr);
2703 do {
2704 p4d_t p4d = READ_ONCE(*p4dp);
2705
2706 next = p4d_addr_end(addr, end);
2707 if (p4d_none(p4d))
2708 return 0;
2709 BUILD_BUG_ON(p4d_huge(p4d));
2710 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
2711 if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
2712 P4D_SHIFT, next, flags, pages, nr))
2713 return 0;
2714 } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
2715 return 0;
2716 } while (p4dp++, addr = next, addr != end);
2717
2718 return 1;
2719}
2720
2721static void gup_pgd_range(unsigned long addr, unsigned long end,
2722 unsigned int flags, struct page **pages, int *nr)
2723{
2724 unsigned long next;
2725 pgd_t *pgdp;
2726
2727 pgdp = pgd_offset(current->mm, addr);
2728 do {
2729 pgd_t pgd = READ_ONCE(*pgdp);
2730
2731 next = pgd_addr_end(addr, end);
2732 if (pgd_none(pgd))
2733 return;
2734 if (unlikely(pgd_huge(pgd))) {
2735 if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
2736 pages, nr))
2737 return;
2738 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
2739 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
2740 PGDIR_SHIFT, next, flags, pages, nr))
2741 return;
2742 } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
2743 return;
2744 } while (pgdp++, addr = next, addr != end);
2745}
2746#else
2747static inline void gup_pgd_range(unsigned long addr, unsigned long end,
2748 unsigned int flags, struct page **pages, int *nr)
2749{
2750}
2751#endif
2752
2753#ifndef gup_fast_permitted
2754
2755
2756
2757
2758static bool gup_fast_permitted(unsigned long start, unsigned long end)
2759{
2760 return true;
2761}
2762#endif
2763
2764static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
2765 unsigned int gup_flags, struct page **pages)
2766{
2767 int ret;
2768
2769
2770
2771
2772
2773 if (gup_flags & FOLL_LONGTERM) {
2774 mmap_read_lock(current->mm);
2775 ret = __gup_longterm_locked(current->mm,
2776 start, nr_pages,
2777 pages, NULL, gup_flags);
2778 mmap_read_unlock(current->mm);
2779 } else {
2780 ret = get_user_pages_unlocked(start, nr_pages,
2781 pages, gup_flags);
2782 }
2783
2784 return ret;
2785}
2786
2787static unsigned long lockless_pages_from_mm(unsigned long start,
2788 unsigned long end,
2789 unsigned int gup_flags,
2790 struct page **pages)
2791{
2792 unsigned long flags;
2793 int nr_pinned = 0;
2794 unsigned seq;
2795
2796 if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
2797 !gup_fast_permitted(start, end))
2798 return 0;
2799
	if (gup_flags & FOLL_PIN) {
		seq = raw_read_seqcount(&current->mm->write_protect_seq);
		if (seq & 1)
			return 0;
	}

	/*
	 * Disable interrupts. The nested form is used, in order to allow
	 * full, general purpose use of this routine.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock() here as we also want to block
	 * IPIs that come from THPs splitting.
	 */
	local_irq_save(flags);
	gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
	local_irq_restore(flags);

	/*
	 * When pinning pages for DMA there could be a concurrent write
	 * protect from fork() via copy_page_range(), in this case always
	 * fail fast GUP.
	 */
	if (gup_flags & FOLL_PIN) {
		if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
			unpin_user_pages(pages, nr_pinned);
			return 0;
		}
	}
	return nr_pinned;
2832}
2833
2834static int internal_get_user_pages_fast(unsigned long start,
2835 unsigned long nr_pages,
2836 unsigned int gup_flags,
2837 struct page **pages)
2838{
2839 unsigned long len, end;
2840 unsigned long nr_pinned;
2841 int ret;
2842
2843 if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
2844 FOLL_FORCE | FOLL_PIN | FOLL_GET |
2845 FOLL_FAST_ONLY | FOLL_NOFAULT)))
2846 return -EINVAL;
2847
	if (gup_flags & FOLL_PIN)
		mm_set_has_pinned_flag(&current->mm->flags);

	if (!(gup_flags & FOLL_FAST_ONLY))
		might_lock_read(&current->mm->mmap_lock);
2853
2854 start = untagged_addr(start) & PAGE_MASK;
2855 len = nr_pages << PAGE_SHIFT;
2856 if (check_add_overflow(start, len, &end))
2857 return 0;
2858 if (unlikely(!access_ok((void __user *)start, len)))
2859 return -EFAULT;
2860
2861 nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
2862 if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
2863 return nr_pinned;
2864
2865
2866 start += nr_pinned << PAGE_SHIFT;
2867 pages += nr_pinned;
2868 ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
2869 pages);
2870 if (ret < 0) {
2871
2872
2873
2874
2875 if (nr_pinned)
2876 return nr_pinned;
2877 return ret;
2878 }
2879 return ret + nr_pinned;
2880}
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902int get_user_pages_fast_only(unsigned long start, int nr_pages,
2903 unsigned int gup_flags, struct page **pages)
2904{
2905 int nr_pinned;
2906
2907
2908
2909
2910
2911
2912
2913 gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
2914
2915 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
2916 pages);
2917
2918
2919
2920
2921
2922
2923
2924 if (nr_pinned < 0)
2925 nr_pinned = 0;
2926
2927 return nr_pinned;
2928}
2929EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947int get_user_pages_fast(unsigned long start, int nr_pages,
2948 unsigned int gup_flags, struct page **pages)
2949{
2950 if (!is_valid_gup_flags(gup_flags))
2951 return -EINVAL;
2952
2953
2954
2955
2956
2957
2958
2959 gup_flags |= FOLL_GET;
2960 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
2961}
2962EXPORT_SYMBOL_GPL(get_user_pages_fast);
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980int pin_user_pages_fast(unsigned long start, int nr_pages,
2981 unsigned int gup_flags, struct page **pages)
2982{
2983
2984 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
2985 return -EINVAL;
2986
2987 gup_flags |= FOLL_PIN;
2988 return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
2989}
2990EXPORT_SYMBOL_GPL(pin_user_pages_fast);
2991
2992
2993
2994
2995
2996
2997
2998int pin_user_pages_fast_only(unsigned long start, int nr_pages,
2999 unsigned int gup_flags, struct page **pages)
3000{
3001 int nr_pinned;
3002
3003
3004
3005
3006
3007 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3008 return 0;
3009
3010
3011
3012
3013 gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
3014 nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
3015 pages);
3016
3017
3018
3019
3020
3021 if (nr_pinned < 0)
3022 nr_pinned = 0;
3023
3024 return nr_pinned;
3025}
3026EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
3027
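/**
 * pin_user_pages_remote() - pin pages of a remote process
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned; should be
 *		at least nr_pages long
 * @vmas:	array of pointers to vmas corresponding to each page, or NULL
 * @locked:	pointer to lock flag indicating whether the lock is still
 *		held and must be cleared when it is dropped, or NULL
 *
 * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set.
 * See get_user_pages_remote() for documentation on the function arguments,
 * because the arguments here are identical.
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page().
 * Please see Documentation/core-api/pin_user_pages.rst for details.
 */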
3051long pin_user_pages_remote(struct mm_struct *mm,
3052 unsigned long start, unsigned long nr_pages,
3053 unsigned int gup_flags, struct page **pages,
3054 struct vm_area_struct **vmas, int *locked)
3055{
3056
3057 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3058 return -EINVAL;
3059
3060 gup_flags |= FOLL_PIN;
3061 return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
3062 pages, vmas, locked);
3063}
3064EXPORT_SYMBOL(pin_user_pages_remote);
3065
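/**
 * pin_user_pages() - pin user pages in memory for use by other devices
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned; should be
 *		at least nr_pages long
 * @vmas:	array of pointers to vmas corresponding to each page, or NULL
 *
 * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set,
 * and FOLL_PIN is set.
 *
 * FOLL_PIN means that the pages must be released via unpin_user_page().
 * Please see Documentation/core-api/pin_user_pages.rst for details.
 */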
3084long pin_user_pages(unsigned long start, unsigned long nr_pages,
3085 unsigned int gup_flags, struct page **pages,
3086 struct vm_area_struct **vmas)
3087{
3088
3089 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3090 return -EINVAL;
3091
3092 gup_flags |= FOLL_PIN;
3093 return __gup_longterm_locked(current->mm, start, nr_pages,
3094 pages, vmas, gup_flags);
3095}
3096EXPORT_SYMBOL(pin_user_pages);
3097
3098
3099
3100
3101
3102
3103long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
3104 struct page **pages, unsigned int gup_flags)
3105{
3106
3107 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3108 return -EINVAL;
3109
3110 gup_flags |= FOLL_PIN;
3111 return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
3112}
3113EXPORT_SYMBOL(pin_user_pages_unlocked);
3114
3115
3116
3117
3118
3119
3120long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
3121 unsigned int gup_flags, struct page **pages,
3122 int *locked)
3123{
3124
3125
3126
3127
3128
3129
3130 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
3131 return -EINVAL;
3132
3133
3134 if (WARN_ON_ONCE(gup_flags & FOLL_GET))
3135 return -EINVAL;
3136
3137 gup_flags |= FOLL_PIN;
3138 return __get_user_pages_locked(current->mm, start, nr_pages,
3139 pages, NULL, locked,
3140 gup_flags | FOLL_TOUCH);
3141}
3142EXPORT_SYMBOL(pin_user_pages_locked);
3143