// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
#include <linux/sched/mm.h>

#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "internal.h"
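
/*
 * Context for a follow_page_mask() walk: @pgmap caches the ZONE_DEVICE
 * dev_pagemap across repeated lookups, and @page_mask reports the size of
 * the mapping found (0 for a base page, HPAGE_PMD_NR - 1 for a PMD-mapped
 * THP) so callers can step over an entire huge page at once.
 */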
struct follow_page_context {
	struct dev_pagemap *pgmap;
	unsigned int page_mask;
};

typedef int (*set_dirty_func_t)(struct page *page);

static void __put_user_pages_dirty(struct page **pages,
				   unsigned long npages,
				   set_dirty_func_t sdf)
{
	unsigned long index;

	for (index = 0; index < npages; index++) {
		struct page *page = compound_head(pages[index]);

		/*
		 * Checking PageDirty at this point may race with
		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
		 *
		 * 1) This code sees the page as already dirty, so it skips
		 * the call to sdf(). That could happen because
		 * clear_page_dirty_for_io() called page_mkclean(),
		 * followed by set_page_dirty(). However, now the page is
		 * going to get written back, which meets the original
		 * intention of setting it dirty, so all is well:
		 * clear_page_dirty_for_io() goes on to call
		 * TestClearPageDirty(), and write the page back.
		 *
		 * 2) This code sees the page as clean, so it calls sdf().
		 * The page stays dirty, despite being written back, so it
		 * gets written back again in the next writeback cycle.
		 * This is harmless.
		 */
		if (!PageDirty(page))
			sdf(page);

		put_user_page(page);
	}
}

/**
 * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
 * @pages:  array of pages to be marked dirty and released.
 * @npages: number of pages in the @pages array.
 *
 * "gup-pinned page" refers to a page that has had one of the get_user_pages()
 * variants called on that page.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if it was previously listed as clean. Then, release
 * the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 *
 * set_page_dirty(), which does not lock the page, is used here.
 * Therefore, it is the caller's responsibility to ensure that this is
 * safe. If not, then put_user_pages_dirty_lock() should be called instead.
 */
void put_user_pages_dirty(struct page **pages, unsigned long npages)
{
	__put_user_pages_dirty(pages, npages, set_page_dirty);
}
EXPORT_SYMBOL(put_user_pages_dirty);
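
/*
 * Illustrative use of the put_user_pages*() family (not taken from any
 * particular caller): pin with a get_user_pages() variant, let a device
 * DMA into the pages, then dirty and release them in one call:
 *
 *	nr = get_user_pages_fast(addr, nr_pages, FOLL_WRITE, pages);
 *	... DMA into pages[0 .. nr) ...
 *	put_user_pages_dirty(pages, nr);
 */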

/**
 * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned
 *				 pages
 * @pages:  array of pages to be marked dirty and released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if it was previously listed as clean. Then, release
 * the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 *
 * This is just like put_user_pages_dirty(), except that it invokes
 * set_page_dirty_lock(), instead of set_page_dirty().
 */
void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
{
	__put_user_pages_dirty(pages, npages, set_page_dirty_lock);
}
EXPORT_SYMBOL(put_user_pages_dirty_lock);

/**
 * put_user_pages() - release an array of gup-pinned pages.
 * @pages:  array of pages to be marked dirty and released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, release the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 */
void put_user_pages(struct page **pages, unsigned long npages)
{
	unsigned long index;

	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */
	for (index = 0; index < npages; index++)
		put_user_page(pages[index]);
}
EXPORT_SYMBOL(put_user_pages);

#ifdef CONFIG_MMU
static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags,
		struct dev_pagemap **pgmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
		if (*pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		if (unlikely(!try_get_page(page))) {
			page = ERR_PTR(-ENOMEM);
			goto out;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

static struct page *follow_pmd_mask(struct vm_area_struct *vma,
				    unsigned long address, pud_t *pudp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pmd_t *pmd, pmdval;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pmd = pmd_offset(pudp, address);
	/*
	 * The READ_ONCE() will stabilize the pmdval in a register or
	 * on the stack so that it will stop changing under the code.
	 */
	pmdval = READ_ONCE(*pmd);
	if (pmd_none(pmdval))
		return no_page_table(vma, flags);
	if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pmd_val(pmdval)), flags,
				      PMD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
retry:
	if (!pmd_present(pmdval)) {
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(pmdval));
		if (is_pmd_migration_entry(pmdval))
			pmd_migration_entry_wait(mm, pmd);
		pmdval = READ_ONCE(*pmd);
		/*
		 * MADV_DONTNEED may convert the pmd to null because
		 * mmap_sem is held in read mode
		 */
		if (pmd_none(pmdval))
			return no_page_table(vma, flags);
		goto retry;
	}
	if (pmd_devmap(pmdval)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(pmdval)))
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

	if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
		return no_page_table(vma, flags);

retry_locked:
	ptl = pmd_lock(mm, pmd);
	if (unlikely(pmd_none(*pmd))) {
		spin_unlock(ptl);
		return no_page_table(vma, flags);
	}
	if (unlikely(!pmd_present(*pmd))) {
		spin_unlock(ptl);
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		pmd_migration_entry_wait(mm, pmd);
		goto retry_locked;
	}
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	if (flags & FOLL_SPLIT) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else {
			if (unlikely(!try_get_page(page))) {
				spin_unlock(ptl);
				return ERR_PTR(-ENOMEM);
			}
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	ctx->page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static struct page *follow_pud_mask(struct vm_area_struct *vma,
				    unsigned long address, p4d_t *p4dp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pud_t *pud;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pud = pud_offset(p4dp, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pud_val(*pud)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pud_val(*pud)), flags,
				      PUD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	return follow_pmd_mask(vma, address, pud, flags, ctx);
}

static struct page *follow_p4d_mask(struct vm_area_struct *vma,
				    unsigned long address, pgd_t *pgdp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	p4d_t *p4d;
	struct page *page;

	p4d = p4d_offset(pgdp, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);

	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(p4d_val(*p4d)), flags,
				      P4D_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	return follow_pud_mask(vma, address, p4d, flags, ctx);
}

/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
 *       pointer to output page_mask
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
 * the device's dev_pagemap metadata to avoid repeating expensive lookups.
 *
 * On output, the @ctx->page_mask is set according to the size of the page.
 *
 * Return: the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
static struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      struct follow_page_context *ctx)
{
	pgd_t *pgd;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	ctx->page_mask = 0;

	/* make this handle hugepd */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);

	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	if (pgd_huge(*pgd)) {
		page = follow_huge_pgd(mm, address, pgd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pgd_val(*pgd)), flags,
				      PGDIR_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}

	return follow_p4d_mask(vma, address, pgd, flags, ctx);
}

struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
			 unsigned int foll_flags)
{
	struct follow_page_context ctx = { NULL };
	struct page *page;

	page = follow_page_mask(vma, address, foll_flags, &ctx);
	if (ctx.pgmap)
		put_dev_pagemap(ctx.pgmap);
	return page;
}
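
/*
 * Illustrative only: a caller that just needs the struct page behind a
 * single user address, holding mmap_sem for read, might do:
 *
 *	page = follow_page(vma, address, FOLL_GET);
 *	if (IS_ERR_OR_NULL(page))
 *		return -EFAULT;
 *	... use page, then put_page(page) ...
 */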

static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	if (pgd_none(*pgd))
		return -EFAULT;
	p4d = p4d_offset(pgd, address);
	if (p4d_none(*p4d))
		return -EFAULT;
	pud = pud_offset(p4d, address);
	if (pud_none(*pud))
		return -EFAULT;
	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);
	}
	if (unlikely(!try_get_page(*page))) {
		ret = -ENOMEM;
		goto unmap;
	}
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}

/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	unsigned int fault_flags = 0;
	vm_fault_t ret;

	/* mlock all present pages, but do not fault in new pages */
	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (*flags & FOLL_REMOTE)
		fault_flags |= FAULT_FLAG_REMOTE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, *flags);

		if (err)
			return err;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags |= FOLL_COW;
	return 0;
}

static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;
	int write = (gup_flags & FOLL_WRITE);
	int foreign = (gup_flags & FOLL_REMOTE);

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
		return -EFAULT;

	if (write) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * FOLL_FORCE writes to unwritable VMAs are only
			 * honoured for COW (private) mappings, where the
			 * write can be satisfied by breaking COW; a forced
			 * write through a shared mapping that lacks
			 * VM_WRITE must fail instead.
			 */
			if (!is_cow_mapping(vm_flags))
				return -EFAULT;
		}
	} else if (!(vm_flags & VM_READ)) {
		if (!(gup_flags & FOLL_FORCE))
			return -EFAULT;
		/*
		 * Is there actually any vma we can reach here which does not
		 * have VM_MAYREAD set?
		 */
		if (!(vm_flags & VM_MAYREAD))
			return -EFAULT;
	}
	/*
	 * gups are always data accesses, not instruction
	 * fetches, so execute=false here
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return -EFAULT;
	return 0;
}

/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
 * this case.
 *
 * A caller using such a combination of @nonblocking and @gup_flags
 * must therefore hold the mmap_sem for reading only, and recognize
 * when it's been released.  Otherwise, it must be held for either
 * reading or writing and will not be released.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long ret = 0, i = 0;
	struct vm_area_struct *vma = NULL;
	struct follow_page_context ctx = { NULL };

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					goto out;
				ctx.page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags)) {
				ret = -EFAULT;
				goto out;
			}
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags, nonblocking);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			goto out;
		}
		cond_resched();

		page = follow_page_mask(vma, start, foll_flags, &ctx);
		if (!page) {
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EBUSY:
				ret = 0;
				/* FALLTHRU */
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				goto out;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no corresponding
			 * struct page.
			 */
			goto next_page;
		} else if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto out;
		}
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			ctx.page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			ctx.page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
out:
	if (ctx.pgmap)
		put_dev_pagemap(ctx.pgmap);
	return i ? i : ret;
}

static bool vma_permits_fault(struct vm_area_struct *vma,
			      unsigned int fault_flags)
{
	bool write = !!(fault_flags & FAULT_FLAG_WRITE);
	bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))
		return false;

	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here:
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return false;

	return true;
}

/*
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags:flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if caller
 *		does not allow retry
 *
 * This is meant to be called in the specific scenario where for locking reasons
 * we try to access user memory in atomic context (within a pagefault_disable()
 * section), this returns -EFAULT, and we want to resolve the user fault before
 * trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software.  On
 * such architectures, gup() will not be enough to make a subsequent access
 * succeed.
 *
 * This function will not return with an unlocked mmap_sem. So it has not the
 * same semantics wrt the @mm->mmap_sem as does filemap_fault().
 */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags,
		     bool *unlocked)
{
	struct vm_area_struct *vma;
	vm_fault_t ret, major = 0;

	if (unlocked)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;

retry:
	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(vma, address, fault_flags);
	major |= ret & VM_FAULT_MAJOR;
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, 0);

		if (err)
			return err;
		BUG();
	}

	if (ret & VM_FAULT_RETRY) {
		down_read(&mm->mmap_sem);
		if (!(fault_flags & FAULT_FLAG_TRIED)) {
			*unlocked = true;
			fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			fault_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	if (tsk) {
		if (major)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
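
/*
 * Illustrative caller (modelled loosely on the futex code's
 * fault_in_user_writeable(); simplified): after an atomic user access
 * fails, resolve the write fault and retry the access:
 *
 *	down_read(&mm->mmap_sem);
 *	ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
 *			       FAULT_FLAG_WRITE, NULL);
 *	up_read(&mm->mmap_sem);
 */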

static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/*
			 * VM_FAULT_RETRY didn't trigger or it was a
			 * FOLL_NOWAIT.
			 */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/*
		 * VM_FAULT_RETRY triggered, so seek to the faulting offset.
		 * For the prefault case (!pages) we only update counts.
		 */
		if (likely(pages))
			pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		if (likely(pages))
			pages++;
		start += PAGE_SIZE;
	}
	if (lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}

/*
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. Lock must initially be held.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
 * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
 * be called after the page is finished with, and before put_page is called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user virtual
 * addresses. The pages may be submitted for DMA to devices or accessed via
 * their kernel linear mapping (via the kmap APIs). Care should be taken to
 * use the correct cache flushing APIs.
 *
 * See also get_user_pages_fast, for performance critical applications.
 */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *locked)
{
	/*
	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
	 * vmas.  As there are no users of this flag in this call we simply
	 * disallow this option for now.
	 */
	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
		return -EINVAL;

	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
				       locked,
				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
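
/*
 * Illustrative remote-access pattern (roughly the shape used by
 * __access_remote_vm() when peeking at another process's memory, e.g.
 * for ptrace):
 *
 *	down_read(&mm->mmap_sem);
 *	ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
 *				    &page, &vma, NULL);
 *	up_read(&mm->mmap_sem);
 */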

/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @nonblocking:
 *
 * This takes care of mlocking the pages too if VM_LOCKED is set.
 *
 * return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held.
 *
 * If @nonblocking is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @nonblocking is non-NULL, it must held for read only and may be
 * released.  If it's released, *@nonblocking will be set to 0.
 */
long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end & ~PAGE_MASK);
	VM_BUG_ON_VMA(start < vma->vm_start, vma);
	VM_BUG_ON_VMA(end > vma->vm_end, vma);
	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;
	/*
	 * We want to touch writable mappings with a write fault in order
	 * to break COW, except for shared mappings because these don't COW
	 * and we would not want to dirty them for nothing.
	 */
	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		gup_flags |= FOLL_WRITE;

	/*
	 * We want mlock to succeed for regions that have any permissions
	 * other than PROT_NONE.
	 */
	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
		gup_flags |= FOLL_FORCE;

	/*
	 * We made sure addr is within a VMA, so the following will
	 * not result in a stack expansion that recurses back here.
	 */
	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}

/*
 * __mm_populate - populate and/or mlock pages within a range of address space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must be already marked with the desired vm_flags, and
 * mmap_sem must not be held.
 */
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
	struct mm_struct *mm = current->mm;
	unsigned long end, nstart, nend;
	struct vm_area_struct *vma = NULL;
	int locked = 0;
	long ret = 0;

	end = start + len;

	for (nstart = start; nstart < end; nstart = nend) {
		/*
		 * We want to fault in pages for [nstart; end) address range.
		 * Find first corresponding VMA.
		 */
		if (!locked) {
			locked = 1;
			down_read(&mm->mmap_sem);
			vma = find_vma(mm, nstart);
		} else if (nstart >= vma->vm_end)
			vma = vma->vm_next;
		if (!vma || vma->vm_start >= end)
			break;
		/*
		 * Set [nstart; nend) to intersection of desired address
		 * range with the first VMA. Also, skip undesirable VMA types.
		 */
		nend = min(end, vma->vm_end);
		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			continue;
		if (nstart < vma->vm_start)
			nstart = vma->vm_start;
		/*
		 * Now fault in a range of pages. populate_vma_page_range()
		 * double checks the vma flags, so that it won't mlock pages
		 * if the vma was already munlocked.
		 */
		ret = populate_vma_page_range(vma, nstart, nend, &locked);
		if (ret < 0) {
			if (ignore_errors) {
				ret = 0;
				continue;	/* continue at next VMA */
			}
			break;
		}
		nend = nstart + ret * PAGE_SIZE;
		ret = 0;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;	/* 0 or negative error code */
}
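
/*
 * For reference: mlock() reaches this via do_mlock() as roughly
 * __mm_populate(start, len, 0), while the mm_populate() helper used for
 * MAP_POPULATE / MAP_LOCKED passes ignore_errors = 1 so partial failures
 * are skipped rather than reported.
 */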

/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, to preserve alignment with its headers; and also returns
 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, exists -
 * allowing a hole to be left in the corefile to save diskspace.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */
#else /* CONFIG_MMU */
static long __get_user_pages_locked(struct task_struct *tsk,
		struct mm_struct *mm, unsigned long start,
		unsigned long nr_pages, struct page **pages,
		struct vm_area_struct **vmas, int *locked,
		unsigned int foll_flags)
{
	struct vm_area_struct *vma;
	unsigned long vm_flags;
	int i;

	/* calculate required read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (foll_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (foll_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);

	for (i = 0; i < nr_pages; i++) {
		vma = find_vma(mm, start);
		if (!vma)
			goto finish_or_fault;

		/* protect what we can, including chardevs */
		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			goto finish_or_fault;

		if (pages) {
			pages[i] = virt_to_page(start);
			if (pages[i])
				get_page(pages[i]);
		}
		if (vmas)
			vmas[i] = vma;
		start = (start + PAGE_SIZE) & PAGE_MASK;
	}

	return i;

finish_or_fault:
	return i ? : -EFAULT;
}
#endif /* !CONFIG_MMU */

#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
{
	long i;
	struct vm_area_struct *vma_prev = NULL;

	for (i = 0; i < nr_pages; i++) {
		struct vm_area_struct *vma = vmas[i];

		if (vma == vma_prev)
			continue;

		vma_prev = vma;

		if (vma_is_fsdax(vma))
			return true;
	}
	return false;
}

#ifdef CONFIG_CMA
static struct page *new_non_cma_page(struct page *page, unsigned long private)
{
	/*
	 * We want to make sure we allocate the new page from the same node
	 * as the source page.
	 */
	int nid = page_to_nid(page);
	/*
	 * Trying to allocate a page for migration. Ignore allocation failure
	 * warnings.
	 */
	gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;

	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

#ifdef CONFIG_HUGETLB_PAGE
	if (PageHuge(page)) {
		struct hstate *h = page_hstate(page);
		/*
		 * We don't want to dequeue from the pool because pool pages will
		 * mostly be from the CMA region.
		 */
		return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
	}
#endif
	if (PageTransHuge(page)) {
		struct page *thp;
		/*
		 * ignore allocation failure warnings
		 */
		gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;

		/*
		 * Remove the movable mask so that we don't allocate from
		 * CMA area again.
		 */
		thp_gfpmask &= ~__GFP_MOVABLE;
		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
		if (!thp)
			return NULL;
		prep_transhuge_page(thp);
		return thp;
	}

	return __alloc_pages_node(nid, gfp_mask, 0);
}

static long check_and_migrate_cma_pages(struct task_struct *tsk,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long nr_pages,
					struct page **pages,
					struct vm_area_struct **vmas,
					unsigned int gup_flags)
{
	unsigned long i;
	unsigned long step;
	bool drain_allow = true;
	bool migrate_allow = true;
	LIST_HEAD(cma_page_list);

check_again:
	for (i = 0; i < nr_pages;) {

		struct page *head = compound_head(pages[i]);

		/*
		 * gup may start from a tail page. Advance step by the left
		 * part.
		 */
		step = (1 << compound_order(head)) - (pages[i] - head);
		/*
		 * If we get a page from the CMA zone, since we are going to
		 * be pinning these entries, we might as well move them out
		 * of the CMA zone if possible.
		 */
		if (is_migrate_cma_page(head)) {
			if (PageHuge(head))
				isolate_huge_page(head, &cma_page_list);
			else {
				if (!PageLRU(head) && drain_allow) {
					lru_add_drain_all();
					drain_allow = false;
				}

				if (!isolate_lru_page(head)) {
					list_add_tail(&head->lru, &cma_page_list);
					mod_node_page_state(page_pgdat(head),
							    NR_ISOLATED_ANON +
							    page_is_file_cache(head),
							    hpage_nr_pages(head));
				}
			}
		}

		i += step;
	}

	if (!list_empty(&cma_page_list)) {
		/*
		 * drop the above get_user_pages reference.
		 */
		for (i = 0; i < nr_pages; i++)
			put_page(pages[i]);

		if (migrate_pages(&cma_page_list, new_non_cma_page,
				  NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
			/*
			 * some of the pages failed migration. Do get_user_pages
			 * without migration.
			 */
			migrate_allow = false;

			if (!list_empty(&cma_page_list))
				putback_movable_pages(&cma_page_list);
		}
		/*
		 * We did migrate all the pages, Try to get the page references
		 * again migrating any new CMA pages which we failed to isolate
		 * earlier.
		 */
		nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages,
						   pages, vmas, NULL,
						   gup_flags);

		if ((nr_pages > 0) && migrate_allow) {
			drain_allow = true;
			goto check_again;
		}
	}

	return nr_pages;
}
#else
static long check_and_migrate_cma_pages(struct task_struct *tsk,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long nr_pages,
					struct page **pages,
					struct vm_area_struct **vmas,
					unsigned int gup_flags)
{
	return nr_pages;
}
#endif /* CONFIG_CMA */

/*
 * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
 * allows us to process the FOLL_LONGTERM flag.
 */
static long __gup_longterm_locked(struct task_struct *tsk,
				  struct mm_struct *mm,
				  unsigned long start,
				  unsigned long nr_pages,
				  struct page **pages,
				  struct vm_area_struct **vmas,
				  unsigned int gup_flags)
{
	struct vm_area_struct **vmas_tmp = vmas;
	unsigned long flags = 0;
	long rc, i;

	if (gup_flags & FOLL_LONGTERM) {
		if (!pages)
			return -EINVAL;

		if (!vmas_tmp) {
			vmas_tmp = kcalloc(nr_pages,
					   sizeof(struct vm_area_struct *),
					   GFP_KERNEL);
			if (!vmas_tmp)
				return -ENOMEM;
		}
		flags = memalloc_nocma_save();
	}

	rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
				     vmas_tmp, NULL, gup_flags);

	if (gup_flags & FOLL_LONGTERM) {
		memalloc_nocma_restore(flags);
		if (rc < 0)
			goto out;

		if (check_dax_vmas(vmas_tmp, rc)) {
			for (i = 0; i < rc; i++)
				put_page(pages[i]);
			rc = -EOPNOTSUPP;
			goto out;
		}

		rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
						 vmas_tmp, gup_flags);
	}

out:
	if (vmas_tmp != vmas)
		kfree(vmas_tmp);
	return rc;
}
#else /* !CONFIG_FS_DAX && !CONFIG_CMA */
static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
						  struct mm_struct *mm,
						  unsigned long start,
						  unsigned long nr_pages,
						  struct page **pages,
						  struct vm_area_struct **vmas,
						  unsigned int flags)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
				       NULL, flags);
}
#endif /* CONFIG_FS_DAX || CONFIG_CMA */

/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's and don't allow
 * passing of a locked flag.  We also obviously don't pass
 * FOLL_REMOTE in here.
 */
long get_user_pages(unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas)
{
	return __gup_longterm_locked(current, current->mm, start, nr_pages,
				     pages, vmas, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);

/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
			   unsigned int gup_flags, struct page **pages,
			   int *locked)
{
	/*
	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
	 * vmas.  As there are no users of this flag in this call we simply
	 * disallow this option for now.
	 */
	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
		return -EINVAL;

	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       pages, NULL, locked,
				       gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);

/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(tsk, mm, ..., pages);
 *
 * It is functionally equivalent to get_user_pages_fast so
 * get_user_pages_fast should be used instead if specific gup_flags
 * (e.g. FOLL_FORCE) are not required.
 */
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
			     struct page **pages, unsigned int gup_flags)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	/*
	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
	 * vmas.  As there are no users of this flag in this call we simply
	 * disallow this option for now.
	 */
	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
		return -EINVAL;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
				      &locked, gup_flags | FOLL_TOUCH);
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(get_user_pages_unlocked);

/*
 * Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free those
 * pages. Disabling interrupts will allow the fast_gup walker to both block
 * the rcu_sched callback, and an IPI that we broadcast for splitting THPs
 * (which is a relatively rare event). The code below adopts this strategy.
 *
 * Before activating this code, please be aware that the following assumptions
 * are currently made:
 *
 *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
 *  free pages containing page tables or TLB flushing requires IPI broadcast.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper functions.
 *
 * This code is based heavily on the PowerPC implementation by Nick Piggin.
 */
#ifdef CONFIG_HAVE_FAST_GUP
#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
/*
 * WARNING: only to be used in the get_user_pages_fast() implementation.
 *
 * With get_user_pages_fast(), we walk down the pagetables without taking any
 * locks.  For this we would like to load the pointers atomically, but sometimes
 * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE).  What
 * we do have is the guarantee that a PTE will only either go from not present
 * to present, or present to not present or both -- it will not switch to a
 * completely different present page without a TLB flush in between; something
 * that we are blocking by holding interrupts off.
 *
 * Setting ptes from not present to present goes:
 *
 *   ptep->pte_high = h;
 *   smp_wmb();
 *   ptep->pte_low = l;
 *
 * And present to not present goes:
 *
 *   ptep->pte_low = 0;
 *   smp_wmb();
 *   ptep->pte_high = 0;
 *
 * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
 * We load pte_high *after* loading pte_low, which ensures we don't see an older
 * value of pte_high.  *Then* we recheck pte_low, which ensures that we haven't
 * picked up a changed pte high. We might have gotten rubbish values from
 * pte_low and pte_high, but we are guaranteed that pte_low will not have the
 * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
 * operates on present ptes we're safe.
 */
static inline pte_t gup_get_pte(pte_t *ptep)
{
	pte_t pte;

	do {
		pte.pte_low = ptep->pte_low;
		smp_rmb();
		pte.pte_high = ptep->pte_high;
		smp_rmb();
	} while (unlikely(pte.pte_low != ptep->pte_low));

	return pte;
}
#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
/*
 * We require that the PTE can be read atomically.
 */
static inline pte_t gup_get_pte(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */

static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
					    struct page **pages)
{
	while ((*nr) - nr_start) {
		struct page *page = pages[--(*nr)];

		ClearPageReferenced(page);
		put_page(page);
	}
}

/*
 * Return the compound head page with ref appropriately incremented,
 * or NULL if that failed.  The page_ref_count() check rejects pages
 * whose refcount has gone negative (i.e. overflowed), so a flood of
 * gup references cannot wrap the counter.
 */
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
	struct page *head = compound_head(page);
	if (WARN_ON_ONCE(page_ref_count(head) < 0))
		return NULL;
	if (unlikely(!page_cache_add_speculative(head, refs)))
		return NULL;
	return head;
}

#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 unsigned int flags, struct page **pages, int *nr)
{
	struct dev_pagemap *pgmap = NULL;
	int nr_start = *nr, ret = 0;
	pte_t *ptep, *ptem;

	ptem = ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *head, *page;

		/*
		 * Similar to the PMD case below, NUMA hinting must take slow
		 * path using the pte_protnone check.
		 */
		if (pte_protnone(pte))
			goto pte_unmap;

		if (!pte_access_permitted(pte, flags & FOLL_WRITE))
			goto pte_unmap;

		if (pte_devmap(pte)) {
			if (unlikely(flags & FOLL_LONGTERM))
				goto pte_unmap;

			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
			if (unlikely(!pgmap)) {
				undo_dev_pagemap(nr, nr_start, pages);
				goto pte_unmap;
			}
		} else if (pte_special(pte))
			goto pte_unmap;

		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);

		head = try_get_compound_head(page, 1);
		if (!head)
			goto pte_unmap;

		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			goto pte_unmap;
		}

		VM_BUG_ON_PAGE(compound_head(page) != head, page);

		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	ret = 1;

pte_unmap:
	if (pgmap)
		put_dev_pagemap(pgmap);
	pte_unmap(ptem);
	return ret;
}
#else

/*
 * If we can't determine whether or not a pte is special, then fail immediately
 * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
 * to be special.
 *
 * For a futex to be placed on a THP tail page, get_futex_key requires a
 * __get_user_pages_fast implementation that can pin pages. Thus it's still
 * useful to have gup_huge_pmd even if we can't operate on ptes.
 */
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 unsigned int flags, struct page **pages, int *nr)
{
	return 0;
}
#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */

#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int __gup_device_huge(unsigned long pfn, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	int nr_start = *nr;
	struct dev_pagemap *pgmap = NULL;

	do {
		struct page *page = pfn_to_page(pfn);

		pgmap = get_dev_pagemap(pfn, pgmap);
		if (unlikely(!pgmap)) {
			undo_dev_pagemap(nr, nr_start, pages);
			return 0;
		}
		SetPageReferenced(page);
		pages[*nr] = page;
		get_page(page);
		(*nr)++;
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);

	if (pgmap)
		put_dev_pagemap(pgmap);
	return 1;
}

static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
		return 0;

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		undo_dev_pagemap(nr, nr_start, pages);
		return 0;
	}
	return 1;
}

static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
		return 0;

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		undo_dev_pagemap(nr, nr_start, pages);
		return 0;
	}
	return 1;
}
#else
static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}

static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}
#endif

#ifdef CONFIG_ARCH_HAS_HUGEPD
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long __boundary = (addr + sz) & ~(sz-1);
	return (__boundary - 1 < end - 1) ? __boundary : end;
}

static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
		       unsigned long end, unsigned int flags,
		       struct page **pages, int *nr)
{
	unsigned long pte_end;
	struct page *head, *page;
	pte_t pte;
	int refs;

	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = READ_ONCE(*ptep);

	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
		return 0;

	/* hugepages are never "special" */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = try_get_compound_head(head, refs);
	if (!head) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* Could be optimized better */
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
		unsigned int pdshift, unsigned long end, unsigned int flags,
		struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(hugepd);
	unsigned long next;

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		next = hugepte_addr_end(addr, end, sz);
		if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
			return 0;
	} while (ptep++, addr = next, addr != end);

	return 1;
}
#else
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
		unsigned int pdshift, unsigned long end, unsigned int flags,
		struct page **pages, int *nr)
{
	return 0;
}
#endif /* CONFIG_ARCH_HAS_HUGEPD */

static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, unsigned int flags,
		struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
		return 0;

	if (pmd_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
		return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
	}

	refs = 0;
	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = try_get_compound_head(pmd_page(orig), refs);
	if (!head) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}
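
/*
 * Note the pattern shared by gup_huge_pmd() above and the pud/pgd
 * variants below: the candidate subpages are recorded first, references
 * are then taken on the head page in one go, and finally the page table
 * entry is re-read.  If it changed underneath us (say, a concurrent THP
 * split or munmap), the references are dropped and 0 is returned so the
 * caller falls back to the slow path.
 */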

static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, unsigned int flags,
		struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pud_access_permitted(orig, flags & FOLL_WRITE))
		return 0;

	if (pud_devmap(orig)) {
		if (unlikely(flags & FOLL_LONGTERM))
			return 0;
		return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
	}

	refs = 0;
	page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = try_get_compound_head(pud_page(orig), refs);
	if (!head) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
			unsigned long end, unsigned int flags,
			struct page **pages, int *nr)
{
	int refs;
	struct page *head, *page;

	if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
		return 0;

	BUILD_BUG_ON(pgd_devmap(orig));
	refs = 0;
	page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = try_get_compound_head(pgd_page(orig), refs);
	if (!head) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		unsigned int flags, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);
		if (!pmd_present(pmd))
			return 0;

		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
			     pmd_devmap(pmd))) {
			/*
			 * NUMA hinting faults need to be handled in the GUP
			 * slowpath for accounting purposes and so that they
			 * can be serialised against THP migration.
			 */
			if (pmd_protnone(pmd))
				return 0;

			if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
				pages, nr))
				return 0;

		} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
			/*
			 * Architectures may use a different format for
			 * hugetlbfs PMDs than for THP PMDs; hand those to
			 * the hugepd walker.
			 */
			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
					 PMD_SHIFT, next, flags, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
			 unsigned int flags, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&p4d, addr);
	do {
		pud_t pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, pudp, addr, next, flags,
					  pages, nr))
				return 0;
		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
					 PUD_SHIFT, next, flags, pages, nr))
				return 0;
		} else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
			 unsigned int flags, struct page **pages, int *nr)
{
	unsigned long next;
	p4d_t *p4dp;

	p4dp = p4d_offset(&pgd, addr);
	do {
		p4d_t p4d = READ_ONCE(*p4dp);

		next = p4d_addr_end(addr, end);
		if (p4d_none(p4d))
			return 0;
		BUILD_BUG_ON(p4d_huge(p4d));
		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
					 P4D_SHIFT, next, flags, pages, nr))
				return 0;
		} else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
			return 0;
	} while (p4dp++, addr = next, addr != end);

	return 1;
}

static void gup_pgd_range(unsigned long addr, unsigned long end,
		unsigned int flags, struct page **pages, int *nr)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset(current->mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			return;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
					  pages, nr))
				return;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, flags, pages, nr))
				return;
		} else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
			return;
	} while (pgdp++, addr = next, addr != end);
}
#else
static inline void gup_pgd_range(unsigned long addr, unsigned long end,
		unsigned int flags, struct page **pages, int *nr)
{
}
#endif /* CONFIG_HAVE_FAST_GUP */

#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slow version:
 */
static bool gup_fast_permitted(unsigned long start, unsigned long end)
{
	return true;
}
#endif
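
/*
 * Architectures can override gup_fast_permitted(); x86, for example,
 * rejects ranges that would extend past the end of the user address
 * space. The generic version above accepts anything that already passed
 * access_ok().
 */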

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
 * the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	unsigned long len, end;
	unsigned long flags;
	int nr = 0;

	start = untagged_addr(start) & PAGE_MASK;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (end <= start)
		return 0;
	if (unlikely(!access_ok((void __user *)start, len)))
		return 0;

	/*
	 * Disable interrupts.  We use the nested form as we can already have
	 * interrupts disabled by get_futex_key.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock(.) here as we also want to
	 * block IPIs that come from THPs splitting.
	 */
	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
	    gup_fast_permitted(start, end)) {
		local_irq_save(flags);
		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
		local_irq_restore(flags);
	}

	return nr;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);
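
/*
 * Illustrative atomic-context caller (KVM's hva_to_pfn_fast() uses
 * roughly this shape): try to pin a single writable page without
 * sleeping, and fall back to the slow path if it fails:
 *
 *	if (__get_user_pages_fast(addr, 1, 1, &page) == 1)
 *		... fast path succeeded, page is pinned ...
 */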

static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
				   unsigned int gup_flags, struct page **pages)
{
	int ret;

	/*
	 * FIXME: FOLL_LONGTERM does not work with
	 * get_user_pages_unlocked() (see comments in that function)
	 */
	if (gup_flags & FOLL_LONGTERM) {
		down_read(&current->mm->mmap_sem);
		ret = __gup_longterm_locked(current, current->mm,
					    start, nr_pages,
					    pages, NULL, gup_flags);
		up_read(&current->mm->mmap_sem);
	} else {
		ret = get_user_pages_unlocked(start, nr_pages,
					      pages, gup_flags);
	}

	return ret;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages,
			unsigned int gup_flags, struct page **pages)
{
	unsigned long addr, len, end;
	int nr = 0, ret = 0;

	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM)))
		return -EINVAL;

	start = untagged_addr(start) & PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (end <= start)
		return 0;
	if (unlikely(!access_ok((void __user *)start, len)))
		return -EFAULT;

	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
	    gup_fast_permitted(start, end)) {
		local_irq_disable();
		gup_pgd_range(addr, end, gup_flags, pages, &nr);
		local_irq_enable();
		ret = nr;
	}

	if (nr < nr_pages) {
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		ret = __gup_longterm_unlocked(start, nr_pages - nr,
					      gup_flags, pages);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(get_user_pages_fast);
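
/*
 * Illustrative use of get_user_pages_fast() (not taken from any
 * particular driver): pin a user buffer for DMA, then release each page:
 *
 *	ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
 *	if (ret <= 0)
 *		return ret ? ret : -EFAULT;
 *	... program the DMA engine with pages[0 .. ret) ...
 *	for (i = 0; i < ret; i++)
 *		put_page(pages[i]);
 */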