2#include <linux/kernel.h>
3#include <linux/errno.h>
4#include <linux/err.h>
5#include <linux/spinlock.h>
6
7#include <linux/mm.h>
8#include <linux/memremap.h>
9#include <linux/pagemap.h>
10#include <linux/rmap.h>
11#include <linux/swap.h>
12#include <linux/swapops.h>
13
14#include <linux/sched/signal.h>
15#include <linux/rwsem.h>
16#include <linux/hugetlb.h>
17#include <linux/migrate.h>
18#include <linux/mm_inline.h>
19#include <linux/sched/mm.h>
20
21#include <asm/mmu_context.h>
22#include <asm/pgtable.h>
23#include <asm/tlbflush.h>
24
25#include "internal.h"
26
27struct follow_page_context {
28 struct dev_pagemap *pgmap;
29 unsigned int page_mask;
30};
31
32typedef int (*set_dirty_func_t)(struct page *page);
33
34static void __put_user_pages_dirty(struct page **pages,
35 unsigned long npages,
36 set_dirty_func_t sdf)
37{
38 unsigned long index;
39
40 for (index = 0; index < npages; index++) {
41 struct page *page = compound_head(pages[index]);
		/*
		 * Checking PageDirty at this point may race with
		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
		 *
		 * 1) This code sees the page as already dirty, so it skips
		 * the call to sdf(). That could happen because
		 * clear_page_dirty_for_io() called page_mkclean(), followed
		 * by set_page_dirty(). However, now the page is going to get
		 * written back, which meets the original intention of setting
		 * it dirty, so all is well: clear_page_dirty_for_io() goes on
		 * to call TestClearPageDirty(), and write the page back.
		 *
		 * 2) This code sees the page as clean, so it calls sdf().
		 * The page stays dirty, despite being written back, so it
		 * gets written back again in the next writeback cycle.
		 * This is harmless.
		 */
61 if (!PageDirty(page))
62 sdf(page);
63
64 put_user_page(page);
65 }
66}
67
/**
 * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
 * @pages:  array of pages to be marked dirty and released.
 * @npages: number of pages in the @pages array.
 *
 * "gup-pinned page" refers to a page that has had one of the get_user_pages()
 * variants called on that page.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if it was previously listed as clean. Then, release
 * the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 *
 * set_page_dirty(), which does not lock the page, is used here.
 * Therefore, it is the caller's responsibility to ensure that this is
 * safe. If not, then put_user_pages_dirty_lock() should be called instead.
 */
87void put_user_pages_dirty(struct page **pages, unsigned long npages)
88{
89 __put_user_pages_dirty(pages, npages, set_page_dirty);
90}
91EXPORT_SYMBOL(put_user_pages_dirty);
92
/**
 * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
 * @pages:  array of pages to be marked dirty and released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if it was previously listed as clean. Then, release
 * the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 *
 * This is just like put_user_pages_dirty(), except that it invokes
 * set_page_dirty_lock(), instead of set_page_dirty().
 */
108void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
109{
110 __put_user_pages_dirty(pages, npages, set_page_dirty_lock);
111}
112EXPORT_SYMBOL(put_user_pages_dirty_lock);
113
/**
 * put_user_pages() - release an array of gup-pinned pages.
 * @pages:  array of pages to be released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, release the page using put_user_page().
 *
 * Please see the put_user_page() documentation for details.
 */
123void put_user_pages(struct page **pages, unsigned long npages)
124{
125 unsigned long index;
126
	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */
132 for (index = 0; index < npages; index++)
133 put_user_page(pages[index]);
134}
135EXPORT_SYMBOL(put_user_pages);
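
/*
 * Example usage of the helpers above (an illustrative sketch only;
 * "user_addr", "npages" and do_io_to_pages() are placeholders for
 * caller-specific code): pin with a get_user_pages*() variant, perform the
 * I/O, then dirty and release the pages.
 *
 *	struct page *pages[16];
 *	int got;
 *
 *	got = get_user_pages_fast(user_addr, npages, FOLL_WRITE, pages);
 *	if (got <= 0)
 *		return got ? got : -EFAULT;
 *	do_io_to_pages(pages, got);
 *	(the pages were written to, so dirty them before releasing:)
 *	put_user_pages_dirty_lock(pages, got);
 */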
136
137static struct page *no_page_table(struct vm_area_struct *vma,
138 unsigned int flags)
139{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
148 if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
149 return ERR_PTR(-EFAULT);
150 return NULL;
151}
152
153static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
154 pte_t *pte, unsigned int flags)
155{
156
157 if (flags & FOLL_GET)
158 return -EFAULT;
159
160 if (flags & FOLL_TOUCH) {
161 pte_t entry = *pte;
162
163 if (flags & FOLL_WRITE)
164 entry = pte_mkdirty(entry);
165 entry = pte_mkyoung(entry);
166
167 if (!pte_same(*pte, entry)) {
168 set_pte_at(vma->vm_mm, address, pte, entry);
169 update_mmu_cache(vma, address, pte);
170 }
171 }
172
	/* Proper page table entry exists, but no corresponding struct page */
174 return -EEXIST;
175}
176
/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
181static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
182{
183 return pte_write(pte) ||
184 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
185}
186
187static struct page *follow_page_pte(struct vm_area_struct *vma,
188 unsigned long address, pmd_t *pmd, unsigned int flags,
189 struct dev_pagemap **pgmap)
190{
191 struct mm_struct *mm = vma->vm_mm;
192 struct page *page;
193 spinlock_t *ptl;
194 pte_t *ptep, pte;
195
196retry:
197 if (unlikely(pmd_bad(*pmd)))
198 return no_page_table(vma, flags);
199
200 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
201 pte = *ptep;
202 if (!pte_present(pte)) {
203 swp_entry_t entry;
204
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
209 if (likely(!(flags & FOLL_MIGRATION)))
210 goto no_page;
211 if (pte_none(pte))
212 goto no_page;
213 entry = pte_to_swp_entry(pte);
214 if (!is_migration_entry(entry))
215 goto no_page;
216 pte_unmap_unlock(ptep, ptl);
217 migration_entry_wait(mm, pmd, address);
218 goto retry;
219 }
220 if ((flags & FOLL_NUMA) && pte_protnone(pte))
221 goto no_page;
222 if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
223 pte_unmap_unlock(ptep, ptl);
224 return NULL;
225 }
226
227 page = vm_normal_page(vma, address, pte);
228 if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
233 *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
234 if (*pgmap)
235 page = pte_page(pte);
236 else
237 goto no_page;
238 } else if (unlikely(!page)) {
239 if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
241 page = ERR_PTR(-EFAULT);
242 goto out;
243 }
244
245 if (is_zero_pfn(pte_pfn(pte))) {
246 page = pte_page(pte);
247 } else {
248 int ret;
249
250 ret = follow_pfn_pte(vma, address, ptep, flags);
251 page = ERR_PTR(ret);
252 goto out;
253 }
254 }
255
256 if (flags & FOLL_SPLIT && PageTransCompound(page)) {
257 int ret;
258 get_page(page);
259 pte_unmap_unlock(ptep, ptl);
260 lock_page(page);
261 ret = split_huge_page(page);
262 unlock_page(page);
263 put_page(page);
264 if (ret)
265 return ERR_PTR(ret);
266 goto retry;
267 }
268
269 if (flags & FOLL_GET) {
270 if (unlikely(!try_get_page(page))) {
271 page = ERR_PTR(-ENOMEM);
272 goto out;
273 }
274 }
275 if (flags & FOLL_TOUCH) {
276 if ((flags & FOLL_WRITE) &&
277 !pte_dirty(pte) && !PageDirty(page))
278 set_page_dirty(page);
279
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
284 mark_page_accessed(page);
285 }
286 if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
288 if (PageTransCompound(page))
289 goto out;
290
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
300 if (page->mapping && trylock_page(page)) {
301 lru_add_drain();
302
303
304
305
306
307
308 mlock_vma_page(page);
309 unlock_page(page);
310 }
311 }
312out:
313 pte_unmap_unlock(ptep, ptl);
314 return page;
315no_page:
316 pte_unmap_unlock(ptep, ptl);
317 if (!pte_none(pte))
318 return NULL;
319 return no_page_table(vma, flags);
320}
321
322static struct page *follow_pmd_mask(struct vm_area_struct *vma,
323 unsigned long address, pud_t *pudp,
324 unsigned int flags,
325 struct follow_page_context *ctx)
326{
327 pmd_t *pmd, pmdval;
328 spinlock_t *ptl;
329 struct page *page;
330 struct mm_struct *mm = vma->vm_mm;
331
332 pmd = pmd_offset(pudp, address);
333
	/*
	 * The READ_ONCE() will stabilize the pmdval in a register or
	 * on the stack so that it will stop changing under the code.
	 */
337 pmdval = READ_ONCE(*pmd);
338 if (pmd_none(pmdval))
339 return no_page_table(vma, flags);
340 if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
341 page = follow_huge_pmd(mm, address, pmd, flags);
342 if (page)
343 return page;
344 return no_page_table(vma, flags);
345 }
346 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
347 page = follow_huge_pd(vma, address,
348 __hugepd(pmd_val(pmdval)), flags,
349 PMD_SHIFT);
350 if (page)
351 return page;
352 return no_page_table(vma, flags);
353 }
354retry:
355 if (!pmd_present(pmdval)) {
356 if (likely(!(flags & FOLL_MIGRATION)))
357 return no_page_table(vma, flags);
358 VM_BUG_ON(thp_migration_supported() &&
359 !is_pmd_migration_entry(pmdval));
360 if (is_pmd_migration_entry(pmdval))
361 pmd_migration_entry_wait(mm, pmd);
362 pmdval = READ_ONCE(*pmd);
		/*
		 * MADV_DONTNEED may convert the pmd to null because
		 * mmap_sem is held in read mode
		 */
367 if (pmd_none(pmdval))
368 return no_page_table(vma, flags);
369 goto retry;
370 }
371 if (pmd_devmap(pmdval)) {
372 ptl = pmd_lock(mm, pmd);
373 page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
374 spin_unlock(ptl);
375 if (page)
376 return page;
377 }
378 if (likely(!pmd_trans_huge(pmdval)))
379 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
380
381 if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
382 return no_page_table(vma, flags);
383
384retry_locked:
385 ptl = pmd_lock(mm, pmd);
386 if (unlikely(pmd_none(*pmd))) {
387 spin_unlock(ptl);
388 return no_page_table(vma, flags);
389 }
390 if (unlikely(!pmd_present(*pmd))) {
391 spin_unlock(ptl);
392 if (likely(!(flags & FOLL_MIGRATION)))
393 return no_page_table(vma, flags);
394 pmd_migration_entry_wait(mm, pmd);
395 goto retry_locked;
396 }
397 if (unlikely(!pmd_trans_huge(*pmd))) {
398 spin_unlock(ptl);
399 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
400 }
401 if (flags & FOLL_SPLIT) {
402 int ret;
403 page = pmd_page(*pmd);
404 if (is_huge_zero_page(page)) {
405 spin_unlock(ptl);
406 ret = 0;
407 split_huge_pmd(vma, pmd, address);
408 if (pmd_trans_unstable(pmd))
409 ret = -EBUSY;
410 } else {
411 if (unlikely(!try_get_page(page))) {
412 spin_unlock(ptl);
413 return ERR_PTR(-ENOMEM);
414 }
415 spin_unlock(ptl);
416 lock_page(page);
417 ret = split_huge_page(page);
418 unlock_page(page);
419 put_page(page);
420 if (pmd_none(*pmd))
421 return no_page_table(vma, flags);
422 }
423
424 return ret ? ERR_PTR(ret) :
425 follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
426 }
427 page = follow_trans_huge_pmd(vma, address, pmd, flags);
428 spin_unlock(ptl);
429 ctx->page_mask = HPAGE_PMD_NR - 1;
430 return page;
431}
432
433static struct page *follow_pud_mask(struct vm_area_struct *vma,
434 unsigned long address, p4d_t *p4dp,
435 unsigned int flags,
436 struct follow_page_context *ctx)
437{
438 pud_t *pud;
439 spinlock_t *ptl;
440 struct page *page;
441 struct mm_struct *mm = vma->vm_mm;
442
443 pud = pud_offset(p4dp, address);
444 if (pud_none(*pud))
445 return no_page_table(vma, flags);
446 if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
447 page = follow_huge_pud(mm, address, pud, flags);
448 if (page)
449 return page;
450 return no_page_table(vma, flags);
451 }
452 if (is_hugepd(__hugepd(pud_val(*pud)))) {
453 page = follow_huge_pd(vma, address,
454 __hugepd(pud_val(*pud)), flags,
455 PUD_SHIFT);
456 if (page)
457 return page;
458 return no_page_table(vma, flags);
459 }
460 if (pud_devmap(*pud)) {
461 ptl = pud_lock(mm, pud);
462 page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
463 spin_unlock(ptl);
464 if (page)
465 return page;
466 }
467 if (unlikely(pud_bad(*pud)))
468 return no_page_table(vma, flags);
469
470 return follow_pmd_mask(vma, address, pud, flags, ctx);
471}
472
473static struct page *follow_p4d_mask(struct vm_area_struct *vma,
474 unsigned long address, pgd_t *pgdp,
475 unsigned int flags,
476 struct follow_page_context *ctx)
477{
478 p4d_t *p4d;
479 struct page *page;
480
481 p4d = p4d_offset(pgdp, address);
482 if (p4d_none(*p4d))
483 return no_page_table(vma, flags);
484 BUILD_BUG_ON(p4d_huge(*p4d));
485 if (unlikely(p4d_bad(*p4d)))
486 return no_page_table(vma, flags);
487
488 if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
489 page = follow_huge_pd(vma, address,
490 __hugepd(p4d_val(*p4d)), flags,
491 P4D_SHIFT);
492 if (page)
493 return page;
494 return no_page_table(vma, flags);
495 }
496 return follow_pud_mask(vma, address, p4d, flags, ctx);
497}
498
/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
 *       pointer to output page_mask
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * When getting pages from ZONE_DEVICE memory, the dev_pagemap is cached in
 * @ctx->pgmap so that repeated lookups can reuse it; the caller is
 * responsible for dropping that reference when done.
 *
 * On output, @ctx->page_mask is set according to the size of the page.
 *
 * Return: the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
518struct page *follow_page_mask(struct vm_area_struct *vma,
519 unsigned long address, unsigned int flags,
520 struct follow_page_context *ctx)
521{
522 pgd_t *pgd;
523 struct page *page;
524 struct mm_struct *mm = vma->vm_mm;
525
526 ctx->page_mask = 0;
527
528
529 page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
530 if (!IS_ERR(page)) {
531 BUG_ON(flags & FOLL_GET);
532 return page;
533 }
534
535 pgd = pgd_offset(mm, address);
536
537 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
538 return no_page_table(vma, flags);
539
540 if (pgd_huge(*pgd)) {
541 page = follow_huge_pgd(mm, address, pgd, flags);
542 if (page)
543 return page;
544 return no_page_table(vma, flags);
545 }
546 if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
547 page = follow_huge_pd(vma, address,
548 __hugepd(pgd_val(*pgd)), flags,
549 PGDIR_SHIFT);
550 if (page)
551 return page;
552 return no_page_table(vma, flags);
553 }
554
555 return follow_p4d_mask(vma, address, pgd, flags, ctx);
556}
557
558struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
559 unsigned int foll_flags)
560{
561 struct follow_page_context ctx = { NULL };
562 struct page *page;
563
564 page = follow_page_mask(vma, address, foll_flags, &ctx);
565 if (ctx.pgmap)
566 put_dev_pagemap(ctx.pgmap);
567 return page;
568}
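
/*
 * Example: follow_page() must be called with mmap_sem held; an illustrative
 * sketch (error handling trimmed, "mm" and "addr" supplied by the caller):
 *
 *	down_read(&mm->mmap_sem);
 *	vma = find_vma(mm, addr);
 *	if (vma && addr >= vma->vm_start) {
 *		page = follow_page(vma, addr, FOLL_GET);
 *		if (!IS_ERR_OR_NULL(page)) {
 *			... use the page, then drop the FOLL_GET reference ...
 *			put_page(page);
 *		}
 *	}
 *	up_read(&mm->mmap_sem);
 */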
569
570static int get_gate_page(struct mm_struct *mm, unsigned long address,
571 unsigned int gup_flags, struct vm_area_struct **vma,
572 struct page **page)
573{
574 pgd_t *pgd;
575 p4d_t *p4d;
576 pud_t *pud;
577 pmd_t *pmd;
578 pte_t *pte;
579 int ret = -EFAULT;
580
	/* user gate pages are read-only */
582 if (gup_flags & FOLL_WRITE)
583 return -EFAULT;
584 if (address > TASK_SIZE)
585 pgd = pgd_offset_k(address);
586 else
587 pgd = pgd_offset_gate(mm, address);
588 BUG_ON(pgd_none(*pgd));
589 p4d = p4d_offset(pgd, address);
590 BUG_ON(p4d_none(*p4d));
591 pud = pud_offset(p4d, address);
592 BUG_ON(pud_none(*pud));
593 pmd = pmd_offset(pud, address);
594 if (!pmd_present(*pmd))
595 return -EFAULT;
596 VM_BUG_ON(pmd_trans_huge(*pmd));
597 pte = pte_offset_map(pmd, address);
598 if (pte_none(*pte))
599 goto unmap;
600 *vma = get_gate_vma(mm);
601 if (!page)
602 goto out;
603 *page = vm_normal_page(*vma, address, *pte);
604 if (!*page) {
605 if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
606 goto unmap;
607 *page = pte_page(*pte);
608
609
610
611
612
613 if (is_device_public_page(*page))
614 goto unmap;
615 }
616 if (unlikely(!try_get_page(*page))) {
617 ret = -ENOMEM;
618 goto unmap;
619 }
620out:
621 ret = 0;
622unmap:
623 pte_unmap(pte);
624 return ret;
625}
626
/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
632static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
633 unsigned long address, unsigned int *flags, int *nonblocking)
634{
635 unsigned int fault_flags = 0;
636 vm_fault_t ret;
637
	/* mlock all present pages, but do not fault in new pages */
639 if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
640 return -ENOENT;
641 if (*flags & FOLL_WRITE)
642 fault_flags |= FAULT_FLAG_WRITE;
643 if (*flags & FOLL_REMOTE)
644 fault_flags |= FAULT_FLAG_REMOTE;
645 if (nonblocking)
646 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
647 if (*flags & FOLL_NOWAIT)
648 fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
649 if (*flags & FOLL_TRIED) {
650 VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
651 fault_flags |= FAULT_FLAG_TRIED;
652 }
653
654 ret = handle_mm_fault(vma, address, fault_flags);
655 if (ret & VM_FAULT_ERROR) {
656 int err = vm_fault_to_errno(ret, *flags);
657
658 if (err)
659 return err;
660 BUG();
661 }
662
663 if (tsk) {
664 if (ret & VM_FAULT_MAJOR)
665 tsk->maj_flt++;
666 else
667 tsk->min_flt++;
668 }
669
670 if (ret & VM_FAULT_RETRY) {
671 if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
672 *nonblocking = 0;
673 return -EBUSY;
674 }
675
	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
685 if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
686 *flags |= FOLL_COW;
687 return 0;
688}
689
690static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
691{
692 vm_flags_t vm_flags = vma->vm_flags;
693 int write = (gup_flags & FOLL_WRITE);
694 int foreign = (gup_flags & FOLL_REMOTE);
695
696 if (vm_flags & (VM_IO | VM_PFNMAP))
697 return -EFAULT;
698
699 if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
700 return -EFAULT;
701
702 if (write) {
703 if (!(vm_flags & VM_WRITE)) {
704 if (!(gup_flags & FOLL_FORCE))
705 return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
			 * set a breakpoint in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
715 if (!is_cow_mapping(vm_flags))
716 return -EFAULT;
717 }
718 } else if (!(vm_flags & VM_READ)) {
719 if (!(gup_flags & FOLL_FORCE))
720 return -EFAULT;
721
722
723
724
725 if (!(vm_flags & VM_MAYREAD))
726 return -EFAULT;
727 }
728
	/*
	 * gups are always data accesses, not instruction
	 * fetches, so execute=false here
	 */
732 if (!arch_vma_access_permitted(vma, write, false, foreign))
733 return -EFAULT;
734 return 0;
735}
736
/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference
 * to each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely, and mostly callers simply care that the page
 * contains data that was valid *at some point in time*.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
 * this case.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
793static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
794 unsigned long start, unsigned long nr_pages,
795 unsigned int gup_flags, struct page **pages,
796 struct vm_area_struct **vmas, int *nonblocking)
797{
798 long ret = 0, i = 0;
799 struct vm_area_struct *vma = NULL;
800 struct follow_page_context ctx = { NULL };
801
802 if (!nr_pages)
803 return 0;
804
805 VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
806
	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
812 if (!(gup_flags & FOLL_FORCE))
813 gup_flags |= FOLL_NUMA;
814
815 do {
816 struct page *page;
817 unsigned int foll_flags = gup_flags;
818 unsigned int page_increm;
819
820
821 if (!vma || start >= vma->vm_end) {
822 vma = find_extend_vma(mm, start);
823 if (!vma && in_gate_area(mm, start)) {
824 ret = get_gate_page(mm, start & PAGE_MASK,
825 gup_flags, &vma,
826 pages ? &pages[i] : NULL);
827 if (ret)
828 goto out;
829 ctx.page_mask = 0;
830 goto next_page;
831 }
832
833 if (!vma || check_vma_flags(vma, gup_flags)) {
834 ret = -EFAULT;
835 goto out;
836 }
837 if (is_vm_hugetlb_page(vma)) {
838 i = follow_hugetlb_page(mm, vma, pages, vmas,
839 &start, &nr_pages, i,
840 gup_flags, nonblocking);
841 continue;
842 }
843 }
844retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
849 if (fatal_signal_pending(current)) {
850 ret = -ERESTARTSYS;
851 goto out;
852 }
853 cond_resched();
854
855 page = follow_page_mask(vma, start, foll_flags, &ctx);
856 if (!page) {
857 ret = faultin_page(tsk, vma, start, &foll_flags,
858 nonblocking);
859 switch (ret) {
860 case 0:
861 goto retry;
862 case -EBUSY:
863 ret = 0;
				/* FALLTHRU */
865 case -EFAULT:
866 case -ENOMEM:
867 case -EHWPOISON:
868 goto out;
869 case -ENOENT:
870 goto next_page;
871 }
872 BUG();
873 } else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no corresponding
			 * struct page.
			 */
878 goto next_page;
879 } else if (IS_ERR(page)) {
880 ret = PTR_ERR(page);
881 goto out;
882 }
883 if (pages) {
884 pages[i] = page;
885 flush_anon_page(vma, page, start);
886 flush_dcache_page(page);
887 ctx.page_mask = 0;
888 }
889next_page:
890 if (vmas) {
891 vmas[i] = vma;
892 ctx.page_mask = 0;
893 }
894 page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
895 if (page_increm > nr_pages)
896 page_increm = nr_pages;
897 i += page_increm;
898 start += page_increm * PAGE_SIZE;
899 nr_pages -= page_increm;
900 } while (nr_pages);
901out:
902 if (ctx.pgmap)
903 put_dev_pagemap(ctx.pgmap);
904 return i ? i : ret;
905}
906
907static bool vma_permits_fault(struct vm_area_struct *vma,
908 unsigned int fault_flags)
909{
910 bool write = !!(fault_flags & FAULT_FLAG_WRITE);
911 bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
912 vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
913
914 if (!(vm_flags & vma->vm_flags))
915 return false;
916
	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here:
	 */
924 if (!arch_vma_access_permitted(vma, write, false, foreign))
925 return false;
926
927 return true;
928}
929
/**
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags:flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if caller
 *		does not allow retry
 *
 * This is meant to be called in the specific scenario where for locking
 * reasons we try to access user memory in atomic context (within a
 * pagefault_disable() section), this returns -EFAULT, and we want to
 * resolve the user fault before trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software.
 * On such architectures, gup() will not be enough to make a subsequent
 * access succeed.
 *
 * This function will not return with an unlocked mmap_sem. So it has not
 * the same semantics wrt the @mm->mmap_sem as does filemap_fault().
 */
960int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
961 unsigned long address, unsigned int fault_flags,
962 bool *unlocked)
963{
964 struct vm_area_struct *vma;
965 vm_fault_t ret, major = 0;
966
967 if (unlocked)
968 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
969
970retry:
971 vma = find_extend_vma(mm, address);
972 if (!vma || address < vma->vm_start)
973 return -EFAULT;
974
975 if (!vma_permits_fault(vma, fault_flags))
976 return -EFAULT;
977
978 ret = handle_mm_fault(vma, address, fault_flags);
979 major |= ret & VM_FAULT_MAJOR;
980 if (ret & VM_FAULT_ERROR) {
981 int err = vm_fault_to_errno(ret, 0);
982
983 if (err)
984 return err;
985 BUG();
986 }
987
988 if (ret & VM_FAULT_RETRY) {
989 down_read(&mm->mmap_sem);
990 if (!(fault_flags & FAULT_FLAG_TRIED)) {
991 *unlocked = true;
992 fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
993 fault_flags |= FAULT_FLAG_TRIED;
994 goto retry;
995 }
996 }
997
998 if (tsk) {
999 if (major)
1000 tsk->maj_flt++;
1001 else
1002 tsk->min_flt++;
1003 }
1004 return 0;
1005}
1006EXPORT_SYMBOL_GPL(fixup_user_fault);
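
/*
 * Example: the expected calling pattern for fixup_user_fault(), shown as an
 * illustrative sketch ("address" is a faulting user address recorded by the
 * caller, e.g. by the futex code):
 *
 *	bool unlocked = false;
 *
 *	down_read(&mm->mmap_sem);
 *	ret = fixup_user_fault(current, mm, address, FAULT_FLAG_WRITE,
 *			       &unlocked);
 *	up_read(&mm->mmap_sem);
 *
 * If "unlocked" is set on return, mmap_sem was dropped and re-acquired, so
 * any vma pointers cached across the call are stale.
 */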
1007
1008static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
1009 struct mm_struct *mm,
1010 unsigned long start,
1011 unsigned long nr_pages,
1012 struct page **pages,
1013 struct vm_area_struct **vmas,
1014 int *locked,
1015 unsigned int flags)
1016{
1017 long ret, pages_done;
1018 bool lock_dropped;
1019
1020 if (locked) {
1021
1022 BUG_ON(vmas);
1023
1024 BUG_ON(*locked != 1);
1025 }
1026
1027 if (pages)
1028 flags |= FOLL_GET;
1029
1030 pages_done = 0;
1031 lock_dropped = false;
1032 for (;;) {
1033 ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
1034 vmas, locked);
1035 if (!locked)
1036
1037 return ret;
1038
1039
1040 if (!*locked) {
1041 BUG_ON(ret < 0);
1042 BUG_ON(ret >= nr_pages);
1043 }
1044
1045 if (ret > 0) {
1046 nr_pages -= ret;
1047 pages_done += ret;
1048 if (!nr_pages)
1049 break;
1050 }
1051 if (*locked) {
1052
1053
1054
1055
1056 if (!pages_done)
1057 pages_done = ret;
1058 break;
1059 }
1060
1061
1062
1063
1064 if (likely(pages))
1065 pages += ret;
1066 start += ret << PAGE_SHIFT;
1067
1068
1069
1070
1071
1072
1073 *locked = 1;
1074 lock_dropped = true;
1075 down_read(&mm->mmap_sem);
1076 ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
1077 pages, NULL, NULL);
1078 if (ret != 1) {
1079 BUG_ON(ret > 1);
1080 if (!pages_done)
1081 pages_done = ret;
1082 break;
1083 }
1084 nr_pages--;
1085 pages_done++;
1086 if (!nr_pages)
1087 break;
1088 if (likely(pages))
1089 pages++;
1090 start += PAGE_SIZE;
1091 }
1092 if (lock_dropped && *locked) {
1093
1094
1095
1096
1097 up_read(&mm->mmap_sem);
1098 *locked = 0;
1099 }
1100 return pages_done;
1101}
1102
/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(start, nr_pages, gup_flags, pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */
1124long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
1125 unsigned int gup_flags, struct page **pages,
1126 int *locked)
1127{
	/*
	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
	 * vmas.  As there are no users of this flag in this call we simply
	 * disallow this option for now.
	 */
1134 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1135 return -EINVAL;
1136
1137 return __get_user_pages_locked(current, current->mm, start, nr_pages,
1138 pages, NULL, locked,
1139 gup_flags | FOLL_TOUCH);
1140}
1141EXPORT_SYMBOL(get_user_pages_locked);
1142
/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 *
 * It is functionally equivalent to get_user_pages_fast so
 * get_user_pages_fast should be used instead if specific gup_flags
 * (e.g. FOLL_FORCE) are not required.
 */
1158long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
1159 struct page **pages, unsigned int gup_flags)
1160{
1161 struct mm_struct *mm = current->mm;
1162 int locked = 1;
1163 long ret;
1164
1165
1166
1167
1168
1169
1170
1171 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1172 return -EINVAL;
1173
1174 down_read(&mm->mmap_sem);
1175 ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
1176 &locked, gup_flags | FOLL_TOUCH);
1177 if (locked)
1178 up_read(&mm->mmap_sem);
1179 return ret;
1180}
1181EXPORT_SYMBOL(get_user_pages_unlocked);
1182
/*
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. Lock must initially be held.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages_remote walks a process's page tables and takes a
 * reference to each struct page that each user address corresponds to at
 * a given instant. It does not guarantee that the page still exists in
 * the user mappings when it returns, only that the page won't be freed
 * completely; callers typically only care that the page contains data
 * that was valid at some point in time.
 *
 * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the
 * page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and before
 * put_page is called.
 *
 * This is typically used for fewer-copy IO operations that access the
 * memory by some means other than the user virtual addresses, e.g. by DMA
 * or via the kernel linear mapping (kmap); care should be taken to use
 * the correct cache flushing APIs in that case.
 *
 * See also get_user_pages_fast() for performance critical applications.
 */
1239long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
1240 unsigned long start, unsigned long nr_pages,
1241 unsigned int gup_flags, struct page **pages,
1242 struct vm_area_struct **vmas, int *locked)
1243{
1244
1245
1246
1247
1248
1249
1250 if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
1251 return -EINVAL;
1252
1253 return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
1254 locked,
1255 gup_flags | FOLL_TOUCH | FOLL_REMOTE);
1256}
1257EXPORT_SYMBOL(get_user_pages_remote);
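
/*
 * Example: pinning a single page from another process's address space, as
 * remote-access paths do; an illustrative sketch (taking a reference on the
 * target mm via mmget() is assumed to happen elsewhere in the caller):
 *
 *	int locked = 1;
 *
 *	down_read(&mm->mmap_sem);
 *	ret = get_user_pages_remote(tsk, mm, addr, 1, FOLL_WRITE, &page,
 *				    NULL, &locked);
 *	if (locked)
 *		up_read(&mm->mmap_sem);
 *
 * On success (ret == 1) the page must later be released with put_page().
 */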
1258
1259#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
1260static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
1261{
1262 long i;
1263 struct vm_area_struct *vma_prev = NULL;
1264
1265 for (i = 0; i < nr_pages; i++) {
1266 struct vm_area_struct *vma = vmas[i];
1267
1268 if (vma == vma_prev)
1269 continue;
1270
1271 vma_prev = vma;
1272
1273 if (vma_is_fsdax(vma))
1274 return true;
1275 }
1276 return false;
1277}
1278
1279#ifdef CONFIG_CMA
1280static struct page *new_non_cma_page(struct page *page, unsigned long private)
1281{
1282
1283
1284
1285
1286 int nid = page_to_nid(page);
1287
1288
1289
1290
1291
1292
1293
1294 gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
1295
1296 if (PageHighMem(page))
1297 gfp_mask |= __GFP_HIGHMEM;
1298
1299#ifdef CONFIG_HUGETLB_PAGE
1300 if (PageHuge(page)) {
1301 struct hstate *h = page_hstate(page);
1302
1303
1304
1305
1306 return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
1307 }
1308#endif
1309 if (PageTransHuge(page)) {
1310 struct page *thp;
1311
1312
1313
1314 gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
1315
1316
1317
1318
1319
1320 thp_gfpmask &= ~__GFP_MOVABLE;
1321 thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
1322 if (!thp)
1323 return NULL;
1324 prep_transhuge_page(thp);
1325 return thp;
1326 }
1327
1328 return __alloc_pages_node(nid, gfp_mask, 0);
1329}
1330
1331static long check_and_migrate_cma_pages(struct task_struct *tsk,
1332 struct mm_struct *mm,
1333 unsigned long start,
1334 unsigned long nr_pages,
1335 struct page **pages,
1336 struct vm_area_struct **vmas,
1337 unsigned int gup_flags)
1338{
1339 long i;
1340 bool drain_allow = true;
1341 bool migrate_allow = true;
1342 LIST_HEAD(cma_page_list);
1343
1344check_again:
1345 for (i = 0; i < nr_pages; i++) {
1346
1347
1348
1349
1350
1351 if (is_migrate_cma_page(pages[i])) {
1352
1353 struct page *head = compound_head(pages[i]);
1354
1355 if (PageHuge(head)) {
1356 isolate_huge_page(head, &cma_page_list);
1357 } else {
1358 if (!PageLRU(head) && drain_allow) {
1359 lru_add_drain_all();
1360 drain_allow = false;
1361 }
1362
1363 if (!isolate_lru_page(head)) {
1364 list_add_tail(&head->lru, &cma_page_list);
1365 mod_node_page_state(page_pgdat(head),
1366 NR_ISOLATED_ANON +
1367 page_is_file_cache(head),
1368 hpage_nr_pages(head));
1369 }
1370 }
1371 }
1372 }
1373
1374 if (!list_empty(&cma_page_list)) {
1375
1376
1377
1378 for (i = 0; i < nr_pages; i++)
1379 put_page(pages[i]);
1380
1381 if (migrate_pages(&cma_page_list, new_non_cma_page,
1382 NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
1383
1384
1385
1386
1387 migrate_allow = false;
1388
1389 if (!list_empty(&cma_page_list))
1390 putback_movable_pages(&cma_page_list);
1391 }
1392
1393
1394
1395
1396
1397 nr_pages = __get_user_pages_locked(tsk, mm, start, nr_pages,
1398 pages, vmas, NULL,
1399 gup_flags);
1400
1401 if ((nr_pages > 0) && migrate_allow) {
1402 drain_allow = true;
1403 goto check_again;
1404 }
1405 }
1406
1407 return nr_pages;
1408}
1409#else
1410static long check_and_migrate_cma_pages(struct task_struct *tsk,
1411 struct mm_struct *mm,
1412 unsigned long start,
1413 unsigned long nr_pages,
1414 struct page **pages,
1415 struct vm_area_struct **vmas,
1416 unsigned int gup_flags)
1417{
1418 return nr_pages;
1419}
1420#endif
1421
/*
 * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
 * allows us to process the FOLL_LONGTERM flag.
 */
1426static long __gup_longterm_locked(struct task_struct *tsk,
1427 struct mm_struct *mm,
1428 unsigned long start,
1429 unsigned long nr_pages,
1430 struct page **pages,
1431 struct vm_area_struct **vmas,
1432 unsigned int gup_flags)
1433{
1434 struct vm_area_struct **vmas_tmp = vmas;
1435 unsigned long flags = 0;
1436 long rc, i;
1437
1438 if (gup_flags & FOLL_LONGTERM) {
1439 if (!pages)
1440 return -EINVAL;
1441
1442 if (!vmas_tmp) {
1443 vmas_tmp = kcalloc(nr_pages,
1444 sizeof(struct vm_area_struct *),
1445 GFP_KERNEL);
1446 if (!vmas_tmp)
1447 return -ENOMEM;
1448 }
1449 flags = memalloc_nocma_save();
1450 }
1451
1452 rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
1453 vmas_tmp, NULL, gup_flags);
1454
1455 if (gup_flags & FOLL_LONGTERM) {
1456 memalloc_nocma_restore(flags);
1457 if (rc < 0)
1458 goto out;
1459
1460 if (check_dax_vmas(vmas_tmp, rc)) {
1461 for (i = 0; i < rc; i++)
1462 put_page(pages[i]);
1463 rc = -EOPNOTSUPP;
1464 goto out;
1465 }
1466
1467 rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
1468 vmas_tmp, gup_flags);
1469 }
1470
1471out:
1472 if (vmas_tmp != vmas)
1473 kfree(vmas_tmp);
1474 return rc;
1475}
1476#else
1477static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
1478 struct mm_struct *mm,
1479 unsigned long start,
1480 unsigned long nr_pages,
1481 struct page **pages,
1482 struct vm_area_struct **vmas,
1483 unsigned int flags)
1484{
1485 return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
1486 NULL, flags);
1487}
1488#endif
1489
/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's and don't allow
 * passing of a locked parameter.  We also obviously don't pass
 * FOLL_REMOTE in here.
 */
1497long get_user_pages(unsigned long start, unsigned long nr_pages,
1498 unsigned int gup_flags, struct page **pages,
1499 struct vm_area_struct **vmas)
1500{
1501 return __gup_longterm_locked(current, current->mm, start, nr_pages,
1502 pages, vmas, gup_flags | FOLL_TOUCH);
1503}
1504EXPORT_SYMBOL(get_user_pages);
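
/*
 * Example: a long-term pin, e.g. for RDMA-style registration, passes
 * FOLL_LONGTERM; a minimal sketch (the "umem" structure is a placeholder,
 * and the caller must hold mmap_sem as required above):
 *
 *	down_read(&current->mm->mmap_sem);
 *	ret = get_user_pages(umem->start, umem->npages,
 *			     FOLL_WRITE | FOLL_LONGTERM,
 *			     umem->pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	if (ret > 0)
 *		umem->npinned = ret;
 */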
1505
/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @nonblocking: whether the mmap_sem may be released (see below)
 *
 * This takes care of mlocking the pages too if VM_LOCKED is set.
 *
 * Returns the number of pages processed, or a negative error code.
 *
 * vma->vm_mm->mmap_sem must be held.
 *
 * If @nonblocking is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @nonblocking is non-NULL, it must be held for read only and may be
 * released.  If it's released, *@nonblocking will be set to 0.
 */
1525long populate_vma_page_range(struct vm_area_struct *vma,
1526 unsigned long start, unsigned long end, int *nonblocking)
1527{
1528 struct mm_struct *mm = vma->vm_mm;
1529 unsigned long nr_pages = (end - start) / PAGE_SIZE;
1530 int gup_flags;
1531
1532 VM_BUG_ON(start & ~PAGE_MASK);
1533 VM_BUG_ON(end & ~PAGE_MASK);
1534 VM_BUG_ON_VMA(start < vma->vm_start, vma);
1535 VM_BUG_ON_VMA(end > vma->vm_end, vma);
1536 VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
1537
1538 gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
1539 if (vma->vm_flags & VM_LOCKONFAULT)
1540 gup_flags &= ~FOLL_POPULATE;
1541
1542
1543
1544
1545
1546 if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
1547 gup_flags |= FOLL_WRITE;
1548
1549
1550
1551
1552
1553 if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
1554 gup_flags |= FOLL_FORCE;
1555
1556
1557
1558
1559
1560 return __get_user_pages(current, mm, start, nr_pages, gup_flags,
1561 NULL, NULL, nonblocking);
1562}
1563
/*
 * __mm_populate - populate and/or mlock pages within a range of address space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must be already marked with the desired vm_flags, and
 * mmap_sem must not be held.
 */
1571int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
1572{
1573 struct mm_struct *mm = current->mm;
1574 unsigned long end, nstart, nend;
1575 struct vm_area_struct *vma = NULL;
1576 int locked = 0;
1577 long ret = 0;
1578
1579 end = start + len;
1580
1581 for (nstart = start; nstart < end; nstart = nend) {
1582
1583
1584
1585
1586 if (!locked) {
1587 locked = 1;
1588 down_read(&mm->mmap_sem);
1589 vma = find_vma(mm, nstart);
1590 } else if (nstart >= vma->vm_end)
1591 vma = vma->vm_next;
1592 if (!vma || vma->vm_start >= end)
1593 break;
1594
1595
1596
1597
1598 nend = min(end, vma->vm_end);
1599 if (vma->vm_flags & (VM_IO | VM_PFNMAP))
1600 continue;
1601 if (nstart < vma->vm_start)
1602 nstart = vma->vm_start;
1603
1604
1605
1606
1607
1608 ret = populate_vma_page_range(vma, nstart, nend, &locked);
1609 if (ret < 0) {
1610 if (ignore_errors) {
1611 ret = 0;
1612 continue;
1613 }
1614 break;
1615 }
1616 nend = nstart + ret * PAGE_SIZE;
1617 ret = 0;
1618 }
1619 if (locked)
1620 up_read(&mm->mmap_sem);
1621 return ret;
1622}
1623
/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, to preserve alignment with its headers; and also returns
 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
 * allowing a hole to be left in the corefile to save diskspace.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */
1638#ifdef CONFIG_ELF_CORE
1639struct page *get_dump_page(unsigned long addr)
1640{
1641 struct vm_area_struct *vma;
1642 struct page *page;
1643
1644 if (__get_user_pages(current, current->mm, addr, 1,
1645 FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
1646 NULL) < 1)
1647 return NULL;
1648 flush_cache_page(vma, addr, page_to_pfn(page));
1649 return page;
1650}
1651#endif
1652
/*
 * Generic Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free
 * those pages. Disabling interrupts will allow the fast_gup walker to both
 * block the rcu_sched callback from running, and also delay the freeing of
 * the page table pages.
 *
 * Before activating this code, please be aware that the following
 * assumptions are currently made:
 *
 *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used
 *  to free pages containing page tables, or TLB flushing requires IPI
 *  broadcast.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper
 * functions.
 *
 * This code is based heavily on the PowerPC implementation by Nick Piggin.
 */
1686#ifdef CONFIG_HAVE_GENERIC_GUP
1687
1688#ifndef gup_get_pte
/*
 * We assume that the PTE can be read atomically. If this is not the case
 * for your architecture, please provide your own gup_get_pte() instead.
 */
1693static inline pte_t gup_get_pte(pte_t *ptep)
1694{
1695 return READ_ONCE(*ptep);
1696}
1697#endif
1698
1699static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
1700{
1701 while ((*nr) - nr_start) {
1702 struct page *page = pages[--(*nr)];
1703
1704 ClearPageReferenced(page);
1705 put_page(page);
1706 }
1707}
1708
/*
 * Return the compound head page with ref appropriately incremented,
 * or NULL if that failed.
 */
1713static inline struct page *try_get_compound_head(struct page *page, int refs)
1714{
1715 struct page *head = compound_head(page);
1716 if (WARN_ON_ONCE(page_ref_count(head) < 0))
1717 return NULL;
1718 if (unlikely(!page_cache_add_speculative(head, refs)))
1719 return NULL;
1720 return head;
1721}
1722
1723#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
1724static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1725 unsigned int flags, struct page **pages, int *nr)
1726{
1727 struct dev_pagemap *pgmap = NULL;
1728 int nr_start = *nr, ret = 0;
1729 pte_t *ptep, *ptem;
1730
1731 ptem = ptep = pte_offset_map(&pmd, addr);
1732 do {
1733 pte_t pte = gup_get_pte(ptep);
1734 struct page *head, *page;
1735
		/*
		 * Similar to the PMD case below, NUMA hinting must take slow
		 * path using the pte_protnone check.
		 */
1740 if (pte_protnone(pte))
1741 goto pte_unmap;
1742
1743 if (!pte_access_permitted(pte, flags & FOLL_WRITE))
1744 goto pte_unmap;
1745
1746 if (pte_devmap(pte)) {
1747 if (unlikely(flags & FOLL_LONGTERM))
1748 goto pte_unmap;
1749
1750 pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
1751 if (unlikely(!pgmap)) {
1752 undo_dev_pagemap(nr, nr_start, pages);
1753 goto pte_unmap;
1754 }
1755 } else if (pte_special(pte))
1756 goto pte_unmap;
1757
1758 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
1759 page = pte_page(pte);
1760
1761 head = try_get_compound_head(page, 1);
1762 if (!head)
1763 goto pte_unmap;
1764
1765 if (unlikely(pte_val(pte) != pte_val(*ptep))) {
1766 put_page(head);
1767 goto pte_unmap;
1768 }
1769
1770 VM_BUG_ON_PAGE(compound_head(page) != head, page);
1771
1772 SetPageReferenced(page);
1773 pages[*nr] = page;
1774 (*nr)++;
1775
1776 } while (ptep++, addr += PAGE_SIZE, addr != end);
1777
1778 ret = 1;
1779
1780pte_unmap:
1781 if (pgmap)
1782 put_dev_pagemap(pgmap);
1783 pte_unmap(ptem);
1784 return ret;
1785}
1786#else
/*
 * If we can't determine whether or not a pte is special, then fail
 * immediately for ptes. Note, we can still pin HugeTLB and THP as these
 * are guaranteed not to be special.
 *
 * For a futex to be placed on a THP tail page, get_futex_key requires a
 * __get_user_pages_fast implementation that can pin pages. Thus it's still
 * useful to have gup_huge_pmd even if we can't operate on ptes.
 */
1797static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
1798 unsigned int flags, struct page **pages, int *nr)
1799{
1800 return 0;
1801}
1802#endif
1803
1804#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
1805static int __gup_device_huge(unsigned long pfn, unsigned long addr,
1806 unsigned long end, struct page **pages, int *nr)
1807{
1808 int nr_start = *nr;
1809 struct dev_pagemap *pgmap = NULL;
1810
1811 do {
1812 struct page *page = pfn_to_page(pfn);
1813
1814 pgmap = get_dev_pagemap(pfn, pgmap);
1815 if (unlikely(!pgmap)) {
1816 undo_dev_pagemap(nr, nr_start, pages);
1817 return 0;
1818 }
1819 SetPageReferenced(page);
1820 pages[*nr] = page;
1821 get_page(page);
1822 (*nr)++;
1823 pfn++;
1824 } while (addr += PAGE_SIZE, addr != end);
1825
1826 if (pgmap)
1827 put_dev_pagemap(pgmap);
1828 return 1;
1829}
1830
1831static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
1832 unsigned long end, struct page **pages, int *nr)
1833{
1834 unsigned long fault_pfn;
1835 int nr_start = *nr;
1836
1837 fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1838 if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
1839 return 0;
1840
1841 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
1842 undo_dev_pagemap(nr, nr_start, pages);
1843 return 0;
1844 }
1845 return 1;
1846}
1847
1848static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
1849 unsigned long end, struct page **pages, int *nr)
1850{
1851 unsigned long fault_pfn;
1852 int nr_start = *nr;
1853
1854 fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
1855 if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
1856 return 0;
1857
1858 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
1859 undo_dev_pagemap(nr, nr_start, pages);
1860 return 0;
1861 }
1862 return 1;
1863}
1864#else
1865static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
1866 unsigned long end, struct page **pages, int *nr)
1867{
1868 BUILD_BUG();
1869 return 0;
1870}
1871
1872static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
1873 unsigned long end, struct page **pages, int *nr)
1874{
1875 BUILD_BUG();
1876 return 0;
1877}
1878#endif
1879
1880static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
1881 unsigned long end, unsigned int flags, struct page **pages, int *nr)
1882{
1883 struct page *head, *page;
1884 int refs;
1885
1886 if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
1887 return 0;
1888
1889 if (pmd_devmap(orig)) {
1890 if (unlikely(flags & FOLL_LONGTERM))
1891 return 0;
1892 return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);
1893 }
1894
1895 refs = 0;
1896 page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
1897 do {
1898 pages[*nr] = page;
1899 (*nr)++;
1900 page++;
1901 refs++;
1902 } while (addr += PAGE_SIZE, addr != end);
1903
1904 head = try_get_compound_head(pmd_page(orig), refs);
1905 if (!head) {
1906 *nr -= refs;
1907 return 0;
1908 }
1909
1910 if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
1911 *nr -= refs;
1912 while (refs--)
1913 put_page(head);
1914 return 0;
1915 }
1916
1917 SetPageReferenced(head);
1918 return 1;
1919}
1920
1921static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
1922 unsigned long end, unsigned int flags, struct page **pages, int *nr)
1923{
1924 struct page *head, *page;
1925 int refs;
1926
1927 if (!pud_access_permitted(orig, flags & FOLL_WRITE))
1928 return 0;
1929
1930 if (pud_devmap(orig)) {
1931 if (unlikely(flags & FOLL_LONGTERM))
1932 return 0;
1933 return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);
1934 }
1935
1936 refs = 0;
1937 page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
1938 do {
1939 pages[*nr] = page;
1940 (*nr)++;
1941 page++;
1942 refs++;
1943 } while (addr += PAGE_SIZE, addr != end);
1944
1945 head = try_get_compound_head(pud_page(orig), refs);
1946 if (!head) {
1947 *nr -= refs;
1948 return 0;
1949 }
1950
1951 if (unlikely(pud_val(orig) != pud_val(*pudp))) {
1952 *nr -= refs;
1953 while (refs--)
1954 put_page(head);
1955 return 0;
1956 }
1957
1958 SetPageReferenced(head);
1959 return 1;
1960}
1961
1962static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
1963 unsigned long end, unsigned int flags,
1964 struct page **pages, int *nr)
1965{
1966 int refs;
1967 struct page *head, *page;
1968
1969 if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
1970 return 0;
1971
1972 BUILD_BUG_ON(pgd_devmap(orig));
1973 refs = 0;
1974 page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
1975 do {
1976 pages[*nr] = page;
1977 (*nr)++;
1978 page++;
1979 refs++;
1980 } while (addr += PAGE_SIZE, addr != end);
1981
1982 head = try_get_compound_head(pgd_page(orig), refs);
1983 if (!head) {
1984 *nr -= refs;
1985 return 0;
1986 }
1987
1988 if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
1989 *nr -= refs;
1990 while (refs--)
1991 put_page(head);
1992 return 0;
1993 }
1994
1995 SetPageReferenced(head);
1996 return 1;
1997}
1998
1999static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
2000 unsigned int flags, struct page **pages, int *nr)
2001{
2002 unsigned long next;
2003 pmd_t *pmdp;
2004
2005 pmdp = pmd_offset(&pud, addr);
2006 do {
2007 pmd_t pmd = READ_ONCE(*pmdp);
2008
2009 next = pmd_addr_end(addr, end);
2010 if (!pmd_present(pmd))
2011 return 0;
2012
2013 if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
2014 pmd_devmap(pmd))) {
			/*
			 * NUMA hinting faults need to be handled in the GUP
			 * slowpath for accounting purposes and so that they
			 * can be serialised against THP migration.
			 */
2020 if (pmd_protnone(pmd))
2021 return 0;
2022
2023 if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
2024 pages, nr))
2025 return 0;
2026
2027 } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
2028
2029
2030
2031
2032 if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
2033 PMD_SHIFT, next, flags, pages, nr))
2034 return 0;
2035 } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
2036 return 0;
2037 } while (pmdp++, addr = next, addr != end);
2038
2039 return 1;
2040}
2041
2042static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
2043 unsigned int flags, struct page **pages, int *nr)
2044{
2045 unsigned long next;
2046 pud_t *pudp;
2047
2048 pudp = pud_offset(&p4d, addr);
2049 do {
2050 pud_t pud = READ_ONCE(*pudp);
2051
2052 next = pud_addr_end(addr, end);
2053 if (pud_none(pud))
2054 return 0;
2055 if (unlikely(pud_huge(pud))) {
2056 if (!gup_huge_pud(pud, pudp, addr, next, flags,
2057 pages, nr))
2058 return 0;
2059 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
2060 if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
2061 PUD_SHIFT, next, flags, pages, nr))
2062 return 0;
2063 } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
2064 return 0;
2065 } while (pudp++, addr = next, addr != end);
2066
2067 return 1;
2068}
2069
2070static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
2071 unsigned int flags, struct page **pages, int *nr)
2072{
2073 unsigned long next;
2074 p4d_t *p4dp;
2075
2076 p4dp = p4d_offset(&pgd, addr);
2077 do {
2078 p4d_t p4d = READ_ONCE(*p4dp);
2079
2080 next = p4d_addr_end(addr, end);
2081 if (p4d_none(p4d))
2082 return 0;
2083 BUILD_BUG_ON(p4d_huge(p4d));
2084 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
2085 if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
2086 P4D_SHIFT, next, flags, pages, nr))
2087 return 0;
2088 } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
2089 return 0;
2090 } while (p4dp++, addr = next, addr != end);
2091
2092 return 1;
2093}
2094
2095static void gup_pgd_range(unsigned long addr, unsigned long end,
2096 unsigned int flags, struct page **pages, int *nr)
2097{
2098 unsigned long next;
2099 pgd_t *pgdp;
2100
2101 pgdp = pgd_offset(current->mm, addr);
2102 do {
2103 pgd_t pgd = READ_ONCE(*pgdp);
2104
2105 next = pgd_addr_end(addr, end);
2106 if (pgd_none(pgd))
2107 return;
2108 if (unlikely(pgd_huge(pgd))) {
2109 if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
2110 pages, nr))
2111 return;
2112 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
2113 if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
2114 PGDIR_SHIFT, next, flags, pages, nr))
2115 return;
2116 } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
2117 return;
2118 } while (pgdp++, addr = next, addr != end);
2119}
2120
2121#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slow version:
 */
2126bool gup_fast_permitted(unsigned long start, int nr_pages)
2127{
2128 unsigned long len, end;
2129
2130 len = (unsigned long) nr_pages << PAGE_SHIFT;
2131 end = start + len;
2132 return end >= start;
2133}
2134#endif
2135
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 */
2142int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
2143 struct page **pages)
2144{
2145 unsigned long len, end;
2146 unsigned long flags;
2147 int nr = 0;
2148
2149 start &= PAGE_MASK;
2150 len = (unsigned long) nr_pages << PAGE_SHIFT;
2151 end = start + len;
2152
2153 if (unlikely(!access_ok((void __user *)start, len)))
2154 return 0;
2155
	/*
	 * Disable interrupts.  We use the nested form as we can already have
	 * interrupts disabled by get_futex_key.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock(.) here as we also want to
	 * block IPIs that come from THPs splitting.
	 */
2168 if (gup_fast_permitted(start, nr_pages)) {
2169 local_irq_save(flags);
2170 gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
2171 local_irq_restore(flags);
2172 }
2173
2174 return nr;
2175}
2176
2177static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
2178 unsigned int gup_flags, struct page **pages)
2179{
2180 int ret;
2181
	/*
	 * FIXME: FOLL_LONGTERM does not work with
	 * get_user_pages_unlocked() (see comments in that function)
	 */
2186 if (gup_flags & FOLL_LONGTERM) {
		down_read(&current->mm->mmap_sem);
2188 ret = __gup_longterm_locked(current, current->mm,
2189 start, nr_pages,
2190 pages, NULL, gup_flags);
		up_read(&current->mm->mmap_sem);
2192 } else {
2193 ret = get_user_pages_unlocked(start, nr_pages,
2194 pages, gup_flags);
2195 }
2196
2197 return ret;
2198}
2199
/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
2216int get_user_pages_fast(unsigned long start, int nr_pages,
2217 unsigned int gup_flags, struct page **pages)
2218{
2219 unsigned long addr, len, end;
2220 int nr = 0, ret = 0;
2221
2222 start &= PAGE_MASK;
2223 addr = start;
2224 len = (unsigned long) nr_pages << PAGE_SHIFT;
2225 end = start + len;
2226
2227 if (nr_pages <= 0)
2228 return 0;
2229
2230 if (unlikely(!access_ok((void __user *)start, len)))
2231 return -EFAULT;
2232
2233 if (gup_fast_permitted(start, nr_pages)) {
2234 local_irq_disable();
2235 gup_pgd_range(addr, end, gup_flags, pages, &nr);
2236 local_irq_enable();
2237 ret = nr;
2238 }
2239
2240 if (nr < nr_pages) {
		/* Try to get the remaining pages with get_user_pages */
2242 start += nr << PAGE_SHIFT;
2243 pages += nr;
2244
2245 ret = __gup_longterm_unlocked(start, nr_pages - nr,
2246 gup_flags, pages);

		/* Have to be a bit careful with return values */
2249 if (nr > 0) {
2250 if (ret < 0)
2251 ret = nr;
2252 else
2253 ret += nr;
2254 }
2255 }
2256
2257 return ret;
2258}
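
/*
 * Example: get_user_pages_fast() can pin fewer pages than requested (e.g. on
 * a partial failure), so callers that need the whole range typically loop;
 * an illustrative sketch:
 *
 *	while (npages) {
 *		ret = get_user_pages_fast(addr, npages, gup_flags, pages);
 *		if (ret <= 0)
 *			return ret ? ret : -EFAULT;
 *		addr += (unsigned long)ret << PAGE_SHIFT;
 *		pages += ret;
 *		npages -= ret;
 *	}
 */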
2259
2260#endif
2261