#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>

#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "internal.h"

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

static struct page *follow_pmd_mask(struct vm_area_struct *vma,
		unsigned long address, pud_t *pudp,
		unsigned int flags, unsigned int *page_mask)
{
	pmd_t *pmd, pmdval;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pmd = pmd_offset(pudp, address);
	/*
	 * The READ_ONCE() will stabilize the pmdval in a register or
	 * on the stack so that it will stop changing under the code.
	 */
	pmdval = READ_ONCE(*pmd);
	if (pmd_none(pmdval))
		return no_page_table(vma, flags);
	if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pmd_val(pmdval)), flags,
				      PMD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
retry:
	if (!pmd_present(pmdval)) {
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(pmdval));
		if (is_pmd_migration_entry(pmdval))
			pmd_migration_entry_wait(mm, pmd);
		pmdval = READ_ONCE(*pmd);
		/*
		 * MADV_DONTNEED may convert the pmd to null because
		 * mmap_sem is held in read mode
		 */
		if (pmd_none(pmdval))
			return no_page_table(vma, flags);
		goto retry;
	}
	if (pmd_devmap(pmdval)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(pmdval)))
		return follow_page_pte(vma, address, pmd, flags);

	if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
		return no_page_table(vma, flags);

retry_locked:
	ptl = pmd_lock(mm, pmd);
	if (unlikely(pmd_none(*pmd))) {
		spin_unlock(ptl);
		return no_page_table(vma, flags);
	}
	if (unlikely(!pmd_present(*pmd))) {
		spin_unlock(ptl);
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		pmd_migration_entry_wait(mm, pmd);
		goto retry_locked;
	}
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags);
	}
	if (flags & FOLL_SPLIT) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else {
			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags);
	}
	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	*page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static struct page *follow_pud_mask(struct vm_area_struct *vma,
		unsigned long address, p4d_t *p4dp,
		unsigned int flags, unsigned int *page_mask)
{
	pud_t *pud;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pud = pud_offset(p4dp, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pud_val(*pud)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pud_val(*pud)), flags,
				      PUD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	return follow_pmd_mask(vma, address, pud, flags, page_mask);
}

static struct page *follow_p4d_mask(struct vm_area_struct *vma,
		unsigned long address, pgd_t *pgdp,
		unsigned int flags, unsigned int *page_mask)
{
	p4d_t *p4d;
	struct page *page;

	p4d = p4d_offset(pgdp, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);

	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(p4d_val(*p4d)), flags,
				      P4D_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	return follow_pud_mask(vma, address, p4d, flags, page_mask);
}

/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	/* make this handle hugepd */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);

	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	if (pgd_huge(*pgd)) {
		page = follow_huge_pgd(mm, address, pgd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pgd_val(*pgd)), flags,
				      PGDIR_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}

	return follow_p4d_mask(vma, address, pgd, flags, page_mask);
}
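
/*
 * Illustrative sketch (not part of the original file): how a caller,
 * holding mmap_sem, would interpret follow_page_mask()'s three-way
 * return convention.  "addr" and the error choices are hypothetical.
 *
 *	unsigned int page_mask;
 *	struct page *page;
 *
 *	page = follow_page_mask(vma, addr, FOLL_GET, &page_mask);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);	// e.g. -EFAULT for a FOLL_DUMP hole
 *	if (!page)
 *		return -ENOENT;		// nothing mapped: fault it in first
 *	// On a huge mapping, page_mask tells how many neighbouring
 *	// small-page addresses the same compound page covers.
 *	put_page(page);
 */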

static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, address);
	BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, address);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);

		/*
		 * This should never happen (a device public page in the gate
		 * area).
		 */
		if (is_device_public_page(*page))
			goto unmap;
	}
	get_page(*page);
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}

/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	unsigned int fault_flags = 0;
	vm_fault_t ret;

	/* mlock all present pages, but do not fault in new pages */
	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (*flags & FOLL_REMOTE)
		fault_flags |= FAULT_FLAG_REMOTE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, *flags);

		if (err)
			return err;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags |= FOLL_COW;
	return 0;
}

static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;
	int write = (gup_flags & FOLL_WRITE);
	int foreign = (gup_flags & FOLL_REMOTE);

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
		return -EFAULT;

	if (write) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VM_SHARED !VM_MAYSHARE vma, so ptrace
			 * could set a breakpoint in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
			if (!is_cow_mapping(vm_flags))
				return -EFAULT;
		}
	} else if (!(vm_flags & VM_READ)) {
		if (!(gup_flags & FOLL_FORCE))
			return -EFAULT;
		/*
		 * Is there actually any vma we can reach here which does not
		 * have VM_MAYREAD set?
		 */
		if (!(vm_flags & VM_MAYREAD))
			return -EFAULT;
	}
	/*
	 * gups are always data accesses, not instruction
	 * fetches, so execute=false here
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return -EFAULT;
	return 0;
}

/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
 * this case.
 *
 * A caller using such a combination of @nonblocking and @gup_flags
 * must therefore hold the mmap_sem for reading only, and recognize
 * when it's been released.  Otherwise, it must be held for either
 * reading or writing and will not be released.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags, nonblocking);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (unlikely(fatal_signal_pending(current)))
			return i ? i : -ERESTARTSYS;
		cond_resched();
		page = follow_page_mask(vma, start, foll_flags, &page_mask);
		if (!page) {
			int ret;
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				return i ? i : ret;
			case -EBUSY:
				return i;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no corresponding
			 * struct page.
			 */
			goto next_page;
		} else if (IS_ERR(page)) {
			return i ? i : PTR_ERR(page);
		}
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
	return i;
}
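
/*
 * Illustrative sketch (not part of the original file): handling the
 * partial-pin return of __get_user_pages().  All caller-side names
 * are hypothetical; mmap_sem must be held as documented above.
 *
 *	long i, got;
 *
 *	got = __get_user_pages(current, current->mm, start, nr,
 *			       FOLL_GET | FOLL_WRITE, pages, NULL, NULL);
 *	if (got <= 0)
 *		return got ? got : -EFAULT;
 *	// use pages[0 .. got-1]; fewer than nr may have been pinned
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */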

static bool vma_permits_fault(struct vm_area_struct *vma,
			      unsigned int fault_flags)
{
	bool write   = !!(fault_flags & FAULT_FLAG_WRITE);
	bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))
		return false;

	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here:
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return false;

	return true;
}

/*
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if caller
 *		does not allow retry
 *
 * This is meant to be called in the specific scenario where for locking reasons
 * we try to access user memory in atomic context (within a pagefault_disable()
 * section), this returns -EFAULT, and we want to resolve the user fault before
 * trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software.  On
 * such architectures, gup() will not be enough to make a subsequent access
 * succeed.
 *
 * This function will not return with an unlocked mmap_sem. So it has not the
 * same semantics wrt the @mm->mmap_sem as does filemap_fault().
 */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags,
		     bool *unlocked)
{
	struct vm_area_struct *vma;
	vm_fault_t ret, major = 0;

	if (unlocked)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;

retry:
	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(vma, address, fault_flags);
	major |= ret & VM_FAULT_MAJOR;
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, 0);

		if (err)
			return err;
		BUG();
	}

	if (ret & VM_FAULT_RETRY) {
		down_read(&mm->mmap_sem);
		if (!(fault_flags & FAULT_FLAG_TRIED)) {
			*unlocked = true;
			fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			fault_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	if (tsk) {
		if (major)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
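
/*
 * Illustrative sketch (not part of the original file): the futex-style
 * pattern fixup_user_fault() exists for.  Variable names and the retry
 * label are hypothetical.
 *
 *	bool unlocked = false;
 *
 * retry:
 *	pagefault_disable();
 *	ret = __copy_from_user_inatomic(&val, uaddr, sizeof(val));
 *	pagefault_enable();
 *	if (ret) {
 *		down_read(&mm->mmap_sem);
 *		ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
 *				       0, &unlocked);
 *		up_read(&mm->mmap_sem);
 *		if (!ret)
 *			goto retry;
 *	}
 */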

static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (!pages)
			/* If it's a prefault don't insist harder */
			return ret;

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/*
			 * VM_FAULT_RETRY didn't trigger or it was a
			 * FOLL_NOWAIT.
			 */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
		pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		pages++;
		start += PAGE_SIZE;
	}
	if (lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}

/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
			   unsigned int gup_flags, struct page **pages,
			   int *locked)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       pages, NULL, locked,
				       gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);

/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(tsk, mm, ..., pages);
 *
 * It is functionally equivalent to get_user_pages_fast so
 * it's not subject to any protection against speculative page table
 * teardown.
 */
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
			     struct page **pages, unsigned int gup_flags)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
				      &locked, gup_flags | FOLL_TOUCH);
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(get_user_pages_unlocked);
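
/*
 * Illustrative sketch (not part of the original file): pinning a small
 * user buffer without managing mmap_sem in the caller.  Names are
 * hypothetical.
 *
 *	struct page *pages[4];
 *	long got;
 *
 *	got = get_user_pages_unlocked(uaddr & PAGE_MASK, 4, pages,
 *				      FOLL_WRITE);
 *	while (got-- > 0)
 *		put_page(pages[got]);
 */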

/*
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. Lock must initially be held.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the
 * page is written to, set_page_dirty (or set_page_dirty_lock, as appropriate)
 * must be called after the page is finished with, and before put_page is
 * called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user virtual
 * addresses. The pages may be submitted for DMA to devices or accessed via
 * their kernel linear mapping (via the kmap APIs). Care should be taken to
 * use the correct cache flushing APIs.
 *
 * See also get_user_pages_fast, for performance critical applications.
 *
 * get_user_pages should be phased out in favor of
 * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing should use
 * get_user_pages because it cannot pass FAULT_FLAG_ALLOW_RETRY to
 * handle_mm_fault.
 */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *locked)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
				       locked,
				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);

/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's and don't allow
 * passing of a locked parameter.  We also obviously don't pass
 * FOLL_REMOTE in here.
 */
long get_user_pages(unsigned long start, unsigned long nr_pages,
		    unsigned int gup_flags, struct page **pages,
		    struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       pages, vmas, NULL,
				       gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
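
/*
 * Illustrative sketch (not part of the original file): the classic
 * down_read/get_user_pages/up_read sequence.  Names are hypothetical.
 *
 *	down_read(&current->mm->mmap_sem);
 *	got = get_user_pages(start, nr_pages, FOLL_WRITE, pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	// the pages stay pinned after the unlock; put_page() each when done
 */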

#ifdef CONFIG_FS_DAX
/*
 * This is the same as get_user_pages() in that it assumes we are
 * operating on the current task's mm, but it goes further to validate
 * that the vmas associated with the address range are suitable for
 * longterm elevated page reference counts. For example, filesystem-dax
 * mappings are subject to the lifetime enforced by the filesystem and
 * we need guarantees that longterm users like RDMA and V4L2 only
 * establish mappings that have a kernel enforced revocation mechanism.
 *
 * "longterm" == userspace controlled elevated page count lifetime.
 * Contrast this to iov_iter_get_pages() usages which are transient.
 */
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
			     unsigned int gup_flags, struct page **pages,
			     struct vm_area_struct **vmas_arg)
{
	struct vm_area_struct **vmas = vmas_arg;
	struct vm_area_struct *vma_prev = NULL;
	long rc, i;

	if (!pages)
		return -EINVAL;

	if (!vmas) {
		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
			       GFP_KERNEL);
		if (!vmas)
			return -ENOMEM;
	}

	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);

	for (i = 0; i < rc; i++) {
		struct vm_area_struct *vma = vmas[i];

		if (vma == vma_prev)
			continue;

		vma_prev = vma;

		if (vma_is_fsdax(vma))
			break;
	}

	/*
	 * Either get_user_pages() failed, or the vma validation
	 * succeeded, in either case we don't need to put_page() before
	 * returning.
	 */
	if (i >= rc)
		goto out;

	for (i = 0; i < rc; i++)
		put_page(pages[i]);
	rc = -EOPNOTSUPP;
out:
	if (vmas != vmas_arg)
		kfree(vmas);
	return rc;
}
EXPORT_SYMBOL(get_user_pages_longterm);
#endif /* CONFIG_FS_DAX */
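
/*
 * Illustrative sketch (not part of the original file): a long-lived pin,
 * e.g. for RDMA memory registration, which must refuse fs-dax mappings.
 * Names are hypothetical.
 *
 *	got = get_user_pages_longterm(start, nr, FOLL_WRITE, pages, NULL);
 *	if (got == -EOPNOTSUPP)
 *		// the range included a filesystem-dax vma; such pages
 *		// cannot be held for a userspace-controlled lifetime
 *		return got;
 */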

/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @nonblocking:
 *
 * This takes care of mlocking the pages too if VM_LOCKED is set.
 *
 * return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held.
 *
 * If @nonblocking is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @nonblocking is non-NULL, it must held for read only and may be
 * released.  If it's released, *@nonblocking will be set to 0.
 */
long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end & ~PAGE_MASK);
	VM_BUG_ON_VMA(start < vma->vm_start, vma);
	VM_BUG_ON_VMA(end > vma->vm_end, vma);
	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;
	/*
	 * We want to touch writable mappings with a write fault in order
	 * to break COW, except for shared mappings because these don't COW
	 * and we would not want to dirty them for nothing.
	 */
	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		gup_flags |= FOLL_WRITE;

	/*
	 * We want mlock to succeed for regions that have any permissions
	 * other than PROT_NONE.
	 */
	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
		gup_flags |= FOLL_FORCE;

	/*
	 * We made sure addr is within a VMA, so the following will
	 * not result in a stack expansion that recurses back here.
	 */
	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}

/*
 * __mm_populate - populate and/or mlock pages within a range of address space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must be already marked with the desired vm_flags, and
 * mmap_sem must not be held.
 */
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
	struct mm_struct *mm = current->mm;
	unsigned long end, nstart, nend;
	struct vm_area_struct *vma = NULL;
	int locked = 0;
	long ret = 0;

	end = start + len;

	for (nstart = start; nstart < end; nstart = nend) {
		/*
		 * We want to fault in pages for [nstart; end) address range.
		 * Find first corresponding VMA.
		 */
		if (!locked) {
			locked = 1;
			down_read(&mm->mmap_sem);
			vma = find_vma(mm, nstart);
		} else if (nstart >= vma->vm_end)
			vma = vma->vm_next;
		if (!vma || vma->vm_start >= end)
			break;
		/*
		 * Set [nstart; nend) to intersection of desired address
		 * range with the first VMA. Also, skip undesirable VMA types.
		 */
		nend = min(end, vma->vm_end);
		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			continue;
		if (nstart < vma->vm_start)
			nstart = vma->vm_start;
		/*
		 * Now fault in a range of pages. populate_vma_page_range()
		 * double checks the vma flags, so that it won't mlock pages
		 * if the vma was already munlocked.
		 */
		ret = populate_vma_page_range(vma, nstart, nend, &locked);
		if (ret < 0) {
			if (ignore_errors) {
				ret = 0;
				continue;	/* continue at next VMA */
			}
			break;
		}
		nend = nstart + ret * PAGE_SIZE;
		ret = 0;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;	/* 0 or negative error code */
}

/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, using SIGBUS if fail to fault in the page at addr.
 * (GUP can't do this, as writing to the corefile that hole might stop us.)
 */
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */

/*
 * Generic Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free those
 * pages. Disabling interrupts will allow the fast_gup walker to both block
 * the rcu_sched callback, and an IPI that we broadcast for splitting THPs
 * (which is a relatively rare event). The code below adopts this strategy.
 *
 * Before activating this code, please be aware that the following assumptions
 * are currently made:
 *
 *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
 *  free pages containing page tables or TLB flushing requires IPI broadcast.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper functions.
 *
 * This code is based heavily on the PowerPC implementation by Nick Piggin.
 */
#ifdef CONFIG_HAVE_GENERIC_GUP

#ifndef gup_get_pte
/*
 * We assume that the PTE can be read atomically. If this is not the case for
 * your architecture, please provide the helper.
 */
static inline pte_t gup_get_pte(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif

static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
{
	while ((*nr) - nr_start) {
		struct page *page = pages[--(*nr)];

		ClearPageReferenced(page);
		put_page(page);
	}
}

#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	struct dev_pagemap *pgmap = NULL;
	int nr_start = *nr, ret = 0;
	pte_t *ptep, *ptem;

	ptem = ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *head, *page;

		/*
		 * Similar to the PMD case below, NUMA hinting must take slow
		 * path using the pte_protnone check.
		 */
		if (pte_protnone(pte))
			goto pte_unmap;

		if (!pte_access_permitted(pte, write))
			goto pte_unmap;

		if (pte_devmap(pte)) {
			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
			if (unlikely(!pgmap)) {
				undo_dev_pagemap(nr, nr_start, pages);
				goto pte_unmap;
			}
		} else if (pte_special(pte))
			goto pte_unmap;

		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		head = compound_head(page);

		if (!page_cache_get_speculative(head))
			goto pte_unmap;

		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			goto pte_unmap;
		}

		VM_BUG_ON_PAGE(compound_head(page) != head, page);

		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	ret = 1;

pte_unmap:
	if (pgmap)
		put_dev_pagemap(pgmap);
	pte_unmap(ptem);
	return ret;
}
#else

/*
 * If we can't determine whether or not a pte is special, then fail immediately
 * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
 * to be special.
 *
 * For a futex to be placed on a THP tail page, get_futex_key requires a
 * __get_user_pages_fast implementation that can pin pages. Thus it's still
 * useful to have gup_huge_pmd even if we can't operate on ptes.
 */
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	return 0;
}
#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */

#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int __gup_device_huge(unsigned long pfn, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	int nr_start = *nr;
	struct dev_pagemap *pgmap = NULL;

	do {
		struct page *page = pfn_to_page(pfn);

		pgmap = get_dev_pagemap(pfn, pgmap);
		if (unlikely(!pgmap)) {
			undo_dev_pagemap(nr, nr_start, pages);
			return 0;
		}
		SetPageReferenced(page);
		pages[*nr] = page;
		get_page(page);
		(*nr)++;
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);

	if (pgmap)
		put_dev_pagemap(pgmap);
	return 1;
}

static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
		return 0;

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		undo_dev_pagemap(nr, nr_start, pages);
		return 0;
	}
	return 1;
}

static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;
	int nr_start = *nr;

	fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	if (!__gup_device_huge(fault_pfn, addr, end, pages, nr))
		return 0;

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		undo_dev_pagemap(nr, nr_start, pages);
		return 0;
	}
	return 1;
}
#else
static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}

static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}
#endif

static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pmd_access_permitted(orig, write))
		return 0;

	if (pmd_devmap(orig))
		return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr);

	refs = 0;
	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pmd_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pud_access_permitted(orig, write))
		return 0;

	if (pud_devmap(orig))
		return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr);

	refs = 0;
	page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pud_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
			unsigned long end, int write,
			struct page **pages, int *nr)
{
	int refs;
	struct page *head, *page;

	if (!pgd_access_permitted(orig, write))
		return 0;

	BUILD_BUG_ON(pgd_devmap(orig));
	refs = 0;
	page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pgd_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);
		if (!pmd_present(pmd))
			return 0;

		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
			/*
			 * NUMA hinting faults need to be handled in the GUP
			 * slowpath for accounting purposes and so that they
			 * can be serialised against THP migration.
			 */
			if (pmd_protnone(pmd))
				return 0;

			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
					  pages, nr))
				return 0;

		} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
			/*
			 * architecture have different format for hugetlbfs
			 * pmd format and THP pmd format
			 */
			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
					 PMD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&p4d, addr);
	do {
		pud_t pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, pudp, addr, next, write,
					  pages, nr))
				return 0;
		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
					 PUD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	unsigned long next;
	p4d_t *p4dp;

	p4dp = p4d_offset(&pgd, addr);
	do {
		p4d_t p4d = READ_ONCE(*p4dp);

		next = p4d_addr_end(addr, end);
		if (p4d_none(p4d))
			return 0;
		BUILD_BUG_ON(p4d_huge(p4d));
		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
					 P4D_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
			return 0;
	} while (p4dp++, addr = next, addr != end);

	return 1;
}

static void gup_pgd_range(unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset(current->mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			return;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
					  pages, nr))
				return;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, write, pages, nr))
				return;
		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
			return;
	} while (pgdp++, addr = next, addr != end);
}

#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slower implementation.
 */
bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
{
	unsigned long len, end;

	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	return end >= start;	/* reject address-space wraparound */
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
 * the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	unsigned long addr, len, end;
	unsigned long flags;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * Disable interrupts.  We use the nested form as we can already have
	 * interrupts disabled by get_futex_key.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock(.) here as we also want to
	 * block IPIs that come from THPs splitting.
	 */
	if (gup_fast_permitted(start, nr_pages, write)) {
		local_irq_save(flags);
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_restore(flags);
	}

	return nr;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempts to pin user pages, like get_user_pages(), except that the walk
 * is performed locklessly: no mmap_sem is taken.  If the fast walk cannot
 * pin everything, the remainder is handed to the slow path.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	unsigned long addr, len, end;
	int nr = 0, ret = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (nr_pages <= 0)
		return 0;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return -EFAULT;

	if (gup_fast_permitted(start, nr_pages, write)) {
		local_irq_disable();
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_enable();
		ret = nr;
	}

	if (nr < nr_pages) {
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
				write ? FOLL_WRITE : 0);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}
	}

	return ret;
}
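
/*
 * Illustrative sketch (not part of the original file): typical
 * direct-IO style use of get_user_pages_fast().  Names are hypothetical.
 *
 *	int got = get_user_pages_fast(user_addr, nr, 1, pages);
 *
 *	if (got < nr) {
 *		// release whatever was pinned, then fall back or fail
 *		while (got-- > 0)
 *			put_page(pages[got]);
 *		return -EFAULT;
 *	}
 */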

#endif /* CONFIG_HAVE_GENERIC_GUP */