#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>

#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "internal.h"

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

static struct page *follow_pmd_mask(struct vm_area_struct *vma,
		unsigned long address, pud_t *pudp,
		unsigned int flags, unsigned int *page_mask)
{
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pmd = pmd_offset(pudp, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pmd_val(*pmd)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pmd_val(*pmd)), flags,
				      PMD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
retry:
	if (!pmd_present(*pmd)) {
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(*pmd));
		if (is_pmd_migration_entry(*pmd))
			pmd_migration_entry_wait(mm, pmd);
		goto retry;
	}
	if (pmd_devmap(*pmd)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(*pmd)))
		return follow_page_pte(vma, address, pmd, flags);

	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
		return no_page_table(vma, flags);

retry_locked:
	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_present(*pmd))) {
		spin_unlock(ptl);
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		pmd_migration_entry_wait(mm, pmd);
		goto retry_locked;
	}
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags);
	}
	if (flags & FOLL_SPLIT) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else {
			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags);
	}
	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	*page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static struct page *follow_pud_mask(struct vm_area_struct *vma,
		unsigned long address, p4d_t *p4dp,
		unsigned int flags, unsigned int *page_mask)
{
	pud_t *pud;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pud = pud_offset(p4dp, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pud_val(*pud)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pud_val(*pud)), flags,
				      PUD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	return follow_pmd_mask(vma, address, pud, flags, page_mask);
}

static struct page *follow_p4d_mask(struct vm_area_struct *vma,
		unsigned long address, pgd_t *pgdp,
		unsigned int flags, unsigned int *page_mask)
{
	p4d_t *p4d;
	struct page *page;

	p4d = p4d_offset(pgdp, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);

	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(p4d_val(*p4d)), flags,
				      P4D_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	return follow_pud_mask(vma, address, p4d, flags, page_mask);
}

/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	/* make this handle hugepd */
	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);

	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	if (pgd_huge(*pgd)) {
		page = follow_huge_pgd(mm, address, pgd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pgd_val(*pgd)), flags,
				      PGDIR_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}

	return follow_p4d_mask(vma, address, pgd, flags, page_mask);
}
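
/*
 * For reference, most callers do not use follow_page_mask() directly but go
 * through a thin follow_page() wrapper (in <linux/mm.h>) that discards the
 * page mask. A minimal caller sketch, assuming mmap_sem is held for read
 * (hypothetical code, not part of this file):
 *
 *	unsigned int unused_mask;
 *	struct page *page;
 *
 *	page = follow_page_mask(vma, addr, FOLL_GET, &unused_mask);
 *	if (page && !IS_ERR(page)) {
 *		...use the page...
 *		put_page(page);
 *	}
 */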

static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	BUG_ON(pgd_none(*pgd));
	p4d = p4d_offset(pgd, address);
	BUG_ON(p4d_none(*p4d));
	pud = pud_offset(p4d, address);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);

		/*
		 * This should never happen (a device public page in the gate
		 * area).
		 */
		if (is_device_public_page(*page))
			goto unmap;
	}
	get_page(*page);
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}

/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	unsigned int fault_flags = 0;
	int ret;

	/* mlock all present pages, but do not fault in new pages */
	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (*flags & FOLL_REMOTE)
		fault_flags |= FAULT_FLAG_REMOTE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, *flags);

		if (err)
			return err;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags |= FOLL_COW;
	return 0;
}

static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;
	int write = (gup_flags & FOLL_WRITE);
	int foreign = (gup_flags & FOLL_REMOTE);

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
		return -EFAULT;

	if (write) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
			 * set a breakpoint in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
			if (!is_cow_mapping(vm_flags))
				return -EFAULT;
		}
	} else if (!(vm_flags & VM_READ)) {
		if (!(gup_flags & FOLL_FORCE))
			return -EFAULT;
		/*
		 * Is there actually any vma we can reach here which does not
		 * have VM_MAYREAD set?
		 */
		if (!(vm_flags & VM_MAYREAD))
			return -EFAULT;
	}
	/*
	 * gups are always data accesses, not instruction
	 * fetches, so execute=false here
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return -EFAULT;
	return 0;
}

/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read() in
 * this case.
 *
 * A caller using such a combination of @nonblocking and @gup_flags
 * must therefore hold the mmap_sem for reading only, and recognize
 * when it's been released.  Otherwise, it must be held for either
 * reading or writing and will not be released.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags, nonblocking);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (unlikely(fatal_signal_pending(current)))
			return i ? i : -ERESTARTSYS;
		cond_resched();
		page = follow_page_mask(vma, start, foll_flags, &page_mask);
		if (!page) {
			int ret;
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				return i ? i : ret;
			case -EBUSY:
				return i;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no corresponding
			 * struct page.
			 */
			goto next_page;
		} else if (IS_ERR(page)) {
			return i ? i : PTR_ERR(page);
		}
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
	return i;
}
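
/*
 * For a concrete in-tree caller of __get_user_pages() with special flags,
 * see get_dump_page() further down. A minimal sketch of the same pattern,
 * assuming the caller already holds mmap_sem for read (hypothetical code,
 * not part of this file):
 *
 *	struct page *page;
 *
 *	if (__get_user_pages(current, current->mm, addr, 1,
 *			     FOLL_GET | FOLL_FORCE, &page, NULL, NULL) == 1) {
 *		...use the page...
 *		put_page(page);
 *	}
 */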

static bool vma_permits_fault(struct vm_area_struct *vma,
			      unsigned int fault_flags)
{
	bool write   = !!(fault_flags & FAULT_FLAG_WRITE);
	bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))
		return false;

	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here:
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return false;

	return true;
}

/*
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if caller
 *		does not allow retry
 *
 * This is meant to be called in the specific scenario where for locking reasons
 * we try to access user memory in atomic context (within a pagefault_disable()
 * section), this returns -EFAULT, and we want to resolve the user fault before
 * trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software.  On
 * such architectures, gup() will not be enough to make a subsequent access
 * succeed.
 *
 * This function will not return with an unlocked mmap_sem. So it has not the
 * same semantics wrt the @mm->mmap_sem as does filemap_fault().
 */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags,
		     bool *unlocked)
{
	struct vm_area_struct *vma;
	int ret, major = 0;

	if (unlocked)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;

retry:
	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(vma, address, fault_flags);
	major |= ret & VM_FAULT_MAJOR;
	if (ret & VM_FAULT_ERROR) {
		int err = vm_fault_to_errno(ret, 0);

		if (err)
			return err;
		BUG();
	}

	if (ret & VM_FAULT_RETRY) {
		down_read(&mm->mmap_sem);
		if (!(fault_flags & FAULT_FLAG_TRIED)) {
			*unlocked = true;
			fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			fault_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	if (tsk) {
		if (major)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
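
/*
 * A minimal usage sketch in the futex style (hypothetical code, not part
 * of this file): attempt an atomic user access, and if it faults, drop out
 * of atomic context and resolve the fault before retrying:
 *
 *	while (1) {
 *		pagefault_disable();
 *		ret = __get_user(val, uaddr);
 *		pagefault_enable();
 *		if (!ret)
 *			break;
 *		down_read(&mm->mmap_sem);
 *		ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
 *				       0, NULL);
 *		up_read(&mm->mmap_sem);
 *		if (ret)
 *			return ret;
 *	}
 */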

static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (!pages)
			/* If it's a prefault don't insist harder */
			return ret;

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/*
			 * VM_FAULT_RETRY didn't trigger or it was a
			 * FOLL_NOWAIT.
			 */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
		pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		pages++;
		start += PAGE_SIZE;
	}
	if (lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}

/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(start, nr_pages, gup_flags, pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
			   unsigned int gup_flags, struct page **pages,
			   int *locked)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       pages, NULL, locked,
				       gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);

/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 *
 * It is functionally equivalent to get_user_pages_fast so
 * get_user_pages_fast should be used instead if specific gup_flags
 * (e.g. FOLL_FORCE) are not required.
 */
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
			     struct page **pages, unsigned int gup_flags)
{
	struct mm_struct *mm = current->mm;
	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
				      &locked, gup_flags | FOLL_TOUCH);
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(get_user_pages_unlocked);

/*
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying lookup behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @locked:	pointer to lock flag indicating whether lock is held and
 *		subsequently whether VM_FAULT_RETRY functionality can be
 *		utilised. Lock must initially be held.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the
 * page is written to, set_page_dirty (or set_page_dirty_lock, as appropriate)
 * must be called after the page is finished with, and before put_page is
 * called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user virtual
 * addresses. The pages may be submitted for DMA to devices or accessed via
 * their kernel linear mapping (via the kmap APIs). Care should be taken to
 * use the correct cache flushing APIs.
 *
 * See also get_user_pages_fast, for performance critical applications.
 *
 * get_user_pages should be phased out in favor of
 * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing should
 * use get_user_pages because it cannot pass FAULT_FLAG_ALLOW_RETRY to
 * handle_mm_fault.
 */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *locked)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
				       locked,
				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);

/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's and don't allow
 * passing of a locked parameter.  We also obviously don't pass
 * FOLL_REMOTE in here.
 */
long get_user_pages(unsigned long start, unsigned long nr_pages,
		    unsigned int gup_flags, struct page **pages,
		    struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       pages, vmas, NULL,
				       gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
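
/*
 * A minimal pin/use/release sketch for a get_user_pages() caller
 * (hypothetical code, not part of this file). Note the dirty-then-release
 * ordering required by the rules documented above:
 *
 *	struct page *pages[NPAGES];
 *	long got, i;
 *
 *	down_read(&current->mm->mmap_sem);
 *	got = get_user_pages(uaddr & PAGE_MASK, NPAGES,
 *			     FOLL_WRITE, pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	for (i = 0; i < got; i++) {
 *		...write to the page via kmap() or DMA...
 *		set_page_dirty_lock(pages[i]);
 *		put_page(pages[i]);
 *	}
 */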

#ifdef CONFIG_FS_DAX
/*
 * This is the same as get_user_pages() in that it assumes we are
 * operating on the current task's mm, but it goes further to validate
 * that the vmas associated with the address range are suitable for
 * longterm elevated page reference counts. For example, filesystem-dax
 * mappings are subject to the lifetime enforced by the filesystem, so
 * longterm users like RDMA and V4L2 must not pin their pages: the
 * filesystem could reclaim the blocks backing them at any time. Such
 * vmas are rejected here with -EOPNOTSUPP.
 */
long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
			     unsigned int gup_flags, struct page **pages,
			     struct vm_area_struct **vmas_arg)
{
	struct vm_area_struct **vmas = vmas_arg;
	struct vm_area_struct *vma_prev = NULL;
	long rc, i;

	if (!pages)
		return -EINVAL;

	if (!vmas) {
		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
			       GFP_KERNEL);
		if (!vmas)
			return -ENOMEM;
	}

	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);

	for (i = 0; i < rc; i++) {
		struct vm_area_struct *vma = vmas[i];

		if (vma == vma_prev)
			continue;

		vma_prev = vma;

		if (vma_is_fsdax(vma))
			break;
	}

	/*
	 * Either get_user_pages() failed, or the vma validation
	 * succeeded, in either case we don't need to put_page() before
	 * returning.
	 */
	if (i >= rc)
		goto out;

	for (i = 0; i < rc; i++)
		put_page(pages[i]);
	rc = -EOPNOTSUPP;
out:
	if (vmas != vmas_arg)
		kfree(vmas);
	return rc;
}
EXPORT_SYMBOL(get_user_pages_longterm);
#endif /* CONFIG_FS_DAX */
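
/*
 * A minimal caller sketch (hypothetical, not part of this file): an
 * RDMA-style user that needs pages pinned indefinitely, falling back
 * gracefully when the range includes filesystem-dax mappings:
 *
 *	down_read(&current->mm->mmap_sem);
 *	got = get_user_pages_longterm(uaddr, npages, FOLL_WRITE,
 *				      pages, NULL);
 *	up_read(&current->mm->mmap_sem);
 *	if (got == -EOPNOTSUPP)
 *		...the range is not suitable for a longterm pin...
 */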

/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @nonblocking: see below
 *
 * This takes care of mlocking the pages too if VM_LOCKED is set.
 *
 * return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held.
 *
 * If @nonblocking is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @nonblocking is non-NULL, it must held for read only and may be
 * released.  If it's released, *@nonblocking will be set to 0.
 */
long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end & ~PAGE_MASK);
	VM_BUG_ON_VMA(start < vma->vm_start, vma);
	VM_BUG_ON_VMA(end > vma->vm_end, vma);
	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;
	/*
	 * We want to touch writable mappings with a write fault in order
	 * to break COW, except for shared mappings because these don't COW
	 * and we would not want to dirty them for nothing.
	 */
	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		gup_flags |= FOLL_WRITE;

	/*
	 * We want mlock to succeed for regions that have any permissions
	 * other than PROT_NONE.
	 */
	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
		gup_flags |= FOLL_FORCE;

	/*
	 * We made sure addr is within a VMA, so the following will
	 * not result in a stack expansion that recurses back here.
	 */
	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}

/*
 * __mm_populate - populate and/or mlock pages within a range of address space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must be already marked with the desired vm_flags, and
 * mmap_sem must not be held.
 */
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
	struct mm_struct *mm = current->mm;
	unsigned long end, nstart, nend;
	struct vm_area_struct *vma = NULL;
	int locked = 0;
	long ret = 0;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;

	for (nstart = start; nstart < end; nstart = nend) {
		/*
		 * We want to fault in pages for [nstart; end) address range.
		 * Find first corresponding VMA.
		 */
		if (!locked) {
			locked = 1;
			down_read(&mm->mmap_sem);
			vma = find_vma(mm, nstart);
		} else if (nstart >= vma->vm_end)
			vma = vma->vm_next;
		if (!vma || vma->vm_start >= end)
			break;
		/*
		 * Set [nstart; nend) to intersection of desired address
		 * range with the first VMA. Also, skip undesirable VMA types.
		 */
		nend = min(end, vma->vm_end);
		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			continue;
		if (nstart < vma->vm_start)
			nstart = vma->vm_start;
		/*
		 * Now fault in a range of pages. populate_vma_page_range()
		 * double checks the vma flags, so that it won't mlock pages
		 * if the vma was already munlocked.
		 */
		ret = populate_vma_page_range(vma, nstart, nend, &locked);
		if (ret < 0) {
			if (ignore_errors) {
				ret = 0;
				continue;	/* continue at next VMA */
			}
			break;
		}
		nend = nstart + ret * PAGE_SIZE;
		ret = 0;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;	/* 0 or negative error code */
}
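
/*
 * For reference, callers such as mmap()'s MAP_POPULATE path reach this via
 * a small mm_populate() wrapper in <linux/mm.h>, roughly of the form
 * (paraphrased, consult the header for the real version):
 *
 *	static inline void mm_populate(unsigned long addr, unsigned long len)
 *	{
 *		(void) __mm_populate(addr, len, 1);	// ignore errors
 *	}
 */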

/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted into
 * the corefile, to preserve alignment with its headers; and also returns
 * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
 * allowing a hole to be left in the corefile to save diskspace.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */
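
/*
 * A minimal sketch of the coredump-side usage (hypothetical; the real
 * caller lives in the binfmt handlers, e.g. fs/binfmt_elf.c):
 *
 *	page = get_dump_page(addr);
 *	if (page) {
 *		...dump page contents via kmap()...
 *		put_page(page);
 *	} else {
 *		...emit a zero-filled hole in the corefile...
 *	}
 */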

/*
 * Generic Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free those
 * pages. Disabling interrupts will allow the fast_gup walker to both block
 * the rcu_sched callback, and an IPI that we broadcast for splitting THPs
 * (which is a relatively rare event). The code below adopts this strategy.
 *
 * Before activating this code, please be aware that the following assumptions
 * are currently made:
 *
 *  *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used
 *     to free pages containing page tables, or TLB flushing requires IPI
 *     broadcast.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper functions.
 *
 * This code is based heavily on the PowerPC implementation by Nick Piggin.
 */
#ifdef CONFIG_HAVE_GENERIC_GUP

#ifndef gup_get_pte
/*
 * We assume that the PTE can be read atomically. If this is not the case for
 * your architecture, please provide the helper.
 */
static inline pte_t gup_get_pte(pte_t *ptep)
{
	return READ_ONCE(*ptep);
}
#endif
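
/*
 * On architectures where a pte cannot be loaded with a single atomic
 * access (e.g. 32-bit kernels with 64-bit ptes), the gup_get_pte override
 * might instead read the two halves with a retry loop. A rough sketch of
 * the idea, assuming pte_low/pte_high fields (hypothetical; consult the
 * actual architecture code for the real version):
 *
 *	static inline pte_t gup_get_pte(pte_t *ptep)
 *	{
 *		pte_t pte;
 *
 *		do {
 *			pte.pte_low = ptep->pte_low;
 *			smp_rmb();
 *			pte.pte_high = ptep->pte_high;
 *			smp_rmb();
 *		} while (unlikely(pte.pte_low != ptep->pte_low));
 *		return pte;
 *	}
 */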

static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
{
	/* unwind the references taken since nr_start on a failed walk */
	while ((*nr) - nr_start) {
		struct page *page = pages[--(*nr)];

		ClearPageReferenced(page);
		put_page(page);
	}
}

#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	struct dev_pagemap *pgmap = NULL;
	int nr_start = *nr, ret = 0;
	pte_t *ptep, *ptem;

	ptem = ptep = pte_offset_map(&pmd, addr);
	do {
		pte_t pte = gup_get_pte(ptep);
		struct page *head, *page;

		/*
		 * Similar to the PMD case below, NUMA hinting must take slow
		 * path using the pte_protnone check.
		 */
		if (pte_protnone(pte))
			goto pte_unmap;

		if (!pte_access_permitted(pte, write))
			goto pte_unmap;

		if (pte_devmap(pte)) {
			pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
			if (unlikely(!pgmap)) {
				undo_dev_pagemap(nr, nr_start, pages);
				goto pte_unmap;
			}
		} else if (pte_special(pte))
			goto pte_unmap;

		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		head = compound_head(page);

		if (!page_cache_get_speculative(head))
			goto pte_unmap;

		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			goto pte_unmap;
		}

		VM_BUG_ON_PAGE(compound_head(page) != head, page);

		SetPageReferenced(page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	ret = 1;

pte_unmap:
	if (pgmap)
		put_dev_pagemap(pgmap);
	pte_unmap(ptem);
	return ret;
}
#else

/*
 * If we can't determine whether or not a pte is special, then fail immediately
 * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
 * to be special.
 *
 * For a futex to be placed on a THP tail page, get_futex_key requires a
 * __get_user_pages_fast implementation that can pin pages. Thus it's still
 * useful to have gup_huge_pmd even if we can't operate on ptes.
 */
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	return 0;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */

#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
static int __gup_device_huge(unsigned long pfn, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	int nr_start = *nr;
	struct dev_pagemap *pgmap = NULL;

	do {
		struct page *page = pfn_to_page(pfn);

		pgmap = get_dev_pagemap(pfn, pgmap);
		if (unlikely(!pgmap)) {
			undo_dev_pagemap(nr, nr_start, pages);
			return 0;
		}
		SetPageReferenced(page);
		pages[*nr] = page;
		get_page(page);
		(*nr)++;
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);

	if (pgmap)
		put_dev_pagemap(pgmap);
	return 1;
}

static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;

	fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
}

static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	unsigned long fault_pfn;

	fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	return __gup_device_huge(fault_pfn, addr, end, pages, nr);
}
#else
static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}

static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
		unsigned long end, struct page **pages, int *nr)
{
	BUILD_BUG();
	return 0;
}
#endif

static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pmd_access_permitted(orig, write))
		return 0;

	if (pmd_devmap(orig))
		return __gup_device_huge_pmd(orig, addr, end, pages, nr);

	refs = 0;
	page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pmd_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (!pud_access_permitted(orig, write))
		return 0;

	if (pud_devmap(orig))
		return __gup_device_huge_pud(orig, addr, end, pages, nr);

	refs = 0;
	page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pud_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
		unsigned long end, int write,
		struct page **pages, int *nr)
{
	int refs;
	struct page *head, *page;

	if (!pgd_access_permitted(orig, write))
		return 0;

	BUILD_BUG_ON(pgd_devmap(orig));
	refs = 0;
	page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
	do {
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	head = compound_head(pgd_page(orig));
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	SetPageReferenced(head);
	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);
		if (!pmd_present(pmd))
			return 0;

		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
			/*
			 * NUMA hinting faults need to be handled in the GUP
			 * slowpath for accounting purposes and so that they
			 * can be serialised against THP migration.
			 */
			if (pmd_protnone(pmd))
				return 0;

			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
					  pages, nr))
				return 0;

		} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
			/*
			 * Architectures have a different format for the
			 * hugetlbfs pmd and the THP pmd.
			 */
			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
					 PMD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&p4d, addr);
	do {
		pud_t pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, pudp, addr, next, write,
					  pages, nr))
				return 0;
		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
					 PUD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	p4d_t *p4dp;

	p4dp = p4d_offset(&pgd, addr);
	do {
		p4d_t p4d = READ_ONCE(*p4dp);

		next = p4d_addr_end(addr, end);
		if (p4d_none(p4d))
			return 0;
		BUILD_BUG_ON(p4d_huge(p4d));
		if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
					 P4D_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
			return 0;
	} while (p4dp++, addr = next, addr != end);

	return 1;
}

static void gup_pgd_range(unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pgd_t *pgdp;

	pgdp = pgd_offset(current->mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			return;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
					  pages, nr))
				return;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, write, pages, nr))
				return;
		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
			return;
	} while (pgdp++, addr = next, addr != end);
}

#ifndef gup_fast_permitted
/*
 * Check if it's allowed to use __get_user_pages_fast() for the range, or
 * we need to fall back to the slower implementation:
 */
bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
{
	unsigned long len, end;

	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;
	return end >= start;
}
#endif

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 * Note a difference with get_user_pages_fast: this always returns the
 * number of pages pinned, 0 if no pages were pinned.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	unsigned long addr, len, end;
	unsigned long flags;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return 0;

	/*
	 * Disable interrupts.  We use the nested form as we can already have
	 * interrupts disabled by get_futex_key.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See struct mmu_table_batch comments in
	 * include/asm-generic/tlb.h for more details.
	 *
	 * We do not adopt an rcu_read_lock(.) here as we also want to
	 * block IPIs that come from THPs splitting.
	 */
	if (gup_fast_permitted(start, nr_pages, write)) {
		local_irq_save(flags);
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_restore(flags);
	}

	return nr;
}
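
/*
 * A minimal caller sketch (hypothetical, not part of this file): pinning a
 * single page from a context that may already have interrupts disabled,
 * where falling back to the sleeping slow path is not an option:
 *
 *	if (__get_user_pages_fast(uaddr, 1, 1, &page) != 1)
 *		return -EFAULT;	 // caller must handle the miss itself
 *	...use the page...
 *	put_page(page);
 */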

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	unsigned long addr, len, end;
	int nr = 0, ret = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (nr_pages <= 0)
		return 0;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					(void __user *)start, len)))
		return -EFAULT;

	if (gup_fast_permitted(start, nr_pages, write)) {
		local_irq_disable();
		gup_pgd_range(addr, end, write, pages, &nr);
		local_irq_enable();
		ret = nr;
	}

	if (nr < nr_pages) {
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
				write ? FOLL_WRITE : 0);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}
	}

	return ret;
}
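
/*
 * A minimal direct-IO style usage sketch (hypothetical code, not part of
 * this file): pin the pages backing a user buffer, let the device write
 * into them, then dirty and release:
 *
 *	got = get_user_pages_fast(uaddr & PAGE_MASK, npages, 1, pages);
 *	if (got < npages)
 *		...handle the short pin (unpin and bail, or retry)...
 *	for (i = 0; i < got; i++) {
 *		set_page_dirty_lock(pages[i]);
 *		put_page(pages[i]);
 *	}
 */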

#endif /* CONFIG_HAVE_GENERIC_GUP */