#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>

#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "internal.h"

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap = NULL;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;

retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		int ret;
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	if (flags & FOLL_GET) {
		get_page(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to
		 * use mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If we can't take the page lock here, skip mlocking:
		 * whoever holds the lock (e.g. vmscan) will move the page
		 * to the unevictable list itself once it notices the page
		 * is mapped into a VM_LOCKED vma.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here and migration is
			 * blocked by the pte's page reference, we need
			 * only check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * @flags can have FOLL_ flags set, defined in <linux/mm.h>
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		BUG_ON(flags & FOLL_GET);
		return page;
	}

	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		return no_page_table(vma, flags);

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return no_page_table(vma, flags);
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
		return no_page_table(vma, flags);
	if (pmd_devmap(*pmd)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(*pmd)))
		return follow_page_pte(vma, address, pmd, flags);

	ptl = pmd_lock(mm, pmd);
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags);
	}
	if (flags & FOLL_SPLIT) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
		} else {
			get_page(page);
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags);
	}

	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	*page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static int get_gate_page(struct mm_struct *mm, unsigned long address,
		unsigned int gup_flags, struct vm_area_struct **vma,
		struct page **page)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	int ret = -EFAULT;

	/* user gate pages are read-only */
	if (gup_flags & FOLL_WRITE)
		return -EFAULT;
	if (address > TASK_SIZE)
		pgd = pgd_offset_k(address);
	else
		pgd = pgd_offset_gate(mm, address);
	BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, address);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return -EFAULT;
	VM_BUG_ON(pmd_trans_huge(*pmd));
	pte = pte_offset_map(pmd, address);
	if (pte_none(*pte))
		goto unmap;
	*vma = get_gate_vma(mm);
	if (!page)
		goto out;
	*page = vm_normal_page(*vma, address, *pte);
	if (!*page) {
		if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
			goto unmap;
		*page = pte_page(*pte);
	}
	get_page(*page);
out:
	ret = 0;
unmap:
	pte_unmap(pte);
	return ret;
}

/*
 * mmap_sem must be held on entry.  If @nonblocking != NULL and
 * *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
 * If it is, *@nonblocking will be set to 0 and -EBUSY returned.
 */
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
		unsigned long address, unsigned int *flags, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned int fault_flags = 0;
	int ret;

	/* mlock all present pages, but do not fault in new pages */
	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
		return -ENOENT;
	/* For mm_populate(), just skip the stack guard page. */
	if ((*flags & FOLL_POPULATE) &&
			(stack_guard_page_start(vma, address) ||
			 stack_guard_page_end(vma, address + PAGE_SIZE)))
		return -ENOENT;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (*flags & FOLL_REMOTE)
		fault_flags |= FAULT_FLAG_REMOTE;
	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	if (*flags & FOLL_NOWAIT)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
	if (*flags & FOLL_TRIED) {
		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
		fault_flags |= FAULT_FLAG_TRIED;
	}

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT;
		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
			return -EFAULT;
		BUG();
	}

	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}

	if (ret & VM_FAULT_RETRY) {
		if (nonblocking)
			*nonblocking = 0;
		return -EBUSY;
	}

	/*
	 * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
	 * necessary, even if maybe_mkwrite decided not to set pte_write. We
	 * can thus safely do subsequent page lookups as if they were reads.
	 * But only do so when looping for pte_write is futile: in some cases
	 * userspace may also be wanting to write to the gotten user page,
	 * which a read fault here might prevent (a readonly page might get
	 * reCOWed by userspace write).
	 */
	if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
		*flags &= ~FOLL_WRITE;
	return 0;
}

static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
{
	vm_flags_t vm_flags = vma->vm_flags;
	int write = (gup_flags & FOLL_WRITE);
	int foreign = (gup_flags & FOLL_REMOTE);

	if (vm_flags & (VM_IO | VM_PFNMAP))
		return -EFAULT;

	if (write) {
		if (!(vm_flags & VM_WRITE)) {
			if (!(gup_flags & FOLL_FORCE))
				return -EFAULT;
			/*
			 * We used to let the write,force case do COW in a
			 * VM_MAYWRITE VMA, because ptrace should be able to
			 * set breakpoints in a read-only mapping of an
			 * executable, without corrupting the file (yet only
			 * when that file had been opened for writing!).
			 * Anon pages in shared mappings are surprising: now
			 * just reject it.
			 */
			if (!is_cow_mapping(vm_flags))
				return -EFAULT;
		}
	} else if (!(vm_flags & VM_READ)) {
		if (!(gup_flags & FOLL_FORCE))
			return -EFAULT;
		/*
		 * Is there actually any vma we can reach here which does not
		 * have VM_MAYREAD set?
		 */
		if (!(vm_flags & VM_MAYREAD))
			return -EFAULT;
	}
	/*
	 * gups are always data accesses, not instruction
	 * fetches, so execute=false here
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return -EFAULT;
	return 0;
}

/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference
 * to each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * __get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.  Further, if @gup_flags does not
 * include FOLL_NOWAIT, the mmap_sem will be released via up_read()
 * before returning.
 *
 * A caller using such a combination of @nonblocking and @gup_flags
 * must therefore hold the mmap_sem for reading only, and recognize
 * when it's been released.  Otherwise, it must be held for either
 * reading or writing and will not be released.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */
long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i = 0;
	unsigned int page_mask;
	struct vm_area_struct *vma = NULL;

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));

	/*
	 * If FOLL_FORCE is set then do not force a full fault as the hinting
	 * fault information is unrelated to the reference behaviour of a task
	 * using the address space
	 */
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	do {
		struct page *page;
		unsigned int foll_flags = gup_flags;
		unsigned int page_increm;

		/* first iteration or cross vma bound */
		if (!vma || start >= vma->vm_end) {
			vma = find_extend_vma(mm, start);
			if (!vma && in_gate_area(mm, start)) {
				int ret;
				ret = get_gate_page(mm, start & PAGE_MASK,
						gup_flags, &vma,
						pages ? &pages[i] : NULL);
				if (ret)
					return i ? : ret;
				page_mask = 0;
				goto next_page;
			}

			if (!vma || check_vma_flags(vma, gup_flags))
				return i ? : -EFAULT;
			if (is_vm_hugetlb_page(vma)) {
				i = follow_hugetlb_page(mm, vma, pages, vmas,
						&start, &nr_pages, i,
						gup_flags);
				continue;
			}
		}
retry:
		/*
		 * If we have a pending SIGKILL, don't keep faulting pages and
		 * potentially allocating memory.
		 */
		if (unlikely(fatal_signal_pending(current)))
			return i ? i : -ERESTARTSYS;
		cond_resched();
		page = follow_page_mask(vma, start, foll_flags, &page_mask);
		if (!page) {
			int ret;
			ret = faultin_page(tsk, vma, start, &foll_flags,
					nonblocking);
			switch (ret) {
			case 0:
				goto retry;
			case -EFAULT:
			case -ENOMEM:
			case -EHWPOISON:
				return i ? i : ret;
			case -EBUSY:
				return i;
			case -ENOENT:
				goto next_page;
			}
			BUG();
		} else if (PTR_ERR(page) == -EEXIST) {
			/*
			 * Proper page table entry exists, but no
			 * corresponding struct page.
			 */
			goto next_page;
		} else if (IS_ERR(page)) {
			return i ? i : PTR_ERR(page);
		}
		if (pages) {
			pages[i] = page;
			flush_anon_page(vma, page, start);
			flush_dcache_page(page);
			page_mask = 0;
		}
next_page:
		if (vmas) {
			vmas[i] = vma;
			page_mask = 0;
		}
		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
		if (page_increm > nr_pages)
			page_increm = nr_pages;
		i += page_increm;
		start += page_increm * PAGE_SIZE;
		nr_pages -= page_increm;
	} while (nr_pages);
	return i;
}
EXPORT_SYMBOL(__get_user_pages);

bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
{
	bool write = !!(fault_flags & FAULT_FLAG_WRITE);
	bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))
		return false;

	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here:
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return false;

	return true;
}

/**
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags: flags to pass down to handle_mm_fault()
 * @unlocked:	did we unlock the mmap_sem while retrying, maybe NULL if
 *		the caller does not allow retry
 *
 * This is meant to be called in the specific scenario where for locking
 * reasons we try to access user memory in atomic context (within a
 * pagefault_disable() section), this returns -EFAULT, and we want to
 * resolve the user fault before trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the
 * necessary SW fixup of the dirty and young bits in the PTE, while
 * get_user_pages() only guarantees to update these in the struct page.
 *
 * This is important for some architectures where those bits also gate the
 * access permission to the page because they are maintained in software.
 * On such architectures, gup() will not be enough to make a subsequent
 * access succeed.
 *
 * This function will not return with an unlocked mmap_sem. So it has not
 * the same semantics wrt the @mm->mmap_sem as does filemap_fault().
 */
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags,
		     bool *unlocked)
{
	struct vm_area_struct *vma;
	int ret, major = 0;

	if (unlocked)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;

retry:
	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(mm, vma, address, fault_flags);
	major |= ret & VM_FAULT_MAJOR;
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return -EHWPOISON;
		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
			return -EFAULT;
		BUG();
	}

	if (ret & VM_FAULT_RETRY) {
		down_read(&mm->mmap_sem);
		if (!(fault_flags & FAULT_FLAG_TRIED)) {
			*unlocked = true;
			fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			fault_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	if (tsk) {
		if (major)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
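
/*
 * Illustrative sketch, not part of the original file: how a caller such as
 * the futex code might use fixup_user_fault() after an access with page
 * faults disabled failed.  The name example_fault_in_writable() and its
 * error policy are assumptions made for this example only.
 */
static int __maybe_unused example_fault_in_writable(unsigned long uaddr)
{
	struct mm_struct *mm = current->mm;
	bool unlocked = false;
	int ret;

	down_read(&mm->mmap_sem);
	/* Fault the page in as writable; may drop and retake mmap_sem. */
	ret = fixup_user_fault(current, mm, uaddr, FAULT_FLAG_WRITE,
			       &unlocked);
	up_read(&mm->mmap_sem);
	return ret;
}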

static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						int write, int force,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked, bool notify_drop,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (!pages)
			/* If it's a prefault don't insist harder */
			return ret;

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/* VM_FAULT_RETRY didn't trigger */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
		pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		pages++;
		start += PAGE_SIZE;
	}
	if (notify_drop && lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}

/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(start, nr_pages, write, force, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(start, nr_pages, write, force, pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
			   int write, int force, struct page **pages,
			   int *locked)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       write, force, pages, NULL, locked, true,
				       FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
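
/*
 * Illustrative sketch, not part of the original file: the locked calling
 * convention described above, written out as a compilable helper.  The
 * name example_gup_locked() is an assumption for this example only.
 */
static long __maybe_unused example_gup_locked(unsigned long start,
					      unsigned long nr_pages,
					      struct page **pages)
{
	int locked = 1;
	long ret;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages_locked(start, nr_pages, 0, 0, pages, &locked);
	/* The lock may have been dropped in the retry path; check first. */
	if (locked)
		up_read(&current->mm->mmap_sem);
	return ret;
}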

/*
 * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
 * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
 *
 * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
 * caller if required (just like with __get_user_pages). "FOLL_GET",
 * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
 * according to the parameters "pages", "write", "force"
 * respectively.
 */
__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
					       unsigned long start, unsigned long nr_pages,
					       int write, int force, struct page **pages,
					       unsigned int gup_flags)
{
	long ret;
	int locked = 1;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
				      pages, NULL, &locked, false, gup_flags);
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(__get_user_pages_unlocked);

/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(start, nr_pages, write, force, pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(start, nr_pages, write, force, pages);
 *
 * It is functionally equivalent to get_user_pages_fast so
 * get_user_pages_fast should be used instead if specific gup_flags
 * (e.g. FOLL_FORCE) are not required.
 */
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
			     int write, int force, struct page **pages)
{
	return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
					 write, force, pages, FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_unlocked);

/**
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to by the caller
 * @force:	whether to force access even when user mapping is currently
 *		protected (but never forces write access to shared mapping).
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference
 * to each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely. And mostly callers simply care that the page
 * contains data that was valid *at some point in time*. Typically, an IO
 * or similar operation cannot guarantee anything stronger anyway because
 * locks can't be held over the syscall boundary.
 *
 * If write=0, the page must not be written to. If the page is written to,
 * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
 * after the page is finished with, and before put_page is called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user
 * virtual addresses. The pages may be submitted for DMA to devices or
 * accessed via their kernel linear mapping (via the kmap APIs). Care
 * should be taken to use the correct cache flushing APIs.
 *
 * See also get_user_pages_fast, for performance critical applications.
 */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		int write, int force, struct page **pages,
		struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
				       pages, vmas, NULL, false,
				       FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
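
/*
 * Illustrative sketch, not part of the original file: pinning one page of
 * a foreign mm for read, roughly as ptrace-style access code would.  The
 * name example_read_remote_page() and its locking policy are assumptions
 * for this example only.
 */
static int __maybe_unused example_read_remote_page(struct task_struct *tsk,
						   struct mm_struct *mm,
						   unsigned long uaddr,
						   struct page **pagep)
{
	long got;

	down_read(&mm->mmap_sem);
	got = get_user_pages_remote(tsk, mm, uaddr & PAGE_MASK, 1,
				    0, 0, pagep, NULL);
	up_read(&mm->mmap_sem);

	/* Caller must put_page(*pagep) once it is done with the page. */
	return got == 1 ? 0 : (got < 0 ? got : -EFAULT);
}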

/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's.  We also
 * obviate the need to pass a task_struct to handle_mm_fault().
 */
long get_user_pages(unsigned long start, unsigned long nr_pages,
		    int write, int force, struct page **pages,
		    struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(current, current->mm, start, nr_pages,
				       write, force, pages, vmas, NULL, false,
				       FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
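
/*
 * Illustrative sketch, not part of the original file: the classic
 * pin-for-I/O pattern against current->mm, including the
 * set_page_dirty_lock() step that the get_user_pages_remote() comment
 * requires for pages that were written to.  The name
 * example_pin_user_buffer() is an assumption for this example only.
 */
static long __maybe_unused example_pin_user_buffer(unsigned long start,
						   unsigned long nr_pages,
						   struct page **pages)
{
	long i, got;

	down_read(&current->mm->mmap_sem);
	got = get_user_pages(start, nr_pages, 1 /* write */, 0 /* force */,
			     pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (got <= 0)
		return got;

	/* ... hand the pages to the device and wait for the I/O ... */

	for (i = 0; i < got; i++) {
		set_page_dirty_lock(pages[i]);
		put_page(pages[i]);
	}
	return got;
}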

/**
 * populate_vma_page_range() -  populate a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @nonblocking: whether population may drop mmap_sem (see below)
 *
 * This takes care of mlocking the pages too if VM_LOCKED is set.
 *
 * Return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held.
 *
 * If @nonblocking is NULL, it may be held for read or write and will
 * be unperturbed.
 *
 * If @nonblocking is non-NULL, it must be held for read only and may be
 * released.  If it's released, *@nonblocking will be set to 0.
 */
long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end & ~PAGE_MASK);
	VM_BUG_ON_VMA(start < vma->vm_start, vma);
	VM_BUG_ON_VMA(end > vma->vm_end, vma);
	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;
	/*
	 * We want to touch writable mappings with a write fault in order
	 * to break COW, except for shared mappings because these don't COW
	 * and we would not want to dirty them for nothing.
	 */
	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		gup_flags |= FOLL_WRITE;

	/*
	 * We want mlock to succeed for regions that have any permissions
	 * other than PROT_NONE.
	 */
	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
		gup_flags |= FOLL_FORCE;

	/*
	 * We made sure addr is within a VMA, so the following will
	 * not result in a stack expansion that recurses back here.
	 */
	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}

/*
 * __mm_populate - populate and/or mlock pages within a range of address
 * space.
 *
 * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
 * flags. VMAs must be already marked with the desired vm_flags, and
 * mmap_sem must not be held.
 */
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
	struct mm_struct *mm = current->mm;
	unsigned long end, nstart, nend;
	struct vm_area_struct *vma = NULL;
	int locked = 0;
	long ret = 0;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;

	for (nstart = start; nstart < end; nstart = nend) {
		/*
		 * We want to fault in pages for [nstart; end) address range.
		 * Find first corresponding VMA.
		 */
		if (!locked) {
			locked = 1;
			down_read(&mm->mmap_sem);
			vma = find_vma(mm, nstart);
		} else if (nstart >= vma->vm_end)
			vma = vma->vm_next;
		if (!vma || vma->vm_start >= end)
			break;
		/*
		 * Set [nstart; nend) to intersection of desired address
		 * range with the first VMA. Also, skip undesirable VMA types.
		 */
		nend = min(end, vma->vm_end);
		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			continue;
		if (nstart < vma->vm_start)
			nstart = vma->vm_start;
		/*
		 * Now fault in a range of pages. populate_vma_page_range()
		 * double checks the vma flags, so that it won't mlock pages
		 * if the vma was already munlocked.
		 */
		ret = populate_vma_page_range(vma, nstart, nend, &locked);
		if (ret < 0) {
			if (ignore_errors) {
				ret = 0;
				continue;	/* continue at next VMA */
			}
			break;
		}
		nend = nstart + ret * PAGE_SIZE;
		ret = 0;
	}
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;	/* 0 or negative error code */
}

/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted or
 * the fault handled in some other way, as appropriate for the dump:
 * allowing a hole to be left in the corefile to save diskspace.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */

/*
 * Generic RCU Fast GUP
 *
 * get_user_pages_fast attempts to pin user pages by walking the page
 * tables directly and avoids taking locks. Thus the walker needs to be
 * protected from page table pages being freed from under it, and should
 * block any THP splits.
 *
 * One way to achieve this is to have the walker disable interrupts, and
 * rely on IPIs from the TLB flushing code blocking before the page table
 * pages are freed. This is unsuitable for architectures that do not need
 * to broadcast an IPI when invalidating TLBs.
 *
 * Another way to achieve this is to batch up page table containing pages
 * belonging to more than one mm_user, then rcu_sched a callback to free
 * those pages. Disabling interrupts also blocks the rcu_sched callback
 * from running, which is why page tables must then be freed via
 * call_rcu_sched().
 *
 * Before activating this code, please be aware that the following
 * assumptions are currently made:
 *
 *  *) HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table is used to free
 *     pages containing page tables.
 *
 *  *) ptes can be read atomically by the architecture.
 *
 *  *) access_ok is sufficient to validate userspace address ranges.
 *
 * The last two assumptions can be relaxed by the addition of helper
 * functions.
 *
 * This code is based heavily on the PowerPC implementation by Nick Piggin.
 */
#ifdef CONFIG_HAVE_GENERIC_RCU_GUP

#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	pte_t *ptep, *ptem;
	int ret = 0;

	ptem = ptep = pte_offset_map(&pmd, addr);
	do {
		/*
		 * In the line below we are assuming that the pte can be read
		 * atomically. If this is not the case for your architecture,
		 * please wrap this in a helper function!
		 *
		 * for an example see gup_get_pte in arch/x86/mm/gup.c
		 */
		pte_t pte = READ_ONCE(*ptep);
		struct page *head, *page;

		/*
		 * Similar to the PMD case below, NUMA hinting must take the
		 * slow path using the pte_protnone check.
		 */
		if (!pte_present(pte) || pte_special(pte) ||
		    pte_protnone(pte) || (write && !pte_write(pte)))
			goto pte_unmap;

		if (!arch_pte_access_permitted(pte, write))
			goto pte_unmap;

		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
		page = pte_page(pte);
		head = compound_head(page);

		if (!page_cache_get_speculative(head))
			goto pte_unmap;

		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			goto pte_unmap;
		}

		VM_BUG_ON_PAGE(compound_head(page) != head, page);
		pages[*nr] = page;
		(*nr)++;

	} while (ptep++, addr += PAGE_SIZE, addr != end);

	ret = 1;

pte_unmap:
	pte_unmap(ptem);
	return ret;
}
#else

/*
 * If we can't determine whether or not a pte is special, then fail
 * immediately for ptes. Note, we can still pin HugeTLB and THP as these
 * are guaranteed not to be special.
 *
 * For a futex to be placed on a THP tail page, get_futex_key requires a
 * __get_user_pages_fast implementation that can pin pages. Thus it's still
 * useful to have gup_huge_pmd even if we can't operate on ptes.
 */
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	return 0;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */

static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (write && !pmd_write(orig))
		return 0;

	refs = 0;
	head = pmd_page(orig);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON_PAGE(compound_head(page) != head, page);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	return 1;
}

static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	struct page *head, *page;
	int refs;

	if (write && !pud_write(orig))
		return 0;

	refs = 0;
	head = pud_page(orig);
	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON_PAGE(compound_head(page) != head, page);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	return 1;
}

static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
			unsigned long end, int write,
			struct page **pages, int *nr)
{
	int refs;
	struct page *head, *page;

	if (write && !pgd_write(orig))
		return 0;

	refs = 0;
	head = pgd_page(orig);
	page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
	do {
		VM_BUG_ON_PAGE(compound_head(page) != head, page);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			return 0;

		if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
			/*
			 * NUMA hinting faults need to be handled in the GUP
			 * slowpath for accounting purposes and so that they
			 * can be serialised against THP migration.
			 */
			if (pmd_protnone(pmd))
				return 0;

			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
					  pages, nr))
				return 0;

		} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
			/*
			 * Architectures can have a different pmd format for
			 * hugetlbfs than for THP, so use the hugepd walker
			 * for those.
			 */
			if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
					 PMD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
			 int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (unlikely(pud_huge(pud))) {
			if (!gup_huge_pud(pud, pudp, addr, next, write,
					  pages, nr))
				return 0;
		} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
					 PUD_SHIFT, next, write, pages, nr))
				return 0;
		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.  It will only return non-negative values.
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
			  struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next, flags;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
				start, len)))
		return 0;

	/*
	 * Disable interrupts.  We use the nested form as we can already have
	 * interrupts disabled by get_futex_key.
	 *
	 * With interrupts disabled, we block page table pages from being
	 * freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
	 * for more details.
	 *
	 * We do not adopt an rcu_read_lock(.) here as we also want to
	 * block IPIs that come from THPs splitting.
	 */
	local_irq_save(flags);
	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = READ_ONCE(*pgdp);

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (unlikely(pgd_huge(pgd))) {
			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
					  pages, &nr))
				break;
		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
					 PGDIR_SHIFT, next, write, pages, &nr))
				break;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	local_irq_restore(flags);

	return nr;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 *
 * Attempts the lockless fast path first and falls back to
 * get_user_pages_unlocked() for whatever remains, so it provides
 * equivalent functionality to get_user_pages on current->mm.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	int nr, ret;

	start &= PAGE_MASK;
	nr = __get_user_pages_fast(start, nr_pages, write, pages);
	ret = nr;

	if (nr < nr_pages) {
		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		ret = get_user_pages_unlocked(start, nr_pages - nr, write,
					      0, pages);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}
	}

	return ret;
}
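
/*
 * Illustrative sketch, not part of the original file: a fast-path caller
 * that tolerates partial pins, as the return-value handling above allows.
 * The name example_gup_fast() is an assumption for this example only.
 */
static int __maybe_unused example_gup_fast(unsigned long start, int nr_pages,
					   struct page **pages)
{
	int i, got;

	/* No mmap_sem needed; the slow-path fallback takes it internally. */
	got = get_user_pages_fast(start, nr_pages, 1, pages);
	if (got < 0)
		return got;

	/* ... use pages[0..got) ... */

	for (i = 0; i < got; i++)
		put_page(pages[i]);
	return got;
}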

#endif /* CONFIG_HAVE_GENERIC_RCU_GUP */