#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/mmu_notifier.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

#include "internal.h"
static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No struct page to take a reference on */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}
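/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */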
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
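/**
 * follow_page_mask - look up a page descriptor from a user-virtual address
 * @vma: vm_area_struct mapping @address
 * @address: virtual address to look up
 * @flags: flags modifying lookup behaviour (FOLL_* flags from <linux/mm.h>)
 * @page_mask: on output, *page_mask is set according to the size of the page
 *
 * Returns the mapped (struct page *), %NULL if no mapping exists, or
 * an error pointer if there is a mapping to something not represented
 * by a page descriptor (see also vm_normal_page()).
 */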
struct page *follow_page_mask(struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags,
			      unsigned int *page_mask)
{
	struct dev_pagemap *pgmap = NULL;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	*page_mask = 0;

	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
	if (!IS_ERR(page)) {
		/*
		 * follow_huge_addr() is only implemented by a few
		 * architectures; it takes no reference on the page,
		 * so FOLL_GET cannot be honoured here.
		 */
		if (page)
			BUG_ON(flags & FOLL_GET);
		goto out;
	}

	page = NULL;
	pgd = pgd_offset(mm, address);
	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
		goto no_page_table;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		goto no_page_table;
	if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		goto no_page_table;
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		goto no_page_table;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		goto no_page_table;
	if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		goto no_page_table;
	}
	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
		goto no_page_table;
	if (pmd_devmap(*pmd)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (pmd_trans_huge(*pmd)) {
		if (flags & FOLL_SPLIT) {
			split_huge_page_pmd(vma, address, pmd);
			goto split_fallthrough;
		}
		ptl = pmd_lock(mm, pmd);
		if (likely(pmd_trans_huge(*pmd))) {
			if (unlikely(pmd_trans_splitting(*pmd))) {
				spin_unlock(ptl);
				wait_split_huge_page(vma->anon_vma, pmd);
			} else {
				page = follow_trans_huge_pmd(vma, address,
							     pmd, flags);
				spin_unlock(ptl);
				*page_mask = HPAGE_PMD_NR - 1;
				goto out;
			}
		} else
			spin_unlock(ptl);
		/* fall through */
	}
split_fallthrough:
	if (unlikely(pmd_bad(*pmd)))
		goto no_page_table;
	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);

	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;

		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte) || pte_file(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto split_fallthrough;
	}
	if ((flags & FOLL_NUMA) && pte_numa(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags))
		goto unlock;

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
		/*
		 * Only return device mapping pages in the FOLL_GET case since
		 * they are only valid while holding the pgmap reference.
		 */
		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
		if (pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto unlock;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			int ret;

			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto unlock;
		}
	}
	if (flags & FOLL_GET) {
		get_page_foll(page);

		/* drop the pgmap reference now that we hold the page */
		if (pgmap) {
			put_dev_pagemap(pgmap);
			pgmap = NULL;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock the page here, and migration is
			 * blocked by the elevated reference, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
unlock:
	pte_unmap_unlock(ptep, ptl);
out:
	return page;

no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return page;

no_page_table:
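	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables.  Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */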
	if ((flags & FOLL_DUMP) &&
	    (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return page;
}
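/**
 * __get_user_pages() - pin user pages in memory
 * @tsk:	task_struct of target task
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @gup_flags:	flags modifying pin behaviour
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 * @nonblocking: whether waiting for disk IO or mmap_sem contention
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held.  It may be released.  See below.
 *
 * __get_user_pages walks a process's page tables and takes a reference
 * to each struct page that each user address corresponds to at a given
 * instant.  It does not guarantee that the page is still mapped when it
 * returns, only that the page will not be completely freed.
 *
 * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
 * the page is written to, set_page_dirty (or set_page_dirty_lock, as
 * appropriate) must be called after the page is finished with, and
 * before put_page is called.
 *
 * If @nonblocking != NULL, __get_user_pages will not wait for disk IO
 * or mmap_sem contention, and if waiting is needed to pin all pages,
 * *@nonblocking will be set to 0.
 *
 * In most cases, get_user_pages or get_user_pages_fast should be used
 * instead of __get_user_pages. __get_user_pages should be used only if
 * you need some special @gup_flags.
 */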
long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages,
		unsigned int gup_flags, struct page **pages,
		struct vm_area_struct **vmas, int *nonblocking)
{
	long i;
	unsigned long vm_flags;
	unsigned int page_mask;
	int write = (gup_flags & FOLL_WRITE);
	int foreign = (gup_flags & FOLL_REMOTE);

	if (!nr_pages)
		return 0;

	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
	/*
	 * Require read or write permissions.
	 * If FOLL_FORCE is set, we only require the "MAY" flags.
	 */
	vm_flags  = (gup_flags & FOLL_WRITE) ?
			(VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
	vm_flags &= (gup_flags & FOLL_FORCE) ?
			(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
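	/*
	 * If FOLL_FORCE is set then do not force a full NUMA hinting fault,
	 * as the hinting fault information is unrelated to the reference
	 * behaviour of a task using the address space.
	 */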
	if (!(gup_flags & FOLL_FORCE))
		gup_flags |= FOLL_NUMA;

	i = 0;

	do {
		struct vm_area_struct *vma;

		vma = find_extend_vma(mm, start);
		if (!vma && in_gate_area(mm, start)) {
			unsigned long pg = start & PAGE_MASK;
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;
			pte_t *pte;

			/* user gate pages are read-only */
			if (gup_flags & FOLL_WRITE)
				return i ? : -EFAULT;
			if (pg > TASK_SIZE)
				pgd = pgd_offset_k(pg);
			else
				pgd = pgd_offset_gate(mm, pg);
			BUG_ON(pgd_none(*pgd));
			pud = pud_offset(pgd, pg);
			BUG_ON(pud_none(*pud));
			pmd = pmd_offset(pud, pg);
			if (pmd_none(*pmd))
				return i ? : -EFAULT;
			VM_BUG_ON(pmd_trans_huge(*pmd));
			pte = pte_offset_map(pmd, pg);
			if (pte_none(*pte)) {
				pte_unmap(pte);
				return i ? : -EFAULT;
			}
			vma = get_gate_vma(mm);
			if (pages) {
				struct page *page;

				page = vm_normal_page(vma, start, *pte);
				if (!page) {
					if (!(gup_flags & FOLL_DUMP) &&
					    is_zero_pfn(pte_pfn(*pte)))
						page = pte_page(*pte);
					else {
						pte_unmap(pte);
						return i ? : -EFAULT;
					}
				}
				pages[i] = page;
				get_page(page);
			}
			pte_unmap(pte);
			page_mask = 0;
			goto next_page;
		}
		if (!vma ||
		    (vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    !(vm_flags & vma->vm_flags))
			return i ? : -EFAULT;

		/*
		 * gups are always data accesses, not instruction fetches,
		 * so execute=false here.
		 */
		if (!arch_vma_access_permitted(vma, write, false, foreign))
			return i ? : -EFAULT;

		if (is_vm_hugetlb_page(vma)) {
			i = follow_hugetlb_page(mm, vma, pages, vmas,
					&start, &nr_pages, i,
					gup_flags, nonblocking);
			continue;
		}
		do {
			struct page *page;
			unsigned int foll_flags = gup_flags;
			unsigned int page_increm;

			/*
			 * If we have a pending SIGKILL, don't keep faulting
			 * pages and potentially allocating memory.
			 */
			if (unlikely(fatal_signal_pending(current)))
				return i ? i : -ERESTARTSYS;

			cond_resched();
			while (!(page = follow_page_mask(vma, start,
						foll_flags, &page_mask))) {
				int ret;
				unsigned int fault_flags = 0;

				if (foll_flags & FOLL_WRITE)
					fault_flags |= FAULT_FLAG_WRITE;
				if (foll_flags & FOLL_REMOTE)
					fault_flags |= FAULT_FLAG_REMOTE;
				if (nonblocking)
					fault_flags |= FAULT_FLAG_ALLOW_RETRY;
				if (foll_flags & FOLL_NOWAIT)
					fault_flags |= (FAULT_FLAG_ALLOW_RETRY |
							FAULT_FLAG_RETRY_NOWAIT);
				if (foll_flags & FOLL_TRIED) {
					WARN_ON_ONCE(fault_flags &
						     FAULT_FLAG_ALLOW_RETRY);
					fault_flags |= FAULT_FLAG_TRIED;
				}

				ret = handle_mm_fault(vma, start, fault_flags);

				if (ret & VM_FAULT_ERROR) {
					if (ret & VM_FAULT_OOM)
						return i ? i : -ENOMEM;
					if (ret & (VM_FAULT_HWPOISON |
						   VM_FAULT_HWPOISON_LARGE)) {
						if (i)
							return i;
						else if (gup_flags & FOLL_HWPOISON)
							return -EHWPOISON;
						else
							return -EFAULT;
					}
					if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
						return i ? i : -EFAULT;
					BUG();
				}

				if (tsk) {
					if (ret & VM_FAULT_MAJOR)
						tsk->maj_flt++;
					else
						tsk->min_flt++;
				}

				if (ret & VM_FAULT_RETRY) {
					if (nonblocking)
						*nonblocking = 0;
					return i;
				}
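				/*
				 * The VM_FAULT_WRITE bit tells us that
				 * do_wp_page has broken COW when necessary,
				 * even if maybe_mkwrite decided not to set
				 * pte_write. We can thus safely do subsequent
				 * page lookups as if they were reads. But only
				 * do so when looping for pte_write is futile:
				 * in some cases userspace may also be wanting
				 * to write to the gotten user page, which a
				 * read fault here might prevent (a readonly
				 * page might get reCOWed by userspace write).
				 */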
				if ((ret & VM_FAULT_WRITE) &&
				    !(vma->vm_flags & VM_WRITE))
					foll_flags |= FOLL_COW;

				cond_resched();
			}
			if (PTR_ERR(page) == -EEXIST) {
				/*
				 * Proper page table entry exists, but no
				 * corresponding struct page.
				 */
				goto next_page;
			} else if (IS_ERR(page)) {
				return i ? i : PTR_ERR(page);
			}
			if (pages) {
				pages[i] = page;
				flush_anon_page(vma, page, start);
				flush_dcache_page(page);
				page_mask = 0;
			}
next_page:
			if (vmas) {
				vmas[i] = vma;
				page_mask = 0;
			}
			page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
			if (page_increm > nr_pages)
				page_increm = nr_pages;
			i += page_increm;
			start += page_increm * PAGE_SIZE;
			nr_pages -= page_increm;
		} while (nr_pages && start < vma->vm_end);
	} while (nr_pages);
	return i;
}
EXPORT_SYMBOL(__get_user_pages);
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
{
	bool write = !!(fault_flags & FAULT_FLAG_WRITE);
	bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))
		return false;

	/*
	 * The architecture might have a hardware protection
	 * mechanism other than read/write that can deny access.
	 *
	 * gup always represents data access, not instruction
	 * fetches, so execute=false here.
	 */
	if (!arch_vma_access_permitted(vma, write, false, foreign))
		return false;

	return true;
}
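/*
 * fixup_user_fault() - manually resolve a user page fault
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @address:	user address
 * @fault_flags:flags to pass down to handle_mm_fault()
 *
 * This is meant to be called in the specific scenario where for locking
 * reasons we try to access user memory in atomic context (within a
 * pagefault_disable() section), this returns -EFAULT, and we want to
 * resolve the user fault before trying again.
 *
 * Unlike get_user_pages(), this function unconditionally calls
 * handle_mm_fault(), which performs all the software fixup of the dirty
 * and young bits in the PTE.  On some architectures those bits also gate
 * the access permission to the page, so gup() alone is not enough to make
 * a subsequent access succeed.
 *
 * This should be called with the mm_sem held for read.
 */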
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
		     unsigned long address, unsigned int fault_flags)
{
	struct vm_area_struct *vma;
	int ret;

	vma = find_extend_vma(mm, address);
	if (!vma || address < vma->vm_start)
		return -EFAULT;

	if (!vma_permits_fault(vma, fault_flags))
		return -EFAULT;

	ret = handle_mm_fault(vma, address, fault_flags);
	if (ret & VM_FAULT_ERROR) {
		if (ret & VM_FAULT_OOM)
			return -ENOMEM;
		if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE))
			return -EHWPOISON;
		if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV))
			return -EFAULT;
		BUG();
	}
	if (tsk) {
		if (ret & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
						struct mm_struct *mm,
						unsigned long start,
						unsigned long nr_pages,
						int write, int force,
						struct page **pages,
						struct vm_area_struct **vmas,
						int *locked, bool notify_drop,
						unsigned int flags)
{
	long ret, pages_done;
	bool lock_dropped;

	if (locked) {
		/* if VM_FAULT_RETRY can be returned, vmas become invalid */
		BUG_ON(vmas);
		/* check caller initialized locked */
		BUG_ON(*locked != 1);
	}

	if (pages)
		flags |= FOLL_GET;
	if (write)
		flags |= FOLL_WRITE;
	if (force)
		flags |= FOLL_FORCE;

	pages_done = 0;
	lock_dropped = false;
	for (;;) {
		ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
				       vmas, locked);
		if (!locked)
			/* VM_FAULT_RETRY couldn't trigger, bypass */
			return ret;

		/* VM_FAULT_RETRY cannot return errors */
		if (!*locked) {
			BUG_ON(ret < 0);
			BUG_ON(ret >= nr_pages);
		}

		if (!pages)
			/* If it's a prefault don't insist harder */
			return ret;

		if (ret > 0) {
			nr_pages -= ret;
			pages_done += ret;
			if (!nr_pages)
				break;
		}
		if (*locked) {
			/* VM_FAULT_RETRY didn't trigger */
			if (!pages_done)
				pages_done = ret;
			break;
		}
		/* VM_FAULT_RETRY triggered, so seek to the faulting offset */
		pages += ret;
		start += ret << PAGE_SHIFT;

		/*
		 * Repeat on the address that fired VM_FAULT_RETRY
		 * without FAULT_FLAG_ALLOW_RETRY but with
		 * FAULT_FLAG_TRIED.
		 */
		*locked = 1;
		lock_dropped = true;
		down_read(&mm->mmap_sem);
		ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
				       pages, NULL, NULL);
		if (ret != 1) {
			BUG_ON(ret > 1);
			if (!pages_done)
				pages_done = ret;
			break;
		}
		nr_pages--;
		pages_done++;
		if (!nr_pages)
			break;
		pages++;
		start += PAGE_SIZE;
	}
	if (notify_drop && lock_dropped && *locked) {
		/*
		 * We must let the caller know we temporarily dropped the lock
		 * and so the critical section protected by it was lost.
		 */
		up_read(&mm->mmap_sem);
		*locked = 0;
	}
	return pages_done;
}
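/*
 * We can leverage the VM_FAULT_RETRY functionality in the page fault
 * paths better by using either get_user_pages_locked() or
 * get_user_pages_unlocked().
 *
 * get_user_pages_locked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  to:
 *
 *      int locked = 1;
 *      down_read(&mm->mmap_sem);
 *      do_something()
 *      get_user_pages_locked(tsk, mm, ..., pages, &locked);
 *      if (locked)
 *          up_read(&mm->mmap_sem);
 */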
long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm,
			   unsigned long start, unsigned long nr_pages,
			   int write, int force, struct page **pages,
			   int *locked)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
				       pages, NULL, locked, true, FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
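/*
 * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
 * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
 *
 * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
 * caller if required (just like with __get_user_pages). "FOLL_GET",
 * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
 * according to the parameters "pages", "write", "force" respectively.
 */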
__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
					       unsigned long start, unsigned long nr_pages,
					       int write, int force, struct page **pages,
					       unsigned int gup_flags)
{
	long ret;
	int locked = 1;

	down_read(&mm->mmap_sem);
	ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
				      pages, NULL, &locked, false, gup_flags);
	if (locked)
		up_read(&mm->mmap_sem);
	return ret;
}
EXPORT_SYMBOL(__get_user_pages_unlocked);
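/*
 * get_user_pages_unlocked() is suitable to replace the form:
 *
 *      down_read(&mm->mmap_sem);
 *      get_user_pages(tsk, mm, ..., pages, NULL);
 *      up_read(&mm->mmap_sem);
 *
 *  with:
 *
 *      get_user_pages_unlocked(tsk, mm, ..., pages);
 *
 * It is functionally equivalent to get_user_pages_fast, so
 * get_user_pages_fast should be used instead if the two parameters
 * "tsk" and "mm" are respectively equal to current and current->mm,
 * or if "force" has to be set (get_user_pages_fast has no "force"
 * parameter).
 */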
long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
			     unsigned long start, unsigned long nr_pages,
			     int write, int force, struct page **pages)
{
	return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
					 force, pages, FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_unlocked);
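/*
 * get_user_pages_remote() - pin user pages in memory
 * @tsk:	the task_struct to use for page fault accounting, or
 *		NULL if faults are not to be recorded.
 * @mm:		mm_struct of target mm
 * @start:	starting user address
 * @nr_pages:	number of pages from start to pin
 * @write:	whether pages will be written to by the caller
 * @force:	whether to force access even when user mapping is currently
 *		protected (but never write to write-protected pages unless
 *		the caller asks for write access)
 * @pages:	array that receives pointers to the pages pinned.
 *		Should be at least nr_pages long. Or NULL, if caller
 *		only intends to ensure the pages are faulted in.
 * @vmas:	array of pointers to vmas corresponding to each page.
 *		Or NULL if the caller does not require them.
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno. Each page returned must be released
 * with a put_page() call when it is finished with. vmas will only
 * remain valid while mmap_sem is held.
 *
 * Must be called with mmap_sem held for read or write.
 *
 * get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user
 * thread accesses the given user virtual address at that instant.
 *
 * This does not guarantee that the page exists in the user mappings when
 * get_user_pages returns, and there may even be a completely different
 * page there in some cases (eg. if mmapped pagecache has been invalidated
 * and subsequently re-faulted). However it does guarantee that the page
 * won't be freed completely, and mostly callers simply care that the page
 * contains data that was valid *at some point in time*.
 *
 * If write=0, the page must not be written to. If the page is written to,
 * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
 * after the page is finished with, and before put_page is called.
 *
 * get_user_pages is typically used for fewer-copy IO operations, to get a
 * handle on the memory by some means other than accesses via the user
 * virtual addresses. The pages may be submitted for DMA to devices or
 * accessed via their kernel linear mapping (via the kmap APIs).
 */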
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
			   unsigned long start, unsigned long nr_pages,
			   int write, int force, struct page **pages,
			   struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
				       pages, vmas, NULL, false,
				       FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
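/*
 * This is the same as get_user_pages_remote(), just with a
 * less-flexible calling convention where we assume that the task
 * and mm being operated on are the current task's.
 */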
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		    unsigned long start, unsigned long nr_pages,
		    int write, int force, struct page **pages,
		    struct vm_area_struct **vmas)
{
	return __get_user_pages_locked(tsk, mm, start, nr_pages,
				       write, force, pages, vmas, NULL, false,
				       FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
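/**
 * get_dump_page() - pin user page in memory while writing it to core dump
 * @addr: user address
 *
 * Returns struct page pointer of user page pinned for dump,
 * to be freed afterwards by put_page().
 *
 * Returns NULL on any kind of failure - a hole must then be inserted or
 * the zero page, or an error page with paging disabled.
 *
 * Called without mmap_sem, but after all other threads have been killed.
 */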
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
{
	struct vm_area_struct *vma;
	struct page *page;

	if (__get_user_pages(current, current->mm, addr, 1,
			     FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
			     NULL) < 1)
		return NULL;
	flush_cache_page(vma, addr, page_to_pfn(page));
	return page;
}
#endif /* CONFIG_ELF_CORE */