1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/export.h>
14#include <linux/compiler.h>
15#include <linux/dax.h>
16#include <linux/fs.h>
17#include <linux/sched/signal.h>
18#include <linux/uaccess.h>
19#include <linux/capability.h>
20#include <linux/kernel_stat.h>
21#include <linux/gfp.h>
22#include <linux/mm.h>
23#include <linux/swap.h>
24#include <linux/mman.h>
25#include <linux/pagemap.h>
26#include <linux/file.h>
27#include <linux/uio.h>
28#include <linux/error-injection.h>
29#include <linux/hash.h>
30#include <linux/writeback.h>
31#include <linux/backing-dev.h>
32#include <linux/pagevec.h>
33#include <linux/blkdev.h>
34#include <linux/security.h>
35#include <linux/cpuset.h>
36#include <linux/hugetlb.h>
37#include <linux/memcontrol.h>
38#include <linux/cleancache.h>
39#include <linux/shmem_fs.h>
40#include <linux/rmap.h>
41#include <linux/delayacct.h>
42#include <linux/psi.h>
43#include <linux/ramfs.h>
44#include <linux/page_idle.h>
45#include <asm/pgalloc.h>
46#include <asm/tlbflush.h>
47#include "internal.h"
48
49#define CREATE_TRACE_POINTS
50#include <trace/events/filemap.h>
51
52
53
54
55#include <linux/buffer_head.h>
56
57#include <asm/mman.h>
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static void page_cache_delete(struct address_space *mapping,
123 struct page *page, void *shadow)
124{
125 XA_STATE(xas, &mapping->i_pages, page->index);
126 unsigned int nr = 1;
127
128 mapping_set_update(&xas, mapping);
129
130
131 if (!PageHuge(page)) {
132 xas_set_order(&xas, page->index, compound_order(page));
133 nr = compound_nr(page);
134 }
135
136 VM_BUG_ON_PAGE(!PageLocked(page), page);
137 VM_BUG_ON_PAGE(PageTail(page), page);
138 VM_BUG_ON_PAGE(nr != 1 && shadow, page);
139
140 xas_store(&xas, shadow);
141 xas_init_marks(&xas);
142
143 page->mapping = NULL;
144
145 mapping->nrpages -= nr;
146}
147
148static void unaccount_page_cache_page(struct address_space *mapping,
149 struct page *page)
150{
151 int nr;
152
153
154
155
156
157
158 if (PageUptodate(page) && PageMappedToDisk(page))
159 cleancache_put_page(page);
160 else
161 cleancache_invalidate_page(mapping, page);
162
163 VM_BUG_ON_PAGE(PageTail(page), page);
164 VM_BUG_ON_PAGE(page_mapped(page), page);
165 if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
166 int mapcount;
167
168 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
169 current->comm, page_to_pfn(page));
170 dump_page(page, "still mapped when deleted");
171 dump_stack();
172 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
173
174 mapcount = page_mapcount(page);
175 if (mapping_exiting(mapping) &&
176 page_count(page) >= mapcount + 2) {
177
178
179
180
181
182
183 page_mapcount_reset(page);
184 page_ref_sub(page, mapcount);
185 }
186 }
187
188
189 if (PageHuge(page))
190 return;
191
192 nr = thp_nr_pages(page);
193
194 __mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
195 if (PageSwapBacked(page)) {
196 __mod_lruvec_page_state(page, NR_SHMEM, -nr);
197 if (PageTransHuge(page))
198 __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr);
199 } else if (PageTransHuge(page)) {
200 __mod_lruvec_page_state(page, NR_FILE_THPS, -nr);
201 filemap_nr_thps_dec(mapping);
202 }
203
204
205
206
207
208
209
210
211
212
213
214 if (WARN_ON_ONCE(PageDirty(page)))
215 account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
216}
217
218
219
220
221
222
223void __delete_from_page_cache(struct page *page, void *shadow)
224{
225 struct address_space *mapping = page->mapping;
226
227 trace_mm_filemap_delete_from_page_cache(page);
228
229 unaccount_page_cache_page(mapping, page);
230 page_cache_delete(mapping, page, shadow);
231}
232
233static void page_cache_free_page(struct address_space *mapping,
234 struct page *page)
235{
236 void (*freepage)(struct page *);
237
238 freepage = mapping->a_ops->freepage;
239 if (freepage)
240 freepage(page);
241
242 if (PageTransHuge(page) && !PageHuge(page)) {
243 page_ref_sub(page, thp_nr_pages(page));
244 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
245 } else {
246 put_page(page);
247 }
248}
249
250
251
252
253
254
255
256
257
258void delete_from_page_cache(struct page *page)
259{
260 struct address_space *mapping = page_mapping(page);
261 unsigned long flags;
262
263 BUG_ON(!PageLocked(page));
264 xa_lock_irqsave(&mapping->i_pages, flags);
265 __delete_from_page_cache(page, NULL);
266 xa_unlock_irqrestore(&mapping->i_pages, flags);
267
268 page_cache_free_page(mapping, page);
269}
270EXPORT_SYMBOL(delete_from_page_cache);
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/*
 * page_cache_delete_batch - remove all pages of @pvec from @mapping's xarray
 * @mapping: address space the pages belong to
 * @pvec:    pages to remove; the walk starts at the index of pvec->pages[0]
 *
 * Walks mapping->i_pages once, clearing every slot that holds the page
 * currently expected from @pvec, and finally subtracts the number of cleared
 * slots from mapping->nrpages.  The VM_BUG_ON_PAGE below shows the walk
 * relies on @pvec being in ascending index order.  The caller in this file
 * holds the i_pages lock around this call.
 */
286static void page_cache_delete_batch(struct address_space *mapping,
287 struct pagevec *pvec)
288{
289 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
 /* Number of xarray slots cleared so far. */
290 int total_pages = 0;
 /* Index into @pvec of the page we expect to meet next. */
291 int i = 0;
292 struct page *page;
293
294 mapping_set_update(&xas, mapping);
295 xas_for_each(&xas, page, ULONG_MAX) {
 /* Every page of the batch has been handled. */
296 if (i >= pagevec_count(pvec))
297 break;
298
299
 /* Value entries are not pages; leave them in place. */
300 if (xa_is_value(page))
301 continue;
302
303
304
305
306
307
308
 /*
 * Some other page sits in this slot.  It must not lie beyond the
 * page we are looking for, otherwise that page was missed.
 */
309 if (page != pvec->pages[i]) {
310 VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
311 page);
312 continue;
313 }
314
315 WARN_ON_ONCE(!PageLocked(page));
316
 /* Clear ->mapping once, on the slot matching page->index. */
317 if (page->index == xas.xa_index)
318 page->mapping = NULL;
319
320
321
322
323
324
325
 /* Only advance in @pvec after the page's last slot was visited. */
326 if (page->index + compound_nr(page) - 1 == xas.xa_index)
327 i++;
328 xas_store(&xas, NULL);
329 total_pages++;
330 }
331 mapping->nrpages -= total_pages;
332}
333
334void delete_from_page_cache_batch(struct address_space *mapping,
335 struct pagevec *pvec)
336{
337 int i;
338 unsigned long flags;
339
340 if (!pagevec_count(pvec))
341 return;
342
343 xa_lock_irqsave(&mapping->i_pages, flags);
344 for (i = 0; i < pagevec_count(pvec); i++) {
345 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
346
347 unaccount_page_cache_page(mapping, pvec->pages[i]);
348 }
349 page_cache_delete_batch(mapping, pvec);
350 xa_unlock_irqrestore(&mapping->i_pages, flags);
351
352 for (i = 0; i < pagevec_count(pvec); i++)
353 page_cache_free_page(mapping, pvec->pages[i]);
354}
355
356int filemap_check_errors(struct address_space *mapping)
357{
358 int ret = 0;
359
360 if (test_bit(AS_ENOSPC, &mapping->flags) &&
361 test_and_clear_bit(AS_ENOSPC, &mapping->flags))
362 ret = -ENOSPC;
363 if (test_bit(AS_EIO, &mapping->flags) &&
364 test_and_clear_bit(AS_EIO, &mapping->flags))
365 ret = -EIO;
366 return ret;
367}
368EXPORT_SYMBOL(filemap_check_errors);
369
370static int filemap_check_and_keep_errors(struct address_space *mapping)
371{
372
373 if (test_bit(AS_EIO, &mapping->flags))
374 return -EIO;
375 if (test_bit(AS_ENOSPC, &mapping->flags))
376 return -ENOSPC;
377 return 0;
378}
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
398 loff_t end, int sync_mode)
399{
400 int ret;
401 struct writeback_control wbc = {
402 .sync_mode = sync_mode,
403 .nr_to_write = LONG_MAX,
404 .range_start = start,
405 .range_end = end,
406 };
407
408 if (!mapping_can_writeback(mapping) ||
409 !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
410 return 0;
411
412 wbc_attach_fdatawrite_inode(&wbc, mapping->host);
413 ret = do_writepages(mapping, &wbc);
414 wbc_detach_inode(&wbc);
415 return ret;
416}
417
418static inline int __filemap_fdatawrite(struct address_space *mapping,
419 int sync_mode)
420{
421 return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
422}
423
424int filemap_fdatawrite(struct address_space *mapping)
425{
426 return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
427}
428EXPORT_SYMBOL(filemap_fdatawrite);
429
430int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
431 loff_t end)
432{
433 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
434}
435EXPORT_SYMBOL(filemap_fdatawrite_range);
436
437
438
439
440
441
442
443
444
445
446int filemap_flush(struct address_space *mapping)
447{
448 return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
449}
450EXPORT_SYMBOL(filemap_flush);
451
452
453
454
455
456
457
458
459
460
461
462
463
464bool filemap_range_has_page(struct address_space *mapping,
465 loff_t start_byte, loff_t end_byte)
466{
467 struct page *page;
468 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
469 pgoff_t max = end_byte >> PAGE_SHIFT;
470
471 if (end_byte < start_byte)
472 return false;
473
474 rcu_read_lock();
475 for (;;) {
476 page = xas_find(&xas, max);
477 if (xas_retry(&xas, page))
478 continue;
479
480 if (xa_is_value(page))
481 continue;
482
483
484
485
486
487 break;
488 }
489 rcu_read_unlock();
490
491 return page != NULL;
492}
493EXPORT_SYMBOL(filemap_range_has_page);
494
495static void __filemap_fdatawait_range(struct address_space *mapping,
496 loff_t start_byte, loff_t end_byte)
497{
498 pgoff_t index = start_byte >> PAGE_SHIFT;
499 pgoff_t end = end_byte >> PAGE_SHIFT;
500 struct pagevec pvec;
501 int nr_pages;
502
503 if (end_byte < start_byte)
504 return;
505
506 pagevec_init(&pvec);
507 while (index <= end) {
508 unsigned i;
509
510 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
511 end, PAGECACHE_TAG_WRITEBACK);
512 if (!nr_pages)
513 break;
514
515 for (i = 0; i < nr_pages; i++) {
516 struct page *page = pvec.pages[i];
517
518 wait_on_page_writeback(page);
519 ClearPageError(page);
520 }
521 pagevec_release(&pvec);
522 cond_resched();
523 }
524}
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
543 loff_t end_byte)
544{
545 __filemap_fdatawait_range(mapping, start_byte, end_byte);
546 return filemap_check_errors(mapping);
547}
548EXPORT_SYMBOL(filemap_fdatawait_range);
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
565 loff_t start_byte, loff_t end_byte)
566{
567 __filemap_fdatawait_range(mapping, start_byte, end_byte);
568 return filemap_check_and_keep_errors(mapping);
569}
570EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
589{
590 struct address_space *mapping = file->f_mapping;
591
592 __filemap_fdatawait_range(mapping, start_byte, end_byte);
593 return file_check_and_advance_wb_err(file);
594}
595EXPORT_SYMBOL(file_fdatawait_range);
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611int filemap_fdatawait_keep_errors(struct address_space *mapping)
612{
613 __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
614 return filemap_check_and_keep_errors(mapping);
615}
616EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
617
618
619static bool mapping_needs_writeback(struct address_space *mapping)
620{
621 return mapping->nrpages;
622}
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638bool filemap_range_needs_writeback(struct address_space *mapping,
639 loff_t start_byte, loff_t end_byte)
640{
641 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
642 pgoff_t max = end_byte >> PAGE_SHIFT;
643 struct page *page;
644
645 if (!mapping_needs_writeback(mapping))
646 return false;
647 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
648 !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
649 return false;
650 if (end_byte < start_byte)
651 return false;
652
653 rcu_read_lock();
654 xas_for_each(&xas, page, max) {
655 if (xas_retry(&xas, page))
656 continue;
657 if (xa_is_value(page))
658 continue;
659 if (PageDirty(page) || PageLocked(page) || PageWriteback(page))
660 break;
661 }
662 rcu_read_unlock();
663 return page != NULL;
664}
665EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680int filemap_write_and_wait_range(struct address_space *mapping,
681 loff_t lstart, loff_t lend)
682{
683 int err = 0;
684
685 if (mapping_needs_writeback(mapping)) {
686 err = __filemap_fdatawrite_range(mapping, lstart, lend,
687 WB_SYNC_ALL);
688
689
690
691
692
693
694 if (err != -EIO) {
695 int err2 = filemap_fdatawait_range(mapping,
696 lstart, lend);
697 if (!err)
698 err = err2;
699 } else {
700
701 filemap_check_errors(mapping);
702 }
703 } else {
704 err = filemap_check_errors(mapping);
705 }
706 return err;
707}
708EXPORT_SYMBOL(filemap_write_and_wait_range);
709
710void __filemap_set_wb_err(struct address_space *mapping, int err)
711{
712 errseq_t eseq = errseq_set(&mapping->wb_err, err);
713
714 trace_filemap_set_wb_err(mapping, eseq);
715}
716EXPORT_SYMBOL(__filemap_set_wb_err);
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742int file_check_and_advance_wb_err(struct file *file)
743{
744 int err = 0;
745 errseq_t old = READ_ONCE(file->f_wb_err);
746 struct address_space *mapping = file->f_mapping;
747
748
749 if (errseq_check(&mapping->wb_err, old)) {
750
751 spin_lock(&file->f_lock);
752 old = file->f_wb_err;
753 err = errseq_check_and_advance(&mapping->wb_err,
754 &file->f_wb_err);
755 trace_file_check_and_advance_wb_err(file, old);
756 spin_unlock(&file->f_lock);
757 }
758
759
760
761
762
763
764 clear_bit(AS_EIO, &mapping->flags);
765 clear_bit(AS_ENOSPC, &mapping->flags);
766 return err;
767}
768EXPORT_SYMBOL(file_check_and_advance_wb_err);
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
787{
788 int err = 0, err2;
789 struct address_space *mapping = file->f_mapping;
790
791 if (mapping_needs_writeback(mapping)) {
792 err = __filemap_fdatawrite_range(mapping, lstart, lend,
793 WB_SYNC_ALL);
794
795 if (err != -EIO)
796 __filemap_fdatawait_range(mapping, lstart, lend);
797 }
798 err2 = file_check_and_advance_wb_err(file);
799 if (!err)
800 err = err2;
801 return err;
802}
803EXPORT_SYMBOL(file_write_and_wait_range);
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818void replace_page_cache_page(struct page *old, struct page *new)
819{
820 struct address_space *mapping = old->mapping;
821 void (*freepage)(struct page *) = mapping->a_ops->freepage;
822 pgoff_t offset = old->index;
823 XA_STATE(xas, &mapping->i_pages, offset);
824 unsigned long flags;
825
826 VM_BUG_ON_PAGE(!PageLocked(old), old);
827 VM_BUG_ON_PAGE(!PageLocked(new), new);
828 VM_BUG_ON_PAGE(new->mapping, new);
829
830 get_page(new);
831 new->mapping = mapping;
832 new->index = offset;
833
834 mem_cgroup_migrate(old, new);
835
836 xas_lock_irqsave(&xas, flags);
837 xas_store(&xas, new);
838
839 old->mapping = NULL;
840
841 if (!PageHuge(old))
842 __dec_lruvec_page_state(old, NR_FILE_PAGES);
843 if (!PageHuge(new))
844 __inc_lruvec_page_state(new, NR_FILE_PAGES);
845 if (PageSwapBacked(old))
846 __dec_lruvec_page_state(old, NR_SHMEM);
847 if (PageSwapBacked(new))
848 __inc_lruvec_page_state(new, NR_SHMEM);
849 xas_unlock_irqrestore(&xas, flags);
850 if (freepage)
851 freepage(old);
852 put_page(old);
853}
854EXPORT_SYMBOL_GPL(replace_page_cache_page);
855
/*
 * __add_to_page_cache_locked - insert a locked page into a mapping
 * @page:    locked page to insert; must not be PageSwapBacked
 * @mapping: address space to insert into
 * @offset:  page index at which to insert
 * @gfp:     allocation flags for the memcg charge and the xarray nodes
 * @shadowp: if non-NULL, receives the value entry that occupied the slot;
 *           it is only written when such an entry was found
 *
 * Takes a reference on @page, sets page->mapping / page->index, charges the
 * page to a memcg (skipped for PageHuge pages) and stores it in
 * mapping->i_pages.  The store is retried for as long as xas_nomem() asks
 * for it.
 *
 * Returns 0 on success.  On failure the memcg charge (if any) is undone,
 * page->mapping is reset, the reference is dropped and the error is
 * returned: -EEXIST when a non-value entry already occupies the range,
 * otherwise whatever mem_cgroup_charge() or the xarray reported.
 */
856noinline int __add_to_page_cache_locked(struct page *page,
857 struct address_space *mapping,
858 pgoff_t offset, gfp_t gfp,
859 void **shadowp)
860{
861 XA_STATE(xas, &mapping->i_pages, offset);
862 int huge = PageHuge(page);
863 int error;
 /* Set once the memcg charge succeeded, so the error path can undo it. */
864 bool charged = false;
865
866 VM_BUG_ON_PAGE(!PageLocked(page), page);
867 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
868 mapping_set_update(&xas, mapping);
869
870 get_page(page);
871 page->mapping = mapping;
872 page->index = offset;
873
874 if (!huge) {
875 error = mem_cgroup_charge(page, NULL, gfp);
876 if (error)
877 goto error;
878 charged = true;
879 }
880
 /* From here on only the reclaim-related bits of @gfp are used. */
881 gfp &= GFP_RECLAIM_MASK;
882
883 do {
884 unsigned int order = xa_get_order(xas.xa, xas.xa_index);
885 void *entry, *old = NULL;
886
 /*
 * The entry at this index has a larger order than the page: prepare a
 * split.  This happens before the lock is taken.
 */
887 if (order > thp_order(page))
888 xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
889 order, gfp);
890 xas_lock_irq(&xas);
 /* Any conflicting entry that is not a value entry means -EEXIST. */
891 xas_for_each_conflict(&xas, entry) {
892 old = entry;
893 if (!xa_is_value(entry)) {
894 xas_set_err(&xas, -EEXIST);
895 goto unlock;
896 }
897 }
898
899 if (old) {
 /* Hand the displaced value entry back to the caller. */
900 if (shadowp)
901 *shadowp = old;
902
 /* Re-read the order now that the lock is held, then split. */
903 order = xa_get_order(xas.xa, xas.xa_index);
904 if (order > thp_order(page)) {
905 xas_split(&xas, old, order);
906 xas_reset(&xas);
907 }
908 }
909
910 xas_store(&xas, page);
911 if (xas_error(&xas))
912 goto unlock;
913
914 mapping->nrpages++;
915
916
 /* PageHuge pages are not counted in NR_FILE_PAGES. */
917 if (!huge)
918 __inc_lruvec_page_state(page, NR_FILE_PAGES);
919unlock:
920 xas_unlock_irq(&xas);
 /* Loop again while xas_nomem() requests a retry. */
921 } while (xas_nomem(&xas, gfp));
922
923 if (xas_error(&xas)) {
924 error = xas_error(&xas);
925 if (charged)
926 mem_cgroup_uncharge(page);
927 goto error;
928 }
929
930 trace_mm_filemap_add_to_page_cache(page);
931 return 0;
932error:
 /* Undo the setup done above; page->index is left as set. */
933 page->mapping = NULL;
934
935 put_page(page);
936 return error;
937}
938ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
939
940
941
942
943
944
945
946
947
948
949
950
951
952int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
953 pgoff_t offset, gfp_t gfp_mask)
954{
955 return __add_to_page_cache_locked(page, mapping, offset,
956 gfp_mask, NULL);
957}
958EXPORT_SYMBOL(add_to_page_cache_locked);
959
960int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
961 pgoff_t offset, gfp_t gfp_mask)
962{
963 void *shadow = NULL;
964 int ret;
965
966 __SetPageLocked(page);
967 ret = __add_to_page_cache_locked(page, mapping, offset,
968 gfp_mask, &shadow);
969 if (unlikely(ret))
970 __ClearPageLocked(page);
971 else {
972
973
974
975
976
977
978
979
980 WARN_ON_ONCE(PageActive(page));
981 if (!(gfp_mask & __GFP_WRITE) && shadow)
982 workingset_refault(page, shadow);
983 lru_cache_add(page);
984 }
985 return ret;
986}
987EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
988
#ifdef CONFIG_NUMA
/*
 * __page_cache_alloc - allocate an order-0 page for the page cache
 * @gfp: allocation flags
 *
 * Without cpuset memory spreading this is a plain alloc_pages().  With it,
 * the page is requested from the node picked by cpuset_mem_spread_node();
 * a failed attempt is repeated for as long as read_mems_allowed_retry()
 * asks for it.
 *
 * Returns the page, or NULL on failure.
 */
struct page *__page_cache_alloc(gfp_t gfp)
{
	unsigned int cpuset_mems_cookie;
	struct page *page;
	int nid;

	if (!cpuset_do_page_mem_spread())
		return alloc_pages(gfp, 0);

	do {
		cpuset_mems_cookie = read_mems_allowed_begin();
		nid = cpuset_mem_spread_node();
		page = __alloc_pages_node(nid, gfp, 0);
	} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

	return page;
}
EXPORT_SYMBOL(__page_cache_alloc);
#endif
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020#define PAGE_WAIT_TABLE_BITS 8
1021#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
1022static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
1023
1024static wait_queue_head_t *page_waitqueue(struct page *page)
1025{
1026 return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
1027}
1028
1029void __init pagecache_init(void)
1030{
1031 int i;
1032
1033 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
1034 init_waitqueue_head(&page_wait_table[i]);
1035
1036 page_writeback_init();
1037}
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
/*
 * wake_page_function - wake callback for page bit waiters
 * @wait: wait queue entry, embedded in a struct wait_page_queue
 * @mode: task state mask passed on to wake_up_state()
 * @sync: unused here
 * @arg:  the struct wait_page_key describing which page/bit was released
 *
 * Installed as wait->func by wait_on_page_bit_common().
 *
 * Returns 0 when the entry does not match the key, and also after waking a
 * non-exclusive waiter; returns non-zero after waking an exclusive waiter;
 * returns -1, without waking, when an exclusive waiter finds the bit set
 * (again).
 * NOTE(review): how the wait queue walker reacts to each return value is
 * not visible in this file - confirm against the wait queue core.
 */
1073static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
1074{
1075 unsigned int flags;
1076 struct wait_page_key *key = arg;
1077 struct wait_page_queue *wait_page
1078 = container_of(wait, struct wait_page_queue, wait);
1079
 /* Not waiting for this page/bit: leave the entry alone. */
1080 if (!wake_page_match(wait_page, key))
1081 return 0;
1082
1083
1084
1085
1086
 /*
 * An exclusive waiter is only woken while the bit is clear.  If it
 * also carries WQ_FLAG_CUSTOM, the bit is taken on its behalf right
 * here and WQ_FLAG_DONE tells the waiter that it already owns it.
 */
1087 flags = wait->flags;
1088 if (flags & WQ_FLAG_EXCLUSIVE) {
1089 if (test_bit(key->bit_nr, &key->page->flags))
1090 return -1;
1091 if (flags & WQ_FLAG_CUSTOM) {
1092 if (test_and_set_bit(key->bit_nr, &key->page->flags))
1093 return -1;
1094 flags |= WQ_FLAG_DONE;
1095 }
1096 }
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
 /*
 * Publish WQ_FLAG_WOKEN (and possibly WQ_FLAG_DONE) with release
 * semantics; wait_on_page_bit_common() reads wait->flags with
 * smp_load_acquire().
 */
1107 smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
1108 wake_up_state(wait->private, mode);
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
 /*
 * Unlink the entry only after the flags were published and the task
 * was woken.
 * NOTE(review): presumably the waiter may return as soon as it sees
 * the entry unlinked, which would make this the last safe access to
 * @wait - confirm.
 */
1120 list_del_init_careful(&wait->entry);
1121 return (flags & WQ_FLAG_EXCLUSIVE) != 0;
1122}
1123
1124static void wake_up_page_bit(struct page *page, int bit_nr)
1125{
1126 wait_queue_head_t *q = page_waitqueue(page);
1127 struct wait_page_key key;
1128 unsigned long flags;
1129 wait_queue_entry_t bookmark;
1130
1131 key.page = page;
1132 key.bit_nr = bit_nr;
1133 key.page_match = 0;
1134
1135 bookmark.flags = 0;
1136 bookmark.private = NULL;
1137 bookmark.func = NULL;
1138 INIT_LIST_HEAD(&bookmark.entry);
1139
1140 spin_lock_irqsave(&q->lock, flags);
1141 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1142
1143 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
1144
1145
1146
1147
1148
1149
1150 spin_unlock_irqrestore(&q->lock, flags);
1151 cpu_relax();
1152 spin_lock_irqsave(&q->lock, flags);
1153 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1154 }
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165 if (!waitqueue_active(q) || !key.page_match) {
1166 ClearPageWaiters(page);
1167
1168
1169
1170
1171
1172
1173
1174 }
1175 spin_unlock_irqrestore(&q->lock, flags);
1176}
1177
/* Wake waiters on @bit of @page, but only if PG_waiters says there are any. */
static void wake_up_page(struct page *page, int bit)
{
	if (PageWaiters(page))
		wake_up_page_bit(page, bit);
}
1184
1185
1186
1187
/*
 * How wait_on_page_bit_common() treats the bit and the page reference.
 */
enum behavior {
	/* Wait for the bit to clear, then set it (lock-style acquisition). */
	EXCLUSIVE = 0,
	/* Only wait for the bit to be clear; never set it. */
	SHARED = 1,
	/* Like SHARED, but put_page() the page once queued for the wait. */
	DROP = 2,
};
1199
1200
1201
1202
1203
1204static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
1205 struct wait_queue_entry *wait)
1206{
1207 if (wait->flags & WQ_FLAG_EXCLUSIVE) {
1208 if (test_and_set_bit(bit_nr, &page->flags))
1209 return false;
1210 } else if (test_bit(bit_nr, &page->flags))
1211 return false;
1212
1213 wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
1214 return true;
1215}
1216
1217
1218int sysctl_page_lock_unfairness = 5;
1219
/*
 * wait_on_page_bit_common - sleep until a page flag bit is released
 * @q:        the wait queue head @page hashes to
 * @page:     page whose bit is waited on
 * @bit_nr:   number of the page flag bit
 * @state:    task state to sleep in (decides which signals interrupt)
 * @behavior: EXCLUSIVE to also take the bit, SHARED to just wait, DROP to
 *            just wait and drop the caller's page reference meanwhile
 *
 * Returns 0 on success.  Returns -EINTR when the wait was left without
 * WQ_FLAG_DONE (EXCLUSIVE) or without WQ_FLAG_WOKEN (SHARED/DROP) being set
 * on the wait entry, i.e. when a pending signal ended the wait.
 */
1220static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1221 struct page *page, int bit_nr, int state, enum behavior behavior)
1222{
 /* Remaining plain (non-CUSTOM) attempts for an exclusive waiter. */
1223 int unfairness = sysctl_page_lock_unfairness;
1224 struct wait_page_queue wait_page;
1225 wait_queue_entry_t *wait = &wait_page.wait;
1226 bool thrashing = false;
1227 bool delayacct = false;
1228 unsigned long pflags;
1229
 /*
 * Waiting for the lock of a workingset page that is not uptodate is
 * accounted as a memory stall; delay accounting is only started when
 * the page is not swap backed.
 */
1230 if (bit_nr == PG_locked &&
1231 !PageUptodate(page) && PageWorkingset(page)) {
1232 if (!PageSwapBacked(page)) {
1233 delayacct_thrashing_start();
1234 delayacct = true;
1235 }
1236 psi_memstall_enter(&pflags);
1237 thrashing = true;
1238 }
1239
1240 init_wait(wait);
1241 wait->func = wake_page_function;
1242 wait_page.page = page;
1243 wait_page.bit_nr = bit_nr;
1244
1245repeat:
 /*
 * (Re)build the entry flags.  Once the unfairness budget is used up,
 * WQ_FLAG_CUSTOM makes wake_page_function() take the bit on behalf of
 * this waiter.
 */
1246 wait->flags = 0;
1247 if (behavior == EXCLUSIVE) {
1248 wait->flags = WQ_FLAG_EXCLUSIVE;
1249 if (--unfairness < 0)
1250 wait->flags |= WQ_FLAG_CUSTOM;
1251 }
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
 /*
 * Under the queue lock: set PG_waiters first, then try the bit once
 * more; only queue the entry (at the tail) if that attempt failed.
 */
1267 spin_lock_irq(&q->lock);
1268 SetPageWaiters(page);
1269 if (!trylock_page_bit_common(page, bit_nr, wait))
1270 __add_wait_queue_entry_tail(q, wait);
1271 spin_unlock_irq(&q->lock);
1272
1273
1274
1275
1276
1277
1278
1279
1280
 /*
 * DROP: the caller's reference goes away here.  @page is not touched
 * again on this path - the loop below only reads wait->flags unless
 * behavior is EXCLUSIVE.
 */
1281 if (behavior == DROP)
1282 put_page(page);
1283
1284
1285
1286
1287
1288
1289
1290 for (;;) {
1291 unsigned int flags;
1292
1293 set_current_state(state);
1294
1295
 /* Acquire-load; wake_page_function() stores with release. */
1296 flags = smp_load_acquire(&wait->flags);
1297 if (!(flags & WQ_FLAG_WOKEN)) {
 /* Not woken yet: give up on a signal, else sleep again. */
1298 if (signal_pending_state(state, current))
1299 break;
1300
1301 io_schedule();
1302 continue;
1303 }
1304
1305
 /* Non-exclusive waiters are done as soon as they were woken. */
1306 if (behavior != EXCLUSIVE)
1307 break;
1308
1309
 /* The bit was already taken for us (trylock or CUSTOM wakeup). */
1310 if (flags & WQ_FLAG_DONE)
1311 break;
1312
1313
1314
1315
1316
1317
1318
 /*
 * Woken without owning the bit: try to take it now.  If someone else
 * got there first, queue up again from the start.
 */
1319 if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
1320 goto repeat;
1321
1322 wait->flags |= WQ_FLAG_DONE;
1323 break;
1324 }
1325
1326
1327
1328
1329
1330
1331
1332 finish_wait(q, wait);
1333
1334 if (thrashing) {
1335 if (delayacct)
1336 delayacct_thrashing_end();
1337 psi_memstall_leave(&pflags);
1338 }
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
 /* Exclusive waiters succeed only if they actually own the bit. */
1353 if (behavior == EXCLUSIVE)
1354 return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;
1355
1356 return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
1357}
1358
1359void wait_on_page_bit(struct page *page, int bit_nr)
1360{
1361 wait_queue_head_t *q = page_waitqueue(page);
1362 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
1363}
1364EXPORT_SYMBOL(wait_on_page_bit);
1365
1366int wait_on_page_bit_killable(struct page *page, int bit_nr)
1367{
1368 wait_queue_head_t *q = page_waitqueue(page);
1369 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
1370}
1371EXPORT_SYMBOL(wait_on_page_bit_killable);
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386int put_and_wait_on_page_locked(struct page *page, int state)
1387{
1388 wait_queue_head_t *q;
1389
1390 page = compound_head(page);
1391 q = page_waitqueue(page);
1392 return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
1393}
1394
1395
1396
1397
1398
1399
1400
1401
1402void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
1403{
1404 wait_queue_head_t *q = page_waitqueue(page);
1405 unsigned long flags;
1406
1407 spin_lock_irqsave(&q->lock, flags);
1408 __add_wait_queue_entry_tail(q, waiter);
1409 SetPageWaiters(page);
1410 spin_unlock_irqrestore(&q->lock, flags);
1411}
1412EXPORT_SYMBOL_GPL(add_page_wait_queue);
1413
1414#ifndef clear_bit_unlock_is_negative_byte
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
1429{
1430 clear_bit_unlock(nr, mem);
1431
1432 return test_bit(PG_waiters, mem);
1433}
1434
1435#endif
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452void unlock_page(struct page *page)
1453{
1454 BUILD_BUG_ON(PG_waiters != 7);
1455 page = compound_head(page);
1456 VM_BUG_ON_PAGE(!PageLocked(page), page);
1457 if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
1458 wake_up_page_bit(page, PG_locked);
1459}
1460EXPORT_SYMBOL(unlock_page);
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473void end_page_private_2(struct page *page)
1474{
1475 page = compound_head(page);
1476 VM_BUG_ON_PAGE(!PagePrivate2(page), page);
1477 clear_bit_unlock(PG_private_2, &page->flags);
1478 wake_up_page_bit(page, PG_private_2);
1479 put_page(page);
1480}
1481EXPORT_SYMBOL(end_page_private_2);
1482
1483
1484
1485
1486
1487
1488
1489void wait_on_page_private_2(struct page *page)
1490{
1491 page = compound_head(page);
1492 while (PagePrivate2(page))
1493 wait_on_page_bit(page, PG_private_2);
1494}
1495EXPORT_SYMBOL(wait_on_page_private_2);
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508int wait_on_page_private_2_killable(struct page *page)
1509{
1510 int ret = 0;
1511
1512 page = compound_head(page);
1513 while (PagePrivate2(page)) {
1514 ret = wait_on_page_bit_killable(page, PG_private_2);
1515 if (ret < 0)
1516 break;
1517 }
1518
1519 return ret;
1520}
1521EXPORT_SYMBOL(wait_on_page_private_2_killable);
1522
1523
1524
1525
1526
1527void end_page_writeback(struct page *page)
1528{
1529
1530
1531
1532
1533
1534
1535
1536 if (PageReclaim(page)) {
1537 ClearPageReclaim(page);
1538 rotate_reclaimable_page(page);
1539 }
1540
1541
1542
1543
1544
1545
1546
1547 get_page(page);
1548 if (!test_clear_page_writeback(page))
1549 BUG();
1550
1551 smp_mb__after_atomic();
1552 wake_up_page(page, PG_writeback);
1553 put_page(page);
1554}
1555EXPORT_SYMBOL(end_page_writeback);
1556
1557
1558
1559
1560
1561void page_endio(struct page *page, bool is_write, int err)
1562{
1563 if (!is_write) {
1564 if (!err) {
1565 SetPageUptodate(page);
1566 } else {
1567 ClearPageUptodate(page);
1568 SetPageError(page);
1569 }
1570 unlock_page(page);
1571 } else {
1572 if (err) {
1573 struct address_space *mapping;
1574
1575 SetPageError(page);
1576 mapping = page_mapping(page);
1577 if (mapping)
1578 mapping_set_error(mapping, err);
1579 }
1580 end_page_writeback(page);
1581 }
1582}
1583EXPORT_SYMBOL_GPL(page_endio);
1584
1585
1586
1587
1588
1589void __lock_page(struct page *__page)
1590{
1591 struct page *page = compound_head(__page);
1592 wait_queue_head_t *q = page_waitqueue(page);
1593 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
1594 EXCLUSIVE);
1595}
1596EXPORT_SYMBOL(__lock_page);
1597
1598int __lock_page_killable(struct page *__page)
1599{
1600 struct page *page = compound_head(__page);
1601 wait_queue_head_t *q = page_waitqueue(page);
1602 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
1603 EXCLUSIVE);
1604}
1605EXPORT_SYMBOL_GPL(__lock_page_killable);
1606
1607int __lock_page_async(struct page *page, struct wait_page_queue *wait)
1608{
1609 struct wait_queue_head *q = page_waitqueue(page);
1610 int ret = 0;
1611
1612 wait->page = page;
1613 wait->bit_nr = PG_locked;
1614
1615 spin_lock_irq(&q->lock);
1616 __add_wait_queue_entry_tail(q, &wait->wait);
1617 SetPageWaiters(page);
1618 ret = !trylock_page(page);
1619
1620
1621
1622
1623
1624
1625 if (!ret)
1626 __remove_wait_queue(q, &wait->wait);
1627 else
1628 ret = -EIOCBQUEUED;
1629 spin_unlock_irq(&q->lock);
1630 return ret;
1631}
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
1645 unsigned int flags)
1646{
1647 if (fault_flag_allow_retry_first(flags)) {
1648
1649
1650
1651
1652 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1653 return 0;
1654
1655 mmap_read_unlock(mm);
1656 if (flags & FAULT_FLAG_KILLABLE)
1657 wait_on_page_locked_killable(page);
1658 else
1659 wait_on_page_locked(page);
1660 return 0;
1661 }
1662 if (flags & FAULT_FLAG_KILLABLE) {
1663 int ret;
1664
1665 ret = __lock_page_killable(page);
1666 if (ret) {
1667 mmap_read_unlock(mm);
1668 return 0;
1669 }
1670 } else {
1671 __lock_page(page);
1672 }
1673 return 1;
1674
1675}
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696pgoff_t page_cache_next_miss(struct address_space *mapping,
1697 pgoff_t index, unsigned long max_scan)
1698{
1699 XA_STATE(xas, &mapping->i_pages, index);
1700
1701 while (max_scan--) {
1702 void *entry = xas_next(&xas);
1703 if (!entry || xa_is_value(entry))
1704 break;
1705 if (xas.xa_index == 0)
1706 break;
1707 }
1708
1709 return xas.xa_index;
1710}
1711EXPORT_SYMBOL(page_cache_next_miss);
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732pgoff_t page_cache_prev_miss(struct address_space *mapping,
1733 pgoff_t index, unsigned long max_scan)
1734{
1735 XA_STATE(xas, &mapping->i_pages, index);
1736
1737 while (max_scan--) {
1738 void *entry = xas_prev(&xas);
1739 if (!entry || xa_is_value(entry))
1740 break;
1741 if (xas.xa_index == ULONG_MAX)
1742 break;
1743 }
1744
1745 return xas.xa_index;
1746}
1747EXPORT_SYMBOL(page_cache_prev_miss);
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762static struct page *mapping_get_entry(struct address_space *mapping,
1763 pgoff_t index)
1764{
1765 XA_STATE(xas, &mapping->i_pages, index);
1766 struct page *page;
1767
1768 rcu_read_lock();
1769repeat:
1770 xas_reset(&xas);
1771 page = xas_load(&xas);
1772 if (xas_retry(&xas, page))
1773 goto repeat;
1774
1775
1776
1777
1778 if (!page || xa_is_value(page))
1779 goto out;
1780
1781 if (!page_cache_get_speculative(page))
1782 goto repeat;
1783
1784
1785
1786
1787
1788
1789 if (unlikely(page != xas_reload(&xas))) {
1790 put_page(page);
1791 goto repeat;
1792 }
1793out:
1794 rcu_read_unlock();
1795
1796 return page;
1797}
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
/**
 * pagecache_get_page - find, and optionally lock or create, a page cache page
 * @mapping:   address space to search
 * @index:     page index
 * @fgp_flags: FGP_* flags selecting the behaviour, as handled below
 * @gfp_mask:  allocation flags used when FGP_CREAT has to allocate
 *
 * FGP_ENTRY    - return a value entry found in the slot instead of NULL
 * FGP_LOCK     - return the page locked
 * FGP_NOWAIT   - with FGP_LOCK: only trylock, return NULL if that fails
 * FGP_ACCESSED - mark an existing page accessed / a new page referenced
 * FGP_WRITE    - clear the idle flag of an existing page (when
 *                FGP_ACCESSED is not set); add __GFP_WRITE on creation if
 *                the mapping can do writeback
 * FGP_HEAD     - return the head page instead of the subpage for @index
 * FGP_CREAT    - allocate and insert a page if none is present
 * FGP_NOFS     - clear __GFP_FS from @gfp_mask on creation
 * FGP_FOR_MMAP - on creation, unlock the new page before returning it
 *
 * Return: the page with a reference held, a value entry (FGP_ENTRY only),
 * or NULL.
 */
1833struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
1834 int fgp_flags, gfp_t gfp_mask)
1835{
1836 struct page *page;
1837
1838repeat:
1839 page = mapping_get_entry(mapping, index);
 /* A value entry counts as "no page" unless the caller asked for it. */
1840 if (xa_is_value(page)) {
1841 if (fgp_flags & FGP_ENTRY)
1842 return page;
1843 page = NULL;
1844 }
1845 if (!page)
1846 goto no_page;
1847
1848 if (fgp_flags & FGP_LOCK) {
1849 if (fgp_flags & FGP_NOWAIT) {
1850 if (!trylock_page(page)) {
1851 put_page(page);
1852 return NULL;
1853 }
1854 } else {
1855 lock_page(page);
1856 }
1857
1858
 /*
 * The page may have left this mapping before the lock was obtained;
 * if so, start over.
 */
1859 if (unlikely(page->mapping != mapping)) {
1860 unlock_page(page);
1861 put_page(page);
1862 goto repeat;
1863 }
1864 VM_BUG_ON_PAGE(!thp_contains(page, index), page);
1865 }
1866
1867 if (fgp_flags & FGP_ACCESSED)
1868 mark_page_accessed(page);
1869 else if (fgp_flags & FGP_WRITE) {
1870
 /* A write without FGP_ACCESSED still ends the page's idle state. */
1871 if (page_is_idle(page))
1872 clear_page_idle(page);
1873 }
 /* Unless the head was requested, narrow down to the subpage. */
1874 if (!(fgp_flags & FGP_HEAD))
1875 page = find_subpage(page, index);
1876
1877no_page:
1878 if (!page && (fgp_flags & FGP_CREAT)) {
1879 int err;
1880 if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
1881 gfp_mask |= __GFP_WRITE;
1882 if (fgp_flags & FGP_NOFS)
1883 gfp_mask &= ~__GFP_FS;
1884
1885 page = __page_cache_alloc(gfp_mask);
1886 if (!page)
1887 return NULL;
1888
 /*
 * A created page comes back locked from add_to_page_cache_lru();
 * callers are expected to pass FGP_LOCK or FGP_FOR_MMAP.  Warn once
 * otherwise and behave as if FGP_LOCK had been given.
 */
1889 if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
1890 fgp_flags |= FGP_LOCK;
1891
1892
 /* The page is not visible to anyone yet, so use the non-atomic setter. */
1893 if (fgp_flags & FGP_ACCESSED)
1894 __SetPageReferenced(page);
1895
1896 err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
1897 if (unlikely(err)) {
1898 put_page(page);
1899 page = NULL;
 /* Someone else inserted a page meanwhile: look it up. */
1900 if (err == -EEXIST)
1901 goto repeat;
1902 }
1903
1904
1905
1906
1907
 /* FGP_FOR_MMAP callers get the new page unlocked. */
1908 if (page && (fgp_flags & FGP_FOR_MMAP))
1909 unlock_page(page);
1910 }
1911
1912 return page;
1913}
1914EXPORT_SYMBOL(pagecache_get_page);
1915
1916static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
1917 xa_mark_t mark)
1918{
1919 struct page *page;
1920
1921retry:
1922 if (mark == XA_PRESENT)
1923 page = xas_find(xas, max);
1924 else
1925 page = xas_find_marked(xas, max, mark);
1926
1927 if (xas_retry(xas, page))
1928 goto retry;
1929
1930
1931
1932
1933
1934 if (!page || xa_is_value(page))
1935 return page;
1936
1937 if (!page_cache_get_speculative(page))
1938 goto reset;
1939
1940
1941 if (unlikely(page != xas_reload(xas))) {
1942 put_page(page);
1943 goto reset;
1944 }
1945
1946 return page;
1947reset:
1948 xas_reset(xas);
1949 goto retry;
1950}
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
1979 pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
1980{
1981 XA_STATE(xas, &mapping->i_pages, start);
1982 struct page *page;
1983 unsigned int ret = 0;
1984 unsigned nr_entries = PAGEVEC_SIZE;
1985
1986 rcu_read_lock();
1987 while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
1988
1989
1990
1991
1992 if (!xa_is_value(page) && PageTransHuge(page) &&
1993 !PageHuge(page)) {
1994 page = find_subpage(page, xas.xa_index);
1995 nr_entries = ret + 1;
1996 }
1997
1998 indices[ret] = xas.xa_index;
1999 pvec->pages[ret] = page;
2000 if (++ret == nr_entries)
2001 break;
2002 }
2003 rcu_read_unlock();
2004
2005 pvec->nr = ret;
2006 return ret;
2007}
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
/**
 * find_lock_entries() - Gang lookup of page cache entries, locking the pages.
 * @mapping: Address space to search.
 * @start: First index to consider.
 * @end: Last index to consider.
 * @pvec: Pagevec the entries are appended to.
 * @indices: Receives the xarray index of every appended entry.
 *
 * Value entries are appended unchanged.  A page is appended, locked and
 * referenced, only if it lies entirely within [@start, @end], could be
 * locked with trylock_page(), still belongs to @mapping and is not under
 * writeback; otherwise it is skipped and its reference dropped.  After a
 * transparent huge page the walk continues beyond its last subpage.
 *
 * Return: The number of entries now held in @pvec.
 */
unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
		pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;

	rcu_read_lock();
	while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
		if (!xa_is_value(page)) {
			/* Skip pages that begin before the requested range. */
			if (page->index < start)
				goto put;
			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
			/* Skip pages that extend past the requested range. */
			if (page->index + thp_nr_pages(page) - 1 > end)
				goto put;
			if (!trylock_page(page))
				goto put;
			if (page->mapping != mapping || PageWriteback(page))
				goto unlock;
			VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
					page);
		}
		indices[pvec->nr] = xas.xa_index;
		/* pagevec_add() returning 0 ends the walk (presumably: pagevec full). */
		if (!pagevec_add(pvec, page))
			break;
		goto next;
unlock:
		unlock_page(page);
put:
		put_page(page);
next:
		if (!xa_is_value(page) && PageTransHuge(page)) {
			unsigned int nr_pages = thp_nr_pages(page);

			/* Jump past the THP; stop if the new index wrapped around. */
			xas_set(&xas, page->index + nr_pages);
			if (xas.xa_index < nr_pages)
				break;
		}
	}
	rcu_read_unlock();

	return pagevec_count(pvec);
}
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
2096 pgoff_t end, unsigned int nr_pages,
2097 struct page **pages)
2098{
2099 XA_STATE(xas, &mapping->i_pages, *start);
2100 struct page *page;
2101 unsigned ret = 0;
2102
2103 if (unlikely(!nr_pages))
2104 return 0;
2105
2106 rcu_read_lock();
2107 while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
2108
2109 if (xa_is_value(page))
2110 continue;
2111
2112 pages[ret] = find_subpage(page, xas.xa_index);
2113 if (++ret == nr_pages) {
2114 *start = xas.xa_index + 1;
2115 goto out;
2116 }
2117 }
2118
2119
2120
2121
2122
2123
2124
2125 if (end == (pgoff_t)-1)
2126 *start = (pgoff_t)-1;
2127 else
2128 *start = end + 1;
2129out:
2130 rcu_read_unlock();
2131
2132 return ret;
2133}
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
/**
 * find_get_pages_contig() - Gang lookup of a contiguous run of pages.
 * @mapping: Address space to search.
 * @index: Index of the first page.
 * @nr_pages: Capacity of @pages.
 * @pages: Receives the pages found.
 *
 * Walks forward from @index and stores pages, each with a reference held,
 * until an empty slot or a value entry is met or @pages is full.
 *
 * Return: The number of pages stored in @pages.
 */
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
			       unsigned int nr_pages, struct page **pages)
{
	XA_STATE(xas, &mapping->i_pages, index);
	struct page *page;
	unsigned int ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		if (xas_retry(&xas, page))
			continue;
		/* A value entry ends the contiguous run of pages. */
		if (xa_is_value(page))
			break;

		if (!page_cache_get_speculative(page))
			goto retry;

		/* The slot changed after we pinned the page: drop it and retry. */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

		pages[ret] = find_subpage(page, xas.xa_index);
		if (++ret == nr_pages)
			break;
		continue;
put_page:
		put_page(page);
retry:
		/* Reset the cursor before the loop's xas_next() runs again. */
		xas_reset(&xas);
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
2205 pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
2206 struct page **pages)
2207{
2208 XA_STATE(xas, &mapping->i_pages, *index);
2209 struct page *page;
2210 unsigned ret = 0;
2211
2212 if (unlikely(!nr_pages))
2213 return 0;
2214
2215 rcu_read_lock();
2216 while ((page = find_get_entry(&xas, end, tag))) {
2217
2218
2219
2220
2221
2222 if (xa_is_value(page))
2223 continue;
2224
2225 pages[ret] = page;
2226 if (++ret == nr_pages) {
2227 *index = page->index + thp_nr_pages(page);
2228 goto out;
2229 }
2230 }
2231
2232
2233
2234
2235
2236
2237
2238 if (end == (pgoff_t)-1)
2239 *index = (pgoff_t)-1;
2240 else
2241 *index = end + 1;
2242out:
2243 rcu_read_unlock();
2244
2245 return ret;
2246}
2247EXPORT_SYMBOL(find_get_pages_range_tag);
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264static void shrink_readahead_size_eio(struct file_ra_state *ra)
2265{
2266 ra->ra_pages /= 4;
2267}
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
/*
 * filemap_get_read_batch - Collect a run of pages for a read.
 * @mapping: Address space to search.
 * @index: Index of the first page wanted.
 * @max: Last index to consider (inclusive: the walk stops once the cursor
 *       index is greater than @max).
 * @pvec: Pagevec the referenced pages are appended to.
 *
 * Walks forward from @index under rcu_read_lock() and appends each page
 * found, with a reference held.  The walk stops at the first empty slot or
 * value entry, when @max is passed, when pagevec_add() returns 0, or right
 * after appending a page that is not uptodate or has the readahead flag set.
 */
static void filemap_get_read_batch(struct address_space *mapping,
		pgoff_t index, pgoff_t max, struct pagevec *pvec)
{
	XA_STATE(xas, &mapping->i_pages, index);
	struct page *head;

	rcu_read_lock();
	for (head = xas_load(&xas); head; head = xas_next(&xas)) {
		if (xas_retry(&xas, head))
			continue;
		if (xas.xa_index > max || xa_is_value(head))
			break;
		if (!page_cache_get_speculative(head))
			goto retry;

		/* The slot changed after we pinned the page: drop it and retry. */
		if (unlikely(head != xas_reload(&xas)))
			goto put_page;

		if (!pagevec_add(pvec, head))
			break;
		/* These pages are kept in the batch but end it; the caller deals with them. */
		if (!PageUptodate(head))
			break;
		if (PageReadahead(head))
			break;
		/* Move the cursor to the last index covered by this (possibly huge) page. */
		xas.xa_index = head->index + thp_nr_pages(head) - 1;
		xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}
	rcu_read_unlock();
}
2313
2314static int filemap_read_page(struct file *file, struct address_space *mapping,
2315 struct page *page)
2316{
2317 int error;
2318
2319
2320
2321
2322
2323
2324 ClearPageError(page);
2325
2326 error = mapping->a_ops->readpage(file, page);
2327 if (error)
2328 return error;
2329
2330 error = wait_on_page_locked_killable(page);
2331 if (error)
2332 return error;
2333 if (PageUptodate(page))
2334 return 0;
2335 shrink_readahead_size_eio(&file->f_ra);
2336 return -EIO;
2337}
2338
2339static bool filemap_range_uptodate(struct address_space *mapping,
2340 loff_t pos, struct iov_iter *iter, struct page *page)
2341{
2342 int count;
2343
2344 if (PageUptodate(page))
2345 return true;
2346
2347 if (iov_iter_is_pipe(iter))
2348 return false;
2349 if (!mapping->a_ops->is_partially_uptodate)
2350 return false;
2351 if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
2352 return false;
2353
2354 count = iter->count;
2355 if (page_offset(page) > pos) {
2356 count -= page_offset(page) - pos;
2357 pos = 0;
2358 } else {
2359 pos -= page_offset(page);
2360 }
2361
2362 return mapping->a_ops->is_partially_uptodate(page, pos, count);
2363}
2364
/*
 * filemap_update_page - Bring a not-uptodate page up to date for a read.
 * @iocb: The read request; its ki_flags decide whether blocking is allowed.
 * @mapping: Address space the page is expected to belong to.
 * @iter: Destination iterator (used to size the partial-uptodate check).
 * @page: Referenced page to update.
 *
 * Return: 0 if the needed range is (now) uptodate; -EAGAIN when the lock or
 * the read would have to block and the iocb flags forbid that;
 * AOP_TRUNCATED_PAGE when the page should be looked up again - on that
 * return the page reference has already been dropped here; or another error
 * from __lock_page_async() or filemap_read_page().
 */
static int filemap_update_page(struct kiocb *iocb,
		struct address_space *mapping, struct iov_iter *iter,
		struct page *page)
{
	int error;

	if (!trylock_page(page)) {
		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
			return -EAGAIN;
		if (!(iocb->ki_flags & IOCB_WAITQ)) {
			/* Drops our reference while waiting, so the caller must retry the lookup. */
			put_and_wait_on_page_locked(page, TASK_KILLABLE);
			return AOP_TRUNCATED_PAGE;
		}
		error = __lock_page_async(page, iocb->ki_waitq);
		if (error)
			return error;
	}

	/* The page was removed from the page cache while we were getting the lock. */
	if (!page->mapping)
		goto truncated;

	error = 0;
	if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
		goto unlock;

	error = -EAGAIN;
	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
		goto unlock;

	/* No unlock_page() on this path; presumably ->readpage unlocks the page - TODO confirm. */
	error = filemap_read_page(iocb->ki_filp, mapping, page);
	if (error == AOP_TRUNCATED_PAGE)
		put_page(page);
	return error;
truncated:
	unlock_page(page);
	put_page(page);
	return AOP_TRUNCATED_PAGE;
unlock:
	unlock_page(page);
	return error;
}
2406
2407static int filemap_create_page(struct file *file,
2408 struct address_space *mapping, pgoff_t index,
2409 struct pagevec *pvec)
2410{
2411 struct page *page;
2412 int error;
2413
2414 page = page_cache_alloc(mapping);
2415 if (!page)
2416 return -ENOMEM;
2417
2418 error = add_to_page_cache_lru(page, mapping, index,
2419 mapping_gfp_constraint(mapping, GFP_KERNEL));
2420 if (error == -EEXIST)
2421 error = AOP_TRUNCATED_PAGE;
2422 if (error)
2423 goto error;
2424
2425 error = filemap_read_page(file, mapping, page);
2426 if (error)
2427 goto error;
2428
2429 pagevec_add(pvec, page);
2430 return 0;
2431error:
2432 put_page(page);
2433 return error;
2434}
2435
2436static int filemap_readahead(struct kiocb *iocb, struct file *file,
2437 struct address_space *mapping, struct page *page,
2438 pgoff_t last_index)
2439{
2440 if (iocb->ki_flags & IOCB_NOIO)
2441 return -EAGAIN;
2442 page_cache_async_readahead(mapping, &file->f_ra, file, page,
2443 page->index, last_index - page->index);
2444 return 0;
2445}
2446
2447static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
2448 struct pagevec *pvec)
2449{
2450 struct file *filp = iocb->ki_filp;
2451 struct address_space *mapping = filp->f_mapping;
2452 struct file_ra_state *ra = &filp->f_ra;
2453 pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
2454 pgoff_t last_index;
2455 struct page *page;
2456 int err = 0;
2457
2458 last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
2459retry:
2460 if (fatal_signal_pending(current))
2461 return -EINTR;
2462
2463 filemap_get_read_batch(mapping, index, last_index, pvec);
2464 if (!pagevec_count(pvec)) {
2465 if (iocb->ki_flags & IOCB_NOIO)
2466 return -EAGAIN;
2467 page_cache_sync_readahead(mapping, ra, filp, index,
2468 last_index - index);
2469 filemap_get_read_batch(mapping, index, last_index, pvec);
2470 }
2471 if (!pagevec_count(pvec)) {
2472 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
2473 return -EAGAIN;
2474 err = filemap_create_page(filp, mapping,
2475 iocb->ki_pos >> PAGE_SHIFT, pvec);
2476 if (err == AOP_TRUNCATED_PAGE)
2477 goto retry;
2478 return err;
2479 }
2480
2481 page = pvec->pages[pagevec_count(pvec) - 1];
2482 if (PageReadahead(page)) {
2483 err = filemap_readahead(iocb, filp, mapping, page, last_index);
2484 if (err)
2485 goto err;
2486 }
2487 if (!PageUptodate(page)) {
2488 if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
2489 iocb->ki_flags |= IOCB_NOWAIT;
2490 err = filemap_update_page(iocb, mapping, iter, page);
2491 if (err)
2492 goto err;
2493 }
2494
2495 return 0;
2496err:
2497 if (err < 0)
2498 put_page(page);
2499 if (likely(--pvec->nr))
2500 return 0;
2501 if (err == AOP_TRUNCATED_PAGE)
2502 goto retry;
2503 return err;
2504}
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
/**
 * filemap_read() - Read data from the page cache.
 * @iocb: The read request; ki_pos is advanced by the number of bytes copied.
 * @iter: Destination for the data.
 * @already_read: Number of bytes already read by the caller; added to the
 *                count returned here.
 *
 * Copies data from the page cache into @iter, batch by batch, until @iter
 * is full, the end of the file is reached, or an error occurs.  Nothing is
 * read when the starting position is at or beyond the superblock's
 * s_maxbytes or when @iter is empty.
 *
 * Return: The total number of bytes copied including @already_read, or,
 * if that total is zero, the error code (0 if there was none).
 */
ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
		ssize_t already_read)
{
	struct file *filp = iocb->ki_filp;
	struct file_ra_state *ra = &filp->f_ra;
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct pagevec pvec;
	int i, error = 0;
	bool writably_mapped;
	loff_t isize, end_offset;

	if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
		return 0;
	if (unlikely(!iov_iter_count(iter)))
		return 0;

	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
	pagevec_init(&pvec);

	do {
		cond_resched();

		/*
		 * Once some data has been read, a waitqueue-based request
		 * must not block again: switch it to IOCB_NOWAIT.
		 */
		if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
			iocb->ki_flags |= IOCB_NOWAIT;

		error = filemap_get_pages(iocb, iter, &pvec);
		if (error < 0)
			break;

		/*
		 * i_size is sampled after the pages were obtained; the copy
		 * below is clamped to it.
		 */
		isize = i_size_read(inode);
		if (unlikely(iocb->ki_pos >= isize))
			goto put_pages;
		end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);

		/* Decides below whether the data cache is flushed before copying. */
		writably_mapped = mapping_writably_mapped(mapping);

		/*
		 * Mark the first page accessed only when this read starts on a
		 * different page than the one the previous read ended on.
		 */
		if (iocb->ki_pos >> PAGE_SHIFT !=
		    ra->prev_pos >> PAGE_SHIFT)
			mark_page_accessed(pvec.pages[0]);

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			size_t page_size = thp_size(page);
			size_t offset = iocb->ki_pos & (page_size - 1);
			size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
					     page_size - offset);
			size_t copied;

			/* This page lies beyond the end of the data to copy. */
			if (end_offset < page_offset(page))
				break;
			if (i > 0)
				mark_page_accessed(page);
			/*
			 * The mapping is writably mapped: flush the data cache
			 * for every subpage before reading it from the kernel side.
			 */
			if (writably_mapped) {
				int j;

				for (j = 0; j < thp_nr_pages(page); j++)
					flush_dcache_page(page + j);
			}

			copied = copy_page_to_iter(page, offset, bytes, iter);

			already_read += copied;
			iocb->ki_pos += copied;
			ra->prev_pos = iocb->ki_pos;

			/* A short copy is reported as a fault on the destination. */
			if (copied < bytes) {
				error = -EFAULT;
				break;
			}
		}
put_pages:
		for (i = 0; i < pagevec_count(&pvec); i++)
			put_page(pvec.pages[i]);
		pagevec_reinit(&pvec);
	} while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);

	file_accessed(filp);

	return already_read ? already_read : error;
}
EXPORT_SYMBOL_GPL(filemap_read);
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
/**
 * generic_file_read_iter() - Generic filesystem read routine.
 * @iocb: The read request.
 * @iter: Destination for the data.
 *
 * For IOCB_DIRECT requests the affected range is first written back (or,
 * with IOCB_NOWAIT, -EAGAIN is returned if the range needs writeback) and
 * the read is then handed to the address space's ->direct_IO operation.
 * If the direct read fails, completes the request, reaches the file size
 * sampled beforehand, or the inode is DAX, its result is returned.  In all
 * other cases - including non-direct requests - the (remaining) data is
 * read through filemap_read().
 *
 * Return: The number of bytes read, 0 for an empty @iter, or a negative
 * error code.
 */
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	size_t count = iov_iter_count(iter);
	ssize_t retval = 0;

	if (!count)
		return 0; /* skip atime */

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct file *file = iocb->ki_filp;
		struct address_space *mapping = file->f_mapping;
		struct inode *inode = mapping->host;
		loff_t size;

		size = i_size_read(inode);
		if (iocb->ki_flags & IOCB_NOWAIT) {
			/* Cannot wait for writeback: bail out if any is needed. */
			if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
						iocb->ki_pos + count - 1))
				return -EAGAIN;
		} else {
			retval = filemap_write_and_wait_range(mapping,
						iocb->ki_pos,
					        iocb->ki_pos + count - 1);
			if (retval < 0)
				return retval;
		}

		file_accessed(file);

		retval = mapping->a_ops->direct_IO(iocb, iter);
		if (retval >= 0) {
			iocb->ki_pos += retval;
			count -= retval;
		}
		/*
		 * Unless the I/O was queued, rewind @iter so that exactly
		 * the bytes actually transferred are consumed from it.
		 */
		if (retval != -EIOCBQUEUED)
			iov_iter_revert(iter, count - iov_iter_count(iter));

		/*
		 * Return the direct I/O result on error, when everything was
		 * read, when the (previously sampled) file size was reached,
		 * or for DAX inodes.  Otherwise fall through and read the
		 * remainder through the page cache.
		 */
		if (retval < 0 || !count || iocb->ki_pos >= size ||
		    IS_DAX(inode))
			return retval;
	}

	return filemap_read(iocb, iter, retval);
}
EXPORT_SYMBOL(generic_file_read_iter);
2704
/*
 * page_seek_hole_data - Search one page cache entry for data or a hole.
 * @xas: Caller's iteration cursor; paused here before RCU is dropped.
 * @mapping: Address space being searched.
 * @page: The entry to examine (a page or a value entry).
 * @start: File position at which the search begins.
 * @end: File position just past this entry.
 * @seek_data: true to look for data, false to look for a hole.
 *
 * Value entries and uptodate pages count as data throughout.  A page that
 * is not uptodate counts as a hole throughout unless the address space
 * provides ->is_partially_uptodate, in which case the page is locked and
 * examined block by block.
 *
 * Called with rcu_read_lock() held; the lock is dropped and re-taken while
 * the page is locked.
 *
 * Return: The position of the first match within the entry, or a position
 * of at least @end if the entry contains none.
 */
static inline loff_t page_seek_hole_data(struct xa_state *xas,
		struct address_space *mapping, struct page *page,
		loff_t start, loff_t end, bool seek_data)
{
	const struct address_space_operations *ops = mapping->a_ops;
	size_t offset, bsz = i_blocksize(mapping->host);

	if (xa_is_value(page) || PageUptodate(page))
		return seek_data ? start : end;
	if (!ops->is_partially_uptodate)
		return seek_data ? end : start;

	/* lock_page() may sleep: pause the walk and leave the RCU section. */
	xas_pause(xas);
	rcu_read_unlock();
	lock_page(page);
	/* The page left this mapping meanwhile: report @start unchanged. */
	if (unlikely(page->mapping != mapping))
		goto unlock;

	/* Round the in-page offset down to a block boundary. */
	offset = offset_in_thp(page, start) & ~(bsz - 1);

	do {
		if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
			break;
		/* Advance @start to the beginning of the next block. */
		start = (start + bsz) & ~(bsz - 1);
		offset += bsz;
	} while (offset < thp_size(page));
unlock:
	unlock_page(page);
	rcu_read_lock();
	return start;
}
2736
2737static inline
2738unsigned int seek_page_size(struct xa_state *xas, struct page *page)
2739{
2740 if (xa_is_value(page))
2741 return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
2742 return thp_size(page);
2743}
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
/**
 * mapping_seek_hole_data() - Seek for data or a hole in the page cache.
 * @mapping: Address space to search.
 * @start: First byte to consider.
 * @end: Limit of the search (exclusive).
 * @whence: SEEK_DATA to look for data; any other value looks for a hole.
 *
 * Walks the page cache entries covering [@start, @end).  A gap between
 * entries counts as a hole.
 *
 * Return: The position found, clamped to @end if it lies beyond it;
 * -ENXIO if @end <= @start, or if data was sought and every entry was
 * walked without finding any.
 */
loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
		loff_t end, int whence)
{
	XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
	pgoff_t max = (end - 1) >> PAGE_SHIFT;
	bool seek_data = (whence == SEEK_DATA);
	struct page *page;

	if (end <= start)
		return -ENXIO;

	rcu_read_lock();
	while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
		loff_t pos = (u64)xas.xa_index << PAGE_SHIFT;
		unsigned int seek_size;

		/* There is a gap before this entry: that is a hole at @start. */
		if (start < pos) {
			if (!seek_data)
				goto unlock;
			start = pos;
		}

		seek_size = seek_page_size(&xas, page);
		/* pos becomes the first byte after this entry. */
		pos = round_up(pos + 1, seek_size);
		start = page_seek_hole_data(&xas, mapping, page, start, pos,
				seek_data);
		/* Found what we were looking for inside this entry. */
		if (start < pos)
			goto unlock;
		if (start >= end)
			break;
		/* Step over the remainder of a multi-page entry. */
		if (seek_size > PAGE_SIZE)
			xas_set(&xas, pos >> PAGE_SHIFT);
		if (!xa_is_value(page))
			put_page(page);
	}
	/* Only reached after the walk ended without a match inside an entry. */
	if (seek_data)
		start = -ENXIO;
unlock:
	rcu_read_unlock();
	/* Drop the reference still held when the loop was left early. */
	if (page && !xa_is_value(page))
		put_page(page);
	if (start > end)
		return end;
	return start;
}
2808
2809#ifdef CONFIG_MMU
/*
 * Threshold for the per-file mmap_miss counter: do_sync_mmap_readahead()
 * skips read-around once the counter exceeds this value, and stops
 * incrementing the counter at ten times this value.
 */
#define MMAP_LOTSAMISS  (100)
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
2823 struct file **fpin)
2824{
2825 if (trylock_page(page))
2826 return 1;
2827
2828
2829
2830
2831
2832
2833 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
2834 return 0;
2835
2836 *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
2837 if (vmf->flags & FAULT_FLAG_KILLABLE) {
2838 if (__lock_page_killable(page)) {
2839
2840
2841
2842
2843
2844
2845 if (*fpin == NULL)
2846 mmap_read_unlock(vmf->vma->vm_mm);
2847 return 0;
2848 }
2849 } else
2850 __lock_page(page);
2851 return 1;
2852}
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
2863{
2864 struct file *file = vmf->vma->vm_file;
2865 struct file_ra_state *ra = &file->f_ra;
2866 struct address_space *mapping = file->f_mapping;
2867 DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
2868 struct file *fpin = NULL;
2869 unsigned int mmap_miss;
2870
2871
2872 if (vmf->vma->vm_flags & VM_RAND_READ)
2873 return fpin;
2874 if (!ra->ra_pages)
2875 return fpin;
2876
2877 if (vmf->vma->vm_flags & VM_SEQ_READ) {
2878 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2879 page_cache_sync_ra(&ractl, ra->ra_pages);
2880 return fpin;
2881 }
2882
2883
2884 mmap_miss = READ_ONCE(ra->mmap_miss);
2885 if (mmap_miss < MMAP_LOTSAMISS * 10)
2886 WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
2887
2888
2889
2890
2891
2892 if (mmap_miss > MMAP_LOTSAMISS)
2893 return fpin;
2894
2895
2896
2897
2898 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2899 ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
2900 ra->size = ra->ra_pages;
2901 ra->async_size = ra->ra_pages / 4;
2902 ractl._index = ra->start;
2903 do_page_cache_ra(&ractl, ra->size, ra->async_size);
2904 return fpin;
2905}
2906
2907
2908
2909
2910
2911
2912static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
2913 struct page *page)
2914{
2915 struct file *file = vmf->vma->vm_file;
2916 struct file_ra_state *ra = &file->f_ra;
2917 struct address_space *mapping = file->f_mapping;
2918 struct file *fpin = NULL;
2919 unsigned int mmap_miss;
2920 pgoff_t offset = vmf->pgoff;
2921
2922
2923 if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
2924 return fpin;
2925 mmap_miss = READ_ONCE(ra->mmap_miss);
2926 if (mmap_miss)
2927 WRITE_ONCE(ra->mmap_miss, --mmap_miss);
2928 if (PageReadahead(page)) {
2929 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2930 page_cache_async_readahead(mapping, ra, file,
2931 page, offset, ra->ra_pages);
2932 }
2933 return fpin;
2934}
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
/**
 * filemap_fault() - Read in file data for page fault handling.
 * @vmf: The fault being handled; on success vmf->page is set to the page.
 *
 * Looks the faulting page up in the page cache, starting readahead as
 * appropriate, creating the page if it is missing and reading it if it is
 * not uptodate.
 *
 * Return: VM_FAULT_LOCKED (plus VM_FAULT_MAJOR when the page was not in the
 * cache) with vmf->page locked and referenced; VM_FAULT_SIGBUS if the
 * offset lies beyond the end of the file or the read failed; VM_FAULT_OOM
 * if no page could be obtained; or VM_FAULT_RETRY (possibly together with
 * VM_FAULT_MAJOR) when the fault has to be retried.
 */
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
	int error;
	struct file *file = vmf->vma->vm_file;
	struct file *fpin = NULL;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	pgoff_t offset = vmf->pgoff;
	pgoff_t max_off;
	struct page *page;
	vm_fault_t ret = 0;

	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off))
		return VM_FAULT_SIGBUS;

	/*
	 * Do we have something in the page cache already?
	 */
	page = find_get_page(mapping, offset);
	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
		/*
		 * Cache hit on the first attempt: possibly start
		 * asynchronous readahead from this page.
		 */
		fpin = do_async_mmap_readahead(vmf, page);
	} else if (!page) {
		/* No page in the page cache at all: this is a major fault. */
		count_vm_event(PGMAJFAULT);
		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
		fpin = do_sync_mmap_readahead(vmf);
retry_find:
		page = pagecache_get_page(mapping, offset,
					  FGP_CREAT|FGP_FOR_MMAP,
					  vmf->gfp_mask);
		if (!page) {
			if (fpin)
				goto out_retry;
			return VM_FAULT_OOM;
		}
	}

	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
		goto out_retry;

	/* Did it get truncated? */
	if (unlikely(compound_head(page)->mapping != mapping)) {
		unlock_page(page);
		put_page(page);
		goto retry_find;
	}
	VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);

	/*
	 * The page is locked and still in the page cache; make sure its
	 * contents are valid before handing it out.
	 */
	if (unlikely(!PageUptodate(page)))
		goto page_not_uptodate;

	/*
	 * A file was pinned, i.e. maybe_unlock_mmap_for_io() was used on the
	 * way here (presumably the mmap lock was dropped); the fault must be
	 * retried rather than completed.
	 */
	if (fpin) {
		unlock_page(page);
		goto out_retry;
	}

	/*
	 * Re-check the file size now that the page is locked, in case the
	 * file shrank since the check at the top.
	 */
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off)) {
		unlock_page(page);
		put_page(page);
		return VM_FAULT_SIGBUS;
	}

	vmf->page = page;
	return ret | VM_FAULT_LOCKED;

page_not_uptodate:
	/*
	 * The page is in the cache but not uptodate: read it synchronously.
	 * The page is not unlocked here; presumably ->readpage does that -
	 * TODO confirm.
	 */
	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	error = filemap_read_page(file, mapping, page);
	if (fpin)
		goto out_retry;
	put_page(page);

	if (!error || error == AOP_TRUNCATED_PAGE)
		goto retry_find;

	return VM_FAULT_SIGBUS;

out_retry:
	/*
	 * The fault is to be retried: drop the page reference (if any) and
	 * the pinned file (if any) before returning VM_FAULT_RETRY.
	 */
	if (page)
		put_page(page);
	if (fpin)
		fput(fpin);
	return ret | VM_FAULT_RETRY;
}
EXPORT_SYMBOL(filemap_fault);
3075
/*
 * filemap_map_pmd - PMD-level handling before mapping pages with PTEs.
 * @vmf: The fault being handled.
 * @page: Locked, referenced page found by the caller.
 *
 * Return: true if the caller must not go on to map PTEs; in that case
 * @page has been unlocked here (and its reference dropped, except when
 * do_set_pmd() succeeded).  false if a page table is in place and the
 * caller may map PTEs; @page is left locked and referenced.
 */
static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
{
	struct mm_struct *mm = vmf->vma->vm_mm;

	/* A huge page is already mapped at this PMD: nothing to do. */
	if (pmd_trans_huge(*vmf->pmd)) {
		unlock_page(page);
		put_page(page);
		return true;
	}

	if (pmd_none(*vmf->pmd) && PageTransHuge(page)) {
	    vm_fault_t ret = do_set_pmd(vmf, page);
	    if (!ret) {
		    /* Mapped as a huge page; the reference is kept (presumably owned by the mapping now). */
		    unlock_page(page);
		    return true;
	    }
	}

	if (pmd_none(*vmf->pmd)) {
		vmf->ptl = pmd_lock(mm, vmf->pmd);
		/* Re-check under the lock before installing the preallocated page table. */
		if (likely(pmd_none(*vmf->pmd))) {
			mm_inc_nr_ptes(mm);
			pmd_populate(mm, vmf->pmd, vmf->prealloc_pte);
			vmf->prealloc_pte = NULL;
		}
		spin_unlock(vmf->ptl);
	}

	/* The PMD does not (stably) point to a page table: give up on PTE mapping. */
	if (pmd_devmap_trans_unstable(vmf->pmd)) {
		unlock_page(page);
		put_page(page);
		return true;
	}

	return false;
}
3115
/*
 * next_uptodate_page - Find the next page that can be mapped right away.
 * @page: First candidate entry (as returned by an xarray lookup); NULL
 *        ends the search.
 * @mapping: Address space being walked.
 * @xas: Iteration cursor positioned at @page.
 * @end_pgoff: Last index to consider.
 *
 * Starting with @page, entries are examined until one is found that is a
 * page, could be referenced and locked without waiting, is uptodate, has
 * neither the readahead nor the HWPoison flag set, still belongs to
 * @mapping and lies within the current file size.  Everything else is
 * skipped.
 *
 * Return: The page, locked and with a reference held, or NULL when the
 * range is exhausted.
 */
static struct page *next_uptodate_page(struct page *page,
				       struct address_space *mapping,
				       struct xa_state *xas, pgoff_t end_pgoff)
{
	unsigned long max_idx;

	do {
		if (!page)
			return NULL;
		if (xas_retry(xas, page))
			continue;
		if (xa_is_value(page))
			continue;
		/* Cheap unlocked pre-check; the trylock below is authoritative. */
		if (PageLocked(page))
			continue;
		if (!page_cache_get_speculative(page))
			continue;
		/* The slot changed after we pinned the page: skip it. */
		if (unlikely(page != xas_reload(xas)))
			goto skip;
		if (!PageUptodate(page) || PageReadahead(page))
			goto skip;
		if (PageHWPoison(page))
			goto skip;
		if (!trylock_page(page))
			goto skip;
		/* Re-validate now that the page lock is held. */
		if (page->mapping != mapping)
			goto unlock;
		if (!PageUptodate(page))
			goto unlock;
		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (xas->xa_index >= max_idx)
			goto unlock;
		return page;
unlock:
		unlock_page(page);
skip:
		put_page(page);
	} while ((page = xas_next_entry(xas, end_pgoff)) != NULL);

	return NULL;
}
3158
3159static inline struct page *first_map_page(struct address_space *mapping,
3160 struct xa_state *xas,
3161 pgoff_t end_pgoff)
3162{
3163 return next_uptodate_page(xas_find(xas, end_pgoff),
3164 mapping, xas, end_pgoff);
3165}
3166
3167static inline struct page *next_map_page(struct address_space *mapping,
3168 struct xa_state *xas,
3169 pgoff_t end_pgoff)
3170{
3171 return next_uptodate_page(xas_next_entry(xas, end_pgoff),
3172 mapping, xas, end_pgoff);
3173}
3174
/**
 * filemap_map_pages() - Map already-cached pages around a fault.
 * @vmf: The fault being handled.
 * @start_pgoff: First file offset (in pages) to consider.
 * @end_pgoff: Last file offset (in pages) to consider.
 *
 * Walks the page cache over the given range and installs PTEs for the
 * pages accepted by next_uptodate_page() wherever the PTE is still empty.
 * The file's mmap_miss counter is decremented (not below zero) for each
 * candidate that is not HWPoison, and written back at the end.
 *
 * Return: VM_FAULT_NOPAGE if filemap_map_pmd() handled the mapping at PMD
 * level or if the faulting address itself was mapped here; 0 otherwise.
 */
vm_fault_t filemap_map_pages(struct vm_fault *vmf,
			     pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct vm_area_struct *vma = vmf->vma;
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	unsigned long addr;
	XA_STATE(xas, &mapping->i_pages, start_pgoff);
	struct page *head, *page;
	unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
	vm_fault_t ret = 0;

	rcu_read_lock();
	head = first_map_page(mapping, &xas, end_pgoff);
	if (!head)
		goto out;

	if (filemap_map_pmd(vmf, head)) {
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
	do {
		page = find_subpage(head, xas.xa_index);
		if (PageHWPoison(page))
			goto unlock;

		if (mmap_miss > 0)
			mmap_miss--;

		/* Advance address and PTE pointer by the distance walked since the last page. */
		addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
		vmf->pte += xas.xa_index - last_pgoff;
		last_pgoff = xas.xa_index;

		/* Something is already mapped here: leave it alone. */
		if (!pte_none(*vmf->pte))
			goto unlock;

		/* We're about to handle the fault */
		if (vmf->address == addr)
			ret = VM_FAULT_NOPAGE;

		do_set_pte(vmf, page, addr);
		/* The page reference is kept on this path (presumably owned by the new mapping). */
		update_mmu_cache(vma, addr, vmf->pte);
		unlock_page(head);
		continue;
unlock:
		unlock_page(head);
		put_page(head);
	} while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL);
	pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
	rcu_read_unlock();
	WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
	return ret;
}
EXPORT_SYMBOL(filemap_map_pages);
3235
3236vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
3237{
3238 struct address_space *mapping = vmf->vma->vm_file->f_mapping;
3239 struct page *page = vmf->page;
3240 vm_fault_t ret = VM_FAULT_LOCKED;
3241
3242 sb_start_pagefault(mapping->host->i_sb);
3243 file_update_time(vmf->vma->vm_file);
3244 lock_page(page);
3245 if (page->mapping != mapping) {
3246 unlock_page(page);
3247 ret = VM_FAULT_NOPAGE;
3248 goto out;
3249 }
3250
3251
3252
3253
3254
3255 set_page_dirty(page);
3256 wait_for_stable_page(page);
3257out:
3258 sb_end_pagefault(mapping->host->i_sb);
3259 return ret;
3260}
3261
/*
 * VM operations installed by generic_file_mmap(): faults, fault-around
 * mapping and write-enable notifications are all served from the page
 * cache by the filemap_* handlers in this file.
 */
const struct vm_operations_struct generic_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};
3267
3268
3269
3270int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
3271{
3272 struct address_space *mapping = file->f_mapping;
3273
3274 if (!mapping->a_ops->readpage)
3275 return -ENOEXEC;
3276 file_accessed(file);
3277 vma->vm_ops = &generic_file_vm_ops;
3278 return 0;
3279}
3280
3281
3282
3283
3284int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
3285{
3286 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
3287 return -EINVAL;
3288 return generic_file_mmap(file, vma);
3289}
3290#else
/* !CONFIG_MMU stub: write faults always fail with VM_FAULT_SIGBUS. */
vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
/* !CONFIG_MMU stub: generic file mmap is not implemented, returns -ENOSYS. */
int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
/* !CONFIG_MMU stub: read-only file mmap is not implemented, returns -ENOSYS. */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
3303#endif
3304
/* Exported here, after the #endif, so both the MMU and !MMU variants are covered. */
EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);
3308
3309static struct page *wait_on_page_read(struct page *page)
3310{
3311 if (!IS_ERR(page)) {
3312 wait_on_page_locked(page);
3313 if (!PageUptodate(page)) {
3314 put_page(page);
3315 page = ERR_PTR(-EIO);
3316 }
3317 }
3318 return page;
3319}
3320
/*
 * do_read_cache_page - Get an uptodate page from the page cache, reading it
 * in if necessary.
 * @mapping: Address space to read from.
 * @index: Page cache index.
 * @filler: Function used to read the page, or NULL to use the address
 *          space's ->readpage operation.
 * @data: First argument passed to @filler (or to ->readpage).
 * @gfp: Allocation flags for a new page and for inserting it.
 *
 * Return: The page, uptodate and with a reference held, after
 * mark_page_accessed(); or an ERR_PTR(): -ENOMEM if allocation failed, the
 * insertion error (other than -EEXIST, which restarts the lookup), a
 * negative error from the read function, or -EIO if the page is not
 * uptodate after the read.
 */
static struct page *do_read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data,
				gfp_t gfp)
{
	struct page *page;
	int err;
repeat:
	page = find_get_page(mapping, index);
	if (!page) {
		page = __page_cache_alloc(gfp);
		if (!page)
			return ERR_PTR(-ENOMEM);
		err = add_to_page_cache_lru(page, mapping, index, gfp);
		if (unlikely(err)) {
			put_page(page);
			/* Somebody else inserted a page first: look it up. */
			if (err == -EEXIST)
				goto repeat;
			/* Presumably -ENOMEM for the xarray node. */
			return ERR_PTR(err);
		}

		/* Also entered from below, with an existing locked page that is not uptodate. */
filler:
		if (filler)
			err = filler(data, page);
		else
			err = mapping->a_ops->readpage(data, page);

		if (err < 0) {
			put_page(page);
			return ERR_PTR(err);
		}

		page = wait_on_page_read(page);
		if (IS_ERR(page))
			return page;
		goto out;
	}
	if (PageUptodate(page))
		goto out;

	/*
	 * The page is in the cache but not uptodate.  Wait for whoever
	 * holds the page lock (possibly a read in flight) and check again
	 * before taking the lock ourselves.
	 */
	wait_on_page_locked(page);
	if (PageUptodate(page))
		goto out;

	/* Still not uptodate: take the lock and recheck under it. */
	lock_page(page);

	/* The page was removed from the page cache: start over. */
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		goto repeat;
	}

	/* Someone else brought the page uptodate while we waited for the lock. */
	if (PageUptodate(page)) {
		unlock_page(page);
		goto out;
	}

	/*
	 * We hold the lock on a page that is in the cache but not uptodate:
	 * clear any stale error flag and read it ourselves.
	 */
	ClearPageError(page);
	goto filler;

out:
	mark_page_accessed(page);
	return page;
}
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442struct page *read_cache_page(struct address_space *mapping,
3443 pgoff_t index,
3444 int (*filler)(void *, struct page *),
3445 void *data)
3446{
3447 return do_read_cache_page(mapping, index, filler, data,
3448 mapping_gfp_mask(mapping));
3449}
3450EXPORT_SYMBOL(read_cache_page);
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465struct page *read_cache_page_gfp(struct address_space *mapping,
3466 pgoff_t index,
3467 gfp_t gfp)
3468{
3469 return do_read_cache_page(mapping, index, NULL, NULL, gfp);
3470}
3471EXPORT_SYMBOL(read_cache_page_gfp);
3472
3473int pagecache_write_begin(struct file *file, struct address_space *mapping,
3474 loff_t pos, unsigned len, unsigned flags,
3475 struct page **pagep, void **fsdata)
3476{
3477 const struct address_space_operations *aops = mapping->a_ops;
3478
3479 return aops->write_begin(file, mapping, pos, len, flags,
3480 pagep, fsdata);
3481}
3482EXPORT_SYMBOL(pagecache_write_begin);
3483
3484int pagecache_write_end(struct file *file, struct address_space *mapping,
3485 loff_t pos, unsigned len, unsigned copied,
3486 struct page *page, void *fsdata)
3487{
3488 const struct address_space_operations *aops = mapping->a_ops;
3489
3490 return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
3491}
3492EXPORT_SYMBOL(pagecache_write_end);
3493
3494
3495
3496
3497void dio_warn_stale_pagecache(struct file *filp)
3498{
3499 static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
3500 char pathname[128];
3501 char *path;
3502
3503 errseq_set(&filp->f_mapping->wb_err, -EIO);
3504 if (__ratelimit(&_rs)) {
3505 path = file_path(filp, pathname, sizeof(pathname));
3506 if (IS_ERR(path))
3507 path = "(unknown)";
3508 pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
3509 pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
3510 current->comm);
3511 }
3512}
3513
3514ssize_t
3515generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
3516{
3517 struct file *file = iocb->ki_filp;
3518 struct address_space *mapping = file->f_mapping;
3519 struct inode *inode = mapping->host;
3520 loff_t pos = iocb->ki_pos;
3521 ssize_t written;
3522 size_t write_len;
3523 pgoff_t end;
3524
3525 write_len = iov_iter_count(from);
3526 end = (pos + write_len - 1) >> PAGE_SHIFT;
3527
3528 if (iocb->ki_flags & IOCB_NOWAIT) {
3529
3530 if (filemap_range_has_page(file->f_mapping, pos,
3531 pos + write_len - 1))
3532 return -EAGAIN;
3533 } else {
3534 written = filemap_write_and_wait_range(mapping, pos,
3535 pos + write_len - 1);
3536 if (written)
3537 goto out;
3538 }
3539
3540
3541
3542
3543
3544
3545
3546 written = invalidate_inode_pages2_range(mapping,
3547 pos >> PAGE_SHIFT, end);
3548
3549
3550
3551
3552 if (written) {
3553 if (written == -EBUSY)
3554 return 0;
3555 goto out;
3556 }
3557
3558 written = mapping->a_ops->direct_IO(iocb, from);
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577 if (written > 0 && mapping->nrpages &&
3578 invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
3579 dio_warn_stale_pagecache(file);
3580
3581 if (written > 0) {
3582 pos += written;
3583 write_len -= written;
3584 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
3585 i_size_write(inode, pos);
3586 mark_inode_dirty(inode);
3587 }
3588 iocb->ki_pos = pos;
3589 }
3590 if (written != -EIOCBQUEUED)
3591 iov_iter_revert(from, write_len - iov_iter_count(from));
3592out:
3593 return written;
3594}
3595EXPORT_SYMBOL(generic_file_direct_write);
3596
3597
3598
3599
3600
3601struct page *grab_cache_page_write_begin(struct address_space *mapping,
3602 pgoff_t index, unsigned flags)
3603{
3604 struct page *page;
3605 int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
3606
3607 if (flags & AOP_FLAG_NOFS)
3608 fgp_flags |= FGP_NOFS;
3609
3610 page = pagecache_get_page(mapping, index, fgp_flags,
3611 mapping_gfp_mask(mapping));
3612 if (page)
3613 wait_for_stable_page(page);
3614
3615 return page;
3616}
3617EXPORT_SYMBOL(grab_cache_page_write_begin);
3618
3619ssize_t generic_perform_write(struct file *file,
3620 struct iov_iter *i, loff_t pos)
3621{
3622 struct address_space *mapping = file->f_mapping;
3623 const struct address_space_operations *a_ops = mapping->a_ops;
3624 long status = 0;
3625 ssize_t written = 0;
3626 unsigned int flags = 0;
3627
3628 do {
3629 struct page *page;
3630 unsigned long offset;
3631 unsigned long bytes;
3632 size_t copied;
3633 void *fsdata;
3634
3635 offset = (pos & (PAGE_SIZE - 1));
3636 bytes = min_t(unsigned long, PAGE_SIZE - offset,
3637 iov_iter_count(i));
3638
3639again:
3640
3641
3642
3643
3644
3645
3646 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
3647 status = -EFAULT;
3648 break;
3649 }
3650
3651 if (fatal_signal_pending(current)) {
3652 status = -EINTR;
3653 break;
3654 }
3655
3656 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
3657 &page, &fsdata);
3658 if (unlikely(status < 0))
3659 break;
3660
3661 if (mapping_writably_mapped(mapping))
3662 flush_dcache_page(page);
3663
3664 copied = copy_page_from_iter_atomic(page, offset, bytes, i);
3665 flush_dcache_page(page);
3666
3667 status = a_ops->write_end(file, mapping, pos, bytes, copied,
3668 page, fsdata);
3669 if (unlikely(status != copied)) {
3670 iov_iter_revert(i, copied - max(status, 0L));
3671 if (unlikely(status < 0))
3672 break;
3673 }
3674 cond_resched();
3675
3676 if (unlikely(status == 0)) {
3677
3678
3679
3680
3681
3682
3683 if (copied)
3684 bytes = copied;
3685 goto again;
3686 }
3687 pos += status;
3688 written += status;
3689
3690 balance_dirty_pages_ratelimited(mapping);
3691 } while (iov_iter_count(i));
3692
3693 return written ? written : status;
3694}
3695EXPORT_SYMBOL(generic_perform_write);
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3719{
3720 struct file *file = iocb->ki_filp;
3721 struct address_space *mapping = file->f_mapping;
3722 struct inode *inode = mapping->host;
3723 ssize_t written = 0;
3724 ssize_t err;
3725 ssize_t status;
3726
3727
3728 current->backing_dev_info = inode_to_bdi(inode);
3729 err = file_remove_privs(file);
3730 if (err)
3731 goto out;
3732
3733 err = file_update_time(file);
3734 if (err)
3735 goto out;
3736
3737 if (iocb->ki_flags & IOCB_DIRECT) {
3738 loff_t pos, endbyte;
3739
3740 written = generic_file_direct_write(iocb, from);
3741
3742
3743
3744
3745
3746
3747
3748 if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
3749 goto out;
3750
3751 status = generic_perform_write(file, from, pos = iocb->ki_pos);
3752
3753
3754
3755
3756
3757
3758
3759 if (unlikely(status < 0)) {
3760 err = status;
3761 goto out;
3762 }
3763
3764
3765
3766
3767
3768 endbyte = pos + status - 1;
3769 err = filemap_write_and_wait_range(mapping, pos, endbyte);
3770 if (err == 0) {
3771 iocb->ki_pos = endbyte + 1;
3772 written += status;
3773 invalidate_mapping_pages(mapping,
3774 pos >> PAGE_SHIFT,
3775 endbyte >> PAGE_SHIFT);
3776 } else {
3777
3778
3779
3780
3781 }
3782 } else {
3783 written = generic_perform_write(file, from, iocb->ki_pos);
3784 if (likely(written > 0))
3785 iocb->ki_pos += written;
3786 }
3787out:
3788 current->backing_dev_info = NULL;
3789 return written ? written : err;
3790}
3791EXPORT_SYMBOL(__generic_file_write_iter);
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
3807{
3808 struct file *file = iocb->ki_filp;
3809 struct inode *inode = file->f_mapping->host;
3810 ssize_t ret;
3811
3812 inode_lock(inode);
3813 ret = generic_write_checks(iocb, from);
3814 if (ret > 0)
3815 ret = __generic_file_write_iter(iocb, from);
3816 inode_unlock(inode);
3817
3818 if (ret > 0)
3819 ret = generic_write_sync(iocb, ret);
3820 return ret;
3821}
3822EXPORT_SYMBOL(generic_file_write_iter);
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841int try_to_release_page(struct page *page, gfp_t gfp_mask)
3842{
3843 struct address_space * const mapping = page->mapping;
3844
3845 BUG_ON(!PageLocked(page));
3846 if (PageWriteback(page))
3847 return 0;
3848
3849 if (mapping && mapping->a_ops->releasepage)
3850 return mapping->a_ops->releasepage(page, gfp_mask);
3851 return try_to_free_buffers(page);
3852}
3853
3854EXPORT_SYMBOL(try_to_release_page);
3855