// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/error-injection.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include <linux/ramfs.h>
#include <linux/page_idle.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>

/*
 * Remove @page from the page cache.  The caller must hold the page lock and
 * the i_pages lock; @shadow, if non-NULL, is the shadow entry stored in the
 * page's place.
 */
static void page_cache_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	XA_STATE(xas, &mapping->i_pages, page->index);
	unsigned int nr = 1;

	mapping_set_update(&xas, mapping);

	/* hugetlb pages are represented by a single entry in the xarray */
	if (!PageHuge(page)) {
		xas_set_order(&xas, page->index, compound_order(page));
		nr = compound_nr(page);
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(nr != 1 && shadow, page);

	xas_store(&xas, shadow);
	xas_init_marks(&xas);

	page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	if (shadow) {
		mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages -= nr;
}
156
/*
 * Adjust the page-cache statistics and cleancache state for @page, which is
 * about to be removed from @mapping.  Called with the i_pages lock held.
 */
static void unaccount_page_cache_page(struct address_space *mapping,
				      struct page *page)
{
	int nr;

	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries.  We can't leave
	 * stale data around in the cleancache once our page is gone
	 */
	if (PageUptodate(page) && PageMappedToDisk(page))
		cleancache_put_page(page);
	else
		cleancache_invalidate_page(mapping, page);

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapped(page), page);
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
		int mapcount;

		pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
			 current->comm, page_to_pfn(page));
		dump_page(page, "still mapped when deleted");
		dump_stack();
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

		mapcount = page_mapcount(page);
		if (mapping_exiting(mapping) &&
		    page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
			page_mapcount_reset(page);
			page_ref_sub(page, mapcount);
		}
	}

	/* hugetlb pages do not participate in page cache accounting. */
	if (PageHuge(page))
		return;

	nr = thp_nr_pages(page);

	__mod_lruvec_page_state(page, NR_FILE_PAGES, -nr);
	if (PageSwapBacked(page)) {
		__mod_lruvec_page_state(page, NR_SHMEM, -nr);
		if (PageTransHuge(page))
			__dec_node_page_state(page, NR_SHMEM_THPS);
	} else if (PageTransHuge(page)) {
		__dec_node_page_state(page, NR_FILE_THPS);
		filemap_nr_thps_dec(mapping);
	}

	/*
	 * At this point page must be either written or cleaned by
	 * truncate.  Dirty page here signals a bug and loss of
	 * unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely
	 * but leaves PageDirty set: it has no effect for truncated
	 * page and anyway will be cleared before returning page into
	 * buddy allocator.
	 */
	if (WARN_ON_ONCE(PageDirty(page)))
		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
}

/*
 * Delete a page from the page cache and free it.  Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the i_pages lock.
 */
232void __delete_from_page_cache(struct page *page, void *shadow)
233{
234 struct address_space *mapping = page->mapping;
235
236 trace_mm_filemap_delete_from_page_cache(page);
237
238 unaccount_page_cache_page(mapping, page);
239 page_cache_delete(mapping, page, shadow);
240}
241
242static void page_cache_free_page(struct address_space *mapping,
243 struct page *page)
244{
245 void (*freepage)(struct page *);
246
247 freepage = mapping->a_ops->freepage;
248 if (freepage)
249 freepage(page);
250
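	/*
	 * A transparent huge page in the page cache holds one reference per
	 * subpage; drop them all at once.  The VM_BUG_ON() below asserts
	 * that the caller still holds its own reference to the compound page.
	 */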
251 if (PageTransHuge(page) && !PageHuge(page)) {
252 page_ref_sub(page, thp_nr_pages(page));
253 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
254 } else {
255 put_page(page);
256 }
257}
258
/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the
 * page cache and locked.  It will never put the page into the free list,
 * the caller has a reference on the page.
 */
267void delete_from_page_cache(struct page *page)
268{
269 struct address_space *mapping = page_mapping(page);
270 unsigned long flags;
271
272 BUG_ON(!PageLocked(page));
273 xa_lock_irqsave(&mapping->i_pages, flags);
274 __delete_from_page_cache(page, NULL);
275 xa_unlock_irqrestore(&mapping->i_pages, flags);
276
277 page_cache_free_page(mapping, page);
278}
279EXPORT_SYMBOL(delete_from_page_cache);
280
/*
 * page_cache_delete_batch - delete several pages from page cache
 * @mapping: the mapping to which pages belong
 * @pvec: pagevec with pages to delete
 *
 * The function walks over mapping->i_pages and removes pages passed in @pvec
 * from the mapping. The function expects @pvec to be sorted by page index
 * and is optimised for it to be dense.
 * It tolerates holes in @pvec (mapping entries at those indices are not
 * modified). The function expects only THP head pages to be present in the
 * @pvec.
 *
 * The function expects the i_pages lock to be held.
 */
295static void page_cache_delete_batch(struct address_space *mapping,
296 struct pagevec *pvec)
297{
298 XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
299 int total_pages = 0;
300 int i = 0;
301 struct page *page;
302
303 mapping_set_update(&xas, mapping);
304 xas_for_each(&xas, page, ULONG_MAX) {
305 if (i >= pagevec_count(pvec))
306 break;
307
308
309 if (xa_is_value(page))
310 continue;
311
312
313
314
315
316
317
318 if (page != pvec->pages[i]) {
319 VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
320 page);
321 continue;
322 }
323
324 WARN_ON_ONCE(!PageLocked(page));
325
326 if (page->index == xas.xa_index)
327 page->mapping = NULL;
328
329
330
331
332
333
334
335 if (page->index + compound_nr(page) - 1 == xas.xa_index)
336 i++;
337 xas_store(&xas, NULL);
338 total_pages++;
339 }
340 mapping->nrpages -= total_pages;
341}
342
343void delete_from_page_cache_batch(struct address_space *mapping,
344 struct pagevec *pvec)
345{
346 int i;
347 unsigned long flags;
348
349 if (!pagevec_count(pvec))
350 return;
351
352 xa_lock_irqsave(&mapping->i_pages, flags);
353 for (i = 0; i < pagevec_count(pvec); i++) {
354 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
355
356 unaccount_page_cache_page(mapping, pvec->pages[i]);
357 }
358 page_cache_delete_batch(mapping, pvec);
359 xa_unlock_irqrestore(&mapping->i_pages, flags);
360
361 for (i = 0; i < pagevec_count(pvec); i++)
362 page_cache_free_page(mapping, pvec->pages[i]);
363}
364
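/*
 * Report and clear any writeback error (AS_EIO / AS_ENOSPC) recorded on
 * @mapping.  -EIO takes precedence over -ENOSPC when both flags are set.
 */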
365int filemap_check_errors(struct address_space *mapping)
366{
367 int ret = 0;
368
369 if (test_bit(AS_ENOSPC, &mapping->flags) &&
370 test_and_clear_bit(AS_ENOSPC, &mapping->flags))
371 ret = -ENOSPC;
372 if (test_bit(AS_EIO, &mapping->flags) &&
373 test_and_clear_bit(AS_EIO, &mapping->flags))
374 ret = -EIO;
375 return ret;
376}
377EXPORT_SYMBOL(filemap_check_errors);
378
379static int filemap_check_and_keep_errors(struct address_space *mapping)
380{
381
382 if (test_bit(AS_EIO, &mapping->flags))
383 return -EIO;
384 if (test_bit(AS_ENOSPC, &mapping->flags))
385 return -ENOSPC;
386 return 0;
387}
388
/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 *
 * Return: %0 on success, negative error code otherwise.
 */
406int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
407 loff_t end, int sync_mode)
408{
409 int ret;
410 struct writeback_control wbc = {
411 .sync_mode = sync_mode,
412 .nr_to_write = LONG_MAX,
413 .range_start = start,
414 .range_end = end,
415 };
416
417 if (!mapping_can_writeback(mapping) ||
418 !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
419 return 0;
420
421 wbc_attach_fdatawrite_inode(&wbc, mapping->host);
422 ret = do_writepages(mapping, &wbc);
423 wbc_detach_inode(&wbc);
424 return ret;
425}
426
427static inline int __filemap_fdatawrite(struct address_space *mapping,
428 int sync_mode)
429{
430 return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
431}
432
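/* Start data-integrity (WB_SYNC_ALL) writeback of the entire mapping. */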
433int filemap_fdatawrite(struct address_space *mapping)
434{
435 return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
436}
437EXPORT_SYMBOL(filemap_fdatawrite);
438
439int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
440 loff_t end)
441{
442 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
443}
444EXPORT_SYMBOL(filemap_fdatawrite_range);
445
/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */
455int filemap_flush(struct address_space *mapping)
456{
457 return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
458}
459EXPORT_SYMBOL(filemap_flush);
460
461
462
463
464
465
466
467
468
469
470
471
472
473bool filemap_range_has_page(struct address_space *mapping,
474 loff_t start_byte, loff_t end_byte)
475{
476 struct page *page;
477 XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
478 pgoff_t max = end_byte >> PAGE_SHIFT;
479
480 if (end_byte < start_byte)
481 return false;
482
483 rcu_read_lock();
484 for (;;) {
485 page = xas_find(&xas, max);
486 if (xas_retry(&xas, page))
487 continue;
488
489 if (xa_is_value(page))
490 continue;
491
492
493
494
495
496 break;
497 }
498 rcu_read_unlock();
499
500 return page != NULL;
501}
502EXPORT_SYMBOL(filemap_range_has_page);
503
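/*
 * Wait for every page under writeback in the byte range [start_byte, end_byte]
 * and clear the per-page error flag.  Any mapping-wide error is reported
 * separately by the callers (filemap_check_errors() or the file's wb_err).
 */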
504static void __filemap_fdatawait_range(struct address_space *mapping,
505 loff_t start_byte, loff_t end_byte)
506{
507 pgoff_t index = start_byte >> PAGE_SHIFT;
508 pgoff_t end = end_byte >> PAGE_SHIFT;
509 struct pagevec pvec;
510 int nr_pages;
511
512 if (end_byte < start_byte)
513 return;
514
515 pagevec_init(&pvec);
516 while (index <= end) {
517 unsigned i;
518
519 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
520 end, PAGECACHE_TAG_WRITEBACK);
521 if (!nr_pages)
522 break;
523
524 for (i = 0; i < nr_pages; i++) {
525 struct page *page = pvec.pages[i];
526
527 wait_on_page_writeback(page);
528 ClearPageError(page);
529 }
530 pagevec_release(&pvec);
531 cond_resched();
532 }
533}
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
552 loff_t end_byte)
553{
554 __filemap_fdatawait_range(mapping, start_byte, end_byte);
555 return filemap_check_errors(mapping);
556}
557EXPORT_SYMBOL(filemap_fdatawait_range);
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
574 loff_t start_byte, loff_t end_byte)
575{
576 __filemap_fdatawait_range(mapping, start_byte, end_byte);
577 return filemap_check_and_keep_errors(mapping);
578}
579EXPORT_SYMBOL(filemap_fdatawait_range_keep_errors);
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
598{
599 struct address_space *mapping = file->f_mapping;
600
601 __filemap_fdatawait_range(mapping, start_byte, end_byte);
602 return file_check_and_advance_wb_err(file);
603}
604EXPORT_SYMBOL(file_fdatawait_range);
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620int filemap_fdatawait_keep_errors(struct address_space *mapping)
621{
622 __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
623 return filemap_check_and_keep_errors(mapping);
624}
625EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
626
627
628static bool mapping_needs_writeback(struct address_space *mapping)
629{
630 if (dax_mapping(mapping))
631 return mapping->nrexceptional;
632
633 return mapping->nrpages;
634}
635
636
637
638
639
640
641
642
643
644
645
646
647
648
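/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * Return: error status of the address space.
 */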
649int filemap_write_and_wait_range(struct address_space *mapping,
650 loff_t lstart, loff_t lend)
651{
652 int err = 0;
653
654 if (mapping_needs_writeback(mapping)) {
655 err = __filemap_fdatawrite_range(mapping, lstart, lend,
656 WB_SYNC_ALL);
657
658
659
660
661
662
663 if (err != -EIO) {
664 int err2 = filemap_fdatawait_range(mapping,
665 lstart, lend);
666 if (!err)
667 err = err2;
668 } else {
669
670 filemap_check_errors(mapping);
671 }
672 } else {
673 err = filemap_check_errors(mapping);
674 }
675 return err;
676}
677EXPORT_SYMBOL(filemap_write_and_wait_range);
678
679void __filemap_set_wb_err(struct address_space *mapping, int err)
680{
681 errseq_t eseq = errseq_set(&mapping->wb_err, err);
682
683 trace_filemap_set_wb_err(mapping, eseq);
684}
685EXPORT_SYMBOL(__filemap_set_wb_err);
686
/**
 * file_check_and_advance_wb_err - report wb error (if any) that was previously
 * 				   and advance wb_err to current one
 * @file: struct file on which the error is being reported
 *
 * When userland calls fsync (or something like nfsd does the equivalent), we
 * want to report any writeback errors that occurred since the last fsync (or
 * since the file was opened if there haven't been any).
 *
 * Grab the wb_err from the mapping. If it matches what we have in the file,
 * then just quickly return 0. The file is all caught up.
 *
 * If it doesn't match, then take the mapping value, set the "seen" flag in
 * it and try to swap it into place. If it works, or another task beat us
 * to it with the new value, then update the f_wb_err and return the error
 * portion. The error at this point must be reported via proper channels
 * (a'la fsync, or NFS COMMIT operation, etc.).
 *
 * While we handle mapping->wb_err with atomic operations, the f_wb_err
 * value is protected by the f_lock since we must ensure that it reflects
 * the latest value swapped in for this file descriptor.
 *
 * Return: %0 on success, negative error code otherwise.
 */
711int file_check_and_advance_wb_err(struct file *file)
712{
713 int err = 0;
714 errseq_t old = READ_ONCE(file->f_wb_err);
715 struct address_space *mapping = file->f_mapping;
716
717
718 if (errseq_check(&mapping->wb_err, old)) {
719
720 spin_lock(&file->f_lock);
721 old = file->f_wb_err;
722 err = errseq_check_and_advance(&mapping->wb_err,
723 &file->f_wb_err);
724 trace_file_check_and_advance_wb_err(file, old);
725 spin_unlock(&file->f_lock);
726 }
727
728
729
730
731
732
733 clear_bit(AS_EIO, &mapping->flags);
734 clear_bit(AS_ENOSPC, &mapping->flags);
735 return err;
736}
737EXPORT_SYMBOL(file_check_and_advance_wb_err);
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
756{
757 int err = 0, err2;
758 struct address_space *mapping = file->f_mapping;
759
760 if (mapping_needs_writeback(mapping)) {
761 err = __filemap_fdatawrite_range(mapping, lstart, lend,
762 WB_SYNC_ALL);
763
764 if (err != -EIO)
765 __filemap_fdatawait_range(mapping, lstart, lend);
766 }
767 err2 = file_check_and_advance_wb_err(file);
768 if (!err)
769 err = err2;
770 return err;
771}
772EXPORT_SYMBOL(file_write_and_wait_range);
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
791{
792 struct address_space *mapping = old->mapping;
793 void (*freepage)(struct page *) = mapping->a_ops->freepage;
794 pgoff_t offset = old->index;
795 XA_STATE(xas, &mapping->i_pages, offset);
796 unsigned long flags;
797
798 VM_BUG_ON_PAGE(!PageLocked(old), old);
799 VM_BUG_ON_PAGE(!PageLocked(new), new);
800 VM_BUG_ON_PAGE(new->mapping, new);
801
802 get_page(new);
803 new->mapping = mapping;
804 new->index = offset;
805
806 mem_cgroup_migrate(old, new);
807
808 xas_lock_irqsave(&xas, flags);
809 xas_store(&xas, new);
810
811 old->mapping = NULL;
812
813 if (!PageHuge(old))
814 __dec_lruvec_page_state(old, NR_FILE_PAGES);
815 if (!PageHuge(new))
816 __inc_lruvec_page_state(new, NR_FILE_PAGES);
817 if (PageSwapBacked(old))
818 __dec_lruvec_page_state(old, NR_SHMEM);
819 if (PageSwapBacked(new))
820 __inc_lruvec_page_state(new, NR_SHMEM);
821 xas_unlock_irqrestore(&xas, flags);
822 if (freepage)
823 freepage(old);
824 put_page(old);
825
826 return 0;
827}
828EXPORT_SYMBOL_GPL(replace_page_cache_page);
829
830noinline int __add_to_page_cache_locked(struct page *page,
831 struct address_space *mapping,
832 pgoff_t offset, gfp_t gfp,
833 void **shadowp)
834{
835 XA_STATE(xas, &mapping->i_pages, offset);
836 int huge = PageHuge(page);
837 int error;
838
839 VM_BUG_ON_PAGE(!PageLocked(page), page);
840 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
841 mapping_set_update(&xas, mapping);
842
843 get_page(page);
844 page->mapping = mapping;
845 page->index = offset;
846
847 if (!huge) {
848 error = mem_cgroup_charge(page, current->mm, gfp);
849 if (error)
850 goto error;
851 }
852
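	/*
	 * Only the reclaim-related GFP bits matter for the xarray node
	 * allocations done below (xas_split_alloc()/xas_nomem()).
	 */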
853 gfp &= GFP_RECLAIM_MASK;
854
855 do {
856 unsigned int order = xa_get_order(xas.xa, xas.xa_index);
857 void *entry, *old = NULL;
858
859 if (order > thp_order(page))
860 xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
861 order, gfp);
862 xas_lock_irq(&xas);
863 xas_for_each_conflict(&xas, entry) {
864 old = entry;
865 if (!xa_is_value(entry)) {
866 xas_set_err(&xas, -EEXIST);
867 goto unlock;
868 }
869 }
870
871 if (old) {
872 if (shadowp)
873 *shadowp = old;
874
875 order = xa_get_order(xas.xa, xas.xa_index);
876 if (order > thp_order(page)) {
877 xas_split(&xas, old, order);
878 xas_reset(&xas);
879 }
880 }
881
882 xas_store(&xas, page);
883 if (xas_error(&xas))
884 goto unlock;
885
886 if (old)
887 mapping->nrexceptional--;
888 mapping->nrpages++;
889
890
891 if (!huge)
892 __inc_lruvec_page_state(page, NR_FILE_PAGES);
893unlock:
894 xas_unlock_irq(&xas);
895 } while (xas_nomem(&xas, gfp));
896
897 if (xas_error(&xas)) {
898 error = xas_error(&xas);
899 goto error;
900 }
901
902 trace_mm_filemap_add_to_page_cache(page);
903 return 0;
904error:
905 page->mapping = NULL;
906
907 put_page(page);
908 return error;
909}
910ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
911
/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache. It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 *
 * Return: %0 on success, negative error code otherwise.
 */
924int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
925 pgoff_t offset, gfp_t gfp_mask)
926{
927 return __add_to_page_cache_locked(page, mapping, offset,
928 gfp_mask, NULL);
929}
930EXPORT_SYMBOL(add_to_page_cache_locked);
931
932int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
933 pgoff_t offset, gfp_t gfp_mask)
934{
935 void *shadow = NULL;
936 int ret;
937
938 __SetPageLocked(page);
939 ret = __add_to_page_cache_locked(page, mapping, offset,
940 gfp_mask, &shadow);
941 if (unlikely(ret))
942 __ClearPageLocked(page);
943 else {
944
945
946
947
948
949
950
951
952 WARN_ON_ONCE(PageActive(page));
953 if (!(gfp_mask & __GFP_WRITE) && shadow)
954 workingset_refault(page, shadow);
955 lru_cache_add(page);
956 }
957 return ret;
958}
959EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
960
961#ifdef CONFIG_NUMA
962struct page *__page_cache_alloc(gfp_t gfp)
963{
964 int n;
965 struct page *page;
966
967 if (cpuset_do_page_mem_spread()) {
968 unsigned int cpuset_mems_cookie;
969 do {
970 cpuset_mems_cookie = read_mems_allowed_begin();
971 n = cpuset_mem_spread_node();
972 page = __alloc_pages_node(n, gfp, 0);
973 } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
974
975 return page;
976 }
977 return alloc_pages(gfp, 0);
978}
979EXPORT_SYMBOL(__page_cache_alloc);
980#endif
981
982
983
984
985
986
987
988
989
990
991
992#define PAGE_WAIT_TABLE_BITS 8
993#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
994static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
995
996static wait_queue_head_t *page_waitqueue(struct page *page)
997{
998 return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
999}
1000
1001void __init pagecache_init(void)
1002{
1003 int i;
1004
1005 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
1006 init_waitqueue_head(&page_wait_table[i]);
1007
1008 page_writeback_init();
1009}
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
1046{
1047 unsigned int flags;
1048 struct wait_page_key *key = arg;
1049 struct wait_page_queue *wait_page
1050 = container_of(wait, struct wait_page_queue, wait);
1051
1052 if (!wake_page_match(wait_page, key))
1053 return 0;
1054
1055
1056
1057
1058
1059 flags = wait->flags;
1060 if (flags & WQ_FLAG_EXCLUSIVE) {
1061 if (test_bit(key->bit_nr, &key->page->flags))
1062 return -1;
1063 if (flags & WQ_FLAG_CUSTOM) {
1064 if (test_and_set_bit(key->bit_nr, &key->page->flags))
1065 return -1;
1066 flags |= WQ_FLAG_DONE;
1067 }
1068 }
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079 smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
1080 wake_up_state(wait->private, mode);
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096 smp_mb();
1097 list_del_init(&wait->entry);
1098 return (flags & WQ_FLAG_EXCLUSIVE) != 0;
1099}
1100
1101static void wake_up_page_bit(struct page *page, int bit_nr)
1102{
1103 wait_queue_head_t *q = page_waitqueue(page);
1104 struct wait_page_key key;
1105 unsigned long flags;
1106 wait_queue_entry_t bookmark;
1107
1108 key.page = page;
1109 key.bit_nr = bit_nr;
1110 key.page_match = 0;
1111
1112 bookmark.flags = 0;
1113 bookmark.private = NULL;
1114 bookmark.func = NULL;
1115 INIT_LIST_HEAD(&bookmark.entry);
1116
1117 spin_lock_irqsave(&q->lock, flags);
1118 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1119
1120 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
1121
1122
1123
1124
1125
1126
1127 spin_unlock_irqrestore(&q->lock, flags);
1128 cpu_relax();
1129 spin_lock_irqsave(&q->lock, flags);
1130 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1131 }
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142 if (!waitqueue_active(q) || !key.page_match) {
1143 ClearPageWaiters(page);
1144
1145
1146
1147
1148
1149
1150
1151 }
1152 spin_unlock_irqrestore(&q->lock, flags);
1153}
1154
1155static void wake_up_page(struct page *page, int bit)
1156{
1157 if (!PageWaiters(page))
1158 return;
1159 wake_up_page_bit(page, bit);
1160}
1161
/*
 * A choice of three behaviors for wait_on_page_bit_common():
 */
enum behavior {
	EXCLUSIVE,	/* Hold ref to page and take the bit when woken, like
			 * __lock_page() waiting on then setting PG_locked.
			 */
	SHARED,		/* Hold ref to page and check the bit when woken, like
			 * wait_on_page_writeback() waiting on PG_writeback.
			 */
	DROP,		/* Drop ref to page before wait, no check when woken,
			 * like put_and_wait_on_page_locked() on PG_locked.
			 */
};
1176
1177
1178
1179
1180
1181static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
1182 struct wait_queue_entry *wait)
1183{
1184 if (wait->flags & WQ_FLAG_EXCLUSIVE) {
1185 if (test_and_set_bit(bit_nr, &page->flags))
1186 return false;
1187 } else if (test_bit(bit_nr, &page->flags))
1188 return false;
1189
1190 wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
1191 return true;
1192}
1193
1194
1195int sysctl_page_lock_unfairness = 5;
1196
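/*
 * Wait for a page bit to clear (SHARED/DROP behavior) or clear-and-take it
 * (EXCLUSIVE behavior).  When the wait is for PG_locked on a not-uptodate
 * workingset page, the stall is accounted as thrashing via psi (and
 * delayacct for non-swap-backed pages).
 */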
1197static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1198 struct page *page, int bit_nr, int state, enum behavior behavior)
1199{
1200 int unfairness = sysctl_page_lock_unfairness;
1201 struct wait_page_queue wait_page;
1202 wait_queue_entry_t *wait = &wait_page.wait;
1203 bool thrashing = false;
1204 bool delayacct = false;
1205 unsigned long pflags;
1206
1207 if (bit_nr == PG_locked &&
1208 !PageUptodate(page) && PageWorkingset(page)) {
1209 if (!PageSwapBacked(page)) {
1210 delayacct_thrashing_start();
1211 delayacct = true;
1212 }
1213 psi_memstall_enter(&pflags);
1214 thrashing = true;
1215 }
1216
1217 init_wait(wait);
1218 wait->func = wake_page_function;
1219 wait_page.page = page;
1220 wait_page.bit_nr = bit_nr;
1221
1222repeat:
1223 wait->flags = 0;
1224 if (behavior == EXCLUSIVE) {
1225 wait->flags = WQ_FLAG_EXCLUSIVE;
1226 if (--unfairness < 0)
1227 wait->flags |= WQ_FLAG_CUSTOM;
1228 }
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244 spin_lock_irq(&q->lock);
1245 SetPageWaiters(page);
1246 if (!trylock_page_bit_common(page, bit_nr, wait))
1247 __add_wait_queue_entry_tail(q, wait);
1248 spin_unlock_irq(&q->lock);
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258 if (behavior == DROP)
1259 put_page(page);
1260
1261
1262
1263
1264
1265
1266
1267 for (;;) {
1268 unsigned int flags;
1269
1270 set_current_state(state);
1271
1272
1273 flags = smp_load_acquire(&wait->flags);
1274 if (!(flags & WQ_FLAG_WOKEN)) {
1275 if (signal_pending_state(state, current))
1276 break;
1277
1278 io_schedule();
1279 continue;
1280 }
1281
1282
1283 if (behavior != EXCLUSIVE)
1284 break;
1285
1286
1287 if (flags & WQ_FLAG_DONE)
1288 break;
1289
1290
1291
1292
1293
1294
1295
1296 if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
1297 goto repeat;
1298
1299 wait->flags |= WQ_FLAG_DONE;
1300 break;
1301 }
1302
1303
1304
1305
1306
1307
1308
1309 finish_wait(q, wait);
1310
1311 if (thrashing) {
1312 if (delayacct)
1313 delayacct_thrashing_end();
1314 psi_memstall_leave(&pflags);
1315 }
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330 if (behavior == EXCLUSIVE)
1331 return wait->flags & WQ_FLAG_DONE ? 0 : -EINTR;
1332
1333 return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
1334}
1335
1336void wait_on_page_bit(struct page *page, int bit_nr)
1337{
1338 wait_queue_head_t *q = page_waitqueue(page);
1339 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
1340}
1341EXPORT_SYMBOL(wait_on_page_bit);
1342
1343int wait_on_page_bit_killable(struct page *page, int bit_nr)
1344{
1345 wait_queue_head_t *q = page_waitqueue(page);
1346 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
1347}
1348EXPORT_SYMBOL(wait_on_page_bit_killable);
1349
1350static int __wait_on_page_locked_async(struct page *page,
1351 struct wait_page_queue *wait, bool set)
1352{
1353 struct wait_queue_head *q = page_waitqueue(page);
1354 int ret = 0;
1355
1356 wait->page = page;
1357 wait->bit_nr = PG_locked;
1358
1359 spin_lock_irq(&q->lock);
1360 __add_wait_queue_entry_tail(q, &wait->wait);
1361 SetPageWaiters(page);
1362 if (set)
1363 ret = !trylock_page(page);
1364 else
1365 ret = PageLocked(page);
1366
1367
1368
1369
1370
1371
1372 if (!ret)
1373 __remove_wait_queue(q, &wait->wait);
1374 else
1375 ret = -EIOCBQUEUED;
1376 spin_unlock_irq(&q->lock);
1377 return ret;
1378}
1379
1380static int wait_on_page_locked_async(struct page *page,
1381 struct wait_page_queue *wait)
1382{
1383 if (!PageLocked(page))
1384 return 0;
1385 return __wait_on_page_locked_async(compound_head(page), wait, false);
1386}
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398void put_and_wait_on_page_locked(struct page *page)
1399{
1400 wait_queue_head_t *q;
1401
1402 page = compound_head(page);
1403 q = page_waitqueue(page);
1404 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
1405}
1406
1407
1408
1409
1410
1411
1412
1413
1414void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
1415{
1416 wait_queue_head_t *q = page_waitqueue(page);
1417 unsigned long flags;
1418
1419 spin_lock_irqsave(&q->lock, flags);
1420 __add_wait_queue_entry_tail(q, waiter);
1421 SetPageWaiters(page);
1422 spin_unlock_irqrestore(&q->lock, flags);
1423}
1424EXPORT_SYMBOL_GPL(add_page_wait_queue);
1425
1426#ifndef clear_bit_unlock_is_negative_byte

/*
 * PG_waiters is the high bit in the same byte as PG_locked.
 *
 * Architectures that can clear PG_locked and test the "waiters" sign bit
 * in one atomic operation provide an optimized version of this helper;
 * this generic fallback clears the lock bit with clear_bit_unlock() and
 * then tests PG_waiters separately.
 */
1440static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
1441{
1442 clear_bit_unlock(nr, mem);
1443
1444 return test_bit(PG_waiters, mem);
1445}
1446
1447#endif
1448

/**
 * unlock_page - unlock a locked page
 * @page: the page
 *
 * Unlocks the page and wakes up sleepers in wait_on_page_locked().
 * Also wakes sleepers in wait_on_page_writeback() because the wakeup
 * mechanism between PageLocked pages and PageWriteback pages is shared.
 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
 *
 * Note that this depends on PG_waiters being the sign bit in the byte
 * that contains PG_locked - thus the BUILD_BUG_ON() that checks that
 * clear_bit_unlock_is_negative_byte() is used the right way.
 */
1464void unlock_page(struct page *page)
1465{
1466 BUILD_BUG_ON(PG_waiters != 7);
1467 page = compound_head(page);
1468 VM_BUG_ON_PAGE(!PageLocked(page), page);
1469 if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
1470 wake_up_page_bit(page, PG_locked);
1471}
1472EXPORT_SYMBOL(unlock_page);
1473
1474
1475
1476
1477
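/**
 * end_page_writeback - end writeback against a page
 * @page: the page
 */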
1478void end_page_writeback(struct page *page)
1479{
1480
1481
1482
1483
1484
1485
1486
1487 if (PageReclaim(page)) {
1488 ClearPageReclaim(page);
1489 rotate_reclaimable_page(page);
1490 }
1491
1492
1493
1494
1495
1496
1497
1498 get_page(page);
1499 if (!test_clear_page_writeback(page))
1500 BUG();
1501
1502 smp_mb__after_atomic();
1503 wake_up_page(page, PG_writeback);
1504 put_page(page);
1505}
1506EXPORT_SYMBOL(end_page_writeback);
1507
1508
1509
1510
1511
1512void page_endio(struct page *page, bool is_write, int err)
1513{
1514 if (!is_write) {
1515 if (!err) {
1516 SetPageUptodate(page);
1517 } else {
1518 ClearPageUptodate(page);
1519 SetPageError(page);
1520 }
1521 unlock_page(page);
1522 } else {
1523 if (err) {
1524 struct address_space *mapping;
1525
1526 SetPageError(page);
1527 mapping = page_mapping(page);
1528 if (mapping)
1529 mapping_set_error(mapping, err);
1530 }
1531 end_page_writeback(page);
1532 }
1533}
1534EXPORT_SYMBOL_GPL(page_endio);
1535
1536
1537
1538
1539
1540void __lock_page(struct page *__page)
1541{
1542 struct page *page = compound_head(__page);
1543 wait_queue_head_t *q = page_waitqueue(page);
1544 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
1545 EXCLUSIVE);
1546}
1547EXPORT_SYMBOL(__lock_page);
1548
1549int __lock_page_killable(struct page *__page)
1550{
1551 struct page *page = compound_head(__page);
1552 wait_queue_head_t *q = page_waitqueue(page);
1553 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
1554 EXCLUSIVE);
1555}
1556EXPORT_SYMBOL_GPL(__lock_page_killable);
1557
1558int __lock_page_async(struct page *page, struct wait_page_queue *wait)
1559{
1560 return __wait_on_page_locked_async(page, wait, true);
1561}
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
1575 unsigned int flags)
1576{
1577 if (fault_flag_allow_retry_first(flags)) {
1578
1579
1580
1581
1582 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1583 return 0;
1584
1585 mmap_read_unlock(mm);
1586 if (flags & FAULT_FLAG_KILLABLE)
1587 wait_on_page_locked_killable(page);
1588 else
1589 wait_on_page_locked(page);
1590 return 0;
1591 } else {
1592 if (flags & FAULT_FLAG_KILLABLE) {
1593 int ret;
1594
1595 ret = __lock_page_killable(page);
1596 if (ret) {
1597 mmap_read_unlock(mm);
1598 return 0;
1599 }
1600 } else
1601 __lock_page(page);
1602 return 1;
1603 }
1604}
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625pgoff_t page_cache_next_miss(struct address_space *mapping,
1626 pgoff_t index, unsigned long max_scan)
1627{
1628 XA_STATE(xas, &mapping->i_pages, index);
1629
1630 while (max_scan--) {
1631 void *entry = xas_next(&xas);
1632 if (!entry || xa_is_value(entry))
1633 break;
1634 if (xas.xa_index == 0)
1635 break;
1636 }
1637
1638 return xas.xa_index;
1639}
1640EXPORT_SYMBOL(page_cache_next_miss);
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661pgoff_t page_cache_prev_miss(struct address_space *mapping,
1662 pgoff_t index, unsigned long max_scan)
1663{
1664 XA_STATE(xas, &mapping->i_pages, index);
1665
1666 while (max_scan--) {
1667 void *entry = xas_prev(&xas);
1668 if (!entry || xa_is_value(entry))
1669 break;
1670 if (xas.xa_index == ULONG_MAX)
1671 break;
1672 }
1673
1674 return xas.xa_index;
1675}
1676EXPORT_SYMBOL(page_cache_prev_miss);
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
1692{
1693 XA_STATE(xas, &mapping->i_pages, index);
1694 struct page *page;
1695
1696 rcu_read_lock();
1697repeat:
1698 xas_reset(&xas);
1699 page = xas_load(&xas);
1700 if (xas_retry(&xas, page))
1701 goto repeat;
1702
1703
1704
1705
1706 if (!page || xa_is_value(page))
1707 goto out;
1708
1709 if (!page_cache_get_speculative(page))
1710 goto repeat;
1711
1712
1713
1714
1715
1716
1717 if (unlikely(page != xas_reload(&xas))) {
1718 put_page(page);
1719 goto repeat;
1720 }
1721out:
1722 rcu_read_unlock();
1723
1724 return page;
1725}
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
1742{
1743 struct page *page;
1744
1745repeat:
1746 page = find_get_entry(mapping, index);
1747 if (page && !xa_is_value(page)) {
1748 lock_page(page);
1749
1750 if (unlikely(page->mapping != mapping)) {
1751 unlock_page(page);
1752 put_page(page);
1753 goto repeat;
1754 }
1755 VM_BUG_ON_PAGE(!thp_contains(page, index), page);
1756 }
1757 return page;
1758}
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
1793 int fgp_flags, gfp_t gfp_mask)
1794{
1795 struct page *page;
1796
1797repeat:
1798 page = find_get_entry(mapping, index);
1799 if (xa_is_value(page))
1800 page = NULL;
1801 if (!page)
1802 goto no_page;
1803
1804 if (fgp_flags & FGP_LOCK) {
1805 if (fgp_flags & FGP_NOWAIT) {
1806 if (!trylock_page(page)) {
1807 put_page(page);
1808 return NULL;
1809 }
1810 } else {
1811 lock_page(page);
1812 }
1813
1814
1815 if (unlikely(page->mapping != mapping)) {
1816 unlock_page(page);
1817 put_page(page);
1818 goto repeat;
1819 }
1820 VM_BUG_ON_PAGE(!thp_contains(page, index), page);
1821 }
1822
1823 if (fgp_flags & FGP_ACCESSED)
1824 mark_page_accessed(page);
1825 else if (fgp_flags & FGP_WRITE) {
1826
1827 if (page_is_idle(page))
1828 clear_page_idle(page);
1829 }
1830 if (!(fgp_flags & FGP_HEAD))
1831 page = find_subpage(page, index);
1832
1833no_page:
1834 if (!page && (fgp_flags & FGP_CREAT)) {
1835 int err;
1836 if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
1837 gfp_mask |= __GFP_WRITE;
1838 if (fgp_flags & FGP_NOFS)
1839 gfp_mask &= ~__GFP_FS;
1840
1841 page = __page_cache_alloc(gfp_mask);
1842 if (!page)
1843 return NULL;
1844
1845 if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
1846 fgp_flags |= FGP_LOCK;
1847
1848
1849 if (fgp_flags & FGP_ACCESSED)
1850 __SetPageReferenced(page);
1851
1852 err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
1853 if (unlikely(err)) {
1854 put_page(page);
1855 page = NULL;
1856 if (err == -EEXIST)
1857 goto repeat;
1858 }
1859
1860
1861
1862
1863
1864 if (page && (fgp_flags & FGP_FOR_MMAP))
1865 unlock_page(page);
1866 }
1867
1868 return page;
1869}
1870EXPORT_SYMBOL(pagecache_get_page);
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899unsigned find_get_entries(struct address_space *mapping,
1900 pgoff_t start, unsigned int nr_entries,
1901 struct page **entries, pgoff_t *indices)
1902{
1903 XA_STATE(xas, &mapping->i_pages, start);
1904 struct page *page;
1905 unsigned int ret = 0;
1906
1907 if (!nr_entries)
1908 return 0;
1909
1910 rcu_read_lock();
1911 xas_for_each(&xas, page, ULONG_MAX) {
1912 if (xas_retry(&xas, page))
1913 continue;
1914
1915
1916
1917
1918
1919 if (xa_is_value(page))
1920 goto export;
1921
1922 if (!page_cache_get_speculative(page))
1923 goto retry;
1924
1925
1926 if (unlikely(page != xas_reload(&xas)))
1927 goto put_page;
1928
1929
1930
1931
1932
1933 if (PageTransHuge(page) && !PageHuge(page)) {
1934 page = find_subpage(page, xas.xa_index);
1935 nr_entries = ret + 1;
1936 }
1937export:
1938 indices[ret] = xas.xa_index;
1939 entries[ret] = page;
1940 if (++ret == nr_entries)
1941 break;
1942 continue;
1943put_page:
1944 put_page(page);
1945retry:
1946 xas_reset(&xas);
1947 }
1948 rcu_read_unlock();
1949 return ret;
1950}
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
1974 pgoff_t end, unsigned int nr_pages,
1975 struct page **pages)
1976{
1977 XA_STATE(xas, &mapping->i_pages, *start);
1978 struct page *page;
1979 unsigned ret = 0;
1980
1981 if (unlikely(!nr_pages))
1982 return 0;
1983
1984 rcu_read_lock();
1985 xas_for_each(&xas, page, end) {
1986 if (xas_retry(&xas, page))
1987 continue;
1988
1989 if (xa_is_value(page))
1990 continue;
1991
1992 if (!page_cache_get_speculative(page))
1993 goto retry;
1994
1995
1996 if (unlikely(page != xas_reload(&xas)))
1997 goto put_page;
1998
1999 pages[ret] = find_subpage(page, xas.xa_index);
2000 if (++ret == nr_pages) {
2001 *start = xas.xa_index + 1;
2002 goto out;
2003 }
2004 continue;
2005put_page:
2006 put_page(page);
2007retry:
2008 xas_reset(&xas);
2009 }
2010
2011
2012
2013
2014
2015
2016
2017 if (end == (pgoff_t)-1)
2018 *start = (pgoff_t)-1;
2019 else
2020 *start = end + 1;
2021out:
2022 rcu_read_unlock();
2023
2024 return ret;
2025}
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
2040 unsigned int nr_pages, struct page **pages)
2041{
2042 XA_STATE(xas, &mapping->i_pages, index);
2043 struct page *page;
2044 unsigned int ret = 0;
2045
2046 if (unlikely(!nr_pages))
2047 return 0;
2048
2049 rcu_read_lock();
2050 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
2051 if (xas_retry(&xas, page))
2052 continue;
2053
2054
2055
2056
2057 if (xa_is_value(page))
2058 break;
2059
2060 if (!page_cache_get_speculative(page))
2061 goto retry;
2062
2063
2064 if (unlikely(page != xas_reload(&xas)))
2065 goto put_page;
2066
2067 pages[ret] = find_subpage(page, xas.xa_index);
2068 if (++ret == nr_pages)
2069 break;
2070 continue;
2071put_page:
2072 put_page(page);
2073retry:
2074 xas_reset(&xas);
2075 }
2076 rcu_read_unlock();
2077 return ret;
2078}
2079EXPORT_SYMBOL(find_get_pages_contig);
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
2096 pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
2097 struct page **pages)
2098{
2099 XA_STATE(xas, &mapping->i_pages, *index);
2100 struct page *page;
2101 unsigned ret = 0;
2102
2103 if (unlikely(!nr_pages))
2104 return 0;
2105
2106 rcu_read_lock();
2107 xas_for_each_marked(&xas, page, end, tag) {
2108 if (xas_retry(&xas, page))
2109 continue;
2110
2111
2112
2113
2114
2115 if (xa_is_value(page))
2116 continue;
2117
2118 if (!page_cache_get_speculative(page))
2119 goto retry;
2120
2121
2122 if (unlikely(page != xas_reload(&xas)))
2123 goto put_page;
2124
2125 pages[ret] = find_subpage(page, xas.xa_index);
2126 if (++ret == nr_pages) {
2127 *index = xas.xa_index + 1;
2128 goto out;
2129 }
2130 continue;
2131put_page:
2132 put_page(page);
2133retry:
2134 xas_reset(&xas);
2135 }
2136
2137
2138
2139
2140
2141
2142
2143 if (end == (pgoff_t)-1)
2144 *index = (pgoff_t)-1;
2145 else
2146 *index = end + 1;
2147out:
2148 rcu_read_unlock();
2149
2150 return ret;
2151}
2152EXPORT_SYMBOL(find_get_pages_range_tag);
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169static void shrink_readahead_size_eio(struct file_ra_state *ra)
2170{
2171 ra->ra_pages /= 4;
2172}
2173
/**
 * generic_file_buffered_read - generic file read routine
 * @iocb:	the iocb to read
 * @iter:	data destination
 * @written:	already copied
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 *
 * Return:
 * * total number of bytes copied, including those already @written
 * * negative error code if nothing was copied
 */
2190ssize_t generic_file_buffered_read(struct kiocb *iocb,
2191 struct iov_iter *iter, ssize_t written)
2192{
2193 struct file *filp = iocb->ki_filp;
2194 struct address_space *mapping = filp->f_mapping;
2195 struct inode *inode = mapping->host;
2196 struct file_ra_state *ra = &filp->f_ra;
2197 loff_t *ppos = &iocb->ki_pos;
2198 pgoff_t index;
2199 pgoff_t last_index;
2200 pgoff_t prev_index;
2201 unsigned long offset;
2202 unsigned int prev_offset;
2203 int error = 0;
2204
2205 if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
2206 return 0;
2207 iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
2208
2209 index = *ppos >> PAGE_SHIFT;
2210 prev_index = ra->prev_pos >> PAGE_SHIFT;
2211 prev_offset = ra->prev_pos & (PAGE_SIZE-1);
2212 last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
2213 offset = *ppos & ~PAGE_MASK;
2214
2215
2216
2217
2218
2219
2220 if (written && (iocb->ki_flags & IOCB_WAITQ))
2221 iocb->ki_flags |= IOCB_NOWAIT;
2222
2223 for (;;) {
2224 struct page *page;
2225 pgoff_t end_index;
2226 loff_t isize;
2227 unsigned long nr, ret;
2228
2229 cond_resched();
2230find_page:
2231 if (fatal_signal_pending(current)) {
2232 error = -EINTR;
2233 goto out;
2234 }
2235
2236 page = find_get_page(mapping, index);
2237 if (!page) {
2238 if (iocb->ki_flags & IOCB_NOIO)
2239 goto would_block;
2240 page_cache_sync_readahead(mapping,
2241 ra, filp,
2242 index, last_index - index);
2243 page = find_get_page(mapping, index);
2244 if (unlikely(page == NULL))
2245 goto no_cached_page;
2246 }
2247 if (PageReadahead(page)) {
2248 if (iocb->ki_flags & IOCB_NOIO) {
2249 put_page(page);
2250 goto out;
2251 }
2252 page_cache_async_readahead(mapping,
2253 ra, filp, page,
2254 index, last_index - index);
2255 }
2256 if (!PageUptodate(page)) {
2257
2258
2259
2260
2261
2262 if (iocb->ki_flags & IOCB_WAITQ) {
2263 if (written) {
2264 put_page(page);
2265 goto out;
2266 }
2267 error = wait_on_page_locked_async(page,
2268 iocb->ki_waitq);
2269 } else {
2270 if (iocb->ki_flags & IOCB_NOWAIT) {
2271 put_page(page);
2272 goto would_block;
2273 }
2274 error = wait_on_page_locked_killable(page);
2275 }
2276 if (unlikely(error))
2277 goto readpage_error;
2278 if (PageUptodate(page))
2279 goto page_ok;
2280
2281 if (inode->i_blkbits == PAGE_SHIFT ||
2282 !mapping->a_ops->is_partially_uptodate)
2283 goto page_not_up_to_date;
2284
2285 if (unlikely(iov_iter_is_pipe(iter)))
2286 goto page_not_up_to_date;
2287 if (!trylock_page(page))
2288 goto page_not_up_to_date;
2289
2290 if (!page->mapping)
2291 goto page_not_up_to_date_locked;
2292 if (!mapping->a_ops->is_partially_uptodate(page,
2293 offset, iter->count))
2294 goto page_not_up_to_date_locked;
2295 unlock_page(page);
2296 }
2297page_ok:
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307 isize = i_size_read(inode);
2308 end_index = (isize - 1) >> PAGE_SHIFT;
2309 if (unlikely(!isize || index > end_index)) {
2310 put_page(page);
2311 goto out;
2312 }
2313
2314
2315 nr = PAGE_SIZE;
2316 if (index == end_index) {
2317 nr = ((isize - 1) & ~PAGE_MASK) + 1;
2318 if (nr <= offset) {
2319 put_page(page);
2320 goto out;
2321 }
2322 }
2323 nr = nr - offset;
2324
2325
2326
2327
2328
2329 if (mapping_writably_mapped(mapping))
2330 flush_dcache_page(page);
2331
2332
2333
2334
2335
2336 if (prev_index != index || offset != prev_offset)
2337 mark_page_accessed(page);
2338 prev_index = index;
2339
2340
2341
2342
2343
2344
2345 ret = copy_page_to_iter(page, offset, nr, iter);
2346 offset += ret;
2347 index += offset >> PAGE_SHIFT;
2348 offset &= ~PAGE_MASK;
2349 prev_offset = offset;
2350
2351 put_page(page);
2352 written += ret;
2353 if (!iov_iter_count(iter))
2354 goto out;
2355 if (ret < nr) {
2356 error = -EFAULT;
2357 goto out;
2358 }
2359 continue;
2360
2361page_not_up_to_date:
2362
2363 if (iocb->ki_flags & IOCB_WAITQ) {
2364 if (written) {
2365 put_page(page);
2366 goto out;
2367 }
2368 error = lock_page_async(page, iocb->ki_waitq);
2369 } else {
2370 error = lock_page_killable(page);
2371 }
2372 if (unlikely(error))
2373 goto readpage_error;
2374
2375page_not_up_to_date_locked:
2376
2377 if (!page->mapping) {
2378 unlock_page(page);
2379 put_page(page);
2380 continue;
2381 }
2382
2383
2384 if (PageUptodate(page)) {
2385 unlock_page(page);
2386 goto page_ok;
2387 }
2388
2389readpage:
2390 if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
2391 unlock_page(page);
2392 put_page(page);
2393 goto would_block;
2394 }
2395
2396
2397
2398
2399
2400 ClearPageError(page);
2401
2402 error = mapping->a_ops->readpage(filp, page);
2403
2404 if (unlikely(error)) {
2405 if (error == AOP_TRUNCATED_PAGE) {
2406 put_page(page);
2407 error = 0;
2408 goto find_page;
2409 }
2410 goto readpage_error;
2411 }
2412
2413 if (!PageUptodate(page)) {
2414 if (iocb->ki_flags & IOCB_WAITQ) {
2415 if (written) {
2416 put_page(page);
2417 goto out;
2418 }
2419 error = lock_page_async(page, iocb->ki_waitq);
2420 } else {
2421 error = lock_page_killable(page);
2422 }
2423
2424 if (unlikely(error))
2425 goto readpage_error;
2426 if (!PageUptodate(page)) {
2427 if (page->mapping == NULL) {
2428
2429
2430
2431 unlock_page(page);
2432 put_page(page);
2433 goto find_page;
2434 }
2435 unlock_page(page);
2436 shrink_readahead_size_eio(ra);
2437 error = -EIO;
2438 goto readpage_error;
2439 }
2440 unlock_page(page);
2441 }
2442
2443 goto page_ok;
2444
2445readpage_error:
2446
2447 put_page(page);
2448 goto out;
2449
2450no_cached_page:
2451
2452
2453
2454
2455 page = page_cache_alloc(mapping);
2456 if (!page) {
2457 error = -ENOMEM;
2458 goto out;
2459 }
2460 error = add_to_page_cache_lru(page, mapping, index,
2461 mapping_gfp_constraint(mapping, GFP_KERNEL));
2462 if (error) {
2463 put_page(page);
2464 if (error == -EEXIST) {
2465 error = 0;
2466 goto find_page;
2467 }
2468 goto out;
2469 }
2470 goto readpage;
2471 }
2472
2473would_block:
2474 error = -EAGAIN;
2475out:
2476 ra->prev_pos = prev_index;
2477 ra->prev_pos <<= PAGE_SHIFT;
2478 ra->prev_pos |= prev_offset;
2479
2480 *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
2481 file_accessed(filp);
2482 return written ? written : error;
2483}
2484EXPORT_SYMBOL_GPL(generic_file_buffered_read);
2485
/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iter:	destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall
 * be returned when no data can be read without waiting for I/O requests
 * to complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O
 * requests shall be made for the read or for readahead.  When no data
 * can be read, -EAGAIN shall be returned.  When readahead would be
 * triggered, a partial, possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
2507ssize_t
2508generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2509{
2510 size_t count = iov_iter_count(iter);
2511 ssize_t retval = 0;
2512
2513 if (!count)
2514 goto out;
2515
2516 if (iocb->ki_flags & IOCB_DIRECT) {
2517 struct file *file = iocb->ki_filp;
2518 struct address_space *mapping = file->f_mapping;
2519 struct inode *inode = mapping->host;
2520 loff_t size;
2521
2522 size = i_size_read(inode);
2523 if (iocb->ki_flags & IOCB_NOWAIT) {
2524 if (filemap_range_has_page(mapping, iocb->ki_pos,
2525 iocb->ki_pos + count - 1))
2526 return -EAGAIN;
2527 } else {
2528 retval = filemap_write_and_wait_range(mapping,
2529 iocb->ki_pos,
2530 iocb->ki_pos + count - 1);
2531 if (retval < 0)
2532 goto out;
2533 }
2534
2535 file_accessed(file);
2536
2537 retval = mapping->a_ops->direct_IO(iocb, iter);
2538 if (retval >= 0) {
2539 iocb->ki_pos += retval;
2540 count -= retval;
2541 }
2542 iov_iter_revert(iter, count - iov_iter_count(iter));
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553 if (retval < 0 || !count || iocb->ki_pos >= size ||
2554 IS_DAX(inode))
2555 goto out;
2556 }
2557
2558 retval = generic_file_buffered_read(iocb, iter, retval);
2559out:
2560 return retval;
2561}
2562EXPORT_SYMBOL(generic_file_read_iter);
2563
2564#ifdef CONFIG_MMU
2565#define MMAP_LOTSAMISS (100)
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
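/*
 * Try to lock the page for a fault.  Returns 1 with the page locked, or 0
 * when the lock could not be taken immediately; in the latter case the
 * mmap_lock may already have been dropped (directly, or via *@fpin), so
 * the caller has to bail out with VM_FAULT_RETRY.
 */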
2577static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
2578 struct file **fpin)
2579{
2580 if (trylock_page(page))
2581 return 1;
2582
2583
2584
2585
2586
2587
2588 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
2589 return 0;
2590
2591 *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
2592 if (vmf->flags & FAULT_FLAG_KILLABLE) {
2593 if (__lock_page_killable(page)) {
2594
2595
2596
2597
2598
2599
2600 if (*fpin == NULL)
2601 mmap_read_unlock(vmf->vma->vm_mm);
2602 return 0;
2603 }
2604 } else
2605 __lock_page(page);
2606 return 1;
2607}
2608
2609
2610
2611
2612
2613
2614
2615
2616
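/*
 * Synchronous readahead happens when we don't even find the page in the
 * page cache at all.  We don't want to do I/O under the mmap_lock, so this
 * may drop it; the pinned file is then returned for the caller to fput()
 * once the fault is done.
 */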
2617static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
2618{
2619 struct file *file = vmf->vma->vm_file;
2620 struct file_ra_state *ra = &file->f_ra;
2621 struct address_space *mapping = file->f_mapping;
2622 DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
2623 struct file *fpin = NULL;
2624 unsigned int mmap_miss;
2625
2626
2627 if (vmf->vma->vm_flags & VM_RAND_READ)
2628 return fpin;
2629 if (!ra->ra_pages)
2630 return fpin;
2631
2632 if (vmf->vma->vm_flags & VM_SEQ_READ) {
2633 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2634 page_cache_sync_ra(&ractl, ra, ra->ra_pages);
2635 return fpin;
2636 }
2637
2638
2639 mmap_miss = READ_ONCE(ra->mmap_miss);
2640 if (mmap_miss < MMAP_LOTSAMISS * 10)
2641 WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
2642
2643
2644
2645
2646
2647 if (mmap_miss > MMAP_LOTSAMISS)
2648 return fpin;
2649
2650
2651
2652
2653 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2654 ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
2655 ra->size = ra->ra_pages;
2656 ra->async_size = ra->ra_pages / 4;
2657 ractl._index = ra->start;
2658 do_page_cache_ra(&ractl, ra->size, ra->async_size);
2659 return fpin;
2660}
2661
2662
2663
2664
2665
2666
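/*
 * Asynchronous readahead happens when we find the page with PG_readahead
 * set and want to extend the readahead window further.  Like the
 * synchronous variant, this may drop the mmap_lock and return the pinned
 * file in that case.
 */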
2667static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
2668 struct page *page)
2669{
2670 struct file *file = vmf->vma->vm_file;
2671 struct file_ra_state *ra = &file->f_ra;
2672 struct address_space *mapping = file->f_mapping;
2673 struct file *fpin = NULL;
2674 unsigned int mmap_miss;
2675 pgoff_t offset = vmf->pgoff;
2676
2677
2678 if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
2679 return fpin;
2680 mmap_miss = READ_ONCE(ra->mmap_miss);
2681 if (mmap_miss)
2682 WRITE_ONCE(ra->mmap_miss, --mmap_miss);
2683 if (PageReadahead(page)) {
2684 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2685 page_cache_async_readahead(mapping, ra, file,
2686 page, offset, ra->ra_pages);
2687 }
2688 return fpin;
2689}
2690
/**
 * filemap_fault - read in file data for page fault handling
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 *
 * vma->vm_mm->mmap_lock must be held on entry.
 *
 * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
 * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
 *
 * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
 * has not been released.
 *
 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
 *
 * Return: bitwise-OR of %VM_FAULT_ codes.
 */
2714vm_fault_t filemap_fault(struct vm_fault *vmf)
2715{
2716 int error;
2717 struct file *file = vmf->vma->vm_file;
2718 struct file *fpin = NULL;
2719 struct address_space *mapping = file->f_mapping;
2720 struct file_ra_state *ra = &file->f_ra;
2721 struct inode *inode = mapping->host;
2722 pgoff_t offset = vmf->pgoff;
2723 pgoff_t max_off;
2724 struct page *page;
2725 vm_fault_t ret = 0;
2726
2727 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2728 if (unlikely(offset >= max_off))
2729 return VM_FAULT_SIGBUS;
2730
2731
2732
2733
2734 page = find_get_page(mapping, offset);
2735 if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
2736
2737
2738
2739
2740 fpin = do_async_mmap_readahead(vmf, page);
2741 } else if (!page) {
2742
2743 count_vm_event(PGMAJFAULT);
2744 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
2745 ret = VM_FAULT_MAJOR;
2746 fpin = do_sync_mmap_readahead(vmf);
2747retry_find:
2748 page = pagecache_get_page(mapping, offset,
2749 FGP_CREAT|FGP_FOR_MMAP,
2750 vmf->gfp_mask);
2751 if (!page) {
2752 if (fpin)
2753 goto out_retry;
2754 return VM_FAULT_OOM;
2755 }
2756 }
2757
2758 if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
2759 goto out_retry;
2760
2761
2762 if (unlikely(compound_head(page)->mapping != mapping)) {
2763 unlock_page(page);
2764 put_page(page);
2765 goto retry_find;
2766 }
2767 VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
2768
2769
2770
2771
2772
2773 if (unlikely(!PageUptodate(page)))
2774 goto page_not_uptodate;
2775
2776
2777
2778
2779
2780
2781 if (fpin) {
2782 unlock_page(page);
2783 goto out_retry;
2784 }
2785
2786
2787
2788
2789
2790 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2791 if (unlikely(offset >= max_off)) {
2792 unlock_page(page);
2793 put_page(page);
2794 return VM_FAULT_SIGBUS;
2795 }
2796
2797 vmf->page = page;
2798 return ret | VM_FAULT_LOCKED;
2799
2800page_not_uptodate:
2801
2802
2803
2804
2805
2806
2807 ClearPageError(page);
2808 fpin = maybe_unlock_mmap_for_io(vmf, fpin);
2809 error = mapping->a_ops->readpage(file, page);
2810 if (!error) {
2811 wait_on_page_locked(page);
2812 if (!PageUptodate(page))
2813 error = -EIO;
2814 }
2815 if (fpin)
2816 goto out_retry;
2817 put_page(page);
2818
2819 if (!error || error == AOP_TRUNCATED_PAGE)
2820 goto retry_find;
2821
2822 shrink_readahead_size_eio(ra);
2823 return VM_FAULT_SIGBUS;
2824
2825out_retry:
2826
2827
2828
2829
2830
2831 if (page)
2832 put_page(page);
2833 if (fpin)
2834 fput(fpin);
2835 return ret | VM_FAULT_RETRY;
2836}
2837EXPORT_SYMBOL(filemap_fault);
2838
2839void filemap_map_pages(struct vm_fault *vmf,
2840 pgoff_t start_pgoff, pgoff_t end_pgoff)
2841{
2842 struct file *file = vmf->vma->vm_file;
2843 struct address_space *mapping = file->f_mapping;
2844 pgoff_t last_pgoff = start_pgoff;
2845 unsigned long max_idx;
2846 XA_STATE(xas, &mapping->i_pages, start_pgoff);
2847 struct page *head, *page;
2848 unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
2849
2850 rcu_read_lock();
2851 xas_for_each(&xas, head, end_pgoff) {
2852 if (xas_retry(&xas, head))
2853 continue;
2854 if (xa_is_value(head))
2855 goto next;
2856
2857
2858
2859
2860
2861 if (PageLocked(head))
2862 goto next;
2863 if (!page_cache_get_speculative(head))
2864 goto next;
2865
2866
2867 if (unlikely(head != xas_reload(&xas)))
2868 goto skip;
2869 page = find_subpage(head, xas.xa_index);
2870
2871 if (!PageUptodate(head) ||
2872 PageReadahead(page) ||
2873 PageHWPoison(page))
2874 goto skip;
2875 if (!trylock_page(head))
2876 goto skip;
2877
2878 if (head->mapping != mapping || !PageUptodate(head))
2879 goto unlock;
2880
2881 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
2882 if (xas.xa_index >= max_idx)
2883 goto unlock;
2884
2885 if (mmap_miss > 0)
2886 mmap_miss--;
2887
2888 vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
2889 if (vmf->pte)
2890 vmf->pte += xas.xa_index - last_pgoff;
2891 last_pgoff = xas.xa_index;
2892 if (alloc_set_pte(vmf, page))
2893 goto unlock;
2894 unlock_page(head);
2895 goto next;
2896unlock:
2897 unlock_page(head);
2898skip:
2899 put_page(head);
2900next:
2901
2902 if (pmd_trans_huge(*vmf->pmd))
2903 break;
2904 }
2905 rcu_read_unlock();
2906 WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss);
2907}
2908EXPORT_SYMBOL(filemap_map_pages);
2909
2910vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2911{
2912 struct page *page = vmf->page;
2913 struct inode *inode = file_inode(vmf->vma->vm_file);
2914 vm_fault_t ret = VM_FAULT_LOCKED;
2915
2916 sb_start_pagefault(inode->i_sb);
2917 file_update_time(vmf->vma->vm_file);
2918 lock_page(page);
2919 if (page->mapping != inode->i_mapping) {
2920 unlock_page(page);
2921 ret = VM_FAULT_NOPAGE;
2922 goto out;
2923 }
2924
2925
2926
2927
2928
2929 set_page_dirty(page);
2930 wait_for_stable_page(page);
2931out:
2932 sb_end_pagefault(inode->i_sb);
2933 return ret;
2934}
2935
2936const struct vm_operations_struct generic_file_vm_ops = {
2937 .fault = filemap_fault,
2938 .map_pages = filemap_map_pages,
2939 .page_mkwrite = filemap_page_mkwrite,
2940};
2941
2942
2943
2944int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2945{
2946 struct address_space *mapping = file->f_mapping;
2947
2948 if (!mapping->a_ops->readpage)
2949 return -ENOEXEC;
2950 file_accessed(file);
2951 vma->vm_ops = &generic_file_vm_ops;
2952 return 0;
2953}
2954
2955
2956
2957
2958int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
2959{
2960 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
2961 return -EINVAL;
2962 return generic_file_mmap(file, vma);
2963}
2964#else
2965vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2966{
2967 return VM_FAULT_SIGBUS;
2968}
2969int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2970{
2971 return -ENOSYS;
2972}
2973int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
2974{
2975 return -ENOSYS;
2976}
2977#endif
2978
2979EXPORT_SYMBOL(filemap_page_mkwrite);
2980EXPORT_SYMBOL(generic_file_mmap);
2981EXPORT_SYMBOL(generic_file_readonly_mmap);
2982
2983static struct page *wait_on_page_read(struct page *page)
2984{
2985 if (!IS_ERR(page)) {
2986 wait_on_page_locked(page);
2987 if (!PageUptodate(page)) {
2988 put_page(page);
2989 page = ERR_PTR(-EIO);
2990 }
2991 }
2992 return page;
2993}
2994
2995static struct page *do_read_cache_page(struct address_space *mapping,
2996 pgoff_t index,
2997 int (*filler)(void *, struct page *),
2998 void *data,
2999 gfp_t gfp)
3000{
3001 struct page *page;
3002 int err;
3003repeat:
3004 page = find_get_page(mapping, index);
3005 if (!page) {
3006 page = __page_cache_alloc(gfp);
3007 if (!page)
3008 return ERR_PTR(-ENOMEM);
3009 err = add_to_page_cache_lru(page, mapping, index, gfp);
3010 if (unlikely(err)) {
3011 put_page(page);
3012 if (err == -EEXIST)
3013 goto repeat;
3014
3015 return ERR_PTR(err);
3016 }
3017
3018filler:
3019 if (filler)
3020 err = filler(data, page);
3021 else
3022 err = mapping->a_ops->readpage(data, page);
3023
3024 if (err < 0) {
3025 put_page(page);
3026 return ERR_PTR(err);
3027 }
3028
3029 page = wait_on_page_read(page);
3030 if (IS_ERR(page))
3031 return page;
3032 goto out;
3033 }
3034 if (PageUptodate(page))
3035 goto out;
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068 wait_on_page_locked(page);
3069 if (PageUptodate(page))
3070 goto out;
3071
3072
3073 lock_page(page);
3074
3075
3076 if (!page->mapping) {
3077 unlock_page(page);
3078 put_page(page);
3079 goto repeat;
3080 }
3081
3082
3083 if (PageUptodate(page)) {
3084 unlock_page(page);
3085 goto out;
3086 }
3087
3088
3089
3090
3091
3092
3093
3094 ClearPageError(page);
3095 goto filler;
3096
3097out:
3098 mark_page_accessed(page);
3099 return page;
3100}
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116struct page *read_cache_page(struct address_space *mapping,
3117 pgoff_t index,
3118 int (*filler)(void *, struct page *),
3119 void *data)
3120{
3121 return do_read_cache_page(mapping, index, filler, data,
3122 mapping_gfp_mask(mapping));
3123}
3124EXPORT_SYMBOL(read_cache_page);
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139struct page *read_cache_page_gfp(struct address_space *mapping,
3140 pgoff_t index,
3141 gfp_t gfp)
3142{
3143 return do_read_cache_page(mapping, index, NULL, NULL, gfp);
3144}
3145EXPORT_SYMBOL(read_cache_page_gfp);
3146
3147int pagecache_write_begin(struct file *file, struct address_space *mapping,
3148 loff_t pos, unsigned len, unsigned flags,
3149 struct page **pagep, void **fsdata)
3150{
3151 const struct address_space_operations *aops = mapping->a_ops;
3152
3153 return aops->write_begin(file, mapping, pos, len, flags,
3154 pagep, fsdata);
3155}
3156EXPORT_SYMBOL(pagecache_write_begin);
3157
3158int pagecache_write_end(struct file *file, struct address_space *mapping,
3159 loff_t pos, unsigned len, unsigned copied,
3160 struct page *page, void *fsdata)
3161{
3162 const struct address_space_operations *aops = mapping->a_ops;
3163
3164 return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
3165}
3166EXPORT_SYMBOL(pagecache_write_end);

/*
 * Warn about a page cache invalidation failure during a direct I/O write.
 */
void dio_warn_stale_pagecache(struct file *filp)
{
	static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
	char pathname[128];
	struct inode *inode = file_inode(filp);
	char *path;

	errseq_set(&inode->i_mapping->wb_err, -EIO);
	if (__ratelimit(&_rs)) {
		path = file_path(filp, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		pr_crit("Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!\n");
		pr_crit("File: %s PID: %d Comm: %.20s\n", path, current->pid,
			current->comm);
	}
}

ssize_t
generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	size_t write_len;
	pgoff_t end;

	write_len = iov_iter_count(from);
	end = (pos + write_len - 1) >> PAGE_SHIFT;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* If there are pages to writeback, return */
		if (filemap_range_has_page(inode->i_mapping, pos,
					   pos + write_len - 1))
			return -EAGAIN;
	} else {
		written = filemap_write_and_wait_range(mapping, pos,
						       pos + write_len - 1);
		if (written)
			goto out;
	}

	/*
	 * After a write we want buffered reads to be sure to go to disk to get
	 * the new data.  We invalidate clean cached pages from the region
	 * we're about to write.  We do this *before* the write so that we can
	 * return without clobbering -EIOCBQUEUED from ->direct_IO().
	 */
	written = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);
	/*
	 * If a page can not be invalidated, return 0 to fall back
	 * to buffered write.
	 */
	if (written) {
		if (written == -EBUSY)
			return 0;
		goto out;
	}

	written = mapping->a_ops->direct_IO(iocb, from);

	/*
	 * Finally, try again to invalidate clean pages which might have been
	 * cached by non-direct readahead, or faulted in by get_user_pages()
	 * if the source of the write was an mmap'ed region of the file
	 * we're writing.  Either one is a pretty crazy thing to do,
	 * so we don't support it 100%.  If this invalidation
	 * fails, tough, the write still worked...
	 *
	 * Most of the time we do not need this since dio_complete() will do
	 * the invalidation for us.  However there are some file systems that
	 * do not end up with dio_complete() being called, so let us not
	 * depend on it and do it here before the check of 'written'.
	 *
	 * Skip invalidation for async writes or if mapping has no pages.
	 */
	if (written > 0 && mapping->nrpages &&
	    invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end))
		dio_warn_stale_pagecache(file);

	if (written > 0) {
		pos += written;
		write_len -= written;
		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
			i_size_write(inode, pos);
			mark_inode_dirty(inode);
		}
		iocb->ki_pos = pos;
	}
	iov_iter_revert(from, write_len - iov_iter_count(from));
out:
	return written;
}
EXPORT_SYMBOL(generic_file_direct_write);

/*
 * Find or create a page at the given pagecache position.  Return the locked
 * page.  This function is specifically for buffered writes.
 */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
					pgoff_t index, unsigned flags)
{
	struct page *page;
	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;

	if (flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;

	page = pagecache_get_page(mapping, index, fgp_flags,
			mapping_gfp_mask(mapping));
	if (page)
		wait_for_stable_page(page);

	return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
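
/*
 * Example (illustrative sketch, not part of the original file): a simple
 * in-memory filesystem can build its ->write_begin() directly on
 * grab_cache_page_write_begin(), zero-filling partial new pages, much like
 * simple_write_begin() in fs/libfs.c.  "foo_write_begin" is a hypothetical
 * name used only for illustration:
 *
 *	static int foo_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		struct page *page;
 *		pgoff_t index = pos >> PAGE_SHIFT;
 *		unsigned from = pos & (PAGE_SIZE - 1);
 *
 *		page = grab_cache_page_write_begin(mapping, index, flags);
 *		if (!page)
 *			return -ENOMEM;
 *
 *		if (!PageUptodate(page) && (len != PAGE_SIZE))
 *			zero_user_segments(page, 0, from,
 *					   from + len, PAGE_SIZE);
 *		*pagep = page;
 *		return 0;
 *	}
 */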

ssize_t generic_perform_write(struct file *file,
				struct iov_iter *i, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = 0;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */
		void *fsdata;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));

again:
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}

		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
					    &page, &fsdata);
		if (unlikely(status < 0))
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
					  page, fsdata);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fallback here, we could livelock
			 * because not all segments in the iov can be copied
			 * at once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;

		balance_dirty_pages_ratelimited(mapping);
	} while (iov_iter_count(i));

	return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);
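
/*
 * Example (illustrative sketch, not part of the original file): a filesystem
 * that only supports buffered writes but needs its own locking or checks in
 * ->write_iter() can still delegate the copy loop to generic_perform_write().
 * "foo_file_write_iter" is a hypothetical name used only for illustration;
 * a real implementation would also set current->backing_dev_info and handle
 * O_SYNC via generic_write_sync():
 *
 *	static ssize_t foo_file_write_iter(struct kiocb *iocb,
 *					   struct iov_iter *from)
 *	{
 *		struct file *file = iocb->ki_filp;
 *		struct inode *inode = file_inode(file);
 *		ssize_t ret;
 *
 *		inode_lock(inode);
 *		ret = generic_write_checks(iocb, from);
 *		if (ret > 0)
 *			ret = generic_perform_write(file, from, iocb->ki_pos);
 *		if (ret > 0)
 *			iocb->ki_pos += ret;
 *		inode_unlock(inode);
 *		return ret;
 *	}
 */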

/**
 * __generic_file_write_iter - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file.  It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or
 * similar object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it.  This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t written = 0;
	ssize_t err;
	ssize_t status;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos, endbyte;

		written = generic_file_direct_write(iocb, from);
		/*
		 * If the write stopped short of completing, fall back to
		 * buffered writes.  Some filesystems do this for writes to
		 * holes, for example.  For DAX files, a buffered write will
		 * not succeed (even if it did, DAX does not handle dirty
		 * page-cache pages correctly).
		 */
		if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
			goto out;

		status = generic_perform_write(file, from, pos = iocb->ki_pos);
		/*
		 * If generic_perform_write() returned a synchronous error
		 * then we want to return the number of bytes which were
		 * direct-written, or the error code if that was zero.  Note
		 * that this differs from normal direct-io semantics, which
		 * will return -EFOO even if some bytes were written.
		 */
		if (unlikely(status < 0)) {
			err = status;
			goto out;
		}
		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		endbyte = pos + status - 1;
		err = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (err == 0) {
			iocb->ki_pos = endbyte + 1;
			written += status;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written.
			 */
		}
	} else {
		written = generic_perform_write(file, from, iocb->ki_pos);
		if (likely(written > 0))
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);

/**
 * generic_file_write_iter - write data to a file
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems.  It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 *
 * Return:
 * * negative error code if no data has been written at all or if
 *   vfs_fsync_range() failed for a synchronous write
 * * number of bytes written, even for truncated writes
 */
ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(generic_file_write_iter);
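
/*
 * Example (illustrative sketch, not part of the original file): filesystems
 * whose regular files live entirely in the page cache typically wire their
 * file_operations to the generic helpers in this file.
 * "foo_file_operations" is a hypothetical name used only for illustration:
 *
 *	const struct file_operations foo_file_operations = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= generic_file_read_iter,
 *		.write_iter	= generic_file_write_iter,
 *		.mmap		= generic_file_mmap,
 *		.fsync		= generic_file_fsync,
 *		.splice_read	= generic_file_splice_read,
 *		.splice_write	= iter_file_splice_write,
 *	};
 */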

/**
 * try_to_release_page() - release old fs-specific metadata on a page
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is asked to release any data held against the page
 * (presumably at page->private).
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block
 * (__GFP_RECLAIM & __GFP_FS).
 *
 * Return: %1 if the release was successful, otherwise return %0.
 */
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
	struct address_space * const mapping = page->mapping;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping && mapping->a_ops->releasepage)
		return mapping->a_ops->releasepage(page, gfp_mask);
	return try_to_free_buffers(page);
}

EXPORT_SYMBOL(try_to_release_page);
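
/*
 * Example (illustrative sketch, not part of the original file): a filesystem
 * that attaches private data to its page cache pages provides the
 * ->releasepage() that try_to_release_page() calls above; block-based
 * filesystems using buffer_heads can rely on the try_to_free_buffers()
 * fallback instead.  "foo_releasepage" and "foo_can_drop_private" are
 * hypothetical names used only for illustration:
 *
 *	static int foo_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		if (PagePrivate(page)) {
 *			if (!foo_can_drop_private(page))
 *				return 0;
 *			detach_page_private(page);
 *		}
 *		return 1;
 *	}
 */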