// SPDX-License-Identifier: GPL-2.0-only
/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/error-injection.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>

/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 *
 * finished 'unifying' the page and buffer cache and SMP-threaded the
 * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
 *
 * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
 */

/*
 * Lock ordering:
 *
 *  ->i_mmap_rwsem		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->i_pages lock
 *
 *  ->i_mutex
 *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_sem
 *    ->i_mmap_rwsem
 *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
 *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
 *
 *  ->mmap_sem
 *    ->lock_page		(access_process_vm)
 *
 *  ->i_mutex			(generic_perform_write)
 *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
 *
 *  bdi->wb.list_lock
 *    sb_lock			(fs/fs-writeback.c)
 *    ->i_pages lock		(__sync_single_inode)
 *
 *  ->i_mmap_rwsem
 *    ->anon_vma.lock		(vma_adjust)
 *
 *  ->anon_vma.lock
 *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
 *
 *  ->page_table_lock or pte_lock
 *    ->swap_lock		(try_to_unmap_one)
 *    ->private_lock		(try_to_unmap_one)
 *    ->i_pages lock		(try_to_unmap_one)
 *    ->pgdat->lru_lock		(follow_page->mark_page_accessed)
 *    ->pgdat->lru_lock		(check_pte_range->isolate_lru_page)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
 *    ->memcg->move_lock	(page_remove_rmap->lock_page_memcg)
 *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
 *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
 * ->i_mmap_rwsem
 *   ->tasklist_lock		(memory_failure, collect_procs_ao)
 */
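
/*
 * Remove @page from the page cache, replacing it in the XArray with
 * @shadow (which may be NULL).  The caller must hold the i_pages lock
 * and have the page locked; node/memcg statistics are handled
 * separately by unaccount_page_cache_page().
 */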
static void page_cache_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	XA_STATE(xas, &mapping->i_pages, page->index);
	unsigned int nr = 1;

	mapping_set_update(&xas, mapping);

	/* hugetlb pages are represented by a single entry in the xarray */
	if (!PageHuge(page)) {
		xas_set_order(&xas, page->index, compound_order(page));
		nr = 1U << compound_order(page);
	}

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(nr != 1 && shadow, page);

	xas_store(&xas, shadow);
	xas_init_marks(&xas);

	page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	if (shadow) {
		mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages -= nr;
}

static void unaccount_page_cache_page(struct address_space *mapping,
				      struct page *page)
{
	int nr;

	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries.  We can't leave
	 * stale data around in the cleancache once our page is gone
	 */
	if (PageUptodate(page) && PageMappedToDisk(page))
		cleancache_put_page(page);
	else
		cleancache_invalidate_page(mapping, page);

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapped(page), page);
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
		int mapcount;

		pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
			 current->comm, page_to_pfn(page));
		dump_page(page, "still mapped when deleted");
		dump_stack();
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

		mapcount = page_mapcount(page);
		if (mapping_exiting(mapping) &&
		    page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
			page_mapcount_reset(page);
			page_ref_sub(page, mapcount);
		}
	}

	/* hugetlb pages do not participate in page cache accounting. */
	if (PageHuge(page))
		return;

	nr = hpage_nr_pages(page);

	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	if (PageSwapBacked(page)) {
		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
		if (PageTransHuge(page))
			__dec_node_page_state(page, NR_SHMEM_THPS);
	} else {
		VM_BUG_ON_PAGE(PageTransHuge(page), page);
	}

	/*
	 * At this point page must be either written or cleaned by
	 * truncate.  Dirty page here signals a bug and loss of
	 * unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely
	 * but leaves PageDirty set: it has no effect for truncated
	 * page and anyway will be cleared before returning page into
	 * buddy allocator.
	 */
	if (WARN_ON_ONCE(PageDirty(page)))
		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
}
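
/*
 * Delete a page from the page cache and free it.  Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the i_pages lock.
 */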
void __delete_from_page_cache(struct page *page, void *shadow)
{
	struct address_space *mapping = page->mapping;

	trace_mm_filemap_delete_from_page_cache(page);

	unaccount_page_cache_page(mapping, page);
	page_cache_delete(mapping, page, shadow);
}

static void page_cache_free_page(struct address_space *mapping,
				struct page *page)
{
	void (*freepage)(struct page *);

	freepage = mapping->a_ops->freepage;
	if (freepage)
		freepage(page);

	if (PageTransHuge(page) && !PageHuge(page)) {
		page_ref_sub(page, HPAGE_PMD_NR);
		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
	} else {
		put_page(page);
	}
}
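
/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the
 * page cache and locked.  It will never put the page into the free list,
 * the caller has a reference on the page.
 */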
void delete_from_page_cache(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	xa_lock_irqsave(&mapping->i_pages, flags);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	page_cache_free_page(mapping, page);
}
EXPORT_SYMBOL(delete_from_page_cache);
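
/*
 * page_cache_delete_batch - delete several pages from page cache
 * @mapping: the mapping to which pages belong
 * @pvec: pagevec with pages to delete
 *
 * The function walks over mapping->i_pages and removes pages passed in
 * @pvec from the mapping. The function expects @pvec to be sorted by
 * page index.  It tolerates holes in @pvec (mapping entries at those
 * indices are not modified). The function expects only THP head pages
 * to be present in @pvec and takes care to delete all corresponding
 * tail pages from the mapping as well.
 *
 * The function expects the i_pages lock to be held.
 */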
static void page_cache_delete_batch(struct address_space *mapping,
			     struct pagevec *pvec)
{
	XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
	int total_pages = 0;
	int i = 0, tail_pages = 0;
	struct page *page;

	mapping_set_update(&xas, mapping);
	xas_for_each(&xas, page, ULONG_MAX) {
		if (i >= pagevec_count(pvec) && !tail_pages)
			break;
		if (xa_is_value(page))
			continue;
		if (!tail_pages) {
			/*
			 * Some page got inserted in our range? Skip it. We
			 * have our pages locked so they are protected from
			 * being removed.
			 */
			if (page != pvec->pages[i]) {
				VM_BUG_ON_PAGE(page->index >
						pvec->pages[i]->index, page);
				continue;
			}
			WARN_ON_ONCE(!PageLocked(page));
			if (PageTransHuge(page) && !PageHuge(page))
				tail_pages = HPAGE_PMD_NR - 1;
			page->mapping = NULL;
			/*
			 * Leave page->index set: truncation lookup relies
			 * upon it
			 */
			i++;
		} else {
			VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
					!= pvec->pages[i]->index, page);
			tail_pages--;
		}
		xas_store(&xas, NULL);
		total_pages++;
	}
	mapping->nrpages -= total_pages;
}

void delete_from_page_cache_batch(struct address_space *mapping,
				  struct pagevec *pvec)
{
	int i;
	unsigned long flags;

	if (!pagevec_count(pvec))
		return;

	xa_lock_irqsave(&mapping->i_pages, flags);
	for (i = 0; i < pagevec_count(pvec); i++) {
		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);

		unaccount_page_cache_page(mapping, pvec->pages[i]);
	}
	page_cache_delete_batch(mapping, pvec);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	for (i = 0; i < pagevec_count(pvec); i++)
		page_cache_free_page(mapping, pvec->pages[i]);
}

int filemap_check_errors(struct address_space *mapping)
{
	int ret = 0;
	/* Check for outstanding write errors */
	if (test_bit(AS_ENOSPC, &mapping->flags) &&
	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_bit(AS_EIO, &mapping->flags) &&
	    test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
EXPORT_SYMBOL(filemap_check_errors);

static int filemap_check_and_keep_errors(struct address_space *mapping)
{
	/* Check for outstanding write errors */
	if (test_bit(AS_EIO, &mapping->flags))
		return -EIO;
	if (test_bit(AS_ENOSPC, &mapping->flags))
		return -ENOSPC;
	return 0;
}
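
/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 *
 * Return: %0 on success, negative error code otherwise.
 */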
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end, int sync_mode)
{
	int ret;
	struct writeback_control wbc = {
		.sync_mode = sync_mode,
		.nr_to_write = LONG_MAX,
		.range_start = start,
		.range_end = end,
	};

	if (!mapping_cap_writeback_dirty(mapping))
		return 0;

	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
	ret = do_writepages(mapping, &wbc);
	wbc_detach_inode(&wbc);
	return ret;
}

static inline int __filemap_fdatawrite(struct address_space *mapping,
	int sync_mode)
{
	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}

int filemap_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);

int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end)
{
	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);
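
/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 *
 * Return: %0 on success, negative error code otherwise.
 */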
int filemap_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
}
EXPORT_SYMBOL(filemap_flush);
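
/**
 * filemap_range_has_page - check if a page exists in range.
 * @mapping:           address space within which to check
 * @start_byte:        offset in bytes where the range starts
 * @end_byte:          offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.
 *
 * Return: %true if at least one page exists in the specified range,
 * %false otherwise.
 */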
bool filemap_range_has_page(struct address_space *mapping,
			   loff_t start_byte, loff_t end_byte)
{
	struct page *page;
	XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
	pgoff_t max = end_byte >> PAGE_SHIFT;

	if (end_byte < start_byte)
		return false;

	rcu_read_lock();
	for (;;) {
		page = xas_find(&xas, max);
		if (xas_retry(&xas, page))
			continue;
		/* Shadow entries don't count */
		if (xa_is_value(page))
			continue;
		/*
		 * We don't need to try to pin this page; we're about to
		 * release the RCU lock anyway.  It is enough to know that
		 * there was a page here recently.
		 */
		break;
	}
	rcu_read_unlock();

	return page != NULL;
}
EXPORT_SYMBOL(filemap_range_has_page);

static void __filemap_fdatawait_range(struct address_space *mapping,
				     loff_t start_byte, loff_t end_byte)
{
	pgoff_t index = start_byte >> PAGE_SHIFT;
	pgoff_t end = end_byte >> PAGE_SHIFT;
	struct pagevec pvec;
	int nr_pages;

	if (end_byte < start_byte)
		return;

	pagevec_init(&pvec);
	while (index <= end) {
		unsigned i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
				end, PAGECACHE_TAG_WRITEBACK);
		if (!nr_pages)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			wait_on_page_writeback(page);
			ClearPageError(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}
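
/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them.  Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space.
 */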
int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
			    loff_t end_byte)
{
	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return filemap_check_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_range);
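
/**
 * file_fdatawait_range - wait for writeback to complete
 * @file:		file pointing to address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the address space that file
 * refers to, in the given range and wait for all of them.  Check error
 * status of the address space vs. the file->f_wb_err cursor and return it.
 *
 * Since the error status of the file is advanced by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 *
 * Return: error status of the address space vs. the file->f_wb_err cursor.
 */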
int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
{
	struct address_space *mapping = file->f_mapping;

	__filemap_fdatawait_range(mapping, start_byte, end_byte);
	return file_check_and_advance_wb_err(file);
}
EXPORT_SYMBOL(file_fdatawait_range);
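
/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Unlike filemap_fdatawait(), this function
 * does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves.  Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 *
 * Return: error status of the address space.
 */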
int filemap_fdatawait_keep_errors(struct address_space *mapping)
{
	__filemap_fdatawait_range(mapping, 0, LLONG_MAX);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);

static bool mapping_needs_writeback(struct address_space *mapping)
{
	return (!dax_mapping(mapping) && mapping->nrpages) ||
	    (dax_mapping(mapping) && mapping->nrexceptional);
}

int filemap_write_and_wait(struct address_space *mapping)
{
	int err = 0;

	if (mapping_needs_writeback(mapping)) {
		err = filemap_fdatawrite(mapping);
		/*
		 * Even if the above returned error, the pages may be
		 * written partially (e.g. -ENOSPC), so we wait for it.
		 * But the -EIO is special case, it may indicate the worst
		 * thing (e.g. bug) happened, so we avoid waiting for it.
		 */
		if (err != -EIO) {
			int err2 = filemap_fdatawait(mapping);
			if (!err)
				err = err2;
		} else {
			/* Clear any previously stored errors */
			filemap_check_errors(mapping);
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait);
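
/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * Return: error status of the address space.
 */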
int filemap_write_and_wait_range(struct address_space *mapping,
				 loff_t lstart, loff_t lend)
{
	int err = 0;

	if (mapping_needs_writeback(mapping)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/* See comment of filemap_write_and_wait() */
		if (err != -EIO) {
			int err2 = filemap_fdatawait_range(mapping,
						lstart, lend);
			if (!err)
				err = err2;
		} else {
			/* Clear any previously stored errors */
			filemap_check_errors(mapping);
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait_range);
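
/**
 * __filemap_set_wb_err - set a writeback error on an address_space
 * @mapping: mapping in which to set writeback error
 * @err: error to be set in mapping
 *
 * When writeback fails in some way, we must record that error so that
 * userspace can be informed when fsync and the like are called.  We endeavor
 * to report errors on any file that was open at the time of the error.
 */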
void __filemap_set_wb_err(struct address_space *mapping, int err)
{
	errseq_t eseq = errseq_set(&mapping->wb_err, err);

	trace_filemap_set_wb_err(mapping, eseq);
}
EXPORT_SYMBOL(__filemap_set_wb_err);
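
/**
 * file_check_and_advance_wb_err - report wb error (if any) that was previously
 * 				   and advance wb_err to current one
 * @file: struct file on which the error is being reported
 *
 * When userland calls fsync (or something like nfsd does the equivalent), we
 * want to report any writeback errors that occurred since the last fsync (or
 * since the file was opened if there haven't been any).
 *
 * Grab the wb_err from the mapping. If it matches what we have in the file,
 * then just quickly return 0. The file is all caught up.
 *
 * If it doesn't match, then take the mapping value, set the "seen" flag in
 * it and try to swap it into place. If it works, or another task beat us
 * to it with the new value, then update the f_wb_err and return the error
 * portion. The error at this point must be reported via proper channels
 * (a'la fsync, or NFS COMMIT operation, etc.).
 *
 * While we handle mapping->wb_err with atomic operations, the f_wb_err
 * value is protected by the f_lock since we must ensure that it reflects
 * the latest value swapped in for this file descriptor.
 *
 * Return: %0 on success, negative error code otherwise.
 */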
int file_check_and_advance_wb_err(struct file *file)
{
	int err = 0;
	errseq_t old = READ_ONCE(file->f_wb_err);
	struct address_space *mapping = file->f_mapping;

	/* Locklessly handle the common case where nothing has changed */
	if (errseq_check(&mapping->wb_err, old)) {
		/* Something changed, must use slow path */
		spin_lock(&file->f_lock);
		old = file->f_wb_err;
		err = errseq_check_and_advance(&mapping->wb_err,
						&file->f_wb_err);
		trace_file_check_and_advance_wb_err(file, old);
		spin_unlock(&file->f_lock);
	}

	/*
	 * We're mostly using this function as a drop in replacement for
	 * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
	 * that the legacy code would have had on these flags.
	 */
	clear_bit(AS_EIO, &mapping->flags);
	clear_bit(AS_ENOSPC, &mapping->flags);
	return err;
}
EXPORT_SYMBOL(file_check_and_advance_wb_err);
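
/**
 * file_write_and_wait_range - write out & wait on a file range
 * @file:	file pointing to address_space with pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 *
 * After writing out and waiting on the data, we check and advance the
 * f_wb_err cursor to the latest value, and return any errors detected there.
 *
 * Return: %0 on success, negative error code otherwise.
 */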
int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
{
	int err = 0, err2;
	struct address_space *mapping = file->f_mapping;

	if (mapping_needs_writeback(mapping)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/* See comment of filemap_write_and_wait() */
		if (err != -EIO)
			__filemap_fdatawait_range(mapping, lstart, lend);
	}
	err2 = file_check_and_advance_wb_err(file);
	if (!err)
		err = err2;
	return err;
}
EXPORT_SYMBOL(file_write_and_wait_range);
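
/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 * @gfp_mask:	allocation mode
 *
 * This function replaces a page in the pagecache with a new one.  On
 * success it acquires the pagecache reference for the new page and
 * drops it for the old page.  Both the old and new pages must be
 * locked.  This function does not add the new page to the LRU, the
 * caller must do that.
 *
 * The remove + add is atomic.  This function cannot fail.
 *
 * Return: %0
 */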
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
{
	struct address_space *mapping = old->mapping;
	void (*freepage)(struct page *) = mapping->a_ops->freepage;
	pgoff_t offset = old->index;
	XA_STATE(xas, &mapping->i_pages, offset);
	unsigned long flags;

	VM_BUG_ON_PAGE(!PageLocked(old), old);
	VM_BUG_ON_PAGE(!PageLocked(new), new);
	VM_BUG_ON_PAGE(new->mapping, new);

	get_page(new);
	new->mapping = mapping;
	new->index = offset;

	xas_lock_irqsave(&xas, flags);
	xas_store(&xas, new);

	old->mapping = NULL;
	/* hugetlb pages do not participate in page cache accounting. */
	if (!PageHuge(old))
		__dec_node_page_state(new, NR_FILE_PAGES);
	if (!PageHuge(new))
		__inc_node_page_state(new, NR_FILE_PAGES);
	if (PageSwapBacked(old))
		__dec_node_page_state(new, NR_SHMEM);
	if (PageSwapBacked(new))
		__inc_node_page_state(new, NR_SHMEM);
	xas_unlock_irqrestore(&xas, flags);
	mem_cgroup_migrate(old, new);
	if (freepage)
		freepage(old);
	put_page(old);

	return 0;
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);

static int __add_to_page_cache_locked(struct page *page,
				      struct address_space *mapping,
				      pgoff_t offset, gfp_t gfp_mask,
				      void **shadowp)
{
	XA_STATE(xas, &mapping->i_pages, offset);
	int huge = PageHuge(page);
	struct mem_cgroup *memcg;
	int error;
	void *old;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
	mapping_set_update(&xas, mapping);

	if (!huge) {
		error = mem_cgroup_try_charge(page, current->mm,
					      gfp_mask, &memcg, false);
		if (error)
			return error;
	}

	get_page(page);
	page->mapping = mapping;
	page->index = offset;

	do {
		xas_lock_irq(&xas);
		old = xas_load(&xas);
		if (old && !xa_is_value(old))
			xas_set_err(&xas, -EEXIST);
		xas_store(&xas, page);
		if (xas_error(&xas))
			goto unlock;

		if (xa_is_value(old)) {
			mapping->nrexceptional--;
			if (shadowp)
				*shadowp = old;
		}
		mapping->nrpages++;

		/* hugetlb pages do not participate in page cache accounting */
		if (!huge)
			__inc_node_page_state(page, NR_FILE_PAGES);
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));

	if (xas_error(&xas))
		goto error;

	if (!huge)
		mem_cgroup_commit_charge(page, memcg, false, false);
	trace_mm_filemap_add_to_page_cache(page);
	return 0;
error:
	page->mapping = NULL;
	/* Leave page->index set: truncation relies upon it */
	if (!huge)
		mem_cgroup_cancel_charge(page, memcg, false);
	put_page(page);
	return xas_error(&xas);
}
ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
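
/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache. It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 *
 * Return: %0 on success, negative error code otherwise.
 */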
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
		pgoff_t offset, gfp_t gfp_mask)
{
	return __add_to_page_cache_locked(page, mapping, offset,
					  gfp_mask, NULL);
}
EXPORT_SYMBOL(add_to_page_cache_locked);

int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t offset, gfp_t gfp_mask)
{
	void *shadow = NULL;
	int ret;

	__SetPageLocked(page);
	ret = __add_to_page_cache_locked(page, mapping, offset,
					 gfp_mask, &shadow);
	if (unlikely(ret))
		__ClearPageLocked(page);
	else {
		/*
		 * The page might have been evicted from cache only
		 * recently, in which case it should be activated like
		 * any other repeatedly accessed page.
		 * The exception is pages getting rewritten; evicting other
		 * data from the working set, only to cache data that will
		 * get overwritten with something else, is a waste of memory.
		 */
		WARN_ON_ONCE(PageActive(page));
		if (!(gfp_mask & __GFP_WRITE) && shadow)
			workingset_refault(page, shadow);
		lru_cache_add(page);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);

#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
{
	int n;
	struct page *page;

	if (cpuset_do_page_mem_spread()) {
		unsigned int cpuset_mems_cookie;
		do {
			cpuset_mems_cookie = read_mems_allowed_begin();
			n = cpuset_mem_spread_node();
			page = __alloc_pages_node(n, gfp, 0);
		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

		return page;
	}
	return alloc_pages(gfp, 0);
}
EXPORT_SYMBOL(__page_cache_alloc);
#endif
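
/*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
 * waitqueues where the bucket discipline is to maintain all
 * waiters on the same queue and wake all when any of the pages
 * become available, and for the woken contexts to check to be
 * sure the appropriate page became available, this saves space
 * at a cost of "thundering herd" phenomena during rare hash
 * collisions.
 */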
#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;

static wait_queue_head_t *page_waitqueue(struct page *page)
{
	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}

void __init pagecache_init(void)
{
	int i;

	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(&page_wait_table[i]);

	page_writeback_init();
}

/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
struct wait_page_key {
	struct page *page;
	int bit_nr;
	int page_match;
};

struct wait_page_queue {
	struct page *page;
	int bit_nr;
	wait_queue_entry_t wait;
};

static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
	struct wait_page_key *key = arg;
	struct wait_page_queue *wait_page
		= container_of(wait, struct wait_page_queue, wait);

	if (wait_page->page != key->page)
		return 0;
	key->page_match = 1;

	if (wait_page->bit_nr != key->bit_nr)
		return 0;

	/*
	 * Stop walking if it's locked.
	 * Is this safe if put_and_wait_on_page_locked() is in use?
	 * Yes: the waker must hold a reference to this page, and if PG_locked
	 * has now already been set by another task, that task must also hold
	 * a reference to the *same usage* of this page; so there is no need
	 * to walk on to wake even the put_and_wait_on_page_locked() callers.
	 */
	if (test_bit(key->bit_nr, &key->page->flags))
		return -1;

	return autoremove_wake_function(wait, mode, sync, key);
}

static void wake_up_page_bit(struct page *page, int bit_nr)
{
	wait_queue_head_t *q = page_waitqueue(page);
	struct wait_page_key key;
	unsigned long flags;
	wait_queue_entry_t bookmark;

	key.page = page;
	key.bit_nr = bit_nr;
	key.page_match = 0;

	bookmark.flags = 0;
	bookmark.private = NULL;
	bookmark.func = NULL;
	INIT_LIST_HEAD(&bookmark.entry);

	spin_lock_irqsave(&q->lock, flags);
	__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);

	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
		/*
		 * Take a breather from holding the lock,
		 * allow pages that finish wake up asynchronously
		 * to acquire the lock and remove them
		 * from wait queue
		 */
		spin_unlock_irqrestore(&q->lock, flags);
		cpu_relax();
		spin_lock_irqsave(&q->lock, flags);
		__wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
	}

	/*
	 * It is possible for other pages to have collided on the waitqueue
	 * hash, so in that case check for a page match. That prevents a long-
	 * term waiter
	 *
	 * It is still possible to miss a case here, when we woke page waiters
	 * and removed them from the waitqueue, but there are still other
	 * page waiters.
	 */
	if (!waitqueue_active(q) || !key.page_match) {
		ClearPageWaiters(page);
		/*
		 * Nobody matching our page is left on this waitqueue head:
		 * either the queue is empty, or every remaining waiter is
		 * for a page that merely collided in the hash.  Either way
		 * PG_waiters can be cleared; a future waiter will set it
		 * again before sleeping.
		 */
	}
	spin_unlock_irqrestore(&q->lock, flags);
}

static void wake_up_page(struct page *page, int bit)
{
	if (!PageWaiters(page))
		return;
	wake_up_page_bit(page, bit);
}

/*
 * A choice of three behaviors for wait_on_page_bit_common():
 */
enum behavior {
	EXCLUSIVE,	/* Hold ref to page and take the bit when woken, like
			 * __lock_page() waiting on then setting PG_locked.
			 */
	SHARED,		/* Hold ref to page and check the bit when woken, like
			 * wait_on_page_writeback() waiting on PG_writeback.
			 */
	DROP,		/* Drop ref to page before wait, no check when woken,
			 * like put_and_wait_on_page_locked() on PG_locked.
			 */
};

static inline int wait_on_page_bit_common(wait_queue_head_t *q,
	struct page *page, int bit_nr, int state, enum behavior behavior)
{
	struct wait_page_queue wait_page;
	wait_queue_entry_t *wait = &wait_page.wait;
	bool bit_is_set;
	bool thrashing = false;
	bool delayacct = false;
	unsigned long pflags;
	int ret = 0;

	if (bit_nr == PG_locked &&
	    !PageUptodate(page) && PageWorkingset(page)) {
		if (!PageSwapBacked(page)) {
			delayacct_thrashing_start();
			delayacct = true;
		}
		psi_memstall_enter(&pflags);
		thrashing = true;
	}

	init_wait(wait);
	wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
	wait->func = wake_page_function;
	wait_page.page = page;
	wait_page.bit_nr = bit_nr;

	for (;;) {
		spin_lock_irq(&q->lock);

		if (likely(list_empty(&wait->entry))) {
			__add_wait_queue_entry_tail(q, wait);
			SetPageWaiters(page);
		}

		set_current_state(state);

		spin_unlock_irq(&q->lock);

		bit_is_set = test_bit(bit_nr, &page->flags);
		if (behavior == DROP)
			put_page(page);

		if (likely(bit_is_set))
			io_schedule();

		if (behavior == EXCLUSIVE) {
			if (!test_and_set_bit_lock(bit_nr, &page->flags))
				break;
		} else if (behavior == SHARED) {
			if (!test_bit(bit_nr, &page->flags))
				break;
		}

		if (signal_pending_state(state, current)) {
			ret = -EINTR;
			break;
		}

		if (behavior == DROP) {
			/*
			 * We can no longer safely access page->flags:
			 * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
			 * there is a risk of waiting forever on a page reused
			 * for something that keeps it locked indefinitely.
			 * But best check for -EINTR above before breaking.
			 */
			break;
		}
	}

	finish_wait(q, wait);

	if (thrashing) {
		if (delayacct)
			delayacct_thrashing_end();
		psi_memstall_leave(&pflags);
	}

	/*
	 * A signal could leave PageWaiters set. Clearing it here if
	 * !waitqueue_active would be possible (by open-coding finish_wait),
	 * but still fail to catch it in the case of wait hash collision.
	 * We already can fail to clear wait hash collision cases, so don't
	 * bother with signals either.
	 */
	return ret;
}

void wait_on_page_bit(struct page *page, int bit_nr)
{
	wait_queue_head_t *q = page_waitqueue(page);
	wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
}
EXPORT_SYMBOL(wait_on_page_bit);

int wait_on_page_bit_killable(struct page *page, int bit_nr)
{
	wait_queue_head_t *q = page_waitqueue(page);
	return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
}
EXPORT_SYMBOL(wait_on_page_bit_killable);
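
/**
 * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
 * @page: The page to wait for.
 *
 * The caller should hold a reference on @page.  They expect the page to
 * become unlocked relatively soon, but do not wish to hold up migration
 * (for example) by holding the reference while waiting for the page to
 * come unlocked.  After this function returns, the caller should not
 * dereference @page.
 */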
void put_and_wait_on_page_locked(struct page *page)
{
	wait_queue_head_t *q;

	page = compound_head(page);
	q = page_waitqueue(page);
	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
}
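
/**
 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
 * @page: Page defining the wait queue of interest
 * @waiter: Waiter to add to the queue
 *
 * Add an arbitrary @waiter to the wait queue for the nominated @page.
 */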
void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
{
	wait_queue_head_t *q = page_waitqueue(page);
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue_entry_tail(q, waiter);
	SetPageWaiters(page);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(add_page_wait_queue);

#ifndef clear_bit_unlock_is_negative_byte

/*
 * PG_waiters is the high bit in the same byte as PG_locked.
 *
 * On x86 (and on many other architectures), we can clear PG_locked and
 * test the sign bit at the same time. But if the architecture does not
 * support that special operation, we just do this all by hand instead.
 *
 * The read of PG_waiters has to be after (or concurrently with) PG_locked
 * being cleared; clear_bit_unlock() provides the needed release ordering,
 * so following it with a plain test_bit() is sufficient here even though
 * the two steps are not one atomic operation.
 */
static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
{
	clear_bit_unlock(nr, mem);
	/* smp_mb__after_atomic(); */
	return test_bit(PG_waiters, mem);
}

#endif
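
/**
 * unlock_page - unlock a locked page
 * @page: the page
 *
 * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
 * Also wakes sleepers in wait_on_page_writeback() because the wakeup
 * mechanism between PageLocked pages and PageWriteback pages is shared.
 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
 *
 * Note that this depends on PG_waiters being the sign bit in the byte
 * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
 * clear the PG_locked bit and test PG_waiters at the same time fairly
 * portably (architectures that do LL/SC can test any bit, while x86 can
 * test the sign bit).
 */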
void unlock_page(struct page *page)
{
	BUILD_BUG_ON(PG_waiters != 7);
	page = compound_head(page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
		wake_up_page_bit(page, PG_locked);
}
EXPORT_SYMBOL(unlock_page);
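
/**
 * end_page_writeback - end writeback against a page
 * @page: the page
 */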
void end_page_writeback(struct page *page)
{
	/*
	 * TestClearPageReclaim could be used here but it is an atomic
	 * operation and overkill in this particular case. Failing to
	 * shuffle a page marked for immediate reclaim is too mild to
	 * justify taking an atomic operation penalty at the end of
	 * every page writeback.
	 */
	if (PageReclaim(page)) {
		ClearPageReclaim(page);
		rotate_reclaimable_page(page);
	}

	if (!test_clear_page_writeback(page))
		BUG();

	smp_mb__after_atomic();
	wake_up_page(page, PG_writeback);
}
EXPORT_SYMBOL(end_page_writeback);

/*
 * After completing I/O on a page, call this routine to update the page
 * flags appropriately
 */
void page_endio(struct page *page, bool is_write, int err)
{
	if (!is_write) {
		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	} else {
		if (err) {
			struct address_space *mapping;

			SetPageError(page);
			mapping = page_mapping(page);
			if (mapping)
				mapping_set_error(mapping, err);
		}
		end_page_writeback(page);
	}
}
EXPORT_SYMBOL_GPL(page_endio);

/**
 * __lock_page - get a lock on the page, assuming we need to sleep to get it
 * @__page: the page to lock
 */
void __lock_page(struct page *__page)
{
	struct page *page = compound_head(__page);
	wait_queue_head_t *q = page_waitqueue(page);
	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
				EXCLUSIVE);
}
EXPORT_SYMBOL(__lock_page);

int __lock_page_killable(struct page *__page)
{
	struct page *page = compound_head(__page);
	wait_queue_head_t *q = page_waitqueue(page);
	return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
					EXCLUSIVE);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);

/*
 * Return values:
 * 1 - page is locked; mmap_sem is still held.
 * 0 - page is not locked.
 *     mmap_sem has been released (up_read()), unless flags had both
 *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
 *     which case mmap_sem is still held.
 *
 * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
 * with the page locked and the mmap_sem unperturbed.
 */
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
			 unsigned int flags)
{
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		/*
		 * CAUTION! In this case, mmap_sem is not released
		 * even though return 0.
		 */
		if (flags & FAULT_FLAG_RETRY_NOWAIT)
			return 0;

		up_read(&mm->mmap_sem);
		if (flags & FAULT_FLAG_KILLABLE)
			wait_on_page_locked_killable(page);
		else
			wait_on_page_locked(page);
		return 0;
	} else {
		if (flags & FAULT_FLAG_KILLABLE) {
			int ret;

			ret = __lock_page_killable(page);
			if (ret) {
				up_read(&mm->mmap_sem);
				return 0;
			}
		} else
			__lock_page(page);
		return 1;
	}
}
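
/**
 * page_cache_next_miss() - Find the next gap in the page cache.
 * @mapping: Mapping.
 * @index: Index.
 * @max_scan: Maximum range to search.
 *
 * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the
 * gap with the lowest index.
 *
 * This function may be called under the rcu_read_lock.  However, this will
 * not atomically search a snapshot of the cache at a single point in time.
 * For example, if a gap is created at index 5, then subsequently a gap is
 * created at index 10, page_cache_next_miss covering both indices may
 * return 10 if called under the rcu_read_lock.
 *
 * Return: The index of the gap if found, otherwise an index outside the
 * range specified (in which case 'return - index >= max_scan' will be true).
 * In the rare case of index wrap-around, 0 will be returned.
 */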
pgoff_t page_cache_next_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan)
{
	XA_STATE(xas, &mapping->i_pages, index);

	while (max_scan--) {
		void *entry = xas_next(&xas);
		if (!entry || xa_is_value(entry))
			break;
		if (xas.xa_index == 0)
			break;
	}

	return xas.xa_index;
}
EXPORT_SYMBOL(page_cache_next_miss);
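
/**
 * page_cache_prev_miss() - Find the previous gap in the page cache.
 * @mapping: Mapping.
 * @index: Index.
 * @max_scan: Maximum range to search.
 *
 * Search the range [max(index - max_scan + 1, 0), index] for the
 * gap with the highest index.
 *
 * This function may be called under the rcu_read_lock.  However, this will
 * not atomically search a snapshot of the cache at a single point in time.
 * For example, if a gap is created at index 10, then subsequently a gap is
 * created at index 5, page_cache_prev_miss() covering both indices may
 * return 5 if called under the rcu_read_lock.
 *
 * Return: The index of the gap if found, otherwise an index outside the
 * range specified (in which case 'index - return >= max_scan' will be true).
 * In the rare case of wrap-around, ULONG_MAX will be returned.
 */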
pgoff_t page_cache_prev_miss(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan)
{
	XA_STATE(xas, &mapping->i_pages, index);

	while (max_scan--) {
		void *entry = xas_prev(&xas);
		if (!entry || xa_is_value(entry))
			break;
		if (xas.xa_index == ULONG_MAX)
			break;
	}

	return xas.xa_index;
}
EXPORT_SYMBOL(page_cache_prev_miss);
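
/**
 * find_get_entry - find and get a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned with an increased refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * Return: the found page or shadow entry, %NULL if nothing is found.
 */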
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
	XA_STATE(xas, &mapping->i_pages, offset);
	struct page *head, *page;

	rcu_read_lock();
repeat:
	xas_reset(&xas);
	page = xas_load(&xas);
	if (xas_retry(&xas, page))
		goto repeat;
	/*
	 * A shadow entry of a recently evicted page, or a swap entry from
	 * shmem/tmpfs.  Return it without attempting to raise page count.
	 */
	if (!page || xa_is_value(page))
		goto out;

	head = compound_head(page);
	if (!page_cache_get_speculative(head))
		goto repeat;

	/* The page was split under us? */
	if (compound_head(page) != head) {
		put_page(head);
		goto repeat;
	}

	/*
	 * Has the page moved?
	 * This is part of the lockless pagecache protocol. See
	 * include/linux/pagemap.h for details.
	 */
	if (unlikely(page != xas_reload(&xas))) {
		put_page(head);
		goto repeat;
	}
out:
	rcu_read_unlock();

	return page;
}
EXPORT_SYMBOL(find_get_entry);
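
/**
 * find_lock_entry - locate, pin and lock a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * find_lock_entry() may sleep.
 *
 * Return: the found page or shadow entry, %NULL if nothing is found.
 */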
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
{
	struct page *page;

repeat:
	page = find_get_entry(mapping, offset);
	if (page && !xa_is_value(page)) {
		lock_page(page);
		/* Has the page been truncated? */
		if (unlikely(page_mapping(page) != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
	}
	return page;
}
EXPORT_SYMBOL(find_lock_entry);
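
/**
 * pagecache_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 * @fgp_flags: PCG flags
 * @gfp_mask: gfp mask to use for the page cache data page allocation
 *
 * Looks up the page cache slot at @mapping & @offset.
 *
 * PCG flags modify how the page is returned.
 *
 * @fgp_flags can be:
 *
 * - FGP_ACCESSED: the page will be marked accessed
 * - FGP_LOCK: Page is return locked
 * - FGP_CREAT: If page is not present then a new page is allocated using
 *   @gfp_mask and added to the page cache and the VM's LRU
 *   list. The page is returned locked and with an increased
 *   refcount.
 * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
 *   its own locking dance if the page is already in cache, or unlock the page
 *   before returning if we had to add the page to pagecache.
 *
 * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
 * if the GFP flags specified for FGP_CREAT are atomic.
 *
 * If there is a page cache page, it is returned with an increased refcount.
 *
 * Return: the found page or %NULL otherwise.
 */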
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
	int fgp_flags, gfp_t gfp_mask)
{
	struct page *page;

repeat:
	page = find_get_entry(mapping, offset);
	if (xa_is_value(page))
		page = NULL;
	if (!page)
		goto no_page;

	if (fgp_flags & FGP_LOCK) {
		if (fgp_flags & FGP_NOWAIT) {
			if (!trylock_page(page)) {
				put_page(page);
				return NULL;
			}
		} else {
			lock_page(page);
		}

		/* Has the page been truncated? */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		VM_BUG_ON_PAGE(page->index != offset, page);
	}

	if (fgp_flags & FGP_ACCESSED)
		mark_page_accessed(page);

no_page:
	if (!page && (fgp_flags & FGP_CREAT)) {
		int err;
		if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
			gfp_mask |= __GFP_WRITE;
		if (fgp_flags & FGP_NOFS)
			gfp_mask &= ~__GFP_FS;

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return NULL;

		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
			fgp_flags |= FGP_LOCK;

		/* Init accessed so avoid atomic mark_page_accessed later */
		if (fgp_flags & FGP_ACCESSED)
			__SetPageReferenced(page);

		err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
		if (unlikely(err)) {
			put_page(page);
			page = NULL;
			if (err == -EEXIST)
				goto repeat;
		}

		/*
		 * add_to_page_cache_lru locks the page, and for mmap we expect
		 * an unlocked page.
		 */
		if (page && (fgp_flags & FGP_FOR_MMAP))
			unlock_page(page);
	}

	return page;
}
EXPORT_SYMBOL(pagecache_get_page);
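
/**
 * find_get_entries - gang pagecache lookup
 * @mapping:	The address_space to search
 * @start:	The starting page cache index
 * @nr_entries:	The maximum number of entries
 * @entries:	Where the resulting entries are placed
 * @indices:	The cache indices corresponding to the entries in @entries
 *
 * find_get_entries() will search for and return a group of up to
 * @nr_entries entries in the mapping.  The entries are placed at
 * @entries.  find_get_entries() takes a reference against any actual
 * pages it returns.
 *
 * The search returns a group of mapping-contiguous page cache entries
 * with ascending indexes.  There may be holes in the indices due to
 * not-present pages.
 *
 * Any shadow entries of evicted pages, or swap entries from
 * shmem/tmpfs, are included in the returned array.
 *
 * Return: the number of pages and shadow entries which were found.
 */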
unsigned find_get_entries(struct address_space *mapping,
			  pgoff_t start, unsigned int nr_entries,
			  struct page **entries, pgoff_t *indices)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;
	unsigned int ret = 0;

	if (!nr_entries)
		return 0;

	rcu_read_lock();
	xas_for_each(&xas, page, ULONG_MAX) {
		struct page *head;
		if (xas_retry(&xas, page))
			continue;
		/*
		 * A shadow entry of a recently evicted page, a swap
		 * entry from shmem/tmpfs or a DAX entry.  Return it
		 * without attempting to raise page count.
		 */
		if (xa_is_value(page))
			goto export;

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto retry;

		/* The page was split under us? */
		if (compound_head(page) != head)
			goto put_page;

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

export:
		indices[ret] = xas.xa_index;
		entries[ret] = page;
		if (++ret == nr_entries)
			break;
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}
	rcu_read_unlock();
	return ret;
}
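
/**
 * find_get_pages_range - gang pagecache lookup
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @end:	The final page index (inclusive)
 * @nr_pages:	The maximum number of pages
 * @pages:	Where the resulting pages are placed
 *
 * find_get_pages_range() will search for and return a group of up to @nr_pages
 * pages in the mapping starting at index @start and up to index @end
 * (inclusive).  The pages are placed at @pages.  find_get_pages_range() takes
 * a reference against the returned pages.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 * We also update @start to index the next page for the traversal.
 *
 * Return: the number of pages which were found. If this number is
 * smaller than @nr_pages, the end of specified range has been
 * reached.
 */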
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
			      pgoff_t end, unsigned int nr_pages,
			      struct page **pages)
{
	XA_STATE(xas, &mapping->i_pages, *start);
	struct page *page;
	unsigned ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	xas_for_each(&xas, page, end) {
		struct page *head;
		if (xas_retry(&xas, page))
			continue;
		/* Skip over shadow, swap and DAX entries */
		if (xa_is_value(page))
			continue;

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto retry;

		/* The page was split under us? */
		if (compound_head(page) != head)
			goto put_page;

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

		pages[ret] = page;
		if (++ret == nr_pages) {
			*start = xas.xa_index + 1;
			goto out;
		}
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}

	/*
	 * We come here when there is no page beyond @end. We take care to not
	 * overflow the index @start as it confuses some of the callers. This
	 * breaks the iteration when there is a page at index -1 but that is
	 * already broken anyway.
	 */
	if (end == (pgoff_t)-1)
		*start = (pgoff_t)-1;
	else
		*start = end + 1;
out:
	rcu_read_unlock();

	return ret;
}
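
/**
 * find_get_pages_contig - gang contiguous pagecache lookup
 * @mapping:	The address_space to search
 * @index:	The starting page index
 * @nr_pages:	The maximum number of pages
 * @pages:	Where the resulting pages are placed
 *
 * find_get_pages_contig() works exactly like find_get_pages(), except
 * that the returned number of pages are guaranteed to be contiguous.
 *
 * Return: the number of pages which were found.
 */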
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
			       unsigned int nr_pages, struct page **pages)
{
	XA_STATE(xas, &mapping->i_pages, index);
	struct page *page;
	unsigned int ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		struct page *head;
		if (xas_retry(&xas, page))
			continue;
		/*
		 * If the entry has been swapped out, we can stop looking.
		 * No current caller is looking for DAX entries.
		 */
		if (xa_is_value(page))
			break;

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto retry;

		/* The page was split under us? */
		if (compound_head(page) != head)
			goto put_page;

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

		pages[ret] = page;
		if (++ret == nr_pages)
			break;
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);
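
/**
 * find_get_pages_range_tag - find and return pages in given range matching @tag
 * @mapping:	the address_space to search
 * @index:	the starting page index
 * @end:	The final page index (inclusive)
 * @tag:	the tag index
 * @nr_pages:	the maximum number of pages
 * @pages:	where the resulting pages are placed
 *
 * Like find_get_pages, except we only return pages which are tagged with
 * @tag.  We update @index to index the next page for the traversal.
 *
 * Return: the number of pages which were found.
 */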
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
			struct page **pages)
{
	XA_STATE(xas, &mapping->i_pages, *index);
	struct page *page;
	unsigned ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	xas_for_each_marked(&xas, page, end, tag) {
		struct page *head;
		if (xas_retry(&xas, page))
			continue;
		/*
		 * Shadow entries should never be tagged, but this iteration
		 * is lockless so there is a window for page reclaim to evict
		 * a page we saw tagged.  Skip over it.
		 */
		if (xa_is_value(page))
			continue;

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto retry;

		/* The page was split under us? */
		if (compound_head(page) != head)
			goto put_page;

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto put_page;

		pages[ret] = page;
		if (++ret == nr_pages) {
			*index = xas.xa_index + 1;
			goto out;
		}
		continue;
put_page:
		put_page(head);
retry:
		xas_reset(&xas);
	}

	/*
	 * We come here when we got to @end. We take care to not overflow the
	 * index @index as it confuses some of the callers. This breaks the
	 * iteration when there is a page at index -1 but that is already
	 * broken anyway.
	 */
	if (end == (pgoff_t)-1)
		*index = (pgoff_t)-1;
	else
		*index = end + 1;
out:
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(find_get_pages_range_tag);

/*
 * CD/DVDs are error prone. When a medium error occurs, the driver may fail
 * a _large_ part of the i/o request. Imagine the worst scenario:
 *
 *      ---R__________________________________________B__________
 *         ^ reading here                             ^ bad block(assume 4k)
 *
 * read(R) => miss => readahead(R...B) => media error => frustrating retries
 * => failing the whole request => read(R) => read(R+1) =>
 * readahead(R+1...B+1) => bandwidth ravaged => next invocation of read(R)
 * => miss => readahead(R...B) => media error  => ......
 *
 * It is going insane. Fix it by quickly scaling down the readahead size.
 */
static void shrink_readahead_size_eio(struct file *filp,
					struct file_ra_state *ra)
{
	ra->ra_pages /= 4;
}
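
/**
 * generic_file_buffered_read - generic file read routine
 * @iocb:	the iocb to read
 * @iter:	data destination
 * @written:	already copied
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 *
 * Return:
 * * total number of bytes copied, including those already @written
 * * negative error code if nothing was copied
 */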
static ssize_t generic_file_buffered_read(struct kiocb *iocb,
		struct iov_iter *iter, ssize_t written)
{
	struct file *filp = iocb->ki_filp;
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	loff_t *ppos = &iocb->ki_pos;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;      /* offset into pagecache page */
	unsigned int prev_offset;
	int error = 0;

	if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
		return 0;
	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);

	index = *ppos >> PAGE_SHIFT;
	prev_index = ra->prev_pos >> PAGE_SHIFT;
	prev_offset = ra->prev_pos & (PAGE_SIZE-1);
	last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		cond_resched();
find_page:
		if (fatal_signal_pending(current)) {
			error = -EINTR;
			goto out;
		}

		page = find_get_page(mapping, index);
		if (!page) {
			if (iocb->ki_flags & IOCB_NOWAIT)
				goto would_block;
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		if (PageReadahead(page)) {
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			if (iocb->ki_flags & IOCB_NOWAIT) {
				put_page(page);
				goto would_block;
			}

			/*
			 * See comment in do_read_cache_page on why
			 * wait_on_page_locked is used to avoid unnecessarily
			 * serialisations and why it's safe.
			 */
			error = wait_on_page_locked_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (PageUptodate(page))
				goto page_ok;

			if (inode->i_blkbits == PAGE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			/* pipes can't handle partially uptodate pages */
			if (unlikely(iov_iter_is_pipe(iter)))
				goto page_not_up_to_date;
			if (!trylock_page(page))
				goto page_not_up_to_date;
			/* Did it get truncated before we got the lock? */
			if (!page->mapping)
				goto page_not_up_to_date_locked;
			if (!mapping->a_ops->is_partially_uptodate(page,
							offset, iter->count))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/*
		 * i_size must be checked after we know the page is Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */

		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_SHIFT;
		if (unlikely(!isize || index > end_index)) {
			put_page(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_SIZE;
		if (index == end_index) {
			nr = ((isize - 1) & ~PAGE_MASK) + 1;
			if (nr <= offset) {
				put_page(page);
				goto out;
			}
		}
		nr = nr - offset;

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */

		ret = copy_page_to_iter(page, offset, nr, iter);
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;
		prev_offset = offset;

		put_page(page);
		written += ret;
		if (!iov_iter_count(iter))
			goto out;
		if (ret < nr) {
			error = -EFAULT;
			goto out;
		}
		continue;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			unlock_page(page);
			put_page(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/*
		 * A previous I/O error may have been due to temporary
		 * failures, eg. multipath errors.
		 * PG_error will be set again if readpage fails.
		 */
		ClearPageError(page);
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				put_page(page);
				error = 0;
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_mapping_pages got it
					 */
					unlock_page(page);
					put_page(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		put_page(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc(mapping);
		if (!page) {
			error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping, index,
				mapping_gfp_constraint(mapping, GFP_KERNEL));
		if (error) {
			put_page(page);
			if (error == -EEXIST) {
				error = 0;
				goto find_page;
			}
			goto out;
		}
		goto readpage;
	}

would_block:
	error = -EAGAIN;
out:
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_SHIFT) + offset;
	file_accessed(filp);
	return written ? written : error;
}
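
/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iter:	destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code if nothing was read
 */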
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	size_t count = iov_iter_count(iter);
	ssize_t retval = 0;

	if (!count)
		goto out; /* skip atime */

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct file *file = iocb->ki_filp;
		struct address_space *mapping = file->f_mapping;
		struct inode *inode = mapping->host;
		loff_t size;

		size = i_size_read(inode);
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (filemap_range_has_page(mapping, iocb->ki_pos,
						   iocb->ki_pos + count - 1))
				return -EAGAIN;
		} else {
			retval = filemap_write_and_wait_range(mapping,
						iocb->ki_pos,
						iocb->ki_pos + count - 1);
			if (retval < 0)
				goto out;
		}

		file_accessed(file);

		retval = mapping->a_ops->direct_IO(iocb, iter);
		if (retval >= 0) {
			iocb->ki_pos += retval;
			count -= retval;
		}
		iov_iter_revert(iter, count - iov_iter_count(iter));

		/*
		 * Btrfs can have a short DIO read if we encounter
		 * compressed extents, so if there was an error, or if
		 * we've already read everything we wanted to, or if
		 * there was a short read because we hit EOF, go ahead
		 * and return.  Otherwise fallthrough to buffered io for
		 * the rest of the read.  Buffered reads will not work for
		 * DAX files, so don't bother trying.
		 */
		if (retval < 0 || !count || iocb->ki_pos >= size ||
		    IS_DAX(inode))
			goto out;
	}

	retval = generic_file_buffered_read(iocb, iter, retval);
out:
	return retval;
}
EXPORT_SYMBOL(generic_file_read_iter);

#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS  (100)
static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
					     struct file *fpin)
{
	int flags = vmf->flags;

	if (fpin)
		return fpin;

	/*
	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
	 * anything, so we only pin the file and drop the mmap_sem if only
	 * FAULT_FLAG_ALLOW_RETRY is set.
	 */
	if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) ==
	    FAULT_FLAG_ALLOW_RETRY) {
		fpin = get_file(vmf->vma->vm_file);
		up_read(&vmf->vma->vm_mm->mmap_sem);
	}
	return fpin;
}

/*
 * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
 * @vmf - the vm_fault for this fault.
 * @page - the page to lock.
 * @fpin - the pointer to the file we may pin (or is already pinned).
 *
 * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
 * It differs in that it actually returns the page locked if it returns 1 and 0
 * if it couldn't lock the page.  If we did have to drop the mmap_sem then fpin
 * will point to the pinned file and needs to be fput()'ed at a later point.
 */
static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
				     struct file **fpin)
{
	if (trylock_page(page))
		return 1;

	/*
	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
	 * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
	 * is supposed to work. We have way too many special cases..
	 */
	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
		return 0;

	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
	if (vmf->flags & FAULT_FLAG_KILLABLE) {
		if (__lock_page_killable(page)) {
			/*
			 * We didn't have the right flags to drop the mmap_sem,
			 * but all fault_handlers only check for fatal signals
			 * if we return VM_FAULT_RETRY, so we need to drop the
			 * mmap_sem here and return 0 if we don't have a fpin.
			 */
			if (*fpin == NULL)
				up_read(&vmf->vma->vm_mm->mmap_sem);
			return 0;
		}
	} else
		__lock_page(page);
	return 1;
}

/*
 * Synchronous readahead happens when we don't even find a page in the page
 * cache at all.  We don't want to perform IO under the mmap sem, so if we have
 * to drop the mmap sem we return the file that was pinned in order for us to do
 * that.  If we didn't pin a file then we return NULL.  The file that is
 * returned needs to be fput()'ed when we're done with it.
 */
static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct file_ra_state *ra = &file->f_ra;
	struct address_space *mapping = file->f_mapping;
	struct file *fpin = NULL;
	pgoff_t offset = vmf->pgoff;

	/* If we don't want any read-ahead, don't bother */
	if (vmf->vma->vm_flags & VM_RAND_READ)
		return fpin;
	if (!ra->ra_pages)
		return fpin;

	if (vmf->vma->vm_flags & VM_SEQ_READ) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_sync_readahead(mapping, ra, file, offset,
					  ra->ra_pages);
		return fpin;
	}

	/* Avoid banging the cache line if not needed */
	if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
		ra->mmap_miss++;

	/*
	 * Do we miss much more than hit in this file? If so,
	 * stop bothering with read-ahead. It will only hurt.
	 */
	if (ra->mmap_miss > MMAP_LOTSAMISS)
		return fpin;

	/*
	 * mmap read-around
	 */
	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
	ra->size = ra->ra_pages;
	ra->async_size = ra->ra_pages / 4;
	ra_submit(ra, mapping, file);
	return fpin;
}

/*
 * Asynchronous readahead happens when we find the page and PG_readahead,
 * so we want to possibly extend the readahead further.  We return the file that
 * was pinned if we have to drop the mmap_sem in order to do IO.
 */
static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
					    struct page *page)
{
	struct file *file = vmf->vma->vm_file;
	struct file_ra_state *ra = &file->f_ra;
	struct address_space *mapping = file->f_mapping;
	struct file *fpin = NULL;
	pgoff_t offset = vmf->pgoff;

	/* If we don't want any read-ahead, don't bother */
	if (vmf->vma->vm_flags & VM_RAND_READ)
		return fpin;
	if (ra->mmap_miss > 0)
		ra->mmap_miss--;
	if (PageReadahead(page)) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
		page_cache_async_readahead(mapping, ra, file,
					   page, offset, ra->ra_pages);
	}
	return fpin;
}
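
/**
 * filemap_fault - read in file data for page fault handling
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 *
 * vma->vm_mm->mmap_sem must be held on entry.
 *
 * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem
 * may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
 *
 * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
 * has not been released.
 *
 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
 *
 * Return: bitwise-OR of %VM_FAULT_ codes.
 */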
vm_fault_t filemap_fault(struct vm_fault *vmf)
{
	int error;
	struct file *file = vmf->vma->vm_file;
	struct file *fpin = NULL;
	struct address_space *mapping = file->f_mapping;
	struct file_ra_state *ra = &file->f_ra;
	struct inode *inode = mapping->host;
	pgoff_t offset = vmf->pgoff;
	pgoff_t max_off;
	struct page *page;
	vm_fault_t ret = 0;

	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off))
		return VM_FAULT_SIGBUS;

	/*
	 * Do we have something in the page cache already?
	 */
	page = find_get_page(mapping, offset);
	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
		/*
		 * We found the page, so try async readahead before
		 * waiting for the lock.
		 */
		fpin = do_async_mmap_readahead(vmf, page);
	} else if (!page) {
		/* No page in the page cache at all */
		count_vm_event(PGMAJFAULT);
		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
		fpin = do_sync_mmap_readahead(vmf);
retry_find:
		page = pagecache_get_page(mapping, offset,
					  FGP_CREAT|FGP_FOR_MMAP,
					  vmf->gfp_mask);
		if (!page) {
			if (fpin)
				goto out_retry;
			return vmf_error(-ENOMEM);
		}
	}

	if (!lock_page_maybe_drop_mmap(vmf, page, &fpin))
		goto out_retry;

	/* Did it get truncated? */
	if (unlikely(page->mapping != mapping)) {
		unlock_page(page);
		put_page(page);
		goto retry_find;
	}
	VM_BUG_ON_PAGE(page->index != offset, page);

	/*
	 * We have a locked page in the page cache, now we need to check
	 * that it's up-to-date. If not, it is going to be due to an error.
	 */
	if (unlikely(!PageUptodate(page)))
		goto page_not_uptodate;

	/*
	 * We've made it this far and we had to drop our mmap_sem, now is the
	 * time to return to the upper layer and have it re-find the vma and
	 * redo the fault.
	 */
	if (fpin) {
		unlock_page(page);
		goto out_retry;
	}

	/*
	 * Found the page and have a reference on it.
	 * We must recheck i_size under page lock.
	 */
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= max_off)) {
		unlock_page(page);
		put_page(page);
		return VM_FAULT_SIGBUS;
	}

	vmf->page = page;
	return ret | VM_FAULT_LOCKED;

page_not_uptodate:
	/*
	 * Umm, take care of errors if the page isn't up-to-date.
	 * Try to re-read it _once_. We do this synchronously,
	 * because there really aren't any performance issues here
	 * and we need to check for errors.
	 */
	ClearPageError(page);
	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
	error = mapping->a_ops->readpage(file, page);
	if (!error) {
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			error = -EIO;
	}
	if (fpin)
		goto out_retry;
	put_page(page);

	if (!error || error == AOP_TRUNCATED_PAGE)
		goto retry_find;

	/* Things didn't work out. Return zero to tell the mm layer so. */
	shrink_readahead_size_eio(file, ra);
	return VM_FAULT_SIGBUS;

out_retry:
	/*
	 * We dropped the mmap_sem, we need to return to the fault handler to
	 * re-find the vma and come back and find our hopefully still populated
	 * page.
	 */
	if (page)
		put_page(page);
	if (fpin)
		fput(fpin);
	return ret | VM_FAULT_RETRY;
}
EXPORT_SYMBOL(filemap_fault);

void filemap_map_pages(struct vm_fault *vmf,
		pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	unsigned long max_idx;
	XA_STATE(xas, &mapping->i_pages, start_pgoff);
	struct page *head, *page;

	rcu_read_lock();
	xas_for_each(&xas, page, end_pgoff) {
		if (xas_retry(&xas, page))
			continue;
		if (xa_is_value(page))
			goto next;

		head = compound_head(page);

		/*
		 * Check for a locked page first, as a speculative
		 * reference may adversely influence page migration.
		 */
		if (PageLocked(head))
			goto next;
		if (!page_cache_get_speculative(head))
			goto next;

		/* The page was split under us? */
		if (compound_head(page) != head)
			goto skip;

		/* Has the page moved? */
		if (unlikely(page != xas_reload(&xas)))
			goto skip;

		if (!PageUptodate(page) ||
				PageReadahead(page) ||
				PageHWPoison(page))
			goto skip;
		if (!trylock_page(page))
			goto skip;

		if (page->mapping != mapping || !PageUptodate(page))
			goto unlock;

		max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
		if (page->index >= max_idx)
			goto unlock;

		if (file->f_ra.mmap_miss > 0)
			file->f_ra.mmap_miss--;

		vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
		if (vmf->pte)
			vmf->pte += xas.xa_index - last_pgoff;
		last_pgoff = xas.xa_index;
		if (alloc_set_pte(vmf, NULL, page))
			goto unlock;
		unlock_page(page);
		goto next;
unlock:
		unlock_page(page);
skip:
		put_page(page);
next:
		/* Huge page is mapped? No need to proceed. */
		if (pmd_trans_huge(*vmf->pmd))
			break;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(filemap_map_pages);

vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	vm_fault_t ret = VM_FAULT_LOCKED;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vmf->vma->vm_file);
	lock_page(page);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}
	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	set_page_dirty(page);
	wait_for_stable_page(page);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}

const struct vm_operations_struct generic_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};

/* This is used for a general mmap of a disk file */

int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct address_space *mapping = file->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &generic_file_vm_ops;
	return 0;
}

/*
 * This is for filesystems which do not implement ->writepage.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;
	return generic_file_mmap(file, vma);
}
#else
vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);

static struct page *wait_on_page_read(struct page *page)
{
	if (!IS_ERR(page)) {
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			put_page(page);
			page = ERR_PTR(-EIO);
		}
	}
	return page;
}

static struct page *do_read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data,
				gfp_t gfp)
{
	struct page *page;
	int err;
repeat:
	page = find_get_page(mapping, index);
	if (!page) {
		page = __page_cache_alloc(gfp);
		if (!page)
			return ERR_PTR(-ENOMEM);
		err = add_to_page_cache_lru(page, mapping, index, gfp);
		if (unlikely(err)) {
			put_page(page);
			if (err == -EEXIST)
				goto repeat;
			/* Presumably ENOMEM for xarray node */
			return ERR_PTR(err);
		}

filler:
		err = filler(data, page);
		if (err < 0) {
			put_page(page);
			return ERR_PTR(err);
		}

		page = wait_on_page_read(page);
		if (IS_ERR(page))
			return page;
		goto out;
	}
	if (PageUptodate(page))
		goto out;

	/*
	 * Page is not up to date and may be locked due to one of the
	 * following cases:
	 * case a: Page is being filled and the page lock is held
	 * case b: Read/write error clearing the page uptodate status
	 * case c: Truncation in progress (page locked)
	 * case d: Reclaim in progress
	 *
	 * Case a, the page will be up to date when the page is unlocked.
	 *    There is no need to serialise on the page lock here as the page
	 *    is pinned so the lock gives no additional protection. Even if
	 *    the page is truncated, the data is still valid if PageUptodate
	 *    as it's a read vs truncate race.
	 * Case b, the page will not be up to date
	 * Case c, the page may be truncated but in itself, the data may still
	 *    be valid after IO completes as it's a read vs truncate race. The
	 *    operation must restart if the page is not uptodate on unlock but
	 *    otherwise serialising on page lock to stabilise the mapping gives
	 *    no additional guarantees to the caller as the page lock is
	 *    released before return.
	 * Case d, similar to truncation. If reclaim holds the page lock, it
	 *    will be a race with remove_mapping that determines if the mapping
	 *    is valid on unlock but otherwise the data is valid and there is
	 *    no need to serialise with page lock.
	 *
	 * As the page lock gives no additional guarantee, we optimistically
	 * wait on the page to be unlocked and check if it's up to date and
	 * use the page if it is. Otherwise, the page lock is required to
	 * distinguish between the different cases. The motivation is that
	 * we avoid spurious serialisations and wakeups when multiple
	 * processes wait on the same page for IO to complete.
	 */
	wait_on_page_locked(page);
	if (PageUptodate(page))
		goto out;

	/* Distinguish between all the cases under the safety of the lock */
	lock_page(page);

	/* Case c or d, restart the operation */
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		goto repeat;
	}

	/* Someone else locked and filled the page in a very small window */
	if (PageUptodate(page)) {
		unlock_page(page);
		goto out;
	}
	goto filler;

out:
	mark_page_accessed(page);
	return page;
}
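
/**
 * read_cache_page - read into page cache, fill it if needed
 * @mapping:	the page's address_space
 * @index:	the page index
 * @filler:	function to perform the read
 * @data:	first arg to filler(data, page) function, often left as NULL
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page and wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 *
 * Return: up to date page on success, ERR_PTR() on failure.
 */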
struct page *read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data)
{
	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
}
EXPORT_SYMBOL(read_cache_page);
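
/**
 * read_cache_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
 * any new page allocations done using the specified allocation flags.
 *
 * If the page does not get brought uptodate, return -EIO.
 *
 * Return: up to date page on success, ERR_PTR() on failure.
 */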
struct page *read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index,
				gfp_t gfp)
{
	filler_t *filler = (filler_t *)mapping->a_ops->readpage;

	return do_read_cache_page(mapping, index, filler, NULL, gfp);
}
EXPORT_SYMBOL(read_cache_page_gfp);

/*
 * Don't operate on ranges the page cache doesn't support, and don't exceed the
 * LFS limits.  If pos is under the limit it becomes a short access.  If it
 * exceeds the limit we return -EFBIG.
 */
static int generic_access_check_limits(struct file *file, loff_t pos,
				       loff_t *count)
{
	struct inode *inode = file->f_mapping->host;
	loff_t max_size = inode->i_sb->s_maxbytes;

	if (!(file->f_flags & O_LARGEFILE))
		max_size = MAX_NON_LFS;

	if (unlikely(pos >= max_size))
		return -EFBIG;
	*count = min(*count, max_size - pos);
	return 0;
}

static int generic_write_check_limits(struct file *file, loff_t pos,
				      loff_t *count)
{
	loff_t limit = rlimit(RLIMIT_FSIZE);

	if (limit != RLIM_INFINITY) {
		if (pos >= limit) {
			send_sig(SIGXFSZ, current, 0);
			return -EFBIG;
		}
		*count = min(*count, limit - pos);
	}

	return generic_access_check_limits(file, pos, count);
}

/*
 * Performs necessary checks before doing a write
 *
 * Can adjust writing position or amount of bytes to write.
 * Returns appropriate error code that caller should return or
 * zero in case that write should be allowed.
 */
inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	loff_t count;
	int ret;

	if (!iov_iter_count(from))
		return 0;

	/* FIXME: this is for backwards compatibility with 2.4 */
	if (iocb->ki_flags & IOCB_APPEND)
		iocb->ki_pos = i_size_read(inode);

	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
		return -EINVAL;

	count = iov_iter_count(from);
	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
	if (ret)
		return ret;

	iov_iter_truncate(from, count);
	return iov_iter_count(from);
}
EXPORT_SYMBOL(generic_write_checks);

/*
 * Performs necessary checks before doing a clone.
 *
 * Can adjust amount of bytes to clone via @req_count argument.
 * Returns appropriate error code that caller should return or
 * zero in case the clone should be allowed.
 */
int generic_remap_checks(struct file *file_in, loff_t pos_in,
			 struct file *file_out, loff_t pos_out,
			 loff_t *req_count, unsigned int remap_flags)
{
	struct inode *inode_in = file_in->f_mapping->host;
	struct inode *inode_out = file_out->f_mapping->host;
	uint64_t count = *req_count;
	uint64_t bcount;
	loff_t size_in, size_out;
	loff_t bs = inode_out->i_sb->s_blocksize;
	int ret;

	/* The start of both ranges must be aligned to an fs block. */
	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
		return -EINVAL;

	/* Ensure offsets don't wrap. */
	if (pos_in + count < pos_in || pos_out + count < pos_out)
		return -EINVAL;

	size_in = i_size_read(inode_in);
	size_out = i_size_read(inode_out);

	/* Dedupe requires both ranges to be within EOF. */
	if ((remap_flags & REMAP_FILE_DEDUP) &&
	    (pos_in >= size_in || pos_in + count > size_in ||
	     pos_out >= size_out || pos_out + count > size_out))
		return -EINVAL;

	/* Ensure the infile range is within the infile. */
	if (pos_in >= size_in)
		return -EINVAL;
	count = min(count, size_in - (uint64_t)pos_in);

	ret = generic_access_check_limits(file_in, pos_in, &count);
	if (ret)
		return ret;

	ret = generic_write_check_limits(file_out, pos_out, &count);
	if (ret)
		return ret;

	/*
	 * If the user wanted us to link to the infile's EOF, round up to the
	 * next block boundary for this check.
	 *
	 * Otherwise, make sure the count is also block-aligned, having
	 * already confirmed the starting offsets' block alignment.
	 */
	if (pos_in + count == size_in) {
		bcount = ALIGN(size_in, bs) - pos_in;
	} else {
		if (!IS_ALIGNED(count, bs))
			count = ALIGN_DOWN(count, bs);
		bcount = count;
	}

	/* Don't allow overlapped cloning within the same file. */
	if (inode_in == inode_out &&
	    pos_out + bcount > pos_in &&
	    pos_out < pos_in + bcount)
		return -EINVAL;

	/*
	 * We shortened the request but the caller can't deal with that, so
	 * bounce the request back to userspace.
	 */
	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
		return -EINVAL;

	*req_count = count;
	return 0;
}

int pagecache_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_begin(file, mapping, pos, len, flags,
							pagep, fsdata);
}
EXPORT_SYMBOL(pagecache_write_begin);

int pagecache_write_end(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);

ssize_t
generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	size_t write_len;
	pgoff_t end;

	write_len = iov_iter_count(from);
	end = (pos + write_len - 1) >> PAGE_SHIFT;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* If there are pages to writeback, return */
		if (filemap_range_has_page(inode->i_mapping, pos,
					   pos + write_len - 1))
			return -EAGAIN;
	} else {
		written = filemap_write_and_wait_range(mapping, pos,
							pos + write_len - 1);
		if (written)
			goto out;
	}

	/*
	 * After a write we want buffered reads to be sure to go to disk to get
	 * the new data.  We invalidate clean cached page from the region we're
	 * about to write.  We do this *before* the write so that we can return
	 * without clobbering -EIOCBQUEUED from ->direct_IO().
	 */
	written = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);
	/*
	 * If a page can not be invalidated, return 0 to fall back
	 * to buffered write.
	 */
	if (written) {
		if (written == -EBUSY)
			return 0;
		goto out;
	}

	written = mapping->a_ops->direct_IO(iocb, from);

	/*
	 * Finally, try again to invalidate clean pages which might have been
	 * cached by non-direct readahead, or faulted in by get_user_pages()
	 * if the source of the write was an mmap'ed region of the file
	 * we're writing.  Either one is a pretty crazy thing to do,
	 * so we don't support it 100%.  If this invalidation
	 * fails, tough, the write still worked...
	 *
	 * Most of the time we do not need this since dio_complete() will do
	 * the invalidation for us. However there are some file systems that
	 * do not end up with dio_complete() being called, so let us not trust
	 * it to handle everything for us.
	 */
	if (mapping->nrpages)
		invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);

	if (written > 0) {
		pos += written;
		write_len -= written;
		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
			i_size_write(inode, pos);
			mark_inode_dirty(inode);
		}
		iocb->ki_pos = pos;
	}
	iov_iter_revert(from, write_len - iov_iter_count(from));
out:
	return written;
}
EXPORT_SYMBOL(generic_file_direct_write);

/*
 * Find or create a page at the given pagecache position. Return the locked
 * page. This function is specifically for buffered writes.
 */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
					pgoff_t index, unsigned flags)
{
	struct page *page;
	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;

	if (flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;

	page = pagecache_get_page(mapping, index, fgp_flags,
			mapping_gfp_mask(mapping));
	if (page)
		wait_for_stable_page(page);

	return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);

ssize_t generic_perform_write(struct file *file,
				struct iov_iter *i, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = 0;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */
		void *fsdata;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));

again:
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}

		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
						&page, &fsdata);
		if (unlikely(status < 0))
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fallback here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;

		balance_dirty_pages_ratelimited(mapping);
	} while (iov_iter_count(i));

	return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);
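
/**
 * __generic_file_write_iter - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or similar
 * object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */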
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t written = 0;
	ssize_t err;
	ssize_t status;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos, endbyte;

		written = generic_file_direct_write(iocb, from);
		/*
		 * If the write stopped short of completing, fall back to
		 * buffered writes.  Some filesystems do this for writes to
		 * holes, for example.  For DAX files, a buffered write will
		 * not succeed (even if it did, DAX does not handle dirty
		 * page-cache pages correctly).
		 */
		if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
			goto out;

		status = generic_perform_write(file, from, pos = iocb->ki_pos);
		/*
		 * If generic_perform_write() returned a synchronous error
		 * then we want to return the number of bytes which were
		 * direct-written, or the error code if that was zero.  Note
		 * that this differs from normal direct-io semantics, which
		 * will return -EFOO even if some bytes were written.
		 */
		if (unlikely(status < 0)) {
			err = status;
			goto out;
		}
		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		endbyte = pos + status - 1;
		err = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (err == 0) {
			iocb->ki_pos = endbyte + 1;
			written += status;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written
			 */
		}
	} else {
		written = generic_perform_write(file, from, iocb->ki_pos);
		if (likely(written > 0))
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);
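
/**
 * generic_file_write_iter - write data to a file
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 * Return:
 * * negative error code if no data has been written at all or if
 *   vfs_fsync_range() failed for a synchronous write
 * * number of bytes written, even for truncated writes
 */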
ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(generic_file_write_iter);
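
/**
 * try_to_release_page() - release old fs-specific metadata on a page
 *
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is to try to release any data against the page
 * (presumably at page->private).
 *
 * This may also be called if PG_fscache is set on a page, indicating that the
 * page is known to the local caching routines.
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block
 * (__GFP_RECLAIM & __GFP_FS).
 *
 * Return: %1 if the release was successful, otherwise return zero.
 */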
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
	struct address_space * const mapping = page->mapping;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping && mapping->a_ops->releasepage)
		return mapping->a_ops->releasepage(page, gfp_mask);
	return try_to_free_buffers(page);
}

EXPORT_SYMBOL(try_to_release_page);