#ifdef __GENKSYMS__
#define _LINUX_DELAYACCT_H
#endif

#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/shmem_fs.h>
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

#include <linux/buffer_head.h>

#include <asm/mman.h>
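/*
 * Insert @page into the i_pages tree at @page->index, replacing any shadow
 * (exceptional) entry found there and reporting it through @shadowp.
 * The caller must hold the i_pages lock.
 */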
static int page_cache_tree_insert(struct address_space *mapping,
				  struct page *page, void **shadowp)
{
	struct radix_tree_node *node;
	void **slot;
	int error;

	error = __radix_tree_create(&mapping->i_pages, page->index, 0,
				    &node, &slot);
	if (error)
		return error;
	if (*slot) {
		void *p;

		p = radix_tree_deref_slot_protected(slot,
						    &mapping->i_pages.xa_lock);
		if (!xa_is_value(p))
			return -EEXIST;

		mapping->nrexceptional--;
		if (shadowp)
			*shadowp = p;
	}
	__radix_tree_replace(&mapping->i_pages, node, slot, page,
			     workingset_lookup_update(mapping));
	mapping->nrpages++;
	return 0;
}
154
155static void page_cache_tree_delete(struct address_space *mapping,
156 struct page *page, void *shadow)
157{
158 int i, nr;
159
160
161 nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
162
163 VM_BUG_ON_PAGE(!PageLocked(page), page);
164 VM_BUG_ON_PAGE(PageTail(page), page);
165 VM_BUG_ON_PAGE(nr != 1 && shadow, page);
166
167 for (i = 0; i < nr; i++) {
168 struct radix_tree_node *node;
169 void **slot;
170
171 __radix_tree_lookup(&mapping->i_pages, page->index + i,
172 &node, &slot);
173
174 VM_BUG_ON_PAGE(!node && nr != 1, page);
175
176 radix_tree_clear_tags(&mapping->i_pages, node, slot);
177 __radix_tree_replace(&mapping->i_pages, node, slot, shadow,
178 workingset_lookup_update(mapping));
179 }
180
181 page->mapping = NULL;
182
183
184 if (shadow) {
185 mapping->nrexceptional += nr;
186
187
188
189
190
191
192 smp_wmb();
193 }
194 mapping->nrpages -= nr;
195}
196
197static void unaccount_page_cache_page(struct address_space *mapping,
198 struct page *page)
199{
200 int nr;
201
202
203
204
205
206
207 if (PageUptodate(page) && PageMappedToDisk(page))
208 cleancache_put_page(page);
209 else
210 cleancache_invalidate_page(mapping, page);
211
212 VM_BUG_ON_PAGE(PageTail(page), page);
213 VM_BUG_ON_PAGE(page_mapped(page), page);
214 if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
215 int mapcount;
216
217 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
218 current->comm, page_to_pfn(page));
219 dump_page(page, "still mapped when deleted");
220 dump_stack();
221 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
222
223 mapcount = page_mapcount(page);
224 if (mapping_exiting(mapping) &&
225 page_count(page) >= mapcount + 2) {
226
227
228
229
230
231
232 page_mapcount_reset(page);
233 page_ref_sub(page, mapcount);
234 }
235 }
236
237
238 if (PageHuge(page))
239 return;
240
241 nr = hpage_nr_pages(page);
242
243 __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
244 if (PageSwapBacked(page)) {
245 __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
246 if (PageTransHuge(page))
247 __dec_node_page_state(page, NR_SHMEM_THPS);
248 } else {
249 VM_BUG_ON_PAGE(PageTransHuge(page), page);
250 }
251
252
253
254
255
256
257
258
259
260
261
262 if (WARN_ON_ONCE(PageDirty(page)))
263 account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
264}
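/*
 * Delete a page from the page cache and free it.  The caller must hold the
 * i_pages lock, and must make sure the page is locked and that nobody else
 * uses it, or that such usage is safe.
 */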
void __delete_from_page_cache(struct page *page, void *shadow)
{
	struct address_space *mapping = page->mapping;

	trace_mm_filemap_delete_from_page_cache(page);

	unaccount_page_cache_page(mapping, page);
	page_cache_tree_delete(mapping, page, shadow);
}
280
281static void page_cache_free_page(struct address_space *mapping,
282 struct page *page)
283{
284 void (*freepage)(struct page *);
285
286 freepage = mapping->a_ops->freepage;
287 if (freepage)
288 freepage(page);
289
290 if (PageTransHuge(page) && !PageHuge(page)) {
291 page_ref_sub(page, HPAGE_PMD_NR);
292 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
293 } else {
294 put_page(page);
295 }
296}
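/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the
 * page cache and locked.  It will never put the page into the free list,
 * the caller has a reference on the page.
 */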
void delete_from_page_cache(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	xa_lock_irqsave(&mapping->i_pages, flags);
	__delete_from_page_cache(page, NULL);
	xa_unlock_irqrestore(&mapping->i_pages, flags);

	page_cache_free_page(mapping, page);
}
EXPORT_SYMBOL(delete_from_page_cache);
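/*
 * page_cache_tree_delete_batch() removes the pages in @pvec from the
 * i_pages tree.  It expects @pvec to be sorted by page index, tolerates
 * holes in the index range, and expects only THP head pages to be present.
 * The caller must hold the i_pages lock.
 */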
334static void
335page_cache_tree_delete_batch(struct address_space *mapping,
336 struct pagevec *pvec)
337{
338 struct radix_tree_iter iter;
339 void **slot;
340 int total_pages = 0;
341 int i = 0, tail_pages = 0;
342 struct page *page;
343 pgoff_t start;
344
345 start = pvec->pages[0]->index;
346 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
347 if (i >= pagevec_count(pvec) && !tail_pages)
348 break;
349 page = radix_tree_deref_slot_protected(slot,
350 &mapping->i_pages.xa_lock);
351 if (xa_is_value(page))
352 continue;
353 if (!tail_pages) {
354
355
356
357
358
359 if (page != pvec->pages[i])
360 continue;
361 WARN_ON_ONCE(!PageLocked(page));
362 if (PageTransHuge(page) && !PageHuge(page))
363 tail_pages = HPAGE_PMD_NR - 1;
364 page->mapping = NULL;
365
366
367
368
369 i++;
370 } else {
371 tail_pages--;
372 }
373 radix_tree_clear_tags(&mapping->i_pages, iter.node, slot);
374 __radix_tree_replace(&mapping->i_pages, iter.node, slot, NULL,
375 workingset_lookup_update(mapping));
376 total_pages++;
377 }
378 mapping->nrpages -= total_pages;
379}
380
381void delete_from_page_cache_batch(struct address_space *mapping,
382 struct pagevec *pvec)
383{
384 int i;
385 unsigned long flags;
386
387 if (!pagevec_count(pvec))
388 return;
389
390 xa_lock_irqsave(&mapping->i_pages, flags);
391 for (i = 0; i < pagevec_count(pvec); i++) {
392 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
393
394 unaccount_page_cache_page(mapping, pvec->pages[i]);
395 }
396 page_cache_tree_delete_batch(mapping, pvec);
397 xa_unlock_irqrestore(&mapping->i_pages, flags);
398
399 for (i = 0; i < pagevec_count(pvec); i++)
400 page_cache_free_page(mapping, pvec->pages[i]);
401}
402
int filemap_check_errors(struct address_space *mapping)
{
	int ret = 0;
	/* Check for outstanding write errors */
	if (test_bit(AS_ENOSPC, &mapping->flags) &&
	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_bit(AS_EIO, &mapping->flags) &&
	    test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
EXPORT_SYMBOL(filemap_check_errors);

static int filemap_check_and_keep_errors(struct address_space *mapping)
{
	/* Check for outstanding write errors */
	if (test_bit(AS_EIO, &mapping->flags))
		return -EIO;
	if (test_bit(AS_ENOSPC, &mapping->flags))
		return -ENOSPC;
	return 0;
}
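/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie within the
 * byte offsets <start, end> inclusive.  With WB_SYNC_ALL this is a data
 * integrity operation: dirty pages already under writeback are waited upon
 * rather than skipped.
 */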
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
			       loff_t end, int sync_mode)
{
	int ret;
	struct writeback_control wbc = {
		.sync_mode = sync_mode,
		.nr_to_write = LONG_MAX,
		.range_start = start,
		.range_end = end,
	};

	if (!mapping_cap_writeback_dirty(mapping))
		return 0;

	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
	ret = do_writepages(mapping, &wbc);
	wbc_detach_inode(&wbc);
	return ret;
}

static inline int __filemap_fdatawrite(struct address_space *mapping,
	int sync_mode)
{
	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}

int filemap_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);

int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
			     loff_t end)
{
	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);
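/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping: target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 */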
int filemap_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
}
EXPORT_SYMBOL(filemap_flush);
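/**
 * filemap_range_has_page - check if a page exists in range
 * @mapping:	address space within which to check
 * @start_byte:	offset in bytes where the range starts
 * @end_byte:	offset in bytes where the range ends (inclusive)
 *
 * Find at least one page in the range supplied, usually used to check if
 * direct writing in this range will trigger a writeback.
 */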
bool filemap_range_has_page(struct address_space *mapping,
			   loff_t start_byte, loff_t end_byte)
{
	pgoff_t index = start_byte >> PAGE_SHIFT;
	pgoff_t end = end_byte >> PAGE_SHIFT;
	struct page *page;

	if (end_byte < start_byte)
		return false;

	if (mapping->nrpages == 0)
		return false;

	if (!find_get_pages_range(mapping, &index, end, 1, &page))
		return false;
	put_page(page);
	return true;
}
EXPORT_SYMBOL(filemap_range_has_page);
523static void __filemap_fdatawait_range(struct address_space *mapping,
524 loff_t start_byte, loff_t end_byte)
525{
526 pgoff_t index = start_byte >> PAGE_SHIFT;
527 pgoff_t end = end_byte >> PAGE_SHIFT;
528 struct pagevec pvec;
529 int nr_pages;
530
531 if (end_byte < start_byte)
532 return;
533
534 pagevec_init(&pvec);
535 while (index <= end) {
536 unsigned i;
537
538 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
539 end, PAGECACHE_TAG_WRITEBACK);
540 if (!nr_pages)
541 break;
542
543 for (i = 0; i < nr_pages; i++) {
544 struct page *page = pvec.pages[i];
545
546 wait_on_page_writeback(page);
547 ClearPageError(page);
548 }
549 pagevec_release(&pvec);
550 cond_resched();
551 }
552}
568int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
569 loff_t end_byte)
570{
571 __filemap_fdatawait_range(mapping, start_byte, end_byte);
572 return filemap_check_errors(mapping);
573}
574EXPORT_SYMBOL(filemap_fdatawait_range);
590int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
591{
592 struct address_space *mapping = file->f_mapping;
593
594 __filemap_fdatawait_range(mapping, start_byte, end_byte);
595 return file_check_and_advance_wb_err(file);
596}
597EXPORT_SYMBOL(file_fdatawait_range);
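/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space and
 * wait for all of them.  Unlike filemap_fdatawait(), this function does
 * not clear the error status of the address space.
 */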
int filemap_fdatawait_keep_errors(struct address_space *mapping)
{
	__filemap_fdatawait_range(mapping, 0, LLONG_MAX);
	return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);

static bool mapping_needs_writeback(struct address_space *mapping)
{
	return (!dax_mapping(mapping) && mapping->nrpages) ||
		(dax_mapping(mapping) && mapping->nrexceptional);
}
623
624int filemap_write_and_wait(struct address_space *mapping)
625{
626 int err = 0;
627
628 if (mapping_needs_writeback(mapping)) {
629 err = filemap_fdatawrite(mapping);
630
631
632
633
634
635
636 if (err != -EIO) {
637 int err2 = filemap_fdatawait(mapping);
638 if (!err)
639 err = err2;
640 } else {
641
642 filemap_check_errors(mapping);
643 }
644 } else {
645 err = filemap_check_errors(mapping);
646 }
647 return err;
648}
649EXPORT_SYMBOL(filemap_write_and_wait);
662int filemap_write_and_wait_range(struct address_space *mapping,
663 loff_t lstart, loff_t lend)
664{
665 int err = 0;
666
667 if (mapping_needs_writeback(mapping)) {
668 err = __filemap_fdatawrite_range(mapping, lstart, lend,
669 WB_SYNC_ALL);
670
671 if (err != -EIO) {
672 int err2 = filemap_fdatawait_range(mapping,
673 lstart, lend);
674 if (!err)
675 err = err2;
676 } else {
677
678 filemap_check_errors(mapping);
679 }
680 } else {
681 err = filemap_check_errors(mapping);
682 }
683 return err;
684}
685EXPORT_SYMBOL(filemap_write_and_wait_range);
686
687void __filemap_set_wb_err(struct address_space *mapping, int err)
688{
689 errseq_t eseq = errseq_set(&mapping->wb_err, err);
690
691 trace_filemap_set_wb_err(mapping, eseq);
692}
693EXPORT_SYMBOL(__filemap_set_wb_err);
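/**
 * file_check_and_advance_wb_err - report any wb error seen since last check
 * @file: struct file on which the error is being reported
 *
 * Report writeback errors that have hit @file's mapping since this was last
 * called for @file (or since the file was opened), and advance file->f_wb_err
 * to the mapping's current value.  f_wb_err is protected by f_lock; the
 * legacy AS_EIO/AS_ENOSPC bits are cleared so errors are not reported twice.
 */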
717int file_check_and_advance_wb_err(struct file *file)
718{
719 int err = 0;
720 errseq_t old = READ_ONCE(file->f_wb_err);
721 struct address_space *mapping = file->f_mapping;
722
723
724 if (errseq_check(&mapping->wb_err, old)) {
725
726 spin_lock(&file->f_lock);
727 old = file->f_wb_err;
728 err = errseq_check_and_advance(&mapping->wb_err,
729 &file->f_wb_err);
730 trace_file_check_and_advance_wb_err(file, old);
731 spin_unlock(&file->f_lock);
732 }
733
734
735
736
737
738
739 clear_bit(AS_EIO, &mapping->flags);
740 clear_bit(AS_ENOSPC, &mapping->flags);
741 return err;
742}
743EXPORT_SYMBOL(file_check_and_advance_wb_err);
759int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
760{
761 int err = 0, err2;
762 struct address_space *mapping = file->f_mapping;
763
764 if (mapping_needs_writeback(mapping)) {
765 err = __filemap_fdatawrite_range(mapping, lstart, lend,
766 WB_SYNC_ALL);
767
768 if (err != -EIO)
769 __filemap_fdatawait_range(mapping, lstart, lend);
770 }
771 err2 = file_check_and_advance_wb_err(file);
772 if (!err)
773 err = err2;
774 return err;
775}
776EXPORT_SYMBOL(file_write_and_wait_range);
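/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 * @gfp_mask:	allocation mode
 *
 * This function replaces a page in the pagecache with a new one.  On success
 * it acquires the pagecache reference for the new page and drops it for the
 * old page.  Both the old and new pages must be locked.  This function does
 * not add the new page to the LRU, the caller must do that.
 */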
793int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
794{
795 int error;
796
797 VM_BUG_ON_PAGE(!PageLocked(old), old);
798 VM_BUG_ON_PAGE(!PageLocked(new), new);
799 VM_BUG_ON_PAGE(new->mapping, new);
800
801 error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK);
802 if (!error) {
803 struct address_space *mapping = old->mapping;
804 void (*freepage)(struct page *);
805 unsigned long flags;
806
807 pgoff_t offset = old->index;
808 freepage = mapping->a_ops->freepage;
809
810 get_page(new);
811 new->mapping = mapping;
812 new->index = offset;
813
814 xa_lock_irqsave(&mapping->i_pages, flags);
815 __delete_from_page_cache(old, NULL);
816 error = page_cache_tree_insert(mapping, new, NULL);
817 BUG_ON(error);
818
819
820
821
822 if (!PageHuge(new))
823 __inc_node_page_state(new, NR_FILE_PAGES);
824 if (PageSwapBacked(new))
825 __inc_node_page_state(new, NR_SHMEM);
826 xa_unlock_irqrestore(&mapping->i_pages, flags);
827 mem_cgroup_migrate(old, new);
828 radix_tree_preload_end();
829 if (freepage)
830 freepage(old);
831 put_page(old);
832 }
833
834 return error;
835}
836EXPORT_SYMBOL_GPL(replace_page_cache_page);
837
838static int __add_to_page_cache_locked(struct page *page,
839 struct address_space *mapping,
840 pgoff_t offset, gfp_t gfp_mask,
841 void **shadowp)
842{
843 int huge = PageHuge(page);
844 struct mem_cgroup *memcg;
845 int error;
846
847 VM_BUG_ON_PAGE(!PageLocked(page), page);
848 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
849
850 if (!huge) {
851 error = mem_cgroup_try_charge(page, current->mm,
852 gfp_mask, &memcg, false);
853 if (error)
854 return error;
855 }
856
857 error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK);
858 if (error) {
859 if (!huge)
860 mem_cgroup_cancel_charge(page, memcg, false);
861 return error;
862 }
863
864 get_page(page);
865 page->mapping = mapping;
866 page->index = offset;
867
868 xa_lock_irq(&mapping->i_pages);
869 error = page_cache_tree_insert(mapping, page, shadowp);
870 radix_tree_preload_end();
871 if (unlikely(error))
872 goto err_insert;
873
874
875 if (!huge)
876 __inc_node_page_state(page, NR_FILE_PAGES);
877 xa_unlock_irq(&mapping->i_pages);
878 if (!huge)
879 mem_cgroup_commit_charge(page, memcg, false, false);
880 trace_mm_filemap_add_to_page_cache(page);
881 return 0;
882err_insert:
883 page->mapping = NULL;
884
885 xa_unlock_irq(&mapping->i_pages);
886 if (!huge)
887 mem_cgroup_cancel_charge(page, memcg, false);
888 put_page(page);
889 return error;
890}
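/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache.  It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 */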
902int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
903 pgoff_t offset, gfp_t gfp_mask)
904{
905 return __add_to_page_cache_locked(page, mapping, offset,
906 gfp_mask, NULL);
907}
908EXPORT_SYMBOL(add_to_page_cache_locked);
909
910int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
911 pgoff_t offset, gfp_t gfp_mask)
912{
913 void *shadow = NULL;
914 int ret;
915
916 __SetPageLocked(page);
917 ret = __add_to_page_cache_locked(page, mapping, offset,
918 gfp_mask, &shadow);
919 if (unlikely(ret))
920 __ClearPageLocked(page);
921 else {
922
923
924
925
926
927
928
929
930 WARN_ON_ONCE(PageActive(page));
931 if (!(gfp_mask & __GFP_WRITE) && shadow)
932 workingset_refault(page, shadow);
933 lru_cache_add(page);
934 }
935 return ret;
936}
937EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
938
939#ifdef CONFIG_NUMA
940struct page *__page_cache_alloc(gfp_t gfp)
941{
942 int n;
943 struct page *page;
944
945 if (cpuset_do_page_mem_spread()) {
946 unsigned int cpuset_mems_cookie;
947 do {
948 cpuset_mems_cookie = read_mems_allowed_begin();
949 n = cpuset_mem_spread_node();
950 page = __alloc_pages_node(n, gfp, 0);
951 } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
952
953 return page;
954 }
955 return alloc_pages(gfp, 0);
956}
957EXPORT_SYMBOL(__page_cache_alloc);
958#endif
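/*
 * Page waitqueues: instead of one waitqueue per page, a hash table of
 * waitqueues is used.  All waiters for pages that hash to the same bucket
 * share one queue and are all woken when any of those pages becomes
 * available; each woken context re-checks that its own page is ready.
 * This saves space at the cost of rare "thundering herd" wakeups on hash
 * collisions.
 */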
#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;

static wait_queue_head_t *page_waitqueue(struct page *page)
{
	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}

void __init pagecache_init(void)
{
	int i;

	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(&page_wait_table[i]);

	page_writeback_init();
}
988
989
990struct wait_page_key {
991 struct page *page;
992 int bit_nr;
993 int page_match;
994};
995
996struct wait_page_queue {
997 struct page *page;
998 int bit_nr;
999 wait_queue_entry_t wait;
1000};
1001
1002static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
1003{
1004 struct wait_page_key *key = arg;
1005 struct wait_page_queue *wait_page
1006 = container_of(wait, struct wait_page_queue, wait);
1007
1008 if (wait_page->page != key->page)
1009 return 0;
1010 key->page_match = 1;
1011
1012 if (wait_page->bit_nr != key->bit_nr)
1013 return 0;
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023 if (test_bit(key->bit_nr, &key->page->flags))
1024 return -1;
1025
1026 return autoremove_wake_function(wait, mode, sync, key);
1027}
1028
1029static void wake_up_page_bit(struct page *page, int bit_nr)
1030{
1031 wait_queue_head_t *q = page_waitqueue(page);
1032 struct wait_page_key key;
1033 unsigned long flags;
1034 wait_queue_entry_t bookmark;
1035
1036 key.page = page;
1037 key.bit_nr = bit_nr;
1038 key.page_match = 0;
1039
1040 bookmark.flags = 0;
1041 bookmark.private = NULL;
1042 bookmark.func = NULL;
1043 INIT_LIST_HEAD(&bookmark.entry);
1044
1045 spin_lock_irqsave(&q->lock, flags);
1046 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1047
1048 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
1049
1050
1051
1052
1053
1054
1055 spin_unlock_irqrestore(&q->lock, flags);
1056 cpu_relax();
1057 spin_lock_irqsave(&q->lock, flags);
1058 __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
1059 }
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070 if (!waitqueue_active(q) || !key.page_match) {
1071 ClearPageWaiters(page);
1072
1073
1074
1075
1076
1077
1078
1079 }
1080 spin_unlock_irqrestore(&q->lock, flags);
1081}
1082
1083static void wake_up_page(struct page *page, int bit)
1084{
1085 if (!PageWaiters(page))
1086 return;
1087 wake_up_page_bit(page, bit);
1088}
1089
1090
1091
1092
1093enum behavior {
1094 EXCLUSIVE,
1095
1096
1097 SHARED,
1098
1099
1100 DROP,
1101
1102
1103};
1104
1105static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1106 struct page *page, int bit_nr, int state, enum behavior behavior)
1107{
1108 struct wait_page_queue wait_page;
1109 wait_queue_entry_t *wait = &wait_page.wait;
1110 bool bit_is_set;
1111 bool thrashing = false;
1112 unsigned long pflags;
1113 int ret = 0;
1114
1115 if (bit_nr == PG_locked &&
1116 !PageUptodate(page) && PageWorkingset(page)) {
1117 if (!PageSwapBacked(page))
1118 delayacct_thrashing_start();
1119 psi_memstall_enter(&pflags);
1120 thrashing = true;
1121 }
1122
1123 init_wait(wait);
1124 wait->flags = behavior == EXCLUSIVE ? WQ_FLAG_EXCLUSIVE : 0;
1125 wait->func = wake_page_function;
1126 wait_page.page = page;
1127 wait_page.bit_nr = bit_nr;
1128
1129 for (;;) {
1130 spin_lock_irq(&q->lock);
1131
1132 if (likely(list_empty(&wait->entry))) {
1133 __add_wait_queue_entry_tail(q, wait);
1134 SetPageWaiters(page);
1135 }
1136
1137 set_current_state(state);
1138
1139 spin_unlock_irq(&q->lock);
1140
1141 bit_is_set = test_bit(bit_nr, &page->flags);
1142 if (behavior == DROP)
1143 put_page(page);
1144
1145 if (likely(bit_is_set))
1146 io_schedule();
1147
1148 if (behavior == EXCLUSIVE) {
1149 if (!test_and_set_bit_lock(bit_nr, &page->flags))
1150 break;
1151 } else if (behavior == SHARED) {
1152 if (!test_bit(bit_nr, &page->flags))
1153 break;
1154 }
1155
1156 if (unlikely(signal_pending_state(state, current))) {
1157 ret = -EINTR;
1158 break;
1159 }
1160
1161 if (behavior == DROP) {
1162
1163
1164
1165
1166
1167
1168
1169 break;
1170 }
1171 }
1172
1173 finish_wait(q, wait);
1174
1175 if (thrashing) {
1176 if (!PageSwapBacked(page))
1177 delayacct_thrashing_end();
1178 psi_memstall_leave(&pflags);
1179 }
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189 return ret;
1190}
1191
1192void wait_on_page_bit(struct page *page, int bit_nr)
1193{
1194 wait_queue_head_t *q = page_waitqueue(page);
1195 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
1196}
1197EXPORT_SYMBOL(wait_on_page_bit);
1198
1199int wait_on_page_bit_killable(struct page *page, int bit_nr)
1200{
1201 wait_queue_head_t *q = page_waitqueue(page);
1202 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
1203}
1204EXPORT_SYMBOL(wait_on_page_bit_killable);
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216void put_and_wait_on_page_locked(struct page *page)
1217{
1218 wait_queue_head_t *q;
1219
1220 page = compound_head(page);
1221 q = page_waitqueue(page);
1222 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
1223}
1224
1225
1226
1227
1228
1229
1230
1231
1232void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
1233{
1234 wait_queue_head_t *q = page_waitqueue(page);
1235 unsigned long flags;
1236
1237 spin_lock_irqsave(&q->lock, flags);
1238 __add_wait_queue_entry_tail(q, waiter);
1239 SetPageWaiters(page);
1240 spin_unlock_irqrestore(&q->lock, flags);
1241}
1242EXPORT_SYMBOL_GPL(add_page_wait_queue);
1243
1244#ifndef clear_bit_unlock_is_negative_byte
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
1259{
1260 clear_bit_unlock(nr, mem);
1261
1262 return test_bit(PG_waiters, mem);
1263}
1264
1265#endif
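/**
 * unlock_page - unlock a locked page
 * @page: the page
 *
 * Unlocks the page and wakes up any sleepers waiting on PG_locked.
 * This relies on PG_waiters being bit 7, the sign bit of the byte that
 * contains PG_locked - hence the BUILD_BUG_ON() - so the unlock and the
 * "are there waiters" test can be done in one atomic operation.
 */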
void unlock_page(struct page *page)
{
	BUILD_BUG_ON(PG_waiters != 7);
	page = compound_head(page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
		wake_up_page_bit(page, PG_locked);
}
EXPORT_SYMBOL(unlock_page);
1291
1292
1293
1294
1295
1296void end_page_writeback(struct page *page)
1297{
1298
1299
1300
1301
1302
1303
1304
1305 if (PageReclaim(page)) {
1306 ClearPageReclaim(page);
1307 rotate_reclaimable_page(page);
1308 }
1309
1310 if (!test_clear_page_writeback(page))
1311 BUG();
1312
1313 smp_mb__after_atomic();
1314 wake_up_page(page, PG_writeback);
1315}
1316EXPORT_SYMBOL(end_page_writeback);
1317
1318
1319
1320
1321
1322void page_endio(struct page *page, bool is_write, int err)
1323{
1324 if (!is_write) {
1325 if (!err) {
1326 SetPageUptodate(page);
1327 } else {
1328 ClearPageUptodate(page);
1329 SetPageError(page);
1330 }
1331 unlock_page(page);
1332 } else {
1333 if (err) {
1334 struct address_space *mapping;
1335
1336 SetPageError(page);
1337 mapping = page_mapping(page);
1338 if (mapping)
1339 mapping_set_error(mapping, err);
1340 }
1341 end_page_writeback(page);
1342 }
1343}
1344EXPORT_SYMBOL_GPL(page_endio);
1345
1346
1347
1348
1349
1350void __lock_page(struct page *__page)
1351{
1352 struct page *page = compound_head(__page);
1353 wait_queue_head_t *q = page_waitqueue(page);
1354 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
1355 EXCLUSIVE);
1356}
1357EXPORT_SYMBOL(__lock_page);
1358
1359int __lock_page_killable(struct page *__page)
1360{
1361 struct page *page = compound_head(__page);
1362 wait_queue_head_t *q = page_waitqueue(page);
1363 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
1364 EXCLUSIVE);
1365}
1366EXPORT_SYMBOL_GPL(__lock_page_killable);
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
1380 unsigned int flags)
1381{
1382 if (flags & FAULT_FLAG_ALLOW_RETRY) {
1383
1384
1385
1386
1387 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1388 return 0;
1389
1390 up_read(&mm->mmap_sem);
1391 if (flags & FAULT_FLAG_KILLABLE)
1392 wait_on_page_locked_killable(page);
1393 else
1394 wait_on_page_locked(page);
1395 return 0;
1396 } else {
1397 if (flags & FAULT_FLAG_KILLABLE) {
1398 int ret;
1399
1400 ret = __lock_page_killable(page);
1401 if (ret) {
1402 up_read(&mm->mmap_sem);
1403 return 0;
1404 }
1405 } else
1406 __lock_page(page);
1407 return 1;
1408 }
1409}
1432pgoff_t page_cache_next_hole(struct address_space *mapping,
1433 pgoff_t index, unsigned long max_scan)
1434{
1435 unsigned long i;
1436
1437 for (i = 0; i < max_scan; i++) {
1438 struct page *page;
1439
1440 page = radix_tree_lookup(&mapping->i_pages, index);
1441 if (!page || xa_is_value(page))
1442 break;
1443 index++;
1444 if (index == 0)
1445 break;
1446 }
1447
1448 return index;
1449}
1450EXPORT_SYMBOL(page_cache_next_hole);
1473pgoff_t page_cache_prev_hole(struct address_space *mapping,
1474 pgoff_t index, unsigned long max_scan)
1475{
1476 unsigned long i;
1477
1478 for (i = 0; i < max_scan; i++) {
1479 struct page *page;
1480
1481 page = radix_tree_lookup(&mapping->i_pages, index);
1482 if (!page || xa_is_value(page))
1483 break;
1484 index--;
1485 if (index == ULONG_MAX)
1486 break;
1487 }
1488
1489 return index;
1490}
1491EXPORT_SYMBOL(page_cache_prev_hole);
1506struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
1507{
1508 void **pagep;
1509 struct page *head, *page;
1510
1511 rcu_read_lock();
1512repeat:
1513 page = NULL;
1514 pagep = radix_tree_lookup_slot(&mapping->i_pages, offset);
1515 if (pagep) {
1516 page = radix_tree_deref_slot(pagep);
1517 if (unlikely(!page))
1518 goto out;
1519 if (radix_tree_exception(page)) {
1520 if (radix_tree_deref_retry(page))
1521 goto repeat;
1522
1523
1524
1525
1526
1527 goto out;
1528 }
1529
1530 head = compound_head(page);
1531 if (!page_cache_get_speculative(head))
1532 goto repeat;
1533
1534
1535 if (compound_head(page) != head) {
1536 put_page(head);
1537 goto repeat;
1538 }
1539
1540
1541
1542
1543
1544
1545 if (unlikely(page != *pagep)) {
1546 put_page(head);
1547 goto repeat;
1548 }
1549 }
1550out:
1551 rcu_read_unlock();
1552
1553 return page;
1554}
1555EXPORT_SYMBOL(find_get_entry);
1573struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
1574{
1575 struct page *page;
1576
1577repeat:
1578 page = find_get_entry(mapping, offset);
1579 if (page && !radix_tree_exception(page)) {
1580 lock_page(page);
1581
1582 if (unlikely(page_mapping(page) != mapping)) {
1583 unlock_page(page);
1584 put_page(page);
1585 goto repeat;
1586 }
1587 VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
1588 }
1589 return page;
1590}
1591EXPORT_SYMBOL(find_lock_entry);
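/**
 * pagecache_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 * @fgp_flags: FGP_* flags controlling locking, accessed marking and creation
 * @gfp_mask: gfp mask to use if a new page cache page has to be allocated
 *
 * Looks up the page cache entry at @mapping & @offset.  FGP_LOCK returns the
 * page locked, FGP_ACCESSED marks it accessed, and FGP_CREAT allocates a new
 * page and adds it to the page cache and the LRU list if none was found.
 * Returns NULL if FGP_NOWAIT cannot take the lock or memory allocation fails.
 */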
1618struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
1619 int fgp_flags, gfp_t gfp_mask)
1620{
1621 struct page *page;
1622
1623repeat:
1624 page = find_get_entry(mapping, offset);
1625 if (xa_is_value(page))
1626 page = NULL;
1627 if (!page)
1628 goto no_page;
1629
1630 if (fgp_flags & FGP_LOCK) {
1631 if (fgp_flags & FGP_NOWAIT) {
1632 if (!trylock_page(page)) {
1633 put_page(page);
1634 return NULL;
1635 }
1636 } else {
1637 lock_page(page);
1638 }
1639
1640
1641 if (unlikely(page->mapping != mapping)) {
1642 unlock_page(page);
1643 put_page(page);
1644 goto repeat;
1645 }
1646 VM_BUG_ON_PAGE(page->index != offset, page);
1647 }
1648
1649 if (page && (fgp_flags & FGP_ACCESSED))
1650 mark_page_accessed(page);
1651
1652no_page:
1653 if (!page && (fgp_flags & FGP_CREAT)) {
1654 int err;
1655 if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
1656 gfp_mask |= __GFP_WRITE;
1657 if (fgp_flags & FGP_NOFS)
1658 gfp_mask &= ~__GFP_FS;
1659
1660 page = __page_cache_alloc(gfp_mask);
1661 if (!page)
1662 return NULL;
1663
1664 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
1665 fgp_flags |= FGP_LOCK;
1666
1667
1668 if (fgp_flags & FGP_ACCESSED)
1669 __SetPageReferenced(page);
1670
1671 err = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
1672 if (unlikely(err)) {
1673 put_page(page);
1674 page = NULL;
1675 if (err == -EEXIST)
1676 goto repeat;
1677 }
1678 }
1679
1680 return page;
1681}
1682EXPORT_SYMBOL(pagecache_get_page);
1707unsigned find_get_entries(struct address_space *mapping,
1708 pgoff_t start, unsigned int nr_entries,
1709 struct page **entries, pgoff_t *indices)
1710{
1711 void **slot;
1712 unsigned int ret = 0;
1713 struct radix_tree_iter iter;
1714
1715 if (!nr_entries)
1716 return 0;
1717
1718 rcu_read_lock();
1719 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
1720 struct page *head, *page;
1721repeat:
1722 page = radix_tree_deref_slot(slot);
1723 if (unlikely(!page))
1724 continue;
1725 if (radix_tree_exception(page)) {
1726 if (radix_tree_deref_retry(page)) {
1727 slot = radix_tree_iter_retry(&iter);
1728 continue;
1729 }
1730
1731
1732
1733
1734
1735 goto export;
1736 }
1737
1738 head = compound_head(page);
1739 if (!page_cache_get_speculative(head))
1740 goto repeat;
1741
1742
1743 if (compound_head(page) != head) {
1744 put_page(head);
1745 goto repeat;
1746 }
1747
1748
1749 if (unlikely(page != *slot)) {
1750 put_page(head);
1751 goto repeat;
1752 }
1753export:
1754 indices[ret] = iter.index;
1755 entries[ret] = page;
1756 if (++ret == nr_entries)
1757 break;
1758 }
1759 rcu_read_unlock();
1760 return ret;
1761}
1784unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
1785 pgoff_t end, unsigned int nr_pages,
1786 struct page **pages)
1787{
1788 struct radix_tree_iter iter;
1789 void **slot;
1790 unsigned ret = 0;
1791
1792 if (unlikely(!nr_pages))
1793 return 0;
1794
1795 rcu_read_lock();
1796 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, *start) {
1797 struct page *head, *page;
1798
1799 if (iter.index > end)
1800 break;
1801repeat:
1802 page = radix_tree_deref_slot(slot);
1803 if (unlikely(!page))
1804 continue;
1805
1806 if (radix_tree_exception(page)) {
1807 if (radix_tree_deref_retry(page)) {
1808 slot = radix_tree_iter_retry(&iter);
1809 continue;
1810 }
1811
1812
1813
1814
1815
1816 continue;
1817 }
1818
1819 head = compound_head(page);
1820 if (!page_cache_get_speculative(head))
1821 goto repeat;
1822
1823
1824 if (compound_head(page) != head) {
1825 put_page(head);
1826 goto repeat;
1827 }
1828
1829
1830 if (unlikely(page != *slot)) {
1831 put_page(head);
1832 goto repeat;
1833 }
1834
1835 pages[ret] = page;
1836 if (++ret == nr_pages) {
1837 *start = pages[ret - 1]->index + 1;
1838 goto out;
1839 }
1840 }
1841
1842
1843
1844
1845
1846
1847
1848 if (end == (pgoff_t)-1)
1849 *start = (pgoff_t)-1;
1850 else
1851 *start = end + 1;
1852out:
1853 rcu_read_unlock();
1854
1855 return ret;
1856}
1870unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
1871 unsigned int nr_pages, struct page **pages)
1872{
1873 struct radix_tree_iter iter;
1874 void **slot;
1875 unsigned int ret = 0;
1876
1877 if (unlikely(!nr_pages))
1878 return 0;
1879
1880 rcu_read_lock();
1881 radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
1882 struct page *head, *page;
1883repeat:
1884 page = radix_tree_deref_slot(slot);
1885
1886 if (unlikely(!page))
1887 break;
1888
1889 if (radix_tree_exception(page)) {
1890 if (radix_tree_deref_retry(page)) {
1891 slot = radix_tree_iter_retry(&iter);
1892 continue;
1893 }
1894
1895
1896
1897
1898
1899 break;
1900 }
1901
1902 head = compound_head(page);
1903 if (!page_cache_get_speculative(head))
1904 goto repeat;
1905
1906
1907 if (compound_head(page) != head) {
1908 put_page(head);
1909 goto repeat;
1910 }
1911
1912
1913 if (unlikely(page != *slot)) {
1914 put_page(head);
1915 goto repeat;
1916 }
1917
1918
1919
1920
1921
1922
1923 if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
1924 put_page(page);
1925 break;
1926 }
1927
1928 pages[ret] = page;
1929 if (++ret == nr_pages)
1930 break;
1931 }
1932 rcu_read_unlock();
1933 return ret;
1934}
1935EXPORT_SYMBOL(find_get_pages_contig);
1949unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
1950 pgoff_t end, int tag, unsigned int nr_pages,
1951 struct page **pages)
1952{
1953 struct radix_tree_iter iter;
1954 void **slot;
1955 unsigned ret = 0;
1956
1957 if (unlikely(!nr_pages))
1958 return 0;
1959
1960 rcu_read_lock();
1961 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
1962 struct page *head, *page;
1963
1964 if (iter.index > end)
1965 break;
1966repeat:
1967 page = radix_tree_deref_slot(slot);
1968 if (unlikely(!page))
1969 continue;
1970
1971 if (radix_tree_exception(page)) {
1972 if (radix_tree_deref_retry(page)) {
1973 slot = radix_tree_iter_retry(&iter);
1974 continue;
1975 }
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987 continue;
1988 }
1989
1990 head = compound_head(page);
1991 if (!page_cache_get_speculative(head))
1992 goto repeat;
1993
1994
1995 if (compound_head(page) != head) {
1996 put_page(head);
1997 goto repeat;
1998 }
1999
2000
2001 if (unlikely(page != *slot)) {
2002 put_page(head);
2003 goto repeat;
2004 }
2005
2006 pages[ret] = page;
2007 if (++ret == nr_pages) {
2008 *index = pages[ret - 1]->index + 1;
2009 goto out;
2010 }
2011 }
2012
2013
2014
2015
2016
2017
2018
2019 if (end == (pgoff_t)-1)
2020 *index = (pgoff_t)-1;
2021 else
2022 *index = end + 1;
2023out:
2024 rcu_read_unlock();
2025
2026 return ret;
2027}
2028EXPORT_SYMBOL(find_get_pages_range_tag);
2042unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
2043 int tag, unsigned int nr_entries,
2044 struct page **entries, pgoff_t *indices)
2045{
2046 void **slot;
2047 unsigned int ret = 0;
2048 struct radix_tree_iter iter;
2049
2050 if (!nr_entries)
2051 return 0;
2052
2053 rcu_read_lock();
2054 radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
2055 struct page *head, *page;
2056repeat:
2057 page = radix_tree_deref_slot(slot);
2058 if (unlikely(!page))
2059 continue;
2060 if (radix_tree_exception(page)) {
2061 if (radix_tree_deref_retry(page)) {
2062 slot = radix_tree_iter_retry(&iter);
2063 continue;
2064 }
2065
2066
2067
2068
2069
2070
2071 goto export;
2072 }
2073
2074 head = compound_head(page);
2075 if (!page_cache_get_speculative(head))
2076 goto repeat;
2077
2078
2079 if (compound_head(page) != head) {
2080 put_page(head);
2081 goto repeat;
2082 }
2083
2084
2085 if (unlikely(page != *slot)) {
2086 put_page(head);
2087 goto repeat;
2088 }
2089export:
2090 indices[ret] = iter.index;
2091 entries[ret] = page;
2092 if (++ret == nr_entries)
2093 break;
2094 }
2095 rcu_read_unlock();
2096 return ret;
2097}
2098EXPORT_SYMBOL(find_get_entries_tag);
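/*
 * Shrink the readahead window after an I/O error, so that a failing medium
 * does not keep provoking large readahead requests that mostly fail.
 */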
static void shrink_readahead_size_eio(struct file *filp,
					struct file_ra_state *ra)
{
	ra->ra_pages /= 4;
}
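/**
 * generic_file_buffered_read - generic file read routine
 * @iocb:	the iocb to read
 * @iter:	data destination
 * @written:	already copied
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 */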
2133static ssize_t generic_file_buffered_read(struct kiocb *iocb,
2134 struct iov_iter *iter, ssize_t written)
2135{
2136 struct file *filp = iocb->ki_filp;
2137 struct address_space *mapping = filp->f_mapping;
2138 struct inode *inode = mapping->host;
2139 struct file_ra_state *ra = &filp->f_ra;
2140 loff_t *ppos = &iocb->ki_pos;
2141 pgoff_t index;
2142 pgoff_t last_index;
2143 pgoff_t prev_index;
2144 unsigned long offset;
2145 unsigned int prev_offset;
2146 int error = 0;
2147
2148 if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
2149 return 0;
2150 iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
2151
2152 index = *ppos >> PAGE_SHIFT;
2153 prev_index = ra->prev_pos >> PAGE_SHIFT;
2154 prev_offset = ra->prev_pos & (PAGE_SIZE-1);
2155 last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
2156 offset = *ppos & ~PAGE_MASK;
2157
2158 for (;;) {
2159 struct page *page;
2160 pgoff_t end_index;
2161 loff_t isize;
2162 unsigned long nr, ret;
2163
2164 cond_resched();
2165find_page:
2166 if (fatal_signal_pending(current)) {
2167 error = -EINTR;
2168 goto out;
2169 }
2170
2171 page = find_get_page(mapping, index);
2172 if (!page) {
2173 if (iocb->ki_flags & IOCB_NOWAIT)
2174 goto would_block;
2175 page_cache_sync_readahead(mapping,
2176 ra, filp,
2177 index, last_index - index);
2178 page = find_get_page(mapping, index);
2179 if (unlikely(page == NULL))
2180 goto no_cached_page;
2181 }
2182 if (PageReadahead(page)) {
2183 page_cache_async_readahead(mapping,
2184 ra, filp, page,
2185 index, last_index - index);
2186 }
2187 if (!PageUptodate(page)) {
2188 if (iocb->ki_flags & IOCB_NOWAIT) {
2189 put_page(page);
2190 goto would_block;
2191 }
2192
2193
2194
2195
2196
2197
2198 error = wait_on_page_locked_killable(page);
2199 if (unlikely(error))
2200 goto readpage_error;
2201 if (PageUptodate(page))
2202 goto page_ok;
2203
2204 if (inode->i_blkbits == PAGE_SHIFT ||
2205 !mapping->a_ops->is_partially_uptodate)
2206 goto page_not_up_to_date;
2207
2208 if (unlikely(iter->type & ITER_PIPE))
2209 goto page_not_up_to_date;
2210 if (!trylock_page(page))
2211 goto page_not_up_to_date;
2212
2213 if (!page->mapping)
2214 goto page_not_up_to_date_locked;
2215 if (!mapping->a_ops->is_partially_uptodate(page,
2216 offset, iter->count))
2217 goto page_not_up_to_date_locked;
2218 unlock_page(page);
2219 }
2220page_ok:
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230 isize = i_size_read(inode);
2231 end_index = (isize - 1) >> PAGE_SHIFT;
2232 if (unlikely(!isize || index > end_index)) {
2233 put_page(page);
2234 goto out;
2235 }
2236
2237
2238 nr = PAGE_SIZE;
2239 if (index == end_index) {
2240 nr = ((isize - 1) & ~PAGE_MASK) + 1;
2241 if (nr <= offset) {
2242 put_page(page);
2243 goto out;
2244 }
2245 }
2246 nr = nr - offset;
2247
2248
2249
2250
2251
2252 if (mapping_writably_mapped(mapping))
2253 flush_dcache_page(page);
2254
2255
2256
2257
2258
2259 if (prev_index != index || offset != prev_offset)
2260 mark_page_accessed(page);
2261 prev_index = index;
2262
2263
2264
2265
2266
2267
2268 ret = copy_page_to_iter(page, offset, nr, iter);
2269 offset += ret;
2270 index += offset >> PAGE_SHIFT;
2271 offset &= ~PAGE_MASK;
2272 prev_offset = offset;
2273
2274 put_page(page);
2275 written += ret;
2276 if (!iov_iter_count(iter))
2277 goto out;
2278 if (ret < nr) {
2279 error = -EFAULT;
2280 goto out;
2281 }
2282 continue;
2283
2284page_not_up_to_date:
2285
2286 error = lock_page_killable(page);
2287 if (unlikely(error))
2288 goto readpage_error;
2289
2290page_not_up_to_date_locked:
2291
2292 if (!page->mapping) {
2293 unlock_page(page);
2294 put_page(page);
2295 continue;
2296 }
2297
2298
2299 if (PageUptodate(page)) {
2300 unlock_page(page);
2301 goto page_ok;
2302 }
2303
2304readpage:
2305
2306
2307
2308
2309
2310 ClearPageError(page);
2311
2312 error = mapping->a_ops->readpage(filp, page);
2313
2314 if (unlikely(error)) {
2315 if (error == AOP_TRUNCATED_PAGE) {
2316 put_page(page);
2317 error = 0;
2318 goto find_page;
2319 }
2320 goto readpage_error;
2321 }
2322
2323 if (!PageUptodate(page)) {
2324 error = lock_page_killable(page);
2325 if (unlikely(error))
2326 goto readpage_error;
2327 if (!PageUptodate(page)) {
2328 if (page->mapping == NULL) {
2329
2330
2331
2332 unlock_page(page);
2333 put_page(page);
2334 goto find_page;
2335 }
2336 unlock_page(page);
2337 shrink_readahead_size_eio(filp, ra);
2338 error = -EIO;
2339 goto readpage_error;
2340 }
2341 unlock_page(page);
2342 }
2343
2344 goto page_ok;
2345
2346readpage_error:
2347
2348 put_page(page);
2349 goto out;
2350
2351no_cached_page:
2352
2353
2354
2355
2356 page = page_cache_alloc(mapping);
2357 if (!page) {
2358 error = -ENOMEM;
2359 goto out;
2360 }
2361 error = add_to_page_cache_lru(page, mapping, index,
2362 mapping_gfp_constraint(mapping, GFP_KERNEL));
2363 if (error) {
2364 put_page(page);
2365 if (error == -EEXIST) {
2366 error = 0;
2367 goto find_page;
2368 }
2369 goto out;
2370 }
2371 goto readpage;
2372 }
2373
2374would_block:
2375 error = -EAGAIN;
2376out:
2377 ra->prev_pos = prev_index;
2378 ra->prev_pos <<= PAGE_SHIFT;
2379 ra->prev_pos |= prev_offset;
2380
2381 *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
2382 file_accessed(filp);
2383 return written ? written : error;
2384}
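/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iter:	destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 */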
2394ssize_t
2395generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2396{
2397 size_t count = iov_iter_count(iter);
2398 ssize_t retval = 0;
2399
2400 if (!count)
2401 goto out;
2402
2403 if (iocb->ki_flags & IOCB_DIRECT) {
2404 struct file *file = iocb->ki_filp;
2405 struct address_space *mapping = file->f_mapping;
2406 struct inode *inode = mapping->host;
2407 loff_t size;
2408
2409 size = i_size_read(inode);
2410 if (iocb->ki_flags & IOCB_NOWAIT) {
2411 if (filemap_range_has_page(mapping, iocb->ki_pos,
2412 iocb->ki_pos + count - 1))
2413 return -EAGAIN;
2414 } else {
2415 retval = filemap_write_and_wait_range(mapping,
2416 iocb->ki_pos,
2417 iocb->ki_pos + count - 1);
2418 if (retval < 0)
2419 goto out;
2420 }
2421
2422 file_accessed(file);
2423
2424 retval = mapping->a_ops->direct_IO(iocb, iter);
2425 if (retval >= 0) {
2426 iocb->ki_pos += retval;
2427 count -= retval;
2428 }
2429 iov_iter_revert(iter, count - iov_iter_count(iter));
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440 if (retval < 0 || !count || iocb->ki_pos >= size ||
2441 IS_DAX(inode))
2442 goto out;
2443 }
2444
2445 retval = generic_file_buffered_read(iocb, iter, retval);
2446out:
2447 return retval;
2448}
2449EXPORT_SYMBOL(generic_file_read_iter);
2450
2451#ifdef CONFIG_MMU
2461static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
2462{
2463 struct address_space *mapping = file->f_mapping;
2464 struct page *page;
2465 int ret;
2466
2467 do {
2468 page = __page_cache_alloc(gfp_mask);
2469 if (!page)
2470 return -ENOMEM;
2471
2472 ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
2473 if (ret == 0)
2474 ret = mapping->a_ops->readpage(file, page);
2475 else if (ret == -EEXIST)
2476 ret = 0;
2477
2478 put_page(page);
2479
2480 } while (ret == AOP_TRUNCATED_PAGE);
2481
2482 return ret;
2483}
2484
2485#define MMAP_LOTSAMISS (100)
2491static void do_sync_mmap_readahead(struct vm_area_struct *vma,
2492 struct file_ra_state *ra,
2493 struct file *file,
2494 pgoff_t offset)
2495{
2496 struct address_space *mapping = file->f_mapping;
2497
2498
2499 if (vma->vm_flags & VM_RAND_READ)
2500 return;
2501 if (!ra->ra_pages)
2502 return;
2503
2504 if (vma->vm_flags & VM_SEQ_READ) {
2505 page_cache_sync_readahead(mapping, ra, file, offset,
2506 ra->ra_pages);
2507 return;
2508 }
2509
2510
2511 if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
2512 ra->mmap_miss++;
2513
2514
2515
2516
2517
2518 if (ra->mmap_miss > MMAP_LOTSAMISS)
2519 return;
2520
2521
2522
2523
2524 ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
2525 ra->size = ra->ra_pages;
2526 ra->async_size = ra->ra_pages / 4;
2527 ra_submit(ra, mapping, file);
2528}
2529
2534static void do_async_mmap_readahead(struct vm_area_struct *vma,
2535 struct file_ra_state *ra,
2536 struct file *file,
2537 struct page *page,
2538 pgoff_t offset)
2539{
2540 struct address_space *mapping = file->f_mapping;
2541
2542
2543 if (vma->vm_flags & VM_RAND_READ)
2544 return;
2545 if (ra->mmap_miss > 0)
2546 ra->mmap_miss--;
2547 if (PageReadahead(page))
2548 page_cache_async_readahead(mapping, ra, file,
2549 page, offset, ra->ra_pages);
2550}
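/**
 * filemap_fault - read in file data for page fault handling
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a mapped
 * memory region to read in file data during a page fault.
 *
 * vma->vm_mm->mmap_sem must be held on entry.  If the return value has
 * VM_FAULT_RETRY set, mmap_sem has been released; otherwise the found page
 * is returned locked in vmf->page with VM_FAULT_LOCKED set.
 */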
2575vm_fault_t filemap_fault(struct vm_fault *vmf)
2576{
2577 int error;
2578 struct file *file = vmf->vma->vm_file;
2579 struct address_space *mapping = file->f_mapping;
2580 struct file_ra_state *ra = &file->f_ra;
2581 struct inode *inode = mapping->host;
2582 pgoff_t offset = vmf->pgoff;
2583 pgoff_t max_off;
2584 struct page *page;
2585 vm_fault_t ret = 0;
2586
2587 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2588 if (unlikely(offset >= max_off))
2589 return VM_FAULT_SIGBUS;
2590
2591
2592
2593
2594 page = find_get_page(mapping, offset);
2595 if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
2596
2597
2598
2599
2600 do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
2601 } else if (!page) {
2602
2603 do_sync_mmap_readahead(vmf->vma, ra, file, offset);
2604 count_vm_event(PGMAJFAULT);
2605 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
2606 ret = VM_FAULT_MAJOR;
2607retry_find:
2608 page = find_get_page(mapping, offset);
2609 if (!page)
2610 goto no_cached_page;
2611 }
2612
2613 if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
2614 put_page(page);
2615 return ret | VM_FAULT_RETRY;
2616 }
2617
2618
2619 if (unlikely(page->mapping != mapping)) {
2620 unlock_page(page);
2621 put_page(page);
2622 goto retry_find;
2623 }
2624 VM_BUG_ON_PAGE(page->index != offset, page);
2625
2626
2627
2628
2629
2630 if (unlikely(!PageUptodate(page)))
2631 goto page_not_uptodate;
2632
2633
2634
2635
2636
2637 max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
2638 if (unlikely(offset >= max_off)) {
2639 unlock_page(page);
2640 put_page(page);
2641 return VM_FAULT_SIGBUS;
2642 }
2643
2644 vmf->page = page;
2645 return ret | VM_FAULT_LOCKED;
2646
2647no_cached_page:
2648
2649
2650
2651
2652 error = page_cache_read(file, offset, vmf->gfp_mask);
2653
2654
2655
2656
2657
2658
2659 if (error >= 0)
2660 goto retry_find;
2661
2662
2663
2664
2665
2666
2667 if (error == -ENOMEM)
2668 return VM_FAULT_OOM;
2669 return VM_FAULT_SIGBUS;
2670
2671page_not_uptodate:
2672
2673
2674
2675
2676
2677
2678 ClearPageError(page);
2679 error = mapping->a_ops->readpage(file, page);
2680 if (!error) {
2681 wait_on_page_locked(page);
2682 if (!PageUptodate(page))
2683 error = -EIO;
2684 }
2685 put_page(page);
2686
2687 if (!error || error == AOP_TRUNCATED_PAGE)
2688 goto retry_find;
2689
2690
2691 shrink_readahead_size_eio(file, ra);
2692 return VM_FAULT_SIGBUS;
2693}
2694EXPORT_SYMBOL(filemap_fault);
2695
2696void filemap_map_pages(struct vm_fault *vmf,
2697 pgoff_t start_pgoff, pgoff_t end_pgoff)
2698{
2699 struct radix_tree_iter iter;
2700 void **slot;
2701 struct file *file = vmf->vma->vm_file;
2702 struct address_space *mapping = file->f_mapping;
2703 pgoff_t last_pgoff = start_pgoff;
2704 unsigned long max_idx;
2705 struct page *head, *page;
2706
2707 rcu_read_lock();
2708 radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
2709 if (iter.index > end_pgoff)
2710 break;
2711repeat:
2712 page = radix_tree_deref_slot(slot);
2713 if (unlikely(!page))
2714 goto next;
2715 if (radix_tree_exception(page)) {
2716 if (radix_tree_deref_retry(page)) {
2717 slot = radix_tree_iter_retry(&iter);
2718 continue;
2719 }
2720 goto next;
2721 }
2722
2723 head = compound_head(page);
2724 if (!page_cache_get_speculative(head))
2725 goto repeat;
2726
2727
2728 if (compound_head(page) != head) {
2729 put_page(head);
2730 goto repeat;
2731 }
2732
2733
2734 if (unlikely(page != *slot)) {
2735 put_page(head);
2736 goto repeat;
2737 }
2738
2739 if (!PageUptodate(page) ||
2740 PageReadahead(page) ||
2741 PageHWPoison(page))
2742 goto skip;
2743 if (!trylock_page(page))
2744 goto skip;
2745
2746 if (page->mapping != mapping || !PageUptodate(page))
2747 goto unlock;
2748
2749 max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
2750 if (page->index >= max_idx)
2751 goto unlock;
2752
2753 if (file->f_ra.mmap_miss > 0)
2754 file->f_ra.mmap_miss--;
2755
2756 vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
2757 if (vmf->pte)
2758 vmf->pte += iter.index - last_pgoff;
2759 last_pgoff = iter.index;
2760 if (alloc_set_pte(vmf, NULL, page))
2761 goto unlock;
2762 unlock_page(page);
2763 goto next;
2764unlock:
2765 unlock_page(page);
2766skip:
2767 put_page(page);
2768next:
2769
2770 if (pmd_trans_huge(*vmf->pmd))
2771 break;
2772 if (iter.index == end_pgoff)
2773 break;
2774 }
2775 rcu_read_unlock();
2776}
2777EXPORT_SYMBOL(filemap_map_pages);
2778
2779vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2780{
2781 struct page *page = vmf->page;
2782 struct inode *inode = file_inode(vmf->vma->vm_file);
2783 vm_fault_t ret = VM_FAULT_LOCKED;
2784
2785 sb_start_pagefault(inode->i_sb);
2786 file_update_time(vmf->vma->vm_file);
2787 lock_page(page);
2788 if (page->mapping != inode->i_mapping) {
2789 unlock_page(page);
2790 ret = VM_FAULT_NOPAGE;
2791 goto out;
2792 }
2793
2794
2795
2796
2797
2798 set_page_dirty(page);
2799 wait_for_stable_page(page);
2800out:
2801 sb_end_pagefault(inode->i_sb);
2802 return ret;
2803}
2804
const struct vm_operations_struct generic_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};

/* This is used for a general mmap of a disk file */
int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct address_space *mapping = file->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &generic_file_vm_ops;
	return 0;
}
/*
 * This is used for a read-only mmap of a disk file: reject mappings that
 * are both shared and potentially writable.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;
	return generic_file_mmap(file, vma);
}
#else
int filemap_page_mkwrite(struct vm_fault *vmf)
{
	return -ENOSYS;
}
int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(filemap_page_mkwrite);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);
2851
2852static struct page *wait_on_page_read(struct page *page)
2853{
2854 if (!IS_ERR(page)) {
2855 wait_on_page_locked(page);
2856 if (!PageUptodate(page)) {
2857 put_page(page);
2858 page = ERR_PTR(-EIO);
2859 }
2860 }
2861 return page;
2862}
2863
2864static struct page *do_read_cache_page(struct address_space *mapping,
2865 pgoff_t index,
2866 int (*filler)(void *, struct page *),
2867 void *data,
2868 gfp_t gfp)
2869{
2870 struct page *page;
2871 int err;
2872repeat:
2873 page = find_get_page(mapping, index);
2874 if (!page) {
2875 page = __page_cache_alloc(gfp);
2876 if (!page)
2877 return ERR_PTR(-ENOMEM);
2878 err = add_to_page_cache_lru(page, mapping, index, gfp);
2879 if (unlikely(err)) {
2880 put_page(page);
2881 if (err == -EEXIST)
2882 goto repeat;
2883
2884 return ERR_PTR(err);
2885 }
2886
2887filler:
2888 err = filler(data, page);
2889 if (err < 0) {
2890 put_page(page);
2891 return ERR_PTR(err);
2892 }
2893
2894 page = wait_on_page_read(page);
2895 if (IS_ERR(page))
2896 return page;
2897 goto out;
2898 }
2899 if (PageUptodate(page))
2900 goto out;
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933 wait_on_page_locked(page);
2934 if (PageUptodate(page))
2935 goto out;
2936
2937
2938 lock_page(page);
2939
2940
2941 if (!page->mapping) {
2942 unlock_page(page);
2943 put_page(page);
2944 goto repeat;
2945 }
2946
2947
2948 if (PageUptodate(page)) {
2949 unlock_page(page);
2950 goto out;
2951 }
2952 goto filler;
2953
2954out:
2955 mark_page_accessed(page);
2956 return page;
2957}
2971struct page *read_cache_page(struct address_space *mapping,
2972 pgoff_t index,
2973 int (*filler)(void *, struct page *),
2974 void *data)
2975{
2976 return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
2977}
2978EXPORT_SYMBOL(read_cache_page);
2991struct page *read_cache_page_gfp(struct address_space *mapping,
2992 pgoff_t index,
2993 gfp_t gfp)
2994{
2995 filler_t *filler = (filler_t *)mapping->a_ops->readpage;
2996
2997 return do_read_cache_page(mapping, index, filler, NULL, gfp);
2998}
2999EXPORT_SYMBOL(read_cache_page_gfp);
3006static int generic_access_check_limits(struct file *file, loff_t pos,
3007 loff_t *count)
3008{
3009 struct inode *inode = file->f_mapping->host;
3010 loff_t max_size = inode->i_sb->s_maxbytes;
3011
3012 if (!(file->f_flags & O_LARGEFILE))
3013 max_size = MAX_NON_LFS;
3014
3015 if (unlikely(pos >= max_size))
3016 return -EFBIG;
3017 *count = min(*count, max_size - pos);
3018 return 0;
3019}
3020
3021static int generic_write_check_limits(struct file *file, loff_t pos,
3022 loff_t *count)
3023{
3024 loff_t limit = rlimit(RLIMIT_FSIZE);
3025
3026 if (limit != RLIM_INFINITY) {
3027 if (pos >= limit) {
3028 send_sig(SIGXFSZ, current, 0);
3029 return -EFBIG;
3030 }
3031 *count = min(*count, limit - pos);
3032 }
3033
3034 return generic_access_check_limits(file, pos, count);
3035}
3044inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
3045{
3046 struct file *file = iocb->ki_filp;
3047 struct inode *inode = file->f_mapping->host;
3048 loff_t count;
3049 int ret;
3050
3051 if (!iov_iter_count(from))
3052 return 0;
3053
3054
3055 if (iocb->ki_flags & IOCB_APPEND)
3056 iocb->ki_pos = i_size_read(inode);
3057
3058 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
3059 return -EINVAL;
3060
3061 count = iov_iter_count(from);
3062 ret = generic_write_check_limits(file, iocb->ki_pos, &count);
3063 if (ret)
3064 return ret;
3065
3066 iov_iter_truncate(from, count);
3067 return iov_iter_count(from);
3068}
3069EXPORT_SYMBOL(generic_write_checks);
3078int generic_remap_checks(struct file *file_in, loff_t pos_in,
3079 struct file *file_out, loff_t pos_out,
3080 loff_t *req_count, unsigned int remap_flags)
3081{
3082 struct inode *inode_in = file_in->f_mapping->host;
3083 struct inode *inode_out = file_out->f_mapping->host;
3084 uint64_t count = *req_count;
3085 uint64_t bcount;
3086 loff_t size_in, size_out;
3087 loff_t bs = inode_out->i_sb->s_blocksize;
3088 int ret;
3089
3090
3091 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
3092 return -EINVAL;
3093
3094
3095 if (pos_in + count < pos_in || pos_out + count < pos_out)
3096 return -EINVAL;
3097
3098 size_in = i_size_read(inode_in);
3099 size_out = i_size_read(inode_out);
3100
3101
3102 if ((remap_flags & REMAP_FILE_DEDUP) &&
3103 (pos_in >= size_in || pos_in + count > size_in ||
3104 pos_out >= size_out || pos_out + count > size_out))
3105 return -EINVAL;
3106
3107
3108 if (pos_in >= size_in)
3109 return -EINVAL;
3110 count = min(count, size_in - (uint64_t)pos_in);
3111
3112 ret = generic_access_check_limits(file_in, pos_in, &count);
3113 if (ret)
3114 return ret;
3115
3116 ret = generic_write_check_limits(file_out, pos_out, &count);
3117 if (ret)
3118 return ret;
3119
3120
3121
3122
3123
3124
3125
3126
3127 if (pos_in + count == size_in) {
3128 bcount = ALIGN(size_in, bs) - pos_in;
3129 } else {
3130 if (!IS_ALIGNED(count, bs))
3131 count = ALIGN_DOWN(count, bs);
3132 bcount = count;
3133 }
3134
3135
3136 if (inode_in == inode_out &&
3137 pos_out + bcount > pos_in &&
3138 pos_out < pos_in + bcount)
3139 return -EINVAL;
3140
3141
3142
3143
3144
3145 if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
3146 return -EINVAL;
3147
3148 *req_count = count;
3149 return 0;
3150}
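/*
 * Illustrative note: filesystems normally do not call generic_remap_checks()
 * directly; generic_remap_file_range_prep() in fs/read_write.c invokes it
 * while validating a clone or dedupe request, roughly:
 *
 *	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out,
 *				   &count, remap_flags);
 *	if (ret)
 *		return ret;
 *	(count may have been shortened when REMAP_FILE_CAN_SHORTEN is set)
 */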

int pagecache_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_begin(file, mapping, pos, len, flags,
							pagep, fsdata);
}
EXPORT_SYMBOL(pagecache_write_begin);

int pagecache_write_end(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);
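/*
 * Illustrative sketch: a buffered write outside the core path pairs these
 * two helpers around the copy into the page.  Names below are hypothetical
 * and a real caller must also cope with a short copy (copied < len):
 *
 *	status = pagecache_write_begin(file, mapping, pos, len, 0,
 *				       &page, &fsdata);
 *	if (status)
 *		return status;
 *	copied = example_fill_page(page, pos, len);
 *	status = pagecache_write_end(file, mapping, pos, len, copied,
 *				     page, fsdata);
 */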

ssize_t
generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	size_t write_len;
	pgoff_t end;

	write_len = iov_iter_count(from);
	end = (pos + write_len - 1) >> PAGE_SHIFT;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* If there are cached pages in the range, bail instead of blocking */
		if (filemap_range_has_page(inode->i_mapping, pos,
					   pos + write_len - 1))
			return -EAGAIN;
	} else {
		written = filemap_write_and_wait_range(mapping, pos,
							pos + write_len - 1);
		if (written)
			goto out;
	}

	/*
	 * After a write we want buffered reads to be sure to go to disk to get
	 * the new data.  We invalidate clean cached pages from the region we're
	 * about to write.  We do this *before* the write so that we can return
	 * without clobbering -EIOCBQUEUED from ->direct_IO().
	 */
	written = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);
	/*
	 * If a page can not be invalidated, return 0 to fall back
	 * to buffered write.
	 */
	if (written) {
		if (written == -EBUSY)
			return 0;
		goto out;
	}

	written = mapping->a_ops->direct_IO(iocb, from);

	/*
	 * Finally, try again to invalidate clean pages which might have been
	 * cached by non-direct readahead, or faulted in by get_user_pages()
	 * if the source of the write was an mmap'ed region of the file
	 * we're writing.  Either one is a pretty crazy thing to do, so we
	 * don't support it 100%.  If this invalidation fails, tough, the
	 * write still worked...
	 *
	 * Most of the time we do not need this since dio_complete() will do
	 * the invalidation for us.  However there are some file systems that
	 * do not end up with dio_complete() being called, so let us not rely
	 * on it being able to invalidate pages in all cases.
	 */
	if (mapping->nrpages)
		invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);

	if (written > 0) {
		pos += written;
		write_len -= written;
		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
			i_size_write(inode, pos);
			mark_inode_dirty(inode);
		}
		iocb->ki_pos = pos;
	}
	iov_iter_revert(from, write_len - iov_iter_count(from));
out:
	return written;
}
EXPORT_SYMBOL(generic_file_direct_write);

/*
 * Find or create a page at the given pagecache position. Return the locked
 * page. This function is specifically for buffered writes.
 */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
					pgoff_t index, unsigned flags)
{
	struct page *page;
	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;

	if (flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;

	page = pagecache_get_page(mapping, index, fgp_flags,
			mapping_gfp_mask(mapping));
	if (page)
		wait_for_stable_page(page);

	return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
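/*
 * Illustrative sketch: a simple ->write_begin() implementation grabs the
 * page with this helper and hands it back through *pagep, much like
 * simple_write_begin() in fs/libfs.c.  The function below is hypothetical:
 *
 *	static int example_write_begin(struct file *file,
 *			struct address_space *mapping, loff_t pos,
 *			unsigned len, unsigned flags,
 *			struct page **pagep, void **fsdata)
 *	{
 *		struct page *page;
 *		pgoff_t index = pos >> PAGE_SHIFT;
 *
 *		page = grab_cache_page_write_begin(mapping, index, flags);
 *		if (!page)
 *			return -ENOMEM;
 *		*pagep = page;
 *		return 0;
 *	}
 */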

ssize_t generic_perform_write(struct file *file,
				struct iov_iter *i, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = 0;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */
		void *fsdata;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));

again:
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}

		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
						&page, &fsdata);
		if (unlikely(status < 0))
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fallback here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;

		balance_dirty_pages_ratelimited(mapping);
	} while (iov_iter_count(i));

	return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);
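/*
 * Illustrative sketch: generic_perform_write() is the buffered half of a
 * write and does not move the file position itself; a caller that has
 * already validated the request (e.g. via generic_write_checks()) advances
 * iocb->ki_pos, as __generic_file_write_iter() below does:
 *
 *	written = generic_perform_write(file, from, iocb->ki_pos);
 *	if (written > 0)
 *		iocb->ki_pos += written;
 */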

/**
 * __generic_file_write_iter - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or similar
 * object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 */
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space * mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t written = 0;
	ssize_t err;
	ssize_t status;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos, endbyte;

		written = generic_file_direct_write(iocb, from);
		/*
		 * If the write stopped short of completing, fall back to
		 * buffered writes.  Some filesystems do this for writes to
		 * holes, for example.  For DAX files, a buffered write will
		 * not succeed (even if it did, DAX does not handle dirty
		 * page-cache pages correctly).
		 */
		if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
			goto out;

		status = generic_perform_write(file, from, pos = iocb->ki_pos);
		/*
		 * If generic_perform_write() returned a synchronous error
		 * then we want to return the number of bytes which were
		 * direct-written, or the error code if that was zero.  Note
		 * that this differs from normal direct-io semantics, which
		 * will return -EFOO even if some bytes were written.
		 */
		if (unlikely(status < 0)) {
			err = status;
			goto out;
		}
		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		endbyte = pos + status - 1;
		err = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (err == 0) {
			iocb->ki_pos = endbyte + 1;
			written += status;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written
			 */
		}
	} else {
		written = generic_perform_write(file, from, iocb->ki_pos);
		if (likely(written > 0))
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);

/**
 * generic_file_write_iter - write data to a file
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 */
ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(generic_file_write_iter);
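/*
 * Illustrative sketch: most filesystems plug generic_file_write_iter()
 * straight into their file_operations; the structure below is hypothetical:
 *
 *	static const struct file_operations example_file_ops = {
 *		.llseek		= generic_file_llseek,
 *		.read_iter	= generic_file_read_iter,
 *		.write_iter	= generic_file_write_iter,
 *		.mmap		= generic_file_mmap,
 *		.fsync		= generic_file_fsync,
 *	};
 */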

/**
 * try_to_release_page() - release old fs-specific metadata on a page
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is to try to release any data against the page
 * (presumably at page->private).  If the release was successful, return '1'.
 * Otherwise return zero.
 *
 * This may also be called if PG_fscache is set on a page, indicating that the
 * page is known to the local caching routines.
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block
 * (__GFP_RECLAIM & __GFP_FS).
 */
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
	struct address_space * const mapping = page->mapping;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping && mapping->a_ops->releasepage)
		return mapping->a_ops->releasepage(page, gfp_mask);
	return try_to_free_buffers(page);
}

EXPORT_SYMBOL(try_to_release_page);