/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (as well as the odd "big" filesystem like
 * a network filesystem).
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>

/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 *
 * finished 'unifying' the page and buffer cache and SMP-threaded the
 * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
 *
 * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
 */

/*
 * Lock ordering:
 *
 *  ->i_mmap_rwsem		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->mapping->tree_lock
 *
 *  ->i_mutex
 *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_sem
 *    ->i_mmap_rwsem
 *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
 *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
 *
 *  ->mmap_sem
 *    ->lock_page		(access_process_vm)
 *
 *  ->i_mutex			(generic_perform_write)
 *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
 *
 *  bdi->wb.list_lock
 *    sb_lock			(fs/fs-writeback.c)
 *    ->mapping->tree_lock	(__sync_single_inode)
 *
 *  ->i_mmap_rwsem
 *    ->anon_vma.lock		(vma_adjust)
 *
 *  ->anon_vma.lock
 *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
 *
 *  ->page_table_lock or pte_lock
 *    ->swap_lock		(try_to_unmap_one)
 *    ->private_lock		(try_to_unmap_one)
 *    ->tree_lock		(try_to_unmap_one)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->tree_lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
 *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
 * ->i_mmap_rwsem
 *   ->tasklist_lock		(memory_failure, collect_procs_ao)
 */
static int page_cache_tree_insert(struct address_space *mapping,
				  struct page *page, void **shadowp)
{
	struct radix_tree_node *node;
	void **slot;
	int error;

	error = __radix_tree_create(&mapping->page_tree, page->index, 0,
				    &node, &slot);
	if (error)
		return error;
	if (*slot) {
		void *p;

		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
		if (!radix_tree_exceptional_entry(p))
			return -EEXIST;

		mapping->nrexceptional--;
		if (!dax_mapping(mapping)) {
			if (shadowp)
				*shadowp = p;
			if (node)
				workingset_node_shadows_dec(node);
		} else {
			/* DAX can replace empty locked entry with a hole */
			WARN_ON_ONCE(p !=
				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
					 RADIX_DAX_ENTRY_LOCK));
			/* DAX accounts exceptional entries as normal pages */
			if (node)
				workingset_node_pages_dec(node);
			/* Wakeup waiters for exceptional entry lock */
			dax_wake_mapping_entry_waiter(mapping, page->index,
						      false);
		}
	}
	radix_tree_replace_slot(slot, page);
	mapping->nrpages++;
	if (node) {
		workingset_node_pages_inc(node);
		/*
		 * Don't track node that contains actual pages.
		 *
		 * Avoid acquiring the list_lru lock if already
		 * untracked.  The list_empty() test is safe as
		 * node->private_list is protected by
		 * mapping->tree_lock.
		 */
		if (!list_empty(&node->private_list))
			list_lru_del(&workingset_shadow_nodes,
				     &node->private_list);
	}
	return 0;
}

static void page_cache_tree_delete(struct address_space *mapping,
				   struct page *page, void *shadow)
{
	struct radix_tree_node *node;
	int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(nr != 1 && shadow, page);

	if (shadow) {
		mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
		smp_wmb();
	}
	mapping->nrpages -= nr;

	for (i = 0; i < nr; i++) {
		node = radix_tree_replace_clear_tags(&mapping->page_tree,
				page->index + i, shadow);
		if (!node) {
			VM_BUG_ON_PAGE(nr != 1, page);
			return;
		}

		workingset_node_pages_dec(node);
		if (shadow)
			workingset_node_shadows_inc(node);
		else
			if (__radix_tree_delete_node(&mapping->page_tree, node))
				continue;

		/*
		 * Track node that only contains shadow entries. DAX mappings
		 * contain no shadow entries and may contain other exceptional
		 * entries so skip those.
		 *
		 * Avoid acquiring the list_lru lock if already tracked.
		 * The list_empty() test is safe as node->private_list is
		 * protected by mapping->tree_lock.
		 */
		if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
				list_empty(&node->private_list)) {
			node->private_data = mapping;
			list_lru_add(&workingset_shadow_nodes,
					&node->private_list);
		}
	}
}

/*
 * Delete a page from the page cache and free it. Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the mapping's tree_lock.
 */
void __delete_from_page_cache(struct page *page, void *shadow)
{
	struct address_space *mapping = page->mapping;
	int nr = hpage_nr_pages(page);

	trace_mm_filemap_delete_from_page_cache(page);
	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries.  We can't leave
	 * stale data around in the cleancache once our page is gone
	 */
	if (PageUptodate(page) && PageMappedToDisk(page))
		cleancache_put_page(page);
	else
		cleancache_invalidate_page(mapping, page);

	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapped(page), page);
	if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
		int mapcount;

		pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
			 current->comm, page_to_pfn(page));
		dump_page(page, "still mapped when deleted");
		dump_stack();
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);

		mapcount = page_mapcount(page);
		if (mapping_exiting(mapping) &&
		    page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
			page_mapcount_reset(page);
			page_ref_sub(page, mapcount);
		}
	}

	page_cache_tree_delete(mapping, page, shadow);

	page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	/* hugetlb pages do not participate in page cache accounting. */
	if (!PageHuge(page))
		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
	if (PageSwapBacked(page)) {
		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
		if (PageTransHuge(page))
			__dec_node_page_state(page, NR_SHMEM_THPS);
	} else {
		VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
	}

	/*
	 * At this point page must be either written or cleaned by truncate.
	 * Dirty page here signals a bug and loss of unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely but
	 * leaves PageDirty set: it has no effect for truncated page and
	 * anyway will be cleared before returning page into buddy allocator.
	 */
	if (WARN_ON_ONCE(PageDirty(page)))
		account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
}

/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the
 * page cache and locked.  It will never put the page into the free list,
 * the caller has a reference on the page.
 */
void delete_from_page_cache(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;
	void (*freepage)(struct page *);

	BUG_ON(!PageLocked(page));

	freepage = mapping->a_ops->freepage;

	spin_lock_irqsave(&mapping->tree_lock, flags);
	__delete_from_page_cache(page, NULL);
	spin_unlock_irqrestore(&mapping->tree_lock, flags);

	if (freepage)
		freepage(page);

	if (PageTransHuge(page) && !PageHuge(page)) {
		page_ref_sub(page, HPAGE_PMD_NR);
		VM_BUG_ON_PAGE(page_count(page) <= 0, page);
	} else {
		put_page(page);
	}
}
EXPORT_SYMBOL(delete_from_page_cache);

int filemap_check_errors(struct address_space *mapping)
{
	int ret = 0;

	if (test_bit(AS_ENOSPC, &mapping->flags) &&
	    test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_bit(AS_EIO, &mapping->flags) &&
	    test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
EXPORT_SYMBOL(filemap_check_errors);

/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 */
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end, int sync_mode)
{
	int ret;
	struct writeback_control wbc = {
		.sync_mode = sync_mode,
		.nr_to_write = LONG_MAX,
		.range_start = start,
		.range_end = end,
	};

	if (!mapping_cap_writeback_dirty(mapping))
		return 0;

	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
	ret = do_writepages(mapping, &wbc);
	wbc_detach_inode(&wbc);
	return ret;
}

static inline int __filemap_fdatawrite(struct address_space *mapping,
	int sync_mode)
{
	return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}

int filemap_fdatawrite(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);

int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
				loff_t end)
{
	return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);

/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 */
int filemap_flush(struct address_space *mapping)
{
	return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
}
EXPORT_SYMBOL(filemap_flush);

static int __filemap_fdatawait_range(struct address_space *mapping,
				     loff_t start_byte, loff_t end_byte)
{
	pgoff_t index = start_byte >> PAGE_SHIFT;
	pgoff_t end = end_byte >> PAGE_SHIFT;
	struct pagevec pvec;
	int nr_pages;
	int ret = 0;

	if (end_byte < start_byte)
		goto out;

	pagevec_init(&pvec, 0);
	while ((index <= end) &&
			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
			PAGECACHE_TAG_WRITEBACK,
			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
		unsigned i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/* until radix tree lookup accepts end_index */
			if (page->index > end)
				continue;

			wait_on_page_writeback(page);
			if (TestClearPageError(page))
				ret = -EIO;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
out:
	return ret;
}

/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them.  Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 */
int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
			    loff_t end_byte)
{
	int ret, ret2;

	ret = __filemap_fdatawait_range(mapping, start_byte, end_byte);
	ret2 = filemap_check_errors(mapping);
	if (!ret)
		ret = ret2;

	return ret;
}
EXPORT_SYMBOL(filemap_fdatawait_range);

/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Unlike filemap_fdatawait(), this function
 * does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves.  Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 */
void filemap_fdatawait_keep_errors(struct address_space *mapping)
{
	loff_t i_size = i_size_read(mapping->host);

	if (i_size == 0)
		return;

	__filemap_fdatawait_range(mapping, 0, i_size - 1);
}

/**
 * filemap_fdatawait - wait for all under-writeback pages to complete
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Check error status of the address space
 * and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 */
int filemap_fdatawait(struct address_space *mapping)
{
	loff_t i_size = i_size_read(mapping->host);

	if (i_size == 0)
		return 0;

	return filemap_fdatawait_range(mapping, 0, i_size - 1);
}
EXPORT_SYMBOL(filemap_fdatawait);

int filemap_write_and_wait(struct address_space *mapping)
{
	int err = 0;

	if ((!dax_mapping(mapping) && mapping->nrpages) ||
	    (dax_mapping(mapping) && mapping->nrexceptional)) {
		err = filemap_fdatawrite(mapping);
		/*
		 * Even if the above returned error, the pages may be
		 * written partially (e.g. -ENOSPC), so we wait for it.
		 * But the -EIO is special case, it may indicate the worst
		 * thing (e.g. bug) happened, so we avoid waiting for it.
		 */
		if (err != -EIO) {
			int err2 = filemap_fdatawait(mapping);
			if (!err)
				err = err2;
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait);

/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that `lend' is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 */
int filemap_write_and_wait_range(struct address_space *mapping,
				 loff_t lstart, loff_t lend)
{
	int err = 0;

	if ((!dax_mapping(mapping) && mapping->nrpages) ||
	    (dax_mapping(mapping) && mapping->nrexceptional)) {
		err = __filemap_fdatawrite_range(mapping, lstart, lend,
						 WB_SYNC_ALL);
		/* See comment of filemap_write_and_wait() */
		if (err != -EIO) {
			int err2 = filemap_fdatawait_range(mapping,
						lstart, lend);
			if (!err)
				err = err2;
		}
	} else {
		err = filemap_check_errors(mapping);
	}
	return err;
}
EXPORT_SYMBOL(filemap_write_and_wait_range);
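
/*
 * Illustrative sketch (not part of the original file): a minimal ->fsync()
 * data flush built on filemap_write_and_wait_range(), roughly the first step
 * generic_file_fsync() performs. "example_fsync" is a hypothetical name; a
 * real implementation must also write the inode's metadata.
 */
static int example_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	/* write back dirty pages in [start, end] and wait for completion */
	return filemap_write_and_wait_range(file->f_mapping, start, end);
}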

/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 * @gfp_mask:	allocation mode
 *
 * This function replaces a page in the pagecache with a new one.  On
 * success it acquires the pagecache reference for the new page and
 * drops it for the old page.  Both the old and new pages must be
 * locked.  This function does not add the new page to the LRU, the
 * caller must do that.
 *
 * The remove + add is atomic.  The only way this function can fail is
 * memory allocation failure.
 */
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
{
	int error;

	VM_BUG_ON_PAGE(!PageLocked(old), old);
	VM_BUG_ON_PAGE(!PageLocked(new), new);
	VM_BUG_ON_PAGE(new->mapping, new);

	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
	if (!error) {
		struct address_space *mapping = old->mapping;
		void (*freepage)(struct page *);
		unsigned long flags;

		pgoff_t offset = old->index;
		freepage = mapping->a_ops->freepage;

		get_page(new);
		new->mapping = mapping;
		new->index = offset;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		__delete_from_page_cache(old, NULL);
		error = page_cache_tree_insert(mapping, new, NULL);
		BUG_ON(error);
		/*
		 * page_cache_tree_insert() has already bumped
		 * mapping->nrpages; bumping it again here would
		 * double-account the new page.
		 */

		/*
		 * hugetlb pages do not participate in page cache accounting.
		 */
		if (!PageHuge(new))
			__inc_node_page_state(new, NR_FILE_PAGES);
		if (PageSwapBacked(new))
			__inc_node_page_state(new, NR_SHMEM);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
		mem_cgroup_migrate(old, new);
		radix_tree_preload_end();
		if (freepage)
			freepage(old);
		put_page(old);
	}

	return error;
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);

static int __add_to_page_cache_locked(struct page *page,
				      struct address_space *mapping,
				      pgoff_t offset, gfp_t gfp_mask,
				      void **shadowp)
{
	int huge = PageHuge(page);
	struct mem_cgroup *memcg;
	int error;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageSwapBacked(page), page);

	if (!huge) {
		error = mem_cgroup_try_charge(page, current->mm,
					      gfp_mask, &memcg, false);
		if (error)
			return error;
	}

	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
	if (error) {
		if (!huge)
			mem_cgroup_cancel_charge(page, memcg, false);
		return error;
	}

	get_page(page);
	page->mapping = mapping;
	page->index = offset;

	spin_lock_irq(&mapping->tree_lock);
	error = page_cache_tree_insert(mapping, page, shadowp);
	radix_tree_preload_end();
	if (unlikely(error))
		goto err_insert;

	/* hugetlb pages do not participate in page cache accounting. */
	if (!huge)
		__inc_node_page_state(page, NR_FILE_PAGES);
	spin_unlock_irq(&mapping->tree_lock);
	if (!huge)
		mem_cgroup_commit_charge(page, memcg, false, false);
	trace_mm_filemap_add_to_page_cache(page);
	return 0;
err_insert:
	page->mapping = NULL;
	/* Leave page->index set: truncation relies upon it */
	spin_unlock_irq(&mapping->tree_lock);
	if (!huge)
		mem_cgroup_cancel_charge(page, memcg, false);
	put_page(page);
	return error;
}

/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache. It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 */
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
		pgoff_t offset, gfp_t gfp_mask)
{
	return __add_to_page_cache_locked(page, mapping, offset,
					  gfp_mask, NULL);
}
EXPORT_SYMBOL(add_to_page_cache_locked);

int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t offset, gfp_t gfp_mask)
{
	void *shadow = NULL;
	int ret;

	__SetPageLocked(page);
	ret = __add_to_page_cache_locked(page, mapping, offset,
					 gfp_mask, &shadow);
	if (unlikely(ret))
		__ClearPageLocked(page);
	else {
		/*
		 * The page might have been evicted from cache only
		 * recently, in which case it should be activated like
		 * any other repeatedly accessed page.
		 * The exception is pages getting rewritten; evicting other
		 * data from the working set, only to cache data that will
		 * get overwritten with something else, is a waste of memory.
		 */
		if (!(gfp_mask & __GFP_WRITE) &&
		    shadow && workingset_refault(shadow)) {
			SetPageActive(page);
			workingset_activation(page);
		} else
			ClearPageActive(page);
		lru_cache_add(page);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
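
/*
 * Illustrative sketch (not part of the original file): the canonical
 * allocate-insert-read pattern built on add_to_page_cache_lru(); compare
 * page_cache_read() further down, which adds retry handling.
 * "example_read_into_cache" is a hypothetical name.
 */
static int example_read_into_cache(struct file *file, pgoff_t index)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page = __page_cache_alloc(mapping_gfp_mask(mapping));
	int err;

	if (!page)
		return -ENOMEM;
	/* on success the page is locked and on the LRU; readpage unlocks it */
	err = add_to_page_cache_lru(page, mapping, index,
				    mapping_gfp_constraint(mapping, GFP_KERNEL));
	if (!err)
		err = mapping->a_ops->readpage(file, page);
	put_page(page);
	return err;
}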

#ifdef CONFIG_NUMA
struct page *__page_cache_alloc(gfp_t gfp)
{
	int n;
	struct page *page;

	if (cpuset_do_page_mem_spread()) {
		unsigned int cpuset_mems_cookie;
		do {
			cpuset_mems_cookie = read_mems_allowed_begin();
			n = cpuset_mem_spread_node();
			page = __alloc_pages_node(n, gfp, 0);
		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

		return page;
	}
	return alloc_pages(gfp, 0);
}
EXPORT_SYMBOL(__page_cache_alloc);
#endif

/*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
 * waitqueues where the bucket discipline is to maintain all
 * waiters on the same queue and wake all when any of the pages
 * become available, and for the woken contexts to check to be
 * sure the appropriate page became available, this saves space
 * at a cost of "thundering herd" phenomena during rare hash
 * collisions.
 */
wait_queue_head_t *page_waitqueue(struct page *page)
{
	const struct zone *zone = page_zone(page);

	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
}
EXPORT_SYMBOL(page_waitqueue);

void wait_on_page_bit(struct page *page, int bit_nr)
{
	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);

	if (test_bit(bit_nr, &page->flags))
		__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
							TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_on_page_bit);

int wait_on_page_bit_killable(struct page *page, int bit_nr)
{
	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);

	if (!test_bit(bit_nr, &page->flags))
		return 0;

	return __wait_on_bit(page_waitqueue(page), &wait,
			     bit_wait_io, TASK_KILLABLE);
}

int wait_on_page_bit_killable_timeout(struct page *page,
				       int bit_nr, unsigned long timeout)
{
	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);

	wait.key.timeout = jiffies + timeout;
	if (!test_bit(bit_nr, &page->flags))
		return 0;
	return __wait_on_bit(page_waitqueue(page), &wait,
			     bit_wait_io_timeout, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);

/**
 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
 * @page: Page defining the wait queue of interest
 * @waiter: Waiter to add to the queue
 *
 * Add an arbitrary @waiter to the wait queue for the nominated @page.
 */
void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
{
	wait_queue_head_t *q = page_waitqueue(page);
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	__add_wait_queue(q, waiter);
	spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL_GPL(add_page_wait_queue);

/**
 * unlock_page - unlock a locked page
 * @page: the page
 *
 * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
 * Also wakes sleepers in wait_on_page_writeback() because the wakeup
 * mechanism between PageLocked pages and PageWriteback pages is shared.
 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
 *
 * The mb is necessary to enforce ordering between the clear_bit and the read
 * of the waitqueue (to avoid miscounting the number of waiters).
 */
void unlock_page(struct page *page)
{
	page = compound_head(page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	clear_bit_unlock(PG_locked, &page->flags);
	smp_mb__after_atomic();
	wake_up_page(page, PG_locked);
}
EXPORT_SYMBOL(unlock_page);
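
/*
 * Illustrative sketch (not part of the original file): the usual
 * lock-recheck-unlock dance around lock_page(). A page can be truncated
 * while the caller sleeps on the lock, so the mapping must be rechecked.
 * "example_lock_page_in_mapping" is a hypothetical name; the caller is
 * assumed to hold a reference on @page.
 */
static struct page *example_lock_page_in_mapping(struct page *page,
						 struct address_space *mapping)
{
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		/* raced with truncation: drop the page and tell the caller */
		unlock_page(page);
		put_page(page);
		return NULL;
	}
	return page;	/* returned locked, reference still held */
}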

/**
 * end_page_writeback - end writeback against a page
 * @page: the page
 */
void end_page_writeback(struct page *page)
{
	/*
	 * TestClearPageReclaim could be used here but it is an atomic
	 * operation and overkill in this particular case. Failing to
	 * shuffle a page marked for immediate reclaim is too mild to
	 * justify taking an atomic operation penalty at the end of
	 * ->writepage. This keeps reclaim working anyway.
	 */
	if (PageReclaim(page)) {
		ClearPageReclaim(page);
		rotate_reclaimable_page(page);
	}

	if (!test_clear_page_writeback(page))
		BUG();

	smp_mb__after_atomic();
	wake_up_page(page, PG_writeback);
}
EXPORT_SYMBOL(end_page_writeback);

/**
 * page_endio - end reads or writes against a page
 * @page: the page
 * @is_write: was this a write?
 * @err: zero on success, or the I/O error
 *
 * After completing I/O on a page, call this routine to update the page
 * flags appropriately.
 */
void page_endio(struct page *page, bool is_write, int err)
{
	if (!is_write) {
		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	} else {
		if (err) {
			SetPageError(page);
			if (page->mapping)
				mapping_set_error(page->mapping, err);
		}
		end_page_writeback(page);
	}
}
EXPORT_SYMBOL_GPL(page_endio);

/**
 * __lock_page - get a lock on the page, assuming we need to sleep to get it
 * @page: the page to lock
 */
void __lock_page(struct page *page)
{
	struct page *page_head = compound_head(page);
	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);

	__wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
							TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_page);

int __lock_page_killable(struct page *page)
{
	struct page *page_head = compound_head(page);
	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);

	return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
					bit_wait_io, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);

/*
 * Return values:
 * 1 - page is locked; mmap_sem is still held.
 * 0 - page is not locked.
 *     mmap_sem has been released (up_read()), unless flags had both
 *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
 *     which case mmap_sem is still held.
 *
 * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
 * with the page locked and the mmap_sem unperturbed.
 */
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
			 unsigned int flags)
{
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		/*
		 * CAUTION! In this case, mmap_sem is not released
		 * even though return 0.
		 */
		if (flags & FAULT_FLAG_RETRY_NOWAIT)
			return 0;

		up_read(&mm->mmap_sem);
		if (flags & FAULT_FLAG_KILLABLE)
			wait_on_page_locked_killable(page);
		else
			wait_on_page_locked(page);
		return 0;
	} else {
		if (flags & FAULT_FLAG_KILLABLE) {
			int ret;

			ret = __lock_page_killable(page);
			if (ret) {
				up_read(&mm->mmap_sem);
				return 0;
			}
		} else
			__lock_page(page);
		return 1;
	}
}

/**
 * page_cache_next_hole - find the next hole (not-present entry)
 * @mapping: mapping
 * @index: index
 * @max_scan: maximum range to search
 *
 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
 * lowest indexed hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'return - index >=
 * max_scan' will be true). In rare cases of index wrap-around, 0 will
 * be returned.
 *
 * page_cache_next_hole may be called under rcu_read_lock. However,
 * like radix_tree_gang_lookup, this will not atomically search a
 * snapshot of the tree at a single point in time. For example, if a
 * hole is created at index 5, then subsequently a hole is created at
 * index 10, page_cache_next_hole covering both indexes may return 10
 * if called under rcu_read_lock.
 */
pgoff_t page_cache_next_hole(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		struct page *page;

		page = radix_tree_lookup(&mapping->page_tree, index);
		if (!page || radix_tree_exceptional_entry(page))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(page_cache_next_hole);

/**
 * page_cache_prev_hole - find the prev hole (not-present entry)
 * @mapping: mapping
 * @index: index
 * @max_scan: maximum range to search
 *
 * Search backwards in the range [max(index-max_scan+1, 0), index] for
 * the first hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'index - return >=
 * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
 * will be returned.
 *
 * page_cache_prev_hole may be called under rcu_read_lock. However,
 * like radix_tree_gang_lookup, this will not atomically search a
 * snapshot of the tree at a single point in time. For example, if a
 * hole is created at index 10, then subsequently a hole is created at
 * index 5, page_cache_prev_hole covering both indexes may return 5 if
 * called under rcu_read_lock.
 */
pgoff_t page_cache_prev_hole(struct address_space *mapping,
			     pgoff_t index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		struct page *page;

		page = radix_tree_lookup(&mapping->page_tree, index);
		if (!page || radix_tree_exceptional_entry(page))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(page_cache_prev_hole);

/**
 * find_get_entry - find and get a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned with an increased refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * Otherwise, %NULL is returned.
 */
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
	void **pagep;
	struct page *head, *page;

	rcu_read_lock();
repeat:
	page = NULL;
	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
	if (pagep) {
		page = radix_tree_deref_slot(pagep);
		if (unlikely(!page))
			goto out;
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page))
				goto repeat;
			/*
			 * A shadow entry of a recently evicted page,
			 * or a swap entry from shmem/tmpfs.  Return
			 * it without attempting to raise page count.
			 */
			goto out;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/*
		 * Has the page moved?
		 * This is part of the lockless pagecache protocol. See
		 * include/linux/pagemap.h for details.
		 */
		if (unlikely(page != *pagep)) {
			put_page(head);
			goto repeat;
		}
	}
out:
	rcu_read_unlock();

	return page;
}
EXPORT_SYMBOL(find_get_entry);

/**
 * find_lock_entry - locate, pin and lock a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * Otherwise, %NULL is returned.
 *
 * find_lock_entry() may sleep.
 */
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
{
	struct page *page;

repeat:
	page = find_get_entry(mapping, offset);
	if (page && !radix_tree_exception(page)) {
		lock_page(page);
		/* Has the page been truncated? */
		if (unlikely(page_mapping(page) != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
	}
	return page;
}
EXPORT_SYMBOL(find_lock_entry);

/**
 * pagecache_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 * @fgp_flags: FGP flags
 * @gfp_mask: gfp mask to use for the page cache data page allocation
 *
 * Looks up the page cache slot at @mapping & @offset.
 *
 * FGP flags modify how the page is returned.
 *
 * FGP_ACCESSED: the page will be marked accessed
 * FGP_LOCK: the page is returned locked
 * FGP_CREAT: if the page is not present then a new page is allocated using
 *   @gfp_mask and added to the page cache and the VM's LRU list.
 *   The page is returned locked and with an increased refcount.
 *   Otherwise, %NULL is returned.
 *
 * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
 * if the GFP flags specified for FGP_CREAT are atomic.
 *
 * If there is a page cache page, it is returned with an increased refcount.
 */
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
	int fgp_flags, gfp_t gfp_mask)
{
	struct page *page;

repeat:
	page = find_get_entry(mapping, offset);
	if (radix_tree_exceptional_entry(page))
		page = NULL;
	if (!page)
		goto no_page;

	if (fgp_flags & FGP_LOCK) {
		if (fgp_flags & FGP_NOWAIT) {
			if (!trylock_page(page)) {
				put_page(page);
				return NULL;
			}
		} else {
			lock_page(page);
		}

		/* Has the page been truncated? */
		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			put_page(page);
			goto repeat;
		}
		VM_BUG_ON_PAGE(page->index != offset, page);
	}

	if (page && (fgp_flags & FGP_ACCESSED))
		mark_page_accessed(page);

no_page:
	if (!page && (fgp_flags & FGP_CREAT)) {
		int err;
		if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
			gfp_mask |= __GFP_WRITE;
		if (fgp_flags & FGP_NOFS)
			gfp_mask &= ~__GFP_FS;

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return NULL;

		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
			fgp_flags |= FGP_LOCK;

		/* Init accessed so avoid atomic mark_page_accessed later */
		if (fgp_flags & FGP_ACCESSED)
			__SetPageReferenced(page);

		err = add_to_page_cache_lru(page, mapping, offset,
				gfp_mask & GFP_RECLAIM_MASK);
		if (unlikely(err)) {
			put_page(page);
			page = NULL;
			if (err == -EEXIST)
				goto repeat;
		}
	}

	return page;
}
EXPORT_SYMBOL(pagecache_get_page);
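
/*
 * Illustrative sketch (not part of the original file): find-or-create a
 * locked, referenced page, the same way grab_cache_page_write_begin()
 * below drives pagecache_get_page(). "example_grab_page" is a
 * hypothetical name.
 */
static struct page *example_grab_page(struct address_space *mapping,
				      pgoff_t index)
{
	return pagecache_get_page(mapping, index,
				  FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				  mapping_gfp_mask(mapping));
}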

/**
 * find_get_entries - gang pagecache lookup
 * @mapping:	The address_space to search
 * @start:	The starting page cache index
 * @nr_entries:	The maximum number of entries
 * @entries:	Where the resulting entries are placed
 * @indices:	The cache indices corresponding to the entries in @entries
 *
 * find_get_entries() will search for and return a group of up to
 * @nr_entries entries in the mapping.  The entries are placed at
 * @entries.  find_get_entries() takes a reference against any actual
 * pages it returns.
 *
 * The search returns a group of mapping-contiguous page cache entries
 * with ascending indexes.  There may be holes in the indices due to
 * not-present pages.
 *
 * Any shadow entries of evicted pages, or swap entries from
 * shmem/tmpfs, are included in the returned array.
 *
 * find_get_entries() returns the number of pages and shadow entries
 * which were found.
 */
unsigned find_get_entries(struct address_space *mapping,
			  pgoff_t start, unsigned int nr_entries,
			  struct page **entries, pgoff_t *indices)
{
	void **slot;
	unsigned int ret = 0;
	struct radix_tree_iter iter;

	if (!nr_entries)
		return 0;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		struct page *head, *page;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}
			/*
			 * A shadow entry of a recently evicted page, a swap
			 * entry from shmem/tmpfs or a DAX entry.  Return it
			 * without attempting to raise page count.
			 */
			goto export;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}
export:
		indices[ret] = iter.index;
		entries[ret] = page;
		if (++ret == nr_entries)
			break;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * find_get_pages - gang pagecache lookup
 * @mapping:	The address_space to search
 * @start:	The starting page index
 * @nr_pages:	The maximum number of pages
 * @pages:	Where the resulting pages are placed
 *
 * find_get_pages() will search for and return a group of up to
 * @nr_pages pages in the mapping.  The pages are placed at @pages.
 * find_get_pages() takes a reference against the returned pages.
 *
 * The search returns a group of mapping-contiguous pages with ascending
 * indexes.  There may be holes in the indices due to not-present pages.
 *
 * find_get_pages() returns the number of pages which were found.
 */
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
			    unsigned int nr_pages, struct page **pages)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
		struct page *head, *page;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}
			/*
			 * A shadow entry of a recently evicted page,
			 * or a swap entry from shmem/tmpfs.  Skip
			 * over it.
			 */
			continue;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}

		pages[ret] = page;
		if (++ret == nr_pages)
			break;
	}

	rcu_read_unlock();
	return ret;
}
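
/*
 * Illustrative sketch (not part of the original file): batched iteration
 * over a mapping with find_get_pages(). Every returned page carries a
 * reference the caller must drop. "example_for_each_page" and the batch
 * size of 16 are hypothetical.
 */
static void example_for_each_page(struct address_space *mapping,
				  void (*fn)(struct page *))
{
	struct page *pages[16];
	pgoff_t index = 0;
	unsigned nr, i;

	while ((nr = find_get_pages(mapping, index, 16, pages)) != 0) {
		/* resume after the last page seen in this batch */
		index = pages[nr - 1]->index + 1;
		for (i = 0; i < nr; i++) {
			fn(pages[i]);
			put_page(pages[i]);
		}
	}
}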

/**
 * find_get_pages_contig - gang contiguous pagecache lookup
 * @mapping:	The address_space to search
 * @index:	The starting page index
 * @nr_pages:	The maximum number of pages
 * @pages:	Where the resulting pages are placed
 *
 * find_get_pages_contig() works exactly like find_get_pages(), except
 * that the returned number of pages are guaranteed to be contiguous.
 *
 * find_get_pages_contig() returns the number of pages which were found.
 */
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
			       unsigned int nr_pages, struct page **pages)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
		struct page *head, *page;
repeat:
		page = radix_tree_deref_slot(slot);
		/* The hole, there no reason to continue */
		if (unlikely(!page))
			break;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}
			/*
			 * A shadow entry of a recently evicted page,
			 * or a swap entry from shmem/tmpfs.  Stop
			 * looking for contiguous pages.
			 */
			break;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}

		/*
		 * must check mapping and index after taking the ref.
		 * otherwise we can get both false positives and false
		 * negatives, which is just confusing to the caller.
		 */
		if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
			put_page(page);
			break;
		}

		pages[ret] = page;
		if (++ret == nr_pages)
			break;
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);

/**
 * find_get_pages_tag - find and return pages that match @tag
 * @mapping:	the address_space to search
 * @index:	the starting page index
 * @tag:	the tag index
 * @nr_pages:	the maximum number of pages
 * @pages:	where the resulting pages are placed
 *
 * Like find_get_pages, except we only return pages which are tagged with
 * @tag.   We update @index to index the next page for the traversal.
 */
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
			int tag, unsigned int nr_pages, struct page **pages)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned ret = 0;

	if (unlikely(!nr_pages))
		return 0;

	rcu_read_lock();
	radix_tree_for_each_tagged(slot, &mapping->page_tree,
				   &iter, *index, tag) {
		struct page *head, *page;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}
			/*
			 * A shadow entry of a recently evicted page.
			 *
			 * Those entries should never be tagged, but
			 * this tree walk is lockless and the tags are
			 * looked up in bulk, one radix tree node at a
			 * time, so there is a window for page reclaim
			 * to evict a page we saw tagged.
			 *
			 * Skip over it.
			 */
			continue;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}

		pages[ret] = page;
		if (++ret == nr_pages)
			break;
	}

	rcu_read_unlock();

	if (ret)
		*index = pages[ret - 1]->index + 1;

	return ret;
}
EXPORT_SYMBOL(find_get_pages_tag);
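
/*
 * Illustrative sketch (not part of the original file): walking only the
 * pages tagged for writeback, i.e. the core of __filemap_fdatawait_range()
 * above expressed with find_get_pages_tag(), which advances *index for
 * the caller. "example_wait_writeback" is a hypothetical name.
 */
static void example_wait_writeback(struct address_space *mapping)
{
	struct page *pages[PAGEVEC_SIZE];
	pgoff_t index = 0;
	unsigned nr, i;

	while ((nr = find_get_pages_tag(mapping, &index,
					PAGECACHE_TAG_WRITEBACK,
					PAGEVEC_SIZE, pages)) != 0) {
		for (i = 0; i < nr; i++) {
			wait_on_page_writeback(pages[i]);
			put_page(pages[i]);
		}
		cond_resched();
	}
}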

/**
 * find_get_entries_tag - find and return entries that match @tag
 * @mapping:	the address_space to search
 * @start:	the starting page cache index
 * @tag:	the tag index
 * @nr_entries:	the maximum number of entries
 * @entries:	where the resulting entries are placed
 * @indices:	the cache indices corresponding to the entries in @entries
 *
 * Like find_get_entries, except we only return entries which are tagged with
 * @tag.
 */
unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
			int tag, unsigned int nr_entries,
			struct page **entries, pgoff_t *indices)
{
	void **slot;
	unsigned int ret = 0;
	struct radix_tree_iter iter;

	if (!nr_entries)
		return 0;

	rcu_read_lock();
	radix_tree_for_each_tagged(slot, &mapping->page_tree,
				   &iter, start, tag) {
		struct page *head, *page;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			continue;
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}

			/*
			 * A shadow entry of a recently evicted page, a swap
			 * entry from shmem/tmpfs or a DAX entry.  Return it
			 * without attempting to raise page count.
			 */
			goto export;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}
export:
		indices[ret] = iter.index;
		entries[ret] = page;
		if (++ret == nr_entries)
			break;
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(find_get_entries_tag);

/*
 * CD/DVDs are error prone. When a medium error occurs, the driver may fail
 * a _large_ part of the i/o request. Imagine the worst scenario:
 *
 *      ---R__________________________________________B__________
 *         ^ reading here                             ^ bad block(assume 4k)
 *
 * read(R) => miss => readahead(R...B) => media error => frustrating retries
 * => failing the whole request => read(R) => read(R+1) =>
 * readahead(R+1...B+1) => bang => read(R+2) => ...
 *
 * It is going insane. Fix it by quickly scaling down the readahead size.
 */
static void shrink_readahead_size_eio(struct file *filp,
					struct file_ra_state *ra)
{
	ra->ra_pages /= 4;
}

/**
 * do_generic_file_read - generic file read routine
 * @filp:	the file to read
 * @ppos:	current file position
 * @iter:	data destination
 * @written:	already copied
 *
 * This is a generic file read routine, and uses the
 * mapping->a_ops->readpage() function for the actual low-level stuff.
 *
 * This is really ugly. But the goto's actually try to clarify some
 * of the logic when it comes to error handling etc.
 */
static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
		struct iov_iter *iter, ssize_t written)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	struct file_ra_state *ra = &filp->f_ra;
	pgoff_t index;
	pgoff_t last_index;
	pgoff_t prev_index;
	unsigned long offset;      /* offset into pagecache page */
	unsigned int prev_offset;
	int error = 0;

	index = *ppos >> PAGE_SHIFT;
	prev_index = ra->prev_pos >> PAGE_SHIFT;
	prev_offset = ra->prev_pos & (PAGE_SIZE-1);
	last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
	offset = *ppos & ~PAGE_MASK;

	for (;;) {
		struct page *page;
		pgoff_t end_index;
		loff_t isize;
		unsigned long nr, ret;

		cond_resched();
find_page:
		page = find_get_page(mapping, index);
		if (!page) {
			page_cache_sync_readahead(mapping,
					ra, filp,
					index, last_index - index);
			page = find_get_page(mapping, index);
			if (unlikely(page == NULL))
				goto no_cached_page;
		}
		if (PageReadahead(page)) {
			page_cache_async_readahead(mapping,
					ra, filp, page,
					index, last_index - index);
		}
		if (!PageUptodate(page)) {
			/*
			 * See comment in do_read_cache_page on why
			 * wait_on_page_locked is used to avoid unnecessarily
			 * serialising lookups.
			 */
			wait_on_page_locked_killable(page);
			if (PageUptodate(page))
				goto page_ok;

			if (inode->i_blkbits == PAGE_SHIFT ||
					!mapping->a_ops->is_partially_uptodate)
				goto page_not_up_to_date;
			if (!trylock_page(page))
				goto page_not_up_to_date;
			/* Did it get truncated before we got the lock? */
			if (!page->mapping)
				goto page_not_up_to_date_locked;
			if (!mapping->a_ops->is_partially_uptodate(page,
							offset, iter->count))
				goto page_not_up_to_date_locked;
			unlock_page(page);
		}
page_ok:
		/*
		 * i_size must be checked after we know the page is Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */

		isize = i_size_read(inode);
		end_index = (isize - 1) >> PAGE_SHIFT;
		if (unlikely(!isize || index > end_index)) {
			put_page(page);
			goto out;
		}

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_SIZE;
		if (index == end_index) {
			nr = ((isize - 1) & ~PAGE_MASK) + 1;
			if (nr <= offset) {
				put_page(page);
				goto out;
			}
		}
		nr = nr - offset;

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * When a sequential read accesses a page several times,
		 * only mark it as accessed the first time.
		 */
		if (prev_index != index || offset != prev_offset)
			mark_page_accessed(page);
		prev_index = index;

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 */

		ret = copy_page_to_iter(page, offset, nr, iter);
		offset += ret;
		index += offset >> PAGE_SHIFT;
		offset &= ~PAGE_MASK;
		prev_offset = offset;

		put_page(page);
		written += ret;
		if (!iov_iter_count(iter))
			goto out;
		if (ret < nr) {
			error = -EFAULT;
			goto out;
		}
		continue;

page_not_up_to_date:
		/* Get exclusive access to the page ... */
		error = lock_page_killable(page);
		if (unlikely(error))
			goto readpage_error;

page_not_up_to_date_locked:
		/* Did it get truncated before we got the lock? */
		if (!page->mapping) {
			unlock_page(page);
			put_page(page);
			continue;
		}

		/* Did somebody else fill it already? */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto page_ok;
		}

readpage:
		/*
		 * A previous I/O error may have been due to temporary
		 * failures, eg. multipath errors.
		 * PG_error will be set again if readpage fails.
		 */
		ClearPageError(page);
		/* Start the actual read. The read will unlock the page. */
		error = mapping->a_ops->readpage(filp, page);

		if (unlikely(error)) {
			if (error == AOP_TRUNCATED_PAGE) {
				put_page(page);
				error = 0;
				goto find_page;
			}
			goto readpage_error;
		}

		if (!PageUptodate(page)) {
			error = lock_page_killable(page);
			if (unlikely(error))
				goto readpage_error;
			if (!PageUptodate(page)) {
				if (page->mapping == NULL) {
					/*
					 * invalidate_mapping_pages got it
					 */
					unlock_page(page);
					put_page(page);
					goto find_page;
				}
				unlock_page(page);
				shrink_readahead_size_eio(filp, ra);
				error = -EIO;
				goto readpage_error;
			}
			unlock_page(page);
		}

		goto page_ok;

readpage_error:
		/* UHHUH! A synchronous read error occurred. Report it */
		put_page(page);
		goto out;

no_cached_page:
		/*
		 * Ok, it wasn't cached, so we need to create a new
		 * page..
		 */
		page = page_cache_alloc_cold(mapping);
		if (!page) {
			error = -ENOMEM;
			goto out;
		}
		error = add_to_page_cache_lru(page, mapping, index,
				mapping_gfp_constraint(mapping, GFP_KERNEL));
		if (error) {
			put_page(page);
			if (error == -EEXIST) {
				error = 0;
				goto find_page;
			}
			goto out;
		}
		goto readpage;
	}

out:
	ra->prev_pos = prev_index;
	ra->prev_pos <<= PAGE_SHIFT;
	ra->prev_pos |= prev_offset;

	*ppos = ((loff_t)index << PAGE_SHIFT) + offset;
	file_accessed(filp);
	return written ? written : error;
}

/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iter:	destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 */
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	ssize_t retval = 0;
	size_t count = iov_iter_count(iter);

	if (!count)
		goto out; /* skip atime */

	if (iocb->ki_flags & IOCB_DIRECT) {
		struct address_space *mapping = file->f_mapping;
		struct inode *inode = mapping->host;
		loff_t size;

		size = i_size_read(inode);
		retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
					iocb->ki_pos + count - 1);
		if (!retval) {
			struct iov_iter data = *iter;
			retval = mapping->a_ops->direct_IO(iocb, &data);
		}

		if (retval > 0) {
			iocb->ki_pos += retval;
			iov_iter_advance(iter, retval);
		}

		/*
		 * Btrfs can have a short DIO read if we encounter
		 * compressed extents, so if there was an error, or if
		 * we've already read everything we wanted to, or if
		 * there was a short read because we hit EOF, go ahead
		 * and return.  Otherwise fallthrough to buffered io for
		 * the rest of the read.  Buffered reads will not work for
		 * DAX files, so don't bother trying.
		 */
		if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size ||
		    IS_DAX(inode)) {
			file_accessed(file);
			goto out;
		}
	}

	retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval);
out:
	return retval;
}
EXPORT_SYMBOL(generic_file_read_iter);

#ifdef CONFIG_MMU
/**
 * page_cache_read - adds requested page to the page cache if not already there
 * @file:	file to read
 * @offset:	page index
 * @gfp_mask:	memory allocation flags
 *
 * This adds the requested page to the page cache if it isn't already there,
 * and schedules an I/O to read in its contents from disk.
 */
static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
{
	struct address_space *mapping = file->f_mapping;
	struct page *page;
	int ret;

	do {
		page = __page_cache_alloc(gfp_mask|__GFP_COLD);
		if (!page)
			return -ENOMEM;

		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL);
		if (ret == 0)
			ret = mapping->a_ops->readpage(file, page);
		else if (ret == -EEXIST)
			ret = 0; /* losing race to add is OK */

		put_page(page);

	} while (ret == AOP_TRUNCATED_PAGE);

	return ret;
}

#define MMAP_LOTSAMISS  (100)

/*
 * Synchronous readahead happens when we don't even find
 * a page in the page cache at all.
 */
static void do_sync_mmap_readahead(struct vm_area_struct *vma,
				   struct file_ra_state *ra,
				   struct file *file,
				   pgoff_t offset)
{
	struct address_space *mapping = file->f_mapping;

	/* If we don't want any read-ahead, don't bother */
	if (vma->vm_flags & VM_RAND_READ)
		return;
	if (!ra->ra_pages)
		return;

	if (vma->vm_flags & VM_SEQ_READ) {
		page_cache_sync_readahead(mapping, ra, file, offset,
					  ra->ra_pages);
		return;
	}

	/* Avoid banging the cache line if not needed */
	if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
		ra->mmap_miss++;

	/*
	 * Do we miss much more than hit in this file? If so,
	 * stop bothering with read-ahead. It will only hurt.
	 */
	if (ra->mmap_miss > MMAP_LOTSAMISS)
		return;

	/*
	 * mmap read-around
	 */
	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
	ra->size = ra->ra_pages;
	ra->async_size = ra->ra_pages / 4;
	ra_submit(ra, mapping, file);
}

/*
 * Asynchronous readahead happens when we find the page and PG_readahead,
 * so we want to possibly extend the readahead further..
 */
static void do_async_mmap_readahead(struct vm_area_struct *vma,
				    struct file_ra_state *ra,
				    struct file *file,
				    struct page *page,
				    pgoff_t offset)
{
	struct address_space *mapping = file->f_mapping;

	/* If we don't want any read-ahead, don't bother */
	if (vma->vm_flags & VM_RAND_READ)
		return;
	if (ra->mmap_miss > 0)
		ra->mmap_miss--;
	if (PageReadahead(page))
		page_cache_async_readahead(mapping, ra, file,
					   page, offset, ra->ra_pages);
}

/**
 * filemap_fault - read in file data for page fault handling
 * @vma:	vma in which the fault was taken
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 *
 * vma->vm_mm->mmap_sem must be held on entry.
 *
 * If our return value has VM_FAULT_RETRY set, it's because
 * lock_page_or_retry() returned 0.
 * The mmap_sem has usually been released in this case.
 * See __lock_page_or_retry() for the exception.
 *
 * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
 * has not been released.
 *
 * We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
 */
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int error;
	struct file *file = vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct file_ra_state *ra = &file->f_ra;
	struct inode *inode = mapping->host;
	pgoff_t offset = vmf->pgoff;
	struct page *page;
	loff_t size;
	int ret = 0;

	size = round_up(i_size_read(inode), PAGE_SIZE);
	if (offset >= size >> PAGE_SHIFT)
		return VM_FAULT_SIGBUS;

	/*
	 * Do we have something in the page cache already?
	 */
	page = find_get_page(mapping, offset);
	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
		/*
		 * We found the page, so try async readahead before
		 * waiting for the lock.
		 */
		do_async_mmap_readahead(vma, ra, file, page, offset);
	} else if (!page) {
		/* No page in the page cache at all */
		do_sync_mmap_readahead(vma, ra, file, offset);
		count_vm_event(PGMAJFAULT);
		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
retry_find:
		page = find_get_page(mapping, offset);
		if (!page)
			goto no_cached_page;
	}

	if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
		put_page(page);
		return ret | VM_FAULT_RETRY;
	}

	/* Did it get truncated? */
	if (unlikely(page->mapping != mapping)) {
		unlock_page(page);
		put_page(page);
		goto retry_find;
	}
	VM_BUG_ON_PAGE(page->index != offset, page);

	/*
	 * We have a locked page in the page cache, now we need to check
	 * that it's up-to-date. If not, it is going to be due to an error.
	 */
	if (unlikely(!PageUptodate(page)))
		goto page_not_uptodate;

	/*
	 * Found the page and have a reference on it.
	 * We must recheck i_size under page lock.
	 */
	size = round_up(i_size_read(inode), PAGE_SIZE);
	if (unlikely(offset >= size >> PAGE_SHIFT)) {
		unlock_page(page);
		put_page(page);
		return VM_FAULT_SIGBUS;
	}

	vmf->page = page;
	return ret | VM_FAULT_LOCKED;

no_cached_page:
	/*
	 * We're only likely to ever get here if MADV_RANDOM is in
	 * effect.
	 */
	error = page_cache_read(file, offset, vmf->gfp_mask);

	/*
	 * The page we want has now been added to the page cache.
	 * In the unlikely event that someone removed it in the
	 * meantime, we'll just come back here and read it again.
	 */
	if (error >= 0)
		goto retry_find;

	/*
	 * An error return from page_cache_read can result if the
	 * system is low on memory, or a problem occurs while trying
	 * to schedule I/O.
	 */
	if (error == -ENOMEM)
		return VM_FAULT_OOM;
	return VM_FAULT_SIGBUS;

page_not_uptodate:
	/*
	 * Umm, take care of errors if the page isn't up-to-date.
	 * Try to re-read it _once_. We do this synchronously,
	 * because there are no performance issues and no need to wait
	 * for any cleanup or anything like that.
	 */
	ClearPageError(page);
	error = mapping->a_ops->readpage(file, page);
	if (!error) {
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			error = -EIO;
	}
	put_page(page);

	if (!error || error == AOP_TRUNCATED_PAGE)
		goto retry_find;

	/* Things didn't work out. Return zero to tell the mm layer so. */
	shrink_readahead_size_eio(file, ra);
	return VM_FAULT_SIGBUS;
}
EXPORT_SYMBOL(filemap_fault);

void filemap_map_pages(struct fault_env *fe,
		pgoff_t start_pgoff, pgoff_t end_pgoff)
{
	struct radix_tree_iter iter;
	void **slot;
	struct file *file = fe->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	pgoff_t last_pgoff = start_pgoff;
	loff_t size;
	struct page *head, *page;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
			start_pgoff) {
		if (iter.index > end_pgoff)
			break;
repeat:
		page = radix_tree_deref_slot(slot);
		if (unlikely(!page))
			goto next;
		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				slot = radix_tree_iter_retry(&iter);
				continue;
			}
			goto next;
		}

		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			goto repeat;

		/* The page was split under us? */
		if (compound_head(page) != head) {
			put_page(head);
			goto repeat;
		}

		/* Has the page moved? */
		if (unlikely(page != *slot)) {
			put_page(head);
			goto repeat;
		}

		if (!PageUptodate(page) ||
				PageReadahead(page) ||
				PageHWPoison(page))
			goto skip;
		if (!trylock_page(page))
			goto skip;

		if (page->mapping != mapping || !PageUptodate(page))
			goto unlock;

		size = round_up(i_size_read(mapping->host), PAGE_SIZE);
		if (page->index >= size >> PAGE_SHIFT)
			goto unlock;

		if (file->f_ra.mmap_miss > 0)
			file->f_ra.mmap_miss--;

		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
		if (fe->pte)
			fe->pte += iter.index - last_pgoff;
		last_pgoff = iter.index;
		if (alloc_set_pte(fe, NULL, page))
			goto unlock;
		unlock_page(page);
		goto next;
unlock:
		unlock_page(page);
skip:
		put_page(page);
next:
		/* Huge page is mapped? No need to proceed. */
		if (pmd_trans_huge(*fe->pmd))
			break;
		if (iter.index == end_pgoff)
			break;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(filemap_map_pages);

int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	int ret = VM_FAULT_LOCKED;

	sb_start_pagefault(inode->i_sb);
	file_update_time(vma->vm_file);
	lock_page(page);
	if (page->mapping != inode->i_mapping) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}
	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	set_page_dirty(page);
	wait_for_stable_page(page);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
EXPORT_SYMBOL(filemap_page_mkwrite);

const struct vm_operations_struct generic_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= filemap_page_mkwrite,
};

/* This is used for a general mmap of a disk file */

int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct address_space *mapping = file->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &generic_file_vm_ops;
	return 0;
}

/*
 * This is for filesystems which do not implement ->writepage.
 */
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
		return -EINVAL;
	return generic_file_mmap(file, vma);
}
#else
int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	return -ENOSYS;
}
int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
{
	return -ENOSYS;
}
#endif /* CONFIG_MMU */

EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_file_readonly_mmap);

static struct page *wait_on_page_read(struct page *page)
{
	if (!IS_ERR(page)) {
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			put_page(page);
			page = ERR_PTR(-EIO);
		}
	}
	return page;
}

static struct page *do_read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data,
				gfp_t gfp)
{
	struct page *page;
	int err;
repeat:
	page = find_get_page(mapping, index);
	if (!page) {
		page = __page_cache_alloc(gfp | __GFP_COLD);
		if (!page)
			return ERR_PTR(-ENOMEM);
		err = add_to_page_cache_lru(page, mapping, index, gfp);
		if (unlikely(err)) {
			put_page(page);
			if (err == -EEXIST)
				goto repeat;
			/* Presumably ENOMEM for radix tree node */
			return ERR_PTR(err);
		}

filler:
		err = filler(data, page);
		if (err < 0) {
			put_page(page);
			return ERR_PTR(err);
		}

		page = wait_on_page_read(page);
		if (IS_ERR(page))
			return page;
		goto out;
	}
	if (PageUptodate(page))
		goto out;

	/*
	 * Page is not up to date and may be locked due one of the following
	 * case a: Page is being filled and the page lock is held
	 * case b: Read/write error clearing the page uptodate status
	 * case c: Truncation in progress (page locked)
	 * case d: Reclaim in progress
	 *
	 * Case a, the page will be up to date when the page is unlocked.
	 *    There is no need to serialise on the page lock here as the page
	 *    is pinned so the lock gives no additional protection. Even if
	 *    the page is truncated, the data is still valid if PageUptodate
	 *    as it's a race vs truncate race.
	 * Case b, the page will not be up to date
	 * Case c, the page may be truncated but in itself, the data may still
	 *    be valid after IO completes as it's a read vs truncate race. The
	 *    operation must restart if the page is not uptodate on unlock but
	 *    otherwise serialising on page lock to stabilise the mapping gives
	 *    no additional guarantees to the caller as the page lock is
	 *    released before return.
	 * Case d, similar to truncation. If reclaim holds the page lock, it
	 *    will be a race with remove_mapping that determines if the mapping
	 *    is valid on unlock but otherwise the data is valid and there is
	 *    no need to serialise with page lock.
	 *
	 * As the page lock gives no additional guarantee, we optimistically
	 * wait on the page to be unlocked and check if it's up to date and
	 * use the page if it is. Otherwise, the page lock is required to
	 * distinguish between the different cases. The motivation is that we
	 * avoid spurious serialisations and wakeups when multiple processes
	 * wait on the same page for IO to complete.
	 */
	wait_on_page_locked(page);
	if (PageUptodate(page))
		goto out;

	/* Distinguish between all the cases under the safety of the lock */
	lock_page(page);

	/* Case c or d, restart the operation */
	if (!page->mapping) {
		unlock_page(page);
		put_page(page);
		goto repeat;
	}

	/* Someone else locked and filled the page in a very small window */
	if (PageUptodate(page)) {
		unlock_page(page);
		goto out;
	}
	goto filler;

out:
	mark_page_accessed(page);
	return page;
}

/**
 * read_cache_page - read into page cache, fill it if needed
 * @mapping:	the page's address_space
 * @index:	the page index
 * @filler:	function to perform the read
 * @data:	first arg to filler(data, page) function, often left as NULL
 *
 * Read into the page cache. If a page already exists, and PageUptodate() is
 * not set, try to fill the page and wait for it to become unlocked.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page(struct address_space *mapping,
				pgoff_t index,
				int (*filler)(void *, struct page *),
				void *data)
{
	return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
}
EXPORT_SYMBOL(read_cache_page);
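
/*
 * Illustrative sketch (not part of the original file): fetching one
 * uptodate page of a file through read_cache_page(), using the mapping's
 * own ->readpage as the filler the same way read_cache_page_gfp() below
 * does. "example_get_page_contents" is a hypothetical name; the caller
 * drops the reference with put_page() when done.
 */
static struct page *example_get_page_contents(struct address_space *mapping,
					      pgoff_t index)
{
	/* returns ERR_PTR(-EIO) if the read fails to bring the page uptodate */
	return read_cache_page(mapping, index,
			       (filler_t *)mapping->a_ops->readpage, NULL);
}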

/**
 * read_cache_page_gfp - read into page cache, using specified page allocation flags.
 * @mapping:	the page's address_space
 * @index:	the page index
 * @gfp:	the page allocator flags to use if allocating
 *
 * This is the same as "read_mapping_page(mapping, index, NULL)", but with
 * any new page allocations done using the specified allocation flags.
 *
 * If the page does not get brought uptodate, return -EIO.
 */
struct page *read_cache_page_gfp(struct address_space *mapping,
				pgoff_t index,
				gfp_t gfp)
{
	filler_t *filler = (filler_t *)mapping->a_ops->readpage;

	return do_read_cache_page(mapping, index, filler, NULL, gfp);
}
EXPORT_SYMBOL(read_cache_page_gfp);

/*
 * Performs necessary checks before doing a write
 *
 * Can adjust writing position or amount of bytes to write.
 * Returns appropriate error code that caller should return or
 * zero in case that write should be allowed.
 */
inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	unsigned long limit = rlimit(RLIMIT_FSIZE);
	loff_t pos;

	if (!iov_iter_count(from))
		return 0;

	/* FIXME: this is for backwards compatibility with 2.4 */
	if (iocb->ki_flags & IOCB_APPEND)
		iocb->ki_pos = i_size_read(inode);

	pos = iocb->ki_pos;

	if (limit != RLIM_INFINITY) {
		if (iocb->ki_pos >= limit) {
			send_sig(SIGXFSZ, current, 0);
			return -EFBIG;
		}
		iov_iter_truncate(from, limit - (unsigned long)pos);
	}

	/*
	 * LFS rule
	 */
	if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
				!(file->f_flags & O_LARGEFILE))) {
		if (pos >= MAX_NON_LFS)
			return -EFBIG;
		iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
	}

	/*
	 * Are we about to exceed the fs block limit ?
	 *
	 * If we have written data it becomes a short write.  If we have
	 * exceeded without writing data we send a signal and return EFBIG.
	 * Linus frestrict idea will clean these up nicely..
	 */
	if (unlikely(pos >= inode->i_sb->s_maxbytes))
		return -EFBIG;

	iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
	return iov_iter_count(from);
}
EXPORT_SYMBOL(generic_write_checks);

int pagecache_write_begin(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_begin(file, mapping, pos, len, flags,
							pagep, fsdata);
}
EXPORT_SYMBOL(pagecache_write_begin);

int pagecache_write_end(struct file *file, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct page *page, void *fsdata)
{
	const struct address_space_operations *aops = mapping->a_ops;

	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
}
EXPORT_SYMBOL(pagecache_write_end);

ssize_t
generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	size_t write_len;
	pgoff_t end;
	struct iov_iter data;

	write_len = iov_iter_count(from);
	end = (pos + write_len - 1) >> PAGE_SHIFT;

	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
	if (written)
		goto out;

	/*
	 * After a write we want buffered reads to be sure to go to disk to get
	 * the new data.  We invalidate clean cached page from the region we're
	 * about to write.  We do this *before* the write so that we can return
	 * without clobbering -EIOCBQUEUED from ->direct_IO().
	 */
	if (mapping->nrpages) {
		written = invalidate_inode_pages2_range(mapping,
					pos >> PAGE_SHIFT, end);
		/*
		 * If a page can not be invalidated, return 0 to fall back
		 * to buffered write.
		 */
		if (written) {
			if (written == -EBUSY)
				return 0;
			goto out;
		}
	}

	data = *from;
	written = mapping->a_ops->direct_IO(iocb, &data);

	/*
	 * Finally, try again to invalidate clean pages which might have been
	 * cached by non-direct readahead, or faulted in by get_user_pages()
	 * if the source of the write was an mmap'ed region of the file
	 * we're writing.  Either one is a pretty crazy thing to do,
	 * so we don't support it 100%.  If this invalidation
	 * fails, tough, the write still worked...
	 */
	if (mapping->nrpages) {
		invalidate_inode_pages2_range(mapping,
					      pos >> PAGE_SHIFT, end);
	}

	if (written > 0) {
		pos += written;
		iov_iter_advance(from, written);
		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
			i_size_write(inode, pos);
			mark_inode_dirty(inode);
		}
		iocb->ki_pos = pos;
	}
out:
	return written;
}
EXPORT_SYMBOL(generic_file_direct_write);

/*
 * Find or create a page at the given pagecache position. Return the locked
 * page. This function is specifically for buffered writes.
 */
struct page *grab_cache_page_write_begin(struct address_space *mapping,
					pgoff_t index, unsigned flags)
{
	struct page *page;
	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;

	if (flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;

	page = pagecache_get_page(mapping, index, fgp_flags,
			mapping_gfp_mask(mapping));
	if (page)
		wait_for_stable_page(page);

	return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);

ssize_t generic_perform_write(struct file *file,
				struct iov_iter *i, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	long status = 0;
	ssize_t written = 0;
	unsigned int flags = 0;

	/*
	 * Copies from kernel address space cannot fail (NFSD is a big user).
	 */
	if (!iter_is_iovec(i))
		flags |= AOP_FLAG_UNINTERRUPTIBLE;

	do {
		struct page *page;
		unsigned long offset;	/* Offset into pagecache page */
		unsigned long bytes;	/* Bytes to write to page */
		size_t copied;		/* Bytes copied from user */
		void *fsdata;

		offset = (pos & (PAGE_SIZE - 1));
		bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_count(i));

again:
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 *
		 * Not only is this an optimisation, but it is also required
		 * to check that the address is actually valid, when atomic
		 * usercopies are used, below.
		 */
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;
			break;
		}

		if (fatal_signal_pending(current)) {
			status = -EINTR;
			break;
		}

		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
						&page, &fsdata);
		if (unlikely(status < 0))
			break;

		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		flush_dcache_page(page);

		status = a_ops->write_end(file, mapping, pos, bytes, copied,
						page, fsdata);
		if (unlikely(status < 0))
			break;
		copied = status;

		cond_resched();

		iov_iter_advance(i, copied);
		if (unlikely(copied == 0)) {
			/*
			 * If we were unable to copy any data at all, we must
			 * fall back to a single segment length write.
			 *
			 * If we didn't fallback here, we could livelock
			 * because not all segments in the iov can be copied at
			 * once without a pagefault.
			 */
			bytes = min_t(unsigned long, PAGE_SIZE - offset,
						iov_iter_single_seg_count(i));
			goto again;
		}
		pos += copied;
		written += copied;

		balance_dirty_pages_ratelimited(mapping);
	} while (iov_iter_count(i));

	return written ? written : status;
}
EXPORT_SYMBOL(generic_perform_write);

/**
 * __generic_file_write_iter - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or
 * similar object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 */
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space * mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t written = 0;
	ssize_t err;
	ssize_t status;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err)
		goto out;

	err = file_update_time(file);
	if (err)
		goto out;

	if (iocb->ki_flags & IOCB_DIRECT) {
		loff_t pos, endbyte;

		written = generic_file_direct_write(iocb, from);
		/*
		 * If the write stopped short of completing, fall back to
		 * buffered writes.  Some filesystems do this for writes to
		 * holes, for example.  For DAX files, a buffered write will
		 * not succeed (even if it did, DAX does not handle dirty
		 * page-cache pages correctly).
		 */
		if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
			goto out;

		status = generic_perform_write(file, from, pos = iocb->ki_pos);
		/*
		 * If generic_perform_write() returned a synchronous error
		 * then we want to return the number of bytes which were
		 * direct-written, or the error code if that was zero.  Note
		 * that this differs from normal direct-io semantics, which
		 * will return -EFOO even if some bytes were written.
		 */
		if (unlikely(status < 0)) {
			err = status;
			goto out;
		}
		/*
		 * We need to ensure that the page cache pages are written to
		 * disk and invalidated to preserve the expected O_DIRECT
		 * semantics.
		 */
		endbyte = pos + status - 1;
		err = filemap_write_and_wait_range(mapping, pos, endbyte);
		if (err == 0) {
			iocb->ki_pos = endbyte + 1;
			written += status;
			invalidate_mapping_pages(mapping,
						 pos >> PAGE_SHIFT,
						 endbyte >> PAGE_SHIFT);
		} else {
			/*
			 * We don't know how much we wrote, so just return
			 * the number of bytes which were direct-written
			 */
		}
	} else {
		written = generic_perform_write(file, from, iocb->ki_pos);
		if (likely(written > 0))
			iocb->ki_pos += written;
	}
out:
	current->backing_dev_info = NULL;
	return written ? written : err;
}
EXPORT_SYMBOL(__generic_file_write_iter);

/**
 * generic_file_write_iter - write data to a file
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 */
ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(generic_file_write_iter);
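
/*
 * Illustrative sketch (not part of the original file): how a simple
 * filesystem wires the generic routines above into its file_operations,
 * in the style of ext2. "example_file_operations" is a hypothetical name;
 * a real filesystem typically supplies its own ->fsync.
 */
static const struct file_operations example_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fsync		= generic_file_fsync,
};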

/**
 * try_to_release_page() - release old fs-specific metadata on a page
 *
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is to try to release any data against the page
 * (presumably at page->private).  If the release was successful, return '1'.
 * Otherwise return zero.
 *
 * This may also be called if PG_fscache is set on a page, indicating that the
 * page is known to the local caching routines.
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block
 * (__GFP_RECLAIM & __GFP_FS).
 */
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
	struct address_space * const mapping = page->mapping;

	BUG_ON(!PageLocked(page));
	if (PageWriteback(page))
		return 0;

	if (mapping && mapping->a_ops->releasepage)
		return mapping->a_ops->releasepage(page, gfp_mask);
	return try_to_free_buffers(page);
}

EXPORT_SYMBOL(try_to_release_page);