/*
 *	linux/mm/filemap.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * This file handles the generic file mmap semantics used by
 * most "normal" filesystems (but you don't /have/ to use this:
 * the NFS filesystem used to do this differently, for example)
 */
#include <linux/export.h>
#include <linux/compiler.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/hash.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/cleancache.h>
#include <linux/rmap.h>
#include "internal.h"
40
#define CREATE_TRACE_POINTS
#include <trace/events/filemap.h>

/*
 * FIXME: remove all knowledge of the buffer layer from the core VM
 */
#include <linux/buffer_head.h> /* for try_to_free_buffers */

#include <asm/mman.h>
50
/*
 * Shared mappings implemented 30.11.1994. It's not fully working yet,
 * though.
 *
 * Shared mappings now work. 15.8.1995  Bruno.
 *
 * finished 'unifying' the page and buffer cache and SMP-threaded the
 * page-cache, 21.05.1999, Ingo Molnar <mingo@redhat.com>
 *
 * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
 */

/*
 * Lock ordering:
 *
 *  ->i_mmap_rwsem		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->mapping->tree_lock
 *
 *  ->i_mutex
 *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
 *
 *  ->mmap_sem
 *    ->i_mmap_rwsem
 *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
 *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
 *
 *  ->mmap_sem
 *    ->lock_page		(access_process_vm)
 *
 *  ->i_mutex			(generic_perform_write)
 *    ->mmap_sem		(fault_in_pages_readable->do_page_fault)
 *
 *  bdi->wb.list_lock
 *    sb_lock			(fs/fs-writeback.c)
 *    ->mapping->tree_lock	(__sync_single_inode)
 *
 *  ->i_mmap_rwsem
 *    ->anon_vma.lock		(vma_adjust)
 *
 *  ->anon_vma.lock
 *    ->page_table_lock or pte_lock	(anon_vma_prepare and various)
 *
 *  ->page_table_lock or pte_lock
 *    ->swap_lock		(try_to_unmap_one)
 *    ->private_lock		(try_to_unmap_one)
 *    ->tree_lock		(try_to_unmap_one)
 *    ->private_lock		(page_remove_rmap->set_page_dirty)
 *    ->tree_lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
 *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
 *    bdi.wb->list_lock		(zap_pte_range->set_page_dirty)
 *    ->inode->i_lock		(zap_pte_range->set_page_dirty)
 *    ->private_lock		(zap_pte_range->__set_page_dirty_buffers)
 *
 * ->i_mmap_rwsem
 *   ->tasklist_lock		(memory_failure, collect_procs_ao)
 */
114static int page_cache_tree_insert(struct address_space *mapping,
115 struct page *page, void **shadowp)
116{
117 struct radix_tree_node *node;
118 void **slot;
119 int error;
120
121 error = __radix_tree_create(&mapping->page_tree, page->index, 0,
122 &node, &slot);
123 if (error)
124 return error;
125 if (*slot) {
126 void *p;
127
128 p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
129 if (!radix_tree_exceptional_entry(p))
130 return -EEXIST;
131
132 mapping->nrexceptional--;
133 if (!dax_mapping(mapping)) {
134 if (shadowp)
135 *shadowp = p;
136 } else {
			/* DAX can replace empty locked entry with a hole */
138 WARN_ON_ONCE(p !=
139 dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
			/* Wakeup waiters for exceptional entry lock */
141 dax_wake_mapping_entry_waiter(mapping, page->index, p,
142 true);
143 }
144 }
145 __radix_tree_replace(&mapping->page_tree, node, slot, page,
146 workingset_update_node, mapping);
147 mapping->nrpages++;
148 return 0;
149}
150
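/*
 * page_cache_tree_delete() removes @page (and, for a transparent huge page,
 * all of its sub-pages) from the radix tree, replacing each slot with
 * @shadow (which may be NULL), and updates the nrexceptional/nrpages
 * counters.  The caller must hold the mapping's tree_lock.
 */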
151static void page_cache_tree_delete(struct address_space *mapping,
152 struct page *page, void *shadow)
153{
154 int i, nr;
155
	/* hugetlb pages are represented by one entry in the radix tree */
157 nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
158
159 VM_BUG_ON_PAGE(!PageLocked(page), page);
160 VM_BUG_ON_PAGE(PageTail(page), page);
161 VM_BUG_ON_PAGE(nr != 1 && shadow, page);
162
163 for (i = 0; i < nr; i++) {
164 struct radix_tree_node *node;
165 void **slot;
166
167 __radix_tree_lookup(&mapping->page_tree, page->index + i,
168 &node, &slot);
169
170 VM_BUG_ON_PAGE(!node && nr != 1, page);
171
172 radix_tree_clear_tags(&mapping->page_tree, node, slot);
173 __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
174 workingset_update_node, mapping);
175 }
176
177 if (shadow) {
178 mapping->nrexceptional += nr;
		/*
		 * Make sure the nrexceptional update is committed before
		 * the nrpages update so that final truncate racing
		 * with reclaim does not see both counters 0 at the
		 * same time and miss a shadow entry.
		 */
185 smp_wmb();
186 }
187 mapping->nrpages -= nr;
188}
189
/*
 * Delete a page from the page cache and free it. Caller has to make
 * sure the page is locked and that nobody else uses it - or that usage
 * is safe.  The caller must hold the mapping's tree_lock.
 */
195void __delete_from_page_cache(struct page *page, void *shadow)
196{
197 struct address_space *mapping = page->mapping;
198 int nr = hpage_nr_pages(page);
199
200 trace_mm_filemap_delete_from_page_cache(page);
	/*
	 * if we're uptodate, flush out into the cleancache, otherwise
	 * invalidate any existing cleancache entries.  We can't leave
	 * them behind even for tmpfs because the page cache cannot see
	 * them or they're counted as refcount leaks.
	 */
206 if (PageUptodate(page) && PageMappedToDisk(page))
207 cleancache_put_page(page);
208 else
209 cleancache_invalidate_page(mapping, page);
210
211 VM_BUG_ON_PAGE(PageTail(page), page);
212 VM_BUG_ON_PAGE(page_mapped(page), page);
213 if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(page_mapped(page))) {
214 int mapcount;
215
216 pr_alert("BUG: Bad page cache in process %s pfn:%05lx\n",
217 current->comm, page_to_pfn(page));
218 dump_page(page, "still mapped when deleted");
219 dump_stack();
220 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
221
222 mapcount = page_mapcount(page);
223 if (mapping_exiting(mapping) &&
224 page_count(page) >= mapcount + 2) {
			/*
			 * All vmas have already been torn down, so it's
			 * a good bet that actually the page is unmapped,
			 * and we'd prefer not to leak it: if we're wrong,
			 * some other bad page check should catch it later.
			 */
231 page_mapcount_reset(page);
232 page_ref_sub(page, mapcount);
233 }
234 }
235
236 page_cache_tree_delete(mapping, page, shadow);
237
238 page->mapping = NULL;
	/* Leave page->index set: truncation lookup relies upon it */

	/* hugetlb pages do not participate in page cache accounting. */
242 if (!PageHuge(page))
243 __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
244 if (PageSwapBacked(page)) {
245 __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
246 if (PageTransHuge(page))
247 __dec_node_page_state(page, NR_SHMEM_THPS);
248 } else {
249 VM_BUG_ON_PAGE(PageTransHuge(page) && !PageHuge(page), page);
250 }
251
	/*
	 * At this point page must be either written or cleaned by truncate.
	 * Dirty page here signals a bug and loss of unwritten data.
	 *
	 * This fixes dirty accounting after removing the page entirely but
	 * leaves PageDirty set: it has no effect for truncated page and
	 * anyway will be cleaned up when the page is removed from the page
	 * cache.
	 */
260 if (WARN_ON_ONCE(PageDirty(page)))
261 account_page_cleaned(page, mapping, inode_to_wb(mapping->host));
262}
263
/**
 * delete_from_page_cache - delete page from page cache
 * @page: the page which the kernel is trying to remove from page cache
 *
 * This must be called only on pages that have been verified to be in the page
 * cache and locked.  It will never put the page into the free list, the caller
 * has a reference on the page.
 */
272void delete_from_page_cache(struct page *page)
273{
274 struct address_space *mapping = page_mapping(page);
275 unsigned long flags;
276 void (*freepage)(struct page *);
277
278 BUG_ON(!PageLocked(page));
279
280 freepage = mapping->a_ops->freepage;
281
282 spin_lock_irqsave(&mapping->tree_lock, flags);
283 __delete_from_page_cache(page, NULL);
284 spin_unlock_irqrestore(&mapping->tree_lock, flags);
285
286 if (freepage)
287 freepage(page);
288
289 if (PageTransHuge(page) && !PageHuge(page)) {
290 page_ref_sub(page, HPAGE_PMD_NR);
291 VM_BUG_ON_PAGE(page_count(page) <= 0, page);
292 } else {
293 put_page(page);
294 }
295}
296EXPORT_SYMBOL(delete_from_page_cache);
297
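/*
 * Report and clear the async write errors (AS_ENOSPC, AS_EIO) that writeback
 * has recorded on the mapping.  -EIO takes precedence over -ENOSPC.
 */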
298int filemap_check_errors(struct address_space *mapping)
299{
300 int ret = 0;
301
302 if (test_bit(AS_ENOSPC, &mapping->flags) &&
303 test_and_clear_bit(AS_ENOSPC, &mapping->flags))
304 ret = -ENOSPC;
305 if (test_bit(AS_EIO, &mapping->flags) &&
306 test_and_clear_bit(AS_EIO, &mapping->flags))
307 ret = -EIO;
308 return ret;
309}
310EXPORT_SYMBOL(filemap_check_errors);
311
/**
 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
 * @mapping:	address space structure to write
 * @start:	offset in bytes where the range starts
 * @end:	offset in bytes where the range ends (inclusive)
 * @sync_mode:	enable synchronous operation
 *
 * Start writeback against all of a mapping's dirty pages that lie
 * within the byte offsets <start, end> inclusive.
 *
 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
 * opposed to a regular memory cleansing writeback.  The difference between
 * these two operations is that if a dirty page/buffer is encountered, it must
 * be waited upon, and not just skipped over.
 */
327int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
328 loff_t end, int sync_mode)
329{
330 int ret;
331 struct writeback_control wbc = {
332 .sync_mode = sync_mode,
333 .nr_to_write = LONG_MAX,
334 .range_start = start,
335 .range_end = end,
336 };
337
338 if (!mapping_cap_writeback_dirty(mapping))
339 return 0;
340
341 wbc_attach_fdatawrite_inode(&wbc, mapping->host);
342 ret = do_writepages(mapping, &wbc);
343 wbc_detach_inode(&wbc);
344 return ret;
345}
346
347static inline int __filemap_fdatawrite(struct address_space *mapping,
348 int sync_mode)
349{
350 return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
351}
352
353int filemap_fdatawrite(struct address_space *mapping)
354{
355 return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
356}
357EXPORT_SYMBOL(filemap_fdatawrite);
358
359int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
360 loff_t end)
361{
362 return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
363}
364EXPORT_SYMBOL(filemap_fdatawrite_range);
365
/**
 * filemap_flush - mostly a non-blocking flush
 * @mapping:	target address_space
 *
 * This is a mostly non-blocking flush.  Not suitable for data-integrity
 * purposes - I/O may not be started against all dirty pages.
 */
373int filemap_flush(struct address_space *mapping)
374{
375 return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
376}
377EXPORT_SYMBOL(filemap_flush);
378
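/*
 * Walk the pages tagged PAGECACHE_TAG_WRITEBACK in the given byte range and
 * wait for writeback to complete on each of them.  Returns -EIO if any page
 * had its error bit set; the mapping's AS_EIO/AS_ENOSPC flags are left for
 * the callers to check separately.
 */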
379static int __filemap_fdatawait_range(struct address_space *mapping,
380 loff_t start_byte, loff_t end_byte)
381{
382 pgoff_t index = start_byte >> PAGE_SHIFT;
383 pgoff_t end = end_byte >> PAGE_SHIFT;
384 struct pagevec pvec;
385 int nr_pages;
386 int ret = 0;
387
388 if (end_byte < start_byte)
389 goto out;
390
391 pagevec_init(&pvec, 0);
392 while ((index <= end) &&
393 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
394 PAGECACHE_TAG_WRITEBACK,
395 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
396 unsigned i;
397
398 for (i = 0; i < nr_pages; i++) {
399 struct page *page = pvec.pages[i];
400
401
402 if (page->index > end)
403 continue;
404
405 wait_on_page_writeback(page);
406 if (TestClearPageError(page))
407 ret = -EIO;
408 }
409 pagevec_release(&pvec);
410 cond_resched();
411 }
412out:
413 return ret;
414}
415
/**
 * filemap_fdatawait_range - wait for writeback to complete
 * @mapping:		address space structure to wait for
 * @start_byte:		offset in bytes where the range starts
 * @end_byte:		offset in bytes where the range ends (inclusive)
 *
 * Walk the list of under-writeback pages of the given address space
 * in the given range and wait for all of them.  Check error status of
 * the address space and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 */
430int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
431 loff_t end_byte)
432{
433 int ret, ret2;
434
435 ret = __filemap_fdatawait_range(mapping, start_byte, end_byte);
436 ret2 = filemap_check_errors(mapping);
437 if (!ret)
438 ret = ret2;
439
440 return ret;
441}
442EXPORT_SYMBOL(filemap_fdatawait_range);
443
/**
 * filemap_fdatawait_keep_errors - wait for writeback without clearing errors
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Unlike filemap_fdatawait(), this function
 * does not clear error status of the address space.
 *
 * Use this function if callers don't handle errors themselves.  Expected
 * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2),
 * fsfreeze(8)
 */
456void filemap_fdatawait_keep_errors(struct address_space *mapping)
457{
458 loff_t i_size = i_size_read(mapping->host);
459
460 if (i_size == 0)
461 return;
462
463 __filemap_fdatawait_range(mapping, 0, i_size - 1);
464}
465
/**
 * filemap_fdatawait - wait for all under-writeback pages to complete
 * @mapping: address space structure to wait for
 *
 * Walk the list of under-writeback pages of the given address space
 * and wait for all of them.  Check error status of the address space
 * and return it.
 *
 * Since the error status of the address space is cleared by this function,
 * callers are responsible for checking the return value and handling and/or
 * reporting the error.
 */
478int filemap_fdatawait(struct address_space *mapping)
479{
480 loff_t i_size = i_size_read(mapping->host);
481
482 if (i_size == 0)
483 return 0;
484
485 return filemap_fdatawait_range(mapping, 0, i_size - 1);
486}
487EXPORT_SYMBOL(filemap_fdatawait);
488
489int filemap_write_and_wait(struct address_space *mapping)
490{
491 int err = 0;
492
493 if ((!dax_mapping(mapping) && mapping->nrpages) ||
494 (dax_mapping(mapping) && mapping->nrexceptional)) {
495 err = filemap_fdatawrite(mapping);
		/*
		 * Even if the above returned error, the pages may be
		 * written partially (e.g. -ENOSPC), so we wait for it.
		 * But the -EIO is special case, it may indicate the worst
		 * thing (e.g. bug) happened, so we avoid waiting for it.
		 */
502 if (err != -EIO) {
503 int err2 = filemap_fdatawait(mapping);
504 if (!err)
505 err = err2;
506 }
507 } else {
508 err = filemap_check_errors(mapping);
509 }
510 return err;
511}
512EXPORT_SYMBOL(filemap_write_and_wait);
513
/**
 * filemap_write_and_wait_range - write out & wait on a file range
 * @mapping:	the address_space for the pages
 * @lstart:	offset in bytes where the range starts
 * @lend:	offset in bytes where the range ends (inclusive)
 *
 * Write out and wait upon file offsets lstart->lend, inclusive.
 *
 * Note that @lend is inclusive (describes the last byte to be written) so
 * that this function can be used to write to the very end-of-file (end = -1).
 */
525int filemap_write_and_wait_range(struct address_space *mapping,
526 loff_t lstart, loff_t lend)
527{
528 int err = 0;
529
530 if ((!dax_mapping(mapping) && mapping->nrpages) ||
531 (dax_mapping(mapping) && mapping->nrexceptional)) {
532 err = __filemap_fdatawrite_range(mapping, lstart, lend,
533 WB_SYNC_ALL);
534
535 if (err != -EIO) {
536 int err2 = filemap_fdatawait_range(mapping,
537 lstart, lend);
538 if (!err)
539 err = err2;
540 }
541 } else {
542 err = filemap_check_errors(mapping);
543 }
544 return err;
545}
546EXPORT_SYMBOL(filemap_write_and_wait_range);
547
/**
 * replace_page_cache_page - replace a pagecache page with a new one
 * @old:	page to be replaced
 * @new:	page to replace with
 * @gfp_mask:	allocation mode
 *
 * This function replaces a page in the pagecache with a new one.  On
 * success it acquires the pagecache reference for the new page and
 * drops it for the old page.  Both the old and new pages must be
 * locked.  This function does not add the new page to the LRU, the
 * caller must do that.
 *
 * The remove + add is atomic.  The only way this function can fail is
 * memory allocation failure.
 */
563int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
564{
565 int error;
566
567 VM_BUG_ON_PAGE(!PageLocked(old), old);
568 VM_BUG_ON_PAGE(!PageLocked(new), new);
569 VM_BUG_ON_PAGE(new->mapping, new);
570
571 error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
572 if (!error) {
573 struct address_space *mapping = old->mapping;
574 void (*freepage)(struct page *);
575 unsigned long flags;
576
577 pgoff_t offset = old->index;
578 freepage = mapping->a_ops->freepage;
579
580 get_page(new);
581 new->mapping = mapping;
582 new->index = offset;
583
584 spin_lock_irqsave(&mapping->tree_lock, flags);
585 __delete_from_page_cache(old, NULL);
586 error = page_cache_tree_insert(mapping, new, NULL);
587 BUG_ON(error);
		/*
		 * hugetlb pages do not participate in page cache accounting.
		 */
592 if (!PageHuge(new))
593 __inc_node_page_state(new, NR_FILE_PAGES);
594 if (PageSwapBacked(new))
595 __inc_node_page_state(new, NR_SHMEM);
596 spin_unlock_irqrestore(&mapping->tree_lock, flags);
597 mem_cgroup_migrate(old, new);
598 radix_tree_preload_end();
599 if (freepage)
600 freepage(old);
601 put_page(old);
602 }
603
604 return error;
605}
606EXPORT_SYMBOL_GPL(replace_page_cache_page);
607
608static int __add_to_page_cache_locked(struct page *page,
609 struct address_space *mapping,
610 pgoff_t offset, gfp_t gfp_mask,
611 void **shadowp)
612{
613 int huge = PageHuge(page);
614 struct mem_cgroup *memcg;
615 int error;
616
617 VM_BUG_ON_PAGE(!PageLocked(page), page);
618 VM_BUG_ON_PAGE(PageSwapBacked(page), page);
619
620 if (!huge) {
621 error = mem_cgroup_try_charge(page, current->mm,
622 gfp_mask, &memcg, false);
623 if (error)
624 return error;
625 }
626
627 error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
628 if (error) {
629 if (!huge)
630 mem_cgroup_cancel_charge(page, memcg, false);
631 return error;
632 }
633
634 get_page(page);
635 page->mapping = mapping;
636 page->index = offset;
637
638 spin_lock_irq(&mapping->tree_lock);
639 error = page_cache_tree_insert(mapping, page, shadowp);
640 radix_tree_preload_end();
641 if (unlikely(error))
642 goto err_insert;
643
	/* hugetlb pages do not participate in page cache accounting. */
645 if (!huge)
646 __inc_node_page_state(page, NR_FILE_PAGES);
647 spin_unlock_irq(&mapping->tree_lock);
648 if (!huge)
649 mem_cgroup_commit_charge(page, memcg, false, false);
650 trace_mm_filemap_add_to_page_cache(page);
651 return 0;
652err_insert:
653 page->mapping = NULL;
	/* Leave page->index set: truncation relies upon it */
655 spin_unlock_irq(&mapping->tree_lock);
656 if (!huge)
657 mem_cgroup_cancel_charge(page, memcg, false);
658 put_page(page);
659 return error;
660}
661
/**
 * add_to_page_cache_locked - add a locked page to the pagecache
 * @page:	page to add
 * @mapping:	the page's address_space
 * @offset:	page index
 * @gfp_mask:	page allocation mode
 *
 * This function is used to add a page to the pagecache. It must be locked.
 * This function does not add the page to the LRU.  The caller must do that.
 */
672int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
673 pgoff_t offset, gfp_t gfp_mask)
674{
675 return __add_to_page_cache_locked(page, mapping, offset,
676 gfp_mask, NULL);
677}
678EXPORT_SYMBOL(add_to_page_cache_locked);
679
680int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
681 pgoff_t offset, gfp_t gfp_mask)
682{
683 void *shadow = NULL;
684 int ret;
685
686 __SetPageLocked(page);
687 ret = __add_to_page_cache_locked(page, mapping, offset,
688 gfp_mask, &shadow);
689 if (unlikely(ret))
690 __ClearPageLocked(page);
691 else {
		/*
		 * The page might have been evicted from cache only
		 * recently, in which case it should be activated like
		 * any other repeatedly accessed page.
		 * The exception is pages getting rewritten; evicting other
		 * data from the working set, only to cache data that will
		 * get overwritten with something else, is a waste of memory.
		 */
700 if (!(gfp_mask & __GFP_WRITE) &&
701 shadow && workingset_refault(shadow)) {
702 SetPageActive(page);
703 workingset_activation(page);
704 } else
705 ClearPageActive(page);
706 lru_cache_add(page);
707 }
708 return ret;
709}
710EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
711
712#ifdef CONFIG_NUMA
713struct page *__page_cache_alloc(gfp_t gfp)
714{
715 int n;
716 struct page *page;
717
718 if (cpuset_do_page_mem_spread()) {
719 unsigned int cpuset_mems_cookie;
720 do {
721 cpuset_mems_cookie = read_mems_allowed_begin();
722 n = cpuset_mem_spread_node();
723 page = __alloc_pages_node(n, gfp, 0);
724 } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
725
726 return page;
727 }
728 return alloc_pages(gfp, 0);
729}
730EXPORT_SYMBOL(__page_cache_alloc);
731#endif
732
/*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
 * waitqueues where the bucket discipline is to maintain all
 * waiters on the same queue and wake all when any of the pages
 * become available, and for the woken contexts to check to be
 * sure the appropriate page became available, this saves space
 * at a cost of "thundering herd" phenomena during rare hash
 * collisions.
 */
743#define PAGE_WAIT_TABLE_BITS 8
744#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
745static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
746
747static wait_queue_head_t *page_waitqueue(struct page *page)
748{
749 return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
750}
751
752void __init pagecache_init(void)
753{
754 int i;
755
756 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
757 init_waitqueue_head(&page_wait_table[i]);
758
759 page_writeback_init();
760}
761
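/*
 * Waiters on a page bit queue a wait_page_queue entry on the hashed
 * waitqueue; wake_page_function() uses the wait_page_key it is handed to
 * wake only those waiters that match both the page and the bit that was
 * just cleared.
 */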
762struct wait_page_key {
763 struct page *page;
764 int bit_nr;
765 int page_match;
766};
767
768struct wait_page_queue {
769 struct page *page;
770 int bit_nr;
771 wait_queue_t wait;
772};
773
774static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
775{
776 struct wait_page_key *key = arg;
777 struct wait_page_queue *wait_page
778 = container_of(wait, struct wait_page_queue, wait);
779
780 if (wait_page->page != key->page)
781 return 0;
782 key->page_match = 1;
783
784 if (wait_page->bit_nr != key->bit_nr)
785 return 0;
786 if (test_bit(key->bit_nr, &key->page->flags))
787 return 0;
788
789 return autoremove_wake_function(wait, mode, sync, key);
790}
791
792static void wake_up_page_bit(struct page *page, int bit_nr)
793{
794 wait_queue_head_t *q = page_waitqueue(page);
795 struct wait_page_key key;
796 unsigned long flags;
797
798 key.page = page;
799 key.bit_nr = bit_nr;
800 key.page_match = 0;
801
802 spin_lock_irqsave(&q->lock, flags);
803 __wake_up_locked_key(q, TASK_NORMAL, &key);
	/*
	 * It is possible for other pages to have collided on the waitqueue
	 * hash, so in that case check for a page match.  If the queue is now
	 * empty, or none of the remaining waiters are for this page, there
	 * is no longer anybody interested in this page's bits and we can
	 * clear PG_waiters.
	 */
813 if (!waitqueue_active(q) || !key.page_match) {
814 ClearPageWaiters(page);
		/*
		 * It is still possible to miss clearing PG_waiters here, when
		 * our wakeup removed the last waiter for this page but the
		 * hashed waitqueue still holds waiters for other pages.
		 *
		 * That's okay, it's a rare case.  The next waker will clear it.
		 */
822 }
823 spin_unlock_irqrestore(&q->lock, flags);
824}
825
826static void wake_up_page(struct page *page, int bit)
827{
828 if (!PageWaiters(page))
829 return;
830 wake_up_page_bit(page, bit);
831}
832
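/*
 * Shared helper for wait_on_page_bit*() and lock_page(): queue ourselves on
 * the hashed waitqueue (exclusively when @lock is true), sleep until the bit
 * is clear, and for the lock case acquire PG_locked with
 * test_and_set_bit_lock() before returning.
 */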
833static inline int wait_on_page_bit_common(wait_queue_head_t *q,
834 struct page *page, int bit_nr, int state, bool lock)
835{
836 struct wait_page_queue wait_page;
837 wait_queue_t *wait = &wait_page.wait;
838 int ret = 0;
839
840 init_wait(wait);
841 wait->func = wake_page_function;
842 wait_page.page = page;
843 wait_page.bit_nr = bit_nr;
844
845 for (;;) {
846 spin_lock_irq(&q->lock);
847
848 if (likely(list_empty(&wait->task_list))) {
849 if (lock)
850 __add_wait_queue_tail_exclusive(q, wait);
851 else
852 __add_wait_queue(q, wait);
853 SetPageWaiters(page);
854 }
855
856 set_current_state(state);
857
858 spin_unlock_irq(&q->lock);
859
860 if (likely(test_bit(bit_nr, &page->flags))) {
861 io_schedule();
862 if (unlikely(signal_pending_state(state, current))) {
863 ret = -EINTR;
864 break;
865 }
866 }
867
868 if (lock) {
869 if (!test_and_set_bit_lock(bit_nr, &page->flags))
870 break;
871 } else {
872 if (!test_bit(bit_nr, &page->flags))
873 break;
874 }
875 }
876
877 finish_wait(q, wait);
878
	/*
	 * A signal could leave PageWaiters set.  Clearing it here if
	 * !waitqueue_active would be possible (by open-coding finish_wait),
	 * but still fail to catch it in the case of wait hash collision.  We
	 * already can fail to clear wait hash collisions, so don't bother
	 * with signals either.
	 */
887 return ret;
888}
889
890void wait_on_page_bit(struct page *page, int bit_nr)
891{
892 wait_queue_head_t *q = page_waitqueue(page);
893 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
894}
895EXPORT_SYMBOL(wait_on_page_bit);
896
897int wait_on_page_bit_killable(struct page *page, int bit_nr)
898{
899 wait_queue_head_t *q = page_waitqueue(page);
900 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
901}
902
/**
 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
 * @page: Page defining the wait queue of interest
 * @waiter: Waiter to add to the queue
 *
 * Add an arbitrary @waiter to the wait queue for the nominated @page.
 */
910void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
911{
912 wait_queue_head_t *q = page_waitqueue(page);
913 unsigned long flags;
914
915 spin_lock_irqsave(&q->lock, flags);
916 __add_wait_queue(q, waiter);
917 SetPageWaiters(page);
918 spin_unlock_irqrestore(&q->lock, flags);
919}
920EXPORT_SYMBOL_GPL(add_page_wait_queue);
921
922#ifndef clear_bit_unlock_is_negative_byte
/*
 * PG_waiters is the high bit in the same byte as PG_locked.
 *
 * On x86 (and on many other architectures), we can clear PG_locked and
 * test the sign bit at the same time. But if the architecture does not
 * support cheap "clear_bit_unlock_is_negative_byte()", this generic
 * fallback simply clears the lock bit and then reads PG_waiters back.
 *
 * Architectures that can do the clear and the sign test atomically
 * should provide their own version of this helper.
 */
936static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
937{
938 clear_bit_unlock(nr, mem);
939
940 return test_bit(PG_waiters, mem);
941}
942
943#endif
944
/**
 * unlock_page - unlock a locked page
 * @page: the page
 *
 * Unlocks the page and wakes up sleepers in wait_on_page_locked().
 * Also wakes sleepers in wait_on_page_writeback() because the wakeup
 * mechanism between PageLocked pages and PageWriteback pages is shared.
 * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
 *
 * Note that this depends on PG_waiters being the sign bit in the byte
 * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
 * clear the PG_locked bit and test PG_waiters at the same time fairly
 * portably (architectures that do LL/SC can test any bit, while x86 can
 * test the sign bit).
 */
960void unlock_page(struct page *page)
961{
962 BUILD_BUG_ON(PG_waiters != 7);
963 page = compound_head(page);
964 VM_BUG_ON_PAGE(!PageLocked(page), page);
965 if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
966 wake_up_page_bit(page, PG_locked);
967}
968EXPORT_SYMBOL(unlock_page);
969
/**
 * end_page_writeback - end writeback against a page
 * @page: the page
 */
974void end_page_writeback(struct page *page)
975{
	/*
	 * TestClearPageReclaim could be used here but it is an atomic
	 * operation and overkill in this particular case. Failing to
	 * shuffle a page marked for immediate reclaim is too mild to
	 * justify taking an atomic operation penalty at the end of
	 * every page writeback.
	 */
983 if (PageReclaim(page)) {
984 ClearPageReclaim(page);
985 rotate_reclaimable_page(page);
986 }
987
988 if (!test_clear_page_writeback(page))
989 BUG();
990
991 smp_mb__after_atomic();
992 wake_up_page(page, PG_writeback);
993}
994EXPORT_SYMBOL(end_page_writeback);
995
/*
 * After completing I/O on a page, call this routine to update the page
 * flags appropriately
 */
1000void page_endio(struct page *page, bool is_write, int err)
1001{
1002 if (!is_write) {
1003 if (!err) {
1004 SetPageUptodate(page);
1005 } else {
1006 ClearPageUptodate(page);
1007 SetPageError(page);
1008 }
1009 unlock_page(page);
1010 } else {
1011 if (err) {
1012 struct address_space *mapping;
1013
1014 SetPageError(page);
1015 mapping = page_mapping(page);
1016 if (mapping)
1017 mapping_set_error(mapping, err);
1018 }
1019 end_page_writeback(page);
1020 }
1021}
1022EXPORT_SYMBOL_GPL(page_endio);
1023
/**
 * __lock_page - get a lock on the page, assuming we need to sleep to get it
 * @__page: the page to lock
 */
1028void __lock_page(struct page *__page)
1029{
1030 struct page *page = compound_head(__page);
1031 wait_queue_head_t *q = page_waitqueue(page);
1032 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
1033}
1034EXPORT_SYMBOL(__lock_page);
1035
1036int __lock_page_killable(struct page *__page)
1037{
1038 struct page *page = compound_head(__page);
1039 wait_queue_head_t *q = page_waitqueue(page);
1040 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
1041}
1042EXPORT_SYMBOL_GPL(__lock_page_killable);
1043
/*
 * Return values:
 * 1 - page is locked; mmap_sem is still held.
 * 0 - page is not locked.
 *     mmap_sem has been released (up_read()), unless flags had both
 *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
 *     which case mmap_sem is still held.
 *
 * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
 * with the page locked and the mmap_sem unperturbed.
 */
1055int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
1056 unsigned int flags)
1057{
1058 if (flags & FAULT_FLAG_ALLOW_RETRY) {
		/*
		 * CAUTION! In this case, mmap_sem is not released
		 * even though return 0.
		 */
1063 if (flags & FAULT_FLAG_RETRY_NOWAIT)
1064 return 0;
1065
1066 up_read(&mm->mmap_sem);
1067 if (flags & FAULT_FLAG_KILLABLE)
1068 wait_on_page_locked_killable(page);
1069 else
1070 wait_on_page_locked(page);
1071 return 0;
1072 } else {
1073 if (flags & FAULT_FLAG_KILLABLE) {
1074 int ret;
1075
1076 ret = __lock_page_killable(page);
1077 if (ret) {
1078 up_read(&mm->mmap_sem);
1079 return 0;
1080 }
1081 } else
1082 __lock_page(page);
1083 return 1;
1084 }
1085}
1086
/**
 * page_cache_next_hole - find the next hole (not-present entry)
 * @mapping: mapping
 * @index: index
 * @max_scan: maximum range to search
 *
 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
 * lowest indexed hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'return - index >=
 * max_scan' will be true). In the rare case of index wrap-around, 0
 * will be returned.
 */
1108pgoff_t page_cache_next_hole(struct address_space *mapping,
1109 pgoff_t index, unsigned long max_scan)
1110{
1111 unsigned long i;
1112
1113 for (i = 0; i < max_scan; i++) {
1114 struct page *page;
1115
1116 page = radix_tree_lookup(&mapping->page_tree, index);
1117 if (!page || radix_tree_exceptional_entry(page))
1118 break;
1119 index++;
1120 if (index == 0)
1121 break;
1122 }
1123
1124 return index;
1125}
1126EXPORT_SYMBOL(page_cache_next_hole);
1127
/**
 * page_cache_prev_hole - find the prev hole (not-present entry)
 * @mapping: mapping
 * @index: index
 * @max_scan: maximum range to search
 *
 * Search backwards in the range [max(index-max_scan+1, 0), index] for
 * the first hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'index - return >=
 * max_scan' will be true). In the rare case of wrap-around, ULONG_MAX
 * will be returned.
 */
1149pgoff_t page_cache_prev_hole(struct address_space *mapping,
1150 pgoff_t index, unsigned long max_scan)
1151{
1152 unsigned long i;
1153
1154 for (i = 0; i < max_scan; i++) {
1155 struct page *page;
1156
1157 page = radix_tree_lookup(&mapping->page_tree, index);
1158 if (!page || radix_tree_exceptional_entry(page))
1159 break;
1160 index--;
1161 if (index == ULONG_MAX)
1162 break;
1163 }
1164
1165 return index;
1166}
1167EXPORT_SYMBOL(page_cache_prev_hole);
1168
/**
 * find_get_entry - find and get a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned with an increased refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * Otherwise, %NULL is returned.
 */
1182struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
1183{
1184 void **pagep;
1185 struct page *head, *page;
1186
1187 rcu_read_lock();
1188repeat:
1189 page = NULL;
1190 pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
1191 if (pagep) {
1192 page = radix_tree_deref_slot(pagep);
1193 if (unlikely(!page))
1194 goto out;
1195 if (radix_tree_exception(page)) {
1196 if (radix_tree_deref_retry(page))
1197 goto repeat;
1198
1199
1200
1201
1202
1203 goto out;
1204 }
1205
1206 head = compound_head(page);
1207 if (!page_cache_get_speculative(head))
1208 goto repeat;
1209
1210
1211 if (compound_head(page) != head) {
1212 put_page(head);
1213 goto repeat;
1214 }
1215
1216
1217
1218
1219
1220
1221 if (unlikely(page != *pagep)) {
1222 put_page(head);
1223 goto repeat;
1224 }
1225 }
1226out:
1227 rcu_read_unlock();
1228
1229 return page;
1230}
1231EXPORT_SYMBOL(find_get_entry);
1232
/**
 * find_lock_entry - locate, pin and lock a page cache entry
 * @mapping: the address_space to search
 * @offset: the page cache index
 *
 * Looks up the page cache slot at @mapping & @offset.  If there is a
 * page cache page, it is returned locked and with an increased
 * refcount.
 *
 * If the slot holds a shadow entry of a previously evicted page, or a
 * swap entry from shmem/tmpfs, it is returned.
 *
 * Otherwise, %NULL is returned.
 *
 * find_lock_entry() may sleep.
 */
1249struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
1250{
1251 struct page *page;
1252
1253repeat:
1254 page = find_get_entry(mapping, offset);
1255 if (page && !radix_tree_exception(page)) {
1256 lock_page(page);
1257
1258 if (unlikely(page_mapping(page) != mapping)) {
1259 unlock_page(page);
1260 put_page(page);
1261 goto repeat;
1262 }
1263 VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
1264 }
1265 return page;
1266}
1267EXPORT_SYMBOL(find_lock_entry);
1268
/**
 * pagecache_get_page - find and get a page reference
 * @mapping: the address_space to search
 * @offset: the page index
 * @fgp_flags: FGP flags
 * @gfp_mask: gfp mask to use for the page cache data page allocation
 *
 * Looks up the page cache slot at @mapping & @offset.
 *
 * @fgp_flags can be:
 *
 * - FGP_ACCESSED: the page will be marked accessed
 * - FGP_LOCK: the page is returned locked
 * - FGP_CREAT: if the page is not present, a new page is allocated using
 *   @gfp_mask and added to the page cache and the VM's LRU list.
 *   The page is returned locked and with an increased refcount.
 *
 * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
 * if the GFP flags specified for FGP_CREAT are atomic.
 *
 * If there is a page cache page, it is returned with an increased refcount.
 */
1292struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
1293 int fgp_flags, gfp_t gfp_mask)
1294{
1295 struct page *page;
1296
1297repeat:
1298 page = find_get_entry(mapping, offset);
1299 if (radix_tree_exceptional_entry(page))
1300 page = NULL;
1301 if (!page)
1302 goto no_page;
1303
1304 if (fgp_flags & FGP_LOCK) {
1305 if (fgp_flags & FGP_NOWAIT) {
1306 if (!trylock_page(page)) {
1307 put_page(page);
1308 return NULL;
1309 }
1310 } else {
1311 lock_page(page);
1312 }
1313
1314
1315 if (unlikely(page->mapping != mapping)) {
1316 unlock_page(page);
1317 put_page(page);
1318 goto repeat;
1319 }
1320 VM_BUG_ON_PAGE(page->index != offset, page);
1321 }
1322
1323 if (page && (fgp_flags & FGP_ACCESSED))
1324 mark_page_accessed(page);
1325
1326no_page:
1327 if (!page && (fgp_flags & FGP_CREAT)) {
1328 int err;
1329 if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
1330 gfp_mask |= __GFP_WRITE;
1331 if (fgp_flags & FGP_NOFS)
1332 gfp_mask &= ~__GFP_FS;
1333
1334 page = __page_cache_alloc(gfp_mask);
1335 if (!page)
1336 return NULL;
1337
1338 if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
1339 fgp_flags |= FGP_LOCK;
1340
1341
1342 if (fgp_flags & FGP_ACCESSED)
1343 __SetPageReferenced(page);
1344
1345 err = add_to_page_cache_lru(page, mapping, offset,
1346 gfp_mask & GFP_RECLAIM_MASK);
1347 if (unlikely(err)) {
1348 put_page(page);
1349 page = NULL;
1350 if (err == -EEXIST)
1351 goto repeat;
1352 }
1353 }
1354
1355 return page;
1356}
1357EXPORT_SYMBOL(pagecache_get_page);
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382unsigned find_get_entries(struct address_space *mapping,
1383 pgoff_t start, unsigned int nr_entries,
1384 struct page **entries, pgoff_t *indices)
1385{
1386 void **slot;
1387 unsigned int ret = 0;
1388 struct radix_tree_iter iter;
1389
1390 if (!nr_entries)
1391 return 0;
1392
1393 rcu_read_lock();
1394 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1395 struct page *head, *page;
1396repeat:
1397 page = radix_tree_deref_slot(slot);
1398 if (unlikely(!page))
1399 continue;
1400 if (radix_tree_exception(page)) {
1401 if (radix_tree_deref_retry(page)) {
1402 slot = radix_tree_iter_retry(&iter);
1403 continue;
1404 }
1405
1406
1407
1408
1409
1410 goto export;
1411 }
1412
1413 head = compound_head(page);
1414 if (!page_cache_get_speculative(head))
1415 goto repeat;
1416
1417
1418 if (compound_head(page) != head) {
1419 put_page(head);
1420 goto repeat;
1421 }
1422
1423
1424 if (unlikely(page != *slot)) {
1425 put_page(head);
1426 goto repeat;
1427 }
1428export:
1429 indices[ret] = iter.index;
1430 entries[ret] = page;
1431 if (++ret == nr_entries)
1432 break;
1433 }
1434 rcu_read_unlock();
1435 return ret;
1436}
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
1455 unsigned int nr_pages, struct page **pages)
1456{
1457 struct radix_tree_iter iter;
1458 void **slot;
1459 unsigned ret = 0;
1460
1461 if (unlikely(!nr_pages))
1462 return 0;
1463
1464 rcu_read_lock();
1465 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
1466 struct page *head, *page;
1467repeat:
1468 page = radix_tree_deref_slot(slot);
1469 if (unlikely(!page))
1470 continue;
1471
1472 if (radix_tree_exception(page)) {
1473 if (radix_tree_deref_retry(page)) {
1474 slot = radix_tree_iter_retry(&iter);
1475 continue;
1476 }
1477
1478
1479
1480
1481
1482 continue;
1483 }
1484
1485 head = compound_head(page);
1486 if (!page_cache_get_speculative(head))
1487 goto repeat;
1488
1489
1490 if (compound_head(page) != head) {
1491 put_page(head);
1492 goto repeat;
1493 }
1494
1495
1496 if (unlikely(page != *slot)) {
1497 put_page(head);
1498 goto repeat;
1499 }
1500
1501 pages[ret] = page;
1502 if (++ret == nr_pages)
1503 break;
1504 }
1505
1506 rcu_read_unlock();
1507 return ret;
1508}
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
1523 unsigned int nr_pages, struct page **pages)
1524{
1525 struct radix_tree_iter iter;
1526 void **slot;
1527 unsigned int ret = 0;
1528
1529 if (unlikely(!nr_pages))
1530 return 0;
1531
1532 rcu_read_lock();
1533 radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
1534 struct page *head, *page;
1535repeat:
1536 page = radix_tree_deref_slot(slot);
1537
1538 if (unlikely(!page))
1539 break;
1540
1541 if (radix_tree_exception(page)) {
1542 if (radix_tree_deref_retry(page)) {
1543 slot = radix_tree_iter_retry(&iter);
1544 continue;
1545 }
1546
1547
1548
1549
1550
1551 break;
1552 }
1553
1554 head = compound_head(page);
1555 if (!page_cache_get_speculative(head))
1556 goto repeat;
1557
1558
1559 if (compound_head(page) != head) {
1560 put_page(head);
1561 goto repeat;
1562 }
1563
1564
1565 if (unlikely(page != *slot)) {
1566 put_page(head);
1567 goto repeat;
1568 }
1569
1570
1571
1572
1573
1574
1575 if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
1576 put_page(page);
1577 break;
1578 }
1579
1580 pages[ret] = page;
1581 if (++ret == nr_pages)
1582 break;
1583 }
1584 rcu_read_unlock();
1585 return ret;
1586}
1587EXPORT_SYMBOL(find_get_pages_contig);
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
1601 int tag, unsigned int nr_pages, struct page **pages)
1602{
1603 struct radix_tree_iter iter;
1604 void **slot;
1605 unsigned ret = 0;
1606
1607 if (unlikely(!nr_pages))
1608 return 0;
1609
1610 rcu_read_lock();
1611 radix_tree_for_each_tagged(slot, &mapping->page_tree,
1612 &iter, *index, tag) {
1613 struct page *head, *page;
1614repeat:
1615 page = radix_tree_deref_slot(slot);
1616 if (unlikely(!page))
1617 continue;
1618
1619 if (radix_tree_exception(page)) {
1620 if (radix_tree_deref_retry(page)) {
1621 slot = radix_tree_iter_retry(&iter);
1622 continue;
1623 }
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635 continue;
1636 }
1637
1638 head = compound_head(page);
1639 if (!page_cache_get_speculative(head))
1640 goto repeat;
1641
1642
1643 if (compound_head(page) != head) {
1644 put_page(head);
1645 goto repeat;
1646 }
1647
1648
1649 if (unlikely(page != *slot)) {
1650 put_page(head);
1651 goto repeat;
1652 }
1653
1654 pages[ret] = page;
1655 if (++ret == nr_pages)
1656 break;
1657 }
1658
1659 rcu_read_unlock();
1660
1661 if (ret)
1662 *index = pages[ret - 1]->index + 1;
1663
1664 return ret;
1665}
1666EXPORT_SYMBOL(find_get_pages_tag);
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
1681 int tag, unsigned int nr_entries,
1682 struct page **entries, pgoff_t *indices)
1683{
1684 void **slot;
1685 unsigned int ret = 0;
1686 struct radix_tree_iter iter;
1687
1688 if (!nr_entries)
1689 return 0;
1690
1691 rcu_read_lock();
1692 radix_tree_for_each_tagged(slot, &mapping->page_tree,
1693 &iter, start, tag) {
1694 struct page *head, *page;
1695repeat:
1696 page = radix_tree_deref_slot(slot);
1697 if (unlikely(!page))
1698 continue;
1699 if (radix_tree_exception(page)) {
1700 if (radix_tree_deref_retry(page)) {
1701 slot = radix_tree_iter_retry(&iter);
1702 continue;
1703 }
1704
1705
1706
1707
1708
1709
1710 goto export;
1711 }
1712
1713 head = compound_head(page);
1714 if (!page_cache_get_speculative(head))
1715 goto repeat;
1716
1717
1718 if (compound_head(page) != head) {
1719 put_page(head);
1720 goto repeat;
1721 }
1722
1723
1724 if (unlikely(page != *slot)) {
1725 put_page(head);
1726 goto repeat;
1727 }
1728export:
1729 indices[ret] = iter.index;
1730 entries[ret] = page;
1731 if (++ret == nr_entries)
1732 break;
1733 }
1734 rcu_read_unlock();
1735 return ret;
1736}
1737EXPORT_SYMBOL(find_get_entries_tag);
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754static void shrink_readahead_size_eio(struct file *filp,
1755 struct file_ra_state *ra)
1756{
1757 ra->ra_pages /= 4;
1758}
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
1774 struct iov_iter *iter, ssize_t written)
1775{
1776 struct address_space *mapping = filp->f_mapping;
1777 struct inode *inode = mapping->host;
1778 struct file_ra_state *ra = &filp->f_ra;
1779 pgoff_t index;
1780 pgoff_t last_index;
1781 pgoff_t prev_index;
1782 unsigned long offset;
1783 unsigned int prev_offset;
1784 int error = 0;
1785
1786 if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
1787 return 0;
1788 iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
1789
1790 index = *ppos >> PAGE_SHIFT;
1791 prev_index = ra->prev_pos >> PAGE_SHIFT;
1792 prev_offset = ra->prev_pos & (PAGE_SIZE-1);
1793 last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
1794 offset = *ppos & ~PAGE_MASK;
1795
1796 for (;;) {
1797 struct page *page;
1798 pgoff_t end_index;
1799 loff_t isize;
1800 unsigned long nr, ret;
1801
1802 cond_resched();
1803find_page:
1804 if (fatal_signal_pending(current)) {
1805 error = -EINTR;
1806 goto out;
1807 }
1808
1809 page = find_get_page(mapping, index);
1810 if (!page) {
1811 page_cache_sync_readahead(mapping,
1812 ra, filp,
1813 index, last_index - index);
1814 page = find_get_page(mapping, index);
1815 if (unlikely(page == NULL))
1816 goto no_cached_page;
1817 }
1818 if (PageReadahead(page)) {
1819 page_cache_async_readahead(mapping,
1820 ra, filp, page,
1821 index, last_index - index);
1822 }
1823 if (!PageUptodate(page)) {
1824
1825
1826
1827
1828
1829 error = wait_on_page_locked_killable(page);
1830 if (unlikely(error))
1831 goto readpage_error;
1832 if (PageUptodate(page))
1833 goto page_ok;
1834
1835 if (inode->i_blkbits == PAGE_SHIFT ||
1836 !mapping->a_ops->is_partially_uptodate)
1837 goto page_not_up_to_date;
1838
1839 if (unlikely(iter->type & ITER_PIPE))
1840 goto page_not_up_to_date;
1841 if (!trylock_page(page))
1842 goto page_not_up_to_date;
1843
1844 if (!page->mapping)
1845 goto page_not_up_to_date_locked;
1846 if (!mapping->a_ops->is_partially_uptodate(page,
1847 offset, iter->count))
1848 goto page_not_up_to_date_locked;
1849 unlock_page(page);
1850 }
1851page_ok:
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861 isize = i_size_read(inode);
1862 end_index = (isize - 1) >> PAGE_SHIFT;
1863 if (unlikely(!isize || index > end_index)) {
1864 put_page(page);
1865 goto out;
1866 }
1867
1868
1869 nr = PAGE_SIZE;
1870 if (index == end_index) {
1871 nr = ((isize - 1) & ~PAGE_MASK) + 1;
1872 if (nr <= offset) {
1873 put_page(page);
1874 goto out;
1875 }
1876 }
1877 nr = nr - offset;
1878
1879
1880
1881
1882
1883 if (mapping_writably_mapped(mapping))
1884 flush_dcache_page(page);
1885
1886
1887
1888
1889
1890 if (prev_index != index || offset != prev_offset)
1891 mark_page_accessed(page);
1892 prev_index = index;
1893
1894
1895
1896
1897
1898
1899 ret = copy_page_to_iter(page, offset, nr, iter);
1900 offset += ret;
1901 index += offset >> PAGE_SHIFT;
1902 offset &= ~PAGE_MASK;
1903 prev_offset = offset;
1904
1905 put_page(page);
1906 written += ret;
1907 if (!iov_iter_count(iter))
1908 goto out;
1909 if (ret < nr) {
1910 error = -EFAULT;
1911 goto out;
1912 }
1913 continue;
1914
1915page_not_up_to_date:
1916
1917 error = lock_page_killable(page);
1918 if (unlikely(error))
1919 goto readpage_error;
1920
1921page_not_up_to_date_locked:
1922
1923 if (!page->mapping) {
1924 unlock_page(page);
1925 put_page(page);
1926 continue;
1927 }
1928
1929
1930 if (PageUptodate(page)) {
1931 unlock_page(page);
1932 goto page_ok;
1933 }
1934
1935readpage:
1936
1937
1938
1939
1940
1941 ClearPageError(page);
1942
1943 error = mapping->a_ops->readpage(filp, page);
1944
1945 if (unlikely(error)) {
1946 if (error == AOP_TRUNCATED_PAGE) {
1947 put_page(page);
1948 error = 0;
1949 goto find_page;
1950 }
1951 goto readpage_error;
1952 }
1953
1954 if (!PageUptodate(page)) {
1955 error = lock_page_killable(page);
1956 if (unlikely(error))
1957 goto readpage_error;
1958 if (!PageUptodate(page)) {
1959 if (page->mapping == NULL) {
1960
1961
1962
1963 unlock_page(page);
1964 put_page(page);
1965 goto find_page;
1966 }
1967 unlock_page(page);
1968 shrink_readahead_size_eio(filp, ra);
1969 error = -EIO;
1970 goto readpage_error;
1971 }
1972 unlock_page(page);
1973 }
1974
1975 goto page_ok;
1976
1977readpage_error:
1978
1979 put_page(page);
1980 goto out;
1981
1982no_cached_page:
1983
1984
1985
1986
1987 page = page_cache_alloc_cold(mapping);
1988 if (!page) {
1989 error = -ENOMEM;
1990 goto out;
1991 }
1992 error = add_to_page_cache_lru(page, mapping, index,
1993 mapping_gfp_constraint(mapping, GFP_KERNEL));
1994 if (error) {
1995 put_page(page);
1996 if (error == -EEXIST) {
1997 error = 0;
1998 goto find_page;
1999 }
2000 goto out;
2001 }
2002 goto readpage;
2003 }
2004
2005out:
2006 ra->prev_pos = prev_index;
2007 ra->prev_pos <<= PAGE_SHIFT;
2008 ra->prev_pos |= prev_offset;
2009
2010 *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
2011 file_accessed(filp);
2012 return written ? written : error;
2013}
2014
/**
 * generic_file_read_iter - generic filesystem read routine
 * @iocb:	kernel I/O control block
 * @iter:	destination for the data read
 *
 * This is the "read_iter()" routine for all filesystems
 * that can use the page cache directly.
 */
2023ssize_t
2024generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2025{
2026 struct file *file = iocb->ki_filp;
2027 ssize_t retval = 0;
2028 size_t count = iov_iter_count(iter);
2029
2030 if (!count)
2031 goto out;
2032
2033 if (iocb->ki_flags & IOCB_DIRECT) {
2034 struct address_space *mapping = file->f_mapping;
2035 struct inode *inode = mapping->host;
2036 struct iov_iter data = *iter;
2037 loff_t size;
2038
2039 size = i_size_read(inode);
2040 retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
2041 iocb->ki_pos + count - 1);
2042 if (retval < 0)
2043 goto out;
2044
2045 file_accessed(file);
2046
2047 retval = mapping->a_ops->direct_IO(iocb, &data);
2048 if (retval >= 0) {
2049 iocb->ki_pos += retval;
2050 iov_iter_advance(iter, retval);
2051 }
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062 if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size ||
2063 IS_DAX(inode))
2064 goto out;
2065 }
2066
2067 retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval);
2068out:
2069 return retval;
2070}
2071EXPORT_SYMBOL(generic_file_read_iter);
2072
2073#ifdef CONFIG_MMU
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
2084{
2085 struct address_space *mapping = file->f_mapping;
2086 struct page *page;
2087 int ret;
2088
2089 do {
2090 page = __page_cache_alloc(gfp_mask|__GFP_COLD);
2091 if (!page)
2092 return -ENOMEM;
2093
2094 ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask & GFP_KERNEL);
2095 if (ret == 0)
2096 ret = mapping->a_ops->readpage(file, page);
2097 else if (ret == -EEXIST)
2098 ret = 0;
2099
2100 put_page(page);
2101
2102 } while (ret == AOP_TRUNCATED_PAGE);
2103
2104 return ret;
2105}
2106
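/*
 * Once a mapped file has accumulated this many page cache misses,
 * do_sync_mmap_readahead() gives up on readahead around faults for it.
 */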
2107#define MMAP_LOTSAMISS (100)
2108
2109
2110
2111
2112
2113static void do_sync_mmap_readahead(struct vm_area_struct *vma,
2114 struct file_ra_state *ra,
2115 struct file *file,
2116 pgoff_t offset)
2117{
2118 struct address_space *mapping = file->f_mapping;
2119
2120
2121 if (vma->vm_flags & VM_RAND_READ)
2122 return;
2123 if (!ra->ra_pages)
2124 return;
2125
2126 if (vma->vm_flags & VM_SEQ_READ) {
2127 page_cache_sync_readahead(mapping, ra, file, offset,
2128 ra->ra_pages);
2129 return;
2130 }
2131
2132
2133 if (ra->mmap_miss < MMAP_LOTSAMISS * 10)
2134 ra->mmap_miss++;
2135
2136
2137
2138
2139
2140 if (ra->mmap_miss > MMAP_LOTSAMISS)
2141 return;
2142
2143
2144
2145
2146 ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
2147 ra->size = ra->ra_pages;
2148 ra->async_size = ra->ra_pages / 4;
2149 ra_submit(ra, mapping, file);
2150}
2151
2152
2153
2154
2155
2156static void do_async_mmap_readahead(struct vm_area_struct *vma,
2157 struct file_ra_state *ra,
2158 struct file *file,
2159 struct page *page,
2160 pgoff_t offset)
2161{
2162 struct address_space *mapping = file->f_mapping;
2163
2164
2165 if (vma->vm_flags & VM_RAND_READ)
2166 return;
2167 if (ra->mmap_miss > 0)
2168 ra->mmap_miss--;
2169 if (PageReadahead(page))
2170 page_cache_async_readahead(mapping, ra, file,
2171 page, offset, ra->ra_pages);
2172}
2173
/**
 * filemap_fault - read in file data for page fault handling
 * @vmf:	struct vm_fault containing details of the fault
 *
 * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 *
 * vma->vm_mm->mmap_sem must be held on entry.
 *
 * If our return value has VM_FAULT_RETRY set, it's because
 * lock_page_or_retry() returned 0.
 * The mmap_sem has usually been released in this case.
 * See __lock_page_or_retry() for the exception.
 *
 * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
 * has not been released.
 */
2197int filemap_fault(struct vm_fault *vmf)
2198{
2199 int error;
2200 struct file *file = vmf->vma->vm_file;
2201 struct address_space *mapping = file->f_mapping;
2202 struct file_ra_state *ra = &file->f_ra;
2203 struct inode *inode = mapping->host;
2204 pgoff_t offset = vmf->pgoff;
2205 struct page *page;
2206 loff_t size;
2207 int ret = 0;
2208
2209 size = round_up(i_size_read(inode), PAGE_SIZE);
2210 if (offset >= size >> PAGE_SHIFT)
2211 return VM_FAULT_SIGBUS;
2212
2213
2214
2215
2216 page = find_get_page(mapping, offset);
2217 if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
2218
2219
2220
2221
2222 do_async_mmap_readahead(vmf->vma, ra, file, page, offset);
2223 } else if (!page) {
2224
2225 do_sync_mmap_readahead(vmf->vma, ra, file, offset);
2226 count_vm_event(PGMAJFAULT);
2227 mem_cgroup_count_vm_event(vmf->vma->vm_mm, PGMAJFAULT);
2228 ret = VM_FAULT_MAJOR;
2229retry_find:
2230 page = find_get_page(mapping, offset);
2231 if (!page)
2232 goto no_cached_page;
2233 }
2234
2235 if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
2236 put_page(page);
2237 return ret | VM_FAULT_RETRY;
2238 }
2239
2240
2241 if (unlikely(page->mapping != mapping)) {
2242 unlock_page(page);
2243 put_page(page);
2244 goto retry_find;
2245 }
2246 VM_BUG_ON_PAGE(page->index != offset, page);
2247
2248
2249
2250
2251
2252 if (unlikely(!PageUptodate(page)))
2253 goto page_not_uptodate;
2254
2255
2256
2257
2258
2259 size = round_up(i_size_read(inode), PAGE_SIZE);
2260 if (unlikely(offset >= size >> PAGE_SHIFT)) {
2261 unlock_page(page);
2262 put_page(page);
2263 return VM_FAULT_SIGBUS;
2264 }
2265
2266 vmf->page = page;
2267 return ret | VM_FAULT_LOCKED;
2268
2269no_cached_page:
2270
2271
2272
2273
2274 error = page_cache_read(file, offset, vmf->gfp_mask);
2275
2276
2277
2278
2279
2280
2281 if (error >= 0)
2282 goto retry_find;
2283
2284
2285
2286
2287
2288
2289 if (error == -ENOMEM)
2290 return VM_FAULT_OOM;
2291 return VM_FAULT_SIGBUS;
2292
2293page_not_uptodate:
2294
2295
2296
2297
2298
2299
2300 ClearPageError(page);
2301 error = mapping->a_ops->readpage(file, page);
2302 if (!error) {
2303 wait_on_page_locked(page);
2304 if (!PageUptodate(page))
2305 error = -EIO;
2306 }
2307 put_page(page);
2308
2309 if (!error || error == AOP_TRUNCATED_PAGE)
2310 goto retry_find;
2311
2312
2313 shrink_readahead_size_eio(file, ra);
2314 return VM_FAULT_SIGBUS;
2315}
2316EXPORT_SYMBOL(filemap_fault);
2317
2318void filemap_map_pages(struct vm_fault *vmf,
2319 pgoff_t start_pgoff, pgoff_t end_pgoff)
2320{
2321 struct radix_tree_iter iter;
2322 void **slot;
2323 struct file *file = vmf->vma->vm_file;
2324 struct address_space *mapping = file->f_mapping;
2325 pgoff_t last_pgoff = start_pgoff;
2326 loff_t size;
2327 struct page *head, *page;
2328
2329 rcu_read_lock();
2330 radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
2331 start_pgoff) {
2332 if (iter.index > end_pgoff)
2333 break;
2334repeat:
2335 page = radix_tree_deref_slot(slot);
2336 if (unlikely(!page))
2337 goto next;
2338 if (radix_tree_exception(page)) {
2339 if (radix_tree_deref_retry(page)) {
2340 slot = radix_tree_iter_retry(&iter);
2341 continue;
2342 }
2343 goto next;
2344 }
2345
2346 head = compound_head(page);
2347 if (!page_cache_get_speculative(head))
2348 goto repeat;
2349
2350
2351 if (compound_head(page) != head) {
2352 put_page(head);
2353 goto repeat;
2354 }
2355
2356
2357 if (unlikely(page != *slot)) {
2358 put_page(head);
2359 goto repeat;
2360 }
2361
2362 if (!PageUptodate(page) ||
2363 PageReadahead(page) ||
2364 PageHWPoison(page))
2365 goto skip;
2366 if (!trylock_page(page))
2367 goto skip;
2368
2369 if (page->mapping != mapping || !PageUptodate(page))
2370 goto unlock;
2371
2372 size = round_up(i_size_read(mapping->host), PAGE_SIZE);
2373 if (page->index >= size >> PAGE_SHIFT)
2374 goto unlock;
2375
2376 if (file->f_ra.mmap_miss > 0)
2377 file->f_ra.mmap_miss--;
2378
2379 vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT;
2380 if (vmf->pte)
2381 vmf->pte += iter.index - last_pgoff;
2382 last_pgoff = iter.index;
2383 if (alloc_set_pte(vmf, NULL, page))
2384 goto unlock;
2385 unlock_page(page);
2386 goto next;
2387unlock:
2388 unlock_page(page);
2389skip:
2390 put_page(page);
2391next:
2392
2393 if (pmd_trans_huge(*vmf->pmd))
2394 break;
2395 if (iter.index == end_pgoff)
2396 break;
2397 }
2398 rcu_read_unlock();
2399}
2400EXPORT_SYMBOL(filemap_map_pages);
2401
2402int filemap_page_mkwrite(struct vm_fault *vmf)
2403{
2404 struct page *page = vmf->page;
2405 struct inode *inode = file_inode(vmf->vma->vm_file);
2406 int ret = VM_FAULT_LOCKED;
2407
2408 sb_start_pagefault(inode->i_sb);
2409 file_update_time(vmf->vma->vm_file);
2410 lock_page(page);
2411 if (page->mapping != inode->i_mapping) {
2412 unlock_page(page);
2413 ret = VM_FAULT_NOPAGE;
2414 goto out;
2415 }
2416
2417
2418
2419
2420
2421 set_page_dirty(page);
2422 wait_for_stable_page(page);
2423out:
2424 sb_end_pagefault(inode->i_sb);
2425 return ret;
2426}
2427EXPORT_SYMBOL(filemap_page_mkwrite);
2428
2429const struct vm_operations_struct generic_file_vm_ops = {
2430 .fault = filemap_fault,
2431 .map_pages = filemap_map_pages,
2432 .page_mkwrite = filemap_page_mkwrite,
2433};
2434
/* This is used for a general mmap of a disk file */
2437int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2438{
2439 struct address_space *mapping = file->f_mapping;
2440
2441 if (!mapping->a_ops->readpage)
2442 return -ENOEXEC;
2443 file_accessed(file);
2444 vma->vm_ops = &generic_file_vm_ops;
2445 return 0;
2446}
2447
/*
 * This is for filesystems which do not implement ->writepage.
 */
2451int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
2452{
2453 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
2454 return -EINVAL;
2455 return generic_file_mmap(file, vma);
2456}
2457#else
2458int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
2459{
2460 return -ENOSYS;
2461}
2462int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
2463{
2464 return -ENOSYS;
2465}
2466#endif
2467
2468EXPORT_SYMBOL(generic_file_mmap);
2469EXPORT_SYMBOL(generic_file_readonly_mmap);
2470
2471static struct page *wait_on_page_read(struct page *page)
2472{
2473 if (!IS_ERR(page)) {
2474 wait_on_page_locked(page);
2475 if (!PageUptodate(page)) {
2476 put_page(page);
2477 page = ERR_PTR(-EIO);
2478 }
2479 }
2480 return page;
2481}
2482
2483static struct page *do_read_cache_page(struct address_space *mapping,
2484 pgoff_t index,
2485 int (*filler)(void *, struct page *),
2486 void *data,
2487 gfp_t gfp)
2488{
2489 struct page *page;
2490 int err;
2491repeat:
2492 page = find_get_page(mapping, index);
2493 if (!page) {
2494 page = __page_cache_alloc(gfp | __GFP_COLD);
2495 if (!page)
2496 return ERR_PTR(-ENOMEM);
2497 err = add_to_page_cache_lru(page, mapping, index, gfp);
2498 if (unlikely(err)) {
2499 put_page(page);
2500 if (err == -EEXIST)
2501 goto repeat;
2502
2503 return ERR_PTR(err);
2504 }
2505
2506filler:
2507 err = filler(data, page);
2508 if (err < 0) {
2509 put_page(page);
2510 return ERR_PTR(err);
2511 }
2512
2513 page = wait_on_page_read(page);
2514 if (IS_ERR(page))
2515 return page;
2516 goto out;
2517 }
2518 if (PageUptodate(page))
2519 goto out;
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552 wait_on_page_locked(page);
2553 if (PageUptodate(page))
2554 goto out;
2555
2556
2557 lock_page(page);
2558
2559
2560 if (!page->mapping) {
2561 unlock_page(page);
2562 put_page(page);
2563 goto repeat;
2564 }
2565
2566
2567 if (PageUptodate(page)) {
2568 unlock_page(page);
2569 goto out;
2570 }
2571 goto filler;
2572
2573out:
2574 mark_page_accessed(page);
2575 return page;
2576}
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590struct page *read_cache_page(struct address_space *mapping,
2591 pgoff_t index,
2592 int (*filler)(void *, struct page *),
2593 void *data)
2594{
2595 return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
2596}
2597EXPORT_SYMBOL(read_cache_page);
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610struct page *read_cache_page_gfp(struct address_space *mapping,
2611 pgoff_t index,
2612 gfp_t gfp)
2613{
2614 filler_t *filler = (filler_t *)mapping->a_ops->readpage;
2615
2616 return do_read_cache_page(mapping, index, filler, NULL, gfp);
2617}
2618EXPORT_SYMBOL(read_cache_page_gfp);
2619
2620
2621
2622
2623
2624
2625
2626
2627inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
2628{
2629 struct file *file = iocb->ki_filp;
2630 struct inode *inode = file->f_mapping->host;
2631 unsigned long limit = rlimit(RLIMIT_FSIZE);
2632 loff_t pos;
2633
2634 if (!iov_iter_count(from))
2635 return 0;
2636
2637
2638 if (iocb->ki_flags & IOCB_APPEND)
2639 iocb->ki_pos = i_size_read(inode);
2640
2641 pos = iocb->ki_pos;
2642
2643 if (limit != RLIM_INFINITY) {
2644 if (iocb->ki_pos >= limit) {
2645 send_sig(SIGXFSZ, current, 0);
2646 return -EFBIG;
2647 }
2648 iov_iter_truncate(from, limit - (unsigned long)pos);
2649 }
2650
2651
2652
2653
2654 if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
2655 !(file->f_flags & O_LARGEFILE))) {
2656 if (pos >= MAX_NON_LFS)
2657 return -EFBIG;
2658 iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
2659 }
2660
2661
2662
2663
2664
2665
2666
2667
2668 if (unlikely(pos >= inode->i_sb->s_maxbytes))
2669 return -EFBIG;
2670
2671 iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
2672 return iov_iter_count(from);
2673}
2674EXPORT_SYMBOL(generic_write_checks);
2675
2676int pagecache_write_begin(struct file *file, struct address_space *mapping,
2677 loff_t pos, unsigned len, unsigned flags,
2678 struct page **pagep, void **fsdata)
2679{
2680 const struct address_space_operations *aops = mapping->a_ops;
2681
2682 return aops->write_begin(file, mapping, pos, len, flags,
2683 pagep, fsdata);
2684}
2685EXPORT_SYMBOL(pagecache_write_begin);
2686
2687int pagecache_write_end(struct file *file, struct address_space *mapping,
2688 loff_t pos, unsigned len, unsigned copied,
2689 struct page *page, void *fsdata)
2690{
2691 const struct address_space_operations *aops = mapping->a_ops;
2692
2693 return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
2694}
2695EXPORT_SYMBOL(pagecache_write_end);
2696
2697ssize_t
2698generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2699{
2700 struct file *file = iocb->ki_filp;
2701 struct address_space *mapping = file->f_mapping;
2702 struct inode *inode = mapping->host;
2703 loff_t pos = iocb->ki_pos;
2704 ssize_t written;
2705 size_t write_len;
2706 pgoff_t end;
2707 struct iov_iter data;
2708
2709 write_len = iov_iter_count(from);
2710 end = (pos + write_len - 1) >> PAGE_SHIFT;
2711
2712 written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
2713 if (written)
2714 goto out;
2715
2716
2717
2718
2719
2720
2721
2722 if (mapping->nrpages) {
2723 written = invalidate_inode_pages2_range(mapping,
2724 pos >> PAGE_SHIFT, end);
2725
2726
2727
2728
2729 if (written) {
2730 if (written == -EBUSY)
2731 return 0;
2732 goto out;
2733 }
2734 }
2735
2736 data = *from;
2737 written = mapping->a_ops->direct_IO(iocb, &data);
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747 if (mapping->nrpages) {
2748 invalidate_inode_pages2_range(mapping,
2749 pos >> PAGE_SHIFT, end);
2750 }
2751
2752 if (written > 0) {
2753 pos += written;
2754 iov_iter_advance(from, written);
2755 if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
2756 i_size_write(inode, pos);
2757 mark_inode_dirty(inode);
2758 }
2759 iocb->ki_pos = pos;
2760 }
2761out:
2762 return written;
2763}
2764EXPORT_SYMBOL(generic_file_direct_write);
2765
2766
2767
2768
2769
2770struct page *grab_cache_page_write_begin(struct address_space *mapping,
2771 pgoff_t index, unsigned flags)
2772{
2773 struct page *page;
2774 int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
2775
2776 if (flags & AOP_FLAG_NOFS)
2777 fgp_flags |= FGP_NOFS;
2778
2779 page = pagecache_get_page(mapping, index, fgp_flags,
2780 mapping_gfp_mask(mapping));
2781 if (page)
2782 wait_for_stable_page(page);
2783
2784 return page;
2785}
2786EXPORT_SYMBOL(grab_cache_page_write_begin);
2787
2788ssize_t generic_perform_write(struct file *file,
2789 struct iov_iter *i, loff_t pos)
2790{
2791 struct address_space *mapping = file->f_mapping;
2792 const struct address_space_operations *a_ops = mapping->a_ops;
2793 long status = 0;
2794 ssize_t written = 0;
2795 unsigned int flags = 0;
2796
2797
2798
2799
2800 if (!iter_is_iovec(i))
2801 flags |= AOP_FLAG_UNINTERRUPTIBLE;
2802
2803 do {
2804 struct page *page;
2805 unsigned long offset;
2806 unsigned long bytes;
2807 size_t copied;
2808 void *fsdata;
2809
2810 offset = (pos & (PAGE_SIZE - 1));
2811 bytes = min_t(unsigned long, PAGE_SIZE - offset,
2812 iov_iter_count(i));
2813
2814again:
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
2826 status = -EFAULT;
2827 break;
2828 }
2829
2830 if (fatal_signal_pending(current)) {
2831 status = -EINTR;
2832 break;
2833 }
2834
2835 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
2836 &page, &fsdata);
2837 if (unlikely(status < 0))
2838 break;
2839
2840 if (mapping_writably_mapped(mapping))
2841 flush_dcache_page(page);
2842
2843 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
2844 flush_dcache_page(page);
2845
2846 status = a_ops->write_end(file, mapping, pos, bytes, copied,
2847 page, fsdata);
2848 if (unlikely(status < 0))
2849 break;
2850 copied = status;
2851
2852 cond_resched();
2853
2854 iov_iter_advance(i, copied);
2855 if (unlikely(copied == 0)) {
2856
2857
2858
2859
2860
2861
2862
2863
2864 bytes = min_t(unsigned long, PAGE_SIZE - offset,
2865 iov_iter_single_seg_count(i));
2866 goto again;
2867 }
2868 pos += copied;
2869 written += copied;
2870
2871 balance_dirty_pages_ratelimited(mapping);
2872 } while (iov_iter_count(i));
2873
2874 return written ? written : status;
2875}
2876EXPORT_SYMBOL(generic_perform_write);
2877
/**
 * __generic_file_write_iter - write data to a file
 * @iocb:	IO state structure (file, offset, etc.)
 * @from:	iov_iter with data to write
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * It expects i_mutex to be grabbed unless we work on a block device or similar
 * object which does not need locking at all.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_mutex.
 */
2895ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2896{
2897 struct file *file = iocb->ki_filp;
2898 struct address_space * mapping = file->f_mapping;
2899 struct inode *inode = mapping->host;
2900 ssize_t written = 0;
2901 ssize_t err;
2902 ssize_t status;
2903
2904
2905 current->backing_dev_info = inode_to_bdi(inode);
2906 err = file_remove_privs(file);
2907 if (err)
2908 goto out;
2909
2910 err = file_update_time(file);
2911 if (err)
2912 goto out;
2913
2914 if (iocb->ki_flags & IOCB_DIRECT) {
2915 loff_t pos, endbyte;
2916
2917 written = generic_file_direct_write(iocb, from);
2918
2919
2920
2921
2922
2923
2924
2925 if (written < 0 || !iov_iter_count(from) || IS_DAX(inode))
2926 goto out;
2927
2928 status = generic_perform_write(file, from, pos = iocb->ki_pos);
2929
2930
2931
2932
2933
2934
2935
2936 if (unlikely(status < 0)) {
2937 err = status;
2938 goto out;
2939 }
2940
2941
2942
2943
2944
2945 endbyte = pos + status - 1;
2946 err = filemap_write_and_wait_range(mapping, pos, endbyte);
2947 if (err == 0) {
2948 iocb->ki_pos = endbyte + 1;
2949 written += status;
2950 invalidate_mapping_pages(mapping,
2951 pos >> PAGE_SHIFT,
2952 endbyte >> PAGE_SHIFT);
2953 } else {
2954
2955
2956
2957
2958 }
2959 } else {
2960 written = generic_perform_write(file, from, iocb->ki_pos);
2961 if (likely(written > 0))
2962 iocb->ki_pos += written;
2963 }
2964out:
2965 current->backing_dev_info = NULL;
2966 return written ? written : err;
2967}
2968EXPORT_SYMBOL(__generic_file_write_iter);
2969
/**
 * generic_file_write_iter - write data to a file
 * @iocb:	IO state structure
 * @from:	iov_iter with data to write
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
 * and acquires i_mutex as needed.
 */
2979ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2980{
2981 struct file *file = iocb->ki_filp;
2982 struct inode *inode = file->f_mapping->host;
2983 ssize_t ret;
2984
2985 inode_lock(inode);
2986 ret = generic_write_checks(iocb, from);
2987 if (ret > 0)
2988 ret = __generic_file_write_iter(iocb, from);
2989 inode_unlock(inode);
2990
2991 if (ret > 0)
2992 ret = generic_write_sync(iocb, ret);
2993 return ret;
2994}
2995EXPORT_SYMBOL(generic_file_write_iter);
2996
/**
 * try_to_release_page() - release old fs-specific metadata on a page
 *
 * @page: the page which the kernel is trying to free
 * @gfp_mask: memory allocation flags (and I/O mode)
 *
 * The address_space is to try to release any data against the page
 * (presumably at page->private).  If the release was successful, return '1'.
 * Otherwise return zero.
 *
 * This may also be called if PG_fscache is set on a page, indicating that the
 * page is known to the local caching routines.
 *
 * The @gfp_mask argument specifies whether I/O may be performed to release
 * this page (__GFP_IO), and whether the call may block
 * (__GFP_RECLAIM & __GFP_FS).
 */
3014int try_to_release_page(struct page *page, gfp_t gfp_mask)
3015{
3016 struct address_space * const mapping = page->mapping;
3017
3018 BUG_ON(!PageLocked(page));
3019 if (PageWriteback(page))
3020 return 0;
3021
3022 if (mapping && mapping->a_ops->releasepage)
3023 return mapping->a_ops->releasepage(page, gfp_mask);
3024 return try_to_free_buffers(page);
3025}
3026
3027EXPORT_SYMBOL(try_to_release_page);
3028