/*
 * mm/readahead.c - address_space-level file readahead.
 */
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>

#include "internal.h"

/*
 * Initialise a struct file's readahead state.  Assume that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
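
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * the VFS initialises per-file readahead state at open time, roughly
 *
 *	file_ra_state_init(&filp->f_ra, filp->f_mapping);
 *
 * so every struct file starts out with the backing device's default
 * ra_pages window and an invalid prev_pos.
 */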

/*
 * See if a page needs releasing upon read_cache_pages() failure
 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
 *   before calling, such as the NFS fs marking pages that are cached locally
 *   on disk, thus we need to give the fs a chance to clean up in the event of
 *   an error
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		if (!trylock_page(page))
			BUG();
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}

/*
 * Release a list of pages, invalidating them first if need be.
 */
static void read_cache_pages_invalidate_pages(struct address_space *mapping,
					      struct list_head *pages)
{
	struct page *victim;

	while (!list_empty(pages)) {
		victim = lru_to_page(pages);
		list_del(&victim->lru);
		read_cache_pages_invalidate_page(mapping, victim);
	}
}

/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Return: %0 on success, error return by @filler otherwise.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}
EXPORT_SYMBOL(read_cache_pages);

static void read_pages(struct readahead_control *rac, struct list_head *pages,
		bool skip_page)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (!readahead_count(rac))
		goto out;

	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/* Clean up the remaining pages */
		while ((page = readahead_page(rac))) {
			unlock_page(page);
			put_page(page);
		}
	} else if (aops->readpages) {
		aops->readpages(rac->file, rac->mapping, pages,
				readahead_count(rac));
		/* Clean up the remaining pages */
		put_pages_list(pages);
		rac->_index += rac->_nr_pages;
		rac->_nr_pages = 0;
	} else {
		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

	blk_finish_plug(&plug);

	BUG_ON(!list_empty(pages));
	BUG_ON(readahead_count(rac));

out:
	if (skip_page)
		rac->_index++;
}

/**
 * page_cache_ra_unbounded - Start unchecked readahead.
 * @ractl: Readahead control.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;
	unsigned long index = readahead_index(ractl);
	LIST_HEAD(page_pool);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked pages to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another page may need to allocate memory,
	 * which can trigger memory reclaim.  Telling the VM we're in
	 * the middle of a filesystem operation will cause it to not
	 * touch file-backed pages, preventing a deadlock.  Most (all?)
	 * filesystems already specify __GFP_NOFS in their mapping's
	 * gfp_mask, but let's be explicit here.
	 */
	unsigned int nofs = memalloc_nofs_save();

	filemap_invalidate_lock_shared(mapping);
	/*
	 * Preallocate as many pages as we will need.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct folio *folio = xa_load(&mapping->i_pages, index + i);

		if (folio && !xa_is_value(folio)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.  This page may be the one we would
			 * have intended to mark as Readahead, but we don't
			 * have a stable reference to it, and it's not worth
			 * getting one just for that.
			 */
			read_pages(ractl, &page_pool, true);
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}

		folio = filemap_alloc_folio(gfp_mask, 0);
		if (!folio)
			break;
		if (mapping->a_ops->readpages) {
			/* Hand the page to the legacy ->readpages() path via the list */
			folio->index = index + i;
			list_add(&folio->lru, &page_pool);
		} else if (filemap_add_folio(mapping, folio, index + i,
					gfp_mask) < 0) {
			folio_put(folio);
			read_pages(ractl, &page_pool, true);
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}
		if (i == nr_to_read - lookahead_size)
			folio_set_readahead(folio);
		ractl->_nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	read_pages(ractl, &page_pool, false);
	filemap_invalidate_unlock_shared(mapping);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);

/*
 * do_page_cache_ra() actually reads a chunk of disk.  It allocates
 * the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 */
void do_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct inode *inode = ractl->mapping->host;
	unsigned long index = readahead_index(ractl);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index;	/* The last page we want to read */

	if (isize == 0)
		return;

	end_index = (isize - 1) >> PAGE_SHIFT;
	if (index > end_index)
		return;
	/* Don't read past the page containing the last byte of the file */
	if (nr_to_read > end_index - index)
		nr_to_read = end_index - index + 1;

	page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}

/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
void force_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
			!mapping->a_ops->readahead))
		return;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	index = readahead_index(ractl);
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		ractl->_index = index;
		do_page_cache_ra(ractl, this_chunk, 0);

		index += this_chunk;
		nr_to_read -= this_chunk;
	}
}
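
/*
 * Illustration (editor's sketch, not part of the original source): on a
 * machine with 4KiB pages, this_chunk above is 512 pages, so a forced read
 * of nr_to_read pages (already clamped to max(bdi->io_pages, ra->ra_pages))
 * is issued as a series of do_page_cache_ra() calls of at most 512 pages
 * each, advancing ractl->_index between calls.
 */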

/*
 * Set the initial window size: round the request up to the next power of
 * two, then scale it up (x4 for small requests, x2 for medium ones) while
 * staying within the maximum readahead size.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}
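
/*
 * Worked example (editor's illustration, not in the original source):
 * with the common 128KiB default window and 4KiB pages, max == 32, so
 *
 *	get_init_ra_size(1, 32)  == 4	(1 <= 32/32, quadruple)
 *	get_init_ra_size(4, 32)  == 8	(4 <= 32/4, double)
 *	get_init_ra_size(8, 32)  == 16	(8 <= 32/4, double)
 *	get_init_ra_size(10, 32) == 32	(rounds up to 16 > 32/4, clamp to max)
 */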

/*
 * Get the previous window size, ramp it up, and return it as the new window
 * size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
				      unsigned long max)
{
	unsigned long cur = ra->size;

	if (cur < max / 16)
		return 4 * cur;
	if (cur <= max / 2)
		return 2 * cur;
	return max;
}
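
/*
 * Worked example (editor's illustration, not in the original source):
 * again with max == 32 pages, successive calls grow a window as
 *
 *	1 -> 4 -> 8 -> 16 -> 32 -> 32 -> ...
 *
 * i.e. quadruple while tiny (cur < max/16), double up to max/2, then stay
 * pinned at the maximum.
 */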

/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * Instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * be detected through the PG_readahead flag: when a read hits a marked
 * page but the file_ra_state does not match, the readahead window is
 * rebuilt by querying the page cache (see hit_readahead_marker in
 * ondemand_readahead() below).
 *
 * Small random reads are read as-is and do not touch the readahead state,
 * so they cannot trash the window of a sequential stream sharing the fd.
 */

/*
 * Count the number of contiguously cached pages immediately before @index,
 * scanning back at most @max entries.  This is a conservative estimate of
 * the length of the sequential read sequence that left them behind.
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}

/*
 * page cache context based readahead
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}
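
/*
 * Worked trace (editor's illustration, not in the original source):
 * a read lands at index 1000 with req_size == 8 and max == 32, and pages
 * 980..999 are cached while 979 is not.  count_history_pages() returns 20,
 * which exceeds req_size, so this looks like an interleaved sequential
 * stream: ra->start = 1000, ra->size = min(20 + 8, 32) = 28 and
 * ra->async_size = 1, i.e. the next marker is placed almost immediately.
 */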

/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct readahead_control *ractl,
		bool hit_readahead_marker, unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
	struct file_ra_state *ra = ractl->ra;
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	unsigned long index = readahead_index(ractl);
	pgoff_t prev_index;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((index == (ra->start + ra->size - ra->async_size) ||
	     index == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(ractl->mapping, index + 1,
				max_pages);
		rcu_read_unlock();

		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(ractl->mapping, ra, index, req_size,
			max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	do_page_cache_ra(ractl, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ractl->_index = ra->start;
	do_page_cache_ra(ractl, ra->size, ra->async_size);
}

void page_cache_sync_ra(struct readahead_control *ractl,
		unsigned long req_count)
{
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

	/*
	 * Even if readahead is disabled, issue this request as readahead
	 * as we'll need it to satisfy the requested range. The forced
	 * readahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
	if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
		do_forced_ra = true;
	}

	/* be dumb */
	if (do_forced_ra) {
		force_page_cache_ra(ractl, req_count);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(ractl, false, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
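
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * most callers do not build a readahead_control by hand; they go through
 * the wrappers in <linux/pagemap.h>, which look roughly like
 *
 *	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
 *	page_cache_sync_ra(&ractl, req_count);
 *
 * with an analogous page_cache_async_readahead() wrapper feeding
 * page_cache_async_ra() once a PG_readahead-marked page is hit.
 */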

void page_cache_async_ra(struct readahead_control *ractl,
		struct folio *folio, unsigned long req_count)
{
	/* no readahead */
	if (!ractl->ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (folio_test_writeback(folio))
		return;

	folio_clear_readahead(folio);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(ractl->mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do read-ahead */
	ondemand_readahead(ractl, true, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);

ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
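
/*
 * From user space (editor's illustration, not part of the original file),
 * the syscall above is reached through the readahead(2) wrapper, e.g.:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	// Ask the kernel to pre-populate the first 2MiB of the file.
 *	if (readahead(fd, 0, 2 * 1024 * 1024) != 0)
 *		perror("readahead");
 *
 * The call is advisory: it returns 0 on success, and the data may still
 * have to be fetched on first access if memory pressure evicts it.
 */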

/**
 * readahead_expand - Expand a readahead request
 * @ractl: The request to be expanded
 * @new_start: The revised start
 * @new_len: The revised size of the request
 *
 * Attempt to expand a readahead request outwards from the current size to
 * the specified size by inserting locked pages before and after the current
 * window to increase the size to the new window.
 *
 * The expansion stops early if it encounters a conflicting page already in
 * the pagecache, or if a page allocation or insertion fails, leaving a
 * smaller expansion than requested.  The caller must check for this by
 * examining the revised @ractl object.
 */
void readahead_expand(struct readahead_control *ractl,
		      loff_t new_start, size_t new_len)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	pgoff_t new_index, new_nr_pages;
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	new_index = new_start / PAGE_SIZE;

	/* Expand the leading edge downwards */
	while (ractl->_index > new_index) {
		unsigned long index = ractl->_index - 1;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}

		ractl->_nr_pages++;
		ractl->_index = page->index;
	}

	new_len += new_start - readahead_pos(ractl);
	new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);

	/* Expand the trailing edge upwards */
	while (ractl->_nr_pages < new_nr_pages) {
		unsigned long index = ractl->_index + ractl->_nr_pages;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}
		ractl->_nr_pages++;
		if (ra) {
			ra->size++;
			ra->async_size++;
		}
	}
}
EXPORT_SYMBOL(readahead_expand);
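
/*
 * Usage sketch (editor's illustration, not part of the original file):
 * a filesystem ->readahead() implementation that must read in units of a
 * larger block or cache granule might widen the window before issuing I/O:
 *
 *	loff_t start = round_down(readahead_pos(ractl), granule);
 *	size_t len = round_up(readahead_pos(ractl) + readahead_length(ractl),
 *			      granule) - start;
 *
 *	readahead_expand(ractl, start, len);
 *
 * where "granule" is a hypothetical power-of-two alignment.  The caller
 * must then re-read readahead_pos()/readahead_length() to see how much of
 * the expansion actually succeeded.
 */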