1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/dax.h>
13#include <linux/gfp.h>
14#include <linux/export.h>
15#include <linux/blkdev.h>
16#include <linux/backing-dev.h>
17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h>
19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
22#include <linux/mm_inline.h>
23#include <linux/blk-cgroup.h>
24#include <linux/fadvise.h>
25#include <linux/sched/mm.h>
26
27#include "internal.h"
28
29
30
31
32
33void
34file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
35{
36 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
37 ra->prev_pos = -1;
38}
39EXPORT_SYMBOL_GPL(file_ra_state_init);
40
41
42
43
44
45
46
47
48static void read_cache_pages_invalidate_page(struct address_space *mapping,
49 struct page *page)
50{
51 if (page_has_private(page)) {
52 if (!trylock_page(page))
53 BUG();
54 page->mapping = mapping;
55 do_invalidatepage(page, 0, PAGE_SIZE);
56 page->mapping = NULL;
57 unlock_page(page);
58 }
59 put_page(page);
60}
61
62
63
64
65static void read_cache_pages_invalidate_pages(struct address_space *mapping,
66 struct list_head *pages)
67{
68 struct page *victim;
69
70 while (!list_empty(pages)) {
71 victim = lru_to_page(pages);
72 list_del(&victim->lru);
73 read_cache_pages_invalidate_page(mapping, victim);
74 }
75}
76
77
78
79
80
81
82
83
84
85
86
87
88
89int read_cache_pages(struct address_space *mapping, struct list_head *pages,
90 int (*filler)(void *, struct page *), void *data)
91{
92 struct page *page;
93 int ret = 0;
94
95 while (!list_empty(pages)) {
96 page = lru_to_page(pages);
97 list_del(&page->lru);
98 if (add_to_page_cache_lru(page, mapping, page->index,
99 readahead_gfp_mask(mapping))) {
100 read_cache_pages_invalidate_page(mapping, page);
101 continue;
102 }
103 put_page(page);
104
105 ret = filler(data, page);
106 if (unlikely(ret)) {
107 read_cache_pages_invalidate_pages(mapping, pages);
108 break;
109 }
110 task_io_account_read(PAGE_SIZE);
111 }
112 return ret;
113}
114
115EXPORT_SYMBOL(read_cache_pages);
116
117static void read_pages(struct readahead_control *rac, struct list_head *pages,
118 bool skip_page)
119{
120 const struct address_space_operations *aops = rac->mapping->a_ops;
121 struct page *page;
122 struct blk_plug plug;
123
124 if (!readahead_count(rac))
125 goto out;
126
127 blk_start_plug(&plug);
128
129 if (aops->readahead) {
130 aops->readahead(rac);
131
132 while ((page = readahead_page(rac))) {
133 unlock_page(page);
134 put_page(page);
135 }
136 } else if (aops->readpages) {
137 aops->readpages(rac->file, rac->mapping, pages,
138 readahead_count(rac));
139
140 put_pages_list(pages);
141 rac->_index += rac->_nr_pages;
142 rac->_nr_pages = 0;
143 } else {
144 while ((page = readahead_page(rac))) {
145 aops->readpage(rac->file, page);
146 put_page(page);
147 }
148 }
149
150 blk_finish_plug(&plug);
151
152 BUG_ON(!list_empty(pages));
153 BUG_ON(readahead_count(rac));
154
155out:
156 if (skip_page)
157 rac->_index++;
158}
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174void page_cache_ra_unbounded(struct readahead_control *ractl,
175 unsigned long nr_to_read, unsigned long lookahead_size)
176{
177 struct address_space *mapping = ractl->mapping;
178 unsigned long index = readahead_index(ractl);
179 LIST_HEAD(page_pool);
180 gfp_t gfp_mask = readahead_gfp_mask(mapping);
181 unsigned long i;
182
183
184
185
186
187
188
189
190
191
192
193 unsigned int nofs = memalloc_nofs_save();
194
195
196
197
198 for (i = 0; i < nr_to_read; i++) {
199 struct page *page = xa_load(&mapping->i_pages, index + i);
200
201 if (page && !xa_is_value(page)) {
202
203
204
205
206
207
208
209
210 read_pages(ractl, &page_pool, true);
211 i = ractl->_index + ractl->_nr_pages - index - 1;
212 continue;
213 }
214
215 page = __page_cache_alloc(gfp_mask);
216 if (!page)
217 break;
218 if (mapping->a_ops->readpages) {
219 page->index = index + i;
220 list_add(&page->lru, &page_pool);
221 } else if (add_to_page_cache_lru(page, mapping, index + i,
222 gfp_mask) < 0) {
223 put_page(page);
224 read_pages(ractl, &page_pool, true);
225 i = ractl->_index + ractl->_nr_pages - index - 1;
226 continue;
227 }
228 if (i == nr_to_read - lookahead_size)
229 SetPageReadahead(page);
230 ractl->_nr_pages++;
231 }
232
233
234
235
236
237
238 read_pages(ractl, &page_pool, false);
239 memalloc_nofs_restore(nofs);
240}
241EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
242
243
244
245
246
247
248
249void do_page_cache_ra(struct readahead_control *ractl,
250 unsigned long nr_to_read, unsigned long lookahead_size)
251{
252 struct inode *inode = ractl->mapping->host;
253 unsigned long index = readahead_index(ractl);
254 loff_t isize = i_size_read(inode);
255 pgoff_t end_index;
256
257 if (isize == 0)
258 return;
259
260 end_index = (isize - 1) >> PAGE_SHIFT;
261 if (index > end_index)
262 return;
263
264 if (nr_to_read > end_index - index)
265 nr_to_read = end_index - index + 1;
266
267 page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
268}
269
270
271
272
273
274void force_page_cache_ra(struct readahead_control *ractl,
275 unsigned long nr_to_read)
276{
277 struct address_space *mapping = ractl->mapping;
278 struct file_ra_state *ra = ractl->ra;
279 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
280 unsigned long max_pages, index;
281
282 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
283 !mapping->a_ops->readahead))
284 return;
285
286
287
288
289
290 index = readahead_index(ractl);
291 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
292 nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
293 while (nr_to_read) {
294 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
295
296 if (this_chunk > nr_to_read)
297 this_chunk = nr_to_read;
298 ractl->_index = index;
299 do_page_cache_ra(ractl, this_chunk, 0);
300
301 index += this_chunk;
302 nr_to_read -= this_chunk;
303 }
304}
305
306
307
308
309
310
311
/*
 * Pick the initial readahead window size for a request of @size pages.
 *
 * @size is rounded up to a power of two, then:
 *   - quadrupled if the rounded size is at most @max / 32,
 *   - doubled if it is at most @max / 4,
 *   - otherwise replaced by @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long rounded = roundup_pow_of_two(size);

	if (rounded <= max / 32)
		return rounded * 4;

	if (rounded <= max / 4)
		return rounded * 2;

	return max;
}
325
326
327
328
329
330static unsigned long get_next_ra_size(struct file_ra_state *ra,
331 unsigned long max)
332{
333 unsigned long cur = ra->size;
334
335 if (cur < max / 16)
336 return 4 * cur;
337 if (cur <= max / 2)
338 return 2 * cur;
339 return max;
340}
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387static pgoff_t count_history_pages(struct address_space *mapping,
388 pgoff_t index, unsigned long max)
389{
390 pgoff_t head;
391
392 rcu_read_lock();
393 head = page_cache_prev_miss(mapping, index - 1, max);
394 rcu_read_unlock();
395
396 return index - 1 - head;
397}
398
399
400
401
402static int try_context_readahead(struct address_space *mapping,
403 struct file_ra_state *ra,
404 pgoff_t index,
405 unsigned long req_size,
406 unsigned long max)
407{
408 pgoff_t size;
409
410 size = count_history_pages(mapping, index, max);
411
412
413
414
415
416 if (size <= req_size)
417 return 0;
418
419
420
421
422
423 if (size >= index)
424 size *= 2;
425
426 ra->start = index;
427 ra->size = min(size + req_size, max);
428 ra->async_size = 1;
429
430 return 1;
431}
432
433
434
435
436static void ondemand_readahead(struct readahead_control *ractl,
437 bool hit_readahead_marker, unsigned long req_size)
438{
439 struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
440 struct file_ra_state *ra = ractl->ra;
441 unsigned long max_pages = ra->ra_pages;
442 unsigned long add_pages;
443 unsigned long index = readahead_index(ractl);
444 pgoff_t prev_index;
445
446
447
448
449
450 if (req_size > max_pages && bdi->io_pages > max_pages)
451 max_pages = min(req_size, bdi->io_pages);
452
453
454
455
456 if (!index)
457 goto initial_readahead;
458
459
460
461
462
463 if ((index == (ra->start + ra->size - ra->async_size) ||
464 index == (ra->start + ra->size))) {
465 ra->start += ra->size;
466 ra->size = get_next_ra_size(ra, max_pages);
467 ra->async_size = ra->size;
468 goto readit;
469 }
470
471
472
473
474
475
476
477 if (hit_readahead_marker) {
478 pgoff_t start;
479
480 rcu_read_lock();
481 start = page_cache_next_miss(ractl->mapping, index + 1,
482 max_pages);
483 rcu_read_unlock();
484
485 if (!start || start - index > max_pages)
486 return;
487
488 ra->start = start;
489 ra->size = start - index;
490 ra->size += req_size;
491 ra->size = get_next_ra_size(ra, max_pages);
492 ra->async_size = ra->size;
493 goto readit;
494 }
495
496
497
498
499 if (req_size > max_pages)
500 goto initial_readahead;
501
502
503
504
505
506
507 prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
508 if (index - prev_index <= 1UL)
509 goto initial_readahead;
510
511
512
513
514
515 if (try_context_readahead(ractl->mapping, ra, index, req_size,
516 max_pages))
517 goto readit;
518
519
520
521
522
523 do_page_cache_ra(ractl, req_size, 0);
524 return;
525
526initial_readahead:
527 ra->start = index;
528 ra->size = get_init_ra_size(req_size, max_pages);
529 ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
530
531readit:
532
533
534
535
536
537
538 if (index == ra->start && ra->size == ra->async_size) {
539 add_pages = get_next_ra_size(ra, max_pages);
540 if (ra->size + add_pages <= max_pages) {
541 ra->async_size = add_pages;
542 ra->size += add_pages;
543 } else {
544 ra->size = max_pages;
545 ra->async_size = max_pages >> 1;
546 }
547 }
548
549 ractl->_index = ra->start;
550 do_page_cache_ra(ractl, ra->size, ra->async_size);
551}
552
553void page_cache_sync_ra(struct readahead_control *ractl,
554 unsigned long req_count)
555{
556 bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
557
558
559
560
561
562
563
564 if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
565 if (!ractl->file)
566 return;
567 req_count = 1;
568 do_forced_ra = true;
569 }
570
571
572 if (do_forced_ra) {
573 force_page_cache_ra(ractl, req_count);
574 return;
575 }
576
577
578 ondemand_readahead(ractl, false, req_count);
579}
580EXPORT_SYMBOL_GPL(page_cache_sync_ra);
581
582void page_cache_async_ra(struct readahead_control *ractl,
583 struct page *page, unsigned long req_count)
584{
585
586 if (!ractl->ra->ra_pages)
587 return;
588
589
590
591
592 if (PageWriteback(page))
593 return;
594
595 ClearPageReadahead(page);
596
597
598
599
600 if (inode_read_congested(ractl->mapping->host))
601 return;
602
603 if (blk_cgroup_congested())
604 return;
605
606
607 ondemand_readahead(ractl, true, req_count);
608}
609EXPORT_SYMBOL_GPL(page_cache_async_ra);
610
611ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
612{
613 ssize_t ret;
614 struct fd f;
615
616 ret = -EBADF;
617 f = fdget(fd);
618 if (!f.file || !(f.file->f_mode & FMODE_READ))
619 goto out;
620
621
622
623
624
625
626 ret = -EINVAL;
627 if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
628 !S_ISREG(file_inode(f.file)->i_mode))
629 goto out;
630
631 ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
632out:
633 fdput(f);
634 return ret;
635}
636
637SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
638{
639 return ksys_readahead(fd, offset, count);
640}
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660void readahead_expand(struct readahead_control *ractl,
661 loff_t new_start, size_t new_len)
662{
663 struct address_space *mapping = ractl->mapping;
664 struct file_ra_state *ra = ractl->ra;
665 pgoff_t new_index, new_nr_pages;
666 gfp_t gfp_mask = readahead_gfp_mask(mapping);
667
668 new_index = new_start / PAGE_SIZE;
669
670
671 while (ractl->_index > new_index) {
672 unsigned long index = ractl->_index - 1;
673 struct page *page = xa_load(&mapping->i_pages, index);
674
675 if (page && !xa_is_value(page))
676 return;
677
678 page = __page_cache_alloc(gfp_mask);
679 if (!page)
680 return;
681 if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
682 put_page(page);
683 return;
684 }
685
686 ractl->_nr_pages++;
687 ractl->_index = page->index;
688 }
689
690 new_len += new_start - readahead_pos(ractl);
691 new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);
692
693
694 while (ractl->_nr_pages < new_nr_pages) {
695 unsigned long index = ractl->_index + ractl->_nr_pages;
696 struct page *page = xa_load(&mapping->i_pages, index);
697
698 if (page && !xa_is_value(page))
699 return;
700
701 page = __page_cache_alloc(gfp_mask);
702 if (!page)
703 return;
704 if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
705 put_page(page);
706 return;
707 }
708 ractl->_nr_pages++;
709 if (ra) {
710 ra->size++;
711 ra->async_size++;
712 }
713 }
714}
715EXPORT_SYMBOL(readahead_expand);
716