// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	Andrew Morton
 *		Initial version.
 */
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>

#include "internal.h"
/*
 * Initialise a struct file's readahead state.  Assume that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
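
/*
 * Example (illustrative, not part of this file): the VFS initialises the
 * readahead state for every file as it is opened (do_dentry_open() in
 * fs/open.c does this).  A sketch of such a call site, where 'file' and
 * 'inode' are assumptions:
 *
 *	file->f_mapping = inode->i_mapping;
 *	file_ra_state_init(&file->f_ra, file->f_mapping);
 *
 * Afterwards f_ra starts with the backing device's default window
 * (ra_pages) and prev_pos == -1, meaning "no previous read".
 */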

/*
 * see if a page needs releasing upon read_cache_pages() failure
 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
 *   before calling, such as the NFS fs marking pages that are cached locally
 *   on disk, thus we need to give the fs a chance to clean up in the event of
 *   an error
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		if (!trylock_page(page))
			BUG();
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}

/*
 * release a list of pages, invalidating them first if need be
 */
static void read_cache_pages_invalidate_pages(struct address_space *mapping,
					      struct list_head *pages)
{
	struct page *victim;

	while (!list_empty(pages)) {
		victim = lru_to_page(pages);
		list_del(&victim->lru);
		read_cache_pages_invalidate_page(mapping, victim);
	}
}

/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hydrates the address space with the given pages.  If a page already exists,
 * hydrate it.  If not, populate the address space with one page.
 *
 * Return: %0 on success, otherwise the first error returned by @filler.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
		int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop ours */
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
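
/*
 * Example (illustrative, not part of this file): a minimal filler callback
 * such as a legacy ->readpages() style caller might pass in.  The context
 * structure and my_fs_read_page() helper are hypothetical:
 *
 *	static int my_fs_fill_page(void *data, struct page *page)
 *	{
 *		struct my_fs_read_ctx *ctx = data;
 *
 *		return my_fs_read_page(ctx, page);
 *	}
 *
 *	err = read_cache_pages(mapping, &page_list, my_fs_fill_page, ctx);
 */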

static void read_pages(struct readahead_control *rac, struct list_head *pages,
		bool skip_page)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (!readahead_count(rac))
		goto out;

	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/* Clean up the remaining pages */
		while ((page = readahead_page(rac))) {
			unlock_page(page);
			put_page(page);
		}
	} else if (aops->readpages) {
		aops->readpages(rac->file, rac->mapping, pages,
				readahead_count(rac));
		/* Clean up the remaining pages */
		put_pages_list(pages);
		rac->_index += rac->_nr_pages;
		rac->_nr_pages = 0;
	} else {
		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

	blk_finish_plug(&plug);

	BUG_ON(!list_empty(pages));
	BUG_ON(readahead_count(rac));

out:
	if (skip_page)
		rac->_index++;
}

/**
 * page_cache_ra_unbounded - Start unchecked readahead.
 * @ractl: Readahead control.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;
	unsigned long index = readahead_index(ractl);
	LIST_HEAD(page_pool);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked pages to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another page may need to allocate memory,
	 * which can trigger memory reclaim.  Telling the VM we're in
	 * the middle of a filesystem operation will cause it to not
	 * touch file-backed pages, preventing a deadlock.  Most (all?)
	 * filesystems already specify __GFP_NOFS in their mapping's
	 * gfp_mask, but let's be explicit here.
	 */
	unsigned int nofs = memalloc_nofs_save();

	filemap_invalidate_lock_shared(mapping);
	/*
	 * Preallocate as many pages as we will need.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct page *page = xa_load(&mapping->i_pages, index + i);

		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.  This page may be the one we would
			 * have intended to mark as Readahead, but we don't
			 * have a stable reference to this page, and it's
			 * not worth getting one just for that.
			 */
			read_pages(ractl, &page_pool, true);
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		if (mapping->a_ops->readpages) {
			page->index = index + i;
			list_add(&page->lru, &page_pool);
		} else if (add_to_page_cache_lru(page, mapping, index + i,
					gfp_mask) < 0) {
			put_page(page);
			read_pages(ractl, &page_pool, true);
			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}
		if (i == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ractl->_nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	read_pages(ractl, &page_pool, false);
	filemap_invalidate_unlock_shared(mapping);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
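
/*
 * Example (illustrative, not part of this file): a filesystem that stores
 * metadata past i_size (fs-verity Merkle tree blocks, say) can kick off
 * readahead for it with this interface.  A sketch, where 'inode', 'index'
 * and 'nr_pages' are assumptions supplied by the caller:
 *
 *	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
 *
 *	page_cache_ra_unbounded(&ractl, nr_pages, 0);
 */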

/*
 * do_page_cache_ra() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 */
void do_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct inode *inode = ractl->mapping->host;
	unsigned long index = readahead_index(ractl);
	loff_t isize = i_size_read(inode);
	pgoff_t end_index;	/* The last page we want to read */

	if (isize == 0)
		return;

	end_index = (isize - 1) >> PAGE_SHIFT;
	if (index > end_index)
		return;
	/* Don't read past the page containing the last byte of the file */
	if (nr_to_read > end_index - index)
		nr_to_read = end_index - index + 1;

	page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
}
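
/*
 * Worked example (not from the original source): with 4KiB pages and
 * i_size = 10000 bytes, end_index is (10000 - 1) >> PAGE_SHIFT = 2.  A
 * request starting at index 2 for 8 pages is clamped to
 * end_index - index + 1 = 1 page, so readahead never reads past the page
 * containing the last byte of the file.
 */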

/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
void force_page_cache_ra(struct readahead_control *ractl,
		unsigned long nr_to_read)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
			!mapping->a_ops->readahead))
		return;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	index = readahead_index(ractl);
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		ractl->_index = index;
		do_page_cache_ra(ractl, this_chunk, 0);

		index += this_chunk;
		nr_to_read -= this_chunk;
	}
}
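
/*
 * Worked example (not from the original source): with 4KiB pages,
 * this_chunk starts at (2 * 1024 * 1024) / 4096 = 512 pages, so a
 * 1200-page forced read (assuming it survived the clamp to max_pages)
 * is issued as three chunks of 512, 512 and 176 pages.
 */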

/*
 * Set the initial window size: round the request up to the next power of
 * two, then ramp it up (x4 for small requests, x2 for medium ones),
 * capping the result at @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}

/*
 * Get the previous window size, ramp it up, and
 * return it as the new window size.
 */
static unsigned long get_next_ra_size(struct file_ra_state *ra,
				      unsigned long max)
{
	unsigned long cur = ra->size;

	if (cur < max / 16)
		return 4 * cur;
	if (cur <= max / 2)
		return 2 * cur;
	return max;
}
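
/*
 * Worked example (not from the original source): with a default window of
 * max = 32 pages (128KiB on 4KiB pages), a sequential reader issuing
 * 1-page reads ramps up as follows:
 *
 *	get_init_ra_size(1, 32)	-> roundup_pow_of_two(1) = 1 <= 32/32, so 4
 *	get_next_ra_size(...)	-> 4  <= 32/2, so 8
 *	get_next_ra_size(...)	-> 8  <= 32/2, so 16
 *	get_next_ra_size(...)	-> 16 <= 32/2, so 32 (= max from then on)
 */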

/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * Instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window. Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * be invalidating each other's readahead state. So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as readahead
 * indicator. The flag won't be set on already cached pages, to avoid the
 * readahead-for-nothing fuss, saving pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _requested_ file. In some
 * cases, it could be the byte before the final readahead page.
 * Normally prev_pos is the position of the last read request.
 *
 * The code ramps up the readahead size aggressively at first, but slows
 * down as it approaches the maximum window size.
 */

/*
 * Count contiguously cached pages from @index-1 to @index-@max,
 * this count is a conservative estimation of
 *	- length of the sequential read sequence, or
 *	- thrashing threshold in memory tight systems
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}

/*
 * page cache context based readahead
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}

/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct readahead_control *ractl,
		bool hit_readahead_marker, unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
	struct file_ra_state *ra = ractl->ra;
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	unsigned long index = readahead_index(ractl);
	pgoff_t prev_index;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((index == (ra->start + ra->size - ra->async_size) ||
	     index == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(ractl->mapping, index + 1,
				max_pages);
		rcu_read_unlock();

		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(ractl->mapping, ra, index, req_size,
			max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	do_page_cache_ra(ractl, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulted next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ractl->_index = ra->start;
	do_page_cache_ra(ractl, ra->size, ra->async_size);
}

void page_cache_sync_ra(struct readahead_control *ractl,
		unsigned long req_count)
{
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

	/*
	 * Even if read-ahead is disabled, issue this request as read-ahead
	 * as we'll need it to satisfy the requested range. The forced
	 * read-ahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
	if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
		do_forced_ra = true;
	}

	/* be dumb */
	if (do_forced_ra) {
		force_page_cache_ra(ractl, req_count);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(ractl, false, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);
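
/*
 * Example (illustrative, not part of this file): generic read paths reach
 * page_cache_sync_ra() through the page_cache_sync_readahead() wrapper in
 * <linux/pagemap.h> when a read misses the page cache.  A sketch of such a
 * call site, where 'file', 'index' and 'nr_to_read' are assumptions:
 *
 *	page_cache_sync_readahead(file->f_mapping, &file->f_ra, file,
 *				  index, nr_to_read);
 */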

void page_cache_async_ra(struct readahead_control *ractl,
		struct page *page, unsigned long req_count)
{
	/* no read-ahead */
	if (!ractl->ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(ractl->mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do read-ahead */
	ondemand_readahead(ractl, true, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);

ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
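
/*
 * Example (illustrative, not part of this file): userspace drives this
 * syscall through the readahead(2) wrapper.  A minimal sketch, where
 * "data.bin" and the 1MiB length are assumptions:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	int fd = open("data.bin", O_RDONLY);
 *
 *	readahead(fd, 0, 1024 * 1024);	returns 0, or -1 with errno set
 */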

/**
 * readahead_expand - Expand a readahead request
 * @ractl: The request to be expanded
 * @new_start: The revised start
 * @new_len: The revised size of the request
 *
 * Attempt to expand a readahead request outwards from the current size to the
 * specified size by inserting locked pages before and after the current window
 * to increase the size to the new window.  This may involve the insertion of
 * THPs, in which case the window may get expanded even beyond what was
 * requested.
 *
 * The algorithm will stop if it encounters a conflicting page already in the
 * pagecache and return with the window size (possibly) only partially
 * expanded.
 *
 * If the window has been cleanly expanded to the specified size, it will be
 * fully expanded; but even if it has been only partially expanded, it will
 * still attempt to expand.
 */
void readahead_expand(struct readahead_control *ractl,
		      loff_t new_start, size_t new_len)
{
	struct address_space *mapping = ractl->mapping;
	struct file_ra_state *ra = ractl->ra;
	pgoff_t new_index, new_nr_pages;
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	new_index = new_start / PAGE_SIZE;

	/* Expand the leading edge downwards */
	while (ractl->_index > new_index) {
		unsigned long index = ractl->_index - 1;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}

		ractl->_nr_pages++;
		ractl->_index = page->index;
	}

	new_len += new_start - readahead_pos(ractl);
	new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);

	/* Expand the trailing edge upwards */
	while (ractl->_nr_pages < new_nr_pages) {
		unsigned long index = ractl->_index + ractl->_nr_pages;
		struct page *page = xa_load(&mapping->i_pages, index);

		if (page && !xa_is_value(page))
			return; /* Page apparently present */

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			return;
		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
			put_page(page);
			return;
		}
		ractl->_nr_pages++;
		if (ra) {
			ra->size++;
			ra->async_size++;
		}
	}
}
EXPORT_SYMBOL(readahead_expand);
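
/*
 * Example (illustrative, not part of this file): a network or caching
 * filesystem's ->readahead() might round the window outwards so it always
 * does I/O in whole cache granules; the 256KiB granule size here is an
 * assumption:
 *
 *	loff_t start = readahead_pos(ractl);
 *	size_t len = readahead_length(ractl);
 *	loff_t new_start = round_down(start, 256 * 1024);
 *	size_t new_len = round_up(start + len, 256 * 1024) - new_start;
 *
 *	readahead_expand(ractl, new_start, new_len);
 */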