1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/dax.h>
13#include <linux/gfp.h>
14#include <linux/export.h>
15#include <linux/blkdev.h>
16#include <linux/backing-dev.h>
17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h>
19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
22#include <linux/mm_inline.h>
23#include <linux/blk-cgroup.h>
24#include <linux/fadvise.h>
25#include <linux/sched/mm.h>
26
27#include "internal.h"
28
29
30
31
32
33void
34file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
35{
36 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
37 ra->prev_pos = -1;
38}
39EXPORT_SYMBOL_GPL(file_ra_state_init);
40
41
42
43
44
45
46
47
/*
 * See if a page needs releasing upon read_cache_pages() failure.
 * The caller may have attached private data (e.g. PG_private) to the
 * page before calling, so we give the filesystem a chance to clean up
 * via do_invalidatepage() before dropping our reference.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		/* Page is not yet visible to others, so trylock must succeed. */
		if (!trylock_page(page))
			BUG();
		/* Temporarily wire up ->mapping so invalidatepage can use it. */
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}
61
62
63
64
65static void read_cache_pages_invalidate_pages(struct address_space *mapping,
66 struct list_head *pages)
67{
68 struct page *victim;
69
70 while (!list_empty(pages)) {
71 victim = lru_to_page(pages);
72 list_del(&victim->lru);
73 read_cache_pages_invalidate_page(mapping, victim);
74 }
75}
76
77
78
79
80
81
82
83
84
85
86
87
88
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Returns: %0 on success, error return by @filler otherwise
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop ours. */
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: drop everything still queued. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
116
/*
 * Submit the pages described by @rac (and, for the ->readpages() path,
 * still linked on @pages) for I/O, preferring ->readahead(), then
 * ->readpages(), then per-page ->readpage().  On return @rac is empty
 * and @pages has been drained.  If @skip_page, advance past one extra
 * (already-cached) page after submitting.
 */
static void read_pages(struct readahead_control *rac, struct list_head *pages,
		bool skip_page)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (!readahead_count(rac))
		goto out;

	/* Plug the block layer so the batch merges into fewer requests. */
	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/* Clean up any pages the filesystem left unconsumed. */
		while ((page = readahead_page(rac))) {
			unlock_page(page);
			put_page(page);
		}
	} else if (aops->readpages) {
		aops->readpages(rac->file, rac->mapping, pages,
				readahead_count(rac));
		/* Clean up the remaining pages */
		put_pages_list(pages);
		rac->_index += rac->_nr_pages;
		rac->_nr_pages = 0;
	} else {
		/* Fall back to one ->readpage() call per page. */
		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

	blk_finish_plug(&plug);

	BUG_ON(!list_empty(pages));
	BUG_ON(readahead_count(rac));

out:
	if (skip_page)
		rac->_index++;
}
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/**
 * page_cache_readahead_unbounded - Start unchecked readahead.
 * @mapping: File address space.
 * @file: This instance of the open file; used for authentication.
 * @index: First page index to read.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_readahead_unbounded(struct address_space *mapping,
		struct file *file, pgoff_t index, unsigned long nr_to_read,
		unsigned long lookahead_size)
{
	LIST_HEAD(page_pool);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	struct readahead_control rac = {
		.mapping = mapping,
		.file = file,
		._index = index,
	};
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked pages to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another page may need to allocate memory,
	 * which can trigger memory reclaim.  Telling the VM we're in
	 * the middle of a filesystem operation will cause it to not
	 * touch file-backed pages, preventing a deadlock.
	 */
	unsigned int nofs = memalloc_nofs_save();

	/*
	 * Walk the requested range, allocating and inserting pages for
	 * every index not already present in the page cache.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct page *page = xa_load(&mapping->i_pages, index + i);

		BUG_ON(index + i != rac._index + rac._nr_pages);

		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.
			 */
			read_pages(&rac, &page_pool, true);
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		if (mapping->a_ops->readpages) {
			/* ->readpages() takes a private list, not the cache. */
			page->index = index + i;
			list_add(&page->lru, &page_pool);
		} else if (add_to_page_cache_lru(page, mapping, index + i,
					gfp_mask) < 0) {
			/* Insert raced or failed; flush the batch and retry area. */
			put_page(page);
			read_pages(&rac, &page_pool, true);
			continue;
		}
		/* Mark the page that should trigger the next readahead. */
		if (i == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		rac._nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	read_pages(&rac, &page_pool, false);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_readahead_unbounded);
248
249
250
251
252
253
254
255void __do_page_cache_readahead(struct address_space *mapping,
256 struct file *file, pgoff_t index, unsigned long nr_to_read,
257 unsigned long lookahead_size)
258{
259 struct inode *inode = mapping->host;
260 loff_t isize = i_size_read(inode);
261 pgoff_t end_index;
262
263 if (isize == 0)
264 return;
265
266 end_index = (isize - 1) >> PAGE_SHIFT;
267 if (index > end_index)
268 return;
269
270 if (nr_to_read > end_index - index)
271 nr_to_read = end_index - index + 1;
272
273 page_cache_readahead_unbounded(mapping, file, index, nr_to_read,
274 lookahead_size);
275}
276
277
278
279
280
281void force_page_cache_readahead(struct address_space *mapping,
282 struct file *filp, pgoff_t index, unsigned long nr_to_read)
283{
284 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
285 struct file_ra_state *ra = &filp->f_ra;
286 unsigned long max_pages;
287
288 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
289 !mapping->a_ops->readahead))
290 return;
291
292
293
294
295
296 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
297 nr_to_read = min(nr_to_read, max_pages);
298 while (nr_to_read) {
299 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
300
301 if (this_chunk > nr_to_read)
302 this_chunk = nr_to_read;
303 __do_page_cache_readahead(mapping, filp, index, this_chunk, 0);
304
305 index += this_chunk;
306 nr_to_read -= this_chunk;
307 }
308}
309
310
311
312
313
314
315
/*
 * Pick the initial readahead window: round the request up to a power
 * of two, then scale small windows up aggressively (4x below max/32,
 * 2x below max/4) and clamp everything else at @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		return newsize * 4;
	if (newsize <= max / 4)
		return newsize * 2;
	return max;
}
329
330
331
332
333
334static unsigned long get_next_ra_size(struct file_ra_state *ra,
335 unsigned long max)
336{
337 unsigned long cur = ra->size;
338
339 if (cur < max / 16)
340 return 4 * cur;
341 if (cur <= max / 2)
342 return 2 * cur;
343 return max;
344}
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
/*
 * Count the number of contiguous pages cached immediately before @index,
 * stopping at the first hole found walking backwards from @index - 1 and
 * looking back at most @max pages.  Used as evidence of an interleaved
 * sequential stream.
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}
402
403
404
405
/*
 * page cache context based readahead: infer a sequential stream from
 * the history pages it has already left in the cache.  Returns 1 and
 * fills in @ra if a plausible stream was detected, 0 otherwise.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}
436
437
438
439
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *filp,
		bool hit_readahead_marker, pgoff_t index,
		unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	pgoff_t prev_index;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size.
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((index == (ra->start + ra->size - ra->async_size) ||
	     index == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(mapping, index + 1, max_pages);
		rcu_read_unlock();

		/* No cache hole ahead, or it's too far away: do nothing. */
		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history
	 * pages) that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, index, req_size, max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	__do_page_cache_readahead(mapping, filp, index, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ra_submit(ra, mapping, filp);
}
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567void page_cache_sync_readahead(struct address_space *mapping,
568 struct file_ra_state *ra, struct file *filp,
569 pgoff_t index, unsigned long req_count)
570{
571
572 if (!ra->ra_pages)
573 return;
574
575 if (blk_cgroup_congested())
576 return;
577
578
579 if (filp && (filp->f_mode & FMODE_RANDOM)) {
580 force_page_cache_readahead(mapping, filp, index, req_count);
581 return;
582 }
583
584
585 ondemand_readahead(mapping, ra, filp, false, index, req_count);
586}
587EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603void
604page_cache_async_readahead(struct address_space *mapping,
605 struct file_ra_state *ra, struct file *filp,
606 struct page *page, pgoff_t index,
607 unsigned long req_count)
608{
609
610 if (!ra->ra_pages)
611 return;
612
613
614
615
616 if (PageWriteback(page))
617 return;
618
619 ClearPageReadahead(page);
620
621
622
623
624 if (inode_read_congested(mapping->host))
625 return;
626
627 if (blk_cgroup_congested())
628 return;
629
630
631 ondemand_readahead(mapping, ra, filp, true, index, req_count);
632}
633EXPORT_SYMBOL_GPL(page_cache_async_readahead);
634
/*
 * Backend for the readahead(2) syscall: validate the file descriptor,
 * then delegate to vfs_fadvise(POSIX_FADV_WILLNEED) which performs the
 * actual readahead.
 */
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	/* Safe even when f.file is NULL — fdput() handles that case. */
	fdput(f);
	return ret;
}
660
/* readahead(2) syscall entry point: thin wrapper around ksys_readahead(). */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
665