// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/readahead.c - address_space-level file readahead.
 */

#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>

#include "internal.h"

/*
 * Initialise a struct file's readahead state.  Assume that the caller has
 * memset *ra to zero.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	/* Default window: the backing device's preferred readahead size. */
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	/* -1 marks "no previous read position known yet". */
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
/*
 * See if a page needs releasing upon read_cache_pages() failure
 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
 *   before calling, such as the NFS fs marking pages that are cached locally
 *   on disk, thus we need to give the fs a chance to clean up in the event
 *   of an error
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
		struct page *page)
{
	if (page_has_private(page)) {
		/* Page is not yet visible to other users, so the lock must be free. */
		if (!trylock_page(page))
			BUG();
		/* ->mapping must be set temporarily for do_invalidatepage(). */
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}
61
62
63
64
65static void read_cache_pages_invalidate_pages(struct address_space *mapping,
66 struct list_head *pages)
67{
68 struct page *victim;
69
70 while (!list_empty(pages)) {
71 victim = lru_to_page(pages);
72 list_del(&victim->lru);
73 read_cache_pages_invalidate_page(mapping, victim);
74 }
75}
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Returns: %0 on success, error return by @filler otherwise
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
		int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			/* Insertion failed: let the fs clean up, then drop the page. */
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop the list's. */
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: release everything not yet submitted. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);

/*
 * Submit the batched pages to the filesystem for reading, preferring
 * ->readahead() over the legacy ->readpages() over per-page ->readpage().
 * On return the readahead batch (and, for ->readpages(), the @pages list)
 * is empty.  If @skip_page is true, additionally advance the readahead
 * index past one page that was found already cached.
 */
static void read_pages(struct readahead_control *rac, struct list_head *pages,
		bool skip_page)
{
	const struct address_space_operations *aops = rac->mapping->a_ops;
	struct page *page;
	struct blk_plug plug;

	if (!readahead_count(rac))
		goto out;

	/* Plug the block layer so the reads can merge into larger I/Os. */
	blk_start_plug(&plug);

	if (aops->readahead) {
		aops->readahead(rac);
		/*
		 * Clean up the remaining pages: any page the fs did not
		 * consume is still locked and still holds our reference.
		 */
		while ((page = readahead_page(rac))) {
			unlock_page(page);
			put_page(page);
		}
	} else if (aops->readpages) {
		aops->readpages(rac->file, rac->mapping, pages,
				readahead_count(rac));
		/* Clean up the remaining pages */
		put_pages_list(pages);
		/* ->readpages() doesn't use rac, so drain its counters by hand. */
		rac->_index += rac->_nr_pages;
		rac->_nr_pages = 0;
	} else {
		while ((page = readahead_page(rac))) {
			aops->readpage(rac->file, page);
			put_page(page);
		}
	}

	blk_finish_plug(&plug);

	/* Every path above must have fully drained the batch. */
	BUG_ON(!list_empty(pages));
	BUG_ON(readahead_count(rac));

out:
	if (skip_page)
		rac->_index++;
}

/**
 * page_cache_ra_unbounded - Start unchecked readahead.
 * @ractl: Readahead control.
 * @nr_to_read: The number of pages to read.
 * @lookahead_size: Where to start the next readahead.
 *
 * This function is for filesystems to call when they want to start
 * readahead beyond a file's stated i_size.  This is almost certainly
 * not the function you want to call.  Use page_cache_async_readahead()
 * or page_cache_sync_readahead() instead.
 *
 * Context: File is referenced by caller.  Mutexes may be held by caller.
 * May sleep, but will not reenter filesystem to reclaim memory.
 */
void page_cache_ra_unbounded(struct readahead_control *ractl,
		unsigned long nr_to_read, unsigned long lookahead_size)
{
	struct address_space *mapping = ractl->mapping;
	unsigned long index = readahead_index(ractl);
	LIST_HEAD(page_pool);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);
	unsigned long i;

	/*
	 * Partway through the readahead operation, we will have added
	 * locked pages to the page cache, but will not yet have submitted
	 * them for I/O.  Adding another page may need to allocate memory,
	 * which can trigger memory reclaim.  Telling the VM we're in
	 * the middle of a filesystem operation will cause it to not
	 * touch file-backed pages, preventing a deadlock.  Most (all?)
	 * filesystems already specify __GFP_NOFS in their mapping's
	 * gfp_mask, but let's be explicit here.
	 */
	unsigned int nofs = memalloc_nofs_save();

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (i = 0; i < nr_to_read; i++) {
		struct page *page = xa_load(&mapping->i_pages, index + i);

		/* The batch must stay contiguous with the current position. */
		BUG_ON(index + i != ractl->_index + ractl->_nr_pages);

		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch of
			 * contiguous pages before continuing with the next
			 * batch.  This page may be the one we would have
			 * intended to mark as Readahead, but we don't have a
			 * stable reference to this page, and it's not worth
			 * getting one just for that.
			 */
			read_pages(ractl, &page_pool, true);
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		if (mapping->a_ops->readpages) {
			/* Legacy path: pages travel on a list, not in the cache. */
			page->index = index + i;
			list_add(&page->lru, &page_pool);
		} else if (add_to_page_cache_lru(page, mapping, index + i,
					gfp_mask) < 0) {
			/* Racing insertion: flush the batch and skip this index. */
			put_page(page);
			read_pages(ractl, &page_pool, true);
			continue;
		}
		/* Mark the trigger page for the next asynchronous readahead. */
		if (i == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ractl->_nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	read_pages(ractl, &page_pool, false);
	memalloc_nofs_restore(nofs);
}
EXPORT_SYMBOL_GPL(page_cache_ra_unbounded);
242
243
244
245
246
247
248
249void do_page_cache_ra(struct readahead_control *ractl,
250 unsigned long nr_to_read, unsigned long lookahead_size)
251{
252 struct inode *inode = ractl->mapping->host;
253 unsigned long index = readahead_index(ractl);
254 loff_t isize = i_size_read(inode);
255 pgoff_t end_index;
256
257 if (isize == 0)
258 return;
259
260 end_index = (isize - 1) >> PAGE_SHIFT;
261 if (index > end_index)
262 return;
263
264 if (nr_to_read > end_index - index)
265 nr_to_read = end_index - index + 1;
266
267 page_cache_ra_unbounded(ractl, nr_to_read, lookahead_size);
268}

/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
void force_page_cache_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, unsigned long nr_to_read)
{
	struct address_space *mapping = ractl->mapping;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;

	/* No way to read pages at all?  Nothing to do. */
	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
			!mapping->a_ops->readahead))
		return;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	index = readahead_index(ractl);
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		/* Each chunk restarts from where the previous one ended. */
		ractl->_index = index;
		do_page_cache_ra(ractl, this_chunk, 0);

		index += this_chunk;
		nr_to_read -= this_chunk;
	}
}

/*
 * Pick the size of the initial readahead window for a fresh stream:
 * round the request up to a power of two, then scale it (x4 while it is
 * small relative to @max, x2 while it is medium), never exceeding @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		return newsize * 4;
	if (newsize <= max / 4)
		return newsize * 2;
	return max;
}
324
325
326
327
328
329static unsigned long get_next_ra_size(struct file_ra_state *ra,
330 unsigned long max)
331{
332 unsigned long cur = ra->size;
333
334 if (cur < max / 16)
335 return 4 * cur;
336 if (cur <= max / 2)
337 return 2 * cur;
338 return max;
339}

/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': Do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * Instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * When a readahead-marked page is later hit without valid readahead state
 * (e.g. interleaved sequential streams on the same fd), the window is
 * rebuilt by scanning the page cache around the hit — see
 * ondemand_readahead() below.
 *
 * prev_pos tracks the last visited byte in the _requested_ file; it is
 * maintained by the caller and used here to detect small random reads.
 */

/*
 * Count contiguously cached pages from @index-1 to @index-@max,
 * this count is a conservative estimation of
 *	- length of the sequential read sequence, or
 *	- thrashing threshold in memory tight systems
 */
static pgoff_t count_history_pages(struct address_space *mapping,
		pgoff_t index, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, index - 1, max);
	rcu_read_unlock();

	return index - 1 - head;
}

/*
 * page cache context based readahead: decide whether the cached history
 * behind @index looks like a sequential stream, and if so set up a new
 * readahead window in @ra.  Returns 1 when a window was set, 0 otherwise.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t index,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, index, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= index)
		size *= 2;

	ra->start = index;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}

/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
static void ondemand_readahead(struct readahead_control *ractl,
		struct file_ra_state *ra, bool hit_readahead_marker,
		unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	unsigned long index = readahead_index(ractl);
	pgoff_t prev_index;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!index)
		goto initial_readahead;

	/*
	 * It's the expected callback index, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((index == (ra->start + ra->size - ra->async_size) ||
	     index == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * the number of pages to discover the next readahead window, by
	 * just scanning ahead of @index.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(ractl->mapping, index + 1,
				max_pages);
		rcu_read_unlock();

		if (!start || start - index > max_pages)
			return;

		ra->start = start;
		ra->size = start - index;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (index - prev_index) == 1
	 * unaligned reads: (index - prev_index) == 0
	 */
	prev_index = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (index - prev_index <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces(cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(ractl->mapping, ra, index, req_size,
			max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	do_page_cache_ra(ractl, req_size, 0);
	return;

initial_readahead:
	ra->start = index;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the two readahead request sizes into one readahead
	 */
	if (index == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	ractl->_index = ra->start;
	do_page_cache_ra(ractl, ra->size, ra->async_size);
}

/*
 * Synchronous readahead entry point: the caller is about to read at the
 * current readahead index and the page is not yet cached.
 */
void page_cache_sync_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, unsigned long req_count)
{
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);

	/*
	 * Even if readahead is disabled, issue this request as readahead
	 * as we'll need it to satisfy the requested range. The forced
	 * readahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
	if (!ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
		do_forced_ra = true;
	}

	/* be dumb */
	if (do_forced_ra) {
		force_page_cache_ra(ractl, ra, req_count);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(ractl, ra, false, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_sync_ra);

/*
 * Asynchronous readahead entry point: the caller just hit @page, which
 * carries the PG_readahead marker set by a previous readahead pass.
 */
void page_cache_async_ra(struct readahead_control *ractl,
		struct file_ra_state *ra, struct page *page,
		unsigned long req_count)
{
	/* no readahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous readahead on IO congestion.
	 */
	if (inode_read_congested(ractl->mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do readahead */
	ondemand_readahead(ractl, ra, true, req_count);
}
EXPORT_SYMBOL_GPL(page_cache_async_ra);

/*
 * Backend for the readahead(2) syscall: resolve @fd and ask the VFS to
 * populate the page cache for @count bytes at @offset, via the
 * POSIX_FADV_WILLNEED fadvise path.
 */
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

/* readahead(2) system call entry point; all work is in ksys_readahead(). */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}