1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/dax.h>
12#include <linux/gfp.h>
13#include <linux/export.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18#include <linux/pagemap.h>
19#include <linux/syscalls.h>
20#include <linux/file.h>
21#include <linux/mm_inline.h>
22#include <linux/blk-cgroup.h>
23#include <linux/fadvise.h>
24
25#include "internal.h"
26
27
28
29
30
/*
 * Initialise a file's readahead state: seed the readahead window size
 * from the backing device and mark the previous read position invalid.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	/* Default window size comes from the inode's backing device. */
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	/* -1 == "no previous read yet"; consulted to detect sequentiality. */
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
38
39
40
41
42
43
44
45
/*
 * Release a page which was not (or could not be) inserted into the page
 * cache.  If the filesystem attached private data to it, give the fs a
 * chance to detach it via do_invalidatepage(); that call requires the
 * page to be locked and to have a non-NULL ->mapping, so both are set
 * up temporarily here.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		/* Page is not visible to anyone else yet, so this can't fail. */
		if (!trylock_page(page))
			BUG();
		/* do_invalidatepage() expects page->mapping to be set. */
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}
59
60
61
62
63static void read_cache_pages_invalidate_pages(struct address_space *mapping,
64 struct list_head *pages)
65{
66 struct page *victim;
67
68 while (!list_empty(pages)) {
69 victim = lru_to_page(pages);
70 list_del(&victim->lru);
71 read_cache_pages_invalidate_page(mapping, victim);
72 }
73}
74
75
76
77
78
79
80
81
82
83
84
85
86
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping:	the address_space
 * @pages:	The address of a list_head which contains the target pages.  These
 *		pages have their ->index populated and are otherwise uninitialised.
 * @filler:	callback routine for filling a single page.
 * @data:	private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Returns 0, or the first non-zero value returned by @filler; in the
 * latter case all pages still queued on @pages are invalidated and freed.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			/* Insertion failed: drop this page and keep going. */
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop the list's. */
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: discard everything still queued. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}
112
113EXPORT_SYMBOL(read_cache_pages);
114
/*
 * Submit the pages on @pages for reading.  Prefer the filesystem's
 * batched ->readpages() when available; otherwise insert each page into
 * the page cache individually and issue ->readpage() on it.  All list
 * references are consumed either way.  Block-layer plugging is held
 * across the whole submission so requests can be merged.
 */
static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the rest of the list in case of error. */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = lru_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
			mapping->a_ops->readpage(filp, page);
		put_page(page);
	}
	ret = 0;

out:
	blk_finish_plug(&plug);

	return ret;
}
145
146
147
148
149
150
151
152
153
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages in the last batch submitted for I/O.
 */
unsigned int __do_page_cache_readahead(struct address_space *mapping,
		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
		unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* The last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	unsigned int nr_pages = 0;
	loff_t isize = i_size_read(inode);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		page = xa_load(&mapping->i_pages, page_offset);
		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.
			 */
			if (nr_pages)
				read_pages(mapping, filp, &page_pool, nr_pages,
						gfp_mask);
			nr_pages = 0;
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		/* Mark the page that should trigger the next async batch. */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (nr_pages)
		read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
	BUG_ON(!list_empty(&page_pool));
out:
	return nr_pages;
}
216
217
218
219
220
/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
			       pgoff_t offset, unsigned long nr_to_read)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	struct file_ra_state *ra = &filp->f_ra;
	unsigned long max_pages;

	/* No way to read pages at all?  Bail. */
	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
		return -EINVAL;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size.
	 */
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min(nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		__do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);

		offset += this_chunk;
		nr_to_read -= this_chunk;
	}
	return 0;
}
249
250
251
252
253
254
255
/*
 * Pick the size of the first readahead window for a @size-page request,
 * capped at @max: round the request up to a power of two, then scale it
 * 4x while small (<= max/32), 2x while medium (<= max/4), and clamp
 * anything larger to @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long win = roundup_pow_of_two(size);

	if (win <= max / 32)
		return win * 4;
	if (win <= max / 4)
		return win * 2;
	return max;
}
269
270
271
272
273
274static unsigned long get_next_ra_size(struct file_ra_state *ra,
275 unsigned long max)
276{
277 unsigned long cur = ra->size;
278 unsigned long newsize;
279
280 if (cur < max / 16)
281 newsize = 4 * cur;
282 else
283 newsize = 2 * cur;
284
285 return min(newsize, max);
286}
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
/*
 * Count the number of consecutive pages immediately preceding @offset
 * that are already present in the page cache, looking back at most @max
 * pages.  Used as evidence of how long a sequential stream has been
 * running.
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t offset, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, offset - 1, max);
	rcu_read_unlock();

	return offset - 1 - head;
}
344
345
346
347
/*
 * page cache context based readahead: infer a sequential stream from the
 * trail of cached pages behind @offset and set up @ra accordingly.
 * Returns 1 if a readahead window was set up, 0 otherwise.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, offset, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file reads)
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}
378
379
380
381
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 * Classifies the access at @offset and either submits a readahead window
 * (via ra_submit) or a one-off plain read.  Returns the number of pages
 * submitted in the last batch.
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	pgoff_t prev_offset;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size.
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(mapping, offset + 1, max_pages);
		rcu_read_unlock();

		if (!start || start - offset > max_pages)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (offset - prev_offset) == 1
	 * unaligned reads: (offset - prev_offset) == 0
	 */
	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (offset - prev_offset <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history
	 * pages) that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulted next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	return ra_submit(ra, mapping, filp);
}
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t offset, unsigned long req_size)
{
	/* no readahead */
	if (!ra->ra_pages)
		return;

	/* don't add readahead pressure to a congested blk cgroup */
	if (blk_cgroup_congested())
		return;

	/* be dumb: FMODE_RANDOM means no readahead heuristics at all */
	if (filp && (filp->f_mode & FMODE_RANDOM)) {
		force_page_cache_readahead(mapping, filp, offset, req_size);
		return;
	}

	/* do readahead */
	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_async_readahead() should be called when a page is used which
 * has the PG_readahead flag; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no readahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do readahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
578
/*
 * Kernel-internal entry point for the readahead(2) syscall: validate the
 * fd and hand the hint to vfs_fadvise(POSIX_FADV_WILLNEED).
 * Returns 0, -EBADF for a bad/unreadable fd, or -EINVAL if the file
 * cannot support readahead.  Note fdput() is a no-op when fdget() failed.
 */
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead. If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}
604
/* readahead(2) syscall definition; thin wrapper around ksys_readahead(). */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
609