1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/dax.h>
12#include <linux/gfp.h>
13#include <linux/export.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18#include <linux/pagemap.h>
19#include <linux/syscalls.h>
20#include <linux/file.h>
21#include <linux/mm_inline.h>
22#include <linux/blk-cgroup.h>
23#include <linux/fadvise.h>
24
25#include "internal.h"
26
27
28
29
30
31void
32file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
33{
34 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
35 ra->prev_pos = -1;
36}
37EXPORT_SYMBOL_GPL(file_ra_state_init);
38
39
40
41
42
43
44
45
/*
 * Release a page that was never inserted into the page cache, giving the
 * filesystem a chance to clean up first: if the page carries private data
 * (PG_private / fs-attached state), invalidate it before dropping the
 * list's reference.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
				      struct page *page)
{
	if (page_has_private(page)) {
		/*
		 * The page is not in the page cache, so the lock cannot be
		 * contended — a trylock failure here would be a bug.
		 */
		if (!trylock_page(page))
			BUG();
		/*
		 * Temporarily attach the mapping so do_invalidatepage() can
		 * reach the right address_space, then detach it again.
		 */
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);	/* drop the caller's (list's) reference */
}
59
60
61
62
63static void read_cache_pages_invalidate_pages(struct address_space *mapping,
64 struct list_head *pages)
65{
66 struct page *victim;
67
68 while (!list_empty(pages)) {
69 victim = lru_to_page(pages);
70 list_del(&victim->lru);
71 read_cache_pages_invalidate_page(mapping, victim);
72 }
73}
74
75
76
77
78
79
80
81
82
83
84
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninstantiated.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 *
 * Return: %0 on success, or the error returned by @filler, in which case
 * every page remaining on @pages is invalidated and released.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		/*
		 * If insertion fails (e.g. the index is already cached),
		 * invalidate and drop just this page and keep going.
		 */
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop ours. */
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: discard everything still queued. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
112
/*
 * Submit reads for @nr_pages pages on @pages.  Prefers the batched
 * ->readpages() address_space op; otherwise falls back to inserting each
 * page into the page cache and calling ->readpage() on it.  The pages
 * list is fully consumed on either path.  All submissions happen under a
 * block plug so the I/O can be batched.
 *
 * Returns the ->readpages() result, or 0 on the fallback path (per-page
 * ->readpage() errors are ignored here).
 */
static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = lru_to_page(pages);
		list_del(&page->lru);
		/* Skip ->readpage() if the index is already cached. */
		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
			mapping->a_ops->readpage(filp, page);
		put_page(page);	/* drop our list reference */
	}
	ret = 0;

out:
	blk_finish_plug(&plug);

	return ret;
}
143
144
145
146
147
148
149
150
151
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages in the final contiguous batch handed to
 * read_pages() (0 if nothing was read).
 */
unsigned int __do_page_cache_readahead(struct address_space *mapping,
		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
		unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* The last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	unsigned int nr_pages = 0;
	loff_t isize = i_size_read(inode);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		page = xa_load(&mapping->i_pages, page_offset);
		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch of
			 * contiguous pages before continuing with the next
			 * batch.
			 */
			if (nr_pages)
				read_pages(mapping, filp, &page_pool, nr_pages,
						gfp_mask);
			nr_pages = 0;
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		/*
		 * Mark the page at the lookahead distance so that a later
		 * access to it triggers asynchronous readahead.
		 */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (nr_pages)
		read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
	BUG_ON(!list_empty(&page_pool));	/* read_pages() consumes the pool */
out:
	return nr_pages;
}
214
215
216
217
218
219int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
220 pgoff_t offset, unsigned long nr_to_read)
221{
222 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
223 struct file_ra_state *ra = &filp->f_ra;
224 unsigned long max_pages;
225
226 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
227 return -EINVAL;
228
229
230
231
232
233 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
234 nr_to_read = min(nr_to_read, max_pages);
235 while (nr_to_read) {
236 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
237
238 if (this_chunk > nr_to_read)
239 this_chunk = nr_to_read;
240 __do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);
241
242 offset += this_chunk;
243 nr_to_read -= this_chunk;
244 }
245 return 0;
246}
247
248
249
250
251
252
253
/*
 * Set the initial window size: round the request up to the next power of
 * two, then scale it up (x4 when well below @max, x2 when moderately below,
 * otherwise clamp to @max).
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		return newsize * 4;
	if (newsize <= max / 4)
		return newsize * 2;
	return max;
}
267
268
269
270
271
272static unsigned long get_next_ra_size(struct file_ra_state *ra,
273 unsigned long max)
274{
275 unsigned long cur = ra->size;
276 unsigned long newsize;
277
278 if (cur < max / 16)
279 newsize = 4 * cur;
280 else
281 newsize = 2 * cur;
282
283 return min(newsize, max);
284}
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/*
 * Count the number of contiguous cached pages immediately preceding
 * @offset, capped at @max: page_cache_prev_miss() finds the nearest hole
 * at or below offset - 1, and the span between that hole and offset - 1
 * is the cached history run.
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t offset, unsigned long max)
{
	pgoff_t head;

	/* Lockless page cache walk under RCU. */
	rcu_read_lock();
	head = page_cache_prev_miss(mapping, offset - 1, max);
	rcu_read_unlock();

	return offset - 1 - head;
}
342
343
344
345
/*
 * page cache context based readahead: infer a sequential stream from the
 * trail of cached pages it left behind.  Returns 1 and programs @ra when a
 * stream is detected, 0 otherwise.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, offset, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}
376
377
378
379
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 * Decides where the next readahead window starts and how big it is, then
 * submits it via ra_submit().
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	pgoff_t prev_offset;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(mapping, offset + 1, max_pages);
		rcu_read_unlock();

		if (!start || start - offset > max_pages)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (offset - prev_offset) == 1
	 * unaligned reads: (offset - prev_offset) == 0
	 */
	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (offset - prev_offset <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history
	 * pages) that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	return ra_submit(ra, mapping, filp);
}
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508void page_cache_sync_readahead(struct address_space *mapping,
509 struct file_ra_state *ra, struct file *filp,
510 pgoff_t offset, unsigned long req_size)
511{
512
513 if (!ra->ra_pages)
514 return;
515
516 if (blk_cgroup_congested())
517 return;
518
519
520 if (filp && (filp->f_mode & FMODE_RANDOM)) {
521 force_page_cache_readahead(mapping, filp, offset, req_size);
522 return;
523 }
524
525
526 ondemand_readahead(mapping, ra, filp, false, offset, req_size);
527}
528EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to the readahead path (may be NULL)
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * Should be called when a page with the PG_readahead flag is used: the
 * marker suggests the application has consumed enough of the readahead
 * window that more pages should be pulled in.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
576
577ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
578{
579 ssize_t ret;
580 struct fd f;
581
582 ret = -EBADF;
583 f = fdget(fd);
584 if (!f.file || !(f.file->f_mode & FMODE_READ))
585 goto out;
586
587
588
589
590
591
592 ret = -EINVAL;
593 if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
594 !S_ISREG(file_inode(f.file)->i_mode))
595 goto out;
596
597 ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
598out:
599 fdput(f);
600 return ret;
601}
602
/* readahead(2) syscall entry point; delegates to ksys_readahead(). */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
607