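/*
 * mm/readahead.c - address_space-level file readahead.
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 09Apr2002	Andrew Morton
 *		Initial version.
 */
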
#include <linux/kernel.h>
#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/export.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/file.h>
#include <linux/mm_inline.h>
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>

#include "internal.h"
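
/*
 * Initialise a struct file's readahead state.  Assume that the caller has
 * memset *ra to zero.
 */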
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
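
/*
 * see if a page needs releasing upon read_cache_pages() failure
 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
 *   before calling, such as the NFS fs marking pages that are cached locally
 *   on disk, thus we need to give the fs a chance to clean up in the event of
 *   an error
 */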
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		if (!trylock_page(page))
			BUG();
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	put_page(page);
}
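
/*
 * release a list of pages, invalidating them first if need be
 */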
static void read_cache_pages_invalidate_pages(struct address_space *mapping,
					      struct list_head *pages)
{
	struct page *victim;

	while (!list_empty(pages)) {
		victim = lru_to_page(pages);
		list_del(&victim->lru);
		read_cache_pages_invalidate_page(mapping, victim);
	}
}
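
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hands pages on @pages to the filesystem and starts reads against them.
 * Returns with the list empty.
 *
 * Return: %0 on success, error return by @filler otherwise
 */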
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = lru_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping, page->index,
				readahead_gfp_mask(mapping))) {
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		put_page(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);

static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up the remaining pages */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = lru_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
			mapping->a_ops->readpage(filp, page);
		put_page(page);
	}
	ret = 0;

out:
	blk_finish_plug(&plug);

	return ret;
}
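
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * the pages first, then submits them for I/O.  This avoids the very bad
 * behaviour which would occur if page allocations are causing VM writeback.
 * We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages requested, or the maximum amount of I/O allowed.
 */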
unsigned int __do_page_cache_readahead(struct address_space *mapping,
		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
		unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* The last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	unsigned int nr_pages = 0;
	loff_t isize = i_size_read(inode);
	gfp_t gfp_mask = readahead_gfp_mask(mapping);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		page = xa_load(&mapping->i_pages, page_offset);
		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
			 * of contiguous pages before continuing with the
			 * next batch.
			 */
			if (nr_pages)
				read_pages(mapping, filp, &page_pool, nr_pages,
						gfp_mask);
			nr_pages = 0;
			continue;
		}

		page = __page_cache_alloc(gfp_mask);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		nr_pages++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (nr_pages)
		read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
	BUG_ON(!list_empty(&page_pool));
out:
	return nr_pages;
}
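
/*
 * Chunk the readahead into 2 megabyte units, so that we don't pin too much
 * memory at once.
 */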
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
		pgoff_t offset, unsigned long nr_to_read)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	struct file_ra_state *ra = &filp->f_ra;
	unsigned long max_pages;

	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
		return -EINVAL;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
	nr_to_read = min(nr_to_read, max_pages);
	while (nr_to_read) {
		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;

		if (this_chunk > nr_to_read)
			this_chunk = nr_to_read;
		__do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);

		offset += this_chunk;
		nr_to_read -= this_chunk;
	}
	return 0;
}
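
/*
 * Set the initial window size: round the request up to the next power of
 * two, then scale it up - x4 for tiny requests, x2 for medium ones -
 * clamped to max.  For example, with max = 32 pages (128k): a 1-page read
 * gets a 4-page initial window, a 4-page read gets 8 pages, and anything
 * over max/4 jumps straight to max.
 */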
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long newsize = roundup_pow_of_two(size);

	if (newsize <= max / 32)
		newsize = newsize * 4;
	else if (newsize <= max / 4)
		newsize = newsize * 2;
	else
		newsize = max;

	return newsize;
}
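
/*
 * Get the previous window size, ramp it up, and return it as the new
 * window size.  For example, with max = 128 pages the window ramps
 * 4 -> 16 -> 32 -> 64 -> 128 across successive sequential hits.
 */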
static unsigned long get_next_ra_size(struct file_ra_state *ra,
				      unsigned long max)
{
	unsigned long cur = ra->size;

	if (cur < max / 16)
		return 4 * cur;
	if (cur <= max / 2)
		return 2 * cur;
	return max;
}
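
/*
 * On-demand readahead design.
 *
 * The fields in struct file_ra_state represent the most-recently-executed
 * readahead attempt:
 *
 *                        |<----- async_size ---------|
 *     |------------------- size -------------------->|
 *     |==================#===========================|
 *     ^start             ^page marked with PG_readahead
 *
 * To overlap application thinking time and disk I/O time, we do
 * `readahead pipelining': do not wait until the application consumed all
 * readahead pages and stalled on the missing page at readahead_index;
 * instead, submit an asynchronous readahead I/O as soon as there are
 * only async_size pages left in the readahead window.  Normally async_size
 * will be equal to size, for maximum pipelining.
 *
 * In interleaved sequential reads, concurrent streams on the same fd can
 * be invalidating each other's readahead state.  So we flag the new readahead
 * page at (start+size-async_size) with PG_readahead, and use it as a
 * readahead indicator.  The flag won't be set on already cached pages, to
 * avoid the readahead-for-nothing fuss, saving pointless page cache lookups.
 *
 * prev_pos tracks the last visited byte in the _requested_ file.  This is
 * important for detecting sequential reads, where the current request
 * immediately follows the previous one.
 */

/*
 * Count contiguously cached pages from @offset-1 to @offset-@max,
 * this count is a conservative estimation of
 * 	- length of the sequential read sequence, or
 * 	- thrashing threshold in memory tight systems
 */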
static pgoff_t count_history_pages(struct address_space *mapping,
				   pgoff_t offset, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = page_cache_prev_miss(mapping, offset - 1, max);
	rcu_read_unlock();

	return offset - 1 - head;
}
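
/*
 * page cache context based read-ahead
 */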
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, offset, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = min(size + req_size, max);
	ra->async_size = 1;

	return 1;
}
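
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 */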
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	pgoff_t prev_offset;

	/*
	 * If the request exceeds the readahead window, allow the read to
	 * be up to the optimal hardware IO size
	 */
	if (req_size > max_pages && bdi->io_pages > max_pages)
		max_pages = min(req_size, bdi->io_pages);

	/*
	 * start of file
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size.  Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = page_cache_next_miss(mapping, offset + 1, max_pages);
		rcu_read_unlock();

		if (!start || start - offset > max_pages)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max_pages);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max_pages)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (offset - prev_offset) == 1
	 * unaligned reads: (offset - prev_offset) == 0
	 */
	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
	if (offset - prev_offset <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max_pages);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulted next readahead window into the current one.
	 * Take care of maximum IO pages as above.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		add_pages = get_next_ra_size(ra, max_pages);
		if (ra->size + add_pages <= max_pages) {
			ra->async_size = add_pages;
			ra->size += add_pages;
		} else {
			ra->size = max_pages;
			ra->async_size = max_pages >> 1;
		}
	}

	return ra_submit(ra, mapping, filp);
}
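
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */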
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t offset, unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	if (blk_cgroup_congested())
		return;

	/* be dumb */
	if (filp && (filp->f_mode & FMODE_RANDOM)) {
		force_page_cache_readahead(mapping, filp, offset, req_size);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
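
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_async_readahead() should be called when a page is used which
 * has the PG_readahead flag; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */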
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (inode_read_congested(mapping->host))
		return;

	if (blk_cgroup_congested())
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);

ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (!f.file || !(f.file->f_mode & FMODE_READ))
		goto out;

	/*
	 * The readahead() syscall is intended to run only on files
	 * that can execute readahead.  If readahead is not possible
	 * on this file, then we must return -EINVAL.
	 */
	ret = -EINVAL;
	if (!f.file->f_mapping || !f.file->f_mapping->a_ops ||
	    !S_ISREG(file_inode(f.file)->i_mode))
		goto out;

	ret = vfs_fadvise(f.file, offset, count, POSIX_FADV_WILLNEED);
out:
	fdput(f);
	return ret;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}