1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/dax.h>
12#include <linux/gfp.h>
13#include <linux/export.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18#include <linux/pagemap.h>
19#include <linux/syscalls.h>
20#include <linux/file.h>
21#include <linux/mm_inline.h>
22
23#include "internal.h"
24
25
26
27
28
29void
30file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
31{
32 ra->ra_pages = inode_to_bdi(mapping->host)->ra_pages;
33 ra->prev_pos = -1;
34}
35EXPORT_SYMBOL_GPL(file_ra_state_init);
36
37
38
39
40
41
42
43
44static void read_cache_pages_invalidate_page(struct address_space *mapping,
45 struct page *page)
46{
47 if (page_has_private(page)) {
48 if (!trylock_page(page))
49 BUG();
50 page->mapping = mapping;
51 do_invalidatepage(page, 0, PAGE_SIZE);
52 page->mapping = NULL;
53 unlock_page(page);
54 }
55 put_page(page);
56}
57
58
59
60
61static void read_cache_pages_invalidate_pages(struct address_space *mapping,
62 struct list_head *pages)
63{
64 struct page *victim;
65
66 while (!list_empty(pages)) {
67 victim = lru_to_page(pages);
68 list_del(&victim->lru);
69 read_cache_pages_invalidate_page(mapping, victim);
70 }
71}
72
73
74
75
76
77
78
79
80
81
82
83int read_cache_pages(struct address_space *mapping, struct list_head *pages,
84 int (*filler)(void *, struct page *), void *data)
85{
86 struct page *page;
87 int ret = 0;
88
89 while (!list_empty(pages)) {
90 page = lru_to_page(pages);
91 list_del(&page->lru);
92 if (add_to_page_cache_lru(page, mapping, page->index,
93 readahead_gfp_mask(mapping))) {
94 read_cache_pages_invalidate_page(mapping, page);
95 continue;
96 }
97 put_page(page);
98
99 ret = filler(data, page);
100 if (unlikely(ret)) {
101 read_cache_pages_invalidate_pages(mapping, pages);
102 break;
103 }
104 task_io_account_read(PAGE_SIZE);
105 }
106 return ret;
107}
108
109EXPORT_SYMBOL(read_cache_pages);
110
111static int read_pages(struct address_space *mapping, struct file *filp,
112 struct list_head *pages, unsigned int nr_pages, gfp_t gfp)
113{
114 struct blk_plug plug;
115 unsigned page_idx;
116 int ret;
117
118 blk_start_plug(&plug);
119
120 if (mapping->a_ops->readpages) {
121 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
122
123 put_pages_list(pages);
124 goto out;
125 }
126
127 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
128 struct page *page = lru_to_page(pages);
129 list_del(&page->lru);
130 if (!add_to_page_cache_lru(page, mapping, page->index, gfp))
131 mapping->a_ops->readpage(filp, page);
132 put_page(page);
133 }
134 ret = 0;
135
136out:
137 blk_finish_plug(&plug);
138
139 return ret;
140}
141
142
143
144
145
146
147
148
149
150int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
151 pgoff_t offset, unsigned long nr_to_read,
152 unsigned long lookahead_size)
153{
154 struct inode *inode = mapping->host;
155 struct page *page;
156 unsigned long end_index;
157 LIST_HEAD(page_pool);
158 int page_idx;
159 int ret = 0;
160 loff_t isize = i_size_read(inode);
161 gfp_t gfp_mask = readahead_gfp_mask(mapping);
162
163 if (isize == 0)
164 goto out;
165
166 end_index = ((isize - 1) >> PAGE_SHIFT);
167
168
169
170
171 for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
172 pgoff_t page_offset = offset + page_idx;
173
174 if (page_offset > end_index)
175 break;
176
177 rcu_read_lock();
178 page = radix_tree_lookup(&mapping->page_tree, page_offset);
179 rcu_read_unlock();
180 if (page && !radix_tree_exceptional_entry(page))
181 continue;
182
183 page = __page_cache_alloc(gfp_mask);
184 if (!page)
185 break;
186 page->index = page_offset;
187 list_add(&page->lru, &page_pool);
188 if (page_idx == nr_to_read - lookahead_size)
189 SetPageReadahead(page);
190 ret++;
191 }
192
193
194
195
196
197
198 if (ret)
199 read_pages(mapping, filp, &page_pool, ret, gfp_mask);
200 BUG_ON(!list_empty(&page_pool));
201out:
202 return ret;
203}
204
205
206
207
208
209int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
210 pgoff_t offset, unsigned long nr_to_read)
211{
212 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
213 struct file_ra_state *ra = &filp->f_ra;
214 unsigned long max_pages;
215
216 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
217 return -EINVAL;
218
219
220
221
222
223 max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
224 nr_to_read = min(nr_to_read, max_pages);
225 while (nr_to_read) {
226 int err;
227
228 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
229
230 if (this_chunk > nr_to_read)
231 this_chunk = nr_to_read;
232 err = __do_page_cache_readahead(mapping, filp,
233 offset, this_chunk, 0);
234 if (err < 0)
235 return err;
236
237 offset += this_chunk;
238 nr_to_read -= this_chunk;
239 }
240 return 0;
241}
242
243
244
245
246
247
248
/*
 * get_init_ra_size - size of the first readahead window
 * @size: request size in pages
 * @max:  upper bound for the window, in pages
 *
 * @size is rounded up to a power of two.  The result is quadrupled if it
 * is at most @max / 32, doubled if it is at most @max / 4, and replaced by
 * @max otherwise.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long rounded = roundup_pow_of_two(size);

	if (rounded <= max / 32)
		return rounded * 4;

	if (rounded <= max / 4)
		return rounded * 2;

	return max;
}
262
263
264
265
266
267static unsigned long get_next_ra_size(struct file_ra_state *ra,
268 unsigned long max)
269{
270 unsigned long cur = ra->size;
271 unsigned long newsize;
272
273 if (cur < max / 16)
274 newsize = 4 * cur;
275 else
276 newsize = 2 * cur;
277
278 return min(newsize, max);
279}
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326static pgoff_t count_history_pages(struct address_space *mapping,
327 pgoff_t offset, unsigned long max)
328{
329 pgoff_t head;
330
331 rcu_read_lock();
332 head = page_cache_prev_hole(mapping, offset - 1, max);
333 rcu_read_unlock();
334
335 return offset - 1 - head;
336}
337
338
339
340
341static int try_context_readahead(struct address_space *mapping,
342 struct file_ra_state *ra,
343 pgoff_t offset,
344 unsigned long req_size,
345 unsigned long max)
346{
347 pgoff_t size;
348
349 size = count_history_pages(mapping, offset, max);
350
351
352
353
354
355 if (size <= req_size)
356 return 0;
357
358
359
360
361
362 if (size >= offset)
363 size *= 2;
364
365 ra->start = offset;
366 ra->size = min(size + req_size, max);
367 ra->async_size = 1;
368
369 return 1;
370}
371
372
373
374
375static unsigned long
376ondemand_readahead(struct address_space *mapping,
377 struct file_ra_state *ra, struct file *filp,
378 bool hit_readahead_marker, pgoff_t offset,
379 unsigned long req_size)
380{
381 struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
382 unsigned long max_pages = ra->ra_pages;
383 pgoff_t prev_offset;
384
385
386
387
388
389 if (req_size > max_pages && bdi->io_pages > max_pages)
390 max_pages = min(req_size, bdi->io_pages);
391
392
393
394
395 if (!offset)
396 goto initial_readahead;
397
398
399
400
401
402 if ((offset == (ra->start + ra->size - ra->async_size) ||
403 offset == (ra->start + ra->size))) {
404 ra->start += ra->size;
405 ra->size = get_next_ra_size(ra, max_pages);
406 ra->async_size = ra->size;
407 goto readit;
408 }
409
410
411
412
413
414
415
416 if (hit_readahead_marker) {
417 pgoff_t start;
418
419 rcu_read_lock();
420 start = page_cache_next_hole(mapping, offset + 1, max_pages);
421 rcu_read_unlock();
422
423 if (!start || start - offset > max_pages)
424 return 0;
425
426 ra->start = start;
427 ra->size = start - offset;
428 ra->size += req_size;
429 ra->size = get_next_ra_size(ra, max_pages);
430 ra->async_size = ra->size;
431 goto readit;
432 }
433
434
435
436
437 if (req_size > max_pages)
438 goto initial_readahead;
439
440
441
442
443
444
445 prev_offset = (unsigned long long)ra->prev_pos >> PAGE_SHIFT;
446 if (offset - prev_offset <= 1UL)
447 goto initial_readahead;
448
449
450
451
452
453 if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
454 goto readit;
455
456
457
458
459
460 return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
461
462initial_readahead:
463 ra->start = offset;
464 ra->size = get_init_ra_size(req_size, max_pages);
465 ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
466
467readit:
468
469
470
471
472
473 if (offset == ra->start && ra->size == ra->async_size) {
474 ra->async_size = get_next_ra_size(ra, max_pages);
475 ra->size += ra->async_size;
476 }
477
478 return ra_submit(ra, mapping, filp);
479}
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495void page_cache_sync_readahead(struct address_space *mapping,
496 struct file_ra_state *ra, struct file *filp,
497 pgoff_t offset, unsigned long req_size)
498{
499
500 if (!ra->ra_pages)
501 return;
502
503
504 if (filp && (filp->f_mode & FMODE_RANDOM)) {
505 force_page_cache_readahead(mapping, filp, offset, req_size);
506 return;
507 }
508
509
510 ondemand_readahead(mapping, ra, filp, false, offset, req_size);
511}
512EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529void
530page_cache_async_readahead(struct address_space *mapping,
531 struct file_ra_state *ra, struct file *filp,
532 struct page *page, pgoff_t offset,
533 unsigned long req_size)
534{
535
536 if (!ra->ra_pages)
537 return;
538
539
540
541
542 if (PageWriteback(page))
543 return;
544
545 ClearPageReadahead(page);
546
547
548
549
550 if (inode_read_congested(mapping->host))
551 return;
552
553
554 ondemand_readahead(mapping, ra, filp, true, offset, req_size);
555}
556EXPORT_SYMBOL_GPL(page_cache_async_readahead);
557
558static ssize_t
559do_readahead(struct address_space *mapping, struct file *filp,
560 pgoff_t index, unsigned long nr)
561{
562 if (!mapping || !mapping->a_ops)
563 return -EINVAL;
564
565
566
567
568
569
570 if (dax_mapping(mapping))
571 return 0;
572
573 return force_page_cache_readahead(mapping, filp, index, nr);
574}
575
576SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
577{
578 ssize_t ret;
579 struct fd f;
580
581 ret = -EBADF;
582 f = fdget(fd);
583 if (f.file) {
584 if (f.file->f_mode & FMODE_READ) {
585 struct address_space *mapping = f.file->f_mapping;
586 pgoff_t start = offset >> PAGE_SHIFT;
587 pgoff_t end = (offset + count - 1) >> PAGE_SHIFT;
588 unsigned long len = end - start + 1;
589 ret = do_readahead(mapping, f.file, start, len);
590 }
591 fdput(f);
592 }
593 return ret;
594}
595