1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/gfp.h>
13#include <linux/mm.h>
14#include <linux/export.h>
15#include <linux/blkdev.h>
16#include <linux/backing-dev.h>
17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h>
19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
22
23
24
25
26
27void
28file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
29{
30 ra->ra_pages = mapping->backing_dev_info->ra_pages;
31 ra->prev_pos = -1;
32}
33EXPORT_SYMBOL_GPL(file_ra_state_init);
34
35#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
36
37
38
39
40
41
42
43
/*
 * Release a page that was meant to be added to the page cache but wasn't.
 * If the filesystem attached private data, invalidate it first — with
 * page->mapping temporarily set so ->invalidatepage sees a valid mapping —
 * then drop the list's reference to the page.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
		struct page *page)
{
	if (page_has_private(page)) {
		/* Page is not in the page cache, so nobody else can hold it
		 * locked; a failed trylock means something is badly wrong. */
		if (!trylock_page(page))
			BUG();
		/* do_invalidatepage() requires page->mapping to be set. */
		page->mapping = mapping;
		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
		page->mapping = NULL;
		unlock_page(page);
	}
	page_cache_release(page);
}
57
58
59
60
61static void read_cache_pages_invalidate_pages(struct address_space *mapping,
62 struct list_head *pages)
63{
64 struct page *victim;
65
66 while (!list_empty(pages)) {
67 victim = list_to_page(pages);
68 list_del(&victim->lru);
69 read_cache_pages_invalidate_page(mapping, victim);
70 }
71}
72
73
74
75
76
77
78
79
80
81
82
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 * Returns 0 on success, or the first error returned by @filler.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = list_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping,
					page->index, GFP_KERNEL)) {
			/* Insertion failed: invalidate and drop this page,
			 * but keep going with the rest of the list. */
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* The page cache now holds its own reference; drop ours. */
		page_cache_release(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: throw away all remaining pages. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_CACHE_SIZE);
	}
	return ret;
}
108
109EXPORT_SYMBOL(read_cache_pages);
110
/*
 * Submit the pages on @pages for reading.  Uses the address_space's
 * ->readpages() method when available, otherwise inserts each page into
 * the page cache and reads it individually with ->readpage().  Submission
 * is wrapped in a block plug so the I/O can be batched/merged.
 */
static int read_pages(struct address_space *mapping, struct file *filp,
		struct list_head *pages, unsigned nr_pages)
{
	struct blk_plug plug;
	unsigned page_idx;
	int ret;

	blk_start_plug(&plug);

	if (mapping->a_ops->readpages) {
		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
		/* Clean up any pages ->readpages() left on the list. */
		put_pages_list(pages);
		goto out;
	}

	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
		struct page *page = list_to_page(pages);
		list_del(&page->lru);
		if (!add_to_page_cache_lru(page, mapping,
				page->index, GFP_KERNEL)) {
			mapping->a_ops->readpage(filp, page);
		}
		/* Drop the list's reference whether or not we read it. */
		page_cache_release(page);
	}
	ret = 0;

out:
	blk_finish_plug(&plug);

	return ret;
}
143
144
145
146
147
148
149
150
151
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them all for I/O.  This avoids the very
 * bad behaviour of blocking while waiting for disk mappings page-by-page.
 *
 * Returns the number of pages actually submitted for read.  Pages past
 * EOF, already-cached pages, and allocation failures shrink that number.
 */
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read,
			unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* the last page we want to read */
	LIST_HEAD(page_pool);
	int page_idx;
	int ret = 0;
	loff_t isize = i_size_read(inode);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		/* Skip pages that are already in the page cache. */
		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, page_offset);
		rcu_read_unlock();
		if (page)
			continue;

		page = page_cache_alloc_readahead(mapping);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		/* Mark the page where async readahead should be triggered. */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ret++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (ret)
		read_pages(mapping, filp, &page_pool, ret);
	BUG_ON(!list_empty(&page_pool));
out:
	return ret;
}
206
207
208
209
210
211int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
212 pgoff_t offset, unsigned long nr_to_read)
213{
214 int ret = 0;
215
216 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
217 return -EINVAL;
218
219 nr_to_read = max_sane_readahead(nr_to_read);
220 while (nr_to_read) {
221 int err;
222
223 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
224
225 if (this_chunk > nr_to_read)
226 this_chunk = nr_to_read;
227 err = __do_page_cache_readahead(mapping, filp,
228 offset, this_chunk, 0);
229 if (err < 0) {
230 ret = err;
231 break;
232 }
233 ret += err;
234 offset += this_chunk;
235 nr_to_read -= this_chunk;
236 }
237 return ret;
238}
239
240
241
242
243
244unsigned long max_sane_readahead(unsigned long nr)
245{
246 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
247 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
248}
249
250
251
252
253unsigned long ra_submit(struct file_ra_state *ra,
254 struct address_space *mapping, struct file *filp)
255{
256 int actual;
257
258 actual = __do_page_cache_readahead(mapping, filp,
259 ra->start, ra->size, ra->async_size);
260
261 return actual;
262}
263
264
265
266
267
268
269
/*
 * Choose the initial readahead window from the size of the first read
 * request: round up to a power of two, then scale up more aggressively
 * for small requests, clamping the result to @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long ra = roundup_pow_of_two(size);

	if (ra <= max / 32)
		return ra * 4;
	if (ra <= max / 4)
		return ra * 2;
	return max;
}
283
284
285
286
287
288static unsigned long get_next_ra_size(struct file_ra_state *ra,
289 unsigned long max)
290{
291 unsigned long cur = ra->size;
292 unsigned long newsize;
293
294 if (cur < max / 16)
295 newsize = 4 * cur;
296 else
297 newsize = 2 * cur;
298
299 return min(newsize, max);
300}
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347static pgoff_t count_history_pages(struct address_space *mapping,
348 struct file_ra_state *ra,
349 pgoff_t offset, unsigned long max)
350{
351 pgoff_t head;
352
353 rcu_read_lock();
354 head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
355 rcu_read_unlock();
356
357 return offset - 1 - head;
358}
359
360
361
362
/*
 * Page cache context based readahead: infer a sequential stream from the
 * trail of cached pages it has left behind.  Returns 1 and fills in @ra
 * when a window was set up, 0 when the access looks random.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, ra, offset, max);

	/*
	 * not enough history pages:
	 * it could be a random read
	 */
	if (size <= req_size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file-read)
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = min(size + req_size, max);
	/* readahead marker one page before the window's end */
	ra->async_size = 1;

	return 1;
}
393
394
395
396
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 * Classifies the access pattern at @offset and sets up / extends the
 * readahead window in @ra accordingly, then submits it.
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	unsigned long max = max_sane_readahead(ra->ra_pages);
	pgoff_t prev_offset;

	/*
	 * start of file
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
		rcu_read_unlock();

		if (!start || start - offset > max)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 * trivial case: (offset - prev_offset) == 1
	 * unaligned reads: (offset - prev_offset) == 0
	 */
	prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT;
	if (offset - prev_offset <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history pages)
	 * that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulting next readahead window into the current one.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		ra->async_size = get_next_ra_size(ra, max);
		ra->size += ra->async_size;
	}

	return ra_submit(ra, mapping, filp);
}
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
/**
 * page_cache_sync_readahead - generic file readahead
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_sync_readahead() should be called when a cache miss happened:
 * it will submit the read.  The readahead logic may decide to piggyback more
 * pages onto the read request if access patterns suggest it will improve
 * performance.
 */
void page_cache_sync_readahead(struct address_space *mapping,
			       struct file_ra_state *ra, struct file *filp,
			       pgoff_t offset, unsigned long req_size)
{
	/* no readahead configured for this device */
	if (!ra->ra_pages)
		return;

	/* random access: just read exactly what was asked for */
	if (filp && (filp->f_mode & FMODE_RANDOM)) {
		force_page_cache_readahead(mapping, filp, offset, req_size);
		return;
	}

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which carries the readahead marker
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_async_readahead() should be called when a page with the
 * PG_readahead marker is reached: this is a hint that the application has
 * consumed enough of the current window that more should be read in.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no readahead configured for this device */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim, so a page
	 * under writeback cannot carry a valid readahead marker.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (bdi_read_congested(mapping->backing_dev_info))
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
571
572static ssize_t
573do_readahead(struct address_space *mapping, struct file *filp,
574 pgoff_t index, unsigned long nr)
575{
576 if (!mapping || !mapping->a_ops)
577 return -EINVAL;
578
579 force_page_cache_readahead(mapping, filp, index, nr);
580 return 0;
581}
582
/*
 * readahead(2): populate the page cache with data from @fd covering the
 * byte range [offset, offset + count).  Returns 0 on success, -EBADF for
 * a bad descriptor, or the error from do_readahead().
 */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (f.file) {
		/* Readahead only makes sense on descriptors open for read. */
		if (f.file->f_mode & FMODE_READ) {
			struct address_space *mapping = f.file->f_mapping;
			/* Round the byte range out to whole pagecache pages. */
			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
			unsigned long len = end - start + 1;
			ret = do_readahead(mapping, f.file, start, len);
		}
		fdput(f);
	}
	return ret;
}
602