1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/gfp.h>
13#include <linux/mm.h>
14#include <linux/export.h>
15#include <linux/blkdev.h>
16#include <linux/backing-dev.h>
17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h>
19#include <linux/pagemap.h>
20#include <linux/syscalls.h>
21#include <linux/file.h>
22
23
24
25
26
27void
28file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
29{
30 ra->ra_pages = mapping->backing_dev_info->ra_pages;
31 ra->prev_pos = -1;
32}
33EXPORT_SYMBOL_GPL(file_ra_state_init);
34
35#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
36
37
38
39
40
41
42
43
/*
 * Dispose of a page that was destined for the page cache but never got
 * inserted: strip any filesystem-private state via do_invalidatepage()
 * and drop the caller's reference.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
		struct page *page)
{
	if (page_has_private(page)) {
		/* Nobody else can see this page yet, so the lock must succeed. */
		if (!trylock_page(page))
			BUG();
		/* do_invalidatepage() needs page->mapping set temporarily. */
		page->mapping = mapping;
		do_invalidatepage(page, 0);
		page->mapping = NULL;
		unlock_page(page);
	}
	page_cache_release(page);
}
57
58
59
60
61static void read_cache_pages_invalidate_pages(struct address_space *mapping,
62 struct list_head *pages)
63{
64 struct page *victim;
65
66 while (!list_empty(pages)) {
67 victim = list_to_page(pages);
68 list_del(&victim->lru);
69 read_cache_pages_invalidate_page(mapping, victim);
70 }
71}
72
73
74
75
76
77
78
79
80
81
82
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: list of pages to read; each has ->index populated and is
 *         otherwise uninitialised
 * @filler: callback that reads a single page
 * @data: private data passed to @filler
 *
 * Inserts each page into the page cache/LRU and invokes @filler on it.
 * On filler failure the remaining pages are invalidated and the filler's
 * error code is returned; returns 0 when all pages were submitted.
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = list_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping,
					page->index, GFP_KERNEL)) {
			/* Insertion failed: discard this page and keep going. */
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* Drop our reference; the page cache now holds its own. */
		page_cache_release(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			/* Filler failed: throw away the rest of the batch. */
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_CACHE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
110
111static int read_pages(struct address_space *mapping, struct file *filp,
112 struct list_head *pages, unsigned nr_pages)
113{
114 struct blk_plug plug;
115 unsigned page_idx;
116 int ret;
117
118 blk_start_plug(&plug);
119
120 if (mapping->a_ops->readpages) {
121 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
122
123 put_pages_list(pages);
124 goto out;
125 }
126
127 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
128 struct page *page = list_to_page(pages);
129 list_del(&page->lru);
130 if (!add_to_page_cache_lru(page, mapping,
131 page->index, GFP_KERNEL)) {
132 mapping->a_ops->readpage(filp, page);
133 }
134 page_cache_release(page);
135 }
136 ret = 0;
137
138out:
139 blk_finish_plug(&plug);
140
141 return ret;
142}
143
144
145
146
147
148
149
150
151
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them all for I/O, so that page
 * allocation (which may trigger VM writeback) is not interleaved with the
 * reads themselves.
 *
 * Returns the number of pages queued for reading.
 */
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read,
			unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* index of the last page we may read */
	LIST_HEAD(page_pool);
	int page_idx;
	int ret = 0;
	loff_t isize = i_size_read(inode);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);

	/*
	 * Phase 1: preallocate as many pages as we will need, skipping
	 * offsets already present in the page cache.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, page_offset);
		rcu_read_unlock();
		if (page)
			continue;

		page = page_cache_alloc_readahead(mapping);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		/* Mark the page that should trigger the next async readahead. */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ret++;
	}

	/*
	 * Phase 2: start the I/O.  I/O errors are ignored here — if a page
	 * ends up not uptodate, the eventual reader will issue readpage
	 * again and handle the error there.
	 */
	if (ret)
		read_pages(mapping, filp, &page_pool, ret);
	BUG_ON(!list_empty(&page_pool));
out:
	return ret;
}
206
207
208
209
210
211int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
212 pgoff_t offset, unsigned long nr_to_read)
213{
214 int ret = 0;
215
216 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
217 return -EINVAL;
218
219 nr_to_read = max_sane_readahead(nr_to_read);
220 while (nr_to_read) {
221 int err;
222
223 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
224
225 if (this_chunk > nr_to_read)
226 this_chunk = nr_to_read;
227 err = __do_page_cache_readahead(mapping, filp,
228 offset, this_chunk, 0);
229 if (err < 0) {
230 ret = err;
231 break;
232 }
233 ret += err;
234 offset += this_chunk;
235 nr_to_read -= this_chunk;
236 }
237 return ret;
238}
239
240
241
242
243
244unsigned long max_sane_readahead(unsigned long nr)
245{
246 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
247 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
248}
249
250
251
252
253unsigned long ra_submit(struct file_ra_state *ra,
254 struct address_space *mapping, struct file *filp)
255{
256 int actual;
257
258 actual = __do_page_cache_readahead(mapping, filp,
259 ra->start, ra->size, ra->async_size);
260
261 return actual;
262}
263
264
265
266
267
268
269
/*
 * Choose the initial readahead window for a new stream.  The request
 * size is rounded up to a power of two and then scaled: small requests
 * (<= max/32) are quadrupled, moderate ones (<= max/4) doubled, and
 * anything larger is clamped to @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long ra = roundup_pow_of_two(size);

	if (ra <= max / 32)
		return ra * 4;
	if (ra <= max / 4)
		return ra * 2;
	return max;
}
283
284
285
286
287
288static unsigned long get_next_ra_size(struct file_ra_state *ra,
289 unsigned long max)
290{
291 unsigned long cur = ra->size;
292 unsigned long newsize;
293
294 if (cur < max / 16)
295 newsize = 4 * cur;
296 else
297 newsize = 2 * cur;
298
299 return min(newsize, max);
300}
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
/*
 * Count how many consecutive pages immediately preceding @offset are
 * already cached (scanning back at most @max pages), i.e. the length of
 * the "history" run left behind by a prior read stream.
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   struct file_ra_state *ra,
				   pgoff_t offset, unsigned long max)
{
	pgoff_t head;

	/* Lockless radix-tree scan for the nearest hole below offset. */
	rcu_read_lock();
	head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
	rcu_read_unlock();

	return offset - 1 - head;
}
359
360
361
362
/*
 * Context readahead: if the page cache holds a run of pages just before
 * @offset, treat this as evidence of a (possibly interleaved) sequential
 * stream and size a new readahead window from that history.
 *
 * Returns 1 if @ra was set up, 0 if there is no usable history.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, ra, offset, max);

	/*
	 * No history pages cached right before offset: not a good
	 * readahead candidate.
	 */
	if (!size)
		return 0;

	/*
	 * History reaches all the way back to page 0: the run may extend
	 * even further than we can see, so double the estimate.
	 * NOTE(review): heuristic inherited from the surrounding design —
	 * confirm rationale against upstream history.
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = get_init_ra_size(size + req_size, max);
	ra->async_size = ra->size;

	return 1;
}
393
394
395
396
/*
 * The core on-demand readahead heuristic: classify the access pattern
 * and either set up/advance a readahead window (ra->start/size/async_size)
 * and submit it, or fall back to a plain unretained read.
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	unsigned long max = max_sane_readahead(ra->ra_pages);

	/*
	 * Read at start of file: always treat as the beginning of a
	 * sequential stream.
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * Offset is exactly where the previous window expected the next
	 * access (at the async-mark or at the window end): sequential
	 * access.  Push the window forward and ramp up its size.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state (e.g.
	 * interleaved streams).  Probe the page cache for the run of
	 * cached pages after offset and build a new window from it.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
		rcu_read_unlock();

		/* No hole found, or it is too far ahead: do nothing. */
		if (!start || start - offset > max)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* pages already cached ahead */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Oversized read: start a fresh window rather than growing one.
	 */
	if (req_size > max)
		goto initial_readahead;

	/*
	 * Sequential-ish cache miss: offset is adjacent to (or equal to)
	 * the previous read position.
	 */
	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache for traces (cached history pages) that a
	 * sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max))
		goto readit;

	/*
	 * Standalone, small random read: read as-is and do not pollute
	 * the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * If this read will itself land on the readahead marker it sets,
	 * trigger that marker hit now and merge the resulting next window
	 * into the current one, pipelining the two reads.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		ra->async_size = get_next_ra_size(ra, max);
		ra->size += ra->async_size;
	}

	return ra_submit(ra, mapping, filp);
}
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505void page_cache_sync_readahead(struct address_space *mapping,
506 struct file_ra_state *ra, struct file *filp,
507 pgoff_t offset, unsigned long req_size)
508{
509
510 if (!ra->ra_pages)
511 return;
512
513
514 if (filp && (filp->f_mode & FMODE_RANDOM)) {
515 force_page_cache_readahead(mapping, filp, offset, req_size);
516 return;
517 }
518
519
520 ondemand_readahead(mapping, ra, filp, false, offset, req_size);
521}
522EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read, in pagecache pages
 *
 * Called when a previously-marked readahead page is reached; decides
 * whether to submit the next batch of asynchronous readahead.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* Readahead disabled for this file/device: nothing to do. */
	if (!ra->ra_pages)
		return;

	/*
	 * Skip pages under writeback.  NOTE(review): presumably because
	 * PG_readahead shares its bit with PG_reclaim, so a writeback page
	 * is not a genuine marker — confirm against page-flags definitions.
	 */
	if (PageWriteback(page))
		return;

	/* Each marker fires only once. */
	ClearPageReadahead(page);

	/*
	 * Defer asynchronous readahead when the backing device's read
	 * queue is congested.
	 */
	if (bdi_read_congested(mapping->backing_dev_info))
		return;

	/* Do the readahead, flagged as a marker hit. */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
567
568static ssize_t
569do_readahead(struct address_space *mapping, struct file *filp,
570 pgoff_t index, unsigned long nr)
571{
572 if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
573 return -EINVAL;
574
575 force_page_cache_readahead(mapping, filp, index, nr);
576 return 0;
577}
578
/*
 * readahead(2): populate the page cache with data from @fd, starting at
 * byte @offset, for @count bytes.  Returns what do_readahead() returns,
 * or -EBADF for a bad or non-readable file descriptor.
 */
SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	ssize_t ret;
	struct fd f;

	ret = -EBADF;
	f = fdget(fd);
	if (f.file) {
		if (f.file->f_mode & FMODE_READ) {
			struct address_space *mapping = f.file->f_mapping;
			/* Round out to whole pages covering [offset, offset+count). */
			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
			unsigned long len = end - start + 1;
			ret = do_readahead(mapping, f.file, start, len);
		}
		fdput(f);
	}
	return ret;
}
598