1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/gfp.h>
13#include <linux/mm.h>
14#include <linux/module.h>
15#include <linux/blkdev.h>
16#include <linux/backing-dev.h>
17#include <linux/task_io_accounting_ops.h>
18#include <linux/pagevec.h>
19#include <linux/pagemap.h>
20
21
22
23
24
25void
26file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
27{
28 ra->ra_pages = mapping->backing_dev_info->ra_pages;
29 ra->prev_pos = -1;
30}
31EXPORT_SYMBOL_GPL(file_ra_state_init);
32
/*
 * Resolve the list entry just before @head (i.e. the tail of the list) to the
 * struct page that embeds it through its ->lru member.
 */
#define list_to_page(head) (container_of((head)->prev, struct page, lru))
34
35
36
37
38
39
40
41
42static void read_cache_pages_invalidate_page(struct address_space *mapping,
43 struct page *page)
44{
45 if (page_has_private(page)) {
46 if (!trylock_page(page))
47 BUG();
48 page->mapping = mapping;
49 do_invalidatepage(page, 0);
50 page->mapping = NULL;
51 unlock_page(page);
52 }
53 page_cache_release(page);
54}
55
56
57
58
59static void read_cache_pages_invalidate_pages(struct address_space *mapping,
60 struct list_head *pages)
61{
62 struct page *victim;
63
64 while (!list_empty(pages)) {
65 victim = list_to_page(pages);
66 list_del(&victim->lru);
67 read_cache_pages_invalidate_page(mapping, victim);
68 }
69}
70
71
72
73
74
75
76
77
78
79
80
81int read_cache_pages(struct address_space *mapping, struct list_head *pages,
82 int (*filler)(void *, struct page *), void *data)
83{
84 struct page *page;
85 int ret = 0;
86
87 while (!list_empty(pages)) {
88 page = list_to_page(pages);
89 list_del(&page->lru);
90 if (add_to_page_cache_lru(page, mapping,
91 page->index, GFP_KERNEL)) {
92 read_cache_pages_invalidate_page(mapping, page);
93 continue;
94 }
95 page_cache_release(page);
96
97 ret = filler(data, page);
98 if (unlikely(ret)) {
99 read_cache_pages_invalidate_pages(mapping, pages);
100 break;
101 }
102 task_io_account_read(PAGE_CACHE_SIZE);
103 }
104 return ret;
105}
106
107EXPORT_SYMBOL(read_cache_pages);
108
109static int read_pages(struct address_space *mapping, struct file *filp,
110 struct list_head *pages, unsigned nr_pages)
111{
112 struct blk_plug plug;
113 unsigned page_idx;
114 int ret;
115
116 blk_start_plug(&plug);
117
118 if (mapping->a_ops->readpages) {
119 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
120
121 put_pages_list(pages);
122 goto out;
123 }
124
125 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
126 struct page *page = list_to_page(pages);
127 list_del(&page->lru);
128 if (!add_to_page_cache_lru(page, mapping,
129 page->index, GFP_KERNEL)) {
130 mapping->a_ops->readpage(filp, page);
131 }
132 page_cache_release(page);
133 }
134 ret = 0;
135
136out:
137 blk_finish_plug(&plug);
138
139 return ret;
140}
141
142
143
144
145
146
147
148
149
150static int
151__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
152 pgoff_t offset, unsigned long nr_to_read,
153 unsigned long lookahead_size)
154{
155 struct inode *inode = mapping->host;
156 struct page *page;
157 unsigned long end_index;
158 LIST_HEAD(page_pool);
159 int page_idx;
160 int ret = 0;
161 loff_t isize = i_size_read(inode);
162
163 if (isize == 0)
164 goto out;
165
166 end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
167
168
169
170
171 for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
172 pgoff_t page_offset = offset + page_idx;
173
174 if (page_offset > end_index)
175 break;
176
177 rcu_read_lock();
178 page = radix_tree_lookup(&mapping->page_tree, page_offset);
179 rcu_read_unlock();
180 if (page)
181 continue;
182
183 page = page_cache_alloc_cold(mapping);
184 if (!page)
185 break;
186 page->index = page_offset;
187 list_add(&page->lru, &page_pool);
188 if (page_idx == nr_to_read - lookahead_size)
189 SetPageReadahead(page);
190 ret++;
191 }
192
193
194
195
196
197
198 if (ret)
199 read_pages(mapping, filp, &page_pool, ret);
200 BUG_ON(!list_empty(&page_pool));
201out:
202 return ret;
203}
204
205
206
207
208
209int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
210 pgoff_t offset, unsigned long nr_to_read)
211{
212 int ret = 0;
213
214 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
215 return -EINVAL;
216
217 nr_to_read = max_sane_readahead(nr_to_read);
218 while (nr_to_read) {
219 int err;
220
221 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
222
223 if (this_chunk > nr_to_read)
224 this_chunk = nr_to_read;
225 err = __do_page_cache_readahead(mapping, filp,
226 offset, this_chunk, 0);
227 if (err < 0) {
228 ret = err;
229 break;
230 }
231 ret += err;
232 offset += this_chunk;
233 nr_to_read -= this_chunk;
234 }
235 return ret;
236}
237
238
239
240
241
242unsigned long max_sane_readahead(unsigned long nr)
243{
244 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
245 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
246}
247
248
249
250
251unsigned long ra_submit(struct file_ra_state *ra,
252 struct address_space *mapping, struct file *filp)
253{
254 int actual;
255
256 actual = __do_page_cache_readahead(mapping, filp,
257 ra->start, ra->size, ra->async_size);
258
259 return actual;
260}
261
262
263
264
265
266
267
/*
 * Pick the initial readahead window size for a request of @size pages.
 *
 * @size is rounded up to a power of two, then scaled: x4 when the result is
 * at most @max/32, x2 when it is at most @max/4, and replaced by @max
 * otherwise.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long rounded = roundup_pow_of_two(size);

	if (rounded <= max / 32)
		return rounded * 4;
	if (rounded <= max / 4)
		return rounded * 2;
	return max;
}
281
282
283
284
285
286static unsigned long get_next_ra_size(struct file_ra_state *ra,
287 unsigned long max)
288{
289 unsigned long cur = ra->size;
290 unsigned long newsize;
291
292 if (cur < max / 16)
293 newsize = 4 * cur;
294 else
295 newsize = 2 * cur;
296
297 return min(newsize, max);
298}
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345static pgoff_t count_history_pages(struct address_space *mapping,
346 struct file_ra_state *ra,
347 pgoff_t offset, unsigned long max)
348{
349 pgoff_t head;
350
351 rcu_read_lock();
352 head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
353 rcu_read_unlock();
354
355 return offset - 1 - head;
356}
357
358
359
360
361static int try_context_readahead(struct address_space *mapping,
362 struct file_ra_state *ra,
363 pgoff_t offset,
364 unsigned long req_size,
365 unsigned long max)
366{
367 pgoff_t size;
368
369 size = count_history_pages(mapping, ra, offset, max);
370
371
372
373
374
375 if (!size)
376 return 0;
377
378
379
380
381
382 if (size >= offset)
383 size *= 2;
384
385 ra->start = offset;
386 ra->size = get_init_ra_size(size + req_size, max);
387 ra->async_size = ra->size;
388
389 return 1;
390}
391
392
393
394
395static unsigned long
396ondemand_readahead(struct address_space *mapping,
397 struct file_ra_state *ra, struct file *filp,
398 bool hit_readahead_marker, pgoff_t offset,
399 unsigned long req_size)
400{
401 unsigned long max = max_sane_readahead(ra->ra_pages);
402
403
404
405
406 if (!offset)
407 goto initial_readahead;
408
409
410
411
412
413 if ((offset == (ra->start + ra->size - ra->async_size) ||
414 offset == (ra->start + ra->size))) {
415 ra->start += ra->size;
416 ra->size = get_next_ra_size(ra, max);
417 ra->async_size = ra->size;
418 goto readit;
419 }
420
421
422
423
424
425
426
427 if (hit_readahead_marker) {
428 pgoff_t start;
429
430 rcu_read_lock();
431 start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
432 rcu_read_unlock();
433
434 if (!start || start - offset > max)
435 return 0;
436
437 ra->start = start;
438 ra->size = start - offset;
439 ra->size += req_size;
440 ra->size = get_next_ra_size(ra, max);
441 ra->async_size = ra->size;
442 goto readit;
443 }
444
445
446
447
448 if (req_size > max)
449 goto initial_readahead;
450
451
452
453
454 if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
455 goto initial_readahead;
456
457
458
459
460
461 if (try_context_readahead(mapping, ra, offset, req_size, max))
462 goto readit;
463
464
465
466
467
468 return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);
469
470initial_readahead:
471 ra->start = offset;
472 ra->size = get_init_ra_size(req_size, max);
473 ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
474
475readit:
476
477
478
479
480
481 if (offset == ra->start && ra->size == ra->async_size) {
482 ra->async_size = get_next_ra_size(ra, max);
483 ra->size += ra->async_size;
484 }
485
486 return ra_submit(ra, mapping, filp);
487}
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503void page_cache_sync_readahead(struct address_space *mapping,
504 struct file_ra_state *ra, struct file *filp,
505 pgoff_t offset, unsigned long req_size)
506{
507
508 if (!ra->ra_pages)
509 return;
510
511
512 if (filp && (filp->f_mode & FMODE_RANDOM)) {
513 force_page_cache_readahead(mapping, filp, offset, req_size);
514 return;
515 }
516
517
518 ondemand_readahead(mapping, ra, filp, false, offset, req_size);
519}
520EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537void
538page_cache_async_readahead(struct address_space *mapping,
539 struct file_ra_state *ra, struct file *filp,
540 struct page *page, pgoff_t offset,
541 unsigned long req_size)
542{
543
544 if (!ra->ra_pages)
545 return;
546
547
548
549
550 if (PageWriteback(page))
551 return;
552
553 ClearPageReadahead(page);
554
555
556
557
558 if (bdi_read_congested(mapping->backing_dev_info))
559 return;
560
561
562 ondemand_readahead(mapping, ra, filp, true, offset, req_size);
563}
564EXPORT_SYMBOL_GPL(page_cache_async_readahead);
565