1
2
3
4
5
6
7
8
9
10#include <linux/kernel.h>
11#include <linux/fs.h>
12#include <linux/mm.h>
13#include <linux/module.h>
14#include <linux/blkdev.h>
15#include <linux/backing-dev.h>
16#include <linux/task_io_accounting_ops.h>
17#include <linux/pagevec.h>
18#include <linux/pagemap.h>
19
20
21
22
23
/*
 * file_ra_state_init - initialize a file's readahead state
 * @ra:      readahead state (normally embedded in the struct file)
 * @mapping: address space the file reads from
 *
 * Seeds @ra with the backing device's default readahead window size and
 * marks the previous read position as invalid (-1) so the first read is
 * treated as having no history.
 */
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
	ra->ra_pages = mapping->backing_dev_info->ra_pages;
	ra->prev_pos = -1;
}
EXPORT_SYMBOL_GPL(file_ra_state_init);
31
32#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
33
34
35
36
37
38
39
40
/*
 * Release a page that failed (or was skipped) during read_cache_pages().
 * The caller's filler may have attached private data (PG_private) to the
 * page before failing, so give the filesystem a chance to clean that up
 * via its invalidatepage op before the final reference is dropped.
 */
static void read_cache_pages_invalidate_page(struct address_space *mapping,
					     struct page *page)
{
	if (page_has_private(page)) {
		/* nobody else can hold the lock here; a failure is a bug */
		if (!trylock_page(page))
			BUG();
		/*
		 * do_invalidatepage() needs page->mapping to find the
		 * filesystem's invalidatepage op; restore NULL afterwards
		 * since the page was never actually in the mapping.
		 */
		page->mapping = mapping;
		do_invalidatepage(page, 0);
		page->mapping = NULL;
		unlock_page(page);
	}
	page_cache_release(page);
}
54
55
56
57
58static void read_cache_pages_invalidate_pages(struct address_space *mapping,
59 struct list_head *pages)
60{
61 struct page *victim;
62
63 while (!list_empty(pages)) {
64 victim = list_to_page(pages);
65 list_del(&victim->lru);
66 read_cache_pages_invalidate_page(mapping, victim);
67 }
68}
69
70
71
72
73
74
75
76
77
78
79
/**
 * read_cache_pages - populate an address space with some pages & start reads against them
 * @mapping: the address_space
 * @pages: The address of a list_head which contains the target pages.  These
 *   pages have their ->index populated and are otherwise uninitialised.
 * @filler: callback routine for filling a single page.
 * @data: private data for the callback routine.
 *
 * Hides the details of the LRU cache etc from the filesystems.
 * Returns 0 on success, or the first non-zero value returned by @filler
 * (remaining pages are invalidated and released in that case).
 */
int read_cache_pages(struct address_space *mapping, struct list_head *pages,
			int (*filler)(void *, struct page *), void *data)
{
	struct page *page;
	int ret = 0;

	while (!list_empty(pages)) {
		page = list_to_page(pages);
		list_del(&page->lru);
		if (add_to_page_cache_lru(page, mapping,
					page->index, GFP_KERNEL)) {
			/* insertion failed: let the fs clean up, drop page */
			read_cache_pages_invalidate_page(mapping, page);
			continue;
		}
		/* drop our list reference; the page cache now holds one */
		page_cache_release(page);

		ret = filler(data, page);
		if (unlikely(ret)) {
			read_cache_pages_invalidate_pages(mapping, pages);
			break;
		}
		task_io_account_read(PAGE_CACHE_SIZE);
	}
	return ret;
}

EXPORT_SYMBOL(read_cache_pages);
107
108static int read_pages(struct address_space *mapping, struct file *filp,
109 struct list_head *pages, unsigned nr_pages)
110{
111 unsigned page_idx;
112 int ret;
113
114 if (mapping->a_ops->readpages) {
115 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
116
117 put_pages_list(pages);
118 goto out;
119 }
120
121 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
122 struct page *page = list_to_page(pages);
123 list_del(&page->lru);
124 if (!add_to_page_cache_lru(page, mapping,
125 page->index, GFP_KERNEL)) {
126 mapping->a_ops->readpage(filp, page);
127 }
128 page_cache_release(page);
129 }
130 ret = 0;
131out:
132 return ret;
133}
134
135
136
137
138
139
140
141
142
/*
 * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
 * all the pages first, then submits them all for I/O.  This avoids the very
 * bad behaviour which would occur if page allocations are causing VM
 * writeback.  We really don't want to intermingle reads and writes like that.
 *
 * Returns the number of pages submitted for read (may be 0, e.g. for an
 * empty file, a fully-cached range, or allocation failure).
 */
static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
			pgoff_t offset, unsigned long nr_to_read,
			unsigned long lookahead_size)
{
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long end_index;	/* last valid page of the file */
	LIST_HEAD(page_pool);
	int page_idx;
	int ret = 0;
	loff_t isize = i_size_read(inode);

	if (isize == 0)
		goto out;

	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);

	/*
	 * Preallocate as many pages as we will need.
	 */
	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		pgoff_t page_offset = offset + page_idx;

		if (page_offset > end_index)
			break;

		/* skip pages that are already present in the page cache */
		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, page_offset);
		rcu_read_unlock();
		if (page)
			continue;

		page = page_cache_alloc_cold(mapping);
		if (!page)
			break;
		page->index = page_offset;
		list_add(&page->lru, &page_pool);
		/*
		 * Mark the page at lookahead distance: hitting it later
		 * triggers the next asynchronous readahead.
		 */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);
		ret++;
	}

	/*
	 * Now start the IO.  We ignore I/O errors - if the page is not
	 * uptodate then the caller will launch readpage again, and
	 * will then handle the error.
	 */
	if (ret)
		read_pages(mapping, filp, &page_pool, ret);
	BUG_ON(!list_empty(&page_pool));
out:
	return ret;
}
197
198
199
200
201
202int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
203 pgoff_t offset, unsigned long nr_to_read)
204{
205 int ret = 0;
206
207 if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
208 return -EINVAL;
209
210 nr_to_read = max_sane_readahead(nr_to_read);
211 while (nr_to_read) {
212 int err;
213
214 unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_CACHE_SIZE;
215
216 if (this_chunk > nr_to_read)
217 this_chunk = nr_to_read;
218 err = __do_page_cache_readahead(mapping, filp,
219 offset, this_chunk, 0);
220 if (err < 0) {
221 ret = err;
222 break;
223 }
224 ret += err;
225 offset += this_chunk;
226 nr_to_read -= this_chunk;
227 }
228 return ret;
229}
230
231
232
233
234
235unsigned long max_sane_readahead(unsigned long nr)
236{
237 return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
238 + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
239}
240
241
242
243
244unsigned long ra_submit(struct file_ra_state *ra,
245 struct address_space *mapping, struct file *filp)
246{
247 int actual;
248
249 actual = __do_page_cache_readahead(mapping, filp,
250 ra->start, ra->size, ra->async_size);
251
252 return actual;
253}
254
255
256
257
258
259
260
/*
 * Pick the initial readahead window for a fresh read stream: round the
 * request up to a power of two, then scale it up more aggressively the
 * smaller it is relative to the device maximum @max.
 */
static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
	unsigned long ra = roundup_pow_of_two(size);

	if (ra <= max / 32)
		return ra * 4;
	if (ra <= max / 4)
		return ra * 2;
	return max;
}
274
275
276
277
278
279static unsigned long get_next_ra_size(struct file_ra_state *ra,
280 unsigned long max)
281{
282 unsigned long cur = ra->size;
283 unsigned long newsize;
284
285 if (cur < max / 16)
286 newsize = 4 * cur;
287 else
288 newsize = 2 * cur;
289
290 return min(newsize, max);
291}
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
/*
 * Count contiguously cached pages backwards from @offset-1 (scanning at
 * most @max slots).  This count is a conservative estimation of
 * 	- length of the sequential read sequence, or
 * 	- thrashing threshold in memory tight systems
 */
static pgoff_t count_history_pages(struct address_space *mapping,
				   struct file_ra_state *ra,
				   pgoff_t offset, unsigned long max)
{
	pgoff_t head;

	rcu_read_lock();
	head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max);
	rcu_read_unlock();

	return offset - 1 - head;
}
350
351
352
353
/*
 * page cache context based readahead: look at the pages already cached
 * behind @offset to decide whether this is part of a sequential stream,
 * and if so set up a readahead window in @ra.  Returns 1 if a window was
 * set up, 0 if this looks like a random read.
 */
static int try_context_readahead(struct address_space *mapping,
				 struct file_ra_state *ra,
				 pgoff_t offset,
				 unsigned long req_size,
				 unsigned long max)
{
	pgoff_t size;

	size = count_history_pages(mapping, ra, offset, max);

	/*
	 * no history pages:
	 * it could be a random read
	 */
	if (!size)
		return 0;

	/*
	 * starts from beginning of file:
	 * it is a strong indication of long-run stream (or whole-file reads)
	 */
	if (size >= offset)
		size *= 2;

	ra->start = offset;
	ra->size = get_init_ra_size(size + req_size, max);
	ra->async_size = ra->size;

	return 1;
}
384
385
386
387
/*
 * A minimal readahead algorithm for trivial sequential/random reads.
 *
 * Decides where the next readahead window should be placed and how large
 * it should be, based on the request (@offset, @req_size) and the state
 * stored in @ra, then submits the I/O via ra_submit().  Returns the number
 * of pages submitted (0 means no readahead was done).
 */
static unsigned long
ondemand_readahead(struct address_space *mapping,
		   struct file_ra_state *ra, struct file *filp,
		   bool hit_readahead_marker, pgoff_t offset,
		   unsigned long req_size)
{
	unsigned long max = max_sane_readahead(ra->ra_pages);

	/*
	 * start of file
	 */
	if (!offset)
		goto initial_readahead;

	/*
	 * It's the expected callback offset, assume sequential access.
	 * Ramp up sizes, and push forward the readahead window.
	 */
	if ((offset == (ra->start + ra->size - ra->async_size) ||
	     offset == (ra->start + ra->size))) {
		ra->start += ra->size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * Hit a marked page without valid readahead state.
	 * E.g. interleaved reads.
	 * Query the pagecache for async_size, which normally equals to
	 * readahead size. Ramp it up and use it as the new readahead size.
	 */
	if (hit_readahead_marker) {
		pgoff_t start;

		rcu_read_lock();
		start = radix_tree_next_hole(&mapping->page_tree, offset+1,max);
		rcu_read_unlock();

		if (!start || start - offset > max)
			return 0;

		ra->start = start;
		ra->size = start - offset;	/* old async_size */
		ra->size += req_size;
		ra->size = get_next_ra_size(ra, max);
		ra->async_size = ra->size;
		goto readit;
	}

	/*
	 * oversize read
	 */
	if (req_size > max)
		goto initial_readahead;

	/*
	 * sequential cache miss
	 */
	if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL)
		goto initial_readahead;

	/*
	 * Query the page cache and look for the traces (cached history
	 * pages) that a sequential stream would leave behind.
	 */
	if (try_context_readahead(mapping, ra, offset, req_size, max))
		goto readit;

	/*
	 * standalone, small random read
	 * Read as is, and do not pollute the readahead state.
	 */
	return __do_page_cache_readahead(mapping, filp, offset, req_size, 0);

initial_readahead:
	ra->start = offset;
	ra->size = get_init_ra_size(req_size, max);
	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;

readit:
	/*
	 * Will this read hit the readahead marker made by itself?
	 * If so, trigger the readahead marker hit now, and merge
	 * the resulted next readahead window into the current one.
	 */
	if (offset == ra->start && ra->size == ra->async_size) {
		ra->async_size = get_next_ra_size(ra, max);
		ra->size += ra->async_size;
	}

	return ra_submit(ra, mapping, filp);
}
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496void page_cache_sync_readahead(struct address_space *mapping,
497 struct file_ra_state *ra, struct file *filp,
498 pgoff_t offset, unsigned long req_size)
499{
500
501 if (!ra->ra_pages)
502 return;
503
504
505 ondemand_readahead(mapping, ra, filp, false, offset, req_size);
506}
507EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @filp: passed on to ->readpage() and ->readpages()
 * @page: the page at @offset which has the PG_readahead flag set
 * @offset: start offset into @mapping, in pagecache page-sized units
 * @req_size: hint: total size of the read which the caller is performing in
 *            pagecache pages
 *
 * page_cache_async_readahead() should be called when a page is used which
 * has the PG_readahead flag; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
void
page_cache_async_readahead(struct address_space *mapping,
			   struct file_ra_state *ra, struct file *filp,
			   struct page *page, pgoff_t offset,
			   unsigned long req_size)
{
	/* no read-ahead */
	if (!ra->ra_pages)
		return;

	/*
	 * Same bit is used for PG_readahead and PG_reclaim; a page under
	 * writeback may actually carry PG_reclaim, so don't treat it as a
	 * readahead marker.
	 */
	if (PageWriteback(page))
		return;

	ClearPageReadahead(page);

	/*
	 * Defer asynchronous read-ahead on IO congestion.
	 */
	if (bdi_read_congested(mapping->backing_dev_info))
		return;

	/* do read-ahead */
	ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
552