1
2
3
4
5
6
7
8
9
10
11
12#include <linux/rbtree.h>
13#include <linux/list_sort.h>
14#include "ext4.h"
15#include "extents_status.h"
16
17#include <trace/events/ext4.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143static struct kmem_cache *ext4_es_cachep;
144
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
151 struct ext4_inode_info *locked_ei);
152
153int __init ext4_init_es(void)
154{
155 ext4_es_cachep = kmem_cache_create("ext4_extent_status",
156 sizeof(struct extent_status),
157 0, (SLAB_RECLAIM_ACCOUNT), NULL);
158 if (ext4_es_cachep == NULL)
159 return -ENOMEM;
160 return 0;
161}
162
163void ext4_exit_es(void)
164{
165 if (ext4_es_cachep)
166 kmem_cache_destroy(ext4_es_cachep);
167}
168
169void ext4_es_init_tree(struct ext4_es_tree *tree)
170{
171 tree->root = RB_ROOT;
172 tree->cache_es = NULL;
173}
174
#ifdef ES_DEBUG__
/*
 * Dump every cached status extent of @inode to the kernel log
 * (ES_DEBUG__ builds only).  The per-extent entries and the trailing
 * newline are continuations of the header line, so they must use
 * KERN_CONT; using KERN_DEBUG for them starts a fresh log record per
 * extent and breaks the intended single-line output.
 */
static void ext4_es_print_tree(struct inode *inode)
{
	struct ext4_es_tree *tree;
	struct rb_node *node;

	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_es_tree;
	node = rb_first(&tree->root);
	while (node) {
		struct extent_status *es;
		es = rb_entry(node, struct extent_status, rb_node);
		printk(KERN_CONT " [%u/%u) %llu %x",
		       es->es_lblk, es->es_len,
		       ext4_es_pblock(es), ext4_es_status(es));
		node = rb_next(node);
	}
	printk(KERN_CONT "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif
197
/*
 * Return the last logical block covered by @es (inclusive).  The
 * BUG_ON catches es_lblk + es_len wrapping past the 32-bit logical
 * block space, which would make the result meaningless.
 */
static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
{
	BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
	return es->es_lblk + es->es_len - 1;
}
203
204
205
206
207
/*
 * Search the status tree for the extent containing @lblk.  If no
 * cached extent covers @lblk, return the first extent starting after
 * it instead, or NULL if @lblk lies beyond the last cached extent.
 * Caller must hold i_es_lock (read or write).
 */
static struct extent_status *__es_tree_search(struct rb_root *root,
 ext4_lblk_t lblk)
{
	struct rb_node *node = root->rb_node;
	struct extent_status *es = NULL;

	while (node) {
		es = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es))
			node = node->rb_right;
		else
			return es;	/* @lblk falls inside this extent */
	}

	/*
	 * Not found: 'es' is the last node visited before running off
	 * the tree, i.e. the nearest neighbour of @lblk.
	 */
	if (es && lblk < es->es_lblk)
		return es;

	if (es && lblk > ext4_es_end(es)) {
		/* nearest neighbour ends before @lblk; take its successor */
		node = rb_next(&es->rb_node);
		return node ? rb_entry(node, struct extent_status, rb_node) :
			      NULL;
	}

	return NULL;
}
235
236
237
238
239
240
241
242
243
244
/*
 * ext4_es_find_delayed_extent_range: find the first delayed extent
 * intersecting [@lblk, @end] and copy it into @es.  If none exists,
 * @es is returned with es_lblk/es_len/es_pblk all zero.  Takes
 * i_es_lock for reading; also refreshes tree->cache_es on a hit.
 */
void ext4_es_find_delayed_extent_range(struct inode *inode,
 ext4_lblk_t lblk, ext4_lblk_t end,
 struct extent_status *es)
{
	struct ext4_es_tree *tree = NULL;
	struct extent_status *es1 = NULL;
	struct rb_node *node;

	BUG_ON(es == NULL);
	BUG_ON(end < lblk);
	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);

	read_lock(&EXT4_I(inode)->i_es_lock);
	tree = &EXT4_I(inode)->i_es_tree;

	/* start from the "not found" state */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		/* fast path: the cached extent already covers @lblk */
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u) %llu %x\n",
				 lblk, es1->es_lblk, es1->es_len,
				 ext4_es_pblock(es1), ext4_es_status(es1));
			goto out;
		}
	}

	es1 = __es_tree_search(&tree->root, lblk);

out:
	/* walk forward past non-delayed extents, stopping at @end */
	if (es1 && !ext4_es_is_delayed(es1)) {
		while ((node = rb_next(&es1->rb_node)) != NULL) {
			es1 = rb_entry(node, struct extent_status, rb_node);
			if (es1->es_lblk > end) {
				es1 = NULL;
				break;
			}
			if (ext4_es_is_delayed(es1))
				break;
		}
	}

	if (es1 && ext4_es_is_delayed(es1)) {
		tree->cache_es = es1;
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
}
298
/*
 * Allocate and initialize a new extent_status object.  GFP_ATOMIC is
 * required because callers hold i_es_lock.  Returns NULL on failure.
 */
static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
		     ext4_fsblk_t pblk)
{
	struct extent_status *es;
	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
	if (es == NULL)
		return NULL;
	es->es_lblk = lblk;
	es->es_len = len;
	es->es_pblk = pblk;

	/*
	 * Only non-delayed extents count toward the reclaimable-cache
	 * statistics (i_es_lru_nr / s_extent_cache_cnt): the shrinker
	 * in __es_try_to_reclaim_extents() only frees !delayed entries.
	 */
	if (!ext4_es_is_delayed(es)) {
		EXT4_I(inode)->i_es_lru_nr++;
		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
	}

	return es;
}
321
/*
 * Free an extent_status object, undoing the reclaimable-cache
 * accounting done in ext4_es_alloc_extent() for non-delayed extents.
 */
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
	/* delayed extents were never counted, so only adjust for others */
	if (!ext4_es_is_delayed(es)) {
		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
		EXT4_I(inode)->i_es_lru_nr--;
		percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
	}

	kmem_cache_free(ext4_es_cachep, es);
}
333
334
335
336
337
338
339
340
/*
 * Check whether @es1 and @es2 (with @es1 logically first) can be
 * merged into one status extent: identical status bits, logically
 * adjacent, combined length still fits in 32 bits, and — for mapped
 * extents — physically contiguous as well.  Returns 1 if mergeable.
 */
static int ext4_es_can_be_merged(struct extent_status *es1,
				 struct extent_status *es2)
{
	if (ext4_es_status(es1) != ext4_es_status(es2))
		return 0;

	/* merged es_len must not overflow its 32-bit field */
	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
		return 0;

	/* must be logically contiguous (64-bit math avoids lblk wrap) */
	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
		return 0;

	/* mapped extents must also be physically contiguous */
	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
		return 1;

	if (ext4_es_is_hole(es1))
		return 1;

	/* delayed extents merge only while not also marked unwritten */
	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
		return 1;

	return 0;
}
366
367static struct extent_status *
368ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
369{
370 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
371 struct extent_status *es1;
372 struct rb_node *node;
373
374 node = rb_prev(&es->rb_node);
375 if (!node)
376 return es;
377
378 es1 = rb_entry(node, struct extent_status, rb_node);
379 if (ext4_es_can_be_merged(es1, es)) {
380 es1->es_len += es->es_len;
381 rb_erase(&es->rb_node, &tree->root);
382 ext4_es_free_extent(inode, es);
383 es = es1;
384 }
385
386 return es;
387}
388
389static struct extent_status *
390ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
391{
392 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
393 struct extent_status *es1;
394 struct rb_node *node;
395
396 node = rb_next(&es->rb_node);
397 if (!node)
398 return es;
399
400 es1 = rb_entry(node, struct extent_status, rb_node);
401 if (ext4_es_can_be_merged(es, es1)) {
402 es->es_len += es1->es_len;
403 rb_erase(node, &tree->root);
404 ext4_es_free_extent(inode, es1);
405 }
406
407 return es;
408}
409
410#ifdef ES_AGGRESSIVE_TEST
411#include "ext4_extents.h"
412
/*
 * Cross-check a to-be-inserted status extent against the on-disk
 * extent tree (ES_AGGRESSIVE_TEST builds only).  Disagreements are
 * reported via pr_warn; nothing is modified.
 */
static void ext4_es_insert_extent_ext_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t ee_block;
	ext4_fsblk_t ee_start;
	unsigned short ee_len;
	int depth, ee_status, es_status;

	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;

	if (ex) {

		ee_block = le32_to_cpu(ex->ee_block);
		ee_start = ext4_ext_pblock(ex);
		ee_len = ext4_ext_get_actual_len(ex);

		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
		es_status = ext4_es_is_unwritten(es) ? 1 : 0;

		/*
		 * When inserting a delayed/hole extent, the extent tree
		 * must not contain a real mapping overlapping es_lblk.
		 */
		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
			if (in_range(es->es_lblk, ee_block, ee_len)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu we can find an extent "
					"at block [%d/%d/%llu/%c], but we "
					"want to add a delayed/hole extent "
					"[%d/%d/%llu/%x]\n",
					inode->i_ino, ee_block, ee_len,
					ee_start, ee_status ? 'u' : 'w',
					es->es_lblk, es->es_len,
					ext4_es_pblock(es), ext4_es_status(es));
			}
			goto out;
		}

		/*
		 * For a written/unwritten extent, only require that it
		 * lies inside the on-disk extent with a consistent
		 * logical->physical offset (es may be a sub-range of ex).
		 */
		if (es->es_lblk < ee_block ||
		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
			goto out;
		}

		/* written vs unwritten status must agree between ex and es */
		if (ee_status ^ es_status) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
		}
	} else {
		/*
		 * No extent found at es_lblk, so only a delayed or hole
		 * status extent is acceptable here.
		 */
		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"can't find an extent at block %d but we want "
				"to add a written/unwritten extent "
				"[%d/%d/%llu/%x]\n", inode->i_ino,
				es->es_lblk, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
		}
	}
out:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
}
501
/*
 * Cross-check a to-be-inserted status extent against the indirect
 * block map (ES_AGGRESSIVE_TEST builds only).  Disagreements are
 * reported via pr_warn; nothing is modified.
 */
static void ext4_es_insert_extent_ind_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_map_blocks map;
	int retval;

	/*
	 * ext4_ind_map_blocks() is called with a NULL handle and no
	 * flags, so this is a read-only lookup that never allocates
	 * blocks.  retval > 0 is the number of mapped blocks found,
	 * 0 means a hole.
	 */
	map.m_lblk = es->es_lblk;
	map.m_len = es->es_len;

	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
	if (retval > 0) {
		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
			/*
			 * Blocks exist on disk, yet the caller wants to
			 * record a delayed/hole extent: inconsistent.
			 */
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can find blocks but we want to add a "
				"delayed/hole extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		} else if (ext4_es_is_written(es)) {
			if (retval != es->es_len) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu retval %d != es_len %d\n",
					inode->i_ino, retval, es->es_len);
				return;
			}
			if (map.m_pblk != ext4_es_pblock(es)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu m_pblk %llu != "
					"es_pblk %llu\n",
					inode->i_ino, map.m_pblk,
					ext4_es_pblock(es));
				return;
			}
		} else {
			/*
			 * Indirect-mapped inodes cannot have unwritten
			 * extents, so no other status is possible here.
			 */
			BUG_ON(1);
		}
	} else if (retval == 0) {
		if (ext4_es_is_written(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can't find the block but we want to add "
				"a written extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		}
	}
}
564
/*
 * Dispatch the aggressive consistency check to the extent-tree or
 * indirect-map variant depending on the inode's mapping type.
 */
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
	/*
	 * The BUG_ON enforces that the caller holds i_data_sem, which
	 * keeps the on-disk mapping stable while we compare against it.
	 */
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		ext4_es_insert_extent_ext_check(inode, es);
	else
		ext4_es_insert_extent_ind_check(inode, es);
}
#else
/* Non-debug builds: the check compiles away to nothing. */
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
}
#endif
584
/*
 * Insert @newes into the inode's status tree, merging with an
 * adjacent compatible extent where possible instead of allocating.
 * Caller holds i_es_lock for write and guarantees the target range
 * is not already present (see __es_remove_extent()).
 * Returns 0 on success, -ENOMEM if a new node cannot be allocated.
 */
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_status *es;

	while (*p) {
		parent = *p;
		es = rb_entry(parent, struct extent_status, rb_node);

		if (newes->es_lblk < es->es_lblk) {
			if (ext4_es_can_be_merged(newes, es)) {
				/*
				 * newes abuts es on the left: extend es
				 * downward in place, then see whether the
				 * grown extent now also merges with its
				 * left neighbour.
				 */
				es->es_lblk = newes->es_lblk;
				es->es_len += newes->es_len;
				if (ext4_es_is_written(es) ||
				    ext4_es_is_unwritten(es))
					ext4_es_store_pblock(es,
							     newes->es_pblk);
				es = ext4_es_try_to_merge_left(inode, es);
				goto out;
			}
			p = &(*p)->rb_left;
		} else if (newes->es_lblk > ext4_es_end(es)) {
			if (ext4_es_can_be_merged(es, newes)) {
				/* newes abuts es on the right: extend es */
				es->es_len += newes->es_len;
				es = ext4_es_try_to_merge_right(inode, es);
				goto out;
			}
			p = &(*p)->rb_right;
		} else {
			/* overlap: caller should have removed it first */
			BUG_ON(1);
			return -EINVAL;
		}
	}

	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
				  newes->es_pblk);
	if (!es)
		return -ENOMEM;
	rb_link_node(&es->rb_node, parent, p);
	rb_insert_color(&es->rb_node, &tree->root);

out:
	tree->cache_es = es;
	return 0;
}
636
637
638
639
640
641
642
/*
 * ext4_es_insert_extent() adds information to an inode's extent
 * status tree, replacing whatever was cached for [lblk, lblk+len).
 *
 * Return 0 on success, error code on failure.
 */
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len, ext4_fsblk_t pblk,
			  unsigned int status)
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err = 0;

	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);

	if (!len)
		return 0;

	BUG_ON(end < lblk);

	newes.es_lblk = lblk;
	newes.es_len = len;
	ext4_es_store_pblock_status(&newes, pblk, status);
	trace_ext4_es_insert_extent(inode, &newes);

	/* compiles away unless ES_AGGRESSIVE_TEST is defined */
	ext4_es_insert_extent_check(inode, &newes);

	write_lock(&EXT4_I(inode)->i_es_lock);
	/* drop any cached state overlapping the new range first */
	err = __es_remove_extent(inode, lblk, end);
	if (err != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	/* on ENOMEM, try to reclaim cached extents and retry once more */
	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
					       EXT4_I(inode)))
		goto retry;
	/*
	 * Failing to cache a non-delayed extent is harmless (it can be
	 * re-read from disk), so swallow the error in that case.
	 */
	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
		err = 0;

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_es_print_tree(inode);

	return err;
}
685
686
687
688
689
690
/*
 * ext4_es_cache_extent() inserts information into the extent status
 * tree only if no entry already intersects the range, so existing
 * cached state is never disturbed.  Insertion failure is ignored:
 * this is purely an opportunistic cache fill.
 */
void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len, ext4_fsblk_t pblk,
			  unsigned int status)
{
	struct extent_status *es;
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;

	newes.es_lblk = lblk;
	newes.es_len = len;
	ext4_es_store_pblock_status(&newes, pblk, status);
	trace_ext4_es_cache_extent(inode, &newes);

	if (!len)
		return;

	BUG_ON(end < lblk);

	write_lock(&EXT4_I(inode)->i_es_lock);

	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
	/* only insert when nothing cached intersects [lblk, end] */
	if (!es || es->es_lblk > end)
		__es_insert_extent(inode, &newes);
	write_unlock(&EXT4_I(inode)->i_es_lock);
}
716
717
718
719
720
721
722
723
/*
 * ext4_es_lookup_extent() looks up the extent covering @lblk in the
 * status tree.  On a hit, *es is filled with the whole cached extent
 * (which may extend beyond @lblk) and 1 is returned; otherwise 0.
 */
int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
			  struct extent_status *es)
{
	struct ext4_es_tree *tree;
	struct extent_status *es1 = NULL;
	struct rb_node *node;
	int found = 0;

	trace_ext4_es_lookup_extent_enter(inode, lblk);
	es_debug("lookup extent in block %u\n", lblk);

	tree = &EXT4_I(inode)->i_es_tree;
	read_lock(&EXT4_I(inode)->i_es_lock);

	/* start from the "not found" state */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		/* fast path: last-used extent already covers @lblk */
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u)\n",
				 lblk, es1->es_lblk, es1->es_len);
			found = 1;
			goto out;
		}
	}

	/* slow path: binary search of the rb-tree */
	node = tree->root.rb_node;
	while (node) {
		es1 = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es1->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es1))
			node = node->rb_right;
		else {
			found = 1;
			break;
		}
	}

out:
	if (found) {
		BUG_ON(!es1);
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_lookup_extent_exit(inode, es, found);
	return found;
}
776
/*
 * Remove [lblk, end] from the inode's status tree, trimming or
 * splitting any extent that straddles a boundary.  Caller holds
 * i_es_lock for write.  Returns 0 on success or -ENOMEM when a split
 * requires a new node that cannot be allocated even after shrinking.
 */
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	struct extent_status orig_es;
	ext4_lblk_t len1, len2;
	ext4_fsblk_t block;
	int err;

retry:
	err = 0;
	/* first extent at/after lblk; nothing to do if past end */
	es = __es_tree_search(&tree->root, lblk);
	if (!es)
		goto out;
	if (es->es_lblk > end)
		goto out;

	/* the cached extent may be about to change or go away */
	tree->cache_es = NULL;

	orig_es.es_lblk = es->es_lblk;
	orig_es.es_len = es->es_len;
	orig_es.es_pblk = es->es_pblk;

	/* len1/len2 = surviving blocks before/after the removed range */
	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
	if (len1 > 0)
		es->es_len = len1;
	if (len2 > 0) {
		if (len1 > 0) {
			/*
			 * Range is strictly inside es: keep the head in
			 * es and insert a brand-new extent for the tail.
			 */
			struct extent_status newes;

			newes.es_lblk = end + 1;
			newes.es_len = len2;
			/* poison pblk; only meaningful for mapped extents */
			block = 0x7FDEADBEEFULL;
			if (ext4_es_is_written(&orig_es) ||
			    ext4_es_is_unwritten(&orig_es))
				block = ext4_es_pblock(&orig_es) +
					orig_es.es_len - len2;
			ext4_es_store_pblock_status(&newes, block,
						    ext4_es_status(&orig_es));
			err = __es_insert_extent(inode, &newes);
			if (err) {
				/* roll back the trim, shrink, and retry */
				es->es_lblk = orig_es.es_lblk;
				es->es_len = orig_es.es_len;
				if ((err == -ENOMEM) &&
				    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
						     EXT4_I(inode)))
					goto retry;
				goto out;
			}
		} else {
			/* range covers the head only: shift es upward */
			es->es_lblk = end + 1;
			es->es_len = len2;
			if (ext4_es_is_written(es) ||
			    ext4_es_is_unwritten(es)) {
				block = orig_es.es_pblk + orig_es.es_len - len2;
				ext4_es_store_pblock(es, block);
			}
		}
		goto out;
	}

	if (len1 > 0) {
		/* head survived; continue removal from the next extent */
		node = rb_next(&es->rb_node);
		if (node)
			es = rb_entry(node, struct extent_status, rb_node);
		else
			es = NULL;
	}

	/* drop every extent wholly contained in [lblk, end] */
	while (es && ext4_es_end(es) <= end) {
		node = rb_next(&es->rb_node);
		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		if (!node) {
			es = NULL;
			break;
		}
		es = rb_entry(node, struct extent_status, rb_node);
	}

	/*
	 * Trim the head of a final extent that extends past end.
	 * NOTE(review): if end == 0xFFFFFFFF, 'end + 1' wraps to 0 and
	 * this branch is skipped — confirm callers never pass the
	 * maximal logical block as @end.
	 */
	if (es && es->es_lblk < end + 1) {
		ext4_lblk_t orig_len = es->es_len;

		len1 = ext4_es_end(es) - end;
		es->es_lblk = end + 1;
		es->es_len = len1;
		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
			block = es->es_pblk + orig_len - len1;
			ext4_es_store_pblock(es, block);
		}
	}

out:
	return err;
}
876
877
878
879
880
881
882int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
883 ext4_lblk_t len)
884{
885 ext4_lblk_t end;
886 int err = 0;
887
888 trace_ext4_es_remove_extent(inode, lblk, len);
889 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
890 lblk, len, inode->i_ino);
891
892 if (!len)
893 return err;
894
895 end = lblk + len - 1;
896 BUG_ON(end < lblk);
897
898 write_lock(&EXT4_I(inode)->i_es_lock);
899 err = __es_remove_extent(inode, lblk, end);
900 write_unlock(&EXT4_I(inode)->i_es_lock);
901 ext4_es_print_tree(inode);
902 return err;
903}
904
905static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
906 struct list_head *b)
907{
908 struct ext4_inode_info *eia, *eib;
909 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
910 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
911
912 if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
913 !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
914 return 1;
915 if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
916 ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
917 return -1;
918 if (eia->i_touch_when == eib->i_touch_when)
919 return 0;
920 if (time_after(eia->i_touch_when, eib->i_touch_when))
921 return 1;
922 else
923 return -1;
924}
925
/*
 * Walk the superblock's inode LRU reclaiming up to @nr_to_scan
 * non-delayed status extents.  @locked_ei, if set, is an inode whose
 * i_es_lock the caller already holds; it is skipped during the walk
 * and reclaimed directly at the end if nothing else yielded.
 * Returns the number of extents reclaimed.
 */
static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
			    struct ext4_inode_info *locked_ei)
{
	struct ext4_inode_info *ei;
	struct list_head *cur, *tmp;
	LIST_HEAD(skipped);
	int nr_shrunk = 0;
	int retried = 0, skip_precached = 1, nr_skipped = 0;

	spin_lock(&sbi->s_es_lru_lock);

retry:
	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
		int shrunk;

		/*
		 * If we have already reclaimed all extents from the
		 * extent status tree, just stop the loop immediately.
		 */
		if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
			break;

		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);

		/*
		 * Skip inodes touched since the last sort (likely hot)
		 * and, on the first pass, precached inodes; both are
		 * parked on 'skipped' so they keep their order.
		 */
		if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
							     EXT4_STATE_EXT_PRECACHED))) {
			nr_skipped++;
			list_move_tail(cur, &skipped);
			continue;
		}

		/* nothing reclaimable, or caller already holds its lock */
		if (ei->i_es_lru_nr == 0 || ei == locked_ei)
			continue;

		write_lock(&ei->i_es_lock);
		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
		if (ei->i_es_lru_nr == 0)
			list_del_init(&ei->i_es_lru);
		write_unlock(&ei->i_es_lock);

		nr_shrunk += shrunk;
		nr_to_scan -= shrunk;
		if (nr_to_scan == 0)
			break;
	}

	/* put the skipped (newer/precached) inodes back on the tail */
	list_splice_tail(&skipped, &sbi->s_es_lru);
	INIT_LIST_HEAD(&skipped);

	/*
	 * No progress but we skipped inodes: sort the LRU by touch
	 * time and retry once, allowing precached inodes if nothing
	 * else remains.
	 */
	if ((nr_shrunk == 0) && nr_skipped && !retried) {
		retried++;
		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
		sbi->s_es_last_sorted = jiffies;
		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
				      i_es_lru);
		/*
		 * If the head of the sorted list is precached, every
		 * inode is; last resort is to reclaim precached too.
		 */
		if (ext4_test_inode_state(&ei->vfs_inode,
					  EXT4_STATE_EXT_PRECACHED))
			skip_precached = 0;
		goto retry;
	}

	spin_unlock(&sbi->s_es_lru_lock);

	/* fall back to the caller's own inode if the walk got nothing */
	if (locked_ei && nr_shrunk == 0)
		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);

	return nr_shrunk;
}
1009
1010static unsigned long ext4_es_count(struct shrinker *shrink,
1011 struct shrink_control *sc)
1012{
1013 unsigned long nr;
1014 struct ext4_sb_info *sbi;
1015
1016 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1017 nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1018 trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
1019 return nr;
1020}
1021
1022static unsigned long ext4_es_scan(struct shrinker *shrink,
1023 struct shrink_control *sc)
1024{
1025 struct ext4_sb_info *sbi = container_of(shrink,
1026 struct ext4_sb_info, s_es_shrinker);
1027 int nr_to_scan = sc->nr_to_scan;
1028 int ret, nr_shrunk;
1029
1030 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1031 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
1032
1033 if (!nr_to_scan)
1034 return ret;
1035
1036 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
1037
1038 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
1039 return nr_shrunk;
1040}
1041
1042void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1043{
1044 INIT_LIST_HEAD(&sbi->s_es_lru);
1045 spin_lock_init(&sbi->s_es_lru_lock);
1046 sbi->s_es_last_sorted = 0;
1047 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1048 sbi->s_es_shrinker.count_objects = ext4_es_count;
1049 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1050 register_shrinker(&sbi->s_es_shrinker);
1051}
1052
/* Detach the per-superblock extent status shrinker (unmount path). */
void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
	unregister_shrinker(&sbi->s_es_shrinker);
}
1057
/*
 * Mark @inode as recently used and ensure it is on the superblock's
 * extent status LRU.
 */
void ext4_es_lru_add(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	ei->i_touch_when = jiffies;

	/* lockless fast path: already on the LRU, nothing more to do */
	if (!list_empty(&ei->i_es_lru))
		return;

	spin_lock(&sbi->s_es_lru_lock);
	/* re-check under the lock in case another CPU added it first */
	if (list_empty(&ei->i_es_lru))
		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
	spin_unlock(&sbi->s_es_lru_lock);
}
1073
/* Remove @inode from the superblock's extent status LRU, if present. */
void ext4_es_lru_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	spin_lock(&sbi->s_es_lru_lock);
	if (!list_empty(&ei->i_es_lru))
		list_del_init(&ei->i_es_lru);
	spin_unlock(&sbi->s_es_lru_lock);
}
1084
/*
 * Reclaim up to @nr_to_scan non-delayed status extents from @ei's
 * tree.  Caller holds ei->i_es_lock for write.  Returns the number of
 * extents freed.
 */
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
				       int nr_to_scan)
{
	struct inode *inode = &ei->vfs_inode;
	struct ext4_es_tree *tree = &ei->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	unsigned long nr_shrunk = 0;
	/* rate-limit the precached-shrink warning below */
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (ei->i_es_lru_nr == 0)
		return 0;

	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
	    __ratelimit(&_rs))
		ext4_warning(inode->i_sb, "forced shrink of precached extents");

	node = rb_first(&tree->root);
	while (node != NULL) {
		es = rb_entry(node, struct extent_status, rb_node);
		node = rb_next(&es->rb_node);

		/*
		 * Only non-delayed extents are freed; delayed extents
		 * were never counted in i_es_lru_nr (see
		 * ext4_es_alloc_extent()).
		 */
		if (!ext4_es_is_delayed(es)) {
			rb_erase(&es->rb_node, &tree->root);
			ext4_es_free_extent(inode, es);
			nr_shrunk++;
			if (--nr_to_scan == 0)
				break;
		}
	}
	/* the cached extent may just have been freed */
	tree->cache_es = NULL;
	return nr_shrunk;
}
1122