1
2
3
4
5
6
7
8
9
10
11
12#include <linux/rbtree.h>
13#include <linux/list_sort.h>
14#include "ext4.h"
15#include "extents_status.h"
16
17#include <trace/events/ext4.h>
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143static struct kmem_cache *ext4_es_cachep;
144
145static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
146static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
147 ext4_lblk_t end);
148static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
149 int nr_to_scan);
150static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
151 struct ext4_inode_info *locked_ei);
152
153int __init ext4_init_es(void)
154{
155 ext4_es_cachep = kmem_cache_create("ext4_extent_status",
156 sizeof(struct extent_status),
157 0, (SLAB_RECLAIM_ACCOUNT), NULL);
158 if (ext4_es_cachep == NULL)
159 return -ENOMEM;
160 return 0;
161}
162
163void ext4_exit_es(void)
164{
165 if (ext4_es_cachep)
166 kmem_cache_destroy(ext4_es_cachep);
167}
168
169void ext4_es_init_tree(struct ext4_es_tree *tree)
170{
171 tree->root = RB_ROOT;
172 tree->cache_es = NULL;
173}
174
175#ifdef ES_DEBUG__
176static void ext4_es_print_tree(struct inode *inode)
177{
178 struct ext4_es_tree *tree;
179 struct rb_node *node;
180
181 printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
182 tree = &EXT4_I(inode)->i_es_tree;
183 node = rb_first(&tree->root);
184 while (node) {
185 struct extent_status *es;
186 es = rb_entry(node, struct extent_status, rb_node);
187 printk(KERN_DEBUG " [%u/%u) %llu %llx",
188 es->es_lblk, es->es_len,
189 ext4_es_pblock(es), ext4_es_status(es));
190 node = rb_next(node);
191 }
192 printk(KERN_DEBUG "\n");
193}
194#else
195#define ext4_es_print_tree(inode)
196#endif
197
198static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
199{
200 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
201 return es->es_lblk + es->es_len - 1;
202}
203
204
205
206
207
208static struct extent_status *__es_tree_search(struct rb_root *root,
209 ext4_lblk_t lblk)
210{
211 struct rb_node *node = root->rb_node;
212 struct extent_status *es = NULL;
213
214 while (node) {
215 es = rb_entry(node, struct extent_status, rb_node);
216 if (lblk < es->es_lblk)
217 node = node->rb_left;
218 else if (lblk > ext4_es_end(es))
219 node = node->rb_right;
220 else
221 return es;
222 }
223
224 if (es && lblk < es->es_lblk)
225 return es;
226
227 if (es && lblk > ext4_es_end(es)) {
228 node = rb_next(&es->rb_node);
229 return node ? rb_entry(node, struct extent_status, rb_node) :
230 NULL;
231 }
232
233 return NULL;
234}
235
236
237
238
239
240
241
242
243
244
245void ext4_es_find_delayed_extent_range(struct inode *inode,
246 ext4_lblk_t lblk, ext4_lblk_t end,
247 struct extent_status *es)
248{
249 struct ext4_es_tree *tree = NULL;
250 struct extent_status *es1 = NULL;
251 struct rb_node *node;
252
253 BUG_ON(es == NULL);
254 BUG_ON(end < lblk);
255 trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
256
257 read_lock(&EXT4_I(inode)->i_es_lock);
258 tree = &EXT4_I(inode)->i_es_tree;
259
260
261 es->es_lblk = es->es_len = es->es_pblk = 0;
262 if (tree->cache_es) {
263 es1 = tree->cache_es;
264 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
265 es_debug("%u cached by [%u/%u) %llu %x\n",
266 lblk, es1->es_lblk, es1->es_len,
267 ext4_es_pblock(es1), ext4_es_status(es1));
268 goto out;
269 }
270 }
271
272 es1 = __es_tree_search(&tree->root, lblk);
273
274out:
275 if (es1 && !ext4_es_is_delayed(es1)) {
276 while ((node = rb_next(&es1->rb_node)) != NULL) {
277 es1 = rb_entry(node, struct extent_status, rb_node);
278 if (es1->es_lblk > end) {
279 es1 = NULL;
280 break;
281 }
282 if (ext4_es_is_delayed(es1))
283 break;
284 }
285 }
286
287 if (es1 && ext4_es_is_delayed(es1)) {
288 tree->cache_es = es1;
289 es->es_lblk = es1->es_lblk;
290 es->es_len = es1->es_len;
291 es->es_pblk = es1->es_pblk;
292 }
293
294 read_unlock(&EXT4_I(inode)->i_es_lock);
295
296 trace_ext4_es_find_delayed_extent_range_exit(inode, es);
297}
298
299static struct extent_status *
300ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
301 ext4_fsblk_t pblk)
302{
303 struct extent_status *es;
304 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
305 if (es == NULL)
306 return NULL;
307 es->es_lblk = lblk;
308 es->es_len = len;
309 es->es_pblk = pblk;
310
311
312
313
314 if (!ext4_es_is_delayed(es)) {
315 EXT4_I(inode)->i_es_lru_nr++;
316 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
317 }
318
319 return es;
320}
321
322static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
323{
324
325 if (!ext4_es_is_delayed(es)) {
326 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
327 EXT4_I(inode)->i_es_lru_nr--;
328 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
329 }
330
331 kmem_cache_free(ext4_es_cachep, es);
332}
333
334
335
336
337
338
339
340
341static int ext4_es_can_be_merged(struct extent_status *es1,
342 struct extent_status *es2)
343{
344 if (ext4_es_status(es1) != ext4_es_status(es2))
345 return 0;
346
347 if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
348 return 0;
349
350 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
351 return 0;
352
353 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
354 (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
355 return 1;
356
357 if (ext4_es_is_hole(es1))
358 return 1;
359
360
361 if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
362 return 1;
363
364 return 0;
365}
366
367static struct extent_status *
368ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
369{
370 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
371 struct extent_status *es1;
372 struct rb_node *node;
373
374 node = rb_prev(&es->rb_node);
375 if (!node)
376 return es;
377
378 es1 = rb_entry(node, struct extent_status, rb_node);
379 if (ext4_es_can_be_merged(es1, es)) {
380 es1->es_len += es->es_len;
381 rb_erase(&es->rb_node, &tree->root);
382 ext4_es_free_extent(inode, es);
383 es = es1;
384 }
385
386 return es;
387}
388
389static struct extent_status *
390ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
391{
392 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
393 struct extent_status *es1;
394 struct rb_node *node;
395
396 node = rb_next(&es->rb_node);
397 if (!node)
398 return es;
399
400 es1 = rb_entry(node, struct extent_status, rb_node);
401 if (ext4_es_can_be_merged(es, es1)) {
402 es->es_len += es1->es_len;
403 rb_erase(node, &tree->root);
404 ext4_es_free_extent(inode, es1);
405 }
406
407 return es;
408}
409
410#ifdef ES_AGGRESSIVE_TEST
411#include "ext4_extents.h"
412
413static void ext4_es_insert_extent_ext_check(struct inode *inode,
414 struct extent_status *es)
415{
416 struct ext4_ext_path *path = NULL;
417 struct ext4_extent *ex;
418 ext4_lblk_t ee_block;
419 ext4_fsblk_t ee_start;
420 unsigned short ee_len;
421 int depth, ee_status, es_status;
422
423 path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
424 if (IS_ERR(path))
425 return;
426
427 depth = ext_depth(inode);
428 ex = path[depth].p_ext;
429
430 if (ex) {
431
432 ee_block = le32_to_cpu(ex->ee_block);
433 ee_start = ext4_ext_pblock(ex);
434 ee_len = ext4_ext_get_actual_len(ex);
435
436 ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
437 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
438
439
440
441
442
443 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
444 if (in_range(es->es_lblk, ee_block, ee_len)) {
445 pr_warn("ES insert assertion failed for "
446 "inode: %lu we can find an extent "
447 "at block [%d/%d/%llu/%c], but we "
448 "want to add an delayed/hole extent "
449 "[%d/%d/%llu/%llx]\n",
450 inode->i_ino, ee_block, ee_len,
451 ee_start, ee_status ? 'u' : 'w',
452 es->es_lblk, es->es_len,
453 ext4_es_pblock(es), ext4_es_status(es));
454 }
455 goto out;
456 }
457
458
459
460
461
462 if (es->es_lblk < ee_block ||
463 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
464 pr_warn("ES insert assertion failed for inode: %lu "
465 "ex_status [%d/%d/%llu/%c] != "
466 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
467 ee_block, ee_len, ee_start,
468 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
469 ext4_es_pblock(es), es_status ? 'u' : 'w');
470 goto out;
471 }
472
473 if (ee_status ^ es_status) {
474 pr_warn("ES insert assertion failed for inode: %lu "
475 "ex_status [%d/%d/%llu/%c] != "
476 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
477 ee_block, ee_len, ee_start,
478 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
479 ext4_es_pblock(es), es_status ? 'u' : 'w');
480 }
481 } else {
482
483
484
485
486 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
487 pr_warn("ES insert assertion failed for inode: %lu "
488 "can't find an extent at block %d but we want "
489 "to add an written/unwritten extent "
490 "[%d/%d/%llu/%llx]\n", inode->i_ino,
491 es->es_lblk, es->es_lblk, es->es_len,
492 ext4_es_pblock(es), ext4_es_status(es));
493 }
494 }
495out:
496 if (path) {
497 ext4_ext_drop_refs(path);
498 kfree(path);
499 }
500}
501
502static void ext4_es_insert_extent_ind_check(struct inode *inode,
503 struct extent_status *es)
504{
505 struct ext4_map_blocks map;
506 int retval;
507
508
509
510
511
512
513
514
515 map.m_lblk = es->es_lblk;
516 map.m_len = es->es_len;
517
518 retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
519 if (retval > 0) {
520 if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
521
522
523
524
525 pr_warn("ES insert assertion failed for inode: %lu "
526 "We can find blocks but we want to add a "
527 "delayed/hole extent [%d/%d/%llu/%llx]\n",
528 inode->i_ino, es->es_lblk, es->es_len,
529 ext4_es_pblock(es), ext4_es_status(es));
530 return;
531 } else if (ext4_es_is_written(es)) {
532 if (retval != es->es_len) {
533 pr_warn("ES insert assertion failed for "
534 "inode: %lu retval %d != es_len %d\n",
535 inode->i_ino, retval, es->es_len);
536 return;
537 }
538 if (map.m_pblk != ext4_es_pblock(es)) {
539 pr_warn("ES insert assertion failed for "
540 "inode: %lu m_pblk %llu != "
541 "es_pblk %llu\n",
542 inode->i_ino, map.m_pblk,
543 ext4_es_pblock(es));
544 return;
545 }
546 } else {
547
548
549
550
551 BUG_ON(1);
552 }
553 } else if (retval == 0) {
554 if (ext4_es_is_written(es)) {
555 pr_warn("ES insert assertion failed for inode: %lu "
556 "We can't find the block but we want to add "
557 "an written extent [%d/%d/%llu/%llx]\n",
558 inode->i_ino, es->es_lblk, es->es_len,
559 ext4_es_pblock(es), ext4_es_status(es));
560 return;
561 }
562 }
563}
564
565static inline void ext4_es_insert_extent_check(struct inode *inode,
566 struct extent_status *es)
567{
568
569
570
571
572 BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
573 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
574 ext4_es_insert_extent_ext_check(inode, es);
575 else
576 ext4_es_insert_extent_ind_check(inode, es);
577}
578#else
579static inline void ext4_es_insert_extent_check(struct inode *inode,
580 struct extent_status *es)
581{
582}
583#endif
584
585static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
586{
587 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
588 struct rb_node **p = &tree->root.rb_node;
589 struct rb_node *parent = NULL;
590 struct extent_status *es;
591
592 while (*p) {
593 parent = *p;
594 es = rb_entry(parent, struct extent_status, rb_node);
595
596 if (newes->es_lblk < es->es_lblk) {
597 if (ext4_es_can_be_merged(newes, es)) {
598
599
600
601
602 es->es_lblk = newes->es_lblk;
603 es->es_len += newes->es_len;
604 if (ext4_es_is_written(es) ||
605 ext4_es_is_unwritten(es))
606 ext4_es_store_pblock(es,
607 newes->es_pblk);
608 es = ext4_es_try_to_merge_left(inode, es);
609 goto out;
610 }
611 p = &(*p)->rb_left;
612 } else if (newes->es_lblk > ext4_es_end(es)) {
613 if (ext4_es_can_be_merged(es, newes)) {
614 es->es_len += newes->es_len;
615 es = ext4_es_try_to_merge_right(inode, es);
616 goto out;
617 }
618 p = &(*p)->rb_right;
619 } else {
620 BUG_ON(1);
621 return -EINVAL;
622 }
623 }
624
625 es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
626 newes->es_pblk);
627 if (!es)
628 return -ENOMEM;
629 rb_link_node(&es->rb_node, parent, p);
630 rb_insert_color(&es->rb_node, &tree->root);
631
632out:
633 tree->cache_es = es;
634 return 0;
635}
636
637
638
639
640
641
642
643int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
644 ext4_lblk_t len, ext4_fsblk_t pblk,
645 unsigned int status)
646{
647 struct extent_status newes;
648 ext4_lblk_t end = lblk + len - 1;
649 int err = 0;
650
651 es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
652 lblk, len, pblk, status, inode->i_ino);
653
654 if (!len)
655 return 0;
656
657 BUG_ON(end < lblk);
658
659 newes.es_lblk = lblk;
660 newes.es_len = len;
661 ext4_es_store_pblock(&newes, pblk);
662 ext4_es_store_status(&newes, status);
663 trace_ext4_es_insert_extent(inode, &newes);
664
665 ext4_es_insert_extent_check(inode, &newes);
666
667 write_lock(&EXT4_I(inode)->i_es_lock);
668 err = __es_remove_extent(inode, lblk, end);
669 if (err != 0)
670 goto error;
671retry:
672 err = __es_insert_extent(inode, &newes);
673 if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
674 EXT4_I(inode)))
675 goto retry;
676 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
677 err = 0;
678
679error:
680 write_unlock(&EXT4_I(inode)->i_es_lock);
681
682 ext4_es_print_tree(inode);
683
684 return err;
685}
686
687
688
689
690
691
692void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
693 ext4_lblk_t len, ext4_fsblk_t pblk,
694 unsigned int status)
695{
696 struct extent_status *es;
697 struct extent_status newes;
698 ext4_lblk_t end = lblk + len - 1;
699
700 newes.es_lblk = lblk;
701 newes.es_len = len;
702 ext4_es_store_pblock(&newes, pblk);
703 ext4_es_store_status(&newes, status);
704 trace_ext4_es_cache_extent(inode, &newes);
705
706 if (!len)
707 return;
708
709 BUG_ON(end < lblk);
710
711 write_lock(&EXT4_I(inode)->i_es_lock);
712
713 es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
714 if (!es || es->es_lblk > end)
715 __es_insert_extent(inode, &newes);
716 write_unlock(&EXT4_I(inode)->i_es_lock);
717}
718
719
720
721
722
723
724
725
726int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
727 struct extent_status *es)
728{
729 struct ext4_es_tree *tree;
730 struct extent_status *es1 = NULL;
731 struct rb_node *node;
732 int found = 0;
733
734 trace_ext4_es_lookup_extent_enter(inode, lblk);
735 es_debug("lookup extent in block %u\n", lblk);
736
737 tree = &EXT4_I(inode)->i_es_tree;
738 read_lock(&EXT4_I(inode)->i_es_lock);
739
740
741 es->es_lblk = es->es_len = es->es_pblk = 0;
742 if (tree->cache_es) {
743 es1 = tree->cache_es;
744 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
745 es_debug("%u cached by [%u/%u)\n",
746 lblk, es1->es_lblk, es1->es_len);
747 found = 1;
748 goto out;
749 }
750 }
751
752 node = tree->root.rb_node;
753 while (node) {
754 es1 = rb_entry(node, struct extent_status, rb_node);
755 if (lblk < es1->es_lblk)
756 node = node->rb_left;
757 else if (lblk > ext4_es_end(es1))
758 node = node->rb_right;
759 else {
760 found = 1;
761 break;
762 }
763 }
764
765out:
766 if (found) {
767 BUG_ON(!es1);
768 es->es_lblk = es1->es_lblk;
769 es->es_len = es1->es_len;
770 es->es_pblk = es1->es_pblk;
771 }
772
773 read_unlock(&EXT4_I(inode)->i_es_lock);
774
775 trace_ext4_es_lookup_extent_exit(inode, es, found);
776 return found;
777}
778
779static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
780 ext4_lblk_t end)
781{
782 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
783 struct rb_node *node;
784 struct extent_status *es;
785 struct extent_status orig_es;
786 ext4_lblk_t len1, len2;
787 ext4_fsblk_t block;
788 int err;
789
790retry:
791 err = 0;
792 es = __es_tree_search(&tree->root, lblk);
793 if (!es)
794 goto out;
795 if (es->es_lblk > end)
796 goto out;
797
798
799 tree->cache_es = NULL;
800
801 orig_es.es_lblk = es->es_lblk;
802 orig_es.es_len = es->es_len;
803 orig_es.es_pblk = es->es_pblk;
804
805 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
806 len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
807 if (len1 > 0)
808 es->es_len = len1;
809 if (len2 > 0) {
810 if (len1 > 0) {
811 struct extent_status newes;
812
813 newes.es_lblk = end + 1;
814 newes.es_len = len2;
815 if (ext4_es_is_written(&orig_es) ||
816 ext4_es_is_unwritten(&orig_es)) {
817 block = ext4_es_pblock(&orig_es) +
818 orig_es.es_len - len2;
819 ext4_es_store_pblock(&newes, block);
820 }
821 ext4_es_store_status(&newes, ext4_es_status(&orig_es));
822 err = __es_insert_extent(inode, &newes);
823 if (err) {
824 es->es_lblk = orig_es.es_lblk;
825 es->es_len = orig_es.es_len;
826 if ((err == -ENOMEM) &&
827 __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
828 EXT4_I(inode)))
829 goto retry;
830 goto out;
831 }
832 } else {
833 es->es_lblk = end + 1;
834 es->es_len = len2;
835 if (ext4_es_is_written(es) ||
836 ext4_es_is_unwritten(es)) {
837 block = orig_es.es_pblk + orig_es.es_len - len2;
838 ext4_es_store_pblock(es, block);
839 }
840 }
841 goto out;
842 }
843
844 if (len1 > 0) {
845 node = rb_next(&es->rb_node);
846 if (node)
847 es = rb_entry(node, struct extent_status, rb_node);
848 else
849 es = NULL;
850 }
851
852 while (es && ext4_es_end(es) <= end) {
853 node = rb_next(&es->rb_node);
854 rb_erase(&es->rb_node, &tree->root);
855 ext4_es_free_extent(inode, es);
856 if (!node) {
857 es = NULL;
858 break;
859 }
860 es = rb_entry(node, struct extent_status, rb_node);
861 }
862
863 if (es && es->es_lblk < end + 1) {
864 ext4_lblk_t orig_len = es->es_len;
865
866 len1 = ext4_es_end(es) - end;
867 es->es_lblk = end + 1;
868 es->es_len = len1;
869 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
870 block = es->es_pblk + orig_len - len1;
871 ext4_es_store_pblock(es, block);
872 }
873 }
874
875out:
876 return err;
877}
878
879
880
881
882
883
884int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
885 ext4_lblk_t len)
886{
887 ext4_lblk_t end;
888 int err = 0;
889
890 trace_ext4_es_remove_extent(inode, lblk, len);
891 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
892 lblk, len, inode->i_ino);
893
894 if (!len)
895 return err;
896
897 end = lblk + len - 1;
898 BUG_ON(end < lblk);
899
900 write_lock(&EXT4_I(inode)->i_es_lock);
901 err = __es_remove_extent(inode, lblk, end);
902 write_unlock(&EXT4_I(inode)->i_es_lock);
903 ext4_es_print_tree(inode);
904 return err;
905}
906
907static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
908 struct list_head *b)
909{
910 struct ext4_inode_info *eia, *eib;
911 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
912 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
913
914 if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
915 !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
916 return 1;
917 if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
918 ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
919 return -1;
920 if (eia->i_touch_when == eib->i_touch_when)
921 return 0;
922 if (time_after(eia->i_touch_when, eib->i_touch_when))
923 return 1;
924 else
925 return -1;
926}
927
928static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
929 struct ext4_inode_info *locked_ei)
930{
931 struct ext4_inode_info *ei;
932 struct list_head *cur, *tmp;
933 LIST_HEAD(skipped);
934 int nr_shrunk = 0;
935 int retried = 0, skip_precached = 1, nr_skipped = 0;
936
937 spin_lock(&sbi->s_es_lru_lock);
938
939retry:
940 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
941 int shrunk;
942
943
944
945
946
947 if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
948 break;
949
950 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
951
952
953
954
955
956
957 if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
958 (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
959 EXT4_STATE_EXT_PRECACHED))) {
960 nr_skipped++;
961 list_move_tail(cur, &skipped);
962 continue;
963 }
964
965 if (ei->i_es_lru_nr == 0 || ei == locked_ei)
966 continue;
967
968 write_lock(&ei->i_es_lock);
969 shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
970 if (ei->i_es_lru_nr == 0)
971 list_del_init(&ei->i_es_lru);
972 write_unlock(&ei->i_es_lock);
973
974 nr_shrunk += shrunk;
975 nr_to_scan -= shrunk;
976 if (nr_to_scan == 0)
977 break;
978 }
979
980
981 list_splice_tail(&skipped, &sbi->s_es_lru);
982 INIT_LIST_HEAD(&skipped);
983
984
985
986
987
988 if ((nr_shrunk == 0) && nr_skipped && !retried) {
989 retried++;
990 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
991 sbi->s_es_last_sorted = jiffies;
992 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
993 i_es_lru);
994
995
996
997
998 if (ext4_test_inode_state(&ei->vfs_inode,
999 EXT4_STATE_EXT_PRECACHED))
1000 skip_precached = 0;
1001 goto retry;
1002 }
1003
1004 spin_unlock(&sbi->s_es_lru_lock);
1005
1006 if (locked_ei && nr_shrunk == 0)
1007 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
1008
1009 return nr_shrunk;
1010}
1011
1012static unsigned long ext4_es_count(struct shrinker *shrink,
1013 struct shrink_control *sc)
1014{
1015 unsigned long nr;
1016 struct ext4_sb_info *sbi;
1017
1018 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1019 nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1020 trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
1021 return nr;
1022}
1023
1024static unsigned long ext4_es_scan(struct shrinker *shrink,
1025 struct shrink_control *sc)
1026{
1027 struct ext4_sb_info *sbi = container_of(shrink,
1028 struct ext4_sb_info, s_es_shrinker);
1029 int nr_to_scan = sc->nr_to_scan;
1030 int ret, nr_shrunk;
1031
1032 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
1033 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
1034
1035 if (!nr_to_scan)
1036 return ret;
1037
1038 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
1039
1040 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
1041 return nr_shrunk;
1042}
1043
1044void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1045{
1046 INIT_LIST_HEAD(&sbi->s_es_lru);
1047 spin_lock_init(&sbi->s_es_lru_lock);
1048 sbi->s_es_last_sorted = 0;
1049 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1050 sbi->s_es_shrinker.count_objects = ext4_es_count;
1051 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1052 register_shrinker(&sbi->s_es_shrinker);
1053}
1054
1055void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
1056{
1057 unregister_shrinker(&sbi->s_es_shrinker);
1058}
1059
1060void ext4_es_lru_add(struct inode *inode)
1061{
1062 struct ext4_inode_info *ei = EXT4_I(inode);
1063 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1064
1065 ei->i_touch_when = jiffies;
1066
1067 if (!list_empty(&ei->i_es_lru))
1068 return;
1069
1070 spin_lock(&sbi->s_es_lru_lock);
1071 if (list_empty(&ei->i_es_lru))
1072 list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
1073 spin_unlock(&sbi->s_es_lru_lock);
1074}
1075
1076void ext4_es_lru_del(struct inode *inode)
1077{
1078 struct ext4_inode_info *ei = EXT4_I(inode);
1079 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1080
1081 spin_lock(&sbi->s_es_lru_lock);
1082 if (!list_empty(&ei->i_es_lru))
1083 list_del_init(&ei->i_es_lru);
1084 spin_unlock(&sbi->s_es_lru_lock);
1085}
1086
1087static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
1088 int nr_to_scan)
1089{
1090 struct inode *inode = &ei->vfs_inode;
1091 struct ext4_es_tree *tree = &ei->i_es_tree;
1092 struct rb_node *node;
1093 struct extent_status *es;
1094 unsigned long nr_shrunk = 0;
1095 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1096 DEFAULT_RATELIMIT_BURST);
1097
1098 if (ei->i_es_lru_nr == 0)
1099 return 0;
1100
1101 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1102 __ratelimit(&_rs))
1103 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1104
1105 node = rb_first(&tree->root);
1106 while (node != NULL) {
1107 es = rb_entry(node, struct extent_status, rb_node);
1108 node = rb_next(&es->rb_node);
1109
1110
1111
1112
1113 if (!ext4_es_is_delayed(es)) {
1114 rb_erase(&es->rb_node, &tree->root);
1115 ext4_es_free_extent(inode, es);
1116 nr_shrunk++;
1117 if (--nr_to_scan == 0)
1118 break;
1119 }
1120 }
1121 tree->cache_es = NULL;
1122 return nr_shrunk;
1123}
1124