1
2
3
4
5
6
7
8
9
10
11
12#include <linux/rbtree.h>
13#include <linux/list_sort.h>
14#include "ext4.h"
15#include "extents_status.h"
16#include "ext4_extents.h"
17
18#include <trace/events/ext4.h>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144static struct kmem_cache *ext4_es_cachep;
145
146static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
147static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
148 ext4_lblk_t end);
149static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
150 int nr_to_scan);
151static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
152 struct ext4_inode_info *locked_ei);
153
154int __init ext4_init_es(void)
155{
156 ext4_es_cachep = kmem_cache_create("ext4_extent_status",
157 sizeof(struct extent_status),
158 0, (SLAB_RECLAIM_ACCOUNT), NULL);
159 if (ext4_es_cachep == NULL)
160 return -ENOMEM;
161 return 0;
162}
163
164void ext4_exit_es(void)
165{
166 if (ext4_es_cachep)
167 kmem_cache_destroy(ext4_es_cachep);
168}
169
170void ext4_es_init_tree(struct ext4_es_tree *tree)
171{
172 tree->root = RB_ROOT;
173 tree->cache_es = NULL;
174}
175
#ifdef ES_DEBUG__
/*
 * ext4_es_print_tree - dump every cached extent of @inode to the kernel
 * log, one "[lblk/len) pblk status" entry per extent, in ascending logical
 * block order.  Debug builds only (ES_DEBUG__).
 */
static void ext4_es_print_tree(struct inode *inode)
{
	struct ext4_es_tree *tree;
	struct rb_node *node;

	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_es_tree;
	node = rb_first(&tree->root);
	while (node) {
		struct extent_status *es;
		es = rb_entry(node, struct extent_status, rb_node);
		printk(KERN_DEBUG " [%u/%u) %llu %llx",
		       es->es_lblk, es->es_len,
		       ext4_es_pblock(es), ext4_es_status(es));
		node = rb_next(node);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif
198
/*
 * ext4_es_end - return the last logical block covered by @es.
 *
 * The BUG_ON catches a corrupted extent whose es_lblk + es_len wraps the
 * logical block number space.
 */
static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
{
	BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
	return es->es_lblk + es->es_len - 1;
}
204
205
206
207
208
/*
 * __es_tree_search - find the extent covering @lblk, or its successor.
 *
 * Returns the extent that contains @lblk if one exists; otherwise the
 * first extent that starts after @lblk; NULL if the tree is empty or
 * @lblk lies beyond the last extent.  Callers hold i_es_lock.
 */
static struct extent_status *__es_tree_search(struct rb_root *root,
					      ext4_lblk_t lblk)
{
	struct rb_node *node = root->rb_node;
	struct extent_status *es = NULL;

	while (node) {
		es = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es))
			node = node->rb_right;
		else
			return es;
	}

	/* Not found: @es is the last node visited, the nearest neighbour. */
	if (es && lblk < es->es_lblk)
		return es;

	/* @lblk is past @es: the successor (if any) is the first extent
	 * starting after @lblk. */
	if (es && lblk > ext4_es_end(es)) {
		node = rb_next(&es->rb_node);
		return node ? rb_entry(node, struct extent_status, rb_node) :
			      NULL;
	}

	return NULL;
}
236
237
238
239
240
241
242
243
244
245
/*
 * ext4_es_find_delayed_extent_range - find the first delayed extent
 * overlapping [lblk, end].
 *
 * On return *es describes that delayed extent; if no delayed extent exists
 * in the range, *es is left zeroed (es_len == 0).  A hit is also stored in
 * tree->cache_es to speed up the next lookup.
 */
void ext4_es_find_delayed_extent_range(struct inode *inode,
				 ext4_lblk_t lblk, ext4_lblk_t end,
				 struct extent_status *es)
{
	struct ext4_es_tree *tree = NULL;
	struct extent_status *es1 = NULL;
	struct rb_node *node;

	BUG_ON(es == NULL);
	BUG_ON(end < lblk);
	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);

	read_lock(&EXT4_I(inode)->i_es_lock);
	tree = &EXT4_I(inode)->i_es_tree;

	/* Zero the result, then try the single-entry cache before a full
	 * tree search. */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u) %llu %llx\n",
				 lblk, es1->es_lblk, es1->es_len,
				 ext4_es_pblock(es1), ext4_es_status(es1));
			goto out;
		}
	}

	es1 = __es_tree_search(&tree->root, lblk);

out:
	/* Walk forward past non-delayed extents; stop at the first delayed
	 * one, or give up once we pass @end. */
	if (es1 && !ext4_es_is_delayed(es1)) {
		while ((node = rb_next(&es1->rb_node)) != NULL) {
			es1 = rb_entry(node, struct extent_status, rb_node);
			if (es1->es_lblk > end) {
				es1 = NULL;
				break;
			}
			if (ext4_es_is_delayed(es1))
				break;
		}
	}

	if (es1 && ext4_es_is_delayed(es1)) {
		tree->cache_es = es1;
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
}
299
/*
 * Allocate and initialize an extent_status node.  GFP_ATOMIC is used
 * because callers hold i_es_lock.  Returns NULL on allocation failure.
 */
static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
		     ext4_fsblk_t pblk)
{
	struct extent_status *es;
	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
	if (es == NULL)
		return NULL;
	es->es_lblk = lblk;
	es->es_len = len;
	es->es_pblk = pblk;

	/*
	 * Only non-delayed extents are counted as reclaimable (per-inode
	 * i_es_lru_nr and the superblock-wide s_extent_cache_cnt): the
	 * reclaim path (__es_try_to_reclaim_extents()) frees only
	 * non-delayed extents.
	 */
	if (!ext4_es_is_delayed(es)) {
		EXT4_I(inode)->i_es_lru_nr++;
		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
	}

	return es;
}
322
/*
 * Free an extent_status node, undoing the reclaimable-extent accounting
 * done in ext4_es_alloc_extent() for non-delayed extents.
 */
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
	/* Delayed extents were never counted, so only adjust for others. */
	if (!ext4_es_is_delayed(es)) {
		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
		EXT4_I(inode)->i_es_lru_nr--;
		percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
	}

	kmem_cache_free(ext4_es_cachep, es);
}
334
335
336
337
338
339
340
341
/*
 * ext4_es_can_be_merged - can @es1 and @es2 be coalesced into one extent?
 *
 * @es1 must logically precede @es2.  Returns 1 when the two have identical
 * status bits, are logically adjacent, the combined length does not
 * overflow, and (for written/unwritten extents) the physical blocks are
 * contiguous too.
 */
static int ext4_es_can_be_merged(struct extent_status *es1,
				 struct extent_status *es2)
{
	if (ext4_es_status(es1) != ext4_es_status(es2))
		return 0;

	/* The combined length must not overflow 32 bits. */
	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
		return 0;

	/* @es2 must start exactly where @es1 ends. */
	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
		return 0;

	/* Mapped extents additionally require contiguous physical blocks. */
	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
		return 1;

	if (ext4_es_is_hole(es1))
		return 1;

	/* Pure delayed extents carry no physical mapping to compare. */
	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
		return 1;

	return 0;
}
367
368static struct extent_status *
369ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
370{
371 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
372 struct extent_status *es1;
373 struct rb_node *node;
374
375 node = rb_prev(&es->rb_node);
376 if (!node)
377 return es;
378
379 es1 = rb_entry(node, struct extent_status, rb_node);
380 if (ext4_es_can_be_merged(es1, es)) {
381 es1->es_len += es->es_len;
382 rb_erase(&es->rb_node, &tree->root);
383 ext4_es_free_extent(inode, es);
384 es = es1;
385 }
386
387 return es;
388}
389
390static struct extent_status *
391ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
392{
393 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
394 struct extent_status *es1;
395 struct rb_node *node;
396
397 node = rb_next(&es->rb_node);
398 if (!node)
399 return es;
400
401 es1 = rb_entry(node, struct extent_status, rb_node);
402 if (ext4_es_can_be_merged(es, es1)) {
403 es->es_len += es1->es_len;
404 rb_erase(node, &tree->root);
405 ext4_es_free_extent(inode, es1);
406 }
407
408 return es;
409}
410
411#ifdef ES_AGGRESSIVE_TEST
/*
 * ext4_es_insert_extent_ext_check - debug cross-check against the on-disk
 * extent tree (extent-mapped inodes only, ES_AGGRESSIVE_TEST builds).
 *
 * Looks up the on-disk extent around es->es_lblk and pr_warn()s when the
 * status extent being inserted disagrees with it in range, physical block,
 * or written/unwritten state.
 */
static void ext4_es_insert_extent_ext_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t ee_block;
	ext4_fsblk_t ee_start;
	unsigned short ee_len;
	int depth, ee_status, es_status;

	path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
	if (IS_ERR(path))
		return;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;

	if (ex) {
		ee_block = le32_to_cpu(ex->ee_block);
		ee_start = ext4_ext_pblock(ex);
		ee_len = ext4_ext_get_actual_len(ex);

		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
		es_status = ext4_es_is_unwritten(es) ? 1 : 0;

		/*
		 * Inserting a delayed/hole extent: the on-disk tree must
		 * not already map this block.
		 */
		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
			if (in_range(es->es_lblk, ee_block, ee_len)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu we can find an extent "
					"at block [%d/%d/%llu/%c], but we "
					"want to add an delayed/hole extent "
					"[%d/%d/%llu/%llx]\n",
					inode->i_ino, ee_block, ee_len,
					ee_start, ee_status ? 'u' : 'w',
					es->es_lblk, es->es_len,
					ext4_es_pblock(es), ext4_es_status(es));
			}
			goto out;
		}

		/*
		 * Written/unwritten extent: the logical range and physical
		 * block must agree with the on-disk extent.
		 */
		if (es->es_lblk < ee_block ||
		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
			goto out;
		}

		/* The written/unwritten state must match as well. */
		if (ee_status ^ es_status) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
		}
	} else {
		/*
		 * No on-disk extent found: only delayed/hole extents may be
		 * inserted here.
		 */
		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"can't find an extent at block %d but we want "
				"to add an written/unwritten extent "
				"[%d/%d/%llu/%llx]\n", inode->i_ino,
				es->es_lblk, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
		}
	}
out:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
}
500
/*
 * ext4_es_insert_extent_ind_check - debug cross-check against the block
 * map (indirect-mapped inodes only, ES_AGGRESSIVE_TEST builds).
 *
 * Maps the same logical range with ext4_ind_map_blocks() (read-only:
 * handle == NULL, flags == 0) and pr_warn()s when the status extent being
 * inserted disagrees with the on-disk mapping.
 */
static void ext4_es_insert_extent_ind_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_map_blocks map;
	int retval;

	/*
	 * Look up the on-disk mapping for the range covered by @es so it
	 * can be compared against the extent being inserted.
	 */
	map.m_lblk = es->es_lblk;
	map.m_len = es->es_len;

	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
	if (retval > 0) {
		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
			/*
			 * Blocks are already mapped on disk, so inserting a
			 * delayed/hole extent is inconsistent.
			 */
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can find blocks but we want to add a "
				"delayed/hole extent [%d/%d/%llu/%llx]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		} else if (ext4_es_is_written(es)) {
			/* Both length and start block must match the map. */
			if (retval != es->es_len) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu retval %d != es_len %d\n",
					inode->i_ino, retval, es->es_len);
				return;
			}
			if (map.m_pblk != ext4_es_pblock(es)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu m_pblk %llu != "
					"es_pblk %llu\n",
					inode->i_ino, map.m_pblk,
					ext4_es_pblock(es));
				return;
			}
		} else {
			/*
			 * Indirect-mapped inodes have no unwritten extents,
			 * so any other status here is a bug.
			 */
			BUG_ON(1);
		}
	} else if (retval == 0) {
		/* Nothing mapped on disk: a written extent is impossible. */
		if (ext4_es_is_written(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can't find the block but we want to add "
				"an written extent [%d/%d/%llu/%llx]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		}
	}
}
563
/*
 * Dispatch the debug insertion check to the extent-tree or indirect-map
 * variant depending on the inode's mapping type.
 */
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
	/*
	 * The on-disk mapping is consulted below, so i_data_sem must be
	 * held to keep the comparison consistent.
	 */
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		ext4_es_insert_extent_ext_check(inode, es);
	else
		ext4_es_insert_extent_ind_check(inode, es);
}
577#else
/* Non-debug build: the insertion sanity check compiles away. */
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
}
582#endif
583
/*
 * __es_insert_extent - insert @newes into the status tree, merging with
 * adjacent extents where possible.
 *
 * Caller must hold i_es_lock for writing and must have removed any
 * overlapping extents first (an overlap here is a bug).  Returns 0 on
 * success or -ENOMEM.
 */
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_status *es;

	while (*p) {
		parent = *p;
		es = rb_entry(parent, struct extent_status, rb_node);

		if (newes->es_lblk < es->es_lblk) {
			if (ext4_es_can_be_merged(newes, es)) {
				/*
				 * Grow @es to the left to absorb @newes.
				 * The merged extent now starts at
				 * newes->es_lblk, so mapped extents take
				 * over @newes's physical start block.
				 */
				es->es_lblk = newes->es_lblk;
				es->es_len += newes->es_len;
				if (ext4_es_is_written(es) ||
				    ext4_es_is_unwritten(es))
					ext4_es_store_pblock(es,
							     newes->es_pblk);
				es = ext4_es_try_to_merge_left(inode, es);
				goto out;
			}
			p = &(*p)->rb_left;
		} else if (newes->es_lblk > ext4_es_end(es)) {
			if (ext4_es_can_be_merged(es, newes)) {
				/* Grow @es to the right to absorb @newes. */
				es->es_len += newes->es_len;
				es = ext4_es_try_to_merge_right(inode, es);
				goto out;
			}
			p = &(*p)->rb_right;
		} else {
			/* Overlap: the caller should have removed it. */
			BUG_ON(1);
			return -EINVAL;
		}
	}

	/* No merge was possible: allocate a fresh node and link it in. */
	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
				  newes->es_pblk);
	if (!es)
		return -ENOMEM;
	rb_link_node(&es->rb_node, parent, p);
	rb_insert_color(&es->rb_node, &tree->root);

out:
	tree->cache_es = es;
	return 0;
}
635
636
637
638
639
640
641
/*
 * ext4_es_insert_extent - add a block range to the extent status tree.
 *
 * @lblk, @len: logical range being cached
 * @pblk:       first physical block of the mapping
 * @status:     EXTENT_STATUS_* bits for the range
 *
 * Any existing extents overlapping [lblk, lblk + len) are removed first.
 * On -ENOMEM the shrinker is kicked and the insert retried; if it still
 * fails for a non-delayed extent the error is dropped (err reset to 0).
 * Returns 0 on success or a negative error code.
 */
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len, ext4_fsblk_t pblk,
			  unsigned long long status)
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err = 0;

	es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);

	if (!len)
		return 0;

	BUG_ON(end < lblk);

	newes.es_lblk = lblk;
	newes.es_len = len;
	ext4_es_store_pblock(&newes, pblk);
	ext4_es_store_status(&newes, status);
	trace_ext4_es_insert_extent(inode, &newes);

	ext4_es_insert_extent_check(inode, &newes);

	write_lock(&EXT4_I(inode)->i_es_lock);
	err = __es_remove_extent(inode, lblk, end);
	if (err != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	/* Retry only if the shrinker actually freed something. */
	if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
					       EXT4_I(inode)))
		goto retry;
	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
		err = 0;

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_es_print_tree(inode);

	return err;
}
685
686
687
688
689
690
691
692
/*
 * ext4_es_lookup_extent - find the extent, if any, that contains @lblk.
 *
 * On a hit fills *es and returns 1; otherwise returns 0 with *es zeroed.
 * The single-entry cache is consulted before the tree search.
 */
int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
			  struct extent_status *es)
{
	struct ext4_es_tree *tree;
	struct extent_status *es1 = NULL;
	struct rb_node *node;
	int found = 0;

	trace_ext4_es_lookup_extent_enter(inode, lblk);
	es_debug("lookup extent in block %u\n", lblk);

	tree = &EXT4_I(inode)->i_es_tree;
	read_lock(&EXT4_I(inode)->i_es_lock);

	/* Zero the result, then try the cached extent first. */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u)\n",
				 lblk, es1->es_lblk, es1->es_len);
			found = 1;
			goto out;
		}
	}

	/* Binary search for an extent whose range contains @lblk. */
	node = tree->root.rb_node;
	while (node) {
		es1 = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es1->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es1))
			node = node->rb_right;
		else {
			found = 1;
			break;
		}
	}

out:
	if (found) {
		BUG_ON(!es1);
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_lookup_extent_exit(inode, es, found);
	return found;
}
745
/*
 * __es_remove_extent - remove the range [lblk, end] from the status tree.
 *
 * Handles trimming the head or tail of a partially covered extent,
 * splitting an extent in two when the range falls strictly inside it, and
 * deleting any number of fully covered extents.  Caller must hold
 * i_es_lock for writing.  Returns 0 or -ENOMEM (a failed split kicks the
 * shrinker and retries when it freed anything).
 */
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	struct extent_status orig_es;
	ext4_lblk_t len1, len2;
	ext4_fsblk_t block;
	int err;

retry:
	err = 0;
	es = __es_tree_search(&tree->root, lblk);
	if (!es)
		goto out;
	if (es->es_lblk > end)
		goto out;

	/* The cached extent may be modified or freed below. */
	tree->cache_es = NULL;

	orig_es.es_lblk = es->es_lblk;
	orig_es.es_len = es->es_len;
	orig_es.es_pblk = es->es_pblk;

	/* len1/len2: parts of @es surviving before/after the range. */
	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
	if (len1 > 0)
		es->es_len = len1;
	if (len2 > 0) {
		if (len1 > 0) {
			struct extent_status newes;

			/*
			 * The range is strictly inside @es: keep the head
			 * in @es (trimmed above) and insert a fresh extent
			 * for the tail.
			 */
			newes.es_lblk = end + 1;
			newes.es_len = len2;
			if (ext4_es_is_written(&orig_es) ||
			    ext4_es_is_unwritten(&orig_es)) {
				block = ext4_es_pblock(&orig_es) +
					orig_es.es_len - len2;
				ext4_es_store_pblock(&newes, block);
			}
			ext4_es_store_status(&newes, ext4_es_status(&orig_es));
			err = __es_insert_extent(inode, &newes);
			if (err) {
				/* Undo the head trim before bailing out. */
				es->es_lblk = orig_es.es_lblk;
				es->es_len = orig_es.es_len;
				if ((err == -ENOMEM) &&
				    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
						     EXT4_I(inode)))
					goto retry;
				goto out;
			}
		} else {
			/* Only the tail survives: shift @es past @end. */
			es->es_lblk = end + 1;
			es->es_len = len2;
			if (ext4_es_is_written(es) ||
			    ext4_es_is_unwritten(es)) {
				block = orig_es.es_pblk + orig_es.es_len - len2;
				ext4_es_store_pblock(es, block);
			}
		}
		goto out;
	}

	if (len1 > 0) {
		/* Head was kept; continue removal from the next extent. */
		node = rb_next(&es->rb_node);
		if (node)
			es = rb_entry(node, struct extent_status, rb_node);
		else
			es = NULL;
	}

	/* Delete every extent lying entirely within [lblk, end]. */
	while (es && ext4_es_end(es) <= end) {
		node = rb_next(&es->rb_node);
		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		if (!node) {
			es = NULL;
			break;
		}
		es = rb_entry(node, struct extent_status, rb_node);
	}

	if (es && es->es_lblk < end + 1) {
		/* Trim the head of the last partially covered extent. */
		ext4_lblk_t orig_len = es->es_len;

		len1 = ext4_es_end(es) - end;
		es->es_lblk = end + 1;
		es->es_len = len1;
		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
			block = es->es_pblk + orig_len - len1;
			ext4_es_store_pblock(es, block);
		}
	}

out:
	return err;
}
845
846
847
848
849
850
851int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
852 ext4_lblk_t len)
853{
854 ext4_lblk_t end;
855 int err = 0;
856
857 trace_ext4_es_remove_extent(inode, lblk, len);
858 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
859 lblk, len, inode->i_ino);
860
861 if (!len)
862 return err;
863
864 end = lblk + len - 1;
865 BUG_ON(end < lblk);
866
867 write_lock(&EXT4_I(inode)->i_es_lock);
868 err = __es_remove_extent(inode, lblk, end);
869 write_unlock(&EXT4_I(inode)->i_es_lock);
870 ext4_es_print_tree(inode);
871 return err;
872}
873
874int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
875{
876 ext4_lblk_t ee_block;
877 ext4_fsblk_t ee_pblock;
878 unsigned int ee_len;
879
880 ee_block = le32_to_cpu(ex->ee_block);
881 ee_len = ext4_ext_get_actual_len(ex);
882 ee_pblock = ext4_ext_pblock(ex);
883
884 if (ee_len == 0)
885 return 0;
886
887 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
888 EXTENT_STATUS_WRITTEN);
889}
890
891static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
892 struct list_head *b)
893{
894 struct ext4_inode_info *eia, *eib;
895 eia = list_entry(a, struct ext4_inode_info, i_es_lru);
896 eib = list_entry(b, struct ext4_inode_info, i_es_lru);
897
898 if (eia->i_touch_when == eib->i_touch_when)
899 return 0;
900 if (time_after(eia->i_touch_when, eib->i_touch_when))
901 return 1;
902 else
903 return -1;
904}
905
906static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
907 struct ext4_inode_info *locked_ei)
908{
909 struct ext4_inode_info *ei;
910 struct list_head *cur, *tmp;
911 LIST_HEAD(skiped);
912 int ret, nr_shrunk = 0;
913
914 spin_lock(&sbi->s_es_lru_lock);
915
916
917
918
919
920 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
921 if (sbi->s_es_last_sorted < ei->i_touch_when) {
922 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
923 sbi->s_es_last_sorted = jiffies;
924 }
925
926 list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
927
928
929
930
931 if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
932 break;
933
934 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
935
936
937 if (sbi->s_es_last_sorted < ei->i_touch_when) {
938 list_move_tail(cur, &skiped);
939 continue;
940 }
941
942 if (ei->i_es_lru_nr == 0 || ei == locked_ei)
943 continue;
944
945 write_lock(&ei->i_es_lock);
946 ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
947 if (ei->i_es_lru_nr == 0)
948 list_del_init(&ei->i_es_lru);
949 write_unlock(&ei->i_es_lock);
950
951 nr_shrunk += ret;
952 nr_to_scan -= ret;
953 if (nr_to_scan == 0)
954 break;
955 }
956
957
958 list_splice_tail(&skiped, &sbi->s_es_lru);
959 spin_unlock(&sbi->s_es_lru_lock);
960
961 if (locked_ei && nr_shrunk == 0)
962 nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
963
964 return nr_shrunk;
965}
966
/*
 * ext4_es_shrink - VM shrinker callback for the extent status cache.
 *
 * When sc->nr_to_scan is zero, only report the current number of
 * reclaimable cached extents; otherwise reclaim up to nr_to_scan of them
 * and return the count that remains.
 */
static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	struct ext4_sb_info *sbi = container_of(shrink,
					struct ext4_sb_info, s_es_shrinker);
	int nr_to_scan = sc->nr_to_scan;
	int ret, nr_shrunk;

	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);

	/* nr_to_scan == 0 is a query for the cache size only. */
	if (!nr_to_scan)
		return ret;

	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);

	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
	return ret;
}
986
/*
 * ext4_es_register_shrinker - initialize the per-superblock extent status
 * LRU state and register ext4_es_shrink() with the VM.
 */
void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
{
	INIT_LIST_HEAD(&sbi->s_es_lru);
	spin_lock_init(&sbi->s_es_lru_lock);
	sbi->s_es_last_sorted = 0;
	sbi->s_es_shrinker.shrink = ext4_es_shrink;
	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&sbi->s_es_shrinker);
}
996
/*
 * ext4_es_unregister_shrinker - undo ext4_es_register_shrinker().
 */
void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
	unregister_shrinker(&sbi->s_es_shrinker);
}
1001
/*
 * ext4_es_lru_add - note that @inode's extent cache was just used.
 *
 * Refreshes i_touch_when and links the inode onto the superblock's extent
 * status LRU if it is not already there.  The unlocked list_empty() test
 * is a fast path; it is re-checked under s_es_lru_lock before insertion.
 */
void ext4_es_lru_add(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	ei->i_touch_when = jiffies;

	if (!list_empty(&ei->i_es_lru))
		return;

	spin_lock(&sbi->s_es_lru_lock);
	if (list_empty(&ei->i_es_lru))
		list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
	spin_unlock(&sbi->s_es_lru_lock);
}
1017
/*
 * ext4_es_lru_del - unlink @inode from the superblock's extent status
 * LRU.  Safe to call when the inode is not on the list.
 */
void ext4_es_lru_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	spin_lock(&sbi->s_es_lru_lock);
	if (!list_empty(&ei->i_es_lru))
		list_del_init(&ei->i_es_lru);
	spin_unlock(&sbi->s_es_lru_lock);
}
1028
/*
 * __es_try_to_reclaim_extents - free up to @nr_to_scan reclaimable
 * extents from @ei's status tree.
 *
 * Only non-delayed extents are freed — delayed extents are not accounted
 * as reclaimable (see ext4_es_alloc_extent()).  Callers hold the inode's
 * i_es_lock for writing.  Returns the number of extents freed.
 */
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
				       int nr_to_scan)
{
	struct inode *inode = &ei->vfs_inode;
	struct ext4_es_tree *tree = &ei->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	int nr_shrunk = 0;

	/* Nothing reclaimable on this inode. */
	if (ei->i_es_lru_nr == 0)
		return 0;

	node = rb_first(&tree->root);
	while (node != NULL) {
		es = rb_entry(node, struct extent_status, rb_node);
		/* Advance before rb_erase() invalidates the current node. */
		node = rb_next(&es->rb_node);

		/* Delayed extents are never reclaimed here. */
		if (!ext4_es_is_delayed(es)) {
			rb_erase(&es->rb_node, &tree->root);
			ext4_es_free_extent(inode, es);
			nr_shrunk++;
			if (--nr_to_scan == 0)
				break;
		}
	}
	/* The cached extent may have just been freed. */
	tree->cache_es = NULL;
	return nr_shrunk;
}
1060