1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/list_sort.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
16#include "ext4.h"
17
18#include <trace/events/ext4.h>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144static struct kmem_cache *ext4_es_cachep;
145static struct kmem_cache *ext4_pending_cachep;
146
147static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
148static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
149 ext4_lblk_t end, int *reserved);
150static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
151static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
152 struct ext4_inode_info *locked_ei);
153static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
154 ext4_lblk_t len);
155
156int __init ext4_init_es(void)
157{
158 ext4_es_cachep = kmem_cache_create("ext4_extent_status",
159 sizeof(struct extent_status),
160 0, (SLAB_RECLAIM_ACCOUNT), NULL);
161 if (ext4_es_cachep == NULL)
162 return -ENOMEM;
163 return 0;
164}
165
166void ext4_exit_es(void)
167{
168 kmem_cache_destroy(ext4_es_cachep);
169}
170
171void ext4_es_init_tree(struct ext4_es_tree *tree)
172{
173 tree->root = RB_ROOT;
174 tree->cache_es = NULL;
175}
176
177#ifdef ES_DEBUG__
178static void ext4_es_print_tree(struct inode *inode)
179{
180 struct ext4_es_tree *tree;
181 struct rb_node *node;
182
183 printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
184 tree = &EXT4_I(inode)->i_es_tree;
185 node = rb_first(&tree->root);
186 while (node) {
187 struct extent_status *es;
188 es = rb_entry(node, struct extent_status, rb_node);
189 printk(KERN_DEBUG " [%u/%u) %llu %x",
190 es->es_lblk, es->es_len,
191 ext4_es_pblock(es), ext4_es_status(es));
192 node = rb_next(node);
193 }
194 printk(KERN_DEBUG "\n");
195}
196#else
197#define ext4_es_print_tree(inode)
198#endif
199
200static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
201{
202 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
203 return es->es_lblk + es->es_len - 1;
204}
205
206
207
208
209
210static struct extent_status *__es_tree_search(struct rb_root *root,
211 ext4_lblk_t lblk)
212{
213 struct rb_node *node = root->rb_node;
214 struct extent_status *es = NULL;
215
216 while (node) {
217 es = rb_entry(node, struct extent_status, rb_node);
218 if (lblk < es->es_lblk)
219 node = node->rb_left;
220 else if (lblk > ext4_es_end(es))
221 node = node->rb_right;
222 else
223 return es;
224 }
225
226 if (es && lblk < es->es_lblk)
227 return es;
228
229 if (es && lblk > ext4_es_end(es)) {
230 node = rb_next(&es->rb_node);
231 return node ? rb_entry(node, struct extent_status, rb_node) :
232 NULL;
233 }
234
235 return NULL;
236}
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256static void __es_find_extent_range(struct inode *inode,
257 int (*matching_fn)(struct extent_status *es),
258 ext4_lblk_t lblk, ext4_lblk_t end,
259 struct extent_status *es)
260{
261 struct ext4_es_tree *tree = NULL;
262 struct extent_status *es1 = NULL;
263 struct rb_node *node;
264
265 WARN_ON(es == NULL);
266 WARN_ON(end < lblk);
267
268 tree = &EXT4_I(inode)->i_es_tree;
269
270
271 es->es_lblk = es->es_len = es->es_pblk = 0;
272 if (tree->cache_es) {
273 es1 = tree->cache_es;
274 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
275 es_debug("%u cached by [%u/%u) %llu %x\n",
276 lblk, es1->es_lblk, es1->es_len,
277 ext4_es_pblock(es1), ext4_es_status(es1));
278 goto out;
279 }
280 }
281
282 es1 = __es_tree_search(&tree->root, lblk);
283
284out:
285 if (es1 && !matching_fn(es1)) {
286 while ((node = rb_next(&es1->rb_node)) != NULL) {
287 es1 = rb_entry(node, struct extent_status, rb_node);
288 if (es1->es_lblk > end) {
289 es1 = NULL;
290 break;
291 }
292 if (matching_fn(es1))
293 break;
294 }
295 }
296
297 if (es1 && matching_fn(es1)) {
298 tree->cache_es = es1;
299 es->es_lblk = es1->es_lblk;
300 es->es_len = es1->es_len;
301 es->es_pblk = es1->es_pblk;
302 }
303
304}
305
306
307
308
309void ext4_es_find_extent_range(struct inode *inode,
310 int (*matching_fn)(struct extent_status *es),
311 ext4_lblk_t lblk, ext4_lblk_t end,
312 struct extent_status *es)
313{
314 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
315 return;
316
317 trace_ext4_es_find_extent_range_enter(inode, lblk);
318
319 read_lock(&EXT4_I(inode)->i_es_lock);
320 __es_find_extent_range(inode, matching_fn, lblk, end, es);
321 read_unlock(&EXT4_I(inode)->i_es_lock);
322
323 trace_ext4_es_find_extent_range_exit(inode, es);
324}
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341static bool __es_scan_range(struct inode *inode,
342 int (*matching_fn)(struct extent_status *es),
343 ext4_lblk_t start, ext4_lblk_t end)
344{
345 struct extent_status es;
346
347 __es_find_extent_range(inode, matching_fn, start, end, &es);
348 if (es.es_len == 0)
349 return false;
350 else if (es.es_lblk <= start &&
351 start < es.es_lblk + es.es_len)
352 return true;
353 else if (start <= es.es_lblk && es.es_lblk <= end)
354 return true;
355 else
356 return false;
357}
358
359
360
361bool ext4_es_scan_range(struct inode *inode,
362 int (*matching_fn)(struct extent_status *es),
363 ext4_lblk_t lblk, ext4_lblk_t end)
364{
365 bool ret;
366
367 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
368 return false;
369
370 read_lock(&EXT4_I(inode)->i_es_lock);
371 ret = __es_scan_range(inode, matching_fn, lblk, end);
372 read_unlock(&EXT4_I(inode)->i_es_lock);
373
374 return ret;
375}
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391static bool __es_scan_clu(struct inode *inode,
392 int (*matching_fn)(struct extent_status *es),
393 ext4_lblk_t lblk)
394{
395 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
396 ext4_lblk_t lblk_start, lblk_end;
397
398 lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
399 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
400
401 return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
402}
403
404
405
406
407bool ext4_es_scan_clu(struct inode *inode,
408 int (*matching_fn)(struct extent_status *es),
409 ext4_lblk_t lblk)
410{
411 bool ret;
412
413 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
414 return false;
415
416 read_lock(&EXT4_I(inode)->i_es_lock);
417 ret = __es_scan_clu(inode, matching_fn, lblk);
418 read_unlock(&EXT4_I(inode)->i_es_lock);
419
420 return ret;
421}
422
423static void ext4_es_list_add(struct inode *inode)
424{
425 struct ext4_inode_info *ei = EXT4_I(inode);
426 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
427
428 if (!list_empty(&ei->i_es_list))
429 return;
430
431 spin_lock(&sbi->s_es_lock);
432 if (list_empty(&ei->i_es_list)) {
433 list_add_tail(&ei->i_es_list, &sbi->s_es_list);
434 sbi->s_es_nr_inode++;
435 }
436 spin_unlock(&sbi->s_es_lock);
437}
438
439static void ext4_es_list_del(struct inode *inode)
440{
441 struct ext4_inode_info *ei = EXT4_I(inode);
442 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
443
444 spin_lock(&sbi->s_es_lock);
445 if (!list_empty(&ei->i_es_list)) {
446 list_del_init(&ei->i_es_list);
447 sbi->s_es_nr_inode--;
448 WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
449 }
450 spin_unlock(&sbi->s_es_lock);
451}
452
453static struct extent_status *
454ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
455 ext4_fsblk_t pblk)
456{
457 struct extent_status *es;
458 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
459 if (es == NULL)
460 return NULL;
461 es->es_lblk = lblk;
462 es->es_len = len;
463 es->es_pblk = pblk;
464
465
466
467
468 if (!ext4_es_is_delayed(es)) {
469 if (!EXT4_I(inode)->i_es_shk_nr++)
470 ext4_es_list_add(inode);
471 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
472 s_es_stats.es_stats_shk_cnt);
473 }
474
475 EXT4_I(inode)->i_es_all_nr++;
476 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
477
478 return es;
479}
480
481static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
482{
483 EXT4_I(inode)->i_es_all_nr--;
484 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
485
486
487 if (!ext4_es_is_delayed(es)) {
488 BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
489 if (!--EXT4_I(inode)->i_es_shk_nr)
490 ext4_es_list_del(inode);
491 percpu_counter_dec(&EXT4_SB(inode->i_sb)->
492 s_es_stats.es_stats_shk_cnt);
493 }
494
495 kmem_cache_free(ext4_es_cachep, es);
496}
497
498
499
500
501
502
503
504
505static int ext4_es_can_be_merged(struct extent_status *es1,
506 struct extent_status *es2)
507{
508 if (ext4_es_type(es1) != ext4_es_type(es2))
509 return 0;
510
511 if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
512 pr_warn("ES assertion failed when merging extents. "
513 "The sum of lengths of es1 (%d) and es2 (%d) "
514 "is bigger than allowed file size (%d)\n",
515 es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
516 WARN_ON(1);
517 return 0;
518 }
519
520 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
521 return 0;
522
523 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
524 (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
525 return 1;
526
527 if (ext4_es_is_hole(es1))
528 return 1;
529
530
531 if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
532 return 1;
533
534 return 0;
535}
536
537static struct extent_status *
538ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
539{
540 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
541 struct extent_status *es1;
542 struct rb_node *node;
543
544 node = rb_prev(&es->rb_node);
545 if (!node)
546 return es;
547
548 es1 = rb_entry(node, struct extent_status, rb_node);
549 if (ext4_es_can_be_merged(es1, es)) {
550 es1->es_len += es->es_len;
551 if (ext4_es_is_referenced(es))
552 ext4_es_set_referenced(es1);
553 rb_erase(&es->rb_node, &tree->root);
554 ext4_es_free_extent(inode, es);
555 es = es1;
556 }
557
558 return es;
559}
560
561static struct extent_status *
562ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
563{
564 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
565 struct extent_status *es1;
566 struct rb_node *node;
567
568 node = rb_next(&es->rb_node);
569 if (!node)
570 return es;
571
572 es1 = rb_entry(node, struct extent_status, rb_node);
573 if (ext4_es_can_be_merged(es, es1)) {
574 es->es_len += es1->es_len;
575 if (ext4_es_is_referenced(es1))
576 ext4_es_set_referenced(es);
577 rb_erase(node, &tree->root);
578 ext4_es_free_extent(inode, es1);
579 }
580
581 return es;
582}
583
584#ifdef ES_AGGRESSIVE_TEST
585#include "ext4_extents.h"
586
587static void ext4_es_insert_extent_ext_check(struct inode *inode,
588 struct extent_status *es)
589{
590 struct ext4_ext_path *path = NULL;
591 struct ext4_extent *ex;
592 ext4_lblk_t ee_block;
593 ext4_fsblk_t ee_start;
594 unsigned short ee_len;
595 int depth, ee_status, es_status;
596
597 path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
598 if (IS_ERR(path))
599 return;
600
601 depth = ext_depth(inode);
602 ex = path[depth].p_ext;
603
604 if (ex) {
605
606 ee_block = le32_to_cpu(ex->ee_block);
607 ee_start = ext4_ext_pblock(ex);
608 ee_len = ext4_ext_get_actual_len(ex);
609
610 ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
611 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
612
613
614
615
616
617 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
618 if (in_range(es->es_lblk, ee_block, ee_len)) {
619 pr_warn("ES insert assertion failed for "
620 "inode: %lu we can find an extent "
621 "at block [%d/%d/%llu/%c], but we "
622 "want to add a delayed/hole extent "
623 "[%d/%d/%llu/%x]\n",
624 inode->i_ino, ee_block, ee_len,
625 ee_start, ee_status ? 'u' : 'w',
626 es->es_lblk, es->es_len,
627 ext4_es_pblock(es), ext4_es_status(es));
628 }
629 goto out;
630 }
631
632
633
634
635
636 if (es->es_lblk < ee_block ||
637 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
638 pr_warn("ES insert assertion failed for inode: %lu "
639 "ex_status [%d/%d/%llu/%c] != "
640 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
641 ee_block, ee_len, ee_start,
642 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
643 ext4_es_pblock(es), es_status ? 'u' : 'w');
644 goto out;
645 }
646
647 if (ee_status ^ es_status) {
648 pr_warn("ES insert assertion failed for inode: %lu "
649 "ex_status [%d/%d/%llu/%c] != "
650 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
651 ee_block, ee_len, ee_start,
652 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
653 ext4_es_pblock(es), es_status ? 'u' : 'w');
654 }
655 } else {
656
657
658
659
660 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
661 pr_warn("ES insert assertion failed for inode: %lu "
662 "can't find an extent at block %d but we want "
663 "to add a written/unwritten extent "
664 "[%d/%d/%llu/%x]\n", inode->i_ino,
665 es->es_lblk, es->es_lblk, es->es_len,
666 ext4_es_pblock(es), ext4_es_status(es));
667 }
668 }
669out:
670 ext4_ext_drop_refs(path);
671 kfree(path);
672}
673
674static void ext4_es_insert_extent_ind_check(struct inode *inode,
675 struct extent_status *es)
676{
677 struct ext4_map_blocks map;
678 int retval;
679
680
681
682
683
684
685
686
687 map.m_lblk = es->es_lblk;
688 map.m_len = es->es_len;
689
690 retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
691 if (retval > 0) {
692 if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
693
694
695
696
697 pr_warn("ES insert assertion failed for inode: %lu "
698 "We can find blocks but we want to add a "
699 "delayed/hole extent [%d/%d/%llu/%x]\n",
700 inode->i_ino, es->es_lblk, es->es_len,
701 ext4_es_pblock(es), ext4_es_status(es));
702 return;
703 } else if (ext4_es_is_written(es)) {
704 if (retval != es->es_len) {
705 pr_warn("ES insert assertion failed for "
706 "inode: %lu retval %d != es_len %d\n",
707 inode->i_ino, retval, es->es_len);
708 return;
709 }
710 if (map.m_pblk != ext4_es_pblock(es)) {
711 pr_warn("ES insert assertion failed for "
712 "inode: %lu m_pblk %llu != "
713 "es_pblk %llu\n",
714 inode->i_ino, map.m_pblk,
715 ext4_es_pblock(es));
716 return;
717 }
718 } else {
719
720
721
722
723 BUG();
724 }
725 } else if (retval == 0) {
726 if (ext4_es_is_written(es)) {
727 pr_warn("ES insert assertion failed for inode: %lu "
728 "We can't find the block but we want to add "
729 "a written extent [%d/%d/%llu/%x]\n",
730 inode->i_ino, es->es_lblk, es->es_len,
731 ext4_es_pblock(es), ext4_es_status(es));
732 return;
733 }
734 }
735}
736
737static inline void ext4_es_insert_extent_check(struct inode *inode,
738 struct extent_status *es)
739{
740
741
742
743
744 BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
745 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
746 ext4_es_insert_extent_ext_check(inode, es);
747 else
748 ext4_es_insert_extent_ind_check(inode, es);
749}
750#else
751static inline void ext4_es_insert_extent_check(struct inode *inode,
752 struct extent_status *es)
753{
754}
755#endif
756
757static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
758{
759 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
760 struct rb_node **p = &tree->root.rb_node;
761 struct rb_node *parent = NULL;
762 struct extent_status *es;
763
764 while (*p) {
765 parent = *p;
766 es = rb_entry(parent, struct extent_status, rb_node);
767
768 if (newes->es_lblk < es->es_lblk) {
769 if (ext4_es_can_be_merged(newes, es)) {
770
771
772
773
774 es->es_lblk = newes->es_lblk;
775 es->es_len += newes->es_len;
776 if (ext4_es_is_written(es) ||
777 ext4_es_is_unwritten(es))
778 ext4_es_store_pblock(es,
779 newes->es_pblk);
780 es = ext4_es_try_to_merge_left(inode, es);
781 goto out;
782 }
783 p = &(*p)->rb_left;
784 } else if (newes->es_lblk > ext4_es_end(es)) {
785 if (ext4_es_can_be_merged(es, newes)) {
786 es->es_len += newes->es_len;
787 es = ext4_es_try_to_merge_right(inode, es);
788 goto out;
789 }
790 p = &(*p)->rb_right;
791 } else {
792 BUG();
793 return -EINVAL;
794 }
795 }
796
797 es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
798 newes->es_pblk);
799 if (!es)
800 return -ENOMEM;
801 rb_link_node(&es->rb_node, parent, p);
802 rb_insert_color(&es->rb_node, &tree->root);
803
804out:
805 tree->cache_es = es;
806 return 0;
807}
808
809
810
811
812
813
814
815int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
816 ext4_lblk_t len, ext4_fsblk_t pblk,
817 unsigned int status)
818{
819 struct extent_status newes;
820 ext4_lblk_t end = lblk + len - 1;
821 int err = 0;
822 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
823
824 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
825 return 0;
826
827 es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
828 lblk, len, pblk, status, inode->i_ino);
829
830 if (!len)
831 return 0;
832
833 BUG_ON(end < lblk);
834
835 if ((status & EXTENT_STATUS_DELAYED) &&
836 (status & EXTENT_STATUS_WRITTEN)) {
837 ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
838 " delayed and written which can potentially "
839 " cause data loss.", lblk, len);
840 WARN_ON(1);
841 }
842
843 newes.es_lblk = lblk;
844 newes.es_len = len;
845 ext4_es_store_pblock_status(&newes, pblk, status);
846 trace_ext4_es_insert_extent(inode, &newes);
847
848 ext4_es_insert_extent_check(inode, &newes);
849
850 write_lock(&EXT4_I(inode)->i_es_lock);
851 err = __es_remove_extent(inode, lblk, end, NULL);
852 if (err != 0)
853 goto error;
854retry:
855 err = __es_insert_extent(inode, &newes);
856 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
857 128, EXT4_I(inode)))
858 goto retry;
859 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
860 err = 0;
861
862 if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
863 (status & EXTENT_STATUS_WRITTEN ||
864 status & EXTENT_STATUS_UNWRITTEN))
865 __revise_pending(inode, lblk, len);
866
867error:
868 write_unlock(&EXT4_I(inode)->i_es_lock);
869
870 ext4_es_print_tree(inode);
871
872 return err;
873}
874
875
876
877
878
879
880void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
881 ext4_lblk_t len, ext4_fsblk_t pblk,
882 unsigned int status)
883{
884 struct extent_status *es;
885 struct extent_status newes;
886 ext4_lblk_t end = lblk + len - 1;
887
888 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
889 return;
890
891 newes.es_lblk = lblk;
892 newes.es_len = len;
893 ext4_es_store_pblock_status(&newes, pblk, status);
894 trace_ext4_es_cache_extent(inode, &newes);
895
896 if (!len)
897 return;
898
899 BUG_ON(end < lblk);
900
901 write_lock(&EXT4_I(inode)->i_es_lock);
902
903 es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
904 if (!es || es->es_lblk > end)
905 __es_insert_extent(inode, &newes);
906 write_unlock(&EXT4_I(inode)->i_es_lock);
907}
908
909
910
911
912
913
914
915
916int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
917 ext4_lblk_t *next_lblk,
918 struct extent_status *es)
919{
920 struct ext4_es_tree *tree;
921 struct ext4_es_stats *stats;
922 struct extent_status *es1 = NULL;
923 struct rb_node *node;
924 int found = 0;
925
926 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
927 return 0;
928
929 trace_ext4_es_lookup_extent_enter(inode, lblk);
930 es_debug("lookup extent in block %u\n", lblk);
931
932 tree = &EXT4_I(inode)->i_es_tree;
933 read_lock(&EXT4_I(inode)->i_es_lock);
934
935
936 es->es_lblk = es->es_len = es->es_pblk = 0;
937 if (tree->cache_es) {
938 es1 = tree->cache_es;
939 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
940 es_debug("%u cached by [%u/%u)\n",
941 lblk, es1->es_lblk, es1->es_len);
942 found = 1;
943 goto out;
944 }
945 }
946
947 node = tree->root.rb_node;
948 while (node) {
949 es1 = rb_entry(node, struct extent_status, rb_node);
950 if (lblk < es1->es_lblk)
951 node = node->rb_left;
952 else if (lblk > ext4_es_end(es1))
953 node = node->rb_right;
954 else {
955 found = 1;
956 break;
957 }
958 }
959
960out:
961 stats = &EXT4_SB(inode->i_sb)->s_es_stats;
962 if (found) {
963 BUG_ON(!es1);
964 es->es_lblk = es1->es_lblk;
965 es->es_len = es1->es_len;
966 es->es_pblk = es1->es_pblk;
967 if (!ext4_es_is_referenced(es1))
968 ext4_es_set_referenced(es1);
969 percpu_counter_inc(&stats->es_stats_cache_hits);
970 if (next_lblk) {
971 node = rb_next(&es1->rb_node);
972 if (node) {
973 es1 = rb_entry(node, struct extent_status,
974 rb_node);
975 *next_lblk = es1->es_lblk;
976 } else
977 *next_lblk = 0;
978 }
979 } else {
980 percpu_counter_inc(&stats->es_stats_cache_misses);
981 }
982
983 read_unlock(&EXT4_I(inode)->i_es_lock);
984
985 trace_ext4_es_lookup_extent_exit(inode, es, found);
986 return found;
987}
988
989struct rsvd_count {
990 int ndelonly;
991 bool first_do_lblk_found;
992 ext4_lblk_t first_do_lblk;
993 ext4_lblk_t last_do_lblk;
994 struct extent_status *left_es;
995 bool partial;
996 ext4_lblk_t lclu;
997};
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
1011 struct extent_status *es, struct rsvd_count *rc)
1012{
1013 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1014 struct rb_node *node;
1015
1016 rc->ndelonly = 0;
1017
1018
1019
1020
1021
1022
1023
1024 if (sbi->s_cluster_ratio > 1) {
1025 rc->first_do_lblk_found = false;
1026 if (lblk > es->es_lblk) {
1027 rc->left_es = es;
1028 } else {
1029 node = rb_prev(&es->rb_node);
1030 rc->left_es = node ? rb_entry(node,
1031 struct extent_status,
1032 rb_node) : NULL;
1033 }
1034 rc->partial = false;
1035 }
1036}
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
1053 struct extent_status *es, struct rsvd_count *rc)
1054{
1055 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1056 ext4_lblk_t i, end, nclu;
1057
1058 if (!ext4_es_is_delonly(es))
1059 return;
1060
1061 WARN_ON(len <= 0);
1062
1063 if (sbi->s_cluster_ratio == 1) {
1064 rc->ndelonly += (int) len;
1065 return;
1066 }
1067
1068
1069
1070 i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
1071 end = lblk + (ext4_lblk_t) len - 1;
1072 end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;
1073
1074
1075 if (!rc->first_do_lblk_found) {
1076 rc->first_do_lblk = i;
1077 rc->first_do_lblk_found = true;
1078 }
1079
1080
1081 rc->last_do_lblk = end;
1082
1083
1084
1085
1086
1087 if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
1088 rc->ndelonly++;
1089 rc->partial = false;
1090 }
1091
1092
1093
1094
1095
1096 if (EXT4_LBLK_COFF(sbi, i) != 0) {
1097 if (end >= EXT4_LBLK_CFILL(sbi, i)) {
1098 rc->ndelonly++;
1099 rc->partial = false;
1100 i = EXT4_LBLK_CFILL(sbi, i) + 1;
1101 }
1102 }
1103
1104
1105
1106
1107
1108 if ((i + sbi->s_cluster_ratio - 1) <= end) {
1109 nclu = (end - i + 1) >> sbi->s_cluster_bits;
1110 rc->ndelonly += nclu;
1111 i += nclu << sbi->s_cluster_bits;
1112 }
1113
1114
1115
1116
1117
1118 if (!rc->partial && i <= end) {
1119 rc->partial = true;
1120 rc->lclu = EXT4_B2C(sbi, i);
1121 }
1122}
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134static struct pending_reservation *__pr_tree_search(struct rb_root *root,
1135 ext4_lblk_t lclu)
1136{
1137 struct rb_node *node = root->rb_node;
1138 struct pending_reservation *pr = NULL;
1139
1140 while (node) {
1141 pr = rb_entry(node, struct pending_reservation, rb_node);
1142 if (lclu < pr->lclu)
1143 node = node->rb_left;
1144 else if (lclu > pr->lclu)
1145 node = node->rb_right;
1146 else
1147 return pr;
1148 }
1149 if (pr && lclu < pr->lclu)
1150 return pr;
1151 if (pr && lclu > pr->lclu) {
1152 node = rb_next(&pr->rb_node);
1153 return node ? rb_entry(node, struct pending_reservation,
1154 rb_node) : NULL;
1155 }
1156 return NULL;
1157}
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
1176 struct extent_status *right_es,
1177 struct rsvd_count *rc)
1178{
1179 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1180 struct pending_reservation *pr;
1181 struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
1182 struct rb_node *node;
1183 ext4_lblk_t first_lclu, last_lclu;
1184 bool left_delonly, right_delonly, count_pending;
1185 struct extent_status *es;
1186
1187 if (sbi->s_cluster_ratio > 1) {
1188
1189 if (rc->partial)
1190 rc->ndelonly++;
1191
1192 if (rc->ndelonly == 0)
1193 return 0;
1194
1195 first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
1196 last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);
1197
1198
1199
1200
1201
1202
1203 left_delonly = right_delonly = false;
1204
1205 es = rc->left_es;
1206 while (es && ext4_es_end(es) >=
1207 EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
1208 if (ext4_es_is_delonly(es)) {
1209 rc->ndelonly--;
1210 left_delonly = true;
1211 break;
1212 }
1213 node = rb_prev(&es->rb_node);
1214 if (!node)
1215 break;
1216 es = rb_entry(node, struct extent_status, rb_node);
1217 }
1218 if (right_es && (!left_delonly || first_lclu != last_lclu)) {
1219 if (end < ext4_es_end(right_es)) {
1220 es = right_es;
1221 } else {
1222 node = rb_next(&right_es->rb_node);
1223 es = node ? rb_entry(node, struct extent_status,
1224 rb_node) : NULL;
1225 }
1226 while (es && es->es_lblk <=
1227 EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
1228 if (ext4_es_is_delonly(es)) {
1229 rc->ndelonly--;
1230 right_delonly = true;
1231 break;
1232 }
1233 node = rb_next(&es->rb_node);
1234 if (!node)
1235 break;
1236 es = rb_entry(node, struct extent_status,
1237 rb_node);
1238 }
1239 }
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250 if (first_lclu == last_lclu) {
1251 if (left_delonly | right_delonly)
1252 count_pending = false;
1253 else
1254 count_pending = true;
1255 } else {
1256 if (left_delonly)
1257 first_lclu++;
1258 if (right_delonly)
1259 last_lclu--;
1260 if (first_lclu <= last_lclu)
1261 count_pending = true;
1262 else
1263 count_pending = false;
1264 }
1265
1266
1267
1268
1269
1270
1271
1272 if (count_pending) {
1273 pr = __pr_tree_search(&tree->root, first_lclu);
1274 while (pr && pr->lclu <= last_lclu) {
1275 rc->ndelonly--;
1276 node = rb_next(&pr->rb_node);
1277 rb_erase(&pr->rb_node, &tree->root);
1278 kmem_cache_free(ext4_pending_cachep, pr);
1279 if (!node)
1280 break;
1281 pr = rb_entry(node, struct pending_reservation,
1282 rb_node);
1283 }
1284 }
1285 }
1286 return rc->ndelonly;
1287}
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
1304 ext4_lblk_t end, int *reserved)
1305{
1306 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
1307 struct rb_node *node;
1308 struct extent_status *es;
1309 struct extent_status orig_es;
1310 ext4_lblk_t len1, len2;
1311 ext4_fsblk_t block;
1312 int err;
1313 bool count_reserved = true;
1314 struct rsvd_count rc;
1315
1316 if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
1317 count_reserved = false;
1318retry:
1319 err = 0;
1320
1321 es = __es_tree_search(&tree->root, lblk);
1322 if (!es)
1323 goto out;
1324 if (es->es_lblk > end)
1325 goto out;
1326
1327
1328 tree->cache_es = NULL;
1329 if (count_reserved)
1330 init_rsvd(inode, lblk, es, &rc);
1331
1332 orig_es.es_lblk = es->es_lblk;
1333 orig_es.es_len = es->es_len;
1334 orig_es.es_pblk = es->es_pblk;
1335
1336 len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
1337 len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
1338 if (len1 > 0)
1339 es->es_len = len1;
1340 if (len2 > 0) {
1341 if (len1 > 0) {
1342 struct extent_status newes;
1343
1344 newes.es_lblk = end + 1;
1345 newes.es_len = len2;
1346 block = 0x7FDEADBEEFULL;
1347 if (ext4_es_is_written(&orig_es) ||
1348 ext4_es_is_unwritten(&orig_es))
1349 block = ext4_es_pblock(&orig_es) +
1350 orig_es.es_len - len2;
1351 ext4_es_store_pblock_status(&newes, block,
1352 ext4_es_status(&orig_es));
1353 err = __es_insert_extent(inode, &newes);
1354 if (err) {
1355 es->es_lblk = orig_es.es_lblk;
1356 es->es_len = orig_es.es_len;
1357 if ((err == -ENOMEM) &&
1358 __es_shrink(EXT4_SB(inode->i_sb),
1359 128, EXT4_I(inode)))
1360 goto retry;
1361 goto out;
1362 }
1363 } else {
1364 es->es_lblk = end + 1;
1365 es->es_len = len2;
1366 if (ext4_es_is_written(es) ||
1367 ext4_es_is_unwritten(es)) {
1368 block = orig_es.es_pblk + orig_es.es_len - len2;
1369 ext4_es_store_pblock(es, block);
1370 }
1371 }
1372 if (count_reserved)
1373 count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
1374 &orig_es, &rc);
1375 goto out;
1376 }
1377
1378 if (len1 > 0) {
1379 if (count_reserved)
1380 count_rsvd(inode, lblk, orig_es.es_len - len1,
1381 &orig_es, &rc);
1382 node = rb_next(&es->rb_node);
1383 if (node)
1384 es = rb_entry(node, struct extent_status, rb_node);
1385 else
1386 es = NULL;
1387 }
1388
1389 while (es && ext4_es_end(es) <= end) {
1390 if (count_reserved)
1391 count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
1392 node = rb_next(&es->rb_node);
1393 rb_erase(&es->rb_node, &tree->root);
1394 ext4_es_free_extent(inode, es);
1395 if (!node) {
1396 es = NULL;
1397 break;
1398 }
1399 es = rb_entry(node, struct extent_status, rb_node);
1400 }
1401
1402 if (es && es->es_lblk < end + 1) {
1403 ext4_lblk_t orig_len = es->es_len;
1404
1405 len1 = ext4_es_end(es) - end;
1406 if (count_reserved)
1407 count_rsvd(inode, es->es_lblk, orig_len - len1,
1408 es, &rc);
1409 es->es_lblk = end + 1;
1410 es->es_len = len1;
1411 if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
1412 block = es->es_pblk + orig_len - len1;
1413 ext4_es_store_pblock(es, block);
1414 }
1415 }
1416
1417 if (count_reserved)
1418 *reserved = get_rsvd(inode, end, es, &rc);
1419out:
1420 return err;
1421}
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
1434 ext4_lblk_t len)
1435{
1436 ext4_lblk_t end;
1437 int err = 0;
1438 int reserved = 0;
1439
1440 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
1441 return 0;
1442
1443 trace_ext4_es_remove_extent(inode, lblk, len);
1444 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
1445 lblk, len, inode->i_ino);
1446
1447 if (!len)
1448 return err;
1449
1450 end = lblk + len - 1;
1451 BUG_ON(end < lblk);
1452
1453
1454
1455
1456
1457
1458 write_lock(&EXT4_I(inode)->i_es_lock);
1459 err = __es_remove_extent(inode, lblk, end, &reserved);
1460 write_unlock(&EXT4_I(inode)->i_es_lock);
1461 ext4_es_print_tree(inode);
1462 ext4_da_release_space(inode, reserved);
1463 return err;
1464}
1465
1466static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
1467 struct ext4_inode_info *locked_ei)
1468{
1469 struct ext4_inode_info *ei;
1470 struct ext4_es_stats *es_stats;
1471 ktime_t start_time;
1472 u64 scan_time;
1473 int nr_to_walk;
1474 int nr_shrunk = 0;
1475 int retried = 0, nr_skipped = 0;
1476
1477 es_stats = &sbi->s_es_stats;
1478 start_time = ktime_get();
1479
1480retry:
1481 spin_lock(&sbi->s_es_lock);
1482 nr_to_walk = sbi->s_es_nr_inode;
1483 while (nr_to_walk-- > 0) {
1484 if (list_empty(&sbi->s_es_list)) {
1485 spin_unlock(&sbi->s_es_lock);
1486 goto out;
1487 }
1488 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
1489 i_es_list);
1490
1491 list_move_tail(&ei->i_es_list, &sbi->s_es_list);
1492
1493
1494
1495
1496
1497 if (!retried && ext4_test_inode_state(&ei->vfs_inode,
1498 EXT4_STATE_EXT_PRECACHED)) {
1499 nr_skipped++;
1500 continue;
1501 }
1502
1503 if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
1504 nr_skipped++;
1505 continue;
1506 }
1507
1508
1509
1510
1511 spin_unlock(&sbi->s_es_lock);
1512
1513 nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
1514 write_unlock(&ei->i_es_lock);
1515
1516 if (nr_to_scan <= 0)
1517 goto out;
1518 spin_lock(&sbi->s_es_lock);
1519 }
1520 spin_unlock(&sbi->s_es_lock);
1521
1522
1523
1524
1525
1526 if ((nr_shrunk == 0) && nr_skipped && !retried) {
1527 retried++;
1528 goto retry;
1529 }
1530
1531 if (locked_ei && nr_shrunk == 0)
1532 nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
1533
1534out:
1535 scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1536 if (likely(es_stats->es_stats_scan_time))
1537 es_stats->es_stats_scan_time = (scan_time +
1538 es_stats->es_stats_scan_time*3) / 4;
1539 else
1540 es_stats->es_stats_scan_time = scan_time;
1541 if (scan_time > es_stats->es_stats_max_scan_time)
1542 es_stats->es_stats_max_scan_time = scan_time;
1543 if (likely(es_stats->es_stats_shrunk))
1544 es_stats->es_stats_shrunk = (nr_shrunk +
1545 es_stats->es_stats_shrunk*3) / 4;
1546 else
1547 es_stats->es_stats_shrunk = nr_shrunk;
1548
1549 trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
1550 nr_skipped, retried);
1551 return nr_shrunk;
1552}
1553
1554static unsigned long ext4_es_count(struct shrinker *shrink,
1555 struct shrink_control *sc)
1556{
1557 unsigned long nr;
1558 struct ext4_sb_info *sbi;
1559
1560 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1561 nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1562 trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
1563 return nr;
1564}
1565
1566static unsigned long ext4_es_scan(struct shrinker *shrink,
1567 struct shrink_control *sc)
1568{
1569 struct ext4_sb_info *sbi = container_of(shrink,
1570 struct ext4_sb_info, s_es_shrinker);
1571 int nr_to_scan = sc->nr_to_scan;
1572 int ret, nr_shrunk;
1573
1574 ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1575 trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
1576
1577 nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
1578
1579 ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1580 trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
1581 return nr_shrunk;
1582}
1583
1584int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
1585{
1586 struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
1587 struct ext4_es_stats *es_stats = &sbi->s_es_stats;
1588 struct ext4_inode_info *ei, *max = NULL;
1589 unsigned int inode_cnt = 0;
1590
1591 if (v != SEQ_START_TOKEN)
1592 return 0;
1593
1594
1595 spin_lock(&sbi->s_es_lock);
1596 list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
1597 inode_cnt++;
1598 if (max && max->i_es_all_nr < ei->i_es_all_nr)
1599 max = ei;
1600 else if (!max)
1601 max = ei;
1602 }
1603 spin_unlock(&sbi->s_es_lock);
1604
1605 seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
1606 percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
1607 percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
1608 seq_printf(seq, " %lld/%lld cache hits/misses\n",
1609 percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
1610 percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
1611 if (inode_cnt)
1612 seq_printf(seq, " %d inodes on list\n", inode_cnt);
1613
1614 seq_printf(seq, "average:\n %llu us scan time\n",
1615 div_u64(es_stats->es_stats_scan_time, 1000));
1616 seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk);
1617 if (inode_cnt)
1618 seq_printf(seq,
1619 "maximum:\n %lu inode (%u objects, %u reclaimable)\n"
1620 " %llu us max scan time\n",
1621 max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
1622 div_u64(es_stats->es_stats_max_scan_time, 1000));
1623
1624 return 0;
1625}
1626
1627int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1628{
1629 int err;
1630
1631
1632 BUILD_BUG_ON(ES_SHIFT < 48);
1633 INIT_LIST_HEAD(&sbi->s_es_list);
1634 sbi->s_es_nr_inode = 0;
1635 spin_lock_init(&sbi->s_es_lock);
1636 sbi->s_es_stats.es_stats_shrunk = 0;
1637 err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
1638 GFP_KERNEL);
1639 if (err)
1640 return err;
1641 err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
1642 GFP_KERNEL);
1643 if (err)
1644 goto err1;
1645 sbi->s_es_stats.es_stats_scan_time = 0;
1646 sbi->s_es_stats.es_stats_max_scan_time = 0;
1647 err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
1648 if (err)
1649 goto err2;
1650 err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
1651 if (err)
1652 goto err3;
1653
1654 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1655 sbi->s_es_shrinker.count_objects = ext4_es_count;
1656 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1657 err = register_shrinker(&sbi->s_es_shrinker);
1658 if (err)
1659 goto err4;
1660
1661 return 0;
1662err4:
1663 percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
1664err3:
1665 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1666err2:
1667 percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
1668err1:
1669 percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
1670 return err;
1671}
1672
1673void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
1674{
1675 percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
1676 percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
1677 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1678 percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
1679 unregister_shrinker(&sbi->s_es_shrinker);
1680}
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
1691 int *nr_to_scan, int *nr_shrunk)
1692{
1693 struct inode *inode = &ei->vfs_inode;
1694 struct ext4_es_tree *tree = &ei->i_es_tree;
1695 struct extent_status *es;
1696 struct rb_node *node;
1697
1698 es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
1699 if (!es)
1700 goto out_wrap;
1701
1702 while (*nr_to_scan > 0) {
1703 if (es->es_lblk > end) {
1704 ei->i_es_shrink_lblk = end + 1;
1705 return 0;
1706 }
1707
1708 (*nr_to_scan)--;
1709 node = rb_next(&es->rb_node);
1710
1711
1712
1713
1714 if (ext4_es_is_delayed(es))
1715 goto next;
1716 if (ext4_es_is_referenced(es)) {
1717 ext4_es_clear_referenced(es);
1718 goto next;
1719 }
1720
1721 rb_erase(&es->rb_node, &tree->root);
1722 ext4_es_free_extent(inode, es);
1723 (*nr_shrunk)++;
1724next:
1725 if (!node)
1726 goto out_wrap;
1727 es = rb_entry(node, struct extent_status, rb_node);
1728 }
1729 ei->i_es_shrink_lblk = es->es_lblk;
1730 return 1;
1731out_wrap:
1732 ei->i_es_shrink_lblk = 0;
1733 return 0;
1734}
1735
1736static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
1737{
1738 struct inode *inode = &ei->vfs_inode;
1739 int nr_shrunk = 0;
1740 ext4_lblk_t start = ei->i_es_shrink_lblk;
1741 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1742 DEFAULT_RATELIMIT_BURST);
1743
1744 if (ei->i_es_shk_nr == 0)
1745 return 0;
1746
1747 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1748 __ratelimit(&_rs))
1749 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1750
1751 if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
1752 start != 0)
1753 es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
1754
1755 ei->i_es_tree.cache_es = NULL;
1756 return nr_shrunk;
1757}
1758
1759
1760
1761
1762
1763
1764void ext4_clear_inode_es(struct inode *inode)
1765{
1766 struct ext4_inode_info *ei = EXT4_I(inode);
1767 struct extent_status *es;
1768 struct ext4_es_tree *tree;
1769 struct rb_node *node;
1770
1771 write_lock(&ei->i_es_lock);
1772 tree = &EXT4_I(inode)->i_es_tree;
1773 tree->cache_es = NULL;
1774 node = rb_first(&tree->root);
1775 while (node) {
1776 es = rb_entry(node, struct extent_status, rb_node);
1777 node = rb_next(node);
1778 if (!ext4_es_is_delayed(es)) {
1779 rb_erase(&es->rb_node, &tree->root);
1780 ext4_es_free_extent(inode, es);
1781 }
1782 }
1783 ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
1784 write_unlock(&ei->i_es_lock);
1785}
1786
1787#ifdef ES_DEBUG__
1788static void ext4_print_pending_tree(struct inode *inode)
1789{
1790 struct ext4_pending_tree *tree;
1791 struct rb_node *node;
1792 struct pending_reservation *pr;
1793
1794 printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
1795 tree = &EXT4_I(inode)->i_pending_tree;
1796 node = rb_first(&tree->root);
1797 while (node) {
1798 pr = rb_entry(node, struct pending_reservation, rb_node);
1799 printk(KERN_DEBUG " %u", pr->lclu);
1800 node = rb_next(node);
1801 }
1802 printk(KERN_DEBUG "\n");
1803}
1804#else
1805#define ext4_print_pending_tree(inode)
1806#endif
1807
1808int __init ext4_init_pending(void)
1809{
1810 ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
1811 sizeof(struct pending_reservation),
1812 0, (SLAB_RECLAIM_ACCOUNT), NULL);
1813 if (ext4_pending_cachep == NULL)
1814 return -ENOMEM;
1815 return 0;
1816}
1817
1818void ext4_exit_pending(void)
1819{
1820 kmem_cache_destroy(ext4_pending_cachep);
1821}
1822
1823void ext4_init_pending_tree(struct ext4_pending_tree *tree)
1824{
1825 tree->root = RB_ROOT;
1826}
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837static struct pending_reservation *__get_pending(struct inode *inode,
1838 ext4_lblk_t lclu)
1839{
1840 struct ext4_pending_tree *tree;
1841 struct rb_node *node;
1842 struct pending_reservation *pr = NULL;
1843
1844 tree = &EXT4_I(inode)->i_pending_tree;
1845 node = (&tree->root)->rb_node;
1846
1847 while (node) {
1848 pr = rb_entry(node, struct pending_reservation, rb_node);
1849 if (lclu < pr->lclu)
1850 node = node->rb_left;
1851 else if (lclu > pr->lclu)
1852 node = node->rb_right;
1853 else if (lclu == pr->lclu)
1854 return pr;
1855 }
1856 return NULL;
1857}
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
1870{
1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1872 struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
1873 struct rb_node **p = &tree->root.rb_node;
1874 struct rb_node *parent = NULL;
1875 struct pending_reservation *pr;
1876 ext4_lblk_t lclu;
1877 int ret = 0;
1878
1879 lclu = EXT4_B2C(sbi, lblk);
1880
1881 while (*p) {
1882 parent = *p;
1883 pr = rb_entry(parent, struct pending_reservation, rb_node);
1884
1885 if (lclu < pr->lclu) {
1886 p = &(*p)->rb_left;
1887 } else if (lclu > pr->lclu) {
1888 p = &(*p)->rb_right;
1889 } else {
1890
1891 goto out;
1892 }
1893 }
1894
1895 pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
1896 if (pr == NULL) {
1897 ret = -ENOMEM;
1898 goto out;
1899 }
1900 pr->lclu = lclu;
1901
1902 rb_link_node(&pr->rb_node, parent, p);
1903 rb_insert_color(&pr->rb_node, &tree->root);
1904
1905out:
1906 return ret;
1907}
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
1919{
1920 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1921 struct pending_reservation *pr;
1922 struct ext4_pending_tree *tree;
1923
1924 pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
1925 if (pr != NULL) {
1926 tree = &EXT4_I(inode)->i_pending_tree;
1927 rb_erase(&pr->rb_node, &tree->root);
1928 kmem_cache_free(ext4_pending_cachep, pr);
1929 }
1930}
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
1942{
1943 struct ext4_inode_info *ei = EXT4_I(inode);
1944
1945 write_lock(&ei->i_es_lock);
1946 __remove_pending(inode, lblk);
1947 write_unlock(&ei->i_es_lock);
1948}
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
1961{
1962 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1963 struct ext4_inode_info *ei = EXT4_I(inode);
1964 bool ret;
1965
1966 read_lock(&ei->i_es_lock);
1967 ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
1968 read_unlock(&ei->i_es_lock);
1969
1970 return ret;
1971}
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
1986 bool allocated)
1987{
1988 struct extent_status newes;
1989 int err = 0;
1990
1991 if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
1992 return 0;
1993
1994 es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
1995 lblk, inode->i_ino);
1996
1997 newes.es_lblk = lblk;
1998 newes.es_len = 1;
1999 ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
2000 trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
2001
2002 ext4_es_insert_extent_check(inode, &newes);
2003
2004 write_lock(&EXT4_I(inode)->i_es_lock);
2005
2006 err = __es_remove_extent(inode, lblk, lblk, NULL);
2007 if (err != 0)
2008 goto error;
2009retry:
2010 err = __es_insert_extent(inode, &newes);
2011 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
2012 128, EXT4_I(inode)))
2013 goto retry;
2014 if (err != 0)
2015 goto error;
2016
2017 if (allocated)
2018 __insert_pending(inode, lblk);
2019
2020error:
2021 write_unlock(&EXT4_I(inode)->i_es_lock);
2022
2023 ext4_es_print_tree(inode);
2024 ext4_print_pending_tree(inode);
2025
2026 return err;
2027}
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
2043 ext4_lblk_t end)
2044{
2045 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
2046 struct extent_status *es;
2047 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2048 struct rb_node *node;
2049 ext4_lblk_t first_lclu, last_lclu;
2050 unsigned long long last_counted_lclu;
2051 unsigned int n = 0;
2052
2053
2054 last_counted_lclu = ~0ULL;
2055
2056 es = __es_tree_search(&tree->root, start);
2057
2058 while (es && (es->es_lblk <= end)) {
2059 if (ext4_es_is_delonly(es)) {
2060 if (es->es_lblk <= start)
2061 first_lclu = EXT4_B2C(sbi, start);
2062 else
2063 first_lclu = EXT4_B2C(sbi, es->es_lblk);
2064
2065 if (ext4_es_end(es) >= end)
2066 last_lclu = EXT4_B2C(sbi, end);
2067 else
2068 last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
2069
2070 if (first_lclu == last_counted_lclu)
2071 n += last_lclu - first_lclu;
2072 else
2073 n += last_lclu - first_lclu + 1;
2074 last_counted_lclu = last_lclu;
2075 }
2076 node = rb_next(&es->rb_node);
2077 if (!node)
2078 break;
2079 es = rb_entry(node, struct extent_status, rb_node);
2080 }
2081
2082 return n;
2083}
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
2096 ext4_lblk_t len)
2097{
2098 struct ext4_inode_info *ei = EXT4_I(inode);
2099 ext4_lblk_t end;
2100 unsigned int n;
2101
2102 if (len == 0)
2103 return 0;
2104
2105 end = lblk + len - 1;
2106 WARN_ON(end < lblk);
2107
2108 read_lock(&ei->i_es_lock);
2109
2110 n = __es_delayed_clu(inode, lblk, end);
2111
2112 read_unlock(&ei->i_es_lock);
2113
2114 return n;
2115}
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
2133 ext4_lblk_t len)
2134{
2135 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2136 ext4_lblk_t end = lblk + len - 1;
2137 ext4_lblk_t first, last;
2138 bool f_del = false, l_del = false;
2139
2140 if (len == 0)
2141 return;
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156 if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
2157 first = EXT4_LBLK_CMASK(sbi, lblk);
2158 if (first != lblk)
2159 f_del = __es_scan_range(inode, &ext4_es_is_delonly,
2160 first, lblk - 1);
2161 if (f_del) {
2162 __insert_pending(inode, first);
2163 } else {
2164 last = EXT4_LBLK_CMASK(sbi, end) +
2165 sbi->s_cluster_ratio - 1;
2166 if (last != end)
2167 l_del = __es_scan_range(inode,
2168 &ext4_es_is_delonly,
2169 end + 1, last);
2170 if (l_del)
2171 __insert_pending(inode, last);
2172 else
2173 __remove_pending(inode, last);
2174 }
2175 } else {
2176 first = EXT4_LBLK_CMASK(sbi, lblk);
2177 if (first != lblk)
2178 f_del = __es_scan_range(inode, &ext4_es_is_delonly,
2179 first, lblk - 1);
2180 if (f_del)
2181 __insert_pending(inode, first);
2182 else
2183 __remove_pending(inode, first);
2184
2185 last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
2186 if (last != end)
2187 l_del = __es_scan_range(inode, &ext4_es_is_delonly,
2188 end + 1, last);
2189 if (l_del)
2190 __insert_pending(inode, last);
2191 else
2192 __remove_pending(inode, last);
2193 }
2194}
2195