// SPDX-License-Identifier: GPL-2.0
/*
 *  fs/ext4/extent_status.c
 *
 * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
 * Modified by
 *	Allison Henderson <achender@linux.vnet.ibm.com>
 *	Hugh Dickins <hughd@google.com>
 *	Zheng Liu <wenqing.lz@taobao.com>
 *
 * Ext4 extents status tree core functions.
 */
#include <linux/list_sort.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "ext4.h"

#include <trace/events/ext4.h>
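
/*
 * The extent status tree caches the status of logical block ranges of an
 * inode: written, unwritten, delayed, or hole.  Entries live in a per-inode
 * rbtree (ei->i_es_tree) protected by ei->i_es_lock, so most block mapping
 * lookups can be answered without walking the on-disk extent tree.
 *
 * Entries that are not delayed are reclaimable: inodes holding such entries
 * are kept on the per-superblock s_es_list, and a registered shrinker trims
 * them under memory pressure.  For bigalloc file systems a second per-inode
 * tree (i_pending_tree) records clusters that contain only delayed blocks,
 * so that reserved cluster counts stay accurate.
 */
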
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end, int *reserved);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len);

int __init ext4_init_es(void)
{
	ext4_es_cachep = kmem_cache_create("ext4_extent_status",
					   sizeof(struct extent_status),
					   0, (SLAB_RECLAIM_ACCOUNT), NULL);
	if (ext4_es_cachep == NULL)
		return -ENOMEM;
	return 0;
}

void ext4_exit_es(void)
{
	kmem_cache_destroy(ext4_es_cachep);
}

void ext4_es_init_tree(struct ext4_es_tree *tree)
{
	tree->root = RB_ROOT;
	tree->cache_es = NULL;
}

#ifdef ES_DEBUG__
static void ext4_es_print_tree(struct inode *inode)
{
	struct ext4_es_tree *tree;
	struct rb_node *node;

	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_es_tree;
	node = rb_first(&tree->root);
	while (node) {
		struct extent_status *es;
		es = rb_entry(node, struct extent_status, rb_node);
		printk(KERN_DEBUG " [%u/%u) %llu %x",
		       es->es_lblk, es->es_len,
		       ext4_es_pblock(es), ext4_es_status(es));
		node = rb_next(node);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif

static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
{
	BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
	return es->es_lblk + es->es_len - 1;
}

/*
 * Search the extent status tree for the extent containing @lblk.  If no such
 * extent exists, return the extent that starts closest after @lblk, or NULL
 * if @lblk lies beyond the last cached extent.
 */
static struct extent_status *__es_tree_search(struct rb_root *root,
					      ext4_lblk_t lblk)
{
	struct rb_node *node = root->rb_node;
	struct extent_status *es = NULL;

	while (node) {
		es = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es))
			node = node->rb_right;
		else
			return es;
	}

	if (es && lblk < es->es_lblk)
		return es;

	if (es && lblk > ext4_es_end(es)) {
		node = rb_next(&es->rb_node);
		return node ? rb_entry(node, struct extent_status, rb_node) :
			      NULL;
	}

	return NULL;
}

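/*
 * __es_find_extent_range - find the first extent with a matching status
 *
 * Search the range @lblk through @end for the first extent whose status
 * satisfies @matching_fn, checking the cached extent first.  On success the
 * result is copied into @es and remembered in the cache; otherwise @es is
 * returned with es_len == 0.  The caller must hold i_es_lock.
 */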
static void __es_find_extent_range(struct inode *inode,
				   int (*matching_fn)(struct extent_status *es),
				   ext4_lblk_t lblk, ext4_lblk_t end,
				   struct extent_status *es)
{
	struct ext4_es_tree *tree = NULL;
	struct extent_status *es1 = NULL;
	struct rb_node *node;

	WARN_ON(es == NULL);
	WARN_ON(end < lblk);

	tree = &EXT4_I(inode)->i_es_tree;

	/* see if the extent has been cached */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u) %llu %x\n",
				 lblk, es1->es_lblk, es1->es_len,
				 ext4_es_pblock(es1), ext4_es_status(es1));
			goto out;
		}
	}

	es1 = __es_tree_search(&tree->root, lblk);

out:
	if (es1 && !matching_fn(es1)) {
		while ((node = rb_next(&es1->rb_node)) != NULL) {
			es1 = rb_entry(node, struct extent_status, rb_node);
			if (es1->es_lblk > end) {
				es1 = NULL;
				break;
			}
			if (matching_fn(es1))
				break;
		}
	}

	if (es1 && matching_fn(es1)) {
		tree->cache_es = es1;
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
	}
}

/*
 * Locking wrapper for __es_find_extent_range() for external use.
 */
void ext4_es_find_extent_range(struct inode *inode,
			       int (*matching_fn)(struct extent_status *es),
			       ext4_lblk_t lblk, ext4_lblk_t end,
			       struct extent_status *es)
{
	trace_ext4_es_find_extent_range_enter(inode, lblk);

	read_lock(&EXT4_I(inode)->i_es_lock);
	__es_find_extent_range(inode, matching_fn, lblk, end, es);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_find_extent_range_exit(inode, es);
}

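/*
 * __es_scan_range - check whether a block range contains a block with a
 * matching status
 *
 * Returns true if at least one block in the range @start through @end
 * (inclusive) is covered by an extent satisfying @matching_fn, and false
 * otherwise.  The caller must hold i_es_lock.
 */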
static bool __es_scan_range(struct inode *inode,
			    int (*matching_fn)(struct extent_status *es),
			    ext4_lblk_t start, ext4_lblk_t end)
{
	struct extent_status es;

	__es_find_extent_range(inode, matching_fn, start, end, &es);
	if (es.es_len == 0)
		return false;   /* no matching extent in the tree */
	else if (es.es_lblk <= start &&
		 start < es.es_lblk + es.es_len)
		return true;
	else if (start <= es.es_lblk && es.es_lblk <= end)
		return true;
	else
		return false;
}

/*
 * Locking wrapper for __es_scan_range() for external use.
 */
bool ext4_es_scan_range(struct inode *inode,
			int (*matching_fn)(struct extent_status *es),
			ext4_lblk_t lblk, ext4_lblk_t end)
{
	bool ret;

	read_lock(&EXT4_I(inode)->i_es_lock);
	ret = __es_scan_range(inode, matching_fn, lblk, end);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	return ret;
}

/*
 * __es_scan_clu - check whether the cluster containing @lblk has a block
 * with a matching status.  The caller must hold i_es_lock.
 */
static bool __es_scan_clu(struct inode *inode,
			  int (*matching_fn)(struct extent_status *es),
			  ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t lblk_start, lblk_end;

	lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;

	return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
}

/*
 * Locking wrapper for __es_scan_clu() for external use.
 */
bool ext4_es_scan_clu(struct inode *inode,
		      int (*matching_fn)(struct extent_status *es),
		      ext4_lblk_t lblk)
{
	bool ret;

	read_lock(&EXT4_I(inode)->i_es_lock);
	ret = __es_scan_clu(inode, matching_fn, lblk);
	read_unlock(&EXT4_I(inode)->i_es_lock);

	return ret;
}

static void ext4_es_list_add(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	if (!list_empty(&ei->i_es_list))
		return;

	spin_lock(&sbi->s_es_lock);
	if (list_empty(&ei->i_es_list)) {
		list_add_tail(&ei->i_es_list, &sbi->s_es_list);
		sbi->s_es_nr_inode++;
	}
	spin_unlock(&sbi->s_es_lock);
}

static void ext4_es_list_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	spin_lock(&sbi->s_es_lock);
	if (!list_empty(&ei->i_es_list)) {
		list_del_init(&ei->i_es_list);
		sbi->s_es_nr_inode--;
		WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
	}
	spin_unlock(&sbi->s_es_lock);
}

static struct extent_status *
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
		     ext4_fsblk_t pblk)
{
	struct extent_status *es;

	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
	if (es == NULL)
		return NULL;
	es->es_lblk = lblk;
	es->es_len = len;
	es->es_pblk = pblk;

	/*
	 * We don't count delayed extent because we never try to reclaim them
	 */
	if (!ext4_es_is_delayed(es)) {
		if (!EXT4_I(inode)->i_es_shk_nr++)
			ext4_es_list_add(inode);
		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
					s_es_stats.es_stats_shk_cnt);
	}

	EXT4_I(inode)->i_es_all_nr++;
	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);

	return es;
}

static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
	EXT4_I(inode)->i_es_all_nr--;
	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);

	/* Decrease the shrink counter when this es is not delayed */
	if (!ext4_es_is_delayed(es)) {
		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
		if (!--EXT4_I(inode)->i_es_shk_nr)
			ext4_es_list_del(inode);
		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
					s_es_stats.es_stats_shk_cnt);
	}

	kmem_cache_free(ext4_es_cachep, es);
}

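/*
 * ext4_es_can_be_merged - check whether two extents can be merged
 *
 * Two extents are mergeable if they have the same status, are logically
 * contiguous, would not exceed EXT_MAX_BLOCKS when combined, and, for
 * written/unwritten extents, are physically contiguous as well.
 * Returns 1 if they can be merged and 0 if they cannot.
 */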
static int ext4_es_can_be_merged(struct extent_status *es1,
				 struct extent_status *es2)
{
	if (ext4_es_type(es1) != ext4_es_type(es2))
		return 0;

	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
		pr_warn("ES assertion failed when merging extents. "
			"The sum of lengths of es1 (%d) and es2 (%d) "
			"is bigger than allowed file size (%d)\n",
			es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
		WARN_ON(1);
		return 0;
	}

	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
		return 0;

	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
		return 1;

	if (ext4_es_is_hole(es1))
		return 1;

	/* we need to check delayed extent is without unwritten status */
	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
		return 1;

	return 0;
}

static struct extent_status *
ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct extent_status *es1;
	struct rb_node *node;

	node = rb_prev(&es->rb_node);
	if (!node)
		return es;

	es1 = rb_entry(node, struct extent_status, rb_node);
	if (ext4_es_can_be_merged(es1, es)) {
		es1->es_len += es->es_len;
		if (ext4_es_is_referenced(es))
			ext4_es_set_referenced(es1);
		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		es = es1;
	}

	return es;
}

static struct extent_status *
ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct extent_status *es1;
	struct rb_node *node;

	node = rb_next(&es->rb_node);
	if (!node)
		return es;

	es1 = rb_entry(node, struct extent_status, rb_node);
	if (ext4_es_can_be_merged(es, es1)) {
		es->es_len += es1->es_len;
		if (ext4_es_is_referenced(es1))
			ext4_es_set_referenced(es);
		rb_erase(node, &tree->root);
		ext4_es_free_extent(inode, es1);
	}

	return es;
}

#ifdef ES_AGGRESSIVE_TEST
#include "ext4_extents.h"	/* Needed when ES_AGGRESSIVE_TEST is defined */

static void ext4_es_insert_extent_ext_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t ee_block;
	ext4_fsblk_t ee_start;
	unsigned short ee_len;
	int depth, ee_status, es_status;

	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;

	if (ex) {
		ee_block = le32_to_cpu(ex->ee_block);
		ee_start = ext4_ext_pblock(ex);
		ee_len = ext4_ext_get_actual_len(ex);

		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
		es_status = ext4_es_is_unwritten(es) ? 1 : 0;

		/*
		 * Make sure ex and es are not overlapping when we try to
		 * insert a delayed/hole extent.
		 */
		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
			if (in_range(es->es_lblk, ee_block, ee_len)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu we can find an extent "
					"at block [%d/%d/%llu/%c], but we "
					"want to add a delayed/hole extent "
					"[%d/%d/%llu/%x]\n",
					inode->i_ino, ee_block, ee_len,
					ee_start, ee_status ? 'u' : 'w',
					es->es_lblk, es->es_len,
					ext4_es_pblock(es), ext4_es_status(es));
			}
			goto out;
		}

		/*
		 * We don't check ee_block == es->es_lblk, etc. because es
		 * might be a part of whole extent, vice versa.
		 */
		if (es->es_lblk < ee_block ||
		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
			goto out;
		}

		if (ee_status ^ es_status) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
		}
	} else {
		/*
		 * We can't find an extent on disk.  So we need to make sure
		 * that we don't want to add a written/unwritten extent.
		 */
		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"can't find an extent at block %d but we want "
				"to add a written/unwritten extent "
				"[%d/%d/%llu/%x]\n", inode->i_ino,
				es->es_lblk, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
		}
	}
out:
	ext4_ext_drop_refs(path);
	kfree(path);
}

static void ext4_es_insert_extent_ind_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_map_blocks map;
	int retval;

	/*
	 * Here we call ext4_ind_map_blocks to lookup a block mapping because
	 * 'Indirect' structure is defined in indirect.c.  So we couldn't
	 * access direct/indirect tree from outside.  It is too dirty to define
	 * this function in indirect.c file.
	 */
	map.m_lblk = es->es_lblk;
	map.m_len = es->es_len;

	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
	if (retval > 0) {
		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
			/*
			 * We want to add a delayed/hole extent but this
			 * block has been allocated.
			 */
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can find blocks but we want to add a "
				"delayed/hole extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		} else if (ext4_es_is_written(es)) {
			if (retval != es->es_len) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu retval %d != es_len %d\n",
					inode->i_ino, retval, es->es_len);
				return;
			}
			if (map.m_pblk != ext4_es_pblock(es)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu m_pblk %llu != "
					"es_pblk %llu\n",
					inode->i_ino, map.m_pblk,
					ext4_es_pblock(es));
				return;
			}
		} else {
			/*
			 * We don't need to check unwritten extent because
			 * indirect-based file doesn't have it.
			 */
			BUG();
		}
	} else if (retval == 0) {
		if (ext4_es_is_written(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can't find the block but we want to add "
				"a written extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		}
	}
}

static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
	/*
	 * We don't need to worry about the race condition because
	 * caller takes i_data_sem locking.
	 */
	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		ext4_es_insert_extent_ext_check(inode, es);
	else
		ext4_es_insert_extent_ind_check(inode, es);
}
#else
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
}
#endif

static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_status *es;

	while (*p) {
		parent = *p;
		es = rb_entry(parent, struct extent_status, rb_node);

		if (newes->es_lblk < es->es_lblk) {
			if (ext4_es_can_be_merged(newes, es)) {
				/*
				 * Here we can modify es_lblk directly
				 * because it isn't overlapped.
				 */
				es->es_lblk = newes->es_lblk;
				es->es_len += newes->es_len;
				if (ext4_es_is_written(es) ||
				    ext4_es_is_unwritten(es))
					ext4_es_store_pblock(es,
							     newes->es_pblk);
				es = ext4_es_try_to_merge_left(inode, es);
				goto out;
			}
			p = &(*p)->rb_left;
		} else if (newes->es_lblk > ext4_es_end(es)) {
			if (ext4_es_can_be_merged(es, newes)) {
				es->es_len += newes->es_len;
				es = ext4_es_try_to_merge_right(inode, es);
				goto out;
			}
			p = &(*p)->rb_right;
		} else {
			BUG();
			return -EINVAL;
		}
	}

	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
				  newes->es_pblk);
	if (!es)
		return -ENOMEM;
	rb_link_node(&es->rb_node, parent, p);
	rb_insert_color(&es->rb_node, &tree->root);

out:
	tree->cache_es = es;
	return 0;
}

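/*
 * ext4_es_insert_extent - add a block range with the given status to the
 * extent status tree
 *
 * Any existing entries overlapping the range @lblk through @lblk + @len - 1
 * are removed or trimmed first, then the new extent is inserted and merged
 * with its neighbours where possible.
 * Returns 0 on success, or a negative error code on failure.
 */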
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len, ext4_fsblk_t pblk,
			  unsigned int status)
{
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;
	int err = 0;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
		 lblk, len, pblk, status, inode->i_ino);

	if (!len)
		return 0;

	BUG_ON(end < lblk);

	if ((status & EXTENT_STATUS_DELAYED) &&
	    (status & EXTENT_STATUS_WRITTEN)) {
		ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
				" delayed and written which can potentially "
				" cause data loss.", lblk, len);
		WARN_ON(1);
	}

	newes.es_lblk = lblk;
	newes.es_len = len;
	ext4_es_store_pblock_status(&newes, pblk, status);
	trace_ext4_es_insert_extent(inode, &newes);

	ext4_es_insert_extent_check(inode, &newes);

	write_lock(&EXT4_I(inode)->i_es_lock);
	err = __es_remove_extent(inode, lblk, end, NULL);
	if (err != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
					  128, EXT4_I(inode)))
		goto retry;
	if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
		err = 0;

	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
	    (status & EXTENT_STATUS_WRITTEN ||
	     status & EXTENT_STATUS_UNWRITTEN))
		__revise_pending(inode, lblk, len);

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_es_print_tree(inode);

	return err;
}

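/*
 * ext4_es_cache_extent() inserts information into the extent status tree if
 * and only if there isn't already an entry covering the range in question,
 * so it can be used to pre-populate the cache (e.g. when precaching extents)
 * without disturbing existing entries.
 */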
void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len, ext4_fsblk_t pblk,
			  unsigned int status)
{
	struct extent_status *es;
	struct extent_status newes;
	ext4_lblk_t end = lblk + len - 1;

	newes.es_lblk = lblk;
	newes.es_len = len;
	ext4_es_store_pblock_status(&newes, pblk, status);
	trace_ext4_es_cache_extent(inode, &newes);

	if (!len)
		return;

	BUG_ON(end < lblk);

	write_lock(&EXT4_I(inode)->i_es_lock);

	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
	if (!es || es->es_lblk > end)
		__es_insert_extent(inode, &newes);
	write_unlock(&EXT4_I(inode)->i_es_lock);
}

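/*
 * ext4_es_lookup_extent() looks up an extent in the extent status tree.
 *
 * If @lblk is contained in a cached extent, that extent is copied into @es
 * and 1 is returned; otherwise 0 is returned.  When @next_lblk is non-NULL
 * it is set to the starting block of the following extent, or to 0 if there
 * is none.
 */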
int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t *next_lblk,
			  struct extent_status *es)
{
	struct ext4_es_tree *tree;
	struct ext4_es_stats *stats;
	struct extent_status *es1 = NULL;
	struct rb_node *node;
	int found = 0;

	trace_ext4_es_lookup_extent_enter(inode, lblk);
	es_debug("lookup extent in block %u\n", lblk);

	tree = &EXT4_I(inode)->i_es_tree;
	read_lock(&EXT4_I(inode)->i_es_lock);

	/* find extent in cache firstly */
	es->es_lblk = es->es_len = es->es_pblk = 0;
	if (tree->cache_es) {
		es1 = tree->cache_es;
		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
			es_debug("%u cached by [%u/%u)\n",
				 lblk, es1->es_lblk, es1->es_len);
			found = 1;
			goto out;
		}
	}

	node = tree->root.rb_node;
	while (node) {
		es1 = rb_entry(node, struct extent_status, rb_node);
		if (lblk < es1->es_lblk)
			node = node->rb_left;
		else if (lblk > ext4_es_end(es1))
			node = node->rb_right;
		else {
			found = 1;
			break;
		}
	}

out:
	stats = &EXT4_SB(inode->i_sb)->s_es_stats;
	if (found) {
		BUG_ON(!es1);
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
		if (!ext4_es_is_referenced(es1))
			ext4_es_set_referenced(es1);
		percpu_counter_inc(&stats->es_stats_cache_hits);
		if (next_lblk) {
			node = rb_next(&es1->rb_node);
			if (node) {
				es1 = rb_entry(node, struct extent_status,
					       rb_node);
				*next_lblk = es1->es_lblk;
			} else
				*next_lblk = 0;
		}
	} else {
		percpu_counter_inc(&stats->es_stats_cache_misses);
	}

	read_unlock(&EXT4_I(inode)->i_es_lock);

	trace_ext4_es_lookup_extent_exit(inode, es, found);
	return found;
}

/*
 * Bookkeeping used while removing a block range from the extent status tree
 * to work out how many cluster reservations can be released (see
 * __es_remove_extent() and get_rsvd()).
 */
struct rsvd_count {
	int ndelonly;
	bool first_do_lblk_found;
	ext4_lblk_t first_do_lblk;
	ext4_lblk_t last_do_lblk;
	struct extent_status *left_es;
	bool partial;
	ext4_lblk_t lclu;
};

/*
 * init_rsvd - initialize reserved count data before removing a block range
 * in a file from the extent status tree.  @es points to the first extent in
 * the range starting at @lblk and must not be NULL.
 */
static void init_rsvd(struct inode *inode, ext4_lblk_t lblk,
		      struct extent_status *es, struct rsvd_count *rc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct rb_node *node;

	rc->ndelonly = 0;

	/*
	 * for bigalloc, note the first delonly block in the range has not
	 * been found, record the extent containing the block to the left of
	 * the region to be removed, if any, and note that there's no partial
	 * cluster to track
	 */
	if (sbi->s_cluster_ratio > 1) {
		rc->first_do_lblk_found = false;
		if (lblk > es->es_lblk) {
			rc->left_es = es;
		} else {
			node = rb_prev(&es->rb_node);
			rc->left_es = node ? rb_entry(node,
						      struct extent_status,
						      rb_node) : NULL;
		}
		rc->partial = false;
	}
}

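/*
 * count_rsvd - count the clusters containing delayed and not unwritten
 * (delonly) blocks in the part of an extent within a given range, adding
 * to the running tally in @rc->ndelonly
 *
 * @lblk is the first block of the range (which may lie before @es), @len the
 * number of blocks in it, @es the extent being examined, and @rc the running
 * state set up by init_rsvd().  Partial clusters at the ends of extents are
 * tracked so that they are not counted twice.
 */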
static void count_rsvd(struct inode *inode, ext4_lblk_t lblk, long len,
		       struct extent_status *es, struct rsvd_count *rc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t i, end, nclu;

	if (!ext4_es_is_delonly(es))
		return;

	WARN_ON(len <= 0);

	if (sbi->s_cluster_ratio == 1) {
		rc->ndelonly += (int) len;
		return;
	}

	/* bigalloc */

	i = (lblk < es->es_lblk) ? es->es_lblk : lblk;
	end = lblk + (ext4_lblk_t) len - 1;
	end = (end > ext4_es_end(es)) ? ext4_es_end(es) : end;

	/* record the first block of the first delonly extent seen */
	if (rc->first_do_lblk_found == false) {
		rc->first_do_lblk = i;
		rc->first_do_lblk_found = true;
	}

	/* update the last lblk in the region seen so far */
	rc->last_do_lblk = end;

	/*
	 * if we're tracking a partial cluster and the current extent
	 * doesn't start with it, count it and stop tracking
	 */
	if (rc->partial && (rc->lclu != EXT4_B2C(sbi, i))) {
		rc->ndelonly++;
		rc->partial = false;
	}

	/*
	 * if the first cluster doesn't start on a cluster boundary but
	 * ends on one, count it
	 */
	if (EXT4_LBLK_COFF(sbi, i) != 0) {
		if (end >= EXT4_LBLK_CFILL(sbi, i)) {
			rc->ndelonly++;
			rc->partial = false;
			i = EXT4_LBLK_CFILL(sbi, i) + 1;
		}
	}

	/*
	 * if the current cluster starts on a cluster boundary, count the
	 * number of whole delonly clusters in the extent
	 */
	if ((i + sbi->s_cluster_ratio - 1) <= end) {
		nclu = (end - i + 1) >> sbi->s_cluster_bits;
		rc->ndelonly += nclu;
		i += nclu << sbi->s_cluster_bits;
	}

	/*
	 * start tracking a partial cluster if there's a partial at the end
	 * of the current extent and we're not already tracking one
	 */
	if (!rc->partial && i <= end) {
		rc->partial = true;
		rc->lclu = EXT4_B2C(sbi, i);
	}
}

/*
 * __pr_tree_search - search the pending reservation tree for @lclu
 *
 * Returns the pending reservation for the cluster identified by @lclu if
 * found.  If not, returns the reservation for the next cluster if any, and
 * otherwise NULL.
 */
static struct pending_reservation *__pr_tree_search(struct rb_root *root,
						    ext4_lblk_t lclu)
{
	struct rb_node *node = root->rb_node;
	struct pending_reservation *pr = NULL;

	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		if (lclu < pr->lclu)
			node = node->rb_left;
		else if (lclu > pr->lclu)
			node = node->rb_right;
		else
			return pr;
	}
	if (pr && lclu < pr->lclu)
		return pr;
	if (pr && lclu > pr->lclu) {
		node = rb_next(&pr->rb_node);
		return node ? rb_entry(node, struct pending_reservation,
				       rb_node) : NULL;
	}
	return NULL;
}

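/*
 * get_rsvd - calculate the number of reserved cluster (or block)
 * reservations to be released when removing a block range from the extent
 * status tree, and release any pending reservations within the range
 *
 * @end is the last block of the removed range, @right_es the extent
 * containing that block (or the next extent, if any), and @rc the state
 * accumulated by count_rsvd().  For bigalloc, clusters shared with
 * delayed-only blocks outside the removed range are not counted.
 * The caller must hold i_es_lock.
 */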
static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
			     struct extent_status *right_es,
			     struct rsvd_count *rc)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct pending_reservation *pr;
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
	struct rb_node *node;
	ext4_lblk_t first_lclu, last_lclu;
	bool left_delonly, right_delonly, count_pending;
	struct extent_status *es;

	if (sbi->s_cluster_ratio > 1) {
		/* count any remaining partial cluster */
		if (rc->partial)
			rc->ndelonly++;

		if (rc->ndelonly == 0)
			return 0;

		first_lclu = EXT4_B2C(sbi, rc->first_do_lblk);
		last_lclu = EXT4_B2C(sbi, rc->last_do_lblk);

		/*
		 * decrease the delonly count by the number of clusters at the
		 * ends of the range that still contain delonly blocks -
		 * these clusters still need to be reserved
		 */
		left_delonly = right_delonly = false;

		es = rc->left_es;
		while (es && ext4_es_end(es) >=
		       EXT4_LBLK_CMASK(sbi, rc->first_do_lblk)) {
			if (ext4_es_is_delonly(es)) {
				rc->ndelonly--;
				left_delonly = true;
				break;
			}
			node = rb_prev(&es->rb_node);
			if (!node)
				break;
			es = rb_entry(node, struct extent_status, rb_node);
		}
		if (right_es && (!left_delonly || first_lclu != last_lclu)) {
			if (end < ext4_es_end(right_es)) {
				es = right_es;
			} else {
				node = rb_next(&right_es->rb_node);
				es = node ? rb_entry(node, struct extent_status,
						     rb_node) : NULL;
			}
			while (es && es->es_lblk <=
			       EXT4_LBLK_CFILL(sbi, rc->last_do_lblk)) {
				if (ext4_es_is_delonly(es)) {
					rc->ndelonly--;
					right_delonly = true;
					break;
				}
				node = rb_next(&es->rb_node);
				if (!node)
					break;
				es = rb_entry(node, struct extent_status,
					      rb_node);
			}
		}

		/*
		 * determine the block range that should be searched for
		 * pending reservations, if any.  Clusters on the ends of the
		 * original removed range containing delonly blocks are
		 * excluded.  They've already been accounted for and it's not
		 * possible to determine if an associated pending reservation
		 * should be released with the information available in the
		 * extents status tree.
		 */
		if (first_lclu == last_lclu) {
			if (left_delonly | right_delonly)
				count_pending = false;
			else
				count_pending = true;
		} else {
			if (left_delonly)
				first_lclu++;
			if (right_delonly)
				last_lclu--;
			if (first_lclu <= last_lclu)
				count_pending = true;
			else
				count_pending = false;
		}

		/*
		 * a pending reservation found between first_lclu and last_lclu
		 * represents an allocated cluster that contained at least one
		 * delonly block, so the delonly total must be reduced by one
		 * for each pending reservation found and released
		 */
		if (count_pending) {
			pr = __pr_tree_search(&tree->root, first_lclu);
			while (pr && pr->lclu <= last_lclu) {
				rc->ndelonly--;
				node = rb_next(&pr->rb_node);
				rb_erase(&pr->rb_node, &tree->root);
				kmem_cache_free(ext4_pending_cachep, pr);
				if (!node)
					break;
				pr = rb_entry(node, struct pending_reservation,
					      rb_node);
			}
		}
	}
	return rc->ndelonly;
}

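/*
 * __es_remove_extent - removes a block range from the extent status tree
 *
 * Removes the extents in the range @lblk through @end, splitting any extent
 * that straddles a boundary.  If @reserved is non-NULL and delayed
 * allocation is enabled, it is set to the number of cluster reservations
 * released by the removal.  The caller must hold i_es_lock.
 * Returns 0 on success, or a negative error code on failure.
 */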
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end, int *reserved)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	struct extent_status orig_es;
	ext4_lblk_t len1, len2;
	ext4_fsblk_t block;
	int err;
	bool count_reserved = true;
	struct rsvd_count rc;

	if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
		count_reserved = false;
retry:
	err = 0;

	es = __es_tree_search(&tree->root, lblk);
	if (!es)
		goto out;
	if (es->es_lblk > end)
		goto out;

	/* Simply invalidate cache_es. */
	tree->cache_es = NULL;
	if (count_reserved)
		init_rsvd(inode, lblk, es, &rc);

	orig_es.es_lblk = es->es_lblk;
	orig_es.es_len = es->es_len;
	orig_es.es_pblk = es->es_pblk;

	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
	if (len1 > 0)
		es->es_len = len1;
	if (len2 > 0) {
		if (len1 > 0) {
			struct extent_status newes;

			newes.es_lblk = end + 1;
			newes.es_len = len2;
			block = 0x7FDEADBEEFULL;	/* should never be used */
			if (ext4_es_is_written(&orig_es) ||
			    ext4_es_is_unwritten(&orig_es))
				block = ext4_es_pblock(&orig_es) +
					orig_es.es_len - len2;
			ext4_es_store_pblock_status(&newes, block,
						    ext4_es_status(&orig_es));
			err = __es_insert_extent(inode, &newes);
			if (err) {
				es->es_lblk = orig_es.es_lblk;
				es->es_len = orig_es.es_len;
				if ((err == -ENOMEM) &&
				    __es_shrink(EXT4_SB(inode->i_sb),
						128, EXT4_I(inode)))
					goto retry;
				goto out;
			}
		} else {
			es->es_lblk = end + 1;
			es->es_len = len2;
			if (ext4_es_is_written(es) ||
			    ext4_es_is_unwritten(es)) {
				block = orig_es.es_pblk + orig_es.es_len - len2;
				ext4_es_store_pblock(es, block);
			}
		}
		if (count_reserved)
			count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
				   &orig_es, &rc);
		goto out;
	}

	if (len1 > 0) {
		if (count_reserved)
			count_rsvd(inode, lblk, orig_es.es_len - len1,
				   &orig_es, &rc);
		node = rb_next(&es->rb_node);
		if (node)
			es = rb_entry(node, struct extent_status, rb_node);
		else
			es = NULL;
	}

	while (es && ext4_es_end(es) <= end) {
		if (count_reserved)
			count_rsvd(inode, es->es_lblk, es->es_len, es, &rc);
		node = rb_next(&es->rb_node);
		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		if (!node) {
			es = NULL;
			break;
		}
		es = rb_entry(node, struct extent_status, rb_node);
	}

	if (es && es->es_lblk < end + 1) {
		ext4_lblk_t orig_len = es->es_len;

		len1 = ext4_es_end(es) - end;
		if (count_reserved)
			count_rsvd(inode, es->es_lblk, orig_len - len1,
				   es, &rc);
		es->es_lblk = end + 1;
		es->es_len = len1;
		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
			block = es->es_pblk + orig_len - len1;
			ext4_es_store_pblock(es, block);
		}
	}

	if (count_reserved)
		*reserved = get_rsvd(inode, end, es, &rc);
out:
	return err;
}

/*
 * ext4_es_remove_extent - removes a block range from the extent status tree
 *
 * @inode - file containing the range
 * @lblk - first block in the range
 * @len - number of blocks to remove
 *
 * Reduces the block/cluster reservation count and, for bigalloc, cancels
 * pending reservations as needed.  Returns 0 on success, error on failure.
 */
int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			  ext4_lblk_t len)
{
	ext4_lblk_t end;
	int err = 0;
	int reserved = 0;

	trace_ext4_es_remove_extent(inode, lblk, len);
	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
		 lblk, len, inode->i_ino);

	if (!len)
		return err;

	end = lblk + len - 1;
	BUG_ON(end < lblk);

	/*
	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally
	 * so that we are sure __es_shrink() is done with the inode before it
	 * is reclaimed.
	 */
	write_lock(&EXT4_I(inode)->i_es_lock);
	err = __es_remove_extent(inode, lblk, end, &reserved);
	write_unlock(&EXT4_I(inode)->i_es_lock);
	ext4_es_print_tree(inode);
	ext4_da_release_space(inode, reserved);
	return err;
}

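/*
 * __es_shrink - reclaim extent status entries under memory pressure
 *
 * Walks the superblock's list of inodes with reclaimable entries, trying to
 * drop up to @nr_to_scan entries.  @locked_ei, if non-NULL, is an inode
 * whose i_es_lock is already held by the caller; it is skipped during the
 * list walk and only scanned as a last resort.  Returns the number of
 * entries reclaimed.
 */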
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei)
{
	struct ext4_inode_info *ei;
	struct ext4_es_stats *es_stats;
	ktime_t start_time;
	u64 scan_time;
	int nr_to_walk;
	int nr_shrunk = 0;
	int retried = 0, nr_skipped = 0;

	es_stats = &sbi->s_es_stats;
	start_time = ktime_get();

retry:
	spin_lock(&sbi->s_es_lock);
	nr_to_walk = sbi->s_es_nr_inode;
	while (nr_to_walk-- > 0) {
		if (list_empty(&sbi->s_es_list)) {
			spin_unlock(&sbi->s_es_lock);
			goto out;
		}
		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
				      i_es_list);

		list_move_tail(&ei->i_es_list, &sbi->s_es_list);

		/*
		 * Normally we try hard to avoid shrinking precached inodes,
		 * but we will as a last resort.
		 */
		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
						EXT4_STATE_EXT_PRECACHED)) {
			nr_skipped++;
			continue;
		}

		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
			nr_skipped++;
			continue;
		}
		/*
		 * Now we hold i_es_lock which protects us from inode
		 * freeing.
		 */
		spin_unlock(&sbi->s_es_lock);

		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
		write_unlock(&ei->i_es_lock);

		if (nr_to_scan <= 0)
			goto out;
		spin_lock(&sbi->s_es_lock);
	}
	spin_unlock(&sbi->s_es_lock);

	/*
	 * If we skipped any inodes, and we weren't able to make any
	 * forward progress, try again to scan precached inodes.
	 */
	if ((nr_shrunk == 0) && nr_skipped && !retried) {
		retried++;
		goto retry;
	}

	if (locked_ei && nr_shrunk == 0)
		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);

out:
	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
	if (likely(es_stats->es_stats_scan_time))
		es_stats->es_stats_scan_time = (scan_time +
				es_stats->es_stats_scan_time*3) / 4;
	else
		es_stats->es_stats_scan_time = scan_time;
	if (scan_time > es_stats->es_stats_max_scan_time)
		es_stats->es_stats_max_scan_time = scan_time;
	if (likely(es_stats->es_stats_shrunk))
		es_stats->es_stats_shrunk = (nr_shrunk +
				es_stats->es_stats_shrunk*3) / 4;
	else
		es_stats->es_stats_shrunk = nr_shrunk;

	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
			     nr_skipped, retried);
	return nr_shrunk;
}

static unsigned long ext4_es_count(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	unsigned long nr;
	struct ext4_sb_info *sbi;

	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
	return nr;
}

static unsigned long ext4_es_scan(struct shrinker *shrink,
				  struct shrink_control *sc)
{
	struct ext4_sb_info *sbi = container_of(shrink,
				struct ext4_sb_info, s_es_shrinker);
	int nr_to_scan = sc->nr_to_scan;
	int ret, nr_shrunk;

	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);

	if (!nr_to_scan)
		return ret;

	nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);

	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
	return nr_shrunk;
}

int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
{
	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
	struct ext4_es_stats *es_stats = &sbi->s_es_stats;
	struct ext4_inode_info *ei, *max = NULL;
	unsigned int inode_cnt = 0;

	if (v != SEQ_START_TOKEN)
		return 0;

	/* here we just find an inode that has the max nr. of objects */
	spin_lock(&sbi->s_es_lock);
	list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
		inode_cnt++;
		if (max && max->i_es_all_nr < ei->i_es_all_nr)
			max = ei;
		else if (!max)
			max = ei;
	}
	spin_unlock(&sbi->s_es_lock);

	seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
		   percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
	seq_printf(seq, " %lld/%lld cache hits/misses\n",
		   percpu_counter_sum_positive(&es_stats->es_stats_cache_hits),
		   percpu_counter_sum_positive(&es_stats->es_stats_cache_misses));
	if (inode_cnt)
		seq_printf(seq, " %d inodes on list\n", inode_cnt);

	seq_printf(seq, "average:\n %llu us scan time\n",
		   div_u64(es_stats->es_stats_scan_time, 1000));
	seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk);
	if (inode_cnt)
		seq_printf(seq,
			"maximum:\n %lu inode (%u objects, %u reclaimable)\n"
			" %llu us max scan time\n",
			max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
			div_u64(es_stats->es_stats_max_scan_time, 1000));

	return 0;
}

int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
{
	int err;

	/* Make sure we have enough bits for physical block number */
	BUILD_BUG_ON(ES_SHIFT < 48);
	INIT_LIST_HEAD(&sbi->s_es_list);
	sbi->s_es_nr_inode = 0;
	spin_lock_init(&sbi->s_es_lock);
	sbi->s_es_stats.es_stats_shrunk = 0;
	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_hits, 0,
				  GFP_KERNEL);
	if (err)
		return err;
	err = percpu_counter_init(&sbi->s_es_stats.es_stats_cache_misses, 0,
				  GFP_KERNEL);
	if (err)
		goto err1;
	sbi->s_es_stats.es_stats_scan_time = 0;
	sbi->s_es_stats.es_stats_max_scan_time = 0;
	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
	if (err)
		goto err2;
	err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
	if (err)
		goto err3;

	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
	sbi->s_es_shrinker.count_objects = ext4_es_count;
	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
	err = register_shrinker(&sbi->s_es_shrinker);
	if (err)
		goto err4;

	return 0;
err4:
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
err3:
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
err2:
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
err1:
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
	return err;
}

void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
{
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_hits);
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_cache_misses);
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
	percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
	unregister_shrinker(&sbi->s_es_shrinker);
}

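/*
 * Shrink extents in the given inode from ei->i_es_shrink_lblk till @end.
 * Scan at most *nr_to_scan extents, updating *nr_to_scan accordingly.
 *
 * Returns 0 if we hit the end of the tree / interval, 1 if we exhausted
 * nr_to_scan.  Increments *nr_shrunk by the number of reclaimed extents and
 * updates ei->i_es_shrink_lblk to where scanning should continue.
 */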
static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
				 int *nr_to_scan, int *nr_shrunk)
{
	struct inode *inode = &ei->vfs_inode;
	struct ext4_es_tree *tree = &ei->i_es_tree;
	struct extent_status *es;
	struct rb_node *node;

	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
	if (!es)
		goto out_wrap;

	while (*nr_to_scan > 0) {
		if (es->es_lblk > end) {
			ei->i_es_shrink_lblk = end + 1;
			return 0;
		}

		(*nr_to_scan)--;
		node = rb_next(&es->rb_node);
		/*
		 * We can't reclaim delayed extent from status tree because
		 * fibmap, bigalloc, and seek_data/hole need to use it.
		 */
		if (ext4_es_is_delayed(es))
			goto next;
		if (ext4_es_is_referenced(es)) {
			ext4_es_clear_referenced(es);
			goto next;
		}

		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		(*nr_shrunk)++;
next:
		if (!node)
			goto out_wrap;
		es = rb_entry(node, struct extent_status, rb_node);
	}
	ei->i_es_shrink_lblk = es->es_lblk;
	return 1;
out_wrap:
	ei->i_es_shrink_lblk = 0;
	return 0;
}

static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
{
	struct inode *inode = &ei->vfs_inode;
	int nr_shrunk = 0;
	ext4_lblk_t start = ei->i_es_shrink_lblk;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	if (ei->i_es_shk_nr == 0)
		return 0;

	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
	    __ratelimit(&_rs))
		ext4_warning(inode->i_sb, "forced shrink of precached extents");

	if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
	    start != 0)
		es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);

	ei->i_es_tree.cache_es = NULL;
	return nr_shrunk;
}

/*
 * Called to support EXT4_IOC_CLEAR_ES_CACHE.  Only entries that are not
 * delayed may be dropped from the extent status cache; delayed entries
 * must be preserved.
 */
void ext4_clear_inode_es(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct extent_status *es;
	struct ext4_es_tree *tree;
	struct rb_node *node;

	write_lock(&ei->i_es_lock);
	tree = &EXT4_I(inode)->i_es_tree;
	tree->cache_es = NULL;
	node = rb_first(&tree->root);
	while (node) {
		es = rb_entry(node, struct extent_status, rb_node);
		node = rb_next(node);
		if (!ext4_es_is_delayed(es)) {
			rb_erase(&es->rb_node, &tree->root);
			ext4_es_free_extent(inode, es);
		}
	}
	ext4_clear_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
	write_unlock(&ei->i_es_lock);
}

#ifdef ES_DEBUG__
static void ext4_print_pending_tree(struct inode *inode)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr;

	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
	tree = &EXT4_I(inode)->i_pending_tree;
	node = rb_first(&tree->root);
	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		printk(KERN_DEBUG " %u", pr->lclu);
		node = rb_next(node);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_print_pending_tree(inode)
#endif

int __init ext4_init_pending(void)
{
	ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
					sizeof(struct pending_reservation),
					0, (SLAB_RECLAIM_ACCOUNT), NULL);
	if (ext4_pending_cachep == NULL)
		return -ENOMEM;
	return 0;
}

void ext4_exit_pending(void)
{
	kmem_cache_destroy(ext4_pending_cachep);
}

void ext4_init_pending_tree(struct ext4_pending_tree *tree)
{
	tree->root = RB_ROOT;
}

/*
 * __get_pending - retrieve a pointer to a pending reservation
 *
 * Returns the pending reservation for the cluster identified by @lclu if it
 * is a member of the set, and NULL if not.  Must be called holding
 * i_es_lock.
 */
static struct pending_reservation *__get_pending(struct inode *inode,
						 ext4_lblk_t lclu)
{
	struct ext4_pending_tree *tree;
	struct rb_node *node;
	struct pending_reservation *pr = NULL;

	tree = &EXT4_I(inode)->i_pending_tree;
	node = (&tree->root)->rb_node;

	while (node) {
		pr = rb_entry(node, struct pending_reservation, rb_node);
		if (lclu < pr->lclu)
			node = node->rb_left;
		else if (lclu > pr->lclu)
			node = node->rb_right;
		else if (lclu == pr->lclu)
			return pr;
	}
	return NULL;
}

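/*
 * __insert_pending - adds a pending cluster reservation for the cluster
 * containing @lblk
 *
 * Returns 0 on successful insertion, or if a reservation for the cluster
 * already exists, and -ENOMEM on allocation failure.  Must be called holding
 * i_es_lock.
 */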
static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct pending_reservation *pr;
	ext4_lblk_t lclu;
	int ret = 0;

	lclu = EXT4_B2C(sbi, lblk);

	while (*p) {
		parent = *p;
		pr = rb_entry(parent, struct pending_reservation, rb_node);

		if (lclu < pr->lclu) {
			p = &(*p)->rb_left;
		} else if (lclu > pr->lclu) {
			p = &(*p)->rb_right;
		} else {
			/* pending reservation already inserted */
			goto out;
		}
	}

	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
	if (pr == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	pr->lclu = lclu;

	rb_link_node(&pr->rb_node, parent, p);
	rb_insert_color(&pr->rb_node, &tree->root);

out:
	return ret;
}

/*
 * __remove_pending - removes the pending cluster reservation, if any, for
 * the cluster containing @lblk.  Must be called holding i_es_lock.
 */
static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct pending_reservation *pr;
	struct ext4_pending_tree *tree;

	pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
	if (pr != NULL) {
		tree = &EXT4_I(inode)->i_pending_tree;
		rb_erase(&pr->rb_node, &tree->root);
		kmem_cache_free(ext4_pending_cachep, pr);
	}
}

/*
 * ext4_remove_pending - locking wrapper for __remove_pending(): removes the
 * pending cluster reservation, if any, for the cluster containing @lblk.
 */
void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	write_lock(&ei->i_es_lock);
	__remove_pending(inode, lblk);
	write_unlock(&ei->i_es_lock);
}

/*
 * ext4_is_pending - determine whether a cluster has a pending reservation
 *
 * Returns true if there's a pending reservation for the cluster containing
 * @lblk, and false if not.
 */
bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	bool ret;

	read_lock(&ei->i_es_lock);
	ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
	read_unlock(&ei->i_es_lock);

	return ret;
}

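/*
 * ext4_es_insert_delayed_block - adds a delayed block to the extents status
 * tree, adding a pending reservation where needed
 *
 * @lblk is the logical block to be added and @allocated indicates whether a
 * physical cluster has already been allocated for the logical cluster that
 * contains the block, in which case a pending reservation is recorded.
 * Returns 0 on success, or a negative error code on failure.
 */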
int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
				 bool allocated)
{
	struct extent_status newes;
	int err = 0;

	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
		 lblk, inode->i_ino);

	newes.es_lblk = lblk;
	newes.es_len = 1;
	ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
	trace_ext4_es_insert_delayed_block(inode, &newes, allocated);

	ext4_es_insert_extent_check(inode, &newes);

	write_lock(&EXT4_I(inode)->i_es_lock);

	err = __es_remove_extent(inode, lblk, lblk, NULL);
	if (err != 0)
		goto error;
retry:
	err = __es_insert_extent(inode, &newes);
	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
					  128, EXT4_I(inode)))
		goto retry;
	if (err != 0)
		goto error;

	if (allocated)
		__insert_pending(inode, lblk);

error:
	write_unlock(&EXT4_I(inode)->i_es_lock);

	ext4_es_print_tree(inode);
	ext4_print_pending_tree(inode);

	return err;
}

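/*
 * __es_delayed_clu - count number of clusters containing blocks that are
 * delayed only
 *
 * Returns the number of clusters containing delayed and not unwritten
 * (delonly) blocks in the range @start through @end.  Any cluster that is
 * only partially within the range but contains such a block is counted as a
 * whole cluster.  The caller must hold i_es_lock.
 */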
static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
				     ext4_lblk_t end)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct extent_status *es;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct rb_node *node;
	ext4_lblk_t first_lclu, last_lclu;
	unsigned long long last_counted_lclu;
	unsigned int n = 0;

	/* guaranteed to be unequal to any ext4_lblk_t value */
	last_counted_lclu = ~0ULL;

	es = __es_tree_search(&tree->root, start);

	while (es && (es->es_lblk <= end)) {
		if (ext4_es_is_delonly(es)) {
			if (es->es_lblk <= start)
				first_lclu = EXT4_B2C(sbi, start);
			else
				first_lclu = EXT4_B2C(sbi, es->es_lblk);

			if (ext4_es_end(es) >= end)
				last_lclu = EXT4_B2C(sbi, end);
			else
				last_lclu = EXT4_B2C(sbi, ext4_es_end(es));

			if (first_lclu == last_counted_lclu)
				n += last_lclu - first_lclu;
			else
				n += last_lclu - first_lclu + 1;
			last_counted_lclu = last_lclu;
		}
		node = rb_next(&es->rb_node);
		if (!node)
			break;
		es = rb_entry(node, struct extent_status, rb_node);
	}

	return n;
}

/*
 * ext4_es_delayed_clu - count number of clusters containing blocks that are
 * delayed only, in the range of @len blocks starting at @lblk.  Locking
 * wrapper for __es_delayed_clu().
 */
unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	ext4_lblk_t end;
	unsigned int n;

	if (len == 0)
		return 0;

	end = lblk + len - 1;
	WARN_ON(end < lblk);

	read_lock(&ei->i_es_lock);

	n = __es_delayed_clu(inode, lblk, end);

	read_unlock(&ei->i_es_lock);

	return n;
}

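/*
 * __revise_pending - makes, cancels, or leaves unchanged pending cluster
 * reservations after a written or unwritten extent has been added to the
 * extent status tree
 *
 * The extent covers @len blocks starting at @lblk.  The clusters at the ends
 * of the added range keep a pending reservation only if they still contain
 * delayed-only blocks outside that range.  Must be called holding i_es_lock.
 */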
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t end = lblk + len - 1;
	ext4_lblk_t first, last;
	bool f_del = false, l_del = false;

	if (len == 0)
		return;

	/*
	 * The added range either lies within a single cluster or spans two
	 * or more.  In both cases, the partial cluster at each end of the
	 * range keeps a pending reservation only if delayed-only blocks
	 * remain in that cluster outside the range; otherwise any existing
	 * pending reservation for it is removed.
	 */
	if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
		first = EXT4_LBLK_CMASK(sbi, lblk);
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del) {
			__insert_pending(inode, first);
		} else {
			last = EXT4_LBLK_CMASK(sbi, end) +
			       sbi->s_cluster_ratio - 1;
			if (last != end)
				l_del = __es_scan_range(inode,
							&ext4_es_is_delonly,
							end + 1, last);
			if (l_del)
				__insert_pending(inode, last);
			else
				__remove_pending(inode, last);
		}
	} else {
		first = EXT4_LBLK_CMASK(sbi, lblk);
		if (first != lblk)
			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
						first, lblk - 1);
		if (f_del)
			__insert_pending(inode, first);
		else
			__remove_pending(inode, first);

		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
		if (last != end)
			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
						end + 1, last);
		if (l_del)
			__insert_pending(inode, last);
		else
			__remove_pending(inode, last);
	}
}