1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/list_sort.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
16#include "ext4.h"
17
18#include <trace/events/ext4.h>
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*
 * Slab caches used by this file: one for struct extent_status entries
 * (created in ext4_init_es()) and one for struct pending_reservation
 * entries (created in ext4_init_pending()).
 */
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;

/* Forward declarations of static helpers that are called before their bodies. */
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei);
static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
			     ext4_lblk_t len);
155
156int __init ext4_init_es(void)
157{
158 ext4_es_cachep = kmem_cache_create("ext4_extent_status",
159 sizeof(struct extent_status),
160 0, (SLAB_RECLAIM_ACCOUNT), NULL);
161 if (ext4_es_cachep == NULL)
162 return -ENOMEM;
163 return 0;
164}
165
166void ext4_exit_es(void)
167{
168 kmem_cache_destroy(ext4_es_cachep);
169}
170
171void ext4_es_init_tree(struct ext4_es_tree *tree)
172{
173 tree->root = RB_ROOT;
174 tree->cache_es = NULL;
175}
176
#ifdef ES_DEBUG__
/*
 * Debug helper: dump every entry of the inode's extent status tree, in
 * rb-tree order, to the kernel log.  Compiled out (expands to nothing)
 * unless ES_DEBUG__ is defined.
 */
static void ext4_es_print_tree(struct inode *inode)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *cur;

	printk(KERN_DEBUG "status extents for inode %lu:", inode->i_ino);
	for (cur = rb_first(&tree->root); cur; cur = rb_next(cur)) {
		struct extent_status *es =
			rb_entry(cur, struct extent_status, rb_node);

		printk(KERN_DEBUG " [%u/%u) %llu %x",
		       es->es_lblk, es->es_len,
		       ext4_es_pblock(es), ext4_es_status(es));
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_es_print_tree(inode)
#endif
199
200static inline ext4_lblk_t ext4_es_end(struct extent_status *es)
201{
202 BUG_ON(es->es_lblk + es->es_len < es->es_lblk);
203 return es->es_lblk + es->es_len - 1;
204}
205
206
207
208
209
210static struct extent_status *__es_tree_search(struct rb_root *root,
211 ext4_lblk_t lblk)
212{
213 struct rb_node *node = root->rb_node;
214 struct extent_status *es = NULL;
215
216 while (node) {
217 es = rb_entry(node, struct extent_status, rb_node);
218 if (lblk < es->es_lblk)
219 node = node->rb_left;
220 else if (lblk > ext4_es_end(es))
221 node = node->rb_right;
222 else
223 return es;
224 }
225
226 if (es && lblk < es->es_lblk)
227 return es;
228
229 if (es && lblk > ext4_es_end(es)) {
230 node = rb_next(&es->rb_node);
231 return node ? rb_entry(node, struct extent_status, rb_node) :
232 NULL;
233 }
234
235 return NULL;
236}
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256static void __es_find_extent_range(struct inode *inode,
257 int (*matching_fn)(struct extent_status *es),
258 ext4_lblk_t lblk, ext4_lblk_t end,
259 struct extent_status *es)
260{
261 struct ext4_es_tree *tree = NULL;
262 struct extent_status *es1 = NULL;
263 struct rb_node *node;
264
265 WARN_ON(es == NULL);
266 WARN_ON(end < lblk);
267
268 tree = &EXT4_I(inode)->i_es_tree;
269
270
271 es->es_lblk = es->es_len = es->es_pblk = 0;
272 if (tree->cache_es) {
273 es1 = tree->cache_es;
274 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
275 es_debug("%u cached by [%u/%u) %llu %x\n",
276 lblk, es1->es_lblk, es1->es_len,
277 ext4_es_pblock(es1), ext4_es_status(es1));
278 goto out;
279 }
280 }
281
282 es1 = __es_tree_search(&tree->root, lblk);
283
284out:
285 if (es1 && !matching_fn(es1)) {
286 while ((node = rb_next(&es1->rb_node)) != NULL) {
287 es1 = rb_entry(node, struct extent_status, rb_node);
288 if (es1->es_lblk > end) {
289 es1 = NULL;
290 break;
291 }
292 if (matching_fn(es1))
293 break;
294 }
295 }
296
297 if (es1 && matching_fn(es1)) {
298 tree->cache_es = es1;
299 es->es_lblk = es1->es_lblk;
300 es->es_len = es1->es_len;
301 es->es_pblk = es1->es_pblk;
302 }
303
304}
305
306
307
308
309void ext4_es_find_extent_range(struct inode *inode,
310 int (*matching_fn)(struct extent_status *es),
311 ext4_lblk_t lblk, ext4_lblk_t end,
312 struct extent_status *es)
313{
314 trace_ext4_es_find_extent_range_enter(inode, lblk);
315
316 read_lock(&EXT4_I(inode)->i_es_lock);
317 __es_find_extent_range(inode, matching_fn, lblk, end, es);
318 read_unlock(&EXT4_I(inode)->i_es_lock);
319
320 trace_ext4_es_find_extent_range_exit(inode, es);
321}
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338static bool __es_scan_range(struct inode *inode,
339 int (*matching_fn)(struct extent_status *es),
340 ext4_lblk_t start, ext4_lblk_t end)
341{
342 struct extent_status es;
343
344 __es_find_extent_range(inode, matching_fn, start, end, &es);
345 if (es.es_len == 0)
346 return false;
347 else if (es.es_lblk <= start &&
348 start < es.es_lblk + es.es_len)
349 return true;
350 else if (start <= es.es_lblk && es.es_lblk <= end)
351 return true;
352 else
353 return false;
354}
355
356
357
358bool ext4_es_scan_range(struct inode *inode,
359 int (*matching_fn)(struct extent_status *es),
360 ext4_lblk_t lblk, ext4_lblk_t end)
361{
362 bool ret;
363
364 read_lock(&EXT4_I(inode)->i_es_lock);
365 ret = __es_scan_range(inode, matching_fn, lblk, end);
366 read_unlock(&EXT4_I(inode)->i_es_lock);
367
368 return ret;
369}
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385static bool __es_scan_clu(struct inode *inode,
386 int (*matching_fn)(struct extent_status *es),
387 ext4_lblk_t lblk)
388{
389 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
390 ext4_lblk_t lblk_start, lblk_end;
391
392 lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
393 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
394
395 return __es_scan_range(inode, matching_fn, lblk_start, lblk_end);
396}
397
398
399
400
401bool ext4_es_scan_clu(struct inode *inode,
402 int (*matching_fn)(struct extent_status *es),
403 ext4_lblk_t lblk)
404{
405 bool ret;
406
407 read_lock(&EXT4_I(inode)->i_es_lock);
408 ret = __es_scan_clu(inode, matching_fn, lblk);
409 read_unlock(&EXT4_I(inode)->i_es_lock);
410
411 return ret;
412}
413
414static void ext4_es_list_add(struct inode *inode)
415{
416 struct ext4_inode_info *ei = EXT4_I(inode);
417 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
418
419 if (!list_empty(&ei->i_es_list))
420 return;
421
422 spin_lock(&sbi->s_es_lock);
423 if (list_empty(&ei->i_es_list)) {
424 list_add_tail(&ei->i_es_list, &sbi->s_es_list);
425 sbi->s_es_nr_inode++;
426 }
427 spin_unlock(&sbi->s_es_lock);
428}
429
430static void ext4_es_list_del(struct inode *inode)
431{
432 struct ext4_inode_info *ei = EXT4_I(inode);
433 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
434
435 spin_lock(&sbi->s_es_lock);
436 if (!list_empty(&ei->i_es_list)) {
437 list_del_init(&ei->i_es_list);
438 sbi->s_es_nr_inode--;
439 WARN_ON_ONCE(sbi->s_es_nr_inode < 0);
440 }
441 spin_unlock(&sbi->s_es_lock);
442}
443
444static struct extent_status *
445ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
446 ext4_fsblk_t pblk)
447{
448 struct extent_status *es;
449 es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
450 if (es == NULL)
451 return NULL;
452 es->es_lblk = lblk;
453 es->es_len = len;
454 es->es_pblk = pblk;
455
456
457
458
459 if (!ext4_es_is_delayed(es)) {
460 if (!EXT4_I(inode)->i_es_shk_nr++)
461 ext4_es_list_add(inode);
462 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
463 s_es_stats.es_stats_shk_cnt);
464 }
465
466 EXT4_I(inode)->i_es_all_nr++;
467 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
468
469 return es;
470}
471
472static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
473{
474 EXT4_I(inode)->i_es_all_nr--;
475 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
476
477
478 if (!ext4_es_is_delayed(es)) {
479 BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
480 if (!--EXT4_I(inode)->i_es_shk_nr)
481 ext4_es_list_del(inode);
482 percpu_counter_dec(&EXT4_SB(inode->i_sb)->
483 s_es_stats.es_stats_shk_cnt);
484 }
485
486 kmem_cache_free(ext4_es_cachep, es);
487}
488
489
490
491
492
493
494
495
496static int ext4_es_can_be_merged(struct extent_status *es1,
497 struct extent_status *es2)
498{
499 if (ext4_es_type(es1) != ext4_es_type(es2))
500 return 0;
501
502 if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
503 pr_warn("ES assertion failed when merging extents. "
504 "The sum of lengths of es1 (%d) and es2 (%d) "
505 "is bigger than allowed file size (%d)\n",
506 es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
507 WARN_ON(1);
508 return 0;
509 }
510
511 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
512 return 0;
513
514 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
515 (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
516 return 1;
517
518 if (ext4_es_is_hole(es1))
519 return 1;
520
521
522 if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
523 return 1;
524
525 return 0;
526}
527
528static struct extent_status *
529ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es)
530{
531 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
532 struct extent_status *es1;
533 struct rb_node *node;
534
535 node = rb_prev(&es->rb_node);
536 if (!node)
537 return es;
538
539 es1 = rb_entry(node, struct extent_status, rb_node);
540 if (ext4_es_can_be_merged(es1, es)) {
541 es1->es_len += es->es_len;
542 if (ext4_es_is_referenced(es))
543 ext4_es_set_referenced(es1);
544 rb_erase(&es->rb_node, &tree->root);
545 ext4_es_free_extent(inode, es);
546 es = es1;
547 }
548
549 return es;
550}
551
552static struct extent_status *
553ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
554{
555 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
556 struct extent_status *es1;
557 struct rb_node *node;
558
559 node = rb_next(&es->rb_node);
560 if (!node)
561 return es;
562
563 es1 = rb_entry(node, struct extent_status, rb_node);
564 if (ext4_es_can_be_merged(es, es1)) {
565 es->es_len += es1->es_len;
566 if (ext4_es_is_referenced(es1))
567 ext4_es_set_referenced(es);
568 rb_erase(node, &tree->root);
569 ext4_es_free_extent(inode, es1);
570 }
571
572 return es;
573}
574
575#ifdef ES_AGGRESSIVE_TEST
576#include "ext4_extents.h"
577
/*
 * ES_AGGRESSIVE_TEST only: cross-check an extent status entry that is about
 * to be inserted against the inode's on-disk extent tree.
 *
 * Looks up the extent path for es->es_lblk and compares the leaf extent
 * (if any) with @es.  Inconsistencies are only reported via pr_warn();
 * nothing is modified and no error is returned.  If the path lookup itself
 * fails the check is silently skipped.
 */
static void ext4_es_insert_extent_ext_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_extent *ex;
	ext4_lblk_t ee_block;
	ext4_fsblk_t ee_start;
	unsigned short ee_len;
	int depth, ee_status, es_status;

	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
	if (IS_ERR(path))
		return;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;

	if (ex) {

		ee_block = le32_to_cpu(ex->ee_block);
		ee_start = ext4_ext_pblock(ex);
		ee_len = ext4_ext_get_actual_len(ex);

		/* Reduce both sides to 1 = unwritten, 0 = not unwritten. */
		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
		es_status = ext4_es_is_unwritten(es) ? 1 : 0;

		/*
		 * @es is neither written nor unwritten (the warning below
		 * calls this "delayed/hole"): it must not start inside the
		 * on-disk extent that was found.
		 */
		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
			if (in_range(es->es_lblk, ee_block, ee_len)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu we can find an extent "
					"at block [%d/%d/%llu/%c], but we "
					"want to add a delayed/hole extent "
					"[%d/%d/%llu/%x]\n",
					inode->i_ino, ee_block, ee_len,
					ee_start, ee_status ? 'u' : 'w',
					es->es_lblk, es->es_len,
					ext4_es_pblock(es), ext4_es_status(es));
			}
			goto out;
		}

		/*
		 * @es is written or unwritten: it must not start before the
		 * on-disk extent, and its physical block must sit at the same
		 * offset into the on-disk extent as its logical block.
		 */
		if (es->es_lblk < ee_block ||
		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
			goto out;
		}

		/* Written/unwritten state must agree as well. */
		if (ee_status ^ es_status) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"ex_status [%d/%d/%llu/%c] != "
				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
				ee_block, ee_len, ee_start,
				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
				ext4_es_pblock(es), es_status ? 'u' : 'w');
		}
	} else {
		/*
		 * No on-disk extent was found: only a delayed or hole entry
		 * is consistent with that.
		 */
		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"can't find an extent at block %d but we want "
				"to add a written/unwritten extent "
				"[%d/%d/%llu/%x]\n", inode->i_ino,
				es->es_lblk, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
		}
	}
out:
	/* Drop the references held by the path, then free the path itself. */
	ext4_ext_drop_refs(path);
	kfree(path);
}
664
/*
 * ES_AGGRESSIVE_TEST only: cross-check an extent status entry that is about
 * to be inserted against the block mapping of an inode that does not use
 * extents.
 *
 * Maps [es_lblk, es_lblk + es_len) with ext4_ind_map_blocks() using a NULL
 * handle and flags of 0, and compares the result with @es.
 * Inconsistencies are reported via pr_warn(); a mapped range whose entry
 * is neither delayed, hole nor written hits BUG().
 */
static void ext4_es_insert_extent_ind_check(struct inode *inode,
					    struct extent_status *es)
{
	struct ext4_map_blocks map;
	int retval;

	map.m_lblk = es->es_lblk;
	map.m_len = es->es_len;

	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
	if (retval > 0) {
		/* Blocks are mapped on disk. */
		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
			/* ...so a delayed/hole entry contradicts the mapping. */
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can find blocks but we want to add a "
				"delayed/hole extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		} else if (ext4_es_is_written(es)) {
			/* Length and start block must both agree. */
			if (retval != es->es_len) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu retval %d != es_len %d\n",
					inode->i_ino, retval, es->es_len);
				return;
			}
			if (map.m_pblk != ext4_es_pblock(es)) {
				pr_warn("ES insert assertion failed for "
					"inode: %lu m_pblk %llu != "
					"es_pblk %llu\n",
					inode->i_ino, map.m_pblk,
					ext4_es_pblock(es));
				return;
			}
		} else {
			/*
			 * Neither delayed, hole nor written - treated as an
			 * impossible state for this kind of inode.
			 */
			BUG();
		}
	} else if (retval == 0) {
		/* Nothing mapped: a written entry would be inconsistent. */
		if (ext4_es_is_written(es)) {
			pr_warn("ES insert assertion failed for inode: %lu "
				"We can't find the block but we want to add "
				"a written extent [%d/%d/%llu/%x]\n",
				inode->i_ino, es->es_lblk, es->es_len,
				ext4_es_pblock(es), ext4_es_status(es));
			return;
		}
	}
	/* A negative retval (mapping error) is ignored. */
}
727
728static inline void ext4_es_insert_extent_check(struct inode *inode,
729 struct extent_status *es)
730{
731
732
733
734
735 BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
736 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
737 ext4_es_insert_extent_ext_check(inode, es);
738 else
739 ext4_es_insert_extent_ind_check(inode, es);
740}
741#else
/*
 * Without ES_AGGRESSIVE_TEST the insert-time consistency check is a no-op.
 */
static inline void ext4_es_insert_extent_check(struct inode *inode,
					       struct extent_status *es)
{
}
746#endif
747
/*
 * Insert @newes into the inode's extent status tree.
 *
 * While descending the rb-tree, each visited entry is tested for
 * mergeability with @newes.  If a merge is possible the existing entry is
 * extended in place (and then further merged with its own neighbour on the
 * side that grew); no allocation happens in that case.  Otherwise a new
 * entry is allocated and linked at the leaf position found.
 *
 * The range of @newes must not overlap any existing entry - that case hits
 * BUG().  Takes no lock itself; the callers in this file hold i_es_lock
 * for writing.
 *
 * On success the inserted/merged entry becomes the tree's cached entry.
 * Returns 0 on success or -ENOMEM if a new entry could not be allocated.
 */
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node **p = &tree->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_status *es;

	while (*p) {
		parent = *p;
		es = rb_entry(parent, struct extent_status, rb_node);

		if (newes->es_lblk < es->es_lblk) {
			if (ext4_es_can_be_merged(newes, es)) {
				/*
				 * @newes ends exactly where @es begins: grow
				 * @es to the left.  For mapped extents the
				 * physical start moves to @newes's block too.
				 */
				es->es_lblk = newes->es_lblk;
				es->es_len += newes->es_len;
				if (ext4_es_is_written(es) ||
				    ext4_es_is_unwritten(es))
					ext4_es_store_pblock(es,
							     newes->es_pblk);
				es = ext4_es_try_to_merge_left(inode, es);
				goto out;
			}
			p = &(*p)->rb_left;
		} else if (newes->es_lblk > ext4_es_end(es)) {
			if (ext4_es_can_be_merged(es, newes)) {
				/* @newes starts right after @es: grow right. */
				es->es_len += newes->es_len;
				es = ext4_es_try_to_merge_right(inode, es);
				goto out;
			}
			p = &(*p)->rb_right;
		} else {
			/* @newes starts inside an existing entry. */
			BUG();
			return -EINVAL;
		}
	}

	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
				  newes->es_pblk);
	if (!es)
		return -ENOMEM;
	rb_link_node(&es->rb_node, parent, p);
	rb_insert_color(&es->rb_node, &tree->root);

out:
	tree->cache_es = es;
	return 0;
}
799
800
801
802
803
804
805
806int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
807 ext4_lblk_t len, ext4_fsblk_t pblk,
808 unsigned int status)
809{
810 struct extent_status newes;
811 ext4_lblk_t end = lblk + len - 1;
812 int err = 0;
813 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
814
815 es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
816 lblk, len, pblk, status, inode->i_ino);
817
818 if (!len)
819 return 0;
820
821 BUG_ON(end < lblk);
822
823 if ((status & EXTENT_STATUS_DELAYED) &&
824 (status & EXTENT_STATUS_WRITTEN)) {
825 ext4_warning(inode->i_sb, "Inserting extent [%u/%u] as "
826 " delayed and written which can potentially "
827 " cause data loss.", lblk, len);
828 WARN_ON(1);
829 }
830
831 newes.es_lblk = lblk;
832 newes.es_len = len;
833 ext4_es_store_pblock_status(&newes, pblk, status);
834 trace_ext4_es_insert_extent(inode, &newes);
835
836 ext4_es_insert_extent_check(inode, &newes);
837
838 write_lock(&EXT4_I(inode)->i_es_lock);
839 err = __es_remove_extent(inode, lblk, end);
840 if (err != 0)
841 goto error;
842retry:
843 err = __es_insert_extent(inode, &newes);
844 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
845 128, EXT4_I(inode)))
846 goto retry;
847 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
848 err = 0;
849
850 if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
851 (status & EXTENT_STATUS_WRITTEN ||
852 status & EXTENT_STATUS_UNWRITTEN))
853 __revise_pending(inode, lblk, len);
854
855error:
856 write_unlock(&EXT4_I(inode)->i_es_lock);
857
858 ext4_es_print_tree(inode);
859
860 return err;
861}
862
863
864
865
866
867
868void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
869 ext4_lblk_t len, ext4_fsblk_t pblk,
870 unsigned int status)
871{
872 struct extent_status *es;
873 struct extent_status newes;
874 ext4_lblk_t end = lblk + len - 1;
875
876 newes.es_lblk = lblk;
877 newes.es_len = len;
878 ext4_es_store_pblock_status(&newes, pblk, status);
879 trace_ext4_es_cache_extent(inode, &newes);
880
881 if (!len)
882 return;
883
884 BUG_ON(end < lblk);
885
886 write_lock(&EXT4_I(inode)->i_es_lock);
887
888 es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
889 if (!es || es->es_lblk > end)
890 __es_insert_extent(inode, &newes);
891 write_unlock(&EXT4_I(inode)->i_es_lock);
892}
893
894
895
896
897
898
899
900
901int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
902 struct extent_status *es)
903{
904 struct ext4_es_tree *tree;
905 struct ext4_es_stats *stats;
906 struct extent_status *es1 = NULL;
907 struct rb_node *node;
908 int found = 0;
909
910 trace_ext4_es_lookup_extent_enter(inode, lblk);
911 es_debug("lookup extent in block %u\n", lblk);
912
913 tree = &EXT4_I(inode)->i_es_tree;
914 read_lock(&EXT4_I(inode)->i_es_lock);
915
916
917 es->es_lblk = es->es_len = es->es_pblk = 0;
918 if (tree->cache_es) {
919 es1 = tree->cache_es;
920 if (in_range(lblk, es1->es_lblk, es1->es_len)) {
921 es_debug("%u cached by [%u/%u)\n",
922 lblk, es1->es_lblk, es1->es_len);
923 found = 1;
924 goto out;
925 }
926 }
927
928 node = tree->root.rb_node;
929 while (node) {
930 es1 = rb_entry(node, struct extent_status, rb_node);
931 if (lblk < es1->es_lblk)
932 node = node->rb_left;
933 else if (lblk > ext4_es_end(es1))
934 node = node->rb_right;
935 else {
936 found = 1;
937 break;
938 }
939 }
940
941out:
942 stats = &EXT4_SB(inode->i_sb)->s_es_stats;
943 if (found) {
944 BUG_ON(!es1);
945 es->es_lblk = es1->es_lblk;
946 es->es_len = es1->es_len;
947 es->es_pblk = es1->es_pblk;
948 if (!ext4_es_is_referenced(es1))
949 ext4_es_set_referenced(es1);
950 stats->es_stats_cache_hits++;
951 } else {
952 stats->es_stats_cache_misses++;
953 }
954
955 read_unlock(&EXT4_I(inode)->i_es_lock);
956
957 trace_ext4_es_lookup_extent_exit(inode, es, found);
958 return found;
959}
960
/*
 * Remove the block range [@lblk, @end] (inclusive) from the inode's extent
 * status tree.
 *
 * Entries fully inside the range are erased and freed; an entry that
 * straddles either boundary is trimmed; an entry that straddles both
 * boundaries is split in two, which needs a new entry for the right-hand
 * remainder.
 *
 * Takes no lock itself; the callers in this file hold i_es_lock for
 * writing.
 *
 * Returns 0 on success.  The only failure source is the insertion of the
 * right-hand remainder in the split case; on -ENOMEM the shrinker is run
 * and the whole operation restarted if it freed anything, otherwise the
 * original entry is restored and the error returned.
 */
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
			      ext4_lblk_t end)
{
	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
	struct rb_node *node;
	struct extent_status *es;
	struct extent_status orig_es;
	ext4_lblk_t len1, len2;
	ext4_fsblk_t block;
	int err;

retry:
	err = 0;
	/* First entry containing or following @lblk. */
	es = __es_tree_search(&tree->root, lblk);
	if (!es)
		goto out;
	if (es->es_lblk > end)
		goto out;

	/* The tree is about to change: drop the cached entry. */
	tree->cache_es = NULL;

	/* Remember the first entry as it was, for split math and rollback. */
	orig_es.es_lblk = es->es_lblk;
	orig_es.es_len = es->es_len;
	orig_es.es_pblk = es->es_pblk;

	/*
	 * len1: blocks of @es that lie before the removed range (kept, left).
	 * len2: blocks of @es that lie after the removed range (kept, right).
	 */
	len1 = lblk > es->es_lblk ? lblk - es->es_lblk : 0;
	len2 = ext4_es_end(es) > end ? ext4_es_end(es) - end : 0;
	if (len1 > 0)
		es->es_len = len1;
	if (len2 > 0) {
		if (len1 > 0) {
			/*
			 * The range is strictly inside @es: @es already became
			 * the left piece, insert a new entry for the right one.
			 */
			struct extent_status newes;

			newes.es_lblk = end + 1;
			newes.es_len = len2;
			/*
			 * Placeholder physical block for entries that are
			 * neither written nor unwritten; replaced by the real
			 * block otherwise.
			 */
			block = 0x7FDEADBEEFULL;
			if (ext4_es_is_written(&orig_es) ||
			    ext4_es_is_unwritten(&orig_es))
				block = ext4_es_pblock(&orig_es) +
					orig_es.es_len - len2;
			ext4_es_store_pblock_status(&newes, block,
						    ext4_es_status(&orig_es));
			err = __es_insert_extent(inode, &newes);
			if (err) {
				/* Undo the trim of the left piece. */
				es->es_lblk = orig_es.es_lblk;
				es->es_len = orig_es.es_len;
				if ((err == -ENOMEM) &&
				    __es_shrink(EXT4_SB(inode->i_sb),
						128, EXT4_I(inode)))
					goto retry;
				goto out;
			}
		} else {
			/*
			 * The range covers the head of @es only: move the
			 * start of @es past @end and shift the physical block
			 * by the same amount.
			 */
			es->es_lblk = end + 1;
			es->es_len = len2;
			if (ext4_es_is_written(es) ||
			    ext4_es_is_unwritten(es)) {
				block = orig_es.es_pblk + orig_es.es_len - len2;
				ext4_es_store_pblock(es, block);
			}
		}
		/* @es extends past @end, so no later entry can be affected. */
		goto out;
	}

	/* @es was trimmed on the right and stays; continue with its successor. */
	if (len1 > 0) {
		node = rb_next(&es->rb_node);
		if (node)
			es = rb_entry(node, struct extent_status, rb_node);
		else
			es = NULL;
	}

	/* Drop every entry that lies completely inside the range. */
	while (es && ext4_es_end(es) <= end) {
		/* Fetch the successor before @es is erased and freed. */
		node = rb_next(&es->rb_node);
		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		if (!node) {
			es = NULL;
			break;
		}
		es = rb_entry(node, struct extent_status, rb_node);
	}

	/* A last entry may start inside the range and extend past it: trim it. */
	if (es && es->es_lblk < end + 1) {
		ext4_lblk_t orig_len = es->es_len;

		len1 = ext4_es_end(es) - end;
		es->es_lblk = end + 1;
		es->es_len = len1;
		if (ext4_es_is_written(es) || ext4_es_is_unwritten(es)) {
			block = es->es_pblk + orig_len - len1;
			ext4_es_store_pblock(es, block);
		}
	}

out:
	return err;
}
1060
1061
1062
1063
1064
1065
1066int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
1067 ext4_lblk_t len)
1068{
1069 ext4_lblk_t end;
1070 int err = 0;
1071
1072 trace_ext4_es_remove_extent(inode, lblk, len);
1073 es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
1074 lblk, len, inode->i_ino);
1075
1076 if (!len)
1077 return err;
1078
1079 end = lblk + len - 1;
1080 BUG_ON(end < lblk);
1081
1082
1083
1084
1085
1086
1087 write_lock(&EXT4_I(inode)->i_es_lock);
1088 err = __es_remove_extent(inode, lblk, end);
1089 write_unlock(&EXT4_I(inode)->i_es_lock);
1090 ext4_es_print_tree(inode);
1091 return err;
1092}
1093
/*
 * Reclaim extent status entries from the inodes on @sbi's s_es_list.
 *
 * @nr_to_scan: budget of entries to examine; consumed by
 *              es_reclaim_extents().
 * @locked_ei:  inode whose i_es_lock the caller already holds for writing
 *              (or NULL).  It is skipped during the list walk, because
 *              trylocking it would fail, and is only reclaimed from as a
 *              last resort.
 *
 * Returns the number of entries freed and updates the scan-time and
 * shrunk-objects statistics.
 */
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
		       struct ext4_inode_info *locked_ei)
{
	struct ext4_inode_info *ei;
	struct ext4_es_stats *es_stats;
	ktime_t start_time;
	u64 scan_time;
	int nr_to_walk;
	int nr_shrunk = 0;
	int retried = 0, nr_skipped = 0;

	es_stats = &sbi->s_es_stats;
	start_time = ktime_get();

retry:
	spin_lock(&sbi->s_es_lock);
	/* Visit each inode currently on the list at most once per pass. */
	nr_to_walk = sbi->s_es_nr_inode;
	while (nr_to_walk-- > 0) {
		if (list_empty(&sbi->s_es_list)) {
			spin_unlock(&sbi->s_es_lock);
			goto out;
		}
		ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
				      i_es_list);
		/* Rotate the inode to the tail so the next iteration sees a new head. */
		list_move_tail(&ei->i_es_list, &sbi->s_es_list);

		/*
		 * First pass only: leave inodes flagged
		 * EXT4_STATE_EXT_PRECACHED alone.
		 */
		if (!retried && ext4_test_inode_state(&ei->vfs_inode,
						EXT4_STATE_EXT_PRECACHED)) {
			nr_skipped++;
			continue;
		}

		/* Never block on a tree lock while holding s_es_lock. */
		if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) {
			nr_skipped++;
			continue;
		}
		/*
		 * i_es_lock is held now; s_es_lock is dropped for the
		 * duration of the reclaim.
		 */
		spin_unlock(&sbi->s_es_lock);

		nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
		write_unlock(&ei->i_es_lock);

		if (nr_to_scan <= 0)
			goto out;
		spin_lock(&sbi->s_es_lock);
	}
	spin_unlock(&sbi->s_es_lock);

	/*
	 * Nothing was freed but some inodes were skipped: walk the list once
	 * more, this time including the precached inodes.
	 */
	if ((nr_shrunk == 0) && nr_skipped && !retried) {
		retried++;
		goto retry;
	}

	/* Last resort: reclaim from the inode the caller has locked. */
	if (locked_ei && nr_shrunk == 0)
		nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);

out:
	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
	/* Running averages: new = (sample + 3 * old) / 4. */
	if (likely(es_stats->es_stats_scan_time))
		es_stats->es_stats_scan_time = (scan_time +
				es_stats->es_stats_scan_time*3) / 4;
	else
		es_stats->es_stats_scan_time = scan_time;
	if (scan_time > es_stats->es_stats_max_scan_time)
		es_stats->es_stats_max_scan_time = scan_time;
	if (likely(es_stats->es_stats_shrunk))
		es_stats->es_stats_shrunk = (nr_shrunk +
				es_stats->es_stats_shrunk*3) / 4;
	else
		es_stats->es_stats_shrunk = nr_shrunk;

	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time,
			     nr_skipped, retried);
	return nr_shrunk;
}
1181
1182static unsigned long ext4_es_count(struct shrinker *shrink,
1183 struct shrink_control *sc)
1184{
1185 unsigned long nr;
1186 struct ext4_sb_info *sbi;
1187
1188 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1189 nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1190 trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
1191 return nr;
1192}
1193
1194static unsigned long ext4_es_scan(struct shrinker *shrink,
1195 struct shrink_control *sc)
1196{
1197 struct ext4_sb_info *sbi = container_of(shrink,
1198 struct ext4_sb_info, s_es_shrinker);
1199 int nr_to_scan = sc->nr_to_scan;
1200 int ret, nr_shrunk;
1201
1202 ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
1203 trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
1204
1205 if (!nr_to_scan)
1206 return ret;
1207
1208 nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
1209
1210 trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
1211 return nr_shrunk;
1212}
1213
1214int ext4_seq_es_shrinker_info_show(struct seq_file *seq, void *v)
1215{
1216 struct ext4_sb_info *sbi = EXT4_SB((struct super_block *) seq->private);
1217 struct ext4_es_stats *es_stats = &sbi->s_es_stats;
1218 struct ext4_inode_info *ei, *max = NULL;
1219 unsigned int inode_cnt = 0;
1220
1221 if (v != SEQ_START_TOKEN)
1222 return 0;
1223
1224
1225 spin_lock(&sbi->s_es_lock);
1226 list_for_each_entry(ei, &sbi->s_es_list, i_es_list) {
1227 inode_cnt++;
1228 if (max && max->i_es_all_nr < ei->i_es_all_nr)
1229 max = ei;
1230 else if (!max)
1231 max = ei;
1232 }
1233 spin_unlock(&sbi->s_es_lock);
1234
1235 seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
1236 percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
1237 percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt));
1238 seq_printf(seq, " %lu/%lu cache hits/misses\n",
1239 es_stats->es_stats_cache_hits,
1240 es_stats->es_stats_cache_misses);
1241 if (inode_cnt)
1242 seq_printf(seq, " %d inodes on list\n", inode_cnt);
1243
1244 seq_printf(seq, "average:\n %llu us scan time\n",
1245 div_u64(es_stats->es_stats_scan_time, 1000));
1246 seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk);
1247 if (inode_cnt)
1248 seq_printf(seq,
1249 "maximum:\n %lu inode (%u objects, %u reclaimable)\n"
1250 " %llu us max scan time\n",
1251 max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr,
1252 div_u64(es_stats->es_stats_max_scan_time, 1000));
1253
1254 return 0;
1255}
1256
1257int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1258{
1259 int err;
1260
1261
1262 BUILD_BUG_ON(ES_SHIFT < 48);
1263 INIT_LIST_HEAD(&sbi->s_es_list);
1264 sbi->s_es_nr_inode = 0;
1265 spin_lock_init(&sbi->s_es_lock);
1266 sbi->s_es_stats.es_stats_shrunk = 0;
1267 sbi->s_es_stats.es_stats_cache_hits = 0;
1268 sbi->s_es_stats.es_stats_cache_misses = 0;
1269 sbi->s_es_stats.es_stats_scan_time = 0;
1270 sbi->s_es_stats.es_stats_max_scan_time = 0;
1271 err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
1272 if (err)
1273 return err;
1274 err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, 0, GFP_KERNEL);
1275 if (err)
1276 goto err1;
1277
1278 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1279 sbi->s_es_shrinker.count_objects = ext4_es_count;
1280 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1281 err = register_shrinker(&sbi->s_es_shrinker);
1282 if (err)
1283 goto err2;
1284
1285 return 0;
1286
1287err2:
1288 percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
1289err1:
1290 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1291 return err;
1292}
1293
1294void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
1295{
1296 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1297 percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt);
1298 unregister_shrinker(&sbi->s_es_shrinker);
1299}
1300
1301
1302
1303
1304
1305
1306
1307
1308
/*
 * Scan one inode's extent status tree for reclaimable entries, starting at
 * ei->i_es_shrink_lblk and not going past logical block @end.
 *
 * Each examined entry costs one unit of *@nr_to_scan.  Delayed entries are
 * never freed; referenced entries get their referenced flag cleared and
 * survive this pass; everything else is erased, freed, and counted in
 * *@nr_shrunk.
 *
 * ei->i_es_shrink_lblk is updated so the next call resumes where this one
 * stopped (or at 0 after the end of the tree has been reached).
 *
 * Returns 1 if the scan budget ran out before the range was finished,
 * 0 if the range (or the tree) was exhausted.
 */
static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
				 int *nr_to_scan, int *nr_shrunk)
{
	struct inode *inode = &ei->vfs_inode;
	struct ext4_es_tree *tree = &ei->i_es_tree;
	struct extent_status *es;
	struct rb_node *node;

	es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
	if (!es)
		goto out_wrap;
	while (*nr_to_scan > 0) {
		if (es->es_lblk > end) {
			/* Range done; resume just past it next time. */
			ei->i_es_shrink_lblk = end + 1;
			return 0;
		}

		(*nr_to_scan)--;
		/* Fetch the successor now: @es may be erased and freed below. */
		node = rb_next(&es->rb_node);

		/* Delayed entries are not reclaimable. */
		if (ext4_es_is_delayed(es))
			goto next;
		/* Referenced entries survive this pass but lose the flag. */
		if (ext4_es_is_referenced(es)) {
			ext4_es_clear_referenced(es);
			goto next;
		}

		rb_erase(&es->rb_node, &tree->root);
		ext4_es_free_extent(inode, es);
		(*nr_shrunk)++;
next:
		if (!node)
			goto out_wrap;
		es = rb_entry(node, struct extent_status, rb_node);
	}
	/* Budget exhausted: remember where to continue. */
	ei->i_es_shrink_lblk = es->es_lblk;
	return 1;
out_wrap:
	/* Reached the end of the tree: start from block 0 next time. */
	ei->i_es_shrink_lblk = 0;
	return 0;
}
1353
1354static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
1355{
1356 struct inode *inode = &ei->vfs_inode;
1357 int nr_shrunk = 0;
1358 ext4_lblk_t start = ei->i_es_shrink_lblk;
1359 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
1360 DEFAULT_RATELIMIT_BURST);
1361
1362 if (ei->i_es_shk_nr == 0)
1363 return 0;
1364
1365 if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
1366 __ratelimit(&_rs))
1367 ext4_warning(inode->i_sb, "forced shrink of precached extents");
1368
1369 if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
1370 start != 0)
1371 es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
1372
1373 ei->i_es_tree.cache_es = NULL;
1374 return nr_shrunk;
1375}
1376
#ifdef ES_DEBUG__
/*
 * Debug helper: dump the logical cluster number of every pending
 * reservation of the inode, in rb-tree order, to the kernel log.
 * Compiled out (expands to nothing) unless ES_DEBUG__ is defined.
 */
static void ext4_print_pending_tree(struct inode *inode)
{
	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
	struct rb_node *cur;

	printk(KERN_DEBUG "pending reservations for inode %lu:", inode->i_ino);
	for (cur = rb_first(&tree->root); cur; cur = rb_next(cur)) {
		struct pending_reservation *pr =
			rb_entry(cur, struct pending_reservation, rb_node);

		printk(KERN_DEBUG " %u", pr->lclu);
	}
	printk(KERN_DEBUG "\n");
}
#else
#define ext4_print_pending_tree(inode)
#endif
1397
1398int __init ext4_init_pending(void)
1399{
1400 ext4_pending_cachep = kmem_cache_create("ext4_pending_reservation",
1401 sizeof(struct pending_reservation),
1402 0, (SLAB_RECLAIM_ACCOUNT), NULL);
1403 if (ext4_pending_cachep == NULL)
1404 return -ENOMEM;
1405 return 0;
1406}
1407
1408void ext4_exit_pending(void)
1409{
1410 kmem_cache_destroy(ext4_pending_cachep);
1411}
1412
1413void ext4_init_pending_tree(struct ext4_pending_tree *tree)
1414{
1415 tree->root = RB_ROOT;
1416}
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427static struct pending_reservation *__get_pending(struct inode *inode,
1428 ext4_lblk_t lclu)
1429{
1430 struct ext4_pending_tree *tree;
1431 struct rb_node *node;
1432 struct pending_reservation *pr = NULL;
1433
1434 tree = &EXT4_I(inode)->i_pending_tree;
1435 node = (&tree->root)->rb_node;
1436
1437 while (node) {
1438 pr = rb_entry(node, struct pending_reservation, rb_node);
1439 if (lclu < pr->lclu)
1440 node = node->rb_left;
1441 else if (lclu > pr->lclu)
1442 node = node->rb_right;
1443 else if (lclu == pr->lclu)
1444 return pr;
1445 }
1446 return NULL;
1447}
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
1460{
1461 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1462 struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
1463 struct rb_node **p = &tree->root.rb_node;
1464 struct rb_node *parent = NULL;
1465 struct pending_reservation *pr;
1466 ext4_lblk_t lclu;
1467 int ret = 0;
1468
1469 lclu = EXT4_B2C(sbi, lblk);
1470
1471 while (*p) {
1472 parent = *p;
1473 pr = rb_entry(parent, struct pending_reservation, rb_node);
1474
1475 if (lclu < pr->lclu) {
1476 p = &(*p)->rb_left;
1477 } else if (lclu > pr->lclu) {
1478 p = &(*p)->rb_right;
1479 } else {
1480
1481 goto out;
1482 }
1483 }
1484
1485 pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
1486 if (pr == NULL) {
1487 ret = -ENOMEM;
1488 goto out;
1489 }
1490 pr->lclu = lclu;
1491
1492 rb_link_node(&pr->rb_node, parent, p);
1493 rb_insert_color(&pr->rb_node, &tree->root);
1494
1495out:
1496 return ret;
1497}
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
1509{
1510 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1511 struct pending_reservation *pr;
1512 struct ext4_pending_tree *tree;
1513
1514 pr = __get_pending(inode, EXT4_B2C(sbi, lblk));
1515 if (pr != NULL) {
1516 tree = &EXT4_I(inode)->i_pending_tree;
1517 rb_erase(&pr->rb_node, &tree->root);
1518 kmem_cache_free(ext4_pending_cachep, pr);
1519 }
1520}
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk)
1532{
1533 struct ext4_inode_info *ei = EXT4_I(inode);
1534
1535 write_lock(&ei->i_es_lock);
1536 __remove_pending(inode, lblk);
1537 write_unlock(&ei->i_es_lock);
1538}
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
1551{
1552 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1553 struct ext4_inode_info *ei = EXT4_I(inode);
1554 bool ret;
1555
1556 read_lock(&ei->i_es_lock);
1557 ret = (bool)(__get_pending(inode, EXT4_B2C(sbi, lblk)) != NULL);
1558 read_unlock(&ei->i_es_lock);
1559
1560 return ret;
1561}
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
1576 bool allocated)
1577{
1578 struct extent_status newes;
1579 int err = 0;
1580
1581 es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
1582 lblk, inode->i_ino);
1583
1584 newes.es_lblk = lblk;
1585 newes.es_len = 1;
1586 ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
1587 trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
1588
1589 ext4_es_insert_extent_check(inode, &newes);
1590
1591 write_lock(&EXT4_I(inode)->i_es_lock);
1592
1593 err = __es_remove_extent(inode, lblk, lblk);
1594 if (err != 0)
1595 goto error;
1596retry:
1597 err = __es_insert_extent(inode, &newes);
1598 if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
1599 128, EXT4_I(inode)))
1600 goto retry;
1601 if (err != 0)
1602 goto error;
1603
1604 if (allocated)
1605 __insert_pending(inode, lblk);
1606
1607error:
1608 write_unlock(&EXT4_I(inode)->i_es_lock);
1609
1610 ext4_es_print_tree(inode);
1611 ext4_print_pending_tree(inode);
1612
1613 return err;
1614}
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629static unsigned int __es_delayed_clu(struct inode *inode, ext4_lblk_t start,
1630 ext4_lblk_t end)
1631{
1632 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
1633 struct extent_status *es;
1634 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1635 struct rb_node *node;
1636 ext4_lblk_t first_lclu, last_lclu;
1637 unsigned long long last_counted_lclu;
1638 unsigned int n = 0;
1639
1640
1641 last_counted_lclu = ~0ULL;
1642
1643 es = __es_tree_search(&tree->root, start);
1644
1645 while (es && (es->es_lblk <= end)) {
1646 if (ext4_es_is_delonly(es)) {
1647 if (es->es_lblk <= start)
1648 first_lclu = EXT4_B2C(sbi, start);
1649 else
1650 first_lclu = EXT4_B2C(sbi, es->es_lblk);
1651
1652 if (ext4_es_end(es) >= end)
1653 last_lclu = EXT4_B2C(sbi, end);
1654 else
1655 last_lclu = EXT4_B2C(sbi, ext4_es_end(es));
1656
1657 if (first_lclu == last_counted_lclu)
1658 n += last_lclu - first_lclu;
1659 else
1660 n += last_lclu - first_lclu + 1;
1661 last_counted_lclu = last_lclu;
1662 }
1663 node = rb_next(&es->rb_node);
1664 if (!node)
1665 break;
1666 es = rb_entry(node, struct extent_status, rb_node);
1667 }
1668
1669 return n;
1670}
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
1683 ext4_lblk_t len)
1684{
1685 struct ext4_inode_info *ei = EXT4_I(inode);
1686 ext4_lblk_t end;
1687 unsigned int n;
1688
1689 if (len == 0)
1690 return 0;
1691
1692 end = lblk + len - 1;
1693 WARN_ON(end < lblk);
1694
1695 read_lock(&ei->i_es_lock);
1696
1697 n = __es_delayed_clu(inode, lblk, end);
1698
1699 read_unlock(&ei->i_es_lock);
1700
1701 return n;
1702}
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
1720 ext4_lblk_t len)
1721{
1722 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1723 ext4_lblk_t end = lblk + len - 1;
1724 ext4_lblk_t first, last;
1725 bool f_del = false, l_del = false;
1726
1727 if (len == 0)
1728 return;
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743 if (EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) {
1744 first = EXT4_LBLK_CMASK(sbi, lblk);
1745 if (first != lblk)
1746 f_del = __es_scan_range(inode, &ext4_es_is_delonly,
1747 first, lblk - 1);
1748 if (f_del) {
1749 __insert_pending(inode, first);
1750 } else {
1751 last = EXT4_LBLK_CMASK(sbi, end) +
1752 sbi->s_cluster_ratio - 1;
1753 if (last != end)
1754 l_del = __es_scan_range(inode,
1755 &ext4_es_is_delonly,
1756 end + 1, last);
1757 if (l_del)
1758 __insert_pending(inode, last);
1759 else
1760 __remove_pending(inode, last);
1761 }
1762 } else {
1763 first = EXT4_LBLK_CMASK(sbi, lblk);
1764 if (first != lblk)
1765 f_del = __es_scan_range(inode, &ext4_es_is_delonly,
1766 first, lblk - 1);
1767 if (f_del)
1768 __insert_pending(inode, first);
1769 else
1770 __remove_pending(inode, first);
1771
1772 last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
1773 if (last != end)
1774 l_del = __es_scan_range(inode, &ext4_es_is_delonly,
1775 end + 1, last);
1776 if (l_del)
1777 __insert_pending(inode, last);
1778 else
1779 __remove_pending(inode, last);
1780 }
1781}
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793void ext4_es_remove_blks(struct inode *inode, ext4_lblk_t lblk,
1794 ext4_lblk_t len)
1795{
1796 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1797 unsigned int clu_size, reserved = 0;
1798 ext4_lblk_t last_lclu, first, length, remainder, last;
1799 bool delonly;
1800 int err = 0;
1801 struct pending_reservation *pr;
1802 struct ext4_pending_tree *tree;
1803
1804
1805
1806
1807
1808
1809
1810 clu_size = sbi->s_cluster_ratio;
1811 last_lclu = EXT4_B2C(sbi, lblk + len - 1);
1812
1813 write_lock(&EXT4_I(inode)->i_es_lock);
1814
1815 for (first = lblk, remainder = len;
1816 remainder > 0;
1817 first += length, remainder -= length) {
1818
1819 if (EXT4_B2C(sbi, first) == last_lclu)
1820 length = remainder;
1821 else
1822 length = clu_size - EXT4_LBLK_COFF(sbi, first);
1823
1824
1825
1826
1827
1828
1829
1830
1831 delonly = __es_scan_clu(inode, &ext4_es_is_delonly, first);
1832
1833
1834
1835
1836
1837 last = first + length - 1;
1838 err = __es_remove_extent(inode, first, last);
1839 if (err)
1840 ext4_warning(inode->i_sb,
1841 "%s: couldn't remove page (err = %d)",
1842 __func__, err);
1843
1844
1845 if (sbi->s_cluster_ratio == 1 && delonly) {
1846 reserved++;
1847 continue;
1848 }
1849
1850
1851
1852
1853
1854
1855 if (delonly &&
1856 !__es_scan_clu(inode, &ext4_es_is_delonly, first)) {
1857 pr = __get_pending(inode, EXT4_B2C(sbi, first));
1858 if (pr != NULL) {
1859 tree = &EXT4_I(inode)->i_pending_tree;
1860 rb_erase(&pr->rb_node, &tree->root);
1861 kmem_cache_free(ext4_pending_cachep, pr);
1862 } else {
1863 reserved++;
1864 }
1865 }
1866 }
1867
1868 write_unlock(&EXT4_I(inode)->i_es_lock);
1869
1870 ext4_da_release_space(inode, reserved);
1871}
1872