// SPDX-License-Identifier: GPL-2.0
2
3#include <linux/bitops.h>
4#include <linux/slab.h>
5#include <linux/bio.h>
6#include <linux/mm.h>
7#include <linux/pagemap.h>
8#include <linux/page-flags.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/writeback.h>
13#include <linux/pagevec.h>
14#include <linux/prefetch.h>
15#include <linux/cleancache.h>
16#include "extent_io.h"
17#include "extent_map.h"
18#include "ctree.h"
19#include "btrfs_inode.h"
20#include "volumes.h"
21#include "check-integrity.h"
22#include "locking.h"
23#include "rcu-string.h"
24#include "backref.h"
25#include "disk-io.h"
26
27static struct kmem_cache *extent_state_cache;
28static struct kmem_cache *extent_buffer_cache;
29static struct bio_set btrfs_bioset;
30
31static inline bool extent_state_in_tree(const struct extent_state *state)
32{
33 return !RB_EMPTY_NODE(&state->rb_node);
34}
35
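/*
 * Leak debugging: with CONFIG_BTRFS_DEBUG every allocated extent_state and
 * extent_buffer is linked onto a global list so that anything still alive
 * at module unload can be reported by btrfs_leak_debug_check().
 */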
36#ifdef CONFIG_BTRFS_DEBUG
37static LIST_HEAD(buffers);
38static LIST_HEAD(states);
39
40static DEFINE_SPINLOCK(leak_lock);
41
42static inline
43void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
44{
45 unsigned long flags;
46
47 spin_lock_irqsave(&leak_lock, flags);
48 list_add(new, head);
49 spin_unlock_irqrestore(&leak_lock, flags);
50}
51
52static inline
53void btrfs_leak_debug_del(struct list_head *entry)
54{
55 unsigned long flags;
56
57 spin_lock_irqsave(&leak_lock, flags);
58 list_del(entry);
59 spin_unlock_irqrestore(&leak_lock, flags);
60}
61
62static inline
63void btrfs_leak_debug_check(void)
64{
65 struct extent_state *state;
66 struct extent_buffer *eb;
67
68 while (!list_empty(&states)) {
69 state = list_entry(states.next, struct extent_state, leak_list);
70 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
71 state->start, state->end, state->state,
72 extent_state_in_tree(state),
73 refcount_read(&state->refs));
74 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76 }
77
78 while (!list_empty(&buffers)) {
79 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
80 pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
81 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
82 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
85}
86
87#define btrfs_debug_check_extent_io_range(tree, start, end) \
88 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
89static inline void __btrfs_debug_check_extent_io_range(const char *caller,
90 struct extent_io_tree *tree, u64 start, u64 end)
91{
92 struct inode *inode = tree->private_data;
93 u64 isize;
94
95 if (!inode || !is_data_inode(inode))
96 return;
97
98 isize = i_size_read(inode);
99 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
100 btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
101 "%s: ino %llu isize %llu odd range [%llu,%llu]",
102 caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
103 }
104}
105#else
106#define btrfs_leak_debug_add(new, head) do {} while (0)
107#define btrfs_leak_debug_del(entry) do {} while (0)
108#define btrfs_leak_debug_check() do {} while (0)
109#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
110#endif
111
112#define BUFFER_LRU_MAX 64
113
114struct tree_entry {
115 u64 start;
116 u64 end;
117 struct rb_node rb_node;
118};
119
120struct extent_page_data {
121 struct bio *bio;
122 struct extent_io_tree *tree;
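	/*
	 * Tells writepage not to lock the extent state bits for this range;
	 * the caller already holds the range locked.  Writepage still does
	 * the unlocking.
	 */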
126 unsigned int extent_locked:1;
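	/* Tells the bio submission code to use REQ_SYNC for the writes. */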
129 unsigned int sync_io:1;
130};
131
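/*
 * Record a state transition in @changeset: account the number of bytes whose
 * bits actually change and remember the affected range.  Returns 0 when there
 * is nothing to record, otherwise the ulist_add() result.
 */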
132static int add_extent_changeset(struct extent_state *state, unsigned bits,
133 struct extent_changeset *changeset,
134 int set)
135{
136 int ret;
137
138 if (!changeset)
139 return 0;
140 if (set && (state->state & bits) == bits)
141 return 0;
142 if (!set && (state->state & bits) == 0)
143 return 0;
144 changeset->bytes_changed += state->end - state->start + 1;
145 ret = ulist_add(&changeset->range_changed, state->start, state->end,
146 GFP_ATOMIC);
147 return ret;
148}
149
150static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
151 unsigned long bio_flags)
152{
153 blk_status_t ret = 0;
154 struct bio_vec *bvec = bio_last_bvec_all(bio);
155 struct bio_vec bv;
156 struct extent_io_tree *tree = bio->bi_private;
157 u64 start;
158
159 mp_bvec_last_segment(bvec, &bv);
160 start = page_offset(bv.bv_page) + bv.bv_offset;
161
162 bio->bi_private = NULL;
163
164 if (tree->ops)
165 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
166 mirror_num, bio_flags, start);
167 else
168 btrfsic_submit_bio(bio);
169
170 return blk_status_to_errno(ret);
171}
172
173static void flush_write_bio(struct extent_page_data *epd)
174{
175 if (epd->bio) {
176 int ret;
177
178 ret = submit_one_bio(epd->bio, 0, 0);
179 BUG_ON(ret < 0);
180 epd->bio = NULL;
181 }
182}
183
184int __init extent_io_init(void)
185{
186 extent_state_cache = kmem_cache_create("btrfs_extent_state",
187 sizeof(struct extent_state), 0,
188 SLAB_MEM_SPREAD, NULL);
189 if (!extent_state_cache)
190 return -ENOMEM;
191
192 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
193 sizeof(struct extent_buffer), 0,
194 SLAB_MEM_SPREAD, NULL);
195 if (!extent_buffer_cache)
196 goto free_state_cache;
197
198 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
199 offsetof(struct btrfs_io_bio, bio),
200 BIOSET_NEED_BVECS))
201 goto free_buffer_cache;
202
203 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
204 goto free_bioset;
205
206 return 0;
207
208free_bioset:
209 bioset_exit(&btrfs_bioset);
210
211free_buffer_cache:
212 kmem_cache_destroy(extent_buffer_cache);
213 extent_buffer_cache = NULL;
214
215free_state_cache:
216 kmem_cache_destroy(extent_state_cache);
217 extent_state_cache = NULL;
218 return -ENOMEM;
219}
220
221void __cold extent_io_exit(void)
222{
223 btrfs_leak_debug_check();
224
225
226
227
228
229 rcu_barrier();
230 kmem_cache_destroy(extent_state_cache);
231 kmem_cache_destroy(extent_buffer_cache);
232 bioset_exit(&btrfs_bioset);
233}
234
235void extent_io_tree_init(struct extent_io_tree *tree,
236 void *private_data)
237{
238 tree->state = RB_ROOT;
239 tree->ops = NULL;
240 tree->dirty_bytes = 0;
241 spin_lock_init(&tree->lock);
242 tree->private_data = private_data;
243}
244
245static struct extent_state *alloc_extent_state(gfp_t mask)
246{
247 struct extent_state *state;
248
249
250
251
252
253 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
254 state = kmem_cache_alloc(extent_state_cache, mask);
255 if (!state)
256 return state;
257 state->state = 0;
258 state->failrec = NULL;
259 RB_CLEAR_NODE(&state->rb_node);
260 btrfs_leak_debug_add(&state->leak_list, &states);
261 refcount_set(&state->refs, 1);
262 init_waitqueue_head(&state->wq);
263 trace_alloc_extent_state(state, mask, _RET_IP_);
264 return state;
265}
266
267void free_extent_state(struct extent_state *state)
268{
269 if (!state)
270 return;
271 if (refcount_dec_and_test(&state->refs)) {
272 WARN_ON(extent_state_in_tree(state));
273 btrfs_leak_debug_del(&state->leak_list);
274 trace_free_extent_state(state, _RET_IP_);
275 kmem_cache_free(extent_state_cache, state);
276 }
277}
278
279static struct rb_node *tree_insert(struct rb_root *root,
280 struct rb_node *search_start,
281 u64 offset,
282 struct rb_node *node,
283 struct rb_node ***p_in,
284 struct rb_node **parent_in)
285{
286 struct rb_node **p;
287 struct rb_node *parent = NULL;
288 struct tree_entry *entry;
289
290 if (p_in && parent_in) {
291 p = *p_in;
292 parent = *parent_in;
293 goto do_insert;
294 }
295
296 p = search_start ? &search_start : &root->rb_node;
297 while (*p) {
298 parent = *p;
299 entry = rb_entry(parent, struct tree_entry, rb_node);
300
301 if (offset < entry->start)
302 p = &(*p)->rb_left;
303 else if (offset > entry->end)
304 p = &(*p)->rb_right;
305 else
306 return parent;
307 }
308
309do_insert:
310 rb_link_node(node, parent, p);
311 rb_insert_color(node, root);
312 return NULL;
313}
314
315static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
316 struct rb_node **next_ret,
317 struct rb_node **prev_ret,
318 struct rb_node ***p_ret,
319 struct rb_node **parent_ret)
320{
321 struct rb_root *root = &tree->state;
322 struct rb_node **n = &root->rb_node;
323 struct rb_node *prev = NULL;
324 struct rb_node *orig_prev = NULL;
325 struct tree_entry *entry;
326 struct tree_entry *prev_entry = NULL;
327
328 while (*n) {
329 prev = *n;
330 entry = rb_entry(prev, struct tree_entry, rb_node);
331 prev_entry = entry;
332
333 if (offset < entry->start)
334 n = &(*n)->rb_left;
335 else if (offset > entry->end)
336 n = &(*n)->rb_right;
337 else
338 return *n;
339 }
340
341 if (p_ret)
342 *p_ret = n;
343 if (parent_ret)
344 *parent_ret = prev;
345
346 if (next_ret) {
347 orig_prev = prev;
348 while (prev && offset > prev_entry->end) {
349 prev = rb_next(prev);
350 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
351 }
352 *next_ret = prev;
353 prev = orig_prev;
354 }
355
356 if (prev_ret) {
357 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
358 while (prev && offset < prev_entry->start) {
359 prev = rb_prev(prev);
360 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
361 }
362 *prev_ret = prev;
363 }
364 return NULL;
365}
366
367static inline struct rb_node *
368tree_search_for_insert(struct extent_io_tree *tree,
369 u64 offset,
370 struct rb_node ***p_ret,
371 struct rb_node **parent_ret)
372{
	struct rb_node *next = NULL;
374 struct rb_node *ret;
375
376 ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
377 if (!ret)
378 return next;
379 return ret;
380}
381
382static inline struct rb_node *tree_search(struct extent_io_tree *tree,
383 u64 offset)
384{
385 return tree_search_for_insert(tree, offset, NULL, NULL);
386}
387
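/*
 * Try to merge @state with its neighbours in the tree.  Adjacent states are
 * merged when they touch and carry exactly the same bits, which keeps the
 * tree small.  States with EXTENT_IOBITS or EXTENT_BOUNDARY set are never
 * merged.
 */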
397static void merge_state(struct extent_io_tree *tree,
398 struct extent_state *state)
399{
400 struct extent_state *other;
401 struct rb_node *other_node;
402
403 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
404 return;
405
406 other_node = rb_prev(&state->rb_node);
407 if (other_node) {
408 other = rb_entry(other_node, struct extent_state, rb_node);
409 if (other->end == state->start - 1 &&
410 other->state == state->state) {
411 if (tree->private_data &&
412 is_data_inode(tree->private_data))
413 btrfs_merge_delalloc_extent(tree->private_data,
414 state, other);
415 state->start = other->start;
416 rb_erase(&other->rb_node, &tree->state);
417 RB_CLEAR_NODE(&other->rb_node);
418 free_extent_state(other);
419 }
420 }
421 other_node = rb_next(&state->rb_node);
422 if (other_node) {
423 other = rb_entry(other_node, struct extent_state, rb_node);
424 if (other->start == state->end + 1 &&
425 other->state == state->state) {
426 if (tree->private_data &&
427 is_data_inode(tree->private_data))
428 btrfs_merge_delalloc_extent(tree->private_data,
429 state, other);
430 state->end = other->end;
431 rb_erase(&other->rb_node, &tree->state);
432 RB_CLEAR_NODE(&other->rb_node);
433 free_extent_state(other);
434 }
435 }
436}
437
438static void set_state_bits(struct extent_io_tree *tree,
439 struct extent_state *state, unsigned *bits,
440 struct extent_changeset *changeset);
441
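/*
 * Insert an extent_state for the range [start, end] into the tree, setting
 * @bits on it first.  Returns -EEXIST if a state already covers part of the
 * range, in which case the caller must free the passed-in state.
 */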
452static int insert_state(struct extent_io_tree *tree,
453 struct extent_state *state, u64 start, u64 end,
454 struct rb_node ***p,
455 struct rb_node **parent,
456 unsigned *bits, struct extent_changeset *changeset)
457{
458 struct rb_node *node;
459
460 if (end < start)
461 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
462 end, start);
463 state->start = start;
464 state->end = end;
465
466 set_state_bits(tree, state, bits, changeset);
467
468 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
469 if (node) {
470 struct extent_state *found;
471 found = rb_entry(node, struct extent_state, rb_node);
472 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
473 found->start, found->end, start, end);
474 return -EEXIST;
475 }
476 merge_state(tree, state);
477 return 0;
478}
479
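/*
 * Split an extent state at @split: @prealloc is inserted to cover
 * [orig->start, split - 1] and @orig is shrunk to cover [split, orig->end].
 * Called with the tree lock held.
 */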
494static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
495 struct extent_state *prealloc, u64 split)
496{
497 struct rb_node *node;
498
499 if (tree->private_data && is_data_inode(tree->private_data))
500 btrfs_split_delalloc_extent(tree->private_data, orig, split);
501
502 prealloc->start = orig->start;
503 prealloc->end = split - 1;
504 prealloc->state = orig->state;
505 orig->start = split;
506
507 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
508 &prealloc->rb_node, NULL, NULL);
509 if (node) {
510 free_extent_state(prealloc);
511 return -EEXIST;
512 }
513 return 0;
514}
515
516static struct extent_state *next_state(struct extent_state *state)
517{
518 struct rb_node *next = rb_next(&state->rb_node);
519 if (next)
520 return rb_entry(next, struct extent_state, rb_node);
521 else
522 return NULL;
523}
524
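/*
 * Clear the given bits from @state.  If no bits remain set the state is
 * removed from the tree and freed, otherwise it is merged with its
 * neighbours where possible.  Returns the next state in the tree, or NULL.
 */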
532static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
533 struct extent_state *state,
534 unsigned *bits, int wake,
535 struct extent_changeset *changeset)
536{
537 struct extent_state *next;
538 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
539 int ret;
540
541 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
542 u64 range = state->end - state->start + 1;
543 WARN_ON(range > tree->dirty_bytes);
544 tree->dirty_bytes -= range;
545 }
546
547 if (tree->private_data && is_data_inode(tree->private_data))
548 btrfs_clear_delalloc_extent(tree->private_data, state, bits);
549
550 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
551 BUG_ON(ret < 0);
552 state->state &= ~bits_to_clear;
553 if (wake)
554 wake_up(&state->wq);
555 if (state->state == 0) {
556 next = next_state(state);
557 if (extent_state_in_tree(state)) {
558 rb_erase(&state->rb_node, &tree->state);
559 RB_CLEAR_NODE(&state->rb_node);
560 free_extent_state(state);
561 } else {
562 WARN_ON(1);
563 }
564 } else {
565 merge_state(tree, state);
566 next = next_state(state);
567 }
568 return next;
569}
570
571static struct extent_state *
572alloc_extent_state_atomic(struct extent_state *prealloc)
573{
574 if (!prealloc)
575 prealloc = alloc_extent_state(GFP_ATOMIC);
576
577 return prealloc;
578}
579
580static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
581{
582 struct inode *inode = tree->private_data;
583
584 btrfs_panic(btrfs_sb(inode->i_sb), err,
585 "locking error: extent tree was modified by another thread while locked");
586}
587
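/*
 * Clear bits on the range [start, end] inclusive, splitting states at the
 * boundaries as needed.  Waiters are woken when @wake is set and fully
 * cleared states are removed from the tree.  Returns 0; internal tree
 * corruption triggers extent_io_tree_panic().
 */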
600int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
601 unsigned bits, int wake, int delete,
602 struct extent_state **cached_state,
603 gfp_t mask, struct extent_changeset *changeset)
604{
605 struct extent_state *state;
606 struct extent_state *cached;
607 struct extent_state *prealloc = NULL;
608 struct rb_node *node;
609 u64 last_end;
610 int err;
611 int clear = 0;
612
613 btrfs_debug_check_extent_io_range(tree, start, end);
614
615 if (bits & EXTENT_DELALLOC)
616 bits |= EXTENT_NORESERVE;
617
618 if (delete)
619 bits |= ~EXTENT_CTLBITS;
620
621 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
622 clear = 1;
623again:
624 if (!prealloc && gfpflags_allow_blocking(mask)) {
625
626
627
628
629
630
631
632 prealloc = alloc_extent_state(mask);
633 }
634
635 spin_lock(&tree->lock);
636 if (cached_state) {
637 cached = *cached_state;
638
639 if (clear) {
640 *cached_state = NULL;
641 cached_state = NULL;
642 }
643
644 if (cached && extent_state_in_tree(cached) &&
645 cached->start <= start && cached->end > start) {
646 if (clear)
647 refcount_dec(&cached->refs);
648 state = cached;
649 goto hit_next;
650 }
651 if (clear)
652 free_extent_state(cached);
653 }
654
655
656
657
658 node = tree_search(tree, start);
659 if (!node)
660 goto out;
661 state = rb_entry(node, struct extent_state, rb_node);
662hit_next:
663 if (state->start > end)
664 goto out;
665 WARN_ON(state->end < start);
666 last_end = state->end;
667
668
669 if (!(state->state & bits)) {
670 state = next_state(state);
671 goto next;
672 }
673
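	/*
	 * This state straddles the start of our range.  Split it so the bits
	 * are cleared only on the part inside [start, end]; the part before
	 * @start keeps its bits untouched.
	 */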
690 if (state->start < start) {
691 prealloc = alloc_extent_state_atomic(prealloc);
692 BUG_ON(!prealloc);
693 err = split_state(tree, state, prealloc, start);
694 if (err)
695 extent_io_tree_panic(tree, err);
696
697 prealloc = NULL;
698 if (err)
699 goto out;
700 if (state->end <= end) {
701 state = clear_state_bit(tree, state, &bits, wake,
702 changeset);
703 goto next;
704 }
705 goto search_again;
706 }
707
708
709
710
711
712
713 if (state->start <= end && state->end > end) {
714 prealloc = alloc_extent_state_atomic(prealloc);
715 BUG_ON(!prealloc);
716 err = split_state(tree, state, prealloc, end + 1);
717 if (err)
718 extent_io_tree_panic(tree, err);
719
720 if (wake)
721 wake_up(&state->wq);
722
723 clear_state_bit(tree, prealloc, &bits, wake, changeset);
724
725 prealloc = NULL;
726 goto out;
727 }
728
729 state = clear_state_bit(tree, state, &bits, wake, changeset);
730next:
731 if (last_end == (u64)-1)
732 goto out;
733 start = last_end + 1;
734 if (start <= end && state && !need_resched())
735 goto hit_next;
736
737search_again:
738 if (start > end)
739 goto out;
740 spin_unlock(&tree->lock);
741 if (gfpflags_allow_blocking(mask))
742 cond_resched();
743 goto again;
744
745out:
746 spin_unlock(&tree->lock);
747 if (prealloc)
748 free_extent_state(prealloc);
749
750 return 0;
751
752}
753
754static void wait_on_state(struct extent_io_tree *tree,
755 struct extent_state *state)
756 __releases(tree->lock)
757 __acquires(tree->lock)
758{
759 DEFINE_WAIT(wait);
760 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
761 spin_unlock(&tree->lock);
762 schedule();
763 spin_lock(&tree->lock);
764 finish_wait(&state->wq, &wait);
765}
766
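/*
 * Wait for all states in the inclusive range [start, end] that have any of
 * the given bits set to drop those bits, sleeping on each state's waitqueue.
 * The tree lock is taken and retaken internally.
 */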
772static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
773 unsigned long bits)
774{
775 struct extent_state *state;
776 struct rb_node *node;
777
778 btrfs_debug_check_extent_io_range(tree, start, end);
779
780 spin_lock(&tree->lock);
781again:
782 while (1) {
783
784
785
786
787 node = tree_search(tree, start);
788process_node:
789 if (!node)
790 break;
791
792 state = rb_entry(node, struct extent_state, rb_node);
793
794 if (state->start > end)
795 goto out;
796
797 if (state->state & bits) {
798 start = state->start;
799 refcount_inc(&state->refs);
800 wait_on_state(tree, state);
801 free_extent_state(state);
802 goto again;
803 }
804 start = state->end + 1;
805
806 if (start > end)
807 break;
808
809 if (!cond_resched_lock(&tree->lock)) {
810 node = rb_next(node);
811 goto process_node;
812 }
813 }
814out:
815 spin_unlock(&tree->lock);
816}
817
818static void set_state_bits(struct extent_io_tree *tree,
819 struct extent_state *state,
820 unsigned *bits, struct extent_changeset *changeset)
821{
822 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
823 int ret;
824
825 if (tree->private_data && is_data_inode(tree->private_data))
826 btrfs_set_delalloc_extent(tree->private_data, state, bits);
827
828 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
829 u64 range = state->end - state->start + 1;
830 tree->dirty_bytes += range;
831 }
832 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
833 BUG_ON(ret < 0);
834 state->state |= bits_to_set;
835}
836
837static void cache_state_if_flags(struct extent_state *state,
838 struct extent_state **cached_ptr,
839 unsigned flags)
840{
841 if (cached_ptr && !(*cached_ptr)) {
842 if (!flags || (state->state & flags)) {
843 *cached_ptr = state;
844 refcount_inc(&state->refs);
845 }
846 }
847}
848
849static void cache_state(struct extent_state *state,
850 struct extent_state **cached_ptr)
851{
852 return cache_state_if_flags(state, cached_ptr,
853 EXTENT_IOBITS | EXTENT_BOUNDARY);
854}
855
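/*
 * Set the given bits on the range [start, end] inclusive, splitting states at
 * the boundaries as needed.  If @exclusive_bits is non-zero and an existing
 * state already has any of those bits set, -EEXIST is returned and
 * *failed_start records the conflicting offset; this is how EXTENT_LOCKED
 * implements range locking.
 */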
867static int __must_check
868__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
869 unsigned bits, unsigned exclusive_bits,
870 u64 *failed_start, struct extent_state **cached_state,
871 gfp_t mask, struct extent_changeset *changeset)
872{
873 struct extent_state *state;
874 struct extent_state *prealloc = NULL;
875 struct rb_node *node;
876 struct rb_node **p;
877 struct rb_node *parent;
878 int err = 0;
879 u64 last_start;
880 u64 last_end;
881
882 btrfs_debug_check_extent_io_range(tree, start, end);
883
884again:
885 if (!prealloc && gfpflags_allow_blocking(mask)) {
886
887
888
889
890
891
892
893 prealloc = alloc_extent_state(mask);
894 }
895
896 spin_lock(&tree->lock);
897 if (cached_state && *cached_state) {
898 state = *cached_state;
899 if (state->start <= start && state->end > start &&
900 extent_state_in_tree(state)) {
901 node = &state->rb_node;
902 goto hit_next;
903 }
904 }
905
906
907
908
909 node = tree_search_for_insert(tree, start, &p, &parent);
910 if (!node) {
911 prealloc = alloc_extent_state_atomic(prealloc);
912 BUG_ON(!prealloc);
913 err = insert_state(tree, prealloc, start, end,
914 &p, &parent, &bits, changeset);
915 if (err)
916 extent_io_tree_panic(tree, err);
917
918 cache_state(prealloc, cached_state);
919 prealloc = NULL;
920 goto out;
921 }
922 state = rb_entry(node, struct extent_state, rb_node);
923hit_next:
924 last_start = state->start;
925 last_end = state->end;
926
927
928
929
930
931
932
933 if (state->start == start && state->end <= end) {
934 if (state->state & exclusive_bits) {
935 *failed_start = state->start;
936 err = -EEXIST;
937 goto out;
938 }
939
940 set_state_bits(tree, state, &bits, changeset);
941 cache_state(state, cached_state);
942 merge_state(tree, state);
943 if (last_end == (u64)-1)
944 goto out;
945 start = last_end + 1;
946 state = next_state(state);
947 if (start < end && state && state->start == start &&
948 !need_resched())
949 goto hit_next;
950 goto search_again;
951 }
952
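	/*
	 * The existing state straddles our start.  Split it and set the bits
	 * only on the part that falls inside [start, end].
	 */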
969 if (state->start < start) {
970 if (state->state & exclusive_bits) {
971 *failed_start = start;
972 err = -EEXIST;
973 goto out;
974 }
975
976 prealloc = alloc_extent_state_atomic(prealloc);
977 BUG_ON(!prealloc);
978 err = split_state(tree, state, prealloc, start);
979 if (err)
980 extent_io_tree_panic(tree, err);
981
982 prealloc = NULL;
983 if (err)
984 goto out;
985 if (state->end <= end) {
986 set_state_bits(tree, state, &bits, changeset);
987 cache_state(state, cached_state);
988 merge_state(tree, state);
989 if (last_end == (u64)-1)
990 goto out;
991 start = last_end + 1;
992 state = next_state(state);
993 if (start < end && state && state->start == start &&
994 !need_resched())
995 goto hit_next;
996 }
997 goto search_again;
998 }
999
1000
1001
1002
1003
1004
1005
1006 if (state->start > start) {
1007 u64 this_end;
1008 if (end < last_start)
1009 this_end = end;
1010 else
1011 this_end = last_start - 1;
1012
1013 prealloc = alloc_extent_state_atomic(prealloc);
1014 BUG_ON(!prealloc);
1015
1016
1017
1018
1019
1020 err = insert_state(tree, prealloc, start, this_end,
1021 NULL, NULL, &bits, changeset);
1022 if (err)
1023 extent_io_tree_panic(tree, err);
1024
1025 cache_state(prealloc, cached_state);
1026 prealloc = NULL;
1027 start = this_end + 1;
1028 goto search_again;
1029 }
1030
1031
1032
1033
1034
1035
1036 if (state->start <= end && state->end > end) {
1037 if (state->state & exclusive_bits) {
1038 *failed_start = start;
1039 err = -EEXIST;
1040 goto out;
1041 }
1042
1043 prealloc = alloc_extent_state_atomic(prealloc);
1044 BUG_ON(!prealloc);
1045 err = split_state(tree, state, prealloc, end + 1);
1046 if (err)
1047 extent_io_tree_panic(tree, err);
1048
1049 set_state_bits(tree, prealloc, &bits, changeset);
1050 cache_state(prealloc, cached_state);
1051 merge_state(tree, prealloc);
1052 prealloc = NULL;
1053 goto out;
1054 }
1055
1056search_again:
1057 if (start > end)
1058 goto out;
1059 spin_unlock(&tree->lock);
1060 if (gfpflags_allow_blocking(mask))
1061 cond_resched();
1062 goto again;
1063
1064out:
1065 spin_unlock(&tree->lock);
1066 if (prealloc)
1067 free_extent_state(prealloc);
1068
1069 return err;
1070
1071}
1072
1073int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		   unsigned bits, u64 *failed_start,
1075 struct extent_state **cached_state, gfp_t mask)
1076{
1077 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1078 cached_state, mask, NULL);
1079}
1080
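/*
 * convert_extent_bit - convert all bits in a given range from one bit to
 * another
 *
 * Sets @bits and clears @clear_bits on [start, end] in a single pass under
 * the tree lock.  Allocations use GFP_NOFS and, unlike set/clear, this can
 * return -ENOMEM once the preallocated state has been consumed.
 */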
1100int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1101 unsigned bits, unsigned clear_bits,
1102 struct extent_state **cached_state)
1103{
1104 struct extent_state *state;
1105 struct extent_state *prealloc = NULL;
1106 struct rb_node *node;
1107 struct rb_node **p;
1108 struct rb_node *parent;
1109 int err = 0;
1110 u64 last_start;
1111 u64 last_end;
1112 bool first_iteration = true;
1113
1114 btrfs_debug_check_extent_io_range(tree, start, end);
1115
1116again:
1117 if (!prealloc) {
1118
1119
1120
1121
1122
1123
1124
1125 prealloc = alloc_extent_state(GFP_NOFS);
1126 if (!prealloc && !first_iteration)
1127 return -ENOMEM;
1128 }
1129
1130 spin_lock(&tree->lock);
1131 if (cached_state && *cached_state) {
1132 state = *cached_state;
1133 if (state->start <= start && state->end > start &&
1134 extent_state_in_tree(state)) {
1135 node = &state->rb_node;
1136 goto hit_next;
1137 }
1138 }
1139
1140
1141
1142
1143
1144 node = tree_search_for_insert(tree, start, &p, &parent);
1145 if (!node) {
1146 prealloc = alloc_extent_state_atomic(prealloc);
1147 if (!prealloc) {
1148 err = -ENOMEM;
1149 goto out;
1150 }
1151 err = insert_state(tree, prealloc, start, end,
1152 &p, &parent, &bits, NULL);
1153 if (err)
1154 extent_io_tree_panic(tree, err);
1155 cache_state(prealloc, cached_state);
1156 prealloc = NULL;
1157 goto out;
1158 }
1159 state = rb_entry(node, struct extent_state, rb_node);
1160hit_next:
1161 last_start = state->start;
1162 last_end = state->end;
1163
1164
1165
1166
1167
1168
1169
1170 if (state->start == start && state->end <= end) {
1171 set_state_bits(tree, state, &bits, NULL);
1172 cache_state(state, cached_state);
1173 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1174 if (last_end == (u64)-1)
1175 goto out;
1176 start = last_end + 1;
1177 if (start < end && state && state->start == start &&
1178 !need_resched())
1179 goto hit_next;
1180 goto search_again;
1181 }
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199 if (state->start < start) {
1200 prealloc = alloc_extent_state_atomic(prealloc);
1201 if (!prealloc) {
1202 err = -ENOMEM;
1203 goto out;
1204 }
1205 err = split_state(tree, state, prealloc, start);
1206 if (err)
1207 extent_io_tree_panic(tree, err);
1208 prealloc = NULL;
1209 if (err)
1210 goto out;
1211 if (state->end <= end) {
1212 set_state_bits(tree, state, &bits, NULL);
1213 cache_state(state, cached_state);
1214 state = clear_state_bit(tree, state, &clear_bits, 0,
1215 NULL);
1216 if (last_end == (u64)-1)
1217 goto out;
1218 start = last_end + 1;
1219 if (start < end && state && state->start == start &&
1220 !need_resched())
1221 goto hit_next;
1222 }
1223 goto search_again;
1224 }
1225
1226
1227
1228
1229
1230
1231
1232 if (state->start > start) {
1233 u64 this_end;
1234 if (end < last_start)
1235 this_end = end;
1236 else
1237 this_end = last_start - 1;
1238
1239 prealloc = alloc_extent_state_atomic(prealloc);
1240 if (!prealloc) {
1241 err = -ENOMEM;
1242 goto out;
1243 }
1244
1245
1246
1247
1248
1249 err = insert_state(tree, prealloc, start, this_end,
1250 NULL, NULL, &bits, NULL);
1251 if (err)
1252 extent_io_tree_panic(tree, err);
1253 cache_state(prealloc, cached_state);
1254 prealloc = NULL;
1255 start = this_end + 1;
1256 goto search_again;
1257 }
1258
1259
1260
1261
1262
1263
1264 if (state->start <= end && state->end > end) {
1265 prealloc = alloc_extent_state_atomic(prealloc);
1266 if (!prealloc) {
1267 err = -ENOMEM;
1268 goto out;
1269 }
1270
1271 err = split_state(tree, state, prealloc, end + 1);
1272 if (err)
1273 extent_io_tree_panic(tree, err);
1274
1275 set_state_bits(tree, prealloc, &bits, NULL);
1276 cache_state(prealloc, cached_state);
1277 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1278 prealloc = NULL;
1279 goto out;
1280 }
1281
1282search_again:
1283 if (start > end)
1284 goto out;
1285 spin_unlock(&tree->lock);
1286 cond_resched();
1287 first_iteration = false;
1288 goto again;
1289
1290out:
1291 spin_unlock(&tree->lock);
1292 if (prealloc)
1293 free_extent_state(prealloc);
1294
1295 return err;
1296}
1297
1298
1299int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1300 unsigned bits, struct extent_changeset *changeset)
1301{
1302
1303
1304
1305
1306
1307
1308 BUG_ON(bits & EXTENT_LOCKED);
1309
1310 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1311 changeset);
1312}
1313
1314int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1315 unsigned bits, int wake, int delete,
1316 struct extent_state **cached)
1317{
1318 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1319 cached, GFP_NOFS, NULL);
1320}
1321
1322int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1323 unsigned bits, struct extent_changeset *changeset)
1324{
1325
1326
1327
1328
1329 BUG_ON(bits & EXTENT_LOCKED);
1330
1331 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1332 changeset);
1333}
1334
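/*
 * Lock the range [start, end] in the io tree by setting EXTENT_LOCKED on it,
 * waiting until any conflicting lock has been released before retrying.
 */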
1339int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1340 struct extent_state **cached_state)
1341{
1342 int err;
1343 u64 failed_start;
1344
1345 while (1) {
1346 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1347 EXTENT_LOCKED, &failed_start,
1348 cached_state, GFP_NOFS, NULL);
1349 if (err == -EEXIST) {
1350 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1351 start = failed_start;
1352 } else
1353 break;
1354 WARN_ON(start > end);
1355 }
1356 return err;
1357}
1358
1359int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1360{
1361 int err;
1362 u64 failed_start;
1363
1364 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1365 &failed_start, NULL, GFP_NOFS, NULL);
1366 if (err == -EEXIST) {
1367 if (failed_start > start)
1368 clear_extent_bit(tree, start, failed_start - 1,
1369 EXTENT_LOCKED, 1, 0, NULL);
1370 return 0;
1371 }
1372 return 1;
1373}
1374
1375void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1376{
1377 unsigned long index = start >> PAGE_SHIFT;
1378 unsigned long end_index = end >> PAGE_SHIFT;
1379 struct page *page;
1380
1381 while (index <= end_index) {
1382 page = find_get_page(inode->i_mapping, index);
1383 BUG_ON(!page);
1384 clear_page_dirty_for_io(page);
1385 put_page(page);
1386 index++;
1387 }
1388}
1389
1390void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1391{
1392 unsigned long index = start >> PAGE_SHIFT;
1393 unsigned long end_index = end >> PAGE_SHIFT;
1394 struct page *page;
1395
1396 while (index <= end_index) {
1397 page = find_get_page(inode->i_mapping, index);
1398 BUG_ON(!page);
1399 __set_page_dirty_nobuffers(page);
1400 account_page_redirty(page);
1401 put_page(page);
1402 index++;
1403 }
1404}
1405
1406
1407
1408
1409
1410static struct extent_state *
1411find_first_extent_bit_state(struct extent_io_tree *tree,
1412 u64 start, unsigned bits)
1413{
1414 struct rb_node *node;
1415 struct extent_state *state;
1416
1417
1418
1419
1420
1421 node = tree_search(tree, start);
1422 if (!node)
1423 goto out;
1424
1425 while (1) {
1426 state = rb_entry(node, struct extent_state, rb_node);
1427 if (state->end >= start && (state->state & bits))
1428 return state;
1429
1430 node = rb_next(node);
1431 if (!node)
1432 break;
1433 }
1434out:
1435 return NULL;
1436}
1437
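/*
 * Find the first range at or after @start that has @bits set, filling in
 * *start_ret and *end_ret.  Returns 0 on success and 1 if nothing was found.
 * The cached state is updated so repeated callers can resume the search.
 */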
1445int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1446 u64 *start_ret, u64 *end_ret, unsigned bits,
1447 struct extent_state **cached_state)
1448{
1449 struct extent_state *state;
1450 int ret = 1;
1451
1452 spin_lock(&tree->lock);
1453 if (cached_state && *cached_state) {
1454 state = *cached_state;
1455 if (state->end == start - 1 && extent_state_in_tree(state)) {
1456 while ((state = next_state(state)) != NULL) {
1457 if (state->state & bits)
1458 goto got_it;
1459 }
1460 free_extent_state(*cached_state);
1461 *cached_state = NULL;
1462 goto out;
1463 }
1464 free_extent_state(*cached_state);
1465 *cached_state = NULL;
1466 }
1467
1468 state = find_first_extent_bit_state(tree, start, bits);
1469got_it:
1470 if (state) {
1471 cache_state_if_flags(state, cached_state, 0);
1472 *start_ret = state->start;
1473 *end_ret = state->end;
1474 ret = 0;
1475 }
1476out:
1477 spin_unlock(&tree->lock);
1478 return ret;
1479}
1480
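/*
 * Find a contiguous delalloc range starting at *start, capped at @max_bytes.
 * *start and *end are set to the range boundaries, *cached_state references
 * the first state found, and the return value says whether any delalloc
 * bytes were found at all.
 */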
1487static noinline bool find_delalloc_range(struct extent_io_tree *tree,
1488 u64 *start, u64 *end, u64 max_bytes,
1489 struct extent_state **cached_state)
1490{
1491 struct rb_node *node;
1492 struct extent_state *state;
1493 u64 cur_start = *start;
1494 bool found = false;
1495 u64 total_bytes = 0;
1496
1497 spin_lock(&tree->lock);
1498
1499
1500
1501
1502
1503 node = tree_search(tree, cur_start);
1504 if (!node) {
1505 *end = (u64)-1;
1506 goto out;
1507 }
1508
1509 while (1) {
1510 state = rb_entry(node, struct extent_state, rb_node);
1511 if (found && (state->start != cur_start ||
1512 (state->state & EXTENT_BOUNDARY))) {
1513 goto out;
1514 }
1515 if (!(state->state & EXTENT_DELALLOC)) {
1516 if (!found)
1517 *end = state->end;
1518 goto out;
1519 }
1520 if (!found) {
1521 *start = state->start;
1522 *cached_state = state;
1523 refcount_inc(&state->refs);
1524 }
1525 found = true;
1526 *end = state->end;
1527 cur_start = state->end + 1;
1528 node = rb_next(node);
1529 total_bytes += state->end - state->start + 1;
1530 if (total_bytes >= max_bytes)
1531 break;
1532 if (!node)
1533 break;
1534 }
1535out:
1536 spin_unlock(&tree->lock);
1537 return found;
1538}
1539
1540static int __process_pages_contig(struct address_space *mapping,
1541 struct page *locked_page,
1542 pgoff_t start_index, pgoff_t end_index,
1543 unsigned long page_ops, pgoff_t *index_ret);
1544
1545static noinline void __unlock_for_delalloc(struct inode *inode,
1546 struct page *locked_page,
1547 u64 start, u64 end)
1548{
1549 unsigned long index = start >> PAGE_SHIFT;
1550 unsigned long end_index = end >> PAGE_SHIFT;
1551
1552 ASSERT(locked_page);
1553 if (index == locked_page->index && end_index == index)
1554 return;
1555
1556 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1557 PAGE_UNLOCK, NULL);
1558}
1559
1560static noinline int lock_delalloc_pages(struct inode *inode,
1561 struct page *locked_page,
1562 u64 delalloc_start,
1563 u64 delalloc_end)
1564{
1565 unsigned long index = delalloc_start >> PAGE_SHIFT;
1566 unsigned long index_ret = index;
1567 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1568 int ret;
1569
1570 ASSERT(locked_page);
1571 if (index == locked_page->index && index == end_index)
1572 return 0;
1573
1574 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1575 end_index, PAGE_LOCK, &index_ret);
1576 if (ret == -EAGAIN)
1577 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1578 (u64)index_ret << PAGE_SHIFT);
1579 return ret;
1580}
1581
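/*
 * Find and lock a contiguous delalloc range: look the range up in the io
 * tree, lock its pages and the extent state, then re-check that it is still
 * delalloc.  Returns true with [*start, *end] locked on success.
 */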
1589EXPORT_FOR_TESTS
1590noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
1591 struct extent_io_tree *tree,
1592 struct page *locked_page, u64 *start,
1593 u64 *end)
1594{
1595 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
1596 u64 delalloc_start;
1597 u64 delalloc_end;
1598 bool found;
1599 struct extent_state *cached_state = NULL;
1600 int ret;
1601 int loops = 0;
1602
1603again:
1604
1605 delalloc_start = *start;
1606 delalloc_end = 0;
1607 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1608 max_bytes, &cached_state);
1609 if (!found || delalloc_end <= *start) {
1610 *start = delalloc_start;
1611 *end = delalloc_end;
1612 free_extent_state(cached_state);
1613 return false;
1614 }
1615
1616
1617
1618
1619
1620
1621 if (delalloc_start < *start)
1622 delalloc_start = *start;
1623
1624
1625
1626
1627 if (delalloc_end + 1 - delalloc_start > max_bytes)
1628 delalloc_end = delalloc_start + max_bytes - 1;
1629
1630
1631 ret = lock_delalloc_pages(inode, locked_page,
1632 delalloc_start, delalloc_end);
1633 ASSERT(!ret || ret == -EAGAIN);
1634 if (ret == -EAGAIN) {
1635
1636
1637
1638 free_extent_state(cached_state);
1639 cached_state = NULL;
1640 if (!loops) {
1641 max_bytes = PAGE_SIZE;
1642 loops = 1;
1643 goto again;
1644 } else {
1645 found = false;
1646 goto out_failed;
1647 }
1648 }
1649
1650
1651 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1652
1653
1654 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1655 EXTENT_DELALLOC, 1, cached_state);
1656 if (!ret) {
1657 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1658 &cached_state);
1659 __unlock_for_delalloc(inode, locked_page,
1660 delalloc_start, delalloc_end);
1661 cond_resched();
1662 goto again;
1663 }
1664 free_extent_state(cached_state);
1665 *start = delalloc_start;
1666 *end = delalloc_end;
1667out_failed:
1668 return found;
1669}
1670
1671static int __process_pages_contig(struct address_space *mapping,
1672 struct page *locked_page,
1673 pgoff_t start_index, pgoff_t end_index,
1674 unsigned long page_ops, pgoff_t *index_ret)
1675{
1676 unsigned long nr_pages = end_index - start_index + 1;
1677 unsigned long pages_locked = 0;
1678 pgoff_t index = start_index;
1679 struct page *pages[16];
1680 unsigned ret;
1681 int err = 0;
1682 int i;
1683
1684 if (page_ops & PAGE_LOCK) {
1685 ASSERT(page_ops == PAGE_LOCK);
1686 ASSERT(index_ret && *index_ret == start_index);
1687 }
1688
1689 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1690 mapping_set_error(mapping, -EIO);
1691
1692 while (nr_pages > 0) {
1693 ret = find_get_pages_contig(mapping, index,
1694 min_t(unsigned long,
1695 nr_pages, ARRAY_SIZE(pages)), pages);
1696 if (ret == 0) {
1697
1698
1699
1700
1701 ASSERT(page_ops & PAGE_LOCK);
1702 err = -EAGAIN;
1703 goto out;
1704 }
1705
1706 for (i = 0; i < ret; i++) {
1707 if (page_ops & PAGE_SET_PRIVATE2)
1708 SetPagePrivate2(pages[i]);
1709
1710 if (pages[i] == locked_page) {
1711 put_page(pages[i]);
1712 pages_locked++;
1713 continue;
1714 }
1715 if (page_ops & PAGE_CLEAR_DIRTY)
1716 clear_page_dirty_for_io(pages[i]);
1717 if (page_ops & PAGE_SET_WRITEBACK)
1718 set_page_writeback(pages[i]);
1719 if (page_ops & PAGE_SET_ERROR)
1720 SetPageError(pages[i]);
1721 if (page_ops & PAGE_END_WRITEBACK)
1722 end_page_writeback(pages[i]);
1723 if (page_ops & PAGE_UNLOCK)
1724 unlock_page(pages[i]);
1725 if (page_ops & PAGE_LOCK) {
1726 lock_page(pages[i]);
1727 if (!PageDirty(pages[i]) ||
1728 pages[i]->mapping != mapping) {
1729 unlock_page(pages[i]);
1730 put_page(pages[i]);
1731 err = -EAGAIN;
1732 goto out;
1733 }
1734 }
1735 put_page(pages[i]);
1736 pages_locked++;
1737 }
1738 nr_pages -= ret;
1739 index += ret;
1740 cond_resched();
1741 }
1742out:
1743 if (err && index_ret)
1744 *index_ret = start_index + pages_locked - 1;
1745 return err;
1746}
1747
1748void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1749 u64 delalloc_end, struct page *locked_page,
1750 unsigned clear_bits,
1751 unsigned long page_ops)
1752{
1753 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1754 NULL);
1755
1756 __process_pages_contig(inode->i_mapping, locked_page,
1757 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1758 page_ops, NULL);
1759}
1760
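/*
 * Count the number of bytes in [*start, search_end] that have all of the
 * given bits set, stopping once @max_bytes has been reached.  With @contig
 * set, counting stops at the first gap.  *start is advanced to the first
 * matching offset.
 */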
1766u64 count_range_bits(struct extent_io_tree *tree,
1767 u64 *start, u64 search_end, u64 max_bytes,
1768 unsigned bits, int contig)
1769{
1770 struct rb_node *node;
1771 struct extent_state *state;
1772 u64 cur_start = *start;
1773 u64 total_bytes = 0;
1774 u64 last = 0;
1775 int found = 0;
1776
1777 if (WARN_ON(search_end <= cur_start))
1778 return 0;
1779
1780 spin_lock(&tree->lock);
1781 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1782 total_bytes = tree->dirty_bytes;
1783 goto out;
1784 }
1785
1786
1787
1788
1789 node = tree_search(tree, cur_start);
1790 if (!node)
1791 goto out;
1792
1793 while (1) {
1794 state = rb_entry(node, struct extent_state, rb_node);
1795 if (state->start > search_end)
1796 break;
1797 if (contig && found && state->start > last + 1)
1798 break;
1799 if (state->end >= cur_start && (state->state & bits) == bits) {
1800 total_bytes += min(search_end, state->end) + 1 -
1801 max(cur_start, state->start);
1802 if (total_bytes >= max_bytes)
1803 break;
1804 if (!found) {
1805 *start = max(cur_start, state->start);
1806 found = 1;
1807 }
1808 last = state->end;
1809 } else if (contig && found) {
1810 break;
1811 }
1812 node = rb_next(node);
1813 if (!node)
1814 break;
1815 }
1816out:
1817 spin_unlock(&tree->lock);
1818 return total_bytes;
1819}
1820
1821
1822
1823
1824
1825static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1826 struct io_failure_record *failrec)
1827{
1828 struct rb_node *node;
1829 struct extent_state *state;
1830 int ret = 0;
1831
1832 spin_lock(&tree->lock);
1833
1834
1835
1836
1837 node = tree_search(tree, start);
1838 if (!node) {
1839 ret = -ENOENT;
1840 goto out;
1841 }
1842 state = rb_entry(node, struct extent_state, rb_node);
1843 if (state->start != start) {
1844 ret = -ENOENT;
1845 goto out;
1846 }
1847 state->failrec = failrec;
1848out:
1849 spin_unlock(&tree->lock);
1850 return ret;
1851}
1852
1853static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1854 struct io_failure_record **failrec)
1855{
1856 struct rb_node *node;
1857 struct extent_state *state;
1858 int ret = 0;
1859
1860 spin_lock(&tree->lock);
1861
1862
1863
1864
1865 node = tree_search(tree, start);
1866 if (!node) {
1867 ret = -ENOENT;
1868 goto out;
1869 }
1870 state = rb_entry(node, struct extent_state, rb_node);
1871 if (state->start != start) {
1872 ret = -ENOENT;
1873 goto out;
1874 }
1875 *failrec = state->failrec;
1876out:
1877 spin_unlock(&tree->lock);
1878 return ret;
1879}
1880
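/*
 * Search the range [start, end] for the given bits.  With @filled == 1 the
 * whole range must be covered by states that have the bits set; with
 * @filled == 0 a single matching state anywhere in the range is enough.
 */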
1887int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1888 unsigned bits, int filled, struct extent_state *cached)
1889{
1890 struct extent_state *state = NULL;
1891 struct rb_node *node;
1892 int bitset = 0;
1893
1894 spin_lock(&tree->lock);
1895 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1896 cached->end > start)
1897 node = &cached->rb_node;
1898 else
1899 node = tree_search(tree, start);
1900 while (node && start <= end) {
1901 state = rb_entry(node, struct extent_state, rb_node);
1902
1903 if (filled && state->start > start) {
1904 bitset = 0;
1905 break;
1906 }
1907
1908 if (state->start > end)
1909 break;
1910
1911 if (state->state & bits) {
1912 bitset = 1;
1913 if (!filled)
1914 break;
1915 } else if (filled) {
1916 bitset = 0;
1917 break;
1918 }
1919
1920 if (state->end == (u64)-1)
1921 break;
1922
1923 start = state->end + 1;
1924 if (start > end)
1925 break;
1926 node = rb_next(node);
1927 if (!node) {
1928 if (filled)
1929 bitset = 0;
1930 break;
1931 }
1932 }
1933 spin_unlock(&tree->lock);
1934 return bitset;
1935}
1936
1937
1938
1939
1940
1941static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1942{
1943 u64 start = page_offset(page);
1944 u64 end = start + PAGE_SIZE - 1;
1945 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1946 SetPageUptodate(page);
1947}
1948
1949int free_io_failure(struct extent_io_tree *failure_tree,
1950 struct extent_io_tree *io_tree,
1951 struct io_failure_record *rec)
1952{
1953 int ret;
1954 int err = 0;
1955
1956 set_state_failrec(failure_tree, rec->start, NULL);
1957 ret = clear_extent_bits(failure_tree, rec->start,
1958 rec->start + rec->len - 1,
1959 EXTENT_LOCKED | EXTENT_DIRTY);
1960 if (ret)
1961 err = ret;
1962
1963 ret = clear_extent_bits(io_tree, rec->start,
1964 rec->start + rec->len - 1,
1965 EXTENT_DAMAGED);
1966 if (ret && !err)
1967 err = ret;
1968
1969 kfree(rec);
1970 return err;
1971}
1972
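/*
 * Repair a read error by rewriting the correct data over the bad copy: map
 * the logical address to the failed mirror's device ourselves and submit a
 * synchronous write of @page directly, so only the one bad copy is written
 * rather than every mirror.  Requires a read-write filesystem.
 */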
1983int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
1984 u64 length, u64 logical, struct page *page,
1985 unsigned int pg_offset, int mirror_num)
1986{
1987 struct bio *bio;
1988 struct btrfs_device *dev;
1989 u64 map_length = 0;
1990 u64 sector;
1991 struct btrfs_bio *bbio = NULL;
1992 int ret;
1993
1994 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
1995 BUG_ON(!mirror_num);
1996
1997 bio = btrfs_io_bio_alloc(1);
1998 bio->bi_iter.bi_size = 0;
1999 map_length = length;
2000
2001
2002
2003
2004
2005
2006 btrfs_bio_counter_inc_blocked(fs_info);
2007 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
2008
2009
2010
2011
2012
2013
2014 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
2015 &map_length, &bbio, 0);
2016 if (ret) {
2017 btrfs_bio_counter_dec(fs_info);
2018 bio_put(bio);
2019 return -EIO;
2020 }
2021 ASSERT(bbio->mirror_num == 1);
2022 } else {
2023 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2024 &map_length, &bbio, mirror_num);
2025 if (ret) {
2026 btrfs_bio_counter_dec(fs_info);
2027 bio_put(bio);
2028 return -EIO;
2029 }
2030 BUG_ON(mirror_num != bbio->mirror_num);
2031 }
2032
2033 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2034 bio->bi_iter.bi_sector = sector;
2035 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2036 btrfs_put_bbio(bbio);
2037 if (!dev || !dev->bdev ||
2038 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
2039 btrfs_bio_counter_dec(fs_info);
2040 bio_put(bio);
2041 return -EIO;
2042 }
2043 bio_set_dev(bio, dev->bdev);
2044 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2045 bio_add_page(bio, page, length, pg_offset);
2046
2047 if (btrfsic_submit_bio_wait(bio)) {
2048
2049 btrfs_bio_counter_dec(fs_info);
2050 bio_put(bio);
2051 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2052 return -EIO;
2053 }
2054
2055 btrfs_info_rl_in_rcu(fs_info,
2056 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2057 ino, start,
2058 rcu_str_deref(dev->name), sector);
2059 btrfs_bio_counter_dec(fs_info);
2060 bio_put(bio);
2061 return 0;
2062}
2063
2064int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2065 struct extent_buffer *eb, int mirror_num)
2066{
2067 u64 start = eb->start;
2068 int i, num_pages = num_extent_pages(eb);
2069 int ret = 0;
2070
2071 if (sb_rdonly(fs_info->sb))
2072 return -EROFS;
2073
2074 for (i = 0; i < num_pages; i++) {
2075 struct page *p = eb->pages[i];
2076
2077 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2078 start - page_offset(p), mirror_num);
2079 if (ret)
2080 break;
2081 start += PAGE_SIZE;
2082 }
2083
2084 return ret;
2085}
2086
2087
2088
2089
2090
2091int clean_io_failure(struct btrfs_fs_info *fs_info,
2092 struct extent_io_tree *failure_tree,
2093 struct extent_io_tree *io_tree, u64 start,
2094 struct page *page, u64 ino, unsigned int pg_offset)
2095{
2096 u64 private;
2097 struct io_failure_record *failrec;
2098 struct extent_state *state;
2099 int num_copies;
2100 int ret;
2101
2102 private = 0;
2103 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2104 EXTENT_DIRTY, 0);
2105 if (!ret)
2106 return 0;
2107
2108 ret = get_state_failrec(failure_tree, start, &failrec);
2109 if (ret)
2110 return 0;
2111
2112 BUG_ON(!failrec->this_mirror);
2113
2114 if (failrec->in_validation) {
2115
2116 btrfs_debug(fs_info,
2117 "clean_io_failure: freeing dummy error at %llu",
2118 failrec->start);
2119 goto out;
2120 }
2121 if (sb_rdonly(fs_info->sb))
2122 goto out;
2123
2124 spin_lock(&io_tree->lock);
2125 state = find_first_extent_bit_state(io_tree,
2126 failrec->start,
2127 EXTENT_LOCKED);
2128 spin_unlock(&io_tree->lock);
2129
2130 if (state && state->start <= failrec->start &&
2131 state->end >= failrec->start + failrec->len - 1) {
2132 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2133 failrec->len);
2134 if (num_copies > 1) {
2135 repair_io_failure(fs_info, ino, start, failrec->len,
2136 failrec->logical, page, pg_offset,
2137 failrec->failed_mirror);
2138 }
2139 }
2140
2141out:
2142 free_io_failure(failure_tree, io_tree, failrec);
2143
2144 return 0;
2145}
2146
2147
2148
2149
2150
2151
2152
2153void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2154{
2155 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2156 struct io_failure_record *failrec;
2157 struct extent_state *state, *next;
2158
2159 if (RB_EMPTY_ROOT(&failure_tree->state))
2160 return;
2161
2162 spin_lock(&failure_tree->lock);
2163 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2164 while (state) {
2165 if (state->start > end)
2166 break;
2167
2168 ASSERT(state->end <= end);
2169
2170 next = next_state(state);
2171
2172 failrec = state->failrec;
2173 free_extent_state(state);
2174 kfree(failrec);
2175
2176 state = next;
2177 }
2178 spin_unlock(&failure_tree->lock);
2179}
2180
2181int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2182 struct io_failure_record **failrec_ret)
2183{
2184 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2185 struct io_failure_record *failrec;
2186 struct extent_map *em;
2187 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2188 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2189 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2190 int ret;
2191 u64 logical;
2192
2193 ret = get_state_failrec(failure_tree, start, &failrec);
2194 if (ret) {
2195 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2196 if (!failrec)
2197 return -ENOMEM;
2198
2199 failrec->start = start;
2200 failrec->len = end - start + 1;
2201 failrec->this_mirror = 0;
2202 failrec->bio_flags = 0;
2203 failrec->in_validation = 0;
2204
2205 read_lock(&em_tree->lock);
2206 em = lookup_extent_mapping(em_tree, start, failrec->len);
2207 if (!em) {
2208 read_unlock(&em_tree->lock);
2209 kfree(failrec);
2210 return -EIO;
2211 }
2212
2213 if (em->start > start || em->start + em->len <= start) {
2214 free_extent_map(em);
2215 em = NULL;
2216 }
2217 read_unlock(&em_tree->lock);
2218 if (!em) {
2219 kfree(failrec);
2220 return -EIO;
2221 }
2222
2223 logical = start - em->start;
2224 logical = em->block_start + logical;
2225 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2226 logical = em->block_start;
2227 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2228 extent_set_compress_type(&failrec->bio_flags,
2229 em->compress_type);
2230 }
2231
2232 btrfs_debug(fs_info,
2233 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2234 logical, start, failrec->len);
2235
2236 failrec->logical = logical;
2237 free_extent_map(em);
2238
2239
2240 ret = set_extent_bits(failure_tree, start, end,
2241 EXTENT_LOCKED | EXTENT_DIRTY);
2242 if (ret >= 0)
2243 ret = set_state_failrec(failure_tree, start, failrec);
2244
2245 if (ret >= 0)
2246 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2247 if (ret < 0) {
2248 kfree(failrec);
2249 return ret;
2250 }
2251 } else {
2252 btrfs_debug(fs_info,
2253 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2254 failrec->logical, failrec->start, failrec->len,
2255 failrec->in_validation);
2256
2257
2258
2259
2260
2261 }
2262
2263 *failrec_ret = failrec;
2264
2265 return 0;
2266}
2267
2268bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
2269 struct io_failure_record *failrec, int failed_mirror)
2270{
2271 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2272 int num_copies;
2273
2274 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2275 if (num_copies == 1) {
2276
2277
2278
2279
2280
2281 btrfs_debug(fs_info,
2282 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2283 num_copies, failrec->this_mirror, failed_mirror);
2284 return false;
2285 }
2286
2287
2288
2289
2290
2291
2292 if (failed_bio_pages > 1) {
2293
2294
2295
2296
2297
2298
2299
2300
2301 BUG_ON(failrec->in_validation);
2302 failrec->in_validation = 1;
2303 failrec->this_mirror = failed_mirror;
2304 } else {
2305
2306
2307
2308
2309
2310 if (failrec->in_validation) {
2311 BUG_ON(failrec->this_mirror != failed_mirror);
2312 failrec->in_validation = 0;
2313 failrec->this_mirror = 0;
2314 }
2315 failrec->failed_mirror = failed_mirror;
2316 failrec->this_mirror++;
2317 if (failrec->this_mirror == failed_mirror)
2318 failrec->this_mirror++;
2319 }
2320
2321 if (failrec->this_mirror > num_copies) {
2322 btrfs_debug(fs_info,
2323 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2324 num_copies, failrec->this_mirror, failed_mirror);
2325 return false;
2326 }
2327
2328 return true;
2329}
2330
2331
2332struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2333 struct io_failure_record *failrec,
2334 struct page *page, int pg_offset, int icsum,
2335 bio_end_io_t *endio_func, void *data)
2336{
2337 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2338 struct bio *bio;
2339 struct btrfs_io_bio *btrfs_failed_bio;
2340 struct btrfs_io_bio *btrfs_bio;
2341
2342 bio = btrfs_io_bio_alloc(1);
2343 bio->bi_end_io = endio_func;
2344 bio->bi_iter.bi_sector = failrec->logical >> 9;
2345 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
2346 bio->bi_iter.bi_size = 0;
2347 bio->bi_private = data;
2348
2349 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2350 if (btrfs_failed_bio->csum) {
2351 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2352
2353 btrfs_bio = btrfs_io_bio(bio);
2354 btrfs_bio->csum = btrfs_bio->csum_inline;
2355 icsum *= csum_size;
2356 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2357 csum_size);
2358 }
2359
2360 bio_add_page(bio, page, failrec->len, pg_offset);
2361
2362 return bio;
2363}
2364
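/*
 * Handle a failed read of a data page: look up or create the IO failure
 * record, pick the next mirror to try, build a repair bio and resubmit it.
 * Returns 0 when a retry was submitted, or a negative error if the block
 * cannot be repaired.
 */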
2371static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2372 struct page *page, u64 start, u64 end,
2373 int failed_mirror)
2374{
2375 struct io_failure_record *failrec;
2376 struct inode *inode = page->mapping->host;
2377 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2378 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2379 struct bio *bio;
2380 int read_mode = 0;
2381 blk_status_t status;
2382 int ret;
2383 unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
2384
2385 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2386
2387 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2388 if (ret)
2389 return ret;
2390
2391 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
2392 failed_mirror)) {
2393 free_io_failure(failure_tree, tree, failrec);
2394 return -EIO;
2395 }
2396
2397 if (failed_bio_pages > 1)
2398 read_mode |= REQ_FAILFAST_DEV;
2399
2400 phy_offset >>= inode->i_sb->s_blocksize_bits;
2401 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2402 start - page_offset(page),
2403 (int)phy_offset, failed_bio->bi_end_io,
2404 NULL);
2405 bio->bi_opf = REQ_OP_READ | read_mode;
2406
2407 btrfs_debug(btrfs_sb(inode->i_sb),
2408 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2409 read_mode, failrec->this_mirror, failrec->in_validation);
2410
2411 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2412 failrec->bio_flags, 0);
2413 if (status) {
2414 free_io_failure(failure_tree, tree, failrec);
2415 bio_put(bio);
2416 ret = blk_status_to_errno(status);
2417 }
2418
2419 return ret;
2420}
2421
2422
2423
2424void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2425{
2426 int uptodate = (err == 0);
2427 int ret = 0;
2428
2429 btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
2430
2431 if (!uptodate) {
2432 ClearPageUptodate(page);
2433 SetPageError(page);
2434 ret = err < 0 ? err : -EIO;
2435 mapping_set_error(page->mapping, ret);
2436 }
2437}
2438
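/*
 * Bio completion for buffered writes: finish the ordered extent accounting
 * for each page in the bio and end page writeback.  On error the pages and
 * the mapping are marked with the failure.
 */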
2448static void end_bio_extent_writepage(struct bio *bio)
2449{
2450 int error = blk_status_to_errno(bio->bi_status);
2451 struct bio_vec *bvec;
2452 u64 start;
2453 u64 end;
2454 int i;
2455 struct bvec_iter_all iter_all;
2456
2457 ASSERT(!bio_flagged(bio, BIO_CLONED));
2458 bio_for_each_segment_all(bvec, bio, i, iter_all) {
2459 struct page *page = bvec->bv_page;
2460 struct inode *inode = page->mapping->host;
2461 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2462
2463
2464
2465
2466
2467
2468 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2469 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2470 btrfs_err(fs_info,
2471 "partial page write in btrfs with offset %u and length %u",
2472 bvec->bv_offset, bvec->bv_len);
2473 else
2474 btrfs_info(fs_info,
2475 "incomplete page write in btrfs with offset %u and length %u",
2476 bvec->bv_offset, bvec->bv_len);
2477 }
2478
2479 start = page_offset(page);
2480 end = start + bvec->bv_offset + bvec->bv_len - 1;
2481
2482 end_extent_writepage(page, error, start, end);
2483 end_page_writeback(page);
2484 }
2485
2486 bio_put(bio);
2487}
2488
2489static void
2490endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2491 int uptodate)
2492{
2493 struct extent_state *cached = NULL;
2494 u64 end = start + len - 1;
2495
2496 if (uptodate && tree->track_uptodate)
2497 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2498 unlock_extent_cached_atomic(tree, start, end, &cached);
2499}
2500
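/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked on it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */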
2512static void end_bio_extent_readpage(struct bio *bio)
2513{
2514 struct bio_vec *bvec;
2515 int uptodate = !bio->bi_status;
2516 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2517 struct extent_io_tree *tree, *failure_tree;
2518 u64 offset = 0;
2519 u64 start;
2520 u64 end;
2521 u64 len;
2522 u64 extent_start = 0;
2523 u64 extent_len = 0;
2524 int mirror;
2525 int ret;
2526 int i;
2527 struct bvec_iter_all iter_all;
2528
2529 ASSERT(!bio_flagged(bio, BIO_CLONED));
2530 bio_for_each_segment_all(bvec, bio, i, iter_all) {
2531 struct page *page = bvec->bv_page;
2532 struct inode *inode = page->mapping->host;
2533 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2534 bool data_inode = btrfs_ino(BTRFS_I(inode))
2535 != BTRFS_BTREE_INODE_OBJECTID;
2536
2537 btrfs_debug(fs_info,
2538 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2539 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2540 io_bio->mirror_num);
2541 tree = &BTRFS_I(inode)->io_tree;
2542 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2543
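 /*
  * We always issue full-page reads, but if some block in a page fails to
  * read, blk_update_request() will advance bv_offset and adjust bv_len to
  * compensate.  Report an error if offset and length do not add up to a
  * full page, otherwise just note the incomplete read.
  */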
2549 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2550 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2551 btrfs_err(fs_info,
2552 "partial page read in btrfs with offset %u and length %u",
2553 bvec->bv_offset, bvec->bv_len);
2554 else
2555 btrfs_info(fs_info,
2556 "incomplete page read in btrfs with offset %u and length %u",
2557 bvec->bv_offset, bvec->bv_len);
2558 }
2559
2560 start = page_offset(page);
2561 end = start + bvec->bv_offset + bvec->bv_len - 1;
2562 len = bvec->bv_len;
2563
2564 mirror = io_bio->mirror_num;
2565 if (likely(uptodate)) {
2566 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2567 page, start, end,
2568 mirror);
2569 if (ret)
2570 uptodate = 0;
2571 else
2572 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2573 failure_tree, tree, start,
2574 page,
2575 btrfs_ino(BTRFS_I(inode)), 0);
2576 }
2577
2578 if (likely(uptodate))
2579 goto readpage_ok;
2580
2581 if (data_inode) {
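 /*
  * The generic bio_readpage_error handles errors the following way:
  * if possible, new read requests are created and submitted and will end
  * up in end_bio_extent_readpage as well (if we're lucky, not in the
  * !uptodate case).  In that case it returns 0 and we just go on with the
  * next page in our bio.  If it can't handle the error it will return -EIO
  * and we remain responsible for that page.
  */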
2593 ret = bio_readpage_error(bio, offset, page, start, end,
2594 mirror);
2595 if (ret == 0) {
2596 uptodate = !bio->bi_status;
2597 offset += len;
2598 continue;
2599 }
2600 } else {
2601 struct extent_buffer *eb;
2602
2603 eb = (struct extent_buffer *)page->private;
2604 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
2605 eb->read_mirror = mirror;
2606 atomic_dec(&eb->io_pages);
2607 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
2608 &eb->bflags))
2609 btree_readahead_hook(eb, -EIO);
2610
2611 ret = -EIO;
2612 }
2613readpage_ok:
2614 if (likely(uptodate)) {
2615 loff_t i_size = i_size_read(inode);
2616 pgoff_t end_index = i_size >> PAGE_SHIFT;
2617 unsigned off;
2618
2619
2620 off = offset_in_page(i_size);
2621 if (page->index == end_index && off)
2622 zero_user_segment(page, off, PAGE_SIZE);
2623 SetPageUptodate(page);
2624 } else {
2625 ClearPageUptodate(page);
2626 SetPageError(page);
2627 }
2628 unlock_page(page);
2629 offset += len;
2630
2631 if (unlikely(!uptodate)) {
2632 if (extent_len) {
2633 endio_readpage_release_extent(tree,
2634 extent_start,
2635 extent_len, 1);
2636 extent_start = 0;
2637 extent_len = 0;
2638 }
2639 endio_readpage_release_extent(tree, start,
2640 end - start + 1, 0);
2641 } else if (!extent_len) {
2642 extent_start = start;
2643 extent_len = end + 1 - start;
2644 } else if (extent_start + extent_len == start) {
2645 extent_len += end + 1 - start;
2646 } else {
2647 endio_readpage_release_extent(tree, extent_start,
2648 extent_len, uptodate);
2649 extent_start = start;
2650 extent_len = end + 1 - start;
2651 }
2652 }
2653
2654 if (extent_len)
2655 endio_readpage_release_extent(tree, extent_start, extent_len,
2656 uptodate);
2657 btrfs_io_bio_free_csum(io_bio);
2658 bio_put(bio);
2659}
2660
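/*
 * Initialize the members up to but not including 'bio'.  Use after allocating
 * a new btrfs_io_bio structure from one of the btrfs_bioset allocators below.
 */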
2666static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2667{
2668 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2669}
2670
2671
2672
2673
2674
2675
2676struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
2677{
2678 struct bio *bio;
2679
2680 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2681 bio_set_dev(bio, bdev);
2682 bio->bi_iter.bi_sector = first_byte >> 9;
2683 btrfs_io_bio_init(btrfs_io_bio(bio));
2684 return bio;
2685}
2686
2687struct bio *btrfs_bio_clone(struct bio *bio)
2688{
2689 struct btrfs_io_bio *btrfs_bio;
2690 struct bio *new;
2691
2692
2693 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
2694 btrfs_bio = btrfs_io_bio(new);
2695 btrfs_io_bio_init(btrfs_bio);
2696 btrfs_bio->iter = bio->bi_iter;
2697 return new;
2698}
2699
2700struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2701{
2702 struct bio *bio;
2703
2704
2705 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
2706 btrfs_io_bio_init(btrfs_io_bio(bio));
2707 return bio;
2708}
2709
2710struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2711{
2712 struct bio *bio;
2713 struct btrfs_io_bio *btrfs_bio;
2714
2715
2716 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
2717 ASSERT(bio);
2718
2719 btrfs_bio = btrfs_io_bio(bio);
2720 btrfs_io_bio_init(btrfs_bio);
2721
2722 bio_trim(bio, offset >> 9, size >> 9);
2723 btrfs_bio->iter = bio->bi_iter;
2724 return bio;
2725}
2726
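/*
 * submit_extent_page - add a page range to the bio being built, submitting
 * the current bio first when it cannot take the new range.
 *
 * @opf:	bio REQ_OP_* and REQ_* flags as one value
 * @tree:	io tree this range belongs to, stored in bi_private
 * @wbc:	optional writeback control for io accounting
 * @page:	page to add to the bio
 * @offset:	logical (disk) byte offset of the range
 * @size:	number of bytes to add, capped at PAGE_SIZE
 * @pg_offset:	offset of the range within @page
 * @bdev:	attach newly created bios to this bdev
 * @bio_ret:	must be valid; the bio under construction is kept here between
 *		calls and replaced when a new bio is allocated
 * @end_io_func: end_io callback for a newly allocated bio
 * @mirror_num:	mirror to use when the previous bio has to be submitted
 * @prev_bio_flags: flags of the bio in @bio_ret, used for the merge decision
 * @bio_flags:	flags of the range being added
 * @force_bio_submit: submit the bio in @bio_ret before adding this range
 */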
2743static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2744 struct writeback_control *wbc,
2745 struct page *page, u64 offset,
2746 size_t size, unsigned long pg_offset,
2747 struct block_device *bdev,
2748 struct bio **bio_ret,
2749 bio_end_io_t end_io_func,
2750 int mirror_num,
2751 unsigned long prev_bio_flags,
2752 unsigned long bio_flags,
2753 bool force_bio_submit)
2754{
2755 int ret = 0;
2756 struct bio *bio;
2757 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2758 sector_t sector = offset >> 9;
2759
2760 ASSERT(bio_ret);
2761
2762 if (*bio_ret) {
2763 bool contig;
2764 bool can_merge = true;
2765
2766 bio = *bio_ret;
2767 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
2768 contig = bio->bi_iter.bi_sector == sector;
2769 else
2770 contig = bio_end_sector(bio) == sector;
2771
2772 ASSERT(tree->ops);
2773 if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
2774 can_merge = false;
2775
2776 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
2777 force_bio_submit ||
2778 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
2779 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2780 if (ret < 0) {
2781 *bio_ret = NULL;
2782 return ret;
2783 }
2784 bio = NULL;
2785 } else {
2786 if (wbc)
2787 wbc_account_io(wbc, page, page_size);
2788 return 0;
2789 }
2790 }
2791
2792 bio = btrfs_bio_alloc(bdev, offset);
2793 bio_add_page(bio, page, page_size, pg_offset);
2794 bio->bi_end_io = end_io_func;
2795 bio->bi_private = tree;
2796 bio->bi_write_hint = page->mapping->host->i_write_hint;
2797 bio->bi_opf = opf;
2798 if (wbc) {
2799 wbc_init_bio(wbc, bio);
2800 wbc_account_io(wbc, page, page_size);
2801 }
2802
2803 *bio_ret = bio;
2804
2805 return ret;
2806}
2807
2808static void attach_extent_buffer_page(struct extent_buffer *eb,
2809 struct page *page)
2810{
2811 if (!PagePrivate(page)) {
2812 SetPagePrivate(page);
2813 get_page(page);
2814 set_page_private(page, (unsigned long)eb);
2815 } else {
2816 WARN_ON(page->private != (unsigned long)eb);
2817 }
2818}
2819
2820void set_page_extent_mapped(struct page *page)
2821{
2822 if (!PagePrivate(page)) {
2823 SetPagePrivate(page);
2824 get_page(page);
2825 set_page_private(page, EXTENT_PAGE_PRIVATE);
2826 }
2827}
2828
2829static struct extent_map *
2830__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2831 u64 start, u64 len, get_extent_t *get_extent,
2832 struct extent_map **em_cached)
2833{
2834 struct extent_map *em;
2835
2836 if (em_cached && *em_cached) {
2837 em = *em_cached;
2838 if (extent_map_in_tree(em) && start >= em->start &&
2839 start < extent_map_end(em)) {
2840 refcount_inc(&em->refs);
2841 return em;
2842 }
2843
2844 free_extent_map(em);
2845 *em_cached = NULL;
2846 }
2847
2848 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
2849 if (em_cached && !IS_ERR_OR_NULL(em)) {
2850 BUG_ON(*em_cached);
2851 refcount_inc(&em->refs);
2852 *em_cached = em;
2853 }
2854 return em;
2855}
2856
2857
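/*
 * basic readpage implementation.  Walks the extent maps that cover the page,
 * zeroes holes and the area past i_size, and submits read bios for the
 * on-disk extents.  The caller must hold the extent range locked; each
 * subrange is unlocked here or by the read end_io handler.
 */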
2863static int __do_readpage(struct extent_io_tree *tree,
2864 struct page *page,
2865 get_extent_t *get_extent,
2866 struct extent_map **em_cached,
2867 struct bio **bio, int mirror_num,
2868 unsigned long *bio_flags, unsigned int read_flags,
2869 u64 *prev_em_start)
2870{
2871 struct inode *inode = page->mapping->host;
2872 u64 start = page_offset(page);
2873 const u64 end = start + PAGE_SIZE - 1;
2874 u64 cur = start;
2875 u64 extent_offset;
2876 u64 last_byte = i_size_read(inode);
2877 u64 block_start;
2878 u64 cur_end;
2879 struct extent_map *em;
2880 struct block_device *bdev;
2881 int ret = 0;
2882 int nr = 0;
2883 size_t pg_offset = 0;
2884 size_t iosize;
2885 size_t disk_io_size;
2886 size_t blocksize = inode->i_sb->s_blocksize;
2887 unsigned long this_bio_flag = 0;
2888
2889 set_page_extent_mapped(page);
2890
2891 if (!PageUptodate(page)) {
2892 if (cleancache_get_page(page) == 0) {
2893 BUG_ON(blocksize != PAGE_SIZE);
2894 unlock_extent(tree, start, end);
2895 goto out;
2896 }
2897 }
2898
2899 if (page->index == last_byte >> PAGE_SHIFT) {
2900 char *userpage;
2901 size_t zero_offset = offset_in_page(last_byte);
2902
2903 if (zero_offset) {
2904 iosize = PAGE_SIZE - zero_offset;
2905 userpage = kmap_atomic(page);
2906 memset(userpage + zero_offset, 0, iosize);
2907 flush_dcache_page(page);
2908 kunmap_atomic(userpage);
2909 }
2910 }
2911 while (cur <= end) {
2912 bool force_bio_submit = false;
2913 u64 offset;
2914
2915 if (cur >= last_byte) {
2916 char *userpage;
2917 struct extent_state *cached = NULL;
2918
2919 iosize = PAGE_SIZE - pg_offset;
2920 userpage = kmap_atomic(page);
2921 memset(userpage + pg_offset, 0, iosize);
2922 flush_dcache_page(page);
2923 kunmap_atomic(userpage);
2924 set_extent_uptodate(tree, cur, cur + iosize - 1,
2925 &cached, GFP_NOFS);
2926 unlock_extent_cached(tree, cur,
2927 cur + iosize - 1, &cached);
2928 break;
2929 }
2930 em = __get_extent_map(inode, page, pg_offset, cur,
2931 end - cur + 1, get_extent, em_cached);
2932 if (IS_ERR_OR_NULL(em)) {
2933 SetPageError(page);
2934 unlock_extent(tree, cur, end);
2935 break;
2936 }
2937 extent_offset = cur - em->start;
2938 BUG_ON(extent_map_end(em) <= cur);
2939 BUG_ON(end < cur);
2940
2941 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2942 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2943 extent_set_compress_type(&this_bio_flag,
2944 em->compress_type);
2945 }
2946
2947 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2948 cur_end = min(extent_map_end(em) - 1, end);
2949 iosize = ALIGN(iosize, blocksize);
2950 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2951 disk_io_size = em->block_len;
2952 offset = em->block_start;
2953 } else {
2954 offset = em->block_start + extent_offset;
2955 disk_io_size = iosize;
2956 }
2957 bdev = em->bdev;
2958 block_start = em->block_start;
2959 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2960 block_start = EXTENT_MAP_HOLE;
2961
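 /*
  * If this page is part of a compressed extent and we are starting a new
  * extent map (readahead can give us several file ranges that point to the
  * same compressed extent), force the bio built so far to be submitted.
  * Mixing pages from two file ranges that share a compressed extent in a
  * single bio makes the decompression end io zero out the pages of the
  * second range, corrupting the read.  prev_em_start remembers which extent
  * map the previous page came from.
  */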
2996 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
2997 prev_em_start && *prev_em_start != (u64)-1 &&
2998 *prev_em_start != em->start)
2999 force_bio_submit = true;
3000
3001 if (prev_em_start)
3002 *prev_em_start = em->start;
3003
3004 free_extent_map(em);
3005 em = NULL;
3006
3007
3008 if (block_start == EXTENT_MAP_HOLE) {
3009 char *userpage;
3010 struct extent_state *cached = NULL;
3011
3012 userpage = kmap_atomic(page);
3013 memset(userpage + pg_offset, 0, iosize);
3014 flush_dcache_page(page);
3015 kunmap_atomic(userpage);
3016
3017 set_extent_uptodate(tree, cur, cur + iosize - 1,
3018 &cached, GFP_NOFS);
3019 unlock_extent_cached(tree, cur,
3020 cur + iosize - 1, &cached);
3021 cur = cur + iosize;
3022 pg_offset += iosize;
3023 continue;
3024 }
3025
3026 if (test_range_bit(tree, cur, cur_end,
3027 EXTENT_UPTODATE, 1, NULL)) {
3028 check_page_uptodate(tree, page);
3029 unlock_extent(tree, cur, cur + iosize - 1);
3030 cur = cur + iosize;
3031 pg_offset += iosize;
3032 continue;
3033 }
3034
3035
3036
3037 if (block_start == EXTENT_MAP_INLINE) {
3038 SetPageError(page);
3039 unlock_extent(tree, cur, cur + iosize - 1);
3040 cur = cur + iosize;
3041 pg_offset += iosize;
3042 continue;
3043 }
3044
3045 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3046 page, offset, disk_io_size,
3047 pg_offset, bdev, bio,
3048 end_bio_extent_readpage, mirror_num,
3049 *bio_flags,
3050 this_bio_flag,
3051 force_bio_submit);
3052 if (!ret) {
3053 nr++;
3054 *bio_flags = this_bio_flag;
3055 } else {
3056 SetPageError(page);
3057 unlock_extent(tree, cur, cur + iosize - 1);
3058 goto out;
3059 }
3060 cur = cur + iosize;
3061 pg_offset += iosize;
3062 }
3063out:
3064 if (!nr) {
3065 if (!PageError(page))
3066 SetPageUptodate(page);
3067 unlock_page(page);
3068 }
3069 return ret;
3070}
3071
3072static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3073 struct page *pages[], int nr_pages,
3074 u64 start, u64 end,
3075 struct extent_map **em_cached,
3076 struct bio **bio,
3077 unsigned long *bio_flags,
3078 u64 *prev_em_start)
3079{
3080 struct inode *inode;
3081 struct btrfs_ordered_extent *ordered;
3082 int index;
3083
3084 inode = pages[0]->mapping->host;
3085 while (1) {
3086 lock_extent(tree, start, end);
3087 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3088 end - start + 1);
3089 if (!ordered)
3090 break;
3091 unlock_extent(tree, start, end);
3092 btrfs_start_ordered_extent(inode, ordered, 1);
3093 btrfs_put_ordered_extent(ordered);
3094 }
3095
3096 for (index = 0; index < nr_pages; index++) {
3097 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
3098 bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
3099 put_page(pages[index]);
3100 }
3101}
3102
3103static void __extent_readpages(struct extent_io_tree *tree,
3104 struct page *pages[],
3105 int nr_pages,
3106 struct extent_map **em_cached,
3107 struct bio **bio, unsigned long *bio_flags,
3108 u64 *prev_em_start)
3109{
3110 u64 start = 0;
3111 u64 end = 0;
3112 u64 page_start;
3113 int index;
3114 int first_index = 0;
3115
3116 for (index = 0; index < nr_pages; index++) {
3117 page_start = page_offset(pages[index]);
3118 if (!end) {
3119 start = page_start;
3120 end = start + PAGE_SIZE - 1;
3121 first_index = index;
3122 } else if (end + 1 == page_start) {
3123 end += PAGE_SIZE;
3124 } else {
3125 __do_contiguous_readpages(tree, &pages[first_index],
3126 index - first_index, start,
3127 end, em_cached,
3128 bio, bio_flags,
3129 prev_em_start);
3130 start = page_start;
3131 end = start + PAGE_SIZE - 1;
3132 first_index = index;
3133 }
3134 }
3135
3136 if (end)
3137 __do_contiguous_readpages(tree, &pages[first_index],
3138 index - first_index, start,
3139 end, em_cached, bio,
3140 bio_flags, prev_em_start);
3141}
3142
3143static int __extent_read_full_page(struct extent_io_tree *tree,
3144 struct page *page,
3145 get_extent_t *get_extent,
3146 struct bio **bio, int mirror_num,
3147 unsigned long *bio_flags,
3148 unsigned int read_flags)
3149{
3150 struct inode *inode = page->mapping->host;
3151 struct btrfs_ordered_extent *ordered;
3152 u64 start = page_offset(page);
3153 u64 end = start + PAGE_SIZE - 1;
3154 int ret;
3155
3156 while (1) {
3157 lock_extent(tree, start, end);
3158 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3159 PAGE_SIZE);
3160 if (!ordered)
3161 break;
3162 unlock_extent(tree, start, end);
3163 btrfs_start_ordered_extent(inode, ordered, 1);
3164 btrfs_put_ordered_extent(ordered);
3165 }
3166
3167 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3168 bio_flags, read_flags, NULL);
3169 return ret;
3170}
3171
3172int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3173 get_extent_t *get_extent, int mirror_num)
3174{
3175 struct bio *bio = NULL;
3176 unsigned long bio_flags = 0;
3177 int ret;
3178
3179 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3180 &bio_flags, 0);
3181 if (bio)
3182 ret = submit_one_bio(bio, mirror_num, bio_flags);
3183 return ret;
3184}
3185
3186static void update_nr_written(struct writeback_control *wbc,
3187 unsigned long nr_written)
3188{
3189 wbc->nr_to_write -= nr_written;
3190}
3191
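/*
 * helper for __extent_writepage, doing all of the delayed allocation setup.
 *
 * This returns 1 if btrfs_run_delalloc_range did all the work required to
 * write the page (e.g. copied it into an inline extent); in that case the IO
 * has been started and the page is already unlocked.
 *
 * This returns 0 if all went well (page still locked).
 * This returns < 0 if there were errors (page still locked).
 */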
3202static noinline_for_stack int writepage_delalloc(struct inode *inode,
3203 struct page *page, struct writeback_control *wbc,
3204 u64 delalloc_start, unsigned long *nr_written)
3205{
3206 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
3207 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3208 bool found;
3209 u64 delalloc_to_write = 0;
3210 u64 delalloc_end = 0;
3211 int ret;
3212 int page_started = 0;
3213
3214
3215 while (delalloc_end < page_end) {
3216 found = find_lock_delalloc_range(inode, tree,
3217 page,
3218 &delalloc_start,
3219 &delalloc_end);
3220 if (!found) {
3221 delalloc_start = delalloc_end + 1;
3222 continue;
3223 }
3224 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
3225 delalloc_end, &page_started, nr_written, wbc);
3226
3227 if (ret) {
3228 SetPageError(page);
3229
3230
3231
3232
3233
3234
3235 ret = ret < 0 ? ret : -EIO;
3236 goto done;
3237 }
3238
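 /*
  * delalloc_end is already one less than the total length, so
  * we don't subtract one from PAGE_SIZE
  */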
3242 delalloc_to_write += (delalloc_end - delalloc_start +
3243 PAGE_SIZE) >> PAGE_SHIFT;
3244 delalloc_start = delalloc_end + 1;
3245 }
3246 if (wbc->nr_to_write < delalloc_to_write) {
3247 int thresh = 8192;
3248
3249 if (delalloc_to_write < thresh * 2)
3250 thresh = delalloc_to_write;
3251 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3252 thresh);
3253 }
3254
3255
3256
3257
3258 if (page_started) {
3259
3260
3261
3262
3263
3264 wbc->nr_to_write -= *nr_written;
3265 return 1;
3266 }
3267
3268 ret = 0;
3269
3270done:
3271 return ret;
3272}
3273
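/*
 * helper for __extent_writepage.  This calls the writepage start hooks,
 * and does all the work of mapping a page into bios and submitting them.
 *
 * We return 1 if the IO is started and the page is unlocked,
 * 0 if all went well (page still locked),
 * < 0 if there were errors (page still locked).
 */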
3282static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3283 struct page *page,
3284 struct writeback_control *wbc,
3285 struct extent_page_data *epd,
3286 loff_t i_size,
3287 unsigned long nr_written,
3288 unsigned int write_flags, int *nr_ret)
3289{
3290 struct extent_io_tree *tree = epd->tree;
3291 u64 start = page_offset(page);
3292 u64 page_end = start + PAGE_SIZE - 1;
3293 u64 end;
3294 u64 cur = start;
3295 u64 extent_offset;
3296 u64 block_start;
3297 u64 iosize;
3298 struct extent_map *em;
3299 struct block_device *bdev;
3300 size_t pg_offset = 0;
3301 size_t blocksize;
3302 int ret = 0;
3303 int nr = 0;
3304 bool compressed;
3305
3306 ret = btrfs_writepage_cow_fixup(page, start, page_end);
3307 if (ret) {
3308
3309 if (ret == -EBUSY)
3310 wbc->pages_skipped++;
3311 else
3312 redirty_page_for_writepage(wbc, page);
3313
3314 update_nr_written(wbc, nr_written);
3315 unlock_page(page);
3316 return 1;
3317 }
3318
3319
3320
3321
3322
3323 update_nr_written(wbc, nr_written + 1);
3324
3325 end = page_end;
3326 if (i_size <= start) {
3327 btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
3328 goto done;
3329 }
3330
3331 blocksize = inode->i_sb->s_blocksize;
3332
3333 while (cur <= end) {
3334 u64 em_end;
3335 u64 offset;
3336
3337 if (cur >= i_size) {
3338 btrfs_writepage_endio_finish_ordered(page, cur,
3339 page_end, 1);
3340 break;
3341 }
3342 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
3343 end - cur + 1, 1);
3344 if (IS_ERR_OR_NULL(em)) {
3345 SetPageError(page);
3346 ret = PTR_ERR_OR_ZERO(em);
3347 break;
3348 }
3349
3350 extent_offset = cur - em->start;
3351 em_end = extent_map_end(em);
3352 BUG_ON(em_end <= cur);
3353 BUG_ON(end < cur);
3354 iosize = min(em_end - cur, end - cur + 1);
3355 iosize = ALIGN(iosize, blocksize);
3356 offset = em->block_start + extent_offset;
3357 bdev = em->bdev;
3358 block_start = em->block_start;
3359 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3360 free_extent_map(em);
3361 em = NULL;
3362
3363
3364
3365
3366
3367 if (compressed || block_start == EXTENT_MAP_HOLE ||
3368 block_start == EXTENT_MAP_INLINE) {
3369
3370
3371
3372
3373 if (!compressed)
3374 btrfs_writepage_endio_finish_ordered(page, cur,
3375 cur + iosize - 1,
3376 1);
3377 else if (compressed) {
3378
3379
3380
3381
3382 nr++;
3383 }
3384
3385 cur += iosize;
3386 pg_offset += iosize;
3387 continue;
3388 }
3389
3390 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3391 if (!PageWriteback(page)) {
3392 btrfs_err(BTRFS_I(inode)->root->fs_info,
3393 "page %lu not writeback, cur %llu end %llu",
3394 page->index, cur, end);
3395 }
3396
3397 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3398 page, offset, iosize, pg_offset,
3399 bdev, &epd->bio,
3400 end_bio_extent_writepage,
3401 0, 0, 0, false);
3402 if (ret) {
3403 SetPageError(page);
3404 if (PageWriteback(page))
3405 end_page_writeback(page);
3406 }
3407
3408 cur = cur + iosize;
3409 pg_offset += iosize;
3410 nr++;
3411 }
3412done:
3413 *nr_ret = nr;
3414 return ret;
3415}
3416
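/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges.
 *
 * Return 0 if everything goes well.
 * Return < 0 for error.
 */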
3423static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3424 struct extent_page_data *epd)
3425{
3426 struct inode *inode = page->mapping->host;
3427 u64 start = page_offset(page);
3428 u64 page_end = start + PAGE_SIZE - 1;
3429 int ret;
3430 int nr = 0;
3431 size_t pg_offset = 0;
3432 loff_t i_size = i_size_read(inode);
3433 unsigned long end_index = i_size >> PAGE_SHIFT;
3434 unsigned int write_flags = 0;
3435 unsigned long nr_written = 0;
3436
3437 write_flags = wbc_to_write_flags(wbc);
3438
3439 trace___extent_writepage(page, inode, wbc);
3440
3441 WARN_ON(!PageLocked(page));
3442
3443 ClearPageError(page);
3444
3445 pg_offset = offset_in_page(i_size);
3446 if (page->index > end_index ||
3447 (page->index == end_index && !pg_offset)) {
3448 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3449 unlock_page(page);
3450 return 0;
3451 }
3452
3453 if (page->index == end_index) {
3454 char *userpage;
3455
3456 userpage = kmap_atomic(page);
3457 memset(userpage + pg_offset, 0,
3458 PAGE_SIZE - pg_offset);
3459 kunmap_atomic(userpage);
3460 flush_dcache_page(page);
3461 }
3462
3463 pg_offset = 0;
3464
3465 set_page_extent_mapped(page);
3466
3467 if (!epd->extent_locked) {
3468 ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
3469 if (ret == 1)
3470 goto done_unlocked;
3471 if (ret)
3472 goto done;
3473 }
3474
3475 ret = __extent_writepage_io(inode, page, wbc, epd,
3476 i_size, nr_written, write_flags, &nr);
3477 if (ret == 1)
3478 goto done_unlocked;
3479
3480done:
3481 if (nr == 0) {
3482
3483 set_page_writeback(page);
3484 end_page_writeback(page);
3485 }
3486 if (PageError(page)) {
3487 ret = ret < 0 ? ret : -EIO;
3488 end_extent_writepage(page, ret, start, page_end);
3489 }
3490 unlock_page(page);
3491 return ret;
3492
3493done_unlocked:
3494 return 0;
3495}
3496
3497void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3498{
3499 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3500 TASK_UNINTERRUPTIBLE);
3501}
3502
3503static noinline_for_stack int
3504lock_extent_buffer_for_io(struct extent_buffer *eb,
3505 struct btrfs_fs_info *fs_info,
3506 struct extent_page_data *epd)
3507{
3508 int i, num_pages;
3509 int flush = 0;
3510 int ret = 0;
3511
3512 if (!btrfs_try_tree_write_lock(eb)) {
3513 flush = 1;
3514 flush_write_bio(epd);
3515 btrfs_tree_lock(eb);
3516 }
3517
3518 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3519 btrfs_tree_unlock(eb);
3520 if (!epd->sync_io)
3521 return 0;
3522 if (!flush) {
3523 flush_write_bio(epd);
3524 flush = 1;
3525 }
3526 while (1) {
3527 wait_on_extent_buffer_writeback(eb);
3528 btrfs_tree_lock(eb);
3529 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3530 break;
3531 btrfs_tree_unlock(eb);
3532 }
3533 }
3534
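 /*
  * We need to do this under the refs_lock to prevent races with anyone
  * checking whether the eb is under IO: between clearing the dirty bit and
  * setting the writeback bit no IO bits are set for a short period of time.
  */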
3540 spin_lock(&eb->refs_lock);
3541 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3542 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3543 spin_unlock(&eb->refs_lock);
3544 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3545 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3546 -eb->len,
3547 fs_info->dirty_metadata_batch);
3548 ret = 1;
3549 } else {
3550 spin_unlock(&eb->refs_lock);
3551 }
3552
3553 btrfs_tree_unlock(eb);
3554
3555 if (!ret)
3556 return ret;
3557
3558 num_pages = num_extent_pages(eb);
3559 for (i = 0; i < num_pages; i++) {
3560 struct page *p = eb->pages[i];
3561
3562 if (!trylock_page(p)) {
3563 if (!flush) {
3564 flush_write_bio(epd);
3565 flush = 1;
3566 }
3567 lock_page(p);
3568 }
3569 }
3570
3571 return ret;
3572}
3573
3574static void end_extent_buffer_writeback(struct extent_buffer *eb)
3575{
3576 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3577 smp_mb__after_atomic();
3578 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3579}
3580
3581static void set_btree_ioerr(struct page *page)
3582{
3583 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3584
3585 SetPageError(page);
3586 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3587 return;
3588
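 /*
  * A failed metadata write cannot be reported through the page or the btree
  * inode's mapping alone: by the time fsync or the transaction commit looks
  * for errors, the extent buffer and its pages may already have been
  * released and reused, and the error would be lost.  So record the failure
  * in fs_info flags, chosen by eb->log_index: -1 means the buffer belongs to
  * the btree of the running transaction, 0 and 1 identify the two log trees
  * used by fsync.
  */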
3627 switch (eb->log_index) {
3628 case -1:
3629 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3630 break;
3631 case 0:
3632 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3633 break;
3634 case 1:
3635 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3636 break;
3637 default:
3638 BUG();
3639 }
3640}
3641
3642static void end_bio_extent_buffer_writepage(struct bio *bio)
3643{
3644 struct bio_vec *bvec;
3645 struct extent_buffer *eb;
3646 int i, done;
3647 struct bvec_iter_all iter_all;
3648
3649 ASSERT(!bio_flagged(bio, BIO_CLONED));
3650 bio_for_each_segment_all(bvec, bio, i, iter_all) {
3651 struct page *page = bvec->bv_page;
3652
3653 eb = (struct extent_buffer *)page->private;
3654 BUG_ON(!eb);
3655 done = atomic_dec_and_test(&eb->io_pages);
3656
3657 if (bio->bi_status ||
3658 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3659 ClearPageUptodate(page);
3660 set_btree_ioerr(page);
3661 }
3662
3663 end_page_writeback(page);
3664
3665 if (!done)
3666 continue;
3667
3668 end_extent_buffer_writeback(eb);
3669 }
3670
3671 bio_put(bio);
3672}
3673
3674static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3675 struct btrfs_fs_info *fs_info,
3676 struct writeback_control *wbc,
3677 struct extent_page_data *epd)
3678{
3679 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3680 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3681 u64 offset = eb->start;
3682 u32 nritems;
3683 int i, num_pages;
3684 unsigned long start, end;
3685 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3686 int ret = 0;
3687
3688 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3689 num_pages = num_extent_pages(eb);
3690 atomic_set(&eb->io_pages, num_pages);
3691
3692
3693 nritems = btrfs_header_nritems(eb);
3694 if (btrfs_header_level(eb) > 0) {
3695 end = btrfs_node_key_ptr_offset(nritems);
3696
3697 memzero_extent_buffer(eb, end, eb->len - end);
3698 } else {
3699
3700
3701
3702
3703 start = btrfs_item_nr_offset(nritems);
3704 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
3705 memzero_extent_buffer(eb, start, end - start);
3706 }
3707
3708 for (i = 0; i < num_pages; i++) {
3709 struct page *p = eb->pages[i];
3710
3711 clear_page_dirty_for_io(p);
3712 set_page_writeback(p);
3713 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3714 p, offset, PAGE_SIZE, 0, bdev,
3715 &epd->bio,
3716 end_bio_extent_buffer_writepage,
3717 0, 0, 0, false);
3718 if (ret) {
3719 set_btree_ioerr(p);
3720 if (PageWriteback(p))
3721 end_page_writeback(p);
3722 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3723 end_extent_buffer_writeback(eb);
3724 ret = -EIO;
3725 break;
3726 }
3727 offset += PAGE_SIZE;
3728 update_nr_written(wbc, 1);
3729 unlock_page(p);
3730 }
3731
3732 if (unlikely(ret)) {
3733 for (; i < num_pages; i++) {
3734 struct page *p = eb->pages[i];
3735 clear_page_dirty_for_io(p);
3736 unlock_page(p);
3737 }
3738 }
3739
3740 return ret;
3741}
3742
3743int btree_write_cache_pages(struct address_space *mapping,
3744 struct writeback_control *wbc)
3745{
3746 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3747 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3748 struct extent_buffer *eb, *prev_eb = NULL;
3749 struct extent_page_data epd = {
3750 .bio = NULL,
3751 .tree = tree,
3752 .extent_locked = 0,
3753 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3754 };
3755 int ret = 0;
3756 int done = 0;
3757 int nr_to_write_done = 0;
3758 struct pagevec pvec;
3759 int nr_pages;
3760 pgoff_t index;
3761 pgoff_t end;
3762 int scanned = 0;
3763 xa_mark_t tag;
3764
3765 pagevec_init(&pvec);
3766 if (wbc->range_cyclic) {
3767 index = mapping->writeback_index;
3768 end = -1;
3769 } else {
3770 index = wbc->range_start >> PAGE_SHIFT;
3771 end = wbc->range_end >> PAGE_SHIFT;
3772 scanned = 1;
3773 }
3774 if (wbc->sync_mode == WB_SYNC_ALL)
3775 tag = PAGECACHE_TAG_TOWRITE;
3776 else
3777 tag = PAGECACHE_TAG_DIRTY;
3778retry:
3779 if (wbc->sync_mode == WB_SYNC_ALL)
3780 tag_pages_for_writeback(mapping, index, end);
3781 while (!done && !nr_to_write_done && (index <= end) &&
3782 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
3783 tag))) {
3784 unsigned i;
3785
3786 scanned = 1;
3787 for (i = 0; i < nr_pages; i++) {
3788 struct page *page = pvec.pages[i];
3789
3790 if (!PagePrivate(page))
3791 continue;
3792
3793 spin_lock(&mapping->private_lock);
3794 if (!PagePrivate(page)) {
3795 spin_unlock(&mapping->private_lock);
3796 continue;
3797 }
3798
3799 eb = (struct extent_buffer *)page->private;
3800
3801
3802
3803
3804
3805
3806 if (WARN_ON(!eb)) {
3807 spin_unlock(&mapping->private_lock);
3808 continue;
3809 }
3810
3811 if (eb == prev_eb) {
3812 spin_unlock(&mapping->private_lock);
3813 continue;
3814 }
3815
3816 ret = atomic_inc_not_zero(&eb->refs);
3817 spin_unlock(&mapping->private_lock);
3818 if (!ret)
3819 continue;
3820
3821 prev_eb = eb;
3822 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3823 if (!ret) {
3824 free_extent_buffer(eb);
3825 continue;
3826 }
3827
3828 ret = write_one_eb(eb, fs_info, wbc, &epd);
3829 if (ret) {
3830 done = 1;
3831 free_extent_buffer(eb);
3832 break;
3833 }
3834 free_extent_buffer(eb);
3835
3836
3837
3838
3839
3840
3841 nr_to_write_done = wbc->nr_to_write <= 0;
3842 }
3843 pagevec_release(&pvec);
3844 cond_resched();
3845 }
3846 if (!scanned && !done) {
3847
3848
3849
3850
3851 scanned = 1;
3852 index = 0;
3853 goto retry;
3854 }
3855 flush_write_bio(&epd);
3856 return ret;
3857}
3858
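/**
 * extent_write_cache_pages - walk the list of dirty pages of the given
 * address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @epd: holds context for the write, namely the bio
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
 * and msync() need to guarantee that dirty data is flushed to disk properly.
 * That's why WB_SYNC_ALL waits on pages that are already under writeback
 * instead of skipping them.
 */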
3873static int extent_write_cache_pages(struct address_space *mapping,
3874 struct writeback_control *wbc,
3875 struct extent_page_data *epd)
3876{
3877 struct inode *inode = mapping->host;
3878 int ret = 0;
3879 int done = 0;
3880 int nr_to_write_done = 0;
3881 struct pagevec pvec;
3882 int nr_pages;
3883 pgoff_t index;
3884 pgoff_t end;
3885 pgoff_t done_index;
3886 int range_whole = 0;
3887 int scanned = 0;
3888 xa_mark_t tag;
3889
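 /*
  * We have to hold onto the inode so that ordered extents can do their work
  * when the IO finishes.  The alternative to this is failing to add an
  * ordered extent if the igrab() fails there and that is a huge pain to deal
  * with, so instead just hold onto the inode throughout the writepages
  * operation.  If it fails here we are freeing up the inode anyway and we'd
  * rather not waste our time writing out stuff that is going to be deleted
  * anyway.
  */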
3899 if (!igrab(inode))
3900 return 0;
3901
3902 pagevec_init(&pvec);
3903 if (wbc->range_cyclic) {
3904 index = mapping->writeback_index;
3905 end = -1;
3906 } else {
3907 index = wbc->range_start >> PAGE_SHIFT;
3908 end = wbc->range_end >> PAGE_SHIFT;
3909 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3910 range_whole = 1;
3911 scanned = 1;
3912 }
3913
3914
3915
3916
3917
3918
3919
3920
3921 if (range_whole && wbc->nr_to_write == LONG_MAX &&
3922 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
3923 &BTRFS_I(inode)->runtime_flags))
3924 wbc->tagged_writepages = 1;
3925
3926 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3927 tag = PAGECACHE_TAG_TOWRITE;
3928 else
3929 tag = PAGECACHE_TAG_DIRTY;
3930retry:
3931 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3932 tag_pages_for_writeback(mapping, index, end);
3933 done_index = index;
3934 while (!done && !nr_to_write_done && (index <= end) &&
3935 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
3936 &index, end, tag))) {
3937 unsigned i;
3938
3939 scanned = 1;
3940 for (i = 0; i < nr_pages; i++) {
3941 struct page *page = pvec.pages[i];
3942
3943 done_index = page->index;
3944
3945
3946
3947
3948
3949
3950
3951 if (!trylock_page(page)) {
3952 flush_write_bio(epd);
3953 lock_page(page);
3954 }
3955
3956 if (unlikely(page->mapping != mapping)) {
3957 unlock_page(page);
3958 continue;
3959 }
3960
3961 if (wbc->sync_mode != WB_SYNC_NONE) {
3962 if (PageWriteback(page))
3963 flush_write_bio(epd);
3964 wait_on_page_writeback(page);
3965 }
3966
3967 if (PageWriteback(page) ||
3968 !clear_page_dirty_for_io(page)) {
3969 unlock_page(page);
3970 continue;
3971 }
3972
3973 ret = __extent_writepage(page, wbc, epd);
3974
3975 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
3976 unlock_page(page);
3977 ret = 0;
3978 }
3979 if (ret < 0) {
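 /*
  * done_index is set past this page, so media errors will not choke
  * background writeout for the entire file.  This has consequences for
  * range_cyclic semantics (ie. cycles continue even on errors), but we
  * want to stop the cyclic writeback and keep going with the error.
  */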
3989 done_index = page->index + 1;
3990 done = 1;
3991 break;
3992 }
3993
3994
3995
3996
3997
3998
3999 nr_to_write_done = wbc->nr_to_write <= 0;
4000 }
4001 pagevec_release(&pvec);
4002 cond_resched();
4003 }
4004 if (!scanned && !done) {
4005
4006
4007
4008
4009 scanned = 1;
4010 index = 0;
4011 goto retry;
4012 }
4013
4014 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4015 mapping->writeback_index = done_index;
4016
4017 btrfs_add_delayed_iput(inode);
4018 return ret;
4019}
4020
4021int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4022{
4023 int ret;
4024 struct extent_page_data epd = {
4025 .bio = NULL,
4026 .tree = &BTRFS_I(page->mapping->host)->io_tree,
4027 .extent_locked = 0,
4028 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4029 };
4030
4031 ret = __extent_writepage(page, wbc, &epd);
4032
4033 flush_write_bio(&epd);
4034 return ret;
4035}
4036
4037int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4038 int mode)
4039{
4040 int ret = 0;
4041 struct address_space *mapping = inode->i_mapping;
4042 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
4043 struct page *page;
4044 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4045 PAGE_SHIFT;
4046
4047 struct extent_page_data epd = {
4048 .bio = NULL,
4049 .tree = tree,
4050 .extent_locked = 1,
4051 .sync_io = mode == WB_SYNC_ALL,
4052 };
4053 struct writeback_control wbc_writepages = {
4054 .sync_mode = mode,
4055 .nr_to_write = nr_pages * 2,
4056 .range_start = start,
4057 .range_end = end + 1,
4058 };
4059
4060 while (start <= end) {
4061 page = find_get_page(mapping, start >> PAGE_SHIFT);
4062 if (clear_page_dirty_for_io(page))
4063 ret = __extent_writepage(page, &wbc_writepages, &epd);
4064 else {
4065 btrfs_writepage_endio_finish_ordered(page, start,
4066 start + PAGE_SIZE - 1, 1);
4067 unlock_page(page);
4068 }
4069 put_page(page);
4070 start += PAGE_SIZE;
4071 }
4072
4073 flush_write_bio(&epd);
4074 return ret;
4075}
4076
4077int extent_writepages(struct address_space *mapping,
4078 struct writeback_control *wbc)
4079{
4080 int ret = 0;
4081 struct extent_page_data epd = {
4082 .bio = NULL,
4083 .tree = &BTRFS_I(mapping->host)->io_tree,
4084 .extent_locked = 0,
4085 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4086 };
4087
4088 ret = extent_write_cache_pages(mapping, wbc, &epd);
4089 flush_write_bio(&epd);
4090 return ret;
4091}
4092
4093int extent_readpages(struct address_space *mapping, struct list_head *pages,
4094 unsigned nr_pages)
4095{
4096 struct bio *bio = NULL;
4097 unsigned long bio_flags = 0;
4098 struct page *pagepool[16];
4099 struct extent_map *em_cached = NULL;
4100 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
4101 int nr = 0;
4102 u64 prev_em_start = (u64)-1;
4103
4104 while (!list_empty(pages)) {
4105 for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
4106 struct page *page = lru_to_page(pages);
4107
4108 prefetchw(&page->flags);
4109 list_del(&page->lru);
4110 if (add_to_page_cache_lru(page, mapping, page->index,
4111 readahead_gfp_mask(mapping))) {
4112 put_page(page);
4113 continue;
4114 }
4115
4116 pagepool[nr++] = page;
4117 }
4118
4119 __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
4120 &bio_flags, &prev_em_start);
4121 }
4122
4123 if (em_cached)
4124 free_extent_map(em_cached);
4125
4126 if (bio)
4127 return submit_one_bio(bio, 0, bio_flags);
4128 return 0;
4129}
4130
4131
4132
4133
4134
4135
4136int extent_invalidatepage(struct extent_io_tree *tree,
4137 struct page *page, unsigned long offset)
4138{
4139 struct extent_state *cached_state = NULL;
4140 u64 start = page_offset(page);
4141 u64 end = start + PAGE_SIZE - 1;
4142 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4143
4144 start += ALIGN(offset, blocksize);
4145 if (start > end)
4146 return 0;
4147
4148 lock_extent_bits(tree, start, end, &cached_state);
4149 wait_on_page_writeback(page);
4150 clear_extent_bit(tree, start, end,
4151 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4152 EXTENT_DO_ACCOUNTING,
4153 1, 1, &cached_state);
4154 return 0;
4155}
4156
4157
4158
4159
4160
4161
4162static int try_release_extent_state(struct extent_io_tree *tree,
4163 struct page *page, gfp_t mask)
4164{
4165 u64 start = page_offset(page);
4166 u64 end = start + PAGE_SIZE - 1;
4167 int ret = 1;
4168
4169 if (test_range_bit(tree, start, end,
4170 EXTENT_IOBITS, 0, NULL))
4171 ret = 0;
4172 else {
4173
4174
4175
4176
4177 ret = __clear_extent_bit(tree, start, end,
4178 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4179 0, 0, NULL, mask, NULL);
4180
4181
4182
4183
4184 if (ret < 0)
4185 ret = 0;
4186 else
4187 ret = 1;
4188 }
4189 return ret;
4190}
4191
4192
4193
4194
4195
4196
4197int try_release_extent_mapping(struct page *page, gfp_t mask)
4198{
4199 struct extent_map *em;
4200 u64 start = page_offset(page);
4201 u64 end = start + PAGE_SIZE - 1;
4202 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4203 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4204 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4205
4206 if (gfpflags_allow_blocking(mask) &&
4207 page->mapping->host->i_size > SZ_16M) {
4208 u64 len;
4209 while (start <= end) {
4210 len = end - start + 1;
4211 write_lock(&map->lock);
4212 em = lookup_extent_mapping(map, start, len);
4213 if (!em) {
4214 write_unlock(&map->lock);
4215 break;
4216 }
4217 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4218 em->start != start) {
4219 write_unlock(&map->lock);
4220 free_extent_map(em);
4221 break;
4222 }
4223 if (!test_range_bit(tree, em->start,
4224 extent_map_end(em) - 1,
4225 EXTENT_LOCKED | EXTENT_WRITEBACK,
4226 0, NULL)) {
4227 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4228 &btrfs_inode->runtime_flags);
4229 remove_extent_mapping(map, em);
4230
4231 free_extent_map(em);
4232 }
4233 start = extent_map_end(em);
4234 write_unlock(&map->lock);
4235
4236
4237 free_extent_map(em);
4238 }
4239 }
4240 return try_release_extent_state(tree, page, mask);
4241}
4242
4243
4244
4245
4246
4247static struct extent_map *get_extent_skip_holes(struct inode *inode,
4248 u64 offset, u64 last)
4249{
4250 u64 sectorsize = btrfs_inode_sectorsize(inode);
4251 struct extent_map *em;
4252 u64 len;
4253
4254 if (offset >= last)
4255 return NULL;
4256
4257 while (1) {
4258 len = last - offset;
4259 if (len == 0)
4260 break;
4261 len = ALIGN(len, sectorsize);
4262 em = btrfs_get_extent_fiemap(BTRFS_I(inode), offset, len);
4263 if (IS_ERR_OR_NULL(em))
4264 return em;
4265
4266
4267 if (em->block_start != EXTENT_MAP_HOLE)
4268 return em;
4269
4270
4271 offset = extent_map_end(em);
4272 free_extent_map(em);
4273 if (offset >= last)
4274 break;
4275 }
4276 return NULL;
4277}
4278
4279
4280
4281
4282
4283
4284struct fiemap_cache {
4285 u64 offset;
4286 u64 phys;
4287 u64 len;
4288 u32 flags;
4289 bool cached;
4290};
4291
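/*
 * Helper to submit a fiemap extent.
 *
 * Will try to merge the current fiemap extent specified by @offset, @phys,
 * @len and @flags with the cached one.  Only when merging fails is the cached
 * extent submitted through fiemap_fill_next_extent().
 *
 * Return value is the same as fiemap_fill_next_extent().
 */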
4302static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4303 struct fiemap_cache *cache,
4304 u64 offset, u64 phys, u64 len, u32 flags)
4305{
4306 int ret = 0;
4307
4308 if (!cache->cached)
4309 goto assign;
4310
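 /*
  * Sanity check: extent_fiemap() should have ensured that the new extent
  * never overlaps the cached one.  Not recoverable.
  */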
4318 if (cache->offset + cache->len > offset) {
4319 WARN_ON(1);
4320 return -EINVAL;
4321 }
4322
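 /*
  * Only merge fiemap extents if
  * 1) Their logical addresses are continuous
  *
  * 2) Their physical addresses are continuous
  *    So truly compressed (physical size smaller than logical size)
  *    extents won't get merged with each other
  *
  * 3) Share same flags except FIEMAP_EXTENT_LAST
  *    So regular extent won't get merged with prealloc extent
  */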
4334 if (cache->offset + cache->len == offset &&
4335 cache->phys + cache->len == phys &&
4336 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4337 (flags & ~FIEMAP_EXTENT_LAST)) {
4338 cache->len += len;
4339 cache->flags |= flags;
4340 goto try_submit_last;
4341 }
4342
4343
4344 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4345 cache->len, cache->flags);
4346 cache->cached = false;
4347 if (ret)
4348 return ret;
4349assign:
4350 cache->cached = true;
4351 cache->offset = offset;
4352 cache->phys = phys;
4353 cache->len = len;
4354 cache->flags = flags;
4355try_submit_last:
4356 if (cache->flags & FIEMAP_EXTENT_LAST) {
4357 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4358 cache->phys, cache->len, cache->flags);
4359 cache->cached = false;
4360 }
4361 return ret;
4362}
4363
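/*
 * Emit the last cached fiemap extent.
 *
 * The last fiemap cache may still be cached in the following case:
 * 0                  4k                    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */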
4375static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
4376 struct fiemap_extent_info *fieinfo,
4377 struct fiemap_cache *cache)
4378{
4379 int ret;
4380
4381 if (!cache->cached)
4382 return 0;
4383
4384 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4385 cache->len, cache->flags);
4386 cache->cached = false;
4387 if (ret > 0)
4388 ret = 0;
4389 return ret;
4390}
4391
4392int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4393 __u64 start, __u64 len)
4394{
4395 int ret = 0;
4396 u64 off = start;
4397 u64 max = start + len;
4398 u32 flags = 0;
4399 u32 found_type;
4400 u64 last;
4401 u64 last_for_get_extent = 0;
4402 u64 disko = 0;
4403 u64 isize = i_size_read(inode);
4404 struct btrfs_key found_key;
4405 struct extent_map *em = NULL;
4406 struct extent_state *cached_state = NULL;
4407 struct btrfs_path *path;
4408 struct btrfs_root *root = BTRFS_I(inode)->root;
4409 struct fiemap_cache cache = { 0 };
4410 int end = 0;
4411 u64 em_start = 0;
4412 u64 em_len = 0;
4413 u64 em_end = 0;
4414
4415 if (len == 0)
4416 return -EINVAL;
4417
4418 path = btrfs_alloc_path();
4419 if (!path)
4420 return -ENOMEM;
4421 path->leave_spinning = 1;
4422
4423 start = round_down(start, btrfs_inode_sectorsize(inode));
4424 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4425
4426
4427
4428
4429
4430 ret = btrfs_lookup_file_extent(NULL, root, path,
4431 btrfs_ino(BTRFS_I(inode)), -1, 0);
4432 if (ret < 0) {
4433 btrfs_free_path(path);
4434 return ret;
4435 } else {
4436 WARN_ON(!ret);
4437 if (ret == 1)
4438 ret = 0;
4439 }
4440
4441 path->slots[0]--;
4442 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4443 found_type = found_key.type;
4444
4445
4446 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4447 found_type != BTRFS_EXTENT_DATA_KEY) {
4448
4449 last = (u64)-1;
4450 last_for_get_extent = isize;
4451 } else {
4452
4453
4454
4455
4456
4457 last = found_key.offset;
4458 last_for_get_extent = last + 1;
4459 }
4460 btrfs_release_path(path);
4461
4462
4463
4464
4465
4466
4467 if (last < isize) {
4468 last = (u64)-1;
4469 last_for_get_extent = isize;
4470 }
4471
4472 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4473 &cached_state);
4474
4475 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4476 if (!em)
4477 goto out;
4478 if (IS_ERR(em)) {
4479 ret = PTR_ERR(em);
4480 goto out;
4481 }
4482
4483 while (!end) {
4484 u64 offset_in_extent = 0;
4485
4486
4487 if (em->start >= max || extent_map_end(em) < off)
4488 break;
4489
4490
4491
4492
4493
4494
4495
4496 em_start = max(em->start, off);
4497
4498
4499
4500
4501
4502
4503
4504 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4505 offset_in_extent = em_start - em->start;
4506 em_end = extent_map_end(em);
4507 em_len = em_end - em_start;
4508 flags = 0;
4509 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4510 disko = em->block_start + offset_in_extent;
4511 else
4512 disko = 0;
4513
4514
4515
4516
4517 off = extent_map_end(em);
4518 if (off >= max)
4519 end = 1;
4520
4521 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4522 end = 1;
4523 flags |= FIEMAP_EXTENT_LAST;
4524 } else if (em->block_start == EXTENT_MAP_INLINE) {
4525 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4526 FIEMAP_EXTENT_NOT_ALIGNED);
4527 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4528 flags |= (FIEMAP_EXTENT_DELALLOC |
4529 FIEMAP_EXTENT_UNKNOWN);
4530 } else if (fieinfo->fi_extents_max) {
4531 u64 bytenr = em->block_start -
4532 (em->start - em->orig_start);
4533
4534
4535
4536
4537
4538
4539
4540
4541 ret = btrfs_check_shared(root,
4542 btrfs_ino(BTRFS_I(inode)),
4543 bytenr);
4544 if (ret < 0)
4545 goto out_free;
4546 if (ret)
4547 flags |= FIEMAP_EXTENT_SHARED;
4548 ret = 0;
4549 }
4550 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4551 flags |= FIEMAP_EXTENT_ENCODED;
4552 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4553 flags |= FIEMAP_EXTENT_UNWRITTEN;
4554
4555 free_extent_map(em);
4556 em = NULL;
4557 if ((em_start >= last) || em_len == (u64)-1 ||
4558 (last == (u64)-1 && isize <= em_end)) {
4559 flags |= FIEMAP_EXTENT_LAST;
4560 end = 1;
4561 }
4562
4563
4564 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4565 if (IS_ERR(em)) {
4566 ret = PTR_ERR(em);
4567 goto out;
4568 }
4569 if (!em) {
4570 flags |= FIEMAP_EXTENT_LAST;
4571 end = 1;
4572 }
4573 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4574 em_len, flags);
4575 if (ret) {
4576 if (ret == 1)
4577 ret = 0;
4578 goto out_free;
4579 }
4580 }
4581out_free:
4582 if (!ret)
4583 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
4584 free_extent_map(em);
4585out:
4586 btrfs_free_path(path);
4587 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4588 &cached_state);
4589 return ret;
4590}
4591
4592static void __free_extent_buffer(struct extent_buffer *eb)
4593{
4594 btrfs_leak_debug_del(&eb->leak_list);
4595 kmem_cache_free(extent_buffer_cache, eb);
4596}
4597
4598int extent_buffer_under_io(struct extent_buffer *eb)
4599{
4600 return (atomic_read(&eb->io_pages) ||
4601 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4602 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4603}
4604
4605
4606
4607
4608static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4609{
4610 int i;
4611 int num_pages;
4612 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4613
4614 BUG_ON(extent_buffer_under_io(eb));
4615
4616 num_pages = num_extent_pages(eb);
4617 for (i = 0; i < num_pages; i++) {
4618 struct page *page = eb->pages[i];
4619
4620 if (!page)
4621 continue;
4622 if (mapped)
4623 spin_lock(&page->mapping->private_lock);
4624
4625
4626
4627
4628
4629
4630
4631 if (PagePrivate(page) &&
4632 page->private == (unsigned long)eb) {
4633 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4634 BUG_ON(PageDirty(page));
4635 BUG_ON(PageWriteback(page));
4636
4637
4638
4639
4640 ClearPagePrivate(page);
4641 set_page_private(page, 0);
4642
4643 put_page(page);
4644 }
4645
4646 if (mapped)
4647 spin_unlock(&page->mapping->private_lock);
4648
4649
4650 put_page(page);
4651 }
4652}
4653
4654
4655
4656
4657static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4658{
4659 btrfs_release_extent_buffer_pages(eb);
4660 __free_extent_buffer(eb);
4661}
4662
4663static struct extent_buffer *
4664__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4665 unsigned long len)
4666{
4667 struct extent_buffer *eb = NULL;
4668
4669 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4670 eb->start = start;
4671 eb->len = len;
4672 eb->fs_info = fs_info;
4673 eb->bflags = 0;
4674 rwlock_init(&eb->lock);
4675 atomic_set(&eb->write_locks, 0);
4676 atomic_set(&eb->read_locks, 0);
4677 atomic_set(&eb->blocking_readers, 0);
4678 atomic_set(&eb->blocking_writers, 0);
4679 atomic_set(&eb->spinning_readers, 0);
4680 atomic_set(&eb->spinning_writers, 0);
4681 eb->lock_nested = 0;
4682 init_waitqueue_head(&eb->write_lock_wq);
4683 init_waitqueue_head(&eb->read_lock_wq);
4684
4685 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4686
4687 spin_lock_init(&eb->refs_lock);
4688 atomic_set(&eb->refs, 1);
4689 atomic_set(&eb->io_pages, 0);
4690
4691
4692
4693
4694 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4695 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4696 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4697
4698 return eb;
4699}
4700
4701struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4702{
4703 int i;
4704 struct page *p;
4705 struct extent_buffer *new;
4706 int num_pages = num_extent_pages(src);
4707
4708 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4709 if (new == NULL)
4710 return NULL;
4711
4712 for (i = 0; i < num_pages; i++) {
4713 p = alloc_page(GFP_NOFS);
4714 if (!p) {
4715 btrfs_release_extent_buffer(new);
4716 return NULL;
4717 }
4718 attach_extent_buffer_page(new, p);
4719 WARN_ON(PageDirty(p));
4720 SetPageUptodate(p);
4721 new->pages[i] = p;
4722 copy_page(page_address(p), page_address(src->pages[i]));
4723 }
4724
4725 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4726 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
4727
4728 return new;
4729}
4730
4731struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4732 u64 start, unsigned long len)
4733{
4734 struct extent_buffer *eb;
4735 int num_pages;
4736 int i;
4737
4738 eb = __alloc_extent_buffer(fs_info, start, len);
4739 if (!eb)
4740 return NULL;
4741
4742 num_pages = num_extent_pages(eb);
4743 for (i = 0; i < num_pages; i++) {
4744 eb->pages[i] = alloc_page(GFP_NOFS);
4745 if (!eb->pages[i])
4746 goto err;
4747 }
4748 set_extent_buffer_uptodate(eb);
4749 btrfs_set_header_nritems(eb, 0);
4750 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4751
4752 return eb;
4753err:
4754 for (; i > 0; i--)
4755 __free_page(eb->pages[i - 1]);
4756 __free_extent_buffer(eb);
4757 return NULL;
4758}
4759
4760struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4761 u64 start)
4762{
4763 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4764}
4765
4766static void check_buffer_tree_ref(struct extent_buffer *eb)
4767{
4768 int refs;
4769
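 /*
  * The tree ref is tricky.  If the buffer is dirty, EXTENT_BUFFER_TREE_REF
  * must be set together with the reference it stands for, otherwise the code
  * that frees a buffer could end up dropping a dirty page.
  *
  * Once the ref bit is set it won't go away while the buffer is dirty or
  * under writeback, and it also won't go away while we hold our own
  * reference on the eb.  If refs >= 2 and the bit is already set there is
  * nothing to do; otherwise take refs_lock and, if we are the ones who set
  * the bit, also take the reference that goes with it.
  */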
4789 refs = atomic_read(&eb->refs);
4790 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4791 return;
4792
4793 spin_lock(&eb->refs_lock);
4794 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4795 atomic_inc(&eb->refs);
4796 spin_unlock(&eb->refs_lock);
4797}
4798
4799static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4800 struct page *accessed)
4801{
4802 int num_pages, i;
4803
4804 check_buffer_tree_ref(eb);
4805
4806 num_pages = num_extent_pages(eb);
4807 for (i = 0; i < num_pages; i++) {
4808 struct page *p = eb->pages[i];
4809
4810 if (p != accessed)
4811 mark_page_accessed(p);
4812 }
4813}
4814
4815struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4816 u64 start)
4817{
4818 struct extent_buffer *eb;
4819
4820 rcu_read_lock();
4821 eb = radix_tree_lookup(&fs_info->buffer_radix,
4822 start >> PAGE_SHIFT);
4823 if (eb && atomic_inc_not_zero(&eb->refs)) {
4824 rcu_read_unlock();
4825
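 /*
  * Lock our eb's refs_lock to avoid races with free_extent_buffer().
  * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
  * another task running free_extent_buffer() might have seen that flag
  * set, eb->refs == 2, the buffer not under IO and still in the tree
  * (TREE_REF set), and therefore be in the middle of dropping the buffer's
  * reference count twice.  Racing with that and dropping our reference too
  * early could free an extent buffer that is still in use; the empty
  * lock/unlock pair makes us wait for that task to finish its critical
  * section first.
  */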
4840 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4841 spin_lock(&eb->refs_lock);
4842 spin_unlock(&eb->refs_lock);
4843 }
4844 mark_extent_buffer_accessed(eb, NULL);
4845 return eb;
4846 }
4847 rcu_read_unlock();
4848
4849 return NULL;
4850}
4851
4852#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4853struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4854 u64 start)
4855{
4856 struct extent_buffer *eb, *exists = NULL;
4857 int ret;
4858
4859 eb = find_extent_buffer(fs_info, start);
4860 if (eb)
4861 return eb;
4862 eb = alloc_dummy_extent_buffer(fs_info, start);
4863 if (!eb)
4864 return NULL;
4865 eb->fs_info = fs_info;
4866again:
4867 ret = radix_tree_preload(GFP_NOFS);
4868 if (ret)
4869 goto free_eb;
4870 spin_lock(&fs_info->buffer_lock);
4871 ret = radix_tree_insert(&fs_info->buffer_radix,
4872 start >> PAGE_SHIFT, eb);
4873 spin_unlock(&fs_info->buffer_lock);
4874 radix_tree_preload_end();
4875 if (ret == -EEXIST) {
4876 exists = find_extent_buffer(fs_info, start);
4877 if (exists)
4878 goto free_eb;
4879 else
4880 goto again;
4881 }
4882 check_buffer_tree_ref(eb);
4883 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4884
4885 return eb;
4886free_eb:
4887 btrfs_release_extent_buffer(eb);
4888 return exists;
4889}
4890#endif
4891
4892struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4893 u64 start)
4894{
4895 unsigned long len = fs_info->nodesize;
4896 int num_pages;
4897 int i;
4898 unsigned long index = start >> PAGE_SHIFT;
4899 struct extent_buffer *eb;
4900 struct extent_buffer *exists = NULL;
4901 struct page *p;
4902 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4903 int uptodate = 1;
4904 int ret;
4905
4906 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
4907 btrfs_err(fs_info, "bad tree block start %llu", start);
4908 return ERR_PTR(-EINVAL);
4909 }
4910
4911 eb = find_extent_buffer(fs_info, start);
4912 if (eb)
4913 return eb;
4914
4915 eb = __alloc_extent_buffer(fs_info, start, len);
4916 if (!eb)
4917 return ERR_PTR(-ENOMEM);
4918
4919 num_pages = num_extent_pages(eb);
4920 for (i = 0; i < num_pages; i++, index++) {
4921 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4922 if (!p) {
4923 exists = ERR_PTR(-ENOMEM);
4924 goto free_eb;
4925 }
4926
4927 spin_lock(&mapping->private_lock);
4928 if (PagePrivate(p)) {
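 /*
  * We could have already allocated an eb for this page and attached one,
  * so lets see if we can get a ref on the existing eb, and if we can we
  * know it's good and we can just return that one, else we know we can
  * just overwrite page->private.
  */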
4936 exists = (struct extent_buffer *)p->private;
4937 if (atomic_inc_not_zero(&exists->refs)) {
4938 spin_unlock(&mapping->private_lock);
4939 unlock_page(p);
4940 put_page(p);
4941 mark_extent_buffer_accessed(exists, p);
4942 goto free_eb;
4943 }
4944 exists = NULL;
4945
4946
4947
4948
4949
4950 ClearPagePrivate(p);
4951 WARN_ON(PageDirty(p));
4952 put_page(p);
4953 }
4954 attach_extent_buffer_page(eb, p);
4955 spin_unlock(&mapping->private_lock);
4956 WARN_ON(PageDirty(p));
4957 eb->pages[i] = p;
4958 if (!PageUptodate(p))
4959 uptodate = 0;
4960
4961
4962
4963
4964
4965
4966
4967
4968 }
4969 if (uptodate)
4970 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4971again:
4972 ret = radix_tree_preload(GFP_NOFS);
4973 if (ret) {
4974 exists = ERR_PTR(ret);
4975 goto free_eb;
4976 }
4977
4978 spin_lock(&fs_info->buffer_lock);
4979 ret = radix_tree_insert(&fs_info->buffer_radix,
4980 start >> PAGE_SHIFT, eb);
4981 spin_unlock(&fs_info->buffer_lock);
4982 radix_tree_preload_end();
4983 if (ret == -EEXIST) {
4984 exists = find_extent_buffer(fs_info, start);
4985 if (exists)
4986 goto free_eb;
4987 else
4988 goto again;
4989 }
4990
4991 check_buffer_tree_ref(eb);
4992 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4993
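 /*
  * Now it's safe to unlock the pages because any calls to
  * btree_releasepage will correctly detect that a page belongs to a
  * live buffer and won't free them prematurely.
  */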
4999 for (i = 0; i < num_pages; i++)
5000 unlock_page(eb->pages[i]);
5001 return eb;
5002
5003free_eb:
5004 WARN_ON(!atomic_dec_and_test(&eb->refs));
5005 for (i = 0; i < num_pages; i++) {
5006 if (eb->pages[i])
5007 unlock_page(eb->pages[i]);
5008 }
5009
5010 btrfs_release_extent_buffer(eb);
5011 return exists;
5012}
5013
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
	struct extent_buffer *eb =
			container_of(head, struct extent_buffer, rcu_head);

	__free_extent_buffer(eb);
}

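/*
 * Drop one reference on @eb.  If it was the last one, remove the buffer from
 * the radix tree, release its pages and free it (via RCU for mapped buffers).
 * Expects eb->refs_lock to be held; returns 1 if the buffer was released.
 */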
static int release_extent_buffer(struct extent_buffer *eb)
{
	lockdep_assert_held(&eb->refs_lock);

	WARN_ON(atomic_read(&eb->refs) == 0);
	if (atomic_dec_and_test(&eb->refs)) {
		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
			struct btrfs_fs_info *fs_info = eb->fs_info;

			spin_unlock(&eb->refs_lock);

			spin_lock(&fs_info->buffer_lock);
			radix_tree_delete(&fs_info->buffer_radix,
					  eb->start >> PAGE_SHIFT);
			spin_unlock(&fs_info->buffer_lock);
		} else {
			spin_unlock(&eb->refs_lock);
		}

		/* Should be safe to release our pages at this point. */
		btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
		if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
			__free_extent_buffer(eb);
			return 1;
		}
#endif
		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
		return 1;
	}
	spin_unlock(&eb->refs_lock);

	return 0;
}

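/*
 * Drop a reference on @eb.  The common case is a lockless cmpxchg decrement;
 * once the count gets low enough we fall through to the locked path so a
 * stale buffer can also give up its tree reference before
 * release_extent_buffer() runs.
 */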
void free_extent_buffer(struct extent_buffer *eb)
{
	int refs;
	int old;

	if (!eb)
		return;

	while (1) {
		refs = atomic_read(&eb->refs);
		if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
		    || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
			refs == 1))
			break;
		old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
		if (old == refs)
			return;
	}

	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) == 2 &&
	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
	    !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);

	/*
	 * release_extent_buffer() drops eb->refs_lock for us and frees the
	 * buffer if this was the last reference.
	 */
	release_extent_buffer(eb);
}

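/*
 * Mark @eb stale and drop a reference.  If only the tree reference remains
 * and no IO is in flight, the tree reference is dropped as well so the
 * buffer can be released.
 */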
void free_extent_buffer_stale(struct extent_buffer *eb)
{
	if (!eb)
		return;

	spin_lock(&eb->refs_lock);
	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);

	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);
	release_extent_buffer(eb);
}

void clear_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	struct page *page;

	num_pages = num_extent_pages(eb);

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageDirty(page))
			continue;

		lock_page(page);
		WARN_ON(!PagePrivate(page));

		clear_page_dirty_for_io(page);
		xa_lock_irq(&page->mapping->i_pages);
		if (!PageDirty(page))
			__xa_clear_mark(&page->mapping->i_pages,
					page_index(page), PAGECACHE_TAG_DIRTY);
		xa_unlock_irq(&page->mapping->i_pages);
		ClearPageError(page);
		unlock_page(page);
	}
	WARN_ON(atomic_read(&eb->refs) == 0);
}

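/*
 * Mark @eb and all of its pages dirty.  Returns true if the buffer was
 * already dirty.
 */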
bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	bool was_dirty;

	check_buffer_tree_ref(eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);

	num_pages = num_extent_pages(eb);
	WARN_ON(atomic_read(&eb->refs) == 0);
	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));

	if (!was_dirty)
		for (i = 0; i < num_pages; i++)
			set_page_dirty(eb->pages[i]);

#ifdef CONFIG_BTRFS_DEBUG
	for (i = 0; i < num_pages; i++)
		ASSERT(PageDirty(eb->pages[i]));
#endif

	return was_dirty;
}

void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (page)
			ClearPageUptodate(page);
	}
}

void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		SetPageUptodate(page);
	}
}

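/*
 * Read the pages of @eb that are not yet uptodate.  With WAIT_NONE the pages
 * are only trylocked and the reads are merely submitted; with WAIT_COMPLETE
 * we wait for the IO to finish and return -EIO if a page failed to become
 * uptodate.
 */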
int read_extent_buffer_pages(struct extent_io_tree *tree,
			     struct extent_buffer *eb, int wait, int mirror_num)
{
	int i;
	struct page *page;
	int err;
	int ret = 0;
	int locked_pages = 0;
	int all_uptodate = 1;
	int num_pages;
	unsigned long num_reads = 0;
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (wait == WAIT_NONE) {
			if (!trylock_page(page))
				goto unlock_exit;
		} else {
			lock_page(page);
		}
		locked_pages++;
	}

	/*
	 * All pages were locked above so that the uptodate bits we check here
	 * cannot change underneath us, e.g. via
	 * clear_extent_buffer_uptodate().
	 */
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageUptodate(page)) {
			num_reads++;
			all_uptodate = 0;
		}
	}

	if (all_uptodate) {
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
		goto unlock_exit;
	}

	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	eb->read_mirror = 0;
	atomic_set(&eb->io_pages, num_reads);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];

		if (!PageUptodate(page)) {
			if (ret) {
				atomic_dec(&eb->io_pages);
				unlock_page(page);
				continue;
			}

			ClearPageError(page);
			err = __extent_read_full_page(tree, page,
						      btree_get_extent, &bio,
						      mirror_num, &bio_flags,
						      REQ_META);
			if (err) {
				ret = err;
				/*
				 * On error the page failed to add itself to
				 * the bio, so it will never complete through
				 * the end_io path; drop its io_pages count
				 * ourselves.
				 */
				atomic_dec(&eb->io_pages);
			}
		} else {
			unlock_page(page);
		}
	}

	if (bio) {
		err = submit_one_bio(bio, mirror_num, bio_flags);
		if (err)
			return err;
	}

	if (ret || wait != WAIT_COMPLETE)
		return ret;

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			ret = -EIO;
	}

	return ret;

unlock_exit:
	while (locked_pages > 0) {
		locked_pages--;
		page = eb->pages[locked_pages];
		unlock_page(page);
	}
	return ret;
}

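/*
 * Copy @len bytes from offset @start inside @eb into @dstv, walking the
 * buffer's pages.  Out of range requests are warned about and zero filled.
 */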
void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
			unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	if (start + len > eb->len) {
		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
		     eb->start, eb->len, start, len);
		memset(dst, 0, len);
		return;
	}

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		memcpy(dst, kaddr + offset, cur);

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

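/*
 * Same as read_extent_buffer() but copies into a user space buffer.
 * Returns -EFAULT if copy_to_user() fails, 0 otherwise.
 */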
int read_extent_buffer_to_user(const struct extent_buffer *eb,
			       void __user *dstv,
			       unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char __user *dst = (char __user *)dstv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));
		kaddr = page_address(page);
		if (copy_to_user(dst, kaddr + offset, cur)) {
			ret = -EFAULT;
			break;
		}

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}

	return ret;
}

/*
 * Map a contiguous range of an extent buffer for direct access.
 *
 * Returns 0 if the range [start, start + min_len) lies within one page (and
 * fills in *map, *map_start and *map_len), 1 if it spans two pages and
 * -EINVAL if it is outside the extent buffer.
 */
int map_private_extent_buffer(const struct extent_buffer *eb,
			      unsigned long start, unsigned long min_len,
			      char **map, unsigned long *map_start,
			      unsigned long *map_len)
{
	size_t offset;
	char *kaddr;
	struct page *p;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len - 1) >>
			      PAGE_SHIFT;

	if (start + min_len > eb->len) {
		WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
		     eb->start, eb->len, start, min_len);
		return -EINVAL;
	}

	if (i != end_i)
		return 1;

	if (i == 0) {
		offset = start_offset;
		*map_start = 0;
	} else {
		offset = 0;
		*map_start = ((u64)i << PAGE_SHIFT) - start_offset;
	}

	p = eb->pages[i];
	kaddr = page_address(p);
	*map = kaddr + offset;
	*map_len = PAGE_SIZE - offset;
	return 0;
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];

		cur = min(len, (PAGE_SIZE - offset));

		kaddr = page_address(page);
		ret = memcmp(ptr, kaddr + offset, cur);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}

void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
					 const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
	       BTRFS_FSID_SIZE);
}

void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
	       BTRFS_FSID_SIZE);
}

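/* Copy @len bytes from @srcv into @eb starting at offset @start. */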
void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memcpy(kaddr + offset, src, cur);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
			   unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = offset_in_page(eb->start);
	unsigned long i = (start_offset + start) >> PAGE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = offset_in_page(start_offset + start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memset(kaddr + offset, 0, cur);

		len -= cur;
		offset = 0;
		i++;
	}
}

void copy_extent_buffer_full(struct extent_buffer *dst,
			     struct extent_buffer *src)
{
	int i;
	int num_pages;

	ASSERT(dst->len == src->len);

	num_pages = num_extent_pages(dst);
	for (i = 0; i < num_pages; i++)
		copy_page(page_address(dst->pages[i]),
			  page_address(src->pages[i]));
}

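/*
 * Copy @len bytes from @src at @src_offset into @dst at @dst_offset, one
 * page at a time.
 */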
void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;

	WARN_ON(src->len != dst_len);

	offset = offset_in_page(start_offset + dst_offset);

	while (len > 0) {
		page = dst->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_SIZE - offset));

		kaddr = page_address(page);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * eb_bitmap_offset() - calculate the page and offset of the byte containing
 * the given bit number
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page in the extent buffer that contains
 * the given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
static inline void eb_bitmap_offset(struct extent_buffer *eb,
				    unsigned long start, unsigned long nr,
				    unsigned long *page_index,
				    size_t *page_offset)
{
	size_t start_offset = offset_in_page(eb->start);
	size_t byte_offset = BIT_BYTE(nr);
	size_t offset;

	/*
	 * The byte we want is the offset of the extent buffer within its
	 * first page + the offset of the bitmap item in the extent buffer +
	 * the offset of the byte in the bitmap item.
	 */
	offset = start_offset + start + byte_offset;

	*page_index = offset >> PAGE_SHIFT;
	*page_offset = offset_in_page(offset);
}

/**
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;

	eb_bitmap_offset(eb, start, nr, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);
	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}

/**
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to set
 */
void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_set) {
		kaddr[offset] |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] |= mask_to_set;
	}
}

/**
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to clear
 */
void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
				unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	while (len >= bits_to_clear) {
		kaddr[offset] &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] &= ~mask_to_clear;
	}
}

static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr;
	int must_memmove = 0;

	if (dst_page != src_page) {
		src_kaddr = page_address(src_page);
	} else {
		src_kaddr = dst_kaddr;
		if (areas_overlap(src_off, dst_off, len))
			must_memmove = 1;
	}

	if (must_memmove)
		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
	else
		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}

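/*
 * Copy @len bytes inside @dst from @src_offset to @dst_offset.  Both ranges
 * must lie within the extent buffer; callers that may copy between
 * overlapping ranges should use memmove_extent_buffer() instead.
 */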
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	struct btrfs_fs_info *fs_info = dst->fs_info;
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memcpy bogus src_offset %lu move len %lu dst len %lu",
			src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memcpy bogus dst_offset %lu move len %lu dst len %lu",
			dst_offset, len, dst->len);
		BUG_ON(1);
	}

	while (len > 0) {
		dst_off_in_page = offset_in_page(start_offset + dst_offset);
		src_off_in_page = offset_in_page(start_offset + src_offset);

		dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_SHIFT;

		cur = min(len, (unsigned long)(PAGE_SIZE -
					       src_off_in_page));
		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_SIZE - dst_off_in_page));

		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}

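/*
 * Like memcpy_extent_buffer() but safe for overlapping ranges: when the
 * destination is above the source the copy is done backwards, page by page.
 */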
void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	struct btrfs_fs_info *fs_info = dst->fs_info;
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	size_t start_offset = offset_in_page(dst->start);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memmove bogus src_offset %lu move len %lu len %lu",
			src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		btrfs_err(fs_info,
			"memmove bogus dst_offset %lu move len %lu len %lu",
			dst_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_SHIFT;

		dst_off_in_page = offset_in_page(start_offset + dst_end);
		src_off_in_page = offset_in_page(start_offset + src_end);

		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}

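/*
 * Called from the btree releasepage path: drop the tree reference on the
 * extent buffer attached to @page if nobody else is using it and no IO is in
 * flight.  Returns 1 if the page can be released, 0 otherwise.
 */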
int try_release_extent_buffer(struct page *page)
{
	struct extent_buffer *eb;

	/*
	 * Take the mapping's private_lock so nobody can attach or detach an
	 * extent buffer on this page while we look at page->private.
	 */
	spin_lock(&page->mapping->private_lock);
	if (!PagePrivate(page)) {
		spin_unlock(&page->mapping->private_lock);
		return 1;
	}

	eb = (struct extent_buffer *)page->private;
	BUG_ON(!eb);

	/*
	 * Take the refs_lock before checking: if anybody else holds a
	 * reference or the buffer is under IO we cannot release it.
	 */
	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
		spin_unlock(&eb->refs_lock);
		spin_unlock(&page->mapping->private_lock);
		return 0;
	}
	spin_unlock(&page->mapping->private_lock);

	/*
	 * If the tree reference is already gone, the remaining reference is
	 * a real user, so leave the buffer alone.
	 */
	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
		spin_unlock(&eb->refs_lock);
		return 0;
	}

	return release_extent_buffer(eb);
}
