// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}
35
36#ifdef CONFIG_BTRFS_DEBUG
37static LIST_HEAD(buffers);
38static LIST_HEAD(states);
39
40static DEFINE_SPINLOCK(leak_lock);
41
42static inline
43void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
44{
45 unsigned long flags;
46
47 spin_lock_irqsave(&leak_lock, flags);
48 list_add(new, head);
49 spin_unlock_irqrestore(&leak_lock, flags);
50}
51
52static inline
53void btrfs_leak_debug_del(struct list_head *entry)
54{
55 unsigned long flags;
56
57 spin_lock_irqsave(&leak_lock, flags);
58 list_del(entry);
59 spin_unlock_irqrestore(&leak_lock, flags);
60}
61
62static inline
63void btrfs_leak_debug_check(void)
64{
65 struct extent_state *state;
66 struct extent_buffer *eb;
67
68 while (!list_empty(&states)) {
69 state = list_entry(states.next, struct extent_state, leak_list);
70 pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
71 state->start, state->end, state->state,
72 extent_state_in_tree(state),
73 refcount_read(&state->refs));
74 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76 }
77
78 while (!list_empty(&buffers)) {
79 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
80 pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
81 eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
82 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
85}
86
87#define btrfs_debug_check_extent_io_range(tree, start, end) \
88 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
89static inline void __btrfs_debug_check_extent_io_range(const char *caller,
90 struct extent_io_tree *tree, u64 start, u64 end)
91{
92 if (tree->ops && tree->ops->check_extent_io_range)
93 tree->ops->check_extent_io_range(tree->private_data, caller,
94 start, end);
95}
96#else
97#define btrfs_leak_debug_add(new, head) do {} while (0)
98#define btrfs_leak_debug_del(entry) do {} while (0)
99#define btrfs_leak_debug_check() do {} while (0)
100#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
101#endif
102
#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;

	/* tells writepage not to lock the state bits for this
	 * range, it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};
122
123static int add_extent_changeset(struct extent_state *state, unsigned bits,
124 struct extent_changeset *changeset,
125 int set)
126{
127 int ret;
128
129 if (!changeset)
130 return 0;
131 if (set && (state->state & bits) == bits)
132 return 0;
133 if (!set && (state->state & bits) == 0)
134 return 0;
135 changeset->bytes_changed += state->end - state->start + 1;
136 ret = ulist_add(&changeset->range_changed, state->start, state->end,
137 GFP_ATOMIC);
138 return ret;
139}
140
141static void flush_write_bio(struct extent_page_data *epd);
142
143int __init extent_io_init(void)
144{
145 extent_state_cache = kmem_cache_create("btrfs_extent_state",
146 sizeof(struct extent_state), 0,
147 SLAB_MEM_SPREAD, NULL);
148 if (!extent_state_cache)
149 return -ENOMEM;
150
151 extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
152 sizeof(struct extent_buffer), 0,
153 SLAB_MEM_SPREAD, NULL);
154 if (!extent_buffer_cache)
155 goto free_state_cache;
156
157 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
158 offsetof(struct btrfs_io_bio, bio),
159 BIOSET_NEED_BVECS))
160 goto free_buffer_cache;
161
162 if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
163 goto free_bioset;
164
165 return 0;
166
167free_bioset:
168 bioset_exit(&btrfs_bioset);
169
170free_buffer_cache:
171 kmem_cache_destroy(extent_buffer_cache);
172 extent_buffer_cache = NULL;
173
174free_state_cache:
175 kmem_cache_destroy(extent_state_cache);
176 extent_state_cache = NULL;
177 return -ENOMEM;
178}
179
180void __cold extent_io_exit(void)
181{
182 btrfs_leak_debug_check();
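
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy the caches.
	 */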
188 rcu_barrier();
189 kmem_cache_destroy(extent_state_cache);
190 kmem_cache_destroy(extent_buffer_cache);
191 bioset_exit(&btrfs_bioset);
192}
193
194void extent_io_tree_init(struct extent_io_tree *tree,
195 void *private_data)
196{
197 tree->state = RB_ROOT;
198 tree->ops = NULL;
199 tree->dirty_bytes = 0;
200 spin_lock_init(&tree->lock);
201 tree->private_data = private_data;
202}
203
204static struct extent_state *alloc_extent_state(gfp_t mask)
205{
206 struct extent_state *state;
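
	/*
	 * The given mask might not be suitable for the slab allocator,
	 * drop the placement flags it cannot honour.
	 */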
212 mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
213 state = kmem_cache_alloc(extent_state_cache, mask);
214 if (!state)
215 return state;
216 state->state = 0;
217 state->failrec = NULL;
218 RB_CLEAR_NODE(&state->rb_node);
219 btrfs_leak_debug_add(&state->leak_list, &states);
220 refcount_set(&state->refs, 1);
221 init_waitqueue_head(&state->wq);
222 trace_alloc_extent_state(state, mask, _RET_IP_);
223 return state;
224}
225
226void free_extent_state(struct extent_state *state)
227{
228 if (!state)
229 return;
230 if (refcount_dec_and_test(&state->refs)) {
231 WARN_ON(extent_state_in_tree(state));
232 btrfs_leak_debug_del(&state->leak_list);
233 trace_free_extent_state(state, _RET_IP_);
234 kmem_cache_free(extent_state_cache, state);
235 }
236}
237
238static struct rb_node *tree_insert(struct rb_root *root,
239 struct rb_node *search_start,
240 u64 offset,
241 struct rb_node *node,
242 struct rb_node ***p_in,
243 struct rb_node **parent_in)
244{
245 struct rb_node **p;
246 struct rb_node *parent = NULL;
247 struct tree_entry *entry;
248
249 if (p_in && parent_in) {
250 p = *p_in;
251 parent = *parent_in;
252 goto do_insert;
253 }
254
255 p = search_start ? &search_start : &root->rb_node;
256 while (*p) {
257 parent = *p;
258 entry = rb_entry(parent, struct tree_entry, rb_node);
259
260 if (offset < entry->start)
261 p = &(*p)->rb_left;
262 else if (offset > entry->end)
263 p = &(*p)->rb_right;
264 else
265 return parent;
266 }
267
268do_insert:
269 rb_link_node(node, parent, p);
270 rb_insert_color(node, root);
271 return NULL;
272}
273
274static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
275 struct rb_node **prev_ret,
276 struct rb_node **next_ret,
277 struct rb_node ***p_ret,
278 struct rb_node **parent_ret)
279{
280 struct rb_root *root = &tree->state;
281 struct rb_node **n = &root->rb_node;
282 struct rb_node *prev = NULL;
283 struct rb_node *orig_prev = NULL;
284 struct tree_entry *entry;
285 struct tree_entry *prev_entry = NULL;
286
287 while (*n) {
288 prev = *n;
289 entry = rb_entry(prev, struct tree_entry, rb_node);
290 prev_entry = entry;
291
292 if (offset < entry->start)
293 n = &(*n)->rb_left;
294 else if (offset > entry->end)
295 n = &(*n)->rb_right;
296 else
297 return *n;
298 }
299
300 if (p_ret)
301 *p_ret = n;
302 if (parent_ret)
303 *parent_ret = prev;
304
305 if (prev_ret) {
306 orig_prev = prev;
307 while (prev && offset > prev_entry->end) {
308 prev = rb_next(prev);
309 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
310 }
311 *prev_ret = prev;
312 prev = orig_prev;
313 }
314
315 if (next_ret) {
316 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
317 while (prev && offset < prev_entry->start) {
318 prev = rb_prev(prev);
319 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
320 }
321 *next_ret = prev;
322 }
323 return NULL;
324}
325
326static inline struct rb_node *
327tree_search_for_insert(struct extent_io_tree *tree,
328 u64 offset,
329 struct rb_node ***p_ret,
330 struct rb_node **parent_ret)
331{
332 struct rb_node *prev = NULL;
333 struct rb_node *ret;
334
335 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
336 if (!ret)
337 return prev;
338 return ret;
339}
340
341static inline struct rb_node *tree_search(struct extent_io_tree *tree,
342 u64 offset)
343{
344 return tree_search_for_insert(tree, offset, NULL, NULL);
345}
346
347static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
348 struct extent_state *other)
349{
350 if (tree->ops && tree->ops->merge_extent_hook)
351 tree->ops->merge_extent_hook(tree->private_data, new, other);
352}
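
/*
 * Look for states adjacent to @state that carry exactly the same state bits
 * and fold them into @state, removing the merged neighbours from the tree.
 * States with EXTENT_IOBITS or EXTENT_BOUNDARY set are never merged, so the
 * end_io handlers can rely on them staying in place.
 */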
363static void merge_state(struct extent_io_tree *tree,
364 struct extent_state *state)
365{
366 struct extent_state *other;
367 struct rb_node *other_node;
368
369 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
370 return;
371
372 other_node = rb_prev(&state->rb_node);
373 if (other_node) {
374 other = rb_entry(other_node, struct extent_state, rb_node);
375 if (other->end == state->start - 1 &&
376 other->state == state->state) {
377 merge_cb(tree, state, other);
378 state->start = other->start;
379 rb_erase(&other->rb_node, &tree->state);
380 RB_CLEAR_NODE(&other->rb_node);
381 free_extent_state(other);
382 }
383 }
384 other_node = rb_next(&state->rb_node);
385 if (other_node) {
386 other = rb_entry(other_node, struct extent_state, rb_node);
387 if (other->start == state->end + 1 &&
388 other->state == state->state) {
389 merge_cb(tree, state, other);
390 state->end = other->end;
391 rb_erase(&other->rb_node, &tree->state);
392 RB_CLEAR_NODE(&other->rb_node);
393 free_extent_state(other);
394 }
395 }
396}
397
398static void set_state_cb(struct extent_io_tree *tree,
399 struct extent_state *state, unsigned *bits)
400{
401 if (tree->ops && tree->ops->set_bit_hook)
402 tree->ops->set_bit_hook(tree->private_data, state, bits);
403}
404
405static void clear_state_cb(struct extent_io_tree *tree,
406 struct extent_state *state, unsigned *bits)
407{
408 if (tree->ops && tree->ops->clear_bit_hook)
409 tree->ops->clear_bit_hook(tree->private_data, state, bits);
410}
411
412static void set_state_bits(struct extent_io_tree *tree,
413 struct extent_state *state, unsigned *bits,
414 struct extent_changeset *changeset);
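
/*
 * Insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * Returns -EEXIST if a state already covers the range; the caller must hold
 * the tree lock.  See set/clear_extent_bit for the usual entry points.
 */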
426static int insert_state(struct extent_io_tree *tree,
427 struct extent_state *state, u64 start, u64 end,
428 struct rb_node ***p,
429 struct rb_node **parent,
430 unsigned *bits, struct extent_changeset *changeset)
431{
432 struct rb_node *node;
433
434 if (end < start)
435 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
436 end, start);
437 state->start = start;
438 state->end = end;
439
440 set_state_bits(tree, state, bits, changeset);
441
442 node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
443 if (node) {
444 struct extent_state *found;
445 found = rb_entry(node, struct extent_state, rb_node);
446 pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
447 found->start, found->end, start, end);
448 return -EEXIST;
449 }
450 merge_state(tree, state);
451 return 0;
452}
453
454static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
455 u64 split)
456{
457 if (tree->ops && tree->ops->split_extent_hook)
458 tree->ops->split_extent_hook(tree->private_data, orig, split);
459}
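
/*
 * Split a given extent state in two at offset 'split'.  Before the call the
 * tree has 'orig' covering [orig->start, orig->end]; afterwards 'prealloc'
 * covers [orig->start, split - 1] and 'orig' covers [split, orig->end].
 *
 * The caller must hold the tree lock.  Returns -EEXIST (and frees 'prealloc')
 * if the new node cannot be inserted.
 */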
475static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
476 struct extent_state *prealloc, u64 split)
477{
478 struct rb_node *node;
479
480 split_cb(tree, orig, split);
481
482 prealloc->start = orig->start;
483 prealloc->end = split - 1;
484 prealloc->state = orig->state;
485 orig->start = split;
486
487 node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
488 &prealloc->rb_node, NULL, NULL);
489 if (node) {
490 free_extent_state(prealloc);
491 return -EEXIST;
492 }
493 return 0;
494}
495
496static struct extent_state *next_state(struct extent_state *state)
497{
498 struct rb_node *next = rb_next(&state->rb_node);
499 if (next)
500 return rb_entry(next, struct extent_state, rb_node);
501 else
502 return NULL;
503}
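
/*
 * Utility function to clear bits from an extent state struct.  It will
 * optionally wake up anyone waiting on this state (wake == 1).  If no bits
 * remain set afterwards, the state is removed from the tree and freed.
 * Returns the next state in the tree, or NULL if there is none.
 */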
512static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
513 struct extent_state *state,
514 unsigned *bits, int wake,
515 struct extent_changeset *changeset)
516{
517 struct extent_state *next;
518 unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
519 int ret;
520
521 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
522 u64 range = state->end - state->start + 1;
523 WARN_ON(range > tree->dirty_bytes);
524 tree->dirty_bytes -= range;
525 }
526 clear_state_cb(tree, state, bits);
527 ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
528 BUG_ON(ret < 0);
529 state->state &= ~bits_to_clear;
530 if (wake)
531 wake_up(&state->wq);
532 if (state->state == 0) {
533 next = next_state(state);
534 if (extent_state_in_tree(state)) {
535 rb_erase(&state->rb_node, &tree->state);
536 RB_CLEAR_NODE(&state->rb_node);
537 free_extent_state(state);
538 } else {
539 WARN_ON(1);
540 }
541 } else {
542 merge_state(tree, state);
543 next = next_state(state);
544 }
545 return next;
546}
547
548static struct extent_state *
549alloc_extent_state_atomic(struct extent_state *prealloc)
550{
551 if (!prealloc)
552 prealloc = alloc_extent_state(GFP_ATOMIC);
553
554 return prealloc;
555}
556
557static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
558{
559 struct inode *inode = tree->private_data;
560
561 btrfs_panic(btrfs_sb(inode->i_sb), err,
562 "locking error: extent tree was modified by another thread while locked");
563}
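
/*
 * Clear some bits on a range in the tree.  This may require splitting or
 * removing elements, so the gfp mask indicates whether sleeping allocations
 * are allowed.  Pass wake == 1 to kick any sleepers and delete == 1 to drop
 * the range from the tree regardless of its state (e.g. for truncate).
 *
 * The range [start, end] is inclusive and the tree lock is taken internally.
 */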
577int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
578 unsigned bits, int wake, int delete,
579 struct extent_state **cached_state,
580 gfp_t mask, struct extent_changeset *changeset)
581{
582 struct extent_state *state;
583 struct extent_state *cached;
584 struct extent_state *prealloc = NULL;
585 struct rb_node *node;
586 u64 last_end;
587 int err;
588 int clear = 0;
589
590 btrfs_debug_check_extent_io_range(tree, start, end);
591
592 if (bits & EXTENT_DELALLOC)
593 bits |= EXTENT_NORESERVE;
594
595 if (delete)
596 bits |= ~EXTENT_CTLBITS;
597 bits |= EXTENT_FIRST_DELALLOC;
598
599 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
600 clear = 1;
601again:
602 if (!prealloc && gfpflags_allow_blocking(mask)) {
603
604
605
606
607
608
609
610 prealloc = alloc_extent_state(mask);
611 }
612
613 spin_lock(&tree->lock);
614 if (cached_state) {
615 cached = *cached_state;
616
617 if (clear) {
618 *cached_state = NULL;
619 cached_state = NULL;
620 }
621
622 if (cached && extent_state_in_tree(cached) &&
623 cached->start <= start && cached->end > start) {
624 if (clear)
625 refcount_dec(&cached->refs);
626 state = cached;
627 goto hit_next;
628 }
629 if (clear)
630 free_extent_state(cached);
631 }
632
633
634
635
636 node = tree_search(tree, start);
637 if (!node)
638 goto out;
639 state = rb_entry(node, struct extent_state, rb_node);
640hit_next:
641 if (state->start > end)
642 goto out;
643 WARN_ON(state->end < start);
644 last_end = state->end;
645
646
647 if (!(state->state & bits)) {
648 state = next_state(state);
649 goto next;
650 }
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668 if (state->start < start) {
669 prealloc = alloc_extent_state_atomic(prealloc);
670 BUG_ON(!prealloc);
671 err = split_state(tree, state, prealloc, start);
672 if (err)
673 extent_io_tree_panic(tree, err);
674
675 prealloc = NULL;
676 if (err)
677 goto out;
678 if (state->end <= end) {
679 state = clear_state_bit(tree, state, &bits, wake,
680 changeset);
681 goto next;
682 }
683 goto search_again;
684 }
685
686
687
688
689
690
691 if (state->start <= end && state->end > end) {
692 prealloc = alloc_extent_state_atomic(prealloc);
693 BUG_ON(!prealloc);
694 err = split_state(tree, state, prealloc, end + 1);
695 if (err)
696 extent_io_tree_panic(tree, err);
697
698 if (wake)
699 wake_up(&state->wq);
700
701 clear_state_bit(tree, prealloc, &bits, wake, changeset);
702
703 prealloc = NULL;
704 goto out;
705 }
706
707 state = clear_state_bit(tree, state, &bits, wake, changeset);
708next:
709 if (last_end == (u64)-1)
710 goto out;
711 start = last_end + 1;
712 if (start <= end && state && !need_resched())
713 goto hit_next;
714
715search_again:
716 if (start > end)
717 goto out;
718 spin_unlock(&tree->lock);
719 if (gfpflags_allow_blocking(mask))
720 cond_resched();
721 goto again;
722
723out:
724 spin_unlock(&tree->lock);
725 if (prealloc)
726 free_extent_state(prealloc);
727
728 return 0;
729
730}
731
732static void wait_on_state(struct extent_io_tree *tree,
733 struct extent_state *state)
734 __releases(tree->lock)
735 __acquires(tree->lock)
736{
737 DEFINE_WAIT(wait);
738 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
739 spin_unlock(&tree->lock);
740 schedule();
741 spin_lock(&tree->lock);
742 finish_wait(&state->wq, &wait);
743}
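
/*
 * Wait for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.  The tree lock is taken internally.
 */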
750static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
751 unsigned long bits)
752{
753 struct extent_state *state;
754 struct rb_node *node;
755
756 btrfs_debug_check_extent_io_range(tree, start, end);
757
758 spin_lock(&tree->lock);
759again:
760 while (1) {
761
762
763
764
765 node = tree_search(tree, start);
766process_node:
767 if (!node)
768 break;
769
770 state = rb_entry(node, struct extent_state, rb_node);
771
772 if (state->start > end)
773 goto out;
774
775 if (state->state & bits) {
776 start = state->start;
777 refcount_inc(&state->refs);
778 wait_on_state(tree, state);
779 free_extent_state(state);
780 goto again;
781 }
782 start = state->end + 1;
783
784 if (start > end)
785 break;
786
787 if (!cond_resched_lock(&tree->lock)) {
788 node = rb_next(node);
789 goto process_node;
790 }
791 }
792out:
793 spin_unlock(&tree->lock);
794}
795
796static void set_state_bits(struct extent_io_tree *tree,
797 struct extent_state *state,
798 unsigned *bits, struct extent_changeset *changeset)
799{
800 unsigned bits_to_set = *bits & ~EXTENT_CTLBITS;
801 int ret;
802
803 set_state_cb(tree, state, bits);
804 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
805 u64 range = state->end - state->start + 1;
806 tree->dirty_bytes += range;
807 }
808 ret = add_extent_changeset(state, bits_to_set, changeset, 1);
809 BUG_ON(ret < 0);
810 state->state |= bits_to_set;
811}
812
813static void cache_state_if_flags(struct extent_state *state,
814 struct extent_state **cached_ptr,
815 unsigned flags)
816{
817 if (cached_ptr && !(*cached_ptr)) {
818 if (!flags || (state->state & flags)) {
819 *cached_ptr = state;
820 refcount_inc(&state->refs);
821 }
822 }
823}
824
825static void cache_state(struct extent_state *state,
826 struct extent_state **cached_ptr)
827{
828 return cache_state_if_flags(state, cached_ptr,
829 EXTENT_IOBITS | EXTENT_BOUNDARY);
830}
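
/*
 * Set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are already set on part of the range, this
 * fails with -EEXIST and the start of the conflicting range is returned in
 * *failed_start.
 *
 * The range [start, end] is inclusive and the tree lock is taken internally.
 */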
843static int __must_check
844__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
845 unsigned bits, unsigned exclusive_bits,
846 u64 *failed_start, struct extent_state **cached_state,
847 gfp_t mask, struct extent_changeset *changeset)
848{
849 struct extent_state *state;
850 struct extent_state *prealloc = NULL;
851 struct rb_node *node;
852 struct rb_node **p;
853 struct rb_node *parent;
854 int err = 0;
855 u64 last_start;
856 u64 last_end;
857
858 btrfs_debug_check_extent_io_range(tree, start, end);
859
860 bits |= EXTENT_FIRST_DELALLOC;
861again:
862 if (!prealloc && gfpflags_allow_blocking(mask)) {
863
864
865
866
867
868
869
870 prealloc = alloc_extent_state(mask);
871 }
872
873 spin_lock(&tree->lock);
874 if (cached_state && *cached_state) {
875 state = *cached_state;
876 if (state->start <= start && state->end > start &&
877 extent_state_in_tree(state)) {
878 node = &state->rb_node;
879 goto hit_next;
880 }
881 }
882
883
884
885
886 node = tree_search_for_insert(tree, start, &p, &parent);
887 if (!node) {
888 prealloc = alloc_extent_state_atomic(prealloc);
889 BUG_ON(!prealloc);
890 err = insert_state(tree, prealloc, start, end,
891 &p, &parent, &bits, changeset);
892 if (err)
893 extent_io_tree_panic(tree, err);
894
895 cache_state(prealloc, cached_state);
896 prealloc = NULL;
897 goto out;
898 }
899 state = rb_entry(node, struct extent_state, rb_node);
900hit_next:
901 last_start = state->start;
902 last_end = state->end;
903
904
905
906
907
908
909
910 if (state->start == start && state->end <= end) {
911 if (state->state & exclusive_bits) {
912 *failed_start = state->start;
913 err = -EEXIST;
914 goto out;
915 }
916
917 set_state_bits(tree, state, &bits, changeset);
918 cache_state(state, cached_state);
919 merge_state(tree, state);
920 if (last_end == (u64)-1)
921 goto out;
922 start = last_end + 1;
923 state = next_state(state);
924 if (start < end && state && state->start == start &&
925 !need_resched())
926 goto hit_next;
927 goto search_again;
928 }
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946 if (state->start < start) {
947 if (state->state & exclusive_bits) {
948 *failed_start = start;
949 err = -EEXIST;
950 goto out;
951 }
952
953 prealloc = alloc_extent_state_atomic(prealloc);
954 BUG_ON(!prealloc);
955 err = split_state(tree, state, prealloc, start);
956 if (err)
957 extent_io_tree_panic(tree, err);
958
959 prealloc = NULL;
960 if (err)
961 goto out;
962 if (state->end <= end) {
963 set_state_bits(tree, state, &bits, changeset);
964 cache_state(state, cached_state);
965 merge_state(tree, state);
966 if (last_end == (u64)-1)
967 goto out;
968 start = last_end + 1;
969 state = next_state(state);
970 if (start < end && state && state->start == start &&
971 !need_resched())
972 goto hit_next;
973 }
974 goto search_again;
975 }
976
977
978
979
980
981
982
983 if (state->start > start) {
984 u64 this_end;
985 if (end < last_start)
986 this_end = end;
987 else
988 this_end = last_start - 1;
989
990 prealloc = alloc_extent_state_atomic(prealloc);
991 BUG_ON(!prealloc);
992
993
994
995
996
997 err = insert_state(tree, prealloc, start, this_end,
998 NULL, NULL, &bits, changeset);
999 if (err)
1000 extent_io_tree_panic(tree, err);
1001
1002 cache_state(prealloc, cached_state);
1003 prealloc = NULL;
1004 start = this_end + 1;
1005 goto search_again;
1006 }
1007
1008
1009
1010
1011
1012
1013 if (state->start <= end && state->end > end) {
1014 if (state->state & exclusive_bits) {
1015 *failed_start = start;
1016 err = -EEXIST;
1017 goto out;
1018 }
1019
1020 prealloc = alloc_extent_state_atomic(prealloc);
1021 BUG_ON(!prealloc);
1022 err = split_state(tree, state, prealloc, end + 1);
1023 if (err)
1024 extent_io_tree_panic(tree, err);
1025
1026 set_state_bits(tree, prealloc, &bits, changeset);
1027 cache_state(prealloc, cached_state);
1028 merge_state(tree, prealloc);
1029 prealloc = NULL;
1030 goto out;
1031 }
1032
1033search_again:
1034 if (start > end)
1035 goto out;
1036 spin_unlock(&tree->lock);
1037 if (gfpflags_allow_blocking(mask))
1038 cond_resched();
1039 goto again;
1040
1041out:
1042 spin_unlock(&tree->lock);
1043 if (prealloc)
1044 free_extent_state(prealloc);
1045
1046 return err;
1047
1048}
1049
1050int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1051 unsigned bits, u64 * failed_start,
1052 struct extent_state **cached_state, gfp_t mask)
1053{
1054 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
1055 cached_state, mask, NULL);
1056}
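
/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 * 			another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * Set @bits and clear @clear_bits on every state covering the range,
 * splitting or inserting states as needed.  Returns 0 on success or -ENOMEM
 * if the required extent states cannot be allocated.
 */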
1077int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1078 unsigned bits, unsigned clear_bits,
1079 struct extent_state **cached_state)
1080{
1081 struct extent_state *state;
1082 struct extent_state *prealloc = NULL;
1083 struct rb_node *node;
1084 struct rb_node **p;
1085 struct rb_node *parent;
1086 int err = 0;
1087 u64 last_start;
1088 u64 last_end;
1089 bool first_iteration = true;
1090
1091 btrfs_debug_check_extent_io_range(tree, start, end);
1092
1093again:
1094 if (!prealloc) {
1095
1096
1097
1098
1099
1100
1101
1102 prealloc = alloc_extent_state(GFP_NOFS);
1103 if (!prealloc && !first_iteration)
1104 return -ENOMEM;
1105 }
1106
1107 spin_lock(&tree->lock);
1108 if (cached_state && *cached_state) {
1109 state = *cached_state;
1110 if (state->start <= start && state->end > start &&
1111 extent_state_in_tree(state)) {
1112 node = &state->rb_node;
1113 goto hit_next;
1114 }
1115 }
1116
1117
1118
1119
1120
1121 node = tree_search_for_insert(tree, start, &p, &parent);
1122 if (!node) {
1123 prealloc = alloc_extent_state_atomic(prealloc);
1124 if (!prealloc) {
1125 err = -ENOMEM;
1126 goto out;
1127 }
1128 err = insert_state(tree, prealloc, start, end,
1129 &p, &parent, &bits, NULL);
1130 if (err)
1131 extent_io_tree_panic(tree, err);
1132 cache_state(prealloc, cached_state);
1133 prealloc = NULL;
1134 goto out;
1135 }
1136 state = rb_entry(node, struct extent_state, rb_node);
1137hit_next:
1138 last_start = state->start;
1139 last_end = state->end;
1140
1141
1142
1143
1144
1145
1146
1147 if (state->start == start && state->end <= end) {
1148 set_state_bits(tree, state, &bits, NULL);
1149 cache_state(state, cached_state);
1150 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
1151 if (last_end == (u64)-1)
1152 goto out;
1153 start = last_end + 1;
1154 if (start < end && state && state->start == start &&
1155 !need_resched())
1156 goto hit_next;
1157 goto search_again;
1158 }
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176 if (state->start < start) {
1177 prealloc = alloc_extent_state_atomic(prealloc);
1178 if (!prealloc) {
1179 err = -ENOMEM;
1180 goto out;
1181 }
1182 err = split_state(tree, state, prealloc, start);
1183 if (err)
1184 extent_io_tree_panic(tree, err);
1185 prealloc = NULL;
1186 if (err)
1187 goto out;
1188 if (state->end <= end) {
1189 set_state_bits(tree, state, &bits, NULL);
1190 cache_state(state, cached_state);
1191 state = clear_state_bit(tree, state, &clear_bits, 0,
1192 NULL);
1193 if (last_end == (u64)-1)
1194 goto out;
1195 start = last_end + 1;
1196 if (start < end && state && state->start == start &&
1197 !need_resched())
1198 goto hit_next;
1199 }
1200 goto search_again;
1201 }
1202
1203
1204
1205
1206
1207
1208
1209 if (state->start > start) {
1210 u64 this_end;
1211 if (end < last_start)
1212 this_end = end;
1213 else
1214 this_end = last_start - 1;
1215
1216 prealloc = alloc_extent_state_atomic(prealloc);
1217 if (!prealloc) {
1218 err = -ENOMEM;
1219 goto out;
1220 }
1221
1222
1223
1224
1225
1226 err = insert_state(tree, prealloc, start, this_end,
1227 NULL, NULL, &bits, NULL);
1228 if (err)
1229 extent_io_tree_panic(tree, err);
1230 cache_state(prealloc, cached_state);
1231 prealloc = NULL;
1232 start = this_end + 1;
1233 goto search_again;
1234 }
1235
1236
1237
1238
1239
1240
1241 if (state->start <= end && state->end > end) {
1242 prealloc = alloc_extent_state_atomic(prealloc);
1243 if (!prealloc) {
1244 err = -ENOMEM;
1245 goto out;
1246 }
1247
1248 err = split_state(tree, state, prealloc, end + 1);
1249 if (err)
1250 extent_io_tree_panic(tree, err);
1251
1252 set_state_bits(tree, prealloc, &bits, NULL);
1253 cache_state(prealloc, cached_state);
1254 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
1255 prealloc = NULL;
1256 goto out;
1257 }
1258
1259search_again:
1260 if (start > end)
1261 goto out;
1262 spin_unlock(&tree->lock);
1263 cond_resched();
1264 first_iteration = false;
1265 goto again;
1266
1267out:
1268 spin_unlock(&tree->lock);
1269 if (prealloc)
1270 free_extent_state(prealloc);
1271
1272 return err;
1273}
1274
1275
1276int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1277 unsigned bits, struct extent_changeset *changeset)
1278{
1279
1280
1281
1282
1283
1284
1285 BUG_ON(bits & EXTENT_LOCKED);
1286
1287 return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1288 changeset);
1289}
1290
1291int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1292 unsigned bits, int wake, int delete,
1293 struct extent_state **cached)
1294{
1295 return __clear_extent_bit(tree, start, end, bits, wake, delete,
1296 cached, GFP_NOFS, NULL);
1297}
1298
1299int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1300 unsigned bits, struct extent_changeset *changeset)
1301{
1302
1303
1304
1305
1306 BUG_ON(bits & EXTENT_LOCKED);
1307
1308 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
1309 changeset);
1310}
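
/*
 * Set EXTENT_LOCKED on the range [start, end], waiting for any conflicting
 * lock holder to release it first.  Returns 0 once the whole range is locked
 * by this caller.
 */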
1316int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1317 struct extent_state **cached_state)
1318{
1319 int err;
1320 u64 failed_start;
1321
1322 while (1) {
1323 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
1324 EXTENT_LOCKED, &failed_start,
1325 cached_state, GFP_NOFS, NULL);
1326 if (err == -EEXIST) {
1327 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1328 start = failed_start;
1329 } else
1330 break;
1331 WARN_ON(start > end);
1332 }
1333 return err;
1334}
1335
1336int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1337{
1338 int err;
1339 u64 failed_start;
1340
1341 err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1342 &failed_start, NULL, GFP_NOFS, NULL);
1343 if (err == -EEXIST) {
1344 if (failed_start > start)
1345 clear_extent_bit(tree, start, failed_start - 1,
1346 EXTENT_LOCKED, 1, 0, NULL);
1347 return 0;
1348 }
1349 return 1;
1350}
1351
1352void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1353{
1354 unsigned long index = start >> PAGE_SHIFT;
1355 unsigned long end_index = end >> PAGE_SHIFT;
1356 struct page *page;
1357
1358 while (index <= end_index) {
1359 page = find_get_page(inode->i_mapping, index);
1360 BUG_ON(!page);
1361 clear_page_dirty_for_io(page);
1362 put_page(page);
1363 index++;
1364 }
1365}
1366
1367void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1368{
1369 unsigned long index = start >> PAGE_SHIFT;
1370 unsigned long end_index = end >> PAGE_SHIFT;
1371 struct page *page;
1372
1373 while (index <= end_index) {
1374 page = find_get_page(inode->i_mapping, index);
1375 BUG_ON(!page);
1376 __set_page_dirty_nobuffers(page);
1377 account_page_redirty(page);
1378 put_page(page);
1379 index++;
1380 }
1381}
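
/*
 * Find the first state struct with 'bits' set after 'start' and return it.
 * tree->lock must be held.  NULL is returned if nothing is found.
 */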
1387static struct extent_state *
1388find_first_extent_bit_state(struct extent_io_tree *tree,
1389 u64 start, unsigned bits)
1390{
1391 struct rb_node *node;
1392 struct extent_state *state;
1393
1394
1395
1396
1397
1398 node = tree_search(tree, start);
1399 if (!node)
1400 goto out;
1401
1402 while (1) {
1403 state = rb_entry(node, struct extent_state, rb_node);
1404 if (state->end >= start && (state->state & bits))
1405 return state;
1406
1407 node = rb_next(node);
1408 if (!node)
1409 break;
1410 }
1411out:
1412 return NULL;
1413}
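
/*
 * Find the first offset in the io tree with 'bits' set.  Returns 0 if
 * something was found, with *start_ret and *end_ret describing the matching
 * state; returns 1 if nothing was found.
 */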
1422int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1423 u64 *start_ret, u64 *end_ret, unsigned bits,
1424 struct extent_state **cached_state)
1425{
1426 struct extent_state *state;
1427 struct rb_node *n;
1428 int ret = 1;
1429
1430 spin_lock(&tree->lock);
1431 if (cached_state && *cached_state) {
1432 state = *cached_state;
1433 if (state->end == start - 1 && extent_state_in_tree(state)) {
1434 n = rb_next(&state->rb_node);
1435 while (n) {
1436 state = rb_entry(n, struct extent_state,
1437 rb_node);
1438 if (state->state & bits)
1439 goto got_it;
1440 n = rb_next(n);
1441 }
1442 free_extent_state(*cached_state);
1443 *cached_state = NULL;
1444 goto out;
1445 }
1446 free_extent_state(*cached_state);
1447 *cached_state = NULL;
1448 }
1449
1450 state = find_first_extent_bit_state(tree, start, bits);
1451got_it:
1452 if (state) {
1453 cache_state_if_flags(state, cached_state, 0);
1454 *start_ret = state->start;
1455 *end_ret = state->end;
1456 ret = 0;
1457 }
1458out:
1459 spin_unlock(&tree->lock);
1460 return ret;
1461}
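
/*
 * Find a contiguous range of bytes in the file marked as delalloc, not more
 * than 'max_bytes'.  *start and *end return the range and the first matching
 * state is cached in *cached_state.  Returns non-zero if something was
 * found, 0 if there was nothing in the tree for this range.
 */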
1469static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1470 u64 *start, u64 *end, u64 max_bytes,
1471 struct extent_state **cached_state)
1472{
1473 struct rb_node *node;
1474 struct extent_state *state;
1475 u64 cur_start = *start;
1476 u64 found = 0;
1477 u64 total_bytes = 0;
1478
1479 spin_lock(&tree->lock);
1480
1481
1482
1483
1484
1485 node = tree_search(tree, cur_start);
1486 if (!node) {
1487 if (!found)
1488 *end = (u64)-1;
1489 goto out;
1490 }
1491
1492 while (1) {
1493 state = rb_entry(node, struct extent_state, rb_node);
1494 if (found && (state->start != cur_start ||
1495 (state->state & EXTENT_BOUNDARY))) {
1496 goto out;
1497 }
1498 if (!(state->state & EXTENT_DELALLOC)) {
1499 if (!found)
1500 *end = state->end;
1501 goto out;
1502 }
1503 if (!found) {
1504 *start = state->start;
1505 *cached_state = state;
1506 refcount_inc(&state->refs);
1507 }
1508 found++;
1509 *end = state->end;
1510 cur_start = state->end + 1;
1511 node = rb_next(node);
1512 total_bytes += state->end - state->start + 1;
1513 if (total_bytes >= max_bytes)
1514 break;
1515 if (!node)
1516 break;
1517 }
1518out:
1519 spin_unlock(&tree->lock);
1520 return found;
1521}
1522
1523static int __process_pages_contig(struct address_space *mapping,
1524 struct page *locked_page,
1525 pgoff_t start_index, pgoff_t end_index,
1526 unsigned long page_ops, pgoff_t *index_ret);
1527
1528static noinline void __unlock_for_delalloc(struct inode *inode,
1529 struct page *locked_page,
1530 u64 start, u64 end)
1531{
1532 unsigned long index = start >> PAGE_SHIFT;
1533 unsigned long end_index = end >> PAGE_SHIFT;
1534
1535 ASSERT(locked_page);
1536 if (index == locked_page->index && end_index == index)
1537 return;
1538
1539 __process_pages_contig(inode->i_mapping, locked_page, index, end_index,
1540 PAGE_UNLOCK, NULL);
1541}
1542
1543static noinline int lock_delalloc_pages(struct inode *inode,
1544 struct page *locked_page,
1545 u64 delalloc_start,
1546 u64 delalloc_end)
1547{
1548 unsigned long index = delalloc_start >> PAGE_SHIFT;
1549 unsigned long index_ret = index;
1550 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
1551 int ret;
1552
1553 ASSERT(locked_page);
1554 if (index == locked_page->index && index == end_index)
1555 return 0;
1556
1557 ret = __process_pages_contig(inode->i_mapping, locked_page, index,
1558 end_index, PAGE_LOCK, &index_ret);
1559 if (ret == -EAGAIN)
1560 __unlock_for_delalloc(inode, locked_page, delalloc_start,
1561 (u64)index_ret << PAGE_SHIFT);
1562 return ret;
1563}
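
/*
 * Find a contiguous delalloc range starting at *start, lock all of its pages
 * and lock the corresponding range in the io tree.  The resulting range is
 * returned in *start and *end.  Returns non-zero if a range was found and
 * locked, 0 otherwise.
 */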
1571STATIC u64 find_lock_delalloc_range(struct inode *inode,
1572 struct extent_io_tree *tree,
1573 struct page *locked_page, u64 *start,
1574 u64 *end, u64 max_bytes)
1575{
1576 u64 delalloc_start;
1577 u64 delalloc_end;
1578 u64 found;
1579 struct extent_state *cached_state = NULL;
1580 int ret;
1581 int loops = 0;
1582
1583again:
1584
1585 delalloc_start = *start;
1586 delalloc_end = 0;
1587 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1588 max_bytes, &cached_state);
1589 if (!found || delalloc_end <= *start) {
1590 *start = delalloc_start;
1591 *end = delalloc_end;
1592 free_extent_state(cached_state);
1593 return 0;
1594 }
1595
1596
1597
1598
1599
1600
1601 if (delalloc_start < *start)
1602 delalloc_start = *start;
1603
1604
1605
1606
1607 if (delalloc_end + 1 - delalloc_start > max_bytes)
1608 delalloc_end = delalloc_start + max_bytes - 1;
1609
1610
1611 ret = lock_delalloc_pages(inode, locked_page,
1612 delalloc_start, delalloc_end);
1613 if (ret == -EAGAIN) {
1614
1615
1616
1617 free_extent_state(cached_state);
1618 cached_state = NULL;
1619 if (!loops) {
1620 max_bytes = PAGE_SIZE;
1621 loops = 1;
1622 goto again;
1623 } else {
1624 found = 0;
1625 goto out_failed;
1626 }
1627 }
1628 BUG_ON(ret);
1629
1630
1631 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
1632
1633
1634 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1635 EXTENT_DELALLOC, 1, cached_state);
1636 if (!ret) {
1637 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1638 &cached_state);
1639 __unlock_for_delalloc(inode, locked_page,
1640 delalloc_start, delalloc_end);
1641 cond_resched();
1642 goto again;
1643 }
1644 free_extent_state(cached_state);
1645 *start = delalloc_start;
1646 *end = delalloc_end;
1647out_failed:
1648 return found;
1649}
1650
1651static int __process_pages_contig(struct address_space *mapping,
1652 struct page *locked_page,
1653 pgoff_t start_index, pgoff_t end_index,
1654 unsigned long page_ops, pgoff_t *index_ret)
1655{
1656 unsigned long nr_pages = end_index - start_index + 1;
1657 unsigned long pages_locked = 0;
1658 pgoff_t index = start_index;
1659 struct page *pages[16];
1660 unsigned ret;
1661 int err = 0;
1662 int i;
1663
1664 if (page_ops & PAGE_LOCK) {
1665 ASSERT(page_ops == PAGE_LOCK);
1666 ASSERT(index_ret && *index_ret == start_index);
1667 }
1668
1669 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1670 mapping_set_error(mapping, -EIO);
1671
1672 while (nr_pages > 0) {
1673 ret = find_get_pages_contig(mapping, index,
1674 min_t(unsigned long,
1675 nr_pages, ARRAY_SIZE(pages)), pages);
1676 if (ret == 0) {
1677
1678
1679
1680
1681 ASSERT(page_ops & PAGE_LOCK);
1682 err = -EAGAIN;
1683 goto out;
1684 }
1685
1686 for (i = 0; i < ret; i++) {
1687 if (page_ops & PAGE_SET_PRIVATE2)
1688 SetPagePrivate2(pages[i]);
1689
1690 if (pages[i] == locked_page) {
1691 put_page(pages[i]);
1692 pages_locked++;
1693 continue;
1694 }
1695 if (page_ops & PAGE_CLEAR_DIRTY)
1696 clear_page_dirty_for_io(pages[i]);
1697 if (page_ops & PAGE_SET_WRITEBACK)
1698 set_page_writeback(pages[i]);
1699 if (page_ops & PAGE_SET_ERROR)
1700 SetPageError(pages[i]);
1701 if (page_ops & PAGE_END_WRITEBACK)
1702 end_page_writeback(pages[i]);
1703 if (page_ops & PAGE_UNLOCK)
1704 unlock_page(pages[i]);
1705 if (page_ops & PAGE_LOCK) {
1706 lock_page(pages[i]);
1707 if (!PageDirty(pages[i]) ||
1708 pages[i]->mapping != mapping) {
1709 unlock_page(pages[i]);
1710 put_page(pages[i]);
1711 err = -EAGAIN;
1712 goto out;
1713 }
1714 }
1715 put_page(pages[i]);
1716 pages_locked++;
1717 }
1718 nr_pages -= ret;
1719 index += ret;
1720 cond_resched();
1721 }
1722out:
1723 if (err && index_ret)
1724 *index_ret = start_index + pages_locked - 1;
1725 return err;
1726}
1727
1728void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1729 u64 delalloc_end, struct page *locked_page,
1730 unsigned clear_bits,
1731 unsigned long page_ops)
1732{
1733 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
1734 NULL);
1735
1736 __process_pages_contig(inode->i_mapping, locked_page,
1737 start >> PAGE_SHIFT, end >> PAGE_SHIFT,
1738 page_ops, NULL);
1739}
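
/*
 * Count the number of bytes in the tree that have the given bits set.  This
 * can be fairly slow, except for EXTENT_DIRTY starting at offset 0, which is
 * cached in tree->dirty_bytes.  The total found is returned and *start is
 * set to the first matching offset.
 */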
1746u64 count_range_bits(struct extent_io_tree *tree,
1747 u64 *start, u64 search_end, u64 max_bytes,
1748 unsigned bits, int contig)
1749{
1750 struct rb_node *node;
1751 struct extent_state *state;
1752 u64 cur_start = *start;
1753 u64 total_bytes = 0;
1754 u64 last = 0;
1755 int found = 0;
1756
1757 if (WARN_ON(search_end <= cur_start))
1758 return 0;
1759
1760 spin_lock(&tree->lock);
1761 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1762 total_bytes = tree->dirty_bytes;
1763 goto out;
1764 }
1765
1766
1767
1768
1769 node = tree_search(tree, cur_start);
1770 if (!node)
1771 goto out;
1772
1773 while (1) {
1774 state = rb_entry(node, struct extent_state, rb_node);
1775 if (state->start > search_end)
1776 break;
1777 if (contig && found && state->start > last + 1)
1778 break;
1779 if (state->end >= cur_start && (state->state & bits) == bits) {
1780 total_bytes += min(search_end, state->end) + 1 -
1781 max(cur_start, state->start);
1782 if (total_bytes >= max_bytes)
1783 break;
1784 if (!found) {
1785 *start = max(cur_start, state->start);
1786 found = 1;
1787 }
1788 last = state->end;
1789 } else if (contig && found) {
1790 break;
1791 }
1792 node = rb_next(node);
1793 if (!node)
1794 break;
1795 }
1796out:
1797 spin_unlock(&tree->lock);
1798 return total_bytes;
1799}
1800
1801
1802
1803
1804
1805static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
1806 struct io_failure_record *failrec)
1807{
1808 struct rb_node *node;
1809 struct extent_state *state;
1810 int ret = 0;
1811
1812 spin_lock(&tree->lock);
1813
1814
1815
1816
1817 node = tree_search(tree, start);
1818 if (!node) {
1819 ret = -ENOENT;
1820 goto out;
1821 }
1822 state = rb_entry(node, struct extent_state, rb_node);
1823 if (state->start != start) {
1824 ret = -ENOENT;
1825 goto out;
1826 }
1827 state->failrec = failrec;
1828out:
1829 spin_unlock(&tree->lock);
1830 return ret;
1831}
1832
1833static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
1834 struct io_failure_record **failrec)
1835{
1836 struct rb_node *node;
1837 struct extent_state *state;
1838 int ret = 0;
1839
1840 spin_lock(&tree->lock);
1841
1842
1843
1844
1845 node = tree_search(tree, start);
1846 if (!node) {
1847 ret = -ENOENT;
1848 goto out;
1849 }
1850 state = rb_entry(node, struct extent_state, rb_node);
1851 if (state->start != start) {
1852 ret = -ENOENT;
1853 goto out;
1854 }
1855 *failrec = state->failrec;
1856out:
1857 spin_unlock(&tree->lock);
1858 return ret;
1859}
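
/*
 * Search a range in the state tree for a given mask.  If 'filled' == 1 this
 * returns 1 only if every byte in the range has the bits set; otherwise 1 is
 * returned if any part of the range has the bits set.
 */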
1867int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1868 unsigned bits, int filled, struct extent_state *cached)
1869{
1870 struct extent_state *state = NULL;
1871 struct rb_node *node;
1872 int bitset = 0;
1873
1874 spin_lock(&tree->lock);
1875 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
1876 cached->end > start)
1877 node = &cached->rb_node;
1878 else
1879 node = tree_search(tree, start);
1880 while (node && start <= end) {
1881 state = rb_entry(node, struct extent_state, rb_node);
1882
1883 if (filled && state->start > start) {
1884 bitset = 0;
1885 break;
1886 }
1887
1888 if (state->start > end)
1889 break;
1890
1891 if (state->state & bits) {
1892 bitset = 1;
1893 if (!filled)
1894 break;
1895 } else if (filled) {
1896 bitset = 0;
1897 break;
1898 }
1899
1900 if (state->end == (u64)-1)
1901 break;
1902
1903 start = state->end + 1;
1904 if (start > end)
1905 break;
1906 node = rb_next(node);
1907 if (!node) {
1908 if (filled)
1909 bitset = 0;
1910 break;
1911 }
1912 }
1913 spin_unlock(&tree->lock);
1914 return bitset;
1915}
1916
1917
1918
1919
1920
1921static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1922{
1923 u64 start = page_offset(page);
1924 u64 end = start + PAGE_SIZE - 1;
1925 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1926 SetPageUptodate(page);
1927}
1928
1929int free_io_failure(struct extent_io_tree *failure_tree,
1930 struct extent_io_tree *io_tree,
1931 struct io_failure_record *rec)
1932{
1933 int ret;
1934 int err = 0;
1935
1936 set_state_failrec(failure_tree, rec->start, NULL);
1937 ret = clear_extent_bits(failure_tree, rec->start,
1938 rec->start + rec->len - 1,
1939 EXTENT_LOCKED | EXTENT_DIRTY);
1940 if (ret)
1941 err = ret;
1942
1943 ret = clear_extent_bits(io_tree, rec->start,
1944 rec->start + rec->len - 1,
1945 EXTENT_DAMAGED);
1946 if (ret && !err)
1947 err = ret;
1948
1949 kfree(rec);
1950 return err;
1951}
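
/*
 * This bypasses the standard btrfs submit functions deliberately, as the
 * standard behavior is to write all copies in a raid setup.  Here we only
 * want to rewrite the one bad copy, so the block mapping is done by hand and
 * the bio is submitted synchronously to exactly one mirror.
 */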
1963int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
1964 u64 length, u64 logical, struct page *page,
1965 unsigned int pg_offset, int mirror_num)
1966{
1967 struct bio *bio;
1968 struct btrfs_device *dev;
1969 u64 map_length = 0;
1970 u64 sector;
1971 struct btrfs_bio *bbio = NULL;
1972 int ret;
1973
1974 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
1975 BUG_ON(!mirror_num);
1976
1977 bio = btrfs_io_bio_alloc(1);
1978 bio->bi_iter.bi_size = 0;
1979 map_length = length;
1980
1981
1982
1983
1984
1985
1986 btrfs_bio_counter_inc_blocked(fs_info);
1987 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
1988
1989
1990
1991
1992
1993
1994 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
1995 &map_length, &bbio, 0);
1996 if (ret) {
1997 btrfs_bio_counter_dec(fs_info);
1998 bio_put(bio);
1999 return -EIO;
2000 }
2001 ASSERT(bbio->mirror_num == 1);
2002 } else {
2003 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
2004 &map_length, &bbio, mirror_num);
2005 if (ret) {
2006 btrfs_bio_counter_dec(fs_info);
2007 bio_put(bio);
2008 return -EIO;
2009 }
2010 BUG_ON(mirror_num != bbio->mirror_num);
2011 }
2012
2013 sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
2014 bio->bi_iter.bi_sector = sector;
2015 dev = bbio->stripes[bbio->mirror_num - 1].dev;
2016 btrfs_put_bbio(bbio);
2017 if (!dev || !dev->bdev ||
2018 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
2019 btrfs_bio_counter_dec(fs_info);
2020 bio_put(bio);
2021 return -EIO;
2022 }
2023 bio_set_dev(bio, dev->bdev);
2024 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2025 bio_add_page(bio, page, length, pg_offset);
2026
2027 if (btrfsic_submit_bio_wait(bio)) {
2028
2029 btrfs_bio_counter_dec(fs_info);
2030 bio_put(bio);
2031 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
2032 return -EIO;
2033 }
2034
2035 btrfs_info_rl_in_rcu(fs_info,
2036 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
2037 ino, start,
2038 rcu_str_deref(dev->name), sector);
2039 btrfs_bio_counter_dec(fs_info);
2040 bio_put(bio);
2041 return 0;
2042}
2043
2044int repair_eb_io_failure(struct btrfs_fs_info *fs_info,
2045 struct extent_buffer *eb, int mirror_num)
2046{
2047 u64 start = eb->start;
2048 int i, num_pages = num_extent_pages(eb);
2049 int ret = 0;
2050
2051 if (sb_rdonly(fs_info->sb))
2052 return -EROFS;
2053
2054 for (i = 0; i < num_pages; i++) {
2055 struct page *p = eb->pages[i];
2056
2057 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
2058 start - page_offset(p), mirror_num);
2059 if (ret)
2060 break;
2061 start += PAGE_SIZE;
2062 }
2063
2064 return ret;
2065}
2066
2067
2068
2069
2070
2071int clean_io_failure(struct btrfs_fs_info *fs_info,
2072 struct extent_io_tree *failure_tree,
2073 struct extent_io_tree *io_tree, u64 start,
2074 struct page *page, u64 ino, unsigned int pg_offset)
2075{
2076 u64 private;
2077 struct io_failure_record *failrec;
2078 struct extent_state *state;
2079 int num_copies;
2080 int ret;
2081
2082 private = 0;
2083 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2084 EXTENT_DIRTY, 0);
2085 if (!ret)
2086 return 0;
2087
2088 ret = get_state_failrec(failure_tree, start, &failrec);
2089 if (ret)
2090 return 0;
2091
2092 BUG_ON(!failrec->this_mirror);
2093
2094 if (failrec->in_validation) {
2095
2096 btrfs_debug(fs_info,
2097 "clean_io_failure: freeing dummy error at %llu",
2098 failrec->start);
2099 goto out;
2100 }
2101 if (sb_rdonly(fs_info->sb))
2102 goto out;
2103
2104 spin_lock(&io_tree->lock);
2105 state = find_first_extent_bit_state(io_tree,
2106 failrec->start,
2107 EXTENT_LOCKED);
2108 spin_unlock(&io_tree->lock);
2109
2110 if (state && state->start <= failrec->start &&
2111 state->end >= failrec->start + failrec->len - 1) {
2112 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2113 failrec->len);
2114 if (num_copies > 1) {
2115 repair_io_failure(fs_info, ino, start, failrec->len,
2116 failrec->logical, page, pg_offset,
2117 failrec->failed_mirror);
2118 }
2119 }
2120
2121out:
2122 free_io_failure(failure_tree, io_tree, failrec);
2123
2124 return 0;
2125}
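
/*
 * Drop all io_failure records in the range [start, end] of the inode's
 * failure tree.  The caller must ensure no new failure records can be added
 * for this range while it runs, e.g. by holding the extent lock, being under
 * an ordered extent, or freeing the inode.
 */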
2133void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
2134{
2135 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
2136 struct io_failure_record *failrec;
2137 struct extent_state *state, *next;
2138
2139 if (RB_EMPTY_ROOT(&failure_tree->state))
2140 return;
2141
2142 spin_lock(&failure_tree->lock);
2143 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2144 while (state) {
2145 if (state->start > end)
2146 break;
2147
2148 ASSERT(state->end <= end);
2149
2150 next = next_state(state);
2151
2152 failrec = state->failrec;
2153 free_extent_state(state);
2154 kfree(failrec);
2155
2156 state = next;
2157 }
2158 spin_unlock(&failure_tree->lock);
2159}
2160
2161int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2162 struct io_failure_record **failrec_ret)
2163{
2164 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2165 struct io_failure_record *failrec;
2166 struct extent_map *em;
2167 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2168 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2169 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2170 int ret;
2171 u64 logical;
2172
2173 ret = get_state_failrec(failure_tree, start, &failrec);
2174 if (ret) {
2175 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2176 if (!failrec)
2177 return -ENOMEM;
2178
2179 failrec->start = start;
2180 failrec->len = end - start + 1;
2181 failrec->this_mirror = 0;
2182 failrec->bio_flags = 0;
2183 failrec->in_validation = 0;
2184
2185 read_lock(&em_tree->lock);
2186 em = lookup_extent_mapping(em_tree, start, failrec->len);
2187 if (!em) {
2188 read_unlock(&em_tree->lock);
2189 kfree(failrec);
2190 return -EIO;
2191 }
2192
2193 if (em->start > start || em->start + em->len <= start) {
2194 free_extent_map(em);
2195 em = NULL;
2196 }
2197 read_unlock(&em_tree->lock);
2198 if (!em) {
2199 kfree(failrec);
2200 return -EIO;
2201 }
2202
2203 logical = start - em->start;
2204 logical = em->block_start + logical;
2205 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2206 logical = em->block_start;
2207 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2208 extent_set_compress_type(&failrec->bio_flags,
2209 em->compress_type);
2210 }
2211
2212 btrfs_debug(fs_info,
2213 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2214 logical, start, failrec->len);
2215
2216 failrec->logical = logical;
2217 free_extent_map(em);
2218
2219
2220 ret = set_extent_bits(failure_tree, start, end,
2221 EXTENT_LOCKED | EXTENT_DIRTY);
2222 if (ret >= 0)
2223 ret = set_state_failrec(failure_tree, start, failrec);
2224
2225 if (ret >= 0)
2226 ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
2227 if (ret < 0) {
2228 kfree(failrec);
2229 return ret;
2230 }
2231 } else {
2232 btrfs_debug(fs_info,
2233 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
2234 failrec->logical, failrec->start, failrec->len,
2235 failrec->in_validation);
2236
2237
2238
2239
2240
2241 }
2242
2243 *failrec_ret = failrec;
2244
2245 return 0;
2246}
2247
2248bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
2249 struct io_failure_record *failrec, int failed_mirror)
2250{
2251 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2252 int num_copies;
2253
2254 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2255 if (num_copies == 1) {
2256
2257
2258
2259
2260
2261 btrfs_debug(fs_info,
2262 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2263 num_copies, failrec->this_mirror, failed_mirror);
2264 return false;
2265 }
2266
2267
2268
2269
2270
2271
2272 if (failed_bio_pages > 1) {
2273
2274
2275
2276
2277
2278
2279
2280
2281 BUG_ON(failrec->in_validation);
2282 failrec->in_validation = 1;
2283 failrec->this_mirror = failed_mirror;
2284 } else {
2285
2286
2287
2288
2289
2290 if (failrec->in_validation) {
2291 BUG_ON(failrec->this_mirror != failed_mirror);
2292 failrec->in_validation = 0;
2293 failrec->this_mirror = 0;
2294 }
2295 failrec->failed_mirror = failed_mirror;
2296 failrec->this_mirror++;
2297 if (failrec->this_mirror == failed_mirror)
2298 failrec->this_mirror++;
2299 }
2300
2301 if (failrec->this_mirror > num_copies) {
2302 btrfs_debug(fs_info,
2303 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2304 num_copies, failrec->this_mirror, failed_mirror);
2305 return false;
2306 }
2307
2308 return true;
2309}
2310
2311
2312struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2313 struct io_failure_record *failrec,
2314 struct page *page, int pg_offset, int icsum,
2315 bio_end_io_t *endio_func, void *data)
2316{
2317 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2318 struct bio *bio;
2319 struct btrfs_io_bio *btrfs_failed_bio;
2320 struct btrfs_io_bio *btrfs_bio;
2321
2322 bio = btrfs_io_bio_alloc(1);
2323 bio->bi_end_io = endio_func;
2324 bio->bi_iter.bi_sector = failrec->logical >> 9;
2325 bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
2326 bio->bi_iter.bi_size = 0;
2327 bio->bi_private = data;
2328
2329 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2330 if (btrfs_failed_bio->csum) {
2331 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2332
2333 btrfs_bio = btrfs_io_bio(bio);
2334 btrfs_bio->csum = btrfs_bio->csum_inline;
2335 icsum *= csum_size;
2336 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2337 csum_size);
2338 }
2339
2340 bio_add_page(bio, page, failrec->len, pg_offset);
2341
2342 return bio;
2343}
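
/*
 * Generic handler for readpage errors.  If other copies of the data exist,
 * record the failure and resubmit the read of this range to another mirror.
 * Returns 0 if a repair read was submitted, otherwise a negative error.
 */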
2353static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2354 struct page *page, u64 start, u64 end,
2355 int failed_mirror)
2356{
2357 struct io_failure_record *failrec;
2358 struct inode *inode = page->mapping->host;
2359 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2360 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2361 struct bio *bio;
2362 int read_mode = 0;
2363 blk_status_t status;
2364 int ret;
2365 unsigned failed_bio_pages = bio_pages_all(failed_bio);
2366
2367 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2368
2369 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2370 if (ret)
2371 return ret;
2372
2373 if (!btrfs_check_repairable(inode, failed_bio_pages, failrec,
2374 failed_mirror)) {
2375 free_io_failure(failure_tree, tree, failrec);
2376 return -EIO;
2377 }
2378
2379 if (failed_bio_pages > 1)
2380 read_mode |= REQ_FAILFAST_DEV;
2381
2382 phy_offset >>= inode->i_sb->s_blocksize_bits;
2383 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2384 start - page_offset(page),
2385 (int)phy_offset, failed_bio->bi_end_io,
2386 NULL);
2387 bio->bi_opf = REQ_OP_READ | read_mode;
2388
2389 btrfs_debug(btrfs_sb(inode->i_sb),
2390 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2391 read_mode, failrec->this_mirror, failrec->in_validation);
2392
2393 status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
2394 failrec->bio_flags, 0);
2395 if (status) {
2396 free_io_failure(failure_tree, tree, failrec);
2397 bio_put(bio);
2398 ret = blk_status_to_errno(status);
2399 }
2400
2401 return ret;
2402}
2403
2404
2405
2406void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2407{
2408 int uptodate = (err == 0);
2409 struct extent_io_tree *tree;
2410 int ret = 0;
2411
2412 tree = &BTRFS_I(page->mapping->host)->io_tree;
2413
2414 if (tree->ops && tree->ops->writepage_end_io_hook)
2415 tree->ops->writepage_end_io_hook(page, start, end, NULL,
2416 uptodate);
2417
2418 if (!uptodate) {
2419 ClearPageUptodate(page);
2420 SetPageError(page);
2421 ret = err < 0 ? err : -EIO;
2422 mapping_set_error(page->mapping, ret);
2423 }
2424}
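
/*
 * After a writepage IO is done, we need to:
 * - clear the uptodate bits on error
 * - clear the writeback bits in the extent tree for this IO
 * - end_page_writeback() if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */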
2435static void end_bio_extent_writepage(struct bio *bio)
2436{
2437 int error = blk_status_to_errno(bio->bi_status);
2438 struct bio_vec *bvec;
2439 u64 start;
2440 u64 end;
2441 int i;
2442
2443 ASSERT(!bio_flagged(bio, BIO_CLONED));
2444 bio_for_each_segment_all(bvec, bio, i) {
2445 struct page *page = bvec->bv_page;
2446 struct inode *inode = page->mapping->host;
2447 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2448
2449
2450
2451
2452
2453
2454 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2455 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2456 btrfs_err(fs_info,
2457 "partial page write in btrfs with offset %u and length %u",
2458 bvec->bv_offset, bvec->bv_len);
2459 else
2460 btrfs_info(fs_info,
2461 "incomplete page write in btrfs with offset %u and length %u",
2462 bvec->bv_offset, bvec->bv_len);
2463 }
2464
2465 start = page_offset(page);
2466 end = start + bvec->bv_offset + bvec->bv_len - 1;
2467
2468 end_extent_writepage(page, error, start, end);
2469 end_page_writeback(page);
2470 }
2471
2472 bio_put(bio);
2473}
2474
2475static void
2476endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2477 int uptodate)
2478{
2479 struct extent_state *cached = NULL;
2480 u64 end = start + len - 1;
2481
2482 if (uptodate && tree->track_uptodate)
2483 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2484 unlock_extent_cached_atomic(tree, start, end, &cached);
2485}
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
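/*
 * Read completion handler for data pages.  For each bvec it lets the
 * readpage_end_io hook verify checksums, clears any recorded I/O failure on
 * success, and on failure hands the range to bio_readpage_error() so another
 * mirror can be tried.  Pages that end up good are zeroed past i_size and
 * marked uptodate; contiguous ranges are batched and released through
 * endio_readpage_release_extent() so the extent tree is unlocked in as few
 * calls as possible.
 */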
2498static void end_bio_extent_readpage(struct bio *bio)
2499{
2500 struct bio_vec *bvec;
2501 int uptodate = !bio->bi_status;
2502 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2503 struct extent_io_tree *tree, *failure_tree;
2504 u64 offset = 0;
2505 u64 start;
2506 u64 end;
2507 u64 len;
2508 u64 extent_start = 0;
2509 u64 extent_len = 0;
2510 int mirror;
2511 int ret;
2512 int i;
2513
2514 ASSERT(!bio_flagged(bio, BIO_CLONED));
2515 bio_for_each_segment_all(bvec, bio, i) {
2516 struct page *page = bvec->bv_page;
2517 struct inode *inode = page->mapping->host;
2518 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2519
2520 btrfs_debug(fs_info,
2521 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2522 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2523 io_bio->mirror_num);
2524 tree = &BTRFS_I(inode)->io_tree;
2525 failure_tree = &BTRFS_I(inode)->io_failure_tree;
2526
2527
2528
2529
2530
2531
2532 if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
2533 if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
2534 btrfs_err(fs_info,
2535 "partial page read in btrfs with offset %u and length %u",
2536 bvec->bv_offset, bvec->bv_len);
2537 else
2538 btrfs_info(fs_info,
2539 "incomplete page read in btrfs with offset %u and length %u",
2540 bvec->bv_offset, bvec->bv_len);
2541 }
2542
2543 start = page_offset(page);
2544 end = start + bvec->bv_offset + bvec->bv_len - 1;
2545 len = bvec->bv_len;
2546
2547 mirror = io_bio->mirror_num;
2548 if (likely(uptodate && tree->ops)) {
2549 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2550 page, start, end,
2551 mirror);
2552 if (ret)
2553 uptodate = 0;
2554 else
2555 clean_io_failure(BTRFS_I(inode)->root->fs_info,
2556 failure_tree, tree, start,
2557 page,
2558 btrfs_ino(BTRFS_I(inode)), 0);
2559 }
2560
2561 if (likely(uptodate))
2562 goto readpage_ok;
2563
2564 if (tree->ops) {
2565 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2566 if (ret == -EAGAIN) {
				/*
				 * A data inode's readpage_io_failed_hook is
				 * expected to return -EAGAIN, meaning "try a
				 * repair read".  bio_readpage_error() submits
				 * a new read for just this range to another
				 * mirror; if that submission succeeds we move
				 * on to the next page and let the repair bio's
				 * own completion decide the final page state.
				 * If it fails we fall through to the error
				 * handling below.
				 */
2581 ret = bio_readpage_error(bio, offset, page,
2582 start, end, mirror);
2583 if (ret == 0) {
2584 uptodate = !bio->bi_status;
2585 offset += len;
2586 continue;
2587 }
2588 }
			/*
			 * If we get here the error could not be repaired;
			 * the only other value the failure hook is expected
			 * to return is -EIO, and the page is simply marked
			 * as errored below.
			 */
2595 ASSERT(ret == -EIO);
2596 }
2597readpage_ok:
2598 if (likely(uptodate)) {
2599 loff_t i_size = i_size_read(inode);
2600 pgoff_t end_index = i_size >> PAGE_SHIFT;
2601 unsigned off;
2602
2603
2604 off = i_size & (PAGE_SIZE-1);
2605 if (page->index == end_index && off)
2606 zero_user_segment(page, off, PAGE_SIZE);
2607 SetPageUptodate(page);
2608 } else {
2609 ClearPageUptodate(page);
2610 SetPageError(page);
2611 }
2612 unlock_page(page);
2613 offset += len;
2614
2615 if (unlikely(!uptodate)) {
2616 if (extent_len) {
2617 endio_readpage_release_extent(tree,
2618 extent_start,
2619 extent_len, 1);
2620 extent_start = 0;
2621 extent_len = 0;
2622 }
2623 endio_readpage_release_extent(tree, start,
2624 end - start + 1, 0);
2625 } else if (!extent_len) {
2626 extent_start = start;
2627 extent_len = end + 1 - start;
2628 } else if (extent_start + extent_len == start) {
2629 extent_len += end + 1 - start;
2630 } else {
2631 endio_readpage_release_extent(tree, extent_start,
2632 extent_len, uptodate);
2633 extent_start = start;
2634 extent_len = end + 1 - start;
2635 }
2636 }
2637
2638 if (extent_len)
2639 endio_readpage_release_extent(tree, extent_start, extent_len,
2640 uptodate);
2641 if (io_bio->end_io)
2642 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2643 bio_put(bio);
2644}
2645
2646
2647
2648
2649
2650
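/*
 * Zero only the btrfs-private part of a btrfs_io_bio (everything before the
 * embedded struct bio), since the bio itself was already set up by the
 * bioset allocation.
 */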
2651static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
2652{
2653 memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
2654}
2655
2656
2657
2658
2659
2660
2661struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
2662{
2663 struct bio *bio;
2664
2665 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
2666 bio_set_dev(bio, bdev);
2667 bio->bi_iter.bi_sector = first_byte >> 9;
2668 btrfs_io_bio_init(btrfs_io_bio(bio));
2669 return bio;
2670}
2671
2672struct bio *btrfs_bio_clone(struct bio *bio)
2673{
2674 struct btrfs_io_bio *btrfs_bio;
2675 struct bio *new;
2676
2677
2678 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
2679 btrfs_bio = btrfs_io_bio(new);
2680 btrfs_io_bio_init(btrfs_bio);
2681 btrfs_bio->iter = bio->bi_iter;
2682 return new;
2683}
2684
2685struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
2686{
2687 struct bio *bio;
2688
2689
2690 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
2691 btrfs_io_bio_init(btrfs_io_bio(bio));
2692 return bio;
2693}
2694
2695struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
2696{
2697 struct bio *bio;
2698 struct btrfs_io_bio *btrfs_bio;
2699
2700
2701 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
2702 ASSERT(bio);
2703
2704 btrfs_bio = btrfs_io_bio(bio);
2705 btrfs_io_bio_init(btrfs_bio);
2706
2707 bio_trim(bio, offset >> 9, size >> 9);
2708 btrfs_bio->iter = bio->bi_iter;
2709 return bio;
2710}
2711
2712static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2713 unsigned long bio_flags)
2714{
2715 blk_status_t ret = 0;
2716 struct bio_vec *bvec = bio_last_bvec_all(bio);
2717 struct page *page = bvec->bv_page;
2718 struct extent_io_tree *tree = bio->bi_private;
2719 u64 start;
2720
2721 start = page_offset(page) + bvec->bv_offset;
2722
2723 bio->bi_private = NULL;
2724
2725 if (tree->ops)
2726 ret = tree->ops->submit_bio_hook(tree->private_data, bio,
2727 mirror_num, bio_flags, start);
2728 else
2729 btrfsic_submit_bio(bio);
2730
2731 return blk_status_to_errno(ret);
2732}
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
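/*
 * Add a page range to the current bio, or start a new one.
 *
 * If *bio_ret already holds a bio, the page is appended to it when it is
 * physically contiguous (bio_end_sector(), or bi_sector for compressed
 * bios), has matching bio_flags, the merge hook allows it and bio_add_page()
 * accepts the full length; otherwise the existing bio is submitted first and
 * a fresh one is allocated.  The resulting bio is returned through *bio_ret
 * so the caller can keep batching.  The usual caller pattern (a sketch, not
 * taken verbatim from this file): call submit_extent_page() for each range
 * of a page, then submit whatever bio pointer is left once the loop is done,
 * as extent_read_full_page() and extent_writepages() do via submit_one_bio()
 * / flush_write_bio().
 */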
2750static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
2751 struct writeback_control *wbc,
2752 struct page *page, u64 offset,
2753 size_t size, unsigned long pg_offset,
2754 struct block_device *bdev,
2755 struct bio **bio_ret,
2756 bio_end_io_t end_io_func,
2757 int mirror_num,
2758 unsigned long prev_bio_flags,
2759 unsigned long bio_flags,
2760 bool force_bio_submit)
2761{
2762 int ret = 0;
2763 struct bio *bio;
2764 size_t page_size = min_t(size_t, size, PAGE_SIZE);
2765 sector_t sector = offset >> 9;
2766
2767 ASSERT(bio_ret);
2768
2769 if (*bio_ret) {
2770 bool contig;
2771 bool can_merge = true;
2772
2773 bio = *bio_ret;
2774 if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
2775 contig = bio->bi_iter.bi_sector == sector;
2776 else
2777 contig = bio_end_sector(bio) == sector;
2778
2779 if (tree->ops && btrfs_merge_bio_hook(page, offset, page_size,
2780 bio, bio_flags))
2781 can_merge = false;
2782
2783 if (prev_bio_flags != bio_flags || !contig || !can_merge ||
2784 force_bio_submit ||
2785 bio_add_page(bio, page, page_size, pg_offset) < page_size) {
2786 ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
2787 if (ret < 0) {
2788 *bio_ret = NULL;
2789 return ret;
2790 }
2791 bio = NULL;
2792 } else {
2793 if (wbc)
2794 wbc_account_io(wbc, page, page_size);
2795 return 0;
2796 }
2797 }
2798
2799 bio = btrfs_bio_alloc(bdev, offset);
2800 bio_add_page(bio, page, page_size, pg_offset);
2801 bio->bi_end_io = end_io_func;
2802 bio->bi_private = tree;
2803 bio->bi_write_hint = page->mapping->host->i_write_hint;
2804 bio->bi_opf = opf;
2805 if (wbc) {
2806 wbc_init_bio(wbc, bio);
2807 wbc_account_io(wbc, page, page_size);
2808 }
2809
2810 *bio_ret = bio;
2811
2812 return ret;
2813}
2814
2815static void attach_extent_buffer_page(struct extent_buffer *eb,
2816 struct page *page)
2817{
2818 if (!PagePrivate(page)) {
2819 SetPagePrivate(page);
2820 get_page(page);
2821 set_page_private(page, (unsigned long)eb);
2822 } else {
2823 WARN_ON(page->private != (unsigned long)eb);
2824 }
2825}
2826
2827void set_page_extent_mapped(struct page *page)
2828{
2829 if (!PagePrivate(page)) {
2830 SetPagePrivate(page);
2831 get_page(page);
2832 set_page_private(page, EXTENT_PAGE_PRIVATE);
2833 }
2834}
2835
2836static struct extent_map *
2837__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2838 u64 start, u64 len, get_extent_t *get_extent,
2839 struct extent_map **em_cached)
2840{
2841 struct extent_map *em;
2842
2843 if (em_cached && *em_cached) {
2844 em = *em_cached;
2845 if (extent_map_in_tree(em) && start >= em->start &&
2846 start < extent_map_end(em)) {
2847 refcount_inc(&em->refs);
2848 return em;
2849 }
2850
2851 free_extent_map(em);
2852 *em_cached = NULL;
2853 }
2854
2855 em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
2856 if (em_cached && !IS_ERR_OR_NULL(em)) {
2857 BUG_ON(*em_cached);
2858 refcount_inc(&em->refs);
2859 *em_cached = em;
2860 }
2861 return em;
2862}
2863
2864
2865
2866
2867
2868
2869
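/*
 * Basic readpage implementation.  The caller is expected to hold the extent
 * range locked; this walks the page block by block, zeroing holes and the
 * tail beyond i_size, reusing ranges that are already EXTENT_UPTODATE, and
 * submitting read bios for everything else via submit_extent_page().  The
 * extent lock for each block is released either here (holes, uptodate or
 * error cases) or later by end_bio_extent_readpage().
 */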
2870static int __do_readpage(struct extent_io_tree *tree,
2871 struct page *page,
2872 get_extent_t *get_extent,
2873 struct extent_map **em_cached,
2874 struct bio **bio, int mirror_num,
2875 unsigned long *bio_flags, unsigned int read_flags,
2876 u64 *prev_em_start)
2877{
2878 struct inode *inode = page->mapping->host;
2879 u64 start = page_offset(page);
2880 const u64 end = start + PAGE_SIZE - 1;
2881 u64 cur = start;
2882 u64 extent_offset;
2883 u64 last_byte = i_size_read(inode);
2884 u64 block_start;
2885 u64 cur_end;
2886 struct extent_map *em;
2887 struct block_device *bdev;
2888 int ret = 0;
2889 int nr = 0;
2890 size_t pg_offset = 0;
2891 size_t iosize;
2892 size_t disk_io_size;
2893 size_t blocksize = inode->i_sb->s_blocksize;
2894 unsigned long this_bio_flag = 0;
2895
2896 set_page_extent_mapped(page);
2897
2898 if (!PageUptodate(page)) {
2899 if (cleancache_get_page(page) == 0) {
2900 BUG_ON(blocksize != PAGE_SIZE);
2901 unlock_extent(tree, start, end);
2902 goto out;
2903 }
2904 }
2905
2906 if (page->index == last_byte >> PAGE_SHIFT) {
2907 char *userpage;
2908 size_t zero_offset = last_byte & (PAGE_SIZE - 1);
2909
2910 if (zero_offset) {
2911 iosize = PAGE_SIZE - zero_offset;
2912 userpage = kmap_atomic(page);
2913 memset(userpage + zero_offset, 0, iosize);
2914 flush_dcache_page(page);
2915 kunmap_atomic(userpage);
2916 }
2917 }
2918 while (cur <= end) {
2919 bool force_bio_submit = false;
2920 u64 offset;
2921
2922 if (cur >= last_byte) {
2923 char *userpage;
2924 struct extent_state *cached = NULL;
2925
2926 iosize = PAGE_SIZE - pg_offset;
2927 userpage = kmap_atomic(page);
2928 memset(userpage + pg_offset, 0, iosize);
2929 flush_dcache_page(page);
2930 kunmap_atomic(userpage);
2931 set_extent_uptodate(tree, cur, cur + iosize - 1,
2932 &cached, GFP_NOFS);
2933 unlock_extent_cached(tree, cur,
2934 cur + iosize - 1, &cached);
2935 break;
2936 }
2937 em = __get_extent_map(inode, page, pg_offset, cur,
2938 end - cur + 1, get_extent, em_cached);
2939 if (IS_ERR_OR_NULL(em)) {
2940 SetPageError(page);
2941 unlock_extent(tree, cur, end);
2942 break;
2943 }
2944 extent_offset = cur - em->start;
2945 BUG_ON(extent_map_end(em) <= cur);
2946 BUG_ON(end < cur);
2947
2948 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2949 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2950 extent_set_compress_type(&this_bio_flag,
2951 em->compress_type);
2952 }
2953
2954 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2955 cur_end = min(extent_map_end(em) - 1, end);
2956 iosize = ALIGN(iosize, blocksize);
2957 if (this_bio_flag & EXTENT_BIO_COMPRESSED) {
2958 disk_io_size = em->block_len;
2959 offset = em->block_start;
2960 } else {
2961 offset = em->block_start + extent_offset;
2962 disk_io_size = iosize;
2963 }
2964 bdev = em->bdev;
2965 block_start = em->block_start;
2966 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
2967 block_start = EXTENT_MAP_HOLE;
2968
		/*
		 * If this range is backed by a compressed extent and the
		 * previous range added to the bio came from a different
		 * extent (different orig_start), force submission of the
		 * current bio before adding this page.  This appears to guard
		 * against reads that span two file ranges pointing at the
		 * same (shared/cloned) compressed extent, or at different
		 * compressed extents, ending up in one bio: the compressed
		 * read completion path works per extent, and mixing ranges
		 * from different extents in a single bio could have it
		 * decompress the wrong data into these pages.
		 */
3003 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3004 prev_em_start && *prev_em_start != (u64)-1 &&
3005 *prev_em_start != em->orig_start)
3006 force_bio_submit = true;
3007
3008 if (prev_em_start)
3009 *prev_em_start = em->orig_start;
3010
3011 free_extent_map(em);
3012 em = NULL;
3013
3014
3015 if (block_start == EXTENT_MAP_HOLE) {
3016 char *userpage;
3017 struct extent_state *cached = NULL;
3018
3019 userpage = kmap_atomic(page);
3020 memset(userpage + pg_offset, 0, iosize);
3021 flush_dcache_page(page);
3022 kunmap_atomic(userpage);
3023
3024 set_extent_uptodate(tree, cur, cur + iosize - 1,
3025 &cached, GFP_NOFS);
3026 unlock_extent_cached(tree, cur,
3027 cur + iosize - 1, &cached);
3028 cur = cur + iosize;
3029 pg_offset += iosize;
3030 continue;
3031 }
3032
3033 if (test_range_bit(tree, cur, cur_end,
3034 EXTENT_UPTODATE, 1, NULL)) {
3035 check_page_uptodate(tree, page);
3036 unlock_extent(tree, cur, cur + iosize - 1);
3037 cur = cur + iosize;
3038 pg_offset += iosize;
3039 continue;
3040 }
3041
3042
3043
3044 if (block_start == EXTENT_MAP_INLINE) {
3045 SetPageError(page);
3046 unlock_extent(tree, cur, cur + iosize - 1);
3047 cur = cur + iosize;
3048 pg_offset += iosize;
3049 continue;
3050 }
3051
3052 ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
3053 page, offset, disk_io_size,
3054 pg_offset, bdev, bio,
3055 end_bio_extent_readpage, mirror_num,
3056 *bio_flags,
3057 this_bio_flag,
3058 force_bio_submit);
3059 if (!ret) {
3060 nr++;
3061 *bio_flags = this_bio_flag;
3062 } else {
3063 SetPageError(page);
3064 unlock_extent(tree, cur, cur + iosize - 1);
3065 goto out;
3066 }
3067 cur = cur + iosize;
3068 pg_offset += iosize;
3069 }
3070out:
3071 if (!nr) {
3072 if (!PageError(page))
3073 SetPageUptodate(page);
3074 unlock_page(page);
3075 }
3076 return ret;
3077}
3078
3079static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
3080 struct page *pages[], int nr_pages,
3081 u64 start, u64 end,
3082 struct extent_map **em_cached,
3083 struct bio **bio,
3084 unsigned long *bio_flags,
3085 u64 *prev_em_start)
3086{
3087 struct inode *inode;
3088 struct btrfs_ordered_extent *ordered;
3089 int index;
3090
3091 inode = pages[0]->mapping->host;
3092 while (1) {
3093 lock_extent(tree, start, end);
3094 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3095 end - start + 1);
3096 if (!ordered)
3097 break;
3098 unlock_extent(tree, start, end);
3099 btrfs_start_ordered_extent(inode, ordered, 1);
3100 btrfs_put_ordered_extent(ordered);
3101 }
3102
3103 for (index = 0; index < nr_pages; index++) {
3104 __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
3105 bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
3106 put_page(pages[index]);
3107 }
3108}
3109
3110static void __extent_readpages(struct extent_io_tree *tree,
3111 struct page *pages[],
3112 int nr_pages,
3113 struct extent_map **em_cached,
3114 struct bio **bio, unsigned long *bio_flags,
3115 u64 *prev_em_start)
3116{
3117 u64 start = 0;
3118 u64 end = 0;
3119 u64 page_start;
3120 int index;
3121 int first_index = 0;
3122
3123 for (index = 0; index < nr_pages; index++) {
3124 page_start = page_offset(pages[index]);
3125 if (!end) {
3126 start = page_start;
3127 end = start + PAGE_SIZE - 1;
3128 first_index = index;
3129 } else if (end + 1 == page_start) {
3130 end += PAGE_SIZE;
3131 } else {
3132 __do_contiguous_readpages(tree, &pages[first_index],
3133 index - first_index, start,
3134 end, em_cached,
3135 bio, bio_flags,
3136 prev_em_start);
3137 start = page_start;
3138 end = start + PAGE_SIZE - 1;
3139 first_index = index;
3140 }
3141 }
3142
3143 if (end)
3144 __do_contiguous_readpages(tree, &pages[first_index],
3145 index - first_index, start,
3146 end, em_cached, bio,
3147 bio_flags, prev_em_start);
3148}
3149
3150static int __extent_read_full_page(struct extent_io_tree *tree,
3151 struct page *page,
3152 get_extent_t *get_extent,
3153 struct bio **bio, int mirror_num,
3154 unsigned long *bio_flags,
3155 unsigned int read_flags)
3156{
3157 struct inode *inode = page->mapping->host;
3158 struct btrfs_ordered_extent *ordered;
3159 u64 start = page_offset(page);
3160 u64 end = start + PAGE_SIZE - 1;
3161 int ret;
3162
3163 while (1) {
3164 lock_extent(tree, start, end);
3165 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
3166 PAGE_SIZE);
3167 if (!ordered)
3168 break;
3169 unlock_extent(tree, start, end);
3170 btrfs_start_ordered_extent(inode, ordered, 1);
3171 btrfs_put_ordered_extent(ordered);
3172 }
3173
3174 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3175 bio_flags, read_flags, NULL);
3176 return ret;
3177}
3178
3179int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3180 get_extent_t *get_extent, int mirror_num)
3181{
3182 struct bio *bio = NULL;
3183 unsigned long bio_flags = 0;
3184 int ret;
3185
3186 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
3187 &bio_flags, 0);
3188 if (bio)
3189 ret = submit_one_bio(bio, mirror_num, bio_flags);
3190 return ret;
3191}
3192
3193static void update_nr_written(struct writeback_control *wbc,
3194 unsigned long nr_written)
3195{
3196 wbc->nr_to_write -= nr_written;
3197}
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
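/*
 * Helper for __extent_writepage(): run delayed allocation for the page.
 * It repeatedly finds and locks delalloc ranges overlapping the page and
 * hands them to the fill_delalloc hook.  Returns 1 if the hook already took
 * care of writing the page (page_started), in which case the caller must not
 * touch the page again; 0 when the caller should go on and write the page;
 * a negative errno on failure.
 */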
3209static noinline_for_stack int writepage_delalloc(struct inode *inode,
3210 struct page *page, struct writeback_control *wbc,
3211 struct extent_page_data *epd,
3212 u64 delalloc_start,
3213 unsigned long *nr_written)
3214{
3215 struct extent_io_tree *tree = epd->tree;
3216 u64 page_end = delalloc_start + PAGE_SIZE - 1;
3217 u64 nr_delalloc;
3218 u64 delalloc_to_write = 0;
3219 u64 delalloc_end = 0;
3220 int ret;
3221 int page_started = 0;
3222
3223 if (epd->extent_locked || !tree->ops || !tree->ops->fill_delalloc)
3224 return 0;
3225
3226 while (delalloc_end < page_end) {
3227 nr_delalloc = find_lock_delalloc_range(inode, tree,
3228 page,
3229 &delalloc_start,
3230 &delalloc_end,
3231 BTRFS_MAX_EXTENT_SIZE);
3232 if (nr_delalloc == 0) {
3233 delalloc_start = delalloc_end + 1;
3234 continue;
3235 }
3236 ret = tree->ops->fill_delalloc(inode, page,
3237 delalloc_start,
3238 delalloc_end,
3239 &page_started,
3240 nr_written, wbc);
3241
3242 if (ret) {
3243 SetPageError(page);
3244
3245
3246
3247
3248
3249 ret = ret < 0 ? ret : -EIO;
3250 goto done;
3251 }
3252
3253
3254
3255
3256 delalloc_to_write += (delalloc_end - delalloc_start +
3257 PAGE_SIZE) >> PAGE_SHIFT;
3258 delalloc_start = delalloc_end + 1;
3259 }
3260 if (wbc->nr_to_write < delalloc_to_write) {
3261 int thresh = 8192;
3262
3263 if (delalloc_to_write < thresh * 2)
3264 thresh = delalloc_to_write;
3265 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3266 thresh);
3267 }
3268
3269
3270
3271
3272 if (page_started) {
3273
3274
3275
3276
3277
3278 wbc->nr_to_write -= *nr_written;
3279 return 1;
3280 }
3281
3282 ret = 0;
3283
3284done:
3285 return ret;
3286}
3287
3288
3289
3290
3291
3292
3293
3294
3295
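/*
 * Helper for __extent_writepage(): do the actual data IO for one page.
 * It walks the page's extent maps, skips compressed, inline and hole ranges
 * (which are written elsewhere or need no IO here), marks the rest writeback
 * and submits write bios through submit_extent_page().  Returns 1 if the
 * writepage_start_hook asked us to back off (the page is already unlocked),
 * otherwise 0 or a negative errno, with the number of submitted segments
 * reported in *nr_ret.
 */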
3296static noinline_for_stack int __extent_writepage_io(struct inode *inode,
3297 struct page *page,
3298 struct writeback_control *wbc,
3299 struct extent_page_data *epd,
3300 loff_t i_size,
3301 unsigned long nr_written,
3302 unsigned int write_flags, int *nr_ret)
3303{
3304 struct extent_io_tree *tree = epd->tree;
3305 u64 start = page_offset(page);
3306 u64 page_end = start + PAGE_SIZE - 1;
3307 u64 end;
3308 u64 cur = start;
3309 u64 extent_offset;
3310 u64 block_start;
3311 u64 iosize;
3312 struct extent_map *em;
3313 struct block_device *bdev;
3314 size_t pg_offset = 0;
3315 size_t blocksize;
3316 int ret = 0;
3317 int nr = 0;
3318 bool compressed;
3319
3320 if (tree->ops && tree->ops->writepage_start_hook) {
3321 ret = tree->ops->writepage_start_hook(page, start,
3322 page_end);
3323 if (ret) {
3324
3325 if (ret == -EBUSY)
3326 wbc->pages_skipped++;
3327 else
3328 redirty_page_for_writepage(wbc, page);
3329
3330 update_nr_written(wbc, nr_written);
3331 unlock_page(page);
3332 return 1;
3333 }
3334 }
3335
	/*
	 * Account this page against wbc->nr_to_write up front, together with
	 * anything fill_delalloc already wrote, before any IO is submitted
	 * and the page may end up unlocked.
	 */
3340 update_nr_written(wbc, nr_written + 1);
3341
3342 end = page_end;
3343 if (i_size <= start) {
3344 if (tree->ops && tree->ops->writepage_end_io_hook)
3345 tree->ops->writepage_end_io_hook(page, start,
3346 page_end, NULL, 1);
3347 goto done;
3348 }
3349
3350 blocksize = inode->i_sb->s_blocksize;
3351
3352 while (cur <= end) {
3353 u64 em_end;
3354 u64 offset;
3355
3356 if (cur >= i_size) {
3357 if (tree->ops && tree->ops->writepage_end_io_hook)
3358 tree->ops->writepage_end_io_hook(page, cur,
3359 page_end, NULL, 1);
3360 break;
3361 }
3362 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
3363 end - cur + 1, 1);
3364 if (IS_ERR_OR_NULL(em)) {
3365 SetPageError(page);
3366 ret = PTR_ERR_OR_ZERO(em);
3367 break;
3368 }
3369
3370 extent_offset = cur - em->start;
3371 em_end = extent_map_end(em);
3372 BUG_ON(em_end <= cur);
3373 BUG_ON(end < cur);
3374 iosize = min(em_end - cur, end - cur + 1);
3375 iosize = ALIGN(iosize, blocksize);
3376 offset = em->block_start + extent_offset;
3377 bdev = em->bdev;
3378 block_start = em->block_start;
3379 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
3380 free_extent_map(em);
3381 em = NULL;
3382
3383
3384
3385
3386
3387 if (compressed || block_start == EXTENT_MAP_HOLE ||
3388 block_start == EXTENT_MAP_INLINE) {
3389
3390
3391
3392
3393 if (!compressed && tree->ops &&
3394 tree->ops->writepage_end_io_hook)
3395 tree->ops->writepage_end_io_hook(page, cur,
3396 cur + iosize - 1,
3397 NULL, 1);
3398 else if (compressed) {
3399
3400
3401
3402
3403 nr++;
3404 }
3405
3406 cur += iosize;
3407 pg_offset += iosize;
3408 continue;
3409 }
3410
3411 btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
3412 if (!PageWriteback(page)) {
3413 btrfs_err(BTRFS_I(inode)->root->fs_info,
3414 "page %lu not writeback, cur %llu end %llu",
3415 page->index, cur, end);
3416 }
3417
3418 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3419 page, offset, iosize, pg_offset,
3420 bdev, &epd->bio,
3421 end_bio_extent_writepage,
3422 0, 0, 0, false);
3423 if (ret) {
3424 SetPageError(page);
3425 if (PageWriteback(page))
3426 end_page_writeback(page);
3427 }
3428
3429 cur = cur + iosize;
3430 pg_offset += iosize;
3431 nr++;
3432 }
3433done:
3434 *nr_ret = nr;
3435 return ret;
3436}
3437
3438
3439
3440
3441
3442
3443
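/*
 * The writepage semantics are similar to regular writepage: extent mapped
 * pages are written here.  Pages fully beyond i_size are invalidated, the
 * tail of the last page is zeroed, delalloc is flushed via
 * writepage_delalloc() and the data IO is issued by __extent_writepage_io().
 * Returns 0 on success (or when the page was handed off), otherwise a
 * negative errno after marking the page with an error.
 */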
3444static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3445 struct extent_page_data *epd)
3446{
3447 struct inode *inode = page->mapping->host;
3448 u64 start = page_offset(page);
3449 u64 page_end = start + PAGE_SIZE - 1;
3450 int ret;
3451 int nr = 0;
3452 size_t pg_offset = 0;
3453 loff_t i_size = i_size_read(inode);
3454 unsigned long end_index = i_size >> PAGE_SHIFT;
3455 unsigned int write_flags = 0;
3456 unsigned long nr_written = 0;
3457
3458 write_flags = wbc_to_write_flags(wbc);
3459
3460 trace___extent_writepage(page, inode, wbc);
3461
3462 WARN_ON(!PageLocked(page));
3463
3464 ClearPageError(page);
3465
3466 pg_offset = i_size & (PAGE_SIZE - 1);
3467 if (page->index > end_index ||
3468 (page->index == end_index && !pg_offset)) {
3469 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
3470 unlock_page(page);
3471 return 0;
3472 }
3473
3474 if (page->index == end_index) {
3475 char *userpage;
3476
3477 userpage = kmap_atomic(page);
3478 memset(userpage + pg_offset, 0,
3479 PAGE_SIZE - pg_offset);
3480 kunmap_atomic(userpage);
3481 flush_dcache_page(page);
3482 }
3483
3484 pg_offset = 0;
3485
3486 set_page_extent_mapped(page);
3487
3488 ret = writepage_delalloc(inode, page, wbc, epd, start, &nr_written);
3489 if (ret == 1)
3490 goto done_unlocked;
3491 if (ret)
3492 goto done;
3493
3494 ret = __extent_writepage_io(inode, page, wbc, epd,
3495 i_size, nr_written, write_flags, &nr);
3496 if (ret == 1)
3497 goto done_unlocked;
3498
3499done:
3500 if (nr == 0) {
3501
3502 set_page_writeback(page);
3503 end_page_writeback(page);
3504 }
3505 if (PageError(page)) {
3506 ret = ret < 0 ? ret : -EIO;
3507 end_extent_writepage(page, ret, start, page_end);
3508 }
3509 unlock_page(page);
3510 return ret;
3511
3512done_unlocked:
3513 return 0;
3514}
3515
3516void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3517{
3518 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
3519 TASK_UNINTERRUPTIBLE);
3520}
3521
3522static noinline_for_stack int
3523lock_extent_buffer_for_io(struct extent_buffer *eb,
3524 struct btrfs_fs_info *fs_info,
3525 struct extent_page_data *epd)
3526{
3527 int i, num_pages;
3528 int flush = 0;
3529 int ret = 0;
3530
3531 if (!btrfs_try_tree_write_lock(eb)) {
3532 flush = 1;
3533 flush_write_bio(epd);
3534 btrfs_tree_lock(eb);
3535 }
3536
3537 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
3538 btrfs_tree_unlock(eb);
3539 if (!epd->sync_io)
3540 return 0;
3541 if (!flush) {
3542 flush_write_bio(epd);
3543 flush = 1;
3544 }
3545 while (1) {
3546 wait_on_extent_buffer_writeback(eb);
3547 btrfs_tree_lock(eb);
3548 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
3549 break;
3550 btrfs_tree_unlock(eb);
3551 }
3552 }
3553
	/*
	 * Move the buffer from dirty to writeback under refs_lock: clear
	 * EXTENT_BUFFER_DIRTY, set EXTENT_BUFFER_WRITEBACK, mark the header
	 * as written and subtract the buffer from the dirty metadata
	 * accounting.  ret is 1 only if this call performed the transition.
	 */
3559 spin_lock(&eb->refs_lock);
3560 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3561 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3562 spin_unlock(&eb->refs_lock);
3563 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3564 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
3565 -eb->len,
3566 fs_info->dirty_metadata_batch);
3567 ret = 1;
3568 } else {
3569 spin_unlock(&eb->refs_lock);
3570 }
3571
3572 btrfs_tree_unlock(eb);
3573
3574 if (!ret)
3575 return ret;
3576
3577 num_pages = num_extent_pages(eb);
3578 for (i = 0; i < num_pages; i++) {
3579 struct page *p = eb->pages[i];
3580
3581 if (!trylock_page(p)) {
3582 if (!flush) {
3583 flush_write_bio(epd);
3584 flush = 1;
3585 }
3586 lock_page(p);
3587 }
3588 }
3589
3590 return ret;
3591}
3592
3593static void end_extent_buffer_writeback(struct extent_buffer *eb)
3594{
3595 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3596 smp_mb__after_atomic();
3597 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
3598}
3599
3600static void set_btree_ioerr(struct page *page)
3601{
3602 struct extent_buffer *eb = (struct extent_buffer *)page->private;
3603
3604 SetPageError(page);
3605 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
3606 return;
3607
	/*
	 * Record the write error at the filesystem level, keyed by which
	 * tree the buffer belongs to: BTRFS_FS_BTREE_ERR for regular tree
	 * blocks (log_index == -1) and BTRFS_FS_LOG1_ERR/BTRFS_FS_LOG2_ERR
	 * for the two log trees.  A page flag or mapping error alone would
	 * presumably not be enough, because by the time a transaction commit
	 * or log sync checks for errors the failed pages may already have
	 * been cleaned and released; these sticky fs_info flags survive that
	 * and let the commit/fsync path report the failure instead of
	 * pretending the metadata reached disk.
	 */
3646 switch (eb->log_index) {
3647 case -1:
3648 set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
3649 break;
3650 case 0:
3651 set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
3652 break;
3653 case 1:
3654 set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
3655 break;
3656 default:
3657 BUG();
3658 }
3659}
3660
3661static void end_bio_extent_buffer_writepage(struct bio *bio)
3662{
3663 struct bio_vec *bvec;
3664 struct extent_buffer *eb;
3665 int i, done;
3666
3667 ASSERT(!bio_flagged(bio, BIO_CLONED));
3668 bio_for_each_segment_all(bvec, bio, i) {
3669 struct page *page = bvec->bv_page;
3670
3671 eb = (struct extent_buffer *)page->private;
3672 BUG_ON(!eb);
3673 done = atomic_dec_and_test(&eb->io_pages);
3674
3675 if (bio->bi_status ||
3676 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3677 ClearPageUptodate(page);
3678 set_btree_ioerr(page);
3679 }
3680
3681 end_page_writeback(page);
3682
3683 if (!done)
3684 continue;
3685
3686 end_extent_buffer_writeback(eb);
3687 }
3688
3689 bio_put(bio);
3690}
3691
3692static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
3693 struct btrfs_fs_info *fs_info,
3694 struct writeback_control *wbc,
3695 struct extent_page_data *epd)
3696{
3697 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3698 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3699 u64 offset = eb->start;
3700 u32 nritems;
3701 int i, num_pages;
3702 unsigned long start, end;
3703 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
3704 int ret = 0;
3705
3706 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
3707 num_pages = num_extent_pages(eb);
3708 atomic_set(&eb->io_pages, num_pages);
3709
3710
3711 nritems = btrfs_header_nritems(eb);
3712 if (btrfs_header_level(eb) > 0) {
3713 end = btrfs_node_key_ptr_offset(nritems);
3714
3715 memzero_extent_buffer(eb, end, eb->len - end);
3716 } else {
3717
3718
3719
3720
3721 start = btrfs_item_nr_offset(nritems);
3722 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(fs_info, eb);
3723 memzero_extent_buffer(eb, start, end - start);
3724 }
3725
3726 for (i = 0; i < num_pages; i++) {
3727 struct page *p = eb->pages[i];
3728
3729 clear_page_dirty_for_io(p);
3730 set_page_writeback(p);
3731 ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
3732 p, offset, PAGE_SIZE, 0, bdev,
3733 &epd->bio,
3734 end_bio_extent_buffer_writepage,
3735 0, 0, 0, false);
3736 if (ret) {
3737 set_btree_ioerr(p);
3738 if (PageWriteback(p))
3739 end_page_writeback(p);
3740 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
3741 end_extent_buffer_writeback(eb);
3742 ret = -EIO;
3743 break;
3744 }
3745 offset += PAGE_SIZE;
3746 update_nr_written(wbc, 1);
3747 unlock_page(p);
3748 }
3749
3750 if (unlikely(ret)) {
3751 for (; i < num_pages; i++) {
3752 struct page *p = eb->pages[i];
3753 clear_page_dirty_for_io(p);
3754 unlock_page(p);
3755 }
3756 }
3757
3758 return ret;
3759}
3760
3761int btree_write_cache_pages(struct address_space *mapping,
3762 struct writeback_control *wbc)
3763{
3764 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
3765 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
3766 struct extent_buffer *eb, *prev_eb = NULL;
3767 struct extent_page_data epd = {
3768 .bio = NULL,
3769 .tree = tree,
3770 .extent_locked = 0,
3771 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
3772 };
3773 int ret = 0;
3774 int done = 0;
3775 int nr_to_write_done = 0;
3776 struct pagevec pvec;
3777 int nr_pages;
3778 pgoff_t index;
3779 pgoff_t end;
3780 int scanned = 0;
3781 int tag;
3782
3783 pagevec_init(&pvec);
3784 if (wbc->range_cyclic) {
3785 index = mapping->writeback_index;
3786 end = -1;
3787 } else {
3788 index = wbc->range_start >> PAGE_SHIFT;
3789 end = wbc->range_end >> PAGE_SHIFT;
3790 scanned = 1;
3791 }
3792 if (wbc->sync_mode == WB_SYNC_ALL)
3793 tag = PAGECACHE_TAG_TOWRITE;
3794 else
3795 tag = PAGECACHE_TAG_DIRTY;
3796retry:
3797 if (wbc->sync_mode == WB_SYNC_ALL)
3798 tag_pages_for_writeback(mapping, index, end);
3799 while (!done && !nr_to_write_done && (index <= end) &&
3800 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
3801 tag))) {
3802 unsigned i;
3803
3804 scanned = 1;
3805 for (i = 0; i < nr_pages; i++) {
3806 struct page *page = pvec.pages[i];
3807
3808 if (!PagePrivate(page))
3809 continue;
3810
3811 spin_lock(&mapping->private_lock);
3812 if (!PagePrivate(page)) {
3813 spin_unlock(&mapping->private_lock);
3814 continue;
3815 }
3816
3817 eb = (struct extent_buffer *)page->private;
3818
3819
3820
3821
3822
3823
3824 if (WARN_ON(!eb)) {
3825 spin_unlock(&mapping->private_lock);
3826 continue;
3827 }
3828
3829 if (eb == prev_eb) {
3830 spin_unlock(&mapping->private_lock);
3831 continue;
3832 }
3833
3834 ret = atomic_inc_not_zero(&eb->refs);
3835 spin_unlock(&mapping->private_lock);
3836 if (!ret)
3837 continue;
3838
3839 prev_eb = eb;
3840 ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
3841 if (!ret) {
3842 free_extent_buffer(eb);
3843 continue;
3844 }
3845
3846 ret = write_one_eb(eb, fs_info, wbc, &epd);
3847 if (ret) {
3848 done = 1;
3849 free_extent_buffer(eb);
3850 break;
3851 }
3852 free_extent_buffer(eb);
3853
3854
3855
3856
3857
3858
3859 nr_to_write_done = wbc->nr_to_write <= 0;
3860 }
3861 pagevec_release(&pvec);
3862 cond_resched();
3863 }
3864 if (!scanned && !done) {
3865
3866
3867
3868
3869 scanned = 1;
3870 index = 0;
3871 goto retry;
3872 }
3873 flush_write_bio(&epd);
3874 return ret;
3875}
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
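/*
 * Walk the dirty pages of the mapping as directed by the writeback_control
 * and write each of them with __extent_writepage().  The inode is pinned
 * with igrab() for the duration and released through a delayed iput, which
 * appears intended to keep inode eviction work out of the flusher thread.
 * The tagged-dirty lookup, cyclic ranges and the retry-from-zero pass mirror
 * the generic write_cache_pages() loop.
 */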
3891static int extent_write_cache_pages(struct address_space *mapping,
3892 struct writeback_control *wbc,
3893 struct extent_page_data *epd)
3894{
3895 struct inode *inode = mapping->host;
3896 int ret = 0;
3897 int done = 0;
3898 int nr_to_write_done = 0;
3899 struct pagevec pvec;
3900 int nr_pages;
3901 pgoff_t index;
3902 pgoff_t end;
3903 pgoff_t done_index;
3904 int range_whole = 0;
3905 int scanned = 0;
3906 int tag;
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917 if (!igrab(inode))
3918 return 0;
3919
3920 pagevec_init(&pvec);
3921 if (wbc->range_cyclic) {
3922 index = mapping->writeback_index;
3923 end = -1;
3924 } else {
3925 index = wbc->range_start >> PAGE_SHIFT;
3926 end = wbc->range_end >> PAGE_SHIFT;
3927 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3928 range_whole = 1;
3929 scanned = 1;
3930 }
3931 if (wbc->sync_mode == WB_SYNC_ALL)
3932 tag = PAGECACHE_TAG_TOWRITE;
3933 else
3934 tag = PAGECACHE_TAG_DIRTY;
3935retry:
3936 if (wbc->sync_mode == WB_SYNC_ALL)
3937 tag_pages_for_writeback(mapping, index, end);
3938 done_index = index;
3939 while (!done && !nr_to_write_done && (index <= end) &&
3940 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
3941 &index, end, tag))) {
3942 unsigned i;
3943
3944 scanned = 1;
3945 for (i = 0; i < nr_pages; i++) {
3946 struct page *page = pvec.pages[i];
3947
3948 done_index = page->index;
3949
3950
3951
3952
3953
3954
3955
3956 if (!trylock_page(page)) {
3957 flush_write_bio(epd);
3958 lock_page(page);
3959 }
3960
3961 if (unlikely(page->mapping != mapping)) {
3962 unlock_page(page);
3963 continue;
3964 }
3965
3966 if (wbc->sync_mode != WB_SYNC_NONE) {
3967 if (PageWriteback(page))
3968 flush_write_bio(epd);
3969 wait_on_page_writeback(page);
3970 }
3971
3972 if (PageWriteback(page) ||
3973 !clear_page_dirty_for_io(page)) {
3974 unlock_page(page);
3975 continue;
3976 }
3977
3978 ret = __extent_writepage(page, wbc, epd);
3979
3980 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
3981 unlock_page(page);
3982 ret = 0;
3983 }
3984 if (ret < 0) {
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994 done_index = page->index + 1;
3995 done = 1;
3996 break;
3997 }
3998
3999
4000
4001
4002
4003
4004 nr_to_write_done = wbc->nr_to_write <= 0;
4005 }
4006 pagevec_release(&pvec);
4007 cond_resched();
4008 }
4009 if (!scanned && !done) {
4010
4011
4012
4013
4014 scanned = 1;
4015 index = 0;
4016 goto retry;
4017 }
4018
4019 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
4020 mapping->writeback_index = done_index;
4021
4022 btrfs_add_delayed_iput(inode);
4023 return ret;
4024}
4025
4026static void flush_write_bio(struct extent_page_data *epd)
4027{
4028 if (epd->bio) {
4029 int ret;
4030
4031 ret = submit_one_bio(epd->bio, 0, 0);
4032 BUG_ON(ret < 0);
4033 epd->bio = NULL;
4034 }
4035}
4036
4037int extent_write_full_page(struct page *page, struct writeback_control *wbc)
4038{
4039 int ret;
4040 struct extent_page_data epd = {
4041 .bio = NULL,
4042 .tree = &BTRFS_I(page->mapping->host)->io_tree,
4043 .extent_locked = 0,
4044 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4045 };
4046
4047 ret = __extent_writepage(page, wbc, &epd);
4048
4049 flush_write_bio(&epd);
4050 return ret;
4051}
4052
4053int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
4054 int mode)
4055{
4056 int ret = 0;
4057 struct address_space *mapping = inode->i_mapping;
4058 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
4059 struct page *page;
4060 unsigned long nr_pages = (end - start + PAGE_SIZE) >>
4061 PAGE_SHIFT;
4062
4063 struct extent_page_data epd = {
4064 .bio = NULL,
4065 .tree = tree,
4066 .extent_locked = 1,
4067 .sync_io = mode == WB_SYNC_ALL,
4068 };
4069 struct writeback_control wbc_writepages = {
4070 .sync_mode = mode,
4071 .nr_to_write = nr_pages * 2,
4072 .range_start = start,
4073 .range_end = end + 1,
4074 };
4075
4076 while (start <= end) {
4077 page = find_get_page(mapping, start >> PAGE_SHIFT);
4078 if (clear_page_dirty_for_io(page))
4079 ret = __extent_writepage(page, &wbc_writepages, &epd);
4080 else {
4081 if (tree->ops && tree->ops->writepage_end_io_hook)
4082 tree->ops->writepage_end_io_hook(page, start,
4083 start + PAGE_SIZE - 1,
4084 NULL, 1);
4085 unlock_page(page);
4086 }
4087 put_page(page);
4088 start += PAGE_SIZE;
4089 }
4090
4091 flush_write_bio(&epd);
4092 return ret;
4093}
4094
4095int extent_writepages(struct address_space *mapping,
4096 struct writeback_control *wbc)
4097{
4098 int ret = 0;
4099 struct extent_page_data epd = {
4100 .bio = NULL,
4101 .tree = &BTRFS_I(mapping->host)->io_tree,
4102 .extent_locked = 0,
4103 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4104 };
4105
4106 ret = extent_write_cache_pages(mapping, wbc, &epd);
4107 flush_write_bio(&epd);
4108 return ret;
4109}
4110
4111int extent_readpages(struct address_space *mapping, struct list_head *pages,
4112 unsigned nr_pages)
4113{
4114 struct bio *bio = NULL;
4115 unsigned page_idx;
4116 unsigned long bio_flags = 0;
4117 struct page *pagepool[16];
4118 struct page *page;
4119 struct extent_map *em_cached = NULL;
4120 struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
4121 int nr = 0;
4122 u64 prev_em_start = (u64)-1;
4123
4124 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
4125 page = list_entry(pages->prev, struct page, lru);
4126
4127 prefetchw(&page->flags);
4128 list_del(&page->lru);
4129 if (add_to_page_cache_lru(page, mapping,
4130 page->index,
4131 readahead_gfp_mask(mapping))) {
4132 put_page(page);
4133 continue;
4134 }
4135
4136 pagepool[nr++] = page;
4137 if (nr < ARRAY_SIZE(pagepool))
4138 continue;
4139 __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
4140 &bio_flags, &prev_em_start);
4141 nr = 0;
4142 }
4143 if (nr)
4144 __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
4145 &bio_flags, &prev_em_start);
4146
4147 if (em_cached)
4148 free_extent_map(em_cached);
4149
4150 BUG_ON(!list_empty(pages));
4151 if (bio)
4152 return submit_one_bio(bio, 0, bio_flags);
4153 return 0;
4154}
4155
4156
4157
4158
4159
4160
4161int extent_invalidatepage(struct extent_io_tree *tree,
4162 struct page *page, unsigned long offset)
4163{
4164 struct extent_state *cached_state = NULL;
4165 u64 start = page_offset(page);
4166 u64 end = start + PAGE_SIZE - 1;
4167 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
4168
4169 start += ALIGN(offset, blocksize);
4170 if (start > end)
4171 return 0;
4172
4173 lock_extent_bits(tree, start, end, &cached_state);
4174 wait_on_page_writeback(page);
4175 clear_extent_bit(tree, start, end,
4176 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
4177 EXTENT_DO_ACCOUNTING,
4178 1, 1, &cached_state);
4179 return 0;
4180}
4181
4182
4183
4184
4185
4186
4187static int try_release_extent_state(struct extent_io_tree *tree,
4188 struct page *page, gfp_t mask)
4189{
4190 u64 start = page_offset(page);
4191 u64 end = start + PAGE_SIZE - 1;
4192 int ret = 1;
4193
4194 if (test_range_bit(tree, start, end,
4195 EXTENT_IOBITS, 0, NULL))
4196 ret = 0;
4197 else {
4198
4199
4200
4201
4202 ret = __clear_extent_bit(tree, start, end,
4203 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
4204 0, 0, NULL, mask, NULL);
4205
4206
4207
4208
4209 if (ret < 0)
4210 ret = 0;
4211 else
4212 ret = 1;
4213 }
4214 return ret;
4215}
4216
4217
4218
4219
4220
4221
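/*
 * Helper for releasepage(): as long as there are no locked or writeback
 * extents in the range and the extent maps are not pinned, drop the extent
 * maps for the page (forcing a full sync later via
 * BTRFS_INODE_NEEDS_FULL_SYNC) and then let try_release_extent_state()
 * decide whether the extent state itself can go.
 */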
4222int try_release_extent_mapping(struct page *page, gfp_t mask)
4223{
4224 struct extent_map *em;
4225 u64 start = page_offset(page);
4226 u64 end = start + PAGE_SIZE - 1;
4227 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
4228 struct extent_io_tree *tree = &btrfs_inode->io_tree;
4229 struct extent_map_tree *map = &btrfs_inode->extent_tree;
4230
4231 if (gfpflags_allow_blocking(mask) &&
4232 page->mapping->host->i_size > SZ_16M) {
4233 u64 len;
4234 while (start <= end) {
4235 len = end - start + 1;
4236 write_lock(&map->lock);
4237 em = lookup_extent_mapping(map, start, len);
4238 if (!em) {
4239 write_unlock(&map->lock);
4240 break;
4241 }
4242 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
4243 em->start != start) {
4244 write_unlock(&map->lock);
4245 free_extent_map(em);
4246 break;
4247 }
4248 if (!test_range_bit(tree, em->start,
4249 extent_map_end(em) - 1,
4250 EXTENT_LOCKED | EXTENT_WRITEBACK,
4251 0, NULL)) {
4252 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4253 &btrfs_inode->runtime_flags);
4254 remove_extent_mapping(map, em);
4255
4256 free_extent_map(em);
4257 }
4258 start = extent_map_end(em);
4259 write_unlock(&map->lock);
4260
4261
4262 free_extent_map(em);
4263 }
4264 }
4265 return try_release_extent_state(tree, page, mask);
4266}
4267
4268
4269
4270
4271
4272static struct extent_map *get_extent_skip_holes(struct inode *inode,
4273 u64 offset, u64 last)
4274{
4275 u64 sectorsize = btrfs_inode_sectorsize(inode);
4276 struct extent_map *em;
4277 u64 len;
4278
4279 if (offset >= last)
4280 return NULL;
4281
4282 while (1) {
4283 len = last - offset;
4284 if (len == 0)
4285 break;
4286 len = ALIGN(len, sectorsize);
4287 em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
4288 len, 0);
4289 if (IS_ERR_OR_NULL(em))
4290 return em;
4291
4292
4293 if (em->block_start != EXTENT_MAP_HOLE)
4294 return em;
4295
4296
4297 offset = extent_map_end(em);
4298 free_extent_map(em);
4299 if (offset >= last)
4300 break;
4301 }
4302 return NULL;
4303}
4304
4305
4306
4307
4308
4309
4310struct fiemap_cache {
4311 u64 offset;
4312 u64 phys;
4313 u64 len;
4314 u32 flags;
4315 bool cached;
4316};
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
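/*
 * Emit one fiemap extent, merging it into the cached one when possible so
 * userspace sees fewer, larger records.  Two entries are merged when they
 * are contiguous in both file offset and physical address and their flags
 * (ignoring FIEMAP_EXTENT_LAST) match.  As an illustrative example (numbers
 * made up, not from this file): a cached entry [offset 0, phys 4096,
 * len 4096] followed by [offset 4096, phys 8192, len 4096] with the same
 * flags becomes a single [offset 0, phys 4096, len 8192] record.  Anything
 * that cannot be merged flushes the cache with fiemap_fill_next_extent()
 * first; an entry carrying FIEMAP_EXTENT_LAST is flushed immediately.
 */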
4328static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
4329 struct fiemap_cache *cache,
4330 u64 offset, u64 phys, u64 len, u32 flags)
4331{
4332 int ret = 0;
4333
4334 if (!cache->cached)
4335 goto assign;
4336
4337
4338
4339
4340
4341
4342
4343
4344 if (cache->offset + cache->len > offset) {
4345 WARN_ON(1);
4346 return -EINVAL;
4347 }
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360 if (cache->offset + cache->len == offset &&
4361 cache->phys + cache->len == phys &&
4362 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
4363 (flags & ~FIEMAP_EXTENT_LAST)) {
4364 cache->len += len;
4365 cache->flags |= flags;
4366 goto try_submit_last;
4367 }
4368
4369
4370 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4371 cache->len, cache->flags);
4372 cache->cached = false;
4373 if (ret)
4374 return ret;
4375assign:
4376 cache->cached = true;
4377 cache->offset = offset;
4378 cache->phys = phys;
4379 cache->len = len;
4380 cache->flags = flags;
4381try_submit_last:
4382 if (cache->flags & FIEMAP_EXTENT_LAST) {
4383 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
4384 cache->phys, cache->len, cache->flags);
4385 cache->cached = false;
4386 }
4387 return ret;
4388}
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
4402 struct fiemap_extent_info *fieinfo,
4403 struct fiemap_cache *cache)
4404{
4405 int ret;
4406
4407 if (!cache->cached)
4408 return 0;
4409
4410 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
4411 cache->len, cache->flags);
4412 cache->cached = false;
4413 if (ret > 0)
4414 ret = 0;
4415 return ret;
4416}
4417
4418int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4419 __u64 start, __u64 len)
4420{
4421 int ret = 0;
4422 u64 off = start;
4423 u64 max = start + len;
4424 u32 flags = 0;
4425 u32 found_type;
4426 u64 last;
4427 u64 last_for_get_extent = 0;
4428 u64 disko = 0;
4429 u64 isize = i_size_read(inode);
4430 struct btrfs_key found_key;
4431 struct extent_map *em = NULL;
4432 struct extent_state *cached_state = NULL;
4433 struct btrfs_path *path;
4434 struct btrfs_root *root = BTRFS_I(inode)->root;
4435 struct fiemap_cache cache = { 0 };
4436 int end = 0;
4437 u64 em_start = 0;
4438 u64 em_len = 0;
4439 u64 em_end = 0;
4440
4441 if (len == 0)
4442 return -EINVAL;
4443
4444 path = btrfs_alloc_path();
4445 if (!path)
4446 return -ENOMEM;
4447 path->leave_spinning = 1;
4448
4449 start = round_down(start, btrfs_inode_sectorsize(inode));
4450 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
4451
4452
4453
4454
4455
4456 ret = btrfs_lookup_file_extent(NULL, root, path,
4457 btrfs_ino(BTRFS_I(inode)), -1, 0);
4458 if (ret < 0) {
4459 btrfs_free_path(path);
4460 return ret;
4461 } else {
4462 WARN_ON(!ret);
4463 if (ret == 1)
4464 ret = 0;
4465 }
4466
4467 path->slots[0]--;
4468 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4469 found_type = found_key.type;
4470
4471
4472 if (found_key.objectid != btrfs_ino(BTRFS_I(inode)) ||
4473 found_type != BTRFS_EXTENT_DATA_KEY) {
4474
4475 last = (u64)-1;
4476 last_for_get_extent = isize;
4477 } else {
4478
4479
4480
4481
4482
4483 last = found_key.offset;
4484 last_for_get_extent = last + 1;
4485 }
4486 btrfs_release_path(path);
4487
4488
4489
4490
4491
4492
4493 if (last < isize) {
4494 last = (u64)-1;
4495 last_for_get_extent = isize;
4496 }
4497
4498 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4499 &cached_state);
4500
4501 em = get_extent_skip_holes(inode, start, last_for_get_extent);
4502 if (!em)
4503 goto out;
4504 if (IS_ERR(em)) {
4505 ret = PTR_ERR(em);
4506 goto out;
4507 }
4508
4509 while (!end) {
4510 u64 offset_in_extent = 0;
4511
4512
4513 if (em->start >= max || extent_map_end(em) < off)
4514 break;
4515
4516
4517
4518
4519
4520
4521
4522 em_start = max(em->start, off);
4523
4524
4525
4526
4527
4528
4529
4530 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4531 offset_in_extent = em_start - em->start;
4532 em_end = extent_map_end(em);
4533 em_len = em_end - em_start;
4534 flags = 0;
4535 if (em->block_start < EXTENT_MAP_LAST_BYTE)
4536 disko = em->block_start + offset_in_extent;
4537 else
4538 disko = 0;
4539
4540
4541
4542
4543 off = extent_map_end(em);
4544 if (off >= max)
4545 end = 1;
4546
4547 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
4548 end = 1;
4549 flags |= FIEMAP_EXTENT_LAST;
4550 } else if (em->block_start == EXTENT_MAP_INLINE) {
4551 flags |= (FIEMAP_EXTENT_DATA_INLINE |
4552 FIEMAP_EXTENT_NOT_ALIGNED);
4553 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
4554 flags |= (FIEMAP_EXTENT_DELALLOC |
4555 FIEMAP_EXTENT_UNKNOWN);
4556 } else if (fieinfo->fi_extents_max) {
4557 u64 bytenr = em->block_start -
4558 (em->start - em->orig_start);
4559
4560
4561
4562
4563
4564
4565
4566
4567 ret = btrfs_check_shared(root,
4568 btrfs_ino(BTRFS_I(inode)),
4569 bytenr);
4570 if (ret < 0)
4571 goto out_free;
4572 if (ret)
4573 flags |= FIEMAP_EXTENT_SHARED;
4574 ret = 0;
4575 }
4576 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4577 flags |= FIEMAP_EXTENT_ENCODED;
4578 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
4579 flags |= FIEMAP_EXTENT_UNWRITTEN;
4580
4581 free_extent_map(em);
4582 em = NULL;
4583 if ((em_start >= last) || em_len == (u64)-1 ||
4584 (last == (u64)-1 && isize <= em_end)) {
4585 flags |= FIEMAP_EXTENT_LAST;
4586 end = 1;
4587 }
4588
4589
4590 em = get_extent_skip_holes(inode, off, last_for_get_extent);
4591 if (IS_ERR(em)) {
4592 ret = PTR_ERR(em);
4593 goto out;
4594 }
4595 if (!em) {
4596 flags |= FIEMAP_EXTENT_LAST;
4597 end = 1;
4598 }
4599 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
4600 em_len, flags);
4601 if (ret) {
4602 if (ret == 1)
4603 ret = 0;
4604 goto out_free;
4605 }
4606 }
4607out_free:
4608 if (!ret)
4609 ret = emit_last_fiemap_cache(root->fs_info, fieinfo, &cache);
4610 free_extent_map(em);
4611out:
4612 btrfs_free_path(path);
4613 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
4614 &cached_state);
4615 return ret;
4616}
4617
4618static void __free_extent_buffer(struct extent_buffer *eb)
4619{
4620 btrfs_leak_debug_del(&eb->leak_list);
4621 kmem_cache_free(extent_buffer_cache, eb);
4622}
4623
4624int extent_buffer_under_io(struct extent_buffer *eb)
4625{
4626 return (atomic_read(&eb->io_pages) ||
4627 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4628 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4629}
4630
4631
4632
4633
4634static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
4635{
4636 int i;
4637 int num_pages;
4638 int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4639
4640 BUG_ON(extent_buffer_under_io(eb));
4641
4642 num_pages = num_extent_pages(eb);
4643 for (i = 0; i < num_pages; i++) {
4644 struct page *page = eb->pages[i];
4645
4646 if (!page)
4647 continue;
4648 if (mapped)
4649 spin_lock(&page->mapping->private_lock);
4650
4651
4652
4653
4654
4655
4656
4657 if (PagePrivate(page) &&
4658 page->private == (unsigned long)eb) {
4659 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4660 BUG_ON(PageDirty(page));
4661 BUG_ON(PageWriteback(page));
4662
4663
4664
4665
4666 ClearPagePrivate(page);
4667 set_page_private(page, 0);
4668
4669 put_page(page);
4670 }
4671
4672 if (mapped)
4673 spin_unlock(&page->mapping->private_lock);
4674
4675
4676 put_page(page);
4677 }
4678}
4679
4680
4681
4682
4683static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4684{
4685 btrfs_release_extent_buffer_pages(eb);
4686 __free_extent_buffer(eb);
4687}
4688
4689static struct extent_buffer *
4690__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4691 unsigned long len)
4692{
4693 struct extent_buffer *eb = NULL;
4694
4695 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
4696 eb->start = start;
4697 eb->len = len;
4698 eb->fs_info = fs_info;
4699 eb->bflags = 0;
4700 rwlock_init(&eb->lock);
4701 atomic_set(&eb->write_locks, 0);
4702 atomic_set(&eb->read_locks, 0);
4703 atomic_set(&eb->blocking_readers, 0);
4704 atomic_set(&eb->blocking_writers, 0);
4705 atomic_set(&eb->spinning_readers, 0);
4706 atomic_set(&eb->spinning_writers, 0);
4707 eb->lock_nested = 0;
4708 init_waitqueue_head(&eb->write_lock_wq);
4709 init_waitqueue_head(&eb->read_lock_wq);
4710
4711 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4712
4713 spin_lock_init(&eb->refs_lock);
4714 atomic_set(&eb->refs, 1);
4715 atomic_set(&eb->io_pages, 0);
4716
4717
4718
4719
4720 BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
4721 > MAX_INLINE_EXTENT_BUFFER_SIZE);
4722 BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
4723
4724 return eb;
4725}
4726
4727struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4728{
4729 int i;
4730 struct page *p;
4731 struct extent_buffer *new;
4732 int num_pages = num_extent_pages(src);
4733
4734 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
4735 if (new == NULL)
4736 return NULL;
4737
4738 for (i = 0; i < num_pages; i++) {
4739 p = alloc_page(GFP_NOFS);
4740 if (!p) {
4741 btrfs_release_extent_buffer(new);
4742 return NULL;
4743 }
4744 attach_extent_buffer_page(new, p);
4745 WARN_ON(PageDirty(p));
4746 SetPageUptodate(p);
4747 new->pages[i] = p;
4748 copy_page(page_address(p), page_address(src->pages[i]));
4749 }
4750
4751 set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
4752 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
4753
4754 return new;
4755}
4756
4757struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4758 u64 start, unsigned long len)
4759{
4760 struct extent_buffer *eb;
4761 int num_pages;
4762 int i;
4763
4764 eb = __alloc_extent_buffer(fs_info, start, len);
4765 if (!eb)
4766 return NULL;
4767
4768 num_pages = num_extent_pages(eb);
4769 for (i = 0; i < num_pages; i++) {
4770 eb->pages[i] = alloc_page(GFP_NOFS);
4771 if (!eb->pages[i])
4772 goto err;
4773 }
4774 set_extent_buffer_uptodate(eb);
4775 btrfs_set_header_nritems(eb, 0);
4776 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
4777
4778 return eb;
4779err:
4780 for (; i > 0; i--)
4781 __free_page(eb->pages[i - 1]);
4782 __free_extent_buffer(eb);
4783 return NULL;
4784}
4785
4786struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
4787 u64 start)
4788{
4789 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
4790}
4791
4792static void check_buffer_tree_ref(struct extent_buffer *eb)
4793{
4794 int refs;
4795
	/*
	 * Make sure the buffer carries the extra reference that belongs to
	 * the radix tree (EXTENT_BUFFER_TREE_REF).  The unlocked check below
	 * presumably relies on callers already holding a reference of their
	 * own: if refs >= 2 and TREE_REF is set there is nothing to do.
	 * Otherwise take refs_lock and, if we are the ones to set TREE_REF,
	 * bump the reference count to account for it.
	 */
4815 refs = atomic_read(&eb->refs);
4816 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4817 return;
4818
4819 spin_lock(&eb->refs_lock);
4820 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4821 atomic_inc(&eb->refs);
4822 spin_unlock(&eb->refs_lock);
4823}
4824
4825static void mark_extent_buffer_accessed(struct extent_buffer *eb,
4826 struct page *accessed)
4827{
4828 int num_pages, i;
4829
4830 check_buffer_tree_ref(eb);
4831
4832 num_pages = num_extent_pages(eb);
4833 for (i = 0; i < num_pages; i++) {
4834 struct page *p = eb->pages[i];
4835
4836 if (p != accessed)
4837 mark_page_accessed(p);
4838 }
4839}
4840
4841struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4842 u64 start)
4843{
4844 struct extent_buffer *eb;
4845
4846 rcu_read_lock();
4847 eb = radix_tree_lookup(&fs_info->buffer_radix,
4848 start >> PAGE_SHIFT);
4849 if (eb && atomic_inc_not_zero(&eb->refs)) {
4850 rcu_read_unlock();
4851
		/*
		 * If the buffer is flagged stale, briefly take and release
		 * refs_lock before touching it further.  This looks like a
		 * barrier against free_extent_buffer(): a task that saw the
		 * stale flag and decided to drop the tree reference does so
		 * under refs_lock, so waiting for that lock here keeps us
		 * from racing with it while reviving the buffer for our own
		 * use.
		 */
4866 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
4867 spin_lock(&eb->refs_lock);
4868 spin_unlock(&eb->refs_lock);
4869 }
4870 mark_extent_buffer_accessed(eb, NULL);
4871 return eb;
4872 }
4873 rcu_read_unlock();
4874
4875 return NULL;
4876}
4877
4878#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4879struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
4880 u64 start)
4881{
4882 struct extent_buffer *eb, *exists = NULL;
4883 int ret;
4884
4885 eb = find_extent_buffer(fs_info, start);
4886 if (eb)
4887 return eb;
4888 eb = alloc_dummy_extent_buffer(fs_info, start);
4889 if (!eb)
4890 return NULL;
4891 eb->fs_info = fs_info;
4892again:
4893 ret = radix_tree_preload(GFP_NOFS);
4894 if (ret)
4895 goto free_eb;
4896 spin_lock(&fs_info->buffer_lock);
4897 ret = radix_tree_insert(&fs_info->buffer_radix,
4898 start >> PAGE_SHIFT, eb);
4899 spin_unlock(&fs_info->buffer_lock);
4900 radix_tree_preload_end();
4901 if (ret == -EEXIST) {
4902 exists = find_extent_buffer(fs_info, start);
4903 if (exists)
4904 goto free_eb;
4905 else
4906 goto again;
4907 }
4908 check_buffer_tree_ref(eb);
4909 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4910
4911
4912
4913
4914
4915
4916
4917 atomic_inc(&eb->refs);
4918 return eb;
4919free_eb:
4920 btrfs_release_extent_buffer(eb);
4921 return exists;
4922}
4923#endif
4924
4925struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4926 u64 start)
4927{
4928 unsigned long len = fs_info->nodesize;
4929 int num_pages;
4930 int i;
4931 unsigned long index = start >> PAGE_SHIFT;
4932 struct extent_buffer *eb;
4933 struct extent_buffer *exists = NULL;
4934 struct page *p;
4935 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4936 int uptodate = 1;
4937 int ret;
4938
4939 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
4940 btrfs_err(fs_info, "bad tree block start %llu", start);
4941 return ERR_PTR(-EINVAL);
4942 }
4943
4944 eb = find_extent_buffer(fs_info, start);
4945 if (eb)
4946 return eb;
4947
4948 eb = __alloc_extent_buffer(fs_info, start, len);
4949 if (!eb)
4950 return ERR_PTR(-ENOMEM);
4951
4952 num_pages = num_extent_pages(eb);
4953 for (i = 0; i < num_pages; i++, index++) {
4954 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
4955 if (!p) {
4956 exists = ERR_PTR(-ENOMEM);
4957 goto free_eb;
4958 }
4959
4960 spin_lock(&mapping->private_lock);
4961 if (PagePrivate(p)) {
			/*
			 * Another extent buffer is already attached to this
			 * page.  If we can still grab a reference on it, use
			 * that one and throw ours away; otherwise it is being
			 * freed, so detach the stale private pointer and
			 * attach the new buffer below.
			 */
4969 exists = (struct extent_buffer *)p->private;
4970 if (atomic_inc_not_zero(&exists->refs)) {
4971 spin_unlock(&mapping->private_lock);
4972 unlock_page(p);
4973 put_page(p);
4974 mark_extent_buffer_accessed(exists, p);
4975 goto free_eb;
4976 }
4977 exists = NULL;
4978
4979
4980
4981
4982
4983 ClearPagePrivate(p);
4984 WARN_ON(PageDirty(p));
4985 put_page(p);
4986 }
4987 attach_extent_buffer_page(eb, p);
4988 spin_unlock(&mapping->private_lock);
4989 WARN_ON(PageDirty(p));
4990 eb->pages[i] = p;
4991 if (!PageUptodate(p))
4992 uptodate = 0;
4993
4994
4995
4996
4997
4998
4999
5000
5001 }
5002 if (uptodate)
5003 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5004again:
5005 ret = radix_tree_preload(GFP_NOFS);
5006 if (ret) {
5007 exists = ERR_PTR(ret);
5008 goto free_eb;
5009 }
5010
5011 spin_lock(&fs_info->buffer_lock);
5012 ret = radix_tree_insert(&fs_info->buffer_radix,
5013 start >> PAGE_SHIFT, eb);
5014 spin_unlock(&fs_info->buffer_lock);
5015 radix_tree_preload_end();
5016 if (ret == -EEXIST) {
5017 exists = find_extent_buffer(fs_info, start);
5018 if (exists)
5019 goto free_eb;
5020 else
5021 goto again;
5022 }
5023
5024 check_buffer_tree_ref(eb);
5025 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
5026
	/*
	 * Now it's safe to unlock the pages: any call to btree_releasepage()
	 * will correctly detect that a page belongs to a live buffer and
	 * won't free it prematurely.
	 */
5032 for (i = 0; i < num_pages; i++)
5033 unlock_page(eb->pages[i]);
5034 return eb;
5035
5036free_eb:
5037 WARN_ON(!atomic_dec_and_test(&eb->refs));
5038 for (i = 0; i < num_pages; i++) {
5039 if (eb->pages[i])
5040 unlock_page(eb->pages[i]);
5041 }
5042
5043 btrfs_release_extent_buffer(eb);
5044 return exists;
5045}
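
/*
 * Illustrative sketch of the allocate-or-reuse pattern implemented above
 * (hypothetical caller; 'fs_info' and a nodesize-aligned 'bytenr' are
 * assumed):
 *
 *	struct extent_buffer *eb;
 *
 *	eb = alloc_extent_buffer(fs_info, bytenr);
 *	if (IS_ERR(eb))
 *		return PTR_ERR(eb);
 *	... read or fill the buffer ...
 *	free_extent_buffer(eb);
 *
 * A second call with the same 'bytenr' finds the cached buffer through
 * find_extent_buffer() instead of allocating fresh pages.
 */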
5046
5047static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
5048{
5049 struct extent_buffer *eb =
5050 container_of(head, struct extent_buffer, rcu_head);
5051
5052 __free_extent_buffer(eb);
5053}
5054
5055static int release_extent_buffer(struct extent_buffer *eb)
5056{
5057 lockdep_assert_held(&eb->refs_lock);
5058
5059 WARN_ON(atomic_read(&eb->refs) == 0);
5060 if (atomic_dec_and_test(&eb->refs)) {
5061 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
5062 struct btrfs_fs_info *fs_info = eb->fs_info;
5063
5064 spin_unlock(&eb->refs_lock);
5065
5066 spin_lock(&fs_info->buffer_lock);
5067 radix_tree_delete(&fs_info->buffer_radix,
5068 eb->start >> PAGE_SHIFT);
5069 spin_unlock(&fs_info->buffer_lock);
5070 } else {
5071 spin_unlock(&eb->refs_lock);
5072 }
5073
		/* Should be safe to release our pages at this point */
5075 btrfs_release_extent_buffer_pages(eb);
5076#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
5077 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
5078 __free_extent_buffer(eb);
5079 return 1;
5080 }
5081#endif
5082 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
5083 return 1;
5084 }
5085 spin_unlock(&eb->refs_lock);
5086
5087 return 0;
5088}
5089
5090void free_extent_buffer(struct extent_buffer *eb)
5091{
5092 int refs;
5093 int old;
5094 if (!eb)
5095 return;
5096
5097 while (1) {
5098 refs = atomic_read(&eb->refs);
5099 if (refs <= 3)
5100 break;
5101 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
5102 if (old == refs)
5103 return;
5104 }
5105
5106 spin_lock(&eb->refs_lock);
5107 if (atomic_read(&eb->refs) == 2 &&
5108 test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))
5109 atomic_dec(&eb->refs);
5110
5111 if (atomic_read(&eb->refs) == 2 &&
5112 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
5113 !extent_buffer_under_io(eb) &&
5114 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5115 atomic_dec(&eb->refs);
5116
	/*
	 * I know this is terrible, but it's temporary until we stop tracking
	 * the uptodate bits and such for the extent buffers.
	 */
5121 release_extent_buffer(eb);
5122}
5123
5124void free_extent_buffer_stale(struct extent_buffer *eb)
5125{
5126 if (!eb)
5127 return;
5128
5129 spin_lock(&eb->refs_lock);
5130 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
5131
5132 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
5133 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
5134 atomic_dec(&eb->refs);
5135 release_extent_buffer(eb);
5136}
5137
5138void clear_extent_buffer_dirty(struct extent_buffer *eb)
5139{
5140 int i;
5141 int num_pages;
5142 struct page *page;
5143
5144 num_pages = num_extent_pages(eb);
5145
5146 for (i = 0; i < num_pages; i++) {
5147 page = eb->pages[i];
5148 if (!PageDirty(page))
5149 continue;
5150
5151 lock_page(page);
5152 WARN_ON(!PagePrivate(page));
5153
5154 clear_page_dirty_for_io(page);
5155 xa_lock_irq(&page->mapping->i_pages);
5156 if (!PageDirty(page)) {
5157 radix_tree_tag_clear(&page->mapping->i_pages,
5158 page_index(page),
5159 PAGECACHE_TAG_DIRTY);
5160 }
5161 xa_unlock_irq(&page->mapping->i_pages);
5162 ClearPageError(page);
5163 unlock_page(page);
5164 }
5165 WARN_ON(atomic_read(&eb->refs) == 0);
5166}
5167
5168int set_extent_buffer_dirty(struct extent_buffer *eb)
5169{
5170 int i;
5171 int num_pages;
5172 int was_dirty = 0;
5173
5174 check_buffer_tree_ref(eb);
5175
5176 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
5177
5178 num_pages = num_extent_pages(eb);
5179 WARN_ON(atomic_read(&eb->refs) == 0);
5180 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
5181
5182 for (i = 0; i < num_pages; i++)
5183 set_page_dirty(eb->pages[i]);
5184 return was_dirty;
5185}
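
/*
 * Usage sketch: the dirty helpers are normally used in pairs.  A hypothetical
 * caller that modifies a tree block and later discards the change might do:
 *
 *	write_extent_buffer(eb, src, offset, len);
 *	set_extent_buffer_dirty(eb);
 *	...
 *	clear_extent_buffer_dirty(eb);
 *
 * set_extent_buffer_dirty() returns whether the buffer was already dirty, so
 * callers can avoid accounting the same block twice.
 */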
5186
5187void clear_extent_buffer_uptodate(struct extent_buffer *eb)
5188{
5189 int i;
5190 struct page *page;
5191 int num_pages;
5192
5193 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5194 num_pages = num_extent_pages(eb);
5195 for (i = 0; i < num_pages; i++) {
5196 page = eb->pages[i];
5197 if (page)
5198 ClearPageUptodate(page);
5199 }
5200}
5201
5202void set_extent_buffer_uptodate(struct extent_buffer *eb)
5203{
5204 int i;
5205 struct page *page;
5206 int num_pages;
5207
5208 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5209 num_pages = num_extent_pages(eb);
5210 for (i = 0; i < num_pages; i++) {
5211 page = eb->pages[i];
5212 SetPageUptodate(page);
5213 }
5214}
5215
5216int read_extent_buffer_pages(struct extent_io_tree *tree,
5217 struct extent_buffer *eb, int wait, int mirror_num)
5218{
5219 int i;
5220 struct page *page;
5221 int err;
5222 int ret = 0;
5223 int locked_pages = 0;
5224 int all_uptodate = 1;
5225 int num_pages;
5226 unsigned long num_reads = 0;
5227 struct bio *bio = NULL;
5228 unsigned long bio_flags = 0;
5229
5230 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
5231 return 0;
5232
5233 num_pages = num_extent_pages(eb);
5234 for (i = 0; i < num_pages; i++) {
5235 page = eb->pages[i];
5236 if (wait == WAIT_NONE) {
5237 if (!trylock_page(page))
5238 goto unlock_exit;
5239 } else {
5240 lock_page(page);
5241 }
5242 locked_pages++;
5243 }
	/*
	 * We need to lock all pages first, to make sure that the uptodate bit
	 * of our pages won't be affected by a concurrent
	 * clear_extent_buffer_uptodate().
	 */
5249 for (i = 0; i < num_pages; i++) {
5250 page = eb->pages[i];
5251 if (!PageUptodate(page)) {
5252 num_reads++;
5253 all_uptodate = 0;
5254 }
5255 }
5256
5257 if (all_uptodate) {
5258 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
5259 goto unlock_exit;
5260 }
5261
5262 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
5263 eb->read_mirror = 0;
5264 atomic_set(&eb->io_pages, num_reads);
5265 for (i = 0; i < num_pages; i++) {
5266 page = eb->pages[i];
5267
5268 if (!PageUptodate(page)) {
5269 if (ret) {
5270 atomic_dec(&eb->io_pages);
5271 unlock_page(page);
5272 continue;
5273 }
5274
5275 ClearPageError(page);
5276 err = __extent_read_full_page(tree, page,
5277 btree_get_extent, &bio,
5278 mirror_num, &bio_flags,
5279 REQ_META);
5280 if (err) {
5281 ret = err;
				/*
				 * We use &bio in the call to
				 * __extent_read_full_page() above, so if it
				 * returns an error we know the current page
				 * failed to add itself to the bio and has
				 * already been unlocked.
				 *
				 * We must decrement io_pages ourselves.
				 */
5290 atomic_dec(&eb->io_pages);
5291 }
5292 } else {
5293 unlock_page(page);
5294 }
5295 }
5296
5297 if (bio) {
5298 err = submit_one_bio(bio, mirror_num, bio_flags);
5299 if (err)
5300 return err;
5301 }
5302
5303 if (ret || wait != WAIT_COMPLETE)
5304 return ret;
5305
5306 for (i = 0; i < num_pages; i++) {
5307 page = eb->pages[i];
5308 wait_on_page_locked(page);
5309 if (!PageUptodate(page))
5310 ret = -EIO;
5311 }
5312
5313 return ret;
5314
5315unlock_exit:
5316 while (locked_pages > 0) {
5317 locked_pages--;
5318 page = eb->pages[locked_pages];
5319 unlock_page(page);
5320 }
5321 return ret;
5322}
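
/*
 * Usage sketch: a hypothetical synchronous read of a tree block.  The io_tree
 * is assumed to be the btree inode's, which is what the btree read paths in
 * disk-io.c pass in.
 *
 *	struct extent_io_tree *io_tree =
 *		&BTRFS_I(eb->pages[0]->mapping->host)->io_tree;
 *	int ret;
 *
 *	ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, 0);
 *	if (ret)
 *		return ret;
 *
 * With WAIT_COMPLETE the return value is -EIO if any page failed to become
 * uptodate; with WAIT_NONE the reads are only submitted and 0 is returned
 * unless submission itself failed.
 */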
5323
5324void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
5325 unsigned long start, unsigned long len)
5326{
5327 size_t cur;
5328 size_t offset;
5329 struct page *page;
5330 char *kaddr;
5331 char *dst = (char *)dstv;
5332 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5333 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5334
5335 if (start + len > eb->len) {
5336 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5337 eb->start, eb->len, start, len);
5338 memset(dst, 0, len);
5339 return;
5340 }
5341
5342 offset = (start_offset + start) & (PAGE_SIZE - 1);
5343
5344 while (len > 0) {
5345 page = eb->pages[i];
5346
5347 cur = min(len, (PAGE_SIZE - offset));
5348 kaddr = page_address(page);
5349 memcpy(dst, kaddr + offset, cur);
5350
5351 dst += cur;
5352 len -= cur;
5353 offset = 0;
5354 i++;
5355 }
5356}
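
/*
 * Usage sketch: copying the fsid out of a tree block header with the helper
 * above; the offset and size come from struct btrfs_header, just as in
 * write_extent_buffer_fsid() further down.
 *
 *	u8 fsid[BTRFS_FSID_SIZE];
 *
 *	read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
 *			   BTRFS_FSID_SIZE);
 */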
5357
5358int read_extent_buffer_to_user(const struct extent_buffer *eb,
5359 void __user *dstv,
5360 unsigned long start, unsigned long len)
5361{
5362 size_t cur;
5363 size_t offset;
5364 struct page *page;
5365 char *kaddr;
5366 char __user *dst = (char __user *)dstv;
5367 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5368 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5369 int ret = 0;
5370
5371 WARN_ON(start > eb->len);
5372 WARN_ON(start + len > eb->start + eb->len);
5373
5374 offset = (start_offset + start) & (PAGE_SIZE - 1);
5375
5376 while (len > 0) {
5377 page = eb->pages[i];
5378
5379 cur = min(len, (PAGE_SIZE - offset));
5380 kaddr = page_address(page);
5381 if (copy_to_user(dst, kaddr + offset, cur)) {
5382 ret = -EFAULT;
5383 break;
5384 }
5385
5386 dst += cur;
5387 len -= cur;
5388 offset = 0;
5389 i++;
5390 }
5391
5392 return ret;
5393}
5394
/*
 * Return 0 if the requested range is contained within a single page.
 * Return 1 if the range spans two pages.
 * Return -EINVAL if the range falls outside the extent buffer.
 */
5400int map_private_extent_buffer(const struct extent_buffer *eb,
5401 unsigned long start, unsigned long min_len,
5402 char **map, unsigned long *map_start,
5403 unsigned long *map_len)
5404{
5405 size_t offset = start & (PAGE_SIZE - 1);
5406 char *kaddr;
5407 struct page *p;
5408 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5409 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5410 unsigned long end_i = (start_offset + start + min_len - 1) >>
5411 PAGE_SHIFT;
5412
5413 if (start + min_len > eb->len) {
5414 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n",
5415 eb->start, eb->len, start, min_len);
5416 return -EINVAL;
5417 }
5418
5419 if (i != end_i)
5420 return 1;
5421
5422 if (i == 0) {
5423 offset = start_offset;
5424 *map_start = 0;
5425 } else {
5426 offset = 0;
5427 *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
5428 }
5429
5430 p = eb->pages[i];
5431 kaddr = page_address(p);
5432 *map = kaddr + offset;
5433 *map_len = PAGE_SIZE - offset;
5434 return 0;
5435}
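
/*
 * Usage sketch of the map-or-fallback pattern a caller of the helper above
 * could use ('buf', 'start' and 'len' are hypothetical): when the requested
 * range straddles a page boundary (return value 1), fall back to copying
 * through read_extent_buffer().
 *
 *	char *kaddr;
 *	unsigned long map_start;
 *	unsigned long map_len;
 *	int err;
 *
 *	err = map_private_extent_buffer(eb, start, len, &kaddr,
 *					&map_start, &map_len);
 *	if (err == 1)
 *		read_extent_buffer(eb, buf, start, len);
 *	else if (!err)
 *		memcpy(buf, kaddr + start - map_start, len);
 */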
5436
5437int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
5438 unsigned long start, unsigned long len)
5439{
5440 size_t cur;
5441 size_t offset;
5442 struct page *page;
5443 char *kaddr;
5444 char *ptr = (char *)ptrv;
5445 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5446 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5447 int ret = 0;
5448
5449 WARN_ON(start > eb->len);
5450 WARN_ON(start + len > eb->start + eb->len);
5451
5452 offset = (start_offset + start) & (PAGE_SIZE - 1);
5453
5454 while (len > 0) {
5455 page = eb->pages[i];
5456
5457 cur = min(len, (PAGE_SIZE - offset));
5458
5459 kaddr = page_address(page);
5460 ret = memcmp(ptr, kaddr + offset, cur);
5461 if (ret)
5462 break;
5463
5464 ptr += cur;
5465 len -= cur;
5466 offset = 0;
5467 i++;
5468 }
5469 return ret;
5470}
5471
5472void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
5473 const void *srcv)
5474{
5475 char *kaddr;
5476
5477 WARN_ON(!PageUptodate(eb->pages[0]));
5478 kaddr = page_address(eb->pages[0]);
5479 memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
5480 BTRFS_FSID_SIZE);
5481}
5482
5483void write_extent_buffer_fsid(struct extent_buffer *eb, const void *srcv)
5484{
5485 char *kaddr;
5486
5487 WARN_ON(!PageUptodate(eb->pages[0]));
5488 kaddr = page_address(eb->pages[0]);
5489 memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
5490 BTRFS_FSID_SIZE);
5491}
5492
5493void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
5494 unsigned long start, unsigned long len)
5495{
5496 size_t cur;
5497 size_t offset;
5498 struct page *page;
5499 char *kaddr;
5500 char *src = (char *)srcv;
5501 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5502 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5503
5504 WARN_ON(start > eb->len);
5505 WARN_ON(start + len > eb->start + eb->len);
5506
5507 offset = (start_offset + start) & (PAGE_SIZE - 1);
5508
5509 while (len > 0) {
5510 page = eb->pages[i];
5511 WARN_ON(!PageUptodate(page));
5512
5513 cur = min(len, PAGE_SIZE - offset);
5514 kaddr = page_address(page);
5515 memcpy(kaddr + offset, src, cur);
5516
5517 src += cur;
5518 len -= cur;
5519 offset = 0;
5520 i++;
5521 }
5522}
5523
5524void memzero_extent_buffer(struct extent_buffer *eb, unsigned long start,
5525 unsigned long len)
5526{
5527 size_t cur;
5528 size_t offset;
5529 struct page *page;
5530 char *kaddr;
5531 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5532 unsigned long i = (start_offset + start) >> PAGE_SHIFT;
5533
5534 WARN_ON(start > eb->len);
5535 WARN_ON(start + len > eb->start + eb->len);
5536
5537 offset = (start_offset + start) & (PAGE_SIZE - 1);
5538
5539 while (len > 0) {
5540 page = eb->pages[i];
5541 WARN_ON(!PageUptodate(page));
5542
5543 cur = min(len, PAGE_SIZE - offset);
5544 kaddr = page_address(page);
5545 memset(kaddr + offset, 0, cur);
5546
5547 len -= cur;
5548 offset = 0;
5549 i++;
5550 }
5551}
5552
5553void copy_extent_buffer_full(struct extent_buffer *dst,
5554 struct extent_buffer *src)
5555{
5556 int i;
5557 int num_pages;
5558
5559 ASSERT(dst->len == src->len);
5560
5561 num_pages = num_extent_pages(dst);
5562 for (i = 0; i < num_pages; i++)
5563 copy_page(page_address(dst->pages[i]),
5564 page_address(src->pages[i]));
5565}
5566
5567void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
5568 unsigned long dst_offset, unsigned long src_offset,
5569 unsigned long len)
5570{
5571 u64 dst_len = dst->len;
5572 size_t cur;
5573 size_t offset;
5574 struct page *page;
5575 char *kaddr;
5576 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5577 unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
5578
5579 WARN_ON(src->len != dst_len);
5580
5581 offset = (start_offset + dst_offset) &
5582 (PAGE_SIZE - 1);
5583
5584 while (len > 0) {
5585 page = dst->pages[i];
5586 WARN_ON(!PageUptodate(page));
5587
5588 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
5589
5590 kaddr = page_address(page);
5591 read_extent_buffer(src, kaddr + offset, src_offset, cur);
5592
5593 src_offset += cur;
5594 len -= cur;
5595 offset = 0;
5596 i++;
5597 }
5598}
5599
/*
 * eb_bitmap_offset() - calculate the page and offset of the byte containing
 * the given bit number
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page in the extent buffer that contains
 * the given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer
 * which contains a given bit.
 */
5613static inline void eb_bitmap_offset(struct extent_buffer *eb,
5614 unsigned long start, unsigned long nr,
5615 unsigned long *page_index,
5616 size_t *page_offset)
5617{
5618 size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
5619 size_t byte_offset = BIT_BYTE(nr);
5620 size_t offset;
5621
	/*
	 * The byte we want is the offset of the extent buffer + the offset of
	 * the bitmap item in the extent buffer + the offset of the byte in the
	 * bitmap item.
	 */
5627 offset = start_offset + start + byte_offset;
5628
5629 *page_index = offset >> PAGE_SHIFT;
5630 *page_offset = offset & (PAGE_SIZE - 1);
5631}
5632
/*
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 */
5639int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
5640 unsigned long nr)
5641{
5642 u8 *kaddr;
5643 struct page *page;
5644 unsigned long i;
5645 size_t offset;
5646
5647 eb_bitmap_offset(eb, start, nr, &i, &offset);
5648 page = eb->pages[i];
5649 WARN_ON(!PageUptodate(page));
5650 kaddr = page_address(page);
5651 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
5652}
5653
/*
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to set
 * @len: number of bits to set
 */
5661void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
5662 unsigned long pos, unsigned long len)
5663{
5664 u8 *kaddr;
5665 struct page *page;
5666 unsigned long i;
5667 size_t offset;
5668 const unsigned int size = pos + len;
5669 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5670 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
5671
5672 eb_bitmap_offset(eb, start, pos, &i, &offset);
5673 page = eb->pages[i];
5674 WARN_ON(!PageUptodate(page));
5675 kaddr = page_address(page);
5676
5677 while (len >= bits_to_set) {
5678 kaddr[offset] |= mask_to_set;
5679 len -= bits_to_set;
5680 bits_to_set = BITS_PER_BYTE;
5681 mask_to_set = ~0;
5682 if (++offset >= PAGE_SIZE && len > 0) {
5683 offset = 0;
5684 page = eb->pages[++i];
5685 WARN_ON(!PageUptodate(page));
5686 kaddr = page_address(page);
5687 }
5688 }
5689 if (len) {
5690 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
5691 kaddr[offset] |= mask_to_set;
5692 }
5693}
5694
/*
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit to clear
 * @len: number of bits to clear
 */
5703void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
5704 unsigned long pos, unsigned long len)
5705{
5706 u8 *kaddr;
5707 struct page *page;
5708 unsigned long i;
5709 size_t offset;
5710 const unsigned int size = pos + len;
5711 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
5712 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
5713
5714 eb_bitmap_offset(eb, start, pos, &i, &offset);
5715 page = eb->pages[i];
5716 WARN_ON(!PageUptodate(page));
5717 kaddr = page_address(page);
5718
5719 while (len >= bits_to_clear) {
5720 kaddr[offset] &= ~mask_to_clear;
5721 len -= bits_to_clear;
5722 bits_to_clear = BITS_PER_BYTE;
5723 mask_to_clear = ~0;
5724 if (++offset >= PAGE_SIZE && len > 0) {
5725 offset = 0;
5726 page = eb->pages[++i];
5727 WARN_ON(!PageUptodate(page));
5728 kaddr = page_address(page);
5729 }
5730 }
5731 if (len) {
5732 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
5733 kaddr[offset] &= ~mask_to_clear;
5734 }
5735}
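
/*
 * Usage sketch for the bitmap helpers above (values are hypothetical; in
 * btrfs these helpers operate on free space tree bitmap items).  Set bits
 * [8, 24), clear bits [12, 16), then test one of the cleared bits:
 *
 *	extent_buffer_bitmap_set(eb, bitmap_start, 8, 16);
 *	extent_buffer_bitmap_clear(eb, bitmap_start, 12, 4);
 *	if (extent_buffer_test_bit(eb, bitmap_start, 13))
 *		... not reached: bit 13 was cleared again ...
 */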
5736
5737static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
5738{
5739 unsigned long distance = (src > dst) ? src - dst : dst - src;
5740 return distance < len;
5741}
5742
5743static void copy_pages(struct page *dst_page, struct page *src_page,
5744 unsigned long dst_off, unsigned long src_off,
5745 unsigned long len)
5746{
5747 char *dst_kaddr = page_address(dst_page);
5748 char *src_kaddr;
5749 int must_memmove = 0;
5750
5751 if (dst_page != src_page) {
5752 src_kaddr = page_address(src_page);
5753 } else {
5754 src_kaddr = dst_kaddr;
5755 if (areas_overlap(src_off, dst_off, len))
5756 must_memmove = 1;
5757 }
5758
5759 if (must_memmove)
5760 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
5761 else
5762 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
5763}
5764
5765void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5766 unsigned long src_offset, unsigned long len)
5767{
5768 struct btrfs_fs_info *fs_info = dst->fs_info;
5769 size_t cur;
5770 size_t dst_off_in_page;
5771 size_t src_off_in_page;
5772 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5773 unsigned long dst_i;
5774 unsigned long src_i;
5775
5776 if (src_offset + len > dst->len) {
5777 btrfs_err(fs_info,
5778 "memmove bogus src_offset %lu move len %lu dst len %lu",
5779 src_offset, len, dst->len);
5780 BUG_ON(1);
5781 }
5782 if (dst_offset + len > dst->len) {
5783 btrfs_err(fs_info,
5784 "memmove bogus dst_offset %lu move len %lu dst len %lu",
5785 dst_offset, len, dst->len);
5786 BUG_ON(1);
5787 }
5788
5789 while (len > 0) {
5790 dst_off_in_page = (start_offset + dst_offset) &
5791 (PAGE_SIZE - 1);
5792 src_off_in_page = (start_offset + src_offset) &
5793 (PAGE_SIZE - 1);
5794
5795 dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
5796 src_i = (start_offset + src_offset) >> PAGE_SHIFT;
5797
5798 cur = min(len, (unsigned long)(PAGE_SIZE -
5799 src_off_in_page));
5800 cur = min_t(unsigned long, cur,
5801 (unsigned long)(PAGE_SIZE - dst_off_in_page));
5802
5803 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5804 dst_off_in_page, src_off_in_page, cur);
5805
5806 src_offset += cur;
5807 dst_offset += cur;
5808 len -= cur;
5809 }
5810}
5811
5812void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5813 unsigned long src_offset, unsigned long len)
5814{
5815 struct btrfs_fs_info *fs_info = dst->fs_info;
5816 size_t cur;
5817 size_t dst_off_in_page;
5818 size_t src_off_in_page;
5819 unsigned long dst_end = dst_offset + len - 1;
5820 unsigned long src_end = src_offset + len - 1;
5821 size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
5822 unsigned long dst_i;
5823 unsigned long src_i;
5824
5825 if (src_offset + len > dst->len) {
5826 btrfs_err(fs_info,
5827 "memmove bogus src_offset %lu move len %lu len %lu",
5828 src_offset, len, dst->len);
5829 BUG_ON(1);
5830 }
5831 if (dst_offset + len > dst->len) {
5832 btrfs_err(fs_info,
5833 "memmove bogus dst_offset %lu move len %lu len %lu",
5834 dst_offset, len, dst->len);
5835 BUG_ON(1);
5836 }
5837 if (dst_offset < src_offset) {
5838 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
5839 return;
5840 }
5841 while (len > 0) {
5842 dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
5843 src_i = (start_offset + src_end) >> PAGE_SHIFT;
5844
5845 dst_off_in_page = (start_offset + dst_end) &
5846 (PAGE_SIZE - 1);
5847 src_off_in_page = (start_offset + src_end) &
5848 (PAGE_SIZE - 1);
5849
5850 cur = min_t(unsigned long, len, src_off_in_page + 1);
5851 cur = min(cur, dst_off_in_page + 1);
5852 copy_pages(dst->pages[dst_i], dst->pages[src_i],
5853 dst_off_in_page - cur + 1,
5854 src_off_in_page - cur + 1, cur);
5855
5856 dst_end -= cur;
5857 src_end -= cur;
5858 len -= cur;
5859 }
5860}
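
/*
 * Usage sketch: shifting a region within one extent buffer, e.g. moving
 * 'len' bytes at 'offset' down by 'shift' bytes ('offset', 'shift' and 'len'
 * are hypothetical values):
 *
 *	memmove_extent_buffer(eb, offset + shift, offset, len);
 *
 * Overlapping ranges are safe here because the copy runs backwards when
 * dst_offset > src_offset; for dst_offset < src_offset the function simply
 * delegates to memcpy_extent_buffer().
 */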
5861
5862int try_release_extent_buffer(struct page *page)
5863{
5864 struct extent_buffer *eb;
5865
	/*
	 * We need to make sure nobody is attaching this page to an eb right
	 * now.
	 */
5870 spin_lock(&page->mapping->private_lock);
5871 if (!PagePrivate(page)) {
5872 spin_unlock(&page->mapping->private_lock);
5873 return 1;
5874 }
5875
5876 eb = (struct extent_buffer *)page->private;
5877 BUG_ON(!eb);
5878
	/*
	 * This is a little awkward: the page is locked and still points at
	 * this eb, but the eb may be referenced elsewhere or under IO.  Take
	 * eb->refs_lock so the check below and the final release are
	 * serialized against free_extent_buffer().
	 */
5884 spin_lock(&eb->refs_lock);
5885 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
5886 spin_unlock(&eb->refs_lock);
5887 spin_unlock(&page->mapping->private_lock);
5888 return 0;
5889 }
5890 spin_unlock(&page->mapping->private_lock);
5891
	/*
	 * If the tree ref isn't set then we know the ref on this eb is a
	 * real ref, so just return; this page will likely be freed soon
	 * anyway.
	 */
5896 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
5897 spin_unlock(&eb->refs_lock);
5898 return 0;
5899 }
5900
5901 return release_extent_buffer(eb);
5902}
5903